From 7bd3a33ae6d2b820bc44a206f9b81b96840219fd Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 17 Jun 2020 11:31:32 -0700
Subject: libbpf: Bump version to 0.1.0

Bump libbpf version to 0.1.0, as new development cycle starts.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200617183132.1970836-1-andriin@fb.com
---
 tools/lib/bpf/libbpf.map | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f732c77b7ed0..c914347f5065 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -270,3 +270,6 @@ LIBBPF_0.0.9 {
 		ring_buffer__new;
 		ring_buffer__poll;
 } LIBBPF_0.0.8;
+
+LIBBPF_0.1.0 {
+} LIBBPF_0.0.9;
-- 
cgit 


From d56b74b9e1b8d747171dc6ff60315c00c41562ce Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 18 Jun 2020 16:46:32 -0700
Subject: tools/bpf: Add verifier tests for 32bit pointer/scalar arithmetic

Added two test_verifier subtests for 32bit pointer/scalar arithmetic
with BPF_SUB operator. They are passing verifier now.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200618234632.3321367-1-yhs@fb.com
---
 .../selftests/bpf/verifier/value_ptr_arith.c       | 38 ++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index 97ee658e1242..ed4e76b24649 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -836,3 +836,41 @@
 	.errstr = "R0 invalid mem access 'inv'",
 	.errstr_unpriv = "R0 pointer -= pointer prohibited",
 },
+{
+	"32bit pkt_ptr -= scalar",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+		    offsetof(struct __sk_buff, data_end)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+		    offsetof(struct __sk_buff, data)),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
+	BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
+	BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_7),
+	BPF_ALU32_REG(BPF_SUB, BPF_REG_6, BPF_REG_4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+	"32bit scalar -= pkt_ptr",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+		    offsetof(struct __sk_buff, data_end)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+		    offsetof(struct __sk_buff, data)),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
+	BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
+	BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_6),
+	BPF_ALU32_REG(BPF_SUB, BPF_REG_4, BPF_REG_7),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
-- 
cgit 


From bb8dc2695a7db4f35c1de94d212f86229bb4a5d2 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 15:20:24 -0700
Subject: tools/bpftool: Relicense bpftool's BPF profiler prog as dual-license
 GPL/BSD

Relicense it to be compatible with the rest of bpftool files.

Suggested-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200619222024.519774-1-andriin@fb.com
---
 tools/bpf/bpftool/skeleton/profiler.bpf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index 20034c12f7c5..c9d196ddb670 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 // Copyright (c) 2020 Facebook
 #include "profiler.h"
 #include <linux/bpf.h>
@@ -116,4 +116,4 @@ int BPF_PROG(fexit_XXX)
 	return 0;
 }
 
-char LICENSE[] SEC("license") = "GPL";
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
-- 
cgit 


From 8735e6eaa43899d20a1a54b40e79bfa6b324b107 Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Sat, 20 Jun 2020 00:54:47 +0200
Subject: selftests: add selftest for the VRF strict mode

The new strict mode functionality is tested in different configurations and
on different network namespaces.

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/vrf_strict_mode_test.sh  | 390 +++++++++++++++++++++
 1 file changed, 390 insertions(+)
 create mode 100755 tools/testing/selftests/net/vrf_strict_mode_test.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
new file mode 100755
index 000000000000..5274f4a1fba1
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -0,0 +1,390 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is designed for testing the new VRF strict_mode functionality.
+
+ret=0
+
+# identifies the "init" network namespace which is often called root network
+# namespace.
+INIT_NETNS_NAME="init"
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	if [ "$TESTS" != "none" ]; then
+		printf "\nTests passed: %3d\n" ${nsuccess}
+		printf "Tests failed: %3d\n"   ${nfail}
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+ip_expand_args()
+{
+	local nsname=$1
+	local nsarg=""
+
+	if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+		nsarg="-netns ${nsname}"
+	fi
+
+	echo "${nsarg}"
+}
+
+vrf_count()
+{
+	local nsname=$1
+	local nsarg="$(ip_expand_args ${nsname})"
+
+	ip ${nsarg} -o link show type vrf | wc -l
+}
+
+count_vrf_by_table_id()
+{
+	local nsname=$1
+	local tableid=$2
+	local nsarg="$(ip_expand_args ${nsname})"
+
+	ip ${nsarg} -d -o link show type vrf | grep "table ${tableid}" | wc -l
+}
+
+add_vrf()
+{
+	local nsname=$1
+	local vrfname=$2
+	local vrftable=$3
+	local nsarg="$(ip_expand_args ${nsname})"
+
+	ip ${nsarg} link add ${vrfname} type vrf table ${vrftable} &>/dev/null
+}
+
+add_vrf_and_check()
+{
+	local nsname=$1
+	local vrfname=$2
+	local vrftable=$3
+	local cnt
+	local rc
+
+	add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+	cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+	log_test ${rc} 0 "${nsname}: add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+add_vrf_and_check_fail()
+{
+	local nsname=$1
+	local vrfname=$2
+	local vrftable=$3
+	local cnt
+	local rc
+
+	add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+	cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+	log_test ${rc} 2 "${nsname}: CANNOT add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+del_vrf_and_check()
+{
+	local nsname=$1
+	local vrfname=$2
+	local nsarg="$(ip_expand_args ${nsname})"
+
+	ip ${nsarg} link del ${vrfname}
+	log_test $? 0 "${nsname}: remove vrf ${vrfname}"
+}
+
+config_vrf_and_check()
+{
+	local nsname=$1
+	local addr=$2
+	local vrfname=$3
+	local nsarg="$(ip_expand_args ${nsname})"
+
+	ip ${nsarg} link set dev ${vrfname} up && \
+		ip ${nsarg} addr add ${addr} dev ${vrfname}
+	log_test $? 0 "${nsname}: vrf ${vrfname} up, addr ${addr}"
+}
+
+read_strict_mode()
+{
+	local nsname=$1
+	local rval
+	local rc=0
+	local nsexec=""
+
+	if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+		# a custom network namespace is provided
+		nsexec="ip netns exec ${nsname}"
+	fi
+
+	rval="$(${nsexec} bash -c "cat /proc/sys/net/vrf/strict_mode" | \
+		grep -E "^[0-1]$")" &> /dev/null
+	if [ $? -ne 0 ]; then
+		# set errors
+		rval=255
+		rc=1
+	fi
+
+	# on success, rval can be only 0 or 1; on error, rval is equal to 255
+	echo ${rval}
+	return ${rc}
+}
+
+read_strict_mode_compare_and_check()
+{
+	local nsname=$1
+	local expected=$2
+	local res
+
+	res="$(read_strict_mode ${nsname})"
+	log_test ${res} ${expected} "${nsname}: check strict_mode=${res}"
+}
+
+set_strict_mode()
+{
+	local nsname=$1
+	local val=$2
+	local nsexec=""
+
+	if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+		# a custom network namespace is provided
+		nsexec="ip netns exec ${nsname}"
+	fi
+
+	${nsexec} bash -c "echo ${val} >/proc/sys/net/vrf/strict_mode" &>/dev/null
+}
+
+enable_strict_mode()
+{
+	local nsname=$1
+
+	set_strict_mode ${nsname} 1
+}
+
+disable_strict_mode()
+{
+	local nsname=$1
+
+	set_strict_mode ${nsname} 0
+}
+
+disable_strict_mode_and_check()
+{
+	local nsname=$1
+
+	disable_strict_mode ${nsname}
+	log_test $? 0 "${nsname}: disable strict_mode (=0)"
+}
+
+enable_strict_mode_and_check()
+{
+	local nsname=$1
+
+	enable_strict_mode ${nsname}
+	log_test $? 0 "${nsname}: enable strict_mode (=1)"
+}
+
+enable_strict_mode_and_check_fail()
+{
+	local nsname=$1
+
+	enable_strict_mode ${nsname}
+	log_test $? 1 "${nsname}: CANNOT enable strict_mode"
+}
+
+strict_mode_check_default()
+{
+	local nsname=$1
+	local strictmode
+	local vrfcnt
+
+	vrfcnt=$(vrf_count ${nsname})
+	strictmode=$(read_strict_mode ${nsname})
+	log_test ${strictmode} 0 "${nsname}: strict_mode=0 by default, ${vrfcnt} vrfs"
+}
+
+setup()
+{
+	modprobe vrf
+
+	ip netns add testns
+	ip netns exec testns ip link set lo up
+}
+
+cleanup()
+{
+	ip netns del testns 2>/dev/null
+
+	ip link del vrf100 2>/dev/null
+	ip link del vrf101 2>/dev/null
+	ip link del vrf102 2>/dev/null
+
+	echo 0 >/proc/sys/net/vrf/strict_mode 2>/dev/null
+}
+
+vrf_strict_mode_tests_init()
+{
+	vrf_strict_mode_check_support init
+
+	strict_mode_check_default init
+
+	add_vrf_and_check init vrf100 100
+	config_vrf_and_check init 172.16.100.1/24 vrf100
+
+	enable_strict_mode_and_check init
+
+	add_vrf_and_check_fail init vrf101 100
+
+	disable_strict_mode_and_check init
+
+	add_vrf_and_check init vrf101 100
+	config_vrf_and_check init 172.16.101.1/24 vrf101
+
+	enable_strict_mode_and_check_fail init
+
+	del_vrf_and_check init vrf101
+
+	enable_strict_mode_and_check init
+
+	add_vrf_and_check init vrf102 102
+	config_vrf_and_check init 172.16.102.1/24 vrf102
+
+	# the strict_modle is enabled in the init
+}
+
+vrf_strict_mode_tests_testns()
+{
+	vrf_strict_mode_check_support testns
+
+	strict_mode_check_default testns
+
+	enable_strict_mode_and_check testns
+
+	add_vrf_and_check testns vrf100 100
+	config_vrf_and_check testns 10.0.100.1/24 vrf100
+
+	add_vrf_and_check_fail testns vrf101 100
+
+	add_vrf_and_check_fail testns vrf102 100
+
+	add_vrf_and_check testns vrf200 200
+
+	disable_strict_mode_and_check testns
+
+	add_vrf_and_check testns vrf101 100
+
+	add_vrf_and_check testns vrf102 100
+
+	#the strict_mode is disabled in the testns
+}
+
+vrf_strict_mode_tests_mix()
+{
+	read_strict_mode_compare_and_check init 1
+
+	read_strict_mode_compare_and_check testns 0
+
+	del_vrf_and_check testns vrf101
+
+	del_vrf_and_check testns vrf102
+
+	disable_strict_mode_and_check init
+
+	enable_strict_mode_and_check testns
+
+	enable_strict_mode_and_check init
+	enable_strict_mode_and_check init
+
+	disable_strict_mode_and_check testns
+	disable_strict_mode_and_check testns
+
+	read_strict_mode_compare_and_check init 1
+
+	read_strict_mode_compare_and_check testns 0
+}
+
+vrf_strict_mode_tests()
+{
+	log_section "VRF strict_mode test on init network namespace"
+	vrf_strict_mode_tests_init
+
+	log_section "VRF strict_mode test on testns network namespace"
+	vrf_strict_mode_tests_testns
+
+	log_section "VRF strict_mode test mixing init and testns network namespaces"
+	vrf_strict_mode_tests_mix
+}
+
+vrf_strict_mode_check_support()
+{
+	local nsname=$1
+	local output
+	local rc
+
+	output="$(lsmod | grep '^vrf' | awk '{print $1}')"
+	if [ -z "${output}" ]; then
+		modinfo vrf || return $?
+	fi
+
+	# we do not care about the value of the strict_mode; we only check if
+	# the strict_mode parameter is available or not.
+	read_strict_mode ${nsname} &>/dev/null; rc=$?
+	log_test ${rc} 0 "${nsname}: net.vrf.strict_mode is available"
+
+	return ${rc}
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 0
+fi
+
+cleanup &> /dev/null
+
+setup
+vrf_strict_mode_tests
+cleanup
+
+print_log_test_results
+
+exit $ret
-- 
cgit 


From 41c48f3a98231738c5ce79f6f2aa6e40ba924d18 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 19 Jun 2020 14:11:43 -0700
Subject: bpf: Support access to bpf map fields

There are multiple use-cases when it's convenient to have access to bpf
map fields, both `struct bpf_map` and map type specific struct-s such as
`struct bpf_array`, `struct bpf_htab`, etc.

For example while working with sock arrays it can be necessary to
calculate the key based on map->max_entries (some_hash % max_entries).
Currently this is solved by communicating max_entries via "out-of-band"
channel, e.g. via additional map with known key to get info about target
map. That works, but is not very convenient and error-prone while
working with many maps.

In other cases necessary data is dynamic (i.e. unknown at loading time)
and it's impossible to get it at all. For example while working with a
hash table it can be convenient to know how much capacity is already
used (bpf_htab.count.counter for BPF_F_NO_PREALLOC case).

At the same time kernel knows this info and can provide it to bpf
program.

Fill this gap by adding support to access bpf map fields from bpf
program for both `struct bpf_map` and map type specific fields.

Support is implemented via btf_struct_access() so that a user can define
their own `struct bpf_map` or map type specific struct in their program
with only necessary fields and preserve_access_index attribute, cast a
map to this struct and use a field.

For example:

	struct bpf_map {
		__u32 max_entries;
	} __attribute__((preserve_access_index));

	struct bpf_array {
		struct bpf_map map;
		__u32 elem_size;
	} __attribute__((preserve_access_index));

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 4);
		__type(key, __u32);
		__type(value, __u32);
	} m_array SEC(".maps");

	SEC("cgroup_skb/egress")
	int cg_skb(void *ctx)
	{
		struct bpf_array *array = (struct bpf_array *)&m_array;
		struct bpf_map *map = (struct bpf_map *)&m_array;

		/* .. use map->max_entries or array->map.max_entries .. */
	}

Similarly to other btf_struct_access() use-cases (e.g. struct tcp_sock
in net/ipv4/bpf_tcp_ca.c) the patch allows access to any fields of
corresponding struct. Only reading from map fields is supported.

For btf_struct_access() to work there should be a way to know btf id of
a struct that corresponds to a map type. To get btf id there should be a
way to get a stringified name of map-specific struct, such as
"bpf_array", "bpf_htab", etc for a map type. Two new fields are added to
`struct bpf_map_ops` to handle it:
* .map_btf_name keeps a btf name of a struct returned by map_alloc();
* .map_btf_id is used to cache btf id of that struct.

To make btf ids calculation cheaper they're calculated once while
preparing btf_vmlinux and cached same way as it's done for btf_id field
of `struct bpf_func_proto`

While calculating btf ids, struct names are NOT checked for collision.
Collisions will be checked as a part of the work to prepare btf ids used
in verifier in compile time that should land soon. The only known
collision for `struct bpf_htab` (kernel/bpf/hashtab.c vs
net/core/sock_map.c) was fixed earlier.

Both new fields .map_btf_name and .map_btf_id must be set for a map type
for the feature to work. If neither is set for a map type, verifier will
return ENOTSUPP on a try to access map_ptr of corresponding type. If
just one of them set, it's verifier misconfiguration.

Only `struct bpf_array` for BPF_MAP_TYPE_ARRAY and `struct bpf_htab` for
BPF_MAP_TYPE_HASH are supported by this patch. Other map types will be
supported separately.

The feature is available only for CONFIG_DEBUG_INFO_BTF=y and gated by
perfmon_capable() so that unpriv programs won't have access to bpf map
fields.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/6479686a0cd1e9067993df57b4c3eef0e276fec9.1592600985.git.rdna@fb.com
---
 tools/testing/selftests/bpf/verifier/map_ptr_mixing.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
index cd26ee6b7b1d..1f2b8c4cb26d 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
@@ -56,7 +56,7 @@
 	.fixup_map_in_map = { 16 },
 	.fixup_map_array_48b = { 13 },
 	.result = REJECT,
-	.errstr = "R0 invalid mem access 'map_ptr'",
+	.errstr = "only read from bpf_array is supported",
 },
 {
 	"cond: two branches returning different map pointers for lookup (tail, tail)",
-- 
cgit 


From b1b53d413f16c6b5078edb127e660e67332e4d2f Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 19 Jun 2020 14:11:45 -0700
Subject: selftests/bpf: Test access to bpf map pointer

Add selftests to test access to map pointers from bpf program for all
map types except struct_ops (that one would need additional work).

verifier test focuses mostly on scenarios that must be rejected.

prog_tests test focuses on accessing multiple fields both scalar and a
nested struct from bpf program and verifies that those fields have
expected values.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/139a6a17f8016491e39347849b951525335c6eb4.1592600985.git.rdna@fb.com
---
 tools/testing/selftests/bpf/prog_tests/map_ptr.c |  32 ++
 tools/testing/selftests/bpf/progs/map_ptr_kern.c | 686 +++++++++++++++++++++++
 tools/testing/selftests/bpf/verifier/map_ptr.c   |  62 ++
 3 files changed, 780 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/map_ptr.c
 create mode 100644 tools/testing/selftests/bpf/progs/map_ptr_kern.c
 create mode 100644 tools/testing/selftests/bpf/verifier/map_ptr.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
new file mode 100644
index 000000000000..c230a573c373
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_ptr_kern.skel.h"
+
+void test_map_ptr(void)
+{
+	struct map_ptr_kern *skel;
+	__u32 duration = 0, retval;
+	char buf[128];
+	int err;
+
+	skel = map_ptr_kern__open_and_load();
+	if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+		return;
+
+	err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
+				sizeof(pkt_v4), buf, NULL, &retval, NULL);
+
+	if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno))
+		goto cleanup;
+
+	if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval,
+		  skel->bss->g_map_type, skel->bss->g_line))
+		goto cleanup;
+
+cleanup:
+	map_ptr_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
new file mode 100644
index 000000000000..473665cac67e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -0,0 +1,686 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define LOOP_BOUND 0xf
+#define MAX_ENTRIES 8
+#define HALF_ENTRIES (MAX_ENTRIES >> 1)
+
+_Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
+
+enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC;
+__u32 g_line = 0;
+
+#define VERIFY_TYPE(type, func) ({	\
+	g_map_type = type;		\
+	if (!func())			\
+		return 0;		\
+})
+
+
+#define VERIFY(expr) ({		\
+	g_line = __LINE__;	\
+	if (!(expr))		\
+		return 0;	\
+})
+
+struct bpf_map_memory {
+	__u32 pages;
+} __attribute__((preserve_access_index));
+
+struct bpf_map {
+	enum bpf_map_type map_type;
+	__u32 key_size;
+	__u32 value_size;
+	__u32 max_entries;
+	__u32 id;
+	struct bpf_map_memory memory;
+} __attribute__((preserve_access_index));
+
+static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
+				       __u32 value_size, __u32 max_entries)
+{
+	VERIFY(map->map_type == g_map_type);
+	VERIFY(map->key_size == key_size);
+	VERIFY(map->value_size == value_size);
+	VERIFY(map->max_entries == max_entries);
+	VERIFY(map->id > 0);
+	VERIFY(map->memory.pages > 0);
+
+	return 1;
+}
+
+static inline int check_bpf_map_ptr(struct bpf_map *indirect,
+				    struct bpf_map *direct)
+{
+	VERIFY(indirect->map_type == direct->map_type);
+	VERIFY(indirect->key_size == direct->key_size);
+	VERIFY(indirect->value_size == direct->value_size);
+	VERIFY(indirect->max_entries == direct->max_entries);
+	VERIFY(indirect->id == direct->id);
+	VERIFY(indirect->memory.pages == direct->memory.pages);
+
+	return 1;
+}
+
+static inline int check(struct bpf_map *indirect, struct bpf_map *direct,
+			__u32 key_size, __u32 value_size, __u32 max_entries)
+{
+	VERIFY(check_bpf_map_ptr(indirect, direct));
+	VERIFY(check_bpf_map_fields(indirect, key_size, value_size,
+				    max_entries));
+	return 1;
+}
+
+static inline int check_default(struct bpf_map *indirect,
+				struct bpf_map *direct)
+{
+	VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+		     MAX_ENTRIES));
+	return 1;
+}
+
+typedef struct {
+	int counter;
+} atomic_t;
+
+struct bpf_htab {
+	struct bpf_map map;
+	atomic_t count;
+	__u32 n_buckets;
+	__u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(map_flags, BPF_F_NO_PREALLOC); /* to test bpf_htab.count */
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_hash SEC(".maps");
+
+static inline int check_hash(void)
+{
+	struct bpf_htab *hash = (struct bpf_htab *)&m_hash;
+	struct bpf_map *map = (struct bpf_map *)&m_hash;
+	int i;
+
+	VERIFY(check_default(&hash->map, map));
+
+	VERIFY(hash->n_buckets == MAX_ENTRIES);
+	VERIFY(hash->elem_size == 64);
+
+	VERIFY(hash->count.counter == 0);
+	for (i = 0; i < HALF_ENTRIES; ++i) {
+		const __u32 key = i;
+		const __u32 val = 1;
+
+		if (bpf_map_update_elem(hash, &key, &val, 0))
+			return 0;
+	}
+	VERIFY(hash->count.counter == HALF_ENTRIES);
+
+	return 1;
+}
+
+struct bpf_array {
+	struct bpf_map map;
+	__u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_array SEC(".maps");
+
+static inline int check_array(void)
+{
+	struct bpf_array *array = (struct bpf_array *)&m_array;
+	struct bpf_map *map = (struct bpf_map *)&m_array;
+	int i, n_lookups = 0, n_keys = 0;
+
+	VERIFY(check_default(&array->map, map));
+
+	VERIFY(array->elem_size == 8);
+
+	for (i = 0; i < array->map.max_entries && i < LOOP_BOUND; ++i) {
+		const __u32 key = i;
+		__u32 *val = bpf_map_lookup_elem(array, &key);
+
+		++n_lookups;
+		if (val)
+			++n_keys;
+	}
+
+	VERIFY(n_lookups == MAX_ENTRIES);
+	VERIFY(n_keys == MAX_ENTRIES);
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_prog_array SEC(".maps");
+
+static inline int check_prog_array(void)
+{
+	struct bpf_array *prog_array = (struct bpf_array *)&m_prog_array;
+	struct bpf_map *map = (struct bpf_map *)&m_prog_array;
+
+	VERIFY(check_default(&prog_array->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_perf_event_array SEC(".maps");
+
+static inline int check_perf_event_array(void)
+{
+	struct bpf_array *perf_event_array = (struct bpf_array *)&m_perf_event_array;
+	struct bpf_map *map = (struct bpf_map *)&m_perf_event_array;
+
+	VERIFY(check_default(&perf_event_array->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_percpu_hash SEC(".maps");
+
+static inline int check_percpu_hash(void)
+{
+	struct bpf_htab *percpu_hash = (struct bpf_htab *)&m_percpu_hash;
+	struct bpf_map *map = (struct bpf_map *)&m_percpu_hash;
+
+	VERIFY(check_default(&percpu_hash->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_percpu_array SEC(".maps");
+
+static inline int check_percpu_array(void)
+{
+	struct bpf_array *percpu_array = (struct bpf_array *)&m_percpu_array;
+	struct bpf_map *map = (struct bpf_map *)&m_percpu_array;
+
+	VERIFY(check_default(&percpu_array->map, map));
+
+	return 1;
+}
+
+struct bpf_stack_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u64);
+} m_stack_trace SEC(".maps");
+
+static inline int check_stack_trace(void)
+{
+	struct bpf_stack_map *stack_trace =
+		(struct bpf_stack_map *)&m_stack_trace;
+	struct bpf_map *map = (struct bpf_map *)&m_stack_trace;
+
+	VERIFY(check(&stack_trace->map, map, sizeof(__u32), sizeof(__u64),
+		     MAX_ENTRIES));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_cgroup_array SEC(".maps");
+
+static inline int check_cgroup_array(void)
+{
+	struct bpf_array *cgroup_array = (struct bpf_array *)&m_cgroup_array;
+	struct bpf_map *map = (struct bpf_map *)&m_cgroup_array;
+
+	VERIFY(check_default(&cgroup_array->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_lru_hash SEC(".maps");
+
+static inline int check_lru_hash(void)
+{
+	struct bpf_htab *lru_hash = (struct bpf_htab *)&m_lru_hash;
+	struct bpf_map *map = (struct bpf_map *)&m_lru_hash;
+
+	VERIFY(check_default(&lru_hash->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_lru_percpu_hash SEC(".maps");
+
+static inline int check_lru_percpu_hash(void)
+{
+	struct bpf_htab *lru_percpu_hash = (struct bpf_htab *)&m_lru_percpu_hash;
+	struct bpf_map *map = (struct bpf_map *)&m_lru_percpu_hash;
+
+	VERIFY(check_default(&lru_percpu_hash->map, map));
+
+	return 1;
+}
+
+struct lpm_trie {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct lpm_key {
+	struct bpf_lpm_trie_key trie_key;
+	__u32 data;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, struct lpm_key);
+	__type(value, __u32);
+} m_lpm_trie SEC(".maps");
+
+static inline int check_lpm_trie(void)
+{
+	struct lpm_trie *lpm_trie = (struct lpm_trie *)&m_lpm_trie;
+	struct bpf_map *map = (struct bpf_map *)&m_lpm_trie;
+
+	VERIFY(check(&lpm_trie->map, map, sizeof(struct lpm_key), sizeof(__u32),
+		     MAX_ENTRIES));
+
+	return 1;
+}
+
+struct inner_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} inner_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+	__array(values, struct {
+		__uint(type, BPF_MAP_TYPE_ARRAY);
+		__uint(max_entries, 1);
+		__type(key, __u32);
+		__type(value, __u32);
+	});
+} m_array_of_maps SEC(".maps") = {
+	.values = { (void *)&inner_map, 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+
+static inline int check_array_of_maps(void)
+{
+	struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps;
+	struct bpf_map *map = (struct bpf_map *)&m_array_of_maps;
+
+	VERIFY(check_default(&array_of_maps->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+	__array(values, struct inner_map);
+} m_hash_of_maps SEC(".maps") = {
+	.values = {
+		[2] = &inner_map,
+	},
+};
+
+static inline int check_hash_of_maps(void)
+{
+	struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps;
+	struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps;
+
+	VERIFY(check_default(&hash_of_maps->map, map));
+
+	return 1;
+}
+
+struct bpf_dtab {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_devmap SEC(".maps");
+
+static inline int check_devmap(void)
+{
+	struct bpf_dtab *devmap = (struct bpf_dtab *)&m_devmap;
+	struct bpf_map *map = (struct bpf_map *)&m_devmap;
+
+	VERIFY(check_default(&devmap->map, map));
+
+	return 1;
+}
+
+struct bpf_stab {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_sockmap SEC(".maps");
+
+static inline int check_sockmap(void)
+{
+	struct bpf_stab *sockmap = (struct bpf_stab *)&m_sockmap;
+	struct bpf_map *map = (struct bpf_map *)&m_sockmap;
+
+	VERIFY(check_default(&sockmap->map, map));
+
+	return 1;
+}
+
+struct bpf_cpu_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CPUMAP);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_cpumap SEC(".maps");
+
+static inline int check_cpumap(void)
+{
+	struct bpf_cpu_map *cpumap = (struct bpf_cpu_map *)&m_cpumap;
+	struct bpf_map *map = (struct bpf_map *)&m_cpumap;
+
+	VERIFY(check_default(&cpumap->map, map));
+
+	return 1;
+}
+
+struct xsk_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_XSKMAP);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_xskmap SEC(".maps");
+
+static inline int check_xskmap(void)
+{
+	struct xsk_map *xskmap = (struct xsk_map *)&m_xskmap;
+	struct bpf_map *map = (struct bpf_map *)&m_xskmap;
+
+	VERIFY(check_default(&xskmap->map, map));
+
+	return 1;
+}
+
+struct bpf_shtab {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_sockhash SEC(".maps");
+
+static inline int check_sockhash(void)
+{
+	struct bpf_shtab *sockhash = (struct bpf_shtab *)&m_sockhash;
+	struct bpf_map *map = (struct bpf_map *)&m_sockhash;
+
+	VERIFY(check_default(&sockhash->map, map));
+
+	return 1;
+}
+
+struct bpf_cgroup_storage_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, __u32);
+} m_cgroup_storage SEC(".maps");
+
+static inline int check_cgroup_storage(void)
+{
+	struct bpf_cgroup_storage_map *cgroup_storage =
+		(struct bpf_cgroup_storage_map *)&m_cgroup_storage;
+	struct bpf_map *map = (struct bpf_map *)&m_cgroup_storage;
+
+	VERIFY(check(&cgroup_storage->map, map,
+		     sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+	return 1;
+}
+
+struct reuseport_array {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_reuseport_sockarray SEC(".maps");
+
+static inline int check_reuseport_sockarray(void)
+{
+	struct reuseport_array *reuseport_sockarray =
+		(struct reuseport_array *)&m_reuseport_sockarray;
+	struct bpf_map *map = (struct bpf_map *)&m_reuseport_sockarray;
+
+	VERIFY(check_default(&reuseport_sockarray->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, __u32);
+} m_percpu_cgroup_storage SEC(".maps");
+
+static inline int check_percpu_cgroup_storage(void)
+{
+	struct bpf_cgroup_storage_map *percpu_cgroup_storage =
+		(struct bpf_cgroup_storage_map *)&m_percpu_cgroup_storage;
+	struct bpf_map *map = (struct bpf_map *)&m_percpu_cgroup_storage;
+
+	VERIFY(check(&percpu_cgroup_storage->map, map,
+		     sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+	return 1;
+}
+
+struct bpf_queue_stack {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_QUEUE);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(value, __u32);
+} m_queue SEC(".maps");
+
+static inline int check_queue(void)
+{
+	struct bpf_queue_stack *queue = (struct bpf_queue_stack *)&m_queue;
+	struct bpf_map *map = (struct bpf_map *)&m_queue;
+
+	VERIFY(check(&queue->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(value, __u32);
+} m_stack SEC(".maps");
+
+static inline int check_stack(void)
+{
+	struct bpf_queue_stack *stack = (struct bpf_queue_stack *)&m_stack;
+	struct bpf_map *map = (struct bpf_map *)&m_stack;
+
+	VERIFY(check(&stack->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+	return 1;
+}
+
+struct bpf_sk_storage_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_sk_storage SEC(".maps");
+
+static inline int check_sk_storage(void)
+{
+	struct bpf_sk_storage_map *sk_storage =
+		(struct bpf_sk_storage_map *)&m_sk_storage;
+	struct bpf_map *map = (struct bpf_map *)&m_sk_storage;
+
+	VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_devmap_hash SEC(".maps");
+
+static inline int check_devmap_hash(void)
+{
+	struct bpf_dtab *devmap_hash = (struct bpf_dtab *)&m_devmap_hash;
+	struct bpf_map *map = (struct bpf_map *)&m_devmap_hash;
+
+	VERIFY(check_default(&devmap_hash->map, map));
+
+	return 1;
+}
+
+struct bpf_ringbuf_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 1 << 12);
+} m_ringbuf SEC(".maps");
+
+static inline int check_ringbuf(void)
+{
+	struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
+	struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
+
+	VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12));
+
+	return 1;
+}
+
+SEC("cgroup_skb/egress")
+int cg_skb(void *ctx)
+{
+	VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash);
+	VERIFY_TYPE(BPF_MAP_TYPE_ARRAY, check_array);
+	VERIFY_TYPE(BPF_MAP_TYPE_PROG_ARRAY, check_prog_array);
+	VERIFY_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, check_perf_event_array);
+	VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_HASH, check_percpu_hash);
+	VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, check_percpu_array);
+	VERIFY_TYPE(BPF_MAP_TYPE_STACK_TRACE, check_stack_trace);
+	VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, check_cgroup_array);
+	VERIFY_TYPE(BPF_MAP_TYPE_LRU_HASH, check_lru_hash);
+	VERIFY_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, check_lru_percpu_hash);
+	VERIFY_TYPE(BPF_MAP_TYPE_LPM_TRIE, check_lpm_trie);
+	VERIFY_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, check_array_of_maps);
+	VERIFY_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, check_hash_of_maps);
+	VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP, check_devmap);
+	VERIFY_TYPE(BPF_MAP_TYPE_SOCKMAP, check_sockmap);
+	VERIFY_TYPE(BPF_MAP_TYPE_CPUMAP, check_cpumap);
+	VERIFY_TYPE(BPF_MAP_TYPE_XSKMAP, check_xskmap);
+	VERIFY_TYPE(BPF_MAP_TYPE_SOCKHASH, check_sockhash);
+	VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, check_cgroup_storage);
+	VERIFY_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+		    check_reuseport_sockarray);
+	VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+		    check_percpu_cgroup_storage);
+	VERIFY_TYPE(BPF_MAP_TYPE_QUEUE, check_queue);
+	VERIFY_TYPE(BPF_MAP_TYPE_STACK, check_stack);
+	VERIFY_TYPE(BPF_MAP_TYPE_SK_STORAGE, check_sk_storage);
+	VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, check_devmap_hash);
+	VERIFY_TYPE(BPF_MAP_TYPE_RINGBUF, check_ringbuf);
+
+	return 1;
+}
+
+__u32 _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
new file mode 100644
index 000000000000..b52209db8250
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -0,0 +1,62 @@
+{
+	"bpf_map_ptr: read with negative offset rejected",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+	.result = REJECT,
+	.errstr = "R1 is bpf_array invalid negative access: off=-8",
+},
+{
+	"bpf_map_ptr: write rejected",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 3 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+	.result = REJECT,
+	.errstr = "only read from bpf_array is supported",
+},
+{
+	"bpf_map_ptr: read non-existent field rejected",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_6, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+	.result = REJECT,
+	.errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
+},
+{
+	"bpf_map_ptr: read ops field accepted",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_6, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+	.result = ACCEPT,
+	.retval = 1,
+},
-- 
cgit 


From 1bdb6c9a1c43fdf9b83b2331dfc6229bd2e71d9b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 20 Jun 2020 23:21:12 -0700
Subject: libbpf: Add a bunch of attribute getters/setters for map definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a bunch of getter for various aspects of BPF map. Some of these attribute
(e.g., key_size, value_size, type, etc) are available right now in struct
bpf_map_def, but this patch adds getter allowing to fetch them individually.
bpf_map_def approach isn't very scalable, when ABI stability requirements are
taken into account. It's much easier to extend libbpf and add support for new
features, when each aspect of BPF map has separate getter/setter.

Getters follow the common naming convention of not explicitly having "get" in
its name: bpf_map__type() returns map type, bpf_map__key_size() returns
key_size. Setters, though, explicitly have set in their name:
bpf_map__set_type(), bpf_map__set_key_size().

This patch ensures we now have a getter and a setter for the following
map attributes:
  - type;
  - max_entries;
  - map_flags;
  - numa_node;
  - key_size;
  - value_size;
  - ifindex.

bpf_map__resize() enforces unnecessary restriction of max_entries > 0. It is
unnecessary, because libbpf actually supports zero max_entries for some cases
(e.g., for PERF_EVENT_ARRAY map) and treats it specially during map creation
time. To allow setting max_entries=0, new bpf_map__set_max_entries() setter is
added. bpf_map__resize()'s behavior is preserved for backwards compatibility
reasons.

Map ifindex getter is added as well. There is a setter already, but no
corresponding getter. Fix this assymetry as well. bpf_map__set_ifindex()
itself is converted from void function into error-returning one, similar to
other setters. The only error returned right now is -EBUSY, if BPF map is
already loaded and has corresponding FD.

One lacking attribute with no ability to get/set or even specify it
declaratively is numa_node. This patch fixes this gap and both adds
programmatic getter/setter, as well as adds support for numa_node field in
BTF-defined map.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20200621062112.3006313-1-andriin@fb.com
---
 tools/lib/bpf/libbpf.c   | 100 +++++++++++++++++++++++++++++++++++++++++++----
 tools/lib/bpf/libbpf.h   |  30 ++++++++++++--
 tools/lib/bpf/libbpf.map |  14 +++++++
 3 files changed, 134 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 477c679ed945..259a6360475f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -310,6 +310,7 @@ struct bpf_map {
 	int map_ifindex;
 	int inner_map_fd;
 	struct bpf_map_def def;
+	__u32 numa_node;
 	__u32 btf_var_idx;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
@@ -1957,6 +1958,10 @@ static int parse_btf_map_def(struct bpf_object *obj,
 				return -EINVAL;
 			pr_debug("map '%s': found map_flags = %u.\n",
 				 map->name, map->def.map_flags);
+		} else if (strcmp(name, "numa_node") == 0) {
+			if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+				return -EINVAL;
+			pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
 		} else if (strcmp(name, "key_size") == 0) {
 			__u32 sz;
 
@@ -3222,20 +3227,27 @@ err_free_new_name:
 	return err;
 }
 
-int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+__u32 bpf_map__max_entries(const struct bpf_map *map)
 {
-	if (!map || !max_entries)
-		return -EINVAL;
+	return map->def.max_entries;
+}
 
-	/* If map already created, its attributes can't be changed. */
+int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
+{
 	if (map->fd >= 0)
 		return -EBUSY;
-
 	map->def.max_entries = max_entries;
-
 	return 0;
 }
 
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+{
+	if (!map || !max_entries)
+		return -EINVAL;
+
+	return bpf_map__set_max_entries(map, max_entries);
+}
+
 static int
 bpf_object__probe_loading(struct bpf_object *obj)
 {
@@ -3603,6 +3615,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 	create_attr.map_flags = def->map_flags;
 	create_attr.key_size = def->key_size;
 	create_attr.value_size = def->value_size;
+	create_attr.numa_node = map->numa_node;
 
 	if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
 		int nr_cpus;
@@ -7088,6 +7101,71 @@ const char *bpf_map__name(const struct bpf_map *map)
 	return map ? map->name : NULL;
 }
 
+enum bpf_map_type bpf_map__type(const struct bpf_map *map)
+{
+	return map->def.type;
+}
+
+int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.type = type;
+	return 0;
+}
+
+__u32 bpf_map__map_flags(const struct bpf_map *map)
+{
+	return map->def.map_flags;
+}
+
+int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.map_flags = flags;
+	return 0;
+}
+
+__u32 bpf_map__numa_node(const struct bpf_map *map)
+{
+	return map->numa_node;
+}
+
+int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->numa_node = numa_node;
+	return 0;
+}
+
+__u32 bpf_map__key_size(const struct bpf_map *map)
+{
+	return map->def.key_size;
+}
+
+int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.key_size = size;
+	return 0;
+}
+
+__u32 bpf_map__value_size(const struct bpf_map *map)
+{
+	return map->def.value_size;
+}
+
+int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.value_size = size;
+	return 0;
+}
+
 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
 {
 	return map ? map->btf_key_type_id : 0;
@@ -7140,9 +7218,17 @@ bool bpf_map__is_internal(const struct bpf_map *map)
 	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
 }
 
-void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+__u32 bpf_map__ifindex(const struct bpf_map *map)
+{
+	return map->map_ifindex;
+}
+
+int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 {
+	if (map->fd >= 0)
+		return -EBUSY;
 	map->map_ifindex = ifindex;
+	return 0;
 }
 
 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 334437af3014..fdd279fb1866 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -418,11 +418,38 @@ bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
 LIBBPF_API struct bpf_map *
 bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
 
+/* get/set map FD */
 LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
+LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+/* get map definition */
 LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+/* get map name */
 LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
+/* get/set map type */
+LIBBPF_API enum bpf_map_type bpf_map__type(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
+/* get/set map size (max_entries) */
+LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
+LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
+/* get/set map flags */
+LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags);
+/* get/set map NUMA node */
+LIBBPF_API __u32 bpf_map__numa_node(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node);
+/* get/set map key size */
+LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size);
+/* get/set map value size */
+LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size);
+/* get map key/value BTF type IDs */
 LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
 LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
+/* get/set map if_index */
+LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 
 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
 LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
@@ -430,11 +457,8 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
 LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
 					  const void *data, size_t size);
-LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
-LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
 LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
-LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
 LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index c914347f5065..9914e0db4859 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -272,4 +272,18 @@ LIBBPF_0.0.9 {
 } LIBBPF_0.0.8;
 
 LIBBPF_0.1.0 {
+	global:
+		bpf_map__ifindex;
+		bpf_map__key_size;
+		bpf_map__map_flags;
+		bpf_map__max_entries;
+		bpf_map__numa_node;
+		bpf_map__set_key_size;
+		bpf_map__set_map_flags;
+		bpf_map__set_max_entries;
+		bpf_map__set_numa_node;
+		bpf_map__set_type;
+		bpf_map__set_value_size;
+		bpf_map__type;
+		bpf_map__value_size;
 } LIBBPF_0.0.9;
-- 
cgit 


From 13bd5d025602bb5374f54e144dec577b74718493 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Sun, 21 Jun 2020 11:34:36 +0300
Subject: selftests: forwarding: Add a test for pedit munge tcp, udp sport,
 dport

Add a test that checks that pedit adjusts port numbers of tcp and udp
packets.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/pedit_l4port.sh       | 198 +++++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/pedit_l4port.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
new file mode 100755
index 000000000000..5f20d289ee43
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on egress of $swp2, the
+# traffic is acted upon by a pedit action. An ingress filter installed on $h2 verifies that the
+# packet looks like expected.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |   192.0.2.2/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_udp_sport
+	test_udp_dport
+	test_tcp_sport
+	test_tcp_dport
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add name br1 up type bridge vlan_filtering 1
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
+do_test_pedit_l4port_one()
+{
+	local pedit_locus=$1; shift
+	local pedit_prot=$1; shift
+	local pedit_action=$1; shift
+	local match_prot=$1; shift
+	local match_flower=$1; shift
+	local mz_flags=$1; shift
+	local saddr=$1; shift
+	local daddr=$1; shift
+
+	tc filter add $pedit_locus handle 101 pref 1 \
+	   flower action pedit ex munge $pedit_action
+	tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+	   flower skip_hw $match_flower action pass
+
+	RET=0
+
+	$MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \
+	    -a own -b $h2mac -q -t $pedit_prot sp=54321,dp=12345
+
+	local pkts
+	pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+			tc_rule_handle_stats_get "dev $h2 ingress" 101)
+	check_err $? "Expected to get 10 packets, but got $pkts."
+
+	pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+	((pkts >= 10))
+	check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
+	log_test "$pedit_locus pedit $pedit_action"
+
+	tc filter del dev $h2 ingress pref 1
+	tc filter del $pedit_locus pref 1
+}
+
+do_test_pedit_l4port()
+{
+	local locus=$1; shift
+	local prot=$1; shift
+	local pedit_port=$1; shift
+	local flower_port=$1; shift
+	local port
+
+	for port in 1 11111 65535; do
+		do_test_pedit_l4port_one "$locus" "$prot"			\
+					 "$prot $pedit_port set $port"		\
+					 ip "ip_proto $prot $flower_port $port"	\
+					 "-A 192.0.2.1 -B 192.0.2.2"
+	done
+}
+
+test_udp_sport()
+{
+	do_test_pedit_l4port "dev $swp1 ingress" udp sport src_port
+	do_test_pedit_l4port "dev $swp2 egress"  udp sport src_port
+}
+
+test_udp_dport()
+{
+	do_test_pedit_l4port "dev $swp1 ingress" udp dport dst_port
+	do_test_pedit_l4port "dev $swp2 egress"  udp dport dst_port
+}
+
+test_tcp_sport()
+{
+	do_test_pedit_l4port "dev $swp1 ingress" tcp sport src_port
+	do_test_pedit_l4port "dev $swp2 egress"  tcp sport src_port
+}
+
+test_tcp_dport()
+{
+	do_test_pedit_l4port "dev $swp1 ingress" tcp dport dst_port
+	do_test_pedit_l4port "dev $swp2 egress"  tcp dport dst_port
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From 2e33efe32e019328916ce653dc1265d637261993 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:16:55 -0700
Subject: libbpf: Generalize libbpf externs support

Switch existing Kconfig externs to be just one of few possible kinds of more
generic externs. This refactoring is in preparation for ksymbol extern
support, added in the follow up patch. There are no functional changes
intended.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-2-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 346 +++++++++++++++++++++++++++++--------------------
 1 file changed, 206 insertions(+), 140 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 259a6360475f..ffccb5af32a5 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -330,24 +330,35 @@ struct bpf_map {
 
 enum extern_type {
 	EXT_UNKNOWN,
-	EXT_CHAR,
-	EXT_BOOL,
-	EXT_INT,
-	EXT_TRISTATE,
-	EXT_CHAR_ARR,
+	EXT_KCFG,
+};
+
+enum kcfg_type {
+	KCFG_UNKNOWN,
+	KCFG_CHAR,
+	KCFG_BOOL,
+	KCFG_INT,
+	KCFG_TRISTATE,
+	KCFG_CHAR_ARR,
 };
 
 struct extern_desc {
-	const char *name;
+	enum extern_type type;
 	int sym_idx;
 	int btf_id;
-	enum extern_type type;
-	int sz;
-	int align;
-	int data_off;
-	bool is_signed;
-	bool is_weak;
+	int sec_btf_id;
+	const char *name;
 	bool is_set;
+	bool is_weak;
+	union {
+		struct {
+			enum kcfg_type type;
+			int sz;
+			int align;
+			int data_off;
+			bool is_signed;
+		} kcfg;
+	};
 };
 
 static LIST_HEAD(bpf_objects_list);
@@ -1424,19 +1435,19 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
 	return NULL;
 }
 
-static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
-			     char value)
+static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
+			      char value)
 {
-	switch (ext->type) {
-	case EXT_BOOL:
+	switch (ext->kcfg.type) {
+	case KCFG_BOOL:
 		if (value == 'm') {
-			pr_warn("extern %s=%c should be tristate or char\n",
+			pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
 				ext->name, value);
 			return -EINVAL;
 		}
 		*(bool *)ext_val = value == 'y' ? true : false;
 		break;
-	case EXT_TRISTATE:
+	case KCFG_TRISTATE:
 		if (value == 'y')
 			*(enum libbpf_tristate *)ext_val = TRI_YES;
 		else if (value == 'm')
@@ -1444,14 +1455,14 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
 		else /* value == 'n' */
 			*(enum libbpf_tristate *)ext_val = TRI_NO;
 		break;
-	case EXT_CHAR:
+	case KCFG_CHAR:
 		*(char *)ext_val = value;
 		break;
-	case EXT_UNKNOWN:
-	case EXT_INT:
-	case EXT_CHAR_ARR:
+	case KCFG_UNKNOWN:
+	case KCFG_INT:
+	case KCFG_CHAR_ARR:
 	default:
-		pr_warn("extern %s=%c should be bool, tristate, or char\n",
+		pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
 			ext->name, value);
 		return -EINVAL;
 	}
@@ -1459,29 +1470,29 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
 	return 0;
 }
 
-static int set_ext_value_str(struct extern_desc *ext, char *ext_val,
-			     const char *value)
+static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
+			      const char *value)
 {
 	size_t len;
 
-	if (ext->type != EXT_CHAR_ARR) {
-		pr_warn("extern %s=%s should char array\n", ext->name, value);
+	if (ext->kcfg.type != KCFG_CHAR_ARR) {
+		pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
 		return -EINVAL;
 	}
 
 	len = strlen(value);
 	if (value[len - 1] != '"') {
-		pr_warn("extern '%s': invalid string config '%s'\n",
+		pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
 			ext->name, value);
 		return -EINVAL;
 	}
 
 	/* strip quotes */
 	len -= 2;
-	if (len >= ext->sz) {
-		pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
-			ext->name, value, len, ext->sz - 1);
-		len = ext->sz - 1;
+	if (len >= ext->kcfg.sz) {
+		pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+			ext->name, value, len, ext->kcfg.sz - 1);
+		len = ext->kcfg.sz - 1;
 	}
 	memcpy(ext_val, value + 1, len);
 	ext_val[len] = '\0';
@@ -1508,11 +1519,11 @@ static int parse_u64(const char *value, __u64 *res)
 	return 0;
 }
 
-static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
+static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
 {
-	int bit_sz = ext->sz * 8;
+	int bit_sz = ext->kcfg.sz * 8;
 
-	if (ext->sz == 8)
+	if (ext->kcfg.sz == 8)
 		return true;
 
 	/* Validate that value stored in u64 fits in integer of `ext->sz`
@@ -1527,26 +1538,26 @@ static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
 	 *  For unsigned target integer, check that all the (64 - Y) bits are
 	 *  zero.
 	 */
-	if (ext->is_signed)
+	if (ext->kcfg.is_signed)
 		return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
 	else
 		return (v >> bit_sz) == 0;
 }
 
-static int set_ext_value_num(struct extern_desc *ext, void *ext_val,
-			     __u64 value)
+static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
+			      __u64 value)
 {
-	if (ext->type != EXT_INT && ext->type != EXT_CHAR) {
-		pr_warn("extern %s=%llu should be integer\n",
+	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
+		pr_warn("extern (kcfg) %s=%llu should be integer\n",
 			ext->name, (unsigned long long)value);
 		return -EINVAL;
 	}
-	if (!is_ext_value_in_range(ext, value)) {
-		pr_warn("extern %s=%llu value doesn't fit in %d bytes\n",
-			ext->name, (unsigned long long)value, ext->sz);
+	if (!is_kcfg_value_in_range(ext, value)) {
+		pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
+			ext->name, (unsigned long long)value, ext->kcfg.sz);
 		return -ERANGE;
 	}
-	switch (ext->sz) {
+	switch (ext->kcfg.sz) {
 		case 1: *(__u8 *)ext_val = value; break;
 		case 2: *(__u16 *)ext_val = value; break;
 		case 4: *(__u32 *)ext_val = value; break;
@@ -1592,30 +1603,30 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj,
 	if (!ext || ext->is_set)
 		return 0;
 
-	ext_val = data + ext->data_off;
+	ext_val = data + ext->kcfg.data_off;
 	value = sep + 1;
 
 	switch (*value) {
 	case 'y': case 'n': case 'm':
-		err = set_ext_value_tri(ext, ext_val, *value);
+		err = set_kcfg_value_tri(ext, ext_val, *value);
 		break;
 	case '"':
-		err = set_ext_value_str(ext, ext_val, value);
+		err = set_kcfg_value_str(ext, ext_val, value);
 		break;
 	default:
 		/* assume integer */
 		err = parse_u64(value, &num);
 		if (err) {
-			pr_warn("extern %s=%s should be integer\n",
+			pr_warn("extern (kcfg) %s=%s should be integer\n",
 				ext->name, value);
 			return err;
 		}
-		err = set_ext_value_num(ext, ext_val, num);
+		err = set_kcfg_value_num(ext, ext_val, num);
 		break;
 	}
 	if (err)
 		return err;
-	pr_debug("extern %s=%s\n", ext->name, value);
+	pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
 	return 0;
 }
 
@@ -1686,16 +1697,20 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
 
 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
 {
-	struct extern_desc *last_ext;
+	struct extern_desc *last_ext = NULL, *ext;
 	size_t map_sz;
-	int err;
+	int i, err;
 
-	if (obj->nr_extern == 0)
-		return 0;
+	for (i = 0; i < obj->nr_extern; i++) {
+		ext = &obj->externs[i];
+		if (ext->type == EXT_KCFG)
+			last_ext = ext;
+	}
 
-	last_ext = &obj->externs[obj->nr_extern - 1];
-	map_sz = last_ext->data_off + last_ext->sz;
+	if (!last_ext)
+		return 0;
 
+	map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
 	err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
 					    obj->efile.symbols_shndx,
 					    NULL, map_sz);
@@ -2714,8 +2729,33 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
 	return -ENOENT;
 }
 
-static enum extern_type find_extern_type(const struct btf *btf, int id,
-					 bool *is_signed)
+static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
+	const struct btf_var_secinfo *vs;
+	const struct btf_type *t;
+	int i, j, n;
+
+	if (!btf)
+		return -ESRCH;
+
+	n = btf__get_nr_types(btf);
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(btf, i);
+
+		if (!btf_is_datasec(t))
+			continue;
+
+		vs = btf_var_secinfos(t);
+		for (j = 0; j < btf_vlen(t); j++, vs++) {
+			if (vs->type == ext_btf_id)
+				return i;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
+				     bool *is_signed)
 {
 	const struct btf_type *t;
 	const char *name;
@@ -2730,29 +2770,29 @@ static enum extern_type find_extern_type(const struct btf *btf, int id,
 		int enc = btf_int_encoding(t);
 
 		if (enc & BTF_INT_BOOL)
-			return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN;
+			return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
 		if (is_signed)
 			*is_signed = enc & BTF_INT_SIGNED;
 		if (t->size == 1)
-			return EXT_CHAR;
+			return KCFG_CHAR;
 		if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
-			return EXT_UNKNOWN;
-		return EXT_INT;
+			return KCFG_UNKNOWN;
+		return KCFG_INT;
 	}
 	case BTF_KIND_ENUM:
 		if (t->size != 4)
-			return EXT_UNKNOWN;
+			return KCFG_UNKNOWN;
 		if (strcmp(name, "libbpf_tristate"))
-			return EXT_UNKNOWN;
-		return EXT_TRISTATE;
+			return KCFG_UNKNOWN;
+		return KCFG_TRISTATE;
 	case BTF_KIND_ARRAY:
 		if (btf_array(t)->nelems == 0)
-			return EXT_UNKNOWN;
-		if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR)
-			return EXT_UNKNOWN;
-		return EXT_CHAR_ARR;
+			return KCFG_UNKNOWN;
+		if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
+			return KCFG_UNKNOWN;
+		return KCFG_CHAR_ARR;
 	default:
-		return EXT_UNKNOWN;
+		return KCFG_UNKNOWN;
 	}
 }
 
@@ -2761,23 +2801,29 @@ static int cmp_externs(const void *_a, const void *_b)
 	const struct extern_desc *a = _a;
 	const struct extern_desc *b = _b;
 
-	/* descending order by alignment requirements */
-	if (a->align != b->align)
-		return a->align > b->align ? -1 : 1;
-	/* ascending order by size, within same alignment class */
-	if (a->sz != b->sz)
-		return a->sz < b->sz ? -1 : 1;
+	if (a->type != b->type)
+		return a->type < b->type ? -1 : 1;
+
+	if (a->type == EXT_KCFG) {
+		/* descending order by alignment requirements */
+		if (a->kcfg.align != b->kcfg.align)
+			return a->kcfg.align > b->kcfg.align ? -1 : 1;
+		/* ascending order by size, within same alignment class */
+		if (a->kcfg.sz != b->kcfg.sz)
+			return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
+	}
+
 	/* resolve ties by name */
 	return strcmp(a->name, b->name);
 }
 
 static int bpf_object__collect_externs(struct bpf_object *obj)
 {
+	struct btf_type *sec, *kcfg_sec = NULL;
 	const struct btf_type *t;
 	struct extern_desc *ext;
-	int i, n, off, btf_id;
-	struct btf_type *sec;
-	const char *ext_name;
+	int i, n, off;
+	const char *ext_name, *sec_name;
 	Elf_Scn *scn;
 	GElf_Shdr sh;
 
@@ -2823,22 +2869,39 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 		ext->name = btf__name_by_offset(obj->btf, t->name_off);
 		ext->sym_idx = i;
 		ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
-		ext->sz = btf__resolve_size(obj->btf, t->type);
-		if (ext->sz <= 0) {
-			pr_warn("failed to resolve size of extern '%s': %d\n",
-				ext_name, ext->sz);
-			return ext->sz;
-		}
-		ext->align = btf__align_of(obj->btf, t->type);
-		if (ext->align <= 0) {
-			pr_warn("failed to determine alignment of extern '%s': %d\n",
-				ext_name, ext->align);
-			return -EINVAL;
-		}
-		ext->type = find_extern_type(obj->btf, t->type,
-					     &ext->is_signed);
-		if (ext->type == EXT_UNKNOWN) {
-			pr_warn("extern '%s' type is unsupported\n", ext_name);
+
+		ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
+		if (ext->sec_btf_id <= 0) {
+			pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
+				ext_name, ext->btf_id, ext->sec_btf_id);
+			return ext->sec_btf_id;
+		}
+		sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
+		sec_name = btf__name_by_offset(obj->btf, sec->name_off);
+
+		if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+			kcfg_sec = sec;
+			ext->type = EXT_KCFG;
+			ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
+			if (ext->kcfg.sz <= 0) {
+				pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
+					ext_name, ext->kcfg.sz);
+				return ext->kcfg.sz;
+			}
+			ext->kcfg.align = btf__align_of(obj->btf, t->type);
+			if (ext->kcfg.align <= 0) {
+				pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
+					ext_name, ext->kcfg.align);
+				return -EINVAL;
+			}
+			ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
+						        &ext->kcfg.is_signed);
+			if (ext->kcfg.type == KCFG_UNKNOWN) {
+				pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
+				return -ENOTSUP;
+			}
+		} else {
+			pr_warn("unrecognized extern section '%s'\n", sec_name);
 			return -ENOTSUP;
 		}
 	}
@@ -2847,42 +2910,40 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 	if (!obj->nr_extern)
 		return 0;
 
-	/* sort externs by (alignment, size, name) and calculate their offsets
-	 * within a map */
+	/* sort externs by type, for kcfg ones also by (align, size, name) */
 	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
-	off = 0;
-	for (i = 0; i < obj->nr_extern; i++) {
-		ext = &obj->externs[i];
-		ext->data_off = roundup(off, ext->align);
-		off = ext->data_off + ext->sz;
-		pr_debug("extern #%d: symbol %d, off %u, name %s\n",
-			 i, ext->sym_idx, ext->data_off, ext->name);
-	}
 
-	btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC);
-	if (btf_id <= 0) {
-		pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC);
-		return -ESRCH;
-	}
+	if (kcfg_sec) {
+		sec = kcfg_sec;
+		/* for kcfg externs calculate their offsets within a .kconfig map */
+		off = 0;
+		for (i = 0; i < obj->nr_extern; i++) {
+			ext = &obj->externs[i];
+			if (ext->type != EXT_KCFG)
+				continue;
 
-	sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id);
-	sec->size = off;
-	n = btf_vlen(sec);
-	for (i = 0; i < n; i++) {
-		struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
-
-		t = btf__type_by_id(obj->btf, vs->type);
-		ext_name = btf__name_by_offset(obj->btf, t->name_off);
-		ext = find_extern_by_name(obj, ext_name);
-		if (!ext) {
-			pr_warn("failed to find extern definition for BTF var '%s'\n",
-				ext_name);
-			return -ESRCH;
+			ext->kcfg.data_off = roundup(off, ext->kcfg.align);
+			off = ext->kcfg.data_off + ext->kcfg.sz;
+			pr_debug("extern #%d (kcfg): symbol %d, off %u, name %s\n",
+				 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
+		}
+		sec->size = off;
+		n = btf_vlen(sec);
+		for (i = 0; i < n; i++) {
+			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+
+			t = btf__type_by_id(obj->btf, vs->type);
+			ext_name = btf__name_by_offset(obj->btf, t->name_off);
+			ext = find_extern_by_name(obj, ext_name);
+			if (!ext) {
+				pr_warn("failed to find extern definition for BTF var '%s'\n",
+					ext_name);
+				return -ESRCH;
+			}
+			btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+			vs->offset = ext->kcfg.data_off;
 		}
-		vs->offset = ext->data_off;
-		btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
 	}
-
 	return 0;
 }
 
@@ -3012,11 +3073,11 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 				sym_idx);
 			return -LIBBPF_ERRNO__RELOC;
 		}
-		pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n",
-			 i, ext->name, ext->sym_idx, ext->data_off, insn_idx);
+		pr_debug("found extern #%d '%s' (sym %d) for insn %u\n",
+			 i, ext->name, ext->sym_idx, insn_idx);
 		reloc_desc->type = RELO_EXTERN;
 		reloc_desc->insn_idx = insn_idx;
-		reloc_desc->sym_off = ext->data_off;
+		reloc_desc->sym_off = i; /* sym_off stores extern index */
 		return 0;
 	}
 
@@ -4941,6 +5002,7 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 	for (i = 0; i < prog->nr_reloc; i++) {
 		struct reloc_desc *relo = &prog->reloc_desc[i];
 		struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+		struct extern_desc *ext;
 
 		if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
 			pr_warn("relocation out of range: '%s'\n",
@@ -4959,9 +5021,10 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 			insn[0].imm = obj->maps[relo->map_idx].fd;
 			break;
 		case RELO_EXTERN:
+			ext = &obj->externs[relo->sym_off];
 			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 			insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
-			insn[1].imm = relo->sym_off;
+			insn[1].imm = ext->kcfg.data_off;
 			break;
 		case RELO_CALL:
 			err = bpf_program__reloc_text(prog, obj, relo);
@@ -5585,30 +5648,33 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 {
 	bool need_config = false;
 	struct extern_desc *ext;
+	void *kcfg_data = NULL;
 	int err, i;
-	void *data;
 
 	if (obj->nr_extern == 0)
 		return 0;
 
-	data = obj->maps[obj->kconfig_map_idx].mmaped;
+	if (obj->kconfig_map_idx >= 0)
+		kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
 
 	for (i = 0; i < obj->nr_extern; i++) {
 		ext = &obj->externs[i];
 
-		if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
-			void *ext_val = data + ext->data_off;
+		if (ext->type == EXT_KCFG &&
+		    strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
+			void *ext_val = kcfg_data + ext->kcfg.data_off;
 			__u32 kver = get_kernel_version();
 
 			if (!kver) {
 				pr_warn("failed to get kernel version\n");
 				return -EINVAL;
 			}
-			err = set_ext_value_num(ext, ext_val, kver);
+			err = set_kcfg_value_num(ext, ext_val, kver);
 			if (err)
 				return err;
-			pr_debug("extern %s=0x%x\n", ext->name, kver);
-		} else if (strncmp(ext->name, "CONFIG_", 7) == 0) {
+			pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
+		} else if (ext->type == EXT_KCFG &&
+			   strncmp(ext->name, "CONFIG_", 7) == 0) {
 			need_config = true;
 		} else {
 			pr_warn("unrecognized extern '%s'\n", ext->name);
@@ -5616,20 +5682,20 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 		}
 	}
 	if (need_config && extra_kconfig) {
-		err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data);
+		err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
 		if (err)
 			return -EINVAL;
 		need_config = false;
 		for (i = 0; i < obj->nr_extern; i++) {
 			ext = &obj->externs[i];
-			if (!ext->is_set) {
+			if (ext->type == EXT_KCFG && !ext->is_set) {
 				need_config = true;
 				break;
 			}
 		}
 	}
 	if (need_config) {
-		err = bpf_object__read_kconfig_file(obj, data);
+		err = bpf_object__read_kconfig_file(obj, kcfg_data);
 		if (err)
 			return -EINVAL;
 	}
-- 
cgit 


From 1c0c7074fefd769f62dda155e881ca90c9e3e75a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:16:56 -0700
Subject: libbpf: Add support for extracting kernel symbol addresses

Add support for another (in addition to existing Kconfig) special kind of
externs in BPF code, kernel symbol externs. Such externs allow BPF code to
"know" kernel symbol address and either use it for comparisons with kernel
data structures (e.g., struct file's f_op pointer, to distinguish different
kinds of file), or, with the help of bpf_probe_user_kernel(), to follow
pointers and read data from global variables. Kernel symbol addresses are
found through /proc/kallsyms, which should be present in the system.

Currently, such kernel symbol variables are typeless: they have to be defined
as `extern const void <symbol>` and the only operation you can do (in C code)
with them is to take its address. Such extern should reside in a special
section '.ksyms'. bpf_helpers.h header provides __ksym macro for this. Strong
vs weak semantics stays the same as with Kconfig externs. If symbol is not
found in /proc/kallsyms, this will be a failure for strong (non-weak) extern,
but will be defaulted to 0 for weak externs.

If the same symbol is defined multiple times in /proc/kallsyms, then it will
be error if any of the associated addresses differs. In that case, address is
ambiguous, so libbpf falls on the side of caution, rather than confusing user
with randomly chosen address.

In the future, once kernel is extended with variables BTF information, such
ksym externs will be supported in a typed version, which will allow BPF
program to read variable's contents directly, similarly to how it's done for
fentry/fexit input arguments.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-3-andriin@fb.com
---
 tools/lib/bpf/bpf_helpers.h |   1 +
 tools/lib/bpf/btf.h         |   5 ++
 tools/lib/bpf/libbpf.c      | 144 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 144 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index f67dce2af802..a510d8ed716f 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -75,5 +75,6 @@ enum libbpf_tristate {
 };
 
 #define __kconfig __attribute__((section(".kconfig")))
+#define __ksym __attribute__((section(".ksyms")))
 
 #endif
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 70c1b7ec2bd0..06cd1731c154 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -168,6 +168,11 @@ static inline bool btf_kflag(const struct btf_type *t)
 	return BTF_INFO_KFLAG(t->info);
 }
 
+static inline bool btf_is_void(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_UNKN;
+}
+
 static inline bool btf_is_int(const struct btf_type *t)
 {
 	return btf_kind(t) == BTF_KIND_INT;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ffccb5af32a5..18461deb1b19 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -285,6 +285,7 @@ struct bpf_struct_ops {
 #define BSS_SEC ".bss"
 #define RODATA_SEC ".rodata"
 #define KCONFIG_SEC ".kconfig"
+#define KSYMS_SEC ".ksyms"
 #define STRUCT_OPS_SEC ".struct_ops"
 
 enum libbpf_map_type {
@@ -331,6 +332,7 @@ struct bpf_map {
 enum extern_type {
 	EXT_UNKNOWN,
 	EXT_KCFG,
+	EXT_KSYM,
 };
 
 enum kcfg_type {
@@ -358,6 +360,9 @@ struct extern_desc {
 			int data_off;
 			bool is_signed;
 		} kcfg;
+		struct {
+			unsigned long long addr;
+		} ksym;
 	};
 };
 
@@ -2817,9 +2822,25 @@ static int cmp_externs(const void *_a, const void *_b)
 	return strcmp(a->name, b->name);
 }
 
+static int find_int_btf_id(const struct btf *btf)
+{
+	const struct btf_type *t;
+	int i, n;
+
+	n = btf__get_nr_types(btf);
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(btf, i);
+
+		if (btf_is_int(t) && btf_int_bits(t) == 32)
+			return i;
+	}
+
+	return 0;
+}
+
 static int bpf_object__collect_externs(struct bpf_object *obj)
 {
-	struct btf_type *sec, *kcfg_sec = NULL;
+	struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
 	const struct btf_type *t;
 	struct extern_desc *ext;
 	int i, n, off;
@@ -2900,6 +2921,17 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 				pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
 				return -ENOTSUP;
 			}
+		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
+			const struct btf_type *vt;
+
+			ksym_sec = sec;
+			ext->type = EXT_KSYM;
+
+			vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+			if (!btf_is_void(vt)) {
+				pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
+				return -ENOTSUP;
+			}
 		} else {
 			pr_warn("unrecognized extern section '%s'\n", sec_name);
 			return -ENOTSUP;
@@ -2913,6 +2945,46 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 	/* sort externs by type, for kcfg ones also by (align, size, name) */
 	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
 
+	/* for .ksyms section, we need to turn all externs into allocated
+	 * variables in BTF to pass kernel verification; we do this by
+	 * pretending that each extern is a 8-byte variable
+	 */
+	if (ksym_sec) {
+		/* find existing 4-byte integer type in BTF to use for fake
+		 * extern variables in DATASEC
+		 */
+		int int_btf_id = find_int_btf_id(obj->btf);
+
+		for (i = 0; i < obj->nr_extern; i++) {
+			ext = &obj->externs[i];
+			if (ext->type != EXT_KSYM)
+				continue;
+			pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
+				 i, ext->sym_idx, ext->name);
+		}
+
+		sec = ksym_sec;
+		n = btf_vlen(sec);
+		for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
+			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+			struct btf_type *vt;
+
+			vt = (void *)btf__type_by_id(obj->btf, vs->type);
+			ext_name = btf__name_by_offset(obj->btf, vt->name_off);
+			ext = find_extern_by_name(obj, ext_name);
+			if (!ext) {
+				pr_warn("failed to find extern definition for BTF var '%s'\n",
+					ext_name);
+				return -ESRCH;
+			}
+			btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+			vt->type = int_btf_id;
+			vs->offset = off;
+			vs->size = sizeof(int);
+		}
+		sec->size = off;
+	}
+
 	if (kcfg_sec) {
 		sec = kcfg_sec;
 		/* for kcfg externs calculate their offsets within a .kconfig map */
@@ -2924,7 +2996,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 
 			ext->kcfg.data_off = roundup(off, ext->kcfg.align);
 			off = ext->kcfg.data_off + ext->kcfg.sz;
-			pr_debug("extern #%d (kcfg): symbol %d, off %u, name %s\n",
+			pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
 				 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
 		}
 		sec->size = off;
@@ -5022,9 +5094,14 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 			break;
 		case RELO_EXTERN:
 			ext = &obj->externs[relo->sym_off];
-			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
-			insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
-			insn[1].imm = ext->kcfg.data_off;
+			if (ext->type == EXT_KCFG) {
+				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+				insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+				insn[1].imm = ext->kcfg.data_off;
+			} else /* EXT_KSYM */ {
+				insn[0].imm = (__u32)ext->ksym.addr;
+				insn[1].imm = ext->ksym.addr >> 32;
+			}
 			break;
 		case RELO_CALL:
 			err = bpf_program__reloc_text(prog, obj, relo);
@@ -5643,10 +5720,58 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
 	return 0;
 }
 
+static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
+{
+	char sym_type, sym_name[500];
+	unsigned long long sym_addr;
+	struct extern_desc *ext;
+	int ret, err = 0;
+	FILE *f;
+
+	f = fopen("/proc/kallsyms", "r");
+	if (!f) {
+		err = -errno;
+		pr_warn("failed to open /proc/kallsyms: %d\n", err);
+		return err;
+	}
+
+	while (true) {
+		ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
+			     &sym_addr, &sym_type, sym_name);
+		if (ret == EOF && feof(f))
+			break;
+		if (ret != 3) {
+			pr_warn("failed to read kallasyms entry: %d\n", ret);
+			err = -EINVAL;
+			goto out;
+		}
+
+		ext = find_extern_by_name(obj, sym_name);
+		if (!ext || ext->type != EXT_KSYM)
+			continue;
+
+		if (ext->is_set && ext->ksym.addr != sym_addr) {
+			pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
+				sym_name, ext->ksym.addr, sym_addr);
+			err = -EINVAL;
+			goto out;
+		}
+		if (!ext->is_set) {
+			ext->is_set = true;
+			ext->ksym.addr = sym_addr;
+			pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
+		}
+	}
+
+out:
+	fclose(f);
+	return err;
+}
+
 static int bpf_object__resolve_externs(struct bpf_object *obj,
 				       const char *extra_kconfig)
 {
-	bool need_config = false;
+	bool need_config = false, need_kallsyms = false;
 	struct extern_desc *ext;
 	void *kcfg_data = NULL;
 	int err, i;
@@ -5676,6 +5801,8 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 		} else if (ext->type == EXT_KCFG &&
 			   strncmp(ext->name, "CONFIG_", 7) == 0) {
 			need_config = true;
+		} else if (ext->type == EXT_KSYM) {
+			need_kallsyms = true;
 		} else {
 			pr_warn("unrecognized extern '%s'\n", ext->name);
 			return -EINVAL;
@@ -5699,6 +5826,11 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 		if (err)
 			return -EINVAL;
 	}
+	if (need_kallsyms) {
+		err = bpf_object__read_kallsyms_file(obj);
+		if (err)
+			return -EINVAL;
+	}
 	for (i = 0; i < obj->nr_extern; i++) {
 		ext = &obj->externs[i];
 
-- 
cgit 


From b7ddfab20a6af3a0e366000eada63adf6a7683e7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:16:57 -0700
Subject: selftests/bpf: Add __ksym extern selftest

Validate libbpf is able to handle weak and strong kernel symbol externs in BPF
code correctly.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-4-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/ksyms.c | 71 ++++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_ksyms.c | 32 ++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/ksyms.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_ksyms.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
new file mode 100644
index 000000000000..e3d6777226a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_ksyms.skel.h"
+#include <sys/stat.h>
+
+static int duration;
+
+static __u64 kallsyms_find(const char *sym)
+{
+	char type, name[500];
+	__u64 addr, res = 0;
+	FILE *f;
+
+	f = fopen("/proc/kallsyms", "r");
+	if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno))
+		return 0;
+
+	while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) {
+		if (strcmp(name, sym) == 0) {
+			res = addr;
+			goto out;
+		}
+	}
+
+	CHECK(false, "not_found", "symbol %s not found\n", sym);
+out:
+	fclose(f);
+	return res;
+}
+
+void test_ksyms(void)
+{
+	__u64 link_fops_addr = kallsyms_find("bpf_link_fops");
+	const char *btf_path = "/sys/kernel/btf/vmlinux";
+	struct test_ksyms *skel;
+	struct test_ksyms__data *data;
+	struct stat st;
+	__u64 btf_size;
+	int err;
+
+	if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
+		return;
+	btf_size = st.st_size;
+
+	skel = test_ksyms__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+		return;
+
+	err = test_ksyms__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	usleep(1);
+
+	data = skel->data;
+	CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops",
+	      "got 0x%llx, exp 0x%llx\n",
+	      data->out__bpf_link_fops, link_fops_addr);
+	CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1",
+	      "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
+	CHECK(data->out__btf_size != btf_size, "btf_size",
+	      "got %llu, exp %llu\n", data->out__btf_size, btf_size);
+	CHECK(data->out__per_cpu_start != 0, "__per_cpu_start",
+	      "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0);
+
+cleanup:
+	test_ksyms__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms.c b/tools/testing/selftests/bpf/progs/test_ksyms.c
new file mode 100644
index 000000000000..6c9cbb5a3bdf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u64 out__bpf_link_fops = -1;
+__u64 out__bpf_link_fops1 = -1;
+__u64 out__btf_size = -1;
+__u64 out__per_cpu_start = -1;
+
+extern const void bpf_link_fops __ksym;
+extern const void __start_BTF __ksym;
+extern const void __stop_BTF __ksym;
+extern const void __per_cpu_start __ksym;
+/* non-existing symbol, weak, default to zero */
+extern const void bpf_link_fops1 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+	out__bpf_link_fops = (__u64)&bpf_link_fops;
+	out__btf_size = (__u64)(&__stop_BTF - &__start_BTF);
+	out__per_cpu_start = (__u64)&__per_cpu_start;
+
+	out__bpf_link_fops1 = (__u64)&bpf_link_fops1;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From a479b8ce4ed1457f814be6f67a8447a9af38f235 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:16:58 -0700
Subject: tools/bpftool: Move map/prog parsing logic into common

Move functions that parse map and prog by id/tag/name/etc outside of
map.c/prog.c, respectively. These functions are used outside of those files
and are generic enough to be in common. This also makes heavy-weight map.c and
prog.c more decoupled from the rest of bpftool files and facilitates more
lightweight bootstrap bpftool variant.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-5-andriin@fb.com
---
 tools/bpf/bpftool/common.c | 308 +++++++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.h   |   2 +
 tools/bpf/bpftool/map.c    | 156 -----------------------
 tools/bpf/bpftool/prog.c   | 152 ----------------------
 4 files changed, 310 insertions(+), 308 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index c47bdc65de8e..6c864c3683fc 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -581,3 +581,311 @@ print_all_levels(__maybe_unused enum libbpf_print_level level,
 {
 	return vfprintf(stderr, format, args);
 }
+
+static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
+{
+	unsigned int id = 0;
+	int fd, nb_fds = 0;
+	void *tmp;
+	int err;
+
+	while (true) {
+		struct bpf_prog_info info = {};
+		__u32 len = sizeof(info);
+
+		err = bpf_prog_get_next_id(id, &id);
+		if (err) {
+			if (errno != ENOENT) {
+				p_err("%s", strerror(errno));
+				goto err_close_fds;
+			}
+			return nb_fds;
+		}
+
+		fd = bpf_prog_get_fd_by_id(id);
+		if (fd < 0) {
+			p_err("can't get prog by id (%u): %s",
+			      id, strerror(errno));
+			goto err_close_fds;
+		}
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			p_err("can't get prog info (%u): %s",
+			      id, strerror(errno));
+			goto err_close_fd;
+		}
+
+		if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
+		    (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
+			close(fd);
+			continue;
+		}
+
+		if (nb_fds > 0) {
+			tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+			if (!tmp) {
+				p_err("failed to realloc");
+				goto err_close_fd;
+			}
+			*fds = tmp;
+		}
+		(*fds)[nb_fds++] = fd;
+	}
+
+err_close_fd:
+	close(fd);
+err_close_fds:
+	while (--nb_fds >= 0)
+		close((*fds)[nb_fds]);
+	return -1;
+}
+
+int prog_parse_fds(int *argc, char ***argv, int **fds)
+{
+	if (is_prefix(**argv, "id")) {
+		unsigned int id;
+		char *endptr;
+
+		NEXT_ARGP();
+
+		id = strtoul(**argv, &endptr, 0);
+		if (*endptr) {
+			p_err("can't parse %s as ID", **argv);
+			return -1;
+		}
+		NEXT_ARGP();
+
+		(*fds)[0] = bpf_prog_get_fd_by_id(id);
+		if ((*fds)[0] < 0) {
+			p_err("get by id (%u): %s", id, strerror(errno));
+			return -1;
+		}
+		return 1;
+	} else if (is_prefix(**argv, "tag")) {
+		unsigned char tag[BPF_TAG_SIZE];
+
+		NEXT_ARGP();
+
+		if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
+			   tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
+		    != BPF_TAG_SIZE) {
+			p_err("can't parse tag");
+			return -1;
+		}
+		NEXT_ARGP();
+
+		return prog_fd_by_nametag(tag, fds, true);
+	} else if (is_prefix(**argv, "name")) {
+		char *name;
+
+		NEXT_ARGP();
+
+		name = **argv;
+		if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+			p_err("can't parse name");
+			return -1;
+		}
+		NEXT_ARGP();
+
+		return prog_fd_by_nametag(name, fds, false);
+	} else if (is_prefix(**argv, "pinned")) {
+		char *path;
+
+		NEXT_ARGP();
+
+		path = **argv;
+		NEXT_ARGP();
+
+		(*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
+		if ((*fds)[0] < 0)
+			return -1;
+		return 1;
+	}
+
+	p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
+	return -1;
+}
+
+int prog_parse_fd(int *argc, char ***argv)
+{
+	int *fds = NULL;
+	int nb_fds, fd;
+
+	fds = malloc(sizeof(int));
+	if (!fds) {
+		p_err("mem alloc failed");
+		return -1;
+	}
+	nb_fds = prog_parse_fds(argc, argv, &fds);
+	if (nb_fds != 1) {
+		if (nb_fds > 1) {
+			p_err("several programs match this handle");
+			while (nb_fds--)
+				close(fds[nb_fds]);
+		}
+		fd = -1;
+		goto exit_free;
+	}
+
+	fd = fds[0];
+exit_free:
+	free(fds);
+	return fd;
+}
+
+static int map_fd_by_name(char *name, int **fds)
+{
+	unsigned int id = 0;
+	int fd, nb_fds = 0;
+	void *tmp;
+	int err;
+
+	while (true) {
+		struct bpf_map_info info = {};
+		__u32 len = sizeof(info);
+
+		err = bpf_map_get_next_id(id, &id);
+		if (err) {
+			if (errno != ENOENT) {
+				p_err("%s", strerror(errno));
+				goto err_close_fds;
+			}
+			return nb_fds;
+		}
+
+		fd = bpf_map_get_fd_by_id(id);
+		if (fd < 0) {
+			p_err("can't get map by id (%u): %s",
+			      id, strerror(errno));
+			goto err_close_fds;
+		}
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			p_err("can't get map info (%u): %s",
+			      id, strerror(errno));
+			goto err_close_fd;
+		}
+
+		if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
+			close(fd);
+			continue;
+		}
+
+		if (nb_fds > 0) {
+			tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+			if (!tmp) {
+				p_err("failed to realloc");
+				goto err_close_fd;
+			}
+			*fds = tmp;
+		}
+		(*fds)[nb_fds++] = fd;
+	}
+
+err_close_fd:
+	close(fd);
+err_close_fds:
+	while (--nb_fds >= 0)
+		close((*fds)[nb_fds]);
+	return -1;
+}
+
+int map_parse_fds(int *argc, char ***argv, int **fds)
+{
+	if (is_prefix(**argv, "id")) {
+		unsigned int id;
+		char *endptr;
+
+		NEXT_ARGP();
+
+		id = strtoul(**argv, &endptr, 0);
+		if (*endptr) {
+			p_err("can't parse %s as ID", **argv);
+			return -1;
+		}
+		NEXT_ARGP();
+
+		(*fds)[0] = bpf_map_get_fd_by_id(id);
+		if ((*fds)[0] < 0) {
+			p_err("get map by id (%u): %s", id, strerror(errno));
+			return -1;
+		}
+		return 1;
+	} else if (is_prefix(**argv, "name")) {
+		char *name;
+
+		NEXT_ARGP();
+
+		name = **argv;
+		if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+			p_err("can't parse name");
+			return -1;
+		}
+		NEXT_ARGP();
+
+		return map_fd_by_name(name, fds);
+	} else if (is_prefix(**argv, "pinned")) {
+		char *path;
+
+		NEXT_ARGP();
+
+		path = **argv;
+		NEXT_ARGP();
+
+		(*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
+		if ((*fds)[0] < 0)
+			return -1;
+		return 1;
+	}
+
+	p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
+	return -1;
+}
+
+int map_parse_fd(int *argc, char ***argv)
+{
+	int *fds = NULL;
+	int nb_fds, fd;
+
+	fds = malloc(sizeof(int));
+	if (!fds) {
+		p_err("mem alloc failed");
+		return -1;
+	}
+	nb_fds = map_parse_fds(argc, argv, &fds);
+	if (nb_fds != 1) {
+		if (nb_fds > 1) {
+			p_err("several maps match this handle");
+			while (nb_fds--)
+				close(fds[nb_fds]);
+		}
+		fd = -1;
+		goto exit_free;
+	}
+
+	fd = fds[0];
+exit_free:
+	free(fds);
+	return fd;
+}
+
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+{
+	int err;
+	int fd;
+
+	fd = map_parse_fd(argc, argv);
+	if (fd < 0)
+		return -1;
+
+	err = bpf_obj_get_info_by_fd(fd, info, info_len);
+	if (err) {
+		p_err("can't get map info: %s", strerror(errno));
+		close(fd);
+		return err;
+	}
+
+	return fd;
+}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 5cdf0bc049bd..4338ab9d86d4 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -210,7 +210,9 @@ int do_iter(int argc, char **argv);
 
 int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
+int prog_parse_fds(int *argc, char ***argv, int **fds);
 int map_parse_fd(int *argc, char ***argv);
+int map_parse_fds(int *argc, char ***argv, int **fds);
 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
 
 struct bpf_prog_linfo;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index c5fac8068ba1..b9eee19b094c 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -92,162 +92,6 @@ static void *alloc_value(struct bpf_map_info *info)
 		return malloc(info->value_size);
 }
 
-static int map_fd_by_name(char *name, int **fds)
-{
-	unsigned int id = 0;
-	int fd, nb_fds = 0;
-	void *tmp;
-	int err;
-
-	while (true) {
-		struct bpf_map_info info = {};
-		__u32 len = sizeof(info);
-
-		err = bpf_map_get_next_id(id, &id);
-		if (err) {
-			if (errno != ENOENT) {
-				p_err("%s", strerror(errno));
-				goto err_close_fds;
-			}
-			return nb_fds;
-		}
-
-		fd = bpf_map_get_fd_by_id(id);
-		if (fd < 0) {
-			p_err("can't get map by id (%u): %s",
-			      id, strerror(errno));
-			goto err_close_fds;
-		}
-
-		err = bpf_obj_get_info_by_fd(fd, &info, &len);
-		if (err) {
-			p_err("can't get map info (%u): %s",
-			      id, strerror(errno));
-			goto err_close_fd;
-		}
-
-		if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
-			close(fd);
-			continue;
-		}
-
-		if (nb_fds > 0) {
-			tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
-			if (!tmp) {
-				p_err("failed to realloc");
-				goto err_close_fd;
-			}
-			*fds = tmp;
-		}
-		(*fds)[nb_fds++] = fd;
-	}
-
-err_close_fd:
-	close(fd);
-err_close_fds:
-	while (--nb_fds >= 0)
-		close((*fds)[nb_fds]);
-	return -1;
-}
-
-static int map_parse_fds(int *argc, char ***argv, int **fds)
-{
-	if (is_prefix(**argv, "id")) {
-		unsigned int id;
-		char *endptr;
-
-		NEXT_ARGP();
-
-		id = strtoul(**argv, &endptr, 0);
-		if (*endptr) {
-			p_err("can't parse %s as ID", **argv);
-			return -1;
-		}
-		NEXT_ARGP();
-
-		(*fds)[0] = bpf_map_get_fd_by_id(id);
-		if ((*fds)[0] < 0) {
-			p_err("get map by id (%u): %s", id, strerror(errno));
-			return -1;
-		}
-		return 1;
-	} else if (is_prefix(**argv, "name")) {
-		char *name;
-
-		NEXT_ARGP();
-
-		name = **argv;
-		if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
-			p_err("can't parse name");
-			return -1;
-		}
-		NEXT_ARGP();
-
-		return map_fd_by_name(name, fds);
-	} else if (is_prefix(**argv, "pinned")) {
-		char *path;
-
-		NEXT_ARGP();
-
-		path = **argv;
-		NEXT_ARGP();
-
-		(*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
-		if ((*fds)[0] < 0)
-			return -1;
-		return 1;
-	}
-
-	p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
-	return -1;
-}
-
-int map_parse_fd(int *argc, char ***argv)
-{
-	int *fds = NULL;
-	int nb_fds, fd;
-
-	fds = malloc(sizeof(int));
-	if (!fds) {
-		p_err("mem alloc failed");
-		return -1;
-	}
-	nb_fds = map_parse_fds(argc, argv, &fds);
-	if (nb_fds != 1) {
-		if (nb_fds > 1) {
-			p_err("several maps match this handle");
-			while (nb_fds--)
-				close(fds[nb_fds]);
-		}
-		fd = -1;
-		goto exit_free;
-	}
-
-	fd = fds[0];
-exit_free:
-	free(fds);
-	return fd;
-}
-
-int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
-{
-	int err;
-	int fd;
-
-	fd = map_parse_fd(argc, argv);
-	if (fd < 0)
-		return -1;
-
-	err = bpf_obj_get_info_by_fd(fd, info, info_len);
-	if (err) {
-		p_err("can't get map info: %s", strerror(errno));
-		close(fd);
-		return err;
-	}
-
-	return fd;
-}
-
 static int do_dump_btf(const struct btf_dumper *d,
 		       struct bpf_map_info *map_info, void *key,
 		       void *value)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index a5eff83496f2..53d47610ff58 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -86,158 +86,6 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
 		strftime(buf, size, "%FT%T%z", &load_tm);
 }
 
-static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
-{
-	unsigned int id = 0;
-	int fd, nb_fds = 0;
-	void *tmp;
-	int err;
-
-	while (true) {
-		struct bpf_prog_info info = {};
-		__u32 len = sizeof(info);
-
-		err = bpf_prog_get_next_id(id, &id);
-		if (err) {
-			if (errno != ENOENT) {
-				p_err("%s", strerror(errno));
-				goto err_close_fds;
-			}
-			return nb_fds;
-		}
-
-		fd = bpf_prog_get_fd_by_id(id);
-		if (fd < 0) {
-			p_err("can't get prog by id (%u): %s",
-			      id, strerror(errno));
-			goto err_close_fds;
-		}
-
-		err = bpf_obj_get_info_by_fd(fd, &info, &len);
-		if (err) {
-			p_err("can't get prog info (%u): %s",
-			      id, strerror(errno));
-			goto err_close_fd;
-		}
-
-		if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
-		    (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
-			close(fd);
-			continue;
-		}
-
-		if (nb_fds > 0) {
-			tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
-			if (!tmp) {
-				p_err("failed to realloc");
-				goto err_close_fd;
-			}
-			*fds = tmp;
-		}
-		(*fds)[nb_fds++] = fd;
-	}
-
-err_close_fd:
-	close(fd);
-err_close_fds:
-	while (--nb_fds >= 0)
-		close((*fds)[nb_fds]);
-	return -1;
-}
-
-static int prog_parse_fds(int *argc, char ***argv, int **fds)
-{
-	if (is_prefix(**argv, "id")) {
-		unsigned int id;
-		char *endptr;
-
-		NEXT_ARGP();
-
-		id = strtoul(**argv, &endptr, 0);
-		if (*endptr) {
-			p_err("can't parse %s as ID", **argv);
-			return -1;
-		}
-		NEXT_ARGP();
-
-		(*fds)[0] = bpf_prog_get_fd_by_id(id);
-		if ((*fds)[0] < 0) {
-			p_err("get by id (%u): %s", id, strerror(errno));
-			return -1;
-		}
-		return 1;
-	} else if (is_prefix(**argv, "tag")) {
-		unsigned char tag[BPF_TAG_SIZE];
-
-		NEXT_ARGP();
-
-		if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
-			   tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
-		    != BPF_TAG_SIZE) {
-			p_err("can't parse tag");
-			return -1;
-		}
-		NEXT_ARGP();
-
-		return prog_fd_by_nametag(tag, fds, true);
-	} else if (is_prefix(**argv, "name")) {
-		char *name;
-
-		NEXT_ARGP();
-
-		name = **argv;
-		if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
-			p_err("can't parse name");
-			return -1;
-		}
-		NEXT_ARGP();
-
-		return prog_fd_by_nametag(name, fds, false);
-	} else if (is_prefix(**argv, "pinned")) {
-		char *path;
-
-		NEXT_ARGP();
-
-		path = **argv;
-		NEXT_ARGP();
-
-		(*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
-		if ((*fds)[0] < 0)
-			return -1;
-		return 1;
-	}
-
-	p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
-	return -1;
-}
-
-int prog_parse_fd(int *argc, char ***argv)
-{
-	int *fds = NULL;
-	int nb_fds, fd;
-
-	fds = malloc(sizeof(int));
-	if (!fds) {
-		p_err("mem alloc failed");
-		return -1;
-	}
-	nb_fds = prog_parse_fds(argc, argv, &fds);
-	if (nb_fds != 1) {
-		if (nb_fds > 1) {
-			p_err("several programs match this handle");
-			while (nb_fds--)
-				close(fds[nb_fds]);
-		}
-		fd = -1;
-		goto exit_free;
-	}
-
-	fd = fds[0];
-exit_free:
-	free(fds);
-	return fd;
-}
-
 static void show_prog_maps(int fd, __u32 num_maps)
 {
 	struct bpf_prog_info info = {};
-- 
cgit 


From 16e9b187aba60f9014f11a7e95b878850b6c95e5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:16:59 -0700
Subject: tools/bpftool: Minimize bootstrap bpftool

Build minimal "bootstrap mode" bpftool to enable skeleton (and, later,
vmlinux.h generation), instead of building almost complete, but slightly
different (w/o skeletons, etc) bpftool to bootstrap complete bpftool build.

Current approach doesn't scale well (engineering-wise) when adding more BPF
programs to bpftool and other complicated functionality, as it requires
constant adjusting of the code to work in both bootstrapped mode and normal
mode.

So it's better to build only minimal bpftool version that supports only BPF
skeleton code generation and BTF-to-C conversion. Thankfully, this is quite
easy to accomplish due to internal modularity of bpftool commands. This will
also allow to keep adding new functionality to bpftool in general, without the
need to care about bootstrap mode for those new parts of bpftool.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-6-andriin@fb.com
---
 tools/bpf/bpftool/.gitignore |  2 +-
 tools/bpf/bpftool/Makefile   | 30 +++++++++++++-----------------
 tools/bpf/bpftool/main.c     | 11 +++++++++--
 tools/bpf/bpftool/main.h     | 27 +++++++++++++++------------
 4 files changed, 38 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 26cde83e1ca3..ce721adf3161 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 *.d
-/_bpftool
+/bpftool-bootstrap
 /bpftool
 bpftool*.8
 bpf-helpers.*
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 9e85f101be85..eec2da4d45d2 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -116,40 +116,36 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT
 SRCS += $(BFD_SRCS)
 endif
 
+BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap)
+
+BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
-_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o
 
-ifeq ($(feature-clang-bpf-global-var),1)
-	__OBJS = $(OBJS)
-else
-	__OBJS = $(_OBJS)
+ifneq ($(feature-clang-bpf-global-var),1)
+	CFLAGS += -DBPFTOOL_WITHOUT_SKELETONS
 endif
 
-$(OUTPUT)_prog.o: prog.c
-	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
-
-$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
-
 skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
 	$(QUIET_CLANG)$(CLANG) \
 		-I$(srctree)/tools/include/uapi/ \
 		-I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
 		-g -O2 -target bpf -c $< -o $@
 
-profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
-	$(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
+profiler.skel.h: $(BPFTOOL_BOOTSTRAP) skeleton/profiler.bpf.o
+	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton skeleton/profiler.bpf.o > $@
 
 $(OUTPUT)prog.o: prog.c profiler.skel.h
-	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
 $(OUTPUT)feature.o: | zdep
 
-$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
+$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS)
+
+$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
 
 $(OUTPUT)%.o: %.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
@@ -157,7 +153,7 @@ $(OUTPUT)%.o: %.c
 clean: $(LIBBPF)-clean
 	$(call QUIET_CLEAN, bpftool)
 	$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
-	$(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o
+	$(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) profiler.skel.h skeleton/profiler.bpf.o
 	$(Q)$(RM) -r -- $(OUTPUT)libbpf/
 	$(call QUIET_CLEAN, core-gen)
 	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 46bd716a9d86..bf4d7487552a 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -92,9 +92,16 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
 	if (argc < 1 && cmds[0].func)
 		return cmds[0].func(argc, argv);
 
-	for (i = 0; cmds[i].func; i++)
-		if (is_prefix(*argv, cmds[i].cmd))
+	for (i = 0; cmds[i].cmd; i++) {
+		if (is_prefix(*argv, cmds[i].cmd)) {
+			if (!cmds[i].func) {
+				p_err("command '%s' is not supported in bootstrap mode",
+				      cmds[i].cmd);
+				return -1;
+			}
 			return cmds[i].func(argc - 1, argv + 1);
+		}
+	}
 
 	help(argc - 1, argv + 1);
 
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 4338ab9d86d4..aad7be74e8a7 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -194,19 +194,22 @@ int mount_bpffs_for_pin(const char *name);
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
 int do_pin_fd(int fd, const char *name);
 
-int do_prog(int argc, char **arg);
-int do_map(int argc, char **arg);
-int do_link(int argc, char **arg);
-int do_event_pipe(int argc, char **argv);
-int do_cgroup(int argc, char **arg);
-int do_perf(int argc, char **arg);
-int do_net(int argc, char **arg);
-int do_tracelog(int argc, char **arg);
-int do_feature(int argc, char **argv);
-int do_btf(int argc, char **argv);
+/* commands available in bootstrap mode */
 int do_gen(int argc, char **argv);
-int do_struct_ops(int argc, char **argv);
-int do_iter(int argc, char **argv);
+int do_btf(int argc, char **argv);
+
+/* non-bootstrap only commands */
+int do_prog(int argc, char **arg) __weak;
+int do_map(int argc, char **arg) __weak;
+int do_link(int argc, char **arg) __weak;
+int do_event_pipe(int argc, char **argv) __weak;
+int do_cgroup(int argc, char **arg) __weak;
+int do_perf(int argc, char **arg) __weak;
+int do_net(int argc, char **arg) __weak;
+int do_tracelog(int argc, char **arg) __weak;
+int do_feature(int argc, char **argv) __weak;
+int do_struct_ops(int argc, char **argv) __weak;
+int do_iter(int argc, char **argv) __weak;
 
 int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
-- 
cgit 


From 05aca6da3b5ab3c5c6003dbbefc9580d9a6a308b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:17:00 -0700
Subject: tools/bpftool: Generalize BPF skeleton support and generate vmlinux.h

Adapt Makefile to support BPF skeleton generation beyond single profiler.bpf.c
case. Also add vmlinux.h generation and switch profiler.bpf.c to use it.

clang-bpf-global-var feature is extended and renamed to clang-bpf-co-re to
check for support of preserve_access_index attribute, which, together with BTF
for global variables, is the minimum requirement for modern BPF programs.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-7-andriin@fb.com
---
 tools/bpf/bpftool/.gitignore                    |  3 +-
 tools/bpf/bpftool/Makefile                      | 42 ++++++++++++++++------
 tools/bpf/bpftool/skeleton/profiler.bpf.c       |  3 +-
 tools/bpf/bpftool/skeleton/profiler.h           | 46 -------------------------
 tools/build/feature/Makefile                    |  4 +--
 tools/build/feature/test-clang-bpf-co-re.c      |  9 +++++
 tools/build/feature/test-clang-bpf-global-var.c |  4 ---
 7 files changed, 45 insertions(+), 66 deletions(-)
 delete mode 100644 tools/bpf/bpftool/skeleton/profiler.h
 create mode 100644 tools/build/feature/test-clang-bpf-co-re.c
 delete mode 100644 tools/build/feature/test-clang-bpf-global-var.c

(limited to 'tools')

diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index ce721adf3161..3e601bcfd461 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -7,4 +7,5 @@ bpf-helpers.*
 FEATURE-DUMP.bpftool
 feature
 libbpf
-profiler.skel.h
+/*.skel.h
+/vmlinux.h
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index eec2da4d45d2..bdb6e38c6c5c 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -42,6 +42,7 @@ CFLAGS += -O2
 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
 CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS))
 CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
+	-I$(if $(OUTPUT),$(OUTPUT),.) \
 	-I$(srctree)/kernel/bpf/ \
 	-I$(srctree)/tools/include \
 	-I$(srctree)/tools/include/uapi \
@@ -61,9 +62,9 @@ CLANG ?= clang
 
 FEATURE_USER = .bpftool
 FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \
-	clang-bpf-global-var
+	clang-bpf-co-re
 FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
-	clang-bpf-global-var
+	clang-bpf-co-re
 
 check_feat := 1
 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -121,20 +122,38 @@ BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstr
 BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
-ifneq ($(feature-clang-bpf-global-var),1)
-	CFLAGS += -DBPFTOOL_WITHOUT_SKELETONS
-endif
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)				\
+		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
+		     ../../../vmlinux					\
+		     /sys/kernel/btf/vmlinux				\
+		     /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+ifneq ($(VMLINUX_BTF),)
+ifeq ($(feature-clang-bpf-co-re),1)
+
+BUILD_BPF_SKELS := 1
+
+$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP)
+	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@
 
-skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
+$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF)
 	$(QUIET_CLANG)$(CLANG) \
+		-I$(if $(OUTPUT),$(OUTPUT),.) \
 		-I$(srctree)/tools/include/uapi/ \
-		-I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
+		-I$(LIBBPF_PATH) \
+		-I$(srctree)/tools/lib \
 		-g -O2 -target bpf -c $< -o $@
 
-profiler.skel.h: $(BPFTOOL_BOOTSTRAP) skeleton/profiler.bpf.o
-	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton skeleton/profiler.bpf.o > $@
+$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
+	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@
+
+$(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h
+
+endif
+endif
 
-$(OUTPUT)prog.o: prog.c profiler.skel.h
+CFLAGS += $(if BUILD_BPF_SKELS,,-DBPFTOOL_WITHOUT_SKELETONS)
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
@@ -153,7 +172,7 @@ $(OUTPUT)%.o: %.c
 clean: $(LIBBPF)-clean
 	$(call QUIET_CLEAN, bpftool)
 	$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
-	$(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) profiler.skel.h skeleton/profiler.bpf.o
+	$(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
 	$(Q)$(RM) -r -- $(OUTPUT)libbpf/
 	$(call QUIET_CLEAN, core-gen)
 	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
@@ -188,6 +207,7 @@ FORCE:
 zdep:
 	@if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi
 
+.SECONDARY:
 .PHONY: all FORCE clean install uninstall zdep
 .PHONY: doc doc-clean doc-install doc-uninstall
 .DEFAULT_GOAL := all
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index c9d196ddb670..4e3512f700c0 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 // Copyright (c) 2020 Facebook
-#include "profiler.h"
-#include <linux/bpf.h>
+#include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h
deleted file mode 100644
index 1f767e9510f7..000000000000
--- a/tools/bpf/bpftool/skeleton/profiler.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __PROFILER_H
-#define __PROFILER_H
-
-/* useful typedefs from vmlinux.h */
-
-typedef signed char __s8;
-typedef unsigned char __u8;
-typedef short int __s16;
-typedef short unsigned int __u16;
-typedef int __s32;
-typedef unsigned int __u32;
-typedef long long int __s64;
-typedef long long unsigned int __u64;
-
-typedef __s8 s8;
-typedef __u8 u8;
-typedef __s16 s16;
-typedef __u16 u16;
-typedef __s32 s32;
-typedef __u32 u32;
-typedef __s64 s64;
-typedef __u64 u64;
-
-enum {
-	false = 0,
-	true = 1,
-};
-
-#ifdef __CHECKER__
-#define __bitwise__ __attribute__((bitwise))
-#else
-#define __bitwise__
-#endif
-
-typedef __u16 __bitwise__ __le16;
-typedef __u16 __bitwise__ __be16;
-typedef __u32 __bitwise__ __le32;
-typedef __u32 __bitwise__ __be32;
-typedef __u64 __bitwise__ __le64;
-typedef __u64 __bitwise__ __be64;
-
-typedef __u16 __bitwise__ __sum16;
-typedef __u32 __bitwise__ __wsum;
-
-#endif /* __PROFILER_H */
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index b1f0321180f5..88371f7f0369 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -68,7 +68,7 @@ FILES=                                          \
          test-llvm-version.bin			\
          test-libaio.bin			\
          test-libzstd.bin			\
-         test-clang-bpf-global-var.bin		\
+         test-clang-bpf-co-re.bin		\
          test-file-handle.bin			\
          test-libpfm4.bin
 
@@ -325,7 +325,7 @@ $(OUTPUT)test-libaio.bin:
 $(OUTPUT)test-libzstd.bin:
 	$(BUILD) -lzstd
 
-$(OUTPUT)test-clang-bpf-global-var.bin:
+$(OUTPUT)test-clang-bpf-co-re.bin:
 	$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) |	\
 		grep BTF_KIND_VAR
 
diff --git a/tools/build/feature/test-clang-bpf-co-re.c b/tools/build/feature/test-clang-bpf-co-re.c
new file mode 100644
index 000000000000..cb5265bfdd83
--- /dev/null
+++ b/tools/build/feature/test-clang-bpf-co-re.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+struct test {
+	int a;
+	int b;
+} __attribute__((preserve_access_index));
+
+volatile struct test global_value_for_test = {};
diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c
deleted file mode 100644
index 221f1481d52e..000000000000
--- a/tools/build/feature/test-clang-bpf-global-var.c
+++ /dev/null
@@ -1,4 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2020 Facebook
-
-volatile int global_value_for_test = 1;
-- 
cgit 


From bd9bedf84b87289b9a87eebfe7917e54373e99f9 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:17:01 -0700
Subject: libbpf: Wrap source argument of BPF_CORE_READ macro in parentheses

Wrap source argument of BPF_CORE_READ family of macros into parentheses to
allow uses like this:

BPF_CORE_READ((struct cast_struct *)src, a, b, c);

Fixes: 7db3822ab991 ("libbpf: Add BPF_CORE_READ/BPF_CORE_READ_INTO helpers")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200619231703.738941-8-andriin@fb.com
---
 tools/lib/bpf/bpf_core_read.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index 7009dc90e012..eae5cccff761 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -217,7 +217,7 @@ enum bpf_field_info_kind {
  */
 #define BPF_CORE_READ_INTO(dst, src, a, ...)				    \
 	({								    \
-		___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__)	    \
+		___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__)   \
 	})
 
 /*
@@ -227,7 +227,7 @@ enum bpf_field_info_kind {
  */
 #define BPF_CORE_READ_STR_INTO(dst, src, a, ...)			    \
 	({								    \
-		___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \
+		___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\
 	})
 
 /*
@@ -254,8 +254,8 @@ enum bpf_field_info_kind {
  */
 #define BPF_CORE_READ(src, a, ...)					    \
 	({								    \
-		___type(src, a, ##__VA_ARGS__) __r;			    \
-		BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__);	    \
+		___type((src), a, ##__VA_ARGS__) __r;			    \
+		BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__);	    \
 		__r;							    \
 	})
 
-- 
cgit 


From d53dee3fe0138610fcce5721bae9414377c41ec3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:17:02 -0700
Subject: tools/bpftool: Show info for processes holding BPF map/prog/link/btf
 FDs

Add bpf_iter-based way to find all the processes that hold open FDs against
BPF object (map, prog, link, btf). bpftool always attempts to discover this,
but will silently give up if kernel doesn't yet support bpf_iter BPF programs.
Process name and PID are emitted for each process (task group).

Sample output for each of 4 BPF objects:

$ sudo ./bpftool prog show
2694: cgroup_device  tag 8c42dee26e8cd4c2  gpl
        loaded_at 2020-06-16T15:34:32-0700  uid 0
        xlated 648B  jited 409B  memlock 4096B
        pids systemd(1)
2907: cgroup_skb  name egress  tag 9ad187367cf2b9e8  gpl
        loaded_at 2020-06-16T18:06:54-0700  uid 0
        xlated 48B  jited 59B  memlock 4096B  map_ids 2436
        btf_id 1202
        pids test_progs(2238417), test_progs(2238445)

$ sudo ./bpftool map show
2436: array  name test_cgr.bss  flags 0x400
        key 4B  value 8B  max_entries 1  memlock 8192B
        btf_id 1202
        pids test_progs(2238417), test_progs(2238445)
2445: array  name pid_iter.rodata  flags 0x480
        key 4B  value 4B  max_entries 1  memlock 8192B
        btf_id 1214  frozen
        pids bpftool(2239612)

$ sudo ./bpftool link show
61: cgroup  prog 2908
        cgroup_id 375301  attach_type egress
        pids test_progs(2238417), test_progs(2238445)
62: cgroup  prog 2908
        cgroup_id 375344  attach_type egress
        pids test_progs(2238417), test_progs(2238445)

$ sudo ./bpftool btf show
1202: size 1527B  prog_ids 2908,2907  map_ids 2436
        pids test_progs(2238417), test_progs(2238445)
1242: size 34684B
        pids bpftool(2258892)

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-9-andriin@fb.com
---
 tools/bpf/bpftool/Makefile                |   2 +
 tools/bpf/bpftool/btf.c                   |   6 +
 tools/bpf/bpftool/link.c                  |   7 +
 tools/bpf/bpftool/main.c                  |   1 +
 tools/bpf/bpftool/main.h                  |  27 ++++
 tools/bpf/bpftool/map.c                   |   7 +
 tools/bpf/bpftool/pids.c                  | 229 ++++++++++++++++++++++++++++++
 tools/bpf/bpftool/prog.c                  |   7 +
 tools/bpf/bpftool/skeleton/pid_iter.bpf.c |  80 +++++++++++
 tools/bpf/bpftool/skeleton/pid_iter.h     |  12 ++
 10 files changed, 378 insertions(+)
 create mode 100644 tools/bpf/bpftool/pids.c
 create mode 100644 tools/bpf/bpftool/skeleton/pid_iter.bpf.c
 create mode 100644 tools/bpf/bpftool/skeleton/pid_iter.h

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index bdb6e38c6c5c..06f436e8191a 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -150,6 +150,8 @@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
 
 $(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h
 
+$(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h
+
 endif
 endif
 
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index faac8189b285..fc9bc7a23db6 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -809,6 +809,7 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
 			printf("%s%u", n++ == 0 ? "  map_ids " : ",",
 			       obj->obj_id);
 	}
+	emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
 
 	printf("\n");
 }
@@ -841,6 +842,9 @@ show_btf_json(struct bpf_btf_info *info, int fd,
 			jsonw_uint(json_wtr, obj->obj_id);
 	}
 	jsonw_end_array(json_wtr);	/* map_ids */
+
+	emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */
+
 	jsonw_end_object(json_wtr);	/* btf object */
 }
 
@@ -893,6 +897,7 @@ static int do_show(int argc, char **argv)
 			close(fd);
 		return err;
 	}
+	build_obj_refs_table(&refs_table, BPF_OBJ_BTF);
 
 	if (fd >= 0) {
 		err = show_btf(fd, &btf_prog_table, &btf_map_table);
@@ -939,6 +944,7 @@ static int do_show(int argc, char **argv)
 exit_free:
 	delete_btf_table(&btf_prog_table);
 	delete_btf_table(&btf_map_table);
+	delete_obj_refs_table(&refs_table);
 
 	return err;
 }
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index fca57ee8fafe..7329f3134283 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -143,6 +143,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
 		}
 		jsonw_end_array(json_wtr);
 	}
+
+	emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
 	jsonw_end_object(json_wtr);
 
 	return 0;
@@ -212,6 +215,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
 				printf("\n\tpinned %s", obj->path);
 		}
 	}
+	emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
 
 	printf("\n");
 
@@ -257,6 +261,7 @@ static int do_show(int argc, char **argv)
 
 	if (show_pinned)
 		build_pinned_obj_table(&link_table, BPF_OBJ_LINK);
+	build_obj_refs_table(&refs_table, BPF_OBJ_LINK);
 
 	if (argc == 2) {
 		fd = link_parse_fd(&argc, &argv);
@@ -296,6 +301,8 @@ static int do_show(int argc, char **argv)
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
+	delete_obj_refs_table(&refs_table);
+
 	return errno == ENOENT ? 0 : -1;
 }
 
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index bf4d7487552a..4a191fcbeb82 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -31,6 +31,7 @@ bool relaxed_maps;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
 struct pinned_obj_table link_table;
+struct obj_refs_table refs_table;
 
 static void __noreturn clean_and_exit(int i)
 {
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index aad7be74e8a7..ce26271e5f0c 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -127,11 +127,13 @@ static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
 extern const char * const map_type_name[];
 extern const size_t map_type_name_size;
 
+/* keep in sync with the definition in skeleton/pid_iter.bpf.c */
 enum bpf_obj_type {
 	BPF_OBJ_UNKNOWN,
 	BPF_OBJ_PROG,
 	BPF_OBJ_MAP,
 	BPF_OBJ_LINK,
+	BPF_OBJ_BTF,
 };
 
 extern const char *bin_name;
@@ -139,12 +141,14 @@ extern const char *bin_name;
 extern json_writer_t *json_wtr;
 extern bool json_output;
 extern bool show_pinned;
+extern bool show_pids;
 extern bool block_mount;
 extern bool verifier_logs;
 extern bool relaxed_maps;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
 extern struct pinned_obj_table link_table;
+extern struct obj_refs_table refs_table;
 
 void __printf(1, 2) p_err(const char *fmt, ...);
 void __printf(1, 2) p_info(const char *fmt, ...);
@@ -168,12 +172,35 @@ struct pinned_obj {
 	struct hlist_node hash;
 };
 
+struct obj_refs_table {
+	DECLARE_HASHTABLE(table, 16);
+};
+
+struct obj_ref {
+	int pid;
+	char comm[16];
+};
+
+struct obj_refs {
+	struct hlist_node node;
+	__u32 id;
+	int ref_cnt;
+	struct obj_ref *refs;
+};
+
 struct btf;
 struct bpf_line_info;
 
 int build_pinned_obj_table(struct pinned_obj_table *table,
 			   enum bpf_obj_type type);
 void delete_pinned_obj_table(struct pinned_obj_table *tab);
+__weak int build_obj_refs_table(struct obj_refs_table *table,
+				enum bpf_obj_type type);
+__weak void delete_obj_refs_table(struct obj_refs_table *table);
+__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id,
+			       json_writer_t *json_wtr);
+__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id,
+				const char *prefix);
 void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
 void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
 
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index b9eee19b094c..0a6a5d82d380 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -509,6 +509,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
 		jsonw_end_array(json_wtr);
 	}
 
+	emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
 	jsonw_end_object(json_wtr);
 
 	return 0;
@@ -596,6 +598,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 	if (frozen)
 		printf("%sfrozen", info->btf_id ? "  " : "");
 
+	emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
 	printf("\n");
 	return 0;
 }
@@ -654,6 +658,7 @@ static int do_show(int argc, char **argv)
 
 	if (show_pinned)
 		build_pinned_obj_table(&map_table, BPF_OBJ_MAP);
+	build_obj_refs_table(&refs_table, BPF_OBJ_MAP);
 
 	if (argc == 2)
 		return do_show_subset(argc, argv);
@@ -697,6 +702,8 @@ static int do_show(int argc, char **argv)
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
+	delete_obj_refs_table(&refs_table);
+
 	return errno == ENOENT ? 0 : -1;
 }
 
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
new file mode 100644
index 000000000000..3474a91743ff
--- /dev/null
+++ b/tools/bpf/bpftool/pids.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+
+#include "main.h"
+#include "skeleton/pid_iter.h"
+
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+
+int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
+{
+	p_err("bpftool built without PID iterator support");
+	return -ENOTSUP;
+}
+void delete_obj_refs_table(struct obj_refs_table *table) {}
+
+#else /* BPFTOOL_WITHOUT_SKELETONS */
+
+#include "pid_iter.skel.h"
+
+static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e)
+{
+	struct obj_refs *refs;
+	struct obj_ref *ref;
+	void *tmp;
+	int i;
+
+	hash_for_each_possible(table->table, refs, node, e->id) {
+		if (refs->id != e->id)
+			continue;
+
+		for (i = 0; i < refs->ref_cnt; i++) {
+			if (refs->refs[i].pid == e->pid)
+				return;
+		}
+
+		tmp = realloc(refs->refs, (refs->ref_cnt + 1) * sizeof(*ref));
+		if (!tmp) {
+			p_err("failed to re-alloc memory for ID %u, PID %d, COMM %s...",
+			      e->id, e->pid, e->comm);
+			return;
+		}
+		refs->refs = tmp;
+		ref = &refs->refs[refs->ref_cnt];
+		ref->pid = e->pid;
+		memcpy(ref->comm, e->comm, sizeof(ref->comm));
+		refs->ref_cnt++;
+
+		return;
+	}
+
+	/* new ref */
+	refs = calloc(1, sizeof(*refs));
+	if (!refs) {
+		p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
+		      e->id, e->pid, e->comm);
+		return;
+	}
+
+	refs->id = e->id;
+	refs->refs = malloc(sizeof(*refs->refs));
+	if (!refs->refs) {
+		free(refs);
+		p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
+		      e->id, e->pid, e->comm);
+		return;
+	}
+	ref = &refs->refs[0];
+	ref->pid = e->pid;
+	memcpy(ref->comm, e->comm, sizeof(ref->comm));
+	refs->ref_cnt = 1;
+	hash_add(table->table, &refs->node, e->id);
+}
+
+static int __printf(2, 0)
+libbpf_print_none(__maybe_unused enum libbpf_print_level level,
+		  __maybe_unused const char *format,
+		  __maybe_unused va_list args)
+{
+	return 0;
+}
+
+int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
+{
+	char buf[4096];
+	struct pid_iter_bpf *skel;
+	struct pid_iter_entry *e;
+	int err, ret, fd = -1, i;
+	libbpf_print_fn_t default_print;
+
+	hash_init(table->table);
+	set_max_rlimit();
+
+	skel = pid_iter_bpf__open();
+	if (!skel) {
+		p_err("failed to open PID iterator skeleton");
+		return -1;
+	}
+
+	skel->rodata->obj_type = type;
+
+	/* we don't want output polluted with libbpf errors if bpf_iter is not
+	 * supported
+	 */
+	default_print = libbpf_set_print(libbpf_print_none);
+	err = pid_iter_bpf__load(skel);
+	libbpf_set_print(default_print);
+	if (err) {
+		/* too bad, kernel doesn't support BPF iterators yet */
+		err = 0;
+		goto out;
+	}
+	err = pid_iter_bpf__attach(skel);
+	if (err) {
+		/* if we loaded above successfully, attach has to succeed */
+		p_err("failed to attach PID iterator: %d", err);
+		goto out;
+	}
+
+	fd = bpf_iter_create(bpf_link__fd(skel->links.iter));
+	if (fd < 0) {
+		err = -errno;
+		p_err("failed to create PID iterator session: %d", err);
+		goto out;
+	}
+
+	while (true) {
+		ret = read(fd, buf, sizeof(buf));
+		if (ret < 0) {
+			err = -errno;
+			p_err("failed to read PID iterator output: %d", err);
+			goto out;
+		}
+		if (ret == 0)
+			break;
+		if (ret % sizeof(*e)) {
+			err = -EINVAL;
+			p_err("invalid PID iterator output format");
+			goto out;
+		}
+		ret /= sizeof(*e);
+
+		e = (void *)buf;
+		for (i = 0; i < ret; i++, e++) {
+			add_ref(table, e);
+		}
+	}
+	err = 0;
+out:
+	if (fd >= 0)
+		close(fd);
+	pid_iter_bpf__destroy(skel);
+	return err;
+}
+
+void delete_obj_refs_table(struct obj_refs_table *table)
+{
+	struct obj_refs *refs;
+	struct hlist_node *tmp;
+	unsigned int bkt;
+
+	hash_for_each_safe(table->table, bkt, tmp, refs, node) {
+		hash_del(&refs->node);
+		free(refs->refs);
+		free(refs);
+	}
+}
+
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr)
+{
+	struct obj_refs *refs;
+	struct obj_ref *ref;
+	int i;
+
+	if (hash_empty(table->table))
+		return;
+
+	hash_for_each_possible(table->table, refs, node, id) {
+		if (refs->id != id)
+			continue;
+		if (refs->ref_cnt == 0)
+			break;
+
+		jsonw_name(json_wtr, "pids");
+		jsonw_start_array(json_wtr);
+		for (i = 0; i < refs->ref_cnt; i++) {
+			ref = &refs->refs[i];
+			jsonw_start_object(json_wtr);
+			jsonw_int_field(json_wtr, "pid", ref->pid);
+			jsonw_string_field(json_wtr, "comm", ref->comm);
+			jsonw_end_object(json_wtr);
+		}
+		jsonw_end_array(json_wtr);
+		break;
+	}
+}
+
+void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix)
+{
+	struct obj_refs *refs;
+	struct obj_ref *ref;
+	int i;
+
+	if (hash_empty(table->table))
+		return;
+
+	hash_for_each_possible(table->table, refs, node, id) {
+		if (refs->id != id)
+			continue;
+		if (refs->ref_cnt == 0)
+			break;
+
+		printf("%s", prefix);
+		for (i = 0; i < refs->ref_cnt; i++) {
+			ref = &refs->refs[i];
+			printf("%s%s(%d)", i == 0 ? "" : ", ", ref->comm, ref->pid);
+		}
+		break;
+	}
+}
+
+
+#endif
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 53d47610ff58..e21fa8ad2efa 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -190,6 +190,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
 		jsonw_end_array(json_wtr);
 	}
 
+	emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
 	jsonw_end_object(json_wtr);
 }
 
@@ -256,6 +258,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 	if (info->btf_id)
 		printf("\n\tbtf_id %d", info->btf_id);
 
+	emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
 	printf("\n");
 }
 
@@ -321,6 +325,7 @@ static int do_show(int argc, char **argv)
 
 	if (show_pinned)
 		build_pinned_obj_table(&prog_table, BPF_OBJ_PROG);
+	build_obj_refs_table(&refs_table, BPF_OBJ_PROG);
 
 	if (argc == 2)
 		return do_show_subset(argc, argv);
@@ -362,6 +367,8 @@ static int do_show(int argc, char **argv)
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
+	delete_obj_refs_table(&refs_table);
+
 	return err;
 }
 
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
new file mode 100644
index 000000000000..8468a608911e
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (c) 2020 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "pid_iter.h"
+
+/* keep in sync with the definition in main.h */
+enum bpf_obj_type {
+	BPF_OBJ_UNKNOWN,
+	BPF_OBJ_PROG,
+	BPF_OBJ_MAP,
+	BPF_OBJ_LINK,
+	BPF_OBJ_BTF,
+};
+
+extern const void bpf_link_fops __ksym;
+extern const void bpf_map_fops __ksym;
+extern const void bpf_prog_fops __ksym;
+extern const void btf_fops __ksym;
+
+const volatile enum bpf_obj_type obj_type = BPF_OBJ_UNKNOWN;
+
+static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type)
+{
+	switch (type) {
+	case BPF_OBJ_PROG:
+		return BPF_CORE_READ((struct bpf_prog *)ent, aux, id);
+	case BPF_OBJ_MAP:
+		return BPF_CORE_READ((struct bpf_map *)ent, id);
+	case BPF_OBJ_BTF:
+		return BPF_CORE_READ((struct btf *)ent, id);
+	case BPF_OBJ_LINK:
+		return BPF_CORE_READ((struct bpf_link *)ent, id);
+	default:
+		return 0;
+	}
+}
+
+SEC("iter/task_file")
+int iter(struct bpf_iter__task_file *ctx)
+{
+	struct file *file = ctx->file;
+	struct task_struct *task = ctx->task;
+	struct pid_iter_entry e;
+	const void *fops;
+
+	if (!file || !task)
+		return 0;
+
+	switch (obj_type) {
+	case BPF_OBJ_PROG:
+		fops = &bpf_prog_fops;
+		break;
+	case BPF_OBJ_MAP:
+		fops = &bpf_map_fops;
+		break;
+	case BPF_OBJ_BTF:
+		fops = &btf_fops;
+		break;
+	case BPF_OBJ_LINK:
+		fops = &bpf_link_fops;
+		break;
+	default:
+		return 0;
+	}
+
+	if (file->f_op != fops)
+		return 0;
+
+	e.pid = task->tgid;
+	e.id = get_obj_id(file->private_data, obj_type);
+	bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm);
+	bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.h b/tools/bpf/bpftool/skeleton/pid_iter.h
new file mode 100644
index 000000000000..5692cf257adb
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/pid_iter.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2020 Facebook */
+#ifndef __PID_ITER_H
+#define __PID_ITER_H
+
+struct pid_iter_entry {
+	__u32 id;
+	int pid;
+	char comm[16];
+};
+
+#endif
-- 
cgit 


From 075c776658190681d2bf9997306f871d6c8a9b36 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:17:03 -0700
Subject: tools/bpftool: Add documentation and sample output for process info

Add statements about bpftool being able to discover process info, holding
reference to BPF map, prog, link, or BTF. Show example output as well.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-10-andriin@fb.com
---
 tools/bpf/bpftool/Documentation/bpftool-btf.rst  |  5 +++++
 tools/bpf/bpftool/Documentation/bpftool-link.rst | 13 ++++++++++++-
 tools/bpf/bpftool/Documentation/bpftool-map.rst  |  8 +++++++-
 tools/bpf/bpftool/Documentation/bpftool-prog.rst | 11 +++++++++++
 4 files changed, 35 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index ce3a724f50c1..896f4c6c2870 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -36,6 +36,11 @@ DESCRIPTION
 		  otherwise list all BTF objects currently loaded on the
 		  system.
 
+		  Since Linux 5.8 bpftool is able to discover information about
+		  processes that hold open file descriptors (FDs) against BTF
+		  objects. On such kernels bpftool will automatically emit this
+		  information as well.
+
 	**bpftool btf dump** *BTF_SRC*
 		  Dump BTF entries from a given *BTF_SRC*.
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 0e43d7b06c11..38b0949a185b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -37,6 +37,11 @@ DESCRIPTION
 		  zero or more named attributes, some of which depend on type
 		  of link.
 
+		  Since Linux 5.8 bpftool is able to discover information about
+		  processes that hold open file descriptors (FDs) against BPF
+		  links. On such kernels bpftool will automatically emit this
+		  information as well.
+
 	**bpftool link pin** *LINK* *FILE*
 		  Pin link *LINK* as *FILE*.
 
@@ -82,6 +87,7 @@ EXAMPLES
 
     10: cgroup  prog 25
             cgroup_id 614  attach_type egress
+            pids test_progs(223)
 
 **# bpftool --json --pretty link show**
 
@@ -91,7 +97,12 @@ EXAMPLES
             "type": "cgroup",
             "prog_id": 25,
             "cgroup_id": 614,
-            "attach_type": "egress"
+            "attach_type": "egress",
+            "pids": [{
+                    "pid": 223,
+                    "comm": "test_progs"
+                }
+            ]
         }
     ]
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 31101643e57c..5bc2123e9944 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -62,6 +62,11 @@ DESCRIPTION
 		  Output will start with map ID followed by map type and
 		  zero or more named attributes (depending on kernel version).
 
+		  Since Linux 5.8 bpftool is able to discover information about
+		  processes that hold open file descriptors (FDs) against BPF
+		  maps. On such kernels bpftool will automatically emit this
+		  information as well.
+
 	**bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE*  **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
 		  Create a new map with given parameters and pin it to *bpffs*
 		  as *FILE*.
@@ -180,7 +185,8 @@ EXAMPLES
 ::
 
   10: hash  name some_map  flags 0x0
-	key 4B  value 8B  max_entries 2048  memlock 167936B
+        key 4B  value 8B  max_entries 2048  memlock 167936B
+        pids systemd(1)
 
 The following three commands are equivalent:
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 2b254959d488..412ea3d9bf7f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -75,6 +75,11 @@ DESCRIPTION
 		  program run. Activation or deactivation of the feature is
 		  performed via the **kernel.bpf_stats_enabled** sysctl knob.
 
+		  Since Linux 5.8 bpftool is able to discover information about
+		  processes that hold open file descriptors (FDs) against BPF
+		  programs. On such kernels bpftool will automatically emit this
+		  information as well.
+
 	**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }]
 		  Dump eBPF instructions of the programs from the kernel. By
 		  default, eBPF will be disassembled and printed to standard
@@ -243,6 +248,7 @@ EXAMPLES
     10: xdp  name some_prog  tag 005a3d2123620c8b  gpl run_time_ns 81632 run_cnt 10
             loaded_at 2017-09-29T20:11:00+0000  uid 0
             xlated 528B  jited 370B  memlock 4096B  map_ids 10
+            pids systemd(1)
 
 **# bpftool --json --pretty prog show**
 
@@ -262,6 +268,11 @@ EXAMPLES
             "bytes_jited": 370,
             "bytes_memlock": 4096,
             "map_ids": [10
+            ],
+            "pids": [{
+                    "pid": 1,
+                    "comm": "systemd"
+                }
             ]
         }
     ]
-- 
cgit 


From bdb7b79b4ce864a724250e1d35948c46f135de36 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 22 Jun 2020 20:22:21 -0700
Subject: bpf: Switch most helper return values from 32-bit int to 64-bit long

Switch most of BPF helper definitions from returning int to long. These
definitions are coming from comments in BPF UAPI header and are used to
generate bpf_helper_defs.h (under libbpf) to be later included and used from
BPF programs.

In actual in-kernel implementation, all the helpers are defined as returning
u64, but due to some historical reasons, most of them are actually defined as
returning int in UAPI (usually, to return 0 on success, and negative value on
error).

This actually causes Clang to quite often generate sub-optimal code, because
compiler believes that return value is 32-bit, and in a lot of cases has to be
up-converted (usually with a pair of 32-bit bit shifts) to 64-bit values,
before they can be used further in BPF code.

Besides just "polluting" the code, these 32-bit shifts quite often cause
problems for cases in which return value matters. This is especially the case
for the family of bpf_probe_read_str() functions. There are few other similar
helpers (e.g., bpf_read_branch_records()), in which return value is used by
BPF program logic to record variable-length data and process it. For such
cases, BPF program logic carefully manages offsets within some array or map to
read variable-length data. For such uses, it's crucial for BPF verifier to
track possible range of register values to prove that all the accesses happen
within given memory bounds. Those extraneous zero-extending bit shifts,
inserted by Clang (and quite often interleaved with other code, which makes
the issues even more challenging and sometimes requires employing extra
per-variable compiler barriers), throws off verifier logic and makes it mark
registers as having unknown variable offset. We'll study this pattern a bit
later below.

Another common pattern is to check return of BPF helper for non-zero state to
detect error conditions and attempt alternative actions in such case. Even in
this simple and straightforward case, this 32-bit vs BPF's native 64-bit mode
quite often leads to sub-optimal and unnecessary extra code. We'll look at
this pattern as well.

Clang's BPF target supports two modes of code generation: ALU32, in which it
is capable of using lower 32-bit parts of registers, and no-ALU32, in which
only full 64-bit registers are being used. ALU32 mode somewhat mitigates the
above described problems, but not in all cases.

This patch switches all the cases in which BPF helpers return 0 or negative
error from returning int to returning long. It is shown below that such change
in definition leads to equivalent or better code. No-ALU32 mode benefits more,
but ALU32 mode doesn't degrade or still gets improved code generation.

Another class of cases switched from int to long are bpf_probe_read_str()-like
helpers, which encode successful case as non-negative values, while still
returning negative value for errors.

In all of such cases, correctness is preserved due to two's complement
encoding of negative values and the fact that all helpers return values with
32-bit absolute value. Two's complement ensures that for negative values
higher 32 bits are all ones and when truncated, leave valid negative 32-bit
value with the same value. Non-negative values have upper 32 bits set to zero
and similarly preserve value when high 32 bits are truncated. This means that
just casting to int/u32 is correct and efficient (and in ALU32 mode doesn't
require any extra shifts).

To minimize the chances of regressions, two code patterns were investigated,
as mentioned above. For both patterns, BPF assembly was analyzed in
ALU32/NO-ALU32 compiler modes, both with current 32-bit int return type and
new 64-bit long return type.

Case 1. Variable-length data reading and concatenation. This is quite
ubiquitous pattern in tracing/monitoring applications, reading data like
process's environment variables, file path, etc. In such case, many pieces of
string-like variable-length data are read into a single big buffer, and at the
end of the process, only a part of array containing actual data is sent to
user-space for further processing. This case is tested in test_varlen.c
selftest (in the next patch). Code flow is roughly as follows:

  void *payload = &sample->payload;
  u64 len;

  len = bpf_probe_read_kernel_str(payload, MAX_SZ1, &source_data1);
  if (len <= MAX_SZ1) {
      payload += len;
      sample->len1 = len;
  }
  len = bpf_probe_read_kernel_str(payload, MAX_SZ2, &source_data2);
  if (len <= MAX_SZ2) {
      payload += len;
      sample->len2 = len;
  }
  /* and so on */
  sample->total_len = payload - &sample->payload;
  /* send over, e.g., perf buffer */

There could be two variations with slightly different code generated: when len
is 64-bit integer and when it is 32-bit integer. Both variations were analysed.
BPF assembly instructions between two successive invocations of
bpf_probe_read_kernel_str() were used to check code regressions. Results are
below, followed by short analysis. Left side is using helpers with int return
type, the right one is after the switch to long.

ALU32 + INT                                ALU32 + LONG
===========                                ============

64-BIT (13 insns):                         64-BIT (10 insns):
------------------------------------       ------------------------------------
  17:   call 115                             17:   call 115
  18:   if w0 > 256 goto +9 <LBB0_4>         18:   if r0 > 256 goto +6 <LBB0_4>
  19:   w1 = w0                              19:   r1 = 0 ll
  20:   r1 <<= 32                            21:   *(u64 *)(r1 + 0) = r0
  21:   r1 s>>= 32                           22:   r6 = 0 ll
  22:   r2 = 0 ll                            24:   r6 += r0
  24:   *(u64 *)(r2 + 0) = r1              00000000000000c8 <LBB0_4>:
  25:   r6 = 0 ll                            25:   r1 = r6
  27:   r6 += r1                             26:   w2 = 256
00000000000000e0 <LBB0_4>:                   27:   r3 = 0 ll
  28:   r1 = r6                              29:   call 115
  29:   w2 = 256
  30:   r3 = 0 ll
  32:   call 115

32-BIT (11 insns):                         32-BIT (12 insns):
------------------------------------       ------------------------------------
  17:   call 115                             17:   call 115
  18:   if w0 > 256 goto +7 <LBB1_4>         18:   if w0 > 256 goto +8 <LBB1_4>
  19:   r1 = 0 ll                            19:   r1 = 0 ll
  21:   *(u32 *)(r1 + 0) = r0                21:   *(u32 *)(r1 + 0) = r0
  22:   w1 = w0                              22:   r0 <<= 32
  23:   r6 = 0 ll                            23:   r0 >>= 32
  25:   r6 += r1                             24:   r6 = 0 ll
00000000000000d0 <LBB1_4>:                   26:   r6 += r0
  26:   r1 = r6                            00000000000000d8 <LBB1_4>:
  27:   w2 = 256                             27:   r1 = r6
  28:   r3 = 0 ll                            28:   w2 = 256
  30:   call 115                             29:   r3 = 0 ll
                                             31:   call 115

In ALU32 mode, the variant using 64-bit length variable clearly wins and
avoids unnecessary zero-extension bit shifts. In practice, this is even more
important and good, because BPF code won't need to do extra checks to "prove"
that payload/len are within good bounds.

32-bit len is one instruction longer. Clang decided to do 64-to-32 casting
with two bit shifts, instead of equivalent `w1 = w0` assignment. The former
uses extra register. The latter might potentially lose some range information,
but not for 32-bit value. So in this case, verifier infers that r0 is [0, 256]
after check at 18:, and shifting 32 bits left/right keeps that range intact.
We should probably look into Clang's logic and see why it chooses bitshifts
over sub-register assignments for this.

NO-ALU32 + INT                             NO-ALU32 + LONG
==============                             ===============

64-BIT (14 insns):                         64-BIT (10 insns):
------------------------------------       ------------------------------------
  17:   call 115                             17:   call 115
  18:   r0 <<= 32                            18:   if r0 > 256 goto +6 <LBB0_4>
  19:   r1 = r0                              19:   r1 = 0 ll
  20:   r1 >>= 32                            21:   *(u64 *)(r1 + 0) = r0
  21:   if r1 > 256 goto +7 <LBB0_4>         22:   r6 = 0 ll
  22:   r0 s>>= 32                           24:   r6 += r0
  23:   r1 = 0 ll                          00000000000000c8 <LBB0_4>:
  25:   *(u64 *)(r1 + 0) = r0                25:   r1 = r6
  26:   r6 = 0 ll                            26:   r2 = 256
  28:   r6 += r0                             27:   r3 = 0 ll
00000000000000e8 <LBB0_4>:                   29:   call 115
  29:   r1 = r6
  30:   r2 = 256
  31:   r3 = 0 ll
  33:   call 115

32-BIT (13 insns):                         32-BIT (13 insns):
------------------------------------       ------------------------------------
  17:   call 115                             17:   call 115
  18:   r1 = r0                              18:   r1 = r0
  19:   r1 <<= 32                            19:   r1 <<= 32
  20:   r1 >>= 32                            20:   r1 >>= 32
  21:   if r1 > 256 goto +6 <LBB1_4>         21:   if r1 > 256 goto +6 <LBB1_4>
  22:   r2 = 0 ll                            22:   r2 = 0 ll
  24:   *(u32 *)(r2 + 0) = r0                24:   *(u32 *)(r2 + 0) = r0
  25:   r6 = 0 ll                            25:   r6 = 0 ll
  27:   r6 += r1                             27:   r6 += r1
00000000000000e0 <LBB1_4>:                 00000000000000e0 <LBB1_4>:
  28:   r1 = r6                              28:   r1 = r6
  29:   r2 = 256                             29:   r2 = 256
  30:   r3 = 0 ll                            30:   r3 = 0 ll
  32:   call 115                             32:   call 115

In NO-ALU32 mode, for the case of 64-bit len variable, Clang generates much
superior code, as expected, eliminating unnecessary bit shifts. For 32-bit
len, code is identical.

So overall, only ALU-32 32-bit len case is more-or-less equivalent and the
difference stems from internal Clang decision, rather than compiler lacking
enough information about types.

Case 2. Let's look at the simpler case of checking return result of BPF helper
for errors. The code is very simple:

  long bla;
  if (bpf_probe_read_kenerl(&bla, sizeof(bla), 0))
      return 1;
  else
      return 0;

ALU32 + CHECK (9 insns)                    ALU32 + CHECK (9 insns)
====================================       ====================================
  0:    r1 = r10                             0:    r1 = r10
  1:    r1 += -8                             1:    r1 += -8
  2:    w2 = 8                               2:    w2 = 8
  3:    r3 = 0                               3:    r3 = 0
  4:    call 113                             4:    call 113
  5:    w1 = w0                              5:    r1 = r0
  6:    w0 = 1                               6:    w0 = 1
  7:    if w1 != 0 goto +1 <LBB2_2>          7:    if r1 != 0 goto +1 <LBB2_2>
  8:    w0 = 0                               8:    w0 = 0
0000000000000048 <LBB2_2>:                 0000000000000048 <LBB2_2>:
  9:    exit                                 9:    exit

Almost identical code, the only difference is the use of full register
assignment (r1 = r0) vs half-registers (w1 = w0) in instruction #5. On 32-bit
architectures, new BPF assembly might be slightly less optimal, in theory. But
one can argue that's not a big issue, given that use of full registers is
still prevalent (e.g., for parameter passing).

NO-ALU32 + CHECK (11 insns)                NO-ALU32 + CHECK (9 insns)
====================================       ====================================
  0:    r1 = r10                             0:    r1 = r10
  1:    r1 += -8                             1:    r1 += -8
  2:    r2 = 8                               2:    r2 = 8
  3:    r3 = 0                               3:    r3 = 0
  4:    call 113                             4:    call 113
  5:    r1 = r0                              5:    r1 = r0
  6:    r1 <<= 32                            6:    r0 = 1
  7:    r1 >>= 32                            7:    if r1 != 0 goto +1 <LBB2_2>
  8:    r0 = 1                               8:    r0 = 0
  9:    if r1 != 0 goto +1 <LBB2_2>        0000000000000048 <LBB2_2>:
 10:    r0 = 0                               9:    exit
0000000000000058 <LBB2_2>:
 11:    exit

NO-ALU32 is a clear improvement, getting rid of unnecessary zero-extension bit
shifts.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200623032224.4020118-1-andriin@fb.com
---
 tools/include/uapi/linux/bpf.h | 192 ++++++++++++++++++++---------------------
 1 file changed, 96 insertions(+), 96 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 19684813faae..be0efee49093 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -653,7 +653,7 @@ union bpf_attr {
  * 		Map value associated to *key*, or **NULL** if no entry was
  * 		found.
  *
- * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
  * 	Description
  * 		Add or update the value of the entry associated to *key* in
  * 		*map* with *value*. *flags* is one of:
@@ -671,13 +671,13 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * long bpf_map_delete_elem(struct bpf_map *map, const void *key)
  * 	Description
  * 		Delete entry with *key* from *map*.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		For tracing programs, safely attempt to read *size* bytes from
  * 		kernel space address *unsafe_ptr* and store the data in *dst*.
@@ -695,7 +695,7 @@ union bpf_attr {
  * 	Return
  * 		Current *ktime*.
  *
- * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
  * 	Description
  * 		This helper is a "printk()-like" facility for debugging. It
  * 		prints a message defined by format *fmt* (of size *fmt_size*)
@@ -775,7 +775,7 @@ union bpf_attr {
  * 	Return
  * 		The SMP id of the processor running the program.
  *
- * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
  * 	Description
  * 		Store *len* bytes from address *from* into the packet
  * 		associated to *skb*, at *offset*. *flags* are a combination of
@@ -792,7 +792,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
  * 	Description
  * 		Recompute the layer 3 (e.g. IP) checksum for the packet
  * 		associated to *skb*. Computation is incremental, so the helper
@@ -817,7 +817,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
  * 	Description
  * 		Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
  * 		packet associated to *skb*. Computation is incremental, so the
@@ -849,7 +849,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
  * 	Description
  * 		This special helper is used to trigger a "tail call", or in
  * 		other words, to jump into another eBPF program. The same stack
@@ -880,7 +880,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
  * 	Description
  * 		Clone and redirect the packet associated to *skb* to another
  * 		net device of index *ifindex*. Both ingress and egress
@@ -916,7 +916,7 @@ union bpf_attr {
  * 		A 64-bit integer containing the current GID and UID, and
  * 		created as such: *current_gid* **<< 32 \|** *current_uid*.
  *
- * int bpf_get_current_comm(void *buf, u32 size_of_buf)
+ * long bpf_get_current_comm(void *buf, u32 size_of_buf)
  * 	Description
  * 		Copy the **comm** attribute of the current task into *buf* of
  * 		*size_of_buf*. The **comm** attribute contains the name of
@@ -953,7 +953,7 @@ union bpf_attr {
  * 	Return
  * 		The classid, or 0 for the default unconfigured classid.
  *
- * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
  * 	Description
  * 		Push a *vlan_tci* (VLAN tag control information) of protocol
  * 		*vlan_proto* to the packet associated to *skb*, then update
@@ -969,7 +969,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * long bpf_skb_vlan_pop(struct sk_buff *skb)
  * 	Description
  * 		Pop a VLAN header from the packet associated to *skb*.
  *
@@ -981,7 +981,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
  * 	Description
  * 		Get tunnel metadata. This helper takes a pointer *key* to an
  * 		empty **struct bpf_tunnel_key** of **size**, that will be
@@ -1032,7 +1032,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
  * 	Description
  * 		Populate tunnel metadata for packet associated to *skb.* The
  * 		tunnel metadata is set to the contents of *key*, of *size*. The
@@ -1098,7 +1098,7 @@ union bpf_attr {
  * 		The value of the perf event counter read from the map, or a
  * 		negative error code in case of failure.
  *
- * int bpf_redirect(u32 ifindex, u64 flags)
+ * long bpf_redirect(u32 ifindex, u64 flags)
  * 	Description
  * 		Redirect the packet to another net device of index *ifindex*.
  * 		This helper is somewhat similar to **bpf_clone_redirect**\
@@ -1145,7 +1145,7 @@ union bpf_attr {
  * 		The realm of the route for the packet associated to *skb*, or 0
  * 		if none was found.
  *
- * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
  * 	Description
  * 		Write raw *data* blob into a special BPF perf event held by
  * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1190,7 +1190,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
+ * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
  * 	Description
  * 		This helper was provided as an easy way to load data from a
  * 		packet. It can be used to load *len* bytes from *offset* from
@@ -1207,7 +1207,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
+ * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
  * 	Description
  * 		Walk a user or a kernel stack and return its id. To achieve
  * 		this, the helper needs *ctx*, which is a pointer to the context
@@ -1276,7 +1276,7 @@ union bpf_attr {
  * 		The checksum result, or a negative error code in case of
  * 		failure.
  *
- * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
  * 	Description
  * 		Retrieve tunnel options metadata for the packet associated to
  * 		*skb*, and store the raw tunnel option data to the buffer *opt*
@@ -1294,7 +1294,7 @@ union bpf_attr {
  * 	Return
  * 		The size of the option data retrieved.
  *
- * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
  * 	Description
  * 		Set tunnel options metadata for the packet associated to *skb*
  * 		to the option data contained in the raw buffer *opt* of *size*.
@@ -1304,7 +1304,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
  * 	Description
  * 		Change the protocol of the *skb* to *proto*. Currently
  * 		supported are transition from IPv4 to IPv6, and from IPv6 to
@@ -1331,7 +1331,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * long bpf_skb_change_type(struct sk_buff *skb, u32 type)
  * 	Description
  * 		Change the packet type for the packet associated to *skb*. This
  * 		comes down to setting *skb*\ **->pkt_type** to *type*, except
@@ -1358,7 +1358,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
  * 	Description
  * 		Check whether *skb* is a descendant of the cgroup2 held by
  * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
@@ -1389,7 +1389,7 @@ union bpf_attr {
  * 	Return
  * 		A pointer to the current task struct.
  *
- * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * long bpf_probe_write_user(void *dst, const void *src, u32 len)
  * 	Description
  * 		Attempt in a safe way to write *len* bytes from the buffer
  * 		*src* to *dst* in memory. It only works for threads that are in
@@ -1408,7 +1408,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
  * 	Description
  * 		Check whether the probe is being run is the context of a given
  * 		subset of the cgroup2 hierarchy. The cgroup2 to test is held by
@@ -1420,7 +1420,7 @@ union bpf_attr {
  * 		* 1, if the *skb* task does not belong to the cgroup2.
  * 		* A negative error code, if an error occurred.
  *
- * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
  * 	Description
  * 		Resize (trim or grow) the packet associated to *skb* to the
  * 		new *len*. The *flags* are reserved for future usage, and must
@@ -1444,7 +1444,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * long bpf_skb_pull_data(struct sk_buff *skb, u32 len)
  * 	Description
  * 		Pull in non-linear data in case the *skb* is non-linear and not
  * 		all of *len* are part of the linear section. Make *len* bytes
@@ -1500,7 +1500,7 @@ union bpf_attr {
  * 		recalculation the next time the kernel tries to access this
  * 		hash or when the **bpf_get_hash_recalc**\ () helper is called.
  *
- * int bpf_get_numa_node_id(void)
+ * long bpf_get_numa_node_id(void)
  * 	Description
  * 		Return the id of the current NUMA node. The primary use case
  * 		for this helper is the selection of sockets for the local NUMA
@@ -1511,7 +1511,7 @@ union bpf_attr {
  * 	Return
  * 		The id of current NUMA node.
  *
- * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
  * 	Description
  * 		Grows headroom of packet associated to *skb* and adjusts the
  * 		offset of the MAC header accordingly, adding *len* bytes of
@@ -1532,7 +1532,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
  * 	Description
  * 		Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
  * 		it is possible to use a negative value for *delta*. This helper
@@ -1547,7 +1547,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Copy a NUL terminated string from an unsafe kernel address
  * 		*unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
@@ -1595,14 +1595,14 @@ union bpf_attr {
  * 		is returned (note that **overflowuid** might also be the actual
  * 		UID value for the socket).
  *
- * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * long bpf_set_hash(struct sk_buff *skb, u32 hash)
  * 	Description
  * 		Set the full hash for *skb* (set the field *skb*\ **->hash**)
  * 		to value *hash*.
  * 	Return
  * 		0
  *
- * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
+ * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
  * 	Description
  * 		Emulate a call to **setsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1630,7 +1630,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
+ * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
  * 	Description
  * 		Grow or shrink the room for data in the packet associated to
  * 		*skb* by *len_diff*, and according to the selected *mode*.
@@ -1676,7 +1676,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
  * 	Description
  * 		Redirect the packet to the endpoint referenced by *map* at
  * 		index *key*. Depending on its type, this *map* can contain
@@ -1697,7 +1697,7 @@ union bpf_attr {
  * 		**XDP_REDIRECT** on success, or the value of the two lower bits
  * 		of the *flags* argument on error.
  *
- * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
  * 	Description
  * 		Redirect the packet to the socket referenced by *map* (of type
  * 		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
@@ -1708,7 +1708,7 @@ union bpf_attr {
  * 	Return
  * 		**SK_PASS** on success, or **SK_DROP** on error.
  *
- * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  * 	Description
  * 		Add an entry to, or update a *map* referencing sockets. The
  * 		*skops* is used as a new value for the entry associated to
@@ -1727,7 +1727,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
  * 	Description
  * 		Adjust the address pointed by *xdp_md*\ **->data_meta** by
  * 		*delta* (which can be positive or negative). Note that this
@@ -1756,7 +1756,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
  * 	Description
  * 		Read the value of a perf event counter, and store it into *buf*
  * 		of size *buf_size*. This helper relies on a *map* of type
@@ -1806,7 +1806,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
  * 	Description
  * 		For en eBPF program attached to a perf event, retrieve the
  * 		value of the event counter associated to *ctx* and store it in
@@ -1817,7 +1817,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
+ * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
  * 	Description
  * 		Emulate a call to **getsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1842,7 +1842,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ * long bpf_override_return(struct pt_regs *regs, u64 rc)
  * 	Description
  * 		Used for error injection, this helper uses kprobes to override
  * 		the return value of the probed function, and to set it to *rc*.
@@ -1867,7 +1867,7 @@ union bpf_attr {
  * 	Return
  * 		0
  *
- * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
  * 	Description
  * 		Attempt to set the value of the **bpf_sock_ops_cb_flags** field
  * 		for the full TCP socket associated to *bpf_sock_ops* to
@@ -1911,7 +1911,7 @@ union bpf_attr {
  * 		be set is returned (which comes down to 0 if all bits were set
  * 		as required).
  *
- * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
  * 	Description
  * 		This helper is used in programs implementing policies at the
  * 		socket level. If the message *msg* is allowed to pass (i.e. if
@@ -1925,7 +1925,7 @@ union bpf_attr {
  * 	Return
  * 		**SK_PASS** on success, or **SK_DROP** on error.
  *
- * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
  * 	Description
  * 		For socket policies, apply the verdict of the eBPF program to
  * 		the next *bytes* (number of bytes) of message *msg*.
@@ -1959,7 +1959,7 @@ union bpf_attr {
  * 	Return
  * 		0
  *
- * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
  * 	Description
  * 		For socket policies, prevent the execution of the verdict eBPF
  * 		program for message *msg* until *bytes* (byte number) have been
@@ -1977,7 +1977,7 @@ union bpf_attr {
  * 	Return
  * 		0
  *
- * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
  * 	Description
  * 		For socket policies, pull in non-linear data from user space
  * 		for *msg* and set pointers *msg*\ **->data** and *msg*\
@@ -2008,7 +2008,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
  * 	Description
  * 		Bind the socket associated to *ctx* to the address pointed by
  * 		*addr*, of length *addr_len*. This allows for making outgoing
@@ -2026,7 +2026,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
  * 	Description
  * 		Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
  * 		possible to both shrink and grow the packet tail.
@@ -2040,7 +2040,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
  * 	Description
  * 		Retrieve the XFRM state (IP transform framework, see also
  * 		**ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
@@ -2056,7 +2056,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
+ * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
  * 	Description
  * 		Return a user or a kernel stack in bpf program provided buffer.
  * 		To achieve this, the helper needs *ctx*, which is a pointer
@@ -2089,7 +2089,7 @@ union bpf_attr {
  * 		A non-negative value equal to or less than *size* on success,
  * 		or a negative error in case of failure.
  *
- * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
  * 	Description
  * 		This helper is similar to **bpf_skb_load_bytes**\ () in that
  * 		it provides an easy way to load *len* bytes from *offset*
@@ -2111,7 +2111,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
  *	Description
  *		Do FIB lookup in kernel tables using parameters in *params*.
  *		If lookup is successful and result shows packet is to be
@@ -2142,7 +2142,7 @@ union bpf_attr {
  *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
  *		  packet is not forwarded or needs assist from full stack
  *
- * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		Add an entry to, or update a sockhash *map* referencing sockets.
  *		The *skops* is used as a new value for the entry associated to
@@ -2161,7 +2161,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		This helper is used in programs implementing policies at the
  *		socket level. If the message *msg* is allowed to pass (i.e. if
@@ -2175,7 +2175,7 @@ union bpf_attr {
  *	Return
  *		**SK_PASS** on success, or **SK_DROP** on error.
  *
- * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		This helper is used in programs implementing policies at the
  *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
@@ -2189,7 +2189,7 @@ union bpf_attr {
  *	Return
  *		**SK_PASS** on success, or **SK_DROP** on error.
  *
- * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
  *	Description
  *		Encapsulate the packet associated to *skb* within a Layer 3
  *		protocol header. This header is provided in the buffer at
@@ -2226,7 +2226,7 @@ union bpf_attr {
  *	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
  *	Description
  *		Store *len* bytes from address *from* into the packet
  *		associated to *skb*, at *offset*. Only the flags, tag and TLVs
@@ -2241,7 +2241,7 @@ union bpf_attr {
  *	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
  *	Description
  *		Adjust the size allocated to TLVs in the outermost IPv6
  *		Segment Routing Header contained in the packet associated to
@@ -2257,7 +2257,7 @@ union bpf_attr {
  *	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
  *	Description
  *		Apply an IPv6 Segment Routing action of type *action* to the
  *		packet associated to *skb*. Each action takes a parameter
@@ -2286,7 +2286,7 @@ union bpf_attr {
  *	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_rc_repeat(void *ctx)
+ * long bpf_rc_repeat(void *ctx)
  *	Description
  *		This helper is used in programs implementing IR decoding, to
  *		report a successfully decoded repeat key message. This delays
@@ -2305,7 +2305,7 @@ union bpf_attr {
  *	Return
  *		0
  *
- * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
  *	Description
  *		This helper is used in programs implementing IR decoding, to
  *		report a successfully decoded key press with *scancode*,
@@ -2370,7 +2370,7 @@ union bpf_attr {
  *	Return
  *		A pointer to the local storage area.
  *
- * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		Select a **SO_REUSEPORT** socket from a
  *		**BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
@@ -2471,7 +2471,7 @@ union bpf_attr {
  *		result is from *reuse*\ **->socks**\ [] using the hash of the
  *		tuple.
  *
- * int bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(struct bpf_sock *sock)
  *	Description
  *		Release the reference held by *sock*. *sock* must be a
  *		non-**NULL** pointer that was returned from
@@ -2479,7 +2479,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
  * 	Description
  * 		Push an element *value* in *map*. *flags* is one of:
  *
@@ -2489,19 +2489,19 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * long bpf_map_pop_elem(struct bpf_map *map, void *value)
  * 	Description
  * 		Pop an element from *map*.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * long bpf_map_peek_elem(struct bpf_map *map, void *value)
  * 	Description
  * 		Get an element from *map* without removing it.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
  *	Description
  *		For socket policies, insert *len* bytes into *msg* at offset
  *		*start*.
@@ -2517,7 +2517,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
  *	Description
  *		Will remove *len* bytes from a *msg* starting at byte *start*.
  *		This may result in **ENOMEM** errors under certain situations if
@@ -2529,7 +2529,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+ * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
  *	Description
  *		This helper is used in programs implementing IR decoding, to
  *		report a successfully decoded pointer movement.
@@ -2543,7 +2543,7 @@ union bpf_attr {
  *	Return
  *		0
  *
- * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ * long bpf_spin_lock(struct bpf_spin_lock *lock)
  *	Description
  *		Acquire a spinlock represented by the pointer *lock*, which is
  *		stored as part of a value of a map. Taking the lock allows to
@@ -2591,7 +2591,7 @@ union bpf_attr {
  *	Return
  *		0
  *
- * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ * long bpf_spin_unlock(struct bpf_spin_lock *lock)
  *	Description
  *		Release the *lock* previously locked by a call to
  *		**bpf_spin_lock**\ (\ *lock*\ ).
@@ -2614,7 +2614,7 @@ union bpf_attr {
  *		A **struct bpf_tcp_sock** pointer on success, or **NULL** in
  *		case of failure.
  *
- * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
+ * long bpf_skb_ecn_set_ce(struct sk_buff *skb)
  *	Description
  *		Set ECN (Explicit Congestion Notification) field of IP header
  *		to **CE** (Congestion Encountered) if current value is **ECT**
@@ -2651,7 +2651,7 @@ union bpf_attr {
  *		result is from *reuse*\ **->socks**\ [] using the hash of the
  *		tuple.
  *
- * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  * 	Description
  * 		Check whether *iph* and *th* contain a valid SYN cookie ACK for
  * 		the listening socket in *sk*.
@@ -2666,7 +2666,7 @@ union bpf_attr {
  * 		0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
  * 		error otherwise.
  *
- * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
  *	Description
  *		Get name of sysctl in /proc/sys/ and copy it into provided by
  *		program buffer *buf* of size *buf_len*.
@@ -2682,7 +2682,7 @@ union bpf_attr {
  *		**-E2BIG** if the buffer wasn't big enough (*buf* will contain
  *		truncated name in this case).
  *
- * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
  *	Description
  *		Get current value of sysctl as it is presented in /proc/sys
  *		(incl. newline, etc), and copy it as a string into provided
@@ -2701,7 +2701,7 @@ union bpf_attr {
  *		**-EINVAL** if current value was unavailable, e.g. because
  *		sysctl is uninitialized and read returns -EIO for it.
  *
- * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
  *	Description
  *		Get new value being written by user space to sysctl (before
  *		the actual write happens) and copy it as a string into
@@ -2718,7 +2718,7 @@ union bpf_attr {
  *
  *		**-EINVAL** if sysctl is being read.
  *
- * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
  *	Description
  *		Override new value being written by user space to sysctl with
  *		value provided by program in buffer *buf* of size *buf_len*.
@@ -2735,7 +2735,7 @@ union bpf_attr {
  *
  *		**-EINVAL** if sysctl is being read.
  *
- * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
  *	Description
  *		Convert the initial part of the string from buffer *buf* of
  *		size *buf_len* to a long integer according to the given base
@@ -2759,7 +2759,7 @@ union bpf_attr {
  *
  *		**-ERANGE** if resulting value was out of range.
  *
- * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
  *	Description
  *		Convert the initial part of the string from buffer *buf* of
  *		size *buf_len* to an unsigned long integer according to the
@@ -2810,7 +2810,7 @@ union bpf_attr {
  *		**NULL** if not found or there was an error in adding
  *		a new bpf-local-storage.
  *
- * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
  *	Description
  *		Delete a bpf-local-storage from a *sk*.
  *	Return
@@ -2818,7 +2818,7 @@ union bpf_attr {
  *
  *		**-ENOENT** if the bpf-local-storage cannot be found.
  *
- * int bpf_send_signal(u32 sig)
+ * long bpf_send_signal(u32 sig)
  *	Description
  *		Send signal *sig* to the process of the current task.
  *		The signal may be delivered to any of this process's threads.
@@ -2859,7 +2859,7 @@ union bpf_attr {
  *
  *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
  *
- * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
  * 	Description
  * 		Write raw *data* blob into a special BPF perf event held by
  * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -2883,21 +2883,21 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Safely attempt to read *size* bytes from user space address
  * 		*unsafe_ptr* and store the data in *dst*.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Safely attempt to read *size* bytes from kernel space address
  * 		*unsafe_ptr* and store the data in *dst*.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Copy a NUL terminated string from an unsafe user address
  * 		*unsafe_ptr* to *dst*. The *size* should include the
@@ -2941,7 +2941,7 @@ union bpf_attr {
  * 		including the trailing NUL character. On error, a negative
  * 		value.
  *
- * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
  * 		to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
@@ -2949,14 +2949,14 @@ union bpf_attr {
  * 		On success, the strictly positive length of the string, including
  * 		the trailing NUL character. On error, a negative value.
  *
- * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
+ * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
  *	Description
  *		Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
  *		*rcv_nxt* is the ack_seq to be sent out.
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_send_signal_thread(u32 sig)
+ * long bpf_send_signal_thread(u32 sig)
  *	Description
  *		Send signal *sig* to the thread corresponding to the current task.
  *	Return
@@ -2976,7 +2976,7 @@ union bpf_attr {
  *	Return
  *		The 64 bit jiffies
  *
- * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
+ * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
  *	Description
  *		For an eBPF program attached to a perf event, retrieve the
  *		branch records (**struct perf_branch_entry**) associated to *ctx*
@@ -2995,7 +2995,7 @@ union bpf_attr {
  *
  *		**-ENOENT** if architecture does not support branch records.
  *
- * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
  *	Description
  *		Returns 0 on success, values for *pid* and *tgid* as seen from the current
  *		*namespace* will be returned in *nsdata*.
@@ -3007,7 +3007,7 @@ union bpf_attr {
  *
  *		**-ENOENT** if pidns does not exists for the current task.
  *
- * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
  *	Description
  *		Write raw *data* blob into a special BPF perf event held by
  *		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -3062,7 +3062,7 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
- * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
  *	Description
  *		Assign the *sk* to the *skb*. When combined with appropriate
  *		routing configuration to receive the packet towards the socket,
@@ -3097,7 +3097,7 @@ union bpf_attr {
  * 	Return
  * 		Current *ktime*.
  *
- * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
  * 	Description
  * 		**bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
  * 		out the format string.
@@ -3126,7 +3126,7 @@ union bpf_attr {
  *
  *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
  *
- * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * long bpf_seq_write(struct seq_file *m, const void *data, u32 len)
  * 	Description
  * 		**bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
  * 		The *m* represents the seq_file. The *data* and *len* represent the
@@ -3221,7 +3221,7 @@ union bpf_attr {
  *	Return
  *		Requested value, or 0, if flags are not recognized.
  *
- * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * long bpf_csum_level(struct sk_buff *skb, u64 level)
  * 	Description
  * 		Change the skbs checksum level by one layer up or down, or
  * 		reset it entirely to none in order to have the stack perform
-- 
cgit 


From 5e85c6bb8e74bd9daa4f5815da373d4ac2cb1a35 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 22 Jun 2020 20:22:22 -0700
Subject: selftests/bpf: Add variable-length data concatenation pattern test

Add selftest that validates variable-length data reading and concatentation
with one big shared data array. This is a common pattern in production use for
monitoring and tracing applications, that potentially can read a lot of data,
but overall read much less. Such pattern allows to determine precisely what
amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200623032224.4020118-2-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/varlen.c | 56 +++++++++++++++
 tools/testing/selftests/bpf/progs/test_varlen.c | 96 +++++++++++++++++++++++++
 2 files changed, 152 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/varlen.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_varlen.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
new file mode 100644
index 000000000000..7533565e096d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_varlen.skel.h"
+
+#define CHECK_VAL(got, exp) \
+	CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \
+	      (long)(got), (long)(exp))
+
+void test_varlen(void)
+{
+	int duration = 0, err;
+	struct test_varlen* skel;
+	struct test_varlen__bss *bss;
+	struct test_varlen__data *data;
+	const char str1[] = "Hello, ";
+	const char str2[] = "World!";
+	const char exp_str[] = "Hello, \0World!\0";
+	const int size1 = sizeof(str1);
+	const int size2 = sizeof(str2);
+
+	skel = test_varlen__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+	bss = skel->bss;
+	data = skel->data;
+
+	err = test_varlen__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	bss->test_pid = getpid();
+
+	/* trigger everything */
+	memcpy(bss->buf_in1, str1, size1);
+	memcpy(bss->buf_in2, str2, size2);
+	bss->capture = true;
+	usleep(1);
+	bss->capture = false;
+
+	CHECK_VAL(bss->payload1_len1, size1);
+	CHECK_VAL(bss->payload1_len2, size2);
+	CHECK_VAL(bss->total1, size1 + size2);
+	CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check",
+	      "doesn't match!");
+
+	CHECK_VAL(data->payload2_len1, size1);
+	CHECK_VAL(data->payload2_len2, size2);
+	CHECK_VAL(data->total2, size1 + size2);
+	CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
+	      "doesn't match!");
+cleanup:
+	test_varlen__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
new file mode 100644
index 000000000000..09691852debf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MAX_LEN 256
+
+char buf_in1[MAX_LEN] = {};
+char buf_in2[MAX_LEN] = {};
+
+int test_pid = 0;
+bool capture = false;
+
+/* .bss */
+long payload1_len1 = 0;
+long payload1_len2 = 0;
+long total1 = 0;
+char payload1[MAX_LEN + MAX_LEN] = {};
+
+/* .data */
+int payload2_len1 = -1;
+int payload2_len2 = -1;
+int total2 = -1;
+char payload2[MAX_LEN + MAX_LEN] = { 1 };
+
+SEC("raw_tp/sys_enter")
+int handler64(void *regs)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32;
+	void *payload = payload1;
+	u64 len;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid || !capture)
+		return 0;
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload1_len1 = len;
+	}
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload1_len2 = len;
+	}
+
+	total1 = payload - (void *)payload1;
+
+	return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int handler32(void *regs)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32;
+	void *payload = payload2;
+	u32 len;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid || !capture)
+		return 0;
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload2_len1 = len;
+	}
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload2_len2 = len;
+	}
+
+	total2 = payload - (void *)payload2;
+
+	return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int handler_exit(void *regs)
+{
+	long bla;
+
+	if (bpf_probe_read_kernel(&bla, sizeof(bla), 0))
+		return 1;
+	else
+		return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
-- 
cgit 


From 2fde1747c986cac28fa66d0cffd7577db042640b Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 22 Jun 2020 20:22:23 -0700
Subject: selftests/bpf: Add variable-length data concat pattern less than test

Extend original variable-length tests with a case to catch a common
existing pattern of testing for < 0 for errors. Note because
verifier also tracks upper bounds and we know it can not be greater
than MAX_LEN here we can skip upper bound check.

In ALU64 enabled compilation converting from long->int return types
in probe helpers results in extra instruction pattern, <<= 32, s >>= 32.
The trade-off is the non-ALU64 case works. If you really care about
every extra insn (XDP case?) then you probably should be using original
int type.

In addition adding a sext insn to bpf might help the verifier in the
general case to avoid these types of tricks.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200623032224.4020118-3-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/varlen.c | 12 +++++
 tools/testing/selftests/bpf/progs/test_varlen.c | 70 +++++++++++++++++++++++--
 2 files changed, 78 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
index 7533565e096d..c75525eab02c 100644
--- a/tools/testing/selftests/bpf/prog_tests/varlen.c
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -51,6 +51,18 @@ void test_varlen(void)
 	CHECK_VAL(data->total2, size1 + size2);
 	CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
 	      "doesn't match!");
+
+	CHECK_VAL(data->payload3_len1, size1);
+	CHECK_VAL(data->payload3_len2, size2);
+	CHECK_VAL(data->total3, size1 + size2);
+	CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check",
+	      "doesn't match!");
+
+	CHECK_VAL(data->payload4_len1, size1);
+	CHECK_VAL(data->payload4_len2, size2);
+	CHECK_VAL(data->total4, size1 + size2);
+	CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check",
+	      "doesn't match!");
 cleanup:
 	test_varlen__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
index 09691852debf..cd4b72c55dfe 100644
--- a/tools/testing/selftests/bpf/progs/test_varlen.c
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -26,8 +26,18 @@ int payload2_len2 = -1;
 int total2 = -1;
 char payload2[MAX_LEN + MAX_LEN] = { 1 };
 
+int payload3_len1 = -1;
+int payload3_len2 = -1;
+int total3= -1;
+char payload3[MAX_LEN + MAX_LEN] = { 1 };
+
+int payload4_len1 = -1;
+int payload4_len2 = -1;
+int total4= -1;
+char payload4[MAX_LEN + MAX_LEN] = { 1 };
+
 SEC("raw_tp/sys_enter")
-int handler64(void *regs)
+int handler64_unsigned(void *regs)
 {
 	int pid = bpf_get_current_pid_tgid() >> 32;
 	void *payload = payload1;
@@ -54,8 +64,34 @@ int handler64(void *regs)
 	return 0;
 }
 
-SEC("tp_btf/sys_enter")
-int handler32(void *regs)
+SEC("raw_tp/sys_exit")
+int handler64_signed(void *regs)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32;
+	void *payload = payload3;
+	long len;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid || !capture)
+		return 0;
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+	if (len >= 0) {
+		payload += len;
+		payload3_len1 = len;
+	}
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+	if (len >= 0) {
+		payload += len;
+		payload3_len2 = len;
+	}
+	total3 = payload - (void *)payload3;
+
+	return 0;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int handler32_unsigned(void *regs)
 {
 	int pid = bpf_get_current_pid_tgid() >> 32;
 	void *payload = payload2;
@@ -82,7 +118,33 @@ int handler32(void *regs)
 	return 0;
 }
 
-SEC("tp_btf/sys_exit")
+SEC("tp/raw_syscalls/sys_exit")
+int handler32_signed(void *regs)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32;
+	void *payload = payload4;
+	int len;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid || !capture)
+		return 0;
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+	if (len >= 0) {
+		payload += len;
+		payload4_len1 = len;
+	}
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+	if (len >= 0) {
+		payload += len;
+		payload4_len2 = len;
+	}
+	total4 = payload - (void *)payload4;
+
+	return 0;
+}
+
+SEC("tp/syscalls/sys_exit_getpid")
 int handler_exit(void *regs)
 {
 	long bla;
-- 
cgit 


From 9d9d8cc21e3827b89e414f990016836290de3038 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Tue, 23 Jun 2020 12:37:10 +0200
Subject: tools, bpftool: Correctly evaluate $(BUILD_BPF_SKELS) in Makefile

Currently, if the clang-bpf-co-re feature is not available, the build
fails with e.g.

  CC       prog.o
prog.c:1462:10: fatal error: profiler.skel.h: No such file or directory
 1462 | #include "profiler.skel.h"
      |          ^~~~~~~~~~~~~~~~~

This is due to the fact that the BPFTOOL_WITHOUT_SKELETONS macro is not
defined, despite BUILD_BPF_SKELS not being set. Fix this by correctly
evaluating $(BUILD_BPF_SKELS) when deciding on whether to add
-DBPFTOOL_WITHOUT_SKELETONS to CFLAGS.

Fixes: 05aca6da3b5a ("tools/bpftool: Generalize BPF skeleton support and generate vmlinux.h")
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200623103710.10370-1-tklauser@distanz.ch
---
 tools/bpf/bpftool/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 06f436e8191a..8c6563e56ffc 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -155,7 +155,7 @@ $(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h
 endif
 endif
 
-CFLAGS += $(if BUILD_BPF_SKELS,,-DBPFTOOL_WITHOUT_SKELETONS)
+CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
-- 
cgit 


From 0558c396040734bc1d361919566a581fd41aa539 Mon Sep 17 00:00:00 2001
From: tannerlove <tannerlove@google.com>
Date: Mon, 22 Jun 2020 13:43:24 -0400
Subject: selftests/net: plug rxtimestamp test into kselftest framework

Run rxtimestamp as part of TEST_PROGS. Analogous to other tests, add
new rxtimestamp.sh wrapper script, so that the test runs isolated
from background traffic in a private network namespace.

Also ignore failures of test case #6 by default. This case verifies
that a receive timestamp is not reported if timestamp reporting is
enabled for a socket, but generation is disabled. Receive timestamp
generation has to be enabled globally, as no associated socket is
known yet. A background process that enables rx timestamp generation
therefore causes a false positive. Ntpd is one example that does.

Add a "--strict" option to cause failure in the event that any test
case fails, including test #6. This is useful for environments that
are known to not have such background processes.

Tested:
make -C tools/testing/selftests TARGETS="net" run_tests

Signed-off-by: Tanner Love <tannerlove@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile       |  1 +
 tools/testing/selftests/net/rxtimestamp.c  | 11 +++++++++--
 tools/testing/selftests/net/rxtimestamp.sh |  4 ++++
 3 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100755 tools/testing/selftests/net/rxtimestamp.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 895ec992b2f1..bfacb960450f 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -17,6 +17,7 @@ TEST_PROGS += route_localnet.sh
 TEST_PROGS += reuseaddr_ports_exhausted.sh
 TEST_PROGS += txtimestamp.sh
 TEST_PROGS += vrf-xfrm-tests.sh
+TEST_PROGS += rxtimestamp.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 422e7761254d..d4ea86a13e52 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -44,6 +44,7 @@ struct test_case {
 	struct options sockopt;
 	struct tstamps expected;
 	bool enabled;
+	bool warn_on_fail;
 };
 
 struct sof_flag {
@@ -89,7 +90,7 @@ static struct test_case test_cases[] = {
 	},
 	{
 		{ so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
-		{}
+		warn_on_fail : true
 	},
 	{
 		{ so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
@@ -115,6 +116,7 @@ static struct option long_options[] = {
 	{ "tcp", no_argument, 0, 't' },
 	{ "udp", no_argument, 0, 'u' },
 	{ "ip", no_argument, 0, 'i' },
+	{ "strict", no_argument, 0, 'S' },
 	{ NULL, 0, NULL, 0 },
 };
 
@@ -327,6 +329,7 @@ int main(int argc, char **argv)
 {
 	bool all_protocols = true;
 	bool all_tests = true;
+	bool strict = false;
 	int arg_index = 0;
 	int failures = 0;
 	int s, t;
@@ -363,6 +366,9 @@ int main(int argc, char **argv)
 			all_protocols = false;
 			socket_types[0].enabled = true;
 			break;
+		case 'S':
+			strict = true;
+			break;
 		default:
 			error(1, 0, "Failed to parse parameters.");
 		}
@@ -379,7 +385,8 @@ int main(int argc, char **argv)
 
 			printf("Starting testcase %d...\n", t);
 			if (run_test_case(socket_types[s], test_cases[t])) {
-				failures++;
+				if (strict || !test_cases[t].warn_on_fail)
+					failures++;
 				printf("FAILURE in test case ");
 				print_test_case(&test_cases[t]);
 			}
diff --git a/tools/testing/selftests/net/rxtimestamp.sh b/tools/testing/selftests/net/rxtimestamp.sh
new file mode 100755
index 000000000000..91631e88bf46
--- /dev/null
+++ b/tools/testing/selftests/net/rxtimestamp.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+./in_netns.sh ./rxtimestamp $@
-- 
cgit 


From 54b66c2255fadc8d78e88b5ffd99b19f7f754f5a Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Tue, 23 Jun 2020 22:36:00 +0100
Subject: tools, bpftool: Fix variable shadowing in emit_obj_refs_json()

Building bpftool yields the following complaint:

    pids.c: In function 'emit_obj_refs_json':
    pids.c:175:80: warning: declaration of 'json_wtr' shadows a global declaration [-Wshadow]
      175 | void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr)
          |                                                                 ~~~~~~~~~~~~~~~^~~~~~~~
    In file included from pids.c:11:
    main.h:141:23: note: shadowed declaration is here
      141 | extern json_writer_t *json_wtr;
          |                       ^~~~~~~~

Let's rename the variable.

v2:
- Rename the variable instead of calling the global json_wtr directly.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200623213600.16643-1-quentin@isovalent.com
---
 tools/bpf/bpftool/pids.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index 3474a91743ff..2709be4de2b1 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -172,7 +172,8 @@ void delete_obj_refs_table(struct obj_refs_table *table)
 	}
 }
 
-void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr)
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id,
+			json_writer_t *json_writer)
 {
 	struct obj_refs *refs;
 	struct obj_ref *ref;
@@ -187,16 +188,16 @@ void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *j
 		if (refs->ref_cnt == 0)
 			break;
 
-		jsonw_name(json_wtr, "pids");
-		jsonw_start_array(json_wtr);
+		jsonw_name(json_writer, "pids");
+		jsonw_start_array(json_writer);
 		for (i = 0; i < refs->ref_cnt; i++) {
 			ref = &refs->refs[i];
-			jsonw_start_object(json_wtr);
-			jsonw_int_field(json_wtr, "pid", ref->pid);
-			jsonw_string_field(json_wtr, "comm", ref->comm);
-			jsonw_end_object(json_wtr);
+			jsonw_start_object(json_writer);
+			jsonw_int_field(json_writer, "pid", ref->pid);
+			jsonw_string_field(json_writer, "comm", ref->comm);
+			jsonw_end_object(json_writer);
 		}
-		jsonw_end_array(json_wtr);
+		jsonw_end_array(json_writer);
 		break;
 	}
 }
-- 
cgit 


From 135c783f4794fbdeace4a969dea6eabd27f8a501 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 23 Jun 2020 09:42:07 +0100
Subject: libbpf: Fix spelling mistake "kallasyms" -> "kallsyms"

There is a spelling mistake in a pr_warn message. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200623084207.149253-1-colin.king@canonical.com
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 18461deb1b19..deea27aadcef 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -5741,7 +5741,7 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
 		if (ret == EOF && feof(f))
 			break;
 		if (ret != 3) {
-			pr_warn("failed to read kallasyms entry: %d\n", ret);
+			pr_warn("failed to read kallsyms entry: %d\n", ret);
 			err = -EINVAL;
 			goto out;
 		}
-- 
cgit 


From 192b6638eea5d40c99964291671fc0371b858f6e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 23 Jun 2020 21:38:05 -0700
Subject: libbpf: Prevent loading vmlinux BTF twice

Prevent loading/parsing vmlinux BTF twice in some cases: for CO-RE relocations
and for BTF-aware hooks (tp_btf, fentry/fexit, etc).

Fixes: a6ed02cac690 ("libbpf: Load btf_vmlinux only once per object.")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200624043805.1794620-1-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index deea27aadcef..6b4955d170ff 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2504,22 +2504,31 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
 
 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
 {
+	bool need_vmlinux_btf = false;
 	struct bpf_program *prog;
 	int err;
 
+	/* CO-RE relocations need kernel BTF */
+	if (obj->btf_ext && obj->btf_ext->field_reloc_info.len)
+		need_vmlinux_btf = true;
+
 	bpf_object__for_each_program(prog, obj) {
 		if (libbpf_prog_needs_vmlinux_btf(prog)) {
-			obj->btf_vmlinux = libbpf_find_kernel_btf();
-			if (IS_ERR(obj->btf_vmlinux)) {
-				err = PTR_ERR(obj->btf_vmlinux);
-				pr_warn("Error loading vmlinux BTF: %d\n", err);
-				obj->btf_vmlinux = NULL;
-				return err;
-			}
-			return 0;
+			need_vmlinux_btf = true;
+			break;
 		}
 	}
 
+	if (!need_vmlinux_btf)
+		return 0;
+
+	obj->btf_vmlinux = libbpf_find_kernel_btf();
+	if (IS_ERR(obj->btf_vmlinux)) {
+		err = PTR_ERR(obj->btf_vmlinux);
+		pr_warn("Error loading vmlinux BTF: %d\n", err);
+		obj->btf_vmlinux = NULL;
+		return err;
+	}
 	return 0;
 }
 
@@ -4945,8 +4954,8 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
 	if (targ_btf_path)
 		targ_btf = btf__parse_elf(targ_btf_path, NULL);
 	else
-		targ_btf = libbpf_find_kernel_btf();
-	if (IS_ERR(targ_btf)) {
+		targ_btf = obj->btf_vmlinux;
+	if (IS_ERR_OR_NULL(targ_btf)) {
 		pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
 		return PTR_ERR(targ_btf);
 	}
@@ -4987,7 +4996,9 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
 	}
 
 out:
-	btf__free(targ_btf);
+	/* obj->btf_vmlinux is freed at the end of object load phase */
+	if (targ_btf != obj->btf_vmlinux)
+		btf__free(targ_btf);
 	if (!IS_ERR_OR_NULL(cand_cache)) {
 		hashmap__for_each_entry(cand_cache, entry, i) {
 			bpf_core_free_cands(entry->value);
-- 
cgit 


From fea549b030152d5336dbd960b357a4d4b841a851 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 24 Jun 2020 11:10:59 -0700
Subject: selftests/bpf: Workaround for get_stack_rawtp test.

./test_progs-no_alu32 -t get_stack_raw_tp
fails due to:

52: (85) call bpf_get_stack#67
53: (bf) r8 = r0
54: (bf) r1 = r8
55: (67) r1 <<= 32
56: (c7) r1 s>>= 32
; if (usize < 0)
57: (c5) if r1 s< 0x0 goto pc+26
 R0=inv(id=0,smax_value=800) R1_w=inv(id=0,umax_value=800,var_off=(0x0; 0x3ff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R8_w=inv(id=0,smax_value=800) R9=inv800
; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
58: (1f) r9 -= r8
; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
59: (bf) r2 = r7
60: (0f) r2 += r1
regs=1 stack=0 before 52: (85) call bpf_get_stack#67
; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
61: (bf) r1 = r6
62: (bf) r3 = r9
63: (b7) r4 = 0
64: (85) call bpf_get_stack#67
 R0=inv(id=0,smax_value=800) R1_w=ctx(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=4,vs=1600,umax_value=800,var_off=(0x0; 0x3ff),s32_max_value=1023,u32_max_value=1023) R3_w=inv(id=0,umax_value=9223372036854776608)
R3 unbounded memory access, use 'var &= const' or 'if (var < const)'

In the C code:
  usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
  if (usize < 0)
          return 0;

  ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
  if (ksize < 0)
          return 0;

We used to have problem with pointer arith in R2.
Now it's a problem with two integers in R3.
'if (usize < 0)' is comparing R1 and makes it [0,800], but R8 stays [-inf,800].
Both registers represent the same 'usize' variable.
Then R9 -= R8 is doing 800 - [-inf, 800]
so the result of "max_len - usize" looks unbounded to the verifier while
it's obvious in C code that "max_len - usize" should be [0, 800].

To workaround the problem convert ksize and usize variables from int to long.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index 29817a703984..b6a6eb279e54 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -57,8 +57,9 @@ struct {
 SEC("raw_tracepoint/sys_enter")
 int bpf_prog1(void *ctx)
 {
-	int max_len, max_buildid_len, usize, ksize, total_size;
+	int max_len, max_buildid_len, total_size;
 	struct stack_trace_t *data;
+	long usize, ksize;
 	void *raw_data;
 	__u32 key = 0;
 
-- 
cgit 


From f9bcf96837f158db6ea982d15cd2c8161ca6bc23 Mon Sep 17 00:00:00 2001
From: Dmitry Yakunin <zeil@yandex-team.ru>
Date: Sat, 20 Jun 2020 18:30:52 +0300
Subject: bpf: Add SO_KEEPALIVE and related options to bpf_setsockopt

This patch adds support of SO_KEEPALIVE flag and TCP related options
to bpf_setsockopt() routine. This is helpful if we want to enable or tune
TCP keepalive for applications which don't do it in the userspace code.

v3:
  - update kernel-doc in uapi (Nikita Vetoshkin <nekto0n@yandex-team.ru>)

v4:
  - update kernel-doc in tools too (Alexei Starovoitov)
  - add test to selftests (Alexei Starovoitov)

Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200620153052.9439-3-zeil@yandex-team.ru
---
 tools/include/uapi/linux/bpf.h                    |  7 ++++--
 tools/testing/selftests/bpf/progs/connect4_prog.c | 27 +++++++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9d3923e6b860..d9737d51dd19 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1621,10 +1621,13 @@ union bpf_attr {
  *
  * 		* **SOL_SOCKET**, which supports the following *optname*\ s:
  * 		  **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
- * 		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * 		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
+ * 		  **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
  * 		* **IPPROTO_TCP**, which supports the following *optname*\ s:
  * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
- * 		  **TCP_BPF_SNDCWND_CLAMP**.
+ * 		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
+ * 		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
+ * 		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
  * 	Return
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 1ab2c5eba86c..b1b2773c0b9d 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
 	return 0;
 }
 
+static __inline int set_keepalive(struct bpf_sock_addr *ctx)
+{
+	int zero = 0, one = 1;
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
+		return 1;
+	if (ctx->type == SOCK_STREAM) {
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
+			return 1;
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
+			return 1;
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
+			return 1;
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
+			return 1;
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
+			return 1;
+	}
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/connect4")
 int connect_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
 	if (bind_to_device(ctx))
 		return 0;
 
+	if (set_keepalive(ctx))
+		return 0;
+
 	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
 		return 0;
 	else if (ctx->type == SOCK_STREAM)
-- 
cgit 


From af7ec13833619e17f03aa73a785a2f871da6d66b Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:09 -0700
Subject: bpf: Add bpf_skc_to_tcp6_sock() helper

The helper is used in tracing programs to cast a socket
pointer to a tcp6_sock pointer.
The return value could be NULL if the casting is illegal.

A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added
so the verifier is able to deduce proper return types for the helper.

Different from the previous BTF_ID based helpers,
the bpf_skc_to_tcp6_sock() argument can be several possible
btf_ids. More specifically, all possible socket data structures
with sock_common appearing in the first in the memory layout.
This patch only added socket types related to tcp and udp.

All possible argument btf_id and return value btf_id
for helper bpf_skc_to_tcp6_sock() are pre-calculcated and
cached. In the future, it is even possible to precompute
these btf_id's at kernel build time.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230809.3988195-1-yhs@fb.com
---
 tools/include/uapi/linux/bpf.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d9737d51dd19..e90ad07b291a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3255,6 +3255,12 @@ union bpf_attr {
  * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
  * 		is returned or the error code -EACCES in case the skb is not
  * 		subject to CHECKSUM_UNNECESSARY.
+ *
+ * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk)
+ *	Description
+ *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3392,7 +3398,8 @@ union bpf_attr {
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
-	FN(csum_level),
+	FN(csum_level),			\
+	FN(skc_to_tcp6_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit 


From 478cfbdf5f13dfe09cfd0b1cbac821f5e27f6108 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:11 -0700
Subject: bpf: Add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers

Three more helpers are added to cast a sock_common pointer to
an tcp_sock, tcp_timewait_sock or a tcp_request_sock for
tracing programs.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230811.3988277-1-yhs@fb.com
---
 tools/include/uapi/linux/bpf.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e90ad07b291a..b9412ab275f3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3261,6 +3261,24 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
+ *	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3399,7 +3417,10 @@ union bpf_attr {
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
 	FN(csum_level),			\
-	FN(skc_to_tcp6_sock),
+	FN(skc_to_tcp6_sock),		\
+	FN(skc_to_tcp_sock),		\
+	FN(skc_to_tcp_timewait_sock),	\
+	FN(skc_to_tcp_request_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit 


From 0d4fad3e57df2bf61e8ffc8d12a34b1caf9b8835 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:15 -0700
Subject: bpf: Add bpf_skc_to_udp6_sock() helper

The helper is used in tracing programs to cast a socket
pointer to a udp6_sock pointer.
The return value could be NULL if the casting is illegal.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20200623230815.3988481-1-yhs@fb.com
---
 tools/include/uapi/linux/bpf.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b9412ab275f3..0cb8ec948816 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3279,6 +3279,12 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3420,7 +3426,8 @@ union bpf_attr {
 	FN(skc_to_tcp6_sock),		\
 	FN(skc_to_tcp_sock),		\
 	FN(skc_to_tcp_timewait_sock),	\
-	FN(skc_to_tcp_request_sock),
+	FN(skc_to_tcp_request_sock),	\
+	FN(skc_to_udp6_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit 


From 84544f5637ff3501876ba96bd48ca900317e08fb Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:16 -0700
Subject: selftests/bpf: Move newer bpf_iter_* type redefining to a new header
 file

Commit b9f4c01f3e0b ("selftest/bpf: Make bpf_iter selftest
compilable against old vmlinux.h") and Commit dda18a5c0b75
("selftests/bpf: Convert bpf_iter_test_kern{3, 4}.c to define
own bpf_iter_meta") redefined newly introduced types
in bpf programs so the bpf program can still compile
properly with old kernels although loading may fail.

Since this patch set introduced new types and the same
workaround is needed, so let us move the workaround
to a separate header file so they do not clutter
bpf programs.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230816.3988656-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter.h       | 49 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/bpf_iter_bpf_map.c | 18 +-------
 .../selftests/bpf/progs/bpf_iter_ipv6_route.c      | 18 +-------
 .../testing/selftests/bpf/progs/bpf_iter_netlink.c | 18 +-------
 tools/testing/selftests/bpf/progs/bpf_iter_task.c  | 18 +-------
 .../selftests/bpf/progs/bpf_iter_task_file.c       | 20 +--------
 .../selftests/bpf/progs/bpf_iter_test_kern3.c      | 17 +-------
 .../selftests/bpf/progs/bpf_iter_test_kern4.c      | 17 +-------
 .../bpf/progs/bpf_iter_test_kern_common.h          | 18 +-------
 9 files changed, 57 insertions(+), 136 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
new file mode 100644
index 000000000000..3757e88c6406
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
+#define bpf_iter__netlink bpf_iter__netlink___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#define bpf_iter__task_file bpf_iter__task_file___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#undef bpf_iter__ipv6_route
+#undef bpf_iter__netlink
+#undef bpf_iter__task
+#undef bpf_iter__task_file
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__ipv6_route {
+	struct bpf_iter_meta *meta;
+	struct fib6_info *rt;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__netlink {
+	struct bpf_iter_meta *meta;
+	struct netlink_sock *sk;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_file {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+	__u32 fd;
+	struct file *file;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+	struct bpf_iter_meta *meta;
+	struct bpf_map *map;
+} __attribute__((preserve_access_index));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b57bd6fef208..08651b23edba 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -1,27 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
-	struct bpf_iter_meta *meta;
-	struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
 SEC("iter/bpf_map")
 int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index c8e9ca74c87b..93a452d1d136 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -1,25 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__ipv6_route
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__ipv6_route {
-	struct bpf_iter_meta *meta;
-	struct fib6_info *rt;
-} __attribute__((preserve_access_index));
-
 char _license[] SEC("license") = "GPL";
 
 extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index e7b8753eac0b..fda5036fdf75 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -1,11 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__netlink bpf_iter__netlink___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__netlink
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
@@ -14,17 +9,6 @@ char _license[] SEC("license") = "GPL";
 #define sk_rmem_alloc	sk_backlog.rmem_alloc
 #define sk_refcnt	__sk_common.skc_refcnt
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__netlink {
-	struct bpf_iter_meta *meta;
-	struct netlink_sock *sk;
-} __attribute__((preserve_access_index));
-
 static inline struct inode *SOCK_INODE(struct socket *socket)
 {
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index ee754021f98e..4983087852a0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -1,27 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 0f0ec3db20ba..8b787baa2654 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -1,29 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task_file bpf_iter__task_file___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task_file
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task_file {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-	__u32 fd;
-	struct file *file;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task_file")
 int dump_task_file(struct bpf_iter__task_file *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
index 13c2c90c835f..2a4647f20c46 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -1,25 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index 0aa71b333cf3..ee49493dc125 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -1,25 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
-	struct bpf_iter_meta *meta;
-	struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
 __u32 map1_id = 0, map2_id = 0;
 __u32 map1_accessed = 0, map2_accessed = 0;
 __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
index dee1339e6905..d5e3df66ad9a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -1,27 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 int count = 0;
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
-- 
cgit 


From 647b502e3d5456f5c240b1587112b163c69732e9 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:17 -0700
Subject: selftests/bpf: Refactor some net macros to bpf_tracing_net.h

Refactor bpf_iter_ipv6_route.c and bpf_iter_netlink.c
so net macros, originally from various include/linux header
files, are moved to a new header file
bpf_tracing_net.h. The goal is to improve reuse so
networking tracing programs do not need to
copy these macros every time they use them.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230817.3988962-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c |  7 +------
 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c    |  4 +---
 tools/testing/selftests/bpf/progs/bpf_tracing_net.h     | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 9 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_tracing_net.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index 93a452d1d136..d58d9f1642b5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
@@ -8,12 +9,6 @@ char _license[] SEC("license") = "GPL";
 
 extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
 
-#define RTF_GATEWAY		0x0002
-#define IFNAMSIZ		16
-#define fib_nh_gw_family	nh_common.nhc_gw_family
-#define fib_nh_gw6		nh_common.nhc_gw.ipv6
-#define fib_nh_dev		nh_common.nhc_dev
-
 SEC("iter/ipv6_route")
 int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index fda5036fdf75..cec82a419800 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -1,14 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-#define sk_rmem_alloc	sk_backlog.rmem_alloc
-#define sk_refcnt	__sk_common.skc_refcnt
-
 static inline struct inode *SOCK_INODE(struct socket *socket)
 {
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
new file mode 100644
index 000000000000..1f38a1098727
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_NET_H__
+#define __BPF_TRACING_NET_H__
+
+#define IFNAMSIZ		16
+
+#define RTF_GATEWAY		0x0002
+
+#define fib_nh_dev		nh_common.nhc_dev
+#define fib_nh_gw_family	nh_common.nhc_gw_family
+#define fib_nh_gw6		nh_common.nhc_gw.ipv6
+
+#define sk_rmem_alloc		sk_backlog.rmem_alloc
+#define sk_refcnt		__sk_common.skc_refcnt
+
+#endif
-- 
cgit 


From 3982bfaaef7c80ecf6a065cbf9422165a8e36f75 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:19 -0700
Subject: selftests/bpf: Add more common macros to bpf_tracing_net.h

These newly added macros will be used in subsequent bpf iterator
tcp{4,6} and udp{4,6} programs.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230819.3989050-1-yhs@fb.com
---
 .../testing/selftests/bpf/progs/bpf_tracing_net.h  | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 1f38a1098727..01378911252b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -2,15 +2,50 @@
 #ifndef __BPF_TRACING_NET_H__
 #define __BPF_TRACING_NET_H__
 
+#define AF_INET			2
+#define AF_INET6		10
+
+#define ICSK_TIME_RETRANS	1
+#define ICSK_TIME_PROBE0	3
+#define ICSK_TIME_LOSS_PROBE	5
+#define ICSK_TIME_REO_TIMEOUT	6
+
 #define IFNAMSIZ		16
 
 #define RTF_GATEWAY		0x0002
 
+#define TCP_INFINITE_SSTHRESH	0x7fffffff
+#define TCP_PINGPONG_THRESH	3
+
 #define fib_nh_dev		nh_common.nhc_dev
 #define fib_nh_gw_family	nh_common.nhc_gw_family
 #define fib_nh_gw6		nh_common.nhc_gw.ipv6
 
+#define inet_daddr		sk.__sk_common.skc_daddr
+#define inet_rcv_saddr		sk.__sk_common.skc_rcv_saddr
+#define inet_dport		sk.__sk_common.skc_dport
+
+#define ir_loc_addr		req.__req_common.skc_rcv_saddr
+#define ir_num			req.__req_common.skc_num
+#define ir_rmt_addr		req.__req_common.skc_daddr
+#define ir_rmt_port		req.__req_common.skc_dport
+#define ir_v6_rmt_addr		req.__req_common.skc_v6_daddr
+#define ir_v6_loc_addr		req.__req_common.skc_v6_rcv_saddr
+
+#define sk_family		__sk_common.skc_family
 #define sk_rmem_alloc		sk_backlog.rmem_alloc
 #define sk_refcnt		__sk_common.skc_refcnt
+#define sk_state		__sk_common.skc_state
+#define sk_v6_daddr		__sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr		__sk_common.skc_v6_rcv_saddr
+
+#define s6_addr32		in6_u.u6_addr32
+
+#define tw_daddr		__tw_common.skc_daddr
+#define tw_rcv_saddr		__tw_common.skc_rcv_saddr
+#define tw_dport		__tw_common.skc_dport
+#define tw_refcnt		__tw_common.skc_refcnt
+#define tw_v6_daddr		__tw_common.skc_v6_daddr
+#define tw_v6_rcv_saddr		__tw_common.skc_v6_rcv_saddr
 
 #endif
-- 
cgit 


From 2767c97765cb3d9b54c8e62b468e55cc56854a66 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:20 -0700
Subject: selftests/bpf: Implement sample tcp/tcp6 bpf_iter programs

In my VM, I got identical result compared to /proc/net/{tcp,tcp6}.
For tcp6:
  $ cat /proc/net/tcp6
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
     0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000001 00000000     0        0 17955 1 000000003eb3102e 100 0 0 10 0

  $ cat /sys/fs/bpf/p1
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
     0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 17955 1 000000003eb3102e 100 0 0 10 0

For tcp:
  $ cat /proc/net/tcp
  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
   0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 2666 1 000000007152e43f 100 0 0 10 0
  $ cat /sys/fs/bpf/p2
  sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
   1: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 2666 1 000000007152e43f 100 0 0 10 0

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230820.3989165-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter.h      |  15 ++
 tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c | 234 ++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c | 250 ++++++++++++++++++++++
 3 files changed, 499 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 3757e88c6406..bde23e16e777 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -7,6 +7,8 @@
 #define bpf_iter__netlink bpf_iter__netlink___not_used
 #define bpf_iter__task bpf_iter__task___not_used
 #define bpf_iter__task_file bpf_iter__task_file___not_used
+#define bpf_iter__tcp bpf_iter__tcp___not_used
+#define tcp6_sock tcp6_sock___not_used
 #include "vmlinux.h"
 #undef bpf_iter_meta
 #undef bpf_iter__bpf_map
@@ -14,6 +16,8 @@
 #undef bpf_iter__netlink
 #undef bpf_iter__task
 #undef bpf_iter__task_file
+#undef bpf_iter__tcp
+#undef tcp6_sock
 
 struct bpf_iter_meta {
 	struct seq_file *seq;
@@ -47,3 +51,14 @@ struct bpf_iter__bpf_map {
 	struct bpf_iter_meta *meta;
 	struct bpf_map *map;
 } __attribute__((preserve_access_index));
+
+struct bpf_iter__tcp {
+	struct bpf_iter_meta *meta;
+	struct sock_common *sk_common;
+	uid_t uid;
+} __attribute__((preserve_access_index));
+
+struct tcp6_sock {
+	struct tcp_sock	tcp;
+	struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
new file mode 100644
index 000000000000..30fd587cb325
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+        return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+	return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ		100
+#define NSEC_PER_SEC	1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+	/* The implementation here tailored to a particular
+	 * setting of USER_HZ.
+	 */
+	u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+	u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+	if ((tick_nsec % user_hz_nsec) == 0) {
+		if (CONFIG_HZ < USER_HZ)
+			return x * (USER_HZ / CONFIG_HZ);
+		else
+			return x / (CONFIG_HZ / USER_HZ);
+	}
+	return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+	if (delta <= 0)
+		return 0;
+
+	return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+	return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+	return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
+			 uid_t uid, __u32 seq_num)
+{
+	const struct inet_connection_sock *icsk;
+	const struct fastopen_queue *fastopenq;
+	const struct inet_sock *inet;
+	unsigned long timer_expires;
+	const struct sock *sp;
+	__u16 destp, srcp;
+	__be32 dest, src;
+	int timer_active;
+	int rx_queue;
+	int state;
+
+	icsk = &tp->inet_conn;
+	inet = &icsk->icsk_inet;
+	sp = &inet->sk;
+	fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+	dest = inet->inet_daddr;
+	src = inet->inet_rcv_saddr;
+	destp = bpf_ntohs(inet->inet_dport);
+	srcp = bpf_ntohs(inet->inet_sport);
+
+	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+		timer_active = 1;
+		timer_expires = icsk->icsk_timeout;
+	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+		timer_active = 4;
+		timer_expires = icsk->icsk_timeout;
+	} else if (timer_pending(&sp->sk_timer)) {
+		timer_active = 2;
+		timer_expires = sp->sk_timer.expires;
+	} else {
+		timer_active = 0;
+		timer_expires = bpf_jiffies64();
+	}
+
+	state = sp->sk_state;
+	if (state == TCP_LISTEN) {
+		rx_queue = sp->sk_ack_backlog;
+	} else {
+		rx_queue = tp->rcv_nxt - tp->copied_seq;
+		if (rx_queue < 0)
+			rx_queue = 0;
+	}
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+		       seq_num, src, srcp, destp, destp);
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+		       state,
+		       tp->write_seq - tp->snd_una, rx_queue,
+		       timer_active,
+		       jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+		       icsk->icsk_retransmits, uid,
+		       icsk->icsk_probes_out,
+		       sock_i_ino(sp),
+		       sp->sk_refcnt.refs.counter);
+	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+		       tp,
+		       jiffies_to_clock_t(icsk->icsk_rto),
+		       jiffies_to_clock_t(icsk->icsk_ack.ato),
+		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+		       tp->snd_cwnd,
+		       state == TCP_LISTEN ? fastopenq->max_qlen
+				: (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
+		      );
+
+	return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+			uid_t uid, __u32 seq_num)
+{
+	struct inet_timewait_sock *tw = &ttw->tw_sk;
+	__u16 destp, srcp;
+	__be32 dest, src;
+	long delta;
+
+	delta = tw->tw_timer.expires - bpf_jiffies64();
+	dest = tw->tw_daddr;
+	src  = tw->tw_rcv_saddr;
+	destp = bpf_ntohs(tw->tw_dport);
+	srcp  = bpf_ntohs(tw->tw_sport);
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+		       seq_num, src, srcp, dest, destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       tw->tw_substate, 0, 0,
+		       3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+		       tw->tw_refcnt.refs.counter, tw);
+
+	return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+			 uid_t uid, __u32 seq_num)
+{
+	struct inet_request_sock *irsk = &treq->req;
+	struct request_sock *req = &irsk->req;
+	long ttd;
+
+	ttd = req->rsk_timer.expires - bpf_jiffies64();
+
+	if (ttd < 0)
+		ttd = 0;
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+		       seq_num, irsk->ir_loc_addr,
+		       irsk->ir_num, irsk->ir_rmt_addr,
+		       bpf_ntohs(irsk->ir_rmt_port));
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+		       req->num_timeout, uid, 0, 0, 0, req);
+
+	return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp4(struct bpf_iter__tcp *ctx)
+{
+	struct sock_common *sk_common = ctx->sk_common;
+	struct seq_file *seq = ctx->meta->seq;
+	struct tcp_timewait_sock *tw;
+	struct tcp_request_sock *req;
+	struct tcp_sock *tp;
+	uid_t uid = ctx->uid;
+	__u32 seq_num;
+
+	if (sk_common == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "  sl  "
+				    "local_address "
+				    "rem_address   "
+				    "st tx_queue rx_queue tr tm->when retrnsmt"
+				    "   uid  timeout inode\n");
+
+	if (sk_common->skc_family != AF_INET)
+		return 0;
+
+	tp = bpf_skc_to_tcp_sock(sk_common);
+	if (tp)
+		return dump_tcp_sock(seq, tp, uid, seq_num);
+
+	tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+	if (tw)
+		return dump_tw_sock(seq, tw, uid, seq_num);
+
+	req = bpf_skc_to_tcp_request_sock(sk_common);
+	if (req)
+		return dump_req_sock(seq, req, uid, seq_num);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
new file mode 100644
index 000000000000..10dec4392031
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+        return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+	return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ		100
+#define NSEC_PER_SEC	1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+	/* The implementation here tailored to a particular
+	 * setting of USER_HZ.
+	 */
+	u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+	u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+	if ((tick_nsec % user_hz_nsec) == 0) {
+		if (CONFIG_HZ < USER_HZ)
+			return x * (USER_HZ / CONFIG_HZ);
+		else
+			return x / (CONFIG_HZ / USER_HZ);
+	}
+	return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+	if (delta <= 0)
+		return 0;
+
+	return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+	return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+	return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
+			 uid_t uid, __u32 seq_num)
+{
+	const struct inet_connection_sock *icsk;
+	const struct fastopen_queue *fastopenq;
+	const struct in6_addr *dest, *src;
+	const struct inet_sock *inet;
+	unsigned long timer_expires;
+	const struct sock *sp;
+	__u16 destp, srcp;
+	int timer_active;
+	int rx_queue;
+	int state;
+
+	icsk = &tp->tcp.inet_conn;
+	inet = &icsk->icsk_inet;
+	sp = &inet->sk;
+	fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+	dest = &sp->sk_v6_daddr;
+	src = &sp->sk_v6_rcv_saddr;
+	destp = bpf_ntohs(inet->inet_dport);
+	srcp = bpf_ntohs(inet->inet_sport);
+
+	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+		timer_active = 1;
+		timer_expires = icsk->icsk_timeout;
+	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+		timer_active = 4;
+		timer_expires = icsk->icsk_timeout;
+	} else if (timer_pending(&sp->sk_timer)) {
+		timer_active = 2;
+		timer_expires = sp->sk_timer.expires;
+	} else {
+		timer_active = 0;
+		timer_expires = bpf_jiffies64();
+	}
+
+	state = sp->sk_state;
+	if (state == TCP_LISTEN) {
+		rx_queue = sp->sk_ack_backlog;
+	} else {
+		rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq;
+		if (rx_queue < 0)
+			rx_queue = 0;
+	}
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       seq_num,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3], srcp,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+		       state,
+		       tp->tcp.write_seq - tp->tcp.snd_una, rx_queue,
+		       timer_active,
+		       jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+		       icsk->icsk_retransmits, uid,
+		       icsk->icsk_probes_out,
+		       sock_i_ino(sp),
+		       sp->sk_refcnt.refs.counter);
+	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+		       tp,
+		       jiffies_to_clock_t(icsk->icsk_rto),
+		       jiffies_to_clock_t(icsk->icsk_ack.ato),
+		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+		       tp->tcp.snd_cwnd,
+		       state == TCP_LISTEN ? fastopenq->max_qlen
+				: (tcp_in_initial_slowstart(&tp->tcp) ? -1
+								      : tp->tcp.snd_ssthresh)
+		      );
+
+	return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+			uid_t uid, __u32 seq_num)
+{
+	struct inet_timewait_sock *tw = &ttw->tw_sk;
+	const struct in6_addr *dest, *src;
+	__u16 destp, srcp;
+	long delta;
+
+	delta = tw->tw_timer.expires - bpf_jiffies64();
+	dest = &tw->tw_v6_daddr;
+	src  = &tw->tw_v6_rcv_saddr;
+	destp = bpf_ntohs(tw->tw_dport);
+	srcp  = bpf_ntohs(tw->tw_sport);
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       seq_num,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3], srcp,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       tw->tw_substate, 0, 0,
+		       3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+		       tw->tw_refcnt.refs.counter, tw);
+
+	return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+			 uid_t uid, __u32 seq_num)
+{
+	struct inet_request_sock *irsk = &treq->req;
+	struct request_sock *req = &irsk->req;
+	struct in6_addr *src, *dest;
+	long ttd;
+
+	ttd = req->rsk_timer.expires - bpf_jiffies64();
+	src = &irsk->ir_v6_loc_addr;
+	dest = &irsk->ir_v6_rmt_addr;
+
+	if (ttd < 0)
+		ttd = 0;
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       seq_num,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3],
+		       irsk->ir_num,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3],
+		       bpf_ntohs(irsk->ir_rmt_port));
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+		       req->num_timeout, uid, 0, 0, 0, req);
+
+	return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp6(struct bpf_iter__tcp *ctx)
+{
+	struct sock_common *sk_common = ctx->sk_common;
+	struct seq_file *seq = ctx->meta->seq;
+	struct tcp_timewait_sock *tw;
+	struct tcp_request_sock *req;
+	struct tcp6_sock *tp;
+	uid_t uid = ctx->uid;
+	__u32 seq_num;
+
+	if (sk_common == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "  sl  "
+				    "local_address                         "
+				    "remote_address                        "
+				    "st tx_queue rx_queue tr tm->when retrnsmt"
+				    "   uid  timeout inode\n");
+
+	if (sk_common->skc_family != AF_INET6)
+		return 0;
+
+	tp = bpf_skc_to_tcp6_sock(sk_common);
+	if (tp)
+		return dump_tcp6_sock(seq, tp, uid, seq_num);
+
+	tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+	if (tw)
+		return dump_tw_sock(seq, tw, uid, seq_num);
+
+	req = bpf_skc_to_tcp_request_sock(sk_common);
+	if (req)
+		return dump_req_sock(seq, req, uid, seq_num);
+
+	return 0;
+}
-- 
cgit 


From ace6d6ec9e9e167047b6c8ca462a0830220640c2 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:22 -0700
Subject: selftests/bpf: Implement sample udp/udp6 bpf_iter programs

On my VM, I got identical results between /proc/net/udp[6] and
the udp{4,6} bpf iterator.

For udp6:
  $ cat /sys/fs/bpf/p1
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000   193        0 19183 2 0000000029eab111 0
  $ cat /proc/net/udp6
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000   193        0 19183 2 0000000029eab111 0

For udp4:
  $ cat /sys/fs/bpf/p4
    sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 72540 2 000000004ede477a 0
  $ cat /proc/net/udp
    sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 72540 2 000000004ede477a 0

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230822.3989299-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter.h      | 16 +++++
 tools/testing/selftests/bpf/progs/bpf_iter_udp4.c | 71 ++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter_udp6.c | 79 +++++++++++++++++++++++
 3 files changed, 166 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp6.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index bde23e16e777..17db3bac518b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -9,6 +9,8 @@
 #define bpf_iter__task_file bpf_iter__task_file___not_used
 #define bpf_iter__tcp bpf_iter__tcp___not_used
 #define tcp6_sock tcp6_sock___not_used
+#define bpf_iter__udp bpf_iter__udp___not_used
+#define udp6_sock udp6_sock___not_used
 #include "vmlinux.h"
 #undef bpf_iter_meta
 #undef bpf_iter__bpf_map
@@ -18,6 +20,8 @@
 #undef bpf_iter__task_file
 #undef bpf_iter__tcp
 #undef tcp6_sock
+#undef bpf_iter__udp
+#undef udp6_sock
 
 struct bpf_iter_meta {
 	struct seq_file *seq;
@@ -62,3 +66,15 @@ struct tcp6_sock {
 	struct tcp_sock	tcp;
 	struct ipv6_pinfo inet6;
 } __attribute__((preserve_access_index));
+
+struct bpf_iter__udp {
+	struct bpf_iter_meta *meta;
+	struct udp_sock *udp_sk;
+	uid_t uid __attribute__((aligned(8)));
+	int bucket __attribute__((aligned(8)));
+} __attribute__((preserve_access_index));
+
+struct udp6_sock {
+	struct udp_sock	udp;
+	struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
new file mode 100644
index 000000000000..7053784575e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+SEC("iter/udp")
+int dump_udp4(struct bpf_iter__udp *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct udp_sock *udp_sk = ctx->udp_sk;
+	struct inet_sock *inet;
+	__u16 srcp, destp;
+	__be32 dest, src;
+	__u32 seq_num;
+	int rqueue;
+
+	if (udp_sk == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq,
+			       "  sl  local_address rem_address   st tx_queue "
+			       "rx_queue tr tm->when retrnsmt   uid  timeout "
+			       "inode ref pointer drops\n");
+
+	/* filter out udp6 sockets */
+	inet = &udp_sk->inet;
+	if (inet->sk.sk_family == AF_INET6)
+		return 0;
+
+	inet = &udp_sk->inet;
+	dest = inet->inet_daddr;
+	src = inet->inet_rcv_saddr;
+	srcp = bpf_ntohs(inet->inet_sport);
+	destp = bpf_ntohs(inet->inet_dport);
+	rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+
+	BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ",
+		       ctx->bucket, src, srcp, dest, destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+		       inet->sk.sk_state,
+		       inet->sk.sk_wmem_alloc.refs.counter - 1,
+		       rqueue,
+		       0, 0L, 0, ctx->uid, 0,
+		       sock_i_ino(&inet->sk),
+		       inet->sk.sk_refcnt.refs.counter, udp_sk,
+		       inet->sk.sk_drops.counter);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
new file mode 100644
index 000000000000..c1175a6ecf43
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define IPV6_SEQ_DGRAM_HEADER				\
+	"  sl  "					\
+	"local_address                         "	\
+	"remote_address                        "	\
+	"st tx_queue rx_queue tr tm->when retrnsmt"	\
+	"   uid  timeout inode ref pointer drops\n"
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+SEC("iter/udp")
+int dump_udp6(struct bpf_iter__udp *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct udp_sock *udp_sk = ctx->udp_sk;
+	const struct in6_addr *dest, *src;
+	struct udp6_sock *udp6_sk;
+	struct inet_sock *inet;
+	__u16 srcp, destp;
+	__u32 seq_num;
+	int rqueue;
+
+	if (udp_sk == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER);
+
+	udp6_sk = bpf_skc_to_udp6_sock(udp_sk);
+	if (udp6_sk == (void *)0)
+		return 0;
+
+	inet = &udp_sk->inet;
+	srcp = bpf_ntohs(inet->inet_sport);
+	destp = bpf_ntohs(inet->inet_dport);
+	rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+	dest  = &inet->sk.sk_v6_daddr;
+	src   = &inet->sk.sk_v6_rcv_saddr;
+
+	BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       ctx->bucket,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3], srcp,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+		       inet->sk.sk_state,
+		       inet->sk.sk_wmem_alloc.refs.counter - 1,
+		       rqueue,
+		       0, 0L, 0, ctx->uid, 0,
+		       sock_i_ino(&inet->sk),
+		       inet->sk.sk_refcnt.refs.counter, udp_sk,
+		       inet->sk.sk_drops.counter);
+
+	return 0;
+}
-- 
cgit 


From cfcd75f9bf12301dfdcfe9ff6dfb240997e7745f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:23 -0700
Subject: selftests/bpf: Add tcp/udp iterator programs to selftests

Added tcp{4,6} and udp{4,6} bpf programs into test_progs
selftest so that they at least can load successfully.
  $ ./test_progs -n 3
  ...
  #3/7 tcp4:OK
  #3/8 tcp6:OK
  #3/9 udp4:OK
  #3/10 udp6:OK
  ...
  #3 bpf_iter:OK
  Summary: 1/16 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230823.3989372-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 68 +++++++++++++++++++++++
 1 file changed, 68 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 87c29dde1cf9..1e2e0fced6e8 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -6,6 +6,10 @@
 #include "bpf_iter_bpf_map.skel.h"
 #include "bpf_iter_task.skel.h"
 #include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_tcp4.skel.h"
+#include "bpf_iter_tcp6.skel.h"
+#include "bpf_iter_udp4.skel.h"
+#include "bpf_iter_udp6.skel.h"
 #include "bpf_iter_test_kern1.skel.h"
 #include "bpf_iter_test_kern2.skel.h"
 #include "bpf_iter_test_kern3.skel.h"
@@ -120,6 +124,62 @@ static void test_task_file(void)
 	bpf_iter_task_file__destroy(skel);
 }
 
+static void test_tcp4(void)
+{
+	struct bpf_iter_tcp4 *skel;
+
+	skel = bpf_iter_tcp4__open_and_load();
+	if (CHECK(!skel, "bpf_iter_tcp4__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_tcp4);
+
+	bpf_iter_tcp4__destroy(skel);
+}
+
+static void test_tcp6(void)
+{
+	struct bpf_iter_tcp6 *skel;
+
+	skel = bpf_iter_tcp6__open_and_load();
+	if (CHECK(!skel, "bpf_iter_tcp6__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_tcp6);
+
+	bpf_iter_tcp6__destroy(skel);
+}
+
+static void test_udp4(void)
+{
+	struct bpf_iter_udp4 *skel;
+
+	skel = bpf_iter_udp4__open_and_load();
+	if (CHECK(!skel, "bpf_iter_udp4__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_udp4);
+
+	bpf_iter_udp4__destroy(skel);
+}
+
+static void test_udp6(void)
+{
+	struct bpf_iter_udp6 *skel;
+
+	skel = bpf_iter_udp6__open_and_load();
+	if (CHECK(!skel, "bpf_iter_udp6__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_udp6);
+
+	bpf_iter_udp6__destroy(skel);
+}
+
 /* The expected string is less than 16 bytes */
 static int do_read_with_fd(int iter_fd, const char *expected,
 			   bool read_one_char)
@@ -394,6 +454,14 @@ void test_bpf_iter(void)
 		test_task();
 	if (test__start_subtest("task_file"))
 		test_task_file();
+	if (test__start_subtest("tcp4"))
+		test_tcp4();
+	if (test__start_subtest("tcp6"))
+		test_tcp6();
+	if (test__start_subtest("udp4"))
+		test_udp4();
+	if (test__start_subtest("udp6"))
+		test_udp6();
 	if (test__start_subtest("anon"))
 		test_anon_iter(false);
 	if (test__start_subtest("anon-read-one-char"))
-- 
cgit 


From 9023497d8746d355bac8ddbc65797a4f553726fd Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 24 Jun 2020 16:31:24 +0200
Subject: tools, bpftool: Define prog_type_name array only once

Define prog_type_name in prog.c instead of main.h so it is only defined
once. This leads to a slight decrease in the binary size of bpftool.

Before:

   text	   data	    bss	    dec	    hex	filename
 401032	  11936	1573160	1986128	 1e4e50	bpftool

After:

   text	   data	    bss	    dec	    hex	filename
 399024	  11168	1573160	1983352	 1e4378	bpftool

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200624143124.12914-1-tklauser@distanz.ch
---
 tools/bpf/bpftool/feature.c |  4 ++--
 tools/bpf/bpftool/link.c    |  4 ++--
 tools/bpf/bpftool/main.h    | 33 ++-------------------------------
 tools/bpf/bpftool/map.c     |  4 ++--
 tools/bpf/bpftool/prog.c    | 34 ++++++++++++++++++++++++++++++++++
 5 files changed, 42 insertions(+), 37 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 768bf77df886..1cd75807673e 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -695,7 +695,7 @@ section_program_types(bool *supported_types, const char *define_prefix,
 			    "/*** eBPF program types ***/",
 			    define_prefix);
 
-	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
 		probe_prog_type(i, supported_types, define_prefix, ifindex);
 
 	print_end_section();
@@ -741,7 +741,7 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
 		       "	%sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
 		       define_prefix, define_prefix, define_prefix,
 		       define_prefix);
-	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
 		probe_helpers_for_progtype(i, supported_types[i], define_prefix,
 					   ifindex);
 
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 7329f3134283..326b8fdf0243 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -108,7 +108,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
 		if (err)
 			return err;
 
-		if (prog_info.type < ARRAY_SIZE(prog_type_name))
+		if (prog_info.type < prog_type_name_size)
 			jsonw_string_field(json_wtr, "prog_type",
 					   prog_type_name[prog_info.type]);
 		else
@@ -187,7 +187,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
 		if (err)
 			return err;
 
-		if (prog_info.type < ARRAY_SIZE(prog_type_name))
+		if (prog_info.type < prog_type_name_size)
 			printf("\n\tprog_type %s  ",
 			       prog_type_name[prog_info.type]);
 		else
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index ce26271e5f0c..269f1cb6aef5 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -56,37 +56,8 @@
 #define HELP_SPEC_LINK							\
 	"LINK := { id LINK_ID | pinned FILE }"
 
-static const char * const prog_type_name[] = {
-	[BPF_PROG_TYPE_UNSPEC]			= "unspec",
-	[BPF_PROG_TYPE_SOCKET_FILTER]		= "socket_filter",
-	[BPF_PROG_TYPE_KPROBE]			= "kprobe",
-	[BPF_PROG_TYPE_SCHED_CLS]		= "sched_cls",
-	[BPF_PROG_TYPE_SCHED_ACT]		= "sched_act",
-	[BPF_PROG_TYPE_TRACEPOINT]		= "tracepoint",
-	[BPF_PROG_TYPE_XDP]			= "xdp",
-	[BPF_PROG_TYPE_PERF_EVENT]		= "perf_event",
-	[BPF_PROG_TYPE_CGROUP_SKB]		= "cgroup_skb",
-	[BPF_PROG_TYPE_CGROUP_SOCK]		= "cgroup_sock",
-	[BPF_PROG_TYPE_LWT_IN]			= "lwt_in",
-	[BPF_PROG_TYPE_LWT_OUT]			= "lwt_out",
-	[BPF_PROG_TYPE_LWT_XMIT]		= "lwt_xmit",
-	[BPF_PROG_TYPE_SOCK_OPS]		= "sock_ops",
-	[BPF_PROG_TYPE_SK_SKB]			= "sk_skb",
-	[BPF_PROG_TYPE_CGROUP_DEVICE]		= "cgroup_device",
-	[BPF_PROG_TYPE_SK_MSG]			= "sk_msg",
-	[BPF_PROG_TYPE_RAW_TRACEPOINT]		= "raw_tracepoint",
-	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR]	= "cgroup_sock_addr",
-	[BPF_PROG_TYPE_LWT_SEG6LOCAL]		= "lwt_seg6local",
-	[BPF_PROG_TYPE_LIRC_MODE2]		= "lirc_mode2",
-	[BPF_PROG_TYPE_SK_REUSEPORT]		= "sk_reuseport",
-	[BPF_PROG_TYPE_FLOW_DISSECTOR]		= "flow_dissector",
-	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
-	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE]	= "raw_tracepoint_writable",
-	[BPF_PROG_TYPE_CGROUP_SOCKOPT]		= "cgroup_sockopt",
-	[BPF_PROG_TYPE_TRACING]			= "tracing",
-	[BPF_PROG_TYPE_STRUCT_OPS]		= "struct_ops",
-	[BPF_PROG_TYPE_EXT]			= "ext",
-};
+extern const char * const prog_type_name[];
+extern const size_t prog_type_name_size;
 
 static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
 	[BPF_CGROUP_INET_INGRESS] = "ingress",
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index bbb74d387fb0..42c215b6eae6 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -473,7 +473,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
 		if (owner_prog_type) {
 			unsigned int prog_type = atoi(owner_prog_type);
 
-			if (prog_type < ARRAY_SIZE(prog_type_name))
+			if (prog_type < prog_type_name_size)
 				jsonw_string_field(json_wtr, "owner_prog_type",
 						   prog_type_name[prog_type]);
 			else
@@ -558,7 +558,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 		if (owner_prog_type) {
 			unsigned int prog_type = atoi(owner_prog_type);
 
-			if (prog_type < ARRAY_SIZE(prog_type_name))
+			if (prog_type < prog_type_name_size)
 				printf("owner_prog_type %s  ",
 				       prog_type_name[prog_type]);
 			else
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index e21fa8ad2efa..6863c57effd0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -29,6 +29,40 @@
 #include "main.h"
 #include "xlated_dumper.h"
 
+const char * const prog_type_name[] = {
+	[BPF_PROG_TYPE_UNSPEC]			= "unspec",
+	[BPF_PROG_TYPE_SOCKET_FILTER]		= "socket_filter",
+	[BPF_PROG_TYPE_KPROBE]			= "kprobe",
+	[BPF_PROG_TYPE_SCHED_CLS]		= "sched_cls",
+	[BPF_PROG_TYPE_SCHED_ACT]		= "sched_act",
+	[BPF_PROG_TYPE_TRACEPOINT]		= "tracepoint",
+	[BPF_PROG_TYPE_XDP]			= "xdp",
+	[BPF_PROG_TYPE_PERF_EVENT]		= "perf_event",
+	[BPF_PROG_TYPE_CGROUP_SKB]		= "cgroup_skb",
+	[BPF_PROG_TYPE_CGROUP_SOCK]		= "cgroup_sock",
+	[BPF_PROG_TYPE_LWT_IN]			= "lwt_in",
+	[BPF_PROG_TYPE_LWT_OUT]			= "lwt_out",
+	[BPF_PROG_TYPE_LWT_XMIT]		= "lwt_xmit",
+	[BPF_PROG_TYPE_SOCK_OPS]		= "sock_ops",
+	[BPF_PROG_TYPE_SK_SKB]			= "sk_skb",
+	[BPF_PROG_TYPE_CGROUP_DEVICE]		= "cgroup_device",
+	[BPF_PROG_TYPE_SK_MSG]			= "sk_msg",
+	[BPF_PROG_TYPE_RAW_TRACEPOINT]		= "raw_tracepoint",
+	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR]	= "cgroup_sock_addr",
+	[BPF_PROG_TYPE_LWT_SEG6LOCAL]		= "lwt_seg6local",
+	[BPF_PROG_TYPE_LIRC_MODE2]		= "lirc_mode2",
+	[BPF_PROG_TYPE_SK_REUSEPORT]		= "sk_reuseport",
+	[BPF_PROG_TYPE_FLOW_DISSECTOR]		= "flow_dissector",
+	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
+	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE]	= "raw_tracepoint_writable",
+	[BPF_PROG_TYPE_CGROUP_SOCKOPT]		= "cgroup_sockopt",
+	[BPF_PROG_TYPE_TRACING]			= "tracing",
+	[BPF_PROG_TYPE_STRUCT_OPS]		= "struct_ops",
+	[BPF_PROG_TYPE_EXT]			= "ext",
+};
+
+const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
+
 enum dump_mode {
 	DUMP_JITED,
 	DUMP_XLATED,
-- 
cgit 


From 16d37ee3d2b1c30052ba5ebb69556040fc174061 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 24 Jun 2020 16:31:54 +0200
Subject: tools, bpftool: Define attach_type_name array only once

Define attach_type_name in common.c instead of main.h so it is only
defined once. This leads to a slight decrease in the binary size of
bpftool.

Before:

   text	   data	    bss	    dec	    hex	filename
 399024	  11168	1573160	1983352	 1e4378	bpftool

After:

   text	   data	    bss	    dec	    hex	filename
 398256	  10880	1573160	1982296	 1e3f58	bpftool

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200624143154.13145-1-tklauser@distanz.ch
---
 tools/bpf/bpftool/common.c | 36 ++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.h   | 36 +-----------------------------------
 2 files changed, 37 insertions(+), 35 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 6c864c3683fc..18e5604fe260 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -29,6 +29,42 @@
 #define BPF_FS_MAGIC		0xcafe4a11
 #endif
 
+const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
+	[BPF_CGROUP_INET_INGRESS]	= "ingress",
+	[BPF_CGROUP_INET_EGRESS]	= "egress",
+	[BPF_CGROUP_INET_SOCK_CREATE]	= "sock_create",
+	[BPF_CGROUP_SOCK_OPS]		= "sock_ops",
+	[BPF_CGROUP_DEVICE]		= "device",
+	[BPF_CGROUP_INET4_BIND]		= "bind4",
+	[BPF_CGROUP_INET6_BIND]		= "bind6",
+	[BPF_CGROUP_INET4_CONNECT]	= "connect4",
+	[BPF_CGROUP_INET6_CONNECT]	= "connect6",
+	[BPF_CGROUP_INET4_POST_BIND]	= "post_bind4",
+	[BPF_CGROUP_INET6_POST_BIND]	= "post_bind6",
+	[BPF_CGROUP_INET4_GETPEERNAME]	= "getpeername4",
+	[BPF_CGROUP_INET6_GETPEERNAME]	= "getpeername6",
+	[BPF_CGROUP_INET4_GETSOCKNAME]	= "getsockname4",
+	[BPF_CGROUP_INET6_GETSOCKNAME]	= "getsockname6",
+	[BPF_CGROUP_UDP4_SENDMSG]	= "sendmsg4",
+	[BPF_CGROUP_UDP6_SENDMSG]	= "sendmsg6",
+	[BPF_CGROUP_SYSCTL]		= "sysctl",
+	[BPF_CGROUP_UDP4_RECVMSG]	= "recvmsg4",
+	[BPF_CGROUP_UDP6_RECVMSG]	= "recvmsg6",
+	[BPF_CGROUP_GETSOCKOPT]		= "getsockopt",
+	[BPF_CGROUP_SETSOCKOPT]		= "setsockopt",
+
+	[BPF_SK_SKB_STREAM_PARSER]	= "sk_skb_stream_parser",
+	[BPF_SK_SKB_STREAM_VERDICT]	= "sk_skb_stream_verdict",
+	[BPF_SK_MSG_VERDICT]		= "sk_msg_verdict",
+	[BPF_LIRC_MODE2]		= "lirc_mode2",
+	[BPF_FLOW_DISSECTOR]		= "flow_dissector",
+	[BPF_TRACE_RAW_TP]		= "raw_tp",
+	[BPF_TRACE_FENTRY]		= "fentry",
+	[BPF_TRACE_FEXIT]		= "fexit",
+	[BPF_MODIFY_RETURN]		= "mod_ret",
+	[BPF_LSM_MAC]			= "lsm_mac",
+};
+
 void p_err(const char *fmt, ...)
 {
 	va_list ap;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 269f1cb6aef5..78d34e860713 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -59,41 +59,7 @@
 extern const char * const prog_type_name[];
 extern const size_t prog_type_name_size;
 
-static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
-	[BPF_CGROUP_INET_INGRESS] = "ingress",
-	[BPF_CGROUP_INET_EGRESS] = "egress",
-	[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
-	[BPF_CGROUP_SOCK_OPS] = "sock_ops",
-	[BPF_CGROUP_DEVICE] = "device",
-	[BPF_CGROUP_INET4_BIND] = "bind4",
-	[BPF_CGROUP_INET6_BIND] = "bind6",
-	[BPF_CGROUP_INET4_CONNECT] = "connect4",
-	[BPF_CGROUP_INET6_CONNECT] = "connect6",
-	[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
-	[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
-	[BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
-	[BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
-	[BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
-	[BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
-	[BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
-	[BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
-	[BPF_CGROUP_SYSCTL] = "sysctl",
-	[BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
-	[BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
-	[BPF_CGROUP_GETSOCKOPT] = "getsockopt",
-	[BPF_CGROUP_SETSOCKOPT] = "setsockopt",
-
-	[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
-	[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
-	[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
-	[BPF_LIRC_MODE2] = "lirc_mode2",
-	[BPF_FLOW_DISSECTOR] = "flow_dissector",
-	[BPF_TRACE_RAW_TP] = "raw_tp",
-	[BPF_TRACE_FENTRY] = "fentry",
-	[BPF_TRACE_FEXIT] = "fexit",
-	[BPF_MODIFY_RETURN] = "mod_ret",
-	[BPF_LSM_MAC] = "lsm_mac",
-};
+extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE];
 
 extern const char * const map_type_name[];
 extern const size_t map_type_name_size;
-- 
cgit 


From d929758101fc0674008169dc1de963e3181c587b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 25 Jun 2020 16:26:28 -0700
Subject: libbpf: Support disabling auto-loading BPF programs

Currently, bpf_object__load() (and by induction skeleton's load), will always
attempt to prepare, relocate, and load into kernel every single BPF program
found inside the BPF object file. This is often convenient and the right thing
to do and what users expect.

But there are plenty of cases (especially with BPF development constantly
picking up the pace), where BPF application is intended to work with old
kernels, with potentially reduced set of features. But on kernels supporting
extra features, it would like to take a full advantage of them, by employing
extra BPF program. This could be a choice of using fentry/fexit over
kprobe/kretprobe, if kernel is recent enough and is built with BTF. Or BPF
program might be providing optimized bpf_iter-based solution that user-space
might want to use, whenever available. And so on.

With libbpf and BPF CO-RE in particular, it's advantageous to not have to
maintain two separate BPF object files to achieve this. So to enable such use
cases, this patch adds ability to request not auto-loading chosen BPF
programs. In such case, libbpf won't attempt to perform relocations (which
might fail due to old kernel), won't try to resolve BTF types for
BTF-aware (tp_btf/fentry/fexit/etc) program types, because BTF might not be
present, and so on. Skeleton will also automatically skip auto-attachment step
for such not loaded BPF programs.

Overall, this feature allows to simplify development and deployment of
real-world BPF applications with complicated compatibility requirements.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200625232629.3444003-2-andriin@fb.com
---
 tools/lib/bpf/libbpf.c   | 48 ++++++++++++++++++++++++++++++++++++++++--------
 tools/lib/bpf/libbpf.h   |  2 ++
 tools/lib/bpf/libbpf.map |  2 ++
 3 files changed, 44 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6b4955d170ff..4ea7f4f1a691 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -230,6 +230,7 @@ struct bpf_program {
 	struct bpf_insn *insns;
 	size_t insns_cnt, main_prog_cnt;
 	enum bpf_prog_type type;
+	bool load;
 
 	struct reloc_desc *reloc_desc;
 	int nr_reloc;
@@ -541,6 +542,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
 	prog->instances.fds = NULL;
 	prog->instances.nr = -1;
 	prog->type = BPF_PROG_TYPE_UNSPEC;
+	prog->load = true;
 
 	return 0;
 errout:
@@ -2513,6 +2515,8 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
 		need_vmlinux_btf = true;
 
 	bpf_object__for_each_program(prog, obj) {
+		if (!prog->load)
+			continue;
 		if (libbpf_prog_needs_vmlinux_btf(prog)) {
 			need_vmlinux_btf = true;
 			break;
@@ -5445,6 +5449,12 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 {
 	int err = 0, fd, i, btf_id;
 
+	if (prog->obj->loaded) {
+		pr_warn("prog '%s'('%s'): can't load after object was loaded\n",
+			prog->name, prog->section_name);
+		return -EINVAL;
+	}
+
 	if ((prog->type == BPF_PROG_TYPE_TRACING ||
 	     prog->type == BPF_PROG_TYPE_LSM ||
 	     prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
@@ -5533,16 +5543,21 @@ static bool bpf_program__is_function_storage(const struct bpf_program *prog,
 static int
 bpf_object__load_progs(struct bpf_object *obj, int log_level)
 {
+	struct bpf_program *prog;
 	size_t i;
 	int err;
 
 	for (i = 0; i < obj->nr_programs; i++) {
-		if (bpf_program__is_function_storage(&obj->programs[i], obj))
+		prog = &obj->programs[i];
+		if (bpf_program__is_function_storage(prog, obj))
 			continue;
-		obj->programs[i].log_level |= log_level;
-		err = bpf_program__load(&obj->programs[i],
-					obj->license,
-					obj->kern_version);
+		if (!prog->load) {
+			pr_debug("prog '%s'('%s'): skipped loading\n",
+				 prog->name, prog->section_name);
+			continue;
+		}
+		prog->log_level |= log_level;
+		err = bpf_program__load(prog, obj->license, obj->kern_version);
 		if (err)
 			return err;
 	}
@@ -5869,12 +5884,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 		return -EINVAL;
 
 	if (obj->loaded) {
-		pr_warn("object should not be loaded twice\n");
+		pr_warn("object '%s': load can't be attempted twice\n", obj->name);
 		return -EINVAL;
 	}
 
-	obj->loaded = true;
-
 	err = bpf_object__probe_loading(obj);
 	err = err ? : bpf_object__probe_caps(obj);
 	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
@@ -5889,6 +5902,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	btf__free(obj->btf_vmlinux);
 	obj->btf_vmlinux = NULL;
 
+	obj->loaded = true; /* doesn't matter if successfully or not */
+
 	if (err)
 		goto out;
 
@@ -6661,6 +6676,20 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
 	return title;
 }
 
+bool bpf_program__autoload(const struct bpf_program *prog)
+{
+	return prog->load;
+}
+
+int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
+{
+	if (prog->obj->loaded)
+		return -EINVAL;
+
+	prog->load = autoload;
+	return 0;
+}
+
 int bpf_program__fd(const struct bpf_program *prog)
 {
 	return bpf_program__nth_fd(prog, 0);
@@ -9283,6 +9312,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 		const struct bpf_sec_def *sec_def;
 		const char *sec_name = bpf_program__title(prog, false);
 
+		if (!prog->load)
+			continue;
+
 		sec_def = find_sec_def(sec_name);
 		if (!sec_def || !sec_def->attach_fn)
 			continue;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index fdd279fb1866..2335971ed0bd 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -200,6 +200,8 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
 LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
 LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
 					  bool needs_copy);
+LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
 
 /* returns program size in bytes */
 LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 9914e0db4859..6544d2cd1ed6 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -286,4 +286,6 @@ LIBBPF_0.1.0 {
 		bpf_map__set_value_size;
 		bpf_map__type;
 		bpf_map__value_size;
+		bpf_program__autoload;
+		bpf_program__set_autoload;
 } LIBBPF_0.0.9;
-- 
cgit 


From 5712174c5c9e3d684ad05d4aaed1e14acda4bb74 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 25 Jun 2020 16:26:29 -0700
Subject: selftests/bpf: Test auto-load disabling logic for BPF programs

Validate that BPF object with broken (in multiple ways) BPF program can still
be successfully loaded, if that broken BPF program is disabled.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200625232629.3444003-3-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/autoload.c | 41 +++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_autoload.c | 40 ++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/autoload.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_autoload.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/autoload.c b/tools/testing/selftests/bpf/prog_tests/autoload.c
new file mode 100644
index 000000000000..3693f7d133eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/autoload.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_autoload.skel.h"
+
+void test_autoload(void)
+{
+	int duration = 0, err;
+	struct test_autoload* skel;
+
+	skel = test_autoload__open_and_load();
+	/* prog3 should be broken */
+	if (CHECK(skel, "skel_open_and_load", "unexpected success\n"))
+		goto cleanup;
+
+	skel = test_autoload__open();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		goto cleanup;
+
+	/* don't load prog3 */
+	bpf_program__set_autoload(skel->progs.prog3, false);
+
+	err = test_autoload__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+		goto cleanup;
+
+	err = test_autoload__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	usleep(1);
+
+	CHECK(!skel->bss->prog1_called, "prog1", "not called\n");
+	CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+	CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+cleanup:
+	test_autoload__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_autoload.c b/tools/testing/selftests/bpf/progs/test_autoload.c
new file mode 100644
index 000000000000..62c8cdec6d5d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_autoload.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+bool prog3_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+	prog1_called = true;
+	return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(const void *ctx)
+{
+	prog2_called = true;
+	return 0;
+}
+
+struct fake_kernel_struct {
+	int whatever;
+} __attribute__((preserve_access_index));
+
+SEC("fentry/unexisting-kprobe-will-fail-if-loaded")
+int prog3(const void *ctx)
+{
+	struct fake_kernel_struct *fake = (void *)ctx;
+	fake->whatever = 123;
+	prog3_called = true;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 6cf0291f95172db68d8a283854389a1966e43c65 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Sat, 27 Jun 2020 01:45:29 +0300
Subject: selftests: forwarding: Add a RED test for SW datapath

This test is inspired by the mlxsw RED selftest. It is much simpler to set
up (also because there is no point in testing PRIO / RED encapsulation). It
tests bare RED, ECN and ECN+nodrop modes of operation. On top of that it
tests RED early_drop and mark qevents.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/sch_red.sh | 492 ++++++++++++++++++++++
 1 file changed, 492 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/sch_red.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
new file mode 100755
index 000000000000..e714bae473fb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -0,0 +1,492 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends one stream of traffic from H1 through a TBF shaper, to a RED
+# within TBF shaper on $swp3. The two shapers have the same configuration, and
+# thus the resulting stream should fill all available bandwidth on the latter
+# shaper. A second stream is sent from H2 also via $swp3, and used to inject
+# additional traffic. Since all available bandwidth is taken, this traffic has
+# to go to backlog.
+#
+# +--------------------------+                     +--------------------------+
+# | H1                       |                     | H2                       |
+# |     + $h1                |                     |     + $h2                |
+# |     | 192.0.2.1/28       |                     |     | 192.0.2.2/28       |
+# |     | TBF 10Mbps         |                     |     |                    |
+# +-----|--------------------+                     +-----|--------------------+
+#       |                                                |
+# +-----|------------------------------------------------|--------------------+
+# | SW  |                                                |                    |
+# |  +--|------------------------------------------------|----------------+   |
+# |  |  + $swp1                                          + $swp2          |   |
+# |  |                               BR                                   |   |
+# |  |                                                                    |   |
+# |  |                                + $swp3                             |   |
+# |  |                                | TBF 10Mbps / RED                  |   |
+# |  +--------------------------------|-----------------------------------+   |
+# |                                   |                                       |
+# +-----------------------------------|---------------------------------------+
+#                                     |
+#                               +-----|--------------------+
+#			        | H3  |                    |
+#			        |     + $h1                |
+#			        |       192.0.2.3/28       |
+#			        |                          |
+#			        +--------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ecn_test
+	ecn_nodrop_test
+	red_test
+	red_qevent_test
+	ecn_qevent_test
+"
+
+NUM_NETIFS=6
+CHECK_TC="yes"
+source lib.sh
+
+BACKLOG=30000
+PKTSZ=1400
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+	mtu_set $h1 10000
+	tc qdisc replace dev $h1 root handle 1: tbf \
+	   rate 10Mbit burst 10K limit 1M
+}
+
+h1_destroy()
+{
+	tc qdisc del dev $h1 root
+	mtu_restore $h1
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+	mtu_set $h2 10000
+}
+
+h2_destroy()
+{
+	mtu_restore $h2
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.3/28
+	mtu_set $h3 10000
+}
+
+h3_destroy()
+{
+	mtu_restore $h3
+	simple_if_fini $h3 192.0.2.3/28
+}
+
+switch_create()
+{
+	ip link add dev br up type bridge
+	ip link set dev $swp1 up master br
+	ip link set dev $swp2 up master br
+	ip link set dev $swp3 up master br
+
+	mtu_set $swp1 10000
+	mtu_set $swp2 10000
+	mtu_set $swp3 10000
+
+	tc qdisc replace dev $swp3 root handle 1: tbf \
+	   rate 10Mbit burst 10K limit 1M
+	ip link add name _drop_test up type dummy
+}
+
+switch_destroy()
+{
+	ip link del dev _drop_test
+	tc qdisc del dev $swp3 root
+
+	mtu_restore $h3
+	mtu_restore $h2
+	mtu_restore $h1
+
+	ip link set dev $swp3 down nomaster
+	ip link set dev $swp2 down nomaster
+	ip link set dev $swp1 down nomaster
+	ip link del dev br
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	h2=${NETIFS[p3]}
+	swp2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	h3_mac=$(mac_get $h3)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.3 " from host 1"
+	ping_test $h2 192.0.2.3 " from host 2"
+}
+
+get_qdisc_backlog()
+{
+	qdisc_stats_get $swp3 11: .backlog
+}
+
+get_nmarked()
+{
+	qdisc_stats_get $swp3 11: .marked
+}
+
+get_qdisc_npackets()
+{
+	qdisc_stats_get $swp3 11: .packets
+}
+
+get_nmirrored()
+{
+	link_stats_get _drop_test tx packets
+}
+
+send_packets()
+{
+	local proto=$1; shift
+	local pkts=$1; shift
+
+	$MZ $h2 -p $PKTSZ -a own -b $h3_mac -A 192.0.2.2 -B 192.0.2.3 -t $proto -q -c $pkts "$@"
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. After 10 failed attempts it bails out and returns 1. It dumps the
+# backlog size to stdout.
+build_backlog()
+{
+	local size=$1; shift
+	local proto=$1; shift
+
+	local i=0
+
+	while :; do
+		local cur=$(get_qdisc_backlog)
+		local diff=$((size - cur))
+		local pkts=$(((diff + PKTSZ - 1) / PKTSZ))
+
+		if ((cur >= size)); then
+			echo $cur
+			return 0
+		elif ((i++ > 10)); then
+			echo $cur
+			return 1
+		fi
+
+		send_packets $proto $pkts "$@"
+		sleep 1
+	done
+}
+
+check_marking()
+{
+	local cond=$1; shift
+
+	local npackets_0=$(get_qdisc_npackets)
+	local nmarked_0=$(get_nmarked)
+	sleep 5
+	local npackets_1=$(get_qdisc_npackets)
+	local nmarked_1=$(get_nmarked)
+
+	local nmarked_d=$((nmarked_1 - nmarked_0))
+	local npackets_d=$((npackets_1 - npackets_0))
+	local pct=$((100 * nmarked_d / npackets_d))
+
+	echo $pct
+	((pct $cond))
+}
+
+check_mirroring()
+{
+	local cond=$1; shift
+
+	local npackets_0=$(get_qdisc_npackets)
+	local nmirrored_0=$(get_nmirrored)
+	sleep 5
+	local npackets_1=$(get_qdisc_npackets)
+	local nmirrored_1=$(get_nmirrored)
+
+	local nmirrored_d=$((nmirrored_1 - nmirrored_0))
+	local npackets_d=$((npackets_1 - npackets_0))
+	local pct=$((100 * nmirrored_d / npackets_d))
+
+	echo $pct
+	((pct $cond))
+}
+
+ecn_test_common()
+{
+	local name=$1; shift
+	local limit=$1; shift
+	local backlog
+	local pct
+
+	# Build the below-the-limit backlog using UDP. We could use TCP just
+	# fine, but this way we get a proof that UDP is accepted when queue
+	# length is below the limit. The main stream is using TCP, and if the
+	# limit is misconfigured, we would see this traffic being ECN marked.
+	RET=0
+	backlog=$(build_backlog $((2 * limit / 3)) udp)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	log_test "$name backlog < limit"
+
+	# Now push TCP, because non-TCP traffic would be early-dropped after the
+	# backlog crosses the limit, and we want to make sure that the backlog
+	# is above the limit.
+	RET=0
+	backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking ">= 95")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+	log_test "$name backlog > limit"
+}
+
+do_ecn_test()
+{
+	local limit=$1; shift
+	local name=ECN
+
+	$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+		-a own -b $h3_mac -t tcp -q tos=0x01 &
+	sleep 1
+
+	ecn_test_common "$name" $limit
+
+	# Up there we saw that UDP gets accepted when backlog is below the
+	# limit. Now that it is above, it should all get dropped, and backlog
+	# building should fail.
+	RET=0
+	build_backlog $((2 * limit)) udp >/dev/null
+	check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+	log_test "$name backlog > limit: UDP early-dropped"
+
+	stop_traffic
+	sleep 1
+}
+
+do_ecn_nodrop_test()
+{
+	local limit=$1; shift
+	local name="ECN nodrop"
+
+	$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+		-a own -b $h3_mac -t tcp -q tos=0x01 &
+	sleep 1
+
+	ecn_test_common "$name" $limit
+
+	# Up there we saw that UDP gets accepted when backlog is below the
+	# limit. Now that it is above, in nodrop mode, make sure it goes to
+	# backlog as well.
+	RET=0
+	build_backlog $((2 * limit)) udp >/dev/null
+	check_err $? "UDP traffic was early-dropped instead of getting into backlog"
+	log_test "$name backlog > limit: UDP not dropped"
+
+	stop_traffic
+	sleep 1
+}
+
+do_red_test()
+{
+	local limit=$1; shift
+	local backlog
+	local pct
+
+	# Use ECN-capable TCP to verify there's no marking even though the queue
+	# is above limit.
+	$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+		-a own -b $h3_mac -t tcp -q tos=0x01 &
+
+	# Pushing below the queue limit should work.
+	RET=0
+	backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	log_test "RED backlog < limit"
+
+	# Pushing above should not.
+	RET=0
+	backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+	check_fail $? "Traffic went into backlog instead of being early-dropped"
+	pct=$(check_marking "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	log_test "RED backlog > limit"
+
+	stop_traffic
+	sleep 1
+}
+
+do_red_qevent_test()
+{
+	local limit=$1; shift
+	local backlog
+	local base
+	local now
+	local pct
+
+	RET=0
+
+	$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+		-a own -b $h3_mac -t udp -q &
+	sleep 1
+
+	tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
+	   action mirred egress mirror dev _drop_test
+
+	# Push to the queue until it's at the limit. The configured limit is
+	# rounded by the qdisc, so this is the best we can do to get to the real
+	# limit.
+	build_backlog $((3 * limit / 2)) udp >/dev/null
+
+	base=$(get_nmirrored)
+	send_packets udp 100
+	sleep 1
+	now=$(get_nmirrored)
+	((now >= base + 100))
+	check_err $? "Dropped packets not observed: 100 expected, $((now - base)) seen"
+
+	tc filter del block 10 pref 1234 handle 102 matchall
+
+	base=$(get_nmirrored)
+	send_packets udp 100
+	sleep 1
+	now=$(get_nmirrored)
+	((now == base))
+	check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen"
+
+	log_test "RED early_dropped packets mirrored"
+
+	stop_traffic
+	sleep 1
+}
+
+do_ecn_qevent_test()
+{
+	local limit=$1; shift
+	local name=ECN
+
+	RET=0
+
+	$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+		-a own -b $h3_mac -t tcp -q tos=0x01 &
+	sleep 1
+
+	tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
+	   action mirred egress mirror dev _drop_test
+
+	backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_mirroring "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected == 0."
+
+	backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_mirroring ">= 95")
+	check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected >= 95."
+
+	tc filter del block 10 pref 1234 handle 102 matchall
+
+	log_test "ECN marked packets mirrored"
+
+	stop_traffic
+	sleep 1
+}
+
+install_qdisc()
+{
+	local -a args=("$@")
+
+	tc qdisc replace dev $swp3 parent 1:1 handle 11: red \
+	   limit 1M avpkt $PKTSZ probability 1 \
+	   min $BACKLOG max $((BACKLOG + 1)) burst 38 "${args[@]}"
+	sleep 1
+}
+
+uninstall_qdisc()
+{
+	tc qdisc del dev $swp3 parent 1:1
+}
+
+ecn_test()
+{
+	install_qdisc ecn
+	do_ecn_test $BACKLOG
+	uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+	install_qdisc ecn nodrop
+	do_ecn_nodrop_test $BACKLOG
+	uninstall_qdisc
+}
+
+red_test()
+{
+	install_qdisc
+	do_red_test $BACKLOG
+	uninstall_qdisc
+}
+
+red_qevent_test()
+{
+	install_qdisc qevent early_drop block 10
+	do_red_qevent_test $BACKLOG
+	uninstall_qdisc
+}
+
+ecn_qevent_test()
+{
+	install_qdisc ecn qevent mark block 10
+	do_ecn_qevent_test $BACKLOG
+	uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From dd9e67ff8086adeaf257a17eee9ee73dcb2c1c13 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amitc@mellanox.com>
Date: Mon, 29 Jun 2020 23:46:19 +0300
Subject: selftests: forwarding: ethtool: Move different_speeds_get() to
 ethtool_lib

Currently different_speeds_get() is used only by ethtool.sh tests.
The function can be useful for another tests that check ethtool
configurations.

Move the function to ethtool_lib in order to allow other tests to use
it.

Signed-off-by: Amit Cohen <amitc@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/ethtool.sh     | 17 -----------------
 tools/testing/selftests/net/forwarding/ethtool_lib.sh | 17 +++++++++++++++++
 2 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
index eb8e2a23bbb4..ea7a11a9f788 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool.sh
@@ -50,23 +50,6 @@ cleanup()
 	h1_destroy
 }
 
-different_speeds_get()
-{
-	local dev1=$1; shift
-	local dev2=$1; shift
-	local with_mode=$1; shift
-	local adver=$1; shift
-
-	local -a speeds_arr
-
-	speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
-	if [[ ${#speeds_arr[@]} < 2 ]]; then
-		check_err 1 "cannot check different speeds. There are not enough speeds"
-	fi
-
-	echo ${speeds_arr[0]} ${speeds_arr[1]}
-}
-
 same_speeds_autoneg_off()
 {
 	# Check that when each of the reported speeds is forced, the links come
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
index 925d229a59d8..9188e624dec0 100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
@@ -67,3 +67,20 @@ common_speeds_get()
 		<(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \
 		<(printf '%s\n' "${dev2_speeds[@]}" | sort -u)
 }
+
+different_speeds_get()
+{
+	local dev1=$1; shift
+	local dev2=$1; shift
+	local with_mode=$1; shift
+	local adver=$1; shift
+
+	local -a speeds_arr
+
+	speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
+	if [[ ${#speeds_arr[@]} < 2 ]]; then
+		check_err 1 "cannot check different speeds. There are not enough speeds"
+	fi
+
+	echo ${speeds_arr[0]} ${speeds_arr[1]}
+}
-- 
cgit 


From 0433045c27bf9ae71e1c0300c278582407dd0e0b Mon Sep 17 00:00:00 2001
From: Amit Cohen <amitc@mellanox.com>
Date: Mon, 29 Jun 2020 23:46:20 +0300
Subject: selftests: forwarding: forwarding.config.sample: Add port with no
 cable connected

Add NETIF_NO_CABLE port to tests topology.

The port can also be declared as an environment variable and tests can be
run like that:
NETIF_NO_CABLE=eth9 ./test.sh eth{1..8}

The NETIF_NO_CABLE port will be used by ethtool_extended_state test.

Signed-off-by: Amit Cohen <amitc@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/forwarding.config.sample | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index e2adb533c8fc..b802c14d2950 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -14,6 +14,9 @@ NETIFS[p6]=veth5
 NETIFS[p7]=veth6
 NETIFS[p8]=veth7
 
+# Port that does not have a cable connected.
+NETIF_NO_CABLE=eth8
+
 ##############################################################################
 # Defines
 
-- 
cgit 


From 7d10bcce98cd44ea7c040380397114e0ac94422f Mon Sep 17 00:00:00 2001
From: Amit Cohen <amitc@mellanox.com>
Date: Mon, 29 Jun 2020 23:46:21 +0300
Subject: selftests: forwarding: Add tests for ethtool extended state

Add tests to check ethtool report about extended state.
The tests configure several states and verify that the correct extended
state is reported by ethtool.

Check extended state with substate (Autoneg) and extended state without
substate (No cable).

Signed-off-by: Amit Cohen <amitc@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/ethtool_extended_state.sh       | 102 +++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/ethtool_extended_state.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
new file mode 100755
index 000000000000..4b42dfd4efd1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	autoneg
+	autoneg_force_mode
+	no_cable
+"
+
+NUM_NETIFS=2
+source lib.sh
+source ethtool_lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+	swp3=$NETIF_NO_CABLE
+}
+
+ethtool_extended_state_check()
+{
+	local dev=$1; shift
+	local expected_ext_state=$1; shift
+	local expected_ext_substate=${1:-""}; shift
+
+	local ext_state=$(ethtool $dev | grep "Link detected" \
+		| cut -d "(" -f2 | cut -d ")" -f1)
+	local ext_substate=$(echo $ext_state | cut -sd "," -f2 \
+		| sed -e 's/^[[:space:]]*//')
+	ext_state=$(echo $ext_state | cut -d "," -f1)
+
+	[[ $ext_state == $expected_ext_state ]]
+	check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\""
+
+	[[ $ext_substate == $expected_ext_substate ]]
+	check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+}
+
+autoneg()
+{
+	RET=0
+
+	ip link set dev $swp1 up
+
+	sleep 4
+	ethtool_extended_state_check $swp1 "Autoneg" "No partner detected"
+
+	log_test "Autoneg, No partner detected"
+
+	ip link set dev $swp1 down
+}
+
+autoneg_force_mode()
+{
+	RET=0
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0))
+	local speed1=${speeds_arr[0]}
+	local speed2=${speeds_arr[1]}
+
+	ethtool_set $swp1 speed $speed1 autoneg off
+	ethtool_set $swp2 speed $speed2 autoneg off
+
+	sleep 4
+	ethtool_extended_state_check $swp1 "Autoneg" \
+		"No partner detected during force mode"
+
+	ethtool_extended_state_check $swp2 "Autoneg" \
+		"No partner detected during force mode"
+
+	log_test "Autoneg, No partner detected during force mode"
+
+	ethtool -s $swp2 autoneg on
+	ethtool -s $swp1 autoneg on
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+no_cable()
+{
+	RET=0
+
+	ip link set dev $swp3 up
+
+	sleep 1
+	ethtool_extended_state_check $swp3 "No cable"
+
+	log_test "No cable"
+
+	ip link set dev $swp3 down
+}
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From ec23eb705620234421fd48fc2382490fcfbafc37 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 29 Jun 2020 17:47:58 -0700
Subject: tools/bpftool: Allow substituting custom vmlinux.h for the build

In some build contexts (e.g., Travis CI build for outdated kernel), vmlinux.h,
generated from available kernel, doesn't contain all the types necessary for
BPF program compilation. For such set up, the most maintainable way to deal
with this problem is to keep pre-generated (almost up-to-date) vmlinux.h
checked in and use it for compilation purposes. bpftool after that can deal
with kernel missing some of the features in runtime with no problems.

To that effect, allow to specify path to custom vmlinux.h to bpftool's
Makefile with VMLINUX_H variable.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200630004759.521530-1-andriin@fb.com
---
 tools/bpf/bpftool/Makefile | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 8c6563e56ffc..273da1615503 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -122,20 +122,24 @@ BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstr
 BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)				\
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
 		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
 		     ../../../vmlinux					\
 		     /sys/kernel/btf/vmlinux				\
 		     /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
 
-ifneq ($(VMLINUX_BTF),)
+ifneq ($(VMLINUX_BTF)$(VMLINUX_H),)
 ifeq ($(feature-clang-bpf-co-re),1)
 
 BUILD_BPF_SKELS := 1
 
 $(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP)
+ifeq ($(VMLINUX_H),)
 	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@
+else
+	$(Q)cp "$(VMLINUX_H)" $@
+endif
 
 $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF)
 	$(QUIET_CLANG)$(CLANG) \
-- 
cgit 


From ca4db6389d611eee2eb7c1dfe710b62d8ea06772 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 29 Jun 2020 17:47:59 -0700
Subject: selftests/bpf: Allow substituting custom vmlinux.h for selftests
 build

Similarly to bpftool Makefile, allow to specify custom location of vmlinux.h
to be used during the build. This allows simpler testing setups with
checked-in pre-generated vmlinux.h.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200630004759.521530-2-andriin@fb.com
---
 tools/testing/selftests/bpf/Makefile | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 22aaec74ea0a..1f9c696b3edf 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -134,12 +134,12 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
 	$(call msg,CC,,$@)
 	$(CC) -c $(CFLAGS) -o $@ $<
 
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)				\
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
 		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
 		     ../../../../vmlinux				\
 		     /sys/kernel/btf/vmlinux				\
 		     /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
 
 $(OUTPUT)/runqslower: $(BPFOBJ)
 	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	\
@@ -182,8 +182,13 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
 	mkdir -p $@
 
 $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+ifeq ($(VMLINUX_H),)
 	$(call msg,GEN,,$@)
 	$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+	$(call msg,CP,,$@)
+	cp "$(VMLINUX_H)" $@
+endif
 
 # Get Clang's default includes on this system, as opposed to those seen by
 # '-target bpf'. This fixes "missing" files on some architectures/distros,
-- 
cgit 


From 30ad688094bcfe8721bfd4003f6a20c9b6ddf964 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 30 Jun 2020 08:21:24 -0700
Subject: libbpf: Make bpf_endian co-exist with vmlinux.h

Make bpf_endian.h compatible with vmlinux.h. It is a frequent request from
users wanting to use bpf_endian.h in their BPF applications using CO-RE and
vmlinux.h.

To achieve that, re-implement byte swap macros and drop all the header
includes. This way it can be used both with linux header includes, as well as
with a vmlinux.h.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200630152125.3631920-2-andriin@fb.com
---
 tools/lib/bpf/bpf_endian.h | 43 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h
index fbe28008450f..ec9db4feca9f 100644
--- a/tools/lib/bpf/bpf_endian.h
+++ b/tools/lib/bpf/bpf_endian.h
@@ -2,8 +2,35 @@
 #ifndef __BPF_ENDIAN__
 #define __BPF_ENDIAN__
 
-#include <linux/stddef.h>
-#include <linux/swab.h>
+/*
+ * Isolate byte #n and put it into byte #m, for __u##b type.
+ * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64:
+ * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx
+ * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000
+ * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn
+ * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000
+ */
+#define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8))
+
+#define ___bpf_swab16(x) ((__u16)(			\
+			  ___bpf_mvb(x, 16, 0, 1) |	\
+			  ___bpf_mvb(x, 16, 1, 0)))
+
+#define ___bpf_swab32(x) ((__u32)(			\
+			  ___bpf_mvb(x, 32, 0, 3) |	\
+			  ___bpf_mvb(x, 32, 1, 2) |	\
+			  ___bpf_mvb(x, 32, 2, 1) |	\
+			  ___bpf_mvb(x, 32, 3, 0)))
+
+#define ___bpf_swab64(x) ((__u64)(			\
+			  ___bpf_mvb(x, 64, 0, 7) |	\
+			  ___bpf_mvb(x, 64, 1, 6) |	\
+			  ___bpf_mvb(x, 64, 2, 5) |	\
+			  ___bpf_mvb(x, 64, 3, 4) |	\
+			  ___bpf_mvb(x, 64, 4, 3) |	\
+			  ___bpf_mvb(x, 64, 5, 2) |	\
+			  ___bpf_mvb(x, 64, 6, 1) |	\
+			  ___bpf_mvb(x, 64, 7, 0)))
 
 /* LLVM's BPF target selects the endianness of the CPU
  * it compiles on, or the user specifies (bpfel/bpfeb),
@@ -23,16 +50,16 @@
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 # define __bpf_ntohs(x)			__builtin_bswap16(x)
 # define __bpf_htons(x)			__builtin_bswap16(x)
-# define __bpf_constant_ntohs(x)	___constant_swab16(x)
-# define __bpf_constant_htons(x)	___constant_swab16(x)
+# define __bpf_constant_ntohs(x)	___bpf_swab16(x)
+# define __bpf_constant_htons(x)	___bpf_swab16(x)
 # define __bpf_ntohl(x)			__builtin_bswap32(x)
 # define __bpf_htonl(x)			__builtin_bswap32(x)
-# define __bpf_constant_ntohl(x)	___constant_swab32(x)
-# define __bpf_constant_htonl(x)	___constant_swab32(x)
+# define __bpf_constant_ntohl(x)	___bpf_swab32(x)
+# define __bpf_constant_htonl(x)	___bpf_swab32(x)
 # define __bpf_be64_to_cpu(x)		__builtin_bswap64(x)
 # define __bpf_cpu_to_be64(x)		__builtin_bswap64(x)
-# define __bpf_constant_be64_to_cpu(x)	___constant_swab64(x)
-# define __bpf_constant_cpu_to_be64(x)	___constant_swab64(x)
+# define __bpf_constant_be64_to_cpu(x)	___bpf_swab64(x)
+# define __bpf_constant_cpu_to_be64(x)	___bpf_swab64(x)
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 # define __bpf_ntohs(x)			(x)
 # define __bpf_htons(x)			(x)
-- 
cgit 


From 8c18311067d0f0d5f332b9e1f3859eb15e23332d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 30 Jun 2020 08:21:25 -0700
Subject: selftests/bpf: Add byte swapping selftest

Add simple selftest validating byte swap built-ins and compile-time macros.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200630152125.3631920-3-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/endian.c | 53 +++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_endian.c | 37 +++++++++++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/endian.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_endian.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/endian.c b/tools/testing/selftests/bpf/prog_tests/endian.c
new file mode 100644
index 000000000000..1a11612ace6c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/endian.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include "test_endian.skel.h"
+
+static int duration;
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+#define OUT16 0x3412
+#define OUT32 0x78563412U
+#define OUT64 0xf0debc9a78563412ULL
+
+void test_endian(void)
+{
+	struct test_endian* skel;
+	struct test_endian__bss *bss;
+	int err;
+
+	skel = test_endian__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+	bss = skel->bss;
+
+	bss->in16 = IN16;
+	bss->in32 = IN32;
+	bss->in64 = IN64;
+
+	err = test_endian__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	usleep(1);
+
+	CHECK(bss->out16 != OUT16, "out16", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->out16, (__u64)OUT16);
+	CHECK(bss->out32 != OUT32, "out32", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->out32, (__u64)OUT32);
+	CHECK(bss->out64 != OUT64, "out16", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->out64, (__u64)OUT64);
+
+	CHECK(bss->const16 != OUT16, "const16", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->const16, (__u64)OUT16);
+	CHECK(bss->const32 != OUT32, "const32", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->const32, (__u64)OUT32);
+	CHECK(bss->const64 != OUT64, "const64", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->const64, (__u64)OUT64);
+cleanup:
+	test_endian__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_endian.c b/tools/testing/selftests/bpf/progs/test_endian.c
new file mode 100644
index 000000000000..ddb687c5d125
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_endian.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+__u16 in16 = 0;
+__u32 in32 = 0;
+__u64 in64 = 0;
+
+__u16 out16 = 0;
+__u32 out32 = 0;
+__u64 out64 = 0;
+
+__u16 const16 = 0;
+__u32 const32 = 0;
+__u64 const64 = 0;
+
+SEC("raw_tp/sys_enter")
+int sys_enter(const void *ctx)
+{
+	out16 = __builtin_bswap16(in16);
+	out32 = __builtin_bswap32(in32);
+	out64 = __builtin_bswap64(in64);
+	const16 = ___bpf_swab16(IN16);
+	const32 = ___bpf_swab32(IN32);
+	const64 = ___bpf_swab64(IN64);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From fa28dcb82a38f8e3993b0fae9106b1a80b59e4f0 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 29 Jun 2020 23:28:44 -0700
Subject: bpf: Introduce helper bpf_get_task_stack()

Introduce helper bpf_get_task_stack(), which dumps stack trace of given
task. This is different to bpf_get_stack(), which gets stack track of
current task. One potential use case of bpf_get_task_stack() is to call
it from bpf_iter__task and dump all /proc/<pid>/stack to a seq_file.

bpf_get_task_stack() uses stack_trace_save_tsk() instead of
get_perf_callchain() for kernel stack. The benefit of this choice is that
stack_trace_save_tsk() doesn't require changes in arch/. The downside of
using stack_trace_save_tsk() is that stack_trace_save_tsk() dumps the
stack trace to unsigned long array. For 32-bit systems, we need to
translate it to u64 array.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200630062846.664389-3-songliubraving@fb.com
---
 tools/include/uapi/linux/bpf.h | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0cb8ec948816..da9bf35a26f8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3285,6 +3285,39 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
+ *	Description
+ *		Return a user or a kernel stack in bpf program provided buffer.
+ *		To achieve this, the helper needs *task*, which is a valid
+ *		pointer to struct task_struct. To store the stacktrace, the
+ *		bpf program provides *buf* with	a nonnegative *size*.
+ *
+ *		The last argument, *flags*, holds the number of stack frames to
+ *		skip (from 0 to 255), masked with
+ *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *		the following flags:
+ *
+ *		**BPF_F_USER_STACK**
+ *			Collect a user space stack instead of a kernel stack.
+ *		**BPF_F_USER_BUILD_ID**
+ *			Collect buildid+offset instead of ips for user stack,
+ *			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *		**bpf_get_task_stack**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ *		to sufficient large buffer size. Note that
+ *		this limit can be controlled with the **sysctl** program, and
+ *		that it should be manually increased in order to profile long
+ *		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *		::
+ *
+ *			# sysctl kernel.perf_event_max_stack=<new value>
+ *	Return
+ *		A non-negative value equal to or less than *size* on success,
+ *		or a negative error in case of failure.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3427,7 +3460,9 @@ union bpf_attr {
 	FN(skc_to_tcp_sock),		\
 	FN(skc_to_tcp_timewait_sock),	\
 	FN(skc_to_tcp_request_sock),	\
-	FN(skc_to_udp6_sock),
+	FN(skc_to_udp6_sock),		\
+	FN(get_task_stack),		\
+	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit 


From c7568114bc56cf3ec0bd9eb117bbe7cad3d30e11 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 29 Jun 2020 23:28:46 -0700
Subject: selftests/bpf: Add bpf_iter test with bpf_get_task_stack()

The new test is similar to other bpf_iter tests. It dumps all
/proc/<pid>/stack to a seq_file. Here is some example output:

pid:     2873 num_entries:        3
[<0>] worker_thread+0xc6/0x380
[<0>] kthread+0x135/0x150
[<0>] ret_from_fork+0x22/0x30

pid:     2874 num_entries:        9
[<0>] __bpf_get_stack+0x15e/0x250
[<0>] bpf_prog_22a400774977bb30_dump_task_stack+0x4a/0xb3c
[<0>] bpf_iter_run_prog+0x81/0x170
[<0>] __task_seq_show+0x58/0x80
[<0>] bpf_seq_read+0x1c3/0x3b0
[<0>] vfs_read+0x9e/0x170
[<0>] ksys_read+0xa7/0xe0
[<0>] do_syscall_64+0x4c/0xa0
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

Note: bpf_iter test as-is doesn't print the contents of the seq_file. To
see the example above, it is necessary to add printf() to do_dummy_read.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200630062846.664389-5-songliubraving@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 17 ++++++++++
 .../selftests/bpf/progs/bpf_iter_task_stack.c      | 37 ++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1e2e0fced6e8..fed42755416d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -5,6 +5,7 @@
 #include "bpf_iter_netlink.skel.h"
 #include "bpf_iter_bpf_map.skel.h"
 #include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_stack.skel.h"
 #include "bpf_iter_task_file.skel.h"
 #include "bpf_iter_tcp4.skel.h"
 #include "bpf_iter_tcp6.skel.h"
@@ -110,6 +111,20 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_stack(void)
+{
+	struct bpf_iter_task_stack *skel;
+
+	skel = bpf_iter_task_stack__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_stack);
+
+	bpf_iter_task_stack__destroy(skel);
+}
+
 static void test_task_file(void)
 {
 	struct bpf_iter_task_file *skel;
@@ -452,6 +467,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_stack"))
+		test_task_stack();
 	if (test__start_subtest("task_file"))
 		test_task_file();
 	if (test__start_subtest("tcp4"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
new file mode 100644
index 000000000000..e40d32a2ed93
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_STACK_TRACE_DEPTH   64
+unsigned long entries[MAX_STACK_TRACE_DEPTH];
+#define SIZE_OF_ULONG (sizeof(unsigned long))
+
+SEC("iter/task")
+int dump_task_stack(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	long i, retlen;
+
+	if (task == (void *)0)
+		return 0;
+
+	retlen = bpf_get_task_stack(task, entries,
+				    MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, 0);
+	if (retlen < 0)
+		return 0;
+
+	BPF_SEQ_PRINTF(seq, "pid: %8u num_entries: %8u\n", task->pid,
+		       retlen / SIZE_OF_ULONG);
+	for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
+		if (retlen > i * SIZE_OF_ULONG)
+			BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
+	}
+	BPF_SEQ_PRINTF(seq, "\n");
+
+	return 0;
+}
-- 
cgit 


From 8d821b5db70723d27ee749b4870de90760606918 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Wed, 1 Jul 2020 10:53:15 -0700
Subject: selftests/bpf: Switch test_vmlinux to use hrtimer_range_start_ns.

The test_vmlinux test uses hrtimer_nanosleep as hook to test tracing
programs. But in a kernel built by clang, which performs more aggresive
inlining, that function gets inlined into its caller SyS_nanosleep.
Therefore, even though fentry and kprobe do hook on the function,
they aren't triggered by the call to nanosleep in the test.

A possible fix is switching to use a function that is less likely to
be inlined, such as hrtimer_range_start_ns. The EXPORT_SYMBOL functions
shouldn't be inlined based on the description of [1], therefore safe
to use for this test. Also the arguments of this function include the
duration of sleep, therefore suitable for test verification.

[1] af3b56289be1 time: don't inline EXPORT_SYMBOL functions

Tested:
 In a clang build kernel, before this change, the test fails:

 test_vmlinux:PASS:skel_open 0 nsec
 test_vmlinux:PASS:skel_attach 0 nsec
 test_vmlinux:PASS:tp 0 nsec
 test_vmlinux:PASS:raw_tp 0 nsec
 test_vmlinux:PASS:tp_btf 0 nsec
 test_vmlinux:FAIL:kprobe not called
 test_vmlinux:FAIL:fentry not called

 After switching to hrtimer_range_start_ns, the test passes:

 test_vmlinux:PASS:skel_open 0 nsec
 test_vmlinux:PASS:skel_attach 0 nsec
 test_vmlinux:PASS:tp 0 nsec
 test_vmlinux:PASS:raw_tp 0 nsec
 test_vmlinux:PASS:tp_btf 0 nsec
 test_vmlinux:PASS:kprobe 0 nsec
 test_vmlinux:PASS:fentry 0 nsec

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200701175315.1161242-1-haoluo@google.com
---
 tools/testing/selftests/bpf/progs/test_vmlinux.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 5611b564d3b1..29fa09d6a6c6 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -63,20 +63,20 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
 	return 0;
 }
 
-SEC("kprobe/hrtimer_nanosleep")
-int BPF_KPROBE(handle__kprobe,
-	       ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("kprobe/hrtimer_start_range_ns")
+int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+	       const enum hrtimer_mode mode)
 {
-	if (rqtp == MY_TV_NSEC)
+	if (tim == MY_TV_NSEC)
 		kprobe_called = true;
 	return 0;
 }
 
-SEC("fentry/hrtimer_nanosleep")
-int BPF_PROG(handle__fentry,
-	     ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("fentry/hrtimer_start_range_ns")
+int BPF_PROG(handle__fentry, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+	     const enum hrtimer_mode mode)
 {
-	if (rqtp == MY_TV_NSEC)
+	if (tim == MY_TV_NSEC)
 		fentry_called = true;
 	return 0;
 }
-- 
cgit 


From 17bbf925c6f86be8c08bc473cb5327c173154596 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 1 Jul 2020 14:28:16 -0700
Subject: tools/bpftool: Turn off -Wnested-externs warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Turn off -Wnested-externs to avoid annoying warnings in BUILD_BUG_ON macro when
compiling bpftool:

In file included from /data/users/andriin/linux/tools/include/linux/build_bug.h:5,
                 from /data/users/andriin/linux/tools/include/linux/kernel.h:8,
                 from /data/users/andriin/linux/kernel/bpf/disasm.h:10,
                 from /data/users/andriin/linux/kernel/bpf/disasm.c:8:
/data/users/andriin/linux/kernel/bpf/disasm.c: In function ‘__func_get_name’:
/data/users/andriin/linux/tools/include/linux/compiler.h:37:38: warning: nested extern declaration of ‘__compiletime_assert_0’ [-Wnested-externs]
  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
                                      ^~~~~~~~~~~~~~~~~~~~~
/data/users/andriin/linux/tools/include/linux/compiler.h:16:15: note: in definition of macro ‘__compiletime_assert’
   extern void prefix ## suffix(void) __compiletime_error(msg); \
               ^~~~~~
/data/users/andriin/linux/tools/include/linux/compiler.h:37:2: note: in expansion of macro ‘_compiletime_assert’
  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
  ^~~~~~~~~~~~~~~~~~~
/data/users/andriin/linux/tools/include/linux/build_bug.h:39:37: note: in expansion of macro ‘compiletime_assert’
 #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                     ^~~~~~~~~~~~~~~~~~
/data/users/andriin/linux/tools/include/linux/build_bug.h:50:2: note: in expansion of macro ‘BUILD_BUG_ON_MSG’
  BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
  ^~~~~~~~~~~~~~~~
/data/users/andriin/linux/kernel/bpf/disasm.c:20:2: note: in expansion of macro ‘BUILD_BUG_ON’
  BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
  ^~~~~~~~~~~~

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200701212816.2072340-1-andriin@fb.com
---
 tools/bpf/bpftool/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 273da1615503..51bd520ed437 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -40,7 +40,7 @@ bash_compdir ?= /usr/share/bash-completion/completions
 
 CFLAGS += -O2
 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
-CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS))
+CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS))
 CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
 	-I$(if $(OUTPUT),$(OUTPUT),.) \
 	-I$(srctree)/kernel/bpf/ \
-- 
cgit 


From 6c92bd5cd4650c39dd929565ee172984c680fead Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Jul 2020 23:44:07 +0200
Subject: selftests/bpf: Test_progs indicate to shell on non-actions

When a user selects a non-existing test the summary is printed with
indication 0 for all info types, and shell "success" (EXIT_SUCCESS) is
indicated. This can be understood by a human end-user, but for shell
scripting is it useful to indicate a shell failure (EXIT_FAILURE).

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159363984736.930467.17956007131403952343.stgit@firesoul
---
 tools/testing/selftests/bpf/test_progs.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 54fa5fa688ce..da70a4f72f54 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -687,5 +687,8 @@ int main(int argc, char **argv)
 	free_str_set(&env.subtest_selector.whitelist);
 	free(env.subtest_selector.num_set);
 
+	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
+		return EXIT_FAILURE;
+
 	return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
 }
-- 
cgit 


From 643e7233aa948901dce81d4573c91ed99fdd272e Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Jul 2020 23:44:12 +0200
Subject: selftests/bpf: Test_progs option for getting number of tests

It can be practial to get the number of tests that test_progs contain.
This could for example be used to create a shell for-loop construct that
runs the individual tests.

Like:
 for N in $(seq 1 $(./test_progs -c)); do
   ./test_progs -n $N 2>&1 > result_test_${N}.log &
 done ; wait

V2: Add the ability to return the count for the selected tests. This is
useful for getting a count e.g. after excluding some tests with option -b.
The current beakers test script like to report the max test count upfront.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159363985244.930467.12617117873058936829.stgit@firesoul
---
 tools/testing/selftests/bpf/test_progs.c | 18 ++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.h |  1 +
 2 files changed, 19 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index da70a4f72f54..a5dba14b2025 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -366,6 +366,7 @@ enum ARG_KEYS {
 	ARG_TEST_NAME_BLACKLIST = 'b',
 	ARG_VERIFIER_STATS = 's',
 	ARG_VERBOSE = 'v',
+	ARG_GET_TEST_CNT = 'c',
 };
 
 static const struct argp_option opts[] = {
@@ -379,6 +380,8 @@ static const struct argp_option opts[] = {
 	  "Output verifier statistics", },
 	{ "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL,
 	  "Verbose output (use -vv or -vvv for progressively verbose output)" },
+	{ "count", ARG_GET_TEST_CNT, NULL, 0,
+	  "Get number of selected top-level tests " },
 	{},
 };
 
@@ -511,6 +514,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			}
 		}
 		break;
+	case ARG_GET_TEST_CNT:
+		env->get_test_cnt = true;
+		break;
 	case ARGP_KEY_ARG:
 		argp_usage(state);
 		break;
@@ -654,6 +660,11 @@ int main(int argc, char **argv)
 				test->test_num, test->test_name))
 			continue;
 
+		if (env.get_test_cnt) {
+			env.succ_cnt++;
+			continue;
+		}
+
 		test->run_test();
 		/* ensure last sub-test is finalized properly */
 		if (test->subtest_name)
@@ -677,9 +688,16 @@ int main(int argc, char **argv)
 			cleanup_cgroup_environment();
 	}
 	stdio_restore();
+
+	if (env.get_test_cnt) {
+		printf("%d\n", env.succ_cnt);
+		goto out;
+	}
+
 	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
 		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
+out:
 	free_str_set(&env.test_selector.blacklist);
 	free_str_set(&env.test_selector.whitelist);
 	free(env.test_selector.num_set);
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f4503c926aca..0030584619c3 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -66,6 +66,7 @@ struct test_env {
 	enum verbosity verbosity;
 
 	bool jit_enabled;
+	bool get_test_cnt;
 
 	struct prog_test_def *test;
 	FILE *stdout;
-- 
cgit 


From c1f1f3656eee3a59a40e1805699041ec1c14ab83 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Jul 2020 23:44:17 +0200
Subject: selftests/bpf: Test_progs option for listing test names

The program test_progs have some very useful ability to specify a list of
test name substrings for selecting which tests to run.

This patch add the ability to list the selected test names without running
them. This is practical for seeing which tests gets selected with given
select arguments (which can also contain a exclude list via --name-blacklist).

This output can also be used by shell-scripts in a for-loop:

 for N in $(./test_progs --list -t xdp); do \
   ./test_progs -t $N 2>&1 > result_test_${N}.log & \
 done ; wait

This features can also be used for looking up a test number and returning
a testname. If the selection was empty then a shell EXIT_FAILURE is
returned.  This is useful for scripting. e.g. like this:

 n=1;
 while [ $(./test_progs --list -n $n) ] ; do \
   ./test_progs -n $n ; n=$(( n+1 )); \
 done

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159363985751.930467.9610992940793316982.stgit@firesoul
---
 tools/testing/selftests/bpf/test_progs.c | 15 +++++++++++++++
 tools/testing/selftests/bpf/test_progs.h |  1 +
 2 files changed, 16 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index a5dba14b2025..ef05d2f0e7bb 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -367,6 +367,7 @@ enum ARG_KEYS {
 	ARG_VERIFIER_STATS = 's',
 	ARG_VERBOSE = 'v',
 	ARG_GET_TEST_CNT = 'c',
+	ARG_LIST_TEST_NAMES = 'l',
 };
 
 static const struct argp_option opts[] = {
@@ -382,6 +383,8 @@ static const struct argp_option opts[] = {
 	  "Verbose output (use -vv or -vvv for progressively verbose output)" },
 	{ "count", ARG_GET_TEST_CNT, NULL, 0,
 	  "Get number of selected top-level tests " },
+	{ "list", ARG_LIST_TEST_NAMES, NULL, 0,
+	  "List test names that would run (without running them) " },
 	{},
 };
 
@@ -517,6 +520,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case ARG_GET_TEST_CNT:
 		env->get_test_cnt = true;
 		break;
+	case ARG_LIST_TEST_NAMES:
+		env->list_test_names = true;
+		break;
 	case ARGP_KEY_ARG:
 		argp_usage(state);
 		break;
@@ -665,6 +671,12 @@ int main(int argc, char **argv)
 			continue;
 		}
 
+		if (env.list_test_names) {
+			fprintf(env.stdout, "%s\n", test->test_name);
+			env.succ_cnt++;
+			continue;
+		}
+
 		test->run_test();
 		/* ensure last sub-test is finalized properly */
 		if (test->subtest_name)
@@ -694,6 +706,9 @@ int main(int argc, char **argv)
 		goto out;
 	}
 
+	if (env.list_test_names)
+		goto out;
+
 	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
 		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 0030584619c3..ec31f382e7fd 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -67,6 +67,7 @@ struct test_env {
 
 	bool jit_enabled;
 	bool get_test_cnt;
+	bool list_test_names;
 
 	struct prog_test_def *test;
 	FILE *stdout;
-- 
cgit 


From 767659f65000e6a19fee3b9bf6cda2839968329a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 30 Jun 2020 21:24:44 +0200
Subject: selftests: mptcp: add option to specify size of file to transfer

The script generates two random files that are then sent via tcp and
mptcp connections.

In order to compare throughput over consecutive runs add an option
to provide the file size on the command line: "-f 128000".

Also add an option, -t, to enable tcp tests. This is useful to
compare throughput of mptcp connections and tcp connections.

Example: run tests with a 4mb file size, 300ms delay 0.01% loss,
default gso/tso/gro settings and with large write/blocking io:

mptcp_connect.sh -t -f $((4 * 1024 * 1024)) -d 300 -l 0.01%  -r 0 -e "" -m mmap

Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 52 ++++++++++++++++------
 1 file changed, 39 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index acf02e156d20..8f7145c413b9 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
 
 time_start=$(date +%s)
 
-optstring="S:R:d:e:l:r:h4cm:"
+optstring="S:R:d:e:l:r:h4cm:f:t"
 ret=0
 sin=""
 sout=""
@@ -21,6 +21,8 @@ testmode=""
 sndbuf=0
 rcvbuf=0
 options_log=true
+do_tcp=0
+filesize=0
 
 if [ $tc_loss -eq 100 ];then
 	tc_loss=1%
@@ -40,9 +42,11 @@ usage() {
 	echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
 	echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
 	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+	echo -e "\t-f: size of file to transfer in bytes (default random)"
 	echo -e "\t-S: set sndbuf value (default: use kernel default)"
 	echo -e "\t-R: set rcvbuf value (default: use kernel default)"
 	echo -e "\t-m: test mode (poll, sendfile; default: poll)"
+	echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
 }
 
 while getopts "$optstring" option;do
@@ -94,6 +98,12 @@ while getopts "$optstring" option;do
 	"m")
 		testmode="$OPTARG"
 		;;
+	"f")
+		filesize="$OPTARG"
+		;;
+	"t")
+		do_tcp=$((do_tcp+1))
+		;;
 	"?")
 		usage $0
 		exit 1
@@ -449,20 +459,25 @@ make_file()
 {
 	local name=$1
 	local who=$2
+	local SIZE=$filesize
+	local ksize
+	local rem
 
-	local SIZE TSIZE
-	SIZE=$((RANDOM % (1024 * 8)))
-	TSIZE=$((SIZE * 1024))
+	if [ $SIZE -eq 0 ]; then
+		local MAXSIZE=$((1024 * 1024 * 8))
+		local MINSIZE=$((1024 * 256))
 
-	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+		SIZE=$(((RANDOM * RANDOM + MINSIZE) % MAXSIZE))
+	fi
 
-	SIZE=$((RANDOM % 1024))
-	SIZE=$((SIZE + 128))
-	TSIZE=$((TSIZE + SIZE))
-	dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
+	ksize=$((SIZE / 1024))
+	rem=$((SIZE - (ksize * 1024)))
+
+	dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null
+	dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null
 	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
 
-	echo "Created $name (size $TSIZE) containing data sent by $who"
+	echo "Created $name (size $(du -b "$name")) containing data sent by $who"
 }
 
 run_tests_lo()
@@ -497,9 +512,11 @@ run_tests_lo()
 		return 1
 	fi
 
-	# don't bother testing fallback tcp except for loopback case.
-	if [ ${listener_ns} != ${connector_ns} ]; then
-		return 0
+	if [ $do_tcp -eq 0 ]; then
+		# don't bother testing fallback tcp except for loopback case.
+		if [ ${listener_ns} != ${connector_ns} ]; then
+			return 0
+		fi
 	fi
 
 	do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr}
@@ -516,6 +533,15 @@ run_tests_lo()
 		return 1
 	fi
 
+	if [ $do_tcp -gt 1 ] ;then
+		do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr}
+		lret=$?
+		if [ $lret -ne 0 ]; then
+			ret=$lret
+			return 1
+		fi
+	fi
+
 	return 0
 }
 
-- 
cgit 


From 99126abec5e5778583b959cb164f1888374917d3 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 1 Jul 2020 17:48:52 -0700
Subject: bpf: selftests: A few improvements to network_helpers.c

This patch makes a few changes to the network_helpers.c

1) Enforce SO_RCVTIMEO and SO_SNDTIMEO
   This patch enforces timeout to the network fds through setsockopt
   SO_RCVTIMEO and SO_SNDTIMEO.

   It will remove the need for SOCK_NONBLOCK that requires a more demanding
   timeout logic with epoll/select, e.g. epoll_create, epoll_ctrl, and
   then epoll_wait for timeout.

   That removes the need for connect_wait() from the
   cgroup_skb_sk_lookup.c. The needed change is made in
   cgroup_skb_sk_lookup.c.

2) start_server():
   Add optional addr_str and port to start_server().
   That removes the need of the start_server_with_port().  The caller
   can pass addr_str==NULL and/or port==0.

   I have a future tcp-hdr-opt test that will pass a non-NULL addr_str
   and it is in general useful for other future tests.

   "int timeout_ms" is also added to control the timeout
   on the "accept(listen_fd)".

3) connect_to_fd(): Fully use the server_fd.
   The server sock address has already been obtained from
   getsockname(server_fd).  The sockaddr includes the family,
   so the "int family" arg is redundant.

   Since the server address is obtained from server_fd,  there
   is little reason not to get the server's socket type from the
   server_fd also.  getsockopt(server_fd) can be used to do that,
   so "int type" arg is also removed.

   "int timeout_ms" is added.

4) connect_fd_to_fd():
   "int timeout_ms" is added.
   Some code is also refactored to connect_fd_to_addr() which is
   shared with connect_to_fd().

5) Preserve errno:
   Some callers need to check errno, e.g. cgroup_skb_sk_lookup.c.
   Make changes to do it more consistently in save_errno_close()
   and log_err().

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200702004852.2103003-1-kafai@fb.com
---
 tools/testing/selftests/bpf/network_helpers.c      | 157 ++++++++++++---------
 tools/testing/selftests/bpf/network_helpers.h      |   9 +-
 .../bpf/prog_tests/cgroup_skb_sk_lookup.c          |  12 +-
 .../selftests/bpf/prog_tests/connect_force_port.c  |  10 +-
 .../selftests/bpf/prog_tests/load_bytes_relative.c |   4 +-
 tools/testing/selftests/bpf/prog_tests/tcp_rtt.c   |   4 +-
 6 files changed, 110 insertions(+), 86 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index e36dd1a1780d..acd08715be2e 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -7,8 +7,6 @@
 
 #include <arpa/inet.h>
 
-#include <sys/epoll.h>
-
 #include <linux/err.h>
 #include <linux/in.h>
 #include <linux/in6.h>
@@ -17,8 +15,13 @@
 #include "network_helpers.h"
 
 #define clean_errno() (errno == 0 ? "None" : strerror(errno))
-#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
-	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+#define log_err(MSG, ...) ({						\
+			int __save = errno;				\
+			fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+				__FILE__, __LINE__, clean_errno(),	\
+				##__VA_ARGS__);				\
+			errno = __save;					\
+})
 
 struct ipv4_packet pkt_v4 = {
 	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
@@ -37,7 +40,34 @@ struct ipv6_packet pkt_v6 = {
 	.tcp.doff = 5,
 };
 
-int start_server_with_port(int family, int type, __u16 port)
+static int settimeo(int fd, int timeout_ms)
+{
+	struct timeval timeout = { .tv_sec = 3 };
+
+	if (timeout_ms > 0) {
+		timeout.tv_sec = timeout_ms / 1000;
+		timeout.tv_usec = (timeout_ms % 1000) * 1000;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout,
+		       sizeof(timeout))) {
+		log_err("Failed to set SO_RCVTIMEO");
+		return -1;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout,
+		       sizeof(timeout))) {
+		log_err("Failed to set SO_SNDTIMEO");
+		return -1;
+	}
+
+	return 0;
+}
+
+#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
+
+int start_server(int family, int type, const char *addr_str, __u16 port,
+		 int timeout_ms)
 {
 	struct sockaddr_storage addr = {};
 	socklen_t len;
@@ -48,120 +78,119 @@ int start_server_with_port(int family, int type, __u16 port)
 
 		sin->sin_family = AF_INET;
 		sin->sin_port = htons(port);
+		if (addr_str &&
+		    inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
+			log_err("inet_pton(AF_INET, %s)", addr_str);
+			return -1;
+		}
 		len = sizeof(*sin);
 	} else {
 		struct sockaddr_in6 *sin6 = (void *)&addr;
 
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = htons(port);
+		if (addr_str &&
+		    inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
+			log_err("inet_pton(AF_INET6, %s)", addr_str);
+			return -1;
+		}
 		len = sizeof(*sin6);
 	}
 
-	fd = socket(family, type | SOCK_NONBLOCK, 0);
+	fd = socket(family, type, 0);
 	if (fd < 0) {
 		log_err("Failed to create server socket");
 		return -1;
 	}
 
+	if (settimeo(fd, timeout_ms))
+		goto error_close;
+
 	if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
 		log_err("Failed to bind socket");
-		close(fd);
-		return -1;
+		goto error_close;
 	}
 
 	if (type == SOCK_STREAM) {
 		if (listen(fd, 1) < 0) {
 			log_err("Failed to listed on socket");
-			close(fd);
-			return -1;
+			goto error_close;
 		}
 	}
 
 	return fd;
-}
 
-int start_server(int family, int type)
-{
-	return start_server_with_port(family, type, 0);
+error_close:
+	save_errno_close(fd);
+	return -1;
 }
 
-static const struct timeval timeo_sec = { .tv_sec = 3 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
-
-int connect_to_fd(int family, int type, int server_fd)
+static int connect_fd_to_addr(int fd,
+			      const struct sockaddr_storage *addr,
+			      socklen_t addrlen)
 {
-	int fd, save_errno;
-
-	fd = socket(family, type, 0);
-	if (fd < 0) {
-		log_err("Failed to create client socket");
+	if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
+		log_err("Failed to connect to server");
 		return -1;
 	}
 
-	if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
-		save_errno = errno;
-		close(fd);
-		errno = save_errno;
-		return -1;
-	}
-
-	return fd;
+	return 0;
 }
 
-int connect_fd_to_fd(int client_fd, int server_fd)
+int connect_to_fd(int server_fd, int timeout_ms)
 {
 	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
-	int save_errno;
+	struct sockaddr_in *addr_in;
+	socklen_t addrlen, optlen;
+	int fd, type;
 
-	if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
-		       timeo_optlen)) {
-		log_err("Failed to set SO_RCVTIMEO");
+	optlen = sizeof(type);
+	if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
+		log_err("getsockopt(SOL_TYPE)");
 		return -1;
 	}
 
-	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+	addrlen = sizeof(addr);
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
 		log_err("Failed to get server addr");
 		return -1;
 	}
 
-	if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
-		if (errno != EINPROGRESS) {
-			save_errno = errno;
-			log_err("Failed to connect to server");
-			errno = save_errno;
-		}
+	addr_in = (struct sockaddr_in *)&addr;
+	fd = socket(addr_in->sin_family, type, 0);
+	if (fd < 0) {
+		log_err("Failed to create client socket");
 		return -1;
 	}
 
-	return 0;
+	if (settimeo(fd, timeout_ms))
+		goto error_close;
+
+	if (connect_fd_to_addr(fd, &addr, addrlen))
+		goto error_close;
+
+	return fd;
+
+error_close:
+	save_errno_close(fd);
+	return -1;
 }
 
-int connect_wait(int fd)
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
 {
-	struct epoll_event ev = {}, events[2];
-	int timeout_ms = 1000;
-	int efd, nfd;
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
 
-	efd = epoll_create1(EPOLL_CLOEXEC);
-	if (efd < 0) {
-		log_err("Failed to open epoll fd");
+	if (settimeo(client_fd, timeout_ms))
 		return -1;
-	}
-
-	ev.events = EPOLLRDHUP | EPOLLOUT;
-	ev.data.fd = fd;
 
-	if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
-		log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
-		close(efd);
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
 		return -1;
 	}
 
-	nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
-	if (nfd < 0)
-		log_err("Failed to wait for I/O event on epoll fd=%d", efd);
+	if (connect_fd_to_addr(client_fd, &addr, len))
+		return -1;
 
-	close(efd);
-	return nfd;
+	return 0;
 }
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 6a8009605670..f580e82fda58 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -33,10 +33,9 @@ struct ipv6_packet {
 } __packed;
 extern struct ipv6_packet pkt_v6;
 
-int start_server(int family, int type);
-int start_server_with_port(int family, int type, __u16 port);
-int connect_to_fd(int family, int type, int server_fd);
-int connect_fd_to_fd(int client_fd, int server_fd);
-int connect_wait(int client_fd);
+int start_server(int family, int type, const char *addr, __u16 port,
+		 int timeout_ms);
+int connect_to_fd(int server_fd, int timeout_ms);
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
 
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
index 059047af7df3..464edc1c1708 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -13,7 +13,7 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk)
 	socklen_t addr_len = sizeof(addr);
 	__u32 duration = 0;
 
-	serv_sk = start_server(AF_INET6, SOCK_STREAM);
+	serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
 	if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
 		return;
 
@@ -24,17 +24,13 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk)
 	*g_serv_port = addr.sin6_port;
 
 	/* Client outside of test cgroup should fail to connect by timeout. */
-	err = connect_fd_to_fd(out_sk, serv_sk);
+	err = connect_fd_to_fd(out_sk, serv_sk, 1000);
 	if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
 		  "unexpected result err %d errno %d\n", err, errno))
 		goto cleanup;
 
-	err = connect_wait(out_sk);
-	if (CHECK(err, "connect_wait", "unexpected result %d\n", err))
-		goto cleanup;
-
 	/* Client inside test cgroup should connect just fine. */
-	in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk);
+	in_sk = connect_to_fd(serv_sk, 0);
 	if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
 		goto cleanup;
 
@@ -85,7 +81,7 @@ void test_cgroup_skb_sk_lookup(void)
 	 * differs from that of testing cgroup. Moving selftests process to
 	 * testing cgroup won't change cgroup id of an already created socket.
 	 */
-	out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+	out_sk = socket(AF_INET6, SOCK_STREAM, 0);
 	if (CHECK_FAIL(out_sk < 0))
 		return;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
index 17bbf76812ca..9229db2f5ca5 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -114,7 +114,7 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
 		goto close_bpf_object;
 	}
 
-	fd = connect_to_fd(family, type, server_fd);
+	fd = connect_to_fd(server_fd, 0);
 	if (fd < 0) {
 		err = -1;
 		goto close_bpf_object;
@@ -137,25 +137,25 @@ void test_connect_force_port(void)
 	if (CHECK_FAIL(cgroup_fd < 0))
 		return;
 
-	server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123);
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
 	close(server_fd);
 
-	server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124);
+	server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
 	close(server_fd);
 
-	server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123);
+	server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
 	close(server_fd);
 
-	server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124);
+	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
index c1168e4a9036..5a2a689dbb68 100644
--- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
+++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
@@ -23,7 +23,7 @@ void test_load_bytes_relative(void)
 	if (CHECK_FAIL(cgroup_fd < 0))
 		return;
 
-	server_fd = start_server(AF_INET, SOCK_STREAM);
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 
@@ -49,7 +49,7 @@ void test_load_bytes_relative(void)
 	if (CHECK_FAIL(err))
 		goto close_bpf_object;
 
-	client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
+	client_fd = connect_to_fd(server_fd, 0);
 	if (CHECK_FAIL(client_fd < 0))
 		goto close_bpf_object;
 	close(client_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 9013a0c01eed..d207e968e6b1 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -118,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd)
 		goto close_bpf_object;
 	}
 
-	client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
+	client_fd = connect_to_fd(server_fd, 0);
 	if (client_fd < 0) {
 		err = -1;
 		goto close_bpf_object;
@@ -161,7 +161,7 @@ void test_tcp_rtt(void)
 	if (CHECK_FAIL(cgroup_fd < 0))
 		return;
 
-	server_fd = start_server(AF_INET, SOCK_STREAM);
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 
-- 
cgit 


From 811d7e375d08312dba23f3b6bf7e58ec14aa5dcb Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 1 Jul 2020 17:48:58 -0700
Subject: bpf: selftests: Restore netns after each test

It is common for networking tests creating its netns and making its own
setting under this new netns (e.g. changing tcp sysctl).  If the test
forgot to restore to the original netns, it would affect the
result of other tests.

This patch saves the original netns at the beginning and then restores it
after every test.  Since the restore "setns()" is not expensive, it does it
on all tests without tracking if a test has created a new netns or not.

The new restore_netns() could also be done in test__end_subtest() such
that each subtest will get an automatic netns reset.  However,
the individual test would lose flexibility to have total control
on netns for its own subtests.  In some cases, forcing a test to do
unnecessary netns re-configure for each subtest is time consuming.
e.g. In my vm, forcing netns re-configure on each subtest in sk_assign.c
increased the runtime from 1s to 8s.  On top of that,  test_progs.c
is also doing per-test (instead of per-subtest) cleanup for cgroup.
Thus, this patch also does per-test restore_netns().  The only existing
per-subtest cleanup is reset_affinity() and no test is depending on this.
Thus, it is removed from test__end_subtest() to give a consistent
expectation to the individual tests.  test_progs.c only ensures
any affinity/netns/cgroup change made by an earlier test does not
affect the following tests.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200702004858.2103728-1-kafai@fb.com
---
 tools/testing/selftests/bpf/test_progs.c | 23 +++++++++++++++++++++--
 tools/testing/selftests/bpf/test_progs.h |  2 ++
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index ef05d2f0e7bb..104e833d0087 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -121,6 +121,24 @@ static void reset_affinity() {
 	}
 }
 
+static void save_netns(void)
+{
+	env.saved_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (env.saved_netns_fd == -1) {
+		perror("open(/proc/self/ns/net)");
+		exit(-1);
+	}
+}
+
+static void restore_netns(void)
+{
+	if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
+		stdio_restore();
+		perror("setns(CLONE_NEWNS)");
+		exit(-1);
+	}
+}
+
 void test__end_subtest()
 {
 	struct prog_test_def *test = env.test;
@@ -138,8 +156,6 @@ void test__end_subtest()
 	       test->test_num, test->subtest_num,
 	       test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
 
-	reset_affinity();
-
 	free(test->subtest_name);
 	test->subtest_name = NULL;
 }
@@ -655,6 +671,7 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
+	save_netns();
 	stdio_hijack();
 	for (i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
@@ -696,6 +713,7 @@ int main(int argc, char **argv)
 			test->error_cnt ? "FAIL" : "OK");
 
 		reset_affinity();
+		restore_netns();
 		if (test->need_cgroup_cleanup)
 			cleanup_cgroup_environment();
 	}
@@ -719,6 +737,7 @@ out:
 	free_str_set(&env.subtest_selector.blacklist);
 	free_str_set(&env.subtest_selector.whitelist);
 	free(env.subtest_selector.num_set);
+	close(env.saved_netns_fd);
 
 	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
 		return EXIT_FAILURE;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index ec31f382e7fd..6e09bf738473 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -80,6 +80,8 @@ struct test_env {
 	int sub_succ_cnt; /* successful sub-tests */
 	int fail_cnt; /* total failed tests + sub-tests */
 	int skip_cnt; /* skipped tests */
+
+	int saved_netns_fd;
 };
 
 extern struct test_env env;
-- 
cgit 


From 8ae4121bd89e3dce27b519ed469efbc15423af18 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 2 Jul 2020 21:31:59 -0700
Subject: bpf: Fix bpftool without skeleton code enabled

Fix segfault from bpftool by adding emit_obj_refs_plain when skeleton
code is disabled.

Tested by deleting BUILD_BPF_SKELS in Makefile. We found this doing
backports for Cilium when a testing image pulled in latest bpf-next
bpftool, but kept using an older clang-7.

  # ./bpftool prog show
  Error: bpftool built without PID iterator support
  3: cgroup_skb  tag 7be49e3934a125ba  gpl
          loaded_at 2020-07-01T08:01:29-0700  uid 0
  Segmentation fault

Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs")
Reported-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/159375071997.14984.17404504293832961401.stgit@john-XPS-13-9370
---
 tools/bpf/bpftool/pids.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index 2709be4de2b1..7d5416667c85 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -19,6 +19,7 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
 	return -ENOTSUP;
 }
 void delete_obj_refs_table(struct obj_refs_table *table) {}
+void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {}
 
 #else /* BPFTOOL_WITHOUT_SKELETONS */
 
-- 
cgit 


From 9ff79af3331277c69ac61cc75b2392eb3284e305 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 3 Jul 2020 11:17:19 -0700
Subject: selftests/bpf: Fix compilation error of bpf_iter_task_stack.c

BPF selftests show a compilation error as follows:

  libbpf: invalid relo for 'entries' in special section 0xfff2; forgot to
  initialize global var?..

Fix it by initializing 'entries' to zeros.

Fixes: c7568114bc56 ("selftests/bpf: Add bpf_iter test with bpf_get_task_stack()")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200703181719.3747072-1-songliubraving@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index e40d32a2ed93..50e59a2e142e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -7,7 +7,7 @@
 char _license[] SEC("license") = "GPL";
 
 #define MAX_STACK_TRACE_DEPTH   64
-unsigned long entries[MAX_STACK_TRACE_DEPTH];
+unsigned long entries[MAX_STACK_TRACE_DEPTH] = {};
 #define SIZE_OF_ULONG (sizeof(unsigned long))
 
 SEC("iter/task")
-- 
cgit 


From b0d754ef35041f921a1a03319b570fdfac9a027e Mon Sep 17 00:00:00 2001
From: tannerlove <tannerlove@google.com>
Date: Fri, 3 Jul 2020 14:53:06 -0400
Subject: selftests/net: add ipv6 test coverage in rxtimestamp test

Add the options --ipv4, --ipv6 to specify running over ipv4 and/or
ipv6. If neither is specified, then run both.

Signed-off-by: Tanner Love <tannerlove@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rxtimestamp.c | 85 +++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index d4ea86a13e52..c599d371cbbe 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -117,6 +117,8 @@ static struct option long_options[] = {
 	{ "udp", no_argument, 0, 'u' },
 	{ "ip", no_argument, 0, 'i' },
 	{ "strict", no_argument, 0, 'S' },
+	{ "ipv4", no_argument, 0, '4' },
+	{ "ipv6", no_argument, 0, '6' },
 	{ NULL, 0, NULL, 0 },
 };
 
@@ -272,37 +274,55 @@ void config_so_flags(int rcv, struct options o)
 		error(1, errno, "Failed to set SO_TIMESTAMPING");
 }
 
-bool run_test_case(struct socket_type s, struct test_case t)
+bool run_test_case(struct socket_type *s, int test_num, char ip_version,
+		   bool strict)
 {
-	int port = (s.type == SOCK_RAW) ? 0 : next_port++;
+	union {
+		struct sockaddr_in6 addr6;
+		struct sockaddr_in addr4;
+		struct sockaddr addr_un;
+	} addr;
 	int read_size = op_size;
-	struct sockaddr_in addr;
+	int src, dst, rcv, port;
+	socklen_t addr_size;
 	bool failed = false;
-	int src, dst, rcv;
 
-	src = socket(AF_INET, s.type, s.protocol);
+	port = (s->type == SOCK_RAW) ? 0 : next_port++;
+	memset(&addr, 0, sizeof(addr));
+	if (ip_version == '4') {
+		addr.addr4.sin_family = AF_INET;
+		addr.addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		addr.addr4.sin_port = htons(port);
+		addr_size = sizeof(addr.addr4);
+		if (s->type == SOCK_RAW)
+			read_size += 20;  /* for IPv4 header */
+	} else {
+		addr.addr6.sin6_family = AF_INET6;
+		addr.addr6.sin6_addr = in6addr_loopback;
+		addr.addr6.sin6_port = htons(port);
+		addr_size = sizeof(addr.addr6);
+	}
+	printf("Starting testcase %d over ipv%c...\n", test_num, ip_version);
+	src = socket(addr.addr_un.sa_family, s->type,
+		     s->protocol);
 	if (src < 0)
 		error(1, errno, "Failed to open src socket");
 
-	dst = socket(AF_INET, s.type, s.protocol);
+	dst = socket(addr.addr_un.sa_family, s->type,
+		     s->protocol);
 	if (dst < 0)
 		error(1, errno, "Failed to open dst socket");
 
-	memset(&addr, 0, sizeof(addr));
-	addr.sin_family = AF_INET;
-	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-	addr.sin_port = htons(port);
-
-	if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+	if (bind(dst, &addr.addr_un, addr_size) < 0)
 		error(1, errno, "Failed to bind to port %d", port);
 
-	if (s.type == SOCK_STREAM && (listen(dst, 1) < 0))
+	if (s->type == SOCK_STREAM && (listen(dst, 1) < 0))
 		error(1, errno, "Failed to listen");
 
-	if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+	if (connect(src, &addr.addr_un, addr_size) < 0)
 		error(1, errno, "Failed to connect");
 
-	if (s.type == SOCK_STREAM) {
+	if (s->type == SOCK_STREAM) {
 		rcv = accept(dst, NULL, NULL);
 		if (rcv < 0)
 			error(1, errno, "Failed to accept");
@@ -311,17 +331,22 @@ bool run_test_case(struct socket_type s, struct test_case t)
 		rcv = dst;
 	}
 
-	config_so_flags(rcv, t.sockopt);
+	config_so_flags(rcv, test_cases[test_num].sockopt);
 	usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
 	do_send(src);
 
-	if (s.type == SOCK_RAW)
-		read_size += 20;  /* for IP header */
-	failed = do_recv(rcv, read_size, t.expected);
+	failed = do_recv(rcv, read_size, test_cases[test_num].expected);
 
 	close(rcv);
 	close(src);
 
+	if (failed) {
+		printf("FAILURE in testcase %d over ipv%c ", test_num,
+		       ip_version);
+		print_test_case(&test_cases[test_num]);
+		if (!strict && test_cases[test_num].warn_on_fail)
+			failed = false;
+	}
 	return failed;
 }
 
@@ -329,6 +354,8 @@ int main(int argc, char **argv)
 {
 	bool all_protocols = true;
 	bool all_tests = true;
+	bool cfg_ipv4 = false;
+	bool cfg_ipv6 = false;
 	bool strict = false;
 	int arg_index = 0;
 	int failures = 0;
@@ -369,6 +396,12 @@ int main(int argc, char **argv)
 		case 'S':
 			strict = true;
 			break;
+		case '4':
+			cfg_ipv4 = true;
+			break;
+		case '6':
+			cfg_ipv6 = true;
+			break;
 		default:
 			error(1, 0, "Failed to parse parameters.");
 		}
@@ -382,14 +415,14 @@ int main(int argc, char **argv)
 		for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
 			if (!all_tests && !test_cases[t].enabled)
 				continue;
-
-			printf("Starting testcase %d...\n", t);
-			if (run_test_case(socket_types[s], test_cases[t])) {
-				if (strict || !test_cases[t].warn_on_fail)
+			if (cfg_ipv4 || !cfg_ipv6)
+				if (run_test_case(&socket_types[s], t, '4',
+						  strict))
+					failures++;
+			if (cfg_ipv6 || !cfg_ipv4)
+				if (run_test_case(&socket_types[s], t, '6',
+						  strict))
 					failures++;
-				printf("FAILURE in test case ");
-				print_test_case(&test_cases[t]);
-			}
 		}
 	}
 	if (!failures)
-- 
cgit 


From f551e2fdaf81b7b561bb5dc590da13f21c4c1295 Mon Sep 17 00:00:00 2001
From: Tanner Love <tannerlove@google.com>
Date: Sat, 4 Jul 2020 16:45:14 -0400
Subject: selftests/net: update initializer syntax to use c99 designators

Before, clang version 9 threw errors such as: error:
use of GNU old-style field designator extension [-Werror,-Wgnu-designator]
                { tstamp: true, swtstamp: true }
                  ^~~~~~~
                  .tstamp =
Fix these warnings in tools/testing/selftests/net in the same manner as
commit 121e357ac728 ("selftests/harness: Update named initializer syntax").
N.B. rxtimestamp.c is the only affected file in the directory.

Signed-off-by: Tanner Love <tannerlove@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rxtimestamp.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index c599d371cbbe..221fdece47d4 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -68,44 +68,44 @@ static struct socket_type socket_types[] = {
 static struct test_case test_cases[] = {
 	{ {}, {} },
 	{
-		{ so_timestamp: 1 },
-		{ tstamp: true }
+		{ .so_timestamp = 1 },
+		{ .tstamp = true }
 	},
 	{
-		{ so_timestampns: 1 },
-		{ tstampns: true }
+		{ .so_timestampns = 1 },
+		{ .tstampns = true }
 	},
 	{
-		{ so_timestamp: 1, so_timestampns: 1 },
-		{ tstampns: true }
+		{ .so_timestamp = 1, .so_timestampns = 1 },
+		{ .tstampns = true }
 	},
 	{
-		{ so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE },
+		{ .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE },
 		{}
 	},
 	{
 		/* Loopback device does not support hw timestamps. */
-		{ so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE },
+		{ .so_timestamping = SOF_TIMESTAMPING_RX_HARDWARE },
 		{}
 	},
 	{
-		{ so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
-		warn_on_fail : true
+		{ .so_timestamping = SOF_TIMESTAMPING_SOFTWARE },
+		.warn_on_fail = true
 	},
 	{
-		{ so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
+		{ .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE
 			| SOF_TIMESTAMPING_RX_HARDWARE },
 		{}
 	},
 	{
-		{ so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+		{ .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
 			| SOF_TIMESTAMPING_RX_SOFTWARE },
-		{ swtstamp: true }
+		{ .swtstamp = true }
 	},
 	{
-		{ so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+		{ .so_timestamp = 1, .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
 			| SOF_TIMESTAMPING_RX_SOFTWARE },
-		{ tstamp: true, swtstamp: true }
+		{ .tstamp = true, .swtstamp = true }
 	},
 };
 
-- 
cgit 


From 0b8241fe3c4e172332f9fa690db6689e78e8e27f Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Mon, 6 Jul 2020 14:44:08 +0200
Subject: selftests: mptcp: capture pcap on both sides

When investigating performance issues that involve latency / loss /
reordering it is useful to have the pcap from the sender-side as it
allows to easier infer the state of the sender's congestion-control,
loss-recovery, etc.

Allow the selftests to capture a pcap on both sender and receiver so
that this information is not lost when reproducing.

This patch also improves the file names. Instead of:

  ns4-5ee79a56-X4O6gS-ns3-5ee79a56-X4O6gS-MPTCP-MPTCP-10.0.3.1.pcap

We now have something like for the same test:

  5ee79a56-X4O6gS-ns3-ns4-MPTCP-MPTCP-10.0.3.1-10030-connector.pcap
  5ee79a56-X4O6gS-ns3-ns4-MPTCP-MPTCP-10.0.3.1-10030-listener.pcap

It was a connection from ns3 to ns4, better to start with ns3 then. The
port is also added, easier to find the trace we want.

Co-developed-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 8f7145c413b9..c0589e071f20 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -395,10 +395,14 @@ do_transfer()
 			capuser="-Z $SUDO_USER"
 		fi
 
-		local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap"
+		local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
+		local capopt="-i any -s 65535 -B 32768 ${capuser}"
 
-		ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
-		local cappid=$!
+		ip netns exec ${listener_ns}  tcpdump ${capopt} -w "${capfile}-listener.pcap"  >> "${capout}" 2>&1 &
+		local cappid_listener=$!
+
+		ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+		local cappid_connector=$!
 
 		sleep 1
 	fi
@@ -423,7 +427,8 @@ do_transfer()
 
 	if $capture; then
 		sleep 1
-		kill $cappid
+		kill ${cappid_listener}
+		kill ${cappid_connector}
 	fi
 
 	local duration
-- 
cgit 


From e8b012e9fabe7eafc1b2c72414e174547683860d Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 6 Jul 2020 16:01:26 -0700
Subject: libbpf: Add support for BPF_CGROUP_INET_SOCK_RELEASE

Add auto-detection for the cgroup/sock_release programs.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200706230128.4073544-3-sdf@google.com
---
 tools/include/uapi/linux/bpf.h | 1 +
 tools/lib/bpf/libbpf.c         | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index da9bf35a26f8..548a749aebb3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -226,6 +226,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET4_GETSOCKNAME,
 	BPF_CGROUP_INET6_GETSOCKNAME,
 	BPF_XDP_DEVMAP,
+	BPF_CGROUP_INET_SOCK_RELEASE,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4ea7f4f1a691..88a483627a2b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6917,6 +6917,10 @@ static const struct bpf_sec_def section_defs[] = {
 	BPF_APROG_SEC("cgroup_skb/egress",	BPF_PROG_TYPE_CGROUP_SKB,
 						BPF_CGROUP_INET_EGRESS),
 	BPF_APROG_COMPAT("cgroup/skb",		BPF_PROG_TYPE_CGROUP_SKB),
+	BPF_EAPROG_SEC("cgroup/sock_create",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET_SOCK_CREATE),
+	BPF_EAPROG_SEC("cgroup/sock_release",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET_SOCK_RELEASE),
 	BPF_APROG_SEC("cgroup/sock",		BPF_PROG_TYPE_CGROUP_SOCK,
 						BPF_CGROUP_INET_SOCK_CREATE),
 	BPF_EAPROG_SEC("cgroup/post_bind4",	BPF_PROG_TYPE_CGROUP_SOCK,
-- 
cgit 


From db94cc0b4805968a0357ed2507730cdf77adf174 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 6 Jul 2020 16:01:27 -0700
Subject: bpftool: Add support for BPF_CGROUP_INET_SOCK_RELEASE

Support attaching to BPF_CGROUP_INET_SOCK_RELEASE and properly
display attach type upon prog dump.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200706230128.4073544-4-sdf@google.com
---
 tools/bpf/bpftool/common.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 18e5604fe260..29f4e7611ae8 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -33,6 +33,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
 	[BPF_CGROUP_INET_INGRESS]	= "ingress",
 	[BPF_CGROUP_INET_EGRESS]	= "egress",
 	[BPF_CGROUP_INET_SOCK_CREATE]	= "sock_create",
+	[BPF_CGROUP_INET_SOCK_RELEASE]	= "sock_release",
 	[BPF_CGROUP_SOCK_OPS]		= "sock_ops",
 	[BPF_CGROUP_DEVICE]		= "device",
 	[BPF_CGROUP_INET4_BIND]		= "bind4",
-- 
cgit 


From 65ffd797861a44ff97081de1db01e4aef716ed46 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 6 Jul 2020 16:01:28 -0700
Subject: selftests/bpf: Test BPF_CGROUP_INET_SOCK_RELEASE

Simple test that enforces a single SOCK_DGRAM socket per cgroup.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200706230128.4073544-5-sdf@google.com
---
 tools/testing/selftests/bpf/prog_tests/udp_limit.c | 75 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/udp_limit.c      | 42 ++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/udp_limit.c
 create mode 100644 tools/testing/selftests/bpf/progs/udp_limit.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
new file mode 100644
index 000000000000..2aba09d4d01b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "udp_limit.skel.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+static int duration;
+
+void test_udp_limit(void)
+{
+	struct udp_limit *skel;
+	int fd1 = -1, fd2 = -1;
+	int cgroup_fd;
+
+	cgroup_fd = test__join_cgroup("/udp_limit");
+	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+		return;
+
+	skel = udp_limit__open_and_load();
+	if (CHECK(!skel, "skel-load", "errno %d", errno))
+		goto close_cgroup_fd;
+
+	skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+	skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
+	if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release),
+		  "cg-attach", "sock %ld sock_release %ld",
+		  PTR_ERR(skel->links.sock),
+		  PTR_ERR(skel->links.sock_release)))
+		goto close_skeleton;
+
+	/* BPF program enforces a single UDP socket per cgroup,
+	 * verify that.
+	 */
+	fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+	if (CHECK(fd1 < 0, "fd1", "errno %d", errno))
+		goto close_skeleton;
+
+	fd2 = socket(AF_INET, SOCK_DGRAM, 0);
+	if (CHECK(fd2 >= 0, "fd2", "errno %d", errno))
+		goto close_skeleton;
+
+	/* We can reopen again after close. */
+	close(fd1);
+	fd1 = -1;
+
+	fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+	if (CHECK(fd1 < 0, "fd1-again", "errno %d", errno))
+		goto close_skeleton;
+
+	/* Make sure the program was invoked the expected
+	 * number of times:
+	 * - open fd1           - BPF_CGROUP_INET_SOCK_CREATE
+	 * - attempt to openfd2 - BPF_CGROUP_INET_SOCK_CREATE
+	 * - close fd1          - BPF_CGROUP_INET_SOCK_RELEASE
+	 * - open fd1 again     - BPF_CGROUP_INET_SOCK_CREATE
+	 */
+	if (CHECK(skel->bss->invocations != 4, "bss-invocations",
+		  "invocations=%d", skel->bss->invocations))
+		goto close_skeleton;
+
+	/* We should still have a single socket in use */
+	if (CHECK(skel->bss->in_use != 1, "bss-in_use",
+		  "in_use=%d", skel->bss->in_use))
+		goto close_skeleton;
+
+close_skeleton:
+	if (fd1 >= 0)
+		close(fd1);
+	if (fd2 >= 0)
+		close(fd2);
+	udp_limit__destroy(skel);
+close_cgroup_fd:
+	close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c
new file mode 100644
index 000000000000..8429b22525a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/udp_limit.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <sys/socket.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int invocations = 0, in_use = 0;
+
+SEC("cgroup/sock_create")
+int sock(struct bpf_sock *ctx)
+{
+	__u32 key;
+
+	if (ctx->type != SOCK_DGRAM)
+		return 1;
+
+	__sync_fetch_and_add(&invocations, 1);
+
+	if (in_use > 0) {
+		/* BPF_CGROUP_INET_SOCK_RELEASE is _not_ called
+		 * when we return an error from the BPF
+		 * program!
+		 */
+		return 0;
+	}
+
+	__sync_fetch_and_add(&in_use, 1);
+	return 1;
+}
+
+SEC("cgroup/sock_release")
+int sock_release(struct bpf_sock *ctx)
+{
+	__u32 key;
+
+	if (ctx->type != SOCK_DGRAM)
+		return 1;
+
+	__sync_fetch_and_add(&invocations, 1);
+	__sync_fetch_and_add(&in_use, -1);
+	return 1;
+}
-- 
cgit 


From 5cfd607b49db2b7c05fccc2915a528d97bc8f17d Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Wed, 8 Jul 2020 03:48:55 +0900
Subject: selftests: bpf: Remove unused bpf_map_def_legacy struct

samples/bpf no longer use bpf_map_def_legacy and instead use the
libbpf's bpf_map_def or new BTF-defined MAP format. This commit removes
unused bpf_map_def_legacy struct from selftests/bpf/bpf_legacy.h.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200707184855.30968-5-danieltimlee@gmail.com
---
 tools/testing/selftests/bpf/bpf_legacy.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
index 6f8988738bc1..719ab56cdb5d 100644
--- a/tools/testing/selftests/bpf/bpf_legacy.h
+++ b/tools/testing/selftests/bpf/bpf_legacy.h
@@ -2,20 +2,6 @@
 #ifndef __BPF_LEGACY__
 #define __BPF_LEGACY__
 
-/*
- * legacy bpf_map_def with extra fields supported only by bpf_load(), do not
- * use outside of samples/bpf
- */
-struct bpf_map_def_legacy {
-	unsigned int type;
-	unsigned int key_size;
-	unsigned int value_size;
-	unsigned int max_entries;
-	unsigned int map_flags;
-	unsigned int inner_map_idx;
-	unsigned int numa_node;
-};
-
 #define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)		\
 	struct ____btf_map_##name {				\
 		type_key key;					\
-- 
cgit 


From 625eb8e85e913273cd9441329a82b4e3496b30cd Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@netronome.com>
Date: Wed, 8 Jul 2020 13:08:27 +0200
Subject: bpf: Fix another bpftool segfault without skeleton code enabled

emit_obj_refs_json needs to added the same as with emit_obj_refs_plain
to prevent segfaults, similar to Commit "8ae4121bd89e bpf: Fix bpftool
without skeleton code enabled"). See the error below:

    # ./bpftool -p prog
    {
        "error": "bpftool built without PID iterator support"
    },[{
            "id": 2,
            "type": "cgroup_skb",
            "tag": "7be49e3934a125ba",
            "gpl_compatible": true,
            "loaded_at": 1594052789,
            "uid": 0,
            "bytes_xlated": 296,
            "jited": true,
            "bytes_jited": 203,
            "bytes_memlock": 4096,
            "map_ids": [2,3
    Segmentation fault (core dumped)

The same happens for ./bpftool -p map, as well as ./bpftool -j prog/map.

Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs")
Signed-off-by: Louis Peens <louis.peens@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200708110827.7673-1-louis.peens@netronome.com
---
 tools/bpf/bpftool/pids.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index 7d5416667c85..c0d23ce4a6f4 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -20,6 +20,7 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
 }
 void delete_obj_refs_table(struct obj_refs_table *table) {}
 void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {}
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_writer) {}
 
 #else /* BPFTOOL_WITHOUT_SKELETONS */
 
-- 
cgit 


From 3220fb667842a9725cbb71656f406eadb03c094b Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 7 Jul 2020 09:12:19 +0200
Subject: selftests/bpf: test_progs use another shell exit on non-actions

This is a follow up adjustment to commit 6c92bd5cd465 ("selftests/bpf:
Test_progs indicate to shell on non-actions"), that returns shell exit
indication EXIT_FAILURE (value 1) when user selects a non-existing test.

The problem with using EXIT_FAILURE is that a shell script cannot tell
the difference between a non-existing test and the test failing.

This patch uses value 2 as shell exit indication.
(Aside note unrecognized option parameters use value 64).

Fixes: 6c92bd5cd465 ("selftests/bpf: Test_progs indicate to shell on non-actions")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159410593992.1093222.90072558386094370.stgit@firesoul
---
 tools/testing/selftests/bpf/test_progs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 104e833d0087..65d3f8686e29 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -12,6 +12,8 @@
 #include <string.h>
 #include <execinfo.h> /* backtrace */
 
+#define EXIT_NO_TEST		2
+
 /* defined in test_progs.h */
 struct test_env env = {};
 
@@ -740,7 +742,7 @@ out:
 	close(env.saved_netns_fd);
 
 	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
-		return EXIT_FAILURE;
+		return EXIT_NO_TEST;
 
 	return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
 }
-- 
cgit 


From b8c50df0cb3eb9008f8372e4ff0317eee993b8d1 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 7 Jul 2020 09:12:25 +0200
Subject: selftests/bpf: test_progs avoid minus shell exit codes

There are a number of places in test_progs that use minus-1 as the argument
to exit(). This is confusing as a process exit status is masked to be a
number between 0 and 255 as defined in man exit(3). Thus, users will see
status 255 instead of minus-1.

This patch use positive exit code 3 instead of minus-1. These cases are put
in the same group of infrastructure setup errors.

Fixes: fd27b1835e70 ("selftests/bpf: Reset process and thread affinity after each test/sub-test")
Fixes: 811d7e375d08 ("bpf: selftests: Restore netns after each test")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159410594499.1093222.11080787853132708654.stgit@firesoul
---
 tools/testing/selftests/bpf/test_progs.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 65d3f8686e29..b1e4dadacd9b 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -13,6 +13,7 @@
 #include <execinfo.h> /* backtrace */
 
 #define EXIT_NO_TEST		2
+#define EXIT_ERR_SETUP_INFRA	3
 
 /* defined in test_progs.h */
 struct test_env env = {};
@@ -113,13 +114,13 @@ static void reset_affinity() {
 	if (err < 0) {
 		stdio_restore();
 		fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
-		exit(-1);
+		exit(EXIT_ERR_SETUP_INFRA);
 	}
 	err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
 	if (err < 0) {
 		stdio_restore();
 		fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
-		exit(-1);
+		exit(EXIT_ERR_SETUP_INFRA);
 	}
 }
 
@@ -128,7 +129,7 @@ static void save_netns(void)
 	env.saved_netns_fd = open("/proc/self/ns/net", O_RDONLY);
 	if (env.saved_netns_fd == -1) {
 		perror("open(/proc/self/ns/net)");
-		exit(-1);
+		exit(EXIT_ERR_SETUP_INFRA);
 	}
 }
 
@@ -137,7 +138,7 @@ static void restore_netns(void)
 	if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
 		stdio_restore();
 		perror("setns(CLONE_NEWNS)");
-		exit(-1);
+		exit(EXIT_ERR_SETUP_INFRA);
 	}
 }
 
-- 
cgit 


From bfc96656a766bd0566565d8a7b6232b2b036fdfb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 7 Jul 2020 18:53:13 -0700
Subject: libbpf: Make BTF finalization strict

With valid ELF and valid BTF, there is no reason (apart from bugs) why BTF
finalization should fail. So make it strict and return error if it fails. This
makes CO-RE relocation more reliable, as they are not going to be just
silently skipped, if BTF finalization failed.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200708015318.3827358-2-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 88a483627a2b..efd857ebd5ee 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2473,19 +2473,11 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
 		return 0;
 
 	err = btf__finalize_data(obj, obj->btf);
-	if (!err)
-		return 0;
-
-	pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
-	btf__free(obj->btf);
-	obj->btf = NULL;
-	btf_ext__free(obj->btf_ext);
-	obj->btf_ext = NULL;
-
-	if (libbpf_needs_btf(obj)) {
-		pr_warn("BTF is required, but is missing or corrupted.\n");
-		return -ENOENT;
+	if (err) {
+		pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
+		return err;
 	}
+
 	return 0;
 }
 
-- 
cgit 


From 81372e121802fd57892a0b44d93cc747d9568627 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 7 Jul 2020 18:53:14 -0700
Subject: libbpf: Add btf__set_fd() for more control over loaded BTF FD

Add setter for BTF FD to allow application more fine-grained control in more
advanced scenarios. Storing BTF FD inside `struct btf` provides little benefit
and probably would be better done differently (e.g., btf__load() could just
return FD on success), but we are stuck with this due to backwards
compatibility. The main problem is that it's impossible to load BTF and than
free user-space memory, but keep FD intact, because `struct btf` assumes
ownership of that FD upon successful load and will attempt to close it during
btf__free(). To allow callers (e.g., libbpf itself for BTF sanitization) to
have more control over this, add btf__set_fd() to allow to reset FD
arbitrarily, if necessary.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200708015318.3827358-3-andriin@fb.com
---
 tools/lib/bpf/btf.c      | 7 ++++++-
 tools/lib/bpf/btf.h      | 1 +
 tools/lib/bpf/libbpf.map | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index bfef3d606b54..c8861c9e3635 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -389,7 +389,7 @@ void btf__free(struct btf *btf)
 	if (!btf)
 		return;
 
-	if (btf->fd != -1)
+	if (btf->fd >= 0)
 		close(btf->fd);
 
 	free(btf->data);
@@ -700,6 +700,11 @@ int btf__fd(const struct btf *btf)
 	return btf->fd;
 }
 
+void btf__set_fd(struct btf *btf, int fd)
+{
+	btf->fd = fd;
+}
+
 const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
 {
 	*size = btf->data_size;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 06cd1731c154..173eff23c472 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -79,6 +79,7 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
 LIBBPF_API int btf__fd(const struct btf *btf);
+LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
 LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 6544d2cd1ed6..c5d5c7664c3b 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -288,4 +288,5 @@ LIBBPF_0.1.0 {
 		bpf_map__value_size;
 		bpf_program__autoload;
 		bpf_program__set_autoload;
+		btf__set_fd;
 } LIBBPF_0.0.9;
-- 
cgit 


From 0f0e55d8247c0742a9b026fc097fcc605383b9db Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 7 Jul 2020 18:53:15 -0700
Subject: libbpf: Improve BTF sanitization handling

Change sanitization process to preserve original BTF, which might be used by
libbpf itself for Kconfig externs, CO-RE relocs, etc, even if kernel is old
and doesn't support BTF. To achieve that, if libbpf detects the need for BTF
sanitization, it would clone original BTF, sanitize it in-place, attempt to
load it into kernel, and if successful, will preserve loaded BTF FD in
original `struct btf`, while freeing sanitized local copy.

If kernel doesn't support any BTF, original btf and btf_ext will still be
preserved to be used later for CO-RE relocation and other BTF-dependent libbpf
features, which don't dependon kernel BTF support.

Patch takes care to not specify BTF and BTF.ext features when loading BPF
programs and/or maps, if it was detected that kernel doesn't support BTF
features.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200708015318.3827358-4-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 103 ++++++++++++++++++++++++++++---------------------
 1 file changed, 58 insertions(+), 45 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index efd857ebd5ee..460e5790a95f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2338,18 +2338,23 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx)
 	return false;
 }
 
-static void bpf_object__sanitize_btf(struct bpf_object *obj)
+static bool btf_needs_sanitization(struct bpf_object *obj)
+{
+	bool has_func_global = obj->caps.btf_func_global;
+	bool has_datasec = obj->caps.btf_datasec;
+	bool has_func = obj->caps.btf_func;
+
+	return !has_func || !has_datasec || !has_func_global;
+}
+
+static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 {
 	bool has_func_global = obj->caps.btf_func_global;
 	bool has_datasec = obj->caps.btf_datasec;
 	bool has_func = obj->caps.btf_func;
-	struct btf *btf = obj->btf;
 	struct btf_type *t;
 	int i, j, vlen;
 
-	if (!obj->btf || (has_func && has_datasec && has_func_global))
-		return;
-
 	for (i = 1; i <= btf__get_nr_types(btf); i++) {
 		t = (struct btf_type *)btf__type_by_id(btf, i);
 
@@ -2402,17 +2407,6 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj)
 	}
 }
 
-static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
-{
-	if (!obj->btf_ext)
-		return;
-
-	if (!obj->caps.btf_func) {
-		btf_ext__free(obj->btf_ext);
-		obj->btf_ext = NULL;
-	}
-}
-
 static bool libbpf_needs_btf(const struct bpf_object *obj)
 {
 	return obj->efile.btf_maps_shndx >= 0 ||
@@ -2530,30 +2524,50 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
 
 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 {
+	struct btf *kern_btf = obj->btf;
+	bool btf_mandatory, sanitize;
 	int err = 0;
 
 	if (!obj->btf)
 		return 0;
 
-	bpf_object__sanitize_btf(obj);
-	bpf_object__sanitize_btf_ext(obj);
+	sanitize = btf_needs_sanitization(obj);
+	if (sanitize) {
+		const void *orig_data;
+		void *san_data;
+		__u32 sz;
 
-	err = btf__load(obj->btf);
-	if (err) {
-		pr_warn("Error loading %s into kernel: %d.\n",
-			BTF_ELF_SEC, err);
-		btf__free(obj->btf);
-		obj->btf = NULL;
-		/* btf_ext can't exist without btf, so free it as well */
-		if (obj->btf_ext) {
-			btf_ext__free(obj->btf_ext);
-			obj->btf_ext = NULL;
-		}
+		/* clone BTF to sanitize a copy and leave the original intact */
+		orig_data = btf__get_raw_data(obj->btf, &sz);
+		san_data = malloc(sz);
+		if (!san_data)
+			return -ENOMEM;
+		memcpy(san_data, orig_data, sz);
+		kern_btf = btf__new(san_data, sz);
+		if (IS_ERR(kern_btf))
+			return PTR_ERR(kern_btf);
 
-		if (kernel_needs_btf(obj))
-			return err;
+		bpf_object__sanitize_btf(obj, kern_btf);
 	}
-	return 0;
+
+	err = btf__load(kern_btf);
+	if (sanitize) {
+		if (!err) {
+			/* move fd to libbpf's BTF */
+			btf__set_fd(obj->btf, btf__fd(kern_btf));
+			btf__set_fd(kern_btf, -1);
+		}
+		btf__free(kern_btf);
+	}
+	if (err) {
+		btf_mandatory = kernel_needs_btf(obj);
+		pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
+			btf_mandatory ? "BTF is mandatory, can't proceed."
+				      : "BTF is optional, ignoring.");
+		if (!btf_mandatory)
+			err = 0;
+	}
+	return err;
 }
 
 static int bpf_object__elf_collect(struct bpf_object *obj)
@@ -3777,7 +3791,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 	create_attr.btf_fd = 0;
 	create_attr.btf_key_type_id = 0;
 	create_attr.btf_value_type_id = 0;
-	if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
+	if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
 		create_attr.btf_fd = btf__fd(obj->btf);
 		create_attr.btf_key_type_id = map->btf_key_type_id;
 		create_attr.btf_value_type_id = map->btf_value_type_id;
@@ -5361,18 +5375,17 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 		load_attr.kern_version = kern_version;
 		load_attr.prog_ifindex = prog->prog_ifindex;
 	}
-	/* if .BTF.ext was loaded, kernel supports associated BTF for prog */
-	if (prog->obj->btf_ext)
-		btf_fd = bpf_object__btf_fd(prog->obj);
-	else
-		btf_fd = -1;
-	load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0;
-	load_attr.func_info = prog->func_info;
-	load_attr.func_info_rec_size = prog->func_info_rec_size;
-	load_attr.func_info_cnt = prog->func_info_cnt;
-	load_attr.line_info = prog->line_info;
-	load_attr.line_info_rec_size = prog->line_info_rec_size;
-	load_attr.line_info_cnt = prog->line_info_cnt;
+	/* specify func_info/line_info only if kernel supports them */
+	btf_fd = bpf_object__btf_fd(prog->obj);
+	if (btf_fd >= 0 && prog->obj->caps.btf_func) {
+		load_attr.prog_btf_fd = btf_fd;
+		load_attr.func_info = prog->func_info;
+		load_attr.func_info_rec_size = prog->func_info_rec_size;
+		load_attr.func_info_cnt = prog->func_info_cnt;
+		load_attr.line_info = prog->line_info;
+		load_attr.line_info_rec_size = prog->line_info_rec_size;
+		load_attr.line_info_cnt = prog->line_info_cnt;
+	}
 	load_attr.log_level = prog->log_level;
 	load_attr.prog_flags = prog->prog_flags;
 
-- 
cgit 


From fcda189a5133b6fe50eb63d1062a15be4df9e3de Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 7 Jul 2020 18:53:16 -0700
Subject: selftests/bpf: Add test relying only on CO-RE and no recent kernel
 features

Add a test that relies on CO-RE, but doesn't expect any of the recent
features, not available on old kernels. This is useful for Travis CI tests
running against very old kernels (e.g., libbpf has 4.9 kernel testing now), to
verify that CO-RE still works, even if kernel itself doesn't support BTF yet,
as long as there is .BTF embedded into vmlinux image by pahole. Given most of
CO-RE doesn't require any kernel awareness of BTF, it is a useful test to
validate that libbpf's BTF sanitization is working well even with ancient
kernels.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200708015318.3827358-5-andriin@fb.com
---
 .../testing/selftests/bpf/prog_tests/core_retro.c  | 33 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/test_core_retro.c  | 30 ++++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/core_retro.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_core_retro.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c
new file mode 100644
index 000000000000..78e30d3a23d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_core_retro.skel.h"
+
+void test_core_retro(void)
+{
+	int err, zero = 0, res, duration = 0;
+	struct test_core_retro *skel;
+
+	/* load program */
+	skel = test_core_retro__open_and_load();
+	if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
+		goto out_close;
+
+	/* attach probe */
+	err = test_core_retro__attach(skel);
+	if (CHECK(err, "attach_kprobe", "err %d\n", err))
+		goto out_close;
+
+	/* trigger */
+	usleep(1);
+
+	err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.results), &zero, &res);
+	if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno))
+		goto out_close;
+
+	CHECK(res != getpid(), "pid_check", "got %d != exp %d\n", res, getpid());
+
+out_close:
+	test_core_retro__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_retro.c b/tools/testing/selftests/bpf/progs/test_core_retro.c
new file mode 100644
index 000000000000..75c60c3c29cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_retro.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct task_struct {
+	int tgid;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} results SEC(".maps");
+
+SEC("tp/raw_syscalls/sys_enter")
+int handle_sys_enter(void *ctx)
+{
+	struct task_struct *task = (void *)bpf_get_current_task();
+	int tgid = BPF_CORE_READ(task, tgid);
+	int zero = 0;
+
+	bpf_map_update_elem(&results, &zero, &tgid, 0);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 0e289487308236903b19273f2ddb4f0adf732b9e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 7 Jul 2020 18:53:17 -0700
Subject: libbpf: Handle missing BPF_OBJ_GET_INFO_BY_FD gracefully in
 perf_buffer

perf_buffer__new() is relying on BPF_OBJ_GET_INFO_BY_FD availability for few
sanity checks. OBJ_GET_INFO for maps is actually much more recent feature than
perf_buffer support itself, so this causes unnecessary problems on old kernels
before BPF_OBJ_GET_INFO_BY_FD was added.

This patch makes those sanity checks optional and just assumes best if command
is not supported. If user specified something incorrectly (e.g., wrong map
type), kernel will reject it later anyway, except user won't get a nice
explanation as to why it failed. This seems like a good trade off for
supporting perf_buffer on old kernels.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200708015318.3827358-6-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 460e5790a95f..6602eb479596 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8591,7 +8591,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
 					      struct perf_buffer_params *p)
 {
 	const char *online_cpus_file = "/sys/devices/system/cpu/online";
-	struct bpf_map_info map = {};
+	struct bpf_map_info map;
 	char msg[STRERR_BUFSIZE];
 	struct perf_buffer *pb;
 	bool *online = NULL;
@@ -8604,19 +8604,28 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
 		return ERR_PTR(-EINVAL);
 	}
 
+	/* best-effort sanity checks */
+	memset(&map, 0, sizeof(map));
 	map_info_len = sizeof(map);
 	err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
 	if (err) {
 		err = -errno;
-		pr_warn("failed to get map info for map FD %d: %s\n",
-			map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
-		return ERR_PTR(err);
-	}
-
-	if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
-		pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
-			map.name);
-		return ERR_PTR(-EINVAL);
+		/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
+		 * -EBADFD, -EFAULT, or -E2BIG on real error
+		 */
+		if (err != -EINVAL) {
+			pr_warn("failed to get map info for map FD %d: %s\n",
+				map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+			return ERR_PTR(err);
+		}
+		pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
+			 map_fd);
+	} else {
+		if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+			pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+				map.name);
+			return ERR_PTR(-EINVAL);
+		}
 	}
 
 	pb = calloc(1, sizeof(*pb));
@@ -8648,7 +8657,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
 			err = pb->cpu_cnt;
 			goto error;
 		}
-		if (map.max_entries < pb->cpu_cnt)
+		if (map.max_entries && map.max_entries < pb->cpu_cnt)
 			pb->cpu_cnt = map.max_entries;
 	}
 
-- 
cgit 


From 6984cbc6dfa280687367b9660d8c830518239851 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 7 Jul 2020 18:53:18 -0700
Subject: selftests/bpf: Switch perf_buffer test to tracepoint and skeleton

Switch perf_buffer test to use skeleton to avoid use of bpf_prog_load() and
make test a bit more succinct. Also switch BPF program to use tracepoint
instead of kprobe, as that allows to support older kernels, which had
tracepoint support before kprobe support in the form that libbpf expects
(i.e., libbpf expects /sys/bus/event_source/devices/kprobe/type, which doesn't
always exist on old kernels).

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200708015318.3827358-7-andriin@fb.com
---
 .../testing/selftests/bpf/prog_tests/perf_buffer.c | 42 +++++++---------------
 .../testing/selftests/bpf/progs/test_perf_buffer.c |  4 +--
 2 files changed, 14 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index a122ce3b360e..c33ec180b3f2 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -4,6 +4,7 @@
 #include <sched.h>
 #include <sys/socket.h>
 #include <test_progs.h>
+#include "test_perf_buffer.skel.h"
 #include "bpf/libbpf_internal.h"
 
 /* AddressSanitizer sometimes crashes due to data dereference below, due to
@@ -25,16 +26,11 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 
 void test_perf_buffer(void)
 {
-	int err, prog_fd, on_len, nr_on_cpus = 0,  nr_cpus, i, duration = 0;
-	const char *prog_name = "kprobe/sys_nanosleep";
-	const char *file = "./test_perf_buffer.o";
+	int err, on_len, nr_on_cpus = 0,  nr_cpus, i, duration = 0;
 	struct perf_buffer_opts pb_opts = {};
-	struct bpf_map *perf_buf_map;
+	struct test_perf_buffer *skel;
 	cpu_set_t cpu_set, cpu_seen;
-	struct bpf_program *prog;
-	struct bpf_object *obj;
 	struct perf_buffer *pb;
-	struct bpf_link *link;
 	bool *online;
 
 	nr_cpus = libbpf_num_possible_cpus();
@@ -51,33 +47,21 @@ void test_perf_buffer(void)
 			nr_on_cpus++;
 
 	/* load program */
-	err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
-	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) {
-		obj = NULL;
-		goto out_close;
-	}
-
-	prog = bpf_object__find_program_by_title(obj, prog_name);
-	if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
+	skel = test_perf_buffer__open_and_load();
+	if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
 		goto out_close;
 
-	/* load map */
-	perf_buf_map = bpf_object__find_map_by_name(obj, "perf_buf_map");
-	if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
-		goto out_close;
-
-	/* attach kprobe */
-	link = bpf_program__attach_kprobe(prog, false /* retprobe */,
-					  SYS_NANOSLEEP_KPROBE_NAME);
-	if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+	/* attach probe */
+	err = test_perf_buffer__attach(skel);
+	if (CHECK(err, "attach_kprobe", "err %d\n", err))
 		goto out_close;
 
 	/* set up perf buffer */
 	pb_opts.sample_cb = on_sample;
 	pb_opts.ctx = &cpu_seen;
-	pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
+	pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
 	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
-		goto out_detach;
+		goto out_close;
 
 	/* trigger kprobe on every CPU */
 	CPU_ZERO(&cpu_seen);
@@ -94,7 +78,7 @@ void test_perf_buffer(void)
 					     &cpu_set);
 		if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
 				 i, err))
-			goto out_detach;
+			goto out_close;
 
 		usleep(1);
 	}
@@ -110,9 +94,7 @@ void test_perf_buffer(void)
 
 out_free_pb:
 	perf_buffer__free(pb);
-out_detach:
-	bpf_link__destroy(link);
 out_close:
-	bpf_object__close(obj);
+	test_perf_buffer__destroy(skel);
 	free(online);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index ad59c4c9aba8..8207a2dc2f9d 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -12,8 +12,8 @@ struct {
 	__uint(value_size, sizeof(int));
 } perf_buf_map SEC(".maps");
 
-SEC("kprobe/sys_nanosleep")
-int BPF_KPROBE(handle_sys_nanosleep_entry)
+SEC("tp/raw_syscalls/sys_enter")
+int handle_sys_enter(void *ctx)
 {
 	int cpu = bpf_get_smp_processor_id();
 
-- 
cgit 


From df62f2ec3df698e16bdc3dd44de1d337a9eac6b3 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 9 Jul 2020 15:12:42 +0200
Subject: selftests/mptcp: add diag interface tests

basic functional test, triggering the msk diag interface
code. Require appropriate iproute2 support, skip elsewhere.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/Makefile        |   2 +-
 tools/testing/selftests/net/mptcp/diag.sh         | 121 ++++++++++++++++++++++
 tools/testing/selftests/net/mptcp/mptcp_connect.c |  22 +++-
 3 files changed, 140 insertions(+), 5 deletions(-)
 create mode 100755 tools/testing/selftests/net/mptcp/diag.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index f50976ee7d44..aa254aefc2c3 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -5,7 +5,7 @@ KSFT_KHDR_INSTALL := 1
 
 CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g  -I$(top_srcdir)/usr/include
 
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh
 
 TEST_GEN_FILES = mptcp_connect pm_nl_ctl
 
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
new file mode 100755
index 000000000000..39edce4f541c
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns="ns1-$rndh"
+ksft_skip=4
+test_cnt=1
+ret=0
+pids=()
+
+flush_pids()
+{
+	# mptcp_connect in join mode will sleep a bit before completing,
+	# give it some time
+	sleep 1.1
+
+	for pid in ${pids[@]}; do
+		[ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1
+	done
+	pids=()
+}
+
+cleanup()
+{
+	ip netns del $ns
+	for pid in ${pids[@]}; do
+		[ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1
+	done
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+ss -h | grep -q MPTCP
+if [ $? -ne 0 ];then
+	echo "SKIP: ss tool does not support MPTCP"
+	exit $ksft_skip
+fi
+
+__chk_nr()
+{
+	local condition="$1"
+	local expected=$2
+	local msg nr
+
+	shift 2
+	msg=$*
+	nr=$(ss -inmHMN $ns | $condition)
+
+	printf "%-50s" "$msg"
+	if [ $nr != $expected ]; then
+		echo "[ fail ] expected $expected found $nr"
+		ret=$test_cnt
+	else
+		echo "[  ok  ]"
+	fi
+	test_cnt=$((test_cnt+1))
+}
+
+chk_msk_nr()
+{
+	__chk_nr "grep -c token:" $*
+}
+
+chk_msk_fallback_nr()
+{
+		__chk_nr "grep -c fallback" $*
+}
+
+chk_msk_remote_key_nr()
+{
+		__chk_nr "grep -c remote_key" $*
+}
+
+
+trap cleanup EXIT
+ip netns add $ns
+ip -n $ns link set dev lo up
+
+echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null &
+sleep 0.1
+pids[0]=$!
+chk_msk_nr 0 "no msk on netns creation"
+
+echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null &
+sleep 0.1
+pids[1]=$!
+chk_msk_nr 2 "after MPC handshake "
+chk_msk_remote_key_nr 2 "....chk remote_key"
+chk_msk_fallback_nr 0 "....chk no fallback"
+flush_pids
+
+
+echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null &
+pids[0]=$!
+sleep 0.1
+echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null &
+pids[1]=$!
+sleep 0.1
+chk_msk_fallback_nr 1 "check fallback"
+flush_pids
+
+NR_CLIENTS=100
+for I in `seq 1 $NR_CLIENTS`; do
+	echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null  &
+	pids[$((I*2))]=$!
+done
+sleep 0.1
+
+for I in `seq 1 $NR_CLIENTS`; do
+	echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null &
+	pids[$((I*2 + 1))]=$!
+done
+sleep 1.5
+
+chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
+flush_pids
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index cedee5b952ba..cad6f73a5fd0 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -11,6 +11,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <strings.h>
+#include <signal.h>
 #include <unistd.h>
 
 #include <sys/poll.h>
@@ -36,6 +37,7 @@ extern int optind;
 
 static int  poll_timeout = 10 * 1000;
 static bool listen_mode;
+static bool quit;
 
 enum cfg_mode {
 	CFG_MODE_POLL,
@@ -52,11 +54,12 @@ static int pf = AF_INET;
 static int cfg_sndbuf;
 static int cfg_rcvbuf;
 static bool cfg_join;
+static int cfg_wait;
 
 static void die_usage(void)
 {
 	fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
-		"[-l] connect_address\n");
+		"[-l] [-w sec] connect_address\n");
 	fprintf(stderr, "\t-6 use ipv6\n");
 	fprintf(stderr, "\t-t num -- set poll timeout to num\n");
 	fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
@@ -65,9 +68,15 @@ static void die_usage(void)
 	fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
 	fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
 	fprintf(stderr, "\t-u -- check mptcp ulp\n");
+	fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
 	exit(1);
 }
 
+static void handle_signal(int nr)
+{
+	quit = true;
+}
+
 static const char *getxinfo_strerr(int err)
 {
 	if (err == EAI_SYSTEM)
@@ -418,8 +427,8 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 	}
 
 	/* leave some time for late join/announce */
-	if (cfg_join)
-		usleep(400000);
+	if (cfg_wait)
+		usleep(cfg_wait);
 
 	close(peerfd);
 	return 0;
@@ -812,11 +821,12 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) {
+	while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) {
 		switch (c) {
 		case 'j':
 			cfg_join = true;
 			cfg_mode = CFG_MODE_POLL;
+			cfg_wait = 400000;
 			break;
 		case 'l':
 			listen_mode = true;
@@ -850,6 +860,9 @@ static void parse_opts(int argc, char **argv)
 		case 'R':
 			cfg_rcvbuf = parse_int(optarg);
 			break;
+		case 'w':
+			cfg_wait = atoi(optarg)*1000000;
+			break;
 		}
 	}
 
@@ -865,6 +878,7 @@ int main(int argc, char *argv[])
 {
 	init_rng();
 
+	signal(SIGUSR1, handle_signal);
 	parse_opts(argc, argv);
 
 	if (tcpulp_audit)
-- 
cgit 


From f3348a82e72795ce218264fd00ef5cf5137836a9 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@mellanox.com>
Date: Thu, 9 Jul 2020 16:18:22 +0300
Subject: selftests: net: Add port split test

Test port split configuration using previously added number of port lanes
attribute.

Check that all the splittable ports are successfully split to their maximum
number of lanes and below, and that those which are not splittable fail to
be split.

Test output example:

TEST: swp4 is unsplittable                                         [ OK ]
TEST: split port swp53 into 4                                      [ OK ]
TEST: Unsplit port pci/0000:03:00.0/25                             [ OK ]
TEST: split port swp53 into 2                                      [ OK ]
TEST: Unsplit port pci/0000:03:00.0/25                             [ OK ]

Signed-off-by: Danielle Ratson <danieller@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile              |   1 +
 tools/testing/selftests/net/devlink_port_split.py | 277 ++++++++++++++++++++++
 2 files changed, 278 insertions(+)
 create mode 100755 tools/testing/selftests/net/devlink_port_split.py

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index bfacb960450f..9491bbaa0831 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -18,6 +18,7 @@ TEST_PROGS += reuseaddr_ports_exhausted.sh
 TEST_PROGS += txtimestamp.sh
 TEST_PROGS += vrf-xfrm-tests.sh
 TEST_PROGS += rxtimestamp.sh
+TEST_PROGS += devlink_port_split.py
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
new file mode 100755
index 000000000000..58bb7e9b88ce
--- /dev/null
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -0,0 +1,277 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+
+from subprocess import PIPE, Popen
+import json
+import time
+import argparse
+import collections
+import sys
+
+#
+# Test port split configuration using devlink-port lanes attribute.
+# The test is skipped in case the attribute is not available.
+#
+# First, check that all the ports with 1 lane fail to split.
+# Second, check that all the ports with more than 1 lane can be split
+# to all valid configurations (e.g., split to 2, split to 4 etc.)
+#
+
+
+Port = collections.namedtuple('Port', 'bus_info name')
+
+
+def run_command(cmd, should_fail=False):
+    """
+    Run a command in subprocess.
+    Return: Tuple of (stdout, stderr).
+    """
+
+    p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
+    stdout, stderr = p.communicate()
+    stdout, stderr = stdout.decode(), stderr.decode()
+
+    if stderr != "" and not should_fail:
+        print("Error sending command: %s" % cmd)
+        print(stdout)
+        print(stderr)
+    return stdout, stderr
+
+
+class devlink_ports(object):
+    """
+    Class that holds information on the devlink ports, required to the tests;
+    if_names: A list of interfaces in the devlink ports.
+    """
+
+    def get_if_names(dev):
+        """
+        Get a list of physical devlink ports.
+        Return: Array of tuples (bus_info/port, if_name).
+        """
+
+        arr = []
+
+        cmd = "devlink -j port show"
+        stdout, stderr = run_command(cmd)
+        assert stderr == ""
+        ports = json.loads(stdout)['port']
+
+        for port in ports:
+            if dev in port:
+                if ports[port]['flavour'] == 'physical':
+                    arr.append(Port(bus_info=port, name=ports[port]['netdev']))
+
+        return arr
+
+    def __init__(self, dev):
+        self.if_names = devlink_ports.get_if_names(dev)
+
+
+def get_max_lanes(port):
+    """
+    Get the $port's maximum number of lanes.
+    Return: number of lanes, e.g. 1, 2, 4 and 8.
+    """
+
+    cmd = "devlink -j port show %s" % port
+    stdout, stderr = run_command(cmd)
+    assert stderr == ""
+    values = list(json.loads(stdout)['port'].values())[0]
+
+    if 'lanes' in values:
+        lanes = values['lanes']
+    else:
+        lanes = 0
+    return lanes
+
+
+def get_split_ability(port):
+    """
+    Get the $port split ability.
+    Return: split ability, true or false.
+    """
+
+    cmd = "devlink -j port show %s" % port.name
+    stdout, stderr = run_command(cmd)
+    assert stderr == ""
+    values = list(json.loads(stdout)['port'].values())[0]
+
+    return values['splittable']
+
+
+def split(k, port, should_fail=False):
+    """
+    Split $port into $k ports.
+    If should_fail == True, the split should fail. Otherwise, should pass.
+    Return: Array of sub ports after splitting.
+            If the $port wasn't split, the array will be empty.
+    """
+
+    cmd = "devlink port split %s count %s" % (port.bus_info, k)
+    stdout, stderr = run_command(cmd, should_fail=should_fail)
+
+    if should_fail:
+        if not test(stderr != "", "%s is unsplittable" % port.name):
+            print("split an unsplittable port %s" % port.name)
+            return create_split_group(port, k)
+    else:
+        if stderr == "":
+            return create_split_group(port, k)
+        print("didn't split a splittable port %s" % port.name)
+
+    return []
+
+
+def unsplit(port):
+    """
+    Unsplit $port.
+    """
+
+    cmd = "devlink port unsplit %s" % port
+    stdout, stderr = run_command(cmd)
+    test(stderr == "", "Unsplit port %s" % port)
+
+
+def exists(port, dev):
+    """
+    Check if $port exists in the devlink ports.
+    Return: True is so, False otherwise.
+    """
+
+    return any(dev_port.name == port
+               for dev_port in devlink_ports.get_if_names(dev))
+
+
+def exists_and_lanes(ports, lanes, dev):
+    """
+    Check if every port in the list $ports exists in the devlink ports and has
+    $lanes number of lanes after splitting.
+    Return: True if both are True, False otherwise.
+    """
+
+    for port in ports:
+        max_lanes = get_max_lanes(port)
+        if not exists(port, dev):
+            print("port %s doesn't exist in devlink ports" % port)
+            return False
+        if max_lanes != lanes:
+            print("port %s has %d lanes, but %s were expected"
+                  % (port, lanes, max_lanes))
+            return False
+    return True
+
+
+def test(cond, msg):
+    """
+    Check $cond and print a message accordingly.
+    Return: True is pass, False otherwise.
+    """
+
+    if cond:
+        print("TEST: %-60s [ OK ]" % msg)
+    else:
+        print("TEST: %-60s [FAIL]" % msg)
+
+    return cond
+
+
+def create_split_group(port, k):
+    """
+    Create the split group for $port.
+    Return: Array with $k elements, which are the split port group.
+    """
+
+    return list(port.name + "s" + str(i) for i in range(k))
+
+
+def split_unsplittable_port(port, k):
+    """
+    Test that splitting of unsplittable port fails.
+    """
+
+    # split to max
+    new_split_group = split(k, port, should_fail=True)
+
+    if new_split_group != []:
+        unsplit(port.bus_info)
+
+
+def split_splittable_port(port, k, lanes, dev):
+    """
+    Test that splitting of splittable port passes correctly.
+    """
+
+    new_split_group = split(k, port)
+
+    # Once the split command ends, it takes some time to the sub ifaces'
+    # to get their names. Use udevadm to continue only when all current udev
+    # events are handled.
+    cmd = "udevadm settle"
+    stdout, stderr = run_command(cmd)
+    assert stderr == ""
+
+    if new_split_group != []:
+        test(exists_and_lanes(new_split_group, lanes/k, dev),
+             "split port %s into %s" % (port.name, k))
+
+    unsplit(port.bus_info)
+
+
+def make_parser():
+    parser = argparse.ArgumentParser(description='A test for port splitting.')
+    parser.add_argument('--dev',
+                        help='The devlink handle of the device under test. ' +
+                             'The default is the first registered devlink ' +
+                             'handle.')
+
+    return parser
+
+
+def main(cmdline=None):
+    parser = make_parser()
+    args = parser.parse_args(cmdline)
+
+    dev = args.dev
+    if not dev:
+        cmd = "devlink -j dev show"
+        stdout, stderr = run_command(cmd)
+        assert stderr == ""
+
+        devs = json.loads(stdout)['dev']
+        dev = list(devs.keys())[0]
+
+    cmd = "devlink dev show %s" % dev
+    stdout, stderr = run_command(cmd)
+    if stderr != "":
+        print("devlink device %s can not be found" % dev)
+        sys.exit(1)
+
+    ports = devlink_ports(dev)
+
+    for port in ports.if_names:
+        max_lanes = get_max_lanes(port.name)
+
+        # If max lanes is 0, do not test port splitting at all
+        if max_lanes == 0:
+            continue
+
+        # If 1 lane, shouldn't be able to split
+        elif max_lanes == 1:
+            test(not get_split_ability(port),
+                 "%s should not be able to split" % port.name)
+            split_unsplittable_port(port, max_lanes)
+
+        # Else, splitting should pass and all the split ports should exist.
+        else:
+            lane = max_lanes
+            test(get_split_ability(port),
+                 "%s should be able to split" % port.name)
+            while lane > 1:
+                split_splittable_port(port, lane, max_lanes, dev)
+
+                lane //= 2
+
+
+if __name__ == "__main__":
+    main()
-- 
cgit 


From 5c3320d7fece4612d4a413aa3c8e82cdb5b49fcb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 9 Jul 2020 18:10:23 -0700
Subject: libbpf: Fix memory leak and optimize BTF sanitization

Coverity's static analysis helpfully reported a memory leak introduced by
0f0e55d8247c ("libbpf: Improve BTF sanitization handling"). While fixing it,
I realized that btf__new() already creates a memory copy, so there is no need
to do this. So this patch also fixes misleading btf__new() signature to make
data into a `const void *` input parameter. And it avoids unnecessary memory
allocation and copy in BTF sanitization code altogether.

Fixes: 0f0e55d8247c ("libbpf: Improve BTF sanitization handling")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200710011023.1655008-1-andriin@fb.com
---
 tools/lib/bpf/btf.c    |  2 +-
 tools/lib/bpf/btf.h    |  2 +-
 tools/lib/bpf/libbpf.c | 11 +++--------
 3 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index c8861c9e3635..c9e760e120dc 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -397,7 +397,7 @@ void btf__free(struct btf *btf)
 	free(btf);
 }
 
-struct btf *btf__new(__u8 *data, __u32 size)
+struct btf *btf__new(const void *data, __u32 size)
 {
 	struct btf *btf;
 	int err;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 173eff23c472..a3b7ef9b737f 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -63,7 +63,7 @@ struct btf_ext_header {
 };
 
 LIBBPF_API void btf__free(struct btf *btf);
-LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
+LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
 LIBBPF_API struct btf *btf__parse_elf(const char *path,
 				      struct btf_ext **btf_ext);
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6602eb479596..25e4f77be8d7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2533,17 +2533,12 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 
 	sanitize = btf_needs_sanitization(obj);
 	if (sanitize) {
-		const void *orig_data;
-		void *san_data;
+		const void *raw_data;
 		__u32 sz;
 
 		/* clone BTF to sanitize a copy and leave the original intact */
-		orig_data = btf__get_raw_data(obj->btf, &sz);
-		san_data = malloc(sz);
-		if (!san_data)
-			return -ENOMEM;
-		memcpy(san_data, orig_data, sz);
-		kern_btf = btf__new(san_data, sz);
+		raw_data = btf__get_raw_data(obj->btf, &sz);
+		kern_btf = btf__new(raw_data, sz);
 		if (IS_ERR(kern_btf))
 			return PTR_ERR(kern_btf);
 
-- 
cgit 


From 91f430b2c49d2d8f4445824f70a8cf7b73e1094c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 9 Jul 2020 17:42:49 -0700
Subject: selftests: net: add a test for UDP tunnel info infra

Add validating the UDP tunnel infra works.

$ ./udp_tunnel_nic.sh
PASSED all 383 checks

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/netdevsim/udp_tunnel_nic.sh        | 786 +++++++++++++++++++++
 1 file changed, 786 insertions(+)
 create mode 100644 tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
new file mode 100644
index 000000000000..ba1d53b9f815
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -0,0 +1,786 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+VNI_GEN=$RANDOM
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID
+NSIM_NETDEV=
+HAS_ETHTOOL=
+EXIT_STATUS=0
+num_cases=0
+num_errors=0
+
+clean_up_devs=( )
+
+function err_cnt {
+    echo "ERROR:" $@
+    EXIT_STATUS=1
+    ((num_errors++))
+    ((num_cases++))
+}
+
+function pass_cnt {
+    ((num_cases++))
+}
+
+function cleanup_tuns {
+    for dev in "${clean_up_devs[@]}"; do
+	[ -e /sys/class/net/$dev ] && ip link del dev $dev
+    done
+    clean_up_devs=( )
+}
+
+function cleanup_nsim {
+    if [ -e $NSIM_DEV_SYS ]; then
+	echo $NSIM_ID > /sys/bus/netdevsim/del_device
+    fi
+}
+
+function cleanup {
+    cleanup_tuns
+    cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function new_vxlan {
+    local dev=$1
+    local dstport=$2
+    local lower=$3
+    local ipver=$4
+    local flags=$5
+
+    local group ipfl
+
+    [ "$ipver" != '6' ] && group=239.1.1.1 || group=fff1::1
+    [ "$ipver" != '6' ] || ipfl="-6"
+
+    [[ ! "$flags" =~ "external" ]] && flags="$flags id $((VNI_GEN++))"
+
+    ip $ipfl link add $dev type vxlan \
+       group $group \
+       dev $lower \
+       dstport $dstport \
+       $flags
+
+    ip link set dev $dev up
+
+    clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+    check_tables
+}
+
+function new_geneve {
+    local dev=$1
+    local dstport=$2
+    local ipver=$3
+    local flags=$4
+
+    local group ipfl
+
+    [ "$ipver" != '6' ] && remote=1.1.1.2 || group=::2
+    [ "$ipver" != '6' ] || ipfl="-6"
+
+    [[ ! "$flags" =~ "external" ]] && flags="$flags vni $((VNI_GEN++))"
+
+    ip $ipfl link add $dev type geneve \
+       remote $remote  \
+       dstport $dstport \
+       $flags
+
+    ip link set dev $dev up
+
+    clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+    check_tables
+}
+
+function del_dev {
+    local dev=$1
+
+    ip link del dev $dev
+    check_tables
+}
+
+# Helpers for netdevsim port/type encoding
+function mke {
+    local port=$1
+    local type=$2
+
+    echo $((port << 16 | type))
+}
+
+function pre {
+    local val=$1
+
+    echo -e "port: $((val >> 16))\ttype: $((val & 0xffff))"
+}
+
+function pre_ethtool {
+    local val=$1
+    local port=$((val >> 16))
+    local type=$((val & 0xffff))
+
+    case $type in
+	1)
+	    type_name="vxlan"
+	    ;;
+	2)
+	    type_name="geneve"
+	    ;;
+	4)
+	    type_name="vxlan-gpe"
+	    ;;
+	*)
+	    type_name="bit X"
+	    ;;
+    esac
+
+    echo "port $port, $type_name"
+}
+
+function check_table {
+    local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+    local -n expected=$2
+    local last=$3
+
+    read -a have < $path
+
+    if [ ${#expected[@]} -ne ${#have[@]} ]; then
+	echo "check_table: BAD NUMBER OF ITEMS"
+	return 0
+    fi
+
+    for i in "${!expected[@]}"; do
+	if [ -n "$HAS_ETHTOOL" -a ${expected[i]} -ne 0 ]; then
+	    pp_expected=`pre_ethtool ${expected[i]}`
+	    ethtool --show-tunnels $NSIM_NETDEV | grep "$pp_expected" >/dev/null
+	    if [ $? -ne 0 -a $last -ne 0 ]; then
+		err_cnt "ethtool table $1 on port $port: $pfx - $msg"
+		echo "       check_table: ethtool does not contain '$pp_expected'"
+		ethtool --show-tunnels $NSIM_NETDEV
+		return 0
+
+	    fi
+	fi
+
+	if [ ${expected[i]} != ${have[i]} ]; then
+	    if [ $last -ne 0 ]; then
+		err_cnt "table $1 on port $port: $pfx - $msg"
+		echo "       check_table: wrong entry $i"
+		echo "       expected: `pre ${expected[i]}`"
+		echo "       have:     `pre ${have[i]}`"
+		return 0
+	    fi
+	    return 1
+	fi
+    done
+
+    pass_cnt
+    return 0
+}
+
+function check_tables {
+    # Need retries in case we have workqueue making the changes
+    local retries=10
+
+    while ! check_table 0 exp0 $((retries == 0)); do
+	sleep 0.02
+	((retries--))
+    done
+    while ! check_table 1 exp1 $((retries == 0)); do
+	sleep 0.02
+	((retries--))
+    done
+}
+
+function print_table {
+    local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+    read -a have < $path
+
+    tree $NSIM_DEV_DFS/
+
+    echo "Port $port table $1:"
+
+    for i in "${!have[@]}"; do
+	echo "    `pre ${have[i]}`"
+    done
+
+}
+
+function print_tables {
+    print_table 0
+    print_table 1
+}
+
+function get_netdev_name {
+    local -n old=$1
+
+    new=$(ls /sys/class/net)
+
+    for netdev in $new; do
+	for check in $old; do
+            [ $netdev == $check ] && break
+	done
+
+	if [ $netdev != $check ]; then
+	    echo $netdev
+	    break
+	fi
+    done
+}
+
+###
+### Code start
+###
+
+# Probe ethtool support
+ethtool -h | grep show-tunnels 2>&1 >/dev/null && HAS_ETHTOOL=y
+
+modprobe netdevsim
+
+# Basic test
+pfx="basic"
+
+for port in 0 1; do
+    old_netdevs=$(ls /sys/class/net)
+    if [ $port -eq 0 ]; then
+	echo $NSIM_ID > /sys/bus/netdevsim/new_device
+    else
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+	echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+	echo 1 > $NSIM_DEV_SYS/new_port
+    fi
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+    msg="new NIC device created"
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+    check_tables
+
+    msg="VxLAN v4 devices"
+    exp0=( `mke 4789 1` 0 0 0 )
+    new_vxlan vxlan0 4789 $NSIM_NETDEV
+    new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+    msg="VxLAN v4 devices go down"
+    exp0=( 0 0 0 0 )
+    ifconfig vxlan1 down
+    ifconfig vxlan0 down
+    check_tables
+
+    msg="VxLAN v6 devices"
+    exp0=( `mke 4789 1` 0 0 0 )
+    new_vxlan vxlanA 4789 $NSIM_NETDEV 6
+
+    for ifc in vxlan0 vxlan1; do
+	ifconfig $ifc up
+    done
+
+    new_vxlan vxlanB 4789 $NSIM_NETDEV 6
+
+    msg="another VxLAN v6 devices"
+    exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+    new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+    msg="Geneve device"
+    exp1=( `mke 6081 2` 0 0 0 )
+    new_geneve gnv0 6081
+
+    msg="NIC device goes down"
+    ifconfig $NSIM_NETDEV down
+    if [ $port -eq 1 ]; then
+	exp0=( 0 0 0 0 )
+	exp1=( 0 0 0 0 )
+    fi
+    check_tables
+    msg="NIC device goes up again"
+    ifconfig $NSIM_NETDEV up
+    exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+    exp1=( `mke 6081 2` 0 0 0 )
+    check_tables
+
+    cleanup_tuns
+
+    msg="tunnels destroyed"
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+    check_tables
+
+    modprobe -r geneve
+    modprobe -r vxlan
+    modprobe -r udp_tunnel
+
+    check_tables
+done
+
+modprobe -r netdevsim
+
+# Module tests
+pfx="module tests"
+
+if modinfo netdevsim | grep udp_tunnel >/dev/null; then
+    err_cnt "netdevsim depends on udp_tunnel"
+else
+    pass_cnt
+fi
+
+modprobe netdevsim
+
+old_netdevs=$(ls /sys/class/net)
+port=0
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
+echo 0 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="create VxLANs"
+exp0=( 0 0 0 0 ) # sleep is longer than out wait
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+modprobe -r vxlan
+modprobe -r udp_tunnel
+
+msg="remove tunnels"
+exp0=( 0 0 0 0 )
+check_tables
+
+msg="create VxLANs"
+exp0=( 0 0 0 0 ) # sleep is longer than out wait
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+exp0=( 0 0 0 0 )
+
+modprobe -r netdevsim
+modprobe netdevsim
+
+# Overflow the table
+
+function overflow_table0 {
+    local pfx=$1
+
+    msg="create VxLANs 1/5"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    msg="create VxLANs 2/5"
+    exp0=( `mke 10000 1` `mke 10001 1` 0 0 )
+    new_vxlan vxlan1 10001 $NSIM_NETDEV
+
+    msg="create VxLANs 3/5"
+    exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` 0 )
+    new_vxlan vxlan2 10002 $NSIM_NETDEV
+
+    msg="create VxLANs 4/5"
+    exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` `mke 10003 1` )
+    new_vxlan vxlan3 10003 $NSIM_NETDEV
+
+    msg="create VxLANs 5/5"
+    new_vxlan vxlan4 10004 $NSIM_NETDEV
+}
+
+function overflow_table1 {
+    local pfx=$1
+
+    msg="create GENEVE 1/5"
+    exp1=( `mke 20000 2` 0 0 0 )
+    new_geneve gnv0 20000
+
+    msg="create GENEVE 2/5"
+    exp1=( `mke 20000 2` `mke 20001 2` 0 0 )
+    new_geneve gnv1 20001
+
+    msg="create GENEVE 3/5"
+    exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` 0 )
+    new_geneve gnv2 20002
+
+    msg="create GENEVE 4/5"
+    exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` `mke 20003 2` )
+    new_geneve gnv3 20003
+
+    msg="create GENEVE 5/5"
+    new_geneve gnv4 20004
+}
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+	echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    ifconfig $NSIM_NETDEV up
+
+    overflow_table0 "overflow NIC table"
+    overflow_table1 "overflow NIC table"
+
+    msg="replace VxLAN in overflow table"
+    exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+    del_dev vxlan1
+
+    msg="vacate VxLAN in overflow table"
+    exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+    del_dev vxlan2
+
+    msg="replace GENEVE in overflow table"
+    exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+    del_dev gnv1
+
+    msg="vacate GENEVE in overflow table"
+    exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+    del_dev gnv2
+
+    msg="table sharing - share"
+    exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+    new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+    msg="table sharing - overflow"
+    new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+    msg="table sharing - overflow v6"
+    new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+    exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+    del_dev gnv4
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Sync all
+pfx="sync all"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+	echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    ifconfig $NSIM_NETDEV up
+
+    overflow_table0 "overflow NIC table"
+    overflow_table1 "overflow NIC table"
+
+    msg="replace VxLAN in overflow table"
+    exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+    del_dev vxlan1
+
+    msg="vacate VxLAN in overflow table"
+    exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+    del_dev vxlan2
+
+    msg="replace GENEVE in overflow table"
+    exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+    del_dev gnv1
+
+    msg="vacate GENEVE in overflow table"
+    exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+    del_dev gnv2
+
+    msg="table sharing - share"
+    exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+    new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+    msg="table sharing - overflow"
+    new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+    msg="table sharing - overflow v6"
+    new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+    exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+    del_dev gnv4
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Destroy full NIC
+pfx="destroy full"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+	echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    ifconfig $NSIM_NETDEV up
+
+    overflow_table0 "destroy NIC"
+    overflow_table1 "destroy NIC"
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# IPv4 only
+pfx="IPv4 only"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+	echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    ifconfig $NSIM_NETDEV up
+
+    msg="create VxLANs v6"
+    new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+    msg="create VxLANs v6"
+    new_vxlan vxlanA1 10000 $NSIM_NETDEV 6
+
+    ip link set dev vxlanA0 down
+    ip link set dev vxlanA0 up
+    check_tables
+
+    msg="create VxLANs v4"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    msg="down VxLANs v4"
+    exp0=( 0 0 0 0 )
+    ip link set dev vxlan0 down
+    check_tables
+
+    msg="up VxLANs v4"
+    exp0=( `mke 10000 1` 0 0 0 )
+    ip link set dev vxlan0 up
+    check_tables
+
+    msg="destroy VxLANs v4"
+    exp0=( 0 0 0 0 )
+    del_dev vxlan0
+
+    msg="recreate VxLANs v4"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    del_dev vxlanA0
+    del_dev vxlanA1
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Failures
+pfx="error injection"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+	echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    ifconfig $NSIM_NETDEV up
+
+    echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+
+    msg="1 - create VxLANs v6"
+    exp0=( 0 0 0 0 )
+    new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+    msg="1 - create VxLANs v4"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    msg="1 - remove VxLANs v4"
+    del_dev vxlan0
+
+    msg="1 - remove VxLANs v6"
+    exp0=( 0 0 0 0 )
+    del_dev vxlanA0
+
+    msg="2 - create GENEVE"
+    exp1=( `mke 20000 2` 0 0 0 )
+    new_geneve gnv0 20000
+
+    msg="2 - destroy GENEVE"
+    echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+    exp1=( `mke 20000 2` 0 0 0 )
+    del_dev gnv0
+
+    msg="2 - create second GENEVE"
+    exp1=( 0 `mke 20001 2` 0 0 )
+    new_geneve gnv0 20001
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# netdev flags
+pfx="netdev flags"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+	echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    ifconfig $NSIM_NETDEV up
+
+    msg="create VxLANs v6"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+    msg="create VxLANs v4"
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    msg="turn off"
+    exp0=( 0 0 0 0 )
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+    check_tables
+
+    msg="turn on"
+    exp0=( `mke 10000 1` 0 0 0 )
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+    check_tables
+
+    msg="remove both"
+    del_dev vxlanA0
+    exp0=( 0 0 0 0 )
+    del_dev vxlan0
+    check_tables
+
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+
+    msg="create VxLANs v4 - off"
+    exp0=( 0 0 0 0 )
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    msg="created off - turn on"
+    exp0=( `mke 10000 1` 0 0 0 )
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+    check_tables
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# device initiated reset
+pfx="reset notification"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+	echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    ifconfig $NSIM_NETDEV up
+
+    msg="create VxLANs v6"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+    msg="create VxLANs v4"
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+    check_tables
+
+    msg="NIC device goes down"
+    ifconfig $NSIM_NETDEV down
+    if [ $port -eq 1 ]; then
+	exp0=( 0 0 0 0 )
+	exp1=( 0 0 0 0 )
+    fi
+    check_tables
+
+    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+    check_tables
+
+    msg="NIC device goes up again"
+    ifconfig $NSIM_NETDEV up
+    exp0=( `mke 10000 1` 0 0 0 )
+    check_tables
+
+    msg="remove both"
+    del_dev vxlanA0
+    exp0=( 0 0 0 0 )
+    del_dev vxlan0
+    check_tables
+
+    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+    check_tables
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+modprobe -r netdevsim
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $num_cases checks"
+else
+    echo "FAILED $num_errors/$num_cases checks"
+fi
+
+exit $EXIT_STATUS
-- 
cgit 


From fbbb68de80a45ad66b66600bee275485c5073aa7 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 11 Jul 2020 23:53:21 +0200
Subject: bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object

The resolve_btfids tool scans elf object for .BTF_ids section
and resolves its symbols with BTF ID values.

It will be used to during linking time to resolve arrays of BTF
ID values used in verifier, so these IDs do not need to be
resolved in runtime.

The expected layout of .BTF_ids section is described in main.c
header. Related kernel changes are coming in following changes.

Build issue reported by 0-DAY CI Kernel Test Service.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200711215329.41165-2-jolsa@kernel.org
---
 tools/bpf/resolve_btfids/Build    |  10 +
 tools/bpf/resolve_btfids/Makefile |  77 ++++
 tools/bpf/resolve_btfids/main.c   | 721 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 808 insertions(+)
 create mode 100644 tools/bpf/resolve_btfids/Build
 create mode 100644 tools/bpf/resolve_btfids/Makefile
 create mode 100644 tools/bpf/resolve_btfids/main.c

(limited to 'tools')

diff --git a/tools/bpf/resolve_btfids/Build b/tools/bpf/resolve_btfids/Build
new file mode 100644
index 000000000000..ae82da03f9bf
--- /dev/null
+++ b/tools/bpf/resolve_btfids/Build
@@ -0,0 +1,10 @@
+resolve_btfids-y += main.o
+resolve_btfids-y += rbtree.o
+resolve_btfids-y += zalloc.o
+resolve_btfids-y += string.o
+resolve_btfids-y += ctype.o
+resolve_btfids-y += str_error_r.o
+
+$(OUTPUT)%.o: ../../lib/%.c FORCE
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
new file mode 100644
index 000000000000..948378ca73d4
--- /dev/null
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0-only
+include ../../scripts/Makefile.include
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+ifeq ($(V),1)
+  Q =
+  msg =
+else
+  Q = @
+  msg = @printf '  %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
+  MAKEFLAGS=--no-print-directory
+endif
+
+OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
+
+LIBBPF_SRC := $(srctree)/tools/lib/bpf/
+SUBCMD_SRC := $(srctree)/tools/lib/subcmd/
+
+BPFOBJ     := $(OUTPUT)/libbpf.a
+SUBCMDOBJ  := $(OUTPUT)/libsubcmd.a
+
+BINARY     := $(OUTPUT)/resolve_btfids
+BINARY_IN  := $(BINARY)-in.o
+
+all: $(BINARY)
+
+$(OUTPUT):
+	$(call msg,MKDIR,,$@)
+	$(Q)mkdir -p $(OUTPUT)
+
+$(SUBCMDOBJ): fixdep FORCE
+	$(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(OUTPUT)
+
+$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
+	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC)  OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
+
+CFLAGS := -g \
+          -I$(srctree)/tools/include \
+          -I$(srctree)/tools/include/uapi \
+          -I$(LIBBPF_SRC) \
+          -I$(SUBCMD_SRC)
+
+LIBS = -lelf -lz
+
+export srctree OUTPUT CFLAGS Q
+include $(srctree)/tools/build/Makefile.include
+
+$(BINARY_IN): fixdep FORCE
+	$(Q)$(MAKE) $(build)=resolve_btfids
+
+$(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN)
+	$(call msg,LINK,$@)
+	$(Q)$(CC) $(BINARY_IN) $(LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS)
+
+libsubcmd-clean:
+	$(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(OUTPUT) clean
+
+libbpf-clean:
+	$(Q)$(MAKE) -C $(LIBBPF_SRC) OUTPUT=$(OUTPUT) clean
+
+clean: libsubcmd-clean libbpf-clean fixdep-clean
+	$(call msg,CLEAN,$(BINARY))
+	$(Q)$(RM) -f $(BINARY); \
+	find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+tags:
+	$(call msg,GEN,,tags)
+	$(Q)ctags -R . $(LIBBPF_SRC) $(SUBCMD_SRC)
+
+FORCE:
+
+.PHONY: all FORCE clean tags
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
new file mode 100644
index 000000000000..6956b6350cad
--- /dev/null
+++ b/tools/bpf/resolve_btfids/main.c
@@ -0,0 +1,721 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * resolve_btfids scans Elf object for .BTF_ids section and resolves
+ * its symbols with BTF ID values.
+ *
+ * Each symbol points to 4 bytes data and is expected to have
+ * following name syntax:
+ *
+ * __BTF_ID__<type>__<symbol>[__<id>]
+ *
+ * type is:
+ *
+ *   func    - lookup BTF_KIND_FUNC symbol with <symbol> name
+ *             and store its ID into the data:
+ *
+ *             __BTF_ID__func__vfs_close__1:
+ *             .zero 4
+ *
+ *   struct  - lookup BTF_KIND_STRUCT symbol with <symbol> name
+ *             and store its ID into the data:
+ *
+ *             __BTF_ID__struct__sk_buff__1:
+ *             .zero 4
+ *
+ *   union   - lookup BTF_KIND_UNION symbol with <symbol> name
+ *             and store its ID into the data:
+ *
+ *             __BTF_ID__union__thread_union__1:
+ *             .zero 4
+ *
+ *   typedef - lookup BTF_KIND_TYPEDEF symbol with <symbol> name
+ *             and store its ID into the data:
+ *
+ *             __BTF_ID__typedef__pid_t__1:
+ *             .zero 4
+ *
+ *   set     - store symbol size into first 4 bytes and sort following
+ *             ID list
+ *
+ *             __BTF_ID__set__list:
+ *             .zero 4
+ *             list:
+ *             __BTF_ID__func__vfs_getattr__3:
+ *             .zero 4
+ *             __BTF_ID__func__vfs_fallocate__4:
+ *             .zero 4
+ */
+
+#define  _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/rbtree.h>
+#include <linux/zalloc.h>
+#include <linux/err.h>
+#include <btf.h>
+#include <libbpf.h>
+#include <parse-options.h>
+
+#define BTF_IDS_SECTION	".BTF_ids"
+#define BTF_ID		"__BTF_ID__"
+
+#define BTF_STRUCT	"struct"
+#define BTF_UNION	"union"
+#define BTF_TYPEDEF	"typedef"
+#define BTF_FUNC	"func"
+#define BTF_SET		"set"
+
+#define ADDR_CNT	100
+
+struct btf_id {
+	struct rb_node	 rb_node;
+	char		*name;
+	union {
+		int	 id;
+		int	 cnt;
+	};
+	int		 addr_cnt;
+	Elf64_Addr	 addr[ADDR_CNT];
+};
+
+struct object {
+	const char *path;
+	const char *btf;
+
+	struct {
+		int		 fd;
+		Elf		*elf;
+		Elf_Data	*symbols;
+		Elf_Data	*idlist;
+		int		 symbols_shndx;
+		int		 idlist_shndx;
+		size_t		 strtabidx;
+		unsigned long	 idlist_addr;
+	} efile;
+
+	struct rb_root	sets;
+	struct rb_root	structs;
+	struct rb_root	unions;
+	struct rb_root	typedefs;
+	struct rb_root	funcs;
+
+	int nr_funcs;
+	int nr_structs;
+	int nr_unions;
+	int nr_typedefs;
+};
+
+static int verbose;
+
+int eprintf(int level, int var, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	if (var >= level) {
+		va_start(args, fmt);
+		ret = vfprintf(stderr, fmt, args);
+		va_end(args);
+	}
+	return ret;
+}
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define pr_debug(fmt, ...) \
+	eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debugN(n, fmt, ...) \
+	eprintf(n, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+static bool is_btf_id(const char *name)
+{
+	return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1);
+}
+
+static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
+{
+	struct rb_node *p = root->rb_node;
+	struct btf_id *id;
+	int cmp;
+
+	while (p) {
+		id = rb_entry(p, struct btf_id, rb_node);
+		cmp = strcmp(id->name, name);
+		if (cmp < 0)
+			p = p->rb_left;
+		else if (cmp > 0)
+			p = p->rb_right;
+		else
+			return id;
+	}
+	return NULL;
+}
+
+static struct btf_id*
+btf_id__add(struct rb_root *root, char *name, bool unique)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct btf_id *id;
+	int cmp;
+
+	while (*p != NULL) {
+		parent = *p;
+		id = rb_entry(parent, struct btf_id, rb_node);
+		cmp = strcmp(id->name, name);
+		if (cmp < 0)
+			p = &(*p)->rb_left;
+		else if (cmp > 0)
+			p = &(*p)->rb_right;
+		else
+			return unique ? NULL : id;
+	}
+
+	id = zalloc(sizeof(*id));
+	if (id) {
+		pr_debug("adding symbol %s\n", name);
+		id->name = name;
+		rb_link_node(&id->rb_node, parent, p);
+		rb_insert_color(&id->rb_node, root);
+	}
+	return id;
+}
+
+static char *get_id(const char *prefix_end)
+{
+	/*
+	 * __BTF_ID__func__vfs_truncate__0
+	 * prefix_end =  ^
+	 */
+	char *p, *id = strdup(prefix_end + sizeof("__") - 1);
+
+	if (id) {
+		/*
+		 * __BTF_ID__func__vfs_truncate__0
+		 * id =            ^
+		 *
+		 * cut the unique id part
+		 */
+		p = strrchr(id, '_');
+		p--;
+		if (*p != '_') {
+			free(id);
+			return NULL;
+		}
+		*p = '\0';
+	}
+	return id;
+}
+
+static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
+{
+	char *id;
+
+	id = get_id(name + size);
+	if (!id) {
+		pr_err("FAILED to parse symbol name: %s\n", name);
+		return NULL;
+	}
+
+	return btf_id__add(root, id, false);
+}
+
+static int elf_collect(struct object *obj)
+{
+	Elf_Scn *scn = NULL;
+	size_t shdrstrndx;
+	int idx = 0;
+	Elf *elf;
+	int fd;
+
+	fd = open(obj->path, O_RDWR, 0666);
+	if (fd == -1) {
+		pr_err("FAILED cannot open %s: %s\n",
+			obj->path, strerror(errno));
+		return -1;
+	}
+
+	elf_version(EV_CURRENT);
+
+	elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
+	if (!elf) {
+		pr_err("FAILED cannot create ELF descriptor: %s\n",
+			elf_errmsg(-1));
+		return -1;
+	}
+
+	obj->efile.fd  = fd;
+	obj->efile.elf = elf;
+
+	elf_flagelf(elf, ELF_C_SET, ELF_F_LAYOUT);
+
+	if (elf_getshdrstrndx(elf, &shdrstrndx) != 0) {
+		pr_err("FAILED cannot get shdr str ndx\n");
+		return -1;
+	}
+
+	/*
+	 * Scan all the elf sections and look for save data
+	 * from .BTF_ids section and symbols.
+	 */
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		Elf_Data *data;
+		GElf_Shdr sh;
+		char *name;
+
+		idx++;
+		if (gelf_getshdr(scn, &sh) != &sh) {
+			pr_err("FAILED get section(%d) header\n", idx);
+			return -1;
+		}
+
+		name = elf_strptr(elf, shdrstrndx, sh.sh_name);
+		if (!name) {
+			pr_err("FAILED get section(%d) name\n", idx);
+			return -1;
+		}
+
+		data = elf_getdata(scn, 0);
+		if (!data) {
+			pr_err("FAILED to get section(%d) data from %s\n",
+				idx, name);
+			return -1;
+		}
+
+		pr_debug2("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
+			  idx, name, (unsigned long) data->d_size,
+			  (int) sh.sh_link, (unsigned long) sh.sh_flags,
+			  (int) sh.sh_type);
+
+		if (sh.sh_type == SHT_SYMTAB) {
+			obj->efile.symbols       = data;
+			obj->efile.symbols_shndx = idx;
+			obj->efile.strtabidx     = sh.sh_link;
+		} else if (!strcmp(name, BTF_IDS_SECTION)) {
+			obj->efile.idlist       = data;
+			obj->efile.idlist_shndx = idx;
+			obj->efile.idlist_addr  = sh.sh_addr;
+		}
+	}
+
+	return 0;
+}
+
+static int symbols_collect(struct object *obj)
+{
+	Elf_Scn *scn = NULL;
+	int n, i, err = 0;
+	GElf_Shdr sh;
+	char *name;
+
+	scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
+	if (!scn)
+		return -1;
+
+	if (gelf_getshdr(scn, &sh) != &sh)
+		return -1;
+
+	n = sh.sh_size / sh.sh_entsize;
+
+	/*
+	 * Scan symbols and look for the ones starting with
+	 * __BTF_ID__* over .BTF_ids section.
+	 */
+	for (i = 0; !err && i < n; i++) {
+		char *tmp, *prefix;
+		struct btf_id *id;
+		GElf_Sym sym;
+		int err = -1;
+
+		if (!gelf_getsym(obj->efile.symbols, i, &sym))
+			return -1;
+
+		if (sym.st_shndx != obj->efile.idlist_shndx)
+			continue;
+
+		name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+				  sym.st_name);
+
+		if (!is_btf_id(name))
+			continue;
+
+		/*
+		 * __BTF_ID__TYPE__vfs_truncate__0
+		 * prefix =  ^
+		 */
+		prefix = name + sizeof(BTF_ID) - 1;
+
+		/* struct */
+		if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) {
+			obj->nr_structs++;
+			id = add_symbol(&obj->structs, prefix, sizeof(BTF_STRUCT) - 1);
+		/* union  */
+		} else if (!strncmp(prefix, BTF_UNION, sizeof(BTF_UNION) - 1)) {
+			obj->nr_unions++;
+			id = add_symbol(&obj->unions, prefix, sizeof(BTF_UNION) - 1);
+		/* typedef */
+		} else if (!strncmp(prefix, BTF_TYPEDEF, sizeof(BTF_TYPEDEF) - 1)) {
+			obj->nr_typedefs++;
+			id = add_symbol(&obj->typedefs, prefix, sizeof(BTF_TYPEDEF) - 1);
+		/* func */
+		} else if (!strncmp(prefix, BTF_FUNC, sizeof(BTF_FUNC) - 1)) {
+			obj->nr_funcs++;
+			id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1);
+		/* set */
+		} else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) {
+			id = add_symbol(&obj->sets, prefix, sizeof(BTF_SET) - 1);
+			/*
+			 * SET objects store list's count, which is encoded
+			 * in symbol's size, together with 'cnt' field hence
+			 * that - 1.
+			 */
+			if (id)
+				id->cnt = sym.st_size / sizeof(int) - 1;
+		} else {
+			pr_err("FAILED unsupported prefix %s\n", prefix);
+			return -1;
+		}
+
+		if (!id)
+			return -ENOMEM;
+
+		if (id->addr_cnt >= ADDR_CNT) {
+			pr_err("FAILED symbol %s crossed the number of allowed lists",
+				id->name);
+			return -1;
+		}
+		id->addr[id->addr_cnt++] = sym.st_value;
+	}
+
+	return 0;
+}
+
+static struct btf *btf__parse_raw(const char *file)
+{
+	struct btf *btf;
+	struct stat st;
+	__u8 *buf;
+	FILE *f;
+
+	if (stat(file, &st))
+		return NULL;
+
+	f = fopen(file, "rb");
+	if (!f)
+		return NULL;
+
+	buf = malloc(st.st_size);
+	if (!buf) {
+		btf = ERR_PTR(-ENOMEM);
+		goto exit_close;
+	}
+
+	if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) {
+		btf = ERR_PTR(-EINVAL);
+		goto exit_free;
+	}
+
+	btf = btf__new(buf, st.st_size);
+
+exit_free:
+	free(buf);
+exit_close:
+	fclose(f);
+	return btf;
+}
+
+static bool is_btf_raw(const char *file)
+{
+	__u16 magic = 0;
+	int fd, nb_read;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		return false;
+
+	nb_read = read(fd, &magic, sizeof(magic));
+	close(fd);
+	return nb_read == sizeof(magic) && magic == BTF_MAGIC;
+}
+
+static struct btf *btf_open(const char *path)
+{
+	if (is_btf_raw(path))
+		return btf__parse_raw(path);
+	else
+		return btf__parse_elf(path, NULL);
+}
+
+static int symbols_resolve(struct object *obj)
+{
+	int nr_typedefs = obj->nr_typedefs;
+	int nr_structs  = obj->nr_structs;
+	int nr_unions   = obj->nr_unions;
+	int nr_funcs    = obj->nr_funcs;
+	int err, type_id;
+	struct btf *btf;
+	__u32 nr;
+
+	btf = btf_open(obj->btf ?: obj->path);
+	err = libbpf_get_error(btf);
+	if (err) {
+		pr_err("FAILED: load BTF from %s: %s",
+			obj->path, strerror(err));
+		return -1;
+	}
+
+	err = -1;
+	nr  = btf__get_nr_types(btf);
+
+	/*
+	 * Iterate all the BTF types and search for collected symbol IDs.
+	 */
+	for (type_id = 1; type_id <= nr; type_id++) {
+		const struct btf_type *type;
+		struct rb_root *root;
+		struct btf_id *id;
+		const char *str;
+		int *nr;
+
+		type = btf__type_by_id(btf, type_id);
+		if (!type) {
+			pr_err("FAILED: malformed BTF, can't resolve type for ID %d\n",
+				type_id);
+			goto out;
+		}
+
+		if (btf_is_func(type) && nr_funcs) {
+			nr   = &nr_funcs;
+			root = &obj->funcs;
+		} else if (btf_is_struct(type) && nr_structs) {
+			nr   = &nr_structs;
+			root = &obj->structs;
+		} else if (btf_is_union(type) && nr_unions) {
+			nr   = &nr_unions;
+			root = &obj->unions;
+		} else if (btf_is_typedef(type) && nr_typedefs) {
+			nr   = &nr_typedefs;
+			root = &obj->typedefs;
+		} else
+			continue;
+
+		str = btf__name_by_offset(btf, type->name_off);
+		if (!str) {
+			pr_err("FAILED: malformed BTF, can't resolve name for ID %d\n",
+				type_id);
+			goto out;
+		}
+
+		id = btf_id__find(root, str);
+		if (id) {
+			id->id = type_id;
+			(*nr)--;
+		}
+	}
+
+	err = 0;
+out:
+	btf__free(btf);
+	return err;
+}
+
+static int id_patch(struct object *obj, struct btf_id *id)
+{
+	Elf_Data *data = obj->efile.idlist;
+	int *ptr = data->d_buf;
+	int i;
+
+	if (!id->id) {
+		pr_err("FAILED unresolved symbol %s\n", id->name);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < id->addr_cnt; i++) {
+		unsigned long addr = id->addr[i];
+		unsigned long idx = addr - obj->efile.idlist_addr;
+
+		pr_debug("patching addr %5lu: ID %7d [%s]\n",
+			 idx, id->id, id->name);
+
+		if (idx >= data->d_size) {
+			pr_err("FAILED patching index %lu out of bounds %lu\n",
+				idx, data->d_size);
+			return -1;
+		}
+
+		idx = idx / sizeof(int);
+		ptr[idx] = id->id;
+	}
+
+	return 0;
+}
+
+static int __symbols_patch(struct object *obj, struct rb_root *root)
+{
+	struct rb_node *next;
+	struct btf_id *id;
+
+	next = rb_first(root);
+	while (next) {
+		id = rb_entry(next, struct btf_id, rb_node);
+
+		if (id_patch(obj, id))
+			return -1;
+
+		next = rb_next(next);
+	}
+	return 0;
+}
+
+static int cmp_id(const void *pa, const void *pb)
+{
+	const int *a = pa, *b = pb;
+
+	return *a - *b;
+}
+
+static int sets_patch(struct object *obj)
+{
+	Elf_Data *data = obj->efile.idlist;
+	int *ptr = data->d_buf;
+	struct rb_node *next;
+
+	next = rb_first(&obj->sets);
+	while (next) {
+		unsigned long addr, idx;
+		struct btf_id *id;
+		int *base;
+		int cnt;
+
+		id   = rb_entry(next, struct btf_id, rb_node);
+		addr = id->addr[0];
+		idx  = addr - obj->efile.idlist_addr;
+
+		/* sets are unique */
+		if (id->addr_cnt != 1) {
+			pr_err("FAILED malformed data for set '%s'\n",
+				id->name);
+			return -1;
+		}
+
+		idx = idx / sizeof(int);
+		base = &ptr[idx] + 1;
+		cnt = ptr[idx];
+
+		pr_debug("sorting  addr %5lu: cnt %6d [%s]\n",
+			 (idx + 1) * sizeof(int), cnt, id->name);
+
+		qsort(base, cnt, sizeof(int), cmp_id);
+
+		next = rb_next(next);
+	}
+}
+
+static int symbols_patch(struct object *obj)
+{
+	int err;
+
+	if (__symbols_patch(obj, &obj->structs)  ||
+	    __symbols_patch(obj, &obj->unions)   ||
+	    __symbols_patch(obj, &obj->typedefs) ||
+	    __symbols_patch(obj, &obj->funcs)    ||
+	    __symbols_patch(obj, &obj->sets))
+		return -1;
+
+	if (sets_patch(obj))
+		return -1;
+
+	elf_flagdata(obj->efile.idlist, ELF_C_SET, ELF_F_DIRTY);
+
+	err = elf_update(obj->efile.elf, ELF_C_WRITE);
+	if (err < 0) {
+		pr_err("FAILED elf_update(WRITE): %s\n",
+			elf_errmsg(-1));
+	}
+
+	pr_debug("update %s for %s\n",
+		 err >= 0 ? "ok" : "failed", obj->path);
+	return err < 0 ? -1 : 0;
+}
+
+static const char * const resolve_btfids_usage[] = {
+	"resolve_btfids [<options>] <ELF object>",
+	NULL
+};
+
+int main(int argc, const char **argv)
+{
+	bool no_fail = false;
+	struct object obj = {
+		.efile = {
+			.idlist_shndx  = -1,
+			.symbols_shndx = -1,
+		},
+		.structs  = RB_ROOT,
+		.unions   = RB_ROOT,
+		.typedefs = RB_ROOT,
+		.funcs    = RB_ROOT,
+		.sets     = RB_ROOT,
+	};
+	struct option btfid_options[] = {
+		OPT_INCR('v', "verbose", &verbose,
+			 "be more verbose (show errors, etc)"),
+		OPT_STRING(0, "btf", &obj.btf, "BTF data",
+			   "BTF data"),
+		OPT_BOOLEAN(0, "no-fail", &no_fail,
+			   "do not fail if " BTF_IDS_SECTION " section is not found"),
+		OPT_END()
+	};
+	int err = -1;
+
+	argc = parse_options(argc, argv, btfid_options, resolve_btfids_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+	if (argc != 1)
+		usage_with_options(resolve_btfids_usage, btfid_options);
+
+	obj.path = argv[0];
+
+	if (elf_collect(&obj))
+		goto out;
+
+	/*
+	 * We did not find .BTF_ids section or symbols section,
+	 * nothing to do..
+	 */
+	if (obj.efile.idlist_shndx == -1 ||
+	    obj.efile.symbols_shndx == -1) {
+		if (no_fail)
+			return 0;
+		pr_err("FAILED to find needed sections\n");
+		return -1;
+	}
+
+	if (symbols_collect(&obj))
+		goto out;
+
+	if (symbols_resolve(&obj))
+		goto out;
+
+	if (symbols_patch(&obj))
+		goto out;
+
+	err = 0;
+out:
+	if (obj.efile.elf)
+		elf_end(obj.efile.elf);
+	close(obj.efile.fd);
+	return err;
+}
-- 
cgit 


From 33a57ce0a54d498275f432db04850001175dfdfa Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 11 Jul 2020 23:53:22 +0200
Subject: bpf: Compile resolve_btfids tool at kernel compilation start

The resolve_btfids tool will be used during the vmlinux linking,
so it's necessary it's ready for it.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200711215329.41165-3-jolsa@kernel.org
---
 tools/Makefile     | 3 +++
 tools/bpf/Makefile | 9 ++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/Makefile b/tools/Makefile
index bd778812e915..85af6ebbce91 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -67,6 +67,9 @@ cpupower: FORCE
 cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE
 	$(call descend,$@)
 
+bpf/%: FORCE
+	$(call descend,$@)
+
 liblockdep: FORCE
 	$(call descend,lib/lockdep)
 
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 6df1850f8353..74bc9a105225 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -123,5 +123,12 @@ runqslower_install:
 runqslower_clean:
 	$(call descend,runqslower,clean)
 
+resolve_btfids:
+	$(call descend,resolve_btfids)
+
+resolve_btfids_clean:
+	$(call descend,resolve_btfids,clean)
+
 .PHONY: all install clean bpftool bpftool_install bpftool_clean \
-	runqslower runqslower_install runqslower_clean
+	runqslower runqslower_install runqslower_clean \
+	resolve_btfids resolve_btfids_clean
-- 
cgit 


From e5a0516ec9681daf5c1f0d05144d21430b6ca6d7 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 11 Jul 2020 23:53:28 +0200
Subject: tools headers: Adopt verbatim copy of btf_ids.h from kernel sources

It will be needed by bpf selftest for resolve_btfids tool.

Also adding __PASTE macro as btf_ids.h dependency, which is
defined in:

  include/linux/compiler_types.h

but because tools/include do not have this header, I'm putting
the macro into linux/compiler.h header.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200711215329.41165-9-jolsa@kernel.org
---
 tools/include/linux/btf_ids.h  | 87 ++++++++++++++++++++++++++++++++++++++++++
 tools/include/linux/compiler.h |  4 ++
 2 files changed, 91 insertions(+)
 create mode 100644 tools/include/linux/btf_ids.h

(limited to 'tools')

diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
new file mode 100644
index 000000000000..fe019774f8a7
--- /dev/null
+++ b/tools/include/linux/btf_ids.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_BTF_IDS_H
+#define _LINUX_BTF_IDS_H
+
+#include <linux/compiler.h> /* for __PASTE */
+
+/*
+ * Following macros help to define lists of BTF IDs placed
+ * in .BTF_ids section. They are initially filled with zeros
+ * (during compilation) and resolved later during the
+ * linking phase by resolve_btfids tool.
+ *
+ * Any change in list layout must be reflected in resolve_btfids
+ * tool logic.
+ */
+
+#define BTF_IDS_SECTION ".BTF_ids"
+
+#define ____BTF_ID(symbol)				\
+asm(							\
+".pushsection " BTF_IDS_SECTION ",\"a\";       \n"	\
+".local " #symbol " ;                          \n"	\
+".type  " #symbol ", @object;                  \n"	\
+".size  " #symbol ", 4;                        \n"	\
+#symbol ":                                     \n"	\
+".zero 4                                       \n"	\
+".popsection;                                  \n");
+
+#define __BTF_ID(symbol) \
+	____BTF_ID(symbol)
+
+#define __ID(prefix) \
+	__PASTE(prefix, __COUNTER__)
+
+/*
+ * The BTF_ID defines unique symbol for each ID pointing
+ * to 4 zero bytes.
+ */
+#define BTF_ID(prefix, name) \
+	__BTF_ID(__ID(__BTF_ID__##prefix##__##name##__))
+
+/*
+ * The BTF_ID_LIST macro defines pure (unsorted) list
+ * of BTF IDs, with following layout:
+ *
+ * BTF_ID_LIST(list1)
+ * BTF_ID(type1, name1)
+ * BTF_ID(type2, name2)
+ *
+ * list1:
+ * __BTF_ID__type1__name1__1:
+ * .zero 4
+ * __BTF_ID__type2__name2__2:
+ * .zero 4
+ *
+ */
+#define __BTF_ID_LIST(name)				\
+asm(							\
+".pushsection " BTF_IDS_SECTION ",\"a\";       \n"	\
+".local " #name ";                             \n"	\
+#name ":;                                      \n"	\
+".popsection;                                  \n");	\
+
+#define BTF_ID_LIST(name)				\
+__BTF_ID_LIST(name)					\
+extern u32 name[];
+
+/*
+ * The BTF_ID_UNUSED macro defines 4 zero bytes.
+ * It's used when we want to define 'unused' entry
+ * in BTF_ID_LIST, like:
+ *
+ *   BTF_ID_LIST(bpf_skb_output_btf_ids)
+ *   BTF_ID(struct, sk_buff)
+ *   BTF_ID_UNUSED
+ *   BTF_ID(struct, task_struct)
+ */
+
+#define BTF_ID_UNUSED					\
+asm(							\
+".pushsection " BTF_IDS_SECTION ",\"a\";       \n"	\
+".zero 4                                       \n"	\
+".popsection;                                  \n");
+
+
+#endif
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 9f9002734e19..6eac24d44e81 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -201,4 +201,8 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 # define __fallthrough
 #endif
 
+/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
+#define ___PASTE(a, b) a##b
+#define __PASTE(a, b) ___PASTE(a, b)
+
 #endif /* _TOOLS_LINUX_COMPILER_H */
-- 
cgit 


From cc15a20d5f3abc3cbd7911b70156b7b9e2bc7d41 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 11 Jul 2020 23:53:29 +0200
Subject: selftests/bpf: Add test for resolve_btfids

Adding resolve_btfids test under test_progs suite.

It's possible to use btf_ids.h header and its logic in
user space application, so we can add easy test for it.

The test defines BTF_ID_LIST and checks it gets properly
resolved.

For this reason the test_progs binary (and other binaries
that use TRUNNER* macros) is processed with resolve_btfids
tool, which resolves BTF IDs in .BTF_ids section. The BTF
data are taken from btf_data.o object rceated from
progs/btf_data.c.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200711215329.41165-10-jolsa@kernel.org
---
 tools/testing/selftests/bpf/Makefile               |  15 ++-
 .../selftests/bpf/prog_tests/resolve_btfids.c      | 111 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/btf_data.c       |  50 ++++++++++
 3 files changed, 175 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
 create mode 100644 tools/testing/selftests/bpf/progs/btf_data.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1f9c696b3edf..e7a8cf83ba48 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -111,6 +111,7 @@ SCRATCH_DIR := $(OUTPUT)/tools
 BUILD_DIR := $(SCRATCH_DIR)/build
 INCLUDE_DIR := $(SCRATCH_DIR)/include
 BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids
 
 # Define simple and short `make test_progs`, `make test_sysctl`, etc targets
 # to build individual tests.
@@ -177,7 +178,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
 		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
 
-$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
+$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR):
 	$(call msg,MKDIR,,$@)
 	mkdir -p $@
 
@@ -190,6 +191,16 @@ else
 	cp "$(VMLINUX_H)" $@
 endif
 
+$(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids	\
+		       $(TOOLSDIR)/bpf/resolve_btfids/main.c	\
+		       $(TOOLSDIR)/lib/rbtree.c			\
+		       $(TOOLSDIR)/lib/zalloc.c			\
+		       $(TOOLSDIR)/lib/string.c			\
+		       $(TOOLSDIR)/lib/ctype.c			\
+		       $(TOOLSDIR)/lib/str_error_r.c
+	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids	\
+		OUTPUT=$(BUILD_DIR)/resolve_btfids/ BPFOBJ=$(BPFOBJ)
+
 # Get Clang's default includes on this system, as opposed to those seen by
 # '-target bpf'. This fixes "missing" files on some architectures/distros,
 # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
@@ -352,9 +363,11 @@ endif
 
 $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
 			     $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ)		\
+			     $(RESOLVE_BTFIDS)				\
 			     | $(TRUNNER_BINARY)-extras
 	$$(call msg,BINARY,,$$@)
 	$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+	$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@
 
 endef
 
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
new file mode 100644
index 000000000000..403be6f36cba
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/err.h>
+#include <string.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+#include <linux/kernel.h>
+#include <linux/btf_ids.h>
+#include "test_progs.h"
+
+static int duration;
+
+struct symbol {
+	const char	*name;
+	int		 type;
+	int		 id;
+};
+
+struct symbol test_symbols[] = {
+	{ "unused",  BTF_KIND_UNKN,     0 },
+	{ "S",       BTF_KIND_TYPEDEF, -1 },
+	{ "T",       BTF_KIND_TYPEDEF, -1 },
+	{ "U",       BTF_KIND_TYPEDEF, -1 },
+	{ "S",       BTF_KIND_STRUCT,  -1 },
+	{ "U",       BTF_KIND_UNION,   -1 },
+	{ "func",    BTF_KIND_FUNC,    -1 },
+};
+
+BTF_ID_LIST(test_list)
+BTF_ID_UNUSED
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct,  S)
+BTF_ID(union,   U)
+BTF_ID(func,    func)
+
+static int
+__resolve_symbol(struct btf *btf, int type_id)
+{
+	const struct btf_type *type;
+	const char *str;
+	unsigned int i;
+
+	type = btf__type_by_id(btf, type_id);
+	if (!type) {
+		PRINT_FAIL("Failed to get type for ID %d\n", type_id);
+		return -1;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
+		if (test_symbols[i].id != -1)
+			continue;
+
+		if (BTF_INFO_KIND(type->info) != test_symbols[i].type)
+			continue;
+
+		str = btf__name_by_offset(btf, type->name_off);
+		if (!str) {
+			PRINT_FAIL("Failed to get name for BTF ID %d\n", type_id);
+			return -1;
+		}
+
+		if (!strcmp(str, test_symbols[i].name))
+			test_symbols[i].id = type_id;
+	}
+
+	return 0;
+}
+
+static int resolve_symbols(void)
+{
+	struct btf *btf;
+	int type_id;
+	__u32 nr;
+
+	btf = btf__parse_elf("btf_data.o", NULL);
+	if (CHECK(libbpf_get_error(btf), "resolve",
+		  "Failed to load BTF from btf_data.o\n"))
+		return -1;
+
+	nr = btf__get_nr_types(btf);
+
+	for (type_id = 1; type_id <= nr; type_id++) {
+		if (__resolve_symbol(btf, type_id))
+			break;
+	}
+
+	btf__free(btf);
+	return 0;
+}
+
+int test_resolve_btfids(void)
+{
+	unsigned int i;
+	int ret = 0;
+
+	if (resolve_symbols())
+		return -1;
+
+	/* Check BTF_ID_LIST(test_list) IDs */
+	for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
+		ret = CHECK(test_list[i] != test_symbols[i].id,
+			    "id_check",
+			    "wrong ID for %s (%d != %d)\n", test_symbols[i].name,
+			    test_list[i], test_symbols[i].id);
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_data.c b/tools/testing/selftests/bpf/progs/btf_data.c
new file mode 100644
index 000000000000..baa525275bde
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_data.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+struct S {
+	int	a;
+	int	b;
+	int	c;
+};
+
+union U {
+	int	a;
+	int	b;
+	int	c;
+};
+
+struct S1 {
+	int	a;
+	int	b;
+	int	c;
+};
+
+union U1 {
+	int	a;
+	int	b;
+	int	c;
+};
+
+typedef int T;
+typedef int S;
+typedef int U;
+typedef int T1;
+typedef int S1;
+typedef int U1;
+
+struct root_struct {
+	S		m_1;
+	T		m_2;
+	U		m_3;
+	S1		m_4;
+	T1		m_5;
+	U1		m_6;
+	struct S	m_7;
+	struct S1	m_8;
+	union  U	m_9;
+	union  U1	m_10;
+};
+
+int func(struct root_struct *root)
+{
+	return 0;
+}
-- 
cgit 


From 8aa5a33578e9685d06020bd10d1637557423e945 Mon Sep 17 00:00:00 2001
From: Ciara Loftus <ciara.loftus@intel.com>
Date: Wed, 8 Jul 2020 07:28:33 +0000
Subject: xsk: Add new statistics

It can be useful for the user to know the reason behind a dropped packet.
Introduce new counters which track drops on the receive path caused by:
1. rx ring being full
2. fill ring being empty

Also, on the tx path introduce a counter which tracks the number of times
we attempt pull from the tx ring when it is empty.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200708072835.4427-2-ciara.loftus@intel.com
---
 tools/include/uapi/linux/if_xdp.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index be328c59389d..a78a8096f4ce 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -73,9 +73,12 @@ struct xdp_umem_reg {
 };
 
 struct xdp_statistics {
-	__u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+	__u64 rx_dropped; /* Dropped for other reasons */
 	__u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
 	__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+	__u64 rx_ring_full; /* Dropped due to rx ring being full */
+	__u64 rx_fill_ring_empty_descs; /* Failed to retrieve item from fill ring */
+	__u64 tx_ring_empty_descs; /* Failed to retrieve item from tx ring */
 };
 
 struct xdp_options {
-- 
cgit 


From 93776cb9ee91aeed43ba53dcec97ffed4ae6f1f7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 10 Jul 2020 16:26:04 -0700
Subject: tools/bpftool: Remove warning about PID iterator support

Don't emit warning that bpftool was built without PID iterator support. This
error garbles JSON output of otherwise perfectly valid show commands.

Reported-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200710232605.20918-1-andriin@fb.com
---
 tools/bpf/bpftool/pids.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index c0d23ce4a6f4..e3b116325403 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -15,7 +15,6 @@
 
 int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
 {
-	p_err("bpftool built without PID iterator support");
 	return -ENOTSUP;
 }
 void delete_obj_refs_table(struct obj_refs_table *table) {}
-- 
cgit 


From 59e8b60bf068180fcadb0ae06ce8f6f835132ce6 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Mon, 13 Jul 2020 12:52:34 +0100
Subject: selftests/bpf: Add selftests verifying bpf_trace_printk() behaviour

Simple selftests that verifies bpf_trace_printk() returns a sensible
value and tracing messages appear.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/1594641154-18897-3-git-send-email-alan.maguire@oracle.com
---
 .../selftests/bpf/prog_tests/trace_printk.c        | 75 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/trace_printk.c   | 21 ++++++
 2 files changed, 96 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/trace_printk.c
 create mode 100644 tools/testing/selftests/bpf/progs/trace_printk.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
new file mode 100644
index 000000000000..39b0decb1bb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+
+#include "trace_printk.skel.h"
+
+#define TRACEBUF	"/sys/kernel/debug/tracing/trace_pipe"
+#define SEARCHMSG	"testing,testing"
+
+void test_trace_printk(void)
+{
+	int err, iter = 0, duration = 0, found = 0;
+	struct trace_printk__bss *bss;
+	struct trace_printk *skel;
+	char *buf = NULL;
+	FILE *fp = NULL;
+	size_t buflen;
+
+	skel = trace_printk__open();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+
+	err = trace_printk__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+		goto cleanup;
+
+	bss = skel->bss;
+
+	err = trace_printk__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	fp = fopen(TRACEBUF, "r");
+	if (CHECK(fp == NULL, "could not open trace buffer",
+		  "error %d opening %s", errno, TRACEBUF))
+		goto cleanup;
+
+	/* We do not want to wait forever if this test fails... */
+	fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
+
+	/* wait for tracepoint to trigger */
+	usleep(1);
+	trace_printk__detach(skel);
+
+	if (CHECK(bss->trace_printk_ran == 0,
+		  "bpf_trace_printk never ran",
+		  "ran == %d", bss->trace_printk_ran))
+		goto cleanup;
+
+	if (CHECK(bss->trace_printk_ret <= 0,
+		  "bpf_trace_printk returned <= 0 value",
+		  "got %d", bss->trace_printk_ret))
+		goto cleanup;
+
+	/* verify our search string is in the trace buffer */
+	while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) {
+		if (strstr(buf, SEARCHMSG) != NULL)
+			found++;
+		if (found == bss->trace_printk_ran)
+			break;
+		if (++iter > 1000)
+			break;
+	}
+
+	if (CHECK(!found, "message from bpf_trace_printk not found",
+		  "no instance of %s in %s", SEARCHMSG, TRACEBUF))
+		goto cleanup;
+
+cleanup:
+	trace_printk__destroy(skel);
+	free(buf);
+	if (fp)
+		fclose(fp);
+}
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
new file mode 100644
index 000000000000..8ca7f399b670
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020, Oracle and/or its affiliates.
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int trace_printk_ret = 0;
+int trace_printk_ran = 0;
+
+SEC("tp/raw_syscalls/sys_enter")
+int sys_enter(void *ctx)
+{
+	static const char fmt[] = "testing,testing %d\n";
+
+	trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
+					    ++trace_printk_ran);
+	return 0;
+}
-- 
cgit 


From 7c819e701382d4969ca4837b8cbe157895f5d0bf Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 13 Jul 2020 16:24:08 -0700
Subject: libbpf: Support stripping modifiers for btf_dump

One important use case when emitting const/volatile/restrict is undesirable is
BPF skeleton generation of DATASEC layout. These are further memory-mapped and
can be written/read from user-space directly.

For important case of .rodata variables, bpftool strips away first-level
modifiers, to make their use on user-space side simple and not requiring extra
type casts to override compiler complaining about writing to const variables.

This logic works mostly fine, but breaks in some more complicated cases. E.g.:

    const volatile int params[10];

Because in BTF it's a chain of ARRAY -> CONST -> VOLATILE -> INT, bpftool
stops at ARRAY and doesn't strip CONST and VOLATILE. In skeleton this variable
will be emitted as is. So when used from user-space, compiler will complain
about writing to const array. This is problematic, as also mentioned in [0].

To solve this for arrays and other non-trivial cases (e.g., inner
const/volatile fields inside the struct), teach btf_dump to strip away any
modifier, when requested. This is done as an extra option on
btf_dump__emit_type_decl() API.

Reported-by: Anton Protopopov <a.s.protopopov@gmail.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200713232409.3062144-2-andriin@fb.com
---
 tools/lib/bpf/btf.h      |  2 ++
 tools/lib/bpf/btf_dump.c | 10 ++++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index a3b7ef9b737f..227d3f844801 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -144,6 +144,8 @@ struct btf_dump_emit_type_decl_opts {
 	 * necessary indentation already
 	 */
 	int indent_level;
+	/* strip all the const/volatile/restrict mods */
+	bool strip_mods;
 };
 #define btf_dump_emit_type_decl_opts__last_field indent_level
 
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index bbb430317260..e1c344504cae 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -60,6 +60,7 @@ struct btf_dump {
 	const struct btf_ext *btf_ext;
 	btf_dump_printf_fn_t printf_fn;
 	struct btf_dump_opts opts;
+	bool strip_mods;
 
 	/* per-type auxiliary state */
 	struct btf_dump_type_aux_state *type_states;
@@ -1032,7 +1033,9 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
 
 	fname = OPTS_GET(opts, field_name, "");
 	lvl = OPTS_GET(opts, indent_level, 0);
+	d->strip_mods = OPTS_GET(opts, strip_mods, false);
 	btf_dump_emit_type_decl(d, id, fname, lvl);
+	d->strip_mods = false;
 	return 0;
 }
 
@@ -1045,6 +1048,10 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
 
 	stack_start = d->decl_stack_cnt;
 	for (;;) {
+		t = btf__type_by_id(d->btf, id);
+		if (d->strip_mods && btf_is_mod(t))
+			goto skip_mod;
+
 		err = btf_dump_push_decl_stack_id(d, id);
 		if (err < 0) {
 			/*
@@ -1056,12 +1063,11 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
 			d->decl_stack_cnt = stack_start;
 			return;
 		}
-
+skip_mod:
 		/* VOID */
 		if (id == 0)
 			break;
 
-		t = btf__type_by_id(d->btf, id);
 		switch (btf_kind(t)) {
 		case BTF_KIND_PTR:
 		case BTF_KIND_VOLATILE:
-- 
cgit 


From 0b20933d8cfe2bf6473e9b581b5d1ed9a2117ecc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 13 Jul 2020 16:24:09 -0700
Subject: tools/bpftool: Strip away modifiers from global variables

Reliably remove all the type modifiers from read-only (.rodata) global
variable definitions, including cases of inner field const modifiers and
arrays of const values.

Also modify one of selftests to ensure that const volatile struct doesn't
prevent user-space from modifying .rodata variable.

Fixes: 985ead416df3 ("bpftool: Add skeleton codegen command")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200713232409.3062144-3-andriin@fb.com
---
 tools/bpf/bpftool/gen.c                           | 23 ++++++++++-------------
 tools/lib/bpf/btf.h                               |  2 +-
 tools/testing/selftests/bpf/prog_tests/skeleton.c |  6 +++---
 tools/testing/selftests/bpf/progs/test_skeleton.c |  6 ++++--
 4 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 10de76b296ba..b59d26e89367 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -88,7 +88,7 @@ static const char *get_map_ident(const struct bpf_map *map)
 		return NULL;
 }
 
-static void codegen_btf_dump_printf(void *ct, const char *fmt, va_list args)
+static void codegen_btf_dump_printf(void *ctx, const char *fmt, va_list args)
 {
 	vprintf(fmt, args);
 }
@@ -104,17 +104,20 @@ static int codegen_datasec_def(struct bpf_object *obj,
 	int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec);
 	const char *sec_ident;
 	char var_ident[256];
+	bool strip_mods = false;
 
-	if (strcmp(sec_name, ".data") == 0)
+	if (strcmp(sec_name, ".data") == 0) {
 		sec_ident = "data";
-	else if (strcmp(sec_name, ".bss") == 0)
+	} else if (strcmp(sec_name, ".bss") == 0) {
 		sec_ident = "bss";
-	else if (strcmp(sec_name, ".rodata") == 0)
+	} else if (strcmp(sec_name, ".rodata") == 0) {
 		sec_ident = "rodata";
-	else if (strcmp(sec_name, ".kconfig") == 0)
+		strip_mods = true;
+	} else if (strcmp(sec_name, ".kconfig") == 0) {
 		sec_ident = "kconfig";
-	else
+	} else {
 		return 0;
+	}
 
 	printf("	struct %s__%s {\n", obj_name, sec_ident);
 	for (i = 0; i < vlen; i++, sec_var++) {
@@ -123,16 +126,10 @@ static int codegen_datasec_def(struct bpf_object *obj,
 		DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts,
 			.field_name = var_ident,
 			.indent_level = 2,
+			.strip_mods = strip_mods,
 		);
 		int need_off = sec_var->offset, align_off, align;
 		__u32 var_type_id = var->type;
-		const struct btf_type *t;
-
-		t = btf__type_by_id(btf, var_type_id);
-		while (btf_is_mod(t)) {
-			var_type_id = t->type;
-			t = btf__type_by_id(btf, var_type_id);
-		}
 
 		if (off > need_off) {
 			p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n",
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 227d3f844801..491c7b41ffdc 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -147,7 +147,7 @@ struct btf_dump_emit_type_decl_opts {
 	/* strip all the const/volatile/restrict mods */
 	bool strip_mods;
 };
-#define btf_dump_emit_type_decl_opts__last_field indent_level
+#define btf_dump_emit_type_decl_opts__last_field strip_mods
 
 LIBBPF_API int
 btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index fa153cf67b1b..fe87b77af459 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -41,7 +41,7 @@ void test_skeleton(void)
 	CHECK(bss->in4 != 0, "in4", "got %lld != exp %lld\n", bss->in4, 0LL);
 	CHECK(bss->out4 != 0, "out4", "got %lld != exp %lld\n", bss->out4, 0LL);
 
-	CHECK(rodata->in6 != 0, "in6", "got %d != exp %d\n", rodata->in6, 0);
+	CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0);
 	CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0);
 
 	/* validate we can pre-setup global variables, even in .bss */
@@ -49,7 +49,7 @@ void test_skeleton(void)
 	data->in2 = 11;
 	bss->in3 = 12;
 	bss->in4 = 13;
-	rodata->in6 = 14;
+	rodata->in.in6 = 14;
 
 	err = test_skeleton__load(skel);
 	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
@@ -60,7 +60,7 @@ void test_skeleton(void)
 	CHECK(data->in2 != 11, "in2", "got %lld != exp %lld\n", data->in2, 11LL);
 	CHECK(bss->in3 != 12, "in3", "got %d != exp %d\n", bss->in3, 12);
 	CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL);
-	CHECK(rodata->in6 != 14, "in6", "got %d != exp %d\n", rodata->in6, 14);
+	CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14);
 
 	/* now set new values and attach to get them into outX variables */
 	data->in1 = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 77ae86f44db5..374ccef704e1 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -20,7 +20,9 @@ long long in4 __attribute__((aligned(64))) = 0;
 struct s in5 = {};
 
 /* .rodata section */
-const volatile int in6 = 0;
+const volatile struct {
+	const int in6;
+} in = {};
 
 /* .data section */
 int out1 = -1;
@@ -46,7 +48,7 @@ int handler(const void *ctx)
 	out3 = in3;
 	out4 = in4;
 	out5 = in5;
-	out6 = in6;
+	out6 = in.in6;
 
 	bpf_syscall = CONFIG_BPF_SYSCALL;
 	kern_ver = LINUX_KERNEL_VERSION;
-- 
cgit 


From 1add92121e39ee5e4af21562eb59bd562c8033e3 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Sat, 11 Jul 2020 00:55:15 +0300
Subject: selftests: mlxsw: RED: Test offload of mirror on RED early_drop
 qevent

Add a selftest for offloading a mirror action attached to the block
associated with RED early_drop qevent.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/sch_red_core.sh    | 106 ++++++++++++++++++++-
 .../selftests/drivers/net/mlxsw/sch_red_ets.sh     |  11 +++
 .../selftests/drivers/net/mlxsw/sch_red_root.sh    |   8 ++
 3 files changed, 122 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index 0d347d48c112..45042105ead7 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -121,6 +121,7 @@ h1_destroy()
 h2_create()
 {
 	host_create $h2 2
+	tc qdisc add dev $h2 clsact
 
 	# Some of the tests in this suite use multicast traffic. As this traffic
 	# enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
@@ -141,6 +142,7 @@ h2_create()
 h2_destroy()
 {
 	ethtool -s $h2 autoneg on
+	tc qdisc del dev $h2 clsact
 	host_destroy $h2
 }
 
@@ -336,6 +338,17 @@ get_qdisc_npackets()
 		qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets
 }
 
+send_packets()
+{
+	local vlan=$1; shift
+	local proto=$1; shift
+	local pkts=$1; shift
+
+	$MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
+	    -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
+	    -t $proto -q -c $pkts "$@"
+}
+
 # This sends traffic in an attempt to build a backlog of $size. Returns 0 on
 # success. After 10 failed attempts it bails out and returns 1. It dumps the
 # backlog size to stdout.
@@ -364,9 +377,7 @@ build_backlog()
 			return 1
 		fi
 
-		$MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
-		    -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
-		    -t $proto -q -c $pkts "$@"
+		send_packets $vlan $proto $pkts "$@"
 	done
 }
 
@@ -531,3 +542,92 @@ do_mc_backlog_test()
 
 	log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
 }
+
+do_drop_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local trigger=$1; shift
+	local subtest=$1; shift
+	local fetch_counter=$1; shift
+	local backlog
+	local base
+	local now
+	local pct
+
+	RET=0
+
+	start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac
+
+	# Create a bit of a backlog and observe no mirroring due to drops.
+	qevent_rule_install_$subtest
+	base=$($fetch_counter)
+
+	build_backlog $vlan $((2 * limit / 3)) udp >/dev/null
+
+	busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null
+	check_fail $? "Spurious packets observed without buffer pressure"
+
+	qevent_rule_uninstall_$subtest
+
+	# Push to the queue until it's at the limit. The configured limit is
+	# rounded by the qdisc and then by the driver, so this is the best we
+	# can do to get to the real limit of the system. Do this with the rules
+	# uninstalled so that the inevitable drops don't get counted.
+	build_backlog $vlan $((3 * limit / 2)) udp >/dev/null
+
+	qevent_rule_install_$subtest
+	base=$($fetch_counter)
+
+	send_packets $vlan udp 11
+
+	now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter)
+	check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen"
+
+	# When no extra traffic is injected, there should be no mirroring.
+	busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+	check_fail $? "Spurious packets observed"
+
+	# When the rule is uninstalled, there should be no mirroring.
+	qevent_rule_uninstall_$subtest
+	send_packets $vlan udp 11
+	busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+	check_fail $? "Spurious packets observed after uninstall"
+
+	log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd"
+
+	stop_traffic
+	sleep 1
+}
+
+qevent_rule_install_mirror()
+{
+	tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+	   action mirred egress mirror dev $swp2 hw_stats disabled
+}
+
+qevent_rule_uninstall_mirror()
+{
+	tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+qevent_counter_fetch_mirror()
+{
+	tc_rule_handle_stats_get "dev $h2 ingress" 101
+}
+
+do_drop_mirror_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local qevent_name=$1; shift
+
+	tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
+	   flower skip_sw ip_proto udp \
+	   action drop
+
+	do_drop_test "$vlan" "$limit" "$qevent_name" mirror \
+		     qevent_counter_fetch_mirror
+
+	tc filter del dev $h2 ingress pref 1 handle 101 flower
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 1c36c576613b..c8968b041bea 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -7,6 +7,7 @@ ALL_TESTS="
 	ecn_nodrop_test
 	red_test
 	mc_backlog_test
+	red_mirror_test
 "
 : ${QDISC:=ets}
 source sch_red_core.sh
@@ -83,6 +84,16 @@ mc_backlog_test()
 	uninstall_qdisc
 }
 
+red_mirror_test()
+{
+	install_qdisc qevent early_drop block 10
+
+	do_drop_mirror_test 10 $BACKLOG1 early_drop
+	do_drop_mirror_test 11 $BACKLOG2 early_drop
+
+	uninstall_qdisc
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index 558667ea11ec..ede9c38d3eff 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -7,6 +7,7 @@ ALL_TESTS="
 	ecn_nodrop_test
 	red_test
 	mc_backlog_test
+	red_mirror_test
 "
 source sch_red_core.sh
 
@@ -57,6 +58,13 @@ mc_backlog_test()
 	uninstall_qdisc
 }
 
+red_mirror_test()
+{
+	install_qdisc qevent early_drop block 10
+	do_drop_mirror_test 10 $BACKLOG
+	uninstall_qdisc
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit 


From f7d40ee7eff8fde2b0701b80c2aa3ba354f8f520 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 13 Jul 2020 20:55:48 -0700
Subject: selftests/bpf: Fix merge conflict resolution

Remove double definition of structs.

Fixes: 71930d61025e ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index b9c2756c5c97..7de98a68599a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -7,20 +7,6 @@
 
 char _license[] SEC("license") = "GPL";
 
-#define sk_rmem_alloc	sk_backlog.rmem_alloc
-#define sk_refcnt	__sk_common.skc_refcnt
-
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__netlink {
-	struct bpf_iter_meta *meta;
-	struct netlink_sock *sk;
-} __attribute__((preserve_access_index));
-
 static __attribute__((noinline)) struct inode *SOCK_INODE(struct socket *socket)
 {
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
-- 
cgit 


From 11bb2f7a45909f4f64afe471875672ae1b84a380 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 14 Jul 2020 12:25:34 +0200
Subject: bpf: Fix cross build for CONFIG_DEBUG_INFO_BTF option

Stephen and 0-DAY CI Kernel Test Service reported broken cross build
for arm (arm-linux-gnueabi-gcc (GCC) 9.3.0), with following output:

   /tmp/ccMS5uth.s: Assembler messages:
   /tmp/ccMS5uth.s:69: Error: unrecognized symbol type ""
   /tmp/ccMS5uth.s:82: Error: unrecognized symbol type ""

Having '@object' for .type  diretive is  wrong because '@' is comment
character for some architectures. Using STT_OBJECT instead that should
work everywhere.

Also using HOST* variables to build resolve_btfids so it's properly
build in crossbuilds (stolen from objtool's Makefile).

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/bpf/20200714102534.299280-2-jolsa@kernel.org
---
 tools/bpf/resolve_btfids/Makefile | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index 948378ca73d4..a88cd4426398 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -16,6 +16,20 @@ else
   MAKEFLAGS=--no-print-directory
 endif
 
+# always use the host compiler
+ifneq ($(LLVM),)
+HOSTAR  ?= llvm-ar
+HOSTCC  ?= clang
+HOSTLD  ?= ld.lld
+else
+HOSTAR  ?= ar
+HOSTCC  ?= gcc
+HOSTLD  ?= ld
+endif
+AR       = $(HOSTAR)
+CC       = $(HOSTCC)
+LD       = $(HOSTLD)
+
 OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
 
 LIBBPF_SRC := $(srctree)/tools/lib/bpf/
-- 
cgit 


From ffb3adba64801f70c472303c9e386eb5eaec193d Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Tue, 14 Jul 2020 09:34:58 +0200
Subject: net: bridge: Add port attribute IFLA_BRPORT_MRP_IN_OPEN

This patch adds a new port attribute, IFLA_BRPORT_MRP_IN_OPEN, which
allows to notify the userspace when the node lost the contiuity of
MRP_InTest frames.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/include/uapi/linux/if_link.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index cafedbbfefbe..781e482dc499 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -344,6 +344,7 @@ enum {
 	IFLA_BRPORT_ISOLATED,
 	IFLA_BRPORT_BACKUP_PORT,
 	IFLA_BRPORT_MRP_RING_OPEN,
+	IFLA_BRPORT_MRP_IN_OPEN,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
-- 
cgit 


From afe231d32eb5fa4f4975596958c48c3a77fa1773 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 15 Jul 2020 11:27:30 +0300
Subject: selftests: forwarding: Add tc-police tests

Test tc-police action in various scenarios such as Rx policing, Tx
policing, shared policer and police piped to mirred. The test passes
with both veth pairs and loopbacked ports.

# ./tc_police.sh
TEST: police on rx                                                  [ OK ]
TEST: police on tx                                                  [ OK ]
TEST: police with shared policer - rx                               [ OK ]
TEST: police with shared policer - tx                               [ OK ]
TEST: police rx and mirror                                          [ OK ]
TEST: police tx and mirror                                          [ OK ]

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/tc_police.sh  | 333 +++++++++++++++++++++
 1 file changed, 333 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/tc_police.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
new file mode 100755
index 000000000000..160f9cccdfb7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -0,0 +1,333 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test tc-police action.
+#
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |                                                                           |
+# |        198.51.100.2/24                           203.0.113.2/24           |
+# |    + $rp2                                    + $rp3                       |
+# |    |                                         |                            |
+# +----|-----------------------------------------|----------------------------+
+#      |                                         |
+# +----|----------------------------+       +----|----------------------------+
+# |    |  default via 198.51.100.2  |       |    |  default via 203.0.113.2   |
+# |    |                            |       |    |                            |
+# |    | 198.51.100.1/24            |       |    | 203.0.113.1/24             |
+# |    + $h2                        |       |    + $h3                        |
+# | H2 (vrf)                        |       | H3 (vrf)                        |
+# +---------------------------------+       +---------------------------------+
+
+ALL_TESTS="
+	police_rx_test
+	police_tx_test
+	police_shared_test
+	police_rx_mirror_test
+	police_tx_mirror_test
+"
+NUM_NETIFS=6
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/24
+}
+
+h3_create()
+{
+	simple_if_init $h3 203.0.113.1/24
+
+	ip -4 route add default vrf v$h3 nexthop via 203.0.113.2
+
+	tc qdisc add dev $h3 clsact
+}
+
+h3_destroy()
+{
+	tc qdisc del dev $h3 clsact
+
+	ip -4 route del default vrf v$h3 nexthop via 203.0.113.2
+
+	simple_if_fini $h3 203.0.113.1/24
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+	ip link set dev $rp3 up
+
+	__addr_add_del $rp1 add 192.0.2.2/24
+	__addr_add_del $rp2 add 198.51.100.2/24
+	__addr_add_del $rp3 add 203.0.113.2/24
+
+	tc qdisc add dev $rp1 clsact
+	tc qdisc add dev $rp2 clsact
+}
+
+router_destroy()
+{
+	tc qdisc del dev $rp2 clsact
+	tc qdisc del dev $rp1 clsact
+
+	__addr_add_del $rp3 del 203.0.113.2/24
+	__addr_add_del $rp2 del 198.51.100.2/24
+	__addr_add_del $rp1 del 192.0.2.2/24
+
+	ip link set dev $rp3 down
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+police_common_test()
+{
+	local test_name=$1; shift
+
+	RET=0
+
+	# Rule to measure bandwidth on ingress of $h2
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action drop
+
+	mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+	local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+	sleep 10
+	local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+	local er=$((80 * 1000 * 1000))
+	local nr=$(rate $t0 $t1 10)
+	local nr_pct=$((100 * (nr - er) / er))
+	((-10 <= nr_pct && nr_pct <= 10))
+	check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+	log_test "$test_name"
+
+	{ kill %% && wait %%; } 2>/dev/null
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_rx_test()
+{
+	# Rule to police traffic destined to $h2 on ingress of $rp1
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action police rate 80mbit burst 16k conform-exceed drop/ok
+
+	police_common_test "police on rx"
+
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_tx_test()
+{
+	# Rule to police traffic destined to $h2 on egress of $rp2
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action police rate 80mbit burst 16k conform-exceed drop/ok
+
+	police_common_test "police on tx"
+
+	tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
+police_shared_common_test()
+{
+	local dport=$1; shift
+	local test_name=$1; shift
+
+	RET=0
+
+	mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=$dport -p 1000 -c 0 -q &
+
+	local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+	sleep 10
+	local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+	local er=$((80 * 1000 * 1000))
+	local nr=$(rate $t0 $t1 10)
+	local nr_pct=$((100 * (nr - er) / er))
+	((-10 <= nr_pct && nr_pct <= 10))
+	check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+	log_test "$test_name"
+
+	{ kill %% && wait %%; } 2>/dev/null
+}
+
+police_shared_test()
+{
+	# Rule to measure bandwidth on ingress of $h2
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp src_port 12345 \
+		action drop
+
+	# Rule to police traffic destined to $h2 on ingress of $rp1
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action police rate 80mbit burst 16k conform-exceed drop/ok \
+		index 10
+
+	# Rule to police a different flow destined to $h2 on egress of $rp2
+	# using same policer
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 22222 \
+		action police index 10
+
+	police_shared_common_test 54321 "police with shared policer - rx"
+
+	police_shared_common_test 22222 "police with shared policer - tx"
+
+	tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_mirror_common_test()
+{
+	local pol_if=$1; shift
+	local dir=$1; shift
+	local test_name=$1; shift
+
+	RET=0
+
+	# Rule to measure bandwidth on ingress of $h2
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action drop
+
+	# Rule to measure bandwidth of mirrored traffic on ingress of $h3
+	tc filter add dev $h3 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action drop
+
+	# Rule to police traffic destined to $h2 and mirror to $h3
+	tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action police rate 80mbit burst 16k conform-exceed drop/pipe \
+		action mirred egress mirror dev $rp3
+
+	mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+	local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+	sleep 10
+	local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+	local er=$((80 * 1000 * 1000))
+	local nr=$(rate $t0 $t1 10)
+	local nr_pct=$((100 * (nr - er) / er))
+	((-10 <= nr_pct && nr_pct <= 10))
+	check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+	local t0=$(tc_rule_stats_get $h3 1 ingress .bytes)
+	sleep 10
+	local t1=$(tc_rule_stats_get $h3 1 ingress .bytes)
+
+	local er=$((80 * 1000 * 1000))
+	local nr=$(rate $t0 $t1 10)
+	local nr_pct=$((100 * (nr - er) / er))
+	((-10 <= nr_pct && nr_pct <= 10))
+	check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+	log_test "$test_name"
+
+	{ kill %% && wait %%; } 2>/dev/null
+	tc filter del dev $pol_if $dir protocol ip pref 1 handle 101 flower
+	tc filter del dev $h3 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_rx_mirror_test()
+{
+	police_mirror_common_test $rp1 ingress "police rx and mirror"
+}
+
+police_tx_mirror_test()
+{
+	police_mirror_common_test $rp2 egress "police tx and mirror"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	h3_create
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit 


From cb12d176326722e2f4ede4e459e86ea36607c555 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 15 Jul 2020 11:27:31 +0300
Subject: selftests: mlxsw: tc_restrictions: Test tc-police restrictions

Test that upper and lower limits on rate and burst size imposed by the
device are rejected by the kernel.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/tc_restrictions.sh | 76 ++++++++++++++++++++++
 1 file changed, 76 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 9241250c5921..553cb9fad508 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -11,6 +11,8 @@ ALL_TESTS="
 	matchall_mirror_behind_flower_ingress_test
 	matchall_sample_behind_flower_ingress_test
 	matchall_mirror_behind_flower_egress_test
+	police_limits_test
+	multi_police_test
 "
 NUM_NETIFS=2
 
@@ -287,6 +289,80 @@ matchall_mirror_behind_flower_egress_test()
 	matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
 }
 
+police_limits_test()
+{
+	RET=0
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 0.5kbit burst 1m conform-exceed drop/ok
+	check_fail $? "Incorrect success to add police action with too low rate"
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 2.5tbit burst 1g conform-exceed drop/ok
+	check_fail $? "Incorrect success to add police action with too high rate"
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.5kbit burst 1m conform-exceed drop/ok
+	check_err $? "Failed to add police action with low rate"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.9tbit burst 1g conform-exceed drop/ok
+	check_err $? "Failed to add police action with high rate"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.5kbit burst 512b conform-exceed drop/ok
+	check_fail $? "Incorrect success to add police action with too low burst size"
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.5kbit burst 2k conform-exceed drop/ok
+	check_err $? "Failed to add police action with low burst size"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "police rate and burst limits"
+}
+
+multi_police_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have multiple police
+	# actions in a single rule.
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/ok
+	check_err $? "Failed to add rule with single police action"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/pipe \
+		action police rate 200mbit burst 200k conform-exceed drop/ok
+	check_fail $? "Incorrect success to add rule with two police actions"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "multi police"
+}
+
 setup_prepare()
 {
 	swp1=${NETIFS[p1]}
-- 
cgit 


From 5061e773264b11fdc51c61d6619aca76efd3424f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 15 Jul 2020 11:27:32 +0300
Subject: selftests: mlxsw: Add scale test for tc-police

Query the maximum number of supported policers using devlink-resource
and test that this number can be reached by configuring tc filters with
police action. Test that an error is returned in case the maximum number
is exceeded.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/mlxsw/spectrum-2/resource_scale.sh |  2 +-
 .../net/mlxsw/spectrum-2/tc_police_scale.sh        | 16 ++++
 .../drivers/net/mlxsw/spectrum/resource_scale.sh   |  2 +-
 .../drivers/net/mlxsw/spectrum/tc_police_scale.sh  | 16 ++++
 .../selftests/drivers/net/mlxsw/tc_police_scale.sh | 92 ++++++++++++++++++++++
 5 files changed, 126 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
 create mode 100644 tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
 create mode 100644 tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index fd583a171db7..d7cf33a3f18d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -28,7 +28,7 @@ cleanup()
 
 trap cleanup EXIT
 
-ALL_TESTS="router tc_flower mirror_gre"
+ALL_TESTS="router tc_flower mirror_gre tc_police"
 for current_test in ${TESTS:-$ALL_TESTS}; do
 	source ${current_test}_scale.sh
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
new file mode 100644
index 000000000000..e79ac0dad1f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
+tc_police_get_target()
+{
+	local should_fail=$1; shift
+	local target
+
+	target=$(devlink_resource_size_get global_policers single_rate_policers)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 43ba1b438f6d..43f662401bc3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -22,7 +22,7 @@ cleanup()
 devlink_sp_read_kvd_defaults
 trap cleanup EXIT
 
-ALL_TESTS="router tc_flower mirror_gre"
+ALL_TESTS="router tc_flower mirror_gre tc_police"
 for current_test in ${TESTS:-$ALL_TESTS}; do
 	source ${current_test}_scale.sh
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
new file mode 100644
index 000000000000..e79ac0dad1f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
+tc_police_get_target()
+{
+	local should_fail=$1; shift
+	local target
+
+	target=$(devlink_resource_size_get global_policers single_rate_policers)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
new file mode 100644
index 000000000000..4b96561c462f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TC_POLICE_NUM_NETIFS=2
+
+tc_police_h1_create()
+{
+	simple_if_init $h1
+}
+
+tc_police_h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+tc_police_switch_create()
+{
+	simple_if_init $swp1
+	tc qdisc add dev $swp1 clsact
+}
+
+tc_police_switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1
+}
+
+tc_police_rules_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	TC_POLICE_BATCH_FILE="$(mktemp)"
+
+	for ((i = 0; i < count; ++i)); do
+		cat >> $TC_POLICE_BATCH_FILE <<-EOF
+			filter add dev $swp1 ingress \
+				prot ip \
+				flower skip_sw \
+				action police rate 10mbit burst 100k \
+				conform-exceed drop/ok
+		EOF
+	done
+
+	tc -b $TC_POLICE_BATCH_FILE
+	check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_police_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	tc_police_rules_create $count $should_fail
+
+	offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l)
+	((offload_count == count))
+	check_err_fail $should_fail $? "tc police offload count"
+}
+
+tc_police_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	if ! tc_offload_check $TC_POLICE_NUM_NETIFS; then
+		check_err 1 "Could not test offloaded functionality"
+		return
+	fi
+
+	__tc_police_test $count $should_fail
+}
+
+tc_police_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	tc_police_h1_create
+	tc_police_switch_create
+}
+
+tc_police_cleanup()
+{
+	pre_cleanup
+
+	tc_police_switch_destroy
+	tc_police_h1_destroy
+
+	vrf_cleanup
+}
-- 
cgit 


From 46b171d7d73aedfae8070622f1798ac4b124284c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 15 Jul 2020 11:27:33 +0300
Subject: selftests: mlxsw: Test policers' occupancy

Test that policers shared by different tc filters are correctly
reference counted by observing policers' occupancy via devlink-resource.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/tc_police_occ.sh   | 108 +++++++++++++++++++++
 .../selftests/net/forwarding/devlink_lib.sh        |   5 +
 2 files changed, 113 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
new file mode 100755
index 000000000000..448b75c1545a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that policers shared by different tc filters are correctly reference
+# counted by observing policers' occupancy via devlink-resource.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	tc_police_occ_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+switch_create()
+{
+	simple_if_init $swp1
+	tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+tc_police_occ_get()
+{
+	devlink_resource_occ_get global_policers single_rate_policers
+}
+
+tc_police_occ_test()
+{
+	RET=0
+
+	local occ=$(tc_police_occ_get)
+
+	tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/ok
+	(( occ + 1 == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+	tc filter del dev $swp1 ingress pref 1 handle 101 flower
+	(( occ == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+	tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/ok \
+		index 10
+	tc filter add dev $swp1 ingress pref 2 handle 102 proto ip \
+		flower skip_sw action police index 10
+
+	(( occ + 1 == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+	tc filter del dev $swp1 ingress pref 2 handle 102 flower
+	(( occ + 1 == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+	tc filter del dev $swp1 ingress pref 1 handle 101 flower
+	(( occ == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+	log_test "tc police occupancy"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index f0e6be4c09e9..75fe24bcb9cd 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -98,6 +98,11 @@ devlink_resource_size_set()
 	check_err $? "Failed setting path $path to size $size"
 }
 
+devlink_resource_occ_get()
+{
+	devlink_resource_get "$@" | jq '.["occ"]'
+}
+
 devlink_reload()
 {
 	local still_pending
-- 
cgit 


From 644bfe51fa49c22244d24e896cd3fe3ee2f2cfd1 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 14 Jul 2020 15:56:37 +0200
Subject: cpumap: Formalize map value as a named struct

As it has been already done for devmap, introduce 'struct bpf_cpumap_val'
to formalize the expected values that can be passed in for a CPUMAP.
Update cpumap code to use the struct.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/754f950674665dae6139c061d28c1d982aaf4170.1594734381.git.lorenzo@kernel.org
---
 tools/include/uapi/linux/bpf.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 5e386389913a..109623527358 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3849,6 +3849,15 @@ struct bpf_devmap_val {
 	} bpf_prog;
 };
 
+/* CPUMAP map-value layout
+ *
+ * The struct data-layout of map-value is a configuration interface.
+ * New members can only be added to the end of this structure.
+ */
+struct bpf_cpumap_val {
+	__u32 qsize;	/* queue size to remote target CPU */
+};
+
 enum sk_action {
 	SK_DROP = 0,
 	SK_PASS,
-- 
cgit 


From 9216477449f33cdbc9c9a99d49f500b7fbb81702 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 14 Jul 2020 15:56:38 +0200
Subject: bpf: cpumap: Add the possibility to attach an eBPF program to cpumap

Introduce the capability to attach an eBPF program to cpumap entries.
The idea behind this feature is to add the possibility to define on
which CPU run the eBPF program if the underlying hw does not support
RSS. Current supported verdicts are XDP_DROP and XDP_PASS.

This patch has been tested on Marvell ESPRESSObin using xdp_redirect_cpu
sample available in the kernel tree to identify possible performance
regressions. Results show there are no observable differences in
packet-per-second:

$./xdp_redirect_cpu --progname xdp_cpu_map0 --dev eth0 --cpu 1
rx: 354.8 Kpps
rx: 356.0 Kpps
rx: 356.8 Kpps
rx: 356.3 Kpps
rx: 356.6 Kpps
rx: 356.6 Kpps
rx: 356.7 Kpps
rx: 355.8 Kpps
rx: 356.8 Kpps
rx: 356.8 Kpps

Co-developed-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/5c9febdf903d810b3415732e5cd98491d7d9067a.1594734381.git.lorenzo@kernel.org
---
 tools/include/uapi/linux/bpf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 109623527358..c010b57fce3f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -227,6 +227,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_GETSOCKNAME,
 	BPF_XDP_DEVMAP,
 	BPF_CGROUP_INET_SOCK_RELEASE,
+	BPF_XDP_CPUMAP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -3856,6 +3857,10 @@ struct bpf_devmap_val {
  */
 struct bpf_cpumap_val {
 	__u32 qsize;	/* queue size to remote target CPU */
+	union {
+		int   fd;	/* prog fd on map write */
+		__u32 id;	/* prog id on map read */
+	} bpf_prog;
 };
 
 enum sk_action {
-- 
cgit 


From 4be556cf5aef416904743f2cee689351f91445b6 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 14 Jul 2020 15:56:40 +0200
Subject: libbpf: Add SEC name for xdp programs attached to CPUMAP

As for DEVMAP, support SEC("xdp_cpumap/") as a short cut for loading
the program with type BPF_PROG_TYPE_XDP and expected attach type
BPF_XDP_CPUMAP.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/33174c41993a6d860d9c7c1f280a2477ee39ed11.1594734381.git.lorenzo@kernel.org
---
 tools/lib/bpf/libbpf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4489f95f1d1a..f55fd8a5c008 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6912,6 +6912,8 @@ static const struct bpf_sec_def section_defs[] = {
 		.attach_fn = attach_iter),
 	BPF_EAPROG_SEC("xdp_devmap/",		BPF_PROG_TYPE_XDP,
 						BPF_XDP_DEVMAP),
+	BPF_EAPROG_SEC("xdp_cpumap/",		BPF_PROG_TYPE_XDP,
+						BPF_XDP_CPUMAP),
 	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),
 	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
-- 
cgit 


From 05500125021191f703d3759a7e94a37b7257d959 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 14 Jul 2020 15:56:42 +0200
Subject: selftest: Add tests for XDP programs in CPUMAP entries

Similar to what have been done for DEVMAP, introduce tests to verify
ability to add a XDP program to an entry in a CPUMAP.
Verify CPUMAP programs can not be attached to devices as a normal
XDP program, and only programs with BPF_XDP_CPUMAP attach type can
be loaded in a CPUMAP.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/9c632fcea5382ea7b4578bd06b6eddf382c3550b.1594734381.git.lorenzo@kernel.org
---
 .../selftests/bpf/prog_tests/xdp_cpumap_attach.c   | 70 ++++++++++++++++++++++
 .../bpf/progs/test_xdp_with_cpumap_helpers.c       | 36 +++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
new file mode 100644
index 000000000000..0176573fe4e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_with_cpumap_helpers.skel.h"
+
+#define IFINDEX_LO	1
+
+void test_xdp_with_cpumap_helpers(void)
+{
+	struct test_xdp_with_cpumap_helpers *skel;
+	struct bpf_prog_info info = {};
+	struct bpf_cpumap_val val = {
+		.qsize = 192,
+	};
+	__u32 duration = 0, idx = 0;
+	__u32 len = sizeof(info);
+	int err, prog_fd, map_fd;
+
+	skel = test_xdp_with_cpumap_helpers__open_and_load();
+	if (CHECK_FAIL(!skel)) {
+		perror("test_xdp_with_cpumap_helpers__open_and_load");
+		return;
+	}
+
+	/* can not attach program with cpumaps that allow programs
+	 * as xdp generic
+	 */
+	prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+	CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
+	      "should have failed\n");
+
+	prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+	map_fd = bpf_map__fd(skel->maps.cpu_map);
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
+	if (CHECK_FAIL(err))
+		goto out_close;
+
+	val.bpf_prog.fd = prog_fd;
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
+	      err, errno);
+
+	err = bpf_map_lookup_elem(map_fd, &idx, &val);
+	CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
+	CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
+	      "expected %u read %u\n", info.id, val.bpf_prog.id);
+
+	/* can not attach BPF_XDP_CPUMAP program to a device */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+	CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
+	      "should have failed\n");
+
+	val.qsize = 192;
+	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
+	      "should have failed\n");
+
+out_close:
+	test_xdp_with_cpumap_helpers__destroy(skel);
+}
+
+void test_xdp_cpumap_attach(void)
+{
+	if (test__start_subtest("cpumap_with_progs"))
+		test_xdp_with_cpumap_helpers();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
new file mode 100644
index 000000000000..59ee4f182ff8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO	1
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CPUMAP);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_cpumap_val));
+	__uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp_redir")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+	return bpf_redirect_map(&cpu_map, 1, 0);
+}
+
+SEC("xdp_dummy")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
+SEC("xdp_cpumap/dummy_cm")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+	if (ctx->ingress_ifindex == IFINDEX_LO)
+		return XDP_DROP;
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From e81e7a533742c30615f8b15390df6525cef96778 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 15 Jul 2020 15:41:07 -0700
Subject: selftests/bpf: Fix possible hang in sockopt_inherit

Andrii reported that sockopt_inherit occasionally hangs up on 5.5 kernel [0].
This can happen if server_thread runs faster than the main thread.
In that case, pthread_cond_wait will wait forever because
pthread_cond_signal was executed before the main thread was blocking.
Let's move pthread_mutex_lock up a bit to make sure server_thread
runs strictly after the main thread goes to sleep.

(Not sure why this is 5.5 specific, maybe scheduling is less
deterministic? But I was able to confirm that it does indeed
happen in a VM.)

[0] https://lore.kernel.org/bpf/CAEf4BzY0-bVNHmCkMFPgObs=isUAyg-dFzGDY7QWYkmm7rmTSg@mail.gmail.com/

Reported-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200715224107.3591967-1-sdf@google.com
---
 tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 8547ecbdc61f..ec281b0363b8 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -193,11 +193,10 @@ static void run_test(int cgroup_fd)
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_bpf_object;
 
+	pthread_mutex_lock(&server_started_mtx);
 	if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
 				      (void *)&server_fd)))
 		goto close_server_fd;
-
-	pthread_mutex_lock(&server_started_mtx);
 	pthread_cond_wait(&server_started, &server_started_mtx);
 	pthread_mutex_unlock(&server_started_mtx);
 
-- 
cgit 


From bfdfa51702dec67e9fcd52568b4cf3c7f799db8b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 15 Jul 2020 18:29:11 -0700
Subject: bpf: Drop duplicated words in uapi helper comments

Drop doubled words "will" and "attach".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/6b9f71ae-4f8e-0259-2c5d-187ddaefe6eb@infradead.org
---
 tools/include/uapi/linux/bpf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c010b57fce3f..7ac3992dacfe 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2420,7 +2420,7 @@ union bpf_attr {
  *			Look for an IPv6 socket.
  *
  *		If the *netns* is a negative signed 32-bit integer, then the
- *		socket lookup table in the netns associated with the *ctx* will
+ *		socket lookup table in the netns associated with the *ctx*
  *		will be used. For the TC hooks, this is the netns of the device
  *		in the skb. For socket hooks, this is the netns of the socket.
  *		If *netns* is any other signed 32-bit value greater than or
@@ -2457,7 +2457,7 @@ union bpf_attr {
  *			Look for an IPv6 socket.
  *
  *		If the *netns* is a negative signed 32-bit integer, then the
- *		socket lookup table in the netns associated with the *ctx* will
+ *		socket lookup table in the netns associated with the *ctx*
  *		will be used. For the TC hooks, this is the netns of the device
  *		in the skb. For socket hooks, this is the netns of the socket.
  *		If *netns* is any other signed 32-bit value greater than or
@@ -4000,7 +4000,7 @@ struct bpf_link_info {
 
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
  * by user and intended to be used by socket (e.g. to bind to, depends on
- * attach attach type).
+ * attach type).
  */
 struct bpf_sock_addr {
 	__u32 user_family;	/* Allows 4-byte read, but no write. */
-- 
cgit 


From a352b32ae969788b706f666f764702cd0ab4a40a Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Fri, 17 Jul 2020 12:35:32 +0200
Subject: bpf: Sync linux/bpf.h to tools/

Newly added program, context type and helper is used by tests in a
subsequent patch. Synchronize the header file.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200717103536.397595-12-jakub@cloudflare.com
---
 tools/include/uapi/linux/bpf.h | 77 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7ac3992dacfe..54d0c886e3ba 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -189,6 +189,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_STRUCT_OPS,
 	BPF_PROG_TYPE_EXT,
 	BPF_PROG_TYPE_LSM,
+	BPF_PROG_TYPE_SK_LOOKUP,
 };
 
 enum bpf_attach_type {
@@ -228,6 +229,7 @@ enum bpf_attach_type {
 	BPF_XDP_DEVMAP,
 	BPF_CGROUP_INET_SOCK_RELEASE,
 	BPF_XDP_CPUMAP,
+	BPF_SK_LOOKUP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -3069,6 +3071,10 @@ union bpf_attr {
  *
  * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
  *	Description
+ *		Helper is overloaded depending on BPF program type. This
+ *		description applies to **BPF_PROG_TYPE_SCHED_CLS** and
+ *		**BPF_PROG_TYPE_SCHED_ACT** programs.
+ *
  *		Assign the *sk* to the *skb*. When combined with appropriate
  *		routing configuration to receive the packet towards the socket,
  *		will cause *skb* to be delivered to the specified socket.
@@ -3094,6 +3100,56 @@ union bpf_attr {
  *		**-ESOCKTNOSUPPORT** if the socket type is not supported
  *		(reuseport).
  *
+ * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
+ *	Description
+ *		Helper is overloaded depending on BPF program type. This
+ *		description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs.
+ *
+ *		Select the *sk* as a result of a socket lookup.
+ *
+ *		For the operation to succeed passed socket must be compatible
+ *		with the packet description provided by the *ctx* object.
+ *
+ *		L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must
+ *		be an exact match. While IP family (**AF_INET** or
+ *		**AF_INET6**) must be compatible, that is IPv6 sockets
+ *		that are not v6-only can be selected for IPv4 packets.
+ *
+ *		Only TCP listeners and UDP unconnected sockets can be
+ *		selected. *sk* can also be NULL to reset any previous
+ *		selection.
+ *
+ *		*flags* argument can combination of following values:
+ *
+ *		* **BPF_SK_LOOKUP_F_REPLACE** to override the previous
+ *		  socket selection, potentially done by a BPF program
+ *		  that ran before us.
+ *
+ *		* **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip
+ *		  load-balancing within reuseport group for the socket
+ *		  being selected.
+ *
+ *		On success *ctx->sk* will point to the selected socket.
+ *
+ *	Return
+ *		0 on success, or a negative errno in case of failure.
+ *
+ *		* **-EAFNOSUPPORT** if socket family (*sk->family*) is
+ *		  not compatible with packet family (*ctx->family*).
+ *
+ *		* **-EEXIST** if socket has been already selected,
+ *		  potentially by another program, and
+ *		  **BPF_SK_LOOKUP_F_REPLACE** flag was not specified.
+ *
+ *		* **-EINVAL** if unsupported flags were specified.
+ *
+ *		* **-EPROTOTYPE** if socket L4 protocol
+ *		  (*sk->protocol*) doesn't match packet protocol
+ *		  (*ctx->protocol*).
+ *
+ *		* **-ESOCKTNOSUPPORT** if socket is not in allowed
+ *		  state (TCP listening or UDP unconnected).
+ *
  * u64 bpf_ktime_get_boot_ns(void)
  * 	Description
  * 		Return the time elapsed since system boot, in nanoseconds.
@@ -3607,6 +3663,12 @@ enum {
 	BPF_RINGBUF_HDR_SZ		= 8,
 };
 
+/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */
+enum {
+	BPF_SK_LOOKUP_F_REPLACE		= (1ULL << 0),
+	BPF_SK_LOOKUP_F_NO_REUSEPORT	= (1ULL << 1),
+};
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
@@ -4349,4 +4411,19 @@ struct bpf_pidns_info {
 	__u32 pid;
 	__u32 tgid;
 };
+
+/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+struct bpf_sk_lookup {
+	__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+
+	__u32 family;		/* Protocol family (AF_INET, AF_INET6) */
+	__u32 protocol;		/* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+	__u32 remote_ip4;	/* Network byte order */
+	__u32 remote_ip6[4];	/* Network byte order */
+	__u32 remote_port;	/* Network byte order */
+	__u32 local_ip4;	/* Network byte order */
+	__u32 local_ip6[4];	/* Network byte order */
+	__u32 local_port;	/* Host byte order */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit 


From 499dd29d90bb78d4ac4f0d6648e26f4a339829b1 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Fri, 17 Jul 2020 12:35:33 +0200
Subject: libbpf: Add support for SK_LOOKUP program type

Make libbpf aware of the newly added program type, and assign it a
section name.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200717103536.397595-13-jakub@cloudflare.com
---
 tools/lib/bpf/libbpf.c        | 3 +++
 tools/lib/bpf/libbpf.h        | 2 ++
 tools/lib/bpf/libbpf.map      | 2 ++
 tools/lib/bpf/libbpf_probes.c | 3 +++
 4 files changed, 10 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f55fd8a5c008..846164c79df1 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6799,6 +6799,7 @@ BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
 BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
 BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
+BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
 
 enum bpf_attach_type
 bpf_program__get_expected_attach_type(struct bpf_program *prog)
@@ -6981,6 +6982,8 @@ static const struct bpf_sec_def section_defs[] = {
 	BPF_EAPROG_SEC("cgroup/setsockopt",	BPF_PROG_TYPE_CGROUP_SOCKOPT,
 						BPF_CGROUP_SETSOCKOPT),
 	BPF_PROG_SEC("struct_ops",		BPF_PROG_TYPE_STRUCT_OPS),
+	BPF_EAPROG_SEC("sk_lookup/",		BPF_PROG_TYPE_SK_LOOKUP,
+						BPF_SK_LOOKUP),
 };
 
 #undef BPF_PROG_SEC_IMPL
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 2335971ed0bd..c2272132e929 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -350,6 +350,7 @@ LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
 
 LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
@@ -377,6 +378,7 @@ LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
 
 /*
  * No need for __attribute__((packed)), all members of 'bpf_map_def'
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index c5d5c7664c3b..6f0856abe299 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -287,6 +287,8 @@ LIBBPF_0.1.0 {
 		bpf_map__type;
 		bpf_map__value_size;
 		bpf_program__autoload;
+		bpf_program__is_sk_lookup;
 		bpf_program__set_autoload;
+		bpf_program__set_sk_lookup;
 		btf__set_fd;
 } LIBBPF_0.0.9;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 10cd8d1891f5..5a3d3f078408 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -78,6 +78,9 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
 		break;
+	case BPF_PROG_TYPE_SK_LOOKUP:
+		xattr.expected_attach_type = BPF_SK_LOOKUP;
+		break;
 	case BPF_PROG_TYPE_KPROBE:
 		xattr.kern_version = get_kernel_version();
 		break;
-- 
cgit 


From 93a3545d812ae7cfe4426374e00a7d8f64ac02e0 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Fri, 17 Jul 2020 12:35:34 +0200
Subject: tools/bpftool: Add name mappings for SK_LOOKUP prog and attach type

Make bpftool show human-friendly identifiers for newly introduced program
and attach type, BPF_PROG_TYPE_SK_LOOKUP and BPF_SK_LOOKUP, respectively.

Also, add the new prog type bash-completion, man page and help message.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200717103536.397595-14-jakub@cloudflare.com
---
 tools/bpf/bpftool/Documentation/bpftool-prog.rst | 2 +-
 tools/bpf/bpftool/bash-completion/bpftool        | 2 +-
 tools/bpf/bpftool/common.c                       | 1 +
 tools/bpf/bpftool/prog.c                         | 3 ++-
 4 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 412ea3d9bf7f..82e356b664e8 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -45,7 +45,7 @@ PROG COMMANDS
 |               **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
 |		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
 |		**cgroup/getsockopt** | **cgroup/setsockopt** |
-|		**struct_ops** | **fentry** | **fexit** | **freplace**
+|		**struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
 |	}
 |       *ATTACH_TYPE* := {
 |		**msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 25b25aca1112..7b137264ea3a 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -479,7 +479,7 @@ _bpftool()
                                 cgroup/post_bind4 cgroup/post_bind6 \
                                 cgroup/sysctl cgroup/getsockopt \
                                 cgroup/setsockopt struct_ops \
-                                fentry fexit freplace" -- \
+                                fentry fexit freplace sk_lookup" -- \
                                                    "$cur" ) )
                             return 0
                             ;;
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 29f4e7611ae8..9b28c69dd8e4 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -64,6 +64,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
 	[BPF_TRACE_FEXIT]		= "fexit",
 	[BPF_MODIFY_RETURN]		= "mod_ret",
 	[BPF_LSM_MAC]			= "lsm_mac",
+	[BPF_SK_LOOKUP]			= "sk_lookup",
 };
 
 void p_err(const char *fmt, ...)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 6863c57effd0..3e6ecc6332e2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -59,6 +59,7 @@ const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_TRACING]			= "tracing",
 	[BPF_PROG_TYPE_STRUCT_OPS]		= "struct_ops",
 	[BPF_PROG_TYPE_EXT]			= "ext",
+	[BPF_PROG_TYPE_SK_LOOKUP]		= "sk_lookup",
 };
 
 const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
@@ -1905,7 +1906,7 @@ static int do_help(int argc, char **argv)
 		"                 cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
 		"                 cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
 		"                 cgroup/getsockopt | cgroup/setsockopt |\n"
-		"                 struct_ops | fentry | fexit | freplace }\n"
+		"                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
 		"       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
 		"                        flow_dissector }\n"
 		"       METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
-- 
cgit 


From f7726cbea402fc92a3c27226b761a6dbc0390cac Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Fri, 17 Jul 2020 12:35:35 +0200
Subject: selftests/bpf: Add verifier tests for bpf_sk_lookup context access

Exercise verifier access checks for bpf_sk_lookup context fields.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200717103536.397595-15-jakub@cloudflare.com
---
 .../testing/selftests/bpf/verifier/ctx_sk_lookup.c | 492 +++++++++++++++++++++
 1 file changed, 492 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
new file mode 100644
index 000000000000..2ad5f974451c
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -0,0 +1,492 @@
+{
+	"valid 1,2,4,8-byte reads from bpf_sk_lookup",
+	.insns = {
+		/* 1-byte read from family field */
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, family)),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, family) + 1),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, family) + 2),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, family) + 3),
+		/* 2-byte read from family field */
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, family)),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, family) + 2),
+		/* 4-byte read from family field */
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, family)),
+
+		/* 1-byte read from protocol field */
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, protocol)),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, protocol) + 1),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, protocol) + 2),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, protocol) + 3),
+		/* 2-byte read from protocol field */
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, protocol)),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, protocol) + 2),
+		/* 4-byte read from protocol field */
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, protocol)),
+
+		/* 1-byte read from remote_ip4 field */
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip4)),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip4) + 1),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip4) + 3),
+		/* 2-byte read from remote_ip4 field */
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip4)),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+		/* 4-byte read from remote_ip4 field */
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip4)),
+
+		/* 1-byte read from remote_ip6 field */
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6)),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 1),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 3),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 5),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 7),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 9),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 11),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 13),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 15),
+		/* 2-byte read from remote_ip6 field */
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6)),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+		/* 4-byte read from remote_ip6 field */
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6)),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+
+		/* 1-byte read from remote_port field */
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_port)),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_port) + 1),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_port) + 2),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_port) + 3),
+		/* 2-byte read from remote_port field */
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_port)),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_port) + 2),
+		/* 4-byte read from remote_port field */
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_port)),
+
+		/* 1-byte read from local_ip4 field */
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip4)),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip4) + 1),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip4) + 3),
+		/* 2-byte read from local_ip4 field */
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip4)),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+		/* 4-byte read from local_ip4 field */
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip4)),
+
+		/* 1-byte read from local_ip6 field */
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6)),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 1),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 3),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 5),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 7),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 9),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 11),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 13),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 15),
+		/* 2-byte read from local_ip6 field */
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6)),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+		/* 4-byte read from local_ip6 field */
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6)),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+
+		/* 1-byte read from local_port field */
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_port)),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_port) + 1),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_port) + 2),
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_port) + 3),
+		/* 2-byte read from local_port field */
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_port)),
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_port) + 2),
+		/* 4-byte read from local_port field */
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_port)),
+
+		/* 8-byte read from sk field */
+		BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, sk)),
+
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+/* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
+{
+	"invalid 8-byte read from bpf_sk_lookup family field",
+	.insns = {
+		BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, family)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 8-byte read from bpf_sk_lookup protocol field",
+	.insns = {
+		BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, protocol)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 8-byte read from bpf_sk_lookup remote_ip4 field",
+	.insns = {
+		BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip4)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 8-byte read from bpf_sk_lookup remote_ip6 field",
+	.insns = {
+		BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_ip6)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 8-byte read from bpf_sk_lookup remote_port field",
+	.insns = {
+		BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, remote_port)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 8-byte read from bpf_sk_lookup local_ip4 field",
+	.insns = {
+		BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip4)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 8-byte read from bpf_sk_lookup local_ip6 field",
+	.insns = {
+		BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_ip6)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 8-byte read from bpf_sk_lookup local_port field",
+	.insns = {
+		BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, local_port)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+/* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */
+{
+	"invalid 4-byte read from bpf_sk_lookup sk field",
+	.insns = {
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, sk)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 2-byte read from bpf_sk_lookup sk field",
+	.insns = {
+		BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, sk)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 1-byte read from bpf_sk_lookup sk field",
+	.insns = {
+		BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct bpf_sk_lookup, sk)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+/* out of bounds and unaligned reads from bpf_sk_lookup */
+{
+	"invalid 4-byte read past end of bpf_sk_lookup",
+	.insns = {
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    sizeof(struct bpf_sk_lookup)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 4-byte unaligned read from bpf_sk_lookup at odd offset",
+	.insns = {
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 1),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 4-byte unaligned read from bpf_sk_lookup at even offset",
+	.insns = {
+		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 2),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+/* in-bound and out-of-bound writes to bpf_sk_lookup */
+{
+	"invalid 8-byte write to bpf_sk_lookup",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+		BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 4-byte write to bpf_sk_lookup",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+		BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 2-byte write to bpf_sk_lookup",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+		BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, 0),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 1-byte write to bpf_sk_lookup",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+		BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+	"invalid 4-byte write past end of bpf_sk_lookup",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+		BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+			    sizeof(struct bpf_sk_lookup)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.errstr = "invalid bpf_context access",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+},
-- 
cgit 


From 0ab5539f85840d3c4e5a8a4783901c0038f8321e Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Fri, 17 Jul 2020 12:35:36 +0200
Subject: selftests/bpf: Tests for BPF_SK_LOOKUP attach point

Add tests to test_progs that exercise:

 - attaching/detaching/querying programs to BPF_SK_LOOKUP hook,
 - redirecting socket lookup to a socket selected by BPF program,
 - failing a socket lookup on BPF program's request,
 - error scenarios for selecting a socket from BPF program,
 - accessing BPF program context,
 - attaching and running multiple BPF programs.

Run log:

  bash-5.0# ./test_progs -n 70
  #70/1 query lookup prog:OK
  #70/2 TCP IPv4 redir port:OK
  #70/3 TCP IPv4 redir addr:OK
  #70/4 TCP IPv4 redir with reuseport:OK
  #70/5 TCP IPv4 redir skip reuseport:OK
  #70/6 TCP IPv6 redir port:OK
  #70/7 TCP IPv6 redir addr:OK
  #70/8 TCP IPv4->IPv6 redir port:OK
  #70/9 TCP IPv6 redir with reuseport:OK
  #70/10 TCP IPv6 redir skip reuseport:OK
  #70/11 UDP IPv4 redir port:OK
  #70/12 UDP IPv4 redir addr:OK
  #70/13 UDP IPv4 redir with reuseport:OK
  #70/14 UDP IPv4 redir skip reuseport:OK
  #70/15 UDP IPv6 redir port:OK
  #70/16 UDP IPv6 redir addr:OK
  #70/17 UDP IPv4->IPv6 redir port:OK
  #70/18 UDP IPv6 redir and reuseport:OK
  #70/19 UDP IPv6 redir skip reuseport:OK
  #70/20 TCP IPv4 drop on lookup:OK
  #70/21 TCP IPv6 drop on lookup:OK
  #70/22 UDP IPv4 drop on lookup:OK
  #70/23 UDP IPv6 drop on lookup:OK
  #70/24 TCP IPv4 drop on reuseport:OK
  #70/25 TCP IPv6 drop on reuseport:OK
  #70/26 UDP IPv4 drop on reuseport:OK
  #70/27 TCP IPv6 drop on reuseport:OK
  #70/28 sk_assign returns EEXIST:OK
  #70/29 sk_assign honors F_REPLACE:OK
  #70/30 sk_assign accepts NULL socket:OK
  #70/31 access ctx->sk:OK
  #70/32 narrow access to ctx v4:OK
  #70/33 narrow access to ctx v6:OK
  #70/34 sk_assign rejects TCP established:OK
  #70/35 sk_assign rejects UDP connected:OK
  #70/36 multi prog - pass, pass:OK
  #70/37 multi prog - drop, drop:OK
  #70/38 multi prog - pass, drop:OK
  #70/39 multi prog - drop, pass:OK
  #70/40 multi prog - pass, redir:OK
  #70/41 multi prog - redir, pass:OK
  #70/42 multi prog - drop, redir:OK
  #70/43 multi prog - redir, drop:OK
  #70/44 multi prog - redir, redir:OK
  #70 sk_lookup:OK
  Summary: 1/44 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200717103536.397595-16-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/network_helpers.c      |   58 +-
 tools/testing/selftests/bpf/network_helpers.h      |    2 +
 tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 1282 ++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_sk_lookup.c |  641 ++++++++++
 4 files changed, 1960 insertions(+), 23 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/sk_lookup.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_sk_lookup.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index acd08715be2e..f56655690f9b 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -73,29 +73,8 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
 	socklen_t len;
 	int fd;
 
-	if (family == AF_INET) {
-		struct sockaddr_in *sin = (void *)&addr;
-
-		sin->sin_family = AF_INET;
-		sin->sin_port = htons(port);
-		if (addr_str &&
-		    inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
-			log_err("inet_pton(AF_INET, %s)", addr_str);
-			return -1;
-		}
-		len = sizeof(*sin);
-	} else {
-		struct sockaddr_in6 *sin6 = (void *)&addr;
-
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = htons(port);
-		if (addr_str &&
-		    inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
-			log_err("inet_pton(AF_INET6, %s)", addr_str);
-			return -1;
-		}
-		len = sizeof(*sin6);
-	}
+	if (make_sockaddr(family, addr_str, port, &addr, &len))
+		return -1;
 
 	fd = socket(family, type, 0);
 	if (fd < 0) {
@@ -194,3 +173,36 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
 
 	return 0;
 }
+
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+		  struct sockaddr_storage *addr, socklen_t *len)
+{
+	if (family == AF_INET) {
+		struct sockaddr_in *sin = (void *)addr;
+
+		sin->sin_family = AF_INET;
+		sin->sin_port = htons(port);
+		if (addr_str &&
+		    inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
+			log_err("inet_pton(AF_INET, %s)", addr_str);
+			return -1;
+		}
+		if (len)
+			*len = sizeof(*sin);
+		return 0;
+	} else if (family == AF_INET6) {
+		struct sockaddr_in6 *sin6 = (void *)addr;
+
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = htons(port);
+		if (addr_str &&
+		    inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
+			log_err("inet_pton(AF_INET6, %s)", addr_str);
+			return -1;
+		}
+		if (len)
+			*len = sizeof(*sin6);
+		return 0;
+	}
+	return -1;
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index f580e82fda58..c3728f6667e4 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -37,5 +37,7 @@ int start_server(int family, int type, const char *addr, __u16 port,
 		 int timeout_ms);
 int connect_to_fd(int server_fd, int timeout_ms);
 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+		  struct sockaddr_storage *addr, socklen_t *len);
 
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
new file mode 100644
index 000000000000..f1784ae4565a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -0,0 +1,1282 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+/*
+ * Test BPF attach point for INET socket lookup (BPF_SK_LOOKUP).
+ *
+ * Tests exercise:
+ *  - attaching/detaching/querying programs to BPF_SK_LOOKUP hook,
+ *  - redirecting socket lookup to a socket selected by BPF program,
+ *  - failing a socket lookup on BPF program's request,
+ *  - error scenarios for selecting a socket from BPF program,
+ *  - accessing BPF program context,
+ *  - attaching and running multiple BPF programs.
+ *
+ * Tests run in a dedicated network namespace.
+ */
+
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "test_progs.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_sk_lookup.skel.h"
+
+/* External (address, port) pairs the client sends packets to. */
+#define EXT_IP4		"127.0.0.1"
+#define EXT_IP6		"fd00::1"
+#define EXT_PORT	7007
+
+/* Internal (address, port) pairs the server listens/receives at. */
+#define INT_IP4		"127.0.0.2"
+#define INT_IP4_V6	"::ffff:127.0.0.2"
+#define INT_IP6		"fd00::2"
+#define INT_PORT	8008
+
+#define IO_TIMEOUT_SEC	3
+
+enum server {
+	SERVER_A = 0,
+	SERVER_B = 1,
+	MAX_SERVERS,
+};
+
+enum {
+	PROG1 = 0,
+	PROG2,
+};
+
+struct inet_addr {
+	const char *ip;
+	unsigned short port;
+};
+
+struct test {
+	const char *desc;
+	struct bpf_program *lookup_prog;
+	struct bpf_program *reuseport_prog;
+	struct bpf_map *sock_map;
+	int sotype;
+	struct inet_addr connect_to;
+	struct inet_addr listen_at;
+	enum server accept_on;
+};
+
+static __u32 duration;		/* for CHECK macro */
+
+static bool is_ipv6(const char *ip)
+{
+	return !!strchr(ip, ':');
+}
+
+static int attach_reuseport(int sock_fd, struct bpf_program *reuseport_prog)
+{
+	int err, prog_fd;
+
+	prog_fd = bpf_program__fd(reuseport_prog);
+	if (prog_fd < 0) {
+		errno = -prog_fd;
+		return -1;
+	}
+
+	err = setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
+			 &prog_fd, sizeof(prog_fd));
+	if (err)
+		return -1;
+
+	return 0;
+}
+
+static socklen_t inetaddr_len(const struct sockaddr_storage *addr)
+{
+	return (addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) :
+		addr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : 0);
+}
+
+static int make_socket(int sotype, const char *ip, int port,
+		       struct sockaddr_storage *addr)
+{
+	struct timeval timeo = { .tv_sec = IO_TIMEOUT_SEC };
+	int err, family, fd;
+
+	family = is_ipv6(ip) ? AF_INET6 : AF_INET;
+	err = make_sockaddr(family, ip, port, addr, NULL);
+	if (CHECK(err, "make_address", "failed\n"))
+		return -1;
+
+	fd = socket(addr->ss_family, sotype, 0);
+	if (CHECK(fd < 0, "socket", "failed\n")) {
+		log_err("failed to make socket");
+		return -1;
+	}
+
+	err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+	if (CHECK(err, "setsockopt(SO_SNDTIMEO)", "failed\n")) {
+		log_err("failed to set SNDTIMEO");
+		close(fd);
+		return -1;
+	}
+
+	err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
+	if (CHECK(err, "setsockopt(SO_RCVTIMEO)", "failed\n")) {
+		log_err("failed to set RCVTIMEO");
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+static int make_server(int sotype, const char *ip, int port,
+		       struct bpf_program *reuseport_prog)
+{
+	struct sockaddr_storage addr = {0};
+	const int one = 1;
+	int err, fd = -1;
+
+	fd = make_socket(sotype, ip, port, &addr);
+	if (fd < 0)
+		return -1;
+
+	/* Enabled for UDPv6 sockets for IPv4-mapped IPv6 to work. */
+	if (sotype == SOCK_DGRAM) {
+		err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one,
+				 sizeof(one));
+		if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) {
+			log_err("failed to enable IP_RECVORIGDSTADDR");
+			goto fail;
+		}
+	}
+
+	if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) {
+		err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one,
+				 sizeof(one));
+		if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) {
+			log_err("failed to enable IPV6_RECVORIGDSTADDR");
+			goto fail;
+		}
+	}
+
+	if (sotype == SOCK_STREAM) {
+		err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
+				 sizeof(one));
+		if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) {
+			log_err("failed to enable SO_REUSEADDR");
+			goto fail;
+		}
+	}
+
+	if (reuseport_prog) {
+		err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one,
+				 sizeof(one));
+		if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) {
+			log_err("failed to enable SO_REUSEPORT");
+			goto fail;
+		}
+	}
+
+	err = bind(fd, (void *)&addr, inetaddr_len(&addr));
+	if (CHECK(err, "bind", "failed\n")) {
+		log_err("failed to bind listen socket");
+		goto fail;
+	}
+
+	if (sotype == SOCK_STREAM) {
+		err = listen(fd, SOMAXCONN);
+		if (CHECK(err, "make_server", "listen")) {
+			log_err("failed to listen on port %d", port);
+			goto fail;
+		}
+	}
+
+	/* Late attach reuseport prog so we can have one init path */
+	if (reuseport_prog) {
+		err = attach_reuseport(fd, reuseport_prog);
+		if (CHECK(err, "attach_reuseport", "failed\n")) {
+			log_err("failed to attach reuseport prog");
+			goto fail;
+		}
+	}
+
+	return fd;
+fail:
+	close(fd);
+	return -1;
+}
+
+static int make_client(int sotype, const char *ip, int port)
+{
+	struct sockaddr_storage addr = {0};
+	int err, fd;
+
+	fd = make_socket(sotype, ip, port, &addr);
+	if (fd < 0)
+		return -1;
+
+	err = connect(fd, (void *)&addr, inetaddr_len(&addr));
+	if (CHECK(err, "make_client", "connect")) {
+		log_err("failed to connect client socket");
+		goto fail;
+	}
+
+	return fd;
+fail:
+	close(fd);
+	return -1;
+}
+
+static int send_byte(int fd)
+{
+	ssize_t n;
+
+	errno = 0;
+	n = send(fd, "a", 1, 0);
+	if (CHECK(n <= 0, "send_byte", "send")) {
+		log_err("failed/partial send");
+		return -1;
+	}
+	return 0;
+}
+
+static int recv_byte(int fd)
+{
+	char buf[1];
+	ssize_t n;
+
+	n = recv(fd, buf, sizeof(buf), 0);
+	if (CHECK(n <= 0, "recv_byte", "recv")) {
+		log_err("failed/partial recv");
+		return -1;
+	}
+	return 0;
+}
+
+static int tcp_recv_send(int server_fd)
+{
+	char buf[1];
+	int ret, fd;
+	ssize_t n;
+
+	fd = accept(server_fd, NULL, NULL);
+	if (CHECK(fd < 0, "accept", "failed\n")) {
+		log_err("failed to accept");
+		return -1;
+	}
+
+	n = recv(fd, buf, sizeof(buf), 0);
+	if (CHECK(n <= 0, "recv", "failed\n")) {
+		log_err("failed/partial recv");
+		ret = -1;
+		goto close;
+	}
+
+	n = send(fd, buf, n, 0);
+	if (CHECK(n <= 0, "send", "failed\n")) {
+		log_err("failed/partial send");
+		ret = -1;
+		goto close;
+	}
+
+	ret = 0;
+close:
+	close(fd);
+	return ret;
+}
+
+static void v4_to_v6(struct sockaddr_storage *ss)
+{
+	struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)ss;
+	struct sockaddr_in v4 = *(struct sockaddr_in *)ss;
+
+	v6->sin6_family = AF_INET6;
+	v6->sin6_port = v4.sin_port;
+	v6->sin6_addr.s6_addr[10] = 0xff;
+	v6->sin6_addr.s6_addr[11] = 0xff;
+	memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4);
+}
+
+static int udp_recv_send(int server_fd)
+{
+	char cmsg_buf[CMSG_SPACE(sizeof(struct sockaddr_storage))];
+	struct sockaddr_storage _src_addr = { 0 };
+	struct sockaddr_storage *src_addr = &_src_addr;
+	struct sockaddr_storage *dst_addr = NULL;
+	struct msghdr msg = { 0 };
+	struct iovec iov = { 0 };
+	struct cmsghdr *cm;
+	char buf[1];
+	int ret, fd;
+	ssize_t n;
+
+	iov.iov_base = buf;
+	iov.iov_len = sizeof(buf);
+
+	msg.msg_name = src_addr;
+	msg.msg_namelen = sizeof(*src_addr);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsg_buf;
+	msg.msg_controllen = sizeof(cmsg_buf);
+
+	errno = 0;
+	n = recvmsg(server_fd, &msg, 0);
+	if (CHECK(n <= 0, "recvmsg", "failed\n")) {
+		log_err("failed to receive");
+		return -1;
+	}
+	if (CHECK(msg.msg_flags & MSG_CTRUNC, "recvmsg", "truncated cmsg\n"))
+		return -1;
+
+	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+		if ((cm->cmsg_level == SOL_IP &&
+		     cm->cmsg_type == IP_ORIGDSTADDR) ||
+		    (cm->cmsg_level == SOL_IPV6 &&
+		     cm->cmsg_type == IPV6_ORIGDSTADDR)) {
+			dst_addr = (struct sockaddr_storage *)CMSG_DATA(cm);
+			break;
+		}
+		log_err("warning: ignored cmsg at level %d type %d",
+			cm->cmsg_level, cm->cmsg_type);
+	}
+	if (CHECK(!dst_addr, "recvmsg", "missing ORIGDSTADDR\n"))
+		return -1;
+
+	/* Server socket bound to IPv4-mapped IPv6 address */
+	if (src_addr->ss_family == AF_INET6 &&
+	    dst_addr->ss_family == AF_INET) {
+		v4_to_v6(dst_addr);
+	}
+
+	/* Reply from original destination address. */
+	fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0);
+	if (CHECK(fd < 0, "socket", "failed\n")) {
+		log_err("failed to create tx socket");
+		return -1;
+	}
+
+	ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr));
+	if (CHECK(ret, "bind", "failed\n")) {
+		log_err("failed to bind tx socket");
+		goto out;
+	}
+
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	n = sendmsg(fd, &msg, 0);
+	if (CHECK(n <= 0, "sendmsg", "failed\n")) {
+		log_err("failed to send echo reply");
+		ret = -1;
+		goto out;
+	}
+
+	ret = 0;
+out:
+	close(fd);
+	return ret;
+}
+
+static int tcp_echo_test(int client_fd, int server_fd)
+{
+	int err;
+
+	err = send_byte(client_fd);
+	if (err)
+		return -1;
+	err = tcp_recv_send(server_fd);
+	if (err)
+		return -1;
+	err = recv_byte(client_fd);
+	if (err)
+		return -1;
+
+	return 0;
+}
+
+static int udp_echo_test(int client_fd, int server_fd)
+{
+	int err;
+
+	err = send_byte(client_fd);
+	if (err)
+		return -1;
+	err = udp_recv_send(server_fd);
+	if (err)
+		return -1;
+	err = recv_byte(client_fd);
+	if (err)
+		return -1;
+
+	return 0;
+}
+
+static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
+{
+	struct bpf_link *link;
+	int net_fd;
+
+	net_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (CHECK(net_fd < 0, "open", "failed\n")) {
+		log_err("failed to open /proc/self/ns/net");
+		return NULL;
+	}
+
+	link = bpf_program__attach_netns(prog, net_fd);
+	if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+		errno = -PTR_ERR(link);
+		log_err("failed to attach program '%s' to netns",
+			bpf_program__name(prog));
+		link = NULL;
+	}
+
+	close(net_fd);
+	return link;
+}
+
+static int update_lookup_map(struct bpf_map *map, int index, int sock_fd)
+{
+	int err, map_fd;
+	uint64_t value;
+
+	map_fd = bpf_map__fd(map);
+	if (CHECK(map_fd < 0, "bpf_map__fd", "failed\n")) {
+		errno = -map_fd;
+		log_err("failed to get map FD");
+		return -1;
+	}
+
+	value = (uint64_t)sock_fd;
+	err = bpf_map_update_elem(map_fd, &index, &value, BPF_NOEXIST);
+	if (CHECK(err, "bpf_map_update_elem", "failed\n")) {
+		log_err("failed to update redir_map @ %d", index);
+		return -1;
+	}
+
+	return 0;
+}
+
+static __u32 link_info_prog_id(struct bpf_link *link)
+{
+	struct bpf_link_info info = {};
+	__u32 info_len = sizeof(info);
+	int link_fd, err;
+
+	link_fd = bpf_link__fd(link);
+	if (CHECK(link_fd < 0, "bpf_link__fd", "failed\n")) {
+		errno = -link_fd;
+		log_err("bpf_link__fd failed");
+		return 0;
+	}
+
+	err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
+	if (CHECK(err, "bpf_obj_get_info_by_fd", "failed\n")) {
+		log_err("bpf_obj_get_info_by_fd");
+		return 0;
+	}
+	if (CHECK(info_len != sizeof(info), "bpf_obj_get_info_by_fd",
+		  "unexpected info len %u\n", info_len))
+		return 0;
+
+	return info.prog_id;
+}
+
+static void query_lookup_prog(struct test_sk_lookup *skel)
+{
+	struct bpf_link *link[3] = {};
+	__u32 attach_flags = 0;
+	__u32 prog_ids[3] = {};
+	__u32 prog_cnt = 3;
+	__u32 prog_id;
+	int net_fd;
+	int err;
+
+	net_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (CHECK(net_fd < 0, "open", "failed\n")) {
+		log_err("failed to open /proc/self/ns/net");
+		return;
+	}
+
+	link[0] = attach_lookup_prog(skel->progs.lookup_pass);
+	if (!link[0])
+		goto close;
+	link[1] = attach_lookup_prog(skel->progs.lookup_pass);
+	if (!link[1])
+		goto detach;
+	link[2] = attach_lookup_prog(skel->progs.lookup_drop);
+	if (!link[2])
+		goto detach;
+
+	err = bpf_prog_query(net_fd, BPF_SK_LOOKUP, 0 /* query flags */,
+			     &attach_flags, prog_ids, &prog_cnt);
+	if (CHECK(err, "bpf_prog_query", "failed\n")) {
+		log_err("failed to query lookup prog");
+		goto detach;
+	}
+
+	errno = 0;
+	if (CHECK(attach_flags != 0, "bpf_prog_query",
+		  "wrong attach_flags on query: %u", attach_flags))
+		goto detach;
+	if (CHECK(prog_cnt != 3, "bpf_prog_query",
+		  "wrong program count on query: %u", prog_cnt))
+		goto detach;
+	prog_id = link_info_prog_id(link[0]);
+	CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+	      "invalid program #0 id on query: %u != %u\n",
+	      prog_ids[0], prog_id);
+	prog_id = link_info_prog_id(link[1]);
+	CHECK(prog_ids[1] != prog_id, "bpf_prog_query",
+	      "invalid program #1 id on query: %u != %u\n",
+	      prog_ids[1], prog_id);
+	prog_id = link_info_prog_id(link[2]);
+	CHECK(prog_ids[2] != prog_id, "bpf_prog_query",
+	      "invalid program #2 id on query: %u != %u\n",
+	      prog_ids[2], prog_id);
+
+detach:
+	if (link[2])
+		bpf_link__destroy(link[2]);
+	if (link[1])
+		bpf_link__destroy(link[1]);
+	if (link[0])
+		bpf_link__destroy(link[0]);
+close:
+	close(net_fd);
+}
+
+static void run_lookup_prog(const struct test *t)
+{
+	int client_fd, server_fds[MAX_SERVERS] = { -1 };
+	struct bpf_link *lookup_link;
+	int i, err;
+
+	lookup_link = attach_lookup_prog(t->lookup_prog);
+	if (!lookup_link)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+		server_fds[i] = make_server(t->sotype, t->listen_at.ip,
+					    t->listen_at.port,
+					    t->reuseport_prog);
+		if (server_fds[i] < 0)
+			goto close;
+
+		err = update_lookup_map(t->sock_map, i, server_fds[i]);
+		if (err)
+			goto close;
+
+		/* want just one server for non-reuseport test */
+		if (!t->reuseport_prog)
+			break;
+	}
+
+	client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port);
+	if (client_fd < 0)
+		goto close;
+
+	if (t->sotype == SOCK_STREAM)
+		tcp_echo_test(client_fd, server_fds[t->accept_on]);
+	else
+		udp_echo_test(client_fd, server_fds[t->accept_on]);
+
+	close(client_fd);
+close:
+	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+		if (server_fds[i] != -1)
+			close(server_fds[i]);
+	}
+	bpf_link__destroy(lookup_link);
+}
+
+static void test_redirect_lookup(struct test_sk_lookup *skel)
+{
+	const struct test tests[] = {
+		{
+			.desc		= "TCP IPv4 redir port",
+			.lookup_prog	= skel->progs.redir_port,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { EXT_IP4, INT_PORT },
+		},
+		{
+			.desc		= "TCP IPv4 redir addr",
+			.lookup_prog	= skel->progs.redir_ip4,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, EXT_PORT },
+		},
+		{
+			.desc		= "TCP IPv4 redir with reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.select_sock_b,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, INT_PORT },
+			.accept_on	= SERVER_B,
+		},
+		{
+			.desc		= "TCP IPv4 redir skip reuseport",
+			.lookup_prog	= skel->progs.select_sock_a_no_reuseport,
+			.reuseport_prog	= skel->progs.select_sock_b,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, INT_PORT },
+			.accept_on	= SERVER_A,
+		},
+		{
+			.desc		= "TCP IPv6 redir port",
+			.lookup_prog	= skel->progs.redir_port,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { EXT_IP6, INT_PORT },
+		},
+		{
+			.desc		= "TCP IPv6 redir addr",
+			.lookup_prog	= skel->progs.redir_ip6,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, EXT_PORT },
+		},
+		{
+			.desc		= "TCP IPv4->IPv6 redir port",
+			.lookup_prog	= skel->progs.redir_port,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4_V6, INT_PORT },
+		},
+		{
+			.desc		= "TCP IPv6 redir with reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.select_sock_b,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, INT_PORT },
+			.accept_on	= SERVER_B,
+		},
+		{
+			.desc		= "TCP IPv6 redir skip reuseport",
+			.lookup_prog	= skel->progs.select_sock_a_no_reuseport,
+			.reuseport_prog	= skel->progs.select_sock_b,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, INT_PORT },
+			.accept_on	= SERVER_A,
+		},
+		{
+			.desc		= "UDP IPv4 redir port",
+			.lookup_prog	= skel->progs.redir_port,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { EXT_IP4, INT_PORT },
+		},
+		{
+			.desc		= "UDP IPv4 redir addr",
+			.lookup_prog	= skel->progs.redir_ip4,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, EXT_PORT },
+		},
+		{
+			.desc		= "UDP IPv4 redir with reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.select_sock_b,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, INT_PORT },
+			.accept_on	= SERVER_B,
+		},
+		{
+			.desc		= "UDP IPv4 redir skip reuseport",
+			.lookup_prog	= skel->progs.select_sock_a_no_reuseport,
+			.reuseport_prog	= skel->progs.select_sock_b,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, INT_PORT },
+			.accept_on	= SERVER_A,
+		},
+		{
+			.desc		= "UDP IPv6 redir port",
+			.lookup_prog	= skel->progs.redir_port,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { EXT_IP6, INT_PORT },
+		},
+		{
+			.desc		= "UDP IPv6 redir addr",
+			.lookup_prog	= skel->progs.redir_ip6,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, EXT_PORT },
+		},
+		{
+			.desc		= "UDP IPv4->IPv6 redir port",
+			.lookup_prog	= skel->progs.redir_port,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.listen_at	= { INT_IP4_V6, INT_PORT },
+			.connect_to	= { EXT_IP4, EXT_PORT },
+		},
+		{
+			.desc		= "UDP IPv6 redir and reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.select_sock_b,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, INT_PORT },
+			.accept_on	= SERVER_B,
+		},
+		{
+			.desc		= "UDP IPv6 redir skip reuseport",
+			.lookup_prog	= skel->progs.select_sock_a_no_reuseport,
+			.reuseport_prog	= skel->progs.select_sock_b,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, INT_PORT },
+			.accept_on	= SERVER_A,
+		},
+	};
+	const struct test *t;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		if (test__start_subtest(t->desc))
+			run_lookup_prog(t);
+	}
+}
+
+static void drop_on_lookup(const struct test *t)
+{
+	struct sockaddr_storage dst = {};
+	int client_fd, server_fd, err;
+	struct bpf_link *lookup_link;
+	ssize_t n;
+
+	lookup_link = attach_lookup_prog(t->lookup_prog);
+	if (!lookup_link)
+		return;
+
+	server_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+				t->reuseport_prog);
+	if (server_fd < 0)
+		goto detach;
+
+	client_fd = make_socket(t->sotype, t->connect_to.ip,
+				t->connect_to.port, &dst);
+	if (client_fd < 0)
+		goto close_srv;
+
+	err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+	if (t->sotype == SOCK_DGRAM) {
+		err = send_byte(client_fd);
+		if (err)
+			goto close_all;
+
+		/* Read out asynchronous error */
+		n = recv(client_fd, NULL, 0, 0);
+		err = n == -1;
+	}
+	if (CHECK(!err || errno != ECONNREFUSED, "connect",
+		  "unexpected success or error\n"))
+		log_err("expected ECONNREFUSED on connect");
+
+close_all:
+	close(client_fd);
+close_srv:
+	close(server_fd);
+detach:
+	bpf_link__destroy(lookup_link);
+}
+
+static void test_drop_on_lookup(struct test_sk_lookup *skel)
+{
+	const struct test tests[] = {
+		{
+			.desc		= "TCP IPv4 drop on lookup",
+			.lookup_prog	= skel->progs.lookup_drop,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { EXT_IP4, EXT_PORT },
+		},
+		{
+			.desc		= "TCP IPv6 drop on lookup",
+			.lookup_prog	= skel->progs.lookup_drop,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { EXT_IP6, EXT_PORT },
+		},
+		{
+			.desc		= "UDP IPv4 drop on lookup",
+			.lookup_prog	= skel->progs.lookup_drop,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { EXT_IP4, EXT_PORT },
+		},
+		{
+			.desc		= "UDP IPv6 drop on lookup",
+			.lookup_prog	= skel->progs.lookup_drop,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { EXT_IP6, INT_PORT },
+		},
+	};
+	const struct test *t;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		if (test__start_subtest(t->desc))
+			drop_on_lookup(t);
+	}
+}
+
+static void drop_on_reuseport(const struct test *t)
+{
+	struct sockaddr_storage dst = { 0 };
+	int client, server1, server2, err;
+	struct bpf_link *lookup_link;
+	ssize_t n;
+
+	lookup_link = attach_lookup_prog(t->lookup_prog);
+	if (!lookup_link)
+		return;
+
+	server1 = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+			      t->reuseport_prog);
+	if (server1 < 0)
+		goto detach;
+
+	err = update_lookup_map(t->sock_map, SERVER_A, server1);
+	if (err)
+		goto detach;
+
+	/* second server on destination address we should never reach */
+	server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port,
+			      NULL /* reuseport prog */);
+	if (server2 < 0)
+		goto close_srv1;
+
+	client = make_socket(t->sotype, t->connect_to.ip,
+			     t->connect_to.port, &dst);
+	if (client < 0)
+		goto close_srv2;
+
+	err = connect(client, (void *)&dst, inetaddr_len(&dst));
+	if (t->sotype == SOCK_DGRAM) {
+		err = send_byte(client);
+		if (err)
+			goto close_all;
+
+		/* Read out asynchronous error */
+		n = recv(client, NULL, 0, 0);
+		err = n == -1;
+	}
+	if (CHECK(!err || errno != ECONNREFUSED, "connect",
+		  "unexpected success or error\n"))
+		log_err("expected ECONNREFUSED on connect");
+
+close_all:
+	close(client);
+close_srv2:
+	close(server2);
+close_srv1:
+	close(server1);
+detach:
+	bpf_link__destroy(lookup_link);
+}
+
+static void test_drop_on_reuseport(struct test_sk_lookup *skel)
+{
+	const struct test tests[] = {
+		{
+			.desc		= "TCP IPv4 drop on reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.reuseport_drop,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, INT_PORT },
+		},
+		{
+			.desc		= "TCP IPv6 drop on reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.reuseport_drop,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, INT_PORT },
+		},
+		{
+			.desc		= "UDP IPv4 drop on reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.reuseport_drop,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, INT_PORT },
+		},
+		{
+			.desc		= "TCP IPv6 drop on reuseport",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.reuseport_drop,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_STREAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, INT_PORT },
+		},
+	};
+	const struct test *t;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		if (test__start_subtest(t->desc))
+			drop_on_reuseport(t);
+	}
+}
+
+static void run_sk_assign(struct test_sk_lookup *skel,
+			  struct bpf_program *lookup_prog,
+			  const char *listen_ip, const char *connect_ip)
+{
+	int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 };
+	struct bpf_link *lookup_link;
+	int i, err;
+
+	lookup_link = attach_lookup_prog(lookup_prog);
+	if (!lookup_link)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+		server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
+		if (server_fds[i] < 0)
+			goto close_servers;
+
+		err = update_lookup_map(skel->maps.redir_map, i,
+					server_fds[i]);
+		if (err)
+			goto close_servers;
+	}
+
+	client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
+	if (client_fd < 0)
+		goto close_servers;
+
+	peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
+	if (CHECK(peer_fd < 0, "accept", "failed\n"))
+		goto close_client;
+
+	close(peer_fd);
+close_client:
+	close(client_fd);
+close_servers:
+	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+		if (server_fds[i] != -1)
+			close(server_fds[i]);
+	}
+	bpf_link__destroy(lookup_link);
+}
+
+static void run_sk_assign_v4(struct test_sk_lookup *skel,
+			     struct bpf_program *lookup_prog)
+{
+	run_sk_assign(skel, lookup_prog, INT_IP4, EXT_IP4);
+}
+
+static void run_sk_assign_v6(struct test_sk_lookup *skel,
+			     struct bpf_program *lookup_prog)
+{
+	run_sk_assign(skel, lookup_prog, INT_IP6, EXT_IP6);
+}
+
+static void run_sk_assign_connected(struct test_sk_lookup *skel,
+				    int sotype)
+{
+	int err, client_fd, connected_fd, server_fd;
+	struct bpf_link *lookup_link;
+
+	server_fd = make_server(sotype, EXT_IP4, EXT_PORT, NULL);
+	if (server_fd < 0)
+		return;
+
+	connected_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+	if (connected_fd < 0)
+		goto out_close_server;
+
+	/* Put a connected socket in redirect map */
+	err = update_lookup_map(skel->maps.redir_map, SERVER_A, connected_fd);
+	if (err)
+		goto out_close_connected;
+
+	lookup_link = attach_lookup_prog(skel->progs.sk_assign_esocknosupport);
+	if (!lookup_link)
+		goto out_close_connected;
+
+	/* Try to redirect TCP SYN / UDP packet to a connected socket */
+	client_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+	if (client_fd < 0)
+		goto out_unlink_prog;
+	if (sotype == SOCK_DGRAM) {
+		send_byte(client_fd);
+		recv_byte(server_fd);
+	}
+
+	close(client_fd);
+out_unlink_prog:
+	bpf_link__destroy(lookup_link);
+out_close_connected:
+	close(connected_fd);
+out_close_server:
+	close(server_fd);
+}
+
+static void test_sk_assign_helper(struct test_sk_lookup *skel)
+{
+	if (test__start_subtest("sk_assign returns EEXIST"))
+		run_sk_assign_v4(skel, skel->progs.sk_assign_eexist);
+	if (test__start_subtest("sk_assign honors F_REPLACE"))
+		run_sk_assign_v4(skel, skel->progs.sk_assign_replace_flag);
+	if (test__start_subtest("sk_assign accepts NULL socket"))
+		run_sk_assign_v4(skel, skel->progs.sk_assign_null);
+	if (test__start_subtest("access ctx->sk"))
+		run_sk_assign_v4(skel, skel->progs.access_ctx_sk);
+	if (test__start_subtest("narrow access to ctx v4"))
+		run_sk_assign_v4(skel, skel->progs.ctx_narrow_access);
+	if (test__start_subtest("narrow access to ctx v6"))
+		run_sk_assign_v6(skel, skel->progs.ctx_narrow_access);
+	if (test__start_subtest("sk_assign rejects TCP established"))
+		run_sk_assign_connected(skel, SOCK_STREAM);
+	if (test__start_subtest("sk_assign rejects UDP connected"))
+		run_sk_assign_connected(skel, SOCK_DGRAM);
+}
+
+struct test_multi_prog {
+	const char *desc;
+	struct bpf_program *prog1;
+	struct bpf_program *prog2;
+	struct bpf_map *redir_map;
+	struct bpf_map *run_map;
+	int expect_errno;
+	struct inet_addr listen_at;
+};
+
+static void run_multi_prog_lookup(const struct test_multi_prog *t)
+{
+	struct sockaddr_storage dst = {};
+	int map_fd, server_fd, client_fd;
+	struct bpf_link *link1, *link2;
+	int prog_idx, done, err;
+
+	map_fd = bpf_map__fd(t->run_map);
+
+	done = 0;
+	prog_idx = PROG1;
+	err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+	if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+		return;
+	prog_idx = PROG2;
+	err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+	if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+		return;
+
+	link1 = attach_lookup_prog(t->prog1);
+	if (!link1)
+		return;
+	link2 = attach_lookup_prog(t->prog2);
+	if (!link2)
+		goto out_unlink1;
+
+	server_fd = make_server(SOCK_STREAM, t->listen_at.ip,
+				t->listen_at.port, NULL);
+	if (server_fd < 0)
+		goto out_unlink2;
+
+	err = update_lookup_map(t->redir_map, SERVER_A, server_fd);
+	if (err)
+		goto out_close_server;
+
+	client_fd = make_socket(SOCK_STREAM, EXT_IP4, EXT_PORT, &dst);
+	if (client_fd < 0)
+		goto out_close_server;
+
+	err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+	if (CHECK(err && !t->expect_errno, "connect",
+		  "unexpected error %d\n", errno))
+		goto out_close_client;
+	if (CHECK(err && t->expect_errno && errno != t->expect_errno,
+		  "connect", "unexpected error %d\n", errno))
+		goto out_close_client;
+
+	done = 0;
+	prog_idx = PROG1;
+	err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+	CHECK(err, "bpf_map_lookup_elem", "failed\n");
+	CHECK(!done, "bpf_map_lookup_elem", "PROG1 !done\n");
+
+	done = 0;
+	prog_idx = PROG2;
+	err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+	CHECK(err, "bpf_map_lookup_elem", "failed\n");
+	CHECK(!done, "bpf_map_lookup_elem", "PROG2 !done\n");
+
+out_close_client:
+	close(client_fd);
+out_close_server:
+	close(server_fd);
+out_unlink2:
+	bpf_link__destroy(link2);
+out_unlink1:
+	bpf_link__destroy(link1);
+}
+
+static void test_multi_prog_lookup(struct test_sk_lookup *skel)
+{
+	struct test_multi_prog tests[] = {
+		{
+			.desc		= "multi prog - pass, pass",
+			.prog1		= skel->progs.multi_prog_pass1,
+			.prog2		= skel->progs.multi_prog_pass2,
+			.listen_at	= { EXT_IP4, EXT_PORT },
+		},
+		{
+			.desc		= "multi prog - drop, drop",
+			.prog1		= skel->progs.multi_prog_drop1,
+			.prog2		= skel->progs.multi_prog_drop2,
+			.listen_at	= { EXT_IP4, EXT_PORT },
+			.expect_errno	= ECONNREFUSED,
+		},
+		{
+			.desc		= "multi prog - pass, drop",
+			.prog1		= skel->progs.multi_prog_pass1,
+			.prog2		= skel->progs.multi_prog_drop2,
+			.listen_at	= { EXT_IP4, EXT_PORT },
+			.expect_errno	= ECONNREFUSED,
+		},
+		{
+			.desc		= "multi prog - drop, pass",
+			.prog1		= skel->progs.multi_prog_drop1,
+			.prog2		= skel->progs.multi_prog_pass2,
+			.listen_at	= { EXT_IP4, EXT_PORT },
+			.expect_errno	= ECONNREFUSED,
+		},
+		{
+			.desc		= "multi prog - pass, redir",
+			.prog1		= skel->progs.multi_prog_pass1,
+			.prog2		= skel->progs.multi_prog_redir2,
+			.listen_at	= { INT_IP4, INT_PORT },
+		},
+		{
+			.desc		= "multi prog - redir, pass",
+			.prog1		= skel->progs.multi_prog_redir1,
+			.prog2		= skel->progs.multi_prog_pass2,
+			.listen_at	= { INT_IP4, INT_PORT },
+		},
+		{
+			.desc		= "multi prog - drop, redir",
+			.prog1		= skel->progs.multi_prog_drop1,
+			.prog2		= skel->progs.multi_prog_redir2,
+			.listen_at	= { INT_IP4, INT_PORT },
+		},
+		{
+			.desc		= "multi prog - redir, drop",
+			.prog1		= skel->progs.multi_prog_redir1,
+			.prog2		= skel->progs.multi_prog_drop2,
+			.listen_at	= { INT_IP4, INT_PORT },
+		},
+		{
+			.desc		= "multi prog - redir, redir",
+			.prog1		= skel->progs.multi_prog_redir1,
+			.prog2		= skel->progs.multi_prog_redir2,
+			.listen_at	= { INT_IP4, INT_PORT },
+		},
+	};
+	struct test_multi_prog *t;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		t->redir_map = skel->maps.redir_map;
+		t->run_map = skel->maps.run_map;
+		if (test__start_subtest(t->desc))
+			run_multi_prog_lookup(t);
+	}
+}
+
+static void run_tests(struct test_sk_lookup *skel)
+{
+	if (test__start_subtest("query lookup prog"))
+		query_lookup_prog(skel);
+	test_redirect_lookup(skel);
+	test_drop_on_lookup(skel);
+	test_drop_on_reuseport(skel);
+	test_sk_assign_helper(skel);
+	test_multi_prog_lookup(skel);
+}
+
+static int switch_netns(void)
+{
+	static const char * const setup_script[] = {
+		"ip -6 addr add dev lo " EXT_IP6 "/128 nodad",
+		"ip -6 addr add dev lo " INT_IP6 "/128 nodad",
+		"ip link set dev lo up",
+		NULL,
+	};
+	const char * const *cmd;
+	int err;
+
+	err = unshare(CLONE_NEWNET);
+	if (CHECK(err, "unshare", "failed\n")) {
+		log_err("unshare(CLONE_NEWNET)");
+		return -1;
+	}
+
+	for (cmd = setup_script; *cmd; cmd++) {
+		err = system(*cmd);
+		if (CHECK(err, "system", "failed\n")) {
+			log_err("system(%s)", *cmd);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+void test_sk_lookup(void)
+{
+	struct test_sk_lookup *skel;
+	int err;
+
+	err = switch_netns();
+	if (err)
+		return;
+
+	skel = test_sk_lookup__open_and_load();
+	if (CHECK(!skel, "skel open_and_load", "failed\n"))
+		return;
+
+	run_tests(skel);
+
+	test_sk_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
new file mode 100644
index 000000000000..bbf8296f4d66
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -0,0 +1,641 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define IP4(a, b, c, d)					\
+	bpf_htonl((((__u32)(a) & 0xffU) << 24) |	\
+		  (((__u32)(b) & 0xffU) << 16) |	\
+		  (((__u32)(c) & 0xffU) <<  8) |	\
+		  (((__u32)(d) & 0xffU) <<  0))
+#define IP6(aaaa, bbbb, cccc, dddd)			\
+	{ bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
+
+#define MAX_SOCKS 32
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, MAX_SOCKS);
+	__type(key, __u32);
+	__type(value, __u64);
+} redir_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, int);
+	__type(value, int);
+} run_map SEC(".maps");
+
+enum {
+	PROG1 = 0,
+	PROG2,
+};
+
+enum {
+	SERVER_A = 0,
+	SERVER_B,
+};
+
+/* Addressable key/value constants for convenience */
+static const int KEY_PROG1 = PROG1;
+static const int KEY_PROG2 = PROG2;
+static const int PROG_DONE = 1;
+
+static const __u32 KEY_SERVER_A = SERVER_A;
+static const __u32 KEY_SERVER_B = SERVER_B;
+
+static const __u16 DST_PORT = 7007; /* Host byte order */
+static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
+static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
+
+SEC("sk_lookup/lookup_pass")
+int lookup_pass(struct bpf_sk_lookup *ctx)
+{
+	return SK_PASS;
+}
+
+SEC("sk_lookup/lookup_drop")
+int lookup_drop(struct bpf_sk_lookup *ctx)
+{
+	return SK_DROP;
+}
+
+SEC("sk_reuseport/reuse_pass")
+int reuseport_pass(struct sk_reuseport_md *ctx)
+{
+	return SK_PASS;
+}
+
+SEC("sk_reuseport/reuse_drop")
+int reuseport_drop(struct sk_reuseport_md *ctx)
+{
+	return SK_DROP;
+}
+
+/* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */
+SEC("sk_lookup/redir_port")
+int redir_port(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk;
+	int err;
+
+	if (ctx->local_port != DST_PORT)
+		return SK_PASS;
+
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+	if (!sk)
+		return SK_PASS;
+
+	err = bpf_sk_assign(ctx, sk, 0);
+	bpf_sk_release(sk);
+	return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */
+SEC("sk_lookup/redir_ip4")
+int redir_ip4(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk;
+	int err;
+
+	if (ctx->family != AF_INET)
+		return SK_PASS;
+	if (ctx->local_port != DST_PORT)
+		return SK_PASS;
+	if (ctx->local_ip4 != DST_IP4)
+		return SK_PASS;
+
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+	if (!sk)
+		return SK_PASS;
+
+	err = bpf_sk_assign(ctx, sk, 0);
+	bpf_sk_release(sk);
+	return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */
+SEC("sk_lookup/redir_ip6")
+int redir_ip6(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk;
+	int err;
+
+	if (ctx->family != AF_INET6)
+		return SK_PASS;
+	if (ctx->local_port != DST_PORT)
+		return SK_PASS;
+	if (ctx->local_ip6[0] != DST_IP6[0] ||
+	    ctx->local_ip6[1] != DST_IP6[1] ||
+	    ctx->local_ip6[2] != DST_IP6[2] ||
+	    ctx->local_ip6[3] != DST_IP6[3])
+		return SK_PASS;
+
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+	if (!sk)
+		return SK_PASS;
+
+	err = bpf_sk_assign(ctx, sk, 0);
+	bpf_sk_release(sk);
+	return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup/select_sock_a")
+int select_sock_a(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk;
+	int err;
+
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+	if (!sk)
+		return SK_PASS;
+
+	err = bpf_sk_assign(ctx, sk, 0);
+	bpf_sk_release(sk);
+	return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup/select_sock_a_no_reuseport")
+int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk;
+	int err;
+
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+	if (!sk)
+		return SK_DROP;
+
+	err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT);
+	bpf_sk_release(sk);
+	return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_reuseport/select_sock_b")
+int select_sock_b(struct sk_reuseport_md *ctx)
+{
+	__u32 key = KEY_SERVER_B;
+	int err;
+
+	err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0);
+	return err ? SK_DROP : SK_PASS;
+}
+
+/* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */
+SEC("sk_lookup/sk_assign_eexist")
+int sk_assign_eexist(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk;
+	int err, ret;
+
+	ret = SK_DROP;
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+	if (!sk)
+		goto out;
+	err = bpf_sk_assign(ctx, sk, 0);
+	if (err)
+		goto out;
+	bpf_sk_release(sk);
+
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+	if (!sk)
+		goto out;
+	err = bpf_sk_assign(ctx, sk, 0);
+	if (err != -EEXIST) {
+		bpf_printk("sk_assign returned %d, expected %d\n",
+			   err, -EEXIST);
+		goto out;
+	}
+
+	ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+	if (sk)
+		bpf_sk_release(sk);
+	return ret;
+}
+
+/* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
+SEC("sk_lookup/sk_assign_replace_flag")
+int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk;
+	int err, ret;
+
+	ret = SK_DROP;
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+	if (!sk)
+		goto out;
+	err = bpf_sk_assign(ctx, sk, 0);
+	if (err)
+		goto out;
+	bpf_sk_release(sk);
+
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+	if (!sk)
+		goto out;
+	err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+	if (err) {
+		bpf_printk("sk_assign returned %d, expected 0\n", err);
+		goto out;
+	}
+
+	ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+	if (sk)
+		bpf_sk_release(sk);
+	return ret;
+}
+
+/* Check that bpf_sk_assign(sk=NULL) is accepted. */
+SEC("sk_lookup/sk_assign_null")
+int sk_assign_null(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk = NULL;
+	int err, ret;
+
+	ret = SK_DROP;
+
+	err = bpf_sk_assign(ctx, NULL, 0);
+	if (err) {
+		bpf_printk("sk_assign returned %d, expected 0\n", err);
+		goto out;
+	}
+
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+	if (!sk)
+		goto out;
+	err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+	if (err) {
+		bpf_printk("sk_assign returned %d, expected 0\n", err);
+		goto out;
+	}
+
+	if (ctx->sk != sk)
+		goto out;
+	err = bpf_sk_assign(ctx, NULL, 0);
+	if (err != -EEXIST)
+		goto out;
+	err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+	if (err)
+		goto out;
+	err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+	if (err)
+		goto out;
+
+	ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+	if (sk)
+		bpf_sk_release(sk);
+	return ret;
+}
+
+/* Check that selected sk is accessible through context. */
+SEC("sk_lookup/access_ctx_sk")
+int access_ctx_sk(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk1 = NULL, *sk2 = NULL;
+	int err, ret;
+
+	ret = SK_DROP;
+
+	/* Try accessing unassigned (NULL) ctx->sk field */
+	if (ctx->sk && ctx->sk->family != AF_INET)
+		goto out;
+
+	/* Assign a value to ctx->sk */
+	sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+	if (!sk1)
+		goto out;
+	err = bpf_sk_assign(ctx, sk1, 0);
+	if (err)
+		goto out;
+	if (ctx->sk != sk1)
+		goto out;
+
+	/* Access ctx->sk fields */
+	if (ctx->sk->family != AF_INET ||
+	    ctx->sk->type != SOCK_STREAM ||
+	    ctx->sk->state != BPF_TCP_LISTEN)
+		goto out;
+
+	/* Reset selection */
+	err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+	if (err)
+		goto out;
+	if (ctx->sk)
+		goto out;
+
+	/* Assign another socket */
+	sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+	if (!sk2)
+		goto out;
+	err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE);
+	if (err)
+		goto out;
+	if (ctx->sk != sk2)
+		goto out;
+
+	/* Access reassigned ctx->sk fields */
+	if (ctx->sk->family != AF_INET ||
+	    ctx->sk->type != SOCK_STREAM ||
+	    ctx->sk->state != BPF_TCP_LISTEN)
+		goto out;
+
+	ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+	if (sk1)
+		bpf_sk_release(sk1);
+	if (sk2)
+		bpf_sk_release(sk2);
+	return ret;
+}
+
+/* Check narrow loads from ctx fields that support them.
+ *
+ * Narrow loads of size >= target field size from a non-zero offset
+ * are not covered because they give bogus results, that is the
+ * verifier ignores the offset.
+ */
+SEC("sk_lookup/ctx_narrow_access")
+int ctx_narrow_access(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk;
+	int err, family;
+	__u16 *half;
+	__u8 *byte;
+	bool v4;
+
+	v4 = (ctx->family == AF_INET);
+
+	/* Narrow loads from family field */
+	byte = (__u8 *)&ctx->family;
+	half = (__u16 *)&ctx->family;
+	if (byte[0] != (v4 ? AF_INET : AF_INET6) ||
+	    byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+		return SK_DROP;
+	if (half[0] != (v4 ? AF_INET : AF_INET6))
+		return SK_DROP;
+
+	byte = (__u8 *)&ctx->protocol;
+	if (byte[0] != IPPROTO_TCP ||
+	    byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+		return SK_DROP;
+	half = (__u16 *)&ctx->protocol;
+	if (half[0] != IPPROTO_TCP)
+		return SK_DROP;
+
+	/* Narrow loads from remote_port field. Expect non-0 value. */
+	byte = (__u8 *)&ctx->remote_port;
+	if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0)
+		return SK_DROP;
+	half = (__u16 *)&ctx->remote_port;
+	if (half[0] == 0)
+		return SK_DROP;
+
+	/* Narrow loads from local_port field. Expect DST_PORT. */
+	byte = (__u8 *)&ctx->local_port;
+	if (byte[0] != ((DST_PORT >> 0) & 0xff) ||
+	    byte[1] != ((DST_PORT >> 8) & 0xff) ||
+	    byte[2] != 0 || byte[3] != 0)
+		return SK_DROP;
+	half = (__u16 *)&ctx->local_port;
+	if (half[0] != DST_PORT)
+		return SK_DROP;
+
+	/* Narrow loads from IPv4 fields */
+	if (v4) {
+		/* Expect non-0.0.0.0 in remote_ip4 */
+		byte = (__u8 *)&ctx->remote_ip4;
+		if (byte[0] == 0 && byte[1] == 0 &&
+		    byte[2] == 0 && byte[3] == 0)
+			return SK_DROP;
+		half = (__u16 *)&ctx->remote_ip4;
+		if (half[0] == 0 && half[1] == 0)
+			return SK_DROP;
+
+		/* Expect DST_IP4 in local_ip4 */
+		byte = (__u8 *)&ctx->local_ip4;
+		if (byte[0] != ((DST_IP4 >>  0) & 0xff) ||
+		    byte[1] != ((DST_IP4 >>  8) & 0xff) ||
+		    byte[2] != ((DST_IP4 >> 16) & 0xff) ||
+		    byte[3] != ((DST_IP4 >> 24) & 0xff))
+			return SK_DROP;
+		half = (__u16 *)&ctx->local_ip4;
+		if (half[0] != ((DST_IP4 >>  0) & 0xffff) ||
+		    half[1] != ((DST_IP4 >> 16) & 0xffff))
+			return SK_DROP;
+	} else {
+		/* Expect 0.0.0.0 IPs when family != AF_INET */
+		byte = (__u8 *)&ctx->remote_ip4;
+		if (byte[0] != 0 || byte[1] != 0 &&
+		    byte[2] != 0 || byte[3] != 0)
+			return SK_DROP;
+		half = (__u16 *)&ctx->remote_ip4;
+		if (half[0] != 0 || half[1] != 0)
+			return SK_DROP;
+
+		byte = (__u8 *)&ctx->local_ip4;
+		if (byte[0] != 0 || byte[1] != 0 &&
+		    byte[2] != 0 || byte[3] != 0)
+			return SK_DROP;
+		half = (__u16 *)&ctx->local_ip4;
+		if (half[0] != 0 || half[1] != 0)
+			return SK_DROP;
+	}
+
+	/* Narrow loads from IPv6 fields */
+	if (!v4) {
+		/* Expenct non-:: IP in remote_ip6 */
+		byte = (__u8 *)&ctx->remote_ip6;
+		if (byte[0] == 0 && byte[1] == 0 &&
+		    byte[2] == 0 && byte[3] == 0 &&
+		    byte[4] == 0 && byte[5] == 0 &&
+		    byte[6] == 0 && byte[7] == 0 &&
+		    byte[8] == 0 && byte[9] == 0 &&
+		    byte[10] == 0 && byte[11] == 0 &&
+		    byte[12] == 0 && byte[13] == 0 &&
+		    byte[14] == 0 && byte[15] == 0)
+			return SK_DROP;
+		half = (__u16 *)&ctx->remote_ip6;
+		if (half[0] == 0 && half[1] == 0 &&
+		    half[2] == 0 && half[3] == 0 &&
+		    half[4] == 0 && half[5] == 0 &&
+		    half[6] == 0 && half[7] == 0)
+			return SK_DROP;
+
+		/* Expect DST_IP6 in local_ip6 */
+		byte = (__u8 *)&ctx->local_ip6;
+		if (byte[0] != ((DST_IP6[0] >>  0) & 0xff) ||
+		    byte[1] != ((DST_IP6[0] >>  8) & 0xff) ||
+		    byte[2] != ((DST_IP6[0] >> 16) & 0xff) ||
+		    byte[3] != ((DST_IP6[0] >> 24) & 0xff) ||
+		    byte[4] != ((DST_IP6[1] >>  0) & 0xff) ||
+		    byte[5] != ((DST_IP6[1] >>  8) & 0xff) ||
+		    byte[6] != ((DST_IP6[1] >> 16) & 0xff) ||
+		    byte[7] != ((DST_IP6[1] >> 24) & 0xff) ||
+		    byte[8] != ((DST_IP6[2] >>  0) & 0xff) ||
+		    byte[9] != ((DST_IP6[2] >>  8) & 0xff) ||
+		    byte[10] != ((DST_IP6[2] >> 16) & 0xff) ||
+		    byte[11] != ((DST_IP6[2] >> 24) & 0xff) ||
+		    byte[12] != ((DST_IP6[3] >>  0) & 0xff) ||
+		    byte[13] != ((DST_IP6[3] >>  8) & 0xff) ||
+		    byte[14] != ((DST_IP6[3] >> 16) & 0xff) ||
+		    byte[15] != ((DST_IP6[3] >> 24) & 0xff))
+			return SK_DROP;
+		half = (__u16 *)&ctx->local_ip6;
+		if (half[0] != ((DST_IP6[0] >>  0) & 0xffff) ||
+		    half[1] != ((DST_IP6[0] >> 16) & 0xffff) ||
+		    half[2] != ((DST_IP6[1] >>  0) & 0xffff) ||
+		    half[3] != ((DST_IP6[1] >> 16) & 0xffff) ||
+		    half[4] != ((DST_IP6[2] >>  0) & 0xffff) ||
+		    half[5] != ((DST_IP6[2] >> 16) & 0xffff) ||
+		    half[6] != ((DST_IP6[3] >>  0) & 0xffff) ||
+		    half[7] != ((DST_IP6[3] >> 16) & 0xffff))
+			return SK_DROP;
+	} else {
+		/* Expect :: IPs when family != AF_INET6 */
+		byte = (__u8 *)&ctx->remote_ip6;
+		if (byte[0] != 0 || byte[1] != 0 ||
+		    byte[2] != 0 || byte[3] != 0 ||
+		    byte[4] != 0 || byte[5] != 0 ||
+		    byte[6] != 0 || byte[7] != 0 ||
+		    byte[8] != 0 || byte[9] != 0 ||
+		    byte[10] != 0 || byte[11] != 0 ||
+		    byte[12] != 0 || byte[13] != 0 ||
+		    byte[14] != 0 || byte[15] != 0)
+			return SK_DROP;
+		half = (__u16 *)&ctx->remote_ip6;
+		if (half[0] != 0 || half[1] != 0 ||
+		    half[2] != 0 || half[3] != 0 ||
+		    half[4] != 0 || half[5] != 0 ||
+		    half[6] != 0 || half[7] != 0)
+			return SK_DROP;
+
+		byte = (__u8 *)&ctx->local_ip6;
+		if (byte[0] != 0 || byte[1] != 0 ||
+		    byte[2] != 0 || byte[3] != 0 ||
+		    byte[4] != 0 || byte[5] != 0 ||
+		    byte[6] != 0 || byte[7] != 0 ||
+		    byte[8] != 0 || byte[9] != 0 ||
+		    byte[10] != 0 || byte[11] != 0 ||
+		    byte[12] != 0 || byte[13] != 0 ||
+		    byte[14] != 0 || byte[15] != 0)
+			return SK_DROP;
+		half = (__u16 *)&ctx->local_ip6;
+		if (half[0] != 0 || half[1] != 0 ||
+		    half[2] != 0 || half[3] != 0 ||
+		    half[4] != 0 || half[5] != 0 ||
+		    half[6] != 0 || half[7] != 0)
+			return SK_DROP;
+	}
+
+	/* Success, redirect to KEY_SERVER_B */
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+	if (sk) {
+		bpf_sk_assign(ctx, sk, 0);
+		bpf_sk_release(sk);
+	}
+	return SK_PASS;
+}
+
+/* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */
+SEC("sk_lookup/sk_assign_esocknosupport")
+int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk;
+	int err, ret;
+
+	ret = SK_DROP;
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+	if (!sk)
+		goto out;
+
+	err = bpf_sk_assign(ctx, sk, 0);
+	if (err != -ESOCKTNOSUPPORT) {
+		bpf_printk("sk_assign returned %d, expected %d\n",
+			   err, -ESOCKTNOSUPPORT);
+		goto out;
+	}
+
+	ret = SK_PASS; /* Success, pass to regular lookup */
+out:
+	if (sk)
+		bpf_sk_release(sk);
+	return ret;
+}
+
+SEC("sk_lookup/multi_prog_pass1")
+int multi_prog_pass1(struct bpf_sk_lookup *ctx)
+{
+	bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+	return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_pass2")
+int multi_prog_pass2(struct bpf_sk_lookup *ctx)
+{
+	bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+	return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_drop1")
+int multi_prog_drop1(struct bpf_sk_lookup *ctx)
+{
+	bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+	return SK_DROP;
+}
+
+SEC("sk_lookup/multi_prog_drop2")
+int multi_prog_drop2(struct bpf_sk_lookup *ctx)
+{
+	bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+	return SK_DROP;
+}
+
+static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
+{
+	struct bpf_sock *sk;
+	int err;
+
+	sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+	if (!sk)
+		return SK_DROP;
+
+	err = bpf_sk_assign(ctx, sk, 0);
+	bpf_sk_release(sk);
+	if (err)
+		return SK_DROP;
+
+	return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_redir1")
+int multi_prog_redir1(struct bpf_sk_lookup *ctx)
+{
+	int ret;
+
+	ret = select_server_a(ctx);
+	bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+	return SK_PASS;
+}
+
+SEC("sk_lookup/multi_prog_redir2")
+int multi_prog_redir2(struct bpf_sk_lookup *ctx)
+{
+	int ret;
+
+	ret = select_server_a(ctx);
+	bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+	return SK_PASS;
+}
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+__u32 _version SEC("version") = 1;
-- 
cgit 


From 55db9c0e853421fa71cac5e6855898601f78a1f5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 17 Jul 2020 08:23:15 +0200
Subject: net: remove compat_sys_{get,set}sockopt

Now that the ->compat_{get,set}sockopt proto_ops methods are gone
there is no good reason left to keep the compat syscalls separate.

This fixes the odd use of unsigned int for the compat_setsockopt
optlen and the missing sock_use_custom_sol_socket.

It would also easily allow running the eBPF hooks for the compat
syscalls, but such a large change in behavior does not belong into
a consolidation patch like this one.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/include/uapi/asm-generic/unistd.h            | 4 ++--
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 4 ++--
 tools/perf/arch/s390/entry/syscalls/syscall.tbl    | 4 ++--
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index f4a01305d9a6..c8c189a5f0a6 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -606,9 +606,9 @@ __SYSCALL(__NR_sendto, sys_sendto)
 #define __NR_recvfrom 207
 __SC_COMP(__NR_recvfrom, sys_recvfrom, compat_sys_recvfrom)
 #define __NR_setsockopt 208
-__SC_COMP(__NR_setsockopt, sys_setsockopt, compat_sys_setsockopt)
+__SC_COMP(__NR_setsockopt, sys_setsockopt, sys_setsockopt)
 #define __NR_getsockopt 209
-__SC_COMP(__NR_getsockopt, sys_getsockopt, compat_sys_getsockopt)
+__SC_COMP(__NR_getsockopt, sys_getsockopt, sys_getsockopt)
 #define __NR_shutdown 210
 __SYSCALL(__NR_shutdown, sys_shutdown)
 #define __NR_sendmsg 211
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 35b61bfc1b1a..b190f2eb2611 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -427,8 +427,8 @@
 336	common	recv				sys_recv			compat_sys_recv
 337	common	recvfrom			sys_recvfrom			compat_sys_recvfrom
 338	common	shutdown			sys_shutdown
-339	common	setsockopt			sys_setsockopt			compat_sys_setsockopt
-340	common	getsockopt			sys_getsockopt			compat_sys_getsockopt
+339	common	setsockopt			sys_setsockopt			sys_setsockopt
+340	common	getsockopt			sys_getsockopt			sys_getsockopt
 341	common	sendmsg				sys_sendmsg			compat_sys_sendmsg
 342	common	recvmsg				sys_recvmsg			compat_sys_recvmsg
 343	32	recvmmsg			sys_recvmmsg_time32		compat_sys_recvmmsg_time32
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index b38d48464368..56ae24b6e4be 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -372,8 +372,8 @@
 362  common	connect			sys_connect			compat_sys_connect
 363  common	listen			sys_listen			sys_listen
 364  common	accept4			sys_accept4			compat_sys_accept4
-365  common	getsockopt		sys_getsockopt			compat_sys_getsockopt
-366  common	setsockopt		sys_setsockopt			compat_sys_setsockopt
+365  common	getsockopt		sys_getsockopt			sys_getsockopt
+366  common	setsockopt		sys_setsockopt			sys_setsockopt
 367  common	getsockname		sys_getsockname			compat_sys_getsockname
 368  common	getpeername		sys_getpeername			compat_sys_getpeername
 369  common	sendto			sys_sendto			compat_sys_sendto
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 78847b32e137..e008d638e641 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -396,8 +396,8 @@
 538	x32	sendmmsg		compat_sys_sendmmsg
 539	x32	process_vm_readv	compat_sys_process_vm_readv
 540	x32	process_vm_writev	compat_sys_process_vm_writev
-541	x32	setsockopt		compat_sys_setsockopt
-542	x32	getsockopt		compat_sys_getsockopt
+541	x32	setsockopt		sys_setsockopt
+542	x32	getsockopt		sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
 545	x32	execveat		compat_sys_execveat
-- 
cgit 


From 4a09a981002bafdac1decf4116c77bf8f5496326 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Mon, 20 Jul 2020 20:55:58 +0300
Subject: testptp: promote 'perout' variable to int64_t

Since 'perout' holds the nanosecond value of the signal's period, it
should be a 64-bit value. Current assumption is that it cannot be larger
than 1 second.

Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/ptp/testptp.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index da7a9dda9490..edc1e50768c2 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -35,6 +35,8 @@
 #define CLOCK_INVALID -1
 #endif
 
+#define NSEC_PER_SEC 1000000000LL
+
 /* clock_adjtime is not available in GLIBC < 2.14 */
 #if !__GLIBC_PREREQ(2, 14)
 #include <sys/syscall.h>
@@ -169,7 +171,6 @@ int main(int argc, char *argv[])
 	int list_pins = 0;
 	int pct_offset = 0;
 	int n_samples = 0;
-	int perout = -1;
 	int pin_index = -1, pin_func;
 	int pps = -1;
 	int seconds = 0;
@@ -177,6 +178,7 @@ int main(int argc, char *argv[])
 
 	int64_t t1, t2, tp;
 	int64_t interval, offset;
+	int64_t perout = -1;
 
 	progname = strrchr(argv[0], '/');
 	progname = progname ? 1+progname : argv[0];
@@ -215,7 +217,7 @@ int main(int argc, char *argv[])
 			}
 			break;
 		case 'p':
-			perout = atoi(optarg);
+			perout = atoll(optarg);
 			break;
 		case 'P':
 			pps = atoi(optarg);
@@ -400,8 +402,8 @@ int main(int argc, char *argv[])
 		perout_request.index = index;
 		perout_request.start.sec = ts.tv_sec + 2;
 		perout_request.start.nsec = 0;
-		perout_request.period.sec = 0;
-		perout_request.period.nsec = perout;
+		perout_request.period.sec = perout / NSEC_PER_SEC;
+		perout_request.period.nsec = perout % NSEC_PER_SEC;
 		if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
 			perror("PTP_PEROUT_REQUEST");
 		} else {
-- 
cgit 


From 7570ebe0410a1df1c9a44071c18685842d84a7e1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Mon, 20 Jul 2020 20:55:59 +0300
Subject: testptp: add new options for perout phase and pulse width

Extend the example program for PTP ancillary functionality with the
ability to configure not only the periodic output's period (frequency),
but also the phase and duty cycle (pulse width) which were newly
introduced.

The ioctl level also needs to be updated to the new PTP_PEROUT_REQUEST2,
since the original PTP_PEROUT_REQUEST doesn't support this
functionality. For an in-tree testing program, not having explicit
backwards compatibility is fine, as it should always be tested with the
current kernel headers and sources.

Tested with an oscilloscope on the felix switch PHC:

echo '2 0' > /sys/class/ptp/ptp1/pins/switch_1588_dat0
./testptp -d /dev/ptp1 -p 1000000000 -w 100000000 -H 1000 -i 0

Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/ptp/testptp.c | 41 +++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index edc1e50768c2..f7911aaeb007 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -134,6 +134,8 @@ static void usage(char *progname)
 		"            1 - external time stamp\n"
 		"            2 - periodic output\n"
 		" -p val     enable output with a period of 'val' nanoseconds\n"
+		" -H val     set output phase to 'val' nanoseconds (requires -p)\n"
+		" -w val     set output pulse width to 'val' nanoseconds (requires -p)\n"
 		" -P val     enable or disable (val=1|0) the system clock PPS\n"
 		" -s         set the ptp clock time from the system time\n"
 		" -S         set the system time from the ptp clock time\n"
@@ -178,11 +180,13 @@ int main(int argc, char *argv[])
 
 	int64_t t1, t2, tp;
 	int64_t interval, offset;
+	int64_t perout_phase = -1;
+	int64_t pulsewidth = -1;
 	int64_t perout = -1;
 
 	progname = strrchr(argv[0], '/');
 	progname = progname ? 1+progname : argv[0];
-	while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) {
+	while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:p:P:sSt:T:w:z"))) {
 		switch (c) {
 		case 'c':
 			capabilities = 1;
@@ -199,6 +203,9 @@ int main(int argc, char *argv[])
 		case 'g':
 			gettime = 1;
 			break;
+		case 'H':
+			perout_phase = atoll(optarg);
+			break;
 		case 'i':
 			index = atoi(optarg);
 			break;
@@ -235,6 +242,9 @@ int main(int argc, char *argv[])
 			settime = 3;
 			seconds = atoi(optarg);
 			break;
+		case 'w':
+			pulsewidth = atoi(optarg);
+			break;
 		case 'z':
 			flagtest = 1;
 			break;
@@ -393,6 +403,16 @@ int main(int argc, char *argv[])
 		}
 	}
 
+	if (pulsewidth >= 0 && perout < 0) {
+		puts("-w can only be specified together with -p");
+		return -1;
+	}
+
+	if (perout_phase >= 0 && perout < 0) {
+		puts("-H can only be specified together with -p");
+		return -1;
+	}
+
 	if (perout >= 0) {
 		if (clock_gettime(clkid, &ts)) {
 			perror("clock_gettime");
@@ -400,11 +420,24 @@ int main(int argc, char *argv[])
 		}
 		memset(&perout_request, 0, sizeof(perout_request));
 		perout_request.index = index;
-		perout_request.start.sec = ts.tv_sec + 2;
-		perout_request.start.nsec = 0;
 		perout_request.period.sec = perout / NSEC_PER_SEC;
 		perout_request.period.nsec = perout % NSEC_PER_SEC;
-		if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
+		perout_request.flags = 0;
+		if (pulsewidth >= 0) {
+			perout_request.flags |= PTP_PEROUT_DUTY_CYCLE;
+			perout_request.on.sec = pulsewidth / NSEC_PER_SEC;
+			perout_request.on.nsec = pulsewidth % NSEC_PER_SEC;
+		}
+		if (perout_phase >= 0) {
+			perout_request.flags |= PTP_PEROUT_PHASE;
+			perout_request.phase.sec = perout_phase / NSEC_PER_SEC;
+			perout_request.phase.nsec = perout_phase % NSEC_PER_SEC;
+		} else {
+			perout_request.start.sec = ts.tv_sec + 2;
+			perout_request.start.nsec = 0;
+		}
+
+		if (ioctl(fd, PTP_PEROUT_REQUEST2, &perout_request)) {
 			perror("PTP_PEROUT_REQUEST");
 		} else {
 			puts("periodic output request okay");
-- 
cgit 


From 2b9843fbe15276a98cae74d5818dd55600e19052 Mon Sep 17 00:00:00 2001
From: Briana Oursler <briana.oursler@gmail.com>
Date: Fri, 17 Jul 2020 14:54:39 -0700
Subject: tc-testing: Add tdc to kselftests

Add tdc to existing kselftest infrastructure so that it can be run with
existing kselftests. TDC now generates objects in objdir/kselftest
without cluttering main objdir, leaves source directory clean, and
installs correctly in kselftest_install, properly adding itself to
run_kselftest.sh script.

Add tc-testing as a target of selftests/Makefile. Create tdc.sh to run
tdc.py targets with correct arguments. To support single target from
selftest/Makefile, combine tc-testing/bpf/Makefile and
tc-testing/Makefile. Move action.c up a directory to tc-testing/.

Tested with:
 make O=/tmp/{objdir} TARGETS="tc-testing" kselftest
 cd /tmp/{objdir}
 cd kselftest
 cd tc-testing
 ./tdc.sh

 make -C tools/testing/selftests/ TARGETS=tc-testing run_tests

 make TARGETS="tc-testing" kselftest
 cd tools/testing/selftests
 ./kselftest_install.sh /tmp/exampledir
 My VM doesn't run all the kselftests so I commented out all except my
 target and net/pmtu.sh then:
 cd /tmp/exampledir && ./run_kselftest.sh

Co-developed-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Briana Oursler <briana.oursler@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/Makefile                 |  1 +
 tools/testing/selftests/tc-testing/Makefile      | 33 ++++++++++++++++++++++++
 tools/testing/selftests/tc-testing/action.c      | 23 +++++++++++++++++
 tools/testing/selftests/tc-testing/bpf/Makefile  | 30 ---------------------
 tools/testing/selftests/tc-testing/bpf/action.c  | 23 -----------------
 tools/testing/selftests/tc-testing/tdc.sh        |  5 ++++
 tools/testing/selftests/tc-testing/tdc_config.py |  2 +-
 7 files changed, 63 insertions(+), 54 deletions(-)
 create mode 100644 tools/testing/selftests/tc-testing/Makefile
 create mode 100644 tools/testing/selftests/tc-testing/action.c
 delete mode 100644 tools/testing/selftests/tc-testing/bpf/Makefile
 delete mode 100644 tools/testing/selftests/tc-testing/bpf/action.c
 create mode 100755 tools/testing/selftests/tc-testing/tdc.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 1195bd85af38..f4522e0a2cab 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -54,6 +54,7 @@ TARGETS += splice
 TARGETS += static_keys
 TARGETS += sync
 TARGETS += sysctl
+TARGETS += tc-testing
 TARGETS += timens
 ifneq (1, $(quicktest))
 TARGETS += timers
diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile
new file mode 100644
index 000000000000..91fee5c43274
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = $(abspath ../../../..)
+APIDIR := $(top_scrdir)/include/uapi
+TEST_GEN_FILES = action.o
+
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
+CLANG ?= clang
+LLC   ?= llc
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+ifeq ($(PROBE),)
+  CPU ?= probe
+else
+  CPU ?= generic
+endif
+
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I$(APIDIR) \
+	      $(CLANG_SYS_INCLUDES) \
+	      -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/%.o: %.c
+	$(CLANG) $(CLANG_FLAGS) \
+		 -O2 -target bpf -emit-llvm -c $< -o - |      \
+	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+
+TEST_PROGS += ./tdc.sh
+TEST_FILES := tdc*.py Tdc*.py plugins plugin-lib tc-tests
diff --git a/tools/testing/selftests/tc-testing/action.c b/tools/testing/selftests/tc-testing/action.c
new file mode 100644
index 000000000000..c32b99b80e19
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/action.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Davide Caratti, Red Hat inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s)
+{
+	return TC_ACT_OK;
+}
+
+__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s)
+{
+	s->data = 0x0;
+	return TC_ACT_OK;
+}
+
+char _license[] __attribute__((section("license"),used)) = "GPL";
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile
deleted file mode 100644
index be5a5e542804..000000000000
--- a/tools/testing/selftests/tc-testing/bpf/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-APIDIR := ../../../../include/uapi
-TEST_GEN_FILES = action.o
-
-top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
-include ../../lib.mk
-
-CLANG ?= clang
-LLC   ?= llc
-PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
-
-ifeq ($(PROBE),)
-  CPU ?= probe
-else
-  CPU ?= generic
-endif
-
-CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
-	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
-
-CLANG_FLAGS = -I. -I$(APIDIR) \
-	      $(CLANG_SYS_INCLUDES) \
-	      -Wno-compare-distinct-pointer-types
-
-$(OUTPUT)/%.o: %.c
-	$(CLANG) $(CLANG_FLAGS) \
-		 -O2 -target bpf -emit-llvm -c $< -o - |      \
-	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c
deleted file mode 100644
index c32b99b80e19..000000000000
--- a/tools/testing/selftests/tc-testing/bpf/action.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018 Davide Caratti, Red Hat inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-
-#include <linux/bpf.h>
-#include <linux/pkt_cls.h>
-
-__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s)
-{
-	return TC_ACT_OK;
-}
-
-__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s)
-{
-	s->data = 0x0;
-	return TC_ACT_OK;
-}
-
-char _license[] __attribute__((section("license"),used)) = "GPL";
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
new file mode 100755
index 000000000000..7fe38c76db44
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+./tdc.py -c actions --nobuildebpf
+./tdc.py -c qdisc
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index 080709cc4297..cd4a27ee1466 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -24,7 +24,7 @@ NAMES = {
           # Name of the namespace to use
           'NS': 'tcut',
           # Directory containing eBPF test programs
-          'EBPFDIR': './bpf'
+          'EBPFDIR': './'
         }
 
 
-- 
cgit 


From 2ea485980734500d41e4530f038d94a21e2b3b34 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 17 Jul 2020 18:53:22 +0200
Subject: selftests: bpf: test_kmod.sh: Fix running out of srctree

When running out of srctree, relative path to lib/test_bpf.ko is
different than when running in srctree. Check $building_out_of_srctree
environment variable and use a different relative path if needed.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200717165326.6786-2-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/test_kmod.sh | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index 9df0d2ac45f8..4f6444bcd53f 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -10,7 +10,13 @@ if [ "$(id -u)" != "0" ]; then
 	exit $ksft_skip
 fi
 
-SRC_TREE=../../../../
+if [ "$building_out_of_srctree" ]; then
+	# We are in linux-build/kselftest/bpf
+	OUTPUT=../../
+else
+	# We are in linux/tools/testing/selftests/bpf
+	OUTPUT=../../../../
+fi
 
 test_run()
 {
@@ -19,8 +25,8 @@ test_run()
 
 	echo "[ JIT enabled:$1 hardened:$2 ]"
 	dmesg -C
-	if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
-		insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
+	if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then
+		insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null
 		if [ $? -ne 0 ]; then
 			rc=1
 		fi
-- 
cgit 


From da7a35062bcccd3f67779a357065ba707b02ff2b Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Sun, 19 Jul 2020 23:17:41 -0700
Subject: libbpf bpf_helpers: Use __builtin_offsetof for offsetof

The non-builtin route for offsetof has a dependency on size_t from
stdlib.h/stdint.h that is undeclared and may break targets.
The offsetof macro in bpf_helpers may disable the same macro in other
headers that have a #ifdef offsetof guard. Rather than add additional
dependencies improve the offsetof macro declared here to use the
builtin that is available since llvm 3.7 (the first with a BPF backend).

Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200720061741.1514673-1-irogers@google.com
---
 tools/lib/bpf/bpf_helpers.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index a510d8ed716f..bc14db706b88 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -40,7 +40,7 @@
  * Helper macro to manipulate data structures
  */
 #ifndef offsetof
-#define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
+#define offsetof(TYPE, MEMBER)  __builtin_offsetof(TYPE, MEMBER)
 #endif
 #ifndef container_of
 #define container_of(ptr, type, member)				\
-- 
cgit 


From 956fcfcd359512f15b19bcd157fa8206ed26605b Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 17 Jul 2020 20:30:59 +0800
Subject: tools/bpftool: Fix error handing in do_skeleton()

Fix pass 0 to PTR_ERR, also dump more err info using
libbpf_strerror.

Fixes: 5dc7a8b21144 ("bpftool, selftests/bpf: Embed object file inside skeleton")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200717123059.29624-1-yuehaibing@huawei.com
---
 tools/bpf/bpftool/gen.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index b59d26e89367..8a4c2b3b0cd6 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -302,8 +302,11 @@ static int do_skeleton(int argc, char **argv)
 	opts.object_name = obj_name;
 	obj = bpf_object__open_mem(obj_data, file_sz, &opts);
 	if (IS_ERR(obj)) {
+		char err_buf[256];
+
+		libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf));
+		p_err("failed to open BPF object file: %s", err_buf);
 		obj = NULL;
-		p_err("failed to open BPF object file: %ld", PTR_ERR(obj));
 		goto out;
 	}
 
-- 
cgit 


From 6bd557275ad5a1adaff5082f4e24218ba344e0c0 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Mon, 20 Jul 2020 12:18:10 +0200
Subject: selftests/bpf: Fix test_lwt_seg6local.sh hangs

OpenBSD netcat (Debian patchlevel 1.195-2) does not seem to react to
SIGINT for whatever reason, causing prefix.pl to hang after
test_lwt_seg6local.sh exits due to netcat inheriting
test_lwt_seg6local.sh's file descriptors.

Fix by using SIGTERM instead.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200720101810.84299-1-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/test_lwt_seg6local.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
index 785eabf2a593..5620919fde9e 100755
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -140,7 +140,7 @@ ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
 ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
 ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
 sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
-kill -INT $!
+kill -TERM $!
 
 if [[ $(< $TMP_FILE) != "foobar" ]]; then
 	exit 1
-- 
cgit 


From e4d9c2320716ea0e9ef59f503ddd8f253a642ddd Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Mon, 20 Jul 2020 13:48:06 +0200
Subject: samples/bpf, selftests/bpf: Use bpf_probe_read_kernel

A handful of samples and selftests fail to build on s390, because
after commit 0ebeea8ca8a4 ("bpf: Restrict bpf_probe_read{, str}()
only to archs where they work") bpf_probe_read is not available
anymore.

Fix by using bpf_probe_read_kernel.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200720114806.88823-1-iii@linux.ibm.com
---
 tools/bpf/bpftool/skeleton/pid_iter.bpf.c            | 3 ++-
 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c | 6 +++---
 tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c    | 2 +-
 tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c    | 2 +-
 tools/testing/selftests/bpf/progs/bpf_iter_udp4.c    | 2 +-
 tools/testing/selftests/bpf/progs/bpf_iter_udp6.c    | 2 +-
 6 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
index 8468a608911e..d9b420972934 100644
--- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -71,7 +71,8 @@ int iter(struct bpf_iter__task_file *ctx)
 
 	e.pid = task->tgid;
 	e.id = get_obj_id(file->private_data, obj_type);
-	bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm);
+	bpf_probe_read_kernel(&e.comm, sizeof(e.comm),
+			      task->group_leader->comm);
 	bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
 
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 7de98a68599a..95989f4c99b5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -36,10 +36,10 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
 	if (!nlk->groups)  {
 		group = 0;
 	} else {
-		/* FIXME: temporary use bpf_probe_read here, needs
+		/* FIXME: temporary use bpf_probe_read_kernel here, needs
 		 * verifier support to do direct access.
 		 */
-		bpf_probe_read(&group, sizeof(group), &nlk->groups[0]);
+		bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]);
 	}
 	BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
 		       nlk->portid, (u32)group,
@@ -56,7 +56,7 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
 		 * with current verifier.
 		 */
 		inode = SOCK_INODE(sk);
-		bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+		bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
 	}
 	BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 30fd587cb325..54380c5e1069 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -57,7 +57,7 @@ static long sock_i_ino(const struct sock *sk)
 		return 0;
 
 	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
-	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
 	return ino;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index 10dec4392031..b4fbddfa4e10 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -57,7 +57,7 @@ static long sock_i_ino(const struct sock *sk)
 		return 0;
 
 	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
-	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
 	return ino;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index 7053784575e4..f258583afbbd 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -18,7 +18,7 @@ static long sock_i_ino(const struct sock *sk)
 		return 0;
 
 	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
-	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
 	return ino;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index c1175a6ecf43..65f93bb03f0f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -25,7 +25,7 @@ static long sock_i_ino(const struct sock *sk)
 		return 0;
 
 	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
-	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
 	return ino;
 }
 
-- 
cgit 


From d8dfe5bfe856c0c72b1750322dbfcad402e73373 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 20 Jul 2020 09:33:59 -0700
Subject: tools/bpf: Sync btf_ids.h to tools

Sync kernel header btf_ids.h to tools directory.
Also define macro CONFIG_DEBUG_INFO_BTF before
including btf_ids.h in prog_tests/resolve_btfids.c
since non-stub definitions for BTF_ID_LIST etc. macros
are defined under CONFIG_DEBUG_INFO_BTF. This
prevented test_progs from failing.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200720163359.1393079-1-yhs@fb.com
---
 tools/include/linux/btf_ids.h                           | 11 ++++++++++-
 tools/testing/selftests/bpf/prog_tests/resolve_btfids.c |  1 +
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index fe019774f8a7..1cdb56950ffe 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -3,6 +3,8 @@
 #ifndef _LINUX_BTF_IDS_H
 #define _LINUX_BTF_IDS_H
 
+#ifdef CONFIG_DEBUG_INFO_BTF
+
 #include <linux/compiler.h> /* for __PASTE */
 
 /*
@@ -21,7 +23,7 @@
 asm(							\
 ".pushsection " BTF_IDS_SECTION ",\"a\";       \n"	\
 ".local " #symbol " ;                          \n"	\
-".type  " #symbol ", @object;                  \n"	\
+".type  " #symbol ", STT_OBJECT;               \n"	\
 ".size  " #symbol ", 4;                        \n"	\
 #symbol ":                                     \n"	\
 ".zero 4                                       \n"	\
@@ -83,5 +85,12 @@ asm(							\
 ".zero 4                                       \n"	\
 ".popsection;                                  \n");
 
+#else
+
+#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID(prefix, name)
+#define BTF_ID_UNUSED
+
+#endif /* CONFIG_DEBUG_INFO_BTF */
 
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 403be6f36cba..22d83bba4e91 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -6,6 +6,7 @@
 #include <bpf/libbpf.h>
 #include <linux/btf.h>
 #include <linux/kernel.h>
+#define CONFIG_DEBUG_INFO_BTF
 #include <linux/btf_ids.h>
 #include "test_progs.h"
 
-- 
cgit 


From 0f12e584b241285cf60a6227f3771fa444cfcf76 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 20 Jul 2020 09:34:01 -0700
Subject: bpf: Add BTF_ID_LIST_GLOBAL in btf_ids.h

Existing BTF_ID_LIST used a local static variable
to store btf_ids. This patch provided a new macro
BTF_ID_LIST_GLOBAL to store btf_ids in a global
variable which can be shared among multiple files.

The existing BTF_ID_LIST is still retained.
Two reasons. First, BTF_ID_LIST is also used to build
btf_ids for helper arguments which typically
is an array of 5. Since typically different
helpers have different signature, it makes
little sense to share them. Second, some
current computed btf_ids are indeed local.
If later those btf_ids are shared between
different files, they can use BTF_ID_LIST_GLOBAL then.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/bpf/20200720163401.1393159-1-yhs@fb.com
---
 tools/include/linux/btf_ids.h                      | 10 +++++--
 .../selftests/bpf/prog_tests/resolve_btfids.c      | 33 ++++++++++++++++------
 2 files changed, 32 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 1cdb56950ffe..77ab45baa095 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -57,17 +57,20 @@ asm(							\
  * .zero 4
  *
  */
-#define __BTF_ID_LIST(name)				\
+#define __BTF_ID_LIST(name, scope)			\
 asm(							\
 ".pushsection " BTF_IDS_SECTION ",\"a\";       \n"	\
-".local " #name ";                             \n"	\
+"." #scope " " #name ";                        \n"	\
 #name ":;                                      \n"	\
 ".popsection;                                  \n");	\
 
 #define BTF_ID_LIST(name)				\
-__BTF_ID_LIST(name)					\
+__BTF_ID_LIST(name, local)				\
 extern u32 name[];
 
+#define BTF_ID_LIST_GLOBAL(name)			\
+__BTF_ID_LIST(name, globl)
+
 /*
  * The BTF_ID_UNUSED macro defines 4 zero bytes.
  * It's used when we want to define 'unused' entry
@@ -90,6 +93,7 @@ asm(							\
 #define BTF_ID_LIST(name) static u32 name[5];
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
+#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
 
 #endif /* CONFIG_DEBUG_INFO_BTF */
 
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 22d83bba4e91..3b127cab4864 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -28,7 +28,17 @@ struct symbol test_symbols[] = {
 	{ "func",    BTF_KIND_FUNC,    -1 },
 };
 
-BTF_ID_LIST(test_list)
+BTF_ID_LIST(test_list_local)
+BTF_ID_UNUSED
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct,  S)
+BTF_ID(union,   U)
+BTF_ID(func,    func)
+
+extern __u32 test_list_global[];
+BTF_ID_LIST_GLOBAL(test_list_global)
 BTF_ID_UNUSED
 BTF_ID(typedef, S)
 BTF_ID(typedef, T)
@@ -94,18 +104,25 @@ static int resolve_symbols(void)
 
 int test_resolve_btfids(void)
 {
-	unsigned int i;
+	__u32 *test_list, *test_lists[] = { test_list_local, test_list_global };
+	unsigned int i, j;
 	int ret = 0;
 
 	if (resolve_symbols())
 		return -1;
 
-	/* Check BTF_ID_LIST(test_list) IDs */
-	for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
-		ret = CHECK(test_list[i] != test_symbols[i].id,
-			    "id_check",
-			    "wrong ID for %s (%d != %d)\n", test_symbols[i].name,
-			    test_list[i], test_symbols[i].id);
+	/* Check BTF_ID_LIST(test_list_local) and
+	 * BTF_ID_LIST_GLOBAL(test_list_global) IDs
+	 */
+	for (j = 0; j < ARRAY_SIZE(test_lists); j++) {
+		test_list = test_lists[j];
+		for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
+			ret = CHECK(test_list[i] != test_symbols[i].id,
+				    "id_check",
+				    "wrong ID for %s (%d != %d)\n",
+				    test_symbols[i].name,
+				    test_list[i], test_symbols[i].id);
+		}
 	}
 
 	return ret;
-- 
cgit 


From fce557bcef119a1bc5ab3cb02678cf454bcaf424 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 20 Jul 2020 09:34:02 -0700
Subject: bpf: Make btf_sock_ids global

tcp and udp bpf_iter can reuse some socket ids in
btf_sock_ids, so make it global.

I put the extern definition in btf_ids.h as a central
place so it can be easily discovered by developers.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200720163402.1393427-1-yhs@fb.com
---
 tools/include/linux/btf_ids.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'tools')

diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 77ab45baa095..4867d549e3c1 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -97,4 +97,34 @@ asm(							\
 
 #endif /* CONFIG_DEBUG_INFO_BTF */
 
+#ifdef CONFIG_NET
+/* Define a list of socket types which can be the argument for
+ * skc_to_*_sock() helpers. All these sockets should have
+ * sock_common as the first argument in its memory layout.
+ */
+#define BTF_SOCK_TYPE_xxx \
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock)	\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock)	\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock)	\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock)				\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common)		\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock)		\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)		\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+
+enum {
+#define BTF_SOCK_TYPE(name, str) name,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+MAX_BTF_SOCK_TYPE,
+};
+
+extern u32 btf_sock_ids[];
+#endif
+
 #endif
-- 
cgit 


From 9165e1d70fb34ce438e78aad90408cfa86e4c2d0 Mon Sep 17 00:00:00 2001
From: Tony Ambardar <tony.ambardar@gmail.com>
Date: Mon, 20 Jul 2020 19:48:16 -0700
Subject: bpftool: Use only nftw for file tree parsing

The bpftool sources include code to walk file trees, but use multiple
frameworks to do so: nftw and fts. While nftw conforms to POSIX/SUSv3 and
is widely available, fts is not conformant and less common, especially on
non-glibc systems. The inconsistent framework usage hampers maintenance
and portability of bpftool, in particular for embedded systems.

Standardize code usage by rewriting one fts-based function to use nftw and
clean up some related function warnings by extending use of "const char *"
arguments. This change helps in building bpftool against musl for OpenWrt.

Also fix an unsafe call to dirname() by duplicating the string to pass,
since some implementations may directly alter it. The same approach is
used in libbpf.c.

Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200721024817.13701-1-Tony.Ambardar@gmail.com
---
 tools/bpf/bpftool/common.c | 137 ++++++++++++++++++++++++++-------------------
 tools/bpf/bpftool/main.h   |   4 +-
 2 files changed, 82 insertions(+), 59 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 9b28c69dd8e4..65303664417e 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -1,10 +1,11 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 /* Copyright (C) 2017-2018 Netronome Systems, Inc. */
 
+#define _GNU_SOURCE
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
-#include <fts.h>
+#include <ftw.h>
 #include <libgen.h>
 #include <mntent.h>
 #include <stdbool.h>
@@ -161,24 +162,35 @@ int mount_tracefs(const char *target)
 	return err;
 }
 
-int open_obj_pinned(char *path, bool quiet)
+int open_obj_pinned(const char *path, bool quiet)
 {
-	int fd;
+	char *pname;
+	int fd = -1;
+
+	pname = strdup(path);
+	if (!pname) {
+		if (!quiet)
+			p_err("mem alloc failed");
+		goto out_ret;
+	}
 
-	fd = bpf_obj_get(path);
+	fd = bpf_obj_get(pname);
 	if (fd < 0) {
 		if (!quiet)
-			p_err("bpf obj get (%s): %s", path,
-			      errno == EACCES && !is_bpffs(dirname(path)) ?
+			p_err("bpf obj get (%s): %s", pname,
+			      errno == EACCES && !is_bpffs(dirname(pname)) ?
 			    "directory not in bpf file system (bpffs)" :
 			    strerror(errno));
-		return -1;
+		goto out_free;
 	}
 
+out_free:
+	free(pname);
+out_ret:
 	return fd;
 }
 
-int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type)
 {
 	enum bpf_obj_type type;
 	int fd;
@@ -368,71 +380,82 @@ void print_hex_data_json(uint8_t *data, size_t len)
 	jsonw_end_array(json_wtr);
 }
 
+/* extra params for nftw cb */
+static struct pinned_obj_table *build_fn_table;
+static enum bpf_obj_type build_fn_type;
+
+static int do_build_table_cb(const char *fpath, const struct stat *sb,
+			     int typeflag, struct FTW *ftwbuf)
+{
+	struct bpf_prog_info pinned_info;
+	__u32 len = sizeof(pinned_info);
+	struct pinned_obj *obj_node;
+	enum bpf_obj_type objtype;
+	int fd, err = 0;
+
+	if (typeflag != FTW_F)
+		goto out_ret;
+
+	fd = open_obj_pinned(fpath, true);
+	if (fd < 0)
+		goto out_ret;
+
+	objtype = get_fd_type(fd);
+	if (objtype != build_fn_type)
+		goto out_close;
+
+	memset(&pinned_info, 0, sizeof(pinned_info));
+	if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len))
+		goto out_close;
+
+	obj_node = calloc(1, sizeof(*obj_node));
+	if (!obj_node) {
+		err = -1;
+		goto out_close;
+	}
+
+	obj_node->id = pinned_info.id;
+	obj_node->path = strdup(fpath);
+	if (!obj_node->path) {
+		err = -1;
+		free(obj_node);
+		goto out_close;
+	}
+
+	hash_add(build_fn_table->table, &obj_node->hash, obj_node->id);
+out_close:
+	close(fd);
+out_ret:
+	return err;
+}
+
 int build_pinned_obj_table(struct pinned_obj_table *tab,
 			   enum bpf_obj_type type)
 {
-	struct bpf_prog_info pinned_info = {};
-	struct pinned_obj *obj_node = NULL;
-	__u32 len = sizeof(pinned_info);
 	struct mntent *mntent = NULL;
-	enum bpf_obj_type objtype;
 	FILE *mntfile = NULL;
-	FTSENT *ftse = NULL;
-	FTS *fts = NULL;
-	int fd, err;
+	int flags = FTW_PHYS;
+	int nopenfd = 16;
+	int err = 0;
 
 	mntfile = setmntent("/proc/mounts", "r");
 	if (!mntfile)
 		return -1;
 
+	build_fn_table = tab;
+	build_fn_type = type;
+
 	while ((mntent = getmntent(mntfile))) {
-		char *path[] = { mntent->mnt_dir, NULL };
+		char *path = mntent->mnt_dir;
 
 		if (strncmp(mntent->mnt_type, "bpf", 3) != 0)
 			continue;
-
-		fts = fts_open(path, 0, NULL);
-		if (!fts)
-			continue;
-
-		while ((ftse = fts_read(fts))) {
-			if (!(ftse->fts_info & FTS_F))
-				continue;
-			fd = open_obj_pinned(ftse->fts_path, true);
-			if (fd < 0)
-				continue;
-
-			objtype = get_fd_type(fd);
-			if (objtype != type) {
-				close(fd);
-				continue;
-			}
-			memset(&pinned_info, 0, sizeof(pinned_info));
-			err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len);
-			if (err) {
-				close(fd);
-				continue;
-			}
-
-			obj_node = malloc(sizeof(*obj_node));
-			if (!obj_node) {
-				close(fd);
-				fts_close(fts);
-				fclose(mntfile);
-				return -1;
-			}
-
-			memset(obj_node, 0, sizeof(*obj_node));
-			obj_node->id = pinned_info.id;
-			obj_node->path = strdup(ftse->fts_path);
-			hash_add(tab->table, &obj_node->hash, obj_node->id);
-
-			close(fd);
-		}
-		fts_close(fts);
+		err = nftw(path, do_build_table_cb, nopenfd, flags);
+		if (err)
+			break;
 	}
 	fclose(mntfile);
-	return 0;
+	return err;
 }
 
 void delete_pinned_obj_table(struct pinned_obj_table *tab)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 78d34e860713..e3a79b5a9960 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -152,8 +152,8 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
 int get_fd_type(int fd);
 const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
-int open_obj_pinned(char *path, bool quiet);
-int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
+int open_obj_pinned(const char *path, bool quiet);
+int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type);
 int mount_bpffs_for_pin(const char *name);
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
 int do_pin_fd(int fd, const char *name);
-- 
cgit 


From 1b6687e31a2df9fbdb12d25c1d1d372777bf96a8 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Thu, 23 Jul 2020 17:23:09 -0600
Subject: vrf: Handle CONFIG_SYSCTL not set

Randy reported compile failure when CONFIG_SYSCTL is not set/enabled:

ERROR: modpost: "sysctl_vals" [drivers/net/vrf.ko] undefined!

Fix by splitting out the sysctl init and cleanup into helpers that
can be set to do nothing when CONFIG_SYSCTL is disabled. In addition,
move vrf_strict_mode and vrf_strict_mode_change to above
vrf_shared_table_handler (code move only) and wrap all of it
in the ifdef CONFIG_SYSCTL.

Update the strict mode tests to check for the existence of the
/proc/sys entry.

Fixes: 33306f1aaf82 ("vrf: add sysctl parameter for strict mode")
Cc: Andrea Mayer <andrea.mayer@uniroma2.it>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David Ahern <dsahern@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/vrf_strict_mode_test.sh | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 5274f4a1fba1..18b982d611de 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -379,6 +379,12 @@ if [ ! -x "$(command -v ip)" ]; then
 	exit 0
 fi
 
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+	echo "SKIP: vrf sysctl does not exist"
+	exit 0
+fi
+
 cleanup &> /dev/null
 
 setup
-- 
cgit 


From f3c93a93b564d25cab715cd71a6c3417e77b19f3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 21 Jul 2020 21:38:04 -0700
Subject: tools/bpftool: Strip BPF .o files before skeleton generation

Strip away DWARF info from .bpf.o files, before generating BPF skeletons.
This reduces bpftool binary size from 3.43MB to 2.58MB.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200722043804.2373298-1-andriin@fb.com
---
 tools/bpf/bpftool/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 51bd520ed437..8462690a039b 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -59,6 +59,7 @@ endif
 INSTALL ?= install
 RM ?= rm -f
 CLANG ?= clang
+LLVM_STRIP ?= llvm-strip
 
 FEATURE_USER = .bpftool
 FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \
@@ -147,7 +148,7 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF)
 		-I$(srctree)/tools/include/uapi/ \
 		-I$(LIBBPF_PATH) \
 		-I$(srctree)/tools/lib \
-		-g -O2 -target bpf -c $< -o $@
+		-g -O2 -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@
 
 $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
 	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@
-- 
cgit 


From 86176a1821a1237ad163d312ba0f2d7598757894 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Wed, 22 Jul 2020 18:17:20 +0200
Subject: selftests/bpf: Test BPF socket lookup and reuseport with connections

Cover the case when BPF socket lookup returns a socket that belongs to a
reuseport group, and the reuseport group contains connected UDP sockets.

Ensure that the presence of connected UDP sockets in reuseport group does
not affect the socket lookup result. Socket selected by reuseport should
always be used as result in such case.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Link: https://lore.kernel.org/bpf/20200722161720.940831-3-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 54 +++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index f1784ae4565a..9bbd2b2b7630 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -74,6 +74,7 @@ struct test {
 	struct inet_addr connect_to;
 	struct inet_addr listen_at;
 	enum server accept_on;
+	bool reuseport_has_conns; /* Add a connected socket to reuseport group */
 };
 
 static __u32 duration;		/* for CHECK macro */
@@ -559,7 +560,8 @@ close:
 
 static void run_lookup_prog(const struct test *t)
 {
-	int client_fd, server_fds[MAX_SERVERS] = { -1 };
+	int server_fds[MAX_SERVERS] = { -1 };
+	int client_fd, reuse_conn_fd = -1;
 	struct bpf_link *lookup_link;
 	int i, err;
 
@@ -583,6 +585,32 @@ static void run_lookup_prog(const struct test *t)
 			break;
 	}
 
+	/* Regular UDP socket lookup with reuseport behaves
+	 * differently when reuseport group contains connected
+	 * sockets. Check that adding a connected UDP socket to the
+	 * reuseport group does not affect how reuseport works with
+	 * BPF socket lookup.
+	 */
+	if (t->reuseport_has_conns) {
+		struct sockaddr_storage addr = {};
+		socklen_t len = sizeof(addr);
+
+		/* Add an extra socket to reuseport group */
+		reuse_conn_fd = make_server(t->sotype, t->listen_at.ip,
+					    t->listen_at.port,
+					    t->reuseport_prog);
+		if (reuse_conn_fd < 0)
+			goto close;
+
+		/* Connect the extra socket to itself */
+		err = getsockname(reuse_conn_fd, (void *)&addr, &len);
+		if (CHECK(err, "getsockname", "errno %d\n", errno))
+			goto close;
+		err = connect(reuse_conn_fd, (void *)&addr, len);
+		if (CHECK(err, "connect", "errno %d\n", errno))
+			goto close;
+	}
+
 	client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port);
 	if (client_fd < 0)
 		goto close;
@@ -594,6 +622,8 @@ static void run_lookup_prog(const struct test *t)
 
 	close(client_fd);
 close:
+	if (reuse_conn_fd != -1)
+		close(reuse_conn_fd);
 	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
 		if (server_fds[i] != -1)
 			close(server_fds[i]);
@@ -710,6 +740,17 @@ static void test_redirect_lookup(struct test_sk_lookup *skel)
 			.listen_at	= { INT_IP4, INT_PORT },
 			.accept_on	= SERVER_B,
 		},
+		{
+			.desc		= "UDP IPv4 redir and reuseport with conns",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.select_sock_b,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP4, EXT_PORT },
+			.listen_at	= { INT_IP4, INT_PORT },
+			.accept_on	= SERVER_B,
+			.reuseport_has_conns = true,
+		},
 		{
 			.desc		= "UDP IPv4 redir skip reuseport",
 			.lookup_prog	= skel->progs.select_sock_a_no_reuseport,
@@ -754,6 +795,17 @@ static void test_redirect_lookup(struct test_sk_lookup *skel)
 			.listen_at	= { INT_IP6, INT_PORT },
 			.accept_on	= SERVER_B,
 		},
+		{
+			.desc		= "UDP IPv6 redir and reuseport with conns",
+			.lookup_prog	= skel->progs.select_sock_a,
+			.reuseport_prog	= skel->progs.select_sock_b,
+			.sock_map	= skel->maps.redir_map,
+			.sotype		= SOCK_DGRAM,
+			.connect_to	= { EXT_IP6, EXT_PORT },
+			.listen_at	= { INT_IP6, INT_PORT },
+			.accept_on	= SERVER_B,
+			.reuseport_has_conns = true,
+		},
 		{
 			.desc		= "UDP IPv6 redir skip reuseport",
 			.lookup_prog	= skel->progs.select_sock_a_no_reuseport,
-- 
cgit 


From a5cbe05a6673b85bed2a63ffcfea6a96c6410cff Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 23 Jul 2020 11:41:12 -0700
Subject: bpf: Implement bpf iterator for map elements

The bpf iterator for map elements are implemented.
The bpf program will receive four parameters:
  bpf_iter_meta *meta: the meta data
  bpf_map *map:        the bpf_map whose elements are traversed
  void *key:           the key of one element
  void *value:         the value of the same element

Here, meta and map pointers are always valid, and
key has register type PTR_TO_RDONLY_BUF_OR_NULL and
value has register type PTR_TO_RDWR_BUF_OR_NULL.
The kernel will track the access range of key and value
during verification time. Later, these values will be compared
against the values in the actual map to ensure all accesses
are within range.

A new field iter_seq_info is added to bpf_map_ops which
is used to add map type specific information, i.e., seq_ops,
init/fini seq_file func and seq_file private data size.
Subsequent patches will have actual implementation
for bpf_map_ops->iter_seq_info.

In user space, BPF_ITER_LINK_MAP_FD needs to be
specified in prog attr->link_create.flags, which indicates
that attr->link_create.target_fd is a map_fd.
The reason for such an explicit flag is for possible
future cases where one bpf iterator may allow more than
one possible customization, e.g., pid and cgroup id for
task_file.

Current kernel internal implementation only allows
the target to register at most one required bpf_iter_link_info.
To support the above case, optional bpf_iter_link_info's
are needed, the target can be extended to register such link
infos, and user provided link_info needs to match one of
target supported ones.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723184112.590360-1-yhs@fb.com
---
 tools/include/uapi/linux/bpf.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 54d0c886e3ba..828c2f6438f2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -246,6 +246,13 @@ enum bpf_link_type {
 	MAX_BPF_LINK_TYPE,
 };
 
+enum bpf_iter_link_info {
+	BPF_ITER_LINK_UNSPEC = 0,
+	BPF_ITER_LINK_MAP_FD = 1,
+
+	MAX_BPF_ITER_LINK_INFO,
+};
+
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
-- 
cgit 


From cd31039a7347610863aa8b77a9162048999723d0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 23 Jul 2020 11:41:17 -0700
Subject: tools/libbpf: Add support for bpf map element iterator

Add map_fd to bpf_iter_attach_opts and flags to
bpf_link_create_opts. Later on, bpftool or selftest
will be able to create a bpf map element iterator
by passing map_fd to the kernel during link
creation time.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723184117.590673-1-yhs@fb.com
---
 tools/lib/bpf/bpf.c    |  1 +
 tools/lib/bpf/bpf.h    |  3 ++-
 tools/lib/bpf/libbpf.c | 10 +++++++++-
 tools/lib/bpf/libbpf.h |  3 ++-
 4 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index a7329b671c41..e1bdf214f75f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -598,6 +598,7 @@ int bpf_link_create(int prog_fd, int target_fd,
 	attr.link_create.prog_fd = prog_fd;
 	attr.link_create.target_fd = target_fd;
 	attr.link_create.attach_type = attach_type;
+	attr.link_create.flags = OPTS_GET(opts, flags, 0);
 
 	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
 }
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index dbef24ebcfcb..6d367e01d05e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -170,8 +170,9 @@ LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
 
 struct bpf_link_create_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u32 flags;
 };
-#define bpf_link_create_opts__last_field sz
+#define bpf_link_create_opts__last_field flags
 
 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
 			       enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 846164c79df1..a05aa7e2bab6 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8282,13 +8282,20 @@ struct bpf_link *
 bpf_program__attach_iter(struct bpf_program *prog,
 			 const struct bpf_iter_attach_opts *opts)
 {
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
 	int prog_fd, link_fd;
+	__u32 target_fd = 0;
 
 	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
 		return ERR_PTR(-EINVAL);
 
+	if (OPTS_HAS(opts, map_fd)) {
+		target_fd = opts->map_fd;
+		link_create_opts.flags = BPF_ITER_LINK_MAP_FD;
+	}
+
 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
 		pr_warn("program '%s': can't attach before loaded\n",
@@ -8301,7 +8308,8 @@ bpf_program__attach_iter(struct bpf_program *prog,
 		return ERR_PTR(-ENOMEM);
 	link->detach = &bpf_link__detach_fd;
 
-	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL);
+	link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
+				  &link_create_opts);
 	if (link_fd < 0) {
 		link_fd = -errno;
 		free(link);
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c2272132e929..c6813791fa7e 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -264,8 +264,9 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
 
 struct bpf_iter_attach_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u32 map_fd;
 };
-#define bpf_iter_attach_opts__last_field sz
+#define bpf_iter_attach_opts__last_field map_fd
 
 LIBBPF_API struct bpf_link *
 bpf_program__attach_iter(struct bpf_program *prog,
-- 
cgit 


From d8793aca708602c676372b03d6493972457524af Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 23 Jul 2020 11:41:19 -0700
Subject: tools/bpftool: Add bpftool support for bpf map element iterator

The optional parameter "map MAP" can be added to "bpftool iter"
command to create a bpf iterator for map elements. For example,
  bpftool iter pin ./prog.o /sys/fs/bpf/p1 map id 333

For map element bpf iterator "map MAP" parameter is required.
Otherwise, bpf link creation will return an error.

Quentin Monnet kindly provided bash-completion implementation
for new "map MAP" option.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723184119.590799-1-yhs@fb.com
---
 tools/bpf/bpftool/Documentation/bpftool-iter.rst | 18 +++++++++++--
 tools/bpf/bpftool/bash-completion/bpftool        | 18 ++++++++++++-
 tools/bpf/bpftool/iter.c                         | 33 +++++++++++++++++++++---
 3 files changed, 62 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
index 8dce698eab79..070ffacb42b5 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -17,14 +17,15 @@ SYNOPSIS
 ITER COMMANDS
 ===================
 
-|	**bpftool** **iter pin** *OBJ* *PATH*
+|	**bpftool** **iter pin** *OBJ* *PATH* [**map** *MAP*]
 |	**bpftool** **iter help**
 |
 |	*OBJ* := /a/file/of/bpf_iter_target.o
+|	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
 
 DESCRIPTION
 ===========
-	**bpftool iter pin** *OBJ* *PATH*
+	**bpftool iter pin** *OBJ* *PATH* [**map** *MAP*]
 		  A bpf iterator combines a kernel iterating of
 		  particular kernel data (e.g., tasks, bpf_maps, etc.)
 		  and a bpf program called for each kernel data object
@@ -37,6 +38,12 @@ DESCRIPTION
 		  character ('.'), which is reserved for future extensions
 		  of *bpffs*.
 
+		  Map element bpf iterator requires an additional parameter
+		  *MAP* so bpf program can iterate over map elements for
+		  that map. User can have a bpf program in kernel to run
+		  with each map element, do checking, filtering, aggregation,
+		  etc. without copying data to user space.
+
 		  User can then *cat PATH* to see the bpf iterator output.
 
 	**bpftool iter help**
@@ -64,6 +71,13 @@ EXAMPLES
    Create a file-based bpf iterator from bpf_iter_netlink.o and pin it
    to /sys/fs/bpf/my_netlink
 
+**# bpftool iter pin bpf_iter_hashmap.o /sys/fs/bpf/my_hashmap map id 20**
+
+::
+
+   Create a file-based bpf iterator from bpf_iter_hashmap.o and map with
+   id 20, and pin it to /sys/fs/bpf/my_hashmap
+
 SEE ALSO
 ========
 	**bpf**\ (2),
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 7b137264ea3a..257fa310ea2b 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -615,7 +615,23 @@ _bpftool()
         iter)
             case $command in
                 pin)
-                    _filedir
+                    case $prev in
+                        $command)
+                            _filedir
+                            ;;
+                        id)
+                            _bpftool_get_map_ids
+                            ;;
+                        name)
+                            _bpftool_get_map_names
+                            ;;
+                        pinned)
+                            _filedir
+                            ;;
+                        *)
+                            _bpftool_one_of_list $MAP_TYPE
+                            ;;
+                    esac
                     return 0
                     ;;
                 *)
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index 33240fcc6319..c9dba7543dba 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -2,6 +2,7 @@
 // Copyright (C) 2020 Facebook
 
 #define _GNU_SOURCE
+#include <unistd.h>
 #include <linux/err.h>
 #include <bpf/libbpf.h>
 
@@ -9,11 +10,12 @@
 
 static int do_pin(int argc, char **argv)
 {
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts);
 	const char *objfile, *path;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
 	struct bpf_link *link;
-	int err;
+	int err = -1, map_fd = -1;
 
 	if (!REQ_ARGS(2))
 		usage();
@@ -21,10 +23,26 @@ static int do_pin(int argc, char **argv)
 	objfile = GET_ARG();
 	path = GET_ARG();
 
+	/* optional arguments */
+	if (argc) {
+		if (is_prefix(*argv, "map")) {
+			NEXT_ARG();
+
+			if (!REQ_ARGS(2)) {
+				p_err("incorrect map spec");
+				return -1;
+			}
+
+			map_fd = map_parse_fd(&argc, &argv);
+			if (map_fd < 0)
+				return -1;
+		}
+	}
+
 	obj = bpf_object__open(objfile);
 	if (IS_ERR(obj)) {
 		p_err("can't open objfile %s", objfile);
-		return -1;
+		goto close_map_fd;
 	}
 
 	err = bpf_object__load(obj);
@@ -39,7 +57,10 @@ static int do_pin(int argc, char **argv)
 		goto close_obj;
 	}
 
-	link = bpf_program__attach_iter(prog, NULL);
+	if (map_fd >= 0)
+		iter_opts.map_fd = map_fd;
+
+	link = bpf_program__attach_iter(prog, &iter_opts);
 	if (IS_ERR(link)) {
 		err = PTR_ERR(link);
 		p_err("attach_iter failed for program %s",
@@ -62,14 +83,18 @@ close_link:
 	bpf_link__destroy(link);
 close_obj:
 	bpf_object__close(obj);
+close_map_fd:
+	if (map_fd >= 0)
+		close(map_fd);
 	return err;
 }
 
 static int do_help(int argc, char **argv)
 {
 	fprintf(stderr,
-		"Usage: %1$s %2$s pin OBJ PATH\n"
+		"Usage: %1$s %2$s pin OBJ PATH [map MAP]\n"
 		"       %1$s %2$s help\n"
+		"       " HELP_SPEC_MAP "\n"
 		"",
 		bin_name, "iter");
 
-- 
cgit 


From 2a7c2fff7dd6e87634e47ddb2d2c7f272708dbbf Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 23 Jul 2020 11:41:20 -0700
Subject: selftests/bpf: Add test for bpf hash map iterators

Two subtests are added.
  $ ./test_progs -n 4
  ...
  #4/18 bpf_hash_map:OK
  #4/19 bpf_percpu_hash_map:OK
  ...

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723184120.590916-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 187 +++++++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_bpf_hash_map.c    | 100 +++++++++++
 .../bpf/progs/bpf_iter_bpf_percpu_hash_map.c       |  50 ++++++
 3 files changed, 337 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index fed42755416d..72790b600c62 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -15,6 +15,8 @@
 #include "bpf_iter_test_kern2.skel.h"
 #include "bpf_iter_test_kern3.skel.h"
 #include "bpf_iter_test_kern4.skel.h"
+#include "bpf_iter_bpf_hash_map.skel.h"
+#include "bpf_iter_bpf_percpu_hash_map.skel.h"
 
 static int duration;
 
@@ -455,6 +457,187 @@ out:
 	bpf_iter_test_kern4__destroy(skel);
 }
 
+static void test_bpf_hash_map(void)
+{
+	__u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_hash_map *skel;
+	int err, i, len, map_fd, iter_fd;
+	__u64 val, expected_val = 0;
+	struct bpf_link *link;
+	struct key_t {
+		int a;
+		int b;
+		int c;
+	} key;
+	char buf[64];
+
+	skel = bpf_iter_bpf_hash_map__open();
+	if (CHECK(!skel, "bpf_iter_bpf_hash_map__open",
+		  "skeleton open failed\n"))
+		return;
+
+	skel->bss->in_test_mode = true;
+
+	err = bpf_iter_bpf_hash_map__load(skel);
+	if (CHECK(!skel, "bpf_iter_bpf_hash_map__load",
+		  "skeleton load failed\n"))
+		goto out;
+
+	/* iterator with hashmap2 and hashmap3 should fail */
+	opts.map_fd = bpf_map__fd(skel->maps.hashmap2);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(!IS_ERR(link), "attach_iter",
+		  "attach_iter for hashmap2 unexpected succeeded\n"))
+		goto out;
+
+	opts.map_fd = bpf_map__fd(skel->maps.hashmap3);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(!IS_ERR(link), "attach_iter",
+		  "attach_iter for hashmap3 unexpected succeeded\n"))
+		goto out;
+
+	/* hashmap1 should be good, update map values here */
+	map_fd = bpf_map__fd(skel->maps.hashmap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) {
+		key.a = i + 1;
+		key.b = i + 2;
+		key.c = i + 3;
+		val = i + 4;
+		expected_key_a += key.a;
+		expected_key_b += key.b;
+		expected_key_c += key.c;
+		expected_val += val;
+
+		err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	opts.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* do some tests */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	if (CHECK(skel->bss->key_sum_a != expected_key_a,
+		  "key_sum_a", "got %u expected %u\n",
+		  skel->bss->key_sum_a, expected_key_a))
+		goto close_iter;
+	if (CHECK(skel->bss->key_sum_b != expected_key_b,
+		  "key_sum_b", "got %u expected %u\n",
+		  skel->bss->key_sum_b, expected_key_b))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %llu expected %llu\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_hash_map__destroy(skel);
+}
+
+static void test_bpf_percpu_hash_map(void)
+{
+	__u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_percpu_hash_map *skel;
+	int err, i, j, len, map_fd, iter_fd;
+	__u32 expected_val = 0;
+	struct bpf_link *link;
+	struct key_t {
+		int a;
+		int b;
+		int c;
+	} key;
+	char buf[64];
+	void *val;
+
+	val = malloc(8 * bpf_num_possible_cpus());
+
+	skel = bpf_iter_bpf_percpu_hash_map__open();
+	if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open",
+		  "skeleton open failed\n"))
+		return;
+
+	skel->rodata->num_cpus = bpf_num_possible_cpus();
+
+	err = bpf_iter_bpf_percpu_hash_map__load(skel);
+	if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load",
+		  "skeleton load failed\n"))
+		goto out;
+
+	/* update map values here */
+	map_fd = bpf_map__fd(skel->maps.hashmap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) {
+		key.a = i + 1;
+		key.b = i + 2;
+		key.c = i + 3;
+		expected_key_a += key.a;
+		expected_key_b += key.b;
+		expected_key_c += key.c;
+
+		for (j = 0; j < bpf_num_possible_cpus(); j++) {
+			*(__u32 *)(val + j * 8) = i + j;
+			expected_val += i + j;
+		}
+
+		err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	opts.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* do some tests */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	if (CHECK(skel->bss->key_sum_a != expected_key_a,
+		  "key_sum_a", "got %u expected %u\n",
+		  skel->bss->key_sum_a, expected_key_a))
+		goto close_iter;
+	if (CHECK(skel->bss->key_sum_b != expected_key_b,
+		  "key_sum_b", "got %u expected %u\n",
+		  skel->bss->key_sum_b, expected_key_b))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %u expected %u\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_percpu_hash_map__destroy(skel);
+}
+
 void test_bpf_iter(void)
 {
 	if (test__start_subtest("btf_id_or_null"))
@@ -491,4 +674,8 @@ void test_bpf_iter(void)
 		test_overflow(true, false);
 	if (test__start_subtest("prog-ret-1"))
 		test_overflow(false, true);
+	if (test__start_subtest("bpf_hash_map"))
+		test_bpf_hash_map();
+	if (test__start_subtest("bpf_percpu_hash_map"))
+		test_bpf_percpu_hash_map();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
new file mode 100644
index 000000000000..07ddbfdbcab7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+	int a;
+	int b;
+	int c;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, __u64);
+} hashmap1 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, __u64);
+	__type(value, __u64);
+} hashmap2 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, __u32);
+} hashmap3 SEC(".maps");
+
+/* will set before prog run */
+bool in_test_mode = 0;
+
+/* will collect results during prog run */
+__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
+__u64 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	__u32 seq_num = ctx->meta->seq_num;
+	struct bpf_map *map = ctx->map;
+	struct key_t *key = ctx->key;
+	__u64 *val = ctx->value;
+
+	if (in_test_mode) {
+		/* test mode is used by selftests to
+		 * test functionality of bpf_hash_map iter.
+		 *
+		 * the above hashmap1 will have correct size
+		 * and will be accepted, hashmap2 and hashmap3
+		 * should be rejected due to smaller key/value
+		 * size.
+		 */
+		if (key == (void *)0 || val == (void *)0)
+			return 0;
+
+		key_sum_a += key->a;
+		key_sum_b += key->b;
+		key_sum_c += key->c;
+		val_sum += *val;
+		return 0;
+	}
+
+	/* non-test mode, the map is prepared with the
+	 * below bpftool command sequence:
+	 *   bpftool map create /sys/fs/bpf/m1 type hash \
+	 *   	key 12 value 8 entries 3 name map1
+	 *   bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 1 \
+	 *   	value 0 0 0 1 0 0 0 1
+	 *   bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 2 \
+	 *   	value 0 0 0 1 0 0 0 2
+	 * The bpftool iter command line:
+	 *   bpftool iter pin ./bpf_iter_bpf_hash_map.o /sys/fs/bpf/p1 \
+	 *   	map id 77
+	 * The below output will be:
+	 *   map dump starts
+	 *   77: (1000000 0 2000000) (200000001000000)
+	 *   77: (1000000 0 1000000) (100000001000000)
+	 *   map dump ends
+	 */
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "map dump starts\n");
+
+	if (key == (void *)0 || val == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "map dump ends\n");
+		return 0;
+	}
+
+	BPF_SEQ_PRINTF(seq, "%d: (%x %d %x) (%llx)\n", map->id,
+		       key->a, key->b, key->c, *val);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
new file mode 100644
index 000000000000..feaaa2b89c57
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+	int a;
+	int b;
+	int c;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, __u32);
+} hashmap1 SEC(".maps");
+
+/* will set before prog run */
+volatile const __u32 num_cpus = 0;
+
+/* will collect results during prog run */
+__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
+__u32 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_percpu_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	struct key_t *key = ctx->key;
+	void *pptr = ctx->value;
+	__u32 step;
+	int i;
+
+	if (key == (void *)0 || pptr == (void *)0)
+		return 0;
+
+	key_sum_a += key->a;
+	key_sum_b += key->b;
+	key_sum_c += key->c;
+
+	step = 8;
+	for (i = 0; i < num_cpus; i++) {
+		val_sum += *(__u32 *)pptr;
+		pptr += step;
+	}
+	return 0;
+}
-- 
cgit 


From 60dd49ea65390986a665c462da704927e861e67e Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 23 Jul 2020 11:41:21 -0700
Subject: selftests/bpf: Add test for bpf array map iterators

Two subtests are added.
  $ ./test_progs -n 4
  ...
  #4/20 bpf_array_map:OK
  #4/21 bpf_percpu_array_map:OK
  ...

The bpf_array_map subtest also tested bpf program
changing array element values and send key/value
to user space through bpf_seq_write() interface.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723184121.591367-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 161 +++++++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_bpf_array_map.c   |  40 +++++
 .../bpf/progs/bpf_iter_bpf_percpu_array_map.c      |  46 ++++++
 3 files changed, 247 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 72790b600c62..4a02b2222a6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -17,6 +17,8 @@
 #include "bpf_iter_test_kern4.skel.h"
 #include "bpf_iter_bpf_hash_map.skel.h"
 #include "bpf_iter_bpf_percpu_hash_map.skel.h"
+#include "bpf_iter_bpf_array_map.skel.h"
+#include "bpf_iter_bpf_percpu_array_map.skel.h"
 
 static int duration;
 
@@ -638,6 +640,161 @@ out:
 	bpf_iter_bpf_percpu_hash_map__destroy(skel);
 }
 
+static void test_bpf_array_map(void)
+{
+	__u64 val, expected_val = 0, res_first_val, first_val = 0;
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	__u32 expected_key = 0, res_first_key;
+	struct bpf_iter_bpf_array_map *skel;
+	int err, i, map_fd, iter_fd;
+	struct bpf_link *link;
+	char buf[64] = {};
+	int len, start;
+
+	skel = bpf_iter_bpf_array_map__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	map_fd = bpf_map__fd(skel->maps.arraymap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+		val = i + 4;
+		expected_key += i;
+		expected_val += val;
+
+		if (i == 0)
+			first_val = val;
+
+		err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	opts.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* do some tests */
+	start = 0;
+	while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0)
+		start += len;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	res_first_key = *(__u32 *)buf;
+	res_first_val = *(__u64 *)(buf + sizeof(__u32));
+	if (CHECK(res_first_key != 0 || res_first_val != first_val,
+		  "bpf_seq_write",
+		  "seq_write failure: first key %u vs expected 0, "
+		  " first value %llu vs expected %llu\n",
+		  res_first_key, res_first_val, first_val))
+		goto close_iter;
+
+	if (CHECK(skel->bss->key_sum != expected_key,
+		  "key_sum", "got %u expected %u\n",
+		  skel->bss->key_sum, expected_key))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %llu expected %llu\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+		err = bpf_map_lookup_elem(map_fd, &i, &val);
+		if (CHECK(err, "map_lookup", "map_lookup failed\n"))
+			goto out;
+		if (CHECK(i != val, "invalid_val",
+			  "got value %llu expected %u\n", val, i))
+			goto out;
+	}
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_array_map__destroy(skel);
+}
+
+static void test_bpf_percpu_array_map(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_percpu_array_map *skel;
+	__u32 expected_key = 0, expected_val = 0;
+	int err, i, j, map_fd, iter_fd;
+	struct bpf_link *link;
+	char buf[64];
+	void *val;
+	int len;
+
+	val = malloc(8 * bpf_num_possible_cpus());
+
+	skel = bpf_iter_bpf_percpu_array_map__open();
+	if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open",
+		  "skeleton open failed\n"))
+		return;
+
+	skel->rodata->num_cpus = bpf_num_possible_cpus();
+
+	err = bpf_iter_bpf_percpu_array_map__load(skel);
+	if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load",
+		  "skeleton load failed\n"))
+		goto out;
+
+	/* update map values here */
+	map_fd = bpf_map__fd(skel->maps.arraymap1);
+	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+		expected_key += i;
+
+		for (j = 0; j < bpf_num_possible_cpus(); j++) {
+			*(__u32 *)(val + j * 8) = i + j;
+			expected_val += i + j;
+		}
+
+		err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	opts.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* do some tests */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	if (CHECK(skel->bss->key_sum != expected_key,
+		  "key_sum", "got %u expected %u\n",
+		  skel->bss->key_sum, expected_key))
+		goto close_iter;
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %u expected %u\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_bpf_percpu_array_map__destroy(skel);
+}
+
 void test_bpf_iter(void)
 {
 	if (test__start_subtest("btf_id_or_null"))
@@ -678,4 +835,8 @@ void test_bpf_iter(void)
 		test_bpf_hash_map();
 	if (test__start_subtest("bpf_percpu_hash_map"))
 		test_bpf_percpu_hash_map();
+	if (test__start_subtest("bpf_array_map"))
+		test_bpf_array_map();
+	if (test__start_subtest("bpf_percpu_array_map"))
+		test_bpf_percpu_array_map();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
new file mode 100644
index 000000000000..6286023fd62b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+	int a;
+	int b;
+	int c;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 3);
+	__type(key, __u32);
+	__type(value, __u64);
+} arraymap1 SEC(".maps");
+
+__u32 key_sum = 0;
+__u64 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	__u32 *key = ctx->key;
+	__u64 *val = ctx->value;
+
+	if (key == (void *)0 || val == (void *)0)
+		return 0;
+
+	bpf_seq_write(ctx->meta->seq, key, sizeof(__u32));
+	bpf_seq_write(ctx->meta->seq, val, sizeof(__u64));
+	key_sum += *key;
+	val_sum += *val;
+	*val = *key;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
new file mode 100644
index 000000000000..85fa710fad90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+	int a;
+	int b;
+	int c;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 3);
+	__type(key, __u32);
+	__type(value, __u32);
+} arraymap1 SEC(".maps");
+
+/* will set before prog run */
+volatile const __u32 num_cpus = 0;
+
+__u32 key_sum = 0, val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_percpu_array_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	__u32 *key = ctx->key;
+	void *pptr = ctx->value;
+	__u32 step;
+	int i;
+
+	if (key == (void *)0 || pptr == (void *)0)
+		return 0;
+
+	key_sum += *key;
+
+	step = 8;
+	for (i = 0; i < num_cpus; i++) {
+		val_sum += *(__u32 *)pptr;
+		pptr += step;
+	}
+	return 0;
+}
-- 
cgit 


From 3b1c420bd882115eb7a3d2335cc00d7b9974eb0b Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 23 Jul 2020 11:41:22 -0700
Subject: selftests/bpf: Add a test for bpf sk_storage_map iterator

Added one test for bpf sk_storage_map_iterator.
  $ ./test_progs -n 4
  ...
  #4/22 bpf_sk_storage_map:OK
  ...

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723184122.591591-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 72 ++++++++++++++++++++++
 .../bpf/progs/bpf_iter_bpf_sk_storage_map.c        | 34 ++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 4a02b2222a6d..ffbbeb9fa268 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -19,6 +19,7 @@
 #include "bpf_iter_bpf_percpu_hash_map.skel.h"
 #include "bpf_iter_bpf_array_map.skel.h"
 #include "bpf_iter_bpf_percpu_array_map.skel.h"
+#include "bpf_iter_bpf_sk_storage_map.skel.h"
 
 static int duration;
 
@@ -795,6 +796,75 @@ out:
 	bpf_iter_bpf_percpu_array_map__destroy(skel);
 }
 
+static void test_bpf_sk_storage_map(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	int err, i, len, map_fd, iter_fd, num_sockets;
+	struct bpf_iter_bpf_sk_storage_map *skel;
+	int sock_fd[3] = {-1, -1, -1};
+	__u32 val, expected_val = 0;
+	struct bpf_link *link;
+	char buf[64];
+
+	skel = bpf_iter_bpf_sk_storage_map__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+	num_sockets = ARRAY_SIZE(sock_fd);
+	for (i = 0; i < num_sockets; i++) {
+		sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0);
+		if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno))
+			goto out;
+
+		val = i + 1;
+		expected_val += val;
+
+		err = bpf_map_update_elem(map_fd, &sock_fd[i], &val,
+					  BPF_NOEXIST);
+		if (CHECK(err, "map_update", "map_update failed\n"))
+			goto out;
+	}
+
+	opts.map_fd = map_fd;
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* do some tests */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	if (CHECK(skel->bss->ipv6_sk_count != num_sockets,
+		  "ipv6_sk_count", "got %u expected %u\n",
+		  skel->bss->ipv6_sk_count, num_sockets))
+		goto close_iter;
+
+	if (CHECK(skel->bss->val_sum != expected_val,
+		  "val_sum", "got %u expected %u\n",
+		  skel->bss->val_sum, expected_val))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	for (i = 0; i < num_sockets; i++) {
+		if (sock_fd[i] >= 0)
+			close(sock_fd[i]);
+	}
+	bpf_iter_bpf_sk_storage_map__destroy(skel);
+}
+
 void test_bpf_iter(void)
 {
 	if (test__start_subtest("btf_id_or_null"))
@@ -839,4 +909,6 @@ void test_bpf_iter(void)
 		test_bpf_array_map();
 	if (test__start_subtest("bpf_percpu_array_map"))
 		test_bpf_percpu_array_map();
+	if (test__start_subtest("bpf_sk_storage_map"))
+		test_bpf_sk_storage_map();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
new file mode 100644
index 000000000000..6b70ccaba301
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} sk_stg_map SEC(".maps");
+
+__u32 val_sum = 0;
+__u32 ipv6_sk_count = 0;
+
+SEC("iter/bpf_sk_storage_map")
+int dump_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+	struct sock *sk = ctx->sk;
+	__u32 *val = ctx->value;
+
+	if (sk == (void *)0 || val == (void *)0)
+		return 0;
+
+	if (sk->sk_family == AF_INET6)
+		ipv6_sk_count++;
+
+	val_sum += *val;
+	return 0;
+}
-- 
cgit 


From 9efcc4ad7a15ea50550c53fbf62457c309216051 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 23 Jul 2020 11:41:24 -0700
Subject: selftests/bpf: Add a test for out of bound rdonly buf access

If the bpf program contains out of bound access w.r.t. a
particular map key/value size, the verification will be
still okay, e.g., it will be accepted by verifier. But
it will be rejected during link_create time. A test
is added here to ensure link_create failure did happen
if out of bound access happened.
  $ ./test_progs -n 4
  ...
  #4/23 rdonly-buf-out-of-bound:OK
  ...

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723184124.591700-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 22 ++++++++++++++
 .../selftests/bpf/progs/bpf_iter_test_kern5.c      | 35 ++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index ffbbeb9fa268..d95de80b1851 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -20,6 +20,7 @@
 #include "bpf_iter_bpf_array_map.skel.h"
 #include "bpf_iter_bpf_percpu_array_map.skel.h"
 #include "bpf_iter_bpf_sk_storage_map.skel.h"
+#include "bpf_iter_test_kern5.skel.h"
 
 static int duration;
 
@@ -865,6 +866,25 @@ out:
 	bpf_iter_bpf_sk_storage_map__destroy(skel);
 }
 
+static void test_rdonly_buf_out_of_bound(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_test_kern5 *skel;
+	struct bpf_link *link;
+
+	skel = bpf_iter_test_kern5__open_and_load();
+	if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	opts.map_fd = bpf_map__fd(skel->maps.hashmap1);
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+	if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
+		bpf_link__destroy(link);
+
+	bpf_iter_test_kern5__destroy(skel);
+}
+
 void test_bpf_iter(void)
 {
 	if (test__start_subtest("btf_id_or_null"))
@@ -911,4 +931,6 @@ void test_bpf_iter(void)
 		test_bpf_percpu_array_map();
 	if (test__start_subtest("bpf_sk_storage_map"))
 		test_bpf_sk_storage_map();
+	if (test__start_subtest("rdonly-buf-out-of-bound"))
+		test_rdonly_buf_out_of_bound();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
new file mode 100644
index 000000000000..e3a7575e81d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+	int a;
+	int b;
+	int c;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, __u64);
+} hashmap1 SEC(".maps");
+
+__u32 key_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	void *key = ctx->key;
+
+	if (key == (void *)0)
+		return 0;
+
+	/* out of bound access w.r.t. hashmap1 */
+	key_sum += *(__u32 *)(key + sizeof(struct key_t));
+	return 0;
+}
-- 
cgit 


From d4b4dd6ce7709c2d2fe56dcfc15074ee18505bcb Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 23 Jul 2020 11:06:46 -0700
Subject: libbpf: Print hint when PERF_EVENT_IOC_SET_BPF returns -EPROTO

The kernel prevents potential unwinder warnings and crashes by blocking
BPF program with bpf_get_[stack|stackid] on perf_event without
PERF_SAMPLE_CALLCHAIN, or with exclude_callchain_[kernel|user]. Print a
hint message in libbpf to help the user debug such issues.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723180648.1429892-4-songliubraving@fb.com
---
 tools/lib/bpf/libbpf.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a05aa7e2bab6..e51479d60285 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7833,6 +7833,9 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
 		pr_warn("program '%s': failed to attach to pfd %d: %s\n",
 			bpf_program__title(prog, false), pfd,
 			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		if (err == -EPROTO)
+			pr_warn("program '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+				bpf_program__title(prog, false), pfd);
 		return ERR_PTR(err);
 	}
 	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
-- 
cgit 


From 1da4864c2b20f81afbb18b2a0b914d0c776331fc Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 23 Jul 2020 11:06:47 -0700
Subject: selftests/bpf: Add callchain_stackid

This tests new helper function bpf_get_stackid_pe and bpf_get_stack_pe.
These two helpers have different implementation for perf_event with PEB
entries.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200723180648.1429892-5-songliubraving@fb.com
---
 .../selftests/bpf/prog_tests/perf_event_stackmap.c | 116 +++++++++++++++++++++
 .../selftests/bpf/progs/perf_event_stackmap.c      |  59 +++++++++++
 2 files changed, 175 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
 create mode 100644 tools/testing/selftests/bpf/progs/perf_event_stackmap.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
new file mode 100644
index 000000000000..72c3690844fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "perf_event_stackmap.skel.h"
+
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+noinline int func_1(void)
+{
+	static int val = 1;
+
+	val += 1;
+
+	usleep(100);
+	return val;
+}
+
+noinline int func_2(void)
+{
+	return func_1();
+}
+
+noinline int func_3(void)
+{
+	return func_2();
+}
+
+noinline int func_4(void)
+{
+	return func_3();
+}
+
+noinline int func_5(void)
+{
+	return func_4();
+}
+
+noinline int func_6(void)
+{
+	int i, val = 1;
+
+	for (i = 0; i < 100; i++)
+		val += func_5();
+
+	return val;
+}
+
+void test_perf_event_stackmap(void)
+{
+	struct perf_event_attr attr = {
+		/* .type = PERF_TYPE_SOFTWARE, */
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.precise_ip = 2,
+		.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK |
+			PERF_SAMPLE_CALLCHAIN,
+		.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+			PERF_SAMPLE_BRANCH_NO_FLAGS |
+			PERF_SAMPLE_BRANCH_NO_CYCLES |
+			PERF_SAMPLE_BRANCH_CALL_STACK,
+		.sample_period = 5000,
+		.size = sizeof(struct perf_event_attr),
+	};
+	struct perf_event_stackmap *skel;
+	__u32 duration = 0;
+	cpu_set_t cpu_set;
+	int pmu_fd, err;
+
+	skel = perf_event_stackmap__open();
+
+	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+		return;
+
+	err = perf_event_stackmap__load(skel);
+	if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+		goto cleanup;
+
+	CPU_ZERO(&cpu_set);
+	CPU_SET(0, &cpu_set);
+	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+	if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno))
+		goto cleanup;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (pmu_fd < 0) {
+		printf("%s:SKIP:cpu doesn't support the event\n", __func__);
+		test__skip();
+		goto cleanup;
+	}
+
+	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+							   pmu_fd);
+	if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
+		  "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+		close(pmu_fd);
+		goto cleanup;
+	}
+
+	/* create kernel and user stack traces for testing */
+	func_6();
+
+	CHECK(skel->data->stackid_kernel != 2, "get_stackid_kernel", "failed\n");
+	CHECK(skel->data->stackid_user != 2, "get_stackid_user", "failed\n");
+	CHECK(skel->data->stack_kernel != 2, "get_stack_kernel", "failed\n");
+	CHECK(skel->data->stack_user != 2, "get_stack_user", "failed\n");
+
+cleanup:
+	perf_event_stackmap__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
new file mode 100644
index 000000000000..25467d13c356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH         127
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 16384);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(stack_trace_t));
+} stackmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, stack_trace_t);
+} stackdata_map SEC(".maps");
+
+long stackid_kernel = 1;
+long stackid_user = 1;
+long stack_kernel = 1;
+long stack_user = 1;
+
+SEC("perf_event")
+int oncpu(void *ctx)
+{
+	stack_trace_t *trace;
+	__u32 key = 0;
+	long val;
+
+	val = bpf_get_stackid(ctx, &stackmap, 0);
+	if (val > 0)
+		stackid_kernel = 2;
+	val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
+	if (val > 0)
+		stackid_user = 2;
+
+	trace = bpf_map_lookup_elem(&stackdata_map, &key);
+	if (!trace)
+		return 0;
+
+	val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), 0);
+	if (val > 0)
+		stack_kernel = 2;
+
+	val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), BPF_F_USER_STACK);
+	if (val > 0)
+		stack_user = 2;
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
-- 
cgit 


From 346938e9380cc0b2ad8e2566389cdc570386fe22 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 23 Jul 2020 11:06:48 -0700
Subject: selftests/bpf: Add get_stackid_cannot_attach

This test confirms that BPF program that calls bpf_get_stackid() cannot
attach to perf_event with precise_ip > 0 but not PERF_SAMPLE_CALLCHAIN;
and cannot attach if the perf_event has exclude_callchain_kernel.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200723180648.1429892-6-songliubraving@fb.com
---
 .../bpf/prog_tests/get_stackid_cannot_attach.c     | 91 ++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
new file mode 100644
index 000000000000..d884b2ed5bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
+
+void test_get_stackid_cannot_attach(void)
+{
+	struct perf_event_attr attr = {
+		/* .type = PERF_TYPE_SOFTWARE, */
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.precise_ip = 1,
+		.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
+		.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+			PERF_SAMPLE_BRANCH_NO_FLAGS |
+			PERF_SAMPLE_BRANCH_NO_CYCLES |
+			PERF_SAMPLE_BRANCH_CALL_STACK,
+		.sample_period = 5000,
+		.size = sizeof(struct perf_event_attr),
+	};
+	struct test_stacktrace_build_id *skel;
+	__u32 duration = 0;
+	int pmu_fd, err;
+
+	skel = test_stacktrace_build_id__open();
+	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+		return;
+
+	/* override program type */
+	bpf_program__set_perf_event(skel->progs.oncpu);
+
+	err = test_stacktrace_build_id__load(skel);
+	if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+		goto cleanup;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+		printf("%s:SKIP:cannot open PERF_COUNT_HW_CPU_CYCLES with precise_ip > 0\n",
+		       __func__);
+		test__skip();
+		goto cleanup;
+	}
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+		  pmu_fd, errno))
+		goto cleanup;
+
+	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+							   pmu_fd);
+	CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
+	      "should have failed\n");
+	close(pmu_fd);
+
+	/* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
+	attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+		  pmu_fd, errno))
+		goto cleanup;
+
+	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+							   pmu_fd);
+	CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
+	      "err: %ld\n", PTR_ERR(skel->links.oncpu));
+	close(pmu_fd);
+
+	/* add exclude_callchain_kernel, attach should fail */
+	attr.exclude_callchain_kernel = 1;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+		  pmu_fd, errno))
+		goto cleanup;
+
+	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+							   pmu_fd);
+	CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
+	      "should have failed\n");
+	close(pmu_fd);
+
+cleanup:
+	test_stacktrace_build_id__destroy(skel);
+}
-- 
cgit 


From d4a89c1eb81431479664029bcdec593dbf23385f Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Thu, 23 Jul 2020 23:47:41 -0500
Subject: selftests/bpf: Add test for CGROUP_STORAGE map on multiple attaches

This test creates a parent cgroup, and a child of that cgroup.
It attaches a cgroup_skb/egress program that simply counts packets,
to a global variable (ARRAY map), and to a CGROUP_STORAGE map.
The program is first attached to the parent cgroup only, then to
parent and child.

The test cases sends a message within the child cgroup, and because
the program is inherited across parent / child cgroups, it will
trigger the egress program for both the parent and child, if they
exist. The program, when looking up a CGROUP_STORAGE map, uses the
cgroup and attach type of the attachment parameters; therefore,
both attaches uses different cgroup storages.

We assert that all packet counts returns what we expects.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/5a20206afa4606144691c7caa0d1b997cd60dec0.1595565795.git.zhuyifei@google.com
---
 .../selftests/bpf/prog_tests/cg_storage_multi.c    | 161 +++++++++++++++++++++
 .../bpf/progs/cg_storage_multi_egress_only.c       |  30 ++++
 2 files changed, 191 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
 create mode 100644 tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
new file mode 100644
index 000000000000..e90e0547d759
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "cg_storage_multi_egress_only.skel.h"
+
+#define PARENT_CGROUP "/cgroup_storage"
+#define CHILD_CGROUP "/cgroup_storage/child"
+
+static int duration;
+
+static bool assert_storage(struct bpf_map *map, const char *cgroup_path,
+			   __u32 expected)
+{
+	struct bpf_cgroup_storage_key key = {0};
+	__u32 value;
+	int map_fd;
+
+	map_fd = bpf_map__fd(map);
+
+	key.cgroup_inode_id = get_cgroup_id(cgroup_path);
+	key.attach_type = BPF_CGROUP_INET_EGRESS;
+	if (CHECK(bpf_map_lookup_elem(map_fd, &key, &value) < 0,
+		  "map-lookup", "errno %d", errno))
+		return true;
+	if (CHECK(value != expected,
+		  "assert-storage", "got %u expected %u", value, expected))
+		return true;
+
+	return false;
+}
+
+static bool assert_storage_noexist(struct bpf_map *map, const char *cgroup_path)
+{
+	struct bpf_cgroup_storage_key key = {0};
+	__u32 value;
+	int map_fd;
+
+	map_fd = bpf_map__fd(map);
+
+	key.cgroup_inode_id = get_cgroup_id(cgroup_path);
+	key.attach_type = BPF_CGROUP_INET_EGRESS;
+	if (CHECK(bpf_map_lookup_elem(map_fd, &key, &value) == 0,
+		  "map-lookup", "succeeded, expected ENOENT"))
+		return true;
+	if (CHECK(errno != ENOENT,
+		  "map-lookup", "errno %d, expected ENOENT", errno))
+		return true;
+
+	return false;
+}
+
+static bool connect_send(const char *cgroup_path)
+{
+	bool res = true;
+	int server_fd = -1, client_fd = -1;
+
+	if (join_cgroup(cgroup_path))
+		goto out_clean;
+
+	server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+	if (server_fd < 0)
+		goto out_clean;
+
+	client_fd = connect_to_fd(server_fd, 0);
+	if (client_fd < 0)
+		goto out_clean;
+
+	if (send(client_fd, "message", strlen("message"), 0) < 0)
+		goto out_clean;
+
+	res = false;
+
+out_clean:
+	close(client_fd);
+	close(server_fd);
+	return res;
+}
+
+static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
+{
+	struct cg_storage_multi_egress_only *obj;
+	struct bpf_link *parent_link = NULL, *child_link = NULL;
+	bool err;
+
+	obj = cg_storage_multi_egress_only__open_and_load();
+	if (CHECK(!obj, "skel-load", "errno %d", errno))
+		return;
+
+	/* Attach to parent cgroup, trigger packet from child.
+	 * Assert that there is only one run and in that run the storage is
+	 * parent cgroup's storage.
+	 * Also assert that child cgroup's storage does not exist
+	 */
+	parent_link = bpf_program__attach_cgroup(obj->progs.egress,
+						 parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
+		  "err %ld", PTR_ERR(parent_link)))
+		goto close_bpf_object;
+	err = connect_send(CHILD_CGROUP);
+	if (CHECK(err, "first-connect-send", "errno %d", errno))
+		goto close_bpf_object;
+	if (CHECK(obj->bss->invocations != 1,
+		  "first-invoke", "invocations=%d", obj->bss->invocations))
+		goto close_bpf_object;
+	if (assert_storage(obj->maps.cgroup_storage, PARENT_CGROUP, 1))
+		goto close_bpf_object;
+	if (assert_storage_noexist(obj->maps.cgroup_storage, CHILD_CGROUP))
+		goto close_bpf_object;
+
+	/* Attach to parent and child cgroup, trigger packet from child.
+	 * Assert that there are two additional runs, one that run with parent
+	 * cgroup's storage and one with child cgroup's storage.
+	 */
+	child_link = bpf_program__attach_cgroup(obj->progs.egress,
+						child_cgroup_fd);
+	if (CHECK(IS_ERR(child_link), "child-cg-attach",
+		  "err %ld", PTR_ERR(child_link)))
+		goto close_bpf_object;
+	err = connect_send(CHILD_CGROUP);
+	if (CHECK(err, "second-connect-send", "errno %d", errno))
+		goto close_bpf_object;
+	if (CHECK(obj->bss->invocations != 3,
+		  "second-invoke", "invocations=%d", obj->bss->invocations))
+		goto close_bpf_object;
+	if (assert_storage(obj->maps.cgroup_storage, PARENT_CGROUP, 2))
+		goto close_bpf_object;
+	if (assert_storage(obj->maps.cgroup_storage, CHILD_CGROUP, 1))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(parent_link);
+	bpf_link__destroy(child_link);
+
+	cg_storage_multi_egress_only__destroy(obj);
+}
+
+void test_cg_storage_multi(void)
+{
+	int parent_cgroup_fd = -1, child_cgroup_fd = -1;
+
+	parent_cgroup_fd = test__join_cgroup(PARENT_CGROUP);
+	if (CHECK(parent_cgroup_fd < 0, "cg-create-parent", "errno %d", errno))
+		goto close_cgroup_fd;
+	child_cgroup_fd = create_and_get_cgroup(CHILD_CGROUP);
+	if (CHECK(child_cgroup_fd < 0, "cg-create-child", "errno %d", errno))
+		goto close_cgroup_fd;
+
+	if (test__start_subtest("egress_only"))
+		test_egress_only(parent_cgroup_fd, child_cgroup_fd);
+
+close_cgroup_fd:
+	close(child_cgroup_fd);
+	close(parent_cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
new file mode 100644
index 000000000000..ec0165d07105
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, __u32);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress")
+int egress(struct __sk_buff *skb)
+{
+	__u32 *ptr_cg_storage = bpf_get_local_storage(&cgroup_storage, 0);
+
+	__sync_fetch_and_add(ptr_cg_storage, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
-- 
cgit 


From 9e5bd1f7633bc1c3c8b25496eedfeced6d2675ff Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Thu, 23 Jul 2020 23:47:42 -0500
Subject: selftests/bpf: Test CGROUP_STORAGE map can't be used by multiple
 progs

The current assumption is that the lifetime of a cgroup storage
is tied to the program's attachment. The storage is created in
cgroup_bpf_attach, and released upon cgroup_bpf_detach and
cgroup_bpf_release.

Because the current semantics is that each attachment gets a
completely independent cgroup storage, and you can have multiple
programs attached to the same (cgroup, attach type) pair, the key
of the CGROUP_STORAGE map, looking up the map with this pair could
yield multiple storages, and that is not permitted. Therefore,
the kernel verifier checks that two programs cannot share the same
CGROUP_STORAGE map, even if they have different expected attach
types, considering that the actual attach type does not always
have to be equal to the expected attach type.

The test creates a CGROUP_STORAGE map and make it shared across
two different programs, one cgroup_skb/egress and one /ingress.
It asserts that the two programs cannot be both loaded, due to
verifier failure from the above reason.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/30a6b0da67ae6b0296c4d511bfb19c5f3d035916.1595565795.git.zhuyifei@google.com
---
 .../selftests/bpf/prog_tests/cg_storage_multi.c    | 43 +++++++++++++++++----
 .../testing/selftests/bpf/progs/cg_storage_multi.h | 13 +++++++
 .../bpf/progs/cg_storage_multi_egress_ingress.c    | 45 ++++++++++++++++++++++
 .../bpf/progs/cg_storage_multi_egress_only.c       |  9 +++--
 4 files changed, 99 insertions(+), 11 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/cg_storage_multi.h
 create mode 100644 tools/testing/selftests/bpf/progs/cg_storage_multi_egress_ingress.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
index e90e0547d759..1c7653423698 100644
--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -8,7 +8,10 @@
 #include <cgroup_helpers.h>
 #include <network_helpers.h>
 
+#include "progs/cg_storage_multi.h"
+
 #include "cg_storage_multi_egress_only.skel.h"
+#include "cg_storage_multi_egress_ingress.skel.h"
 
 #define PARENT_CGROUP "/cgroup_storage"
 #define CHILD_CGROUP "/cgroup_storage/child"
@@ -16,10 +19,10 @@
 static int duration;
 
 static bool assert_storage(struct bpf_map *map, const char *cgroup_path,
-			   __u32 expected)
+			   struct cgroup_value *expected)
 {
 	struct bpf_cgroup_storage_key key = {0};
-	__u32 value;
+	struct cgroup_value value;
 	int map_fd;
 
 	map_fd = bpf_map__fd(map);
@@ -29,8 +32,8 @@ static bool assert_storage(struct bpf_map *map, const char *cgroup_path,
 	if (CHECK(bpf_map_lookup_elem(map_fd, &key, &value) < 0,
 		  "map-lookup", "errno %d", errno))
 		return true;
-	if (CHECK(value != expected,
-		  "assert-storage", "got %u expected %u", value, expected))
+	if (CHECK(memcmp(&value, expected, sizeof(struct cgroup_value)),
+		  "assert-storage", "storages differ"))
 		return true;
 
 	return false;
@@ -39,7 +42,7 @@ static bool assert_storage(struct bpf_map *map, const char *cgroup_path,
 static bool assert_storage_noexist(struct bpf_map *map, const char *cgroup_path)
 {
 	struct bpf_cgroup_storage_key key = {0};
-	__u32 value;
+	struct cgroup_value value;
 	int map_fd;
 
 	map_fd = bpf_map__fd(map);
@@ -86,6 +89,7 @@ out_clean:
 static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
 {
 	struct cg_storage_multi_egress_only *obj;
+	struct cgroup_value expected_cgroup_value;
 	struct bpf_link *parent_link = NULL, *child_link = NULL;
 	bool err;
 
@@ -109,7 +113,9 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
 	if (CHECK(obj->bss->invocations != 1,
 		  "first-invoke", "invocations=%d", obj->bss->invocations))
 		goto close_bpf_object;
-	if (assert_storage(obj->maps.cgroup_storage, PARENT_CGROUP, 1))
+	expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
+	if (assert_storage(obj->maps.cgroup_storage,
+			   PARENT_CGROUP, &expected_cgroup_value))
 		goto close_bpf_object;
 	if (assert_storage_noexist(obj->maps.cgroup_storage, CHILD_CGROUP))
 		goto close_bpf_object;
@@ -129,9 +135,13 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
 	if (CHECK(obj->bss->invocations != 3,
 		  "second-invoke", "invocations=%d", obj->bss->invocations))
 		goto close_bpf_object;
-	if (assert_storage(obj->maps.cgroup_storage, PARENT_CGROUP, 2))
+	expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+	if (assert_storage(obj->maps.cgroup_storage,
+			   PARENT_CGROUP, &expected_cgroup_value))
 		goto close_bpf_object;
-	if (assert_storage(obj->maps.cgroup_storage, CHILD_CGROUP, 1))
+	expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
+	if (assert_storage(obj->maps.cgroup_storage,
+			   CHILD_CGROUP, &expected_cgroup_value))
 		goto close_bpf_object;
 
 close_bpf_object:
@@ -141,6 +151,20 @@ close_bpf_object:
 	cg_storage_multi_egress_only__destroy(obj);
 }
 
+static void test_egress_ingress(int parent_cgroup_fd, int child_cgroup_fd)
+{
+	struct cg_storage_multi_egress_ingress *obj;
+
+	/* Cannot load both programs due to verifier failure:
+	 * "only one cgroup storage of each type is allowed"
+	 */
+	obj = cg_storage_multi_egress_ingress__open_and_load();
+	CHECK(obj || errno != EBUSY,
+	      "skel-load", "errno %d, expected EBUSY", errno);
+
+	cg_storage_multi_egress_ingress__destroy(obj);
+}
+
 void test_cg_storage_multi(void)
 {
 	int parent_cgroup_fd = -1, child_cgroup_fd = -1;
@@ -155,6 +179,9 @@ void test_cg_storage_multi(void)
 	if (test__start_subtest("egress_only"))
 		test_egress_only(parent_cgroup_fd, child_cgroup_fd);
 
+	if (test__start_subtest("egress_ingress"))
+		test_egress_ingress(parent_cgroup_fd, child_cgroup_fd);
+
 close_cgroup_fd:
 	close(child_cgroup_fd);
 	close(parent_cgroup_fd);
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi.h b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
new file mode 100644
index 000000000000..a0778fe7857a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_CG_STORAGE_MULTI_H
+#define __PROGS_CG_STORAGE_MULTI_H
+
+#include <asm/types.h>
+
+struct cgroup_value {
+	__u32 egress_pkts;
+	__u32 ingress_pkts;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_ingress.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_ingress.c
new file mode 100644
index 000000000000..9ce386899365
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_ingress.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress")
+int egress(struct __sk_buff *skb)
+{
+	struct cgroup_value *ptr_cg_storage =
+		bpf_get_local_storage(&cgroup_storage, 0);
+
+	__sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+	struct cgroup_value *ptr_cg_storage =
+		bpf_get_local_storage(&cgroup_storage, 0);
+
+	__sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
index ec0165d07105..44ad46b33539 100644
--- a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
@@ -10,10 +10,12 @@
 #include <linux/udp.h>
 #include <bpf/bpf_helpers.h>
 
+#include "progs/cg_storage_multi.h"
+
 struct {
 	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
 	__type(key, struct bpf_cgroup_storage_key);
-	__type(value, __u32);
+	__type(value, struct cgroup_value);
 } cgroup_storage SEC(".maps");
 
 __u32 invocations = 0;
@@ -21,9 +23,10 @@ __u32 invocations = 0;
 SEC("cgroup_skb/egress")
 int egress(struct __sk_buff *skb)
 {
-	__u32 *ptr_cg_storage = bpf_get_local_storage(&cgroup_storage, 0);
+	struct cgroup_value *ptr_cg_storage =
+		bpf_get_local_storage(&cgroup_storage, 0);
 
-	__sync_fetch_and_add(ptr_cg_storage, 1);
+	__sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
 	__sync_fetch_and_add(&invocations, 1);
 
 	return 1;
-- 
cgit 


From 3573f384014f51fd5289df0e8369b63ae7fdc244 Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Thu, 23 Jul 2020 23:47:44 -0500
Subject: selftests/bpf: Test CGROUP_STORAGE behavior on shared egress +
 ingress

This mirrors the original egress-only test. The cgroup_storage is
now extended to have two packet counters, one for egress and one
for ingress. We also extend to have two egress programs to test
that egress will always share with other egress origrams in the
same cgroup. The behavior of the counters are exactly the same as
the original egress-only test.

The test is split into two, one "isolated" test that when the key
type is struct bpf_cgroup_storage_key, which contains the attach
type, programs of different attach types will see different
storages. The other, "shared" test that when the key type is u64,
programs of different attach types will see the same storage if
they are attached to the same cgroup.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/c756f5f1521227b8e6e90a453299dda722d7324d.1595565795.git.zhuyifei@google.com
---
 .../selftests/bpf/prog_tests/cg_storage_multi.c    | 265 +++++++++++++++++++--
 .../bpf/progs/cg_storage_multi_egress_ingress.c    |  45 ----
 .../bpf/progs/cg_storage_multi_isolated.c          |  57 +++++
 .../selftests/bpf/progs/cg_storage_multi_shared.c  |  57 +++++
 4 files changed, 354 insertions(+), 70 deletions(-)
 delete mode 100644 tools/testing/selftests/bpf/progs/cg_storage_multi_egress_ingress.c
 create mode 100644 tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
 create mode 100644 tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
index 1c7653423698..c67d8c076a34 100644
--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -11,25 +11,23 @@
 #include "progs/cg_storage_multi.h"
 
 #include "cg_storage_multi_egress_only.skel.h"
-#include "cg_storage_multi_egress_ingress.skel.h"
+#include "cg_storage_multi_isolated.skel.h"
+#include "cg_storage_multi_shared.skel.h"
 
 #define PARENT_CGROUP "/cgroup_storage"
 #define CHILD_CGROUP "/cgroup_storage/child"
 
 static int duration;
 
-static bool assert_storage(struct bpf_map *map, const char *cgroup_path,
+static bool assert_storage(struct bpf_map *map, const void *key,
 			   struct cgroup_value *expected)
 {
-	struct bpf_cgroup_storage_key key = {0};
 	struct cgroup_value value;
 	int map_fd;
 
 	map_fd = bpf_map__fd(map);
 
-	key.cgroup_inode_id = get_cgroup_id(cgroup_path);
-	key.attach_type = BPF_CGROUP_INET_EGRESS;
-	if (CHECK(bpf_map_lookup_elem(map_fd, &key, &value) < 0,
+	if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) < 0,
 		  "map-lookup", "errno %d", errno))
 		return true;
 	if (CHECK(memcmp(&value, expected, sizeof(struct cgroup_value)),
@@ -39,17 +37,14 @@ static bool assert_storage(struct bpf_map *map, const char *cgroup_path,
 	return false;
 }
 
-static bool assert_storage_noexist(struct bpf_map *map, const char *cgroup_path)
+static bool assert_storage_noexist(struct bpf_map *map, const void *key)
 {
-	struct bpf_cgroup_storage_key key = {0};
 	struct cgroup_value value;
 	int map_fd;
 
 	map_fd = bpf_map__fd(map);
 
-	key.cgroup_inode_id = get_cgroup_id(cgroup_path);
-	key.attach_type = BPF_CGROUP_INET_EGRESS;
-	if (CHECK(bpf_map_lookup_elem(map_fd, &key, &value) == 0,
+	if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) == 0,
 		  "map-lookup", "succeeded, expected ENOENT"))
 		return true;
 	if (CHECK(errno != ENOENT,
@@ -90,9 +85,12 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
 {
 	struct cg_storage_multi_egress_only *obj;
 	struct cgroup_value expected_cgroup_value;
+	struct bpf_cgroup_storage_key key;
 	struct bpf_link *parent_link = NULL, *child_link = NULL;
 	bool err;
 
+	key.attach_type = BPF_CGROUP_INET_EGRESS;
+
 	obj = cg_storage_multi_egress_only__open_and_load();
 	if (CHECK(!obj, "skel-load", "errno %d", errno))
 		return;
@@ -113,11 +111,13 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
 	if (CHECK(obj->bss->invocations != 1,
 		  "first-invoke", "invocations=%d", obj->bss->invocations))
 		goto close_bpf_object;
+	key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
 	expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
 	if (assert_storage(obj->maps.cgroup_storage,
-			   PARENT_CGROUP, &expected_cgroup_value))
+			   &key, &expected_cgroup_value))
 		goto close_bpf_object;
-	if (assert_storage_noexist(obj->maps.cgroup_storage, CHILD_CGROUP))
+	key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+	if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
 		goto close_bpf_object;
 
 	/* Attach to parent and child cgroup, trigger packet from child.
@@ -135,13 +135,15 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
 	if (CHECK(obj->bss->invocations != 3,
 		  "second-invoke", "invocations=%d", obj->bss->invocations))
 		goto close_bpf_object;
+	key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
 	expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
 	if (assert_storage(obj->maps.cgroup_storage,
-			   PARENT_CGROUP, &expected_cgroup_value))
+			   &key, &expected_cgroup_value))
 		goto close_bpf_object;
+	key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
 	expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
 	if (assert_storage(obj->maps.cgroup_storage,
-			   CHILD_CGROUP, &expected_cgroup_value))
+			   &key, &expected_cgroup_value))
 		goto close_bpf_object;
 
 close_bpf_object:
@@ -151,18 +153,228 @@ close_bpf_object:
 	cg_storage_multi_egress_only__destroy(obj);
 }
 
-static void test_egress_ingress(int parent_cgroup_fd, int child_cgroup_fd)
+static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
 {
-	struct cg_storage_multi_egress_ingress *obj;
+	struct cg_storage_multi_isolated *obj;
+	struct cgroup_value expected_cgroup_value;
+	struct bpf_cgroup_storage_key key;
+	struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL;
+	struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL;
+	struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL;
+	bool err;
 
-	/* Cannot load both programs due to verifier failure:
-	 * "only one cgroup storage of each type is allowed"
+	obj = cg_storage_multi_isolated__open_and_load();
+	if (CHECK(!obj, "skel-load", "errno %d", errno))
+		return;
+
+	/* Attach to parent cgroup, trigger packet from child.
+	 * Assert that there is three runs, two with parent cgroup egress and
+	 * one with parent cgroup ingress, stored in separate parent storages.
+	 * Also assert that child cgroup's storages does not exist
 	 */
-	obj = cg_storage_multi_egress_ingress__open_and_load();
-	CHECK(obj || errno != EBUSY,
-	      "skel-load", "errno %d, expected EBUSY", errno);
+	parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+							 parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
+		  "err %ld", PTR_ERR(parent_egress1_link)))
+		goto close_bpf_object;
+	parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+							 parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
+		  "err %ld", PTR_ERR(parent_egress2_link)))
+		goto close_bpf_object;
+	parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+							 parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
+		  "err %ld", PTR_ERR(parent_ingress_link)))
+		goto close_bpf_object;
+	err = connect_send(CHILD_CGROUP);
+	if (CHECK(err, "first-connect-send", "errno %d", errno))
+		goto close_bpf_object;
+	if (CHECK(obj->bss->invocations != 3,
+		  "first-invoke", "invocations=%d", obj->bss->invocations))
+		goto close_bpf_object;
+	key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+	key.attach_type = BPF_CGROUP_INET_EGRESS;
+	expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+	if (assert_storage(obj->maps.cgroup_storage,
+			   &key, &expected_cgroup_value))
+		goto close_bpf_object;
+	key.attach_type = BPF_CGROUP_INET_INGRESS;
+	expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 };
+	if (assert_storage(obj->maps.cgroup_storage,
+			   &key, &expected_cgroup_value))
+		goto close_bpf_object;
+	key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+	key.attach_type = BPF_CGROUP_INET_EGRESS;
+	if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+		goto close_bpf_object;
+	key.attach_type = BPF_CGROUP_INET_INGRESS;
+	if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+		goto close_bpf_object;
 
-	cg_storage_multi_egress_ingress__destroy(obj);
+	/* Attach to parent and child cgroup, trigger packet from child.
+	 * Assert that there is six additional runs, parent cgroup egresses and
+	 * ingress, child cgroup egresses and ingress.
+	 * Assert that egree and ingress storages are separate.
+	 */
+	child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+							child_cgroup_fd);
+	if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
+		  "err %ld", PTR_ERR(child_egress1_link)))
+		goto close_bpf_object;
+	child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+							child_cgroup_fd);
+	if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
+		  "err %ld", PTR_ERR(child_egress2_link)))
+		goto close_bpf_object;
+	child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+							child_cgroup_fd);
+	if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
+		  "err %ld", PTR_ERR(child_ingress_link)))
+		goto close_bpf_object;
+	err = connect_send(CHILD_CGROUP);
+	if (CHECK(err, "second-connect-send", "errno %d", errno))
+		goto close_bpf_object;
+	if (CHECK(obj->bss->invocations != 9,
+		  "second-invoke", "invocations=%d", obj->bss->invocations))
+		goto close_bpf_object;
+	key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+	key.attach_type = BPF_CGROUP_INET_EGRESS;
+	expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 4 };
+	if (assert_storage(obj->maps.cgroup_storage,
+			   &key, &expected_cgroup_value))
+		goto close_bpf_object;
+	key.attach_type = BPF_CGROUP_INET_INGRESS;
+	expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 2 };
+	if (assert_storage(obj->maps.cgroup_storage,
+			   &key, &expected_cgroup_value))
+		goto close_bpf_object;
+	key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+	key.attach_type = BPF_CGROUP_INET_EGRESS;
+	expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+	if (assert_storage(obj->maps.cgroup_storage,
+			   &key, &expected_cgroup_value))
+		goto close_bpf_object;
+	key.attach_type = BPF_CGROUP_INET_INGRESS;
+	expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 };
+	if (assert_storage(obj->maps.cgroup_storage,
+			   &key, &expected_cgroup_value))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(parent_egress1_link);
+	bpf_link__destroy(parent_egress2_link);
+	bpf_link__destroy(parent_ingress_link);
+	bpf_link__destroy(child_egress1_link);
+	bpf_link__destroy(child_egress2_link);
+	bpf_link__destroy(child_ingress_link);
+
+	cg_storage_multi_isolated__destroy(obj);
+}
+
+static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
+{
+	struct cg_storage_multi_shared *obj;
+	struct cgroup_value expected_cgroup_value;
+	__u64 key;
+	struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL;
+	struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL;
+	struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL;
+	bool err;
+
+	obj = cg_storage_multi_shared__open_and_load();
+	if (CHECK(!obj, "skel-load", "errno %d", errno))
+		return;
+
+	/* Attach to parent cgroup, trigger packet from child.
+	 * Assert that there is three runs, two with parent cgroup egress and
+	 * one with parent cgroup ingress.
+	 * Also assert that child cgroup's storage does not exist
+	 */
+	parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+							 parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
+		  "err %ld", PTR_ERR(parent_egress1_link)))
+		goto close_bpf_object;
+	parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+							 parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
+		  "err %ld", PTR_ERR(parent_egress2_link)))
+		goto close_bpf_object;
+	parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+							 parent_cgroup_fd);
+	if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
+		  "err %ld", PTR_ERR(parent_ingress_link)))
+		goto close_bpf_object;
+	err = connect_send(CHILD_CGROUP);
+	if (CHECK(err, "first-connect-send", "errno %d", errno))
+		goto close_bpf_object;
+	if (CHECK(obj->bss->invocations != 3,
+		  "first-invoke", "invocations=%d", obj->bss->invocations))
+		goto close_bpf_object;
+	key = get_cgroup_id(PARENT_CGROUP);
+	expected_cgroup_value = (struct cgroup_value) {
+		.egress_pkts = 2,
+		.ingress_pkts = 1,
+	};
+	if (assert_storage(obj->maps.cgroup_storage,
+			   &key, &expected_cgroup_value))
+		goto close_bpf_object;
+	key = get_cgroup_id(CHILD_CGROUP);
+	if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+		goto close_bpf_object;
+
+	/* Attach to parent and child cgroup, trigger packet from child.
+	 * Assert that there is six additional runs, parent cgroup egresses and
+	 * ingress, child cgroup egresses and ingress.
+	 */
+	child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+							child_cgroup_fd);
+	if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
+		  "err %ld", PTR_ERR(child_egress1_link)))
+		goto close_bpf_object;
+	child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+							child_cgroup_fd);
+	if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
+		  "err %ld", PTR_ERR(child_egress2_link)))
+		goto close_bpf_object;
+	child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+							child_cgroup_fd);
+	if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
+		  "err %ld", PTR_ERR(child_ingress_link)))
+		goto close_bpf_object;
+	err = connect_send(CHILD_CGROUP);
+	if (CHECK(err, "second-connect-send", "errno %d", errno))
+		goto close_bpf_object;
+	if (CHECK(obj->bss->invocations != 9,
+		  "second-invoke", "invocations=%d", obj->bss->invocations))
+		goto close_bpf_object;
+	key = get_cgroup_id(PARENT_CGROUP);
+	expected_cgroup_value = (struct cgroup_value) {
+		.egress_pkts = 4,
+		.ingress_pkts = 2,
+	};
+	if (assert_storage(obj->maps.cgroup_storage,
+			   &key, &expected_cgroup_value))
+		goto close_bpf_object;
+	key = get_cgroup_id(CHILD_CGROUP);
+	expected_cgroup_value = (struct cgroup_value) {
+		.egress_pkts = 2,
+		.ingress_pkts = 1,
+	};
+	if (assert_storage(obj->maps.cgroup_storage,
+			   &key, &expected_cgroup_value))
+		goto close_bpf_object;
+
+close_bpf_object:
+	bpf_link__destroy(parent_egress1_link);
+	bpf_link__destroy(parent_egress2_link);
+	bpf_link__destroy(parent_ingress_link);
+	bpf_link__destroy(child_egress1_link);
+	bpf_link__destroy(child_egress2_link);
+	bpf_link__destroy(child_ingress_link);
+
+	cg_storage_multi_shared__destroy(obj);
 }
 
 void test_cg_storage_multi(void)
@@ -179,8 +391,11 @@ void test_cg_storage_multi(void)
 	if (test__start_subtest("egress_only"))
 		test_egress_only(parent_cgroup_fd, child_cgroup_fd);
 
-	if (test__start_subtest("egress_ingress"))
-		test_egress_ingress(parent_cgroup_fd, child_cgroup_fd);
+	if (test__start_subtest("isolated"))
+		test_isolated(parent_cgroup_fd, child_cgroup_fd);
+
+	if (test__start_subtest("shared"))
+		test_shared(parent_cgroup_fd, child_cgroup_fd);
 
 close_cgroup_fd:
 	close(child_cgroup_fd);
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_ingress.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_ingress.c
deleted file mode 100644
index 9ce386899365..000000000000
--- a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_ingress.c
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-/*
- * Copyright 2020 Google LLC.
- */
-
-#include <errno.h>
-#include <linux/bpf.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <bpf/bpf_helpers.h>
-
-#include "progs/cg_storage_multi.h"
-
-struct {
-	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
-	__type(key, struct bpf_cgroup_storage_key);
-	__type(value, struct cgroup_value);
-} cgroup_storage SEC(".maps");
-
-__u32 invocations = 0;
-
-SEC("cgroup_skb/egress")
-int egress(struct __sk_buff *skb)
-{
-	struct cgroup_value *ptr_cg_storage =
-		bpf_get_local_storage(&cgroup_storage, 0);
-
-	__sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
-	__sync_fetch_and_add(&invocations, 1);
-
-	return 1;
-}
-
-SEC("cgroup_skb/ingress")
-int ingress(struct __sk_buff *skb)
-{
-	struct cgroup_value *ptr_cg_storage =
-		bpf_get_local_storage(&cgroup_storage, 0);
-
-	__sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
-	__sync_fetch_and_add(&invocations, 1);
-
-	return 1;
-}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
new file mode 100644
index 000000000000..a25373002055
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress/1")
+int egress1(struct __sk_buff *skb)
+{
+	struct cgroup_value *ptr_cg_storage =
+		bpf_get_local_storage(&cgroup_storage, 0);
+
+	__sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+SEC("cgroup_skb/egress/2")
+int egress2(struct __sk_buff *skb)
+{
+	struct cgroup_value *ptr_cg_storage =
+		bpf_get_local_storage(&cgroup_storage, 0);
+
+	__sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+	struct cgroup_value *ptr_cg_storage =
+		bpf_get_local_storage(&cgroup_storage, 0);
+
+	__sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
new file mode 100644
index 000000000000..a149f33bc533
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, __u64);
+	__type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress/1")
+int egress1(struct __sk_buff *skb)
+{
+	struct cgroup_value *ptr_cg_storage =
+		bpf_get_local_storage(&cgroup_storage, 0);
+
+	__sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+SEC("cgroup_skb/egress/2")
+int egress2(struct __sk_buff *skb)
+{
+	struct cgroup_value *ptr_cg_storage =
+		bpf_get_local_storage(&cgroup_storage, 0);
+
+	__sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+	struct cgroup_value *ptr_cg_storage =
+		bpf_get_local_storage(&cgroup_storage, 0);
+
+	__sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+	__sync_fetch_and_add(&invocations, 1);
+
+	return 1;
+}
-- 
cgit 


From dc8698cac7aada9b61a612cb819341d84591163e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 21 Jul 2020 23:46:00 -0700
Subject: libbpf: Add support for BPF XDP link

Sync UAPI header and add support for using bpf_link-based XDP attachment.
Make xdp/ prog type set expected attach type. Kernel didn't enforce
attach_type for XDP programs before, so there is no backwards compatiblity
issues there.

Also fix section_names selftest to recognize that xdp prog types now have
expected attach type.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200722064603.3350758-8-andriin@fb.com
---
 tools/include/uapi/linux/bpf.h                         | 10 +++++++++-
 tools/lib/bpf/libbpf.c                                 |  9 ++++++++-
 tools/lib/bpf/libbpf.h                                 |  2 ++
 tools/lib/bpf/libbpf.map                               |  1 +
 tools/testing/selftests/bpf/prog_tests/section_names.c |  2 +-
 5 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 828c2f6438f2..e1ba4ae6a916 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -230,6 +230,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET_SOCK_RELEASE,
 	BPF_XDP_CPUMAP,
 	BPF_SK_LOOKUP,
+	BPF_XDP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -242,6 +243,7 @@ enum bpf_link_type {
 	BPF_LINK_TYPE_CGROUP = 3,
 	BPF_LINK_TYPE_ITER = 4,
 	BPF_LINK_TYPE_NETNS = 5,
+	BPF_LINK_TYPE_XDP = 6,
 
 	MAX_BPF_LINK_TYPE,
 };
@@ -614,7 +616,10 @@ union bpf_attr {
 
 	struct { /* struct used by BPF_LINK_CREATE command */
 		__u32		prog_fd;	/* eBPF program to attach */
-		__u32		target_fd;	/* object to attach to */
+		union {
+			__u32		target_fd;	/* object to attach to */
+			__u32		target_ifindex; /* target ifindex */
+		};
 		__u32		attach_type;	/* attach type */
 		__u32		flags;		/* extra flags */
 	} link_create;
@@ -4064,6 +4069,9 @@ struct bpf_link_info {
 			__u32 netns_ino;
 			__u32 attach_type;
 		} netns;
+		struct {
+			__u32 ifindex;
+		} xdp;
 	};
 } __attribute__((aligned(8)));
 
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e51479d60285..54830d603fee 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6915,7 +6915,8 @@ static const struct bpf_sec_def section_defs[] = {
 						BPF_XDP_DEVMAP),
 	BPF_EAPROG_SEC("xdp_cpumap/",		BPF_PROG_TYPE_XDP,
 						BPF_XDP_CPUMAP),
-	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),
+	BPF_EAPROG_SEC("xdp",			BPF_PROG_TYPE_XDP,
+						BPF_XDP),
 	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
 	BPF_PROG_SEC("lwt_out",			BPF_PROG_TYPE_LWT_OUT),
@@ -8281,6 +8282,12 @@ bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
 	return bpf_program__attach_fd(prog, netns_fd, "netns");
 }
 
+struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
+{
+	/* target_fd/target_ifindex use the same field in LINK_CREATE */
+	return bpf_program__attach_fd(prog, ifindex, "xdp");
+}
+
 struct bpf_link *
 bpf_program__attach_iter(struct bpf_program *prog,
 			 const struct bpf_iter_attach_opts *opts)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c6813791fa7e..9924385462ab 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -257,6 +257,8 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
 LIBBPF_API struct bpf_link *
 bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_xdp(struct bpf_program *prog, int ifindex);
 
 struct bpf_map;
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 6f0856abe299..ca49a6a7e5b2 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -286,6 +286,7 @@ LIBBPF_0.1.0 {
 		bpf_map__set_value_size;
 		bpf_map__type;
 		bpf_map__value_size;
+		bpf_program__attach_xdp;
 		bpf_program__autoload;
 		bpf_program__is_sk_lookup;
 		bpf_program__set_autoload;
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 713167449c98..8b571890c57e 100644
--- a/tools/testing/selftests/bpf/prog_tests/section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -35,7 +35,7 @@ static struct sec_name_test tests[] = {
 		{-EINVAL, 0},
 	},
 	{"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} },
-	{"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
+	{"xdp", {0, BPF_PROG_TYPE_XDP, BPF_XDP}, {0, BPF_XDP} },
 	{"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
 	{"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
 	{"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} },
-- 
cgit 


From fe48230cf2ae27c9e3b96d29908e22e2926fd1ab Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 21 Jul 2020 23:46:01 -0700
Subject: selftests/bpf: Add BPF XDP link selftests

Add selftest validating all the attachment logic around BPF XDP link. Test
also link updates and get_obj_info() APIs.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200722064603.3350758-9-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/xdp_link.c | 137 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_xdp_link.c |  12 ++
 2 files changed, 149 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_link.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_link.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
new file mode 100644
index 000000000000..52cba6795d40
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <uapi/linux/if_link.h>
+#include <test_progs.h>
+#include "test_xdp_link.skel.h"
+
+#define IFINDEX_LO 1
+
+void test_xdp_link(void)
+{
+	__u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err;
+	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
+	struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+	struct bpf_link_info link_info;
+	struct bpf_prog_info prog_info;
+	struct bpf_link *link;
+	__u32 link_info_len = sizeof(link_info);
+	__u32 prog_info_len = sizeof(prog_info);
+
+	skel1 = test_xdp_link__open_and_load();
+	if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n"))
+		goto cleanup;
+	prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler);
+
+	skel2 = test_xdp_link__open_and_load();
+	if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n"))
+		goto cleanup;
+	prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
+
+	memset(&prog_info, 0, sizeof(prog_info));
+	err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
+	if (CHECK(err, "fd_info1", "failed %d\n", -errno))
+		goto cleanup;
+	id1 = prog_info.id;
+
+	memset(&prog_info, 0, sizeof(prog_info));
+	err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
+	if (CHECK(err, "fd_info2", "failed %d\n", -errno))
+		goto cleanup;
+	id2 = prog_info.id;
+
+	/* set initial prog attachment */
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
+	if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err))
+		goto cleanup;
+
+	/* validate prog ID */
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	CHECK(err || id0 != id1, "id1_check",
+	      "loaded prog id %u != id1 %u, err %d", id0, id1, err);
+
+	/* BPF link is not allowed to replace prog attachment */
+	link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+		bpf_link__destroy(link);
+		/* best-effort detach prog */
+		opts.old_fd = prog_fd1;
+		bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+		goto cleanup;
+	}
+
+	/* detach BPF program */
+	opts.old_fd = prog_fd1;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts);
+	if (CHECK(err, "prog_detach", "failed %d\n", err))
+		goto cleanup;
+
+	/* now BPF link should attach successfully */
+	link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	skel1->links.xdp_handler = link;
+
+	/* validate prog ID */
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	if (CHECK(err || id0 != id1, "id1_check",
+		  "loaded prog id %u != id1 %u, err %d", id0, id1, err))
+		goto cleanup;
+
+	/* BPF prog attach is not allowed to replace BPF link */
+	opts.old_fd = prog_fd1;
+	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
+	if (CHECK(!err, "prog_attach_fail", "unexpected success\n"))
+		goto cleanup;
+
+	/* Can't force-update when BPF link is active */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0);
+	if (CHECK(!err, "prog_update_fail", "unexpected success\n"))
+		goto cleanup;
+
+	/* Can't force-detach when BPF link is active */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+	if (CHECK(!err, "prog_detach_fail", "unexpected success\n"))
+		goto cleanup;
+
+	/* BPF link is not allowed to replace another BPF link */
+	link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+		bpf_link__destroy(link);
+		goto cleanup;
+	}
+
+	bpf_link__destroy(skel1->links.xdp_handler);
+	skel1->links.xdp_handler = NULL;
+
+	/* new link attach should succeed */
+	link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
+	if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	skel2->links.xdp_handler = link;
+
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	if (CHECK(err || id0 != id2, "id2_check",
+		  "loaded prog id %u != id2 %u, err %d", id0, id1, err))
+		goto cleanup;
+
+	/* updating program under active BPF link works as expected */
+	err = bpf_link__update_program(link, skel1->progs.xdp_handler);
+	if (CHECK(err, "link_upd", "failed: %d\n", err))
+		goto cleanup;
+
+	memset(&link_info, 0, sizeof(link_info));
+	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+	if (CHECK(err, "link_info", "failed: %d\n", err))
+		goto cleanup;
+
+	CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type",
+	      "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP);
+	CHECK(link_info.prog_id != id1, "link_prog_id",
+	      "got %u != exp %u\n", link_info.prog_id, id1);
+	CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
+	      "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+
+cleanup:
+	test_xdp_link__destroy(skel1);
+	test_xdp_link__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
new file mode 100644
index 000000000000..eb93ea95d1d8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+SEC("xdp/handler")
+int xdp_handler(struct xdp_md *xdp)
+{
+	return 0;
+}
-- 
cgit 


From 70cfab1d871c771bd1963dd898f4f9b2731590c2 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Fri, 24 Jul 2020 10:06:17 +0100
Subject: tools, bpftool: Skip type probe if name is not found

For probing program and map types, bpftool loops on type values and uses
the relevant type name in prog_type_name[] or map_type_name[]. To ensure
the name exists, we exit from the loop if we go over the size of the
array.

However, this is not enough in the case where the arrays have "holes" in
them, program or map types for which they have no name, but not at the
end of the list. This is currently the case for BPF_PROG_TYPE_LSM, not
known to bpftool and which name is a null string. When probing for
features, bpftool attempts to strlen() that name and segfaults.

Let's fix it by skipping probes for "unknown" program and map types,
with an informational message giving the numeral value in that case.

Fixes: 93a3545d812a ("tools/bpftool: Add name mappings for SK_LOOKUP prog and attach type")
Reported-by: Paul Chaignon <paul@cilium.io>
Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200724090618.16378-2-quentin@isovalent.com
---
 tools/bpf/bpftool/feature.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 1cd75807673e..a43a6f10b564 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -504,6 +504,10 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
 
 	supported_types[prog_type] |= res;
 
+	if (!prog_type_name[prog_type]) {
+		p_info("program type name not found (type %d)", prog_type);
+		return;
+	}
 	maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
 	if (strlen(prog_type_name[prog_type]) > maxlen) {
 		p_info("program type name too long");
@@ -533,6 +537,10 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
 	 * check required for unprivileged users
 	 */
 
+	if (!map_type_name[map_type]) {
+		p_info("map type name not found (type %d)", map_type);
+		return;
+	}
 	maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
 	if (strlen(map_type_name[map_type]) > maxlen) {
 		p_info("map type name too long");
-- 
cgit 


From 9a97c9d2af5ca798377342debf7f0f44281d050e Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Fri, 24 Jul 2020 10:06:18 +0100
Subject: tools, bpftool: Add LSM type to array of prog names

Assign "lsm" as a printed name for BPF_PROG_TYPE_LSM in bpftool, so that
it can use it when listing programs loaded on the system or when probing
features.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200724090618.16378-3-quentin@isovalent.com
---
 tools/bpf/bpftool/prog.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 3e6ecc6332e2..158995d853b0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -59,6 +59,7 @@ const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_TRACING]			= "tracing",
 	[BPF_PROG_TYPE_STRUCT_OPS]		= "struct_ops",
 	[BPF_PROG_TYPE_EXT]			= "ext",
+	[BPF_PROG_TYPE_LSM]			= "lsm",
 	[BPF_PROG_TYPE_SK_LOOKUP]		= "sk_lookup",
 };
 
-- 
cgit 


From e1613b5714ee6c186c9628e9958edf65e9d9cddd Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 27 Jul 2020 15:47:15 -0700
Subject: bpf: Fix bpf_ringbuf_output() signature to return long

Due to bpf tree fix merge, bpf_ringbuf_output() signature ended up with int as
a return type, while all other helpers got converted to returning long. So fix
it in bpf-next now.

Fixes: b0659d8a950d ("bpf: Fix definition of bpf_ringbuf_output() helper in UAPI comments")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200727224715.652037-1-andriin@fb.com
---
 tools/include/uapi/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e1ba4ae6a916..eb5e0c38eb2c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3241,7 +3241,7 @@ union bpf_attr {
  *	Return
  *		The id is returned or 0 in case the id could not be retrieved.
  *
- * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
+ * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
  * 	Description
  * 		Copy *size* bytes from *data* into a ring buffer *ringbuf*.
  * 		If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
-- 
cgit 


From 363885d7c62e293fb093c7c355bf5f05fa0a25a9 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 27 Jul 2020 16:33:45 -0700
Subject: selftests/bpf: Add new bpf_iter context structs to fix build on old
 kernels

Add bpf_iter__bpf_map_elem and bpf_iter__bpf_sk_storage_map to bpf_iter.h.

Fixes: 3b1c420bd882 ("selftests/bpf: Add a test for bpf sk_storage_map iterator")
Fixes: 2a7c2fff7dd6 ("selftests/bpf: Add test for bpf hash map iterators")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200727233345.1686358-1-andriin@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 17db3bac518b..c196280df90d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -11,6 +11,8 @@
 #define tcp6_sock tcp6_sock___not_used
 #define bpf_iter__udp bpf_iter__udp___not_used
 #define udp6_sock udp6_sock___not_used
+#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
+#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
 #include "vmlinux.h"
 #undef bpf_iter_meta
 #undef bpf_iter__bpf_map
@@ -22,6 +24,8 @@
 #undef tcp6_sock
 #undef bpf_iter__udp
 #undef udp6_sock
+#undef bpf_iter__bpf_map_elem
+#undef bpf_iter__bpf_sk_storage_map
 
 struct bpf_iter_meta {
 	struct seq_file *seq;
@@ -78,3 +82,17 @@ struct udp6_sock {
 	struct udp_sock	udp;
 	struct ipv6_pinfo inet6;
 } __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map_elem {
+	struct bpf_iter_meta *meta;
+	struct bpf_map *map;
+	void *key;
+	void *value;
+};
+
+struct bpf_iter__bpf_sk_storage_map {
+	struct bpf_iter_meta *meta;
+	struct bpf_map *map;
+	struct sock *sk;
+	void *value;
+};
-- 
cgit 


From ca5cd355b7f0372da0d50fce5b12a3367e417290 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 28 Jul 2020 07:39:02 -0700
Subject: bpf, selftests: use :: 1 for localhost in tcp_server.py

Using localhost requires the host to have a /etc/hosts file with that
specific line in it. By default my dev box did not, they used
ip6-localhost, so the test was failing. To fix remove the need for any
/etc/hosts and use ::1.

I could just add the line, but this seems easier.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159594714197.21431.10113693935099326445.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/tcp_client.py | 2 +-
 tools/testing/selftests/bpf/tcp_server.py | 2 +-
 tools/testing/selftests/bpf/test_netcnt.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
index a53ed58528d6..bfff82be3fc1 100755
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -34,7 +34,7 @@ serverPort = int(sys.argv[1])
 # create active socket
 sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
 try:
-    sock.connect(('localhost', serverPort))
+    sock.connect(('::1', serverPort))
 except socket.error as e:
     sys.exit(1)
 
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
index 0ca60d193bed..42ab8882f00f 100755
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -38,7 +38,7 @@ serverSocket = None
 # create passive socket
 serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
 
-try: serverSocket.bind(('localhost', 0))
+try: serverSocket.bind(('::1', 0))
 except socket.error as msg:
     print('bind fails: ' + str(msg))
 
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
index c1da5404454a..7a68c9069639 100644
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -82,9 +82,9 @@ int main(int argc, char **argv)
 	}
 
 	if (system("which ping6 &>/dev/null") == 0)
-		assert(!system("ping6 localhost -c 10000 -f -q > /dev/null"));
+		assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
 	else
-		assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null"));
+		assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
 
 	if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
 			   &prog_cnt)) {
-- 
cgit 


From 12e6196fb15953605be54ac9320ac54371aecab7 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 28 Jul 2020 15:18:01 -0700
Subject: selftests/bpf: Test bpf_iter buffer access with negative offset

Commit afbf21dce668 ("bpf: Support readonly/readwrite buffers
in verifier") added readonly/readwrite buffer support which
is currently used by bpf_iter tracing programs. It has
a bug with incorrect parameter ordering which later fixed
by Commit f6dfbe31e8fa ("bpf: Fix swapped arguments in calls
to check_buffer_access").

This patch added a test case with a negative offset access
which will trigger the error path.

Without Commit f6dfbe31e8fa, running the test case in the patch,
the error message looks like:
   R1_w=rdwr_buf(id=0,off=0,imm=0) R10=fp0
  ; value_sum += *(__u32 *)(value - 4);
  2: (61) r1 = *(u32 *)(r1 -4)
  R1 invalid (null) buffer access: off=-4, size=4

With the above commit, the error message looks like:
   R1_w=rdwr_buf(id=0,off=0,imm=0) R10=fp0
  ; value_sum += *(__u32 *)(value - 4);
  2: (61) r1 = *(u32 *)(r1 -4)
  R1 invalid rdwr buffer access: off=-4, size=4

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200728221801.1090406-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c   | 13 +++++++++++++
 .../selftests/bpf/progs/bpf_iter_test_kern6.c       | 21 +++++++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index d95de80b1851..4ffefdc1130f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -21,6 +21,7 @@
 #include "bpf_iter_bpf_percpu_array_map.skel.h"
 #include "bpf_iter_bpf_sk_storage_map.skel.h"
 #include "bpf_iter_test_kern5.skel.h"
+#include "bpf_iter_test_kern6.skel.h"
 
 static int duration;
 
@@ -885,6 +886,16 @@ static void test_rdonly_buf_out_of_bound(void)
 	bpf_iter_test_kern5__destroy(skel);
 }
 
+static void test_buf_neg_offset(void)
+{
+	struct bpf_iter_test_kern6 *skel;
+
+	skel = bpf_iter_test_kern6__open_and_load();
+	if (CHECK(skel, "bpf_iter_test_kern6__open_and_load",
+		  "skeleton open_and_load unexpected success\n"))
+		bpf_iter_test_kern6__destroy(skel);
+}
+
 void test_bpf_iter(void)
 {
 	if (test__start_subtest("btf_id_or_null"))
@@ -933,4 +944,6 @@ void test_bpf_iter(void)
 		test_bpf_sk_storage_map();
 	if (test__start_subtest("rdonly-buf-out-of-bound"))
 		test_rdonly_buf_out_of_bound();
+	if (test__start_subtest("buf-neg-offset"))
+		test_buf_neg_offset();
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
new file mode 100644
index 000000000000..1c7304f56b1e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 value_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+	void *value = ctx->value;
+
+	if (value == (void *)0)
+		return 0;
+
+	/* negative offset, verifier failure. */
+	value_sum += *(__u32 *)(value - 4);
+	return 0;
+}
-- 
cgit 


From 4fb5f94911405b6d2645d4384c2ae1215bfc6a76 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 28 Jul 2020 17:31:04 -0700
Subject: selftests/bpf: Verify socket storage in cgroup/sock_{create, release}

Augment udp_limit test to set and verify socket storage value.
That should be enough to exercise the changes from the previous
patch.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200729003104.1280813-2-sdf@google.com
---
 tools/testing/selftests/bpf/progs/udp_limit.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c
index 8429b22525a7..165e3c2dd9a3 100644
--- a/tools/testing/selftests/bpf/progs/udp_limit.c
+++ b/tools/testing/selftests/bpf/progs/udp_limit.c
@@ -6,14 +6,28 @@
 
 int invocations = 0, in_use = 0;
 
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} sk_map SEC(".maps");
+
 SEC("cgroup/sock_create")
 int sock(struct bpf_sock *ctx)
 {
+	int *sk_storage;
 	__u32 key;
 
 	if (ctx->type != SOCK_DGRAM)
 		return 1;
 
+	sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0,
+					BPF_SK_STORAGE_GET_F_CREATE);
+	if (!sk_storage)
+		return 0;
+	*sk_storage = 0xdeadbeef;
+
 	__sync_fetch_and_add(&invocations, 1);
 
 	if (in_use > 0) {
@@ -31,11 +45,16 @@ int sock(struct bpf_sock *ctx)
 SEC("cgroup/sock_release")
 int sock_release(struct bpf_sock *ctx)
 {
+	int *sk_storage;
 	__u32 key;
 
 	if (ctx->type != SOCK_DGRAM)
 		return 1;
 
+	sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0, 0);
+	if (!sk_storage || *sk_storage != 0xdeadbeef)
+		return 0;
+
 	__sync_fetch_and_add(&invocations, 1);
 	__sync_fetch_and_add(&in_use, -1);
 	return 1;
-- 
cgit 


From dfdb0d93e5bc351af5b286ae9c630d3cf869b810 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 29 Jul 2020 16:56:58 +0800
Subject: selftests/bpf: Add xdpdrv mode for test_xdp_redirect

This patch add xdpdrv mode for test_xdp_redirect.sh since veth has support
native mode. After update here is the test result:

  # ./test_xdp_redirect.sh
  selftests: test_xdp_redirect xdpgeneric [PASS]
  selftests: test_xdp_redirect xdpdrv [PASS]

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: William Tu <u9012063@gmail.com>
Link: https://lore.kernel.org/bpf/20200729085658.403794-1-liuhangbin@gmail.com
---
 tools/testing/selftests/bpf/test_xdp_redirect.sh | 84 +++++++++++++++---------
 1 file changed, 52 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index c4b17e08d431..dd80f0c84afb 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -10,52 +10,72 @@
 #     | xdp forwarding |
 #     ------------------
 
-cleanup()
+ret=0
+
+setup()
 {
-	if [ "$?" = "0" ]; then
-		echo "selftests: test_xdp_redirect [PASS]";
-	else
-		echo "selftests: test_xdp_redirect [FAILED]";
-	fi
 
-	set +e
+	local xdpmode=$1
+
+	ip netns add ns1
+	ip netns add ns2
+
+	ip link add veth1 index 111 type veth peer name veth11 netns ns1
+	ip link add veth2 index 222 type veth peer name veth22 netns ns2
+
+	ip link set veth1 up
+	ip link set veth2 up
+	ip -n ns1 link set dev veth11 up
+	ip -n ns2 link set dev veth22 up
+
+	ip -n ns1 addr add 10.1.1.11/24 dev veth11
+	ip -n ns2 addr add 10.1.1.22/24 dev veth22
+}
+
+cleanup()
+{
 	ip link del veth1 2> /dev/null
 	ip link del veth2 2> /dev/null
 	ip netns del ns1 2> /dev/null
 	ip netns del ns2 2> /dev/null
 }
 
-ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
-if [ $? -ne 0 ];then
-	echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
-	exit 0
-fi
-set -e
-
-ip netns add ns1
-ip netns add ns2
+test_xdp_redirect()
+{
+	local xdpmode=$1
 
-trap cleanup 0 2 3 6 9
+	setup
 
-ip link add veth1 index 111 type veth peer name veth11
-ip link add veth2 index 222 type veth peer name veth22
+	ip link set dev veth1 $xdpmode off &> /dev/null
+	if [ $? -ne 0 ];then
+		echo "selftests: test_xdp_redirect $xdpmode [SKIP]"
+		return 0
+	fi
 
-ip link set veth11 netns ns1
-ip link set veth22 netns ns2
+	ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+	ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+	ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
+	ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
 
-ip link set veth1 up
-ip link set veth2 up
+	ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null
+	local ret1=$?
+	ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null
+	local ret2=$?
 
-ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
-ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22
+	if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then
+		echo "selftests: test_xdp_redirect $xdpmode [PASS]";
+	else
+		ret=1
+		echo "selftests: test_xdp_redirect $xdpmode [FAILED]";
+	fi
 
-ip netns exec ns1 ip link set dev veth11 up
-ip netns exec ns2 ip link set dev veth22 up
+	cleanup
+}
 
-ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
-ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111
+set -e
+trap cleanup 2 3 6 9
 
-ip netns exec ns1 ping -c 1 10.1.1.22
-ip netns exec ns2 ping -c 1 10.1.1.11
+test_xdp_redirect xdpgeneric
+test_xdp_redirect xdpdrv
 
-exit 0
+exit $ret
-- 
cgit 


From 80546ac4586c0bd326aa7ce80f076646db02bcd0 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Jul 2020 21:50:56 -0700
Subject: selftests/bpf: Don't destroy failed link

Check that link is NULL or proper pointer before invoking bpf_link__destroy().
Not doing this causes crash in test_progs, when cg_storage_multi selftest
fails.

Fixes: 3573f384014f ("selftests/bpf: Test CGROUP_STORAGE behavior on shared egress + ingress")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200729045056.3363921-1-andriin@fb.com
---
 .../selftests/bpf/prog_tests/cg_storage_multi.c    | 42 ++++++++++++++--------
 1 file changed, 28 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
index c67d8c076a34..643dfa35419c 100644
--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -147,8 +147,10 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
 		goto close_bpf_object;
 
 close_bpf_object:
-	bpf_link__destroy(parent_link);
-	bpf_link__destroy(child_link);
+	if (!IS_ERR(parent_link))
+		bpf_link__destroy(parent_link);
+	if (!IS_ERR(child_link))
+		bpf_link__destroy(child_link);
 
 	cg_storage_multi_egress_only__destroy(obj);
 }
@@ -262,12 +264,18 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
 		goto close_bpf_object;
 
 close_bpf_object:
-	bpf_link__destroy(parent_egress1_link);
-	bpf_link__destroy(parent_egress2_link);
-	bpf_link__destroy(parent_ingress_link);
-	bpf_link__destroy(child_egress1_link);
-	bpf_link__destroy(child_egress2_link);
-	bpf_link__destroy(child_ingress_link);
+	if (!IS_ERR(parent_egress1_link))
+		bpf_link__destroy(parent_egress1_link);
+	if (!IS_ERR(parent_egress2_link))
+		bpf_link__destroy(parent_egress2_link);
+	if (!IS_ERR(parent_ingress_link))
+		bpf_link__destroy(parent_ingress_link);
+	if (!IS_ERR(child_egress1_link))
+		bpf_link__destroy(child_egress1_link);
+	if (!IS_ERR(child_egress2_link))
+		bpf_link__destroy(child_egress2_link);
+	if (!IS_ERR(child_ingress_link))
+		bpf_link__destroy(child_ingress_link);
 
 	cg_storage_multi_isolated__destroy(obj);
 }
@@ -367,12 +375,18 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
 		goto close_bpf_object;
 
 close_bpf_object:
-	bpf_link__destroy(parent_egress1_link);
-	bpf_link__destroy(parent_egress2_link);
-	bpf_link__destroy(parent_ingress_link);
-	bpf_link__destroy(child_egress1_link);
-	bpf_link__destroy(child_egress2_link);
-	bpf_link__destroy(child_ingress_link);
+	if (!IS_ERR(parent_egress1_link))
+		bpf_link__destroy(parent_egress1_link);
+	if (!IS_ERR(parent_egress2_link))
+		bpf_link__destroy(parent_egress2_link);
+	if (!IS_ERR(parent_ingress_link))
+		bpf_link__destroy(parent_ingress_link);
+	if (!IS_ERR(child_egress1_link))
+		bpf_link__destroy(child_egress1_link);
+	if (!IS_ERR(child_egress2_link))
+		bpf_link__destroy(child_egress2_link);
+	if (!IS_ERR(child_ingress_link))
+		bpf_link__destroy(child_ingress_link);
 
 	cg_storage_multi_shared__destroy(obj);
 }
-- 
cgit 


From a6599abdeac30063baf89df166068b20758e0e86 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 30 Jul 2020 14:53:25 +0200
Subject: selftests/bpf: Omit nodad flag when adding addresses to loopback

Setting IFA_F_NODAD flag for IPv6 addresses to add to loopback is
unnecessary. Duplicate Address Detection does not happen on loopback
device.

Also, passing 'nodad' flag to 'ip address' breaks libbpf CI, which runs in
an environment with BusyBox implementation of 'ip' command, that doesn't
understand this flag.

Fixes: 0ab5539f8584 ("selftests/bpf: Tests for BPF_SK_LOOKUP attach point")
Reported-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Andrii Nakryiko <andrii@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200730125325.1869363-1-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 9bbd2b2b7630..379da6f10ee9 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -1290,8 +1290,8 @@ static void run_tests(struct test_sk_lookup *skel)
 static int switch_netns(void)
 {
 	static const char * const setup_script[] = {
-		"ip -6 addr add dev lo " EXT_IP6 "/128 nodad",
-		"ip -6 addr add dev lo " INT_IP6 "/128 nodad",
+		"ip -6 addr add dev lo " EXT_IP6 "/128",
+		"ip -6 addr add dev lo " INT_IP6 "/128",
 		"ip link set dev lo up",
 		NULL,
 	};
-- 
cgit 


From 50450fc716c1a570ee8d8bfe198ef5d3cfca36e4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 Jul 2020 16:21:48 -0700
Subject: libbpf: Make destructors more robust by handling ERR_PTR(err) cases

Most of libbpf "constructors" on failure return ERR_PTR(err) result encoded as
a pointer. It's a common mistake to eventually pass such malformed pointers
into xxx__destroy()/xxx__free() "destructors". So instead of fixing up
clean up code in selftests and user programs, handle such error pointers in
destructors themselves. This works beautifully for NULL pointers passed to
destructors, so might as well just work for error pointers.

Suggested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200729232148.896125-1-andriin@fb.com
---
 tools/lib/bpf/btf.c      | 4 ++--
 tools/lib/bpf/btf_dump.c | 2 +-
 tools/lib/bpf/libbpf.c   | 9 ++++-----
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index c9e760e120dc..ded5b29965f9 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -386,7 +386,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
 
 void btf__free(struct btf *btf)
 {
-	if (!btf)
+	if (IS_ERR_OR_NULL(btf))
 		return;
 
 	if (btf->fd >= 0)
@@ -1025,7 +1025,7 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
 
 void btf_ext__free(struct btf_ext *btf_ext)
 {
-	if (!btf_ext)
+	if (IS_ERR_OR_NULL(btf_ext))
 		return;
 	free(btf_ext->data);
 	free(btf_ext);
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index e1c344504cae..cf711168d34a 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -183,7 +183,7 @@ void btf_dump__free(struct btf_dump *d)
 {
 	int i, cnt;
 
-	if (!d)
+	if (IS_ERR_OR_NULL(d))
 		return;
 
 	free(d->type_states);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 54830d603fee..b9f11f854985 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6504,7 +6504,7 @@ void bpf_object__close(struct bpf_object *obj)
 {
 	size_t i;
 
-	if (!obj)
+	if (IS_ERR_OR_NULL(obj))
 		return;
 
 	if (obj->clear_priv)
@@ -7690,7 +7690,7 @@ int bpf_link__destroy(struct bpf_link *link)
 {
 	int err = 0;
 
-	if (!link)
+	if (IS_ERR_OR_NULL(link))
 		return 0;
 
 	if (!link->disconnected && link->detach)
@@ -8502,7 +8502,7 @@ void perf_buffer__free(struct perf_buffer *pb)
 {
 	int i;
 
-	if (!pb)
+	if (IS_ERR_OR_NULL(pb))
 		return;
 	if (pb->cpu_bufs) {
 		for (i = 0; i < pb->cpu_cnt; i++) {
@@ -9379,8 +9379,7 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
 	for (i = 0; i < s->prog_cnt; i++) {
 		struct bpf_link **link = s->progs[i].link;
 
-		if (!IS_ERR_OR_NULL(*link))
-			bpf_link__destroy(*link);
+		bpf_link__destroy(*link);
 		*link = NULL;
 	}
 }
-- 
cgit 


From d3239425351a219ec779488ffb125352449c4106 Mon Sep 17 00:00:00 2001
From: Jian Yang <jianyang@google.com>
Date: Mon, 27 Jul 2020 14:14:38 -0700
Subject: selftests: txtimestamp: add flag for timestamp validation tolerance.

The txtimestamp selftest sets a fixed 500us tolerance. This value was
arrived at experimentally. Some platforms have higher variances. Make
this adjustable by adding the following flag:

-t N: tolerance (usec) for timestamp validation.

Signed-off-by: Jian Yang <jianyang@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/txtimestamp.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 011b0da6b033..490a8cca708a 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -64,6 +64,7 @@ static int cfg_payload_len = 10;
 static int cfg_poll_timeout = 100;
 static int cfg_delay_snd;
 static int cfg_delay_ack;
+static int cfg_delay_tolerance_usec = 500;
 static bool cfg_show_payload;
 static bool cfg_do_pktinfo;
 static bool cfg_busy_poll;
@@ -152,11 +153,12 @@ static void validate_key(int tskey, int tstype)
 
 static void validate_timestamp(struct timespec *cur, int min_delay)
 {
-	int max_delay = min_delay + 500 /* processing time upper bound */;
 	int64_t cur64, start64;
+	int max_delay;
 
 	cur64 = timespec_to_us64(cur);
 	start64 = timespec_to_us64(&ts_usr);
+	max_delay = min_delay + cfg_delay_tolerance_usec;
 
 	if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
 		fprintf(stderr, "ERROR: %lu us expected between %d and %d\n",
@@ -683,6 +685,7 @@ static void __attribute__((noreturn)) usage(const char *filepath)
 			"  -r:   use raw\n"
 			"  -R:   use raw (IP_HDRINCL)\n"
 			"  -S N: usec to sleep before reading error queue\n"
+			"  -t N: tolerance (usec) for timestamp validation\n"
 			"  -u:   use udp\n"
 			"  -v:   validate SND delay (usec)\n"
 			"  -V:   validate ACK delay (usec)\n"
@@ -697,7 +700,7 @@ static void parse_opt(int argc, char **argv)
 	int c;
 
 	while ((c = getopt(argc, argv,
-				"46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) {
+				"46bc:CeEFhIl:LnNp:PrRS:t:uv:V:x")) != -1) {
 		switch (c) {
 		case '4':
 			do_ipv6 = 0;
@@ -760,6 +763,9 @@ static void parse_opt(int argc, char **argv)
 		case 'S':
 			cfg_sleep_usec = strtoul(optarg, NULL, 10);
 			break;
+		case 't':
+			cfg_delay_tolerance_usec = strtoul(optarg, NULL, 10);
+			break;
 		case 'u':
 			proto_count++;
 			cfg_proto = SOCK_DGRAM;
-- 
cgit 


From 1acf8f90ea7ee59006d0474275922145ac291331 Mon Sep 17 00:00:00 2001
From: Jerry Crunchtime <jerry.c.t@web.de>
Date: Fri, 31 Jul 2020 17:08:01 +0200
Subject: libbpf: Fix register in PT_REGS MIPS macros

The o32, n32 and n64 calling conventions require the return
value to be stored in $v0 which maps to $2 register, i.e.,
the register 2.

Fixes: c1932cd ("bpf: Add MIPS support to samples/bpf.")
Signed-off-by: Jerry Crunchtime <jerry.c.t@web.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/43707d31-0210-e8f0-9226-1af140907641@web.de
---
 tools/lib/bpf/bpf_tracing.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 58eceb884df3..eebf020cbe3e 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -215,7 +215,7 @@ struct pt_regs;
 #define PT_REGS_PARM5(x) ((x)->regs[8])
 #define PT_REGS_RET(x) ((x)->regs[31])
 #define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
-#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_RC(x) ((x)->regs[2])
 #define PT_REGS_SP(x) ((x)->regs[29])
 #define PT_REGS_IP(x) ((x)->cp0_epc)
 
@@ -226,7 +226,7 @@ struct pt_regs;
 #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
 #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
 #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2])
 #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
 #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
 
-- 
cgit 


From fed61c4b584c5839543fe46a2ee55e21dd1bbf80 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 30 Jul 2020 21:25:57 +0200
Subject: selftests: mptcp: make 2nd net namespace use tcp syn cookies
 unconditionally

check we can establish connections also when syn cookies are in use.

Check that
MPTcpExtMPCapableSYNRX and MPTcpExtMPCapableACKRX increase for each
MPTCP test.

Check TcpExtSyncookiesSent and TcpExtSyncookiesRecv increase in netns2.

Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 47 ++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index c0589e071f20..57d75b7f6220 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -196,6 +196,9 @@ ip -net "$ns4" link set ns4eth3 up
 ip -net "$ns4" route add default via 10.0.3.2
 ip -net "$ns4" route add default via dead:beef:3::2
 
+# use TCP syn cookies, even if no flooding was detected.
+ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+
 set_ethtool_flags() {
 	local ns="$1"
 	local dev="$2"
@@ -407,6 +410,11 @@ do_transfer()
 		sleep 1
 	fi
 
+	local stat_synrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do  echo $count;done)
+	local stat_ackrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do  echo $count;done)
+	local stat_cookietx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do  echo $count;done)
+	local stat_cookierx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do  echo $count;done)
+
 	ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" &
 	local spid=$!
 
@@ -450,6 +458,45 @@ do_transfer()
 	check_transfer $cin $sout "file received by server"
 	rets=$?
 
+	local stat_synrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX  | while read a count c rest ;do  echo $count;done)
+	local stat_ackrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX  | while read a count c rest ;do  echo $count;done)
+
+	local stat_cookietx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do  echo $count;done)
+	local stat_cookierx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do  echo $count;done)
+
+	expect_synrx=$((stat_synrx_last_l))
+	expect_ackrx=$((stat_ackrx_last_l))
+
+	cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies)
+	cookies=${cookies##*=}
+
+	if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
+		expect_synrx=$((stat_synrx_last_l+1))
+		expect_ackrx=$((stat_ackrx_last_l+1))
+	fi
+	if [ $cookies -eq 2 ];then
+		if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
+			echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: did not advance"
+		fi
+		if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
+			echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: did not advance"
+		fi
+	else
+		if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
+			echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: changed"
+		fi
+		if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
+			echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: changed"
+		fi
+	fi
+
+	if [ $expect_synrx -ne $stat_synrx_now_l ] ;then
+		echo "${listener_ns} SYNRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}"
+	fi
+	if [ $expect_ackrx -ne $stat_ackrx_now_l ] ;then
+		echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}"
+	fi
+
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
 		echo "$duration [ OK ]"
 		cat "$capout"
-- 
cgit 


From 00587187ad3016382bd8d3fad0f3bba0a518ab40 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 30 Jul 2020 21:25:58 +0200
Subject: selftests: mptcp: add test cases for mptcp join tests with syn
 cookies

Also add test cases with MP_JOIN when tcp_syncookies sysctl is 2 (i.e.,
syncookies are always-on).

While at it, also print the test number and add the test number
to the pcap files that can be generated optionally.

This makes it easier to match the pcap to the test case.

Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 66 ++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index dd42c2f692d0..f39c1129ce5f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -72,6 +72,15 @@ reset()
 	init
 }
 
+reset_with_cookies()
+{
+	reset
+
+	for netns in "$ns1" "$ns2";do
+		ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2
+	done
+}
+
 for arg in "$@"; do
 	if [ "$arg" = "-c" ]; then
 		capture=1
@@ -138,7 +147,7 @@ do_transfer()
 			capuser="-Z $SUDO_USER"
 		fi
 
-		capfile="mp_join-${listener_ns}.pcap"
+		capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}")
 
 		echo "Capturing traffic for test $TEST_COUNT into $capfile"
 		ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
@@ -227,7 +236,7 @@ chk_join_nr()
 	local count
 	local dump_stats
 
-	printf "%-36s %s" "$msg" "syn"
+	printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn"
 	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$syn_nr" ]; then
@@ -354,4 +363,57 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
 run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "multiple subflows and signal" 3 3 3
 
+# single subflow, syncookies
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow with syn cookies" 1 1 1
+
+# multiple subflows with syn cookies
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows with syn cookies" 2 2 2
+
+# multiple subflows limited by server
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflows limited by server w cookies" 2 2 1
+
+# test signal address with cookies
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "signal address with syn cookies" 1 1 1
+
+# test cookie with subflow and signal
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflow and signal w cookies" 2 2 2
+
+# accept and use add_addr with additional subflows
+reset_with_cookies
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflows and signal w. cookies" 3 3 3
+
 exit $ret
-- 
cgit 


From 4939b2847d26c025e2e2118744226967f239a1ac Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 31 Jul 2020 15:09:14 -0700
Subject: bpf, selftests: Use single cgroup helpers for both test_sockmap/progs

Nearly every user of cgroup helpers does the same sequence of API calls. So
push these into a single helper cgroup_setup_and_join. The cases that do
a bit of extra logic are test_progs which currently uses an env variable
to decide if it needs to setup the cgroup environment or can use an
existingi environment. And then tests that are doing cgroup tests
themselves. We skip these cases for now.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159623335418.30208.15807461815525100199.stgit@john-XPS-13-9370
---
 tools/testing/selftests/bpf/cgroup_helpers.c       | 23 ++++++++++++++++++++++
 tools/testing/selftests/bpf/cgroup_helpers.h       |  1 +
 tools/testing/selftests/bpf/get_cgroup_id_user.c   | 14 ++-----------
 tools/testing/selftests/bpf/test_cgroup_storage.c  | 17 +---------------
 tools/testing/selftests/bpf/test_dev_cgroup.c      | 15 ++------------
 tools/testing/selftests/bpf/test_netcnt.c          | 17 ++--------------
 .../selftests/bpf/test_skb_cgroup_id_user.c        |  8 +-------
 tools/testing/selftests/bpf/test_sock.c            |  8 +-------
 tools/testing/selftests/bpf/test_sock_addr.c       |  8 +-------
 tools/testing/selftests/bpf/test_sock_fields.c     | 14 +++----------
 tools/testing/selftests/bpf/test_socket_cookie.c   |  8 +-------
 tools/testing/selftests/bpf/test_sockmap.c         | 18 ++---------------
 tools/testing/selftests/bpf/test_sysctl.c          |  8 +-------
 tools/testing/selftests/bpf/test_tcpbpf_user.c     |  8 +-------
 tools/testing/selftests/bpf/test_tcpnotify_user.c  |  8 +-------
 15 files changed, 43 insertions(+), 132 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 0fb910df5387..033051717ba5 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -290,3 +290,26 @@ free_mem:
 	free(fhp);
 	return ret;
 }
+
+int cgroup_setup_and_join(const char *path) {
+	int cg_fd;
+
+	if (setup_cgroup_environment()) {
+		fprintf(stderr, "Failed to setup cgroup environment\n");
+		return -EINVAL;
+	}
+
+	cg_fd = create_and_get_cgroup(path);
+	if (cg_fd < 0) {
+		fprintf(stderr, "Failed to create test cgroup\n");
+		cleanup_cgroup_environment();
+		return cg_fd;
+	}
+
+	if (join_cgroup(path)) {
+		fprintf(stderr, "Failed to join cgroup\n");
+		cleanup_cgroup_environment();
+		return -EINVAL;
+	}
+	return cg_fd;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index d64bb8957090..5fe3d88e4f0d 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -9,6 +9,7 @@
 	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
 
 
+int cgroup_setup_and_join(const char *path);
 int create_and_get_cgroup(const char *path);
 int join_cgroup(const char *path);
 int setup_cgroup_environment(void);
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index e8da7b39158d..b8d6aef99db4 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -58,20 +58,10 @@ int main(int argc, char **argv)
 	int exit_code = 1;
 	char buf[256];
 
-	err = setup_cgroup_environment();
-	if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
-		  errno))
+	cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+	if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
 		return 1;
 
-	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
-	if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
-		  cgroup_fd, errno))
-		goto cleanup_cgroup_env;
-
-	err = join_cgroup(TEST_CGROUP);
-	if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
-		goto cleanup_cgroup_env;
-
 	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
 		goto cleanup_cgroup_env;
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 655729004391..d946252a25bb 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -74,22 +74,7 @@ int main(int argc, char **argv)
 		goto out;
 	}
 
-	if (setup_cgroup_environment()) {
-		printf("Failed to setup cgroup environment\n");
-		goto err;
-	}
-
-	/* Create a cgroup, get fd, and join it */
-	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
-	if (cgroup_fd < 0) {
-		printf("Failed to create test cgroup\n");
-		goto err;
-	}
-
-	if (join_cgroup(TEST_CGROUP)) {
-		printf("Failed to join cgroup\n");
-		goto err;
-	}
+	cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
 
 	/* Attach the bpf program */
 	if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index d850fb9076b5..804dddd97d4c 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -33,21 +33,10 @@ int main(int argc, char **argv)
 		goto out;
 	}
 
-	if (setup_cgroup_environment()) {
-		printf("Failed to load DEV_CGROUP program\n");
-		goto err;
-	}
-
-	/* Create a cgroup, get fd, and join it */
-	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+	cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
 	if (cgroup_fd < 0) {
 		printf("Failed to create test cgroup\n");
-		goto err;
-	}
-
-	if (join_cgroup(TEST_CGROUP)) {
-		printf("Failed to join cgroup\n");
-		goto err;
+		goto out;
 	}
 
 	/* Attach bpf program */
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
index 7a68c9069639..a7b9a69f4fd5 100644
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -58,22 +58,9 @@ int main(int argc, char **argv)
 		goto out;
 	}
 
-	if (setup_cgroup_environment()) {
-		printf("Failed to load bpf program\n");
-		goto err;
-	}
-
-	/* Create a cgroup, get fd, and join it */
-	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
-	if (cgroup_fd < 0) {
-		printf("Failed to create test cgroup\n");
+	cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+	if (cgroup_fd < 0)
 		goto err;
-	}
-
-	if (join_cgroup(TEST_CGROUP)) {
-		printf("Failed to join cgroup\n");
-		goto err;
-	}
 
 	/* Attach bpf program */
 	if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
index 356351c0ac28..4a64306728ab 100644
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -160,16 +160,10 @@ int main(int argc, char **argv)
 		exit(EXIT_FAILURE);
 	}
 
-	if (setup_cgroup_environment())
-		goto err;
-
-	cgfd = create_and_get_cgroup(CGROUP_PATH);
+	cgfd = cgroup_setup_and_join(CGROUP_PATH);
 	if (cgfd < 0)
 		goto err;
 
-	if (join_cgroup(CGROUP_PATH))
-		goto err;
-
 	if (send_packet(argv[1]))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 52bf14955797..9613f7538840 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -464,16 +464,10 @@ int main(int argc, char **argv)
 	int cgfd = -1;
 	int err = 0;
 
-	if (setup_cgroup_environment())
-		goto err;
-
-	cgfd = create_and_get_cgroup(CG_PATH);
+	cgfd = cgroup_setup_and_join(CG_PATH);
 	if (cgfd < 0)
 		goto err;
 
-	if (join_cgroup(CG_PATH))
-		goto err;
-
 	if (run_tests(cgfd))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 0358814c67dc..b8c72c1d9cf7 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -1638,16 +1638,10 @@ int main(int argc, char **argv)
 		exit(err);
 	}
 
-	if (setup_cgroup_environment())
-		goto err;
-
-	cgfd = create_and_get_cgroup(CG_PATH);
+	cgfd = cgroup_setup_and_join(CG_PATH);
 	if (cgfd < 0)
 		goto err;
 
-	if (join_cgroup(CG_PATH))
-		goto err;
-
 	if (run_tests(cgfd))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
index f0fc103261a4..6c9f269c396d 100644
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/test_sock_fields.c
@@ -421,19 +421,11 @@ int main(int argc, char **argv)
 	struct bpf_object *obj;
 	struct bpf_map *map;
 
-	err = setup_cgroup_environment();
-	CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d",
-	      err, errno);
-
-	atexit(cleanup_cgroup_environment);
-
 	/* Create a cgroup, get fd, and join it */
-	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
-	CHECK(cgroup_fd == -1, "create_and_get_cgroup()",
+	cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+	CHECK(cgroup_fd < 0, "cgroup_setup_and_join()",
 	      "cgroup_fd:%d errno:%d", cgroup_fd, errno);
-
-	err = join_cgroup(TEST_CGROUP);
-	CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
+	atexit(cleanup_cgroup_environment);
 
 	err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
 	CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index 15653b0e26eb..154a8fd2a48d 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -191,16 +191,10 @@ int main(int argc, char **argv)
 	int cgfd = -1;
 	int err = 0;
 
-	if (setup_cgroup_environment())
-		goto err;
-
-	cgfd = create_and_get_cgroup(CG_PATH);
+	cgfd = cgroup_setup_and_join(CG_PATH);
 	if (cgfd < 0)
 		goto err;
 
-	if (join_cgroup(CG_PATH))
-		goto err;
-
 	if (run_test(cgfd))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 78789b27e573..9b6fb00dc7a0 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1963,23 +1963,9 @@ int main(int argc, char **argv)
 	}
 
 	if (!cg_fd) {
-		if (setup_cgroup_environment()) {
-			fprintf(stderr, "ERROR: cgroup env failed\n");
-			return -EINVAL;
-		}
-
-		cg_fd = create_and_get_cgroup(CG_PATH);
-		if (cg_fd < 0) {
-			fprintf(stderr,
-				"ERROR: (%i) open cg path failed: %s\n",
-				cg_fd, strerror(errno));
+		cg_fd = cgroup_setup_and_join(CG_PATH);
+		if (cg_fd < 0)
 			return cg_fd;
-		}
-
-		if (join_cgroup(CG_PATH)) {
-			fprintf(stderr, "ERROR: failed to join cgroup\n");
-			return -EINVAL;
-		}
 		cg_created = 1;
 	}
 
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index d196e2a4a6e0..a20a919244c0 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -1619,16 +1619,10 @@ int main(int argc, char **argv)
 	int cgfd = -1;
 	int err = 0;
 
-	if (setup_cgroup_environment())
-		goto err;
-
-	cgfd = create_and_get_cgroup(CG_PATH);
+	cgfd = cgroup_setup_and_join(CG_PATH);
 	if (cgfd < 0)
 		goto err;
 
-	if (join_cgroup(CG_PATH))
-		goto err;
-
 	if (run_tests(cgfd))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
index 3ae127620463..74a9e49988b6 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -102,16 +102,10 @@ int main(int argc, char **argv)
 	__u32 key = 0;
 	int rv;
 
-	if (setup_cgroup_environment())
-		goto err;
-
-	cg_fd = create_and_get_cgroup(cg_path);
+	cg_fd = cgroup_setup_and_join(cg_path);
 	if (cg_fd < 0)
 		goto err;
 
-	if (join_cgroup(cg_path))
-		goto err;
-
 	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
 		printf("FAILED: load_bpf_file failed for: %s\n", file);
 		goto err;
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index f9765ddf0761..8549b31716ab 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -86,16 +86,10 @@ int main(int argc, char **argv)
 	CPU_SET(0, &cpuset);
 	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
 
-	if (setup_cgroup_environment())
-		goto err;
-
-	cg_fd = create_and_get_cgroup(cg_path);
+	cg_fd = cgroup_setup_and_join(cg_path);
 	if (cg_fd < 0)
 		goto err;
 
-	if (join_cgroup(cg_path))
-		goto err;
-
 	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
 		printf("FAILED: load_bpf_file failed for: %s\n", file);
 		goto err;
-- 
cgit 


From 2e49527e52486dac910460b1b3f6fce6e21c6b48 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 31 Jul 2020 11:28:27 -0700
Subject: libbpf: Add bpf_link detach APIs

Add low-level bpf_link_detach() API. Also add higher-level bpf_link__detach()
one.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200731182830.286260-3-andriin@fb.com
---
 tools/include/uapi/linux/bpf.h |  5 +++++
 tools/lib/bpf/bpf.c            | 10 ++++++++++
 tools/lib/bpf/bpf.h            |  2 ++
 tools/lib/bpf/libbpf.c         |  5 +++++
 tools/lib/bpf/libbpf.h         |  1 +
 tools/lib/bpf/libbpf.map       |  2 ++
 6 files changed, 25 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index eb5e0c38eb2c..b134e679e9db 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -117,6 +117,7 @@ enum bpf_cmd {
 	BPF_LINK_GET_NEXT_ID,
 	BPF_ENABLE_STATS,
 	BPF_ITER_CREATE,
+	BPF_LINK_DETACH,
 };
 
 enum bpf_map_type {
@@ -634,6 +635,10 @@ union bpf_attr {
 		__u32		old_prog_fd;
 	} link_update;
 
+	struct {
+		__u32		link_fd;
+	} link_detach;
+
 	struct { /* struct used by BPF_ENABLE_STATS command */
 		__u32		type;
 	} enable_stats;
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e1bdf214f75f..eab14c97c15d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -603,6 +603,16 @@ int bpf_link_create(int prog_fd, int target_fd,
 	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
 }
 
+int bpf_link_detach(int link_fd)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.link_detach.link_fd = link_fd;
+
+	return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+}
+
 int bpf_link_update(int link_fd, int new_prog_fd,
 		    const struct bpf_link_update_opts *opts)
 {
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6d367e01d05e..28855fd5b5f4 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -178,6 +178,8 @@ LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
 			       enum bpf_attach_type attach_type,
 			       const struct bpf_link_create_opts *opts);
 
+LIBBPF_API int bpf_link_detach(int link_fd);
+
 struct bpf_link_update_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 	__u32 flags;	   /* extra flags */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b9f11f854985..7be04e45d29c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7748,6 +7748,11 @@ struct bpf_link *bpf_link__open(const char *path)
 	return link;
 }
 
+int bpf_link__detach(struct bpf_link *link)
+{
+	return bpf_link_detach(link->fd) ? -errno : 0;
+}
+
 int bpf_link__pin(struct bpf_link *link, const char *path)
 {
 	int err;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 9924385462ab..3ed1399bfbbc 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -229,6 +229,7 @@ LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
 LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
 					struct bpf_program *prog);
 LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
+LIBBPF_API int bpf_link__detach(struct bpf_link *link);
 LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
 
 LIBBPF_API struct bpf_link *
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index ca49a6a7e5b2..099863411f7d 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -273,6 +273,8 @@ LIBBPF_0.0.9 {
 
 LIBBPF_0.1.0 {
 	global:
+		bpf_link__detach;
+		bpf_link_detach;
 		bpf_map__ifindex;
 		bpf_map__key_size;
 		bpf_map__map_flags;
-- 
cgit 


From 90806ccc90bbd0150267a97ae4003269597a6a6c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 31 Jul 2020 11:28:28 -0700
Subject: selftests/bpf: Add link detach tests for cgroup, netns, and xdp
 bpf_links

Add bpf_link__detach() testing to selftests for cgroup, netns, and xdp
bpf_links.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200731182830.286260-4-andriin@fb.com
---
 .../testing/selftests/bpf/prog_tests/cgroup_link.c | 20 ++++++++-
 tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 51 ++++++++++------------
 tools/testing/selftests/bpf/prog_tests/xdp_link.c  | 14 ++++++
 tools/testing/selftests/bpf/testing_helpers.c      | 14 ++++++
 tools/testing/selftests/bpf/testing_helpers.h      |  3 ++
 5 files changed, 73 insertions(+), 29 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 6e04f8d1d15b..4d9b514b3fd9 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -2,6 +2,7 @@
 
 #include <test_progs.h>
 #include "cgroup_helpers.h"
+#include "testing_helpers.h"
 #include "test_cgroup_link.skel.h"
 
 static __u32 duration = 0;
@@ -37,7 +38,8 @@ void test_cgroup_link(void)
 	int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
 	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
 	struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
-	__u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
+	__u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags, prog_id;
+	struct bpf_link_info info;
 	int i = 0, err, prog_fd;
 	bool detach_legacy = false;
 
@@ -219,6 +221,22 @@ void test_cgroup_link(void)
 	/* BPF programs should still get called */
 	ping_and_check(0, cg_nr);
 
+	prog_id = link_info_prog_id(links[0], &info);
+	CHECK(prog_id == 0, "link_info", "failed\n");
+	CHECK(info.cgroup.cgroup_id == 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+	err = bpf_link__detach(links[0]);
+	if (CHECK(err, "link_detach", "failed %d\n", err))
+		goto cleanup;
+
+	/* cgroup_id should be zero in link_info */
+	prog_id = link_info_prog_id(links[0], &info);
+	CHECK(prog_id == 0, "link_info", "failed\n");
+	CHECK(info.cgroup.cgroup_id != 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+	/* First BPF program shouldn't be called anymore */
+	ping_and_check(0, cg_nr - 1);
+
 	/* leave cgroup and remove them, don't detach programs */
 	cleanup_cgroup_environment();
 
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 379da6f10ee9..c571584c00f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -34,6 +34,7 @@
 #include "bpf_util.h"
 #include "cgroup_helpers.h"
 #include "network_helpers.h"
+#include "testing_helpers.h"
 #include "test_sk_lookup.skel.h"
 
 /* External (address, port) pairs the client sends packets to. */
@@ -469,34 +470,10 @@ static int update_lookup_map(struct bpf_map *map, int index, int sock_fd)
 	return 0;
 }
 
-static __u32 link_info_prog_id(struct bpf_link *link)
-{
-	struct bpf_link_info info = {};
-	__u32 info_len = sizeof(info);
-	int link_fd, err;
-
-	link_fd = bpf_link__fd(link);
-	if (CHECK(link_fd < 0, "bpf_link__fd", "failed\n")) {
-		errno = -link_fd;
-		log_err("bpf_link__fd failed");
-		return 0;
-	}
-
-	err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
-	if (CHECK(err, "bpf_obj_get_info_by_fd", "failed\n")) {
-		log_err("bpf_obj_get_info_by_fd");
-		return 0;
-	}
-	if (CHECK(info_len != sizeof(info), "bpf_obj_get_info_by_fd",
-		  "unexpected info len %u\n", info_len))
-		return 0;
-
-	return info.prog_id;
-}
-
 static void query_lookup_prog(struct test_sk_lookup *skel)
 {
 	struct bpf_link *link[3] = {};
+	struct bpf_link_info info;
 	__u32 attach_flags = 0;
 	__u32 prog_ids[3] = {};
 	__u32 prog_cnt = 3;
@@ -534,18 +511,36 @@ static void query_lookup_prog(struct test_sk_lookup *skel)
 	if (CHECK(prog_cnt != 3, "bpf_prog_query",
 		  "wrong program count on query: %u", prog_cnt))
 		goto detach;
-	prog_id = link_info_prog_id(link[0]);
+	prog_id = link_info_prog_id(link[0], &info);
 	CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
 	      "invalid program #0 id on query: %u != %u\n",
 	      prog_ids[0], prog_id);
-	prog_id = link_info_prog_id(link[1]);
+	CHECK(info.netns.netns_ino == 0, "netns_ino",
+	      "unexpected netns_ino: %u\n", info.netns.netns_ino);
+	prog_id = link_info_prog_id(link[1], &info);
 	CHECK(prog_ids[1] != prog_id, "bpf_prog_query",
 	      "invalid program #1 id on query: %u != %u\n",
 	      prog_ids[1], prog_id);
-	prog_id = link_info_prog_id(link[2]);
+	CHECK(info.netns.netns_ino == 0, "netns_ino",
+	      "unexpected netns_ino: %u\n", info.netns.netns_ino);
+	prog_id = link_info_prog_id(link[2], &info);
 	CHECK(prog_ids[2] != prog_id, "bpf_prog_query",
 	      "invalid program #2 id on query: %u != %u\n",
 	      prog_ids[2], prog_id);
+	CHECK(info.netns.netns_ino == 0, "netns_ino",
+	      "unexpected netns_ino: %u\n", info.netns.netns_ino);
+
+	err = bpf_link__detach(link[0]);
+	if (CHECK(err, "link_detach", "failed %d\n", err))
+		goto detach;
+
+	/* prog id is still there, but netns_ino is zeroed out */
+	prog_id = link_info_prog_id(link[0], &info);
+	CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+	      "invalid program #0 id on query: %u != %u\n",
+	      prog_ids[0], prog_id);
+	CHECK(info.netns.netns_ino != 0, "netns_ino",
+	      "unexpected netns_ino: %u\n", info.netns.netns_ino);
 
 detach:
 	if (link[2])
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 52cba6795d40..6f814999b395 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -131,6 +131,20 @@ void test_xdp_link(void)
 	CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
 	      "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
 
+	err = bpf_link__detach(link);
+	if (CHECK(err, "link_detach", "failed %d\n", err))
+		goto cleanup;
+
+	memset(&link_info, 0, sizeof(link_info));
+	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+	if (CHECK(err, "link_info", "failed: %d\n", err))
+		goto cleanup;
+	CHECK(link_info.prog_id != id1, "link_prog_id",
+	      "got %u != exp %u\n", link_info.prog_id, id1);
+	/* ifindex should be zeroed out */
+	CHECK(link_info.xdp.ifindex != 0, "link_ifindex",
+	      "got %u != exp %u\n", link_info.xdp.ifindex, 0);
+
 cleanup:
 	test_xdp_link__destroy(skel1);
 	test_xdp_link__destroy(skel2);
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 0af6337a8962..800d503e5cb4 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -64,3 +64,17 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
 
 	return 0;
 }
+
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
+{
+	__u32 info_len = sizeof(*info);
+	int err;
+
+	memset(info, 0, sizeof(*info));
+	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), info, &info_len);
+	if (err) {
+		printf("failed to get link info: %d\n", -errno);
+		return 0;
+	}
+	return info->prog_id;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 923b51762759..d4f8e749611b 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -1,5 +1,8 @@
 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 /* Copyright (C) 2020 Facebook, Inc. */
 #include <stdbool.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 
 int parse_num_list(const char *s, bool **set, int *set_len);
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info);
-- 
cgit 


From 0e8c7c07f090668566db2030a027a360ffd00938 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 31 Jul 2020 11:28:29 -0700
Subject: tools/bpftool: Add `link detach` subcommand

Add ability to force-detach BPF link. Also add missing error message, if
specified link ID is wrong.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200731182830.286260-5-andriin@fb.com
---
 tools/bpf/bpftool/link.c | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 326b8fdf0243..1b793759170e 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -22,6 +22,8 @@ static const char * const link_type_name[] = {
 
 static int link_parse_fd(int *argc, char ***argv)
 {
+	int fd;
+
 	if (is_prefix(**argv, "id")) {
 		unsigned int id;
 		char *endptr;
@@ -35,7 +37,10 @@ static int link_parse_fd(int *argc, char ***argv)
 		}
 		NEXT_ARGP();
 
-		return bpf_link_get_fd_by_id(id);
+		fd = bpf_link_get_fd_by_id(id);
+		if (fd < 0)
+			p_err("failed to get link with ID %d: %s", id, strerror(errno));
+		return fd;
 	} else if (is_prefix(**argv, "pinned")) {
 		char *path;
 
@@ -316,6 +321,34 @@ static int do_pin(int argc, char **argv)
 	return err;
 }
 
+static int do_detach(int argc, char **argv)
+{
+	int err, fd;
+
+	if (argc != 2) {
+		p_err("link specifier is invalid or missing\n");
+		return 1;
+	}
+
+	fd = link_parse_fd(&argc, &argv);
+	if (fd < 0)
+		return 1;
+
+	err = bpf_link_detach(fd);
+	if (err)
+		err = -errno;
+	close(fd);
+	if (err) {
+		p_err("failed link detach: %s", strerror(-err));
+		return 1;
+	}
+
+	if (json_output)
+		jsonw_null(json_wtr);
+
+	return 0;
+}
+
 static int do_help(int argc, char **argv)
 {
 	if (json_output) {
@@ -326,6 +359,7 @@ static int do_help(int argc, char **argv)
 	fprintf(stderr,
 		"Usage: %1$s %2$s { show | list }   [LINK]\n"
 		"       %1$s %2$s pin        LINK  FILE\n"
+		"       %1$s %2$s detach     LINK\n"
 		"       %1$s %2$s help\n"
 		"\n"
 		"       " HELP_SPEC_LINK "\n"
@@ -341,6 +375,7 @@ static const struct cmd cmds[] = {
 	{ "list",	do_show },
 	{ "help",	do_help },
 	{ "pin",	do_pin },
+	{ "detach",	do_detach },
 	{ 0 }
 };
 
-- 
cgit 


From e85f99aa7760e74bb5a7e8515948f99c264a275f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 31 Jul 2020 11:28:30 -0700
Subject: tools/bpftool: Add documentation and bash-completion for `link
 detach`

Add info on link detach sub-command to man page. Add detach to bash-completion
as well.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com.
Link: https://lore.kernel.org/bpf/20200731182830.286260-6-andriin@fb.com
---
 tools/bpf/bpftool/Documentation/bpftool-link.rst | 8 ++++++++
 tools/bpf/bpftool/bash-completion/bpftool        | 4 ++--
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 38b0949a185b..4a52e7a93339 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -21,6 +21,7 @@ LINK COMMANDS
 
 |	**bpftool** **link { show | list }** [*LINK*]
 |	**bpftool** **link pin** *LINK* *FILE*
+|	**bpftool** **link detach *LINK*
 |	**bpftool** **link help**
 |
 |	*LINK* := { **id** *LINK_ID* | **pinned** *FILE* }
@@ -49,6 +50,13 @@ DESCRIPTION
 		  contain a dot character ('.'), which is reserved for future
 		  extensions of *bpffs*.
 
+	**bpftool link detach** *LINK*
+		  Force-detach link *LINK*. BPF link and its underlying BPF
+		  program will stay valid, but they will be detached from the
+		  respective BPF hook and BPF link will transition into
+		  a defunct state until last open file descriptor for that
+		  link is closed.
+
 	**bpftool link help**
 		  Print short help message.
 
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 257fa310ea2b..f53ed2f1a4aa 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -1122,7 +1122,7 @@ _bpftool()
             ;;
         link)
             case $command in
-                show|list|pin)
+                show|list|pin|detach)
                     case $prev in
                         id)
                             _bpftool_get_link_ids
@@ -1139,7 +1139,7 @@ _bpftool()
                     COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
                     return 0
                     ;;
-                pin)
+                pin|detach)
                     if [[ $prev == "$command" ]]; then
                         COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
                     else
-- 
cgit 


From b5cc46cdff76cdfe2cb45ab2636d847efb1012cc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 31 Jul 2020 13:49:57 -0700
Subject: selftests/bpf: Fix spurious test failures in core_retro selftest

core_retro selftest uses BPF program that's triggered on sys_enter
system-wide, but has no protection from some unrelated process doing syscall
while selftest is running. This leads to occasional test failures with
unexpected PIDs being returned. Fix that by filtering out all processes that
are not test_progs process.

Fixes: fcda189a5133 ("selftests/bpf: Add test relying only on CO-RE and no recent kernel features")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200731204957.2047119-1-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/core_retro.c |  8 ++++++--
 tools/testing/selftests/bpf/progs/test_core_retro.c | 13 +++++++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c
index 78e30d3a23d5..6acb0e94d4d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_retro.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c
@@ -6,7 +6,7 @@
 
 void test_core_retro(void)
 {
-	int err, zero = 0, res, duration = 0;
+	int err, zero = 0, res, duration = 0, my_pid = getpid();
 	struct test_core_retro *skel;
 
 	/* load program */
@@ -14,6 +14,10 @@ void test_core_retro(void)
 	if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
 		goto out_close;
 
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0);
+	if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno))
+		goto out_close;
+
 	/* attach probe */
 	err = test_core_retro__attach(skel);
 	if (CHECK(err, "attach_kprobe", "err %d\n", err))
@@ -26,7 +30,7 @@ void test_core_retro(void)
 	if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno))
 		goto out_close;
 
-	CHECK(res != getpid(), "pid_check", "got %d != exp %d\n", res, getpid());
+	CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid);
 
 out_close:
 	test_core_retro__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/test_core_retro.c b/tools/testing/selftests/bpf/progs/test_core_retro.c
index 75c60c3c29cf..20861ec2f674 100644
--- a/tools/testing/selftests/bpf/progs/test_core_retro.c
+++ b/tools/testing/selftests/bpf/progs/test_core_retro.c
@@ -8,6 +8,13 @@ struct task_struct {
 	int tgid;
 } __attribute__((preserve_access_index));
 
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} exp_tgid_map SEC(".maps");
+
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 1);
@@ -21,6 +28,12 @@ int handle_sys_enter(void *ctx)
 	struct task_struct *task = (void *)bpf_get_current_task();
 	int tgid = BPF_CORE_READ(task, tgid);
 	int zero = 0;
+	int real_tgid = bpf_get_current_pid_tgid() >> 32;
+	int *exp_tgid = bpf_map_lookup_elem(&exp_tgid_map, &zero);
+
+	/* only pass through sys_enters from test process */
+	if (!exp_tgid || *exp_tgid != real_tgid)
+		return 0;
 
 	bpf_map_update_elem(&results, &zero, &tgid, 0);
 
-- 
cgit 


From 2ef740da4fef947fb4614808fb61ca8ccb313e0b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 2 Aug 2020 03:26:21 +0200
Subject: selftests: netfilter: add meta iif/oif match test

simple test case, but would have caught this:

FAIL: iifgroupcount, want "packets 2", got
table inet filter {
        counter iifgroupcount {
                packets 0 bytes 0
        }
}

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/netfilter/Makefile    |   2 +-
 tools/testing/selftests/netfilter/nft_meta.sh | 124 ++++++++++++++++++++++++++
 2 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/nft_meta.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index a179f0dca8ce..a374e10ef506 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -4,7 +4,7 @@
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
 	conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
 	nft_concat_range.sh nft_conntrack_helper.sh \
-	nft_queue.sh
+	nft_queue.sh nft_meta.sh
 
 LDLIBS = -lmnl
 TEST_GEN_FILES =  nf-queue
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh
new file mode 100755
index 000000000000..d250b84dd5bc
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_meta.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+# check iif/iifname/oifgroup/iiftype match.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+cleanup()
+{
+	ip netns del "$ns0"
+}
+
+ip netns add "$ns0"
+ip -net "$ns0" link set lo up
+ip -net "$ns0" addr add 127.0.0.1 dev lo
+
+trap cleanup EXIT
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table inet filter {
+	counter iifcount {}
+	counter iifnamecount {}
+	counter iifgroupcount {}
+	counter iiftypecount {}
+	counter infproto4count {}
+	counter il4protocounter {}
+	counter imarkcounter {}
+
+	counter oifcount {}
+	counter oifnamecount {}
+	counter oifgroupcount {}
+	counter oiftypecount {}
+	counter onfproto4count {}
+	counter ol4protocounter {}
+	counter oskuidcounter {}
+	counter oskgidcounter {}
+	counter omarkcounter {}
+
+	chain input {
+		type filter hook input priority 0; policy accept;
+
+		meta iif lo counter name "iifcount"
+		meta iifname "lo" counter name "iifnamecount"
+		meta iifgroup "default" counter name "iifgroupcount"
+		meta iiftype "loopback" counter name "iiftypecount"
+		meta nfproto ipv4 counter name "infproto4count"
+		meta l4proto icmp counter name "il4protocounter"
+		meta mark 42 counter name "imarkcounter"
+	}
+
+	chain output {
+		type filter hook output priority 0; policy accept;
+		meta oif lo counter name "oifcount" counter
+		meta oifname "lo" counter name "oifnamecount"
+		meta oifgroup "default" counter name "oifgroupcount"
+		meta oiftype "loopback" counter name "oiftypecount"
+		meta nfproto ipv4 counter name "onfproto4count"
+		meta l4proto icmp counter name "ol4protocounter"
+		meta skuid 0 counter name "oskuidcounter"
+		meta skgid 0 counter name "oskgidcounter"
+		meta mark 42 counter name "omarkcounter"
+	}
+}
+EOF
+
+if [ $? -ne 0 ]; then
+	echo "SKIP: Could not add test ruleset"
+	exit $ksft_skip
+fi
+
+ret=0
+
+check_one_counter()
+{
+	local cname="$1"
+	local want="packets $2"
+	local verbose="$3"
+
+	cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want")
+	if [ $? -ne 0 ];then
+		echo "FAIL: $cname, want \"$want\", got"
+		ret=1
+		ip netns exec "$ns0" nft list counter inet filter $counter
+	fi
+}
+
+check_lo_counters()
+{
+	local want="$1"
+	local verbose="$2"
+	local counter
+
+	for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
+		       oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
+		       il4protocounter \
+		       ol4protocounter \
+	     ; do
+		check_one_counter "$counter" "$want" "$verbose"
+	done
+}
+
+check_lo_counters "0" false
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 -m 42 > /dev/null
+
+check_lo_counters "2" true
+
+check_one_counter oskuidcounter "1" true
+check_one_counter oskgidcounter "1" true
+check_one_counter imarkcounter "1" true
+check_one_counter omarkcounter "1" true
+
+if [ $ret -eq 0 ];then
+	echo "OK: nftables meta iif/oif counters at expected values"
+fi
+
+exit $ret
-- 
cgit 


From a278f3d8191228212c553a5d4303fa603214b717 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 30 Jul 2020 19:42:44 -0700
Subject: tools, build: Propagate build failures from
 tools/build/Makefile.build

The '&&' command seems to have a bad effect when $(cmd_$(1)) exits with
non-zero effect: the command failure is masked (despite `set -e`) and all but
the first command of $(dep-cmd) is executed (successfully, as they are mostly
printfs), thus overall returning 0 in the end.

This means in practice that despite compilation errors, tools's build Makefile
will return success. We see this very reliably with libbpf's Makefile, which
doesn't get compilation error propagated properly. This in turns causes issues
with selftests build, as well as bpftool and other projects that rely on
building libbpf.

The fix is simple: don't use &&. Given `set -e`, we don't need to chain
commands with &&. The shell will exit on first failure, giving desired
behavior and propagating error properly.

Fixes: 275e2d95591e ("tools build: Move dependency copy into function")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/bpf/20200731024244.872574-1-andriin@fb.com
---
 tools/build/Build.include | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/build/Build.include b/tools/build/Build.include
index 9ec01f4454f9..585486e40995 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -74,7 +74,8 @@ dep-cmd = $(if $(wildcard $(fixdep)),
 #                   dependencies in the cmd file
 if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)),         \
                   @set -e;                                         \
-                  $(echo-cmd) $(cmd_$(1)) && $(dep-cmd))
+                  $(echo-cmd) $(cmd_$(1));                         \
+                  $(dep-cmd))
 
 # if_changed      - execute command if any prerequisite is newer than
 #                   target, or command line has changed
-- 
cgit 


From 041549b7b2c7811ec40e705c439211f00ade2dda Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Sun, 2 Aug 2020 19:15:40 +0800
Subject: tools, bpftool: Fix wrong return value in do_dump()

In case of btf_id does not exist, a negative error code -ENOENT
should be returned.

Fixes: c93cc69004df3 ("bpftool: add ability to dump BTF types")
Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200802111540.5384-1-tianjia.zhang@linux.alibaba.com
---
 tools/bpf/bpftool/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index fc9bc7a23db6..37c091308714 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -596,7 +596,7 @@ static int do_dump(int argc, char **argv)
 			goto done;
 		}
 		if (!btf) {
-			err = ENOENT;
+			err = -ENOENT;
 			p_err("can't find btf with ID (%u)", btf_id);
 			goto done;
 		}
-- 
cgit 


From 94a1fedd63edb672933bef44ca9213937e377c05 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 1 Aug 2020 18:32:17 -0700
Subject: libbpf: Add btf__parse_raw() and generic btf__parse() APIs

Add public APIs to parse BTF from raw data file (e.g.,
/sys/kernel/btf/vmlinux), as well as generic btf__parse(), which will try to
determine correct format, currently either raw or ELF.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200802013219.864880-2-andriin@fb.com
---
 tools/lib/bpf/btf.c      | 114 ++++++++++++++++++++++++++++++++---------------
 tools/lib/bpf/btf.h      |   5 ++-
 tools/lib/bpf/libbpf.map |   2 +
 3 files changed, 83 insertions(+), 38 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index ded5b29965f9..856b09a04563 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -562,6 +562,83 @@ done:
 	return btf;
 }
 
+struct btf *btf__parse_raw(const char *path)
+{
+	void *data = NULL;
+	struct btf *btf;
+	FILE *f = NULL;
+	__u16 magic;
+	int err = 0;
+	long sz;
+
+	f = fopen(path, "rb");
+	if (!f) {
+		err = -errno;
+		goto err_out;
+	}
+
+	/* check BTF magic */
+	if (fread(&magic, 1, sizeof(magic), f) < sizeof(magic)) {
+		err = -EIO;
+		goto err_out;
+	}
+	if (magic != BTF_MAGIC) {
+		/* definitely not a raw BTF */
+		err = -EPROTO;
+		goto err_out;
+	}
+
+	/* get file size */
+	if (fseek(f, 0, SEEK_END)) {
+		err = -errno;
+		goto err_out;
+	}
+	sz = ftell(f);
+	if (sz < 0) {
+		err = -errno;
+		goto err_out;
+	}
+	/* rewind to the start */
+	if (fseek(f, 0, SEEK_SET)) {
+		err = -errno;
+		goto err_out;
+	}
+
+	/* pre-alloc memory and read all of BTF data */
+	data = malloc(sz);
+	if (!data) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	if (fread(data, 1, sz, f) < sz) {
+		err = -EIO;
+		goto err_out;
+	}
+
+	/* finally parse BTF data */
+	btf = btf__new(data, sz);
+
+err_out:
+	free(data);
+	if (f)
+		fclose(f);
+	return err ? ERR_PTR(err) : btf;
+}
+
+struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
+{
+	struct btf *btf;
+
+	if (btf_ext)
+		*btf_ext = NULL;
+
+	btf = btf__parse_raw(path);
+	if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
+		return btf;
+
+	return btf__parse_elf(path, btf_ext);
+}
+
 static int compare_vsi_off(const void *_a, const void *_b)
 {
 	const struct btf_var_secinfo *a = _a;
@@ -2951,41 +3028,6 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
 	return 0;
 }
 
-static struct btf *btf_load_raw(const char *path)
-{
-	struct btf *btf;
-	size_t read_cnt;
-	struct stat st;
-	void *data;
-	FILE *f;
-
-	if (stat(path, &st))
-		return ERR_PTR(-errno);
-
-	data = malloc(st.st_size);
-	if (!data)
-		return ERR_PTR(-ENOMEM);
-
-	f = fopen(path, "rb");
-	if (!f) {
-		btf = ERR_PTR(-errno);
-		goto cleanup;
-	}
-
-	read_cnt = fread(data, 1, st.st_size, f);
-	fclose(f);
-	if (read_cnt < st.st_size) {
-		btf = ERR_PTR(-EBADF);
-		goto cleanup;
-	}
-
-	btf = btf__new(data, read_cnt);
-
-cleanup:
-	free(data);
-	return btf;
-}
-
 /*
  * Probe few well-known locations for vmlinux kernel image and try to load BTF
  * data out of it to use for target BTF.
@@ -3021,7 +3063,7 @@ struct btf *libbpf_find_kernel_btf(void)
 			continue;
 
 		if (locations[i].raw_btf)
-			btf = btf_load_raw(path);
+			btf = btf__parse_raw(path);
 		else
 			btf = btf__parse_elf(path, NULL);
 
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 491c7b41ffdc..f4a1a1d2b9a3 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -64,8 +64,9 @@ struct btf_ext_header {
 
 LIBBPF_API void btf__free(struct btf *btf);
 LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
-LIBBPF_API struct btf *btf__parse_elf(const char *path,
-				      struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_raw(const char *path);
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 099863411f7d..0c4722bfdd0a 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -293,5 +293,7 @@ LIBBPF_0.1.0 {
 		bpf_program__is_sk_lookup;
 		bpf_program__set_autoload;
 		bpf_program__set_sk_lookup;
+		btf__parse;
+		btf__parse_raw;
 		btf__set_fd;
 } LIBBPF_0.0.9;
-- 
cgit 


From 8526df04570f5698c97ac661ad1f2f35293557a7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 1 Aug 2020 18:32:18 -0700
Subject: tools/bpftool: Use libbpf's btf__parse() API for parsing BTF from
 file

Use generic libbpf API to parse BTF data from file, instead of re-implementing
it in bpftool.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200802013219.864880-3-andriin@fb.com
---
 tools/bpf/bpftool/btf.c | 54 +------------------------------------------------
 1 file changed, 1 insertion(+), 53 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 37c091308714..8ab142ff5eac 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -422,54 +422,6 @@ done:
 	return err;
 }
 
-static struct btf *btf__parse_raw(const char *file)
-{
-	struct btf *btf;
-	struct stat st;
-	__u8 *buf;
-	FILE *f;
-
-	if (stat(file, &st))
-		return NULL;
-
-	f = fopen(file, "rb");
-	if (!f)
-		return NULL;
-
-	buf = malloc(st.st_size);
-	if (!buf) {
-		btf = ERR_PTR(-ENOMEM);
-		goto exit_close;
-	}
-
-	if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) {
-		btf = ERR_PTR(-EINVAL);
-		goto exit_free;
-	}
-
-	btf = btf__new(buf, st.st_size);
-
-exit_free:
-	free(buf);
-exit_close:
-	fclose(f);
-	return btf;
-}
-
-static bool is_btf_raw(const char *file)
-{
-	__u16 magic = 0;
-	int fd, nb_read;
-
-	fd = open(file, O_RDONLY);
-	if (fd < 0)
-		return false;
-
-	nb_read = read(fd, &magic, sizeof(magic));
-	close(fd);
-	return nb_read == sizeof(magic) && magic == BTF_MAGIC;
-}
-
 static int do_dump(int argc, char **argv)
 {
 	struct btf *btf = NULL;
@@ -547,11 +499,7 @@ static int do_dump(int argc, char **argv)
 		}
 		NEXT_ARG();
 	} else if (is_prefix(src, "file")) {
-		if (is_btf_raw(*argv))
-			btf = btf__parse_raw(*argv);
-		else
-			btf = btf__parse_elf(*argv, NULL);
-
+		btf = btf__parse(*argv, NULL);
 		if (IS_ERR(btf)) {
 			err = -PTR_ERR(btf);
 			btf = NULL;
-- 
cgit 


From f86ca3cffef153555a3f4755b3a44881d962754f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 1 Aug 2020 18:32:19 -0700
Subject: tools/resolve_btfids: Use libbpf's btf__parse() API

Instead of re-implementing generic BTF parsing logic, use libbpf's API.
Also add .gitignore for resolve_btfids's build artifacts.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200802013219.864880-4-andriin@fb.com
---
 tools/bpf/resolve_btfids/.gitignore |  4 +++
 tools/bpf/resolve_btfids/main.c     | 58 +------------------------------------
 2 files changed, 5 insertions(+), 57 deletions(-)
 create mode 100644 tools/bpf/resolve_btfids/.gitignore

(limited to 'tools')

diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore
new file mode 100644
index 000000000000..a026df7dc280
--- /dev/null
+++ b/tools/bpf/resolve_btfids/.gitignore
@@ -0,0 +1,4 @@
+/FEATURE-DUMP.libbpf
+/bpf_helper_defs.h
+/fixdep
+/resolve_btfids
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 6956b6350cad..52d883325a23 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -403,62 +403,6 @@ static int symbols_collect(struct object *obj)
 	return 0;
 }
 
-static struct btf *btf__parse_raw(const char *file)
-{
-	struct btf *btf;
-	struct stat st;
-	__u8 *buf;
-	FILE *f;
-
-	if (stat(file, &st))
-		return NULL;
-
-	f = fopen(file, "rb");
-	if (!f)
-		return NULL;
-
-	buf = malloc(st.st_size);
-	if (!buf) {
-		btf = ERR_PTR(-ENOMEM);
-		goto exit_close;
-	}
-
-	if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) {
-		btf = ERR_PTR(-EINVAL);
-		goto exit_free;
-	}
-
-	btf = btf__new(buf, st.st_size);
-
-exit_free:
-	free(buf);
-exit_close:
-	fclose(f);
-	return btf;
-}
-
-static bool is_btf_raw(const char *file)
-{
-	__u16 magic = 0;
-	int fd, nb_read;
-
-	fd = open(file, O_RDONLY);
-	if (fd < 0)
-		return false;
-
-	nb_read = read(fd, &magic, sizeof(magic));
-	close(fd);
-	return nb_read == sizeof(magic) && magic == BTF_MAGIC;
-}
-
-static struct btf *btf_open(const char *path)
-{
-	if (is_btf_raw(path))
-		return btf__parse_raw(path);
-	else
-		return btf__parse_elf(path, NULL);
-}
-
 static int symbols_resolve(struct object *obj)
 {
 	int nr_typedefs = obj->nr_typedefs;
@@ -469,7 +413,7 @@ static int symbols_resolve(struct object *obj)
 	struct btf *btf;
 	__u32 nr;
 
-	btf = btf_open(obj->btf ?: obj->path);
+	btf = btf__parse(obj->btf ?: obj->path, NULL);
 	err = libbpf_get_error(btf);
 	if (err) {
 		pr_err("FAILED: load BTF from %s: %s",
-- 
cgit 


From 21594c44083c375697d418729c4b2e4522cf9f70 Mon Sep 17 00:00:00 2001
From: Dmitry Yakunin <zeil@yandex-team.ru>
Date: Mon, 3 Aug 2020 12:05:45 +0300
Subject: bpf: Allow to specify ifindex for skb in bpf_prog_test_run_skb

Now skb->dev is unconditionally set to the loopback device in current net
namespace. But if we want to test bpf program which contains code branch
based on ifindex condition (eg filters out localhost packets) it is useful
to allow specifying of ifindex from userspace. This patch adds such option
through ctx_in (__sk_buff) parameter.

Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200803090545.82046-3-zeil@yandex-team.ru
---
 tools/testing/selftests/bpf/prog_tests/skb_ctx.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 7021b92af313..25de86af2d03 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -11,6 +11,7 @@ void test_skb_ctx(void)
 		.cb[3] = 4,
 		.cb[4] = 5,
 		.priority = 6,
+		.ifindex = 1,
 		.tstamp = 7,
 		.wire_len = 100,
 		.gso_segs = 8,
@@ -92,6 +93,10 @@ void test_skb_ctx(void)
 		   "ctx_out_priority",
 		   "skb->priority == %d, expected %d\n",
 		   skb.priority, 7);
+	CHECK_ATTR(skb.ifindex != 1,
+		   "ctx_out_ifindex",
+		   "skb->ifindex == %d, expected %d\n",
+		   skb.ifindex, 1);
 	CHECK_ATTR(skb.tstamp != 8,
 		   "ctx_out_tstamp",
 		   "skb->tstamp == %lld, expected %d\n",
-- 
cgit 


From 8fb6ac457d5be8516eeed2b92342f7f95c448275 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Mon, 3 Aug 2020 19:11:41 +0300
Subject: selftests: mlxsw: RED: Test offload of trapping on RED qevents

Add a selftest for RED early_drop and mark qevents when a trap action is
attached at the associated block.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/sch_red_core.sh    | 35 ++++++++++++++++++----
 .../selftests/drivers/net/mlxsw/sch_red_ets.sh     | 11 +++++++
 2 files changed, 40 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index 45042105ead7..517297a14ecf 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -568,17 +568,12 @@ do_drop_test()
 	busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null
 	check_fail $? "Spurious packets observed without buffer pressure"
 
-	qevent_rule_uninstall_$subtest
-
 	# Push to the queue until it's at the limit. The configured limit is
 	# rounded by the qdisc and then by the driver, so this is the best we
-	# can do to get to the real limit of the system. Do this with the rules
-	# uninstalled so that the inevitable drops don't get counted.
+	# can do to get to the real limit of the system.
 	build_backlog $vlan $((3 * limit / 2)) udp >/dev/null
 
-	qevent_rule_install_$subtest
 	base=$($fetch_counter)
-
 	send_packets $vlan udp 11
 
 	now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter)
@@ -631,3 +626,31 @@ do_drop_mirror_test()
 
 	tc filter del dev $h2 ingress pref 1 handle 101 flower
 }
+
+qevent_rule_install_trap()
+{
+	tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+	   action trap hw_stats disabled
+}
+
+qevent_rule_uninstall_trap()
+{
+	tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+qevent_counter_fetch_trap()
+{
+	local trap_name=$1; shift
+
+	devlink_trap_rx_packets_get "$trap_name"
+}
+
+do_drop_trap_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local trap_name=$1; shift
+
+	do_drop_test "$vlan" "$limit" "$trap_name" trap \
+		     "qevent_counter_fetch_trap $trap_name"
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index c8968b041bea..3f007c5f8361 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -8,6 +8,7 @@ ALL_TESTS="
 	red_test
 	mc_backlog_test
 	red_mirror_test
+	red_trap_test
 "
 : ${QDISC:=ets}
 source sch_red_core.sh
@@ -94,6 +95,16 @@ red_mirror_test()
 	uninstall_qdisc
 }
 
+red_trap_test()
+{
+	install_qdisc qevent early_drop block 10
+
+	do_drop_trap_test 10 $BACKLOG1 early_drop
+	do_drop_trap_test 11 $BACKLOG2 early_drop
+
+	uninstall_qdisc
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit 


From df40e39c0df025da38dd819152a877a07ab1c115 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Tue, 4 Aug 2020 07:53:46 +0200
Subject: selftests: pmtu.sh: Add tests for bridged UDP tunnels

The new tests check that IP and IPv6 packets exceeding the local PMTU
estimate, both locally generated and forwarded by a bridge from
another node, result in the correct route exceptions being created,
and that communication with end-to-end fragmentation over VXLAN and
GENEVE tunnels is now possible as a result of PMTU discovery.

Part of the existing setup functions aren't generic enough to simply
add a namespace and a bridge to the existing routing setup. This
rework is in progress and we can easily shrink this once more generic
topology functions are available.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 166 ++++++++++++++++++++++++++++++++++--
 1 file changed, 159 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 77c09cd339c3..774e526573f3 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -59,6 +59,25 @@
 #	Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
 #	VXLAN
 #
+# - pmtu_ipv{4,6}_br_vxlan{4,6}_exception
+#	Set up three namespaces, A, B, and C, with routing between A and B over
+#	R1. R2 is unused in these tests. A has a veth connection to C, and is
+#	connected to B via a VXLAN endpoint, which is directly bridged to C.
+#	MTU on the B-R1 link is lower than other MTUs.
+#
+#	Check that both C and A are able to communicate with B over the VXLAN
+#	tunnel, and that PMTU exceptions with the correct values are created.
+#
+#	                  segment a_r1    segment b_r1            b_r1: 4000
+#	                .--------------R1--------------.    everything
+#	   C---veth     A                               B         else: 5000
+#	        ' bridge                                |
+#	            '---- - - - - - VXLAN - - - - - - - '
+#
+# - pmtu_ipv{4,6}_br_geneve{4,6}_exception
+#	Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel
+#	instead.
+#
 # - pmtu_ipv{4,6}_fou{4,6}_exception
 #	Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
 #	(FoU) over IPv4/IPv6, instead of VXLAN
@@ -147,6 +166,14 @@ tests="
 	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions	1
 	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions	1
 	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions	1
+	pmtu_ipv4_br_vxlan4_exception	IPv4, bridged vxlan4: PMTU exceptions	1
+	pmtu_ipv6_br_vxlan4_exception	IPv6, bridged vxlan4: PMTU exceptions	1
+	pmtu_ipv4_br_vxlan6_exception	IPv4, bridged vxlan6: PMTU exceptions	1
+	pmtu_ipv6_br_vxlan6_exception	IPv6, bridged vxlan6: PMTU exceptions	1
+	pmtu_ipv4_br_geneve4_exception	IPv4, bridged geneve4: PMTU exceptions	1
+	pmtu_ipv6_br_geneve4_exception	IPv6, bridged geneve4: PMTU exceptions	1
+	pmtu_ipv4_br_geneve6_exception	IPv4, bridged geneve6: PMTU exceptions	1
+	pmtu_ipv6_br_geneve6_exception	IPv6, bridged geneve6: PMTU exceptions	1
 	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions		1
 	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions		1
 	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions		1
@@ -173,10 +200,12 @@ tests="
 
 NS_A="ns-A"
 NS_B="ns-B"
+NS_C="ns-C"
 NS_R1="ns-R1"
 NS_R2="ns-R2"
 ns_a="ip netns exec ${NS_A}"
 ns_b="ip netns exec ${NS_B}"
+ns_c="ip netns exec ${NS_C}"
 ns_r1="ip netns exec ${NS_R1}"
 ns_r2="ip netns exec ${NS_R2}"
 
@@ -239,9 +268,11 @@ routes_nh="
 
 veth4_a_addr="192.168.1.1"
 veth4_b_addr="192.168.1.2"
+veth4_c_addr="192.168.2.10"
 veth4_mask="24"
 veth6_a_addr="fd00:1::a"
 veth6_b_addr="fd00:1::b"
+veth6_c_addr="fd00:2::c"
 veth6_mask="64"
 
 tunnel4_a_addr="192.168.2.1"
@@ -428,7 +459,7 @@ setup_ip6ip6() {
 }
 
 setup_namespaces() {
-	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+	for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
 		ip netns add ${n} || return 1
 
 		# Disable DAD, so that we don't have to wait to use the
@@ -484,6 +515,7 @@ setup_vxlan_or_geneve() {
 	a_addr="${2}"
 	b_addr="${3}"
 	opts="${4}"
+	br_if_a="${5}"
 
 	if [ "${type}" = "vxlan" ]; then
 		opts="${opts} ttl 64 dstport 4789"
@@ -497,10 +529,16 @@ setup_vxlan_or_geneve() {
 	run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
 	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
 
-	run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
-	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+	if [ -n "${br_if_a}" ]; then
+		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${br_if_a}
+		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${br_if_a}
+		run_cmd ${ns_a} ip link set ${type}_a master ${br_if_a}
+	else
+		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
+		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+	fi
 
-	run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
 	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
 
 	run_cmd ${ns_a} ip link set ${type}_a up
@@ -516,11 +554,27 @@ setup_vxlan4() {
 }
 
 setup_geneve6() {
-	setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+	setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
 }
 
 setup_vxlan6() {
-	setup_vxlan_or_geneve vxlan  ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+	setup_vxlan_or_geneve vxlan  ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
+}
+
+setup_bridged_geneve4() {
+	setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1  ${prefix4}.${b_r1}.1  "df set" "br0"
+}
+
+setup_bridged_vxlan4() {
+	setup_vxlan_or_geneve vxlan  ${prefix4}.${a_r1}.1  ${prefix4}.${b_r1}.1  "df set" "br0"
+}
+
+setup_bridged_geneve6() {
+	setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
+}
+
+setup_bridged_vxlan6() {
+	setup_vxlan_or_geneve vxlan  ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
 }
 
 setup_xfrm() {
@@ -630,6 +684,20 @@ setup_routing() {
 	return 0
 }
 
+setup_bridge() {
+	run_cmd ${ns_a} ip link add br0 type bridge || return 2
+	run_cmd ${ns_a} ip link set br0 up
+
+	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
+	run_cmd ${ns_c} ip link set veth_A-C netns ns-A
+
+	run_cmd ${ns_a} ip link set veth_A-C up
+	run_cmd ${ns_c} ip link set veth_C-A up
+	run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
+	run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
+	run_cmd ${ns_a} ip link set veth_A-C master br0
+}
+
 setup() {
 	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
 
@@ -657,7 +725,7 @@ cleanup() {
 	done
 	tcpdump_pids=
 
-	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+	for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
 		ip netns del ${n} 2> /dev/null
 	done
 }
@@ -892,6 +960,90 @@ test_pmtu_ipv6_geneve6_exception() {
 	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
 }
 
+test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
+	type=${1}
+	family=${2}
+	outer_family=${3}
+	ll_mtu=4000
+
+	if [ ${outer_family} -eq 4 ]; then
+		setup namespaces routing bridge bridged_${type}4 || return 2
+		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
+		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
+	else
+		setup namespaces routing bridge bridged_${type}6 || return 2
+		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
+		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
+	fi
+
+	trace "${ns_a}" ${type}_a    "${ns_b}"  ${type}_b \
+	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B \
+	      "${ns_a}" br0          "${ns_a}"  veth-A-C  \
+	      "${ns_c}" veth_C-A
+
+	if [ ${family} -eq 4 ]; then
+		ping=ping
+		dst=${tunnel4_b_addr}
+	else
+		ping=${ping6}
+		dst=${tunnel6_b_addr}
+	fi
+
+	# Create route exception by exceeding link layer MTU
+	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
+	mtu "${ns_a}"  br0       $((${ll_mtu} + 1000))
+	mtu "${ns_a}"  veth_A-C  $((${ll_mtu} + 1000))
+	mtu "${ns_c}"  veth_C-A  $((${ll_mtu} + 1000))
+	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
+	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
+	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+
+	run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 10 -s $((${ll_mtu} + 500)) ${dst} || return 1
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1  -s $((${ll_mtu} + 500)) ${dst} || return 1
+
+	# Check that exceptions were created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+}
+
+test_pmtu_ipv4_br_vxlan4_exception() {
+	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan  4 4
+}
+
+test_pmtu_ipv6_br_vxlan4_exception() {
+	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan  6 4
+}
+
+test_pmtu_ipv4_br_geneve4_exception() {
+	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_br_geneve4_exception() {
+	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_br_vxlan6_exception() {
+	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan  4 6
+}
+
+test_pmtu_ipv6_br_vxlan6_exception() {
+	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan  6 6
+}
+
+test_pmtu_ipv4_br_geneve6_exception() {
+	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_br_geneve6_exception() {
+	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6
+}
+
 test_pmtu_ipvX_over_fouY_or_gueY() {
 	inner_family=${1}
 	outer_family=${2}
-- 
cgit 


From 7b53682c94032a7a7adec400400c65d0af7fea5a Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Tue, 4 Aug 2020 07:53:47 +0200
Subject: selftests: pmtu.sh: Add tests for UDP tunnels handled by Open vSwitch

The new tests check that IP and IPv6 packets exceeding the local PMTU
estimate, forwarded by an Open vSwitch instance from another node,
result in the correct route exceptions being created, and that
communication with end-to-end fragmentation, over GENEVE and VXLAN
Open vSwitch ports, is now possible as a result of PMTU discovery.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 180 ++++++++++++++++++++++++++++++++++++
 1 file changed, 180 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 774e526573f3..6bbf69a28e12 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -78,6 +78,26 @@
 #	Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel
 #	instead.
 #
+# - pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception
+#	Set up two namespaces, B, and C, with routing between the init namespace
+#	and B over R1. A and R2 are unused in these tests. The init namespace
+#	has a veth connection to C, and is connected to B via a VXLAN endpoint,
+#	which is handled by Open vSwitch and bridged to C. MTU on the B-R1 link
+#	is lower than other MTUs.
+#
+#	Check that C is able to communicate with B over the VXLAN tunnel, and
+#	that PMTU exceptions with the correct values are created.
+#
+#	                  segment a_r1    segment b_r1            b_r1: 4000
+#	                .--------------R1--------------.    everything
+#	   C---veth    init                             B         else: 5000
+#	        '- ovs                                  |
+#	            '---- - - - - - VXLAN - - - - - - - '
+#
+# - pmtu_ipv{4,6}_ovs_geneve{4,6}_exception
+#	Same as pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception, with a GENEVE tunnel
+#	instead.
+#
 # - pmtu_ipv{4,6}_fou{4,6}_exception
 #	Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
 #	(FoU) over IPv4/IPv6, instead of VXLAN
@@ -174,6 +194,14 @@ tests="
 	pmtu_ipv6_br_geneve4_exception	IPv6, bridged geneve4: PMTU exceptions	1
 	pmtu_ipv4_br_geneve6_exception	IPv4, bridged geneve6: PMTU exceptions	1
 	pmtu_ipv6_br_geneve6_exception	IPv6, bridged geneve6: PMTU exceptions	1
+	pmtu_ipv4_ovs_vxlan4_exception	IPv4, OVS vxlan4: PMTU exceptions	1
+	pmtu_ipv6_ovs_vxlan4_exception	IPv6, OVS vxlan4: PMTU exceptions	1
+	pmtu_ipv4_ovs_vxlan6_exception	IPv4, OVS vxlan6: PMTU exceptions	1
+	pmtu_ipv6_ovs_vxlan6_exception	IPv6, OVS vxlan6: PMTU exceptions	1
+	pmtu_ipv4_ovs_geneve4_exception	IPv4, OVS geneve4: PMTU exceptions	1
+	pmtu_ipv6_ovs_geneve4_exception	IPv6, OVS geneve4: PMTU exceptions	1
+	pmtu_ipv4_ovs_geneve6_exception	IPv4, OVS geneve6: PMTU exceptions	1
+	pmtu_ipv6_ovs_geneve6_exception	IPv6, OVS geneve6: PMTU exceptions	1
 	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions		1
 	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions		1
 	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions		1
@@ -698,6 +726,66 @@ setup_bridge() {
 	run_cmd ${ns_a} ip link set veth_A-C master br0
 }
 
+setup_ovs_vxlan_or_geneve() {
+	type="${1}"
+	a_addr="${2}"
+	b_addr="${3}"
+
+	if [ "${type}" = "vxlan" ]; then
+		opts="${opts} ttl 64 dstport 4789"
+		opts_b="local ${b_addr}"
+	fi
+
+	run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
+		set interface ${type}_a type=${type} \
+		options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1
+
+	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1
+
+	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
+
+	run_cmd ${ns_b} ip link set ${type}_b up
+}
+
+setup_ovs_geneve4() {
+	setup_ovs_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1  ${prefix4}.${b_r1}.1
+}
+
+setup_ovs_vxlan4() {
+	setup_ovs_vxlan_or_geneve vxlan  ${prefix4}.${a_r1}.1  ${prefix4}.${b_r1}.1
+}
+
+setup_ovs_geneve6() {
+	setup_ovs_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+setup_ovs_vxlan6() {
+	setup_ovs_vxlan_or_geneve vxlan  ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+setup_ovs_bridge() {
+	run_cmd ovs-vsctl add-br ovs_br0 || return 2
+	run_cmd ip link set ovs_br0 up
+
+	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
+	run_cmd ${ns_c} ip link set veth_A-C netns 1
+
+	run_cmd         ip link set veth_A-C up
+	run_cmd ${ns_c} ip link set veth_C-A up
+	run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
+	run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
+	run_cmd ovs-vsctl add-port ovs_br0 veth_A-C
+
+	# Move veth_A-R1 to init
+	run_cmd ${ns_a} ip link set veth_A-R1 netns 1
+	run_cmd ip addr add ${prefix4}.${a_r1}.1/${veth4_mask} dev veth_A-R1
+	run_cmd ip addr add ${prefix6}:${a_r1}::1/${veth6_mask} dev veth_A-R1
+	run_cmd ip link set veth_A-R1 up
+	run_cmd ip route add ${prefix4}.${b_r1}.1 via ${prefix4}.${a_r1}.2
+	run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2
+}
+
 setup() {
 	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
 
@@ -728,6 +816,11 @@ cleanup() {
 	for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
 		ip netns del ${n} 2> /dev/null
 	done
+
+	ip link del veth_A-C			2>/dev/null
+	ip link del veth_A-R1			2>/dev/null
+	ovs-vsctl --if-exists del-port vxlan_a	2>/dev/null
+	ovs-vsctl --if-exists del-br ovs_br0	2>/dev/null
 }
 
 mtu() {
@@ -1044,6 +1137,93 @@ test_pmtu_ipv6_br_geneve6_exception() {
 	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6
 }
 
+test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
+	type=${1}
+	family=${2}
+	outer_family=${3}
+	ll_mtu=4000
+
+	if [ ${outer_family} -eq 4 ]; then
+		setup namespaces routing ovs_bridge ovs_${type}4 || return 2
+		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
+		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
+	else
+		setup namespaces routing ovs_bridge ovs_${type}6 || return 2
+		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
+		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
+	fi
+
+	if [ "${type}" = "vxlan" ]; then
+		tun_a="vxlan_sys_4789"
+	elif [ "${type}" = "geneve" ]; then
+		tun_a="genev_sys_6081"
+	fi
+
+	trace ""        "${tun_a}"  "${ns_b}"  ${type}_b \
+	      ""        veth_A-R1   "${ns_r1}" veth_R1-A \
+	      "${ns_b}" veth_B-R1   "${ns_r1}" veth_R1-B \
+	      ""        ovs_br0     ""         veth-A-C  \
+	      "${ns_c}" veth_C-A
+
+	if [ ${family} -eq 4 ]; then
+		ping=ping
+		dst=${tunnel4_b_addr}
+	else
+		ping=${ping6}
+		dst=${tunnel6_b_addr}
+	fi
+
+	# Create route exception by exceeding link layer MTU
+	mtu ""         veth_A-R1 $((${ll_mtu} + 1000))
+	mtu ""         ovs_br0   $((${ll_mtu} + 1000))
+	mtu ""         veth_A-C  $((${ll_mtu} + 1000))
+	mtu "${ns_c}"  veth_C-A  $((${ll_mtu} + 1000))
+	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
+	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+	mtu ""        ${tun_a}  $((${ll_mtu} + 1000))
+	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+
+	run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1
+
+	# Check that exceptions were created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on Open vSwitch ${type} interface"
+}
+
+test_pmtu_ipv4_ovs_vxlan4_exception() {
+	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan  4 4
+}
+
+test_pmtu_ipv6_ovs_vxlan4_exception() {
+	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan  6 4
+}
+
+test_pmtu_ipv4_ovs_geneve4_exception() {
+	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_ovs_geneve4_exception() {
+	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_ovs_vxlan6_exception() {
+	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan  4 6
+}
+
+test_pmtu_ipv6_ovs_vxlan6_exception() {
+	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan  6 6
+}
+
+test_pmtu_ipv4_ovs_geneve6_exception() {
+	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_ovs_geneve6_exception() {
+	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 6
+}
+
 test_pmtu_ipvX_over_fouY_or_gueY() {
 	inner_family=${1}
 	outer_family=${2}
-- 
cgit 


From c2a4d2747996ee6a1397e2064d44a4f57ac442e6 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Tue, 4 Aug 2020 18:18:02 +0800
Subject: selftests: rtnetlink: correct the final return value for the test

The return value "ret" will be reset to 0 from the beginning of each
sub-test in rtnetlink.sh, therefore this test will always pass if the
last sub-test has passed:
    $ sudo ./rtnetlink.sh
    PASS: policy routing
    PASS: route get
    PASS: preferred_lft addresses have expired
    PASS: promote_secondaries complete
    PASS: tc htb hierarchy
    PASS: gre tunnel endpoint
    PASS: gretap
    PASS: ip6gretap
    PASS: erspan
    PASS: ip6erspan
    PASS: bridge setup
    PASS: ipv6 addrlabel
    PASS: set ifalias a39ee707-e36b-41d3-802f-63179ed4d580 for test-dummy0
    PASS: vrf
    PASS: vxlan
    FAIL: can't add fou port 7777, skipping test
    PASS: macsec
    PASS: ipsec
    3,7c3,7
    < sa[0]    spi=0x00000009 proto=0x32 salt=0x64636261 crypt=1
    < sa[0]    key=0x31323334 35363738 39303132 33343536
    < sa[1] rx ipaddr=0x00000000 00000000 00000000 c0a87b03
    < sa[1]    spi=0x00000009 proto=0x32 salt=0x64636261 crypt=1
    < sa[1]    key=0x31323334 35363738 39303132 33343536
    ---
    > sa[0]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
    > sa[0]    key=0x34333231 38373635 32313039 36353433
    > sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0
    > sa[1]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
    > sa[1]    key=0x34333231 38373635 32313039 36353433
    FAIL: ipsec_offload incorrect driver data
    FAIL: ipsec_offload
    PASS: bridge fdb get
    PASS: neigh get
    $ echo $?
    0

Make "ret" become a local variable for all sub-tests.
Also, check the sub-test results in kci_test_rtnl() and return the
final result for this test.

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rtnetlink.sh | 65 +++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index bdbf4b3125b6..9db66bef6f1e 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -5,7 +5,6 @@
 # set -e
 
 devdummy="test-dummy0"
-ret=0
 
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
@@ -66,7 +65,7 @@ kci_test_bridge()
 	devbr="test-br0"
 	vlandev="testbr-vlan1"
 
-	ret=0
+	local ret=0
 	ip link add name "$devbr" type bridge
 	check_err $?
 
@@ -113,7 +112,7 @@ kci_test_gre()
 	rem=10.42.42.1
 	loc=10.0.0.1
 
-	ret=0
+	local ret=0
 	ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
 	check_err $?
 	ip link set $gredev up
@@ -149,7 +148,7 @@ kci_test_gre()
 kci_test_tc()
 {
 	dev=lo
-	ret=0
+	local ret=0
 
 	tc qdisc add dev "$dev" root handle 1: htb
 	check_err $?
@@ -184,7 +183,7 @@ kci_test_tc()
 
 kci_test_polrouting()
 {
-	ret=0
+	local ret=0
 	ip rule add fwmark 1 lookup 100
 	check_err $?
 	ip route add local 0.0.0.0/0 dev lo table 100
@@ -207,7 +206,7 @@ kci_test_route_get()
 {
 	local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
 
-	ret=0
+	local ret=0
 
 	ip route get 127.0.0.1 > /dev/null
 	check_err $?
@@ -290,7 +289,7 @@ kci_test_promote_secondaries()
 
 kci_test_addrlabel()
 {
-	ret=0
+	local ret=0
 
 	ip addrlabel add prefix dead::/64 dev lo label 1
 	check_err $?
@@ -330,7 +329,7 @@ kci_test_addrlabel()
 
 kci_test_ifalias()
 {
-	ret=0
+	local ret=0
 	namewant=$(uuidgen)
 	syspathname="/sys/class/net/$devdummy/ifalias"
 
@@ -385,7 +384,7 @@ kci_test_ifalias()
 kci_test_vrf()
 {
 	vrfname="test-vrf"
-	ret=0
+	local ret=0
 
 	ip link show type vrf 2>/dev/null
 	if [ $? -ne 0 ]; then
@@ -425,7 +424,7 @@ kci_test_vrf()
 
 kci_test_encap_vxlan()
 {
-	ret=0
+	local ret=0
 	vxlan="test-vxlan0"
 	vlan="test-vlan0"
 	testns="$1"
@@ -511,7 +510,7 @@ kci_test_encap_vxlan()
 
 kci_test_encap_fou()
 {
-	ret=0
+	local ret=0
 	name="test-fou"
 	testns="$1"
 
@@ -548,7 +547,7 @@ kci_test_encap_fou()
 kci_test_encap()
 {
 	testns="testns"
-	ret=0
+	local ret=0
 
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
@@ -573,7 +572,7 @@ kci_test_encap()
 kci_test_macsec()
 {
 	msname="test_macsec0"
-	ret=0
+	local ret=0
 
 	ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
 	if [ $? -ne 0 ]; then
@@ -631,7 +630,7 @@ kci_test_macsec()
 #-------------------------------------------------------------------
 kci_test_ipsec()
 {
-	ret=0
+	local ret=0
 	algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
 	srcip=192.168.123.1
 	dstip=192.168.123.2
@@ -731,7 +730,7 @@ kci_test_ipsec()
 #-------------------------------------------------------------------
 kci_test_ipsec_offload()
 {
-	ret=0
+	local ret=0
 	algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
 	srcip=192.168.123.3
 	dstip=192.168.123.4
@@ -841,7 +840,7 @@ kci_test_gretap()
 {
 	testns="testns"
 	DEV_NS=gretap00
-	ret=0
+	local ret=0
 
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
@@ -891,7 +890,7 @@ kci_test_ip6gretap()
 {
 	testns="testns"
 	DEV_NS=ip6gretap00
-	ret=0
+	local ret=0
 
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
@@ -941,7 +940,7 @@ kci_test_erspan()
 {
 	testns="testns"
 	DEV_NS=erspan00
-	ret=0
+	local ret=0
 
 	ip link help erspan 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
@@ -1006,7 +1005,7 @@ kci_test_ip6erspan()
 {
 	testns="testns"
 	DEV_NS=ip6erspan00
-	ret=0
+	local ret=0
 
 	ip link help ip6erspan 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
@@ -1077,7 +1076,7 @@ kci_test_fdb_get()
 	test_mac=de:ad:be:ef:13:37
 	localip="10.0.2.2"
 	dstip="10.0.2.3"
-	ret=0
+	local ret=0
 
 	bridge fdb help 2>&1 |grep -q 'bridge fdb get'
 	if [ $? -ne 0 ];then
@@ -1125,7 +1124,7 @@ kci_test_neigh_get()
 	dstmac=de:ad:be:ef:13:37
 	dstip=10.0.2.4
 	dstip6=dead::2
-	ret=0
+	local ret=0
 
 	ip neigh help 2>&1 |grep -q 'ip neigh get'
 	if [ $? -ne 0 ];then
@@ -1175,6 +1174,7 @@ kci_test_neigh_get()
 
 kci_test_rtnl()
 {
+	local ret=0
 	kci_add_dummy
 	if [ $ret -ne 0 ];then
 		echo "FAIL: cannot add dummy interface"
@@ -1182,27 +1182,48 @@ kci_test_rtnl()
 	fi
 
 	kci_test_polrouting
+	check_err $?
 	kci_test_route_get
+	check_err $?
 	kci_test_addrlft
+	check_err $?
 	kci_test_promote_secondaries
+	check_err $?
 	kci_test_tc
+	check_err $?
 	kci_test_gre
+	check_err $?
 	kci_test_gretap
+	check_err $?
 	kci_test_ip6gretap
+	check_err $?
 	kci_test_erspan
+	check_err $?
 	kci_test_ip6erspan
+	check_err $?
 	kci_test_bridge
+	check_err $?
 	kci_test_addrlabel
+	check_err $?
 	kci_test_ifalias
+	check_err $?
 	kci_test_vrf
+	check_err $?
 	kci_test_encap
+	check_err $?
 	kci_test_macsec
+	check_err $?
 	kci_test_ipsec
+	check_err $?
 	kci_test_ipsec_offload
+	check_err $?
 	kci_test_fdb_get
+	check_err $?
 	kci_test_neigh_get
+	check_err $?
 
 	kci_del_dummy
+	return $ret
 }
 
 #check for needed privileges
@@ -1221,4 +1242,4 @@ done
 
 kci_test_rtnl
 
-exit $ret
+exit $?
-- 
cgit 


From 72f70c159b53e1363191953875e0223ed959e143 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Tue, 4 Aug 2020 18:18:03 +0800
Subject: selftests: rtnetlink: make kci_test_encap() return sub-test result

kci_test_encap() is actually composed by two different sub-tests,
kci_test_encap_vxlan() and kci_test_encap_fou()

Therefore we should check the test result of these two in
kci_test_encap() to let the script be aware of the pass / fail status.
Otherwise it will generate false-negative result like below:
    $ sudo ./test.sh
    PASS: policy routing
    PASS: route get
    PASS: preferred_lft addresses have expired
    PASS: promote_secondaries complete
    PASS: tc htb hierarchy
    PASS: gre tunnel endpoint
    PASS: gretap
    PASS: ip6gretap
    PASS: erspan
    PASS: ip6erspan
    PASS: bridge setup
    PASS: ipv6 addrlabel
    PASS: set ifalias 5b193daf-0a08-46d7-af2c-e7aadd422ded for test-dummy0
    PASS: vrf
    PASS: vxlan
    FAIL: can't add fou port 7777, skipping test
    PASS: macsec
    PASS: bridge fdb get
    PASS: neigh get
    $ echo $?
    0

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rtnetlink.sh | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 9db66bef6f1e..7c38a909f8b8 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -564,9 +564,12 @@ kci_test_encap()
 	check_err $?
 
 	kci_test_encap_vxlan "$testns"
+	check_err $?
 	kci_test_encap_fou "$testns"
+	check_err $?
 
 	ip netns del "$testns"
+	return $ret
 }
 
 kci_test_macsec()
-- 
cgit 


From 16f6458f2478b55e2b628797bc81a4455045c74e Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 5 Aug 2020 04:40:45 -0400
Subject: selftests/net: relax cpu affinity requirement in msg_zerocopy test

The msg_zerocopy test pins the sender and receiver threads to separate
cores to reduce variance between runs.

But it hardcodes the cores and skips core 0, so it fails on machines
with the selected cores offline, or simply fewer cores.

The test mainly gives code coverage in automated runs. The throughput
of zerocopy ('-z') and non-zerocopy runs is logged for manual
inspection.

Continue even when sched_setaffinity fails. Just log to warn anyone
interpreting the data.

Fixes: 07b65c5b31ce ("test: add msg_zerocopy test")
Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/msg_zerocopy.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 4b02933cab8a..bdc03a2097e8 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -125,9 +125,8 @@ static int do_setcpu(int cpu)
 	CPU_ZERO(&mask);
 	CPU_SET(cpu, &mask);
 	if (sched_setaffinity(0, sizeof(mask), &mask))
-		error(1, 0, "setaffinity %d", cpu);
-
-	if (cfg_verbose)
+		fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
+	else if (cfg_verbose)
 		fprintf(stderr, "cpu: %u\n", cpu);
 
 	return 0;
-- 
cgit