From 1ba5ad36e00f46e3f7676f5de6b87f5a2f57f1f1 Mon Sep 17 00:00:00 2001
From: Daniel Müller <deso@posteo.net>
Date: Mon, 23 May 2022 23:04:25 +0000
Subject: bpftool: Use libbpf_bpf_attach_type_str
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change switches bpftool over to using the recently introduced
libbpf_bpf_attach_type_str function instead of maintaining its own
string representation for the bpf_attach_type enum.

Note that contrary to other enum types, the variant names that bpftool
maps bpf_attach_type to do not adhere a simple to follow rule. With
bpf_prog_type, for example, the textual representation can easily be
inferred by stripping the BPF_PROG_TYPE_ prefix and lowercasing the
remaining string. bpf_attach_type violates this rule for various
variants.
We decided to fix up this deficiency with this change, meaning that
bpftool uses the same textual representations as libbpf. Supporting
tests, completion scripts, and man pages have been adjusted accordingly.
However, we did add support for accepting (the now undocumented)
original attach type names when they are provided by users.

For the test (test_bpftool_synctypes.py), I have removed the enum
representation checks, because we no longer mirror the various enum
variant names in bpftool source code. For the man page, help text, and
completion script checks we are now using enum definitions from
uapi/linux/bpf.h as the source of truth directly.

Signed-off-by: Daniel Müller <deso@posteo.net>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20220523230428.3077108-10-deso@posteo.net
---
 tools/bpf/bpftool/common.c | 82 ++++++++++++++++++++--------------------------
 1 file changed, 36 insertions(+), 46 deletions(-)

(limited to 'tools/bpf/bpftool/common.c')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index c740142c24d8..a45b42ee8ab0 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -31,52 +31,6 @@
 #define BPF_FS_MAGIC		0xcafe4a11
 #endif
 
-const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
-	[BPF_CGROUP_INET_INGRESS]	= "ingress",
-	[BPF_CGROUP_INET_EGRESS]	= "egress",
-	[BPF_CGROUP_INET_SOCK_CREATE]	= "sock_create",
-	[BPF_CGROUP_INET_SOCK_RELEASE]	= "sock_release",
-	[BPF_CGROUP_SOCK_OPS]		= "sock_ops",
-	[BPF_CGROUP_DEVICE]		= "device",
-	[BPF_CGROUP_INET4_BIND]		= "bind4",
-	[BPF_CGROUP_INET6_BIND]		= "bind6",
-	[BPF_CGROUP_INET4_CONNECT]	= "connect4",
-	[BPF_CGROUP_INET6_CONNECT]	= "connect6",
-	[BPF_CGROUP_INET4_POST_BIND]	= "post_bind4",
-	[BPF_CGROUP_INET6_POST_BIND]	= "post_bind6",
-	[BPF_CGROUP_INET4_GETPEERNAME]	= "getpeername4",
-	[BPF_CGROUP_INET6_GETPEERNAME]	= "getpeername6",
-	[BPF_CGROUP_INET4_GETSOCKNAME]	= "getsockname4",
-	[BPF_CGROUP_INET6_GETSOCKNAME]	= "getsockname6",
-	[BPF_CGROUP_UDP4_SENDMSG]	= "sendmsg4",
-	[BPF_CGROUP_UDP6_SENDMSG]	= "sendmsg6",
-	[BPF_CGROUP_SYSCTL]		= "sysctl",
-	[BPF_CGROUP_UDP4_RECVMSG]	= "recvmsg4",
-	[BPF_CGROUP_UDP6_RECVMSG]	= "recvmsg6",
-	[BPF_CGROUP_GETSOCKOPT]		= "getsockopt",
-	[BPF_CGROUP_SETSOCKOPT]		= "setsockopt",
-	[BPF_SK_SKB_STREAM_PARSER]	= "sk_skb_stream_parser",
-	[BPF_SK_SKB_STREAM_VERDICT]	= "sk_skb_stream_verdict",
-	[BPF_SK_SKB_VERDICT]		= "sk_skb_verdict",
-	[BPF_SK_MSG_VERDICT]		= "sk_msg_verdict",
-	[BPF_LIRC_MODE2]		= "lirc_mode2",
-	[BPF_FLOW_DISSECTOR]		= "flow_dissector",
-	[BPF_TRACE_RAW_TP]		= "raw_tp",
-	[BPF_TRACE_FENTRY]		= "fentry",
-	[BPF_TRACE_FEXIT]		= "fexit",
-	[BPF_MODIFY_RETURN]		= "mod_ret",
-	[BPF_LSM_MAC]			= "lsm_mac",
-	[BPF_SK_LOOKUP]			= "sk_lookup",
-	[BPF_TRACE_ITER]		= "trace_iter",
-	[BPF_XDP_DEVMAP]		= "xdp_devmap",
-	[BPF_XDP_CPUMAP]		= "xdp_cpumap",
-	[BPF_XDP]			= "xdp",
-	[BPF_SK_REUSEPORT_SELECT]	= "sk_skb_reuseport_select",
-	[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]	= "sk_skb_reuseport_select_or_migrate",
-	[BPF_PERF_EVENT]		= "perf_event",
-	[BPF_TRACE_KPROBE_MULTI]	= "trace_kprobe_multi",
-};
-
 void p_err(const char *fmt, ...)
 {
 	va_list ap;
@@ -1009,3 +963,39 @@ bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx)
 {
 	return k1 == k2;
 }
+
+const char *bpf_attach_type_input_str(enum bpf_attach_type t)
+{
+	switch (t) {
+	case BPF_CGROUP_INET_INGRESS:		return "ingress";
+	case BPF_CGROUP_INET_EGRESS:		return "egress";
+	case BPF_CGROUP_INET_SOCK_CREATE:	return "sock_create";
+	case BPF_CGROUP_INET_SOCK_RELEASE:	return "sock_release";
+	case BPF_CGROUP_SOCK_OPS:		return "sock_ops";
+	case BPF_CGROUP_DEVICE:			return "device";
+	case BPF_CGROUP_INET4_BIND:		return "bind4";
+	case BPF_CGROUP_INET6_BIND:		return "bind6";
+	case BPF_CGROUP_INET4_CONNECT:		return "connect4";
+	case BPF_CGROUP_INET6_CONNECT:		return "connect6";
+	case BPF_CGROUP_INET4_POST_BIND:	return "post_bind4";
+	case BPF_CGROUP_INET6_POST_BIND:	return "post_bind6";
+	case BPF_CGROUP_INET4_GETPEERNAME:	return "getpeername4";
+	case BPF_CGROUP_INET6_GETPEERNAME:	return "getpeername6";
+	case BPF_CGROUP_INET4_GETSOCKNAME:	return "getsockname4";
+	case BPF_CGROUP_INET6_GETSOCKNAME:	return "getsockname6";
+	case BPF_CGROUP_UDP4_SENDMSG:		return "sendmsg4";
+	case BPF_CGROUP_UDP6_SENDMSG:		return "sendmsg6";
+	case BPF_CGROUP_SYSCTL:			return "sysctl";
+	case BPF_CGROUP_UDP4_RECVMSG:		return "recvmsg4";
+	case BPF_CGROUP_UDP6_RECVMSG:		return "recvmsg6";
+	case BPF_CGROUP_GETSOCKOPT:		return "getsockopt";
+	case BPF_CGROUP_SETSOCKOPT:		return "setsockopt";
+	case BPF_TRACE_RAW_TP:			return "raw_tp";
+	case BPF_TRACE_FENTRY:			return "fentry";
+	case BPF_TRACE_FEXIT:			return "fexit";
+	case BPF_MODIFY_RETURN:			return "mod_ret";
+	case BPF_SK_REUSEPORT_SELECT:		return "sk_skb_reuseport_select";
+	case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE:	return "sk_skb_reuseport_select_or_migrate";
+	default:	return libbpf_bpf_attach_type_str(t);
+	}
+}
-- 
cgit 


From 6b4384ff108874cf336fe2fb1633313c2c7620bf Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Fri, 10 Jun 2022 12:26:47 +0100
Subject: Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK"

This reverts commit a777e18f1bcd32528ff5dfd10a6629b655b05eb8.

In commit a777e18f1bcd ("bpftool: Use libbpf 1.0 API mode instead of
RLIMIT_MEMLOCK"), we removed the rlimit bump in bpftool, because the
kernel has switched to memcg-based memory accounting. Thanks to the
LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK, we attempted to keep compatibility
with other systems and ask libbpf to raise the limit for us if
necessary.

How do we know if memcg-based accounting is supported? There is a probe
in libbpf to check this. But this probe currently relies on the
availability of a given BPF helper, bpf_ktime_get_coarse_ns(), which
landed in the same kernel version as the memory accounting change. This
works in the generic case, but it may fail, for example, if the helper
function has been backported to an older kernel. This has been observed
for Google Cloud's Container-Optimized OS (COS), where the helper is
available but rlimit is still in use. The probe succeeds, the rlimit is
not raised, and probing features with bpftool, for example, fails.

A patch was submitted [0] to update this probe in libbpf, based on what
the cilium/ebpf Go library does [1]. It would lower the soft rlimit to
0, attempt to load a BPF object, and reset the rlimit. But it may induce
some hard-to-debug flakiness if another process starts, or the current
application is killed, while the rlimit is reduced, and the approach was
discarded.

As a workaround to ensure that the rlimit bump does not depend on the
availability of a given helper, we restore the unconditional rlimit bump
in bpftool for now.

  [0] https://lore.kernel.org/bpf/20220609143614.97837-1-quentin@isovalent.com/
  [1] https://github.com/cilium/ebpf/blob/v0.9.0/rlimit/rlimit.go#L39

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Yafang Shao <laoar.shao@gmail.com>
Cc: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20220610112648.29695-2-quentin@isovalent.com
---
 tools/bpf/bpftool/common.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools/bpf/bpftool/common.c')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index a45b42ee8ab0..a0d4acd7c54a 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -17,6 +17,7 @@
 #include <linux/magic.h>
 #include <net/if.h>
 #include <sys/mount.h>
+#include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/vfs.h>
 
@@ -72,6 +73,13 @@ static bool is_bpffs(char *path)
 	return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
 }
 
+void set_max_rlimit(void)
+{
+	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+
+	setrlimit(RLIMIT_MEMLOCK, &rinf);
+}
+
 static int
 mnt_fs(const char *target, const char *type, char *buff, size_t bufflen)
 {
-- 
cgit 


From f0cf642c56b76dfbbb5f2be67fa180191d5ab0ef Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Wed, 29 Jun 2022 12:13:51 +0100
Subject: bpftool: Probe for memcg-based accounting before bumping rlimit

Bpftool used to bump the memlock rlimit to make sure to be able to load
BPF objects. After the kernel has switched to memcg-based memory
accounting [0] in 5.11, bpftool has relied on libbpf to probe the system
for memcg-based accounting support and for raising the rlimit if
necessary [1]. But this was later reverted, because the probe would
sometimes fail, resulting in bpftool not being able to load all required
objects [2].

Here we add a more efficient probe, in bpftool itself. We first lower
the rlimit to 0, then we attempt to load a BPF object (and finally reset
the rlimit): if the load succeeds, then memcg-based memory accounting is
supported.

This approach was earlier proposed for the probe in libbpf itself [3],
but given that the library may be used in multithreaded applications,
the probe could have undesirable consequences if one thread attempts to
lock kernel memory while memlock rlimit is at 0. Since bpftool is
single-threaded and the rlimit is process-based, this is fine to do in
bpftool itself.

This probe was inspired by the similar one from the cilium/ebpf Go
library [4].

  [0] commit 97306be45fbe ("Merge branch 'switch to memcg-based memory accounting'")
  [1] commit a777e18f1bcd ("bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK")
  [2] commit 6b4384ff1088 ("Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK"")
  [3] https://lore.kernel.org/bpf/20220609143614.97837-1-quentin@isovalent.com/t/#u
  [4] https://github.com/cilium/ebpf/blob/v0.9.0/rlimit/rlimit.go#L39

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Yafang Shao <laoar.shao@gmail.com>
Link: https://lore.kernel.org/bpf/20220629111351.47699-1-quentin@isovalent.com
---
 tools/bpf/bpftool/common.c | 71 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 3 deletions(-)

(limited to 'tools/bpf/bpftool/common.c')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index a0d4acd7c54a..fc8172a4969a 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -13,14 +13,17 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <linux/limits.h>
-#include <linux/magic.h>
 #include <net/if.h>
 #include <sys/mount.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/vfs.h>
 
+#include <linux/filter.h>
+#include <linux/limits.h>
+#include <linux/magic.h>
+#include <linux/unistd.h>
+
 #include <bpf/bpf.h>
 #include <bpf/hashmap.h>
 #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
@@ -73,11 +76,73 @@ static bool is_bpffs(char *path)
 	return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
 }
 
+/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
+ * memcg-based memory accounting for BPF maps and programs. This was done in
+ * commit 97306be45fbe ("Merge branch 'switch to memcg-based memory
+ * accounting'"), in Linux 5.11.
+ *
+ * Libbpf also offers to probe for memcg-based accounting vs rlimit, but does
+ * so by checking for the availability of a given BPF helper and this has
+ * failed on some kernels with backports in the past, see commit 6b4384ff1088
+ * ("Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK"").
+ * Instead, we can probe by lowering the process-based rlimit to 0, trying to
+ * load a BPF object, and resetting the rlimit. If the load succeeds then
+ * memcg-based accounting is supported.
+ *
+ * This would be too dangerous to do in the library, because multithreaded
+ * applications might attempt to load items while the rlimit is at 0. Given
+ * that bpftool is single-threaded, this is fine to do here.
+ */
+static bool known_to_need_rlimit(void)
+{
+	struct rlimit rlim_init, rlim_cur_zero = {};
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	size_t insn_cnt = ARRAY_SIZE(insns);
+	union bpf_attr attr;
+	int prog_fd, err;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insns = ptr_to_u64(insns);
+	attr.insn_cnt = insn_cnt;
+	attr.license = ptr_to_u64("GPL");
+
+	if (getrlimit(RLIMIT_MEMLOCK, &rlim_init))
+		return false;
+
+	/* Drop the soft limit to zero. We maintain the hard limit to its
+	 * current value, because lowering it would be a permanent operation
+	 * for unprivileged users.
+	 */
+	rlim_cur_zero.rlim_max = rlim_init.rlim_max;
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_cur_zero))
+		return false;
+
+	/* Do not use bpf_prog_load() from libbpf here, because it calls
+	 * bump_rlimit_memlock(), interfering with the current probe.
+	 */
+	prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+	err = errno;
+
+	/* reset soft rlimit to its initial value */
+	setrlimit(RLIMIT_MEMLOCK, &rlim_init);
+
+	if (prog_fd < 0)
+		return err == EPERM;
+
+	close(prog_fd);
+	return false;
+}
+
 void set_max_rlimit(void)
 {
 	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
 
-	setrlimit(RLIMIT_MEMLOCK, &rinf);
+	if (known_to_need_rlimit())
+		setrlimit(RLIMIT_MEMLOCK, &rinf);
 }
 
 static int
-- 
cgit 


From 7a255ae77216237a4ce83ddea595aa4e0a812f46 Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Wed, 29 Jun 2022 15:48:32 +0000
Subject: bpftool: Show also the name of type BPF_OBJ_LINK

For example, /sys/fs/bpf/maps.debug is a BPF link. When you run `bpftool map show`
to show it:

Before:

  $ bpftool map show pinned /sys/fs/bpf/maps.debug
  Error: incorrect object type: unknown

After:

  $ bpftool map show pinned /sys/fs/bpf/maps.debug
  Error: incorrect object type: link

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20220629154832.56986-5-laoar.shao@gmail.com
---
 tools/bpf/bpftool/common.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/bpf/bpftool/common.c')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index fc8172a4969a..067e9ea59e3b 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -316,6 +316,7 @@ const char *get_fd_type_name(enum bpf_obj_type type)
 		[BPF_OBJ_UNKNOWN]	= "unknown",
 		[BPF_OBJ_PROG]		= "prog",
 		[BPF_OBJ_MAP]		= "map",
+		[BPF_OBJ_LINK]		= "link",
 	};
 
 	if (type < 0 || type >= ARRAY_SIZE(names) || !names[type])
-- 
cgit