summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorJakub Sitnicki <jakub@cloudflare.com>2020-07-17 12:35:25 +0200
committerAlexei Starovoitov <ast@kernel.org>2020-07-17 20:18:16 -0700
commit1559b4aa1db443096af493c7d621dc156054babe (patch)
treeecbdcacb6be084e01d4e8fe05b39a2789d540e63 /include/linux
parent80b373f74f9e28b0093930a6b95c929732f02512 (diff)
inet: Run SK_LOOKUP BPF program on socket lookup
Run a BPF program before looking up a listening socket on the receive path. Program selects a listening socket to yield as result of socket lookup by calling bpf_sk_assign() helper and returning SK_PASS code. Program can revert its decision by assigning a NULL socket with bpf_sk_assign(). Alternatively, BPF program can also fail the lookup by returning with SK_DROP, or let the lookup continue as usual with SK_PASS on return, when no socket has been selected with bpf_sk_assign(). This lets the user match packets with listening sockets freely at the last possible point on the receive path, where we know that packets are destined for local delivery after undergoing policing, filtering, and routing. With BPF code selecting the socket, directing packets destined to an IP range or to a port range to a single socket becomes possible. In case multiple programs are attached, they are run in series in the order in which they were attached. The end result is determined from return codes of all the programs according to following rules: 1. If any program returned SK_PASS and selected a valid socket, the socket is used as result of socket lookup. 2. If more than one program returned SK_PASS and selected a socket, last selection takes effect. 3. If any program returned SK_DROP, and no program returned SK_PASS and selected a socket, socket lookup fails with -ECONNREFUSED. 4. If all programs returned SK_PASS and none of them selected a socket, socket lookup continues to htable-based lookup. Suggested-by: Marek Majkowski <marek@cloudflare.com> Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200717103536.397595-5-jakub@cloudflare.com
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/filter.h91
1 files changed, 91 insertions, 0 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h
index fa1ea12ad2cd..c4f54c216347 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1295,4 +1295,95 @@ struct bpf_sk_lookup_kern {
bool no_reuseport;
};
+extern struct static_key_false bpf_sk_lookup_enabled;
+
+/* Runners for BPF_SK_LOOKUP programs to invoke on socket lookup.
+ *
+ * Allowed return values for a BPF SK_LOOKUP program are SK_PASS and
+ * SK_DROP. Their meaning is as follows:
+ *
+ * SK_PASS && ctx.selected_sk != NULL: use selected_sk as lookup result
+ * SK_PASS && ctx.selected_sk == NULL: continue to htable-based socket lookup
+ * SK_DROP : terminate lookup with -ECONNREFUSED
+ *
+ * This macro aggregates return values and selected sockets from
+ * multiple BPF programs according to following rules in order:
+ *
+ * 1. If any program returned SK_PASS and a non-NULL ctx.selected_sk,
+ * macro result is SK_PASS and last ctx.selected_sk is used.
+ * 2. If any program returned SK_DROP return value,
+ * macro result is SK_DROP.
+ * 3. Otherwise result is SK_PASS and ctx.selected_sk is NULL.
+ *
+ * Caller must ensure that the prog array is non-NULL, and that the
+ * array as well as the programs it contains remain valid.
+ */
+#define BPF_PROG_SK_LOOKUP_RUN_ARRAY(array, ctx, func) \
+ ({ \
+ struct bpf_sk_lookup_kern *_ctx = &(ctx); \
+ struct bpf_prog_array_item *_item; \
+ struct sock *_selected_sk = NULL; \
+ bool _no_reuseport = false; \
+ struct bpf_prog *_prog; \
+ bool _all_pass = true; \
+ u32 _ret; \
+ \
+ migrate_disable(); \
+ _item = &(array)->items[0]; \
+ while ((_prog = READ_ONCE(_item->prog))) { \
+ /* restore most recent selection */ \
+ _ctx->selected_sk = _selected_sk; \
+ _ctx->no_reuseport = _no_reuseport; \
+ \
+ _ret = func(_prog, _ctx); \
+ if (_ret == SK_PASS && _ctx->selected_sk) { \
+ /* remember last non-NULL socket */ \
+ _selected_sk = _ctx->selected_sk; \
+ _no_reuseport = _ctx->no_reuseport; \
+ } else if (_ret == SK_DROP && _all_pass) { \
+ _all_pass = false; \
+ } \
+ _item++; \
+ } \
+ _ctx->selected_sk = _selected_sk; \
+ _ctx->no_reuseport = _no_reuseport; \
+ migrate_enable(); \
+ _all_pass || _selected_sk ? SK_PASS : SK_DROP; \
+ })
+
+static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
+ const __be32 saddr, const __be16 sport,
+ const __be32 daddr, const u16 dport,
+ struct sock **psk)
+{
+ struct bpf_prog_array *run_array;
+ struct sock *selected_sk = NULL;
+ bool no_reuseport = false;
+
+ rcu_read_lock();
+ run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]);
+ if (run_array) {
+ struct bpf_sk_lookup_kern ctx = {
+ .family = AF_INET,
+ .protocol = protocol,
+ .v4.saddr = saddr,
+ .v4.daddr = daddr,
+ .sport = sport,
+ .dport = dport,
+ };
+ u32 act;
+
+ act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+ if (act == SK_PASS) {
+ selected_sk = ctx.selected_sk;
+ no_reuseport = ctx.no_reuseport;
+ } else {
+ selected_sk = ERR_PTR(-ECONNREFUSED);
+ }
+ }
+ rcu_read_unlock();
+ *psk = selected_sk;
+ return no_reuseport;
+}
+
#endif /* __LINUX_FILTER_H__ */