summaryrefslogtreecommitdiff
path: root/net/core/filter.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/filter.c')
-rw-r--r--net/core/filter.c396
1 files changed, 390 insertions, 6 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 73395384afe2..7124f0fe6974 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -47,6 +47,7 @@
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
+#include <linux/btf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
@@ -73,6 +74,31 @@
#include <net/lwtunnel.h>
#include <net/ipv6_stubs.h>
#include <net/bpf_sk_storage.h>
+#include <net/transp_v6.h>
+#include <linux/btf_ids.h>
+
+int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
+{
+ if (in_compat_syscall()) {
+ struct compat_sock_fprog f32;
+
+ if (len != sizeof(f32))
+ return -EINVAL;
+ if (copy_from_sockptr(&f32, src, sizeof(f32)))
+ return -EFAULT;
+ memset(dst, 0, sizeof(*dst));
+ dst->len = f32.len;
+ dst->filter = compat_ptr(f32.filter);
+ } else {
+ if (len != sizeof(*dst))
+ return -EINVAL;
+ if (copy_from_sockptr(dst, src, sizeof(*dst)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -3777,7 +3803,9 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-static int bpf_skb_output_btf_ids[5];
+BTF_ID_LIST(bpf_skb_output_btf_ids)
+BTF_ID(struct, sk_buff)
+
const struct bpf_func_proto bpf_skb_output_proto = {
.func = bpf_skb_event_output,
.gpl_only = true,
@@ -4171,7 +4199,9 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-static int bpf_xdp_output_btf_ids[5];
+BTF_ID_LIST(bpf_xdp_output_btf_ids)
+BTF_ID(struct, xdp_buff)
+
const struct bpf_func_proto bpf_xdp_output_proto = {
.func = bpf_xdp_event_output,
.gpl_only = true,
@@ -4289,10 +4319,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen, u32 flags)
{
char devname[IFNAMSIZ];
+ int val, valbool;
struct net *net;
int ifindex;
int ret = 0;
- int val;
if (!sk_fullsock(sk))
return -EINVAL;
@@ -4303,6 +4333,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
return -EINVAL;
val = *((int *)optval);
+ valbool = val ? 1 : 0;
/* Only some socketops are supported */
switch (optname) {
@@ -4361,6 +4392,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
}
ret = sock_bindtoindex(sk, ifindex, false);
break;
+ case SO_KEEPALIVE:
+ if (sk->sk_prot->keepalive)
+ sk->sk_prot->keepalive(sk, valbool);
+ sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+ break;
default:
ret = -EINVAL;
}
@@ -4421,6 +4457,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
ret = tcp_set_congestion_control(sk, name, false,
reinit, true);
} else {
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
if (optlen != sizeof(int))
@@ -4449,6 +4486,33 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
else
tp->save_syn = val;
break;
+ case TCP_KEEPIDLE:
+ ret = tcp_sock_set_keepidle_locked(sk, val);
+ break;
+ case TCP_KEEPINTVL:
+ if (val < 1 || val > MAX_TCP_KEEPINTVL)
+ ret = -EINVAL;
+ else
+ tp->keepalive_intvl = val * HZ;
+ break;
+ case TCP_KEEPCNT:
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+ ret = -EINVAL;
+ else
+ tp->keepalive_probes = val;
+ break;
+ case TCP_SYNCNT:
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ ret = -EINVAL;
+ else
+ icsk->icsk_syn_retries = val;
+ break;
+ case TCP_USER_TIMEOUT:
+ if (val < 0)
+ ret = -EINVAL;
+ else
+ icsk->icsk_user_timeout = val;
+ break;
default:
ret = -EINVAL;
}
@@ -5853,12 +5917,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
{
unsigned int iphdr_len;
- if (skb->protocol == cpu_to_be16(ETH_P_IP))
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
iphdr_len = sizeof(struct iphdr);
- else if (skb->protocol == cpu_to_be16(ETH_P_IPV6))
+ break;
+ case cpu_to_be16(ETH_P_IPV6):
iphdr_len = sizeof(struct ipv6hdr);
- else
+ break;
+ default:
return 0;
+ }
if (skb_headlen(skb) < iphdr_len)
return 0;
@@ -6119,6 +6187,7 @@ bool bpf_helper_changes_pkt_data(void *func)
}
const struct bpf_func_proto bpf_event_output_data_proto __weak;
+const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
static const struct bpf_func_proto *
sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
@@ -6151,6 +6220,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
#endif
+ case BPF_FUNC_sk_storage_get:
+ return &bpf_sk_storage_get_cg_sock_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -6854,6 +6925,7 @@ static bool __sock_filter_check_attach_type(int off,
case offsetof(struct bpf_sock, priority):
switch (attach_type) {
case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_INET_SOCK_RELEASE:
goto full_access;
default:
return false;
@@ -9183,6 +9255,189 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
+
+DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
+EXPORT_SYMBOL(bpf_sk_lookup_enabled);
+
+BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
+ struct sock *, sk, u64, flags)
+{
+ if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
+ BPF_SK_LOOKUP_F_NO_REUSEPORT)))
+ return -EINVAL;
+ if (unlikely(sk && sk_is_refcounted(sk)))
+ return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
+ if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
+ return -ESOCKTNOSUPPORT; /* reject connected sockets */
+
+ /* Check if socket is suitable for packet L3/L4 protocol */
+ if (sk && sk->sk_protocol != ctx->protocol)
+ return -EPROTOTYPE;
+ if (sk && sk->sk_family != ctx->family &&
+ (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
+ return -EAFNOSUPPORT;
+
+ if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
+ return -EEXIST;
+
+ /* Select socket as lookup result */
+ ctx->selected_sk = sk;
+ ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
+ .func = bpf_sk_lookup_assign,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *
+sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_perf_event_output:
+ return &bpf_event_output_data_proto;
+ case BPF_FUNC_sk_assign:
+ return &bpf_sk_lookup_assign_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static bool sk_lookup_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (type != BPF_READ)
+ return false;
+
+ switch (off) {
+ case offsetof(struct bpf_sk_lookup, sk):
+ info->reg_type = PTR_TO_SOCKET_OR_NULL;
+ return size == sizeof(__u64);
+
+ case bpf_ctx_range(struct bpf_sk_lookup, family):
+ case bpf_ctx_range(struct bpf_sk_lookup, protocol):
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
+ case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
+ case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
+ case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
+ case bpf_ctx_range(struct bpf_sk_lookup, local_port):
+ bpf_ctx_record_field_size(info, sizeof(__u32));
+ return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
+
+ default:
+ return false;
+ }
+}
+
+static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct bpf_sk_lookup, sk):
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, selected_sk));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, family):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ family, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, protocol):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ protocol, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, remote_ip4):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ v4.saddr, 4, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, local_ip4):
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ v4.daddr, 4, target_size));
+ break;
+
+ case bpf_ctx_range_till(struct bpf_sk_lookup,
+ remote_ip6[0], remote_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+ int off = si->off;
+
+ off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
+ off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, v6.saddr));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+ }
+ case bpf_ctx_range_till(struct bpf_sk_lookup,
+ local_ip6[0], local_ip6[3]): {
+#if IS_ENABLED(CONFIG_IPV6)
+ int off = si->off;
+
+ off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
+ off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sk_lookup_kern, v6.daddr));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+ }
+ case offsetof(struct bpf_sk_lookup, remote_port):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ sport, 2, target_size));
+ break;
+
+ case offsetof(struct bpf_sk_lookup, local_port):
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+ bpf_target_off(struct bpf_sk_lookup_kern,
+ dport, 2, target_size));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
+const struct bpf_prog_ops sk_lookup_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_lookup_verifier_ops = {
+ .get_func_proto = sk_lookup_func_proto,
+ .is_valid_access = sk_lookup_is_valid_access,
+ .convert_ctx_access = sk_lookup_convert_ctx_access,
+};
+
#endif /* CONFIG_INET */
DEFINE_BPF_DISPATCHER(xdp)
@@ -9191,3 +9446,132 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
{
bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
}
+
+#ifdef CONFIG_DEBUG_INFO_BTF
+BTF_ID_LIST_GLOBAL(btf_sock_ids)
+#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+#else
+u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
+#endif
+
+static bool check_arg_btf_id(u32 btf_id, u32 arg)
+{
+ int i;
+
+ /* only one argument, no need to check arg */
+ for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
+ if (btf_sock_ids[i] == btf_id)
+ return true;
+ return false;
+}
+
+BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
+{
+ /* tcp6_sock type is not generated in dwarf and hence btf,
+ * trigger an explicit type generation here.
+ */
+ BTF_TYPE_EMIT(struct tcp6_sock);
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_family == AF_INET6)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
+ .func = bpf_skc_to_tcp6_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
+{
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
+ .func = bpf_skc_to_tcp_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
+{
+#ifdef CONFIG_INET
+ if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
+ return (unsigned long)sk;
+#endif
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
+ return (unsigned long)sk;
+#endif
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
+ .func = bpf_skc_to_tcp_timewait_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
+{
+#ifdef CONFIG_INET
+ if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+ return (unsigned long)sk;
+#endif
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+ return (unsigned long)sk;
+#endif
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
+ .func = bpf_skc_to_tcp_request_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
+};
+
+BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
+{
+ /* udp6_sock type is not generated in dwarf and hence btf,
+ * trigger an explicit type generation here.
+ */
+ BTF_TYPE_EMIT(struct udp6_sock);
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
+ sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
+ .func = bpf_skc_to_udp6_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
+};