summaryrefslogtreecommitdiff
path: root/net/core/filter.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/filter.c')
-rw-r--r--net/core/filter.c554
1 files changed, 470 insertions, 84 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 9dfd145eedcc..fd423ce3da34 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1453,30 +1453,6 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
return 0;
}
-static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
-{
- struct bpf_prog *old_prog;
- int err;
-
- if (bpf_prog_size(prog->len) > sysctl_optmem_max)
- return -ENOMEM;
-
- if (sk_unhashed(sk) && sk->sk_reuseport) {
- err = reuseport_alloc(sk);
- if (err)
- return err;
- } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
- /* The socket wasn't bound with SO_REUSEPORT */
- return -EINVAL;
- }
-
- old_prog = reuseport_attach_prog(sk, prog);
- if (old_prog)
- bpf_prog_destroy(old_prog);
-
- return 0;
-}
-
static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
@@ -1550,13 +1526,15 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- err = __reuseport_attach_prog(prog, sk);
- if (err < 0) {
+ if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+ err = -ENOMEM;
+ else
+ err = reuseport_attach_prog(sk, prog);
+
+ if (err)
__bpf_prog_release(prog);
- return err;
- }
- return 0;
+ return err;
}
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
@@ -1586,19 +1564,58 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
- struct bpf_prog *prog = __get_bpf(ufd, sk);
+ struct bpf_prog *prog;
int err;
+ if (sock_flag(sk, SOCK_FILTER_LOCKED))
+ return -EPERM;
+
+ prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
+ if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
+ prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
if (IS_ERR(prog))
return PTR_ERR(prog);
- err = __reuseport_attach_prog(prog, sk);
- if (err < 0) {
- bpf_prog_put(prog);
- return err;
+ if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
+ /* Like other non BPF_PROG_TYPE_SOCKET_FILTER
+ * bpf prog (e.g. sockmap). It depends on the
+ * limitation imposed by bpf_prog_load().
+ * Hence, sysctl_optmem_max is not checked.
+ */
+ if ((sk->sk_type != SOCK_STREAM &&
+ sk->sk_type != SOCK_DGRAM) ||
+ (sk->sk_protocol != IPPROTO_UDP &&
+ sk->sk_protocol != IPPROTO_TCP) ||
+ (sk->sk_family != AF_INET &&
+ sk->sk_family != AF_INET6)) {
+ err = -ENOTSUPP;
+ goto err_prog_put;
+ }
+ } else {
+ /* BPF_PROG_TYPE_SOCKET_FILTER */
+ if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
+ err = -ENOMEM;
+ goto err_prog_put;
+ }
}
- return 0;
+ err = reuseport_attach_prog(sk, prog);
+err_prog_put:
+ if (err)
+ bpf_prog_put(prog);
+
+ return err;
+}
+
+void sk_reuseport_prog_free(struct bpf_prog *prog)
+{
+ if (!prog)
+ return;
+
+ if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
+ bpf_prog_put(prog);
+ else
+ bpf_prog_destroy(prog);
}
struct bpf_scratchpad {
@@ -2082,19 +2099,12 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
-struct redirect_info {
- u32 ifindex;
- u32 flags;
- struct bpf_map *map;
- struct bpf_map *map_to_flush;
- unsigned long map_owner;
-};
-
-static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
+EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
if (unlikely(flags & ~(BPF_F_INGRESS)))
return TC_ACT_SHOT;
@@ -2107,7 +2117,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
int skb_do_redirect(struct sk_buff *skb)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct net_device *dev;
dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
@@ -3200,7 +3210,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
void xdp_do_flush_map(void)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = ri->map_to_flush;
ri->map_to_flush = NULL;
@@ -3245,7 +3255,7 @@ static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
u32 index = ri->ifindex;
@@ -3285,7 +3295,7 @@ err:
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct net_device *fwd;
u32 index = ri->ifindex;
int err;
@@ -3317,7 +3327,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
u32 index = ri->ifindex;
@@ -3368,7 +3378,7 @@ err:
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
u32 index = ri->ifindex;
struct net_device *fwd;
int err = 0;
@@ -3399,7 +3409,7 @@ EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
if (unlikely(flags))
return XDP_ABORTED;
@@ -3423,7 +3433,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
unsigned long, map_owner)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
if (unlikely(flags))
return XDP_ABORTED;
@@ -3681,7 +3691,7 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
if (unlikely(size > IP_TUNNEL_OPTS_MAX))
return -ENOMEM;
- ip_tunnel_info_opts_set(info, from, size);
+ ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
return 0;
}
@@ -3768,6 +3778,32 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
+
+BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
+ ancestor_level)
+{
+ struct sock *sk = skb_to_full_sk(skb);
+ struct cgroup *ancestor;
+ struct cgroup *cgrp;
+
+ if (!sk || !sk_fullsock(sk))
+ return 0;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ ancestor = cgroup_ancestor(cgrp, ancestor_level);
+ if (!ancestor)
+ return 0;
+
+ return ancestor->kn->id.id;
+}
+
+static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
+ .func = bpf_skb_ancestor_cgroup_id,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
#endif
static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
@@ -3814,6 +3850,30 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
+{
+ return sock_gen_cookie(ctx->sk);
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
+ .func = bpf_get_socket_cookie_sock_addr,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
+{
+ return sock_gen_cookie(ctx->sk);
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
+ .func = bpf_get_socket_cookie_sock_ops,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
{
struct sock *sk = sk_to_full_sk(skb->sk);
@@ -4544,26 +4604,28 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
+ struct ipv6_sr_hdr *srh = srh_state->srh;
void *srh_tlvs, *srh_end, *ptr;
- struct ipv6_sr_hdr *srh;
int srhoff = 0;
- if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ if (srh == NULL)
return -EINVAL;
- srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
ptr = skb->data + offset;
if (ptr >= srh_tlvs && ptr + len <= srh_end)
- srh_state->valid = 0;
+ srh_state->valid = false;
else if (ptr < (void *)&srh->flags ||
ptr + len > (void *)&srh->segments)
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return -EINVAL;
+ srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
memcpy(skb->data + offset, from, len);
return 0;
@@ -4579,52 +4641,78 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
.arg4_type = ARG_CONST_SIZE
};
-BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
- u32, action, void *, param, u32, param_len)
+static void bpf_update_srh_state(struct sk_buff *skb)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
- struct ipv6_sr_hdr *srh;
int srhoff = 0;
- int err;
- if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
- return -EINVAL;
- srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
-
- if (!srh_state->valid) {
- if (unlikely((srh_state->hdrlen & 7) != 0))
- return -EBADMSG;
-
- srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
- if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
- return -EBADMSG;
-
- srh_state->valid = 1;
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
+ srh_state->srh = NULL;
+ } else {
+ srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+ srh_state->hdrlen = srh_state->srh->hdrlen << 3;
+ srh_state->valid = true;
}
+}
+
+BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
+ u32, action, void *, param, u32, param_len)
+{
+ struct seg6_bpf_srh_state *srh_state =
+ this_cpu_ptr(&seg6_bpf_srh_states);
+ int hdroff = 0;
+ int err;
switch (action) {
case SEG6_LOCAL_ACTION_END_X:
+ if (!seg6_bpf_has_valid_srh(skb))
+ return -EBADMSG;
if (param_len != sizeof(struct in6_addr))
return -EINVAL;
return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
case SEG6_LOCAL_ACTION_END_T:
+ if (!seg6_bpf_has_valid_srh(skb))
+ return -EBADMSG;
+ if (param_len != sizeof(int))
+ return -EINVAL;
+ return seg6_lookup_nexthop(skb, NULL, *(int *)param);
+ case SEG6_LOCAL_ACTION_END_DT6:
+ if (!seg6_bpf_has_valid_srh(skb))
+ return -EBADMSG;
if (param_len != sizeof(int))
return -EINVAL;
+
+ if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
+ return -EBADMSG;
+ if (!pskb_pull(skb, hdroff))
+ return -EBADMSG;
+
+ skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb->encapsulation = 0;
+
+ bpf_compute_data_pointers(skb);
+ bpf_update_srh_state(skb);
return seg6_lookup_nexthop(skb, NULL, *(int *)param);
case SEG6_LOCAL_ACTION_END_B6:
+ if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
+ return -EBADMSG;
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
param, param_len);
if (!err)
- srh_state->hdrlen =
- ((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+ bpf_update_srh_state(skb);
+
return err;
case SEG6_LOCAL_ACTION_END_B6_ENCAP:
+ if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
+ return -EBADMSG;
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
param, param_len);
if (!err)
- srh_state->hdrlen =
- ((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+ bpf_update_srh_state(skb);
+
return err;
default:
return -EINVAL;
@@ -4646,15 +4734,14 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
+ struct ipv6_sr_hdr *srh = srh_state->srh;
void *srh_end, *srh_tlvs, *ptr;
- struct ipv6_sr_hdr *srh;
struct ipv6hdr *hdr;
int srhoff = 0;
int ret;
- if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ if (unlikely(srh == NULL))
return -EINVAL;
- srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
((srh->first_segment + 1) << 4));
@@ -4684,8 +4771,11 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
hdr = (struct ipv6hdr *)skb->data;
hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return -EINVAL;
+ srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_state->hdrlen += len;
- srh_state->valid = 0;
+ srh_state->valid = false;
return 0;
}
@@ -4753,6 +4843,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_trace_printk:
if (capable(CAP_SYS_ADMIN))
return bpf_get_trace_printk_proto();
+ /* else: fall through */
default:
return NULL;
}
@@ -4767,6 +4858,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
*/
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4789,6 +4882,10 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
default:
return NULL;
}
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_sock_addr_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4812,6 +4909,17 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
static const struct bpf_func_proto *
+cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
+ default:
+ return sk_filter_func_proto(func_id, prog);
+ }
+}
+
+static const struct bpf_func_proto *
tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
@@ -4884,6 +4992,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#ifdef CONFIG_SOCK_CGROUP_DATA
case BPF_FUNC_skb_cgroup_id:
return &bpf_skb_cgroup_id_proto;
+ case BPF_FUNC_skb_ancestor_cgroup_id:
+ return &bpf_skb_ancestor_cgroup_id_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -4931,6 +5041,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sock_map_update_proto;
case BPF_FUNC_sock_hash_update:
return &bpf_sock_hash_update_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_sock_ops_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4950,6 +5064,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_cork_bytes_proto;
case BPF_FUNC_msg_pull_data:
return &bpf_msg_pull_data_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4977,6 +5093,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_redirect_map_proto;
case BPF_FUNC_sk_redirect_hash:
return &bpf_sk_redirect_hash_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -6781,7 +6899,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
};
const struct bpf_verifier_ops cg_skb_verifier_ops = {
- .get_func_proto = sk_filter_func_proto,
+ .get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
@@ -6940,3 +7058,271 @@ out:
release_sock(sk);
return ret;
}
+
+#ifdef CONFIG_INET
+struct sk_reuseport_kern {
+ struct sk_buff *skb;
+ struct sock *sk;
+ struct sock *selected_sk;
+ void *data_end;
+ u32 hash;
+ u32 reuseport_id;
+ bool bind_inany;
+};
+
+static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
+ struct sock_reuseport *reuse,
+ struct sock *sk, struct sk_buff *skb,
+ u32 hash)
+{
+ reuse_kern->skb = skb;
+ reuse_kern->sk = sk;
+ reuse_kern->selected_sk = NULL;
+ reuse_kern->data_end = skb->data + skb_headlen(skb);
+ reuse_kern->hash = hash;
+ reuse_kern->reuseport_id = reuse->reuseport_id;
+ reuse_kern->bind_inany = reuse->bind_inany;
+}
+
+struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+ struct bpf_prog *prog, struct sk_buff *skb,
+ u32 hash)
+{
+ struct sk_reuseport_kern reuse_kern;
+ enum sk_action action;
+
+ bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
+ action = BPF_PROG_RUN(prog, &reuse_kern);
+
+ if (action == SK_PASS)
+ return reuse_kern.selected_sk;
+ else
+ return ERR_PTR(-ECONNREFUSED);
+}
+
+BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
+ struct bpf_map *, map, void *, key, u32, flags)
+{
+ struct sock_reuseport *reuse;
+ struct sock *selected_sk;
+
+ selected_sk = map->ops->map_lookup_elem(map, key);
+ if (!selected_sk)
+ return -ENOENT;
+
+ reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
+ if (!reuse)
+ /* selected_sk is unhashed (e.g. by close()) after the
+ * above map_lookup_elem(). Treat selected_sk has already
+ * been removed from the map.
+ */
+ return -ENOENT;
+
+ if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
+ struct sock *sk;
+
+ if (unlikely(!reuse_kern->reuseport_id))
+ /* There is a small race between adding the
+ * sk to the map and setting the
+ * reuse_kern->reuseport_id.
+ * Treat it as the sk has not been added to
+ * the bpf map yet.
+ */
+ return -ENOENT;
+
+ sk = reuse_kern->sk;
+ if (sk->sk_protocol != selected_sk->sk_protocol)
+ return -EPROTOTYPE;
+ else if (sk->sk_family != selected_sk->sk_family)
+ return -EAFNOSUPPORT;
+
+ /* Catch all. Likely bound to a different sockaddr. */
+ return -EBADFD;
+ }
+
+ reuse_kern->selected_sk = selected_sk;
+
+ return 0;
+}
+
+static const struct bpf_func_proto sk_select_reuseport_proto = {
+ .func = sk_select_reuseport,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_PTR_TO_MAP_KEY,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(sk_reuseport_load_bytes,
+ const struct sk_reuseport_kern *, reuse_kern, u32, offset,
+ void *, to, u32, len)
+{
+ return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
+}
+
+static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
+ .func = sk_reuseport_load_bytes,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(sk_reuseport_load_bytes_relative,
+ const struct sk_reuseport_kern *, reuse_kern, u32, offset,
+ void *, to, u32, len, u32, start_header)
+{
+ return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
+ len, start_header);
+}
+
+static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
+ .func = sk_reuseport_load_bytes_relative,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+ .arg5_type = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *
+sk_reuseport_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_sk_select_reuseport:
+ return &sk_select_reuseport_proto;
+ case BPF_FUNC_skb_load_bytes:
+ return &sk_reuseport_load_bytes_proto;
+ case BPF_FUNC_skb_load_bytes_relative:
+ return &sk_reuseport_load_bytes_relative_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static bool
+sk_reuseport_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ const u32 size_default = sizeof(__u32);
+
+ if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
+ off % size || type != BPF_READ)
+ return false;
+
+ switch (off) {
+ case offsetof(struct sk_reuseport_md, data):
+ info->reg_type = PTR_TO_PACKET;
+ return size == sizeof(__u64);
+
+ case offsetof(struct sk_reuseport_md, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ return size == sizeof(__u64);
+
+ case offsetof(struct sk_reuseport_md, hash):
+ return size == size_default;
+
+ /* Fields that allow narrowing */
+ case offsetof(struct sk_reuseport_md, eth_protocol):
+ if (size < FIELD_SIZEOF(struct sk_buff, protocol))
+ return false;
+ /* fall through */
+ case offsetof(struct sk_reuseport_md, ip_protocol):
+ case offsetof(struct sk_reuseport_md, bind_inany):
+ case offsetof(struct sk_reuseport_md, len):
+ bpf_ctx_record_field_size(info, size_default);
+ return bpf_ctx_narrow_access_ok(off, size, size_default);
+
+ default:
+ return false;
+ }
+}
+
+#define SK_REUSEPORT_LOAD_FIELD(F) ({ \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+ si->dst_reg, si->src_reg, \
+ bpf_target_off(struct sk_reuseport_kern, F, \
+ FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+ target_size)); \
+ })
+
+#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD) \
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
+ struct sk_buff, \
+ skb, \
+ SKB_FIELD)
+
+#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
+ SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \
+ struct sock, \
+ sk, \
+ SK_FIELD, BPF_SIZE, EXTRA_OFF)
+
+static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct sk_reuseport_md, data):
+ SK_REUSEPORT_LOAD_SKB_FIELD(data);
+ break;
+
+ case offsetof(struct sk_reuseport_md, len):
+ SK_REUSEPORT_LOAD_SKB_FIELD(len);
+ break;
+
+ case offsetof(struct sk_reuseport_md, eth_protocol):
+ SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
+ break;
+
+ case offsetof(struct sk_reuseport_md, ip_protocol):
+ BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
+ SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
+ BPF_W, 0);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
+ SK_FL_PROTO_SHIFT);
+ /* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
+ * aware. No further narrowing or masking is needed.
+ */
+ *target_size = 1;
+ break;
+
+ case offsetof(struct sk_reuseport_md, data_end):
+ SK_REUSEPORT_LOAD_FIELD(data_end);
+ break;
+
+ case offsetof(struct sk_reuseport_md, hash):
+ SK_REUSEPORT_LOAD_FIELD(hash);
+ break;
+
+ case offsetof(struct sk_reuseport_md, bind_inany):
+ SK_REUSEPORT_LOAD_FIELD(bind_inany);
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
+const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
+ .get_func_proto = sk_reuseport_func_proto,
+ .is_valid_access = sk_reuseport_is_valid_access,
+ .convert_ctx_access = sk_reuseport_convert_ctx_access,
+};
+
+const struct bpf_prog_ops sk_reuseport_prog_ops = {
+};
+#endif /* CONFIG_INET */