summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2018-10-19 13:49:35 -0700
committerAlexei Starovoitov <ast@kernel.org>2018-10-19 13:49:35 -0700
commitd375e344781d099697cdc0fb4ee8b9170558f272 (patch)
treec8a67095d219b652a8eea067188bff6c599e6881
parent2929ad29a3015120ab64c86f362fc2a590ceb69c (diff)
parent2cb494a36c98279c5c6ce8e99cf9776f15449ade (diff)
Merge branch 'cg_skb_direct_pkt_access'
Song Liu says: ==================== Changes v7 -> v8: 1. Dynamically allocate the dummy sk to avoid race conditions. Changes v6 -> v7: 1. Make dummy sk a global variable (test_run_sk). Changes v5 -> v6: 1. Fixed dummy sk in bpf_prog_test_run_skb() as suggested by Eric Dumazet. Changes v4 -> v5: 1. Replaced bpf_compute_and_save_data_pointers() with bpf_compute_and_save_data_end(); Replaced bpf_restore_data_pointers() with bpf_restore_data_end(). 2. Fixed indentation in test_verifier.c Changes v3 -> v4: 1. Fixed crash issue reported by Alexei. Changes v2 -> v3: 1. Added helper function bpf_compute_and_save_data_pointers() and bpf_restore_data_pointers(). Changes v1 -> v2: 1. Updated the list of read-only fields, and read-write fields. 2. Added dummy sk to bpf_prog_test_run_skb(). This set enables BPF program of type BPF_PROG_TYPE_CGROUP_SKB to access some __skb_buff data directly. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--include/linux/filter.h21
-rw-r--r--kernel/bpf/cgroup.c6
-rw-r--r--net/bpf/test_run.c15
-rw-r--r--net/core/filter.c36
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c171
5 files changed, 248 insertions, 1 deletions
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..91b4c934f02e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
cb->data_end = skb->data + skb_headlen(skb);
}
+/* Similar to bpf_compute_data_pointers(), except that save orginal
+ * data in cb->data and cb->meta_data for restore.
+ */
+static inline void bpf_compute_and_save_data_end(
+ struct sk_buff *skb, void **saved_data_end)
+{
+ struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+ *saved_data_end = cb->data_end;
+ cb->data_end = skb->data + skb_headlen(skb);
+}
+
+/* Restore data saved by bpf_compute_data_pointers(). */
+static inline void bpf_restore_data_end(
+ struct sk_buff *skb, void *saved_data_end)
+{
+ struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+ cb->data_end = saved_data_end;
+}
+
static inline u8 *bpf_skb_cb(struct sk_buff *skb)
{
/* eBPF programs may read/write skb->cb[] area to transfer meta
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..9425c2fb872f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -553,6 +553,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
{
unsigned int offset = skb->data - skb_network_header(skb);
struct sock *save_sk;
+ void *saved_data_end;
struct cgroup *cgrp;
int ret;
@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
save_sk = skb->sk;
skb->sk = sk;
__skb_push(skb, offset);
+
+ /* compute pointers for the bpf prog */
+ bpf_compute_and_save_data_end(skb, &saved_data_end);
+
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
bpf_prog_run_save_cb);
+ bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
skb->sk = save_sk;
return ret == 1 ? 0 : -EPERM;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0c423b8cd75c..c89c22c49015 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,6 +10,8 @@
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/sched/signal.h>
+#include <net/sock.h>
+#include <net/tcp.h>
static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
@@ -115,6 +117,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
u32 retval, duration;
int hh_len = ETH_HLEN;
struct sk_buff *skb;
+ struct sock *sk;
void *data;
int ret;
@@ -137,11 +140,21 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
break;
}
+ sk = kzalloc(sizeof(struct sock), GFP_USER);
+ if (!sk) {
+ kfree(data);
+ return -ENOMEM;
+ }
+ sock_net_set(sk, current->nsproxy->net_ns);
+ sock_init_data(NULL, sk);
+
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
+ kfree(sk);
return -ENOMEM;
}
+ skb->sk = sk;
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
@@ -159,6 +172,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
kfree_skb(skb);
+ kfree(sk);
return -ENOMEM;
}
}
@@ -171,6 +185,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
size = skb_headlen(skb);
ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
kfree_skb(skb);
+ kfree(sk);
return ret;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index ea48ec789b5c..5fd5139e8638 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5352,6 +5352,40 @@ static bool sk_filter_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
}
+static bool cg_skb_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ case bpf_ctx_range(struct __sk_buff, flow_keys):
+ return false;
+ }
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, mark):
+ case bpf_ctx_range(struct __sk_buff, priority):
+ case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -7044,7 +7078,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
- .is_valid_access = sk_filter_is_valid_access,
+ .is_valid_access = cg_skb_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index cf4cd32b6772..f1ae8d09770f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4863,6 +4863,177 @@ static struct bpf_test tests[] = {
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "direct packet read test#1 for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, queue_mapping)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, protocol)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_present)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "direct packet read test#2 for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_tci)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_proto)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, priority)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+ offsetof(struct __sk_buff, priority)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff,
+ ingress_ifindex)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, hash)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "direct packet read test#3 for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, napi_id)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "direct packet read test#4 for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, family)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip4)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip4)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[3])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[3])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_port)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, local_port)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid access of tc_classid for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid access of data_meta for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data_meta)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid access of flow_keys for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, flow_keys)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid write access to napi_id for CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, napi_id)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9,
+ offsetof(struct __sk_buff, napi_id)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
"valid cgroup storage access",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 0),