From 13a9c48a85ccf1417b527975c0a12b47fbfaf625 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:51 -0800 Subject: bpf: offload: add comment warning developers about double destroy Offload state may get destroyed either because the device for which it was constructed is going away, or because the refcount of bpf program itself has reached 0. In both of those cases we will call __bpf_prog_offload_destroy() to unlink the offload from the device. We may in fact call it twice, which works just fine, but we should make clear this is intended and caution others trying to extend the function. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/bpf/offload.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 2816feb38be1..fd696d3dd429 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -85,6 +85,10 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) struct bpf_dev_offload *offload = prog->aux->offload; struct netdev_bpf data = {}; + /* Caution - if netdev is destroyed before the program, this function + * will be called twice. + */ + data.offload.prog = prog; if (offload->verifier_running) -- cgit From 649f11dcd19a5f0d00fdbc760fbdccdd98e56a43 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:52 -0800 Subject: bpf: offload: limit offload to cls_bpf and xdp programs only We are currently only allowing attachment of device-bound cls_bpf and XDP programs. Make this restriction explicit in the BPF offload code. This way we can potentially reuse the ifindex field in the future. Since XDP and cls_bpf programs can only be loaded by admin, we can drop the explicit capability check from offload code. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/bpf/offload.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index fd696d3dd429..ac187f9ee182 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -14,8 +14,9 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) struct net *net = current->nsproxy->net_ns; struct bpf_dev_offload *offload; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS && + attr->prog_type != BPF_PROG_TYPE_XDP) + return -EINVAL; if (attr->prog_flags) return -EINVAL; -- cgit From 1f6f4cb7ba219b00a3fa9afe8049fa16444d8b52 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:53 -0800 Subject: bpf: offload: rename the ifindex field bpf_target_prog seems long and clunky, rename it to prog_ifindex. We don't want to call this field just ifindex, because maps may need a similar field in the future and bpf_attr members for programs and maps are unnamed. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 2 +- kernel/bpf/offload.c | 2 +- kernel/bpf/syscall.c | 4 ++-- tools/include/uapi/linux/bpf.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e880ae6434ee..3f626df42516 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -262,7 +262,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; - __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ + __u32 prog_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index ac187f9ee182..a778e5df7e26 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -29,7 +29,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) init_waitqueue_head(&offload->verifier_done); rtnl_lock(); - offload->netdev = __dev_get_by_index(net, attr->prog_target_ifindex); + offload->netdev = __dev_get_by_index(net, attr->prog_ifindex); if (!offload->netdev) { rtnl_unlock(); kfree(offload); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 09badc37e864..8e9d065bb7cd 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1118,7 +1118,7 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_target_ifindex +#define BPF_PROG_LOAD_LAST_FIELD prog_ifindex static int bpf_prog_load(union bpf_attr *attr) { @@ -1181,7 +1181,7 @@ static int bpf_prog_load(union bpf_attr *attr) atomic_set(&prog->aux->refcnt, 1); prog->gpl_compatible = is_gpl ? 1 : 0; - if (attr->prog_target_ifindex) { + if (attr->prog_ifindex) { err = bpf_prog_offload_init(prog, attr); if (err) goto free_prog; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e880ae6434ee..3f626df42516 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -262,7 +262,7 @@ union bpf_attr { __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; - __u32 prog_target_ifindex; /* ifindex of netdev to prep for */ + __u32 prog_ifindex; /* ifindex of netdev to prep for */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit From 288b3de55aace830f13280985ec9e6bcbff33b1b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:54 -0800 Subject: bpf: offload: move offload device validation out to the drivers With TC shared block changes we can't depend on correct netdev pointer being available in cls_bpf. Move the device validation to the driver. Core will only make sure that offloaded programs are always attached in the driver (or in HW by the driver). We trust that drivers which implement offload callbacks will perform necessary checks. Moving the checks to the driver is generally a useful thing, in practice the check should be against a switchdev instance, not a netdev, given that most ASICs will probably allow using the same program on many ports. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Jiri Pirko Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 10 ++++++++-- include/linux/bpf.h | 4 ++-- kernel/bpf/syscall.c | 23 ++++++++++++----------- net/core/dev.c | 7 ++----- net/sched/cls_bpf.c | 8 +++----- 5 files changed, 27 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index b6cee71f49d3..bc879aeb62d4 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -214,8 +214,14 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, { int err; - if (prog && !prog->aux->offload) - return -EINVAL; + if (prog) { + struct bpf_dev_offload *offload = prog->aux->offload; + + if (!offload) + return -EINVAL; + if (offload->netdev != nn->dp.netdev) + return -EINVAL; + } if (prog && old_prog) { u8 cap; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c397934f91dd..f82be640731e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -336,7 +336,7 @@ extern const struct bpf_verifier_ops xdp_analyzer_ops; struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, - struct net_device *netdev); + bool attach_drv); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); @@ -433,7 +433,7 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, - struct net_device *netdev) + bool attach_drv) { return ERR_PTR(-EOPNOTSUPP); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8e9d065bb7cd..38da55905ab0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1057,22 +1057,23 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) } EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); -static bool bpf_prog_can_attach(struct bpf_prog *prog, - enum bpf_prog_type *attach_type, - struct net_device *netdev) +static bool bpf_prog_get_ok(struct bpf_prog *prog, + enum bpf_prog_type *attach_type, bool attach_drv) { - struct bpf_dev_offload *offload = prog->aux->offload; + /* not an attachment, just a refcount inc, always allow */ + if (!attach_type) + return true; if (prog->type != *attach_type) return false; - if (offload && offload->netdev != netdev) + if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv) return false; return true; } static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, - struct net_device *netdev) + bool attach_drv) { struct fd f = fdget(ufd); struct bpf_prog *prog; @@ -1080,7 +1081,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, prog = ____bpf_prog_get(f); if (IS_ERR(prog)) return prog; - if (attach_type && !bpf_prog_can_attach(prog, attach_type, netdev)) { + if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { prog = ERR_PTR(-EINVAL); goto out; } @@ -1093,12 +1094,12 @@ out: struct bpf_prog *bpf_prog_get(u32 ufd) { - return __bpf_prog_get(ufd, NULL, NULL); + return __bpf_prog_get(ufd, NULL, false); } struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) { - struct bpf_prog *prog = __bpf_prog_get(ufd, &type, NULL); + struct bpf_prog *prog = __bpf_prog_get(ufd, &type, false); if (!IS_ERR(prog)) trace_bpf_prog_get_type(prog); @@ -1107,9 +1108,9 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) EXPORT_SYMBOL_GPL(bpf_prog_get_type); struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, - struct net_device *netdev) + bool attach_drv) { - struct bpf_prog *prog = __bpf_prog_get(ufd, &type, netdev); + struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv); if (!IS_ERR(prog)) trace_bpf_prog_get_type(prog); diff --git a/net/core/dev.c b/net/core/dev.c index 8ee29f4f5fa9..09525a27319c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7139,11 +7139,8 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, __dev_xdp_attached(dev, bpf_op, NULL)) return -EBUSY; - if (bpf_op == ops->ndo_bpf) - prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, - dev); - else - prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); + prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, + bpf_op == ops->ndo_bpf); if (IS_ERR(prog)) return PTR_ERR(prog); } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index fb680dafac5a..a9f3e317055c 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -382,15 +382,13 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, { struct bpf_prog *fp; char *name = NULL; + bool skip_sw; u32 bpf_fd; bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); + skip_sw = gen_flags & TCA_CLS_FLAGS_SKIP_SW; - if (gen_flags & TCA_CLS_FLAGS_SKIP_SW) - fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, - qdisc_dev(tp->q)); - else - fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); + fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, skip_sw); if (IS_ERR(fp)) return PTR_ERR(fp); -- cgit From 441a33031fe5a3e828fbc17a4f9a5bab9143243c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:55 -0800 Subject: net: xdp: don't allow device-bound programs in driver mode Currently device-bound programs are not able to run on the host to save resources (host JIT is not invoked). Don't allow XDP programs to be attached without the HW_MODE flag. In theory if program is already translated for device offload the driver should choose to offload it instead of loading it in the driver. However, offloading translated program may still fail resulting in device-bound program being run on the host. Prevent this by refusing to attach device bound programs if XDP_FLAGS_HW_MODE is not set. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- net/core/dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 09525a27319c..5e2ba133fba7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7143,6 +7143,13 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_op == ops->ndo_bpf); if (IS_ERR(prog)) return PTR_ERR(prog); + + if (!(flags & XDP_FLAGS_HW_MODE) && + bpf_prog_is_dev_bound(prog->aux)) { + NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); + bpf_prog_put(prog); + return -EINVAL; + } } err = dev_xdp_install(dev, bpf_op, extack, flags, prog); -- cgit From 479321e9c31a6c05426790b11888427400f75ac8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:56 -0800 Subject: bpf: turn bpf_prog_get_type() into a wrapper bpf_prog_get_type() is identical to bpf_prog_get_type_dev(), with false passed as attach_drv. Instead of keeping it as an exported symbol turn it into static inline wrapper. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 13 ++++++------- kernel/bpf/syscall.c | 10 ---------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f82be640731e..37bbab8c0f56 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -334,7 +334,6 @@ extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; extern const struct bpf_verifier_ops xdp_analyzer_ops; struct bpf_prog *bpf_prog_get(u32 ufd); -struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); @@ -425,12 +424,6 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } -static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, - enum bpf_prog_type type) -{ - return ERR_PTR(-EOPNOTSUPP); -} - static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv) @@ -514,6 +507,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, } #endif /* CONFIG_BPF_SYSCALL */ +static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, + enum bpf_prog_type type) +{ + return bpf_prog_get_type_dev(ufd, type, false); +} + int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); u32 bpf_prog_offload_ifindex(struct bpf_prog *prog); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 38da55905ab0..41509cf825d8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1097,16 +1097,6 @@ struct bpf_prog *bpf_prog_get(u32 ufd) return __bpf_prog_get(ufd, NULL, false); } -struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) -{ - struct bpf_prog *prog = __bpf_prog_get(ufd, &type, false); - - if (!IS_ERR(prog)) - trace_bpf_prog_get_type(prog); - return prog; -} -EXPORT_SYMBOL_GPL(bpf_prog_get_type); - struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv) { -- cgit From 62c71b45e8537b8cb746cc929ea05ba0258e0b5a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:57 -0800 Subject: bpf: offload: ignore namespace moves We are currently destroying the device offload state when device moves to another net namespace. This doesn't break with current NFP code, because offload state is not used on program removal, but it's not correct behaviour. Ignore the device unregister notifications on namespace move. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- kernel/bpf/offload.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index a778e5df7e26..d4267c674fec 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -174,6 +174,10 @@ static int bpf_offload_notification(struct notifier_block *notifier, switch (event) { case NETDEV_UNREGISTER: + /* ignore namespace changes */ + if (netdev->reg_state != NETREG_UNREGISTERING) + break; + list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads) { if (offload->netdev == netdev) -- cgit From 51aa423959b0ab62169c98b90566a0628ba096b8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:58 -0800 Subject: bpftool: revert printing program device bound info This reverts commit 928631e05495 ("bpftool: print program device bound info"). We will remove this API and redo it right in -next. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 31 ------------------------------- tools/include/uapi/linux/bpf.h | 6 ------ 2 files changed, 37 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f45c44ef9bec..ad619b96c276 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include @@ -230,21 +229,6 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); - if (info->status & BPF_PROG_STATUS_DEV_BOUND) { - jsonw_name(json_wtr, "dev"); - if (info->ifindex) { - char name[IF_NAMESIZE]; - - if (!if_indextoname(info->ifindex, name)) - jsonw_printf(json_wtr, "\"ifindex:%d\"", - info->ifindex); - else - jsonw_printf(json_wtr, "\"%s\"", name); - } else { - jsonw_printf(json_wtr, "\"unknown\""); - } - } - if (info->load_time) { char buf[32]; @@ -302,21 +286,6 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); - printf(" "); - - if (info->status & BPF_PROG_STATUS_DEV_BOUND) { - printf("dev "); - if (info->ifindex) { - char name[IF_NAMESIZE]; - - if (!if_indextoname(info->ifindex, name)) - printf("ifindex:%d ", info->ifindex); - else - printf("%s ", name); - } else { - printf("unknown "); - } - } printf("\n"); if (info->load_time) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3f626df42516..4c223ab30293 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -897,10 +897,6 @@ enum sk_action { #define BPF_TAG_SIZE 8 -enum bpf_prog_status { - BPF_PROG_STATUS_DEV_BOUND = (1 << 0), -}; - struct bpf_prog_info { __u32 type; __u32 id; @@ -914,8 +910,6 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; - __u32 ifindex; - __u32 status; } __attribute__((aligned(8))); struct bpf_map_info { -- cgit From 1ee640095f049e7ac4ec36b985abada497b98cc2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:59 -0800 Subject: bpf: revert report offload info to user space This reverts commit bd601b6ada11 ("bpf: report offload info to user space"). The ifindex by itself is not sufficient, we should provide information on which network namespace this ifindex belongs to. After considering some options we concluded that it's best to just remove this API for now, and rework it in -next. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 - include/uapi/linux/bpf.h | 6 ------ kernel/bpf/offload.c | 12 ------------ kernel/bpf/syscall.c | 5 ----- 4 files changed, 24 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 37bbab8c0f56..76c577281d78 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -515,7 +515,6 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); -u32 bpf_prog_offload_ifindex(struct bpf_prog *prog); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3f626df42516..4c223ab30293 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -897,10 +897,6 @@ enum sk_action { #define BPF_TAG_SIZE 8 -enum bpf_prog_status { - BPF_PROG_STATUS_DEV_BOUND = (1 << 0), -}; - struct bpf_prog_info { __u32 type; __u32 id; @@ -914,8 +910,6 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; - __u32 ifindex; - __u32 status; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index d4267c674fec..68ec884440b7 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -149,18 +149,6 @@ int bpf_prog_offload_compile(struct bpf_prog *prog) return bpf_prog_offload_translate(prog); } -u32 bpf_prog_offload_ifindex(struct bpf_prog *prog) -{ - struct bpf_dev_offload *offload = prog->aux->offload; - u32 ifindex; - - rtnl_lock(); - ifindex = offload->netdev ? offload->netdev->ifindex : 0; - rtnl_unlock(); - - return ifindex; -} - const struct bpf_prog_ops bpf_offload_prog_ops = { }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 41509cf825d8..2c4cfeaa8d5e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1616,11 +1616,6 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, return -EFAULT; } - if (bpf_prog_is_dev_bound(prog->aux)) { - info.status |= BPF_PROG_STATUS_DEV_BOUND; - info.ifindex = bpf_prog_offload_ifindex(prog); - } - done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) -- cgit From 1438019479349d262b76f8767ace3273d11b6dcb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:22:00 -0800 Subject: bpf: make bpf_prog_offload_verifier_prep() static inline Header implementation of bpf_prog_offload_verifier_prep() which is used if CONFIG_NET=n should be a static inline. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 07b96aaca256..b61482d354a2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -171,7 +171,7 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); #else -int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) +static inline int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) { return -EOPNOTSUPP; } -- cgit From f1a8b8e3d98b0b3d1d88d119f52cc85d05ab3189 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 21 Nov 2017 11:23:40 -0800 Subject: bpf: change bpf_probe_write_user to bpf_trace_printk in test_verifier There are four tests in test_verifier using bpf_probe_write_user helper. These four tests will emit the following kernel messages [ 12.974753] test_verifier[220] is installing a program with bpf_probe_write_user helper that may corrupt user memory! [ 12.979285] test_verifier[220] is installing a program with bpf_probe_write_user helper that may corrupt user memory! ...... This may confuse certain users. This patch replaces bpf_probe_write_user with bpf_trace_printk. The test_verifier already uses bpf_trace_printk earlier in the test and a trace_printk warning message has been printed. So this patch does not emit any more kernel messages. Fixes: b6ff63911232 ("bpf: fix and add test cases for ARG_CONST_SIZE_OR_ZERO semantics change") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 39 ++++++++++++----------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index bf092b83e453..2a5267bef160 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -4377,11 +4377,10 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_write_user), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, @@ -4481,14 +4480,12 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_write_user), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, @@ -4618,18 +4615,16 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_3, 0), BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_write_user), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "R2 min value is outside of the array range", + .errstr = "R1 min value is outside of the array range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -4760,20 +4755,18 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), BPF_JMP_IMM(BPF_JGT, BPF_REG_3, - offsetof(struct test_val, foo), 4), + offsetof(struct test_val, foo), 3), BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_write_user), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "R2 min value is outside of the array range", + .errstr = "R1 min value is outside of the array range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, -- cgit From db1ac4964fa172803a0fea83033cd35d380a8a77 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Wed, 22 Nov 2017 18:32:53 +0000 Subject: bpf: introduce ARG_PTR_TO_MEM_OR_NULL With the current ARG_PTR_TO_MEM/ARG_PTR_TO_UNINIT_MEM semantics, an helper argument can be NULL when the next argument type is ARG_CONST_SIZE_OR_ZERO and the verifier can prove the value of this next argument is 0. However, most helpers are just interested in handling , so forcing them to deal with makes the implementation of those helpers more complicated for no apparent benefits, requiring them to explicitly handle those corner cases with checks that bpf programs could start relying upon, preventing the possibility of removing them later. Solve this by making ARG_PTR_TO_MEM/ARG_PTR_TO_UNINIT_MEM never accept NULL even when ARG_CONST_SIZE_OR_ZERO is set, and introduce a new argument type ARG_PTR_TO_MEM_OR_NULL to explicitly deal with the NULL case. Currently, the only helper that needs this is bpf_csum_diff_proto(), so change arg1 and arg3 to this new type as well. Also add a new battery of tests that explicitly test the !ARG_PTR_TO_MEM_OR_NULL combination: all the current ones testing the various variations are focused on bpf_csum_diff, so cover also other helpers. Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 + kernel/bpf/verifier.c | 4 +- net/core/filter.c | 4 +- tools/testing/selftests/bpf/test_verifier.c | 113 ++++++++++++++++++++++++++-- 4 files changed, 112 insertions(+), 10 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 76c577281d78..e55e4255a210 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -78,6 +78,7 @@ enum bpf_arg_type { * functions that access data on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */ ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, * helper function must fill all bytes or clear * them in error case. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dd54d20ace2f..308b0638ec5d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1384,13 +1384,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, if (type != expected_type) goto err_type; } else if (arg_type == ARG_PTR_TO_MEM || + arg_type == ARG_PTR_TO_MEM_OR_NULL || arg_type == ARG_PTR_TO_UNINIT_MEM) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be * passed in as argument, it's a SCALAR_VALUE type. Final test * happens during stack boundary checking. */ - if (register_is_null(*reg)) + if (register_is_null(*reg) && + arg_type == ARG_PTR_TO_MEM_OR_NULL) /* final test in check_stack_boundary() */; else if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE && diff --git a/net/core/filter.c b/net/core/filter.c index 1afa17935954..6a85e67fafce 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1646,9 +1646,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { .gpl_only = false, .pkt_access = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_MEM, + .arg1_type = ARG_PTR_TO_MEM_OR_NULL, .arg2_type = ARG_CONST_SIZE_OR_ZERO, - .arg3_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_PTR_TO_MEM_OR_NULL, .arg4_type = ARG_CONST_SIZE_OR_ZERO, .arg5_type = ARG_ANYTHING, }; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 2a5267bef160..3c64f30cf63c 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -5631,7 +5631,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { - "helper access to variable memory: size = 0 allowed on NULL", + "helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_MOV64_IMM(BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_2, 0), @@ -5645,7 +5645,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to variable memory: size > 0 not allowed on NULL", + "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_MOV64_IMM(BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_2, 0), @@ -5663,7 +5663,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to variable memory: size = 0 allowed on != NULL stack pointer", + "helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), @@ -5680,7 +5680,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to variable memory: size = 0 allowed on != NULL map pointer", + "helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -5702,7 +5702,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer", + "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -5727,7 +5727,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to variable memory: size possible = 0 allowed on != NULL map pointer", + "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -5750,7 +5750,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer", + "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -5771,6 +5771,105 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .errstr = "R1 type=inv expected=fp", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .errstr = "R1 type=inv expected=fp", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, { "helper access to variable memory: 8 bytes leak", .insns = { -- cgit From eb33f2cca49ec49a1b893b5af546e7c042ca6365 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Wed, 22 Nov 2017 18:32:54 +0000 Subject: bpf: remove explicit handling of 0 for arg2 in bpf_probe_read Commit 9c019e2bc4b2 ("bpf: change helper bpf_probe_read arg2 type to ARG_CONST_SIZE_OR_ZERO") changed arg2 type to ARG_CONST_SIZE_OR_ZERO to simplify writing bpf programs by taking advantage of the new semantics introduced for ARG_CONST_SIZE_OR_ZERO which allows arguments. In order to prevent the helper from actually passing a NULL pointer to probe_kernel_read, which can happen when is passed to the helper, the commit also introduced an explicit check against size == 0. After the recent introduction of the ARG_PTR_TO_MEM_OR_NULL type, bpf_probe_read can not receive a pair of arguments anymore, thus the check is not needed anymore and can be removed, since probe_kernel_read can correctly handle a call. This also fixes the semantics of the helper before it gets officially released and bpf programs start relying on this check. Fixes: 9c019e2bc4b2 ("bpf: change helper bpf_probe_read arg2 type to ARG_CONST_SIZE_OR_ZERO") Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a5580c670866..728909f7951c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -78,16 +78,12 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { - int ret = 0; - - if (unlikely(size == 0)) - goto out; + int ret; ret = probe_kernel_read(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); - out: return ret; } -- cgit From 5c4e1201740ceae9bd6f622851a9bf7c66debe3a Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Wed, 22 Nov 2017 18:32:55 +0000 Subject: bpf: change bpf_probe_read_str arg2 type to ARG_CONST_SIZE_OR_ZERO Commit 9fd29c08e520 ("bpf: improve verifier ARG_CONST_SIZE_OR_ZERO semantics") relaxed the treatment of ARG_CONST_SIZE_OR_ZERO due to the way the compiler generates optimized BPF code when checking boundaries of an argument from C code. A typical example of this optimized code can be generated using the bpf_probe_read_str helper when operating on variable memory: /* len is a generic scalar */ if (len > 0 && len <= 0x7fff) bpf_probe_read_str(p, len, s); 251: (79) r1 = *(u64 *)(r10 -88) 252: (07) r1 += -1 253: (25) if r1 > 0x7ffe goto pc-42 254: (bf) r1 = r7 255: (79) r2 = *(u64 *)(r10 -88) 256: (bf) r8 = r4 257: (85) call bpf_probe_read_str#45 R2 min value is negative, either use unsigned or 'var &= const' With this code, the verifier loses track of the variable. Replacing arg2 with ARG_CONST_SIZE_OR_ZERO is thus desirable since it avoids this quite common case which leads to usability issues, and the compiler generates code that the verifier can more easily test: if (len <= 0x7fff) bpf_probe_read_str(p, len, s); or bpf_probe_read_str(p, len & 0x7fff, s); No changes to the bpf_probe_read_str helper are necessary since strncpy_from_unsafe itself immediately returns if the size passed is 0. Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 728909f7951c..ed8601a1a861 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -494,7 +494,7 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = { .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_UNINIT_MEM, - .arg2_type = ARG_CONST_SIZE, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, }; -- cgit From a60dd35d2e39209fa7645945e1192bf9769872c6 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Wed, 22 Nov 2017 18:32:56 +0000 Subject: bpf: change bpf_perf_event_output arg5 type to ARG_CONST_SIZE_OR_ZERO Commit 9fd29c08e520 ("bpf: improve verifier ARG_CONST_SIZE_OR_ZERO semantics") relaxed the treatment of ARG_CONST_SIZE_OR_ZERO due to the way the compiler generates optimized BPF code when checking boundaries of an argument from C code. A typical example of this optimized code can be generated using the bpf_perf_event_output helper when operating on variable memory: /* len is a generic scalar */ if (len > 0 && len <= 0x7fff) bpf_perf_event_output(ctx, &perf_map, 0, buf, len); 110: (79) r5 = *(u64 *)(r10 -40) 111: (bf) r1 = r5 112: (07) r1 += -1 113: (25) if r1 > 0x7ffe goto pc+6 114: (bf) r1 = r6 115: (18) r2 = 0xffff94e5f166c200 117: (b7) r3 = 0 118: (bf) r4 = r7 119: (85) call bpf_perf_event_output#25 R5 min value is negative, either use unsigned or 'var &= const' With this code, the verifier loses track of the variable. Replacing arg5 with ARG_CONST_SIZE_OR_ZERO is thus desirable since it avoids this quite common case which leads to usability issues, and the compiler generates code that the verifier can more easily test: if (len <= 0x7fff) bpf_perf_event_output(ctx, &perf_map, 0, buf, len); or bpf_perf_event_output(ctx, &perf_map, 0, buf, len & 0x7fff); No changes to the bpf_perf_event_output helper are necessary since it can handle a case where size is 0, and an empty frame is pushed. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/trace/bpf_trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ed8601a1a861..27d1f4ffa3de 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -403,7 +403,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); @@ -605,7 +605,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map, -- cgit From c131187db2d3fa2f8bf32fdf4e9a4ef805168467 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 22 Nov 2017 16:42:05 -0800 Subject: bpf: fix branch pruning logic when the verifier detects that register contains a runtime constant and it's compared with another constant it will prune exploration of the branch that is guaranteed not to be taken at runtime. This is all correct, but malicious program may be constructed in such a way that it always has a constant comparison and the other branch is never taken under any conditions. In this case such path through the program will not be explored by the verifier. It won't be taken at run-time either, but since all instructions are JITed the malicious program may cause JITs to complain about using reserved fields, etc. To fix the issue we have to track the instructions explored by the verifier and sanitize instructions that are dead at run time with NOPs. We cannot reject such dead code, since llvm generates it for valid C code, since it doesn't do as much data flow analysis as the verifier does. Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 2 +- kernel/bpf/verifier.c | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b61482d354a2..c561b986bab0 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -115,7 +115,7 @@ struct bpf_insn_aux_data { struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ - int converted_op_size; /* the valid value width after perceived conversion */ + bool seen; /* this insn was processed by the verifier */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 308b0638ec5d..d4593571c404 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3827,6 +3827,7 @@ static int do_check(struct bpf_verifier_env *env) return err; regs = cur_regs(env); + env->insn_aux_data[insn_idx].seen = true; if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -4022,6 +4023,7 @@ process_bpf_exit: return err; insn_idx++; + env->insn_aux_data[insn_idx].seen = true; } else { verbose(env, "invalid BPF_LD mode\n"); return -EINVAL; @@ -4204,6 +4206,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, u32 off, u32 cnt) { struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; + int i; if (cnt == 1) return 0; @@ -4213,6 +4216,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); + for (i = off; i < off + cnt - 1; i++) + new_data[i].seen = true; env->insn_aux_data = new_data; vfree(old_data); return 0; @@ -4231,6 +4236,25 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of return new_prog; } +/* The verifier does more data flow analysis than llvm and will not explore + * branches that are dead at run time. Malicious programs can have dead code + * too. Therefore replace all dead at-run-time code with nops. + */ +static void sanitize_dead_code(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0); + struct bpf_insn *insn = env->prog->insnsi; + const int insn_cnt = env->prog->len; + int i; + + for (i = 0; i < insn_cnt; i++) { + if (aux_data[i].seen) + continue; + memcpy(insn + i, &nop, sizeof(nop)); + } +} + /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ @@ -4557,6 +4581,9 @@ skip_full_check: while (!pop_stack(env, NULL, NULL)); free_states(env); + if (ret == 0) + sanitize_dead_code(env); + if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env); -- cgit