summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorStanislav Fomichev <sdf@google.com>2021-01-27 11:31:39 -0800
committerAlexei Starovoitov <ast@kernel.org>2021-01-27 18:18:15 -0800
commit772412176fb98493158929b220fe250127f611af (patch)
treeb2aef4837caa452c535a1a0a1364f8f44a7a3bd5 /include
parent8063e184e49011f6f3f34f6c358dc8a83890bb5b (diff)
bpf: Allow rewriting to ports under ip_unprivileged_port_start
At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port to the privileged ones (< ip_unprivileged_port_start), but it will be rejected later on in the __inet_bind or __inet6_bind. Let's add another return value to indicate that CAP_NET_BIND_SERVICE check should be ignored. Use the same idea as we currently use in cgroup/egress where bit #1 indicates CN. Instead, for cgroup/bind{4,6}, bit #1 indicates that CAP_NET_BIND_SERVICE should be bypassed. v5: - rename flags to be less confusing (Andrey Ignatov) - rework BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY to work on flags and accept BPF_RET_SET_CN (no behavioral changes) v4: - Add missing IPv6 support (Martin KaFai Lau) v3: - Update description (Martin KaFai Lau) - Fix capability restore in selftest (Martin KaFai Lau) v2: - Switch to explicit return code (Martin KaFai Lau) Signed-off-by: Stanislav Fomichev <sdf@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Reviewed-by: Martin KaFai Lau <kafai@fb.com> Acked-by: Andrey Ignatov <rdna@fb.com> Link: https://lore.kernel.org/bpf/20210127193140.3170382-1-sdf@google.com
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf-cgroup.h38
-rw-r--r--include/linux/bpf.h52
-rw-r--r--include/net/inet_common.h2
3 files changed, 63 insertions, 29 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 0748fd87969e..c42e02b4d84b 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -125,7 +125,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
enum bpf_attach_type type,
- void *t_ctx);
+ void *t_ctx,
+ u32 *flags);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
@@ -231,30 +232,48 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
({ \
+ u32 __unused_flags; \
int __ret = 0; \
if (cgroup_bpf_enabled(type)) \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
- NULL); \
+ NULL, \
+ &__unused_flags); \
__ret; \
})
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
({ \
+ u32 __unused_flags; \
int __ret = 0; \
if (cgroup_bpf_enabled(type)) { \
lock_sock(sk); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
- t_ctx); \
+ t_ctx, \
+ &__unused_flags); \
release_sock(sk); \
} \
__ret; \
})
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
-
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
+/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
+ * via upper bits of return code. The only flag that is supported
+ * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
+ * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
+ */
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags) \
+({ \
+ u32 __flags = 0; \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled(type)) { \
+ lock_sock(sk); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ NULL, &__flags); \
+ release_sock(sk); \
+ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
+ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \
+ } \
+ __ret; \
+})
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \
((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \
@@ -453,8 +472,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1aac2af12fed..321966fc35db 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1073,6 +1073,34 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *include_prog,
struct bpf_prog_array **new_array);
+/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
+#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
+/* BPF program asks to set CN on the packet. */
+#define BPF_RET_SET_CN (1 << 0)
+
+#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
+ ({ \
+ struct bpf_prog_array_item *_item; \
+ struct bpf_prog *_prog; \
+ struct bpf_prog_array *_array; \
+ u32 _ret = 1; \
+ u32 func_ret; \
+ migrate_disable(); \
+ rcu_read_lock(); \
+ _array = rcu_dereference(array); \
+ _item = &_array->items[0]; \
+ while ((_prog = READ_ONCE(_item->prog))) { \
+ bpf_cgroup_storage_set(_item->cgroup_storage); \
+ func_ret = func(_prog, ctx); \
+ _ret &= (func_ret & 1); \
+ *(ret_flags) |= (func_ret >> 1); \
+ _item++; \
+ } \
+ rcu_read_unlock(); \
+ migrate_enable(); \
+ _ret; \
+ })
+
#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
({ \
struct bpf_prog_array_item *_item; \
@@ -1120,25 +1148,11 @@ _out: \
*/
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
({ \
- struct bpf_prog_array_item *_item; \
- struct bpf_prog *_prog; \
- struct bpf_prog_array *_array; \
- u32 ret; \
- u32 _ret = 1; \
- u32 _cn = 0; \
- migrate_disable(); \
- rcu_read_lock(); \
- _array = rcu_dereference(array); \
- _item = &_array->items[0]; \
- while ((_prog = READ_ONCE(_item->prog))) { \
- bpf_cgroup_storage_set(_item->cgroup_storage); \
- ret = func(_prog, ctx); \
- _ret &= (ret & 1); \
- _cn |= (ret & 2); \
- _item++; \
- } \
- rcu_read_unlock(); \
- migrate_enable(); \
+ u32 _flags = 0; \
+ bool _cn; \
+ u32 _ret; \
+ _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
+ _cn = _flags & BPF_RET_SET_CN; \
if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index cb2818862919..cad2a611efde 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -41,6 +41,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
#define BIND_WITH_LOCK (1 << 1)
/* Called from BPF program. */
#define BIND_FROM_BPF (1 << 2)
+/* Skip CAP_NET_BIND_SERVICE check. */
+#define BIND_NO_CAP_NET_BIND_SERVICE (1 << 3)
int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
u32 flags);
int inet_getname(struct socket *sock, struct sockaddr *uaddr,