diff options
Diffstat (limited to 'kernel/bpf/helpers.c')
-rw-r--r-- | kernel/bpf/helpers.c | 844 |
1 files changed, 704 insertions, 140 deletions
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index d19cd863d294..f27ce162427a 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -111,7 +111,7 @@ const struct bpf_func_proto bpf_map_pop_elem_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, + .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE, }; BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) @@ -124,7 +124,7 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, + .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE, }; BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu) @@ -158,6 +158,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .func = bpf_get_smp_processor_id, .gpl_only = false, .ret_type = RET_INTEGER, + .allow_fastcall = true, }; BPF_CALL_0(bpf_get_numa_node_id) @@ -334,7 +335,7 @@ static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock) __this_cpu_write(irqsave_flags, flags); } -notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) +NOTRACE_BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock) { __bpf_spin_lock_irqsave(lock); return 0; @@ -357,7 +358,7 @@ static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock) local_irq_restore(flags); } -notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) +NOTRACE_BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock) { __bpf_spin_unlock_irqrestore(lock); return 0; @@ -517,16 +518,15 @@ static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags, } BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, - long *, res) + s64 *, res) { long long _res; int err; + *res = 0; err = __bpf_strtoll(buf, buf_len, flags, &_res); if (err < 0) return err; - if (_res != (long)_res) - return -ERANGE; *res = _res; return err; } @@ -538,23 +538,23 @@ const struct bpf_func_proto bpf_strtol_proto = { .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, + .arg4_size = sizeof(s64), }; BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, - unsigned long *, res) + u64 *, res) { unsigned long long _res; bool is_negative; int err; + *res = 0; err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); if (err < 0) return err; if (is_negative) return -EINVAL; - if (_res != (unsigned long)_res) - return -ERANGE; *res = _res; return err; } @@ -566,7 +566,8 @@ const struct bpf_func_proto bpf_strtoul_proto = { .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_LONG, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, + .arg4_size = sizeof(u64), }; BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) @@ -714,7 +715,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) if (cpu >= nr_cpu_ids) return (unsigned long)NULL; - return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu); + return (unsigned long)per_cpu_ptr((const void __percpu *)(const uintptr_t)ptr, cpu); } const struct bpf_func_proto bpf_per_cpu_ptr_proto = { @@ -727,7 +728,7 @@ const struct bpf_func_proto bpf_per_cpu_ptr_proto = { BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr) { - return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr); + return (unsigned long)this_cpu_ptr((const void __percpu *)(const uintptr_t)percpu_ptr); } const struct bpf_func_proto bpf_this_cpu_ptr_proto = { @@ -1079,11 +1080,23 @@ const struct bpf_func_proto bpf_snprintf_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +struct bpf_async_cb { + struct bpf_map *map; + struct bpf_prog *prog; + void __rcu *callback_fn; + void *value; + union { + struct rcu_head rcu; + struct work_struct delete_work; + }; + u64 flags; +}; + /* BPF map elements can contain 'struct bpf_timer'. * Such map owns all of its BPF timers. * 'struct bpf_timer' is allocated as part of map element allocation * and it's zero initialized. - * That space is used to keep 'struct bpf_timer_kern'. + * That space is used to keep 'struct bpf_async_kern'. * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and * remembers 'struct bpf_map *' pointer it's part of. * bpf_timer_set_callback() increments prog refcnt and assign bpf callback_fn. @@ -1096,17 +1109,24 @@ const struct bpf_func_proto bpf_snprintf_proto = { * freeing the timers when inner map is replaced or deleted by user space. */ struct bpf_hrtimer { + struct bpf_async_cb cb; struct hrtimer timer; - struct bpf_map *map; - struct bpf_prog *prog; - void __rcu *callback_fn; - void *value; - struct rcu_head rcu; + atomic_t cancelling; +}; + +struct bpf_work { + struct bpf_async_cb cb; + struct work_struct work; + struct work_struct delete_work; }; -/* the actual struct hidden inside uapi struct bpf_timer */ -struct bpf_timer_kern { - struct bpf_hrtimer *timer; +/* the actual struct hidden inside uapi struct bpf_timer and bpf_wq */ +struct bpf_async_kern { + union { + struct bpf_async_cb *cb; + struct bpf_hrtimer *timer; + struct bpf_work *work; + }; /* bpf_spin_lock is used here instead of spinlock_t to make * sure that it always fits into space reserved by struct bpf_timer * regardless of LOCKDEP and spinlock debug flags. @@ -1114,19 +1134,24 @@ struct bpf_timer_kern { struct bpf_spin_lock lock; } __attribute__((aligned(8))); +enum bpf_async_type { + BPF_ASYNC_TYPE_TIMER = 0, + BPF_ASYNC_TYPE_WQ, +}; + static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running); static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer) { struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer); - struct bpf_map *map = t->map; - void *value = t->value; + struct bpf_map *map = t->cb.map; + void *value = t->cb.value; bpf_callback_t callback_fn; void *key; u32 idx; BTF_TYPE_EMIT(struct bpf_timer); - callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held()); + callback_fn = rcu_dereference_check(t->cb.callback_fn, rcu_read_lock_bh_held()); if (!callback_fn) goto out; @@ -1155,46 +1180,129 @@ out: return HRTIMER_NORESTART; } -BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map, - u64, flags) +static void bpf_wq_work(struct work_struct *work) { - clockid_t clockid = flags & (MAX_CLOCKS - 1); + struct bpf_work *w = container_of(work, struct bpf_work, work); + struct bpf_async_cb *cb = &w->cb; + struct bpf_map *map = cb->map; + bpf_callback_t callback_fn; + void *value = cb->value; + void *key; + u32 idx; + + BTF_TYPE_EMIT(struct bpf_wq); + + callback_fn = READ_ONCE(cb->callback_fn); + if (!callback_fn) + return; + + if (map->map_type == BPF_MAP_TYPE_ARRAY) { + struct bpf_array *array = container_of(map, struct bpf_array, map); + + /* compute the key */ + idx = ((char *)value - array->value) / array->elem_size; + key = &idx; + } else { /* hash or lru */ + key = value - round_up(map->key_size, 8); + } + + rcu_read_lock_trace(); + migrate_disable(); + + callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0); + + migrate_enable(); + rcu_read_unlock_trace(); +} + +static void bpf_wq_delete_work(struct work_struct *work) +{ + struct bpf_work *w = container_of(work, struct bpf_work, delete_work); + + cancel_work_sync(&w->work); + + kfree_rcu(w, cb.rcu); +} + +static void bpf_timer_delete_work(struct work_struct *work) +{ + struct bpf_hrtimer *t = container_of(work, struct bpf_hrtimer, cb.delete_work); + + /* Cancel the timer and wait for callback to complete if it was running. + * If hrtimer_cancel() can be safely called it's safe to call + * kfree_rcu(t) right after for both preallocated and non-preallocated + * maps. The async->cb = NULL was already done and no code path can see + * address 't' anymore. Timer if armed for existing bpf_hrtimer before + * bpf_timer_cancel_and_free will have been cancelled. + */ + hrtimer_cancel(&t->timer); + kfree_rcu(t, cb.rcu); +} + +static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags, + enum bpf_async_type type) +{ + struct bpf_async_cb *cb; struct bpf_hrtimer *t; + struct bpf_work *w; + clockid_t clockid; + size_t size; int ret = 0; - BUILD_BUG_ON(MAX_CLOCKS != 16); - BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer)); - BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer)); - if (in_nmi()) return -EOPNOTSUPP; - if (flags >= MAX_CLOCKS || - /* similar to timerfd except _ALARM variants are not supported */ - (clockid != CLOCK_MONOTONIC && - clockid != CLOCK_REALTIME && - clockid != CLOCK_BOOTTIME)) + switch (type) { + case BPF_ASYNC_TYPE_TIMER: + size = sizeof(struct bpf_hrtimer); + break; + case BPF_ASYNC_TYPE_WQ: + size = sizeof(struct bpf_work); + break; + default: return -EINVAL; - __bpf_spin_lock_irqsave(&timer->lock); - t = timer->timer; + } + + __bpf_spin_lock_irqsave(&async->lock); + t = async->timer; if (t) { ret = -EBUSY; goto out; } + /* allocate hrtimer via map_kmalloc to use memcg accounting */ - t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node); - if (!t) { + cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node); + if (!cb) { ret = -ENOMEM; goto out; } - t->value = (void *)timer - map->record->timer_off; - t->map = map; - t->prog = NULL; - rcu_assign_pointer(t->callback_fn, NULL); - hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); - t->timer.function = bpf_timer_cb; - WRITE_ONCE(timer->timer, t); - /* Guarantee the order between timer->timer and map->usercnt. So + + switch (type) { + case BPF_ASYNC_TYPE_TIMER: + clockid = flags & (MAX_CLOCKS - 1); + t = (struct bpf_hrtimer *)cb; + + atomic_set(&t->cancelling, 0); + INIT_WORK(&t->cb.delete_work, bpf_timer_delete_work); + hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT); + t->timer.function = bpf_timer_cb; + cb->value = (void *)async - map->record->timer_off; + break; + case BPF_ASYNC_TYPE_WQ: + w = (struct bpf_work *)cb; + + INIT_WORK(&w->work, bpf_wq_work); + INIT_WORK(&w->delete_work, bpf_wq_delete_work); + cb->value = (void *)async - map->record->wq_off; + break; + } + cb->map = map; + cb->prog = NULL; + cb->flags = flags; + rcu_assign_pointer(cb->callback_fn, NULL); + + WRITE_ONCE(async->cb, cb); + /* Guarantee the order between async->cb and map->usercnt. So * when there are concurrent uref release and bpf timer init, either * bpf_timer_cancel_and_free() called by uref release reads a no-NULL * timer or atomic64_read() below returns a zero usercnt. @@ -1204,15 +1312,34 @@ BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map /* maps with timers must be either held by user space * or pinned in bpffs. */ - WRITE_ONCE(timer->timer, NULL); - kfree(t); + WRITE_ONCE(async->cb, NULL); + kfree(cb); ret = -EPERM; } out: - __bpf_spin_unlock_irqrestore(&timer->lock); + __bpf_spin_unlock_irqrestore(&async->lock); return ret; } +BPF_CALL_3(bpf_timer_init, struct bpf_async_kern *, timer, struct bpf_map *, map, + u64, flags) +{ + clock_t clockid = flags & (MAX_CLOCKS - 1); + + BUILD_BUG_ON(MAX_CLOCKS != 16); + BUILD_BUG_ON(sizeof(struct bpf_async_kern) > sizeof(struct bpf_timer)); + BUILD_BUG_ON(__alignof__(struct bpf_async_kern) != __alignof__(struct bpf_timer)); + + if (flags >= MAX_CLOCKS || + /* similar to timerfd except _ALARM variants are not supported */ + (clockid != CLOCK_MONOTONIC && + clockid != CLOCK_REALTIME && + clockid != CLOCK_BOOTTIME)) + return -EINVAL; + + return __bpf_async_init(timer, map, flags, BPF_ASYNC_TYPE_TIMER); +} + static const struct bpf_func_proto bpf_timer_init_proto = { .func = bpf_timer_init, .gpl_only = true, @@ -1222,22 +1349,23 @@ static const struct bpf_func_proto bpf_timer_init_proto = { .arg3_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn, - struct bpf_prog_aux *, aux) +static int __bpf_async_set_callback(struct bpf_async_kern *async, void *callback_fn, + struct bpf_prog_aux *aux, unsigned int flags, + enum bpf_async_type type) { struct bpf_prog *prev, *prog = aux->prog; - struct bpf_hrtimer *t; + struct bpf_async_cb *cb; int ret = 0; if (in_nmi()) return -EOPNOTSUPP; - __bpf_spin_lock_irqsave(&timer->lock); - t = timer->timer; - if (!t) { + __bpf_spin_lock_irqsave(&async->lock); + cb = async->cb; + if (!cb) { ret = -EINVAL; goto out; } - if (!atomic64_read(&t->map->usercnt)) { + if (!atomic64_read(&cb->map->usercnt)) { /* maps with timers must be either held by user space * or pinned in bpffs. Otherwise timer might still be * running even when bpf prog is detached and user space @@ -1246,7 +1374,7 @@ BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callb ret = -EPERM; goto out; } - prev = t->prog; + prev = cb->prog; if (prev != prog) { /* Bump prog refcnt once. Every bpf_timer_set_callback() * can pick different callback_fn-s within the same prog. @@ -1259,14 +1387,20 @@ BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callb if (prev) /* Drop prev prog refcnt when swapping with new prog */ bpf_prog_put(prev); - t->prog = prog; + cb->prog = prog; } - rcu_assign_pointer(t->callback_fn, callback_fn); + rcu_assign_pointer(cb->callback_fn, callback_fn); out: - __bpf_spin_unlock_irqrestore(&timer->lock); + __bpf_spin_unlock_irqrestore(&async->lock); return ret; } +BPF_CALL_3(bpf_timer_set_callback, struct bpf_async_kern *, timer, void *, callback_fn, + struct bpf_prog_aux *, aux) +{ + return __bpf_async_set_callback(timer, callback_fn, aux, 0, BPF_ASYNC_TYPE_TIMER); +} + static const struct bpf_func_proto bpf_timer_set_callback_proto = { .func = bpf_timer_set_callback, .gpl_only = true, @@ -1275,7 +1409,7 @@ static const struct bpf_func_proto bpf_timer_set_callback_proto = { .arg2_type = ARG_PTR_TO_FUNC, }; -BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags) +BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, timer, u64, nsecs, u64, flags) { struct bpf_hrtimer *t; int ret = 0; @@ -1287,7 +1421,7 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla return -EINVAL; __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; - if (!t || !t->prog) { + if (!t || !t->cb.prog) { ret = -EINVAL; goto out; } @@ -1315,20 +1449,21 @@ static const struct bpf_func_proto bpf_timer_start_proto = { .arg3_type = ARG_ANYTHING, }; -static void drop_prog_refcnt(struct bpf_hrtimer *t) +static void drop_prog_refcnt(struct bpf_async_cb *async) { - struct bpf_prog *prog = t->prog; + struct bpf_prog *prog = async->prog; if (prog) { bpf_prog_put(prog); - t->prog = NULL; - rcu_assign_pointer(t->callback_fn, NULL); + async->prog = NULL; + rcu_assign_pointer(async->callback_fn, NULL); } } -BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) +BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer) { - struct bpf_hrtimer *t; + struct bpf_hrtimer *t, *cur_t; + bool inc = false; int ret = 0; if (in_nmi()) @@ -1340,21 +1475,50 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) ret = -EINVAL; goto out; } - if (this_cpu_read(hrtimer_running) == t) { + + cur_t = this_cpu_read(hrtimer_running); + if (cur_t == t) { /* If bpf callback_fn is trying to bpf_timer_cancel() * its own timer the hrtimer_cancel() will deadlock - * since it waits for callback_fn to finish + * since it waits for callback_fn to finish. */ ret = -EDEADLK; goto out; } - drop_prog_refcnt(t); + + /* Only account in-flight cancellations when invoked from a timer + * callback, since we want to avoid waiting only if other _callbacks_ + * are waiting on us, to avoid introducing lockups. Non-callback paths + * are ok, since nobody would synchronously wait for their completion. + */ + if (!cur_t) + goto drop; + atomic_inc(&t->cancelling); + /* Need full barrier after relaxed atomic_inc */ + smp_mb__after_atomic(); + inc = true; + if (atomic_read(&cur_t->cancelling)) { + /* We're cancelling timer t, while some other timer callback is + * attempting to cancel us. In such a case, it might be possible + * that timer t belongs to the other callback, or some other + * callback waiting upon it (creating transitive dependencies + * upon us), and we will enter a deadlock if we continue + * cancelling and waiting for it synchronously, since it might + * do the same. Bail! + */ + ret = -EDEADLK; + goto out; + } +drop: + drop_prog_refcnt(&t->cb); out: __bpf_spin_unlock_irqrestore(&timer->lock); /* Cancel the timer and wait for associated callback to finish * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + if (inc) + atomic_dec(&t->cancelling); rcu_read_unlock(); return ret; } @@ -1366,57 +1530,114 @@ static const struct bpf_func_proto bpf_timer_cancel_proto = { .arg1_type = ARG_PTR_TO_TIMER, }; -/* This function is called by map_delete/update_elem for individual element and - * by ops->map_release_uref when the user space reference to a map reaches zero. - */ -void bpf_timer_cancel_and_free(void *val) +static struct bpf_async_cb *__bpf_async_cancel_and_free(struct bpf_async_kern *async) { - struct bpf_timer_kern *timer = val; - struct bpf_hrtimer *t; + struct bpf_async_cb *cb; - /* Performance optimization: read timer->timer without lock first. */ - if (!READ_ONCE(timer->timer)) - return; + /* Performance optimization: read async->cb without lock first. */ + if (!READ_ONCE(async->cb)) + return NULL; - __bpf_spin_lock_irqsave(&timer->lock); + __bpf_spin_lock_irqsave(&async->lock); /* re-read it under lock */ - t = timer->timer; - if (!t) + cb = async->cb; + if (!cb) goto out; - drop_prog_refcnt(t); + drop_prog_refcnt(cb); /* The subsequent bpf_timer_start/cancel() helpers won't be able to use * this timer, since it won't be initialized. */ - WRITE_ONCE(timer->timer, NULL); + WRITE_ONCE(async->cb, NULL); out: - __bpf_spin_unlock_irqrestore(&timer->lock); + __bpf_spin_unlock_irqrestore(&async->lock); + return cb; +} + +/* This function is called by map_delete/update_elem for individual element and + * by ops->map_release_uref when the user space reference to a map reaches zero. + */ +void bpf_timer_cancel_and_free(void *val) +{ + struct bpf_hrtimer *t; + + t = (struct bpf_hrtimer *)__bpf_async_cancel_and_free(val); + if (!t) return; - /* Cancel the timer and wait for callback to complete if it was running. - * If hrtimer_cancel() can be safely called it's safe to call kfree(t) - * right after for both preallocated and non-preallocated maps. - * The timer->timer = NULL was already done and no code path can - * see address 't' anymore. - * - * Check that bpf_map_delete/update_elem() wasn't called from timer - * callback_fn. In such case don't call hrtimer_cancel() (since it will - * deadlock) and don't call hrtimer_try_to_cancel() (since it will just - * return -1). Though callback_fn is still running on this cpu it's + /* We check that bpf_map_delete/update_elem() was called from timer + * callback_fn. In such case we don't call hrtimer_cancel() (since it + * will deadlock) and don't call hrtimer_try_to_cancel() (since it will + * just return -1). Though callback_fn is still running on this cpu it's * safe to do kfree(t) because bpf_timer_cb() read everything it needed * from 't'. The bpf subprog callback_fn won't be able to access 't', - * since timer->timer = NULL was already done. The timer will be + * since async->cb = NULL was already done. The timer will be * effectively cancelled because bpf_timer_cb() will return * HRTIMER_NORESTART. + * + * However, it is possible the timer callback_fn calling us armed the + * timer _before_ calling us, such that failing to cancel it here will + * cause it to possibly use struct hrtimer after freeing bpf_hrtimer. + * Therefore, we _need_ to cancel any outstanding timers before we do + * kfree_rcu, even though no more timers can be armed. + * + * Moreover, we need to schedule work even if timer does not belong to + * the calling callback_fn, as on two different CPUs, we can end up in a + * situation where both sides run in parallel, try to cancel one + * another, and we end up waiting on both sides in hrtimer_cancel + * without making forward progress, since timer1 depends on time2 + * callback to finish, and vice versa. + * + * CPU 1 (timer1_cb) CPU 2 (timer2_cb) + * bpf_timer_cancel_and_free(timer2) bpf_timer_cancel_and_free(timer1) + * + * To avoid these issues, punt to workqueue context when we are in a + * timer callback. + */ + if (this_cpu_read(hrtimer_running)) { + queue_work(system_unbound_wq, &t->cb.delete_work); + return; + } + + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* If the timer is running on other CPU, also use a kworker to + * wait for the completion of the timer instead of trying to + * acquire a sleepable lock in hrtimer_cancel() to wait for its + * completion. + */ + if (hrtimer_try_to_cancel(&t->timer) >= 0) + kfree_rcu(t, cb.rcu); + else + queue_work(system_unbound_wq, &t->cb.delete_work); + } else { + bpf_timer_delete_work(&t->cb.delete_work); + } +} + +/* This function is called by map_delete/update_elem for individual element and + * by ops->map_release_uref when the user space reference to a map reaches zero. + */ +void bpf_wq_cancel_and_free(void *val) +{ + struct bpf_work *work; + + BTF_TYPE_EMIT(struct bpf_wq); + + work = (struct bpf_work *)__bpf_async_cancel_and_free(val); + if (!work) + return; + /* Trigger cancel of the sleepable work, but *do not* wait for + * it to finish if it was running as we might not be in a + * sleepable context. + * kfree will be called once the work has finished. */ - if (this_cpu_read(hrtimer_running) != t) - hrtimer_cancel(&t->timer); - kfree_rcu(t, rcu); + schedule_work(&work->delete_work); } -BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) +BPF_CALL_2(bpf_kptr_xchg, void *, dst, void *, ptr) { - unsigned long *kptr = map_value; + unsigned long *kptr = dst; + /* This helper may be inlined by verifier. */ return xchg(kptr, (unsigned long)ptr); } @@ -1429,7 +1650,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, .ret_btf_id = BPF_PTR_POISON, - .arg1_type = ARG_PTR_TO_KPTR, + .arg1_type = ARG_KPTR_XCHG_DEST, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE, .arg2_btf_id = BPF_PTR_POISON, }; @@ -1442,7 +1663,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = { #define DYNPTR_SIZE_MASK 0xFFFFFF #define DYNPTR_RDONLY_BIT BIT(31) -static bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr) +bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_RDONLY_BIT; } @@ -1535,7 +1756,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE, }; BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, @@ -1682,7 +1903,7 @@ const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; const struct bpf_func_proto bpf_task_pt_regs_proto __weak; const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) +bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -1729,11 +1950,15 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_strtol_proto; case BPF_FUNC_strtoul: return &bpf_strtoul_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_ns_current_pid_tgid: + return &bpf_get_ns_current_pid_tgid_proto; default: break; } - if (!bpf_capable()) + if (!bpf_token_capable(prog->aux->token, CAP_BPF)) return NULL; switch (func_id) { @@ -1791,7 +2016,7 @@ bpf_base_func_proto(enum bpf_func_id func_id) break; } - if (!perfmon_capable()) + if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) return NULL; switch (func_id) { @@ -1823,6 +2048,7 @@ bpf_base_func_proto(enum bpf_func_id func_id) return NULL; } } +EXPORT_SYMBOL_GPL(bpf_base_func_proto); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock) @@ -1854,9 +2080,7 @@ unlock: /* The contained type can also have resources, including a * bpf_list_head which needs to be freed. */ - migrate_disable(); __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); - migrate_enable(); } } @@ -1893,9 +2117,7 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, obj -= field->graph_root.node_offset; - migrate_disable(); __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); - migrate_enable(); } } @@ -2247,6 +2469,29 @@ __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task, return ret; } +BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct cgroup *cgrp; + + if (unlikely(idx >= array->map.max_entries)) + return -E2BIG; + + cgrp = READ_ONCE(array->ptrs[idx]); + if (unlikely(!cgrp)) + return -EAGAIN; + + return task_under_cgroup_hierarchy(current, cgrp); +} + +const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { + .func = bpf_current_task_under_cgroup, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + /** * bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a * specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its @@ -2287,8 +2532,27 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) } /** + * bpf_task_from_vpid - Find a struct task_struct from its vpid by looking it up + * in the pid namespace of the current task. If a task is returned, it must + * either be stored in a map, or released with bpf_task_release(). + * @vpid: The vpid of the task being looked up. + */ +__bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid) +{ + struct task_struct *p; + + rcu_read_lock(); + p = find_task_by_vpid(vpid); + if (p) + p = bpf_task_acquire(p); + rcu_read_unlock(); + + return p; +} + +/** * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data. - * @ptr: The dynptr whose data slice to retrieve + * @p: The dynptr whose data slice to retrieve * @offset: Offset into the dynptr * @buffer__opt: User-provided buffer to copy contents into. May be NULL * @buffer__szk: Size (in bytes) of the buffer if present. This is the @@ -2314,9 +2578,10 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset, +__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk) { + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; enum bpf_dynptr_type type; u32 len = buffer__szk; int err; @@ -2358,7 +2623,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset /** * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data. - * @ptr: The dynptr whose data slice to retrieve + * @p: The dynptr whose data slice to retrieve * @offset: Offset into the dynptr * @buffer__opt: User-provided buffer to copy contents into. May be NULL * @buffer__szk: Size (in bytes) of the buffer if present. This is the @@ -2398,16 +2663,18 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset, +__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk) { + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data || __bpf_dynptr_is_rdonly(ptr)) return NULL; /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice. * * For skb-type dynptrs, it is safe to write into the returned pointer - * if the bpf program allows skb data writes. There are two possiblities + * if the bpf program allows skb data writes. There are two possibilities * that may occur when calling bpf_dynptr_slice_rdwr: * * 1) The requested slice is in the head of the skb. In this case, the @@ -2426,11 +2693,12 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o * will be copied out into the buffer and the user will need to call * bpf_dynptr_write() to commit changes. */ - return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk); + return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk); } -__bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end) +__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; u32 size; if (!ptr->data || start > end) @@ -2447,36 +2715,45 @@ __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 en return 0; } -__bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr) +__bpf_kfunc bool bpf_dynptr_is_null(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + return !ptr->data; } -__bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) +__bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) return false; return __bpf_dynptr_is_rdonly(ptr); } -__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) +__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) return -EINVAL; return __bpf_dynptr_size(ptr); } -__bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr, - struct bpf_dynptr_kern *clone__uninit) +__bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p, + struct bpf_dynptr *clone__uninit) { + struct bpf_dynptr_kern *clone = (struct bpf_dynptr_kern *)clone__uninit; + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) { - bpf_dynptr_set_null(clone__uninit); + bpf_dynptr_set_null(clone); return -EINVAL; } - *clone__uninit = *ptr; + *clone = *ptr; return 0; } @@ -2486,9 +2763,9 @@ __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj) return obj; } -__bpf_kfunc void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k) +__bpf_kfunc void *bpf_rdonly_cast(const void *obj__ign, u32 btf_id__k) { - return obj__ign; + return (void *)obj__ign; } __bpf_kfunc void bpf_rcu_read_lock(void) @@ -2544,10 +2821,271 @@ __bpf_kfunc void bpf_throw(u64 cookie) WARN(1, "A call to BPF exception callback should never return\n"); } +__bpf_kfunc int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) +{ + struct bpf_async_kern *async = (struct bpf_async_kern *)wq; + struct bpf_map *map = p__map; + + BUILD_BUG_ON(sizeof(struct bpf_async_kern) > sizeof(struct bpf_wq)); + BUILD_BUG_ON(__alignof__(struct bpf_async_kern) != __alignof__(struct bpf_wq)); + + if (flags) + return -EINVAL; + + return __bpf_async_init(async, map, flags, BPF_ASYNC_TYPE_WQ); +} + +__bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) +{ + struct bpf_async_kern *async = (struct bpf_async_kern *)wq; + struct bpf_work *w; + + if (in_nmi()) + return -EOPNOTSUPP; + if (flags) + return -EINVAL; + w = READ_ONCE(async->work); + if (!w || !READ_ONCE(w->cb.prog)) + return -EINVAL; + + schedule_work(&w->work); + return 0; +} + +__bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq, + int (callback_fn)(void *map, int *key, void *value), + unsigned int flags, + void *aux__ign) +{ + struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__ign; + struct bpf_async_kern *async = (struct bpf_async_kern *)wq; + + if (flags) + return -EINVAL; + + return __bpf_async_set_callback(async, callback_fn, aux, flags, BPF_ASYNC_TYPE_WQ); +} + +__bpf_kfunc void bpf_preempt_disable(void) +{ + preempt_disable(); +} + +__bpf_kfunc void bpf_preempt_enable(void) +{ + preempt_enable(); +} + +struct bpf_iter_bits { + __u64 __opaque[2]; +} __aligned(8); + +#define BITS_ITER_NR_WORDS_MAX 511 + +struct bpf_iter_bits_kern { + union { + __u64 *bits; + __u64 bits_copy; + }; + int nr_bits; + int bit; +} __aligned(8); + +/* On 64-bit hosts, unsigned long and u64 have the same size, so passing + * a u64 pointer and an unsigned long pointer to find_next_bit() will + * return the same result, as both point to the same 8-byte area. + * + * For 32-bit little-endian hosts, using a u64 pointer or unsigned long + * pointer also makes no difference. This is because the first iterated + * unsigned long is composed of bits 0-31 of the u64 and the second unsigned + * long is composed of bits 32-63 of the u64. + * + * However, for 32-bit big-endian hosts, this is not the case. The first + * iterated unsigned long will be bits 32-63 of the u64, so swap these two + * ulong values within the u64. + */ +static void swap_ulong_in_u64(u64 *bits, unsigned int nr) +{ +#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN) + unsigned int i; + + for (i = 0; i < nr; i++) + bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32); +#endif +} + +/** + * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area + * @it: The new bpf_iter_bits to be created + * @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over + * @nr_words: The size of the specified memory area, measured in 8-byte units. + * The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be + * further reduced by the BPF memory allocator implementation. + * + * This function initializes a new bpf_iter_bits structure for iterating over + * a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It + * copies the data of the memory area to the newly created bpf_iter_bits @it for + * subsequent iteration operations. + * + * On success, 0 is returned. On failure, ERR is returned. + */ +__bpf_kfunc int +bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words) +{ + struct bpf_iter_bits_kern *kit = (void *)it; + u32 nr_bytes = nr_words * sizeof(u64); + u32 nr_bits = BYTES_TO_BITS(nr_bytes); + int err; + + BUILD_BUG_ON(sizeof(struct bpf_iter_bits_kern) != sizeof(struct bpf_iter_bits)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_bits_kern) != + __alignof__(struct bpf_iter_bits)); + + kit->nr_bits = 0; + kit->bits_copy = 0; + kit->bit = -1; + + if (!unsafe_ptr__ign || !nr_words) + return -EINVAL; + if (nr_words > BITS_ITER_NR_WORDS_MAX) + return -E2BIG; + + /* Optimization for u64 mask */ + if (nr_bits == 64) { + err = bpf_probe_read_kernel_common(&kit->bits_copy, nr_bytes, unsafe_ptr__ign); + if (err) + return -EFAULT; + + swap_ulong_in_u64(&kit->bits_copy, nr_words); + + kit->nr_bits = nr_bits; + return 0; + } + + if (bpf_mem_alloc_check_size(false, nr_bytes)) + return -E2BIG; + + /* Fallback to memalloc */ + kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes); + if (!kit->bits) + return -ENOMEM; + + err = bpf_probe_read_kernel_common(kit->bits, nr_bytes, unsafe_ptr__ign); + if (err) { + bpf_mem_free(&bpf_global_ma, kit->bits); + return err; + } + + swap_ulong_in_u64(kit->bits, nr_words); + + kit->nr_bits = nr_bits; + return 0; +} + +/** + * bpf_iter_bits_next() - Get the next bit in a bpf_iter_bits + * @it: The bpf_iter_bits to be checked + * + * This function returns a pointer to a number representing the value of the + * next bit in the bits. + * + * If there are no further bits available, it returns NULL. + */ +__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it) +{ + struct bpf_iter_bits_kern *kit = (void *)it; + int bit = kit->bit, nr_bits = kit->nr_bits; + const void *bits; + + if (!nr_bits || bit >= nr_bits) + return NULL; + + bits = nr_bits == 64 ? &kit->bits_copy : kit->bits; + bit = find_next_bit(bits, nr_bits, bit + 1); + if (bit >= nr_bits) { + kit->bit = bit; + return NULL; + } + + kit->bit = bit; + return &kit->bit; +} + +/** + * bpf_iter_bits_destroy() - Destroy a bpf_iter_bits + * @it: The bpf_iter_bits to be destroyed + * + * Destroy the resource associated with the bpf_iter_bits. + */ +__bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it) +{ + struct bpf_iter_bits_kern *kit = (void *)it; + + if (kit->nr_bits <= 64) + return; + bpf_mem_free(&bpf_global_ma, kit->bits); +} + +/** + * bpf_copy_from_user_str() - Copy a string from an unsafe user address + * @dst: Destination address, in kernel space. This buffer must be + * at least @dst__sz bytes long. + * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL. + * @unsafe_ptr__ign: Source address, in user space. + * @flags: The only supported flag is BPF_F_PAD_ZEROS + * + * Copies a NUL-terminated string from userspace to BPF space. If user string is + * too long this will still ensure zero termination in the dst buffer unless + * buffer size is 0. + * + * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and + * memset all of @dst on failure. + */ +__bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, u64 flags) +{ + int ret; + + if (unlikely(flags & ~BPF_F_PAD_ZEROS)) + return -EINVAL; + + if (unlikely(!dst__sz)) + return 0; + + ret = strncpy_from_user(dst, unsafe_ptr__ign, dst__sz - 1); + if (ret < 0) { + if (flags & BPF_F_PAD_ZEROS) + memset((char *)dst, 0, dst__sz); + + return ret; + } + + if (flags & BPF_F_PAD_ZEROS) + memset((char *)dst + ret, 0, dst__sz - ret); + else + ((char *)dst)[ret] = '\0'; + + return ret + 1; +} + +/* Keep unsinged long in prototype so that kfunc is usable when emitted to + * vmlinux.h in BPF programs directly, but note that while in BPF prog, the + * unsigned long always points to 8-byte region on stack, the kernel may only + * read and write the 4-bytes on 32-bit. + */ +__bpf_kfunc void bpf_local_irq_save(unsigned long *flags__irq_flag) +{ + local_irq_save(*flags__irq_flag); +} + +__bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag) +{ + local_irq_restore(*flags__irq_flag); +} + __bpf_kfunc_end_defs(); -BTF_SET8_START(generic_btf_ids) -#ifdef CONFIG_KEXEC_CORE +BTF_KFUNCS_START(generic_btf_ids) +#ifdef CONFIG_CRASH_DUMP BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) @@ -2574,8 +3112,12 @@ BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU) BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_task_from_vpid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_throw) -BTF_SET8_END(generic_btf_ids) +#ifdef CONFIG_BPF_EVENTS +BTF_ID_FLAGS(func, bpf_send_signal_task, KF_TRUSTED_ARGS) +#endif +BTF_KFUNCS_END(generic_btf_ids) static const struct btf_kfunc_id_set generic_kfunc_set = { .owner = THIS_MODULE, @@ -2591,9 +3133,9 @@ BTF_ID(struct, cgroup) BTF_ID(func, bpf_cgroup_release_dtor) #endif -BTF_SET8_START(common_btf_ids) -BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) -BTF_ID_FLAGS(func, bpf_rdonly_cast) +BTF_KFUNCS_START(common_btf_ids) +BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx, KF_FASTCALL) +BTF_ID_FLAGS(func, bpf_rdonly_cast, KF_FASTCALL) BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) @@ -2620,7 +3162,25 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) BTF_ID_FLAGS(func, bpf_dynptr_size) BTF_ID_FLAGS(func, bpf_dynptr_clone) -BTF_SET8_END(common_btf_ids) +#ifdef CONFIG_NET +BTF_ID_FLAGS(func, bpf_modify_return_test_tp) +#endif +BTF_ID_FLAGS(func, bpf_wq_init) +BTF_ID_FLAGS(func, bpf_wq_set_callback_impl) +BTF_ID_FLAGS(func, bpf_wq_start) +BTF_ID_FLAGS(func, bpf_preempt_disable) +BTF_ID_FLAGS(func, bpf_preempt_enable) +BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW) +BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_get_kmem_cache) +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_local_irq_save) +BTF_ID_FLAGS(func, bpf_local_irq_restore) +BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { .owner = THIS_MODULE, @@ -2647,6 +3207,8 @@ static int __init kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &generic_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &generic_kfunc_set); ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors, ARRAY_SIZE(generic_dtors), THIS_MODULE); @@ -2660,7 +3222,9 @@ late_initcall(kfunc_init); */ const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) { - return bpf_dynptr_slice(ptr, 0, NULL, len); + const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr; + + return bpf_dynptr_slice(p, 0, NULL, len); } /* Get a pointer to dynptr data up to len bytes for read write access. If |