summaryrefslogtreecommitdiff
path: root/include/linux/bpf.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-30 13:06:27 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-30 13:06:27 -0700
commit494e7fe591bf834d57c6607cdc26ab8873708aa7 (patch)
tree3089aa4e61f01125a1a34e1e83bf985b7458fc1d /include/linux/bpf.h
parentfa593d0f969dcfa41d390822fdf1a0ab48cd882c (diff)
parent6ffb9017e9329168b3b4216d15def8e78e1b1fac (diff)
Merge tag 'bpf_res_spin_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf relisient spinlock support from Alexei Starovoitov: "This patch set introduces Resilient Queued Spin Lock (or rqspinlock with res_spin_lock() and res_spin_unlock() APIs). This is a qspinlock variant which recovers the kernel from a stalled state when the lock acquisition path cannot make forward progress. This can occur when a lock acquisition attempt enters a deadlock situation (e.g. AA, or ABBA), or more generally, when the owner of the lock (which we’re trying to acquire) isn’t making forward progress. Deadlock detection is the main mechanism used to provide instant recovery, with the timeout mechanism acting as a final line of defense. Detection is triggered immediately when beginning the waiting loop of a lock slow path. Additionally, BPF programs attached to different parts of the kernel can introduce new control flow into the kernel, which increases the likelihood of deadlocks in code not written to handle reentrancy. There have been multiple syzbot reports surfacing deadlocks in internal kernel code due to the diverse ways in which BPF programs can be attached to different parts of the kernel. By switching the BPF subsystem’s lock usage to rqspinlock, all of these issues are mitigated at runtime. This spin lock implementation allows BPF maps to become safer and remove mechanisms that have fallen short in assuring safety when nesting programs in arbitrary ways in the same context or across different contexts. We run benchmarks that stress locking scalability and perform comparison against the baseline (qspinlock). For the rqspinlock case, we replace the default qspinlock with it in the kernel, such that all spin locks in the kernel use the rqspinlock slow path. As such, benchmarks that stress kernel spin locks end up exercising rqspinlock. More details in the cover letter in commit 6ffb9017e932 ("Merge branch 'resilient-queued-spin-lock'")" * tag 'bpf_res_spin_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (24 commits) selftests/bpf: Add tests for rqspinlock bpf: Maintain FIFO property for rqspinlock unlock bpf: Implement verifier support for rqspinlock bpf: Introduce rqspinlock kfuncs bpf: Convert lpm_trie.c to rqspinlock bpf: Convert percpu_freelist.c to rqspinlock bpf: Convert hashtab.c to rqspinlock rqspinlock: Add locktorture support rqspinlock: Add entry to Makefile, MAINTAINERS rqspinlock: Add macros for rqspinlock usage rqspinlock: Add basic support for CONFIG_PARAVIRT rqspinlock: Add a test-and-set fallback rqspinlock: Add deadlock detection and recovery rqspinlock: Protect waiters in trylock fallback from stalls rqspinlock: Protect waiters in queue from stalls rqspinlock: Protect pending bit owners from stalls rqspinlock: Hardcode cond_acquire loops for arm64 rqspinlock: Add support for timeouts rqspinlock: Drop PV and virtualization support rqspinlock: Add rqspinlock.h header ...
Diffstat (limited to 'include/linux/bpf.h')
-rw-r--r--include/linux/bpf.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 111bea4e507f..d67490dc3a2b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -30,6 +30,7 @@
#include <linux/static_call.h>
#include <linux/memcontrol.h>
#include <linux/cfi.h>
+#include <asm/rqspinlock.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -204,6 +205,7 @@ enum btf_field_type {
BPF_REFCOUNT = (1 << 9),
BPF_WORKQUEUE = (1 << 10),
BPF_UPTR = (1 << 11),
+ BPF_RES_SPIN_LOCK = (1 << 12),
};
typedef void (*btf_dtor_kfunc_t)(void *);
@@ -239,6 +241,7 @@ struct btf_record {
u32 cnt;
u32 field_mask;
int spin_lock_off;
+ int res_spin_lock_off;
int timer_off;
int wq_off;
int refcount_off;
@@ -314,6 +317,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
switch (type) {
case BPF_SPIN_LOCK:
return "bpf_spin_lock";
+ case BPF_RES_SPIN_LOCK:
+ return "bpf_res_spin_lock";
case BPF_TIMER:
return "bpf_timer";
case BPF_WORKQUEUE:
@@ -346,6 +351,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
switch (type) {
case BPF_SPIN_LOCK:
return sizeof(struct bpf_spin_lock);
+ case BPF_RES_SPIN_LOCK:
+ return sizeof(struct bpf_res_spin_lock);
case BPF_TIMER:
return sizeof(struct bpf_timer);
case BPF_WORKQUEUE:
@@ -376,6 +383,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
switch (type) {
case BPF_SPIN_LOCK:
return __alignof__(struct bpf_spin_lock);
+ case BPF_RES_SPIN_LOCK:
+ return __alignof__(struct bpf_res_spin_lock);
case BPF_TIMER:
return __alignof__(struct bpf_timer);
case BPF_WORKQUEUE:
@@ -419,6 +428,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
case BPF_RB_ROOT:
/* RB_ROOT_CACHED 0-inits, no need to do anything after memset */
case BPF_SPIN_LOCK:
+ case BPF_RES_SPIN_LOCK:
case BPF_TIMER:
case BPF_WORKQUEUE:
case BPF_KPTR_UNREF: