| author | Alexei Starovoitov <ast@kernel.org> | 2025-11-29 09:35:36 -0800 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2025-11-29 09:35:36 -0800 |
| commit | 34235a3544f20291819c20d1d6c4ba07784045a2 | |
| tree | 145058be55b4ecf4916b0ac3df96b49e3d78d9c6 | /tools |
| parent | bd5bdd200c9e981cd5e2495966968cb26010573c | |
| parent | 3448375e71a49cc29cc62cc941bea137d723956e | |
Merge branch 'limited-queueing-in-nmi-for-rqspinlock'
Kumar Kartikeya Dwivedi says:
====================
Limited queueing in NMI for rqspinlock
Ritesh reported that he was frequently seeing timeouts in cases that
should have been covered by the AA heuristics. This led to the discovery
of multiple gaps in the current code that could lead to timeouts even
when the AA heuristics could have prevented them. More details and the
full investigation are available in the original threads. [0][1]
This set restores the ability for NMI waiters to queue in the slow path,
and reduces the cases where they would attempt to trylock. However, such
queueing must not happen when the NMI has interrupted a waiter that it
itself depends upon for forward progress; in those cases the trylock
fallback remains, but limited to a single attempt to avoid futile
repeated attempts to acquire the lock.
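As a rough illustration of this policy, the following standalone C model
captures the decision: if the NMI has interrupted a waiter it depends on,
it gets exactly one trylock attempt and then bails out; otherwise it queues
normally. The struct, helper names, and return values are assumptions made
for illustration only and do not correspond to the rqspinlock code itself.

```c
#include <stdbool.h>
#include <stdio.h>

/* Simplified lock state for the model (assumption, not kernel code). */
struct model_lock {
	bool interrupted_waiter;	/* NMI landed on top of a waiter on this lock */
	bool free;			/* lock currently uncontended */
};

/* Single trylock attempt: succeeds only if the lock happens to be free. */
static bool trylock_once(struct model_lock *l)
{
	if (!l->free)
		return false;
	l->free = false;
	return true;
}

/* Queueing path: in this simplified model it always succeeds. */
static int queue_and_wait(struct model_lock *l)
{
	l->free = false;
	return 0;
}

static int nmi_slowpath(struct model_lock *l, bool in_nmi)
{
	/*
	 * Queueing behind a waiter the NMI itself interrupted could never
	 * make progress, so fall back to one trylock attempt and give up.
	 */
	if (in_nmi && l->interrupted_waiter)
		return trylock_once(l) ? 0 : -1;

	/* Otherwise NMI waiters queue in the slow path like anyone else. */
	return queue_and_wait(l);
}

int main(void)
{
	struct model_lock l = { .interrupted_waiter = true, .free = false };

	printf("NMI over dependent waiter -> %d (single trylock fails)\n",
	       nmi_slowpath(&l, true));

	l.interrupted_waiter = false;
	l.free = true;
	printf("NMI, no dependency        -> %d (queues and acquires)\n",
	       nmi_slowpath(&l, true));
	return 0;
}
```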
It also closes a possible window in the lock fast path and the unlock
path where an NMI landing between the cmpxchg and the creation of the
held-lock entry, or between the deletion of the entry and the unlock,
would miss detection of an AA scenario and end up timing out.
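The window can be pictured with the minimal C11-atomics sketch below. The
lock type, function names, and the thread-local stand-in for the held-lock
bookkeeping are assumptions for illustration and differ from the real fast
path; only the ordering of the atomic acquisition relative to the
bookkeeping the AA heuristic reads is the point.

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct win_lock {
	atomic_int val;
};

/* Stand-in for the per-CPU held-lock bookkeeping the AA heuristic reads. */
static _Thread_local const struct win_lock *held_entry;

static bool fastpath_lock(struct win_lock *l)
{
	int expected = 0;

	if (!atomic_compare_exchange_strong(&l->val, &expected, 1))
		return false;
	/*
	 * Window: an NMI arriving here sees the lock owned by this CPU but
	 * no held-lock entry yet, so AA detection cannot fire and the NMI
	 * waiter times out instead.
	 */
	held_entry = l;			/* entry created only after the cmpxchg */
	return true;
}

static void fastpath_unlock(struct win_lock *l)
{
	held_entry = NULL;		/* entry removed before the release */
	/*
	 * Symmetric window on unlock: an NMI here still contends with a
	 * lock this CPU owns, but the entry is already gone.
	 */
	atomic_store(&l->val, 0);
}

int main(void)
{
	struct win_lock l = { .val = 0 };

	if (fastpath_lock(&l))
		fastpath_unlock(&l);
	return 0;
}
```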
With these changes, virtually all cases in which the existing heuristics
can detect the deadlock now recover quickly instead of timing out. More
details are available in the commit logs for each patch.
[0]: https://lore.kernel.org/bpf/CAH6OuBTjG+N=+GGwcpOUbeDN563oz4iVcU3rbse68egp9wj9_A@mail.gmail.com
[1]: https://lore.kernel.org/bpf/20251125203253.3287019-1-memxor@gmail.com
====================
Link: https://patch.msgid.link/20251128232802.1031906-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'tools')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c | 55 |

1 file changed, 43 insertions(+), 12 deletions(-)
```diff
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
index e8dd3fbc6ea5..7b4ae5e81d32 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
@@ -33,9 +33,16 @@ static const unsigned int rqsl_hist_ms[] = {
 };
 #define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms)
 
+enum rqsl_context {
+	RQSL_CTX_NORMAL = 0,
+	RQSL_CTX_NMI,
+	RQSL_CTX_MAX,
+};
+
 struct rqsl_cpu_hist {
-	atomic64_t normal[RQSL_NR_HIST_BUCKETS];
-	atomic64_t nmi[RQSL_NR_HIST_BUCKETS];
+	atomic64_t hist[RQSL_CTX_MAX][RQSL_NR_HIST_BUCKETS];
+	atomic64_t success[RQSL_CTX_MAX];
+	atomic64_t failure[RQSL_CTX_MAX];
 };
 
 static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists);
@@ -117,14 +124,18 @@ static u32 rqsl_hist_bucket_idx(u32 delta_ms)
 	return RQSL_NR_HIST_BUCKETS - 1;
 }
 
-static void rqsl_record_lock_time(u64 delta_ns, bool is_nmi)
+static void rqsl_record_lock_result(u64 delta_ns, enum rqsl_context ctx, int ret)
 {
 	struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists);
 	u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC);
 	u32 bucket = rqsl_hist_bucket_idx(delta_ms);
-	atomic64_t *buckets = is_nmi ? hist->nmi : hist->normal;
+	atomic64_t *buckets = hist->hist[ctx];
 
 	atomic64_inc(&buckets[bucket]);
+	if (!ret)
+		atomic64_inc(&hist->success[ctx]);
+	else
+		atomic64_inc(&hist->failure[ctx]);
 }
 
 static int rqspinlock_worker_fn(void *arg)
@@ -147,7 +158,8 @@ static int rqspinlock_worker_fn(void *arg)
 		}
 		start_ns = ktime_get_mono_fast_ns();
 		ret = raw_res_spin_lock_irqsave(worker_lock, flags);
-		rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, false);
+		rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+					RQSL_CTX_NORMAL, ret);
 		mdelay(normal_delay);
 		if (!ret)
 			raw_res_spin_unlock_irqrestore(worker_lock, flags);
@@ -190,7 +202,8 @@ static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
 	locks = rqsl_get_lock_pair(cpu);
 	start_ns = ktime_get_mono_fast_ns();
 	ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags);
-	rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, true);
+	rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+				RQSL_CTX_NMI, ret);
 
 	mdelay(nmi_delay);
 
@@ -300,12 +313,14 @@ static void rqsl_print_histograms(void)
 		u64 norm_counts[RQSL_NR_HIST_BUCKETS];
 		u64 nmi_counts[RQSL_NR_HIST_BUCKETS];
 		u64 total_counts[RQSL_NR_HIST_BUCKETS];
+		u64 norm_success, nmi_success, success_total;
+		u64 norm_failure, nmi_failure, failure_total;
 		u64 norm_total = 0, nmi_total = 0, total = 0;
 		bool has_slow = false;
 
 		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
-			norm_counts[i] = atomic64_read(&hist->normal[i]);
-			nmi_counts[i] = atomic64_read(&hist->nmi[i]);
+			norm_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NORMAL][i]);
+			nmi_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NMI][i]);
 			total_counts[i] = norm_counts[i] + nmi_counts[i];
 			norm_total += norm_counts[i];
 			nmi_total += nmi_counts[i];
@@ -315,17 +330,33 @@
 				has_slow = true;
 		}
 
+		norm_success = atomic64_read(&hist->success[RQSL_CTX_NORMAL]);
+		nmi_success = atomic64_read(&hist->success[RQSL_CTX_NMI]);
+		norm_failure = atomic64_read(&hist->failure[RQSL_CTX_NORMAL]);
+		nmi_failure = atomic64_read(&hist->failure[RQSL_CTX_NMI]);
+		success_total = norm_success + nmi_success;
+		failure_total = norm_failure + nmi_failure;
+
 		if (!total)
 			continue;
 
 		if (!has_slow) {
-			pr_err("  cpu%d: total %llu (normal %llu, nmi %llu), all within 0-%ums\n",
-			       cpu, total, norm_total, nmi_total, RQSL_SLOW_THRESHOLD_MS);
+			pr_err("  cpu%d: total %llu (normal %llu, nmi %llu) | "
+			       "success %llu (normal %llu, nmi %llu) | "
+			       "failure %llu (normal %llu, nmi %llu), all within 0-%ums\n",
+			       cpu, total, norm_total, nmi_total,
+			       success_total, norm_success, nmi_success,
+			       failure_total, norm_failure, nmi_failure,
+			       RQSL_SLOW_THRESHOLD_MS);
 			continue;
 		}
 
-		pr_err("  cpu%d: total %llu (normal %llu, nmi %llu)\n",
-		       cpu, total, norm_total, nmi_total);
+		pr_err("  cpu%d: total %llu (normal %llu, nmi %llu) | "
+		       "success %llu (normal %llu, nmi %llu) | "
+		       "failure %llu (normal %llu, nmi %llu)\n",
+		       cpu, total, norm_total, nmi_total,
+		       success_total, norm_success, nmi_success,
+		       failure_total, norm_failure, nmi_failure);
 
 		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
 			unsigned int start_ms;
```