author	Alexei Starovoitov <ast@kernel.org>	2025-11-29 09:35:36 -0800
committer	Alexei Starovoitov <ast@kernel.org>	2025-11-29 09:35:36 -0800
commit	34235a3544f20291819c20d1d6c4ba07784045a2 (patch)
tree	145058be55b4ecf4916b0ac3df96b49e3d78d9c6 /tools
parent	bd5bdd200c9e981cd5e2495966968cb26010573c (diff)
parent	3448375e71a49cc29cc62cc941bea137d723956e (diff)
Merge branch 'limited-queueing-in-nmi-for-rqspinlock'
Kumar Kartikeya Dwivedi says:

====================
Limited queueing in NMI for rqspinlock

Ritesh reported that he was frequently seeing timeouts in cases which
should have been covered by the AA heuristics. This led to the discovery
of multiple gaps in the current code that could lead to timeouts when the
AA heuristics could have worked to prevent them. More details and the
investigation are available in the original threads. [0][1]

This set restores the ability for NMI waiters to queue in the slow path,
and reduces the cases where they would attempt to trylock. However, such
queueing must not happen when interrupting waiters which the NMI itself
depends upon for forward progress; in those cases the trylock fallback
remains, but with a single attempt to avoid aimless attempts to acquire
the lock.

It also closes a possible window in the lock fast path and the unlock
path where NMIs landing between cmpxchg and entry creation, or between
entry deletion and unlock, would miss the detection of an AA scenario
and end up timing out.

This virtually eliminates all the cases where the existing heuristics can
prevent timeouts and quickly recover from a deadlock. More details are
available in the commit logs for each patch.

[0]: https://lore.kernel.org/bpf/CAH6OuBTjG+N=+GGwcpOUbeDN563oz4iVcU3rbse68egp9wj9_A@mail.gmail.com
[1]: https://lore.kernel.org/bpf/20251125203253.3287019-1-memxor@gmail.com
====================

Link: https://patch.msgid.link/20251128232802.1031906-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
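For orientation, here is a hedged sketch (not taken from this series) of the AA
scenario the heuristics are meant to catch: a CPU already holds an rqspinlock
in task context and is then interrupted by an NMI that tries to take the same
lock. With an ordinary spinlock this would deadlock; with rqspinlock the nested
attempt is expected to fail with an error instead of hanging. Only the
raw_res_spin_lock_irqsave()/raw_res_spin_unlock_irqrestore() calls below appear
in the patch; the lock type name, sketch_task_context()/sketch_nmi_handler(),
and everything else are assumptions for illustration.

/*
 * Hedged sketch of the AA case; assumes the rqspinlock declarations
 * already included by the test module, and an assumed rqspinlock_t
 * typedef (initialization omitted).
 */
static rqspinlock_t sketch_lock;

static void sketch_task_context(void)
{
	unsigned long flags;

	if (raw_res_spin_lock_irqsave(&sketch_lock, flags))
		return;			/* non-zero: lock was not acquired */
	/* ... critical section; an NMI may land here on this CPU ... */
	raw_res_spin_unlock_irqrestore(&sketch_lock, flags);
}

static void sketch_nmi_handler(void)
{
	unsigned long flags;
	int ret;

	/* Nested attempt on the lock held by the interrupted context (AA). */
	ret = raw_res_spin_lock_irqsave(&sketch_lock, flags);
	if (ret)
		return;			/* deadlock detected or timed out: back off */
	raw_res_spin_unlock_irqrestore(&sketch_lock, flags);
}

The selftest module changed below drives this kind of contention from perf NMI
callbacks and records whether each lock attempt succeeded or returned an error.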
Diffstat (limited to 'tools')
-rw-r--r--  tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c | 55
1 file changed, 43 insertions, 12 deletions
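The hunks below generalize the module's per-CPU histogram from separate
normal/nmi arrays to a single array indexed by a new rqsl_context enum, and add
per-context success/failure counters so the report also shows how many lock
attempts returned an error. A minimal sketch of that counting pattern, with the
enum values taken from the patch and the other names (sketch_stats,
sketch_record) hypothetical:

/* Per-context result counting, mirroring rqsl_record_lock_result() below. */
enum rqsl_context { RQSL_CTX_NORMAL = 0, RQSL_CTX_NMI, RQSL_CTX_MAX };

struct sketch_stats {
	atomic64_t success[RQSL_CTX_MAX];	/* lock call returned 0 */
	atomic64_t failure[RQSL_CTX_MAX];	/* lock call returned an error */
};

static void sketch_record(struct sketch_stats *s, enum rqsl_context ctx, int ret)
{
	if (!ret)
		atomic64_inc(&s->success[ctx]);
	else
		atomic64_inc(&s->failure[ctx]);
}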
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
index e8dd3fbc6ea5..7b4ae5e81d32 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
@@ -33,9 +33,16 @@ static const unsigned int rqsl_hist_ms[] = {
};
#define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms)
+enum rqsl_context {
+ RQSL_CTX_NORMAL = 0,
+ RQSL_CTX_NMI,
+ RQSL_CTX_MAX,
+};
+
struct rqsl_cpu_hist {
- atomic64_t normal[RQSL_NR_HIST_BUCKETS];
- atomic64_t nmi[RQSL_NR_HIST_BUCKETS];
+ atomic64_t hist[RQSL_CTX_MAX][RQSL_NR_HIST_BUCKETS];
+ atomic64_t success[RQSL_CTX_MAX];
+ atomic64_t failure[RQSL_CTX_MAX];
};
static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists);
@@ -117,14 +124,18 @@ static u32 rqsl_hist_bucket_idx(u32 delta_ms)
return RQSL_NR_HIST_BUCKETS - 1;
}
-static void rqsl_record_lock_time(u64 delta_ns, bool is_nmi)
+static void rqsl_record_lock_result(u64 delta_ns, enum rqsl_context ctx, int ret)
{
struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists);
u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC);
u32 bucket = rqsl_hist_bucket_idx(delta_ms);
- atomic64_t *buckets = is_nmi ? hist->nmi : hist->normal;
+ atomic64_t *buckets = hist->hist[ctx];
atomic64_inc(&buckets[bucket]);
+ if (!ret)
+ atomic64_inc(&hist->success[ctx]);
+ else
+ atomic64_inc(&hist->failure[ctx]);
}
static int rqspinlock_worker_fn(void *arg)
@@ -147,7 +158,8 @@ static int rqspinlock_worker_fn(void *arg)
}
start_ns = ktime_get_mono_fast_ns();
ret = raw_res_spin_lock_irqsave(worker_lock, flags);
- rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, false);
+ rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+ RQSL_CTX_NORMAL, ret);
mdelay(normal_delay);
if (!ret)
raw_res_spin_unlock_irqrestore(worker_lock, flags);
@@ -190,7 +202,8 @@ static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
locks = rqsl_get_lock_pair(cpu);
start_ns = ktime_get_mono_fast_ns();
ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags);
- rqsl_record_lock_time(ktime_get_mono_fast_ns() - start_ns, true);
+ rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+ RQSL_CTX_NMI, ret);
mdelay(nmi_delay);
@@ -300,12 +313,14 @@ static void rqsl_print_histograms(void)
u64 norm_counts[RQSL_NR_HIST_BUCKETS];
u64 nmi_counts[RQSL_NR_HIST_BUCKETS];
u64 total_counts[RQSL_NR_HIST_BUCKETS];
+ u64 norm_success, nmi_success, success_total;
+ u64 norm_failure, nmi_failure, failure_total;
u64 norm_total = 0, nmi_total = 0, total = 0;
bool has_slow = false;
for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
- norm_counts[i] = atomic64_read(&hist->normal[i]);
- nmi_counts[i] = atomic64_read(&hist->nmi[i]);
+ norm_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NORMAL][i]);
+ nmi_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NMI][i]);
total_counts[i] = norm_counts[i] + nmi_counts[i];
norm_total += norm_counts[i];
nmi_total += nmi_counts[i];
@@ -315,17 +330,33 @@ static void rqsl_print_histograms(void)
has_slow = true;
}
+ norm_success = atomic64_read(&hist->success[RQSL_CTX_NORMAL]);
+ nmi_success = atomic64_read(&hist->success[RQSL_CTX_NMI]);
+ norm_failure = atomic64_read(&hist->failure[RQSL_CTX_NORMAL]);
+ nmi_failure = atomic64_read(&hist->failure[RQSL_CTX_NMI]);
+ success_total = norm_success + nmi_success;
+ failure_total = norm_failure + nmi_failure;
+
if (!total)
continue;
if (!has_slow) {
- pr_err(" cpu%d: total %llu (normal %llu, nmi %llu), all within 0-%ums\n",
- cpu, total, norm_total, nmi_total, RQSL_SLOW_THRESHOLD_MS);
+ pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
+ "success %llu (normal %llu, nmi %llu) | "
+ "failure %llu (normal %llu, nmi %llu), all within 0-%ums\n",
+ cpu, total, norm_total, nmi_total,
+ success_total, norm_success, nmi_success,
+ failure_total, norm_failure, nmi_failure,
+ RQSL_SLOW_THRESHOLD_MS);
continue;
}
- pr_err(" cpu%d: total %llu (normal %llu, nmi %llu)\n",
- cpu, total, norm_total, nmi_total);
+ pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
+ "success %llu (normal %llu, nmi %llu) | "
+ "failure %llu (normal %llu, nmi %llu)\n",
+ cpu, total, norm_total, nmi_total,
+ success_total, norm_success, nmi_success,
+ failure_total, norm_failure, nmi_failure);
for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
unsigned int start_ms;