author     Linus Torvalds <torvalds@linux-foundation.org>  2025-07-29 12:55:12 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2025-07-29 12:55:12 -0700
commit     dba3ec9f2ad085f05528ccd36d6835b06b5370cd (patch)
tree       05777ad148977140291c316c58ef9b6a9fa53159 /kernel/irq/chip.c
parent     5623870d9b4f1b9bd4a8b75544f2f9ed2a49afff (diff)
parent     8d39d6ec4db5da9899993092227584a97c203fd3 (diff)
Merge tag 'irq-core-2025-07-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq updates from Thomas Gleixner:
- Prevent an interrupt migration related live lock in handle_edge_irq()
If the affinity of an edge type interrupt is moved to a new target CPU
while the interrupt is still being handled on the previous target CPU,
the handler might get stuck on the previous target for a long time,
which causes both involved CPUs to waste cycles and eventually run
into a soft-lockup situation.
Solve this by checking whether the interrupt has been redirected to a
new target CPU. If the interrupt arrives on that new target while the
handler still runs on the old one, busy wait for completion instead of
masking the interrupt and setting the pending bit, which would cause
the old CPU to re-run the handler and, in the worst case, repeat this
exercise for a long time (see the sketch after this list).
This only works on architectures which use single CPU interrupt
targets, but those are so far the only ones where this behaviour has
been observed.
- Add a kunit test for interrupt disable depth counts
The nested interrupt disable depth has been an issue in the past,
especially in combination with free_irq(), interrupt shutdown, CPU
hotplug and their interactions. The test exercises the combinations
of these scenarios and checks for correctness (a kunit-style sketch
follows the shortlog below).
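For illustration, here is a condensed sketch of the approach described in the
first item above, combining the new-target check and the busy wait that the
diff at the bottom of this page splits across irq_can_handle_pm() and
irq_wait_on_inprogress(). The helper name irq_wait_for_old_target() is
hypothetical; the calls it uses are taken from that diff, and this is a
sketch rather than the exact upstream code:

    /* Hypothetical condensation of the upstream change: on the new target
     * CPU, wait for the handler on the old CPU instead of masking the
     * interrupt and setting the pending bit. Called with desc->lock held.
     */
    static bool irq_wait_for_old_target(struct irq_desc *desc)
    {
            const struct cpumask *aff;

            /* Only single target, edge type interrupts are affected */
            if (!irqd_is_single_target(&desc->irq_data) ||
                desc->handle_irq != handle_edge_irq)
                    return false;

            /* Proceed only if this CPU is the new (effective) target */
            aff = irq_data_get_effective_affinity_mask(&desc->irq_data);
            if (cpumask_first(aff) != smp_processor_id())
                    return false;

            /* Busy wait with the lock dropped so the old CPU can clear
             * INPROGRESS, then recheck under the lock.
             */
            do {
                    raw_spin_unlock(&desc->lock);
                    while (irqd_irq_inprogress(&desc->irq_data))
                            cpu_relax();
                    raw_spin_lock(&desc->lock);
            } while (irqd_irq_inprogress(&desc->irq_data));

            /* The interrupt might have been disabled or freed meanwhile */
            return !irqd_irq_disabled(&desc->irq_data) && desc->action;
    }

The key point is that the descriptor lock is dropped while spinning, so the
old CPU can acquire it, finish the handler and clear INPROGRESS.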
* tag 'irq-core-2025-07-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
genirq: Prevent migration live lock in handle_edge_irq()
genirq: Split up irq_pm_check_wakeup()
genirq: Move irq_wait_for_poll() to call site
genirq: Remove pointless local variable
genirq: Add kunit tests for depth counts
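As a rough illustration of what the depth-count checks look like, here is a
hypothetical kunit-style sketch. The interrupt number, suite name and test
body are placeholders invented for this sketch; the actual test added by this
series sets up its own test interrupt and covers the free_irq()/shutdown/
hotplug interactions mentioned above:

    #include <kunit/test.h>
    #include <linux/interrupt.h>
    #include <linux/irq.h>
    #include <linux/irqdesc.h>

    /* Placeholder: the real test allocates and requests its own interrupt */
    #define SKETCH_TEST_IRQ 1

    static void irq_disable_depth_sketch(struct kunit *test)
    {
            struct irq_desc *desc = irq_to_desc(SKETCH_TEST_IRQ);
            unsigned int start;

            KUNIT_ASSERT_NOT_NULL(test, desc);
            start = desc->depth;

            /* Each disable nests; desc->depth tracks the nesting level */
            disable_irq_nosync(SKETCH_TEST_IRQ);
            disable_irq_nosync(SKETCH_TEST_IRQ);
            KUNIT_EXPECT_EQ(test, desc->depth, start + 2);

            /* enable_irq() unwinds one level at a time */
            enable_irq(SKETCH_TEST_IRQ);
            KUNIT_EXPECT_EQ(test, desc->depth, start + 1);
            enable_irq(SKETCH_TEST_IRQ);
            KUNIT_EXPECT_EQ(test, desc->depth, start);
    }

    static struct kunit_case irq_depth_sketch_cases[] = {
            KUNIT_CASE(irq_disable_depth_sketch),
            {}
    };

    static struct kunit_suite irq_depth_sketch_suite = {
            .name = "irq_depth_sketch",
            .test_cases = irq_depth_sketch_cases,
    };
    kunit_test_suite(irq_depth_sketch_suite);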
Diffstat (limited to 'kernel/irq/chip.c')
-rw-r--r--  kernel/irq/chip.c  72
1 file changed, 62 insertions(+), 10 deletions(-)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 2b274007e8ba..624106e886ad 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -457,22 +457,33 @@ void unmask_threaded_irq(struct irq_desc *desc)
 	unmask_irq(desc);
 }
 
-static bool irq_check_poll(struct irq_desc *desc)
-{
-	if (!(desc->istate & IRQS_POLL_INPROGRESS))
-		return false;
-	return irq_wait_for_poll(desc);
+/* Busy wait until INPROGRESS is cleared */
+static bool irq_wait_on_inprogress(struct irq_desc *desc)
+{
+	if (IS_ENABLED(CONFIG_SMP)) {
+		do {
+			raw_spin_unlock(&desc->lock);
+			while (irqd_irq_inprogress(&desc->irq_data))
+				cpu_relax();
+			raw_spin_lock(&desc->lock);
+		} while (irqd_irq_inprogress(&desc->irq_data));
+
+		/* Might have been disabled in meantime */
+		return !irqd_irq_disabled(&desc->irq_data) && desc->action;
+	}
+	return false;
 }
 
 static bool irq_can_handle_pm(struct irq_desc *desc)
 {
-	unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED;
+	struct irq_data *irqd = &desc->irq_data;
+	const struct cpumask *aff;
 
 	/*
 	 * If the interrupt is not in progress and is not an armed
 	 * wakeup interrupt, proceed.
 	 */
-	if (!irqd_has_set(&desc->irq_data, mask))
+	if (!irqd_has_set(irqd, IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED))
 		return true;
 
 	/*
@@ -480,13 +491,54 @@ static bool irq_can_handle_pm(struct irq_desc *desc)
 	 * and suspended, disable it and notify the pm core about the
 	 * event.
 	 */
-	if (irq_pm_check_wakeup(desc))
+	if (unlikely(irqd_has_set(irqd, IRQD_WAKEUP_ARMED))) {
+		irq_pm_handle_wakeup(desc);
+		return false;
+	}
+
+	/* Check whether the interrupt is polled on another CPU */
+	if (unlikely(desc->istate & IRQS_POLL_INPROGRESS)) {
+		if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
+			      "irq poll in progress on cpu %d for irq %d\n",
+			      smp_processor_id(), desc->irq_data.irq))
+			return false;
+		return irq_wait_on_inprogress(desc);
+	}
+
+	/* The below works only for single target interrupts */
+	if (!IS_ENABLED(CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK) ||
+	    !irqd_is_single_target(irqd) || desc->handle_irq != handle_edge_irq)
 		return false;
 
 	/*
-	 * Handle a potential concurrent poll on a different core.
+	 * If the interrupt affinity was moved to this CPU and the
+	 * interrupt is currently handled on the previous target CPU, then
+	 * busy wait for INPROGRESS to be cleared. Otherwise for edge type
+	 * interrupts the handler might get stuck on the previous target:
+	 *
+	 * CPU 0                  CPU 1 (new target)
+	 * handle_edge_irq()
+	 * repeat:
+	 *	handle_event()    handle_edge_irq()
+	 *			  if (INPROGESS) {
+	 *			    set(PENDING);
+	 *			    mask();
+	 *			    return;
+	 *			  }
+	 *	if (PENDING) {
+	 *	  clear(PENDING);
+	 *	  unmask();
+	 *	  goto repeat;
+	 *	}
+	 *
+	 * This happens when the device raises interrupts with a high rate
+	 * and always before handle_event() completes and the CPU0 handler
+	 * can clear INPROGRESS. This has been observed in virtual machines.
 	 */
-	return irq_check_poll(desc);
+	aff = irq_data_get_effective_affinity_mask(irqd);
+	if (cpumask_first(aff) != smp_processor_id())
+		return false;
+	return irq_wait_on_inprogress(desc);
 }
 
 static inline bool irq_can_handle_actions(struct irq_desc *desc)