Diffstat (limited to 'kernel/irq_work.c')
-rw-r--r--  kernel/irq_work.c | 266
1 file changed, 199 insertions(+), 67 deletions(-)
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 6b7cdf17ccf8..73f7e1fd4ab4 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
*
@@ -17,35 +18,54 @@
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
+#include <linux/smpboot.h>
#include <asm/processor.h>
+#include <linux/kasan.h>
+#include <trace/events/ipi.h>
static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(struct llist_head, lazy_list);
+static DEFINE_PER_CPU(struct task_struct *, irq_workd);
+
+static void wake_irq_workd(void)
+{
+ struct task_struct *tsk = __this_cpu_read(irq_workd);
+
+ if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
+ wake_up_process(tsk);
+}
+
+#ifdef CONFIG_SMP
+static void irq_work_wake(struct irq_work *entry)
+{
+ wake_irq_workd();
+}
+
+static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
+ IRQ_WORK_INIT_HARD(irq_work_wake);
+#endif
+
+static int irq_workd_should_run(unsigned int cpu)
+{
+ return !llist_empty(this_cpu_ptr(&lazy_list));
+}
/*
* Claim the entry so that no one else will poke at it.
*/
static bool irq_work_claim(struct irq_work *work)
{
- unsigned long flags, oflags, nflags;
+ int oflags;
+ oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags);
/*
- * Start with our best wish as a premise but only trust any
- * flag value after cmpxchg() result.
+ * If the work is already pending, no need to raise the IPI.
+ * The pairing smp_mb() in irq_work_single() makes sure
+ * everything we did before is visible.
*/
- flags = work->flags & ~IRQ_WORK_PENDING;
- for (;;) {
- nflags = flags | IRQ_WORK_CLAIMED;
- oflags = cmpxchg(&work->flags, flags, nflags);
- if (oflags == flags)
- break;
- if (oflags & IRQ_WORK_PENDING)
- return false;
- flags = oflags;
- cpu_relax();
- }
-
+ if (oflags & IRQ_WORK_PENDING)
+ return false;
return true;
}
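
The rewrite above replaces the old cmpxchg() retry loop with a single atomic_fetch_or(): the first caller to set IRQ_WORK_PENDING owns the item, and every later caller sees the bit already set and backs off. A minimal userspace analogue of the same claim protocol (an illustrative sketch only, using C11 atomics rather than the kernel's atomic_t):

    #include <stdatomic.h>
    #include <stdbool.h>

    #define WORK_PENDING 0x1
    #define WORK_BUSY    0x2
    #define WORK_CLAIMED (WORK_PENDING | WORK_BUSY)

    /* First claimant wins: one atomic RMW instead of a cmpxchg() loop. */
    static bool claim(atomic_int *flags)
    {
            int oflags = atomic_fetch_or(flags, WORK_CLAIMED);

            /* PENDING already set: someone else owns the item. */
            return !(oflags & WORK_PENDING);
    }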
@@ -56,6 +76,58 @@ void __weak arch_irq_work_raise(void)
*/
}
+static __always_inline void irq_work_raise(struct irq_work *work)
+{
+ if (trace_ipi_send_cpu_enabled() && arch_irq_work_has_interrupt())
+ trace_ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func);
+
+ arch_irq_work_raise();
+}
+
+/* Enqueue on current CPU, work must already be claimed and preempt disabled */
+static void __irq_work_queue_local(struct irq_work *work)
+{
+ struct llist_head *list;
+ bool rt_lazy_work = false;
+ bool lazy_work = false;
+ int work_flags;
+
+ work_flags = atomic_read(&work->node.a_flags);
+ if (work_flags & IRQ_WORK_LAZY)
+ lazy_work = true;
+ else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
+ !(work_flags & IRQ_WORK_HARD_IRQ))
+ rt_lazy_work = true;
+
+ if (lazy_work || rt_lazy_work)
+ list = this_cpu_ptr(&lazy_list);
+ else
+ list = this_cpu_ptr(&raised_list);
+
+ if (!llist_add(&work->node.llist, list))
+ return;
+
+ /* If the work is "lazy", handle it from next tick if any */
+ if (!lazy_work || tick_nohz_tick_stopped())
+ irq_work_raise(work);
+}
+
+/* Enqueue the irq work @work on the current CPU */
+bool irq_work_queue(struct irq_work *work)
+{
+ /* Only queue if not already pending */
+ if (!irq_work_claim(work))
+ return false;
+
+ /* Queue the entry and raise the IPI if needed. */
+ preempt_disable();
+ __irq_work_queue_local(work);
+ preempt_enable();
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(irq_work_queue);
+
/*
* Enqueue the irq_work @work on @cpu unless it's already pending
* somewhere.
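
For orientation, a typical irq_work_queue() caller looks like the following (a hypothetical example, not part of this patch; IRQ_WORK_INIT() is the static initializer from <linux/irq_work.h>). Note that on PREEMPT_RT an item initialized this way is handled by the irq_workd thread unless it is created with IRQ_WORK_INIT_HARD():

    /* Hypothetical caller; my_work_func and my_work are made-up names. */
    static void my_work_func(struct irq_work *work)
    {
            pr_info("irq_work callback ran\n");
    }

    static struct irq_work my_work = IRQ_WORK_INIT(my_work_func);

    static void poke(void)
    {
            /* Safe even from NMI; returns false if already pending. */
            irq_work_queue(&my_work);
    }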
@@ -64,53 +136,50 @@ void __weak arch_irq_work_raise(void)
*/
bool irq_work_queue_on(struct irq_work *work, int cpu)
{
+#ifndef CONFIG_SMP
+ return irq_work_queue(work);
+
+#else /* CONFIG_SMP: */
/* All work should have been flushed before going offline */
WARN_ON_ONCE(cpu_is_offline(cpu));
-#ifdef CONFIG_SMP
-
- /* Arch remote IPI send/receive backend aren't NMI safe */
- WARN_ON_ONCE(in_nmi());
-
/* Only queue if not already pending */
if (!irq_work_claim(work))
return false;
- if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
- arch_send_call_function_single_ipi(cpu);
+ kasan_record_aux_stack(work);
-#else /* #ifdef CONFIG_SMP */
- irq_work_queue(work);
-#endif /* #else #ifdef CONFIG_SMP */
+ preempt_disable();
+ if (cpu != smp_processor_id()) {
+ /* Arch remote IPI send/receive backends aren't NMI safe */
+ WARN_ON_ONCE(in_nmi());
- return true;
-}
+ /*
+ * On PREEMPT_RT the items which are not marked as
+ * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
+ * item is used on the remote CPU to wake the thread.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
+ !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
-/* Enqueue the irq work @work on the current CPU */
-bool irq_work_queue(struct irq_work *work)
-{
- /* Only queue if not already pending */
- if (!irq_work_claim(work))
- return false;
+ if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
+ goto out;
- /* Queue the entry and raise the IPI if needed. */
- preempt_disable();
+ work = &per_cpu(irq_work_wakeup, cpu);
+ if (!irq_work_claim(work))
+ goto out;
+ }
- /* If the work is "lazy", handle it from next tick if any */
- if (work->flags & IRQ_WORK_LAZY) {
- if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
- tick_nohz_tick_stopped())
- arch_irq_work_raise();
+ __smp_call_single_queue(cpu, &work->node.llist);
} else {
- if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
- arch_irq_work_raise();
+ __irq_work_queue_local(work);
}
-
+out:
preempt_enable();
return true;
+#endif /* CONFIG_SMP */
}
-EXPORT_SYMBOL_GPL(irq_work_queue);
bool irq_work_needs_cpu(void)
{
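
irq_work_queue_on() now funnels local queueing through __irq_work_queue_local() and only takes the IPI path for a genuinely remote CPU. A hypothetical caller, reusing @my_work from the sketch above:

    /* Hypothetical example: run the callback on CPU @target. */
    static void kick_cpu(int target)
    {
            /* The target must be online; see the WARN_ON_ONCE() above. */
            if (!irq_work_queue_on(&my_work, target))
                    pr_debug("my_work already pending\n");
    }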
@@ -129,36 +198,58 @@ bool irq_work_needs_cpu(void)
return true;
}
+void irq_work_single(void *arg)
+{
+ struct irq_work *work = arg;
+ int flags;
+
+ /*
+ * Clear the PENDING bit, after this point the @work can be re-used.
+ * The PENDING bit acts as a lock, and we own it, so we can clear it
+ * without atomic ops.
+ */
+ flags = atomic_read(&work->node.a_flags);
+ flags &= ~IRQ_WORK_PENDING;
+ atomic_set(&work->node.a_flags, flags);
+
+ /*
+ * See irq_work_claim().
+ */
+ smp_mb();
+
+ lockdep_irq_work_enter(flags);
+ work->func(work);
+ lockdep_irq_work_exit(flags);
+
+ /*
+ * Clear the BUSY bit, if set, and return to the free state if no-one
+ * else claimed it meanwhile.
+ */
+ (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
+
+ if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+ !arch_irq_work_has_interrupt())
+ rcuwait_wake_up(&work->irqwait);
+}
+
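
irq_work_single() completes the flag protocol started by irq_work_claim(): PENDING is dropped with a plain store (the claimer owns the bit), the callback runs, then BUSY is dropped with a cmpxchg that deliberately fails if the item was re-claimed in the meantime. Continuing the userspace analogue from above (same includes and flag macros):

    /* Companion to claim() above: the run side of the flag protocol. */
    static void run_one(atomic_int *flags, void (*func)(void))
    {
            /* Drop PENDING; the item may be re-claimed from here on. */
            int f = atomic_load(flags) & ~WORK_PENDING;
            atomic_store(flags, f);

            atomic_thread_fence(memory_order_seq_cst); /* pairs with claim() */

            func();

            /* Drop BUSY only if nobody re-claimed the item meanwhile. */
            atomic_compare_exchange_strong(flags, &f, f & ~WORK_BUSY);
    }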
static void irq_work_run_list(struct llist_head *list)
{
struct irq_work *work, *tmp;
struct llist_node *llnode;
- unsigned long flags;
- BUG_ON(!irqs_disabled());
+ /*
+ * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
+ * in a per-CPU thread in preemptible context. Only the items which are
+ * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
+ */
+ BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
if (llist_empty(list))
return;
llnode = llist_del_all(list);
- llist_for_each_entry_safe(work, tmp, llnode, llnode) {
- /*
- * Clear the PENDING bit, after this point the @work
- * can be re-used.
- * Make it immediately visible so that other CPUs trying
- * to claim that work don't rely on us to handle their data
- * while we are in the middle of the func.
- */
- flags = work->flags & ~IRQ_WORK_PENDING;
- xchg(&work->flags, flags);
-
- work->func(work);
- /*
- * Clear the BUSY bit and return to the free state if
- * no-one else claimed it meanwhile.
- */
- (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
- }
+ llist_for_each_entry_safe(work, tmp, llnode, node.llist)
+ irq_work_single(work);
}
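
Throughout the diff, work->flags and work->llnode have become work->node.a_flags and work->node.llist: the flag word and the llist node now live in a node shared with the smp_call_function CSD machinery, which is also why irq_work_claim() ORs in CSD_TYPE_IRQ_WORK. Roughly, abbreviated from <linux/smp_types.h> and <linux/irq_work.h>:

    /* Abbreviated sketch of the layout the new accessors assume: */
    struct __call_single_node {
            struct llist_node llist;
            atomic_t          a_flags;  /* IRQ_WORK_* bits / CSD type bits */
    };

    struct irq_work {
            struct __call_single_node node;
            void (*func)(struct irq_work *);
            struct rcuwait irqwait;
    };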
/*
@@ -168,7 +259,10 @@ static void irq_work_run_list(struct llist_head *list)
void irq_work_run(void)
{
irq_work_run_list(this_cpu_ptr(&raised_list));
- irq_work_run_list(this_cpu_ptr(&lazy_list));
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ irq_work_run_list(this_cpu_ptr(&lazy_list));
+ else
+ wake_irq_workd();
}
EXPORT_SYMBOL_GPL(irq_work_run);
@@ -178,7 +272,11 @@ void irq_work_tick(void)
if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
irq_work_run_list(raised);
- irq_work_run_list(this_cpu_ptr(&lazy_list));
+
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ irq_work_run_list(this_cpu_ptr(&lazy_list));
+ else
+ wake_irq_workd();
}
/*
@@ -188,8 +286,42 @@ void irq_work_tick(void)
void irq_work_sync(struct irq_work *work)
{
lockdep_assert_irqs_enabled();
+ might_sleep();
- while (work->flags & IRQ_WORK_BUSY)
+ if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+ !arch_irq_work_has_interrupt()) {
+ rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
+ TASK_UNINTERRUPTIBLE);
+ return;
+ }
+
+ while (irq_work_is_busy(work))
cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
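
irq_work_sync() can now sleep (via rcuwait) whenever the work runs in thread context, so it must not be called from atomic context; the busy-wait poll remains only for hard-IRQ items on architectures with a real irq_work interrupt. Typical use is to flush the callback before freeing the item (hypothetical structure and names):

    /* Hypothetical teardown path: */
    struct my_dev {
            struct irq_work work;
            /* ... */
    };

    static void my_dev_free(struct my_dev *dev)
    {
            irq_work_sync(&dev->work);  /* may sleep on PREEMPT_RT */
            kfree(dev);
    }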
+
+static void run_irq_workd(unsigned int cpu)
+{
+ irq_work_run_list(this_cpu_ptr(&lazy_list));
+}
+
+static void irq_workd_setup(unsigned int cpu)
+{
+ sched_set_fifo_low(current);
+}
+
+static struct smp_hotplug_thread irqwork_threads = {
+ .store = &irq_workd,
+ .setup = irq_workd_setup,
+ .thread_should_run = irq_workd_should_run,
+ .thread_fn = run_irq_workd,
+ .thread_comm = "irq_work/%u",
+};
+
+static __init int irq_work_init_threads(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
+ return 0;
+}
+early_initcall(irq_work_init_threads);
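
On PREEMPT_RT the lazy list is never drained from IRQ context; wake_irq_workd() wakes the per-CPU irq_work/%u thread instead, and smpboot_register_percpu_thread() takes care of spawning the threads and following CPU hotplug. Conceptually, each thread runs a loop like the following (a simplified sketch; the real loop is smpboot_thread_fn() in kernel/smpboot.c):

    /* Simplified sketch of what the smpboot core does per CPU: */
    static int irq_workd_loop(void *arg)
    {
            unsigned int cpu = (unsigned long)arg;

            for (;;) {
                    set_current_state(TASK_INTERRUPTIBLE);
                    if (!irq_workd_should_run(cpu))
                            schedule();          /* until wake_irq_workd() */
                    __set_current_state(TASK_RUNNING);
                    run_irq_workd(cpu);          /* drain this CPU's lazy_list */
            }
            return 0;
    }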