Diffstat (limited to 'kernel/irq/manage.c')
-rw-r--r--  kernel/irq/manage.c  2447
1 file changed, 1781 insertions(+), 666 deletions(-)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 514bcfd855a8..0bb29316b436 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * linux/kernel/irq/manage.c - * * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar * Copyright (C) 2005-2006 Thomas Gleixner * @@ -14,45 +13,36 @@ #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> +#include <linux/irqdomain.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/rt.h> +#include <linux/sched/task.h> +#include <linux/sched/isolation.h> +#include <uapi/linux/sched/types.h> #include <linux/task_work.h> #include "internals.h" -#ifdef CONFIG_IRQ_FORCED_THREADING -__read_mostly bool force_irqthreads; +#if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT) +DEFINE_STATIC_KEY_FALSE(force_irqthreads_key); static int __init setup_forced_irqthreads(char *arg) { - force_irqthreads = true; + static_branch_enable(&force_irqthreads_key); return 0; } early_param("threadirqs", setup_forced_irqthreads); #endif -/** - * synchronize_irq - wait for pending IRQ handlers (on other CPUs) - * @irq: interrupt number to wait for - * - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ -void synchronize_irq(unsigned int irq) +static int __irq_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *state); + +static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_data *irqd = irq_desc_get_irq_data(desc); bool inprogress; - if (!desc) - return; - do { - unsigned long flags; - /* * Wait until we're out of the critical section. This might * give the wrong answer due to the lack of memory barriers. @@ -61,100 +51,238 @@ void synchronize_irq(unsigned int irq) cpu_relax(); /* Ok, that indicated we're done: double-check carefully. */ - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irqsave)(&desc->lock); inprogress = irqd_irq_inprogress(&desc->irq_data); - raw_spin_unlock_irqrestore(&desc->lock, flags); + /* + * If requested and supported, check at the chip whether it + * is in flight at the hardware level, i.e. already pending + * in a CPU and waiting for service and acknowledge. + */ + if (!inprogress && sync_chip) { + /* + * Ignore the return code. inprogress is only updated + * when the chip supports it. + */ + __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE, + &inprogress); + } /* Oops, that failed? */ } while (inprogress); +} + +/** + * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for + * + * This function waits for any pending hard IRQ handlers for this interrupt + * to complete before returning. If you use this function while holding a + * resource the IRQ handler may need you will deadlock. It does not take + * associated threaded handlers into account. + * + * Do not use this for shutdown scenarios where you must be sure that all + * parts (hardirq and threaded handler) have completed. + * + * Returns: false if a threaded handler is active. + * + * This function may be called - with care - from IRQ context. 
+ * + * It does not check whether there is an interrupt in flight at the + * hardware level, but not serviced yet, as this might deadlock when called + * with interrupts disabled and the target CPU of the interrupt is the + * current CPU. + */ +bool synchronize_hardirq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) { + __synchronize_hardirq(desc, false); + return !atomic_read(&desc->threads_active); + } + return true; +} +EXPORT_SYMBOL(synchronize_hardirq); + +static void __synchronize_irq(struct irq_desc *desc) +{ + __synchronize_hardirq(desc, true); /* - * We made sure that no hardirq handler is running. Now verify - * that no threaded handlers are active. + * We made sure that no hardirq handler is running. Now verify that no + * threaded handlers are active. */ wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active)); } + +/** + * synchronize_irq - wait for pending IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for + * + * This function waits for any pending IRQ handlers for this interrupt to + * complete before returning. If you use this function while holding a + * resource the IRQ handler may need you will deadlock. + * + * Can only be called from preemptible code as it might sleep when + * an interrupt thread is associated to @irq. + * + * It optionally makes sure (when the irq chip supports that method) + * that the interrupt is not pending in any CPU and waiting for + * service. + */ +void synchronize_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) + __synchronize_irq(desc); +} EXPORT_SYMBOL(synchronize_irq); #ifdef CONFIG_SMP cpumask_var_t irq_default_affinity; +static bool __irq_can_set_affinity(struct irq_desc *desc) +{ + if (!desc || !irqd_can_balance(&desc->irq_data) || + !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity) + return false; + return true; +} + /** - * irq_can_set_affinity - Check if the affinity of a given irq can be set - * @irq: Interrupt to check + * irq_can_set_affinity - Check if the affinity of a given irq can be set + * @irq: Interrupt to check * */ int irq_can_set_affinity(unsigned int irq) { - struct irq_desc *desc = irq_to_desc(irq); + return __irq_can_set_affinity(irq_to_desc(irq)); +} - if (!desc || !irqd_can_balance(&desc->irq_data) || - !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity) - return 0; +/** + * irq_can_set_affinity_usr - Check if affinity of a irq can be set from user space + * @irq: Interrupt to check + * + * Like irq_can_set_affinity() above, but additionally checks for the + * AFFINITY_MANAGED flag. + */ +bool irq_can_set_affinity_usr(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); - return 1; + return __irq_can_set_affinity(desc) && + !irqd_affinity_is_managed(&desc->irq_data); } /** - * irq_set_thread_affinity - Notify irq threads to adjust affinity - * @desc: irq descriptor which has affitnity changed + * irq_set_thread_affinity - Notify irq threads to adjust affinity + * @desc: irq descriptor which has affinity changed * - * We just set IRQTF_AFFINITY and delegate the affinity setting - * to the interrupt thread itself. We can not call - * set_cpus_allowed_ptr() here as we hold desc->lock and this - * code can be called from hard interrupt context. + * Just set IRQTF_AFFINITY and delegate the affinity setting to the + * interrupt thread itself. We can not call set_cpus_allowed_ptr() here as + * we hold desc->lock and this code can be called from hard interrupt + * context. 
*/ -void irq_set_thread_affinity(struct irq_desc *desc) +static void irq_set_thread_affinity(struct irq_desc *desc) { - struct irqaction *action = desc->action; + struct irqaction *action; - while (action) { - if (action->thread) + for_each_action_of_desc(desc, action) { + if (action->thread) { set_bit(IRQTF_AFFINITY, &action->thread_flags); - action = action->next; + wake_up_process(action->thread); + } + if (action->secondary && action->secondary->thread) { + set_bit(IRQTF_AFFINITY, &action->secondary->thread_flags); + wake_up_process(action->secondary->thread); + } } } -#ifdef CONFIG_GENERIC_PENDING_IRQ -static inline bool irq_can_move_pcntxt(struct irq_data *data) -{ - return irqd_can_move_in_process_context(data); -} -static inline bool irq_move_pending(struct irq_data *data) -{ - return irqd_is_setaffinity_pending(data); -} -static inline void -irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) -{ - cpumask_copy(desc->pending_mask, mask); -} -static inline void -irq_get_pending(struct cpumask *mask, struct irq_desc *desc) +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK +static void irq_validate_effective_affinity(struct irq_data *data) { - cpumask_copy(mask, desc->pending_mask); + const struct cpumask *m = irq_data_get_effective_affinity_mask(data); + struct irq_chip *chip = irq_data_get_irq_chip(data); + + if (!cpumask_empty(m)) + return; + pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n", + chip->name, data->irq); } #else -static inline bool irq_can_move_pcntxt(struct irq_data *data) { return true; } -static inline bool irq_move_pending(struct irq_data *data) { return false; } -static inline void -irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { } -static inline void -irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } +static inline void irq_validate_effective_affinity(struct irq_data *data) { } #endif +static DEFINE_PER_CPU(struct cpumask, __tmp_mask); + int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { + struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask); struct irq_desc *desc = irq_data_to_desc(data); struct irq_chip *chip = irq_data_get_irq_chip(data); + const struct cpumask *prog_mask; int ret; - ret = chip->irq_set_affinity(data, mask, false); + if (!chip || !chip->irq_set_affinity) + return -EINVAL; + + /* + * If this is a managed interrupt and housekeeping is enabled on + * it check whether the requested affinity mask intersects with + * a housekeeping CPU. If so, then remove the isolated CPUs from + * the mask and just keep the housekeeping CPU(s). This prevents + * the affinity setter from routing the interrupt to an isolated + * CPU to avoid that I/O submitted from a housekeeping CPU causes + * interrupts on an isolated one. + * + * If the masks do not intersect or include online CPU(s) then + * keep the requested mask. The isolated target CPUs are only + * receiving interrupts when the I/O operation was submitted + * directly from them. + * + * If all housekeeping CPUs in the affinity mask are offline, the + * interrupt will be migrated by the CPU hotplug code once a + * housekeeping CPU which belongs to the affinity mask comes + * online. 
+ */ + if (irqd_affinity_is_managed(data) && + housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) { + const struct cpumask *hk_mask; + + hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); + + cpumask_and(tmp_mask, mask, hk_mask); + if (!cpumask_intersects(tmp_mask, cpu_online_mask)) + prog_mask = mask; + else + prog_mask = tmp_mask; + } else { + prog_mask = mask; + } + + /* + * Make sure we only provide online CPUs to the irqchip, + * unless we are being asked to force the affinity (in which + * case we do as we are told). + */ + cpumask_and(tmp_mask, prog_mask, cpu_online_mask); + if (!force && !cpumask_empty(tmp_mask)) + ret = chip->irq_set_affinity(data, tmp_mask, force); + else if (force) + ret = chip->irq_set_affinity(data, mask, force); + else + ret = -EINVAL; + switch (ret) { case IRQ_SET_MASK_OK: - cpumask_copy(data->affinity, mask); + case IRQ_SET_MASK_OK_DONE: + cpumask_copy(desc->irq_common_data.affinity, mask); + fallthrough; case IRQ_SET_MASK_OK_NOCOPY: + irq_validate_effective_affinity(data); irq_set_thread_affinity(desc); ret = 0; } @@ -162,7 +290,65 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, return ret; } -int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) +#ifdef CONFIG_GENERIC_PENDING_IRQ +static inline int irq_set_affinity_pending(struct irq_data *data, + const struct cpumask *dest) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + irqd_set_move_pending(data); + irq_copy_pending(desc, dest); + return 0; +} +#else +static inline int irq_set_affinity_pending(struct irq_data *data, + const struct cpumask *dest) +{ + return -EBUSY; +} +#endif + +static int irq_try_set_affinity(struct irq_data *data, + const struct cpumask *dest, bool force) +{ + int ret = irq_do_set_affinity(data, dest, force); + + /* + * In case that the underlying vector management is busy and the + * architecture supports the generic pending mechanism then utilize + * this to avoid returning an error to user space. + */ + if (ret == -EBUSY && !force) + ret = irq_set_affinity_pending(data, dest); + return ret; +} + +static bool irq_set_affinity_deactivated(struct irq_data *data, + const struct cpumask *mask) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + /* + * Handle irq chips which can handle affinity only in activated + * state correctly + * + * If the interrupt is not yet activated, just store the affinity + * mask and do not call the chip driver at all. On activation the + * driver has to make sure anyway that the interrupt is in a + * usable state so startup works. 
+ */ + if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || + irqd_is_activated(data) || !irqd_affinity_on_activate(data)) + return false; + + cpumask_copy(desc->irq_common_data.affinity, mask); + irq_data_update_effective_affinity(data, mask); + irqd_set(data, IRQD_AFFINITY_SET); + return true; +} + +int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, + bool force) { struct irq_chip *chip = irq_data_get_irq_chip(data); struct irq_desc *desc = irq_data_to_desc(data); @@ -171,8 +357,11 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) if (!chip || !chip->irq_set_affinity) return -EINVAL; - if (irq_can_move_pcntxt(data)) { - ret = irq_do_set_affinity(data, mask, false); + if (irq_set_affinity_deactivated(data, mask)) + return 0; + + if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) { + ret = irq_try_set_affinity(data, mask, force); } else { irqd_set_move_pending(data); irq_copy_pending(desc, mask); @@ -180,7 +369,11 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) if (desc->affinity_notify) { kref_get(&desc->affinity_notify->kref); - schedule_work(&desc->affinity_notify->work); + if (!schedule_work(&desc->affinity_notify->work)) { + /* Work was already scheduled, drop our extra ref */ + kref_put(&desc->affinity_notify->kref, + desc->affinity_notify->release); + } } irqd_set(data, IRQD_AFFINITY_SET); @@ -188,56 +381,135 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) } /** - * irq_set_affinity - Set the irq affinity of a given irq - * @irq: Interrupt to set affinity - * @mask: cpumask + * irq_update_affinity_desc - Update affinity management for an interrupt + * @irq: The interrupt number to update + * @affinity: Pointer to the affinity descriptor + * + * This interface can be used to configure the affinity management of + * interrupts which have been allocated already. * + * There are certain limitations on when it may be used - attempts to use it + * for when the kernel is configured for generic IRQ reservation mode (in + * config GENERIC_IRQ_RESERVATION_MODE) will fail, as it may conflict with + * managed/non-managed interrupt accounting. In addition, attempts to use it on + * an interrupt which is already started or which has already been configured + * as managed will also fail, as these mean invalid init state or double init. */ -int irq_set_affinity(unsigned int irq, const struct cpumask *mask) +int irq_update_affinity_desc(unsigned int irq, struct irq_affinity_desc *affinity) +{ + /* + * Supporting this with the reservation scheme used by x86 needs + * some more thought. Fail it for now. + */ + if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) + return -EOPNOTSUPP; + + scoped_irqdesc_get_and_buslock(irq, 0) { + struct irq_desc *desc = scoped_irqdesc; + bool activated; + + /* Requires the interrupt to be shut down */ + if (irqd_is_started(&desc->irq_data)) + return -EBUSY; + + /* Interrupts which are already managed cannot be modified */ + if (irqd_affinity_is_managed(&desc->irq_data)) + return -EBUSY; + /* + * Deactivate the interrupt. That's required to undo + * anything an earlier activation has established. 
+ */ + activated = irqd_is_activated(&desc->irq_data); + if (activated) + irq_domain_deactivate_irq(&desc->irq_data); + + if (affinity->is_managed) { + irqd_set(&desc->irq_data, IRQD_AFFINITY_MANAGED); + irqd_set(&desc->irq_data, IRQD_MANAGED_SHUTDOWN); + } + + cpumask_copy(desc->irq_common_data.affinity, &affinity->mask); + + /* Restore the activation state */ + if (activated) + irq_domain_activate_irq(&desc->irq_data, false); + return 0; + } + return -EINVAL; +} + +static int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, + bool force) { struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - int ret; if (!desc) return -EINVAL; - raw_spin_lock_irqsave(&desc->lock, flags); - ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; + guard(raw_spinlock_irqsave)(&desc->lock); + return irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); } -int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) +/** + * irq_set_affinity - Set the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @cpumask: cpumask + * + * Fails if cpumask does not contain an online CPU + */ +int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + return __irq_set_affinity(irq, cpumask, false); +} +EXPORT_SYMBOL_GPL(irq_set_affinity); - if (!desc) - return -EINVAL; - desc->affinity_hint = m; - irq_put_desc_unlock(desc, flags); - return 0; +/** + * irq_force_affinity - Force the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @cpumask: cpumask + * + * Same as irq_set_affinity, but without checking the mask against + * online cpus. + * + * Solely for low level cpu hotplug code, where we need to make per + * cpu interrupts affine before the cpu becomes online. 
+ */ +int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, true); +} +EXPORT_SYMBOL_GPL(irq_force_affinity); + +int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, bool setaffinity) +{ + int ret = -EINVAL; + + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + scoped_irqdesc->affinity_hint = m; + ret = 0; + } + + if (!ret && m && setaffinity) + __irq_set_affinity(irq, m, false); + return ret; } -EXPORT_SYMBOL_GPL(irq_set_affinity_hint); +EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint); static void irq_affinity_notify(struct work_struct *work) { - struct irq_affinity_notify *notify = - container_of(work, struct irq_affinity_notify, work); + struct irq_affinity_notify *notify = container_of(work, struct irq_affinity_notify, work); struct irq_desc *desc = irq_to_desc(notify->irq); cpumask_var_t cpumask; - unsigned long flags; if (!desc || !alloc_cpumask_var(&cpumask, GFP_KERNEL)) goto out; - raw_spin_lock_irqsave(&desc->lock, flags); - if (irq_move_pending(&desc->irq_data)) - irq_get_pending(cpumask, desc); - else - cpumask_copy(cpumask, desc->irq_data.affinity); - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) { + if (irq_move_pending(&desc->irq_data)) + irq_get_pending(cpumask, desc); + else + cpumask_copy(cpumask, desc->irq_common_data.affinity); + } notify->notify(notify, cpumask); @@ -247,27 +519,25 @@ out: } /** - * irq_set_affinity_notifier - control notification of IRQ affinity changes - * @irq: Interrupt for which to enable/disable notification - * @notify: Context for notification, or %NULL to disable - * notification. Function pointers must be initialised; - * the other fields will be initialised by this function. - * - * Must be called in process context. Notification may only be enabled - * after the IRQ is allocated and must be disabled before the IRQ is - * freed using free_irq(). + * irq_set_affinity_notifier - control notification of IRQ affinity changes + * @irq: Interrupt for which to enable/disable notification + * @notify: Context for notification, or %NULL to disable + * notification. Function pointers must be initialised; + * the other fields will be initialised by this function. + * + * Must be called in process context. Notification may only be enabled + * after the IRQ is allocated and must be disabled before the IRQ is freed + * using free_irq(). 
*/ -int -irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) +int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) { struct irq_desc *desc = irq_to_desc(irq); struct irq_affinity_notify *old_notify; - unsigned long flags; /* The release function is promised process context */ might_sleep(); - if (!desc) + if (!desc || irq_is_nmi(desc)) return -EINVAL; /* Complete initialisation of *notify */ @@ -277,13 +547,18 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) INIT_WORK(¬ify->work, irq_affinity_notify); } - raw_spin_lock_irqsave(&desc->lock, flags); - old_notify = desc->affinity_notify; - desc->affinity_notify = notify; - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irq, &desc->lock) { + old_notify = desc->affinity_notify; + desc->affinity_notify = notify; + } - if (old_notify) + if (old_notify) { + if (cancel_work_sync(&old_notify->work)) { + /* Pending work had a ref, put that one too */ + kref_put(&old_notify->kref, old_notify->release); + } kref_put(&old_notify->kref, old_notify->release); + } return 0; } @@ -293,104 +568,114 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); /* * Generic version of the affinity autoselector. */ -static int -setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) +int irq_setup_affinity(struct irq_desc *desc) { struct cpumask *set = irq_default_affinity; - int node = desc->irq_data.node; + int node = irq_desc_get_node(desc); + + static DEFINE_RAW_SPINLOCK(mask_lock); + static struct cpumask mask; /* Excludes PER_CPU and NO_BALANCE interrupts */ - if (!irq_can_set_affinity(irq)) + if (!__irq_can_set_affinity(desc)) return 0; + guard(raw_spinlock)(&mask_lock); /* - * Preserve an userspace affinity setup, but make sure that - * one of the targets is online. + * Preserve the managed affinity setting and a userspace affinity + * setup, but make sure that one of the targets is online. 
*/ - if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { - if (cpumask_intersects(desc->irq_data.affinity, + if (irqd_affinity_is_managed(&desc->irq_data) || + irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { + if (cpumask_intersects(desc->irq_common_data.affinity, cpu_online_mask)) - set = desc->irq_data.affinity; + set = desc->irq_common_data.affinity; else irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); } - cpumask_and(mask, cpu_online_mask, set); + cpumask_and(&mask, cpu_online_mask, set); + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpu_online_mask); + if (node != NUMA_NO_NODE) { const struct cpumask *nodemask = cpumask_of_node(node); /* make sure at least one of the cpus in nodemask is online */ - if (cpumask_intersects(mask, nodemask)) - cpumask_and(mask, mask, nodemask); + if (cpumask_intersects(&mask, nodemask)) + cpumask_and(&mask, &mask, nodemask); } - irq_do_set_affinity(&desc->irq_data, mask, false); - return 0; + return irq_do_set_affinity(&desc->irq_data, &mask, false); } #else -static inline int -setup_affinity(unsigned int irq, struct irq_desc *d, struct cpumask *mask) +/* Wrapper for ALPHA specific affinity selector magic */ +int irq_setup_affinity(struct irq_desc *desc) { - return irq_select_affinity(irq); + return irq_select_affinity(irq_desc_get_irq(desc)); } -#endif +#endif /* CONFIG_AUTO_IRQ_AFFINITY */ +#endif /* CONFIG_SMP */ -/* - * Called when affinity is set via /proc/irq + +/** + * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt + * @irq: interrupt number to set affinity + * @vcpu_info: vCPU specific data or pointer to a percpu array of vCPU + * specific data for percpu_devid interrupts + * + * This function uses the vCPU specific data to set the vCPU affinity for + * an irq. The vCPU specific data is passed from outside, such as KVM. One + * example code path is as below: KVM -> IOMMU -> irq_set_vcpu_affinity(). 
*/ -int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask) +int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info) { - struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - int ret; + scoped_irqdesc_get_and_lock(irq, 0) { + struct irq_desc *desc = scoped_irqdesc; + struct irq_data *data; + struct irq_chip *chip; - raw_spin_lock_irqsave(&desc->lock, flags); - ret = setup_affinity(irq, desc, mask); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; -} + data = irq_desc_get_irq_data(desc); + do { + chip = irq_data_get_irq_chip(data); + if (chip && chip->irq_set_vcpu_affinity) + break; -#else -static inline int -setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) -{ - return 0; -} -#endif + data = irqd_get_parent_data(data); + } while (data); -void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend) -{ - if (suspend) { - if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)) - return; - desc->istate |= IRQS_SUSPENDED; + if (!data) + return -ENOSYS; + return chip->irq_set_vcpu_affinity(data, vcpu_info); } + return -EINVAL; +} +EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity); +void __disable_irq(struct irq_desc *desc) +{ if (!desc->depth++) irq_disable(desc); } static int __disable_irq_nosync(unsigned int irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); - - if (!desc) - return -EINVAL; - __disable_irq(desc, irq, false); - irq_put_desc_busunlock(desc, flags); - return 0; + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + __disable_irq(scoped_irqdesc); + return 0; + } + return -EINVAL; } /** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. + * Disable the selected interrupt line. Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. * - * This function may be called from IRQ context. + * This function may be called from IRQ context. */ void disable_irq_nosync(unsigned int irq) { @@ -399,51 +684,93 @@ void disable_irq_nosync(unsigned int irq) EXPORT_SYMBOL(disable_irq_nosync); /** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. + * Disable the selected interrupt line. Enables and Disables are nested. + * + * This function waits for any pending IRQ handlers for this interrupt to + * complete before returning. If you use this function while holding a + * resource the IRQ handler may need you will deadlock. + * + * Can only be called from preemptible code as it might sleep when an + * interrupt thread is associated to @irq. * - * This function may be called - with care - from IRQ context. 
*/ void disable_irq(unsigned int irq) { + might_sleep(); if (!__disable_irq_nosync(irq)) synchronize_irq(irq); } EXPORT_SYMBOL(disable_irq); -void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) +/** + * disable_hardirq - disables an irq and waits for hardirq completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are nested. + * + * This function waits for any pending hard IRQ handlers for this interrupt + * to complete before returning. If you use this function while holding a + * resource the hard IRQ handler may need you will deadlock. + * + * When used to optimistically disable an interrupt from atomic context the + * return value must be checked. + * + * Returns: false if a threaded handler is active. + * + * This function may be called - with care - from IRQ context. + */ +bool disable_hardirq(unsigned int irq) { - if (resume) { - if (!(desc->istate & IRQS_SUSPENDED)) { - if (!desc->action) - return; - if (!(desc->action->flags & IRQF_FORCE_RESUME)) - return; - /* Pretend that it got disabled ! */ - desc->depth++; - } - desc->istate &= ~IRQS_SUSPENDED; - } + if (!__disable_irq_nosync(irq)) + return synchronize_hardirq(irq); + return false; +} +EXPORT_SYMBOL_GPL(disable_hardirq); + +/** + * disable_nmi_nosync - disable an nmi without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and enables are nested. + * + * The interrupt to disable must have been requested through request_nmi. + * Unlike disable_nmi(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + */ +void disable_nmi_nosync(unsigned int irq) +{ + disable_irq_nosync(irq); +} +void __enable_irq(struct irq_desc *desc) +{ switch (desc->depth) { case 0: err_out: - WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); + WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", + irq_desc_get_irq(desc)); break; case 1: { if (desc->istate & IRQS_SUSPENDED) goto err_out; /* Prevent probing on this irq: */ irq_settings_set_noprobe(desc); - irq_enable(desc); - check_irq_resend(desc, irq); - /* fall-through */ + /* + * Call irq_startup() not irq_enable() here because the + * interrupt might be marked NOAUTOEN so irq_startup() + * needs to be invoked when it gets enabled the first time. + * This is also required when __enable_irq() is invoked for + * a managed and shutdown interrupt from the S3 resume + * path. + * + * If it was already started up, then irq_startup() will + * invoke irq_enable() under the hood. + */ + irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); + break; } default: desc->depth--; @@ -451,33 +778,40 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) } /** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. + * Undoes the effect of one call to disable_irq(). If this matches the + * last disable, processing of interrupts on this IRQ line is re-enabled. * - * This function may be called from IRQ context only when - * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! + * This function may be called from IRQ context only when + * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! 
*/ void enable_irq(unsigned int irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); - - if (!desc) - return; - if (WARN(!desc->irq_data.chip, - KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq)) - goto out; + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; - __enable_irq(desc, irq, false); -out: - irq_put_desc_busunlock(desc, flags); + if (WARN(!desc->irq_data.chip, "enable_irq before setup/request_irq: irq %u\n", irq)) + return; + __enable_irq(desc); + } } EXPORT_SYMBOL(enable_irq); +/** + * enable_nmi - enable handling of an nmi + * @irq: Interrupt to enable + * + * The interrupt to enable must have been requested through request_nmi. + * Undoes the effect of one call to disable_nmi(). If this matches the last + * disable, processing of interrupts on this IRQ line is re-enabled. + */ +void enable_nmi(unsigned int irq) +{ + enable_irq(irq); +} + static int set_irq_wake_real(unsigned int irq, unsigned int on) { struct irq_desc *desc = irq_to_desc(irq); @@ -493,50 +827,59 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on) } /** - * irq_set_irq_wake - control irq power management wakeup - * @irq: interrupt to control - * @on: enable/disable power management wakeup + * irq_set_irq_wake - control irq power management wakeup + * @irq: interrupt to control + * @on: enable/disable power management wakeup + * + * Enable/disable power management wakeup mode, which is disabled by + * default. Enables and disables must match, just as they match for + * non-wakeup mode support. * - * Enable/disable power management wakeup mode, which is - * disabled by default. Enables and disables must match, - * just as they match for non-wakeup mode support. + * Wakeup mode lets this IRQ wake the system from sleep states like + * "suspend to RAM". * - * Wakeup mode lets this IRQ wake the system from sleep - * states like "suspend to RAM". + * Note: irq enable/disable state is completely orthogonal to the + * enable/disable state of irq wake. An irq can be disabled with + * disable_irq() and still wake the system as long as the irq has wake + * enabled. If this does not hold, then the underlying irq chip and the + * related driver need to be investigated. */ int irq_set_irq_wake(unsigned int irq, unsigned int on) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); - int ret = 0; + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; + int ret = 0; - if (!desc) - return -EINVAL; + /* Don't use NMIs as wake up interrupts please */ + if (irq_is_nmi(desc)) + return -EINVAL; - /* wakeup-capable irqs can be shared between drivers that - * don't need to have the same sleep mode behaviors. - */ - if (on) { - if (desc->wake_depth++ == 0) { - ret = set_irq_wake_real(irq, on); - if (ret) - desc->wake_depth = 0; - else - irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE); - } - } else { - if (desc->wake_depth == 0) { - WARN(1, "Unbalanced IRQ %d wake disable\n", irq); - } else if (--desc->wake_depth == 0) { - ret = set_irq_wake_real(irq, on); - if (ret) - desc->wake_depth = 1; - else - irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); + /* + * wakeup-capable irqs can be shared between drivers that + * don't need to have the same sleep mode behaviors. 
+ */ + if (on) { + if (desc->wake_depth++ == 0) { + ret = set_irq_wake_real(irq, on); + if (ret) + desc->wake_depth = 0; + else + irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE); + } + } else { + if (desc->wake_depth == 0) { + WARN(1, "Unbalanced IRQ %d wake disable\n", irq); + } else if (--desc->wake_depth == 0) { + ret = set_irq_wake_real(irq, on); + if (ret) + desc->wake_depth = 1; + else + irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); + } } + return ret; } - irq_put_desc_busunlock(desc, flags); - return ret; + return -EINVAL; } EXPORT_SYMBOL(irq_set_irq_wake); @@ -545,26 +888,20 @@ EXPORT_SYMBOL(irq_set_irq_wake); * particular irq has been exclusively allocated or is available * for driver use. */ -int can_request_irq(unsigned int irq, unsigned long irqflags) +bool can_request_irq(unsigned int irq, unsigned long irqflags) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - int canrequest = 0; - - if (!desc) - return 0; + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; - if (irq_settings_can_request(desc)) { - if (!desc->action || - irqflags & desc->action->flags & IRQF_SHARED) - canrequest = 1; + if (irq_settings_can_request(desc)) { + if (!desc->action || irqflags & desc->action->flags & IRQF_SHARED) + return true; + } } - irq_put_desc_unlock(desc, flags); - return canrequest; + return false; } -int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, - unsigned long flags) +int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) { struct irq_chip *chip = desc->irq_data.chip; int ret, unmask = 0; @@ -574,13 +911,12 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, * IRQF_TRIGGER_* but the PIC does not support multiple * flow-types? */ - pr_debug("No set_type function for IRQ %d (%s)\n", irq, + pr_debug("No set_type function for IRQ %d (%s)\n", + irq_desc_get_irq(desc), chip ? (chip->name ? 
: "unknown") : "unknown"); return 0; } - flags &= IRQ_TYPE_SENSE_MASK; - if (chip->flags & IRQCHIP_SET_TYPE_MASKED) { if (!irqd_irq_masked(&desc->irq_data)) mask_irq(desc); @@ -588,13 +924,16 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, unmask = 1; } - /* caller masked out all except trigger mode flags */ + /* Mask all flags except trigger mode */ + flags &= IRQ_TYPE_SENSE_MASK; ret = chip->irq_set_type(&desc->irq_data, flags); switch (ret) { case IRQ_SET_MASK_OK: + case IRQ_SET_MASK_OK_DONE: irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK); irqd_set(&desc->irq_data, flags); + fallthrough; case IRQ_SET_MASK_OK_NOCOPY: flags = irqd_get_trigger_type(&desc->irq_data); @@ -609,8 +948,8 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, ret = 0; break; default: - pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n", - flags, irq, chip->irq_set_type); + pr_err("Setting trigger mode %lu for irq %u failed (%pS)\n", + flags, irq_desc_get_irq(desc), chip->irq_set_type); } if (unmask) unmask_irq(desc); @@ -620,17 +959,13 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, #ifdef CONFIG_HARDIRQS_SW_RESEND int irq_set_parent(int irq, int parent_irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - - if (!desc) - return -EINVAL; - - desc->parent_irq = parent_irq; - - irq_put_desc_unlock(desc, flags); - return 0; + scoped_irqdesc_get_and_lock(irq, 0) { + scoped_irqdesc->parent_irq = parent_irq; + return 0; + } + return -EINVAL; } +EXPORT_SYMBOL_GPL(irq_set_parent); #endif /* @@ -653,11 +988,65 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) return IRQ_NONE; } -static int irq_wait_for_interrupt(struct irqaction *action) +static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id) +{ + WARN(1, "Secondary action handler called for irq %d\n", irq); + return IRQ_NONE; +} + +#ifdef CONFIG_SMP +/* + * Check whether we need to change the affinity of the interrupt thread. 
+ */ +static void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { - set_current_state(TASK_INTERRUPTIBLE); + cpumask_var_t mask; - while (!kthread_should_stop()) { + if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) + return; + + __set_current_state(TASK_RUNNING); + + /* + * In case we are out of memory we set IRQTF_AFFINITY again and + * try again next time + */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { + set_bit(IRQTF_AFFINITY, &action->thread_flags); + return; + } + + scoped_guard(raw_spinlock_irq, &desc->lock) { + const struct cpumask *m; + + m = irq_data_get_effective_affinity_mask(&desc->irq_data); + cpumask_copy(mask, m); + } + + set_cpus_allowed_ptr(current, mask); + free_cpumask_var(mask); +} +#else +static inline void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } +#endif + +static int irq_wait_for_interrupt(struct irq_desc *desc, + struct irqaction *action) +{ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + irq_thread_check_affinity(desc, action); + + if (kthread_should_stop()) { + /* may need to run one last time */ + if (test_and_clear_bit(IRQTF_RUNTHREAD, + &action->thread_flags)) { + __set_current_state(TASK_RUNNING); + return 0; + } + __set_current_state(TASK_RUNNING); + return -1; + } if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags)) { @@ -665,10 +1054,7 @@ static int irq_wait_for_interrupt(struct irqaction *action) return 0; } schedule(); - set_current_state(TASK_INTERRUPTIBLE); } - __set_current_state(TASK_RUNNING); - return -1; } /* @@ -679,7 +1065,8 @@ static int irq_wait_for_interrupt(struct irqaction *action) static void irq_finalize_oneshot(struct irq_desc *desc, struct irqaction *action) { - if (!(desc->istate & IRQS_ONESHOT)) + if (!(desc->istate & IRQS_ONESHOT) || + action->handler == irq_forced_secondary_handler) return; again: chip_bus_lock(desc); @@ -695,7 +1082,7 @@ again: * to IRQS_INPROGRESS and the irq line is masked forever. * * This also serializes the state of shared oneshot handlers - * versus "desc->threads_onehsot |= action->thread_mask;" in + * versus "desc->threads_oneshot |= action->thread_mask;" in * irq_wake_thread(). See the comment there which explains the * serialization. */ @@ -718,92 +1105,52 @@ again: if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) && irqd_irq_masked(&desc->irq_data)) - unmask_irq(desc); + unmask_threaded_irq(desc); out_unlock: raw_spin_unlock_irq(&desc->lock); chip_bus_sync_unlock(desc); } -#ifdef CONFIG_SMP /* - * Check whether we need to chasnge the affinity of the interrupt thread. + * Interrupts explicitly requested as threaded interrupts want to be + * preemptible - many of them need to sleep and wait for slow busses to + * complete. */ -static void -irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_thread_fn(struct irq_desc *desc, struct irqaction *action) { - cpumask_var_t mask; - bool valid = true; + irqreturn_t ret = action->thread_fn(action->irq, action->dev_id); - if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) - return; + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); - /* - * In case we are out of memory we set IRQTF_AFFINITY again and - * try again next time - */ - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { - set_bit(IRQTF_AFFINITY, &action->thread_flags); - return; - } - - raw_spin_lock_irq(&desc->lock); - /* - * This code is triggered unconditionally. Check the affinity - * mask pointer. 
For CPU_MASK_OFFSTACK=n this is optimized out. - */ - if (desc->irq_data.affinity) - cpumask_copy(mask, desc->irq_data.affinity); - else - valid = false; - raw_spin_unlock_irq(&desc->lock); - - if (valid) - set_cpus_allowed_ptr(current, mask); - free_cpumask_var(mask); + irq_finalize_oneshot(desc, action); + return ret; } -#else -static inline void -irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } -#endif /* - * Interrupts which are not explicitely requested as threaded + * Interrupts which are not explicitly requested as threaded * interrupts rely on the implicit bh/preempt disable of the hard irq * context. So we need to disable bh here to avoid deadlocks and other * side effects. */ -static irqreturn_t -irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) { irqreturn_t ret; local_bh_disable(); - ret = action->thread_fn(action->irq, action->dev_id); - irq_finalize_oneshot(desc, action); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); + ret = irq_thread_fn(desc, action); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); local_bh_enable(); return ret; } -/* - * Interrupts explicitely requested as threaded interupts want to be - * preemtible - many of them need to sleep and wait for slow busses to - * complete. - */ -static irqreturn_t irq_thread_fn(struct irq_desc *desc, - struct irqaction *action) +void wake_threads_waitq(struct irq_desc *desc) { - irqreturn_t ret; - - ret = action->thread_fn(action->irq, action->dev_id); - irq_finalize_oneshot(desc, action); - return ret; -} - -static void wake_threads_waitq(struct irq_desc *desc) -{ - if (atomic_dec_and_test(&desc->threads_active) && - waitqueue_active(&desc->wait_for_threads)) + if (atomic_dec_and_test(&desc->threads_active)) wake_up(&desc->wait_for_threads); } @@ -834,6 +1181,42 @@ static void irq_thread_dtor(struct callback_head *unused) irq_finalize_oneshot(desc, action); } +static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) +{ + struct irqaction *secondary = action->secondary; + + if (WARN_ON_ONCE(!secondary)) + return; + + guard(raw_spinlock_irq)(&desc->lock); + __irq_wake_thread(desc, secondary); +} + +/* + * Internal function to notify that a interrupt thread is ready. + */ +static void irq_thread_set_ready(struct irq_desc *desc, + struct irqaction *action) +{ + set_bit(IRQTF_READY, &action->thread_flags); + wake_up(&desc->wait_for_threads); +} + +/* + * Internal function to wake up a interrupt thread and wait until it is + * ready. 
+ */ +static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc, + struct irqaction *action) +{ + if (!action || !action->thread) + return; + + wake_up_process(action->thread); + wait_event(desc->wait_for_threads, + test_bit(IRQTF_READY, &action->thread_flags)); +} + /* * Interrupt handler thread */ @@ -845,25 +1228,28 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); - if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, - &action->thread_flags)) + irq_thread_set_ready(desc, action); + + if (action->handler == irq_forced_secondary_handler) + sched_set_fifo_secondary(current); + else + sched_set_fifo(current); + + if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, + &action->thread_flags)) handler_fn = irq_forced_thread_fn; else handler_fn = irq_thread_fn; init_task_work(&on_exit_work, irq_thread_dtor); - task_work_add(current, &on_exit_work, false); - - irq_thread_check_affinity(desc, action); + task_work_add(current, &on_exit_work, TWA_NONE); - while (!irq_wait_for_interrupt(action)) { + while (!irq_wait_for_interrupt(desc, action)) { irqreturn_t action_ret; - irq_thread_check_affinity(desc, action); - action_ret = handler_fn(desc, action); - if (!noirqdebug) - note_interrupt(action->irq, desc, action_ret); + if (action_ret == IRQ_WAKE_THREAD) + irq_wake_secondary(desc, action); wake_threads_waitq(desc); } @@ -871,35 +1257,194 @@ static int irq_thread(void *data) /* * This is the regular exit path. __free_irq() is stopping the * thread via kthread_stop() after calling - * synchronize_irq(). So neither IRQTF_RUNTHREAD nor the - * oneshot mask bit can be set. We cannot verify that as we - * cannot touch the oneshot mask at this point anymore as - * __setup_irq() might have given out currents thread_mask - * again. + * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the + * oneshot mask bit can be set. */ - task_work_cancel(current, irq_thread_dtor); + task_work_cancel_func(current, irq_thread_dtor); return 0; } -static void irq_setup_forced_threading(struct irqaction *new) +/** + * irq_wake_thread - wake the irq thread for the action identified by dev_id + * @irq: Interrupt line + * @dev_id: Device identity for which the thread should be woken + */ +void irq_wake_thread(unsigned int irq, void *dev_id) { - if (!force_irqthreads) + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + + if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) return; + + guard(raw_spinlock_irqsave)(&desc->lock); + for_each_action_of_desc(desc, action) { + if (action->dev_id == dev_id) { + if (action->thread) + __irq_wake_thread(desc, action); + break; + } + } +} +EXPORT_SYMBOL_GPL(irq_wake_thread); + +static int irq_setup_forced_threading(struct irqaction *new) +{ + if (!force_irqthreads()) + return 0; if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) - return; + return 0; + + /* + * No further action required for interrupts which are requested as + * threaded interrupts already + */ + if (new->handler == irq_default_primary_handler) + return 0; new->flags |= IRQF_ONESHOT; - if (!new->thread_fn) { - set_bit(IRQTF_FORCED_THREAD, &new->thread_flags); - new->thread_fn = new->handler; - new->handler = irq_default_primary_handler; + /* + * Handle the case where we have a real primary handler and a + * thread handler. We force thread them as well by creating a + * secondary action. 
+ */ + if (new->handler && new->thread_fn) { + /* Allocate the secondary action */ + new->secondary = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!new->secondary) + return -ENOMEM; + new->secondary->handler = irq_forced_secondary_handler; + new->secondary->thread_fn = new->thread_fn; + new->secondary->dev_id = new->dev_id; + new->secondary->irq = new->irq; + new->secondary->name = new->name; } + /* Deal with the primary handler */ + set_bit(IRQTF_FORCED_THREAD, &new->thread_flags); + new->thread_fn = new->handler; + new->handler = irq_default_primary_handler; + return 0; +} + +static int irq_request_resources(struct irq_desc *desc) +{ + struct irq_data *d = &desc->irq_data; + struct irq_chip *c = d->chip; + + return c->irq_request_resources ? c->irq_request_resources(d) : 0; +} + +static void irq_release_resources(struct irq_desc *desc) +{ + struct irq_data *d = &desc->irq_data; + struct irq_chip *c = d->chip; + + if (c->irq_release_resources) + c->irq_release_resources(d); +} + +static bool irq_supports_nmi(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + /* Only IRQs directly managed by the root irqchip can be set as NMI */ + if (d->parent_data) + return false; +#endif + /* Don't support NMIs for chips behind a slow bus */ + if (d->chip->irq_bus_lock || d->chip->irq_bus_sync_unlock) + return false; + + return d->chip->flags & IRQCHIP_SUPPORTS_NMI; +} + +static int irq_nmi_setup(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + return c->irq_nmi_setup ? c->irq_nmi_setup(d) : -EINVAL; +} + +static void irq_nmi_teardown(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + if (c->irq_nmi_teardown) + c->irq_nmi_teardown(d); +} + +static int +setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) +{ + struct task_struct *t; + + if (!secondary) { + t = kthread_create(irq_thread, new, "irq/%d-%s", irq, + new->name); + } else { + t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq, + new->name); + } + + if (IS_ERR(t)) + return PTR_ERR(t); + + /* + * We keep the reference to the task struct even if + * the thread dies to avoid that the interrupt code + * references an already freed task_struct. + */ + new->thread = get_task_struct(t); + + /* + * The affinity can not be established yet, but it will be once the + * interrupt is enabled. Delay and defer the actual setting to the + * thread itself once it is ready to run. In the meantime, prevent + * it from ever being re-affined directly by cpuset or + * housekeeping. The proper way to do it is to re-affine the whole + * vector. + */ + kthread_bind_mask(t, cpu_possible_mask); + + /* + * Ensure the thread adjusts the affinity once it reaches the + * thread function. + */ + new->thread_flags = BIT(IRQTF_AFFINITY); + + return 0; +} + +static bool valid_percpu_irqaction(struct irqaction *old, struct irqaction *new) +{ + do { + if (cpumask_intersects(old->affinity, new->affinity) || + old->percpu_dev_id == new->percpu_dev_id) + return false; + + old = old->next; + } while (old); + + return true; } /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. 
+ * + * Locking rules: + * + * desc->request_mutex Provides serialization against a concurrent free_irq() + * chip_bus_lock Provides serialization for slow bus operations + * desc->lock Provides serialization against hard interrupts + * + * chip_bus_lock and desc->lock are sufficient for all other management and + * interrupt related functions. desc->request_mutex solely serializes + * request/free_irq(). */ static int __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) @@ -907,7 +1452,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) struct irqaction *old, **old_ptr; unsigned long flags, thread_mask = 0; int ret, nested, shared = 0; - cpumask_var_t mask; + bool per_cpu_devid; if (!desc) return -EINVAL; @@ -917,6 +1462,17 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (!try_module_get(desc->owner)) return -ENODEV; + per_cpu_devid = irq_settings_is_per_cpu_devid(desc); + + new->irq = irq; + + /* + * If the trigger type is not specified by the caller, + * then use the default for this interrupt. + */ + if (!(new->flags & IRQF_TRIGGER_MASK)) + new->flags |= irqd_get_trigger_type(&desc->irq_data); + /* * Check whether the interrupt nests into another interrupt * thread. @@ -934,8 +1490,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ new->handler = irq_nested_primary_handler; } else { - if (irq_settings_can_thread(desc)) - irq_setup_forced_threading(new); + if (irq_settings_can_thread(desc)) { + ret = irq_setup_forced_threading(new); + if (ret) + goto out_mput; + } } /* @@ -944,42 +1503,14 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * thread. */ if (new->thread_fn && !nested) { - struct task_struct *t; - static const struct sched_param param = { - .sched_priority = MAX_USER_RT_PRIO/2, - }; - - t = kthread_create(irq_thread, new, "irq/%d-%s", irq, - new->name); - if (IS_ERR(t)) { - ret = PTR_ERR(t); + ret = setup_irq_thread(new, irq, false); + if (ret) goto out_mput; + if (new->secondary) { + ret = setup_irq_thread(new->secondary, irq, true); + if (ret) + goto out_thread; } - - sched_setscheduler(t, SCHED_FIFO, ¶m); - - /* - * We keep the reference to the task struct even if - * the thread dies to avoid that the interrupt code - * references an already freed task_struct. - */ - get_task_struct(t); - new->thread = t; - /* - * Tell the thread to set its affinity. This is - * important for shared interrupt handlers as we do - * not invoke setup_affinity() for the secondary - * handlers as everything is already set up. Even for - * interrupts marked with IRQF_NO_BALANCE this is - * correct as we want the thread to move to the cpu(s) - * on which the requesting code placed the interrupt. - */ - set_bit(IRQTF_AFFINITY, &new->thread_flags); - } - - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { - ret = -ENOMEM; - goto out_thread; } /* @@ -995,7 +1526,36 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) new->flags &= ~IRQF_ONESHOT; /* + * Protects against a concurrent __free_irq() call which might wait + * for synchronize_hardirq() to complete without holding the optional + * chip bus lock and desc->lock. Also protects against handing out + * a recycled oneshot thread_mask bit while it's still in use by + * its previous owner. 
+ */ + mutex_lock(&desc->request_mutex); + + /* + * Acquire bus lock as the irq_request_resources() callback below + * might rely on the serialization or the magic power management + * functions which are abusing the irq_bus_lock() callback, + */ + chip_bus_lock(desc); + + /* First installed action requests resources. */ + if (!desc->action) { + ret = irq_request_resources(desc); + if (ret) { + pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n", + new->name, irq, desc->irq_data.chip->name); + goto out_bus_unlock; + } + } + + /* * The following block of code has to be executed atomically + * protected against a concurrent interrupt and any of the other + * management calls which are not serialized via + * desc->request_mutex or the optional bus lock. */ raw_spin_lock_irqsave(&desc->lock, flags); old_ptr = &desc->action; @@ -1007,10 +1567,43 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * fields must have IRQF_SHARED set and the bits which * set the trigger type must match. Also all must * agree on ONESHOT. + * Interrupt lines used for NMIs cannot be shared. */ + unsigned int oldtype; + + if (irq_is_nmi(desc) && !per_cpu_devid) { + pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", + new->name, irq, desc->irq_data.chip->name); + ret = -EINVAL; + goto out_unlock; + } + + if (per_cpu_devid && !valid_percpu_irqaction(old, new)) { + pr_err("Overlapping affinities for %s (irq %d) on irqchip %s.\n", + new->name, irq, desc->irq_data.chip->name); + ret = -EINVAL; + goto out_unlock; + } + + /* + * If nobody did set the configuration before, inherit + * the one provided by the requester. + */ + if (irqd_trigger_type_was_set(&desc->irq_data)) { + oldtype = irqd_get_trigger_type(&desc->irq_data); + } else { + oldtype = new->flags & IRQF_TRIGGER_MASK; + irqd_set_trigger_type(&desc->irq_data, oldtype); + } + if (!((old->flags & new->flags) & IRQF_SHARED) || - ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK) || - ((old->flags ^ new->flags) & IRQF_ONESHOT)) + (oldtype != (new->flags & IRQF_TRIGGER_MASK))) + goto mismatch; + + if ((old->flags & IRQF_ONESHOT) && + (new->flags & IRQF_COND_ONESHOT)) + new->flags |= IRQF_ONESHOT; + else if ((old->flags ^ new->flags) & IRQF_ONESHOT) goto mismatch; /* All handlers must agree on per-cpuness */ @@ -1044,7 +1637,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ if (thread_mask == ~0UL) { ret = -EBUSY; - goto out_mask; + goto out_unlock; } /* * The thread_mask for the action is or'ed to @@ -1066,7 +1659,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * thread_mask assigned. See the loop above which or's * all existing action->thread_mask bits. */ - new->thread_mask = 1 << ffz(thread_mask); + new->thread_mask = 1UL << ffz(thread_mask); } else if (new->handler == irq_default_primary_handler && !(desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)) { @@ -1085,24 +1678,37 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * has. The type flags are unreliable as the * underlying chip implementation can override them. 
 		 */
-		pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
-		       irq);
+		pr_err("Threaded irq requested with handler=NULL and !ONESHOT for %s (irq %d)\n",
+		       new->name, irq);
 		ret = -EINVAL;
-		goto out_mask;
+		goto out_unlock;
 	}
 
 	if (!shared) {
-		init_waitqueue_head(&desc->wait_for_threads);
-
 		/* Setup the type (level, edge polarity) if configured: */
 		if (new->flags & IRQF_TRIGGER_MASK) {
-			ret = __irq_set_trigger(desc, irq,
-					new->flags & IRQF_TRIGGER_MASK);
+			ret = __irq_set_trigger(desc,
+						new->flags & IRQF_TRIGGER_MASK);
 			if (ret)
-				goto out_mask;
+				goto out_unlock;
 		}
 
+		/*
+		 * Activate the interrupt. That activation must happen
+		 * independently of IRQ_NOAUTOEN. request_irq() can fail
+		 * and the callers are supposed to handle
+		 * that. enable_irq() of an interrupt requested with
+		 * IRQ_NOAUTOEN is not supposed to fail. The activation
+		 * keeps it in shutdown mode; it merely associates
+		 * resources if necessary and if that's not possible it
+		 * fails. Interrupts which are in managed shutdown mode
+		 * will simply ignore that activation request.
+		 */
+		ret = irq_activate(desc);
+		if (ret)
+			goto out_unlock;
+
 		desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
 				  IRQS_ONESHOT | IRQS_WAITING);
 		irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
@@ -1110,39 +1716,51 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		if (new->flags & IRQF_PERCPU) {
 			irqd_set(&desc->irq_data, IRQD_PER_CPU);
 			irq_settings_set_per_cpu(desc);
+			if (new->flags & IRQF_NO_DEBUG)
+				irq_settings_set_no_debug(desc);
 		}
 
+		if (noirqdebug)
+			irq_settings_set_no_debug(desc);
+
 		if (new->flags & IRQF_ONESHOT)
 			desc->istate |= IRQS_ONESHOT;
 
-		if (irq_settings_can_autoenable(desc))
-			irq_startup(desc, true);
-		else
-			/* Undo nested disables: */
-			desc->depth = 1;
-
 		/* Exclude IRQ from balancing if requested */
 		if (new->flags & IRQF_NOBALANCING) {
 			irq_settings_set_no_balancing(desc);
 			irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
 		}
 
-		/* Set default affinity mask once everything is setup */
-		setup_affinity(irq, desc, mask);
+		if (!(new->flags & IRQF_NO_AUTOEN) &&
+		    irq_settings_can_autoenable(desc)) {
+			irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+		} else if (!per_cpu_devid) {
+			/*
+			 * Shared interrupts do not go well with disabling
+			 * auto enable. The sharing interrupt might request
+			 * it while it's still disabled and then wait for
+			 * interrupts forever.
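+			 *
+			 * A non-shared requester which wants the line to
+			 * stay disabled until it is ready would, as a
+			 * hedged sketch (hypothetical device), do:
+			 *
+			 *	request_irq(irq, handler, IRQF_NO_AUTOEN, "dev", dev);
+			 *	...
+			 *	enable_irq(irq);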
+ */ + WARN_ON_ONCE(new->flags & IRQF_SHARED); + /* Undo nested disables: */ + desc->depth = 1; + } } else if (new->flags & IRQF_TRIGGER_MASK) { unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK; - unsigned int omsk = irq_settings_get_trigger_mask(desc); + unsigned int omsk = irqd_get_trigger_type(&desc->irq_data); if (nmsk != omsk) /* hope the handler works with current trigger mode */ - pr_warning("irq %d uses trigger mode %u; requested %u\n", - irq, nmsk, omsk); + pr_warn("irq %d uses trigger mode %u; requested %u\n", + irq, omsk, nmsk); } - new->irq = irq; *old_ptr = new; + irq_pm_install_action(desc, new); + /* Reset broken irq detection when installing new handler */ desc->irq_count = 0; desc->irqs_unhandled = 0; @@ -1153,23 +1771,21 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) { desc->istate &= ~IRQS_SPURIOUS_DISABLED; - __enable_irq(desc, irq, false); + __enable_irq(desc); } raw_spin_unlock_irqrestore(&desc->lock, flags); + chip_bus_sync_unlock(desc); + mutex_unlock(&desc->request_mutex); - /* - * Strictly no need to wake it up, but hung_task complains - * when no hard interrupt wakes the thread up. - */ - if (new->thread) - wake_up_process(new->thread); + irq_setup_timings(desc, new); + + wake_up_and_wait_for_irq_thread_ready(desc, new); + wake_up_and_wait_for_irq_thread_ready(desc, new->secondary); register_irq_proc(irq, desc); new->dir = NULL; register_handler_proc(irq, new); - free_cpumask_var(mask); - return 0; mismatch: @@ -1182,60 +1798,47 @@ mismatch: } ret = -EBUSY; -out_mask: +out_unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); - free_cpumask_var(mask); + + if (!desc->action) + irq_release_resources(desc); +out_bus_unlock: + chip_bus_sync_unlock(desc); + mutex_unlock(&desc->request_mutex); out_thread: if (new->thread) { struct task_struct *t = new->thread; new->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); + } + if (new->secondary && new->secondary->thread) { + struct task_struct *t = new->secondary->thread; + + new->secondary->thread = NULL; + kthread_stop_put(t); } out_mput: module_put(desc->owner); return ret; } -/** - * setup_irq - setup an interrupt - * @irq: Interrupt line to setup - * @act: irqaction for the interrupt - * - * Used to statically setup interrupts in the early boot process. - */ -int setup_irq(unsigned int irq, struct irqaction *act) -{ - int retval; - struct irq_desc *desc = irq_to_desc(irq); - - if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) - return -EINVAL; - chip_bus_lock(desc); - retval = __setup_irq(irq, desc, act); - chip_bus_sync_unlock(desc); - - return retval; -} -EXPORT_SYMBOL_GPL(setup_irq); - /* * Internal function to unregister an irqaction - used to free * regular and special interrupts that are part of the architecture. 
 */
-static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
+static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned int irq = desc->irq_data.irq;
 	struct irqaction *action, **action_ptr;
 	unsigned long flags;
 
 	WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
 
-	if (!desc)
-		return NULL;
-
+	mutex_lock(&desc->request_mutex);
+	chip_bus_lock(desc);
 	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	/*
@@ -1249,7 +1852,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		if (!action) {
 			WARN(1, "Trying to free already-free IRQ %d\n", irq);
 			raw_spin_unlock_irqrestore(&desc->lock, flags);
-
+			chip_bus_sync_unlock(desc);
+			mutex_unlock(&desc->request_mutex);
 			return NULL;
 		}
 
@@ -1261,9 +1865,14 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 	/* Found it - now remove it from the list of entries: */
 	*action_ptr = action->next;
 
+	irq_pm_remove_action(desc, action);
+
 	/* If this was the last handler, shut down the IRQ line: */
-	if (!desc->action)
+	if (!desc->action) {
+		irq_settings_clr_disable_unlazy(desc);
+		/* Only shutdown. Deactivate after synchronize_hardirq() */
 		irq_shutdown(desc);
+	}
 
 #ifdef CONFIG_SMP
 	/* make sure affinity_hint is cleaned up */
@@ -1272,11 +1881,30 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 #endif
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
+	/*
+	 * Drop bus_lock here so the changes which were done in the chip
+	 * callbacks above are synced out to the irq chips which hang
+	 * behind a slow bus (I2C, SPI) before calling synchronize_hardirq().
+	 *
+	 * Aside from that, the bus_lock can also be taken from the threaded
+	 * handler in irq_finalize_oneshot() which results in a deadlock
+	 * because kthread_stop() would wait forever for the thread to
+	 * complete, which is blocked on the bus lock.
+	 *
+	 * The still-held desc->request_mutex protects against a
+	 * concurrent request_irq() of this irq so the release of resources
+	 * and timing data is properly serialized.
+	 */
+	chip_bus_sync_unlock(desc);
 
 	unregister_handler_proc(irq, action);
 
-	/* Make sure it's not being used on another CPU: */
-	synchronize_irq(irq);
+	/*
+	 * Make sure it's not being used on another CPU and if the chip
+	 * supports it also make sure that there is no (not yet serviced)
+	 * interrupt in flight at the hardware level.
+	 */
+	__synchronize_irq(desc);
 
 #ifdef CONFIG_DEBUG_SHIRQ
 	/*
@@ -1285,7 +1913,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 	 * is so by doing an extra call to the handler ....
 	 *
 	 * ( We do this after actually deregistering it, to make sure that a
-	 *   'real' IRQ doesn't run in * parallel with our fake. )
+	 *   'real' IRQ doesn't run in parallel with our fake. )
 	 */
 	if (action->flags & IRQF_SHARED) {
 		local_irq_save(flags);
@@ -1294,104 +1922,170 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 	}
 #endif
 
+	/*
+	 * The action has already been removed above, but the thread writes
+	 * its oneshot mask bit when it completes. However, request_mutex is
+	 * held across this, which prevents __setup_irq() from handing out
+	 * the same bit to a newly requested action.
+ */ if (action->thread) { - kthread_stop(action->thread); - put_task_struct(action->thread); + kthread_stop_put(action->thread); + if (action->secondary && action->secondary->thread) + kthread_stop_put(action->secondary->thread); + } + + /* Last action releases resources */ + if (!desc->action) { + /* + * Reacquire bus lock as irq_release_resources() might + * require it to deallocate resources over the slow bus. + */ + chip_bus_lock(desc); + /* + * There is no interrupt on the fly anymore. Deactivate it + * completely. + */ + scoped_guard(raw_spinlock_irqsave, &desc->lock) + irq_domain_deactivate_irq(&desc->irq_data); + + irq_release_resources(desc); + chip_bus_sync_unlock(desc); + irq_remove_timings(desc); } + mutex_unlock(&desc->request_mutex); + + irq_chip_pm_put(&desc->irq_data); module_put(desc->owner); + kfree(action->secondary); return action; } /** - * remove_irq - free an interrupt - * @irq: Interrupt line to free - * @act: irqaction for the interrupt + * free_irq - free an interrupt allocated with request_irq + * @irq: Interrupt line to free + * @dev_id: Device identity to free * - * Used to remove interrupts statically setup by the early boot process. - */ -void remove_irq(unsigned int irq, struct irqaction *act) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc))) - __free_irq(irq, act->dev_id); -} -EXPORT_SYMBOL_GPL(remove_irq); - -/** - * free_irq - free an interrupt allocated with request_irq - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function must not be called from interrupt context. + * Remove an interrupt handler. The handler is removed and if the interrupt + * line is no longer in use by any driver it is disabled. On a shared IRQ + * the caller must ensure the interrupt is disabled on the card it drives + * before calling this function. The function does not return until any + * executing interrupts for this IRQ have completed. + * + * This function must not be called from interrupt context. + * + * Returns the devname argument passed to request_irq. 
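+ *
+ * A minimal sketch of using the return value (hypothetical driver):
+ *
+ *	const char *name = free_irq(dev->irq, dev);
+ *
+ *	pr_debug("released %s\n", name);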
*/ -void free_irq(unsigned int irq, void *dev_id) +const void *free_irq(unsigned int irq, void *dev_id) { struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + const char *devname; if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) - return; + return NULL; #ifdef CONFIG_SMP if (WARN_ON(desc->affinity_notify)) desc->affinity_notify = NULL; #endif - chip_bus_lock(desc); - kfree(__free_irq(irq, dev_id)); - chip_bus_sync_unlock(desc); + action = __free_irq(desc, dev_id); + + if (!action) + return NULL; + + devname = action->name; + kfree(action); + return devname; } EXPORT_SYMBOL(free_irq); +/* This function must be called with desc->lock held */ +static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc) +{ + const char *devname = NULL; + + desc->istate &= ~IRQS_NMI; + + if (!WARN_ON(desc->action == NULL)) { + irq_pm_remove_action(desc, desc->action); + devname = desc->action->name; + unregister_handler_proc(irq, desc->action); + + kfree(desc->action); + desc->action = NULL; + } + + irq_settings_clr_disable_unlazy(desc); + irq_shutdown_and_deactivate(desc); + + irq_release_resources(desc); + + irq_chip_pm_put(&desc->irq_data); + module_put(desc->owner); + + return devname; +} + +const void *free_nmi(unsigned int irq, void *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || WARN_ON(!irq_is_nmi(desc))) + return NULL; + + if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) + return NULL; + + /* NMI still enabled */ + if (WARN_ON(desc->depth == 0)) + disable_nmi_nosync(irq); + + guard(raw_spinlock_irqsave)(&desc->lock); + irq_nmi_teardown(desc); + return __cleanup_nmi(irq, desc); +} + /** - * request_threaded_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * Primary handler for threaded interrupts - * If NULL and thread_fn != NULL the default - * primary handler is installed - * @thread_fn: Function called from the irq handler thread - * If NULL, no irq thread is created - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * If you want to set up a threaded irq handler for your device - * then you need to supply @handler and @thread_fn. @handler is - * still called in hard interrupt context and has to check - * whether the interrupt originates from the device. If yes it - * needs to disable the interrupt on the device and return - * IRQ_WAKE_THREAD which will wake up the handler thread and run - * @thread_fn. This split handler design is necessary to support - * shared interrupts. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: + * request_threaded_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Primary handler for threaded interrupts. 
+ * If handler is NULL and thread_fn != NULL + * the default primary handler is installed. + * @thread_fn: Function called from the irq handler thread + * If NULL, no irq thread is created + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the interrupt line + * and IRQ handling. From the point this call is made your handler function + * may be invoked. Since your handler function must clear any interrupt the + * board raises, you must take care both to initialise your hardware and to + * set up the interrupt handler in the right order. + * + * If you want to set up a threaded irq handler for your device then you + * need to supply @handler and @thread_fn. @handler is still called in hard + * interrupt context and has to check whether the interrupt originates from + * the device. If yes it needs to disable the interrupt on the device and + * return IRQ_WAKE_THREAD which will wake up the handler thread and run + * @thread_fn. This split handler design is necessary to support shared + * interrupts. + * + * @dev_id must be globally unique. Normally the address of the device data + * structure is used as the cookie. Since the handler receives this value + * it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id as this is + * required when freeing the interrupt. + * + * Flags: * * IRQF_SHARED Interrupt is shared * IRQF_TRIGGER_* Specify active edge(s) or level - * + * IRQF_ONESHOT Run thread_fn with interrupt line masked */ int request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long irqflags, @@ -1401,13 +2095,26 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, struct irq_desc *desc; int retval; + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + /* * Sanity-check: shared interrupts must pass in a real dev-ID, * otherwise we'll have trouble later trying to figure out * which interrupt is which (messes up the interrupt freeing * logic etc). + * + * Also shared interrupts do not go well with disabling auto enable. + * The sharing interrupt might request it while it's still disabled + * and then wait for interrupts forever. + * + * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and + * it cannot be set along with IRQF_NO_SUSPEND. 
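	 *
	 * For illustration only, both of the following would be rejected
	 * below with -EINVAL (hypothetical device "dev"):
	 *
	 *	request_irq(irq, handler, IRQF_SHARED | IRQF_NO_AUTOEN, "dev", dev);
	 *	request_irq(irq, handler, IRQF_NO_SUSPEND | IRQF_COND_SUSPEND, "dev", dev);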
*/ - if ((irqflags & IRQF_SHARED) && !dev_id) + if (((irqflags & IRQF_SHARED) && !dev_id) || + ((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN)) || + (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || + ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) return -EINVAL; desc = irq_to_desc(irq); @@ -1434,12 +2141,19 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, action->name = devname; action->dev_id = dev_id; - chip_bus_lock(desc); + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) { + kfree(action); + return retval; + } + retval = __setup_irq(irq, desc, action); - chip_bus_sync_unlock(desc); - if (retval) + if (retval) { + irq_chip_pm_put(&desc->irq_data); + kfree(action->secondary); kfree(action); + } #ifdef CONFIG_DEBUG_SHIRQ_FIXME if (!retval && (irqflags & IRQF_SHARED)) { @@ -1465,28 +2179,31 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, EXPORT_SYMBOL(request_threaded_irq); /** - * request_any_context_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * Threaded handler for threaded interrupts. - * @flags: Interrupt type flags - * @name: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. It selects either a - * hardirq or threaded handling method depending on the - * context. - * - * On failure, it returns a negative value. On success, - * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. + * request_any_context_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @flags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the interrupt line + * and IRQ handling. It selects either a hardirq or threaded handling + * method depending on the context. + * + * Returns: On failure, it returns a negative value. On success, it returns either + * IRQC_IS_HARDIRQ or IRQC_IS_NESTED. */ int request_any_context_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev_id) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc; int ret; + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + desc = irq_to_desc(irq); if (!desc) return -EINVAL; @@ -1501,133 +2218,243 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler, } EXPORT_SYMBOL_GPL(request_any_context_irq); -void enable_percpu_irq(unsigned int irq, unsigned int type) +/** + * request_nmi - allocate an interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @irqflags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the interrupt line + * and IRQ handling. It sets up the IRQ line to be handled as an NMI. + * + * An interrupt line delivering NMIs cannot be shared and IRQ handling + * cannot be threaded. + * + * Interrupt lines requested for NMI delivering must produce per cpu + * interrupts and have auto enabling setting disabled. 
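+ *
+ * A request honouring the constraints above would look roughly like this
+ * (sketch; my_nmi_handler is a hypothetical handler):
+ *
+ *	ret = request_nmi(irq, my_nmi_handler,
+ *			  IRQF_PERCPU | IRQF_NO_AUTOEN, "my_nmi", dev);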
+ *
+ * @dev_id must be globally unique. Normally the address of the device data
+ * structure is used as the cookie. Since the handler receives this value
+ * it makes sense to use it.
+ *
+ * If the interrupt line cannot be used to deliver NMIs, the function will
+ * fail and return a negative value.
+ */
+int request_nmi(unsigned int irq, irq_handler_t handler,
+		unsigned long irqflags, const char *name, void *dev_id)
 {
-	unsigned int cpu = smp_processor_id();
-	unsigned long flags;
-	struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU);
+	struct irqaction *action;
+	struct irq_desc *desc;
+	int retval;
 
-	if (!desc)
-		return;
+	if (irq == IRQ_NOTCONNECTED)
+		return -ENOTCONN;
 
-	type &= IRQ_TYPE_SENSE_MASK;
-	if (type != IRQ_TYPE_NONE) {
-		int ret;
+	/* NMIs cannot be shared or used for polling */
+	if (irqflags & (IRQF_SHARED | IRQF_COND_SUSPEND | IRQF_IRQPOLL))
+		return -EINVAL;
 
-		ret = __irq_set_trigger(desc, irq, type);
+	if (!(irqflags & IRQF_PERCPU))
+		return -EINVAL;
 
-		if (ret) {
-			WARN(1, "failed to set type for IRQ%d\n", irq);
-			goto out;
+	if (!handler)
+		return -EINVAL;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc || (irq_settings_can_autoenable(desc) &&
+	    !(irqflags & IRQF_NO_AUTOEN)) ||
+	    !irq_settings_can_request(desc) ||
+	    WARN_ON(irq_settings_is_per_cpu_devid(desc)) ||
+	    !irq_supports_nmi(desc))
		return -EINVAL;
+
+	action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
+	if (!action)
+		return -ENOMEM;
+
+	action->handler = handler;
+	action->flags = irqflags | IRQF_NO_THREAD | IRQF_NOBALANCING;
+	action->name = name;
+	action->dev_id = dev_id;
+
+	retval = irq_chip_pm_get(&desc->irq_data);
+	if (retval < 0)
+		goto err_out;
+
+	retval = __setup_irq(irq, desc, action);
+	if (retval)
+		goto err_irq_setup;
+
+	scoped_guard(raw_spinlock_irqsave, &desc->lock) {
+		/* Setup NMI state */
+		desc->istate |= IRQS_NMI;
+		retval = irq_nmi_setup(desc);
+		if (retval) {
+			__cleanup_nmi(irq, desc);
+			return -EINVAL;
 		}
+		return 0;
 	}
 
-	irq_percpu_enable(desc, cpu);
-out:
-	irq_put_desc_unlock(desc, flags);
+err_irq_setup:
+	irq_chip_pm_put(&desc->irq_data);
+err_out:
+	kfree(action);
+
+	return retval;
+}
+
+void enable_percpu_irq(unsigned int irq, unsigned int type)
+{
+	scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) {
+		struct irq_desc *desc = scoped_irqdesc;
+
+		/*
+		 * If the trigger type is not specified by the caller, then
+		 * use the default for this interrupt.
+		 */
+		type &= IRQ_TYPE_SENSE_MASK;
+		if (type == IRQ_TYPE_NONE)
+			type = irqd_get_trigger_type(&desc->irq_data);
+
+		if (type != IRQ_TYPE_NONE) {
+			if (__irq_set_trigger(desc, type)) {
+				WARN(1, "failed to set type for IRQ%d\n", irq);
+				return;
+			}
+		}
+		irq_percpu_enable(desc, smp_processor_id());
+	}
 }
 EXPORT_SYMBOL_GPL(enable_percpu_irq);
 
-void disable_percpu_irq(unsigned int irq)
+void enable_percpu_nmi(unsigned int irq, unsigned int type)
 {
-	unsigned int cpu = smp_processor_id();
-	unsigned long flags;
-	struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU);
+	enable_percpu_irq(irq, type);
+}
 
-	if (!desc)
-		return;
+/**
+ * irq_percpu_is_enabled - Check whether the per cpu irq is enabled
+ * @irq:	Linux irq number to check for
+ *
+ * Must be called from a non-migratable context. Returns the enable
+ * state of a per cpu interrupt on the current cpu.
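+ *
+ * Usage sketch, pinned to the CPU of interest:
+ *
+ *	preempt_disable();
+ *	enabled = irq_percpu_is_enabled(irq);
+ *	preempt_enable();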
+ */ +bool irq_percpu_is_enabled(unsigned int irq) +{ + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) + return cpumask_test_cpu(smp_processor_id(), scoped_irqdesc->percpu_enabled); + return false; +} +EXPORT_SYMBOL_GPL(irq_percpu_is_enabled); - irq_percpu_disable(desc, cpu); - irq_put_desc_unlock(desc, flags); +void disable_percpu_irq(unsigned int irq) +{ + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) + irq_percpu_disable(scoped_irqdesc, smp_processor_id()); } EXPORT_SYMBOL_GPL(disable_percpu_irq); +void disable_percpu_nmi(unsigned int irq) +{ + disable_percpu_irq(irq); +} + /* * Internal function to unregister a percpu irqaction. */ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_id) { struct irq_desc *desc = irq_to_desc(irq); - struct irqaction *action; - unsigned long flags; + struct irqaction *action, **action_ptr; WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); if (!desc) return NULL; - raw_spin_lock_irqsave(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) { + action_ptr = &desc->action; + for (;;) { + action = *action_ptr; - action = desc->action; - if (!action || action->percpu_dev_id != dev_id) { - WARN(1, "Trying to free already-free IRQ %d\n", irq); - goto bad; - } + if (!action) { + WARN(1, "Trying to free already-free IRQ %d\n", irq); + return NULL; + } - if (!cpumask_empty(desc->percpu_enabled)) { - WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", - irq, cpumask_first(desc->percpu_enabled)); - goto bad; - } + if (action->percpu_dev_id == dev_id) + break; - /* Found it - now remove it from the list of entries: */ - desc->action = NULL; + action_ptr = &action->next; + } - raw_spin_unlock_irqrestore(&desc->lock, flags); + if (cpumask_intersects(desc->percpu_enabled, action->affinity)) { + WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", irq, + cpumask_first_and(desc->percpu_enabled, action->affinity)); + return NULL; + } - unregister_handler_proc(irq, action); + /* Found it - now remove it from the list of entries: */ + *action_ptr = action->next; + /* Demote from NMI if we killed the last action */ + if (!desc->action) + desc->istate &= ~IRQS_NMI; + } + + unregister_handler_proc(irq, action); + irq_chip_pm_put(&desc->irq_data); module_put(desc->owner); return action; - -bad: - raw_spin_unlock_irqrestore(&desc->lock, flags); - return NULL; } /** - * remove_percpu_irq - free a per-cpu interrupt - * @irq: Interrupt line to free - * @act: irqaction for the interrupt + * free_percpu_irq - free an interrupt allocated with request_percpu_irq + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove a percpu interrupt handler. The handler is removed, but the + * interrupt line is not disabled. This must be done on each CPU before + * calling this function. The function does not return until any executing + * interrupts for this IRQ have completed. * - * Used to remove interrupts statically setup by the early boot process. + * This function must not be called from interrupt context. 
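+ *
+ * A teardown sketch, assuming a hypothetical helper which calls
+ * disable_percpu_irq() on each CPU beforehand:
+ *
+ *	on_each_cpu(my_disable_percpu_irq, NULL, 1);
+ *	free_percpu_irq(irq, &my_percpu_dev);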
*/ -void remove_percpu_irq(unsigned int irq, struct irqaction *act) +void free_percpu_irq(unsigned int irq, void __percpu *dev_id) { struct irq_desc *desc = irq_to_desc(irq); - if (desc && irq_settings_is_per_cpu_devid(desc)) - __free_percpu_irq(irq, act->percpu_dev_id); + if (!desc || !irq_settings_is_per_cpu_devid(desc)) + return; + + chip_bus_lock(desc); + kfree(__free_percpu_irq(irq, dev_id)); + chip_bus_sync_unlock(desc); } +EXPORT_SYMBOL_GPL(free_percpu_irq); -/** - * free_percpu_irq - free an interrupt allocated with request_percpu_irq - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove a percpu interrupt handler. The handler is removed, but - * the interrupt line is not disabled. This must be done on each - * CPU before calling this function. The function does not return - * until any executing interrupts for this IRQ have completed. - * - * This function must not be called from interrupt context. - */ -void free_percpu_irq(unsigned int irq, void __percpu *dev_id) +void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) { struct irq_desc *desc = irq_to_desc(irq); if (!desc || !irq_settings_is_per_cpu_devid(desc)) return; - chip_bus_lock(desc); + if (WARN_ON(!irq_is_nmi(desc))) + return; + kfree(__free_percpu_irq(irq, dev_id)); - chip_bus_sync_unlock(desc); } /** - * setup_percpu_irq - setup a per-cpu interrupt - * @irq: Interrupt line to setup - * @act: irqaction for the interrupt + * setup_percpu_irq - setup a per-cpu interrupt + * @irq: Interrupt line to setup + * @act: irqaction for the interrupt * * Used to statically setup per-cpu interrupts in the early boot process. */ @@ -1638,30 +2465,70 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act) if (!desc || !irq_settings_is_per_cpu_devid(desc)) return -EINVAL; - chip_bus_lock(desc); + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + return retval; + retval = __setup_irq(irq, desc, act); - chip_bus_sync_unlock(desc); + + if (retval) + irq_chip_pm_put(&desc->irq_data); return retval; } +static +struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long flags, + const char *devname, const cpumask_t *affinity, + void __percpu *dev_id) +{ + struct irqaction *action; + + if (!affinity) + affinity = cpu_possible_mask; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return NULL; + + action->handler = handler; + action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; + action->name = devname; + action->percpu_dev_id = dev_id; + action->affinity = affinity; + + /* + * We allow some form of sharing for non-overlapping affinity + * masks. Obviously, covering all CPUs prevents any sharing in + * the first place. + */ + if (!cpumask_equal(affinity, cpu_possible_mask)) + action->flags |= IRQF_SHARED; + + return action; +} + /** - * request_percpu_irq - allocate a percpu interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * @devname: An ascii name for the claiming device - * @dev_id: A percpu cookie passed back to the handler function - * - * This call allocates interrupt resources, but doesn't - * automatically enable the interrupt. It has to be done on each - * CPU using enable_percpu_irq(). - * - * Dev_id must be globally unique. It is a per-cpu variable, and - * the handler gets called with the interrupted CPU's instance of - * that variable. 
+ * __request_percpu_irq - allocate a percpu interrupt line
+ * @irq:	Interrupt line to allocate
+ * @handler:	Function to be called when the IRQ occurs.
+ * @flags:	Interrupt type flags (IRQF_TIMER only)
+ * @devname:	An ascii name for the claiming device
+ * @affinity:	A cpumask describing the target CPUs for this interrupt
+ * @dev_id:	A percpu cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources, but doesn't enable the interrupt
+ * on any CPU, as all percpu-devid interrupts are flagged with IRQ_NOAUTOEN.
+ * It has to be done on each CPU using enable_percpu_irq().
+ *
+ * @dev_id must be globally unique. It is a per-cpu variable, and
+ * the handler gets called with the interrupted CPU's instance of
+ * that variable.
 */
-int request_percpu_irq(unsigned int irq, irq_handler_t handler,
-		       const char *devname, void __percpu *dev_id)
+int __request_percpu_irq(unsigned int irq, irq_handler_t handler,
+			 unsigned long flags, const char *devname,
			 const cpumask_t *affinity, void __percpu *dev_id)
 {
 	struct irqaction *action;
 	struct irq_desc *desc;
@@ -1675,21 +2542,269 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
 	    !irq_settings_is_per_cpu_devid(desc))
 		return -EINVAL;
 
-	action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
+	if (flags && flags != IRQF_TIMER)
+		return -EINVAL;
+
+	action = create_percpu_irqaction(handler, flags, devname, affinity, dev_id);
 	if (!action)
 		return -ENOMEM;
 
-	action->handler = handler;
-	action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND;
-	action->name = devname;
-	action->percpu_dev_id = dev_id;
+	retval = irq_chip_pm_get(&desc->irq_data);
+	if (retval < 0) {
+		kfree(action);
+		return retval;
+	}
 
-	chip_bus_lock(desc);
 	retval = __setup_irq(irq, desc, action);
-	chip_bus_sync_unlock(desc);
 
-	if (retval)
+	if (retval) {
+		irq_chip_pm_put(&desc->irq_data);
 		kfree(action);
+	}
 
 	return retval;
 }
+EXPORT_SYMBOL_GPL(__request_percpu_irq);
+
+/**
+ * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery
+ * @irq:	Interrupt line to allocate
+ * @handler:	Function to be called when the IRQ occurs.
+ * @name:	An ascii name for the claiming device
+ * @affinity:	A cpumask describing the target CPUs for this interrupt
+ * @dev_id:	A percpu cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs
+ * have to be setup on each CPU by calling prepare_percpu_nmi() before
+ * being enabled on the same CPU by using enable_percpu_nmi().
+ *
+ * @dev_id must be globally unique. It is a per-cpu variable, and the
+ * handler gets called with the interrupted CPU's instance of that
+ * variable.
+ *
+ * Interrupt lines requested for NMI delivery should have auto-enabling
+ * disabled.
+ *
+ * If the interrupt line cannot be used to deliver NMIs, the function
+ * will fail, returning a negative value.
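+ *
+ * The expected sequence, per the description above (sketch):
+ *
+ *	ret = request_percpu_nmi(irq, handler, "my_nmi", affinity, dev);
+ *
+ * followed, on each target CPU and from non-preemptible context, by:
+ *
+ *	ret = prepare_percpu_nmi(irq);
+ *	enable_percpu_nmi(irq, IRQ_TYPE_NONE);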
+ */
+int request_percpu_nmi(unsigned int irq, irq_handler_t handler, const char *name,
+		       const struct cpumask *affinity, void __percpu *dev_id)
+{
+	struct irqaction *action;
+	struct irq_desc *desc;
+	int retval;
+
+	if (!handler)
+		return -EINVAL;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc || !irq_settings_can_request(desc) ||
+	    !irq_settings_is_per_cpu_devid(desc) ||
+	    irq_settings_can_autoenable(desc) ||
+	    !irq_supports_nmi(desc))
+		return -EINVAL;
+
+	/* The line cannot be NMI already if the new request covers all CPUs */
+	if (irq_is_nmi(desc) &&
+	    (!affinity || cpumask_equal(affinity, cpu_possible_mask)))
+		return -EINVAL;
+
+	action = create_percpu_irqaction(handler, IRQF_NO_THREAD | IRQF_NOBALANCING,
+					 name, affinity, dev_id);
+	if (!action)
+		return -ENOMEM;
+
+	retval = irq_chip_pm_get(&desc->irq_data);
+	if (retval < 0)
+		goto err_out;
+
+	retval = __setup_irq(irq, desc, action);
+	if (retval)
+		goto err_irq_setup;
+
+	scoped_guard(raw_spinlock_irqsave, &desc->lock)
+		desc->istate |= IRQS_NMI;
+	return 0;
+
+err_irq_setup:
+	irq_chip_pm_put(&desc->irq_data);
+err_out:
+	kfree(action);
+
+	return retval;
+}
+
+/**
+ * prepare_percpu_nmi - performs CPU local setup for NMI delivery
+ * @irq: Interrupt line to prepare for NMI delivery
+ *
+ * This call prepares an interrupt line to deliver NMI on the current CPU,
+ * before that interrupt line gets enabled with enable_percpu_nmi().
+ *
+ * As a CPU local operation, this should be called from non-preemptible
+ * context.
+ *
+ * If the interrupt line cannot be used to deliver NMIs, the function will
+ * fail, returning a negative value.
+ */
+int prepare_percpu_nmi(unsigned int irq)
+{
+	int ret = -EINVAL;
+
+	WARN_ON(preemptible());
+
+	scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) {
+		if (WARN(!irq_is_nmi(scoped_irqdesc),
+			 "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", irq))
+			return -EINVAL;
+
+		ret = irq_nmi_setup(scoped_irqdesc);
+		if (ret)
+			pr_err("Failed to setup NMI delivery: irq %u\n", irq);
+	}
+	return ret;
+}
+
+/**
+ * teardown_percpu_nmi - undoes NMI setup of IRQ line
+ * @irq: Interrupt line from which CPU local NMI configuration should be removed
+ *
+ * This call undoes the setup done by prepare_percpu_nmi().
+ *
+ * The IRQ line should not be enabled for the current CPU.
+ * As a CPU local operation, this should be called from non-preemptible
+ * context.
+ */
+void teardown_percpu_nmi(unsigned int irq)
+{
+	WARN_ON(preemptible());
+
+	scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) {
+		if (WARN_ON(!irq_is_nmi(scoped_irqdesc)))
+			return;
+		irq_nmi_teardown(scoped_irqdesc);
+	}
+}
+
+static int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state)
+{
+	struct irq_chip *chip;
+	int err = -EINVAL;
+
+	do {
+		chip = irq_data_get_irq_chip(data);
+		if (WARN_ON_ONCE(!chip))
+			return -ENODEV;
+		if (chip->irq_get_irqchip_state)
+			break;
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+		data = data->parent_data;
+#else
+		data = NULL;
+#endif
+	} while (data);
+
+	if (data)
+		err = chip->irq_get_irqchip_state(data, which, state);
+	return err;
+}
+
+/**
+ * irq_get_irqchip_state - returns the irqchip state of an interrupt.
+ * @irq: Interrupt line that is forwarded to a VM
+ * @which: One of IRQCHIP_STATE_* the caller wants to know about
+ * @state: a pointer to a boolean where the state is to be stored
+ *
+ * This call snapshots the internal irqchip state of an interrupt,
+ * returning into @state the bit corresponding to state @which.
+ *
+ * This function should be called with preemption disabled if the interrupt
+ * controller has per-cpu registers.
+ */
+int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool *state)
+{
+	scoped_irqdesc_get_and_buslock(irq, 0) {
+		struct irq_data *data = irq_desc_get_irq_data(scoped_irqdesc);
+
+		return __irq_get_irqchip_state(data, which, state);
+	}
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
+
+/**
+ * irq_set_irqchip_state - set the state of a forwarded interrupt.
+ * @irq: Interrupt line that is forwarded to a VM
+ * @which: State to be restored (one of IRQCHIP_STATE_*)
+ * @val: Value corresponding to @which
+ *
+ * This call sets the internal irqchip state of an interrupt, depending on
+ * the value of @which.
+ *
+ * This function should be called with migration disabled if the interrupt
+ * controller has per-cpu registers.
+ */
+int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool val)
+{
+	scoped_irqdesc_get_and_buslock(irq, 0) {
+		struct irq_data *data = irq_desc_get_irq_data(scoped_irqdesc);
+		struct irq_chip *chip;
+
+		do {
+			chip = irq_data_get_irq_chip(data);
+
+			if (WARN_ON_ONCE(!chip))
+				return -ENODEV;
+
+			if (chip->irq_set_irqchip_state)
+				break;
+
+			data = irqd_get_parent_data(data);
+		} while (data);
+
+		if (data)
+			return chip->irq_set_irqchip_state(data, which, val);
+	}
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(irq_set_irqchip_state);
+
+/**
+ * irq_has_action - Check whether an interrupt is requested
+ * @irq:	The linux irq number
+ *
+ * Returns: A snapshot of the current state
+ */
+bool irq_has_action(unsigned int irq)
+{
+	bool res;
+
+	rcu_read_lock();
+	res = irq_desc_has_action(irq_to_desc(irq));
+	rcu_read_unlock();
+	return res;
+}
+EXPORT_SYMBOL_GPL(irq_has_action);
+
+/**
+ * irq_check_status_bit - Check whether bits in the irq descriptor status are set
+ * @irq:	The linux irq number
+ * @bitmask:	The bitmask to evaluate
+ *
+ * Returns: True if one of the bits in @bitmask is set
+ */
+bool irq_check_status_bit(unsigned int irq, unsigned int bitmask)
+{
+	struct irq_desc *desc;
+	bool res = false;
+
+	rcu_read_lock();
+	desc = irq_to_desc(irq);
+	if (desc)
+		res = !!(desc->status_use_accessors & bitmask);
+	rcu_read_unlock();
+	return res;
+}
+EXPORT_SYMBOL_GPL(irq_check_status_bit);
