Diffstat (limited to 'kernel/irq/manage.c')
-rw-r--r--	kernel/irq/manage.c	1890
1 file changed, 1219 insertions, 671 deletions
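Many hunks in this diff replace open-coded raw_spin_lock_irqsave()/raw_spin_unlock_irqrestore() pairs with the scope-based guard() and scoped_guard() helpers from <linux/cleanup.h>/<linux/spinlock.h>. The following is a minimal sketch of that pattern only; the lock and the data it protects are hypothetical and do not appear in the patch.

#include <linux/spinlock.h>
#include <linux/cleanup.h>

static DEFINE_RAW_SPINLOCK(my_lock);	/* hypothetical lock */
static unsigned int my_state;		/* hypothetical protected data */

/* Old style: explicit lock/unlock bracketing with a flags variable. */
static unsigned int read_state_old(void)
{
	unsigned long flags;
	unsigned int val;

	raw_spin_lock_irqsave(&my_lock, flags);
	val = my_state;
	raw_spin_unlock_irqrestore(&my_lock, flags);
	return val;
}

/* New style: the guard drops the lock automatically when the scope ends. */
static unsigned int read_state_new(void)
{
	guard(raw_spinlock_irqsave)(&my_lock);
	return my_state;	/* unlocked on return */
}

/* scoped_guard() limits the locked region to an explicit block. */
static void update_state(unsigned int val)
{
	scoped_guard(raw_spinlock_irqsave, &my_lock)
		my_state = val;
}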
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index a4888ce4667a..0bb29316b436 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -13,34 +13,36 @@ #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> +#include <linux/irqdomain.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/rt.h> #include <linux/sched/task.h> +#include <linux/sched/isolation.h> #include <uapi/linux/sched/types.h> #include <linux/task_work.h> #include "internals.h" -#ifdef CONFIG_IRQ_FORCED_THREADING -__read_mostly bool force_irqthreads; -EXPORT_SYMBOL_GPL(force_irqthreads); +#if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT) +DEFINE_STATIC_KEY_FALSE(force_irqthreads_key); static int __init setup_forced_irqthreads(char *arg) { - force_irqthreads = true; + static_branch_enable(&force_irqthreads_key); return 0; } early_param("threadirqs", setup_forced_irqthreads); #endif -static void __synchronize_hardirq(struct irq_desc *desc) +static int __irq_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *state); + +static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) { + struct irq_data *irqd = irq_desc_get_irq_data(desc); bool inprogress; do { - unsigned long flags; - /* * Wait until we're out of the critical section. This might * give the wrong answer due to the lack of memory barriers. @@ -49,37 +51,53 @@ static void __synchronize_hardirq(struct irq_desc *desc) cpu_relax(); /* Ok, that indicated we're done: double-check carefully. */ - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irqsave)(&desc->lock); inprogress = irqd_irq_inprogress(&desc->irq_data); - raw_spin_unlock_irqrestore(&desc->lock, flags); + /* + * If requested and supported, check at the chip whether it + * is in flight at the hardware level, i.e. already pending + * in a CPU and waiting for service and acknowledge. + */ + if (!inprogress && sync_chip) { + /* + * Ignore the return code. inprogress is only updated + * when the chip supports it. + */ + __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE, + &inprogress); + } /* Oops, that failed? */ } while (inprogress); } /** - * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs) - * @irq: interrupt number to wait for + * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for + * + * This function waits for any pending hard IRQ handlers for this interrupt + * to complete before returning. If you use this function while holding a + * resource the IRQ handler may need you will deadlock. It does not take + * associated threaded handlers into account. * - * This function waits for any pending hard IRQ handlers for this - * interrupt to complete before returning. If you use this - * function while holding a resource the IRQ handler may need you - * will deadlock. It does not take associated threaded handlers - * into account. + * Do not use this for shutdown scenarios where you must be sure that all + * parts (hardirq and threaded handler) have completed. * - * Do not use this for shutdown scenarios where you must be sure - * that all parts (hardirq and threaded handler) have completed. + * Returns: false if a threaded handler is active. * - * Returns: false if a threaded handler is active. + * This function may be called - with care - from IRQ context. * - * This function may be called - with care - from IRQ context. 
+ * It does not check whether there is an interrupt in flight at the + * hardware level, but not serviced yet, as this might deadlock when called + * with interrupts disabled and the target CPU of the interrupt is the + * current CPU. */ bool synchronize_hardirq(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); if (desc) { - __synchronize_hardirq(desc); + __synchronize_hardirq(desc, false); return !atomic_read(&desc->threads_active); } @@ -87,30 +105,37 @@ bool synchronize_hardirq(unsigned int irq) } EXPORT_SYMBOL(synchronize_hardirq); +static void __synchronize_irq(struct irq_desc *desc) +{ + __synchronize_hardirq(desc, true); + /* + * We made sure that no hardirq handler is running. Now verify that no + * threaded handlers are active. + */ + wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active)); +} + /** - * synchronize_irq - wait for pending IRQ handlers (on other CPUs) - * @irq: interrupt number to wait for + * synchronize_irq - wait for pending IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for * - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. + * This function waits for any pending IRQ handlers for this interrupt to + * complete before returning. If you use this function while holding a + * resource the IRQ handler may need you will deadlock. * - * This function may be called - with care - from IRQ context. + * Can only be called from preemptible code as it might sleep when + * an interrupt thread is associated to @irq. + * + * It optionally makes sure (when the irq chip supports that method) + * that the interrupt is not pending in any CPU and waiting for + * service. */ void synchronize_irq(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - if (desc) { - __synchronize_hardirq(desc); - /* - * We made sure that no hardirq handler is - * running. Now verify that no threaded handlers are - * active. - */ - wait_event(desc->wait_for_threads, - !atomic_read(&desc->threads_active)); - } + if (desc) + __synchronize_irq(desc); } EXPORT_SYMBOL(synchronize_irq); @@ -126,8 +151,8 @@ static bool __irq_can_set_affinity(struct irq_desc *desc) } /** - * irq_can_set_affinity - Check if the affinity of a given irq can be set - * @irq: Interrupt to check + * irq_can_set_affinity - Check if the affinity of a given irq can be set + * @irq: Interrupt to check * */ int irq_can_set_affinity(unsigned int irq) @@ -151,26 +176,33 @@ bool irq_can_set_affinity_usr(unsigned int irq) } /** - * irq_set_thread_affinity - Notify irq threads to adjust affinity - * @desc: irq descriptor which has affitnity changed + * irq_set_thread_affinity - Notify irq threads to adjust affinity + * @desc: irq descriptor which has affinity changed * - * We just set IRQTF_AFFINITY and delegate the affinity setting - * to the interrupt thread itself. We can not call - * set_cpus_allowed_ptr() here as we hold desc->lock and this - * code can be called from hard interrupt context. + * Just set IRQTF_AFFINITY and delegate the affinity setting to the + * interrupt thread itself. We can not call set_cpus_allowed_ptr() here as + * we hold desc->lock and this code can be called from hard interrupt + * context. 
*/ -void irq_set_thread_affinity(struct irq_desc *desc) +static void irq_set_thread_affinity(struct irq_desc *desc) { struct irqaction *action; - for_each_action_of_desc(desc, action) - if (action->thread) + for_each_action_of_desc(desc, action) { + if (action->thread) { set_bit(IRQTF_AFFINITY, &action->thread_flags); + wake_up_process(action->thread); + } + if (action->secondary && action->secondary->thread) { + set_bit(IRQTF_AFFINITY, &action->secondary->thread_flags); + wake_up_process(action->secondary->thread); + } + } } +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK static void irq_validate_effective_affinity(struct irq_data *data) { -#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK const struct cpumask *m = irq_data_get_effective_affinity_mask(data); struct irq_chip *chip = irq_data_get_irq_chip(data); @@ -178,24 +210,77 @@ static void irq_validate_effective_affinity(struct irq_data *data) return; pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n", chip->name, data->irq); -#endif } +#else +static inline void irq_validate_effective_affinity(struct irq_data *data) { } +#endif + +static DEFINE_PER_CPU(struct cpumask, __tmp_mask); int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { + struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask); struct irq_desc *desc = irq_data_to_desc(data); struct irq_chip *chip = irq_data_get_irq_chip(data); + const struct cpumask *prog_mask; int ret; if (!chip || !chip->irq_set_affinity) return -EINVAL; - ret = chip->irq_set_affinity(data, mask, force); + /* + * If this is a managed interrupt and housekeeping is enabled on + * it check whether the requested affinity mask intersects with + * a housekeeping CPU. If so, then remove the isolated CPUs from + * the mask and just keep the housekeeping CPU(s). This prevents + * the affinity setter from routing the interrupt to an isolated + * CPU to avoid that I/O submitted from a housekeeping CPU causes + * interrupts on an isolated one. + * + * If the masks do not intersect or include online CPU(s) then + * keep the requested mask. The isolated target CPUs are only + * receiving interrupts when the I/O operation was submitted + * directly from them. + * + * If all housekeeping CPUs in the affinity mask are offline, the + * interrupt will be migrated by the CPU hotplug code once a + * housekeeping CPU which belongs to the affinity mask comes + * online. + */ + if (irqd_affinity_is_managed(data) && + housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) { + const struct cpumask *hk_mask; + + hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); + + cpumask_and(tmp_mask, mask, hk_mask); + if (!cpumask_intersects(tmp_mask, cpu_online_mask)) + prog_mask = mask; + else + prog_mask = tmp_mask; + } else { + prog_mask = mask; + } + + /* + * Make sure we only provide online CPUs to the irqchip, + * unless we are being asked to force the affinity (in which + * case we do as we are told). 
+ */ + cpumask_and(tmp_mask, prog_mask, cpu_online_mask); + if (!force && !cpumask_empty(tmp_mask)) + ret = chip->irq_set_affinity(data, tmp_mask, force); + else if (force) + ret = chip->irq_set_affinity(data, mask, force); + else + ret = -EINVAL; + switch (ret) { case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: cpumask_copy(desc->irq_common_data.affinity, mask); + fallthrough; case IRQ_SET_MASK_OK_NOCOPY: irq_validate_effective_affinity(data); irq_set_thread_affinity(desc); @@ -238,6 +323,30 @@ static int irq_try_set_affinity(struct irq_data *data, return ret; } +static bool irq_set_affinity_deactivated(struct irq_data *data, + const struct cpumask *mask) +{ + struct irq_desc *desc = irq_data_to_desc(data); + + /* + * Handle irq chips which can handle affinity only in activated + * state correctly + * + * If the interrupt is not yet activated, just store the affinity + * mask and do not call the chip driver at all. On activation the + * driver has to make sure anyway that the interrupt is in a + * usable state so startup works. + */ + if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || + irqd_is_activated(data) || !irqd_affinity_on_activate(data)) + return false; + + cpumask_copy(desc->irq_common_data.affinity, mask); + irq_data_update_effective_affinity(data, mask); + irqd_set(data, IRQD_AFFINITY_SET); + return true; +} + int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -248,6 +357,9 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, if (!chip || !chip->irq_set_affinity) return -EINVAL; + if (irq_set_affinity_deactivated(data, mask)) + return 0; + if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) { ret = irq_try_set_affinity(data, mask, force); } else { @@ -257,61 +369,147 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, if (desc->affinity_notify) { kref_get(&desc->affinity_notify->kref); - schedule_work(&desc->affinity_notify->work); + if (!schedule_work(&desc->affinity_notify->work)) { + /* Work was already scheduled, drop our extra ref */ + kref_put(&desc->affinity_notify->kref, + desc->affinity_notify->release); + } } irqd_set(data, IRQD_AFFINITY_SET); return ret; } -int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) +/** + * irq_update_affinity_desc - Update affinity management for an interrupt + * @irq: The interrupt number to update + * @affinity: Pointer to the affinity descriptor + * + * This interface can be used to configure the affinity management of + * interrupts which have been allocated already. + * + * There are certain limitations on when it may be used - attempts to use it + * for when the kernel is configured for generic IRQ reservation mode (in + * config GENERIC_IRQ_RESERVATION_MODE) will fail, as it may conflict with + * managed/non-managed interrupt accounting. In addition, attempts to use it on + * an interrupt which is already started or which has already been configured + * as managed will also fail, as these mean invalid init state or double init. + */ +int irq_update_affinity_desc(unsigned int irq, struct irq_affinity_desc *affinity) +{ + /* + * Supporting this with the reservation scheme used by x86 needs + * some more thought. Fail it for now. 
+ */ + if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) + return -EOPNOTSUPP; + + scoped_irqdesc_get_and_buslock(irq, 0) { + struct irq_desc *desc = scoped_irqdesc; + bool activated; + + /* Requires the interrupt to be shut down */ + if (irqd_is_started(&desc->irq_data)) + return -EBUSY; + + /* Interrupts which are already managed cannot be modified */ + if (irqd_affinity_is_managed(&desc->irq_data)) + return -EBUSY; + /* + * Deactivate the interrupt. That's required to undo + * anything an earlier activation has established. + */ + activated = irqd_is_activated(&desc->irq_data); + if (activated) + irq_domain_deactivate_irq(&desc->irq_data); + + if (affinity->is_managed) { + irqd_set(&desc->irq_data, IRQD_AFFINITY_MANAGED); + irqd_set(&desc->irq_data, IRQD_MANAGED_SHUTDOWN); + } + + cpumask_copy(desc->irq_common_data.affinity, &affinity->mask); + + /* Restore the activation state */ + if (activated) + irq_domain_activate_irq(&desc->irq_data, false); + return 0; + } + return -EINVAL; +} + +static int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, + bool force) { struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - int ret; if (!desc) return -EINVAL; - raw_spin_lock_irqsave(&desc->lock, flags); - ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; + guard(raw_spinlock_irqsave)(&desc->lock); + return irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); } -int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) +/** + * irq_set_affinity - Set the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @cpumask: cpumask + * + * Fails if cpumask does not contain an online CPU + */ +int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + return __irq_set_affinity(irq, cpumask, false); +} +EXPORT_SYMBOL_GPL(irq_set_affinity); - if (!desc) - return -EINVAL; - desc->affinity_hint = m; - irq_put_desc_unlock(desc, flags); - /* set the initial affinity to prevent every interrupt being on CPU0 */ - if (m) +/** + * irq_force_affinity - Force the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @cpumask: cpumask + * + * Same as irq_set_affinity, but without checking the mask against + * online cpus. + * + * Solely for low level cpu hotplug code, where we need to make per + * cpu interrupts affine before the cpu becomes online. 
+ */ +int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, true); +} +EXPORT_SYMBOL_GPL(irq_force_affinity); + +int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, bool setaffinity) +{ + int ret = -EINVAL; + + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + scoped_irqdesc->affinity_hint = m; + ret = 0; + } + + if (!ret && m && setaffinity) __irq_set_affinity(irq, m, false); - return 0; + return ret; } -EXPORT_SYMBOL_GPL(irq_set_affinity_hint); +EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint); static void irq_affinity_notify(struct work_struct *work) { - struct irq_affinity_notify *notify = - container_of(work, struct irq_affinity_notify, work); + struct irq_affinity_notify *notify = container_of(work, struct irq_affinity_notify, work); struct irq_desc *desc = irq_to_desc(notify->irq); cpumask_var_t cpumask; - unsigned long flags; if (!desc || !alloc_cpumask_var(&cpumask, GFP_KERNEL)) goto out; - raw_spin_lock_irqsave(&desc->lock, flags); - if (irq_move_pending(&desc->irq_data)) - irq_get_pending(cpumask, desc); - else - cpumask_copy(cpumask, desc->irq_common_data.affinity); - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) { + if (irq_move_pending(&desc->irq_data)) + irq_get_pending(cpumask, desc); + else + cpumask_copy(cpumask, desc->irq_common_data.affinity); + } notify->notify(notify, cpumask); @@ -321,27 +519,25 @@ out: } /** - * irq_set_affinity_notifier - control notification of IRQ affinity changes - * @irq: Interrupt for which to enable/disable notification - * @notify: Context for notification, or %NULL to disable - * notification. Function pointers must be initialised; - * the other fields will be initialised by this function. - * - * Must be called in process context. Notification may only be enabled - * after the IRQ is allocated and must be disabled before the IRQ is - * freed using free_irq(). + * irq_set_affinity_notifier - control notification of IRQ affinity changes + * @irq: Interrupt for which to enable/disable notification + * @notify: Context for notification, or %NULL to disable + * notification. Function pointers must be initialised; + * the other fields will be initialised by this function. + * + * Must be called in process context. Notification may only be enabled + * after the IRQ is allocated and must be disabled before the IRQ is freed + * using free_irq(). 
*/ -int -irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) +int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) { struct irq_desc *desc = irq_to_desc(irq); struct irq_affinity_notify *old_notify; - unsigned long flags; /* The release function is promised process context */ might_sleep(); - if (!desc) + if (!desc || irq_is_nmi(desc)) return -EINVAL; /* Complete initialisation of *notify */ @@ -351,13 +547,18 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) INIT_WORK(¬ify->work, irq_affinity_notify); } - raw_spin_lock_irqsave(&desc->lock, flags); - old_notify = desc->affinity_notify; - desc->affinity_notify = notify; - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irq, &desc->lock) { + old_notify = desc->affinity_notify; + desc->affinity_notify = notify; + } - if (old_notify) + if (old_notify) { + if (cancel_work_sync(&old_notify->work)) { + /* Pending work had a ref, put that one too */ + kref_put(&old_notify->kref, old_notify->release); + } kref_put(&old_notify->kref, old_notify->release); + } return 0; } @@ -370,7 +571,8 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); int irq_setup_affinity(struct irq_desc *desc) { struct cpumask *set = irq_default_affinity; - int ret, node = irq_desc_get_node(desc); + int node = irq_desc_get_node(desc); + static DEFINE_RAW_SPINLOCK(mask_lock); static struct cpumask mask; @@ -378,7 +580,7 @@ int irq_setup_affinity(struct irq_desc *desc) if (!__irq_can_set_affinity(desc)) return 0; - raw_spin_lock(&mask_lock); + guard(raw_spinlock)(&mask_lock); /* * Preserve the managed affinity setting and a userspace affinity * setup, but make sure that one of the targets is online. @@ -393,6 +595,9 @@ int irq_setup_affinity(struct irq_desc *desc) } cpumask_and(&mask, cpu_online_mask, set); + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpu_online_mask); + if (node != NUMA_NO_NODE) { const struct cpumask *nodemask = cpumask_of_node(node); @@ -400,9 +605,7 @@ int irq_setup_affinity(struct irq_desc *desc) if (cpumask_intersects(&mask, nodemask)) cpumask_and(&mask, &mask, nodemask); } - ret = irq_do_set_affinity(&desc->irq_data, &mask, false); - raw_spin_unlock(&mask_lock); - return ret; + return irq_do_set_affinity(&desc->irq_data, &mask, false); } #else /* Wrapper for ALPHA specific affinity selector magic */ @@ -410,63 +613,41 @@ int irq_setup_affinity(struct irq_desc *desc) { return irq_select_affinity(irq_desc_get_irq(desc)); } -#endif - -/* - * Called when a bogus affinity is set via /proc/irq - */ -int irq_select_affinity_usr(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - int ret; +#endif /* CONFIG_AUTO_IRQ_AFFINITY */ +#endif /* CONFIG_SMP */ - raw_spin_lock_irqsave(&desc->lock, flags); - ret = irq_setup_affinity(desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; -} -#endif /** - * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt - * @irq: interrupt number to set affinity - * @vcpu_info: vCPU specific data or pointer to a percpu array of vCPU - * specific data for percpu_devid interrupts - * - * This function uses the vCPU specific data to set the vCPU - * affinity for an irq. The vCPU specific data is passed from - * outside, such as KVM. One example code path is as below: - * KVM -> IOMMU -> irq_set_vcpu_affinity(). 
+ * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt + * @irq: interrupt number to set affinity + * @vcpu_info: vCPU specific data or pointer to a percpu array of vCPU + * specific data for percpu_devid interrupts + * + * This function uses the vCPU specific data to set the vCPU affinity for + * an irq. The vCPU specific data is passed from outside, such as KVM. One + * example code path is as below: KVM -> IOMMU -> irq_set_vcpu_affinity(). */ int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - struct irq_data *data; - struct irq_chip *chip; - int ret = -ENOSYS; - - if (!desc) - return -EINVAL; + scoped_irqdesc_get_and_lock(irq, 0) { + struct irq_desc *desc = scoped_irqdesc; + struct irq_data *data; + struct irq_chip *chip; - data = irq_desc_get_irq_data(desc); - do { - chip = irq_data_get_irq_chip(data); - if (chip && chip->irq_set_vcpu_affinity) - break; -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - data = data->parent_data; -#else - data = NULL; -#endif - } while (data); + data = irq_desc_get_irq_data(desc); + do { + chip = irq_data_get_irq_chip(data); + if (chip && chip->irq_set_vcpu_affinity) + break; - if (data) - ret = chip->irq_set_vcpu_affinity(data, vcpu_info); - irq_put_desc_unlock(desc, flags); + data = irqd_get_parent_data(data); + } while (data); - return ret; + if (!data) + return -ENOSYS; + return chip->irq_set_vcpu_affinity(data, vcpu_info); + } + return -EINVAL; } EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity); @@ -478,26 +659,23 @@ void __disable_irq(struct irq_desc *desc) static int __disable_irq_nosync(unsigned int irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); - - if (!desc) - return -EINVAL; - __disable_irq(desc); - irq_put_desc_busunlock(desc, flags); - return 0; + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + __disable_irq(scoped_irqdesc); + return 0; + } + return -EINVAL; } /** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. + * Disable the selected interrupt line. Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. * - * This function may be called from IRQ context. + * This function may be called from IRQ context. */ void disable_irq_nosync(unsigned int irq) { @@ -506,50 +684,67 @@ void disable_irq_nosync(unsigned int irq) EXPORT_SYMBOL(disable_irq_nosync); /** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. + * Disable the selected interrupt line. Enables and Disables are nested. + * + * This function waits for any pending IRQ handlers for this interrupt to + * complete before returning. 
If you use this function while holding a + * resource the IRQ handler may need you will deadlock. + * + * Can only be called from preemptible code as it might sleep when an + * interrupt thread is associated to @irq. * - * This function may be called - with care - from IRQ context. */ void disable_irq(unsigned int irq) { + might_sleep(); if (!__disable_irq_nosync(irq)) synchronize_irq(irq); } EXPORT_SYMBOL(disable_irq); /** - * disable_hardirq - disables an irq and waits for hardirq completion - * @irq: Interrupt to disable + * disable_hardirq - disables an irq and waits for hardirq completion + * @irq: Interrupt to disable * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending hard IRQ handlers for this - * interrupt to complete before returning. If you use this function while - * holding a resource the hard IRQ handler may need you will deadlock. + * Disable the selected interrupt line. Enables and Disables are nested. * - * When used to optimistically disable an interrupt from atomic context - * the return value must be checked. + * This function waits for any pending hard IRQ handlers for this interrupt + * to complete before returning. If you use this function while holding a + * resource the hard IRQ handler may need you will deadlock. * - * Returns: false if a threaded handler is active. + * When used to optimistically disable an interrupt from atomic context the + * return value must be checked. * - * This function may be called - with care - from IRQ context. + * Returns: false if a threaded handler is active. + * + * This function may be called - with care - from IRQ context. */ bool disable_hardirq(unsigned int irq) { if (!__disable_irq_nosync(irq)) return synchronize_hardirq(irq); - return false; } EXPORT_SYMBOL_GPL(disable_hardirq); +/** + * disable_nmi_nosync - disable an nmi without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and enables are nested. + * + * The interrupt to disable must have been requested through request_nmi. + * Unlike disable_nmi(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + */ +void disable_nmi_nosync(unsigned int irq) +{ + disable_irq_nosync(irq); +} + void __enable_irq(struct irq_desc *desc) { switch (desc->depth) { @@ -565,10 +760,14 @@ void __enable_irq(struct irq_desc *desc) irq_settings_set_noprobe(desc); /* * Call irq_startup() not irq_enable() here because the - * interrupt might be marked NOAUTOEN. So irq_startup() - * needs to be invoked when it gets enabled the first - * time. If it was already started up, then irq_startup() - * will invoke irq_enable() under the hood. + * interrupt might be marked NOAUTOEN so irq_startup() + * needs to be invoked when it gets enabled the first time. + * This is also required when __enable_irq() is invoked for + * a managed and shutdown interrupt from the S3 resume + * path. + * + * If it was already started up, then irq_startup() will + * invoke irq_enable() under the hood. */ irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); break; @@ -579,33 +778,40 @@ void __enable_irq(struct irq_desc *desc) } /** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. 
+ * Undoes the effect of one call to disable_irq(). If this matches the + * last disable, processing of interrupts on this IRQ line is re-enabled. * - * This function may be called from IRQ context only when - * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! + * This function may be called from IRQ context only when + * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! */ void enable_irq(unsigned int irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; - if (!desc) - return; - if (WARN(!desc->irq_data.chip, - KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq)) - goto out; - - __enable_irq(desc); -out: - irq_put_desc_busunlock(desc, flags); + if (WARN(!desc->irq_data.chip, "enable_irq before setup/request_irq: irq %u\n", irq)) + return; + __enable_irq(desc); + } } EXPORT_SYMBOL(enable_irq); +/** + * enable_nmi - enable handling of an nmi + * @irq: Interrupt to enable + * + * The interrupt to enable must have been requested through request_nmi. + * Undoes the effect of one call to disable_nmi(). If this matches the last + * disable, processing of interrupts on this IRQ line is re-enabled. + */ +void enable_nmi(unsigned int irq) +{ + enable_irq(irq); +} + static int set_irq_wake_real(unsigned int irq, unsigned int on) { struct irq_desc *desc = irq_to_desc(irq); @@ -621,50 +827,59 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on) } /** - * irq_set_irq_wake - control irq power management wakeup - * @irq: interrupt to control - * @on: enable/disable power management wakeup + * irq_set_irq_wake - control irq power management wakeup + * @irq: interrupt to control + * @on: enable/disable power management wakeup * - * Enable/disable power management wakeup mode, which is - * disabled by default. Enables and disables must match, - * just as they match for non-wakeup mode support. + * Enable/disable power management wakeup mode, which is disabled by + * default. Enables and disables must match, just as they match for + * non-wakeup mode support. * - * Wakeup mode lets this IRQ wake the system from sleep - * states like "suspend to RAM". + * Wakeup mode lets this IRQ wake the system from sleep states like + * "suspend to RAM". + * + * Note: irq enable/disable state is completely orthogonal to the + * enable/disable state of irq wake. An irq can be disabled with + * disable_irq() and still wake the system as long as the irq has wake + * enabled. If this does not hold, then the underlying irq chip and the + * related driver need to be investigated. */ int irq_set_irq_wake(unsigned int irq, unsigned int on) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); - int ret = 0; + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; + int ret = 0; - if (!desc) - return -EINVAL; + /* Don't use NMIs as wake up interrupts please */ + if (irq_is_nmi(desc)) + return -EINVAL; - /* wakeup-capable irqs can be shared between drivers that - * don't need to have the same sleep mode behaviors. 
- */ - if (on) { - if (desc->wake_depth++ == 0) { - ret = set_irq_wake_real(irq, on); - if (ret) - desc->wake_depth = 0; - else - irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE); - } - } else { - if (desc->wake_depth == 0) { - WARN(1, "Unbalanced IRQ %d wake disable\n", irq); - } else if (--desc->wake_depth == 0) { - ret = set_irq_wake_real(irq, on); - if (ret) - desc->wake_depth = 1; - else - irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); + /* + * wakeup-capable irqs can be shared between drivers that + * don't need to have the same sleep mode behaviors. + */ + if (on) { + if (desc->wake_depth++ == 0) { + ret = set_irq_wake_real(irq, on); + if (ret) + desc->wake_depth = 0; + else + irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE); + } + } else { + if (desc->wake_depth == 0) { + WARN(1, "Unbalanced IRQ %d wake disable\n", irq); + } else if (--desc->wake_depth == 0) { + ret = set_irq_wake_real(irq, on); + if (ret) + desc->wake_depth = 1; + else + irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); + } } + return ret; } - irq_put_desc_busunlock(desc, flags); - return ret; + return -EINVAL; } EXPORT_SYMBOL(irq_set_irq_wake); @@ -673,22 +888,17 @@ EXPORT_SYMBOL(irq_set_irq_wake); * particular irq has been exclusively allocated or is available * for driver use. */ -int can_request_irq(unsigned int irq, unsigned long irqflags) +bool can_request_irq(unsigned int irq, unsigned long irqflags) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - int canrequest = 0; - - if (!desc) - return 0; + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; - if (irq_settings_can_request(desc)) { - if (!desc->action || - irqflags & desc->action->flags & IRQF_SHARED) - canrequest = 1; + if (irq_settings_can_request(desc)) { + if (!desc->action || irqflags & desc->action->flags & IRQF_SHARED) + return true; + } } - irq_put_desc_unlock(desc, flags); - return canrequest; + return false; } int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) @@ -723,6 +933,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) case IRQ_SET_MASK_OK_DONE: irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK); irqd_set(&desc->irq_data, flags); + fallthrough; case IRQ_SET_MASK_OK_NOCOPY: flags = irqd_get_trigger_type(&desc->irq_data); @@ -737,7 +948,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) ret = 0; break; default: - pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n", + pr_err("Setting trigger mode %lu for irq %u failed (%pS)\n", flags, irq_desc_get_irq(desc), chip->irq_set_type); } if (unmask) @@ -748,16 +959,11 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) #ifdef CONFIG_HARDIRQS_SW_RESEND int irq_set_parent(int irq, int parent_irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - - if (!desc) - return -EINVAL; - - desc->parent_irq = parent_irq; - - irq_put_desc_unlock(desc, flags); - return 0; + scoped_irqdesc_get_and_lock(irq, 0) { + scoped_irqdesc->parent_irq = parent_irq; + return 0; + } + return -EINVAL; } EXPORT_SYMBOL_GPL(irq_set_parent); #endif @@ -788,10 +994,48 @@ static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id) return IRQ_NONE; } -static int irq_wait_for_interrupt(struct irqaction *action) +#ifdef CONFIG_SMP +/* + * Check whether we need to change the affinity of the interrupt thread. 
+ */ +static void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) +{ + cpumask_var_t mask; + + if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) + return; + + __set_current_state(TASK_RUNNING); + + /* + * In case we are out of memory we set IRQTF_AFFINITY again and + * try again next time + */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { + set_bit(IRQTF_AFFINITY, &action->thread_flags); + return; + } + + scoped_guard(raw_spinlock_irq, &desc->lock) { + const struct cpumask *m; + + m = irq_data_get_effective_affinity_mask(&desc->irq_data); + cpumask_copy(mask, m); + } + + set_cpus_allowed_ptr(current, mask); + free_cpumask_var(mask); +} +#else +static inline void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } +#endif + +static int irq_wait_for_interrupt(struct irq_desc *desc, + struct irqaction *action) { for (;;) { set_current_state(TASK_INTERRUPTIBLE); + irq_thread_check_affinity(desc, action); if (kthread_should_stop()) { /* may need to run one last time */ @@ -838,7 +1082,7 @@ again: * to IRQS_INPROGRESS and the irq line is masked forever. * * This also serializes the state of shared oneshot handlers - * versus "desc->threads_onehsot |= action->thread_mask;" in + * versus "desc->threads_oneshot |= action->thread_mask;" in * irq_wake_thread(). See the comment there which explains the * serialization. */ @@ -868,51 +1112,21 @@ out_unlock: chip_bus_sync_unlock(desc); } -#ifdef CONFIG_SMP /* - * Check whether we need to change the affinity of the interrupt thread. + * Interrupts explicitly requested as threaded interrupts want to be + * preemptible - many of them need to sleep and wait for slow busses to + * complete. */ -static void -irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_thread_fn(struct irq_desc *desc, struct irqaction *action) { - cpumask_var_t mask; - bool valid = true; + irqreturn_t ret = action->thread_fn(action->irq, action->dev_id); - if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) - return; - - /* - * In case we are out of memory we set IRQTF_AFFINITY again and - * try again next time - */ - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { - set_bit(IRQTF_AFFINITY, &action->thread_flags); - return; - } - - raw_spin_lock_irq(&desc->lock); - /* - * This code is triggered unconditionally. Check the affinity - * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. - */ - if (cpumask_available(desc->irq_common_data.affinity)) { - const struct cpumask *m; - - m = irq_data_get_effective_affinity_mask(&desc->irq_data); - cpumask_copy(mask, m); - } else { - valid = false; - } - raw_spin_unlock_irq(&desc->lock); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); - if (valid) - set_cpus_allowed_ptr(current, mask); - free_cpumask_var(mask); + irq_finalize_oneshot(desc, action); + return ret; } -#else -static inline void -irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } -#endif /* * Interrupts which are not explicitly requested as threaded @@ -920,40 +1134,21 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } * context. So we need to disable bh here to avoid deadlocks and other * side effects. 
*/ -static irqreturn_t -irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) { irqreturn_t ret; local_bh_disable(); - ret = action->thread_fn(action->irq, action->dev_id); - if (ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); - - irq_finalize_oneshot(desc, action); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); + ret = irq_thread_fn(desc, action); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); local_bh_enable(); return ret; } -/* - * Interrupts explicitly requested as threaded interrupts want to be - * preemtible - many of them need to sleep and wait for slow busses to - * complete. - */ -static irqreturn_t irq_thread_fn(struct irq_desc *desc, - struct irqaction *action) -{ - irqreturn_t ret; - - ret = action->thread_fn(action->irq, action->dev_id); - if (ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); - - irq_finalize_oneshot(desc, action); - return ret; -} - -static void wake_threads_waitq(struct irq_desc *desc) +void wake_threads_waitq(struct irq_desc *desc) { if (atomic_dec_and_test(&desc->threads_active)) wake_up(&desc->wait_for_threads); @@ -993,9 +1188,33 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) if (WARN_ON_ONCE(!secondary)) return; - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); __irq_wake_thread(desc, secondary); - raw_spin_unlock_irq(&desc->lock); +} + +/* + * Internal function to notify that a interrupt thread is ready. + */ +static void irq_thread_set_ready(struct irq_desc *desc, + struct irqaction *action) +{ + set_bit(IRQTF_READY, &action->thread_flags); + wake_up(&desc->wait_for_threads); +} + +/* + * Internal function to wake up a interrupt thread and wait until it is + * ready. + */ +static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc, + struct irqaction *action) +{ + if (!action || !action->thread) + return; + + wake_up_process(action->thread); + wait_event(desc->wait_for_threads, + test_bit(IRQTF_READY, &action->thread_flags)); } /* @@ -1009,22 +1228,25 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); - if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, - &action->thread_flags)) + irq_thread_set_ready(desc, action); + + if (action->handler == irq_forced_secondary_handler) + sched_set_fifo_secondary(current); + else + sched_set_fifo(current); + + if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, + &action->thread_flags)) handler_fn = irq_forced_thread_fn; else handler_fn = irq_thread_fn; init_task_work(&on_exit_work, irq_thread_dtor); - task_work_add(current, &on_exit_work, false); - - irq_thread_check_affinity(desc, action); + task_work_add(current, &on_exit_work, TWA_NONE); - while (!irq_wait_for_interrupt(action)) { + while (!irq_wait_for_interrupt(desc, action)) { irqreturn_t action_ret; - irq_thread_check_affinity(desc, action); - action_ret = handler_fn(desc, action); if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); @@ -1038,26 +1260,24 @@ static int irq_thread(void *data) * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the * oneshot mask bit can be set. 
*/ - task_work_cancel(current, irq_thread_dtor); + task_work_cancel_func(current, irq_thread_dtor); return 0; } /** - * irq_wake_thread - wake the irq thread for the action identified by dev_id - * @irq: Interrupt line - * @dev_id: Device identity for which the thread should be woken - * + * irq_wake_thread - wake the irq thread for the action identified by dev_id + * @irq: Interrupt line + * @dev_id: Device identity for which the thread should be woken */ void irq_wake_thread(unsigned int irq, void *dev_id) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action; - unsigned long flags; if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) return; - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irqsave)(&desc->lock); for_each_action_of_desc(desc, action) { if (action->dev_id == dev_id) { if (action->thread) @@ -1065,13 +1285,12 @@ void irq_wake_thread(unsigned int irq, void *dev_id) break; } } - raw_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL_GPL(irq_wake_thread); static int irq_setup_forced_threading(struct irqaction *new) { - if (!force_irqthreads) + if (!force_irqthreads()) return 0; if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) return 0; @@ -1125,13 +1344,43 @@ static void irq_release_resources(struct irq_desc *desc) c->irq_release_resources(d); } +static bool irq_supports_nmi(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + /* Only IRQs directly managed by the root irqchip can be set as NMI */ + if (d->parent_data) + return false; +#endif + /* Don't support NMIs for chips behind a slow bus */ + if (d->chip->irq_bus_lock || d->chip->irq_bus_sync_unlock) + return false; + + return d->chip->flags & IRQCHIP_SUPPORTS_NMI; +} + +static int irq_nmi_setup(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + return c->irq_nmi_setup ? c->irq_nmi_setup(d) : -EINVAL; +} + +static void irq_nmi_teardown(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + if (c->irq_nmi_teardown) + c->irq_nmi_teardown(d); +} + static int setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) { struct task_struct *t; - struct sched_param param = { - .sched_priority = MAX_USER_RT_PRIO/2, - }; if (!secondary) { t = kthread_create(irq_thread, new, "irq/%d-%s", irq, @@ -1139,34 +1388,50 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) } else { t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq, new->name); - param.sched_priority -= 1; } if (IS_ERR(t)) return PTR_ERR(t); - sched_setscheduler_nocheck(t, SCHED_FIFO, ¶m); - /* * We keep the reference to the task struct even if * the thread dies to avoid that the interrupt code * references an already freed task_struct. */ - get_task_struct(t); - new->thread = t; + new->thread = get_task_struct(t); + /* - * Tell the thread to set its affinity. This is - * important for shared interrupt handlers as we do - * not invoke setup_affinity() for the secondary - * handlers as everything is already set up. Even for - * interrupts marked with IRQF_NO_BALANCE this is - * correct as we want the thread to move to the cpu(s) - * on which the requesting code placed the interrupt. + * The affinity can not be established yet, but it will be once the + * interrupt is enabled. Delay and defer the actual setting to the + * thread itself once it is ready to run. 
In the meantime, prevent + * it from ever being re-affined directly by cpuset or + * housekeeping. The proper way to do it is to re-affine the whole + * vector. */ - set_bit(IRQTF_AFFINITY, &new->thread_flags); + kthread_bind_mask(t, cpu_possible_mask); + + /* + * Ensure the thread adjusts the affinity once it reaches the + * thread function. + */ + new->thread_flags = BIT(IRQTF_AFFINITY); + return 0; } +static bool valid_percpu_irqaction(struct irqaction *old, struct irqaction *new) +{ + do { + if (cpumask_intersects(old->affinity, new->affinity) || + old->percpu_dev_id == new->percpu_dev_id) + return false; + + old = old->next; + } while (old); + + return true; +} + /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. @@ -1187,6 +1452,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) struct irqaction *old, **old_ptr; unsigned long flags, thread_mask = 0; int ret, nested, shared = 0; + bool per_cpu_devid; if (!desc) return -EINVAL; @@ -1196,6 +1462,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (!try_module_get(desc->owner)) return -ENODEV; + per_cpu_devid = irq_settings_is_per_cpu_devid(desc); + new->irq = irq; /* @@ -1299,9 +1567,24 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * fields must have IRQF_SHARED set and the bits which * set the trigger type must match. Also all must * agree on ONESHOT. + * Interrupt lines used for NMIs cannot be shared. */ unsigned int oldtype; + if (irq_is_nmi(desc) && !per_cpu_devid) { + pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", + new->name, irq, desc->irq_data.chip->name); + ret = -EINVAL; + goto out_unlock; + } + + if (per_cpu_devid && !valid_percpu_irqaction(old, new)) { + pr_err("Overlapping affinities for %s (irq %d) on irqchip %s.\n", + new->name, irq, desc->irq_data.chip->name); + ret = -EINVAL; + goto out_unlock; + } + /* * If nobody did set the configuration before, inherit * the one provided by the requester. @@ -1314,8 +1597,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!((old->flags & new->flags) & IRQF_SHARED) || - (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || - ((old->flags ^ new->flags) & IRQF_ONESHOT)) + (oldtype != (new->flags & IRQF_TRIGGER_MASK))) + goto mismatch; + + if ((old->flags & IRQF_ONESHOT) && + (new->flags & IRQF_COND_ONESHOT)) + new->flags |= IRQF_ONESHOT; + else if ((old->flags ^ new->flags) & IRQF_ONESHOT) goto mismatch; /* All handlers must agree on per-cpuness */ @@ -1390,15 +1678,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * has. The type flags are unreliable as the * underlying chip implementation can override them. 
*/ - pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n", - irq); + pr_err("Threaded irq requested with handler=NULL and !ONESHOT for %s (irq %d)\n", + new->name, irq); ret = -EINVAL; goto out_unlock; } if (!shared) { - init_waitqueue_head(&desc->wait_for_threads); - /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, @@ -1430,8 +1716,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (new->flags & IRQF_PERCPU) { irqd_set(&desc->irq_data, IRQD_PER_CPU); irq_settings_set_per_cpu(desc); + if (new->flags & IRQF_NO_DEBUG) + irq_settings_set_no_debug(desc); } + if (noirqdebug) + irq_settings_set_no_debug(desc); + if (new->flags & IRQF_ONESHOT) desc->istate |= IRQS_ONESHOT; @@ -1441,9 +1732,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irqd_set(&desc->irq_data, IRQD_NO_BALANCING); } - if (irq_settings_can_autoenable(desc)) { + if (!(new->flags & IRQF_NO_AUTOEN) && + irq_settings_can_autoenable(desc)) { irq_startup(desc, IRQ_RESEND, IRQ_START_COND); - } else { + } else if (!per_cpu_devid) { /* * Shared interrupts do not go well with disabling * auto enable. The sharing interrupt might request @@ -1488,14 +1780,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irq_setup_timings(desc, new); - /* - * Strictly no need to wake it up, but hung_task complains - * when no hard interrupt wakes the thread up. - */ - if (new->thread) - wake_up_process(new->thread); - if (new->secondary) - wake_up_process(new->secondary->thread); + wake_up_and_wait_for_irq_thread_ready(desc, new); + wake_up_and_wait_for_irq_thread_ready(desc, new->secondary); register_irq_proc(irq, desc); new->dir = NULL; @@ -1526,49 +1812,19 @@ out_thread: struct task_struct *t = new->thread; new->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); } if (new->secondary && new->secondary->thread) { struct task_struct *t = new->secondary->thread; new->secondary->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); } out_mput: module_put(desc->owner); return ret; } -/** - * setup_irq - setup an interrupt - * @irq: Interrupt line to setup - * @act: irqaction for the interrupt - * - * Used to statically setup interrupts in the early boot process. - */ -int setup_irq(unsigned int irq, struct irqaction *act) -{ - int retval; - struct irq_desc *desc = irq_to_desc(irq); - - if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) - return -EINVAL; - - retval = irq_chip_pm_get(&desc->irq_data); - if (retval < 0) - return retval; - - retval = __setup_irq(irq, desc, act); - - if (retval) - irq_chip_pm_put(&desc->irq_data); - - return retval; -} -EXPORT_SYMBOL_GPL(setup_irq); - /* * Internal function to unregister an irqaction - used to free * regular and special interrupts that are part of the architecture. @@ -1614,6 +1870,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) /* If this was the last handler, shut down the IRQ line: */ if (!desc->action) { irq_settings_clr_disable_unlazy(desc); + /* Only shutdown. 
Deactivate after synchronize_hardirq() */ irq_shutdown(desc); } @@ -1642,8 +1899,12 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) unregister_handler_proc(irq, action); - /* Make sure it's not being used on another CPU: */ - synchronize_hardirq(irq); + /* + * Make sure it's not being used on another CPU and if the chip + * supports it also make sure that there is no (not yet serviced) + * interrupt in flight at the hardware level. + */ + __synchronize_irq(desc); #ifdef CONFIG_DEBUG_SHIRQ /* @@ -1668,21 +1929,25 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) * the same bit to a newly requested action. */ if (action->thread) { - kthread_stop(action->thread); - put_task_struct(action->thread); - if (action->secondary && action->secondary->thread) { - kthread_stop(action->secondary->thread); - put_task_struct(action->secondary->thread); - } + kthread_stop_put(action->thread); + if (action->secondary && action->secondary->thread) + kthread_stop_put(action->secondary->thread); } /* Last action releases resources */ if (!desc->action) { /* - * Reaquire bus lock as irq_release_resources() might + * Reacquire bus lock as irq_release_resources() might * require it to deallocate resources over the slow bus. */ chip_bus_lock(desc); + /* + * There is no interrupt on the fly anymore. Deactivate it + * completely. + */ + scoped_guard(raw_spinlock_irqsave, &desc->lock) + irq_domain_deactivate_irq(&desc->irq_data); + irq_release_resources(desc); chip_bus_sync_unlock(desc); irq_remove_timings(desc); @@ -1697,36 +1962,19 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) } /** - * remove_irq - free an interrupt - * @irq: Interrupt line to free - * @act: irqaction for the interrupt + * free_irq - free an interrupt allocated with request_irq + * @irq: Interrupt line to free + * @dev_id: Device identity to free * - * Used to remove interrupts statically setup by the early boot process. - */ -void remove_irq(unsigned int irq, struct irqaction *act) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc))) - __free_irq(desc, act->dev_id); -} -EXPORT_SYMBOL_GPL(remove_irq); - -/** - * free_irq - free an interrupt allocated with request_irq - * @irq: Interrupt line to free - * @dev_id: Device identity to free + * Remove an interrupt handler. The handler is removed and if the interrupt + * line is no longer in use by any driver it is disabled. On a shared IRQ + * the caller must ensure the interrupt is disabled on the card it drives + * before calling this function. The function does not return until any + * executing interrupts for this IRQ have completed. * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. + * This function must not be called from interrupt context. * - * This function must not be called from interrupt context. - * - * Returns the devname argument passed to request_irq. + * Returns the devname argument passed to request_irq. 
*/ const void *free_irq(unsigned int irq, void *dev_id) { @@ -1753,47 +2001,91 @@ const void *free_irq(unsigned int irq, void *dev_id) } EXPORT_SYMBOL(free_irq); +/* This function must be called with desc->lock held */ +static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc) +{ + const char *devname = NULL; + + desc->istate &= ~IRQS_NMI; + + if (!WARN_ON(desc->action == NULL)) { + irq_pm_remove_action(desc, desc->action); + devname = desc->action->name; + unregister_handler_proc(irq, desc->action); + + kfree(desc->action); + desc->action = NULL; + } + + irq_settings_clr_disable_unlazy(desc); + irq_shutdown_and_deactivate(desc); + + irq_release_resources(desc); + + irq_chip_pm_put(&desc->irq_data); + module_put(desc->owner); + + return devname; +} + +const void *free_nmi(unsigned int irq, void *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || WARN_ON(!irq_is_nmi(desc))) + return NULL; + + if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) + return NULL; + + /* NMI still enabled */ + if (WARN_ON(desc->depth == 0)) + disable_nmi_nosync(irq); + + guard(raw_spinlock_irqsave)(&desc->lock); + irq_nmi_teardown(desc); + return __cleanup_nmi(irq, desc); +} + /** - * request_threaded_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * Primary handler for threaded interrupts - * If NULL and thread_fn != NULL the default - * primary handler is installed - * @thread_fn: Function called from the irq handler thread - * If NULL, no irq thread is created - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * If you want to set up a threaded irq handler for your device - * then you need to supply @handler and @thread_fn. @handler is - * still called in hard interrupt context and has to check - * whether the interrupt originates from the device. If yes it - * needs to disable the interrupt on the device and return - * IRQ_WAKE_THREAD which will wake up the handler thread and run - * @thread_fn. This split handler design is necessary to support - * shared interrupts. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: + * request_threaded_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Primary handler for threaded interrupts. + * If handler is NULL and thread_fn != NULL + * the default primary handler is installed. + * @thread_fn: Function called from the irq handler thread + * If NULL, no irq thread is created + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the interrupt line + * and IRQ handling. 
From the point this call is made your handler function + * may be invoked. Since your handler function must clear any interrupt the + * board raises, you must take care both to initialise your hardware and to + * set up the interrupt handler in the right order. + * + * If you want to set up a threaded irq handler for your device then you + * need to supply @handler and @thread_fn. @handler is still called in hard + * interrupt context and has to check whether the interrupt originates from + * the device. If yes it needs to disable the interrupt on the device and + * return IRQ_WAKE_THREAD which will wake up the handler thread and run + * @thread_fn. This split handler design is necessary to support shared + * interrupts. + * + * @dev_id must be globally unique. Normally the address of the device data + * structure is used as the cookie. Since the handler receives this value + * it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id as this is + * required when freeing the interrupt. + * + * Flags: * * IRQF_SHARED Interrupt is shared * IRQF_TRIGGER_* Specify active edge(s) or level - * + * IRQF_ONESHOT Run thread_fn with interrupt line masked */ int request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long irqflags, @@ -1812,10 +2104,15 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, * which interrupt is which (messes up the interrupt freeing * logic etc). * + * Also shared interrupts do not go well with disabling auto enable. + * The sharing interrupt might request it while it's still disabled + * and then wait for interrupts forever. + * * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and * it cannot be set along with IRQF_NO_SUSPEND. */ if (((irqflags & IRQF_SHARED) && !dev_id) || + ((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN)) || (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) return -EINVAL; @@ -1882,21 +2179,20 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, EXPORT_SYMBOL(request_threaded_irq); /** - * request_any_context_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * Threaded handler for threaded interrupts. - * @flags: Interrupt type flags - * @name: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. It selects either a - * hardirq or threaded handling method depending on the - * context. - * - * On failure, it returns a negative value. On success, - * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. + * request_any_context_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @flags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the interrupt line + * and IRQ handling. It selects either a hardirq or threaded handling + * method depending on the context. + * + * Returns: On failure, it returns a negative value. On success, it returns either + * IRQC_IS_HARDIRQ or IRQC_IS_NESTED. 
*/ int request_any_context_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev_id) @@ -1922,40 +2218,125 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler, } EXPORT_SYMBOL_GPL(request_any_context_irq); -void enable_percpu_irq(unsigned int irq, unsigned int type) +/** + * request_nmi - allocate an interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @irqflags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the interrupt line + * and IRQ handling. It sets up the IRQ line to be handled as an NMI. + * + * An interrupt line delivering NMIs cannot be shared and IRQ handling + * cannot be threaded. + * + * Interrupt lines requested for NMI delivering must produce per cpu + * interrupts and have auto enabling setting disabled. + * + * @dev_id must be globally unique. Normally the address of the device data + * structure is used as the cookie. Since the handler receives this value + * it makes sense to use it. + * + * If the interrupt line cannot be used to deliver NMIs, function will fail + * and return a negative value. + */ +int request_nmi(unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *name, void *dev_id) { - unsigned int cpu = smp_processor_id(); - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); + struct irqaction *action; + struct irq_desc *desc; + int retval; - if (!desc) - return; + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; - /* - * If the trigger type is not specified by the caller, then - * use the default for this interrupt. 
- */ - type &= IRQ_TYPE_SENSE_MASK; - if (type == IRQ_TYPE_NONE) - type = irqd_get_trigger_type(&desc->irq_data); + /* NMI cannot be shared, used for Polling */ + if (irqflags & (IRQF_SHARED | IRQF_COND_SUSPEND | IRQF_IRQPOLL)) + return -EINVAL; - if (type != IRQ_TYPE_NONE) { - int ret; + if (!(irqflags & IRQF_PERCPU)) + return -EINVAL; - ret = __irq_set_trigger(desc, type); + if (!handler) + return -EINVAL; - if (ret) { - WARN(1, "failed to set type for IRQ%d\n", irq); - goto out; + desc = irq_to_desc(irq); + + if (!desc || (irq_settings_can_autoenable(desc) && + !(irqflags & IRQF_NO_AUTOEN)) || + !irq_settings_can_request(desc) || + WARN_ON(irq_settings_is_per_cpu_devid(desc)) || + !irq_supports_nmi(desc)) + return -EINVAL; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags | IRQF_NO_THREAD | IRQF_NOBALANCING; + action->name = name; + action->dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + goto err_out; + + retval = __setup_irq(irq, desc, action); + if (retval) + goto err_irq_setup; + + scoped_guard(raw_spinlock_irqsave, &desc->lock) { + /* Setup NMI state */ + desc->istate |= IRQS_NMI; + retval = irq_nmi_setup(desc); + if (retval) { + __cleanup_nmi(irq, desc); + return -EINVAL; } + return 0; } - irq_percpu_enable(desc, cpu); -out: - irq_put_desc_unlock(desc, flags); +err_irq_setup: + irq_chip_pm_put(&desc->irq_data); +err_out: + kfree(action); + + return retval; +} + +void enable_percpu_irq(unsigned int irq, unsigned int type) +{ + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) { + struct irq_desc *desc = scoped_irqdesc; + + /* + * If the trigger type is not specified by the caller, then + * use the default for this interrupt. 
+ */ + type &= IRQ_TYPE_SENSE_MASK; + if (type == IRQ_TYPE_NONE) + type = irqd_get_trigger_type(&desc->irq_data); + + if (type != IRQ_TYPE_NONE) { + if (__irq_set_trigger(desc, type)) { + WARN(1, "failed to set type for IRQ%d\n", irq); + return; + } + } + irq_percpu_enable(desc, smp_processor_id()); + } } EXPORT_SYMBOL_GPL(enable_percpu_irq); +void enable_percpu_nmi(unsigned int irq, unsigned int type) +{ + enable_percpu_irq(irq, type); +} + /** * irq_percpu_is_enabled - Check whether the per cpu irq is enabled * @irq: Linux irq number to check for @@ -1965,106 +2346,84 @@ EXPORT_SYMBOL_GPL(enable_percpu_irq); */ bool irq_percpu_is_enabled(unsigned int irq) { - unsigned int cpu = smp_processor_id(); - struct irq_desc *desc; - unsigned long flags; - bool is_enabled; - - desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); - if (!desc) - return false; - - is_enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); - irq_put_desc_unlock(desc, flags); - - return is_enabled; + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) + return cpumask_test_cpu(smp_processor_id(), scoped_irqdesc->percpu_enabled); + return false; } EXPORT_SYMBOL_GPL(irq_percpu_is_enabled); void disable_percpu_irq(unsigned int irq) { - unsigned int cpu = smp_processor_id(); - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); - - if (!desc) - return; - - irq_percpu_disable(desc, cpu); - irq_put_desc_unlock(desc, flags); + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) + irq_percpu_disable(scoped_irqdesc, smp_processor_id()); } EXPORT_SYMBOL_GPL(disable_percpu_irq); +void disable_percpu_nmi(unsigned int irq) +{ + disable_percpu_irq(irq); +} + /* * Internal function to unregister a percpu irqaction. 
*/ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_id) { struct irq_desc *desc = irq_to_desc(irq); - struct irqaction *action; - unsigned long flags; + struct irqaction *action, **action_ptr; WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); if (!desc) return NULL; - raw_spin_lock_irqsave(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) { + action_ptr = &desc->action; + for (;;) { + action = *action_ptr; - action = desc->action; - if (!action || action->percpu_dev_id != dev_id) { - WARN(1, "Trying to free already-free IRQ %d\n", irq); - goto bad; - } + if (!action) { + WARN(1, "Trying to free already-free IRQ %d\n", irq); + return NULL; + } - if (!cpumask_empty(desc->percpu_enabled)) { - WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", - irq, cpumask_first(desc->percpu_enabled)); - goto bad; - } + if (action->percpu_dev_id == dev_id) + break; - /* Found it - now remove it from the list of entries: */ - desc->action = NULL; + action_ptr = &action->next; + } - raw_spin_unlock_irqrestore(&desc->lock, flags); + if (cpumask_intersects(desc->percpu_enabled, action->affinity)) { + WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", irq, + cpumask_first_and(desc->percpu_enabled, action->affinity)); + return NULL; + } - unregister_handler_proc(irq, action); + /* Found it - now remove it from the list of entries: */ + *action_ptr = action->next; + /* Demote from NMI if we killed the last action */ + if (!desc->action) + desc->istate &= ~IRQS_NMI; + } + + unregister_handler_proc(irq, action); irq_chip_pm_put(&desc->irq_data); module_put(desc->owner); return action; - -bad: - raw_spin_unlock_irqrestore(&desc->lock, flags); - return NULL; } /** - * remove_percpu_irq - free a per-cpu interrupt - * @irq: Interrupt line to free - * @act: irqaction for the interrupt + * free_percpu_irq - free an interrupt allocated with request_percpu_irq + * @irq: Interrupt line to free + * @dev_id: Device identity to free * - * Used to remove interrupts statically setup by the early boot process. - */ -void remove_percpu_irq(unsigned int irq, struct irqaction *act) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (desc && irq_settings_is_per_cpu_devid(desc)) - __free_percpu_irq(irq, act->percpu_dev_id); -} - -/** - * free_percpu_irq - free an interrupt allocated with request_percpu_irq - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove a percpu interrupt handler. The handler is removed, but - * the interrupt line is not disabled. This must be done on each - * CPU before calling this function. The function does not return - * until any executing interrupts for this IRQ have completed. + * Remove a percpu interrupt handler. The handler is removed, but the + * interrupt line is not disabled. This must be done on each CPU before + * calling this function. The function does not return until any executing + * interrupts for this IRQ have completed. * - * This function must not be called from interrupt context. + * This function must not be called from interrupt context. 
*/ void free_percpu_irq(unsigned int irq, void __percpu *dev_id) { @@ -2079,10 +2438,23 @@ void free_percpu_irq(unsigned int irq, void __percpu *dev_id) } EXPORT_SYMBOL_GPL(free_percpu_irq); +void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !irq_settings_is_per_cpu_devid(desc)) + return; + + if (WARN_ON(!irq_is_nmi(desc))) + return; + + kfree(__free_percpu_irq(irq, dev_id)); +} + /** - * setup_percpu_irq - setup a per-cpu interrupt - * @irq: Interrupt line to setup - * @act: irqaction for the interrupt + * setup_percpu_irq - setup a per-cpu interrupt + * @irq: Interrupt line to setup + * @act: irqaction for the interrupt * * Used to statically setup per-cpu interrupts in the early boot process. */ @@ -2106,26 +2478,57 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act) return retval; } +static +struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long flags, + const char *devname, const cpumask_t *affinity, + void __percpu *dev_id) +{ + struct irqaction *action; + + if (!affinity) + affinity = cpu_possible_mask; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return NULL; + + action->handler = handler; + action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; + action->name = devname; + action->percpu_dev_id = dev_id; + action->affinity = affinity; + + /* + * We allow some form of sharing for non-overlapping affinity + * masks. Obviously, covering all CPUs prevents any sharing in + * the first place. + */ + if (!cpumask_equal(affinity, cpu_possible_mask)) + action->flags |= IRQF_SHARED; + + return action; +} + /** - * __request_percpu_irq - allocate a percpu interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * @flags: Interrupt type flags (IRQF_TIMER only) - * @devname: An ascii name for the claiming device - * @dev_id: A percpu cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt on the local CPU. If the interrupt is supposed to be - * enabled on other CPUs, it has to be done on each CPU using - * enable_percpu_irq(). - * - * Dev_id must be globally unique. It is a per-cpu variable, and - * the handler gets called with the interrupted CPU's instance of - * that variable. + * __request_percpu_irq - allocate a percpu interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * @flags: Interrupt type flags (IRQF_TIMER only) + * @devname: An ascii name for the claiming device + * @affinity: A cpumask describing the target CPUs for this interrupt + * @dev_id: A percpu cookie passed back to the handler function + * + * This call allocates interrupt resources, but doesn't enable the interrupt + * on any CPU, as all percpu-devid interrupts are flagged with IRQ_NOAUTOEN. + * It has to be done on each CPU using enable_percpu_irq(). + * + * @dev_id must be globally unique. It is a per-cpu variable, and + * the handler gets called with the interrupted CPU's instance of + * that variable. 
*/ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *devname, - void __percpu *dev_id) + const cpumask_t *affinity, void __percpu *dev_id) { struct irqaction *action; struct irq_desc *desc; @@ -2142,15 +2545,10 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, if (flags && flags != IRQF_TIMER) return -EINVAL; - action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + action = create_percpu_irqaction(handler, flags, devname, affinity, dev_id); if (!action) return -ENOMEM; - action->handler = handler; - action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; - action->name = devname; - action->percpu_dev_id = dev_id; - retval = irq_chip_pm_get(&desc->irq_data); if (retval < 0) { kfree(action); @@ -2169,35 +2567,136 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, EXPORT_SYMBOL_GPL(__request_percpu_irq); /** - * irq_get_irqchip_state - returns the irqchip state of a interrupt. - * @irq: Interrupt line that is forwarded to a VM - * @which: One of IRQCHIP_STATE_* the caller wants to know about - * @state: a pointer to a boolean where the state is to be storeed + * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * @name: An ascii name for the claiming device + * @affinity: A cpumask describing the target CPUs for this interrupt + * @dev_id: A percpu cookie passed back to the handler function * - * This call snapshots the internal irqchip state of an - * interrupt, returning into @state the bit corresponding to - * stage @which + * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs + * have to be setup on each CPU by calling prepare_percpu_nmi() before + * being enabled on the same CPU by using enable_percpu_nmi(). * - * This function should be called with preemption disabled if the - * interrupt controller has per-cpu registers. + * @dev_id must be globally unique. It is a per-cpu variable, and the + * handler gets called with the interrupted CPU's instance of that + * variable. + * + * Interrupt lines requested for NMI delivering should have auto enabling + * setting disabled. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail returning a negative value. 
*/ -int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, - bool *state) +int request_percpu_nmi(unsigned int irq, irq_handler_t handler, const char *name, + const struct cpumask *affinity, void __percpu *dev_id) { + struct irqaction *action; struct irq_desc *desc; - struct irq_data *data; - struct irq_chip *chip; - unsigned long flags; - int err = -EINVAL; + int retval; - desc = irq_get_desc_buslock(irq, &flags, 0); - if (!desc) - return err; + if (!handler) + return -EINVAL; + + desc = irq_to_desc(irq); + + if (!desc || !irq_settings_can_request(desc) || + !irq_settings_is_per_cpu_devid(desc) || + irq_settings_can_autoenable(desc) || + !irq_supports_nmi(desc)) + return -EINVAL; + + /* The line cannot be NMI already if the new request covers all CPUs */ + if (irq_is_nmi(desc) && + (!affinity || cpumask_equal(affinity, cpu_possible_mask))) + return -EINVAL; + + action = create_percpu_irqaction(handler, IRQF_NO_THREAD | IRQF_NOBALANCING, + name, affinity, dev_id); + if (!action) + return -ENOMEM; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + goto err_out; + + retval = __setup_irq(irq, desc, action); + if (retval) + goto err_irq_setup; + + scoped_guard(raw_spinlock_irqsave, &desc->lock) + desc->istate |= IRQS_NMI; + return 0; + +err_irq_setup: + irq_chip_pm_put(&desc->irq_data); +err_out: + kfree(action); + + return retval; +} + +/** + * prepare_percpu_nmi - performs CPU local setup for NMI delivery + * @irq: Interrupt line to prepare for NMI delivery + * + * This call prepares an interrupt line to deliver NMI on the current CPU, + * before that interrupt line gets enabled with enable_percpu_nmi(). + * + * As a CPU local operation, this should be called from non-preemptible + * context. + * + * If the interrupt line cannot be used to deliver NMIs, function will fail + * returning a negative value. + */ +int prepare_percpu_nmi(unsigned int irq) +{ + int ret = -EINVAL; - data = irq_desc_get_irq_data(desc); + WARN_ON(preemptible()); + + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) { + if (WARN(!irq_is_nmi(scoped_irqdesc), + "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", irq)) + return -EINVAL; + + ret = irq_nmi_setup(scoped_irqdesc); + if (ret) + pr_err("Failed to setup NMI delivery: irq %u\n", irq); + } + return ret; +} + +/** + * teardown_percpu_nmi - undoes NMI setup of IRQ line + * @irq: Interrupt line from which CPU local NMI configuration should be removed + * + * This call undoes the setup done by prepare_percpu_nmi(). + * + * IRQ line should not be enabled for the current CPU. + * As a CPU local operation, this should be called from non-preemptible + * context. + */ +void teardown_percpu_nmi(unsigned int irq) +{ + WARN_ON(preemptible()); + + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) { + if (WARN_ON(!irq_is_nmi(scoped_irqdesc))) + return; + irq_nmi_teardown(scoped_irqdesc); + } +} + +static int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state) +{ + struct irq_chip *chip; + int err = -EINVAL; do { chip = irq_data_get_irq_chip(data); + if (WARN_ON_ONCE(!chip)) + return -ENODEV; if (chip->irq_get_irqchip_state) break; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY @@ -2209,54 +2708,103 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, if (data) err = chip->irq_get_irqchip_state(data, which, state); - - irq_put_desc_busunlock(desc, flags); return err; } + +/** + * irq_get_irqchip_state - returns the irqchip state of a interrupt. 
+ * @irq: Interrupt line that is forwarded to a VM + * @which: One of IRQCHIP_STATE_* the caller wants to know about + * @state: a pointer to a boolean where the state is to be stored + * + * This call snapshots the internal irqchip state of an interrupt, + * returning into @state the bit corresponding to stage @which + * + * This function should be called with preemption disabled if the interrupt + * controller has per-cpu registers. + */ +int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool *state) +{ + scoped_irqdesc_get_and_buslock(irq, 0) { + struct irq_data *data = irq_desc_get_irq_data(scoped_irqdesc); + + return __irq_get_irqchip_state(data, which, state); + } + return -EINVAL; +} EXPORT_SYMBOL_GPL(irq_get_irqchip_state); /** - * irq_set_irqchip_state - set the state of a forwarded interrupt. - * @irq: Interrupt line that is forwarded to a VM - * @which: State to be restored (one of IRQCHIP_STATE_*) - * @val: Value corresponding to @which + * irq_set_irqchip_state - set the state of a forwarded interrupt. + * @irq: Interrupt line that is forwarded to a VM + * @which: State to be restored (one of IRQCHIP_STATE_*) + * @val: Value corresponding to @which * - * This call sets the internal irqchip state of an interrupt, - * depending on the value of @which. + * This call sets the internal irqchip state of an interrupt, depending on + * the value of @which. * - * This function should be called with preemption disabled if the - * interrupt controller has per-cpu registers. + * This function should be called with migration disabled if the interrupt + * controller has per-cpu registers. */ -int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, - bool val) +int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool val) { - struct irq_desc *desc; - struct irq_data *data; - struct irq_chip *chip; - unsigned long flags; - int err = -EINVAL; + scoped_irqdesc_get_and_buslock(irq, 0) { + struct irq_data *data = irq_desc_get_irq_data(scoped_irqdesc); + struct irq_chip *chip; - desc = irq_get_desc_buslock(irq, &flags, 0); - if (!desc) - return err; + do { + chip = irq_data_get_irq_chip(data); - data = irq_desc_get_irq_data(desc); + if (WARN_ON_ONCE(!chip)) + return -ENODEV; - do { - chip = irq_data_get_irq_chip(data); - if (chip->irq_set_irqchip_state) - break; -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - data = data->parent_data; -#else - data = NULL; -#endif - } while (data); + if (chip->irq_set_irqchip_state) + break; - if (data) - err = chip->irq_set_irqchip_state(data, which, val); + data = irqd_get_parent_data(data); + } while (data); - irq_put_desc_busunlock(desc, flags); - return err; + if (data) + return chip->irq_set_irqchip_state(data, which, val); + } + return -EINVAL; } EXPORT_SYMBOL_GPL(irq_set_irqchip_state); + +/** + * irq_has_action - Check whether an interrupt is requested + * @irq: The linux irq number + * + * Returns: A snapshot of the current state + */ +bool irq_has_action(unsigned int irq) +{ + bool res; + + rcu_read_lock(); + res = irq_desc_has_action(irq_to_desc(irq)); + rcu_read_unlock(); + return res; +} +EXPORT_SYMBOL_GPL(irq_has_action); + +/** + * irq_check_status_bit - Check whether bits in the irq descriptor status are set + * @irq: The linux irq number + * @bitmask: The bitmask to evaluate + * + * Returns: True if one of the bits in @bitmask is set + */ +bool irq_check_status_bit(unsigned int irq, unsigned int bitmask) +{ + struct irq_desc *desc; + bool res = false; + + rcu_read_lock(); + desc = 
irq_to_desc(irq);
+	if (desc)
+		res = !!(desc->status_use_accessors & bitmask);
+	rcu_read_unlock();
+	return res;
+}
+EXPORT_SYMBOL_GPL(irq_check_status_bit);
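
The hunks above rework the request_threaded_irq()/free_irq() paths and their kerneldoc. As a usage illustration only, here is a minimal sketch of a hypothetical driver following that contract; the foo_* names, register offsets and struct layout are invented for the example and are not part of this patch.

#include <linux/interrupt.h>
#include <linux/io.h>

#define FOO_IRQ_STATUS	0x00		/* hypothetical register offsets */
#define FOO_IRQ_MASK	0x04

struct foo_dev {
	void __iomem	*regs;
	unsigned int	irq;
	u32		pending;
};

static irqreturn_t foo_hardirq(int irq, void *dev_id)
{
	struct foo_dev *foo = dev_id;
	u32 status = readl(foo->regs + FOO_IRQ_STATUS);

	if (!status)
		return IRQ_NONE;	/* shared line, not our device */

	/* Mask at the device and defer the sleeping work to the thread. */
	writel(status, foo->regs + FOO_IRQ_MASK);
	foo->pending = status;
	return IRQ_WAKE_THREAD;
}

static irqreturn_t foo_thread_fn(int irq, void *dev_id)
{
	struct foo_dev *foo = dev_id;

	/* Process foo->pending here; this context may sleep. */
	writel(0, foo->regs + FOO_IRQ_MASK);	/* unmask at the device */
	return IRQ_HANDLED;
}

static int foo_setup_irq(struct foo_dev *foo)
{
	/*
	 * A shared line needs a unique non-NULL dev_id and, per the check
	 * added above, must not be combined with IRQF_NO_AUTOEN.
	 */
	return request_threaded_irq(foo->irq, foo_hardirq, foo_thread_fn,
				    IRQF_SHARED, "foo", foo);
}

static void foo_remove_irq(struct foo_dev *foo)
{
	/* Must not be called from interrupt context. */
	free_irq(foo->irq, foo);
}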
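
For the new per-CPU NMI interfaces, the kerneldoc above spells out a strict lifecycle: request_percpu_nmi() once, then prepare_percpu_nmi() and enable_percpu_nmi() on each CPU from non-preemptible context, with teardown in reverse order before free_percpu_nmi(). A sketch under those assumptions follows; the foo_* symbols and the CPU-hotplug-style callbacks are illustrative, and request_percpu_nmi() is shown with the affinity parameter as declared in this diff. request_nmi()/free_nmi() follow a similar pattern for non-percpu-devid lines.

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/percpu.h>

/* Hypothetical per-CPU cookie and saved irq number; not part of the patch. */
struct foo_pcpu {
	unsigned long count;
};

static DEFINE_PER_CPU(struct foo_pcpu, foo_pcpu_data);
static unsigned int foo_nmi_irq;

static irqreturn_t foo_nmi_handler(int irq, void *dev_id)
{
	/* dev_id is this CPU's instance of the percpu cookie. */
	struct foo_pcpu *p = dev_id;

	p->count++;			/* keep the NMI path minimal */
	return IRQ_HANDLED;
}

static int foo_request_nmi(unsigned int irq)
{
	foo_nmi_irq = irq;

	/*
	 * The line must be percpu-devid, not auto-enabled and NMI capable.
	 * Covering all possible CPUs also rules out sharing the line.
	 */
	return request_percpu_nmi(irq, foo_nmi_handler, "foo-nmi",
				  cpu_possible_mask, &foo_pcpu_data);
}

/* Per-CPU bring-up, e.g. a CPU hotplug "starting" callback (IRQs off). */
static int foo_nmi_starting_cpu(unsigned int cpu)
{
	int ret = prepare_percpu_nmi(foo_nmi_irq);

	if (!ret)
		enable_percpu_nmi(foo_nmi_irq, IRQ_TYPE_NONE);
	return ret;
}

/* Per-CPU teardown, mirroring the bring-up in reverse order. */
static int foo_nmi_dying_cpu(unsigned int cpu)
{
	disable_percpu_nmi(foo_nmi_irq);
	teardown_percpu_nmi(foo_nmi_irq);
	return 0;
}

static void foo_release_nmi(void)
{
	/* Only after every CPU has disabled and torn down its NMI. */
	free_percpu_nmi(foo_nmi_irq, &foo_pcpu_data);
}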
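
irq_get_irqchip_state() and irq_set_irqchip_state() snapshot and restore the irqchip-level state of a line, typically for interrupts forwarded to a VM. A minimal sketch, assuming IRQCHIP_STATE_PENDING is the state of interest and "host_irq" is just an example parameter name:

#include <linux/interrupt.h>
#include <linux/irq.h>

static int foo_save_pending(unsigned int host_irq, bool *pending)
{
	/* Per-CPU irqchips require preemption/migration to be disabled. */
	return irq_get_irqchip_state(host_irq, IRQCHIP_STATE_PENDING, pending);
}

static int foo_restore_pending(unsigned int host_irq, bool pending)
{
	/* Re-inject the previously saved pending bit into the irqchip. */
	return irq_set_irqchip_state(host_irq, IRQCHIP_STATE_PENDING, pending);
}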
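
The new irq_has_action() and irq_check_status_bit() helpers are lockless queries that only rely on RCU internally. A small sketch; the helper name is made up, IRQ_PER_CPU is just one possible bitmask, and which generic irq header provides the declarations is assumed here:

#include <linux/interrupt.h>
#include <linux/irqdesc.h>

static bool foo_irq_is_requested_per_cpu(unsigned int irq)
{
	/* Safe from most contexts: both helpers are plain snapshots. */
	return irq_has_action(irq) && irq_check_status_bit(irq, IRQ_PER_CPU);
}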
