diff options
Diffstat (limited to 'kernel/irq')
| -rw-r--r-- | kernel/irq/Kconfig | 42 | ||||
| -rw-r--r-- | kernel/irq/Makefile | 4 | ||||
| -rw-r--r-- | kernel/irq/affinity.c | 409 | ||||
| -rw-r--r-- | kernel/irq/autoprobe.c | 26 | ||||
| -rw-r--r-- | kernel/irq/chip.c | 926 | ||||
| -rw-r--r-- | kernel/irq/cpuhotplug.c | 56 | ||||
| -rw-r--r-- | kernel/irq/debugfs.c | 33 | ||||
| -rw-r--r-- | kernel/irq/devres.c | 171 | ||||
| -rw-r--r-- | kernel/irq/generic-chip.c | 206 | ||||
| -rw-r--r-- | kernel/irq/handle.c | 59 | ||||
| -rw-r--r-- | kernel/irq/internals.h | 111 | ||||
| -rw-r--r-- | kernel/irq/ipi-mux.c | 206 | ||||
| -rw-r--r-- | kernel/irq/ipi.c | 22 | ||||
| -rw-r--r-- | kernel/irq/irq_sim.c | 89 | ||||
| -rw-r--r-- | kernel/irq/irq_test.c | 236 | ||||
| -rw-r--r-- | kernel/irq/irqdesc.c | 552 | ||||
| -rw-r--r-- | kernel/irq/irqdomain.c | 991 | ||||
| -rw-r--r-- | kernel/irq/kexec.c | 36 | ||||
| -rw-r--r-- | kernel/irq/manage.c | 1578 | ||||
| -rw-r--r-- | kernel/irq/matrix.c | 36 | ||||
| -rw-r--r-- | kernel/irq/migration.c | 24 | ||||
| -rw-r--r-- | kernel/irq/msi.c | 1216 | ||||
| -rw-r--r-- | kernel/irq/pm.c | 67 | ||||
| -rw-r--r-- | kernel/irq/proc.c | 109 | ||||
| -rw-r--r-- | kernel/irq/resend.c | 80 | ||||
| -rw-r--r-- | kernel/irq/settings.h | 6 | ||||
| -rw-r--r-- | kernel/irq/spurious.c | 137 | ||||
| -rw-r--r-- | kernel/irq/timings.c | 1 |
28 files changed, 4315 insertions, 3114 deletions
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 10929eda9825..1b4254d19a73 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -6,10 +6,6 @@ menu "IRQ subsystem" config MAY_HAVE_SPARSE_IRQ bool -# Legacy support, required for itanic -config GENERIC_IRQ_LEGACY - bool - # Enable the generic irq autoprobe mechanism config GENERIC_IRQ_PROBE bool @@ -24,6 +20,7 @@ config GENERIC_IRQ_SHOW_LEVEL # Supports effective affinity mask config GENERIC_IRQ_EFFECTIVE_AFF_MASK + depends on SMP bool # Support for delayed migration from interrupt context @@ -46,10 +43,6 @@ config GENERIC_IRQ_INJECTION config HARDIRQS_SW_RESEND bool -# Edge style eoi based handler (cell) -config IRQ_EDGE_EOI_HANDLER - bool - # Generic configurable interrupt chip implementation config GENERIC_IRQ_CHIP bool @@ -82,18 +75,20 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS # Generic IRQ IPI support config GENERIC_IRQ_IPI bool + depends on SMP select IRQ_DOMAIN_HIERARCHY -# Generic MSI interrupt support -config GENERIC_MSI_IRQ +# Generic IRQ IPI Mux support +config GENERIC_IRQ_IPI_MUX bool + depends on SMP # Generic MSI hierarchical interrupt domain support -config GENERIC_MSI_IRQ_DOMAIN +config GENERIC_MSI_IRQ bool select IRQ_DOMAIN_HIERARCHY - select GENERIC_MSI_IRQ +# irqchip drivers should select this if they call iommu_dma_prepare_msi() config IRQ_MSI_IOMMU bool @@ -106,6 +101,10 @@ config GENERIC_IRQ_MATRIX_ALLOCATOR config GENERIC_IRQ_RESERVATION_MODE bool +# Snapshot for interrupt statistics +config GENERIC_IRQ_STAT_SNAPSHOT + bool + # Support forced irq threading config IRQ_FORCED_THREADING bool @@ -135,6 +134,25 @@ config GENERIC_IRQ_DEBUGFS If you don't know what to do here, say N. +# Clear forwarded VM interrupts during kexec. +# This option ensures the kernel clears active states for interrupts +# forwarded to virtual machines (VMs) during a machine kexec. 
+config GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD + bool + +config IRQ_KUNIT_TEST + bool "KUnit tests for IRQ management APIs" if !KUNIT_ALL_TESTS + depends on KUNIT=y + depends on SPARSE_IRQ + default KUNIT_ALL_TESTS + select IRQ_DOMAIN + imply SMP + help + This option enables KUnit tests for the IRQ subsystem API. These are + only for development and testing, not for regular kernel use cases. + + If unsure, say N. + endmenu config GENERIC_IRQ_MULTI_HANDLER diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index b4f53717d143..6ab3a4055667 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o +obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o kexec.o obj-$(CONFIG_IRQ_TIMINGS) += timings.o ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y) CFLAGS_timings.o += -DDEBUG @@ -15,6 +15,8 @@ obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o obj-$(CONFIG_PM_SLEEP) += pm.o obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o +obj-$(CONFIG_GENERIC_IRQ_IPI_MUX) += ipi-mux.o obj-$(CONFIG_SMP) += affinity.o obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o obj-$(CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR) += matrix.o +obj-$(CONFIG_IRQ_KUNIT_TEST) += irq_test.o diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index f7ff8919dc9b..4013e6ad2b2f 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -7,397 +7,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/cpu.h> -#include <linux/sort.h> - -static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, - unsigned int cpus_per_vec) -{ - const struct cpumask *siblmsk; - int cpu, sibl; - - for ( ; cpus_per_vec > 0; ) { - cpu = cpumask_first(nmsk); - - /* Should not happen, but I'm too lazy to think about it */ - if (cpu >= nr_cpu_ids) - return; - - cpumask_clear_cpu(cpu, nmsk); - cpumask_set_cpu(cpu, 
irqmsk); - cpus_per_vec--; - - /* If the cpu has siblings, use them first */ - siblmsk = topology_sibling_cpumask(cpu); - for (sibl = -1; cpus_per_vec > 0; ) { - sibl = cpumask_next(sibl, siblmsk); - if (sibl >= nr_cpu_ids) - break; - if (!cpumask_test_and_clear_cpu(sibl, nmsk)) - continue; - cpumask_set_cpu(sibl, irqmsk); - cpus_per_vec--; - } - } -} - -static cpumask_var_t *alloc_node_to_cpumask(void) -{ - cpumask_var_t *masks; - int node; - - masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL); - if (!masks) - return NULL; - - for (node = 0; node < nr_node_ids; node++) { - if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL)) - goto out_unwind; - } - - return masks; - -out_unwind: - while (--node >= 0) - free_cpumask_var(masks[node]); - kfree(masks); - return NULL; -} - -static void free_node_to_cpumask(cpumask_var_t *masks) -{ - int node; - - for (node = 0; node < nr_node_ids; node++) - free_cpumask_var(masks[node]); - kfree(masks); -} - -static void build_node_to_cpumask(cpumask_var_t *masks) -{ - int cpu; - - for_each_possible_cpu(cpu) - cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); -} - -static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, - const struct cpumask *mask, nodemask_t *nodemsk) -{ - int n, nodes = 0; - - /* Calculate the number of nodes in the supplied affinity mask */ - for_each_node(n) { - if (cpumask_intersects(mask, node_to_cpumask[n])) { - node_set(n, *nodemsk); - nodes++; - } - } - return nodes; -} - -struct node_vectors { - unsigned id; - - union { - unsigned nvectors; - unsigned ncpus; - }; -}; - -static int ncpus_cmp_func(const void *l, const void *r) -{ - const struct node_vectors *ln = l; - const struct node_vectors *rn = r; - - return ln->ncpus - rn->ncpus; -} - -/* - * Allocate vector number for each node, so that for each node: - * - * 1) the allocated number is >= 1 - * - * 2) the allocated numbver is <= active CPU number of this node - * - * The actual allocated total vectors may be less than @numvecs when - * 
active total CPU number is less than @numvecs. - * - * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' - * for each node. - */ -static void alloc_nodes_vectors(unsigned int numvecs, - cpumask_var_t *node_to_cpumask, - const struct cpumask *cpu_mask, - const nodemask_t nodemsk, - struct cpumask *nmsk, - struct node_vectors *node_vectors) -{ - unsigned n, remaining_ncpus = 0; - - for (n = 0; n < nr_node_ids; n++) { - node_vectors[n].id = n; - node_vectors[n].ncpus = UINT_MAX; - } - - for_each_node_mask(n, nodemsk) { - unsigned ncpus; - - cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); - ncpus = cpumask_weight(nmsk); - - if (!ncpus) - continue; - remaining_ncpus += ncpus; - node_vectors[n].ncpus = ncpus; - } - - numvecs = min_t(unsigned, remaining_ncpus, numvecs); - - sort(node_vectors, nr_node_ids, sizeof(node_vectors[0]), - ncpus_cmp_func, NULL); - - /* - * Allocate vectors for each node according to the ratio of this - * node's nr_cpus to remaining un-assigned ncpus. 'numvecs' is - * bigger than number of active numa nodes. Always start the - * allocation from the node with minimized nr_cpus. - * - * This way guarantees that each active node gets allocated at - * least one vector, and the theory is simple: over-allocation - * is only done when this node is assigned by one vector, so - * other nodes will be allocated >= 1 vector, since 'numvecs' is - * bigger than number of numa nodes. 
- * - * One perfect invariant is that number of allocated vectors for - * each node is <= CPU count of this node: - * - * 1) suppose there are two nodes: A and B - * ncpu(X) is CPU count of node X - * vecs(X) is the vector count allocated to node X via this - * algorithm - * - * ncpu(A) <= ncpu(B) - * ncpu(A) + ncpu(B) = N - * vecs(A) + vecs(B) = V - * - * vecs(A) = max(1, round_down(V * ncpu(A) / N)) - * vecs(B) = V - vecs(A) - * - * both N and V are integer, and 2 <= V <= N, suppose - * V = N - delta, and 0 <= delta <= N - 2 - * - * 2) obviously vecs(A) <= ncpu(A) because: - * - * if vecs(A) is 1, then vecs(A) <= ncpu(A) given - * ncpu(A) >= 1 - * - * otherwise, - * vecs(A) <= V * ncpu(A) / N <= ncpu(A), given V <= N - * - * 3) prove how vecs(B) <= ncpu(B): - * - * if round_down(V * ncpu(A) / N) == 0, vecs(B) won't be - * over-allocated, so vecs(B) <= ncpu(B), - * - * otherwise: - * - * vecs(A) = - * round_down(V * ncpu(A) / N) = - * round_down((N - delta) * ncpu(A) / N) = - * round_down((N * ncpu(A) - delta * ncpu(A)) / N) >= - * round_down((N * ncpu(A) - delta * N) / N) = - * cpu(A) - delta - * - * then: - * - * vecs(A) - V >= ncpu(A) - delta - V - * => - * V - vecs(A) <= V + delta - ncpu(A) - * => - * vecs(B) <= N - ncpu(A) - * => - * vecs(B) <= cpu(B) - * - * For nodes >= 3, it can be thought as one node and another big - * node given that is exactly what this algorithm is implemented, - * and we always re-calculate 'remaining_ncpus' & 'numvecs', and - * finally for each node X: vecs(X) <= ncpu(X). 
- * - */ - for (n = 0; n < nr_node_ids; n++) { - unsigned nvectors, ncpus; - - if (node_vectors[n].ncpus == UINT_MAX) - continue; - - WARN_ON_ONCE(numvecs == 0); - - ncpus = node_vectors[n].ncpus; - nvectors = max_t(unsigned, 1, - numvecs * ncpus / remaining_ncpus); - WARN_ON_ONCE(nvectors > ncpus); - - node_vectors[n].nvectors = nvectors; - - remaining_ncpus -= ncpus; - numvecs -= nvectors; - } -} - -static int __irq_build_affinity_masks(unsigned int startvec, - unsigned int numvecs, - unsigned int firstvec, - cpumask_var_t *node_to_cpumask, - const struct cpumask *cpu_mask, - struct cpumask *nmsk, - struct irq_affinity_desc *masks) -{ - unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0; - unsigned int last_affv = firstvec + numvecs; - unsigned int curvec = startvec; - nodemask_t nodemsk = NODE_MASK_NONE; - struct node_vectors *node_vectors; - - if (!cpumask_weight(cpu_mask)) - return 0; - - nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk); - - /* - * If the number of nodes in the mask is greater than or equal the - * number of vectors we just spread the vectors across the nodes. 
- */ - if (numvecs <= nodes) { - for_each_node_mask(n, nodemsk) { - cpumask_or(&masks[curvec].mask, &masks[curvec].mask, - node_to_cpumask[n]); - if (++curvec == last_affv) - curvec = firstvec; - } - return numvecs; - } - - node_vectors = kcalloc(nr_node_ids, - sizeof(struct node_vectors), - GFP_KERNEL); - if (!node_vectors) - return -ENOMEM; - - /* allocate vector number for each node */ - alloc_nodes_vectors(numvecs, node_to_cpumask, cpu_mask, - nodemsk, nmsk, node_vectors); - - for (i = 0; i < nr_node_ids; i++) { - unsigned int ncpus, v; - struct node_vectors *nv = &node_vectors[i]; - - if (nv->nvectors == UINT_MAX) - continue; - - /* Get the cpus on this node which are in the mask */ - cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]); - ncpus = cpumask_weight(nmsk); - if (!ncpus) - continue; - - WARN_ON_ONCE(nv->nvectors > ncpus); - - /* Account for rounding errors */ - extra_vecs = ncpus - nv->nvectors * (ncpus / nv->nvectors); - - /* Spread allocated vectors on CPUs of the current node */ - for (v = 0; v < nv->nvectors; v++, curvec++) { - cpus_per_vec = ncpus / nv->nvectors; - - /* Account for extra vectors to compensate rounding errors */ - if (extra_vecs) { - cpus_per_vec++; - --extra_vecs; - } - - /* - * wrapping has to be considered given 'startvec' - * may start anywhere - */ - if (curvec >= last_affv) - curvec = firstvec; - irq_spread_init_one(&masks[curvec].mask, nmsk, - cpus_per_vec); - } - done += nv->nvectors; - } - kfree(node_vectors); - return done; -} - -/* - * build affinity in two stages: - * 1) spread present CPU on these vectors - * 2) spread other possible CPUs on these vectors - */ -static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs, - unsigned int firstvec, - struct irq_affinity_desc *masks) -{ - unsigned int curvec = startvec, nr_present = 0, nr_others = 0; - cpumask_var_t *node_to_cpumask; - cpumask_var_t nmsk, npresmsk; - int ret = -ENOMEM; - - if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) - return ret; - 
- if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) - goto fail_nmsk; - - node_to_cpumask = alloc_node_to_cpumask(); - if (!node_to_cpumask) - goto fail_npresmsk; - - /* Stabilize the cpumasks */ - cpus_read_lock(); - build_node_to_cpumask(node_to_cpumask); - - /* Spread on present CPUs starting from affd->pre_vectors */ - ret = __irq_build_affinity_masks(curvec, numvecs, firstvec, - node_to_cpumask, cpu_present_mask, - nmsk, masks); - if (ret < 0) - goto fail_build_affinity; - nr_present = ret; - - /* - * Spread on non present CPUs starting from the next vector to be - * handled. If the spreading of present CPUs already exhausted the - * vector space, assign the non present CPUs to the already spread - * out vectors. - */ - if (nr_present >= numvecs) - curvec = firstvec; - else - curvec = firstvec + nr_present; - cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); - ret = __irq_build_affinity_masks(curvec, numvecs, firstvec, - node_to_cpumask, npresmsk, nmsk, - masks); - if (ret >= 0) - nr_others = ret; - - fail_build_affinity: - cpus_read_unlock(); - - if (ret >= 0) - WARN_ON(nr_present + nr_others < numvecs); - - free_node_to_cpumask(node_to_cpumask); - - fail_npresmsk: - free_cpumask_var(npresmsk); - - fail_nmsk: - free_cpumask_var(nmsk); - return ret < 0 ? ret : 0; -} +#include <linux/group_cpus.h> static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs) { @@ -459,17 +69,20 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) * have multiple sets, build each sets affinity mask separately. 
*/ for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) { - unsigned int this_vecs = affd->set_size[i]; - int ret; + unsigned int nr_masks, this_vecs = affd->set_size[i]; + struct cpumask *result = group_cpus_evenly(this_vecs, &nr_masks); - ret = irq_build_affinity_masks(curvec, this_vecs, - curvec, masks); - if (ret) { + if (!result) { kfree(masks); return NULL; } - curvec += this_vecs; - usedvecs += this_vecs; + + for (int j = 0; j < nr_masks; j++) + cpumask_copy(&masks[curvec + j].mask, &result[j]); + kfree(result); + + curvec += nr_masks; + usedvecs += nr_masks; } /* Fill out vectors at the end that don't need affinity */ diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index ae60cae24e9a..d0af8a8b3ae6 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -43,18 +43,16 @@ unsigned long probe_irq_on(void) * flush such a longstanding irq before considering it as spurious. */ for_each_irq_desc_reverse(i, desc) { - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); if (!desc->action && irq_settings_can_probe(desc)) { /* * Some chips need to know about probing in * progress: */ if (desc->irq_data.chip->irq_set_type) - desc->irq_data.chip->irq_set_type(&desc->irq_data, - IRQ_TYPE_PROBE); + desc->irq_data.chip->irq_set_type(&desc->irq_data, IRQ_TYPE_PROBE); irq_activate_and_startup(desc, IRQ_NORESEND); } - raw_spin_unlock_irq(&desc->lock); } /* Wait for longstanding interrupts to trigger. 
*/ @@ -66,13 +64,12 @@ unsigned long probe_irq_on(void) * happened in the previous stage, it may have masked itself) */ for_each_irq_desc_reverse(i, desc) { - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); if (!desc->action && irq_settings_can_probe(desc)) { desc->istate |= IRQS_AUTODETECT | IRQS_WAITING; if (irq_activate_and_startup(desc, IRQ_NORESEND)) desc->istate |= IRQS_PENDING; } - raw_spin_unlock_irq(&desc->lock); } /* @@ -84,18 +81,16 @@ unsigned long probe_irq_on(void) * Now filter out any obviously spurious interrupts */ for_each_irq_desc(i, desc) { - raw_spin_lock_irq(&desc->lock); - + guard(raw_spinlock_irq)(&desc->lock); if (desc->istate & IRQS_AUTODETECT) { /* It triggered already - consider it spurious. */ if (!(desc->istate & IRQS_WAITING)) { desc->istate &= ~IRQS_AUTODETECT; irq_shutdown_and_deactivate(desc); - } else - if (i < 32) - mask |= 1 << i; + } else if (i < 32) { + mask |= 1 << i; + } } - raw_spin_unlock_irq(&desc->lock); } return mask; @@ -121,7 +116,7 @@ unsigned int probe_irq_mask(unsigned long val) int i; for_each_irq_desc(i, desc) { - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); if (desc->istate & IRQS_AUTODETECT) { if (i < 16 && !(desc->istate & IRQS_WAITING)) mask |= 1 << i; @@ -129,7 +124,6 @@ unsigned int probe_irq_mask(unsigned long val) desc->istate &= ~IRQS_AUTODETECT; irq_shutdown_and_deactivate(desc); } - raw_spin_unlock_irq(&desc->lock); } mutex_unlock(&probing_active); @@ -160,8 +154,7 @@ int probe_irq_off(unsigned long val) struct irq_desc *desc; for_each_irq_desc(i, desc) { - raw_spin_lock_irq(&desc->lock); - + guard(raw_spinlock_irq)(&desc->lock); if (desc->istate & IRQS_AUTODETECT) { if (!(desc->istate & IRQS_WAITING)) { if (!nr_of_irqs) @@ -171,7 +164,6 @@ int probe_irq_off(unsigned long val) desc->istate &= ~IRQS_AUTODETECT; irq_shutdown_and_deactivate(desc); } - raw_spin_unlock_irq(&desc->lock); } mutex_unlock(&probing_active); diff --git a/kernel/irq/chip.c 
b/kernel/irq/chip.c index c09324663088..678f094d261a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -34,101 +34,80 @@ struct irqaction chained_action = { }; /** - * irq_set_chip - set the irq chip for an irq - * @irq: irq number - * @chip: pointer to irq chip description structure + * irq_set_chip - set the irq chip for an irq + * @irq: irq number + * @chip: pointer to irq chip description structure */ -int irq_set_chip(unsigned int irq, struct irq_chip *chip) +int irq_set_chip(unsigned int irq, const struct irq_chip *chip) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); + int ret = -EINVAL; - if (!desc) - return -EINVAL; - - if (!chip) - chip = &no_irq_chip; - - desc->irq_data.chip = chip; - irq_put_desc_unlock(desc, flags); - /* - * For !CONFIG_SPARSE_IRQ make the irq show up in - * allocated_irqs. - */ - irq_mark_irq(irq); - return 0; + scoped_irqdesc_get_and_lock(irq, 0) { + scoped_irqdesc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip); + ret = 0; + } + /* For !CONFIG_SPARSE_IRQ make the irq show up in allocated_irqs. 
*/ + if (!ret) + irq_mark_irq(irq); + return ret; } EXPORT_SYMBOL(irq_set_chip); /** - * irq_set_irq_type - set the irq trigger type for an irq - * @irq: irq number - * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h + * irq_set_irq_type - set the irq trigger type for an irq + * @irq: irq number + * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h */ int irq_set_irq_type(unsigned int irq, unsigned int type) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); - int ret = 0; - - if (!desc) - return -EINVAL; - - ret = __irq_set_trigger(desc, type); - irq_put_desc_busunlock(desc, flags); - return ret; + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) + return __irq_set_trigger(scoped_irqdesc, type); + return -EINVAL; } EXPORT_SYMBOL(irq_set_irq_type); /** - * irq_set_handler_data - set irq handler data for an irq - * @irq: Interrupt number - * @data: Pointer to interrupt specific data + * irq_set_handler_data - set irq handler data for an irq + * @irq: Interrupt number + * @data: Pointer to interrupt specific data * - * Set the hardware irq controller data for an irq + * Set the hardware irq controller data for an irq */ int irq_set_handler_data(unsigned int irq, void *data) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - - if (!desc) - return -EINVAL; - desc->irq_common_data.handler_data = data; - irq_put_desc_unlock(desc, flags); - return 0; + scoped_irqdesc_get_and_lock(irq, 0) { + scoped_irqdesc->irq_common_data.handler_data = data; + return 0; + } + return -EINVAL; } EXPORT_SYMBOL(irq_set_handler_data); /** - * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset - * @irq_base: Interrupt number base - * @irq_offset: Interrupt number offset - * @entry: Pointer to MSI descriptor data + * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset + * @irq_base: Interrupt number base + * @irq_offset: 
Interrupt number offset + * @entry: Pointer to MSI descriptor data * - * Set the MSI descriptor entry for an irq at offset + * Set the MSI descriptor entry for an irq at offset */ -int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, - struct msi_desc *entry) -{ - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL); - - if (!desc) - return -EINVAL; - desc->irq_common_data.msi_desc = entry; - if (entry && !irq_offset) - entry->irq = irq_base; - irq_put_desc_unlock(desc, flags); - return 0; +int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, struct msi_desc *entry) +{ + scoped_irqdesc_get_and_lock(irq_base + irq_offset, IRQ_GET_DESC_CHECK_GLOBAL) { + scoped_irqdesc->irq_common_data.msi_desc = entry; + if (entry && !irq_offset) + entry->irq = irq_base; + return 0; + } + return -EINVAL; } /** - * irq_set_msi_desc - set MSI descriptor data for an irq - * @irq: Interrupt number - * @entry: Pointer to MSI descriptor data + * irq_set_msi_desc - set MSI descriptor data for an irq + * @irq: Interrupt number + * @entry: Pointer to MSI descriptor data * - * Set the MSI descriptor entry for an irq + * Set the MSI descriptor entry for an irq */ int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) { @@ -136,22 +115,19 @@ int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) } /** - * irq_set_chip_data - set irq chip data for an irq - * @irq: Interrupt number - * @data: Pointer to chip specific data + * irq_set_chip_data - set irq chip data for an irq + * @irq: Interrupt number + * @data: Pointer to chip specific data * - * Set the hardware irq chip data for an irq + * Set the hardware irq chip data for an irq */ int irq_set_chip_data(unsigned int irq, void *data) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - - if (!desc) - return -EINVAL; - desc->irq_data.chip_data = data; - irq_put_desc_unlock(desc, flags); 
- return 0; + scoped_irqdesc_get_and_lock(irq, 0) { + scoped_irqdesc->irq_data.chip_data = data; + return 0; + } + return -EINVAL; } EXPORT_SYMBOL(irq_set_chip_data); @@ -191,7 +167,8 @@ enum { #ifdef CONFIG_SMP static int -__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) +__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, + bool force) { struct irq_data *d = irq_desc_get_irq_data(desc); @@ -200,7 +177,7 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) irqd_clr_managed_shutdown(d); - if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) { + if (!cpumask_intersects(aff, cpu_online_mask)) { /* * Catch code which fiddles with enable_irq() on a managed * and potentially shutdown IRQ. Chained interrupt @@ -225,14 +202,51 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) return IRQ_STARTUP_ABORT; return IRQ_STARTUP_MANAGED; } + +void irq_startup_managed(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + + /* + * Clear managed-shutdown flag, so we don't repeat managed-startup for + * multiple hotplugs, and cause imbalanced disable depth. + */ + irqd_clr_managed_shutdown(d); + + /* + * Only start it up when the disable depth is 1, so that a disable, + * hotunplug, hotplug sequence does not end up enabling it during + * hotplug unconditionally. 
+ */ + desc->depth--; + if (!desc->depth) + irq_startup(desc, IRQ_RESEND, IRQ_START_COND); +} + #else static __always_inline int -__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) +__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, + bool force) { return IRQ_STARTUP_NORMAL; } #endif +static void irq_enable(struct irq_desc *desc) +{ + if (!irqd_irq_disabled(&desc->irq_data)) { + unmask_irq(desc); + } else { + irq_state_clr_disabled(desc); + if (desc->irq_data.chip->irq_enable) { + desc->irq_data.chip->irq_enable(&desc->irq_data); + irq_state_clr_masked(desc); + } else { + unmask_irq(desc); + } + } +} + static int __irq_startup(struct irq_desc *desc) { struct irq_data *d = irq_desc_get_irq_data(desc); @@ -255,7 +269,7 @@ static int __irq_startup(struct irq_desc *desc) int irq_startup(struct irq_desc *desc, bool resend, bool force) { struct irq_data *d = irq_desc_get_irq_data(desc); - struct cpumask *aff = irq_data_get_affinity_mask(d); + const struct cpumask *aff = irq_data_get_affinity_mask(d); int ret = 0; desc->depth = 0; @@ -276,6 +290,7 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force) ret = __irq_startup(desc); break; case IRQ_STARTUP_ABORT: + desc->depth = 1; irqd_set_managed_shutdown(d); return 0; } @@ -307,7 +322,14 @@ static void __irq_disable(struct irq_desc *desc, bool mask); void irq_shutdown(struct irq_desc *desc) { if (irqd_is_started(&desc->irq_data)) { - desc->depth = 1; + clear_irq_resend(desc); + /* + * Increment disable depth, so that a managed shutdown on + * CPU hotunplug preserves the actual disabled state when the + * CPU comes back online. See irq_startup_managed(). 
+ */ + desc->depth++; + if (desc->irq_data.chip->irq_shutdown) { desc->irq_data.chip->irq_shutdown(&desc->irq_data); irq_state_set_disabled(desc); @@ -332,21 +354,6 @@ void irq_shutdown_and_deactivate(struct irq_desc *desc) irq_domain_deactivate_irq(&desc->irq_data); } -void irq_enable(struct irq_desc *desc) -{ - if (!irqd_irq_disabled(&desc->irq_data)) { - unmask_irq(desc); - } else { - irq_state_clr_disabled(desc); - if (desc->irq_data.chip->irq_enable) { - desc->irq_data.chip->irq_enable(&desc->irq_data); - irq_state_clr_masked(desc); - } else { - unmask_irq(desc); - } - } -} - static void __irq_disable(struct irq_desc *desc, bool mask) { if (irqd_irq_disabled(&desc->irq_data)) { @@ -450,67 +457,33 @@ void unmask_threaded_irq(struct irq_desc *desc) unmask_irq(desc); } -/* - * handle_nested_irq - Handle a nested irq from a irq thread - * @irq: the interrupt number - * - * Handle interrupts which are nested into a threaded interrupt - * handler. The handler function is called inside the calling - * threads context. 
- */ -void handle_nested_irq(unsigned int irq) +/* Busy wait until INPROGRESS is cleared */ +static bool irq_wait_on_inprogress(struct irq_desc *desc) { - struct irq_desc *desc = irq_to_desc(irq); - struct irqaction *action; - irqreturn_t action_ret; - - might_sleep(); - - raw_spin_lock_irq(&desc->lock); - - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + if (IS_ENABLED(CONFIG_SMP)) { + do { + raw_spin_unlock(&desc->lock); + while (irqd_irq_inprogress(&desc->irq_data)) + cpu_relax(); + raw_spin_lock(&desc->lock); + } while (irqd_irq_inprogress(&desc->irq_data)); - action = desc->action; - if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) { - desc->istate |= IRQS_PENDING; - goto out_unlock; + /* Might have been disabled in meantime */ + return !irqd_irq_disabled(&desc->irq_data) && desc->action; } - - kstat_incr_irqs_this_cpu(desc); - irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); - raw_spin_unlock_irq(&desc->lock); - - action_ret = IRQ_NONE; - for_each_action_of_desc(desc, action) - action_ret |= action->thread_fn(action->irq, action->dev_id); - - if (!irq_settings_no_debug(desc)) - note_interrupt(desc, action_ret); - - raw_spin_lock_irq(&desc->lock); - irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); - -out_unlock: - raw_spin_unlock_irq(&desc->lock); + return false; } -EXPORT_SYMBOL_GPL(handle_nested_irq); -static bool irq_check_poll(struct irq_desc *desc) +static bool irq_can_handle_pm(struct irq_desc *desc) { - if (!(desc->istate & IRQS_POLL_INPROGRESS)) - return false; - return irq_wait_for_poll(desc); -} - -static bool irq_may_run(struct irq_desc *desc) -{ - unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED; + struct irq_data *irqd = &desc->irq_data; + const struct cpumask *aff; /* * If the interrupt is not in progress and is not an armed * wakeup interrupt, proceed. 
*/ - if (!irqd_has_set(&desc->irq_data, mask)) + if (!irqd_has_set(irqd, IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED)) return true; /* @@ -518,86 +491,167 @@ static bool irq_may_run(struct irq_desc *desc) * and suspended, disable it and notify the pm core about the * event. */ - if (irq_pm_check_wakeup(desc)) + if (unlikely(irqd_has_set(irqd, IRQD_WAKEUP_ARMED))) { + irq_pm_handle_wakeup(desc); + return false; + } + + /* Check whether the interrupt is polled on another CPU */ + if (unlikely(desc->istate & IRQS_POLL_INPROGRESS)) { + if (WARN_ONCE(irq_poll_cpu == smp_processor_id(), + "irq poll in progress on cpu %d for irq %d\n", + smp_processor_id(), desc->irq_data.irq)) + return false; + return irq_wait_on_inprogress(desc); + } + + /* The below works only for single target interrupts */ + if (!IS_ENABLED(CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK) || + !irqd_is_single_target(irqd) || desc->handle_irq != handle_edge_irq) return false; /* - * Handle a potential concurrent poll on a different core. + * If the interrupt affinity was moved to this CPU and the + * interrupt is currently handled on the previous target CPU, then + * busy wait for INPROGRESS to be cleared. Otherwise for edge type + * interrupts the handler might get stuck on the previous target: + * + * CPU 0 CPU 1 (new target) + * handle_edge_irq() + * repeat: + * handle_event() handle_edge_irq() + * if (INPROGESS) { + * set(PENDING); + * mask(); + * return; + * } + * if (PENDING) { + * clear(PENDING); + * unmask(); + * goto repeat; + * } + * + * This happens when the device raises interrupts with a high rate + * and always before handle_event() completes and the CPU0 handler + * can clear INPROGRESS. This has been observed in virtual machines. 
*/ - return irq_check_poll(desc); + aff = irq_data_get_effective_affinity_mask(irqd); + if (cpumask_first(aff) != smp_processor_id()) + return false; + return irq_wait_on_inprogress(desc); +} + +static inline bool irq_can_handle_actions(struct irq_desc *desc) +{ + desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + + if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; + return false; + } + return true; +} + +static inline bool irq_can_handle(struct irq_desc *desc) +{ + if (!irq_can_handle_pm(desc)) + return false; + + return irq_can_handle_actions(desc); } /** - * handle_simple_irq - Simple and software-decoded IRQs. - * @desc: the interrupt description structure for this irq + * handle_nested_irq - Handle a nested irq from a irq thread + * @irq: the interrupt number * - * Simple interrupts are either sent from a demultiplexing interrupt - * handler or come from hardware, where no interrupt hardware control - * is necessary. - * - * Note: The caller is expected to handle the ack, clear, mask and - * unmask issues if necessary. + * Handle interrupts which are nested into a threaded interrupt + * handler. The handler function is called inside the calling threads + * context. 
*/ -void handle_simple_irq(struct irq_desc *desc) +void handle_nested_irq(unsigned int irq) { - raw_spin_lock(&desc->lock); + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + irqreturn_t action_ret; - if (!irq_may_run(desc)) - goto out_unlock; + might_sleep(); - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + scoped_guard(raw_spinlock_irq, &desc->lock) { + if (!irq_can_handle_actions(desc)) + return; - if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { - desc->istate |= IRQS_PENDING; - goto out_unlock; + action = desc->action; + kstat_incr_irqs_this_cpu(desc); + atomic_inc(&desc->threads_active); } + action_ret = IRQ_NONE; + for_each_action_of_desc(desc, action) + action_ret |= action->thread_fn(action->irq, action->dev_id); + + if (!irq_settings_no_debug(desc)) + note_interrupt(desc, action_ret); + + wake_threads_waitq(desc); +} +EXPORT_SYMBOL_GPL(handle_nested_irq); + +/** + * handle_simple_irq - Simple and software-decoded IRQs. + * @desc: the interrupt description structure for this irq + * + * Simple interrupts are either sent from a demultiplexing interrupt + * handler or come from hardware, where no interrupt hardware control is + * necessary. + * + * Note: The caller is expected to handle the ack, clear, mask and unmask + * issues if necessary. + */ +void handle_simple_irq(struct irq_desc *desc) +{ + guard(raw_spinlock)(&desc->lock); + + if (!irq_can_handle_pm(desc)) { + if (irqd_needs_resend_when_in_progress(&desc->irq_data)) + desc->istate |= IRQS_PENDING; + return; + } + + if (!irq_can_handle_actions(desc)) + return; + kstat_incr_irqs_this_cpu(desc); handle_irq_event(desc); - -out_unlock: - raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_simple_irq); /** - * handle_untracked_irq - Simple and software-decoded IRQs. - * @desc: the interrupt description structure for this irq + * handle_untracked_irq - Simple and software-decoded IRQs. 
+ * @desc: the interrupt description structure for this irq * - * Untracked interrupts are sent from a demultiplexing interrupt - * handler when the demultiplexer does not know which device it its - * multiplexed irq domain generated the interrupt. IRQ's handled - * through here are not subjected to stats tracking, randomness, or - * spurious interrupt detection. + * Untracked interrupts are sent from a demultiplexing interrupt handler + * when the demultiplexer does not know which device it its multiplexed irq + * domain generated the interrupt. IRQ's handled through here are not + * subjected to stats tracking, randomness, or spurious interrupt + * detection. * - * Note: Like handle_simple_irq, the caller is expected to handle - * the ack, clear, mask and unmask issues if necessary. + * Note: Like handle_simple_irq, the caller is expected to handle the ack, + * clear, mask and unmask issues if necessary. */ void handle_untracked_irq(struct irq_desc *desc) { - raw_spin_lock(&desc->lock); - - if (!irq_may_run(desc)) - goto out_unlock; - - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + scoped_guard(raw_spinlock, &desc->lock) { + if (!irq_can_handle(desc)) + return; - if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { - desc->istate |= IRQS_PENDING; - goto out_unlock; + desc->istate &= ~IRQS_PENDING; + irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); } - desc->istate &= ~IRQS_PENDING; - irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); - raw_spin_unlock(&desc->lock); - __handle_irq_event_percpu(desc); - raw_spin_lock(&desc->lock); - irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); - -out_unlock: - raw_spin_unlock(&desc->lock); + scoped_guard(raw_spinlock, &desc->lock) + irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); } EXPORT_SYMBOL_GPL(handle_untracked_irq); @@ -620,40 +674,26 @@ static void cond_unmask_irq(struct irq_desc *desc) } /** - * handle_level_irq - Level type irq handler - * @desc: the interrupt description structure for this irq + * 
handle_level_irq - Level type irq handler + * @desc: the interrupt description structure for this irq * - * Level type interrupts are active as long as the hardware line has - * the active level. This may require to mask the interrupt and unmask - * it after the associated handler has acknowledged the device, so the - * interrupt line is back to inactive. + * Level type interrupts are active as long as the hardware line has the + * active level. This may require to mask the interrupt and unmask it after + * the associated handler has acknowledged the device, so the interrupt + * line is back to inactive. */ void handle_level_irq(struct irq_desc *desc) { - raw_spin_lock(&desc->lock); + guard(raw_spinlock)(&desc->lock); mask_ack_irq(desc); - if (!irq_may_run(desc)) - goto out_unlock; - - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); - - /* - * If its disabled or no action available - * keep it masked and get out of here - */ - if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { - desc->istate |= IRQS_PENDING; - goto out_unlock; - } + if (!irq_can_handle(desc)) + return; kstat_incr_irqs_this_cpu(desc); handle_irq_event(desc); cond_unmask_irq(desc); - -out_unlock: - raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_level_irq); @@ -678,34 +718,43 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) } } +static inline void cond_eoi_irq(struct irq_chip *chip, struct irq_data *data) +{ + if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) + chip->irq_eoi(data); +} + /** - * handle_fasteoi_irq - irq handler for transparent controllers - * @desc: the interrupt description structure for this irq + * handle_fasteoi_irq - irq handler for transparent controllers + * @desc: the interrupt description structure for this irq * - * Only a single callback will be issued to the chip: an ->eoi() - * call when the interrupt has been serviced. 
This enables support - * for modern forms of interrupt handlers, which handle the flow - * details in hardware, transparently. + * Only a single callback will be issued to the chip: an ->eoi() call when + * the interrupt has been serviced. This enables support for modern forms + * of interrupt handlers, which handle the flow details in hardware, + * transparently. */ void handle_fasteoi_irq(struct irq_desc *desc) { struct irq_chip *chip = desc->irq_data.chip; - raw_spin_lock(&desc->lock); - - if (!irq_may_run(desc)) - goto out; - - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + guard(raw_spinlock)(&desc->lock); /* - * If its disabled or no action available - * then mask it and get out of here: + * When an affinity change races with IRQ handling, the next interrupt + * can arrive on the new CPU before the original CPU has completed + * handling the previous one - it may need to be resent. */ - if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { - desc->istate |= IRQS_PENDING; + if (!irq_can_handle_pm(desc)) { + if (irqd_needs_resend_when_in_progress(&desc->irq_data)) + desc->istate |= IRQS_PENDING; + cond_eoi_irq(chip, &desc->irq_data); + return; + } + + if (!irq_can_handle_actions(desc)) { mask_irq(desc); - goto out; + cond_eoi_irq(chip, &desc->irq_data); + return; } kstat_incr_irqs_this_cpu(desc); @@ -716,12 +765,11 @@ void handle_fasteoi_irq(struct irq_desc *desc) cond_unmask_eoi_irq(desc, chip); - raw_spin_unlock(&desc->lock); - return; -out: - if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) - chip->irq_eoi(&desc->irq_data); - raw_spin_unlock(&desc->lock); + /* + * When the race described above happens this will resend the interrupt. 
+ */ + if (unlikely(desc->istate & IRQS_PENDING)) + check_irq_resend(desc, false); } EXPORT_SYMBOL_GPL(handle_fasteoi_irq); @@ -759,40 +807,27 @@ void handle_fasteoi_nmi(struct irq_desc *desc) EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); /** - * handle_edge_irq - edge type IRQ handler - * @desc: the interrupt description structure for this irq + * handle_edge_irq - edge type IRQ handler + * @desc: the interrupt description structure for this irq * - * Interrupt occurs on the falling and/or rising edge of a hardware - * signal. The occurrence is latched into the irq controller hardware - * and must be acked in order to be reenabled. After the ack another - * interrupt can happen on the same source even before the first one - * is handled by the associated event handler. If this happens it - * might be necessary to disable (mask) the interrupt depending on the - * controller hardware. This requires to reenable the interrupt inside - * of the loop which handles the interrupts which have arrived while - * the handler was running. If all pending interrupts are handled, the - * loop is left. + * Interrupt occurs on the falling and/or rising edge of a hardware + * signal. The occurrence is latched into the irq controller hardware and + * must be acked in order to be reenabled. After the ack another interrupt + * can happen on the same source even before the first one is handled by + * the associated event handler. If this happens it might be necessary to + * disable (mask) the interrupt depending on the controller hardware. This + * requires to reenable the interrupt inside of the loop which handles the + * interrupts which have arrived while the handler was running. If all + * pending interrupts are handled, the loop is left. 
*/ void handle_edge_irq(struct irq_desc *desc) { - raw_spin_lock(&desc->lock); - - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); - - if (!irq_may_run(desc)) { - desc->istate |= IRQS_PENDING; - mask_ack_irq(desc); - goto out_unlock; - } + guard(raw_spinlock)(&desc->lock); - /* - * If its disabled or no action available then mask it and get - * out of here. - */ - if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { + if (!irq_can_handle(desc)) { desc->istate |= IRQS_PENDING; mask_ack_irq(desc); - goto out_unlock; + return; } kstat_incr_irqs_this_cpu(desc); @@ -803,7 +838,7 @@ void handle_edge_irq(struct irq_desc *desc) do { if (unlikely(!desc->action)) { mask_irq(desc); - goto out_unlock; + return; } /* @@ -819,61 +854,10 @@ void handle_edge_irq(struct irq_desc *desc) handle_irq_event(desc); - } while ((desc->istate & IRQS_PENDING) && - !irqd_irq_disabled(&desc->irq_data)); - -out_unlock: - raw_spin_unlock(&desc->lock); + } while ((desc->istate & IRQS_PENDING) && !irqd_irq_disabled(&desc->irq_data)); } EXPORT_SYMBOL(handle_edge_irq); -#ifdef CONFIG_IRQ_EDGE_EOI_HANDLER -/** - * handle_edge_eoi_irq - edge eoi type IRQ handler - * @desc: the interrupt description structure for this irq - * - * Similar as the above handle_edge_irq, but using eoi and w/o the - * mask/unmask logic. - */ -void handle_edge_eoi_irq(struct irq_desc *desc) -{ - struct irq_chip *chip = irq_desc_get_chip(desc); - - raw_spin_lock(&desc->lock); - - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); - - if (!irq_may_run(desc)) { - desc->istate |= IRQS_PENDING; - goto out_eoi; - } - - /* - * If its disabled or no action available then mask it and get - * out of here. 
- */ - if (irqd_irq_disabled(&desc->irq_data) || !desc->action) { - desc->istate |= IRQS_PENDING; - goto out_eoi; - } - - kstat_incr_irqs_this_cpu(desc); - - do { - if (unlikely(!desc->action)) - goto out_eoi; - - handle_irq_event(desc); - - } while ((desc->istate & IRQS_PENDING) && - !irqd_irq_disabled(&desc->irq_data)); - -out_eoi: - chip->irq_eoi(&desc->irq_data); - raw_spin_unlock(&desc->lock); -} -#endif - /** * handle_percpu_irq - Per CPU local irq handler * @desc: the interrupt description structure for this irq @@ -913,8 +897,9 @@ void handle_percpu_irq(struct irq_desc *desc) void handle_percpu_devid_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - struct irqaction *action = desc->action; unsigned int irq = irq_desc_get_irq(desc); + unsigned int cpu = smp_processor_id(); + struct irqaction *action; irqreturn_t res; /* @@ -926,12 +911,15 @@ void handle_percpu_devid_irq(struct irq_desc *desc) if (chip->irq_ack) chip->irq_ack(&desc->irq_data); + for (action = desc->action; action; action = action->next) + if (cpumask_test_cpu(cpu, action->affinity)) + break; + if (likely(action)) { trace_irq_handler_entry(irq, action); res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); trace_irq_handler_exit(irq, action, res); } else { - unsigned int cpu = smp_processor_id(); bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); if (enabled) @@ -945,31 +933,6 @@ void handle_percpu_devid_irq(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } -/** - * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu - * dev ids - * @desc: the interrupt description structure for this irq - * - * Similar to handle_fasteoi_nmi, but handling the dev_id cookie - * as a percpu pointer. 
- */ -void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) -{ - struct irq_chip *chip = irq_desc_get_chip(desc); - struct irqaction *action = desc->action; - unsigned int irq = irq_desc_get_irq(desc); - irqreturn_t res; - - __kstat_incr_irqs_this_cpu(desc); - - trace_irq_handler_entry(irq, action); - res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); - trace_irq_handler_exit(irq, action, res); - - if (chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); -} - static void __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained, const char *name) @@ -1009,8 +972,10 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, if (desc->irq_data.chip != &no_irq_chip) mask_ack_irq(desc); irq_state_set_disabled(desc); - if (is_chained) + if (is_chained) { desc->action = NULL; + WARN_ON(irq_chip_pm_put(irq_desc_get_irq_data(desc))); + } desc->depth = 1; } desc->handle_irq = handle; @@ -1036,44 +1001,33 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); desc->action = &chained_action; + WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc))); irq_activate_and_startup(desc, IRQ_RESEND); } } -void -__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, - const char *name) +void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + const char *name) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); - - if (!desc) - return; - - __irq_do_set_handler(desc, handle, is_chained, name); - irq_put_desc_busunlock(desc, flags); + scoped_irqdesc_get_and_buslock(irq, 0) + __irq_do_set_handler(scoped_irqdesc, handle, is_chained, name); } EXPORT_SYMBOL_GPL(__irq_set_handler); -void -irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, - void *data) +void irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, + 
void *data) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, 0); - - if (!desc) - return; - - desc->irq_common_data.handler_data = data; - __irq_do_set_handler(desc, handle, 1, NULL); + scoped_irqdesc_get_and_buslock(irq, 0) { + struct irq_desc *desc = scoped_irqdesc; - irq_put_desc_busunlock(desc, flags); + desc->irq_common_data.handler_data = data; + __irq_do_set_handler(desc, handle, 1, NULL); + } } EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data); void -irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, +irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle, const char *name) { irq_set_chip(irq, chip); @@ -1083,40 +1037,34 @@ EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name); void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) { - unsigned long flags, trigger, tmp; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - - if (!desc) - return; - - /* - * Warn when a driver sets the no autoenable flag on an already - * active interrupt. - */ - WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN)); - - irq_settings_clr_and_set(desc, clr, set); + scoped_irqdesc_get_and_lock(irq, 0) { + struct irq_desc *desc = scoped_irqdesc; + unsigned long trigger, tmp; + /* + * Warn when a driver sets the no autoenable flag on an already + * active interrupt. 
+ */ + WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN)); - trigger = irqd_get_trigger_type(&desc->irq_data); + irq_settings_clr_and_set(desc, clr, set); - irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | - IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT); - if (irq_settings_has_no_balance_set(desc)) - irqd_set(&desc->irq_data, IRQD_NO_BALANCING); - if (irq_settings_is_per_cpu(desc)) - irqd_set(&desc->irq_data, IRQD_PER_CPU); - if (irq_settings_can_move_pcntxt(desc)) - irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); - if (irq_settings_is_level(desc)) - irqd_set(&desc->irq_data, IRQD_LEVEL); + trigger = irqd_get_trigger_type(&desc->irq_data); - tmp = irq_settings_get_trigger_mask(desc); - if (tmp != IRQ_TYPE_NONE) - trigger = tmp; + irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | + IRQD_TRIGGER_MASK | IRQD_LEVEL); + if (irq_settings_has_no_balance_set(desc)) + irqd_set(&desc->irq_data, IRQD_NO_BALANCING); + if (irq_settings_is_per_cpu(desc)) + irqd_set(&desc->irq_data, IRQD_PER_CPU); + if (irq_settings_is_level(desc)) + irqd_set(&desc->irq_data, IRQD_LEVEL); - irqd_set(&desc->irq_data, trigger); + tmp = irq_settings_get_trigger_mask(desc); + if (tmp != IRQ_TYPE_NONE) + trigger = tmp; - irq_put_desc_unlock(desc, flags); + irqd_set(&desc->irq_data, trigger); + } } EXPORT_SYMBOL_GPL(irq_modify_status); @@ -1129,25 +1077,21 @@ EXPORT_SYMBOL_GPL(irq_modify_status); */ void irq_cpu_online(void) { - struct irq_desc *desc; - struct irq_chip *chip; - unsigned long flags; unsigned int irq; for_each_active_irq(irq) { - desc = irq_to_desc(irq); + struct irq_desc *desc = irq_to_desc(irq); + struct irq_chip *chip; + if (!desc) continue; - raw_spin_lock_irqsave(&desc->lock, flags); - + guard(raw_spinlock_irqsave)(&desc->lock); chip = irq_data_get_irq_chip(&desc->irq_data); if (chip && chip->irq_cpu_online && (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || !irqd_irq_disabled(&desc->irq_data))) chip->irq_cpu_online(&desc->irq_data); - - 
raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -1159,25 +1103,21 @@ void irq_cpu_online(void) */ void irq_cpu_offline(void) { - struct irq_desc *desc; - struct irq_chip *chip; - unsigned long flags; unsigned int irq; for_each_active_irq(irq) { - desc = irq_to_desc(irq); + struct irq_desc *desc = irq_to_desc(irq); + struct irq_chip *chip; + if (!desc) continue; - raw_spin_lock_irqsave(&desc->lock, flags); - + guard(raw_spinlock_irqsave)(&desc->lock); chip = irq_data_get_irq_chip(&desc->irq_data); if (chip && chip->irq_cpu_offline && (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || !irqd_irq_disabled(&desc->irq_data))) chip->irq_cpu_offline(&desc->irq_data); - - raw_spin_unlock_irqrestore(&desc->lock, flags); } } #endif @@ -1186,102 +1126,69 @@ void irq_cpu_offline(void) #ifdef CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS /** - * handle_fasteoi_ack_irq - irq handler for edge hierarchy - * stacked on transparent controllers + * handle_fasteoi_ack_irq - irq handler for edge hierarchy stacked on + * transparent controllers * - * @desc: the interrupt description structure for this irq + * @desc: the interrupt description structure for this irq * - * Like handle_fasteoi_irq(), but for use with hierarchy where - * the irq_chip also needs to have its ->irq_ack() function - * called. + * Like handle_fasteoi_irq(), but for use with hierarchy where the irq_chip + * also needs to have its ->irq_ack() function called. 
*/ void handle_fasteoi_ack_irq(struct irq_desc *desc) { struct irq_chip *chip = desc->irq_data.chip; - raw_spin_lock(&desc->lock); - - if (!irq_may_run(desc)) - goto out; + guard(raw_spinlock)(&desc->lock); - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); + if (!irq_can_handle_pm(desc)) { + cond_eoi_irq(chip, &desc->irq_data); + return; + } - /* - * If its disabled or no action available - * then mask it and get out of here: - */ - if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { - desc->istate |= IRQS_PENDING; + if (unlikely(!irq_can_handle_actions(desc))) { mask_irq(desc); - goto out; + cond_eoi_irq(chip, &desc->irq_data); + return; } kstat_incr_irqs_this_cpu(desc); if (desc->istate & IRQS_ONESHOT) mask_irq(desc); - /* Start handling the irq */ desc->irq_data.chip->irq_ack(&desc->irq_data); handle_irq_event(desc); cond_unmask_eoi_irq(desc, chip); - - raw_spin_unlock(&desc->lock); - return; -out: - if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) - chip->irq_eoi(&desc->irq_data); - raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_fasteoi_ack_irq); /** - * handle_fasteoi_mask_irq - irq handler for level hierarchy - * stacked on transparent controllers + * handle_fasteoi_mask_irq - irq handler for level hierarchy stacked on + * transparent controllers * - * @desc: the interrupt description structure for this irq + * @desc: the interrupt description structure for this irq * - * Like handle_fasteoi_irq(), but for use with hierarchy where - * the irq_chip also needs to have its ->irq_mask_ack() function - * called. + * Like handle_fasteoi_irq(), but for use with hierarchy where the irq_chip + * also needs to have its ->irq_mask_ack() function called. 
*/ void handle_fasteoi_mask_irq(struct irq_desc *desc) { struct irq_chip *chip = desc->irq_data.chip; - raw_spin_lock(&desc->lock); + guard(raw_spinlock)(&desc->lock); mask_ack_irq(desc); - if (!irq_may_run(desc)) - goto out; - - desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); - - /* - * If its disabled or no action available - * then mask it and get out of here: - */ - if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { - desc->istate |= IRQS_PENDING; - mask_irq(desc); - goto out; + if (!irq_can_handle(desc)) { + cond_eoi_irq(chip, &desc->irq_data); + return; } kstat_incr_irqs_this_cpu(desc); - if (desc->istate & IRQS_ONESHOT) - mask_irq(desc); handle_irq_event(desc); cond_unmask_eoi_irq(desc, chip); - - raw_spin_unlock(&desc->lock); - return; -out: - if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) - chip->irq_eoi(&desc->irq_data); - raw_spin_unlock(&desc->lock); } EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq); @@ -1332,6 +1239,43 @@ int irq_chip_get_parent_state(struct irq_data *data, EXPORT_SYMBOL_GPL(irq_chip_get_parent_state); /** + * irq_chip_shutdown_parent - Shutdown the parent interrupt + * @data: Pointer to interrupt specific data + * + * Invokes the irq_shutdown() callback of the parent if available or falls + * back to irq_chip_disable_parent(). + */ +void irq_chip_shutdown_parent(struct irq_data *data) +{ + struct irq_data *parent = data->parent_data; + + if (parent->chip->irq_shutdown) + parent->chip->irq_shutdown(parent); + else + irq_chip_disable_parent(data); +} +EXPORT_SYMBOL_GPL(irq_chip_shutdown_parent); + +/** + * irq_chip_startup_parent - Startup the parent interrupt + * @data: Pointer to interrupt specific data + * + * Invokes the irq_startup() callback of the parent if available or falls + * back to irq_chip_enable_parent(). 
+ */ +unsigned int irq_chip_startup_parent(struct irq_data *data) +{ + struct irq_data *parent = data->parent_data; + + if (parent->chip->irq_startup) + return parent->chip->irq_startup(parent); + + irq_chip_enable_parent(data); + return 0; +} +EXPORT_SYMBOL_GPL(irq_chip_startup_parent); + +/** * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if * NULL) * @data: Pointer to interrupt specific data @@ -1516,7 +1460,8 @@ int irq_chip_request_resources_parent(struct irq_data *data) if (data->chip->irq_request_resources) return data->chip->irq_request_resources(data); - return -ENOSYS; + /* no error on missing optional irq_chip::irq_request_resources */ + return 0; } EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent); @@ -1558,6 +1503,14 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return 0; } +static struct device *irq_get_pm_device(struct irq_data *data) +{ + if (data->domain) + return data->domain->pm_dev; + + return NULL; +} + /** * irq_chip_pm_get - Enable power for an IRQ chip * @data: Pointer to interrupt specific data @@ -1567,17 +1520,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) */ int irq_chip_pm_get(struct irq_data *data) { - int retval; + struct device *dev = irq_get_pm_device(data); + int retval = 0; - if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) { - retval = pm_runtime_get_sync(data->chip->parent_device); - if (retval < 0) { - pm_runtime_put_noidle(data->chip->parent_device); - return retval; - } - } + if (IS_ENABLED(CONFIG_PM) && dev) + retval = pm_runtime_resume_and_get(dev); - return 0; + return retval; } /** @@ -1590,10 +1539,11 @@ int irq_chip_pm_get(struct irq_data *data) */ int irq_chip_pm_put(struct irq_data *data) { + struct device *dev = irq_get_pm_device(data); int retval = 0; - if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) - retval = pm_runtime_put(data->chip->parent_device); + if (IS_ENABLED(CONFIG_PM) && dev) + retval = 
pm_runtime_put(dev); return (retval < 0) ? retval : 0; } diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 39a41c56ad4f..755346ea9819 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -37,7 +37,7 @@ static inline bool irq_needs_fixup(struct irq_data *d) * has been removed from the online mask already. */ if (cpumask_any_but(m, cpu) < nr_cpu_ids && - cpumask_any_and(m, cpu_online_mask) >= nr_cpu_ids) { + !cpumask_intersects(m, cpu_online_mask)) { /* * If this happens then there was a missed IRQ fixup at some * point. Warn about it and enforce fixup. @@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_desc *desc) } /* + * Complete an eventually pending irq move cleanup. If this + * interrupt was moved in hard irq context, then the vectors need + * to be cleaned up. It can't wait until this interrupt actually + * happens and this CPU was involved. + */ + irq_force_complete_move(desc); + + /* * No move required, if: * - Interrupt is per cpu * - Interrupt is not started @@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_desc *desc) } /* - * Complete an eventually pending irq move cleanup. If this - * interrupt was moved in hard irq context, then the vectors need - * to be cleaned up. It can't wait until this interrupt actually - * happens and this CPU was involved. - */ - irq_force_complete_move(desc); - - /* * If there is a setaffinity pending, then try to reuse the pending * mask, so the last change of the affinity does not get lost. If * there is no move pending or the pending mask does not contain @@ -110,7 +110,7 @@ static bool migrate_one_irq(struct irq_desc *desc) if (maskchip && chip->irq_mask) chip->irq_mask(d); - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + if (!cpumask_intersects(affinity, cpu_online_mask)) { /* * If the interrupt is managed, then shut it down and leave * the affinity untouched. @@ -130,6 +130,22 @@ static bool migrate_one_irq(struct irq_desc *desc) * CPU. 
*/ err = irq_do_set_affinity(d, affinity, false); + + /* + * If there are online CPUs in the affinity mask, but they have no + * vectors left to make the migration work, try to break the + * affinity by migrating to any online CPU. + */ + if (err == -ENOSPC && !irqd_affinity_is_managed(d) && affinity != cpu_online_mask) { + pr_debug("IRQ%u: set affinity failed for %*pbl, re-try with online CPUs\n", + d->irq, cpumask_pr_args(affinity)); + + affinity = cpu_online_mask; + brokeaff = true; + + err = irq_do_set_affinity(d, affinity, false); + } + if (err) { pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", d->irq, err); @@ -161,9 +177,8 @@ void irq_migrate_all_off_this_cpu(void) bool affinity_broken; desc = irq_to_desc(irq); - raw_spin_lock(&desc->lock); - affinity_broken = migrate_one_irq(desc); - raw_spin_unlock(&desc->lock); + scoped_guard(raw_spinlock, &desc->lock) + affinity_broken = migrate_one_irq(desc); if (affinity_broken) { pr_debug_ratelimited("IRQ %u: no longer affine to CPU%u\n", @@ -176,10 +191,10 @@ static bool hk_should_isolate(struct irq_data *data, unsigned int cpu) { const struct cpumask *hk_mask; - if (!housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) + if (!housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) return false; - hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ); + hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); if (cpumask_subset(irq_data_get_effective_affinity_mask(data), hk_mask)) return false; @@ -195,10 +210,8 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu) !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) return; - if (irqd_is_managed_and_shutdown(data)) { - irq_startup(desc, IRQ_RESEND, IRQ_START_COND); - return; - } + if (irqd_is_managed_and_shutdown(data)) + irq_startup_managed(desc); /* * If the interrupt can only be directed to a single target @@ -223,9 +236,8 @@ int irq_affinity_online_cpu(unsigned int cpu) irq_lock_sparse(); for_each_active_irq(irq) { desc = irq_to_desc(irq); 
- raw_spin_lock_irq(&desc->lock); - irq_restore_affinity_of_irq(desc, cpu); - raw_spin_unlock_irq(&desc->lock); + scoped_guard(raw_spinlock_irq, &desc->lock) + irq_restore_affinity_of_irq(desc, cpu); } irq_unlock_sparse(); diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index e4cff358b437..3527defd2890 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -9,14 +9,8 @@ static struct dentry *irq_dir; -struct irq_bit_descr { - unsigned int mask; - char *name; -}; -#define BIT_MASK_DESCR(m) { .mask = m, .name = #m } - -static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state, - const struct irq_bit_descr *sd, int size) +void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state, + const struct irq_bit_descr *sd, int size) { int i; @@ -30,7 +24,7 @@ static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state, static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); - struct cpumask *msk; + const struct cpumask *msk; msk = irq_data_get_affinity_mask(data); seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk)); @@ -58,6 +52,8 @@ static const struct irq_bit_descr irqchip_flags[] = { BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI), BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI), BIT_MASK_DESCR(IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND), + BIT_MASK_DESCR(IRQCHIP_IMMUTABLE), + BIT_MASK_DESCR(IRQCHIP_MOVE_DEFERRED), }; static void @@ -69,8 +65,12 @@ irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind) seq_printf(m, "chip: None\n"); return; } - seq_printf(m, "%*schip: %s\n", ind, "", chip->name); - seq_printf(m, "%*sflags: 0x%lx\n", ind + 1, "", chip->flags); + seq_printf(m, "%*schip: ", ind, ""); + if (chip->irq_print_chip) + chip->irq_print_chip(data, m); + else + seq_printf(m, "%s", chip->name); + seq_printf(m, "\n%*sflags: 0x%lx\n", ind + 1, "", chip->flags); irq_debug_show_bits(m, ind, chip->flags, irqchip_flags, 
ARRAY_SIZE(irqchip_flags)); } @@ -109,14 +109,12 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_NO_BALANCING), BIT_MASK_DESCR(IRQD_SINGLE_TARGET), - BIT_MASK_DESCR(IRQD_MOVE_PCNTXT), BIT_MASK_DESCR(IRQD_AFFINITY_SET), BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), BIT_MASK_DESCR(IRQD_AFFINITY_ON_ACTIVATE), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), BIT_MASK_DESCR(IRQD_CAN_RESERVE), - BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK), BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), @@ -128,6 +126,8 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX), BIT_MASK_DESCR(IRQD_IRQ_ENABLED_ON_SUSPEND), + + BIT_MASK_DESCR(IRQD_RESEND_WHEN_IN_PROGRESS), }; static const struct irq_bit_descr irqdesc_states[] = { @@ -160,7 +160,7 @@ static int irq_debug_show(struct seq_file *m, void *p) struct irq_desc *desc = m->private; struct irq_data *data; - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); data = irq_desc_get_irq_data(desc); seq_printf(m, "handler: %ps\n", desc->handle_irq); seq_printf(m, "device: %s\n", desc->dev_name); @@ -178,7 +178,6 @@ static int irq_debug_show(struct seq_file *m, void *p) seq_printf(m, "node: %d\n", irq_data_get_node(data)); irq_debug_show_masks(m, desc); irq_debug_show_data(m, data, 0); - raw_spin_unlock_irq(&desc->lock); return 0; } @@ -226,12 +225,12 @@ void irq_debugfs_copy_devname(int irq, struct device *dev) void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc) { - char name [10]; + char name [12]; if (!irq_dir || !desc || desc->debugfs_file) return; - sprintf(name, "%d", irq); + sprintf(name, "%u", irq); desc->debugfs_file = debugfs_create_file(name, 0644, irq_dir, desc, &dfs_irq_ops); } diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index f6e5515ee077..b41188698622 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/module.h> #include 
<linux/interrupt.h> +#include <linux/irqdomain.h> #include <linux/device.h> #include <linux/gfp.h> #include <linux/irq.h> @@ -29,29 +30,22 @@ static int devm_irq_match(struct device *dev, void *res, void *data) return this->irq == match->irq && this->dev_id == match->dev_id; } -/** - * devm_request_threaded_irq - allocate an interrupt line for a managed device - * @dev: device to request interrupt for - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @thread_fn: function to be called in a threaded interrupt context. NULL - * for devices which handle everything in @handler - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device, dev_name(dev) if NULL - * @dev_id: A cookie passed back to the handler function - * - * Except for the extra @dev argument, this function takes the - * same arguments and performs the same function as - * request_threaded_irq(). IRQs requested with this function will be - * automatically freed on driver detach. - * - * If an IRQ allocated with this function needs to be freed - * separately, devm_free_irq() must be used. - */ -int devm_request_threaded_irq(struct device *dev, unsigned int irq, - irq_handler_t handler, irq_handler_t thread_fn, - unsigned long irqflags, const char *devname, - void *dev_id) +static int devm_request_result(struct device *dev, int rc, unsigned int irq, + irq_handler_t handler, irq_handler_t thread_fn, + const char *devname) +{ + if (rc >= 0) + return rc; + + return dev_err_probe(dev, rc, "request_irq(%u) %ps %ps %s\n", + irq, handler, thread_fn, devname ? 
: ""); +} + +static int __devm_request_threaded_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, + irq_handler_t thread_fn, + unsigned long irqflags, + const char *devname, void *dev_id) { struct irq_devres *dr; int rc; @@ -77,28 +71,48 @@ int devm_request_threaded_irq(struct device *dev, unsigned int irq, return 0; } -EXPORT_SYMBOL(devm_request_threaded_irq); /** - * devm_request_any_context_irq - allocate an interrupt line for a managed device - * @dev: device to request interrupt for - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device, dev_name(dev) if NULL - * @dev_id: A cookie passed back to the handler function + * devm_request_threaded_irq - allocate an interrupt line for a managed device with error logging + * @dev: Device to request interrupt for + * @irq: Interrupt line to allocate + * @handler: Function to be called when the interrupt occurs + * @thread_fn: Function to be called in a threaded interrupt context. NULL + * for devices which handle everything in @handler + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function * - * Except for the extra @dev argument, this function takes the - * same arguments and performs the same function as - * request_any_context_irq(). IRQs requested with this function will be - * automatically freed on driver detach. + * Except for the extra @dev argument, this function takes the same + * arguments and performs the same function as request_threaded_irq(). + * Interrupts requested with this function will be automatically freed on + * driver detach. + * + * If an interrupt allocated with this function needs to be freed + * separately, devm_free_irq() must be used. 
+ * + * When the request fails, an error message is printed with contextual + * information (device name, interrupt number, handler functions and + * error code). Don't add extra error messages at the call sites. * - * If an IRQ allocated with this function needs to be freed - * separately, devm_free_irq() must be used. + * Return: 0 on success or a negative error number. */ -int devm_request_any_context_irq(struct device *dev, unsigned int irq, - irq_handler_t handler, unsigned long irqflags, - const char *devname, void *dev_id) +int devm_request_threaded_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, irq_handler_t thread_fn, + unsigned long irqflags, const char *devname, + void *dev_id) +{ + int rc = __devm_request_threaded_irq(dev, irq, handler, thread_fn, + irqflags, devname, dev_id); + + return devm_request_result(dev, rc, irq, handler, thread_fn, devname); +} +EXPORT_SYMBOL(devm_request_threaded_irq); + +static int __devm_request_any_context_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) { struct irq_devres *dr; int rc; @@ -123,6 +137,40 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq, return rc; } + +/** + * devm_request_any_context_irq - allocate an interrupt line for a managed device with error logging + * @dev: Device to request interrupt for + * @irq: Interrupt line to allocate + * @handler: Function to be called when the interrupt occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function + * + * Except for the extra @dev argument, this function takes the same + * arguments and performs the same function as request_any_context_irq(). + * Interrupts requested with this function will be automatically freed on + * driver detach. 
+ * + * If an interrupt allocated with this function needs to be freed + * separately, devm_free_irq() must be used. + * + * When the request fails, an error message is printed with contextual + * information (device name, interrupt number, handler functions and + * error code). Don't add extra error messages at the call sites. + * + * Return: IRQC_IS_HARDIRQ or IRQC_IS_NESTED on success, or a negative error + * number. + */ +int devm_request_any_context_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id) +{ + int rc = __devm_request_any_context_irq(dev, irq, handler, irqflags, + devname, dev_id); + + return devm_request_result(dev, rc, irq, handler, NULL, devname); +} EXPORT_SYMBOL(devm_request_any_context_irq); /** @@ -140,9 +188,8 @@ void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id) { struct irq_devres match_data = { irq, dev_id }; - WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match, + WARN_ON(devres_release(dev, devm_irq_release, devm_irq_match, &match_data)); - free_irq(irq, dev_id); } EXPORT_SYMBOL(devm_free_irq); @@ -282,3 +329,43 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, } EXPORT_SYMBOL_GPL(devm_irq_setup_generic_chip); #endif /* CONFIG_GENERIC_IRQ_CHIP */ + +#ifdef CONFIG_IRQ_DOMAIN +static void devm_irq_domain_remove(struct device *dev, void *res) +{ + struct irq_domain **domain = res; + + irq_domain_remove(*domain); +} + +/** + * devm_irq_domain_instantiate() - Instantiate a new irq domain data for a + * managed device. + * @dev: Device to instantiate the domain for + * @info: Domain information pointer pointing to the information for this + * domain + * + * Return: A pointer to the instantiated irq domain or an ERR_PTR value. 
+ */ +struct irq_domain *devm_irq_domain_instantiate(struct device *dev, + const struct irq_domain_info *info) +{ + struct irq_domain *domain; + struct irq_domain **dr; + + dr = devres_alloc(devm_irq_domain_remove, sizeof(*dr), GFP_KERNEL); + if (!dr) + return ERR_PTR(-ENOMEM); + + domain = irq_domain_instantiate(info); + if (!IS_ERR(domain)) { + *dr = domain; + devres_add(dev, dr); + } else { + devres_free(dr); + } + + return domain; +} +EXPORT_SYMBOL_GPL(devm_irq_domain_instantiate); +#endif /* CONFIG_IRQ_DOMAIN */ diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index f0862eb6b506..3cd0c40282c0 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -40,10 +40,9 @@ void irq_gc_mask_disable_reg(struct irq_data *d) struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; - irq_gc_lock(gc); + guard(raw_spinlock)(&gc->lock); irq_reg_writel(gc, mask, ct->regs.disable); *ct->mask_cache &= ~mask; - irq_gc_unlock(gc); } EXPORT_SYMBOL_GPL(irq_gc_mask_disable_reg); @@ -60,10 +59,9 @@ void irq_gc_mask_set_bit(struct irq_data *d) struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; - irq_gc_lock(gc); + guard(raw_spinlock)(&gc->lock); *ct->mask_cache |= mask; irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); - irq_gc_unlock(gc); } EXPORT_SYMBOL_GPL(irq_gc_mask_set_bit); @@ -80,10 +78,9 @@ void irq_gc_mask_clr_bit(struct irq_data *d) struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; - irq_gc_lock(gc); + guard(raw_spinlock)(&gc->lock); *ct->mask_cache &= ~mask; irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); - irq_gc_unlock(gc); } EXPORT_SYMBOL_GPL(irq_gc_mask_clr_bit); @@ -100,10 +97,9 @@ void irq_gc_unmask_enable_reg(struct irq_data *d) struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; - irq_gc_lock(gc); + guard(raw_spinlock)(&gc->lock); irq_reg_writel(gc, mask, ct->regs.enable); *ct->mask_cache |= mask; - irq_gc_unlock(gc); } 
EXPORT_SYMBOL_GPL(irq_gc_unmask_enable_reg); @@ -117,9 +113,8 @@ void irq_gc_ack_set_bit(struct irq_data *d) struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; - irq_gc_lock(gc); + guard(raw_spinlock)(&gc->lock); irq_reg_writel(gc, mask, ct->regs.ack); - irq_gc_unlock(gc); } EXPORT_SYMBOL_GPL(irq_gc_ack_set_bit); @@ -133,9 +128,8 @@ void irq_gc_ack_clr_bit(struct irq_data *d) struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = ~d->mask; - irq_gc_lock(gc); + guard(raw_spinlock)(&gc->lock); irq_reg_writel(gc, mask, ct->regs.ack); - irq_gc_unlock(gc); } /** @@ -156,12 +150,12 @@ void irq_gc_mask_disable_and_ack_set(struct irq_data *d) struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; - irq_gc_lock(gc); + guard(raw_spinlock)(&gc->lock); irq_reg_writel(gc, mask, ct->regs.disable); *ct->mask_cache &= ~mask; irq_reg_writel(gc, mask, ct->regs.ack); - irq_gc_unlock(gc); } +EXPORT_SYMBOL_GPL(irq_gc_mask_disable_and_ack_set); /** * irq_gc_eoi - EOI interrupt @@ -173,9 +167,8 @@ void irq_gc_eoi(struct irq_data *d) struct irq_chip_type *ct = irq_data_get_chip_type(d); u32 mask = d->mask; - irq_gc_lock(gc); + guard(raw_spinlock)(&gc->lock); irq_reg_writel(gc, mask, ct->regs.eoi); - irq_gc_unlock(gc); } /** @@ -195,12 +188,11 @@ int irq_gc_set_wake(struct irq_data *d, unsigned int on) if (!(mask & gc->wake_enabled)) return -EINVAL; - irq_gc_lock(gc); + guard(raw_spinlock)(&gc->lock); if (on) gc->wake_active |= mask; else gc->wake_active &= ~mask; - irq_gc_unlock(gc); return 0; } EXPORT_SYMBOL_GPL(irq_gc_set_wake); @@ -219,11 +211,15 @@ void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, int num_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler) { + struct irq_chip_type *ct = gc->chip_types; + int i; + raw_spin_lock_init(&gc->lock); gc->num_ct = num_ct; gc->irq_base = irq_base; gc->reg_base = reg_base; - gc->chip_types->chip.name = name; + for (i = 0; i < num_ct; i++) + 
ct[i].chip.name = name; gc->chip_types->handler = handler; } @@ -272,51 +268,45 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags) } /** - * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain - * @d: irq domain for which to allocate chips - * @irqs_per_chip: Number of interrupts each chip handles (max 32) - * @num_ct: Number of irq_chip_type instances associated with this - * @name: Name of the irq chip - * @handler: Default flow handler associated with these chips - * @clr: IRQ_* bits to clear in the mapping function - * @set: IRQ_* bits to set in the mapping function - * @gcflags: Generic chip specific setup flags + * irq_domain_alloc_generic_chips - Allocate generic chips for an irq domain + * @d: irq domain for which to allocate chips + * @info: Generic chip information + * + * Return: 0 on success, negative error code on failure */ -int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, - int num_ct, const char *name, - irq_flow_handler_t handler, - unsigned int clr, unsigned int set, - enum irq_gc_flags gcflags) +int irq_domain_alloc_generic_chips(struct irq_domain *d, + const struct irq_domain_chip_generic_info *info) { struct irq_domain_chip_generic *dgc; struct irq_chip_generic *gc; - unsigned long flags; int numchips, i; size_t dgc_sz; size_t gc_sz; size_t sz; void *tmp; + int ret; if (d->gc) return -EBUSY; - numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip); + numchips = DIV_ROUND_UP(d->revmap_size, info->irqs_per_chip); if (!numchips) return -EINVAL; /* Allocate a pointer, generic chip and chiptypes for each chip */ - gc_sz = struct_size(gc, chip_types, num_ct); + gc_sz = struct_size(gc, chip_types, info->num_ct); dgc_sz = struct_size(dgc, gc, numchips); sz = dgc_sz + numchips * gc_sz; tmp = dgc = kzalloc(sz, GFP_KERNEL); if (!dgc) return -ENOMEM; - dgc->irqs_per_chip = irqs_per_chip; + dgc->irqs_per_chip = info->irqs_per_chip; dgc->num_chips = numchips; - 
dgc->irq_flags_to_set = set; - dgc->irq_flags_to_clear = clr; - dgc->gc_flags = gcflags; + dgc->irq_flags_to_set = info->irq_flags_to_set; + dgc->irq_flags_to_clear = info->irq_flags_to_clear; + dgc->gc_flags = info->gc_flags; + dgc->exit = info->exit; d->gc = dgc; /* Calc pointer to the first generic chip */ @@ -324,22 +314,91 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, for (i = 0; i < numchips; i++) { /* Store the pointer to the generic chip */ dgc->gc[i] = gc = tmp; - irq_init_generic_chip(gc, name, num_ct, i * irqs_per_chip, - NULL, handler); + irq_init_generic_chip(gc, info->name, info->num_ct, + i * dgc->irqs_per_chip, NULL, + info->handler); gc->domain = d; - if (gcflags & IRQ_GC_BE_IO) { + if (dgc->gc_flags & IRQ_GC_BE_IO) { gc->reg_readl = &irq_readl_be; gc->reg_writel = &irq_writel_be; } - raw_spin_lock_irqsave(&gc_lock, flags); - list_add_tail(&gc->list, &gc_list); - raw_spin_unlock_irqrestore(&gc_lock, flags); + if (info->init) { + ret = info->init(gc); + if (ret) + goto err; + } + + scoped_guard (raw_spinlock_irqsave, &gc_lock) + list_add_tail(&gc->list, &gc_list); /* Calc pointer to the next generic chip */ tmp += gc_sz; } return 0; + +err: + while (i--) { + if (dgc->exit) + dgc->exit(dgc->gc[i]); + irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0); + } + d->gc = NULL; + kfree(dgc); + return ret; +} +EXPORT_SYMBOL_GPL(irq_domain_alloc_generic_chips); + +/** + * irq_domain_remove_generic_chips - Remove generic chips from an irq domain + * @d: irq domain for which generic chips are to be removed + */ +void irq_domain_remove_generic_chips(struct irq_domain *d) +{ + struct irq_domain_chip_generic *dgc = d->gc; + unsigned int i; + + if (!dgc) + return; + + for (i = 0; i < dgc->num_chips; i++) { + if (dgc->exit) + dgc->exit(dgc->gc[i]); + irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0); + } + d->gc = NULL; + kfree(dgc); +} +EXPORT_SYMBOL_GPL(irq_domain_remove_generic_chips); + +/** + * __irq_alloc_domain_generic_chips - 
Allocate generic chips for an irq domain + * @d: irq domain for which to allocate chips + * @irqs_per_chip: Number of interrupts each chip handles (max 32) + * @num_ct: Number of irq_chip_type instances associated with this + * @name: Name of the irq chip + * @handler: Default flow handler associated with these chips + * @clr: IRQ_* bits to clear in the mapping function + * @set: IRQ_* bits to set in the mapping function + * @gcflags: Generic chip specific setup flags + */ +int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, + int num_ct, const char *name, + irq_flow_handler_t handler, + unsigned int clr, unsigned int set, + enum irq_gc_flags gcflags) +{ + struct irq_domain_chip_generic_info info = { + .irqs_per_chip = irqs_per_chip, + .num_ct = num_ct, + .name = name, + .handler = handler, + .irq_flags_to_clear = clr, + .irq_flags_to_set = set, + .gc_flags = gcflags, + }; + + return irq_domain_alloc_generic_chips(d, &info); } EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips); @@ -389,7 +448,6 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, struct irq_chip_generic *gc; struct irq_chip_type *ct; struct irq_chip *chip; - unsigned long flags; int idx; gc = __irq_get_domain_generic_chip(d, hw_irq); @@ -409,9 +467,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, /* We only init the cache for the first mapping of a generic chip */ if (!gc->installed) { - raw_spin_lock_irqsave(&gc->lock, flags); + guard(raw_spinlock_irqsave)(&gc->lock); irq_gc_init_mask_cache(gc, dgc->gc_flags); - raw_spin_unlock_irqrestore(&gc->lock, flags); } /* Mark the interrupt as installed */ @@ -431,7 +488,7 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, return 0; } -static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq) +void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq) { struct irq_data *data = irq_domain_get_irq_data(d, virq); struct irq_domain_chip_generic *dgc = 
d->gc; @@ -478,9 +535,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, struct irq_chip *chip = &ct->chip; unsigned int i; - raw_spin_lock(&gc_lock); - list_add_tail(&gc->list, &gc_list); - raw_spin_unlock(&gc_lock); + scoped_guard (raw_spinlock, &gc_lock) + list_add_tail(&gc->list, &gc_list); irq_gc_init_mask_cache(gc, flags); @@ -544,21 +600,33 @@ EXPORT_SYMBOL_GPL(irq_setup_alt_chip); void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, unsigned int clr, unsigned int set) { - unsigned int i = gc->irq_base; + unsigned int i, virq; - raw_spin_lock(&gc_lock); - list_del(&gc->list); - raw_spin_unlock(&gc_lock); + scoped_guard (raw_spinlock, &gc_lock) + list_del(&gc->list); - for (; msk; msk >>= 1, i++) { + for (i = 0; msk; msk >>= 1, i++) { if (!(msk & 0x01)) continue; + /* + * Interrupt domain based chips store the base hardware + * interrupt number in gc::irq_base. Otherwise gc::irq_base + * contains the base Linux interrupt number. + */ + if (gc->domain) { + virq = irq_find_mapping(gc->domain, gc->irq_base + i); + if (!virq) + continue; + } else { + virq = gc->irq_base + i; + } + /* Remove handler first. 
That will mask the irq line */ - irq_set_handler(i, NULL); - irq_set_chip(i, &no_irq_chip); - irq_set_chip_data(i, NULL); - irq_modify_status(i, clr, set); + irq_set_handler(virq, NULL); + irq_set_chip(virq, &no_irq_chip); + irq_set_chip_data(virq, NULL); + irq_modify_status(virq, clr, set); } } EXPORT_SYMBOL_GPL(irq_remove_generic_chip); @@ -582,7 +650,7 @@ static struct irq_data *irq_gc_get_irq_data(struct irq_chip_generic *gc) } #ifdef CONFIG_PM -static int irq_gc_suspend(void) +static int irq_gc_suspend(void *data) { struct irq_chip_generic *gc; @@ -602,7 +670,7 @@ static int irq_gc_suspend(void) return 0; } -static void irq_gc_resume(void) +static void irq_gc_resume(void *data) { struct irq_chip_generic *gc; @@ -625,7 +693,7 @@ static void irq_gc_resume(void) #define irq_gc_resume NULL #endif -static void irq_gc_shutdown(void) +static void irq_gc_shutdown(void *data) { struct irq_chip_generic *gc; @@ -641,15 +709,19 @@ static void irq_gc_shutdown(void) } } -static struct syscore_ops irq_gc_syscore_ops = { +static const struct syscore_ops irq_gc_syscore_ops = { .suspend = irq_gc_suspend, .resume = irq_gc_resume, .shutdown = irq_gc_shutdown, }; +static struct syscore irq_gc_syscore = { + .ops = &irq_gc_syscore_ops, +}; + static int __init irq_gc_init_ops(void) { - register_syscore_ops(&irq_gc_syscore_ops); + register_syscore(&irq_gc_syscore); return 0; } device_initcall(irq_gc_init_ops); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 9489f93b3db3..786f5570a640 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -133,7 +133,53 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) */ atomic_inc(&desc->threads_active); - wake_up_process(action->thread); + /* + * This might be a premature wakeup before the thread reached the + * thread function and set the IRQTF_READY bit. It's waiting in + * kthread code with state UNINTERRUPTIBLE. Once it reaches the + * thread function it waits with INTERRUPTIBLE. 
The wakeup is not + * lost in that case because the thread is guaranteed to observe + * the RUN flag before it goes to sleep in wait_for_interrupt(). + */ + wake_up_state(action->thread, TASK_INTERRUPTIBLE); +} + +static DEFINE_STATIC_KEY_FALSE(irqhandler_duration_check_enabled); +static u64 irqhandler_duration_threshold_ns __ro_after_init; + +static int __init irqhandler_duration_check_setup(char *arg) +{ + unsigned long val; + int ret; + + ret = kstrtoul(arg, 0, &val); + if (ret) { + pr_err("Unable to parse irqhandler.duration_warn_us setting: ret=%d\n", ret); + return 0; + } + + if (!val) { + pr_err("Invalid irqhandler.duration_warn_us setting, must be > 0\n"); + return 0; + } + + irqhandler_duration_threshold_ns = val * 1000; + static_branch_enable(&irqhandler_duration_check_enabled); + + return 1; +} +__setup("irqhandler.duration_warn_us=", irqhandler_duration_check_setup); + +static inline void irqhandler_duration_check(u64 ts_start, unsigned int irq, + const struct irqaction *action) +{ + u64 delta_ns = local_clock() - ts_start; + + if (unlikely(delta_ns > irqhandler_duration_threshold_ns)) { + pr_warn_ratelimited("[CPU%u] long duration of IRQ[%u:%ps], took: %llu us\n", + smp_processor_id(), irq, action->handler, + div_u64(delta_ns, NSEC_PER_USEC)); + } } irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) @@ -155,7 +201,16 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) lockdep_hardirq_threaded(); trace_irq_handler_entry(irq, action); - res = action->handler(irq, action->dev_id); + + if (static_branch_unlikely(&irqhandler_duration_check_enabled)) { + u64 ts_start = local_clock(); + + res = action->handler(irq, action->dev_id); + irqhandler_duration_check(ts_start, irq, action); + } else { + res = action->handler(irq, action->dev_id); + } + trace_irq_handler_exit(irq, action, res); if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n", diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 
99cbdf55a8bd..0164ca48da59 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -12,14 +12,15 @@ #include <linux/sched/clock.h> #ifdef CONFIG_SPARSE_IRQ -# define IRQ_BITMAP_BITS (NR_IRQS + 8196) +# define MAX_SPARSE_IRQS INT_MAX #else -# define IRQ_BITMAP_BITS NR_IRQS +# define MAX_SPARSE_IRQS NR_IRQS #endif #define istate core_internal_state__do_not_mess_with_it extern bool noirqdebug; +extern int irq_poll_cpu; extern struct irqaction chained_action; @@ -29,12 +30,14 @@ extern struct irqaction chained_action; * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed * IRQTF_AFFINITY - irq thread is requested to adjust affinity * IRQTF_FORCED_THREAD - irq action is force threaded + * IRQTF_READY - signals that irq thread is ready */ enum { IRQTF_RUNTHREAD, IRQTF_WARNED, IRQTF_AFFINITY, IRQTF_FORCED_THREAD, + IRQTF_READY, }; /* @@ -45,11 +48,15 @@ enum { * detection * IRQS_POLL_INPROGRESS - polling in progress * IRQS_ONESHOT - irq is not unmasked in primary handler - * IRQS_REPLAY - irq is replayed + * IRQS_REPLAY - irq has been resent and will not be resent + * again until the handler has run and cleared + * this flag. * IRQS_WAITING - irq is waiting - * IRQS_PENDING - irq is pending and replayed later + * IRQS_PENDING - irq needs to be resent and should be resent + * at the next available opportunity. 
* IRQS_SUSPENDED - irq is suspended * IRQS_NMI - irq line is used to deliver NMIs + * IRQS_SYSFS - descriptor has been added to sysfs */ enum { IRQS_AUTODETECT = 0x00000001, @@ -62,6 +69,7 @@ enum { IRQS_SUSPENDED = 0x00000800, IRQS_TIMINGS = 0x00001000, IRQS_NMI = 0x00002000, + IRQS_SYSFS = 0x00004000, }; #include "debug.h" @@ -80,10 +88,10 @@ extern void __enable_irq(struct irq_desc *desc); extern int irq_activate(struct irq_desc *desc); extern int irq_activate_and_startup(struct irq_desc *desc, bool resend); extern int irq_startup(struct irq_desc *desc, bool resend, bool force); +extern void irq_startup_managed(struct irq_desc *desc); extern void irq_shutdown(struct irq_desc *desc); extern void irq_shutdown_and_deactivate(struct irq_desc *desc); -extern void irq_enable(struct irq_desc *desc); extern void irq_disable(struct irq_desc *desc); extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu); @@ -97,21 +105,18 @@ static inline void irq_mark_irq(unsigned int irq) { } extern void irq_mark_irq(unsigned int irq); #endif -extern int __irq_get_irqchip_state(struct irq_data *data, - enum irqchip_irq_state which, - bool *state); - -extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); - irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc); irqreturn_t handle_irq_event_percpu(struct irq_desc *desc); irqreturn_t handle_irq_event(struct irq_desc *desc); /* Resending of interrupts :*/ int check_irq_resend(struct irq_desc *desc, bool inject); -bool irq_wait_for_poll(struct irq_desc *desc); +void clear_irq_resend(struct irq_desc *desc); +void irq_resend_init(struct irq_desc *desc); void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action); +void wake_threads_waitq(struct irq_desc *desc); + #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); extern void unregister_irq_proc(unsigned int irq, struct irq_desc 
*desc); @@ -128,8 +133,6 @@ static inline void unregister_handler_proc(unsigned int irq, extern bool irq_can_set_affinity_usr(unsigned int irq); -extern void irq_set_thread_affinity(struct irq_desc *desc); - extern int irq_do_set_affinity(struct irq_data *data, const struct cpumask *dest, bool force); @@ -139,6 +142,10 @@ extern int irq_setup_affinity(struct irq_desc *desc); static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; } #endif + +#define for_each_action_of_desc(desc, act) \ + for (act = desc->action; act; act = act->next) + /* Inline functions for support of irq chips on slow busses */ static inline void chip_bus_lock(struct irq_desc *desc) { @@ -158,38 +165,33 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc) #define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK) #define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU) -#define for_each_action_of_desc(desc, act) \ - for (act = desc->action; act; act = act->next) - -struct irq_desc * -__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, - unsigned int check); +struct irq_desc *__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, + unsigned int check); void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus); -static inline struct irq_desc * -irq_get_desc_buslock(unsigned int irq, unsigned long *flags, unsigned int check) -{ - return __irq_get_desc_lock(irq, flags, true, check); -} +__DEFINE_CLASS_IS_CONDITIONAL(irqdesc_lock, true); +__DEFINE_UNLOCK_GUARD(irqdesc_lock, struct irq_desc, + __irq_put_desc_unlock(_T->lock, _T->flags, _T->bus), + unsigned long flags; bool bus); -static inline void -irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags) +static inline class_irqdesc_lock_t class_irqdesc_lock_constructor(unsigned int irq, bool bus, + unsigned int check) { - __irq_put_desc_unlock(desc, flags, true); -} + class_irqdesc_lock_t _t = { .bus = bus, }; -static inline struct irq_desc * 
-irq_get_desc_lock(unsigned int irq, unsigned long *flags, unsigned int check) -{ - return __irq_get_desc_lock(irq, flags, false, check); -} + _t.lock = __irq_get_desc_lock(irq, &_t.flags, bus, check); -static inline void -irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags) -{ - __irq_put_desc_unlock(desc, flags, false); + return _t; } +#define scoped_irqdesc_get_and_lock(_irq, _check) \ + scoped_guard(irqdesc_lock, _irq, false, _check) + +#define scoped_irqdesc_get_and_buslock(_irq, _check) \ + scoped_guard(irqdesc_lock, _irq, true, _check) + +#define scoped_irqdesc ((struct irq_desc *)(__guard_ptr(irqdesc_lock)(&scope))) + #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) static inline unsigned int irqd_get(struct irq_data *d) @@ -249,7 +251,7 @@ static inline void irq_state_set_masked(struct irq_desc *desc) static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc) { - __this_cpu_inc(*desc->kstat_irqs); + __this_cpu_inc(desc->kstat_irqs->cnt); __this_cpu_inc(kstat.irqs_sum); } @@ -269,12 +271,17 @@ static inline int irq_desc_is_chained(struct irq_desc *desc) return (desc->action && desc->action == &chained_action); } +static inline bool irq_is_nmi(struct irq_desc *desc) +{ + return desc->istate & IRQS_NMI; +} + #ifdef CONFIG_PM_SLEEP -bool irq_pm_check_wakeup(struct irq_desc *desc); +void irq_pm_handle_wakeup(struct irq_desc *desc); void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); #else -static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; } +static inline void irq_pm_handle_wakeup(struct irq_desc *desc) { } static inline void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } static inline void @@ -405,7 +412,7 @@ irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, #ifdef CONFIG_GENERIC_PENDING_IRQ static inline bool irq_can_move_pcntxt(struct 
irq_data *data) { - return irqd_can_move_in_process_context(data); + return !(data->chip->flags & IRQCHIP_MOVE_DEFERRED); } static inline bool irq_move_pending(struct irq_data *data) { @@ -425,11 +432,8 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) { return desc->pending_mask; } -static inline bool handle_enforce_irqctx(struct irq_data *data) -{ - return irqd_is_handle_enforce_irqctx(data); -} bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); +void irq_force_complete_move(struct irq_desc *desc); #else /* CONFIG_GENERIC_PENDING_IRQ */ static inline bool irq_can_move_pcntxt(struct irq_data *data) { @@ -455,10 +459,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) { return false; } -static inline bool handle_enforce_irqctx(struct irq_data *data) -{ - return false; -} +static inline void irq_force_complete_move(struct irq_desc *desc) { } #endif /* !CONFIG_GENERIC_PENDING_IRQ */ #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) @@ -485,6 +486,16 @@ static inline struct irq_data *irqd_get_parent_data(struct irq_data *irqd) #ifdef CONFIG_GENERIC_IRQ_DEBUGFS #include <linux/debugfs.h> +struct irq_bit_descr { + unsigned int mask; + char *name; +}; + +#define BIT_MASK_DESCR(m) { .mask = m, .name = #m } + +void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state, + const struct irq_bit_descr *sd, int size); + void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc); static inline void irq_remove_debugfs_entry(struct irq_desc *desc) { diff --git a/kernel/irq/ipi-mux.c b/kernel/irq/ipi-mux.c new file mode 100644 index 000000000000..fa4fc18c6131 --- /dev/null +++ b/kernel/irq/ipi-mux.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Multiplex several virtual IPIs over a single HW IPI. + * + * Copyright The Asahi Linux Contributors + * Copyright (c) 2022 Ventana Micro Systems Inc. 
+ */ + +#define pr_fmt(fmt) "ipi-mux: " fmt +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/irqchip.h> +#include <linux/irqchip/chained_irq.h> +#include <linux/irqdomain.h> +#include <linux/jump_label.h> +#include <linux/percpu.h> +#include <linux/smp.h> + +struct ipi_mux_cpu { + atomic_t enable; + atomic_t bits; +}; + +static struct ipi_mux_cpu __percpu *ipi_mux_pcpu; +static struct irq_domain *ipi_mux_domain; +static void (*ipi_mux_send)(unsigned int cpu); + +static void ipi_mux_mask(struct irq_data *d) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + + atomic_andnot(BIT(irqd_to_hwirq(d)), &icpu->enable); +} + +static void ipi_mux_unmask(struct irq_data *d) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + u32 ibit = BIT(irqd_to_hwirq(d)); + + atomic_or(ibit, &icpu->enable); + + /* + * The atomic_or() above must complete before the atomic_read() + * below to avoid racing ipi_mux_send_mask(). + */ + smp_mb__after_atomic(); + + /* If a pending IPI was unmasked, raise a parent IPI immediately. */ + if (atomic_read(&icpu->bits) & ibit) + ipi_mux_send(smp_processor_id()); +} + +static void ipi_mux_send_mask(struct irq_data *d, const struct cpumask *mask) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + u32 ibit = BIT(irqd_to_hwirq(d)); + unsigned long pending; + int cpu; + + for_each_cpu(cpu, mask) { + icpu = per_cpu_ptr(ipi_mux_pcpu, cpu); + + /* + * This sequence is the mirror of the one in ipi_mux_unmask(); + * see the comment there. Additionally, release semantics + * ensure that the vIPI flag set is ordered after any shared + * memory accesses that precede it. This therefore also pairs + * with the atomic_fetch_andnot in ipi_mux_process(). + */ + pending = atomic_fetch_or_release(ibit, &icpu->bits); + + /* + * The atomic_fetch_or_release() above must complete + * before the atomic_read() below to avoid racing with + * ipi_mux_unmask(). 
+ */ + smp_mb__after_atomic(); + + /* + * The flag writes must complete before the physical IPI is + * issued to another CPU. This is implied by the control + * dependency on the result of atomic_read() below, which is + * itself already ordered after the vIPI flag write. + */ + if (!(pending & ibit) && (atomic_read(&icpu->enable) & ibit)) + ipi_mux_send(cpu); + } +} + +static const struct irq_chip ipi_mux_chip = { + .name = "IPI Mux", + .irq_mask = ipi_mux_mask, + .irq_unmask = ipi_mux_unmask, + .ipi_send_mask = ipi_mux_send_mask, +}; + +static int ipi_mux_domain_alloc(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_set_percpu_devid(virq + i); + irq_domain_set_info(d, virq + i, i, &ipi_mux_chip, NULL, + handle_percpu_devid_irq, NULL, NULL); + } + + return 0; +} + +static const struct irq_domain_ops ipi_mux_domain_ops = { + .alloc = ipi_mux_domain_alloc, + .free = irq_domain_free_irqs_top, +}; + +/** + * ipi_mux_process - Process multiplexed virtual IPIs + */ +void ipi_mux_process(void) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + irq_hw_number_t hwirq; + unsigned long ipis; + unsigned int en; + + /* + * Reading enable mask does not need to be ordered as long as + * this function is called from interrupt handler because only + * the CPU itself can change its own enable mask. + */ + en = atomic_read(&icpu->enable); + + /* + * Clear the IPIs we are about to handle. This pairs with the + * atomic_fetch_or_release() in ipi_mux_send_mask(). + */ + ipis = atomic_fetch_andnot(en, &icpu->bits) & en; + + for_each_set_bit(hwirq, &ipis, BITS_PER_TYPE(int)) + generic_handle_domain_irq(ipi_mux_domain, hwirq); +} + +/** + * ipi_mux_create - Create virtual IPIs multiplexed on top of a single + * parent IPI. + * @nr_ipi: number of virtual IPIs to create.
This should + * be <= BITS_PER_TYPE(int) + * @mux_send: callback to trigger parent IPI for a particular CPU + * + * Returns first virq of the newly created virtual IPIs upon success + * or <=0 upon failure + */ +int ipi_mux_create(unsigned int nr_ipi, void (*mux_send)(unsigned int cpu)) +{ + struct fwnode_handle *fwnode; + struct irq_domain *domain; + int rc; + + if (ipi_mux_domain) + return -EEXIST; + + if (BITS_PER_TYPE(int) < nr_ipi || !mux_send) + return -EINVAL; + + ipi_mux_pcpu = alloc_percpu(typeof(*ipi_mux_pcpu)); + if (!ipi_mux_pcpu) + return -ENOMEM; + + fwnode = irq_domain_alloc_named_fwnode("IPI-Mux"); + if (!fwnode) { + pr_err("unable to create IPI Mux fwnode\n"); + rc = -ENOMEM; + goto fail_free_cpu; + } + + domain = irq_domain_create_linear(fwnode, nr_ipi, + &ipi_mux_domain_ops, NULL); + if (!domain) { + pr_err("unable to add IPI Mux domain\n"); + rc = -ENOMEM; + goto fail_free_fwnode; + } + + domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE; + irq_domain_update_bus_token(domain, DOMAIN_BUS_IPI); + + rc = irq_domain_alloc_irqs(domain, nr_ipi, NUMA_NO_NODE, NULL); + if (rc <= 0) { + pr_err("unable to alloc IRQs from IPI Mux domain\n"); + goto fail_free_domain; + } + + ipi_mux_domain = domain; + ipi_mux_send = mux_send; + + return rc; + +fail_free_domain: + irq_domain_remove(domain); +fail_free_fwnode: + irq_domain_free_fwnode(fwnode); +fail_free_cpu: + free_percpu(ipi_mux_pcpu); + return rc; +} diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 08ce7da3b57c..961d4af76af3 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -115,11 +115,11 @@ free_descs: int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest) { struct irq_data *data = irq_get_irq_data(irq); - struct cpumask *ipimask = data ? 
irq_data_get_affinity_mask(data) : NULL; + const struct cpumask *ipimask; struct irq_domain *domain; unsigned int nr_irqs; - if (!irq || !data || !ipimask) + if (!irq || !data) return -EINVAL; domain = data->domain; @@ -131,7 +131,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest) return -EINVAL; } - if (WARN_ON(!cpumask_subset(dest, ipimask))) + ipimask = irq_data_get_affinity_mask(data); + if (!ipimask || WARN_ON(!cpumask_subset(dest, ipimask))) /* * Must be destroying a subset of CPUs to which this IPI * was set up to target @@ -162,12 +163,13 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest) irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu) { struct irq_data *data = irq_get_irq_data(irq); - struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL; + const struct cpumask *ipimask; - if (!data || !ipimask || cpu >= nr_cpu_ids) + if (!data || cpu >= nr_cpu_ids) return INVALID_HWIRQ; - if (!cpumask_test_cpu(cpu, ipimask)) + ipimask = irq_data_get_affinity_mask(data); + if (!ipimask || !cpumask_test_cpu(cpu, ipimask)) return INVALID_HWIRQ; /* @@ -186,9 +188,9 @@ EXPORT_SYMBOL_GPL(ipi_get_hwirq); static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data, const struct cpumask *dest, unsigned int cpu) { - struct cpumask *ipimask = irq_data_get_affinity_mask(data); + const struct cpumask *ipimask; - if (!chip || !ipimask) + if (!chip || !data) return -EINVAL; if (!chip->ipi_send_single && !chip->ipi_send_mask) @@ -197,6 +199,10 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data, if (cpu >= nr_cpu_ids) return -EINVAL; + ipimask = irq_data_get_affinity_mask(data); + if (!ipimask) + return -EINVAL; + if (dest) { if (!cpumask_subset(dest, ipimask)) return -EINVAL; diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 0cd02efa3a74..ae4c9cbd1b4b 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -4,22 +4,23 @@ * Copyright (C) 2020 Bartosz 
Golaszewski <bgolaszewski@baylibre.com> */ +#include <linux/cleanup.h> +#include <linux/interrupt.h> #include <linux/irq.h> #include <linux/irq_sim.h> #include <linux/irq_work.h> -#include <linux/interrupt.h> #include <linux/slab.h> struct irq_sim_work_ctx { struct irq_work work; - int irq_base; unsigned int irq_count; unsigned long *pending; struct irq_domain *domain; + struct irq_sim_ops ops; + void *user_data; }; struct irq_sim_irq_ctx { - int irqnum; bool enabled; struct irq_sim_work_ctx *work_ctx; }; @@ -88,6 +89,31 @@ static int irq_sim_set_irqchip_state(struct irq_data *data, return 0; } +static int irq_sim_request_resources(struct irq_data *data) +{ + struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); + struct irq_sim_work_ctx *work_ctx = irq_ctx->work_ctx; + irq_hw_number_t hwirq = irqd_to_hwirq(data); + + if (work_ctx->ops.irq_sim_irq_requested) + return work_ctx->ops.irq_sim_irq_requested(work_ctx->domain, + hwirq, + work_ctx->user_data); + + return 0; +} + +static void irq_sim_release_resources(struct irq_data *data) +{ + struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); + struct irq_sim_work_ctx *work_ctx = irq_ctx->work_ctx; + irq_hw_number_t hwirq = irqd_to_hwirq(data); + + if (work_ctx->ops.irq_sim_irq_released) + work_ctx->ops.irq_sim_irq_released(work_ctx->domain, hwirq, + work_ctx->user_data); +} + static struct irq_chip irq_sim_irqchip = { .name = "irq_sim", .irq_mask = irq_sim_irqmask, @@ -95,6 +121,8 @@ static struct irq_chip irq_sim_irqchip = { .irq_set_type = irq_sim_set_type, .irq_get_irqchip_state = irq_sim_get_irqchip_state, .irq_set_irqchip_state = irq_sim_set_irqchip_state, + .irq_request_resources = irq_sim_request_resources, + .irq_release_resources = irq_sim_release_resources, }; static void irq_sim_handle_irq(struct irq_work *work) @@ -164,35 +192,42 @@ static const struct irq_domain_ops irq_sim_domain_ops = { struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode, unsigned int 
num_irqs) { - struct irq_sim_work_ctx *work_ctx; + return irq_domain_create_sim_full(fwnode, num_irqs, NULL, NULL); +} +EXPORT_SYMBOL_GPL(irq_domain_create_sim); + +struct irq_domain *irq_domain_create_sim_full(struct fwnode_handle *fwnode, + unsigned int num_irqs, + const struct irq_sim_ops *ops, + void *data) +{ + struct irq_sim_work_ctx *work_ctx __free(kfree) = + kzalloc(sizeof(*work_ctx), GFP_KERNEL); - work_ctx = kmalloc(sizeof(*work_ctx), GFP_KERNEL); if (!work_ctx) - goto err_out; + return ERR_PTR(-ENOMEM); - work_ctx->pending = bitmap_zalloc(num_irqs, GFP_KERNEL); - if (!work_ctx->pending) - goto err_free_work_ctx; + unsigned long *pending __free(bitmap) = bitmap_zalloc(num_irqs, GFP_KERNEL); + if (!pending) + return ERR_PTR(-ENOMEM); work_ctx->domain = irq_domain_create_linear(fwnode, num_irqs, &irq_sim_domain_ops, work_ctx); if (!work_ctx->domain) - goto err_free_bitmap; + return ERR_PTR(-ENOMEM); work_ctx->irq_count = num_irqs; - init_irq_work(&work_ctx->work, irq_sim_handle_irq); + work_ctx->work = IRQ_WORK_INIT_HARD(irq_sim_handle_irq); + work_ctx->pending = no_free_ptr(pending); + work_ctx->user_data = data; - return work_ctx->domain; + if (ops) + memcpy(&work_ctx->ops, ops, sizeof(*ops)); -err_free_bitmap: - bitmap_free(work_ctx->pending); -err_free_work_ctx: - kfree(work_ctx); -err_out: - return ERR_PTR(-ENOMEM); + return no_free_ptr(work_ctx)->domain; } -EXPORT_SYMBOL_GPL(irq_domain_create_sim); +EXPORT_SYMBOL_GPL(irq_domain_create_sim_full); /** * irq_domain_remove_sim - Deinitialize the interrupt simulator domain: free @@ -234,10 +269,22 @@ struct irq_domain *devm_irq_domain_create_sim(struct device *dev, struct fwnode_handle *fwnode, unsigned int num_irqs) { + return devm_irq_domain_create_sim_full(dev, fwnode, num_irqs, + NULL, NULL); +} +EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim); + +struct irq_domain * +devm_irq_domain_create_sim_full(struct device *dev, + struct fwnode_handle *fwnode, + unsigned int num_irqs, + const struct irq_sim_ops 
*ops, + void *data) +{ struct irq_domain *domain; int ret; - domain = irq_domain_create_sim(fwnode, num_irqs); + domain = irq_domain_create_sim_full(fwnode, num_irqs, ops, data); if (IS_ERR(domain)) return domain; @@ -247,4 +294,4 @@ struct irq_domain *devm_irq_domain_create_sim(struct device *dev, return domain; } -EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim); +EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim_full); diff --git a/kernel/irq/irq_test.c b/kernel/irq/irq_test.c new file mode 100644 index 000000000000..e2d31914b3c4 --- /dev/null +++ b/kernel/irq/irq_test.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: LGPL-2.1+ + +#include <linux/cleanup.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> +#include <linux/irqdomain.h> +#include <linux/nodemask.h> +#include <kunit/test.h> + +#include "internals.h" + +static irqreturn_t noop_handler(int irq, void *data) +{ + return IRQ_HANDLED; +} + +static void noop(struct irq_data *data) { } +static unsigned int noop_ret(struct irq_data *data) { return 0; } + +static int noop_affinity(struct irq_data *data, const struct cpumask *dest, + bool force) +{ + irq_data_update_effective_affinity(data, dest); + + return 0; +} + +static struct irq_chip fake_irq_chip = { + .name = "fake", + .irq_startup = noop_ret, + .irq_shutdown = noop, + .irq_enable = noop, + .irq_disable = noop, + .irq_ack = noop, + .irq_mask = noop, + .irq_unmask = noop, + .irq_set_affinity = noop_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static int irq_test_setup_fake_irq(struct kunit *test, struct irq_affinity_desc *affd) +{ + struct irq_desc *desc; + int virq; + + virq = irq_domain_alloc_descs(-1, 1, 0, NUMA_NO_NODE, affd); + KUNIT_ASSERT_GE(test, virq, 0); + + irq_set_chip_and_handler(virq, &fake_irq_chip, handle_simple_irq); + + desc = irq_to_desc(virq); + KUNIT_ASSERT_PTR_NE(test, desc, NULL); + + /* On some architectures, IRQs are NOREQUEST | NOPROBE by 
default. */ + irq_settings_clr_norequest(desc); + + return virq; +} + +static void irq_disable_depth_test(struct kunit *test) +{ + struct irq_desc *desc; + int virq, ret; + + virq = irq_test_setup_fake_irq(test, NULL); + + desc = irq_to_desc(virq); + KUNIT_ASSERT_PTR_NE(test, desc, NULL); + + ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_EXPECT_EQ(test, desc->depth, 0); + + disable_irq(virq); + KUNIT_EXPECT_EQ(test, desc->depth, 1); + + enable_irq(virq); + KUNIT_EXPECT_EQ(test, desc->depth, 0); + + free_irq(virq, NULL); +} + +static void irq_free_disabled_test(struct kunit *test) +{ + struct irq_desc *desc; + int virq, ret; + + virq = irq_test_setup_fake_irq(test, NULL); + + desc = irq_to_desc(virq); + KUNIT_ASSERT_PTR_NE(test, desc, NULL); + + ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_EXPECT_EQ(test, desc->depth, 0); + + disable_irq(virq); + KUNIT_EXPECT_EQ(test, desc->depth, 1); + + free_irq(virq, NULL); + KUNIT_EXPECT_GE(test, desc->depth, 1); + + ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_EQ(test, desc->depth, 0); + + free_irq(virq, NULL); +} + +static void irq_shutdown_depth_test(struct kunit *test) +{ + struct irq_desc *desc; + struct irq_data *data; + int virq, ret; + struct irq_affinity_desc affinity = { + .is_managed = 1, + .mask = CPU_MASK_ALL, + }; + + if (!IS_ENABLED(CONFIG_SMP)) + kunit_skip(test, "requires CONFIG_SMP for managed shutdown"); + + virq = irq_test_setup_fake_irq(test, &affinity); + + desc = irq_to_desc(virq); + KUNIT_ASSERT_PTR_NE(test, desc, NULL); + + data = irq_desc_get_irq_data(desc); + KUNIT_ASSERT_PTR_NE(test, data, NULL); + + ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_EXPECT_TRUE(test, irqd_is_activated(data)); + KUNIT_EXPECT_TRUE(test, irqd_is_started(data)); + KUNIT_EXPECT_TRUE(test, 
irqd_affinity_is_managed(data)); + + KUNIT_EXPECT_EQ(test, desc->depth, 0); + + disable_irq(virq); + KUNIT_EXPECT_EQ(test, desc->depth, 1); + + scoped_guard(raw_spinlock_irqsave, &desc->lock) + irq_shutdown_and_deactivate(desc); + + KUNIT_EXPECT_FALSE(test, irqd_is_activated(data)); + KUNIT_EXPECT_FALSE(test, irqd_is_started(data)); + + KUNIT_EXPECT_EQ(test, irq_activate(desc), 0); +#ifdef CONFIG_SMP + irq_startup_managed(desc); +#endif + + KUNIT_EXPECT_EQ(test, desc->depth, 1); + + enable_irq(virq); + KUNIT_EXPECT_EQ(test, desc->depth, 0); + + free_irq(virq, NULL); +} + +static void irq_cpuhotplug_test(struct kunit *test) +{ + struct irq_desc *desc; + struct irq_data *data; + int virq, ret; + struct irq_affinity_desc affinity = { + .is_managed = 1, + }; + + if (!IS_ENABLED(CONFIG_SMP)) + kunit_skip(test, "requires CONFIG_SMP for CPU hotplug"); + if (!get_cpu_device(1)) + kunit_skip(test, "requires more than 1 CPU for CPU hotplug"); + if (!cpu_is_hotpluggable(1)) + kunit_skip(test, "CPU 1 must be hotpluggable"); + if (!cpu_online(1)) + kunit_skip(test, "CPU 1 must be online"); + + cpumask_copy(&affinity.mask, cpumask_of(1)); + + virq = irq_test_setup_fake_irq(test, &affinity); + + desc = irq_to_desc(virq); + KUNIT_ASSERT_PTR_NE(test, desc, NULL); + + data = irq_desc_get_irq_data(desc); + KUNIT_ASSERT_PTR_NE(test, data, NULL); + + ret = request_irq(virq, noop_handler, 0, "test_irq", NULL); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_EXPECT_TRUE(test, irqd_is_activated(data)); + KUNIT_EXPECT_TRUE(test, irqd_is_started(data)); + KUNIT_EXPECT_TRUE(test, irqd_affinity_is_managed(data)); + + KUNIT_EXPECT_EQ(test, desc->depth, 0); + + disable_irq(virq); + KUNIT_EXPECT_EQ(test, desc->depth, 1); + + KUNIT_EXPECT_EQ(test, remove_cpu(1), 0); + KUNIT_EXPECT_GE(test, desc->depth, 1); + KUNIT_EXPECT_EQ(test, add_cpu(1), 0); + + KUNIT_EXPECT_EQ(test, desc->depth, 1); + + enable_irq(virq); + KUNIT_EXPECT_TRUE(test, irqd_is_activated(data)); + KUNIT_EXPECT_TRUE(test, 
irqd_is_started(data)); + KUNIT_EXPECT_EQ(test, desc->depth, 0); + + free_irq(virq, NULL); +} + +static struct kunit_case irq_test_cases[] = { + KUNIT_CASE(irq_disable_depth_test), + KUNIT_CASE(irq_free_disabled_test), + KUNIT_CASE(irq_shutdown_depth_test), + KUNIT_CASE(irq_cpuhotplug_test), + {} +}; + +static struct kunit_suite irq_test_suite = { + .name = "irq_test_cases", + .test_cases = irq_test_cases, +}; + +kunit_test_suite(irq_test_suite); +MODULE_DESCRIPTION("IRQ unit test suite"); +MODULE_LICENSE("GPL"); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 2267e6527db3..6acf268f005b 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -12,10 +12,10 @@ #include <linux/export.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> -#include <linux/radix-tree.h> -#include <linux/bitmap.h> +#include <linux/maple_tree.h> #include <linux/irqdomain.h> #include <linux/sysfs.h> +#include <linux/string_choices.h> #include "internals.h" @@ -93,11 +93,23 @@ static void desc_smp_init(struct irq_desc *desc, int node, #endif } +static void free_masks(struct irq_desc *desc) +{ +#ifdef CONFIG_GENERIC_PENDING_IRQ + free_cpumask_var(desc->pending_mask); +#endif + free_cpumask_var(desc->irq_common_data.affinity); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + free_cpumask_var(desc->irq_common_data.effective_affinity); +#endif +} + #else static inline int alloc_masks(struct irq_desc *desc, int node) { return 0; } static inline void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { } +static inline void free_masks(struct irq_desc *desc) { } #endif static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, @@ -123,15 +135,106 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, desc->name = NULL; desc->owner = owner; for_each_possible_cpu(cpu) - *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; + *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { }; desc_smp_init(desc, 
node, affinity); } -int nr_irqs = NR_IRQS; -EXPORT_SYMBOL_GPL(nr_irqs); +static unsigned int nr_irqs = NR_IRQS; + +/** + * irq_get_nr_irqs() - Number of interrupts supported by the system. + */ +unsigned int irq_get_nr_irqs(void) +{ + return nr_irqs; +} +EXPORT_SYMBOL_GPL(irq_get_nr_irqs); + +/** + * irq_set_nr_irqs() - Set the number of interrupts supported by the system. + * @nr: New number of interrupts. + * + * Return: @nr. + */ +unsigned int irq_set_nr_irqs(unsigned int nr) +{ + nr_irqs = nr; + + return nr; +} +EXPORT_SYMBOL_GPL(irq_set_nr_irqs); static DEFINE_MUTEX(sparse_irq_lock); -static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS); +static struct maple_tree sparse_irqs = MTREE_INIT_EXT(sparse_irqs, + MT_FLAGS_ALLOC_RANGE | + MT_FLAGS_LOCK_EXTERN | + MT_FLAGS_USE_RCU, + sparse_irq_lock); + +static int irq_find_free_area(unsigned int from, unsigned int cnt) +{ + MA_STATE(mas, &sparse_irqs, 0, 0); + + if (mas_empty_area(&mas, from, MAX_SPARSE_IRQS, cnt)) + return -ENOSPC; + return mas.index; +} + +static unsigned int irq_find_at_or_after(unsigned int offset) +{ + unsigned long index = offset; + struct irq_desc *desc; + + guard(rcu)(); + desc = mt_find(&sparse_irqs, &index, nr_irqs); + + return desc ? 
irq_desc_get_irq(desc) : nr_irqs; +} + +static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) +{ + MA_STATE(mas, &sparse_irqs, irq, irq); + WARN_ON(mas_store_gfp(&mas, desc, GFP_KERNEL) != 0); +} + +static void delete_irq_desc(unsigned int irq) +{ + MA_STATE(mas, &sparse_irqs, irq, irq); + mas_erase(&mas); +} + +#ifdef CONFIG_SPARSE_IRQ +static const struct kobj_type irq_kobj_type; +#endif + +static int init_desc(struct irq_desc *desc, int irq, int node, + unsigned int flags, + const struct cpumask *affinity, + struct module *owner) +{ + desc->kstat_irqs = alloc_percpu(struct irqstat); + if (!desc->kstat_irqs) + return -ENOMEM; + + if (alloc_masks(desc, node)) { + free_percpu(desc->kstat_irqs); + return -ENOMEM; + } + + raw_spin_lock_init(&desc->lock); + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + mutex_init(&desc->request_mutex); + init_waitqueue_head(&desc->wait_for_threads); + desc_set_defaults(irq, desc, node, affinity, owner); + irqd_set(&desc->irq_data, flags); + irq_resend_init(desc); +#ifdef CONFIG_SPARSE_IRQ + kobject_init(&desc->kobj, &irq_kobj_type); + init_rcu_head(&desc->rcu); +#endif + + return 0; +} #ifdef CONFIG_SPARSE_IRQ @@ -143,8 +246,7 @@ static struct kobject *irq_kobj_base; #define IRQ_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) -static ssize_t per_cpu_count_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t per_cpu_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); ssize_t ret = 0; @@ -154,113 +256,83 @@ static ssize_t per_cpu_count_show(struct kobject *kobj, for_each_possible_cpu(cpu) { unsigned int c = irq_desc_kstat_cpu(desc, cpu); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c); + ret += sysfs_emit_at(buf, ret, "%s%u", p, c); p = ","; } - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += sysfs_emit_at(buf, ret, "\n"); return ret; } 
IRQ_ATTR_RO(per_cpu_count); -static ssize_t chip_name_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t chip_name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - ssize_t ret = 0; - raw_spin_lock_irq(&desc->lock); - if (desc->irq_data.chip && desc->irq_data.chip->name) { - ret = scnprintf(buf, PAGE_SIZE, "%s\n", - desc->irq_data.chip->name); - } - raw_spin_unlock_irq(&desc->lock); - - return ret; + guard(raw_spinlock_irq)(&desc->lock); + if (desc->irq_data.chip && desc->irq_data.chip->name) + return sysfs_emit(buf, "%s\n", desc->irq_data.chip->name); + return 0; } IRQ_ATTR_RO(chip_name); -static ssize_t hwirq_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t hwirq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - ssize_t ret = 0; - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); if (desc->irq_data.domain) - ret = sprintf(buf, "%lu\n", desc->irq_data.hwirq); - raw_spin_unlock_irq(&desc->lock); - - return ret; + return sysfs_emit(buf, "%lu\n", desc->irq_data.hwirq); + return 0; } IRQ_ATTR_RO(hwirq); -static ssize_t type_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - ssize_t ret = 0; - - raw_spin_lock_irq(&desc->lock); - ret = sprintf(buf, "%s\n", - irqd_is_level_type(&desc->irq_data) ? "level" : "edge"); - raw_spin_unlock_irq(&desc->lock); - return ret; + guard(raw_spinlock_irq)(&desc->lock); + return sysfs_emit(buf, "%s\n", irqd_is_level_type(&desc->irq_data) ? 
"level" : "edge"); } IRQ_ATTR_RO(type); -static ssize_t wakeup_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t wakeup_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - ssize_t ret = 0; - - raw_spin_lock_irq(&desc->lock); - ret = sprintf(buf, "%s\n", - irqd_is_wakeup_set(&desc->irq_data) ? "enabled" : "disabled"); - raw_spin_unlock_irq(&desc->lock); - - return ret; + guard(raw_spinlock_irq)(&desc->lock); + return sysfs_emit(buf, "%s\n", str_enabled_disabled(irqd_is_wakeup_set(&desc->irq_data))); } IRQ_ATTR_RO(wakeup); -static ssize_t name_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - ssize_t ret = 0; - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); if (desc->name) - ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->name); - raw_spin_unlock_irq(&desc->lock); - - return ret; + return sysfs_emit(buf, "%s\n", desc->name); + return 0; } IRQ_ATTR_RO(name); -static ssize_t actions_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t actions_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); struct irqaction *action; ssize_t ret = 0; char *p = ""; - raw_spin_lock_irq(&desc->lock); - for (action = desc->action; action != NULL; action = action->next) { - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s", - p, action->name); - p = ","; + scoped_guard(raw_spinlock_irq, &desc->lock) { + for_each_action_of_desc(desc, action) { + ret += sysfs_emit_at(buf, ret, "%s%s", p, action->name); + p = ","; + } } - raw_spin_unlock_irq(&desc->lock); if (ret) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); - + ret += sysfs_emit_at(buf, ret, "\n"); 
return ret; } IRQ_ATTR_RO(actions); @@ -277,7 +349,7 @@ static struct attribute *irq_attrs[] = { }; ATTRIBUTE_GROUPS(irq); -static struct kobj_type irq_kobj_type = { +static const struct kobj_type irq_kobj_type = { .release = irq_kobj_release, .sysfs_ops = &kobj_sysfs_ops, .default_groups = irq_groups, @@ -288,22 +360,25 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc) if (irq_kobj_base) { /* * Continue even in case of failure as this is nothing - * crucial. + * crucial and failures in the late irq_sysfs_init() + * cannot be rolled back. */ if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq)) pr_warn("Failed to add kobject for irq %d\n", irq); + else + desc->istate |= IRQS_SYSFS; } } static void irq_sysfs_del(struct irq_desc *desc) { /* - * If irq_sysfs_init() has not yet been invoked (early boot), then - * irq_kobj_base is NULL and the descriptor was never added. - * kobject_del() complains about a object with no parent, so make - * it conditional. + * Only invoke kobject_del() when kobject_add() was successfully + * invoked for the descriptor. This covers both early boot, where + * sysfs is not initialized yet, and the case of a failed + * kobject_add() invocation. 
*/ - if (irq_kobj_base) + if (desc->istate & IRQS_SYSFS) kobject_del(&desc->kobj); } @@ -313,26 +388,21 @@ static int __init irq_sysfs_init(void) int irq; /* Prevent concurrent irq alloc/free */ - irq_lock_sparse(); - + guard(mutex)(&sparse_irq_lock); irq_kobj_base = kobject_create_and_add("irq", kernel_kobj); - if (!irq_kobj_base) { - irq_unlock_sparse(); + if (!irq_kobj_base) return -ENOMEM; - } /* Add the already allocated interrupts */ for_each_irq_desc(irq, desc) irq_sysfs_add(irq, desc); - irq_unlock_sparse(); - return 0; } postcore_initcall(irq_sysfs_init); #else /* !CONFIG_SYSFS */ -static struct kobj_type irq_kobj_type = { +static const struct kobj_type irq_kobj_type = { .release = irq_kobj_release, }; @@ -341,41 +411,14 @@ static void irq_sysfs_del(struct irq_desc *desc) {} #endif /* CONFIG_SYSFS */ -static RADIX_TREE(irq_desc_tree, GFP_KERNEL); - -static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) -{ - radix_tree_insert(&irq_desc_tree, irq, desc); -} - struct irq_desc *irq_to_desc(unsigned int irq) { - return radix_tree_lookup(&irq_desc_tree, irq); + return mtree_load(&sparse_irqs, irq); } #ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE EXPORT_SYMBOL_GPL(irq_to_desc); #endif -static void delete_irq_desc(unsigned int irq) -{ - radix_tree_delete(&irq_desc_tree, irq); -} - -#ifdef CONFIG_SMP -static void free_masks(struct irq_desc *desc) -{ -#ifdef CONFIG_GENERIC_PENDING_IRQ - free_cpumask_var(desc->pending_mask); -#endif - free_cpumask_var(desc->irq_common_data.affinity); -#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK - free_cpumask_var(desc->irq_common_data.effective_affinity); -#endif -} -#else -static inline void free_masks(struct irq_desc *desc) { } -#endif - void irq_lock_sparse(void) { mutex_lock(&sparse_irq_lock); @@ -391,34 +434,19 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, struct module *owner) { struct irq_desc *desc; + int ret; desc = kzalloc_node(sizeof(*desc), GFP_KERNEL, node); if (!desc) return NULL; 
- /* allocate based on nr_cpu_ids */ - desc->kstat_irqs = alloc_percpu(unsigned int); - if (!desc->kstat_irqs) - goto err_desc; - - if (alloc_masks(desc, node)) - goto err_kstat; - - raw_spin_lock_init(&desc->lock); - lockdep_set_class(&desc->lock, &irq_desc_lock_class); - mutex_init(&desc->request_mutex); - init_rcu_head(&desc->rcu); - desc_set_defaults(irq, desc, node, affinity, owner); - irqd_set(&desc->irq_data, flags); - kobject_init(&desc->kobj, &irq_kobj_type); + ret = init_desc(desc, irq, node, flags, affinity, owner); + if (unlikely(ret)) { + kfree(desc); + return NULL; + } return desc; - -err_kstat: - free_percpu(desc->kstat_irqs); -err_desc: - kfree(desc); - return NULL; } static void irq_kobj_release(struct kobject *kobj) @@ -489,6 +517,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, flags = IRQD_AFFINITY_MANAGED | IRQD_MANAGED_SHUTDOWN; } + flags |= IRQD_AFFINITY_SET; mask = &affinity->mask; node = cpu_to_node(cpumask_first(mask)); affinity++; @@ -501,7 +530,6 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, irq_sysfs_add(start + i, desc); irq_add_debugfs_entry(start + i, desc); } - bitmap_set(allocated_irqs, start, cnt); return start; err: @@ -510,12 +538,12 @@ err: return -ENOMEM; } -static int irq_expand_nr_irqs(unsigned int nr) +static bool irq_expand_nr_irqs(unsigned int nr) { - if (nr > IRQ_BITMAP_BITS) - return -ENOMEM; + if (nr > MAX_SPARSE_IRQS) + return false; nr_irqs = nr; - return 0; + return true; } int __init early_irq_init(void) @@ -530,18 +558,17 @@ int __init early_irq_init(void) printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n", NR_IRQS, nr_irqs, initcnt); - if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS)) - nr_irqs = IRQ_BITMAP_BITS; + if (WARN_ON(nr_irqs > MAX_SPARSE_IRQS)) + nr_irqs = MAX_SPARSE_IRQS; - if (WARN_ON(initcnt > IRQ_BITMAP_BITS)) - initcnt = IRQ_BITMAP_BITS; + if (WARN_ON(initcnt > MAX_SPARSE_IRQS)) + initcnt = MAX_SPARSE_IRQS; if (initcnt > nr_irqs) 
nr_irqs = initcnt; for (i = 0; i < initcnt; i++) { desc = alloc_desc(i, node, 0, NULL, NULL); - set_bit(i, allocated_irqs); irq_insert_desc(i, desc); } return arch_early_irq_init(); @@ -560,24 +587,29 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { int __init early_irq_init(void) { int count, i, node = first_online_node; - struct irq_desc *desc; + int ret; init_irq_default_affinity(); printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS); - desc = irq_desc; count = ARRAY_SIZE(irq_desc); for (i = 0; i < count; i++) { - desc[i].kstat_irqs = alloc_percpu(unsigned int); - alloc_masks(&desc[i], node); - raw_spin_lock_init(&desc[i].lock); - lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - mutex_init(&desc[i].request_mutex); - desc_set_defaults(i, &desc[i], node, NULL, NULL); + ret = init_desc(irq_desc + i, i, node, 0, NULL, NULL); + if (unlikely(ret)) + goto __free_desc_res; } + return arch_early_irq_init(); + +__free_desc_res: + while (--i >= 0) { + free_masks(irq_desc + i); + free_percpu(irq_desc[i].kstat_irqs); + } + + return ret; } struct irq_desc *irq_to_desc(unsigned int irq) @@ -589,11 +621,10 @@ EXPORT_SYMBOL(irq_to_desc); static void free_desc(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - raw_spin_lock_irqsave(&desc->lock, flags); - desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL); - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) + desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL); + delete_irq_desc(irq); } static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, @@ -606,29 +637,21 @@ static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, struct irq_desc *desc = irq_to_desc(start + i); desc->owner = owner; + irq_insert_desc(start + i, desc); } - bitmap_set(allocated_irqs, start, cnt); return start; } -static int irq_expand_nr_irqs(unsigned int nr) +static inline bool 
irq_expand_nr_irqs(unsigned int nr) { - return -ENOMEM; + return false; } void irq_mark_irq(unsigned int irq) { - mutex_lock(&sparse_irq_lock); - bitmap_set(allocated_irqs, irq, 1); - mutex_unlock(&sparse_irq_lock); -} - -#ifdef CONFIG_GENERIC_IRQ_LEGACY -void irq_init_desc(unsigned int irq) -{ - free_desc(irq); + guard(mutex)(&sparse_irq_lock); + irq_insert_desc(irq, irq_desc + irq); } -#endif #endif /* !CONFIG_SPARSE_IRQ */ @@ -640,7 +663,7 @@ int handle_irq_desc(struct irq_desc *desc) return -EINVAL; data = irq_desc_get_irq_data(desc); - if (WARN_ON_ONCE(!in_irq() && handle_enforce_irqctx(data))) + if (WARN_ON_ONCE(!in_hardirq() && irqd_is_handle_enforce_irqctx(data))) return -EPERM; generic_handle_irq_desc(desc); @@ -662,6 +685,29 @@ int generic_handle_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(generic_handle_irq); +/** + * generic_handle_irq_safe - Invoke the handler for a particular irq from any + * context. + * @irq: The irq number to handle + * + * Returns: 0 on success, a negative value on error. + * + * This function can be called from any context (IRQ or process context). It + * will report an error if not invoked from IRQ context and the irq has been + * marked to enforce IRQ-context only. + */ +int generic_handle_irq_safe(unsigned int irq) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = handle_irq_desc(irq_to_desc(irq)); + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(generic_handle_irq_safe); + #ifdef CONFIG_IRQ_DOMAIN /** * generic_handle_domain_irq - Invoke the handler for a HW irq belonging @@ -676,11 +722,34 @@ EXPORT_SYMBOL_GPL(generic_handle_irq); */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) { - WARN_ON_ONCE(!in_irq()); return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); + /** + * generic_handle_domain_irq_safe - Invoke the handler for a HW irq belonging + * to a domain from any context.
+ * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * + * Returns: 0 on success, a negative value on error. + * + * This function can be called from any context (IRQ or process + * context). If the interrupt is marked as 'enforce IRQ-context only' then + * the function must be invoked from hard interrupt context. + */ +int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = handle_irq_desc(irq_resolve_mapping(domain, hwirq)); + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(generic_handle_domain_irq_safe); + /** * generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging * to a domain. @@ -713,12 +782,9 @@ void irq_free_descs(unsigned int from, unsigned int cnt) if (from >= nr_irqs || (from + cnt) > nr_irqs) return; - mutex_lock(&sparse_irq_lock); + guard(mutex)(&sparse_irq_lock); for (i = 0; i < cnt; i++) free_desc(from + i); - - bitmap_clear(allocated_irqs, from, cnt); - mutex_unlock(&sparse_irq_lock); } EXPORT_SYMBOL_GPL(irq_free_descs); @@ -735,11 +801,10 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * * Returns the first irq number or error code */ -int __ref -__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, - struct module *owner, const struct irq_affinity_desc *affinity) +int __ref __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, + struct module *owner, const struct irq_affinity_desc *affinity) { - int start, ret; + int start; if (!cnt) return -EINVAL; @@ -757,23 +822,17 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, from = arch_dynirq_lower_bound(from); } - mutex_lock(&sparse_irq_lock); + guard(mutex)(&sparse_irq_lock); - start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS, - from, cnt, 0); - ret = -EEXIST; + start = irq_find_free_area(from, cnt); if (irq >=0 && start != irq) - goto unlock; 
+ return -EEXIST; if (start + cnt > nr_irqs) { - ret = irq_expand_nr_irqs(start + cnt); - if (ret) - goto unlock; + if (!irq_expand_nr_irqs(start + cnt)) + return -ENOMEM; } - ret = alloc_descs(start, cnt, node, affinity, owner); -unlock: - mutex_unlock(&sparse_irq_lock); - return ret; + return alloc_descs(start, cnt, node, affinity, owner); } EXPORT_SYMBOL_GPL(__irq_alloc_descs); @@ -785,30 +844,30 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs); */ unsigned int irq_get_next_irq(unsigned int offset) { - return find_next_bit(allocated_irqs, nr_irqs, offset); + return irq_find_at_or_after(offset); } -struct irq_desc * -__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, - unsigned int check) +struct irq_desc *__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, + unsigned int check) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc; - if (desc) { - if (check & _IRQ_DESC_CHECK) { - if ((check & _IRQ_DESC_PERCPU) && - !irq_settings_is_per_cpu_devid(desc)) - return NULL; - - if (!(check & _IRQ_DESC_PERCPU) && - irq_settings_is_per_cpu_devid(desc)) - return NULL; - } + desc = irq_to_desc(irq); + if (!desc) + return NULL; + + if (check & _IRQ_DESC_CHECK) { + if ((check & _IRQ_DESC_PERCPU) && !irq_settings_is_per_cpu_devid(desc)) + return NULL; - if (bus) - chip_bus_lock(desc); - raw_spin_lock_irqsave(&desc->lock, *flags); + if (!(check & _IRQ_DESC_PERCPU) && irq_settings_is_per_cpu_devid(desc)) + return NULL; } + + if (bus) + chip_bus_lock(desc); + raw_spin_lock_irqsave(&desc->lock, *flags); + return desc; } @@ -820,15 +879,11 @@ void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus) chip_bus_sync_unlock(desc); } -int irq_set_percpu_devid_partition(unsigned int irq, - const struct cpumask *affinity) +int irq_set_percpu_devid(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - if (!desc) - return -EINVAL; - - if (desc->percpu_enabled) + if (!desc || desc->percpu_enabled) return -EINVAL; 
desc->percpu_enabled = kzalloc(sizeof(*desc->percpu_enabled), GFP_KERNEL); @@ -836,34 +891,10 @@ int irq_set_percpu_devid_partition(unsigned int irq, if (!desc->percpu_enabled) return -ENOMEM; - if (affinity) - desc->percpu_affinity = affinity; - else - desc->percpu_affinity = cpu_possible_mask; - irq_set_percpu_devid_flags(irq); return 0; } -int irq_set_percpu_devid(unsigned int irq) -{ - return irq_set_percpu_devid_partition(irq, NULL); -} - -int irq_get_percpu_devid_partition(unsigned int irq, struct cpumask *affinity) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (!desc || !desc->percpu_enabled) - return -EINVAL; - - if (affinity) - cpumask_copy(affinity, desc->percpu_affinity); - - return 0; -} -EXPORT_SYMBOL_GPL(irq_get_percpu_devid_partition); - void kstat_incr_irq_this_cpu(unsigned int irq) { kstat_incr_irqs_this_cpu(irq_to_desc(irq)); @@ -882,33 +913,58 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { struct irq_desc *desc = irq_to_desc(irq); - return desc && desc->kstat_irqs ? - *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; + return desc && desc->kstat_irqs ? 
per_cpu(desc->kstat_irqs->cnt, cpu) : 0; } -static bool irq_is_nmi(struct irq_desc *desc) +static unsigned int kstat_irqs_desc(struct irq_desc *desc, const struct cpumask *cpumask) { - return desc->istate & IRQS_NMI; -} - -static unsigned int kstat_irqs(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); unsigned int sum = 0; int cpu; - if (!desc || !desc->kstat_irqs) - return 0; if (!irq_settings_is_per_cpu_devid(desc) && !irq_settings_is_per_cpu(desc) && !irq_is_nmi(desc)) return data_race(desc->tot_count); - for_each_possible_cpu(cpu) - sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu)); + for_each_cpu(cpu, cpumask) + sum += data_race(per_cpu(desc->kstat_irqs->cnt, cpu)); return sum; } +static unsigned int kstat_irqs(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !desc->kstat_irqs) + return 0; + return kstat_irqs_desc(desc, cpu_possible_mask); +} + +#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT + +void kstat_snapshot_irqs(void) +{ + struct irq_desc *desc; + unsigned int irq; + + for_each_irq_desc(irq, desc) { + if (!desc->kstat_irqs) + continue; + this_cpu_write(desc->kstat_irqs->ref, this_cpu_read(desc->kstat_irqs->cnt)); + } +} + +unsigned int kstat_get_irq_since_snapshot(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !desc->kstat_irqs) + return 0; + return this_cpu_read(desc->kstat_irqs->cnt) - this_cpu_read(desc->kstat_irqs->ref); +} + +#endif + /** * kstat_irqs_usr - Get the statistics for an interrupt from thread context * @irq: The interrupt number diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index bf38c546aa25..2652c4cfd877 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -25,7 +25,11 @@ static DEFINE_MUTEX(irq_domain_mutex); static struct irq_domain *irq_default_domain; +static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc 
*affinity); static void irq_domain_check_hierarchy(struct irq_domain *domain); +static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq); struct irqchip_fwid { struct fwnode_handle fwnode; @@ -107,6 +111,7 @@ EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode); /** * irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle + * @fwnode: fwnode_handle to free * * Free a fwnode_handle allocated with irq_domain_alloc_fwnode. */ @@ -114,7 +119,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) { struct irqchip_fwid *fwid; - if (WARN_ON(!is_fwnode_irqchip(fwnode))) + if (!fwnode || WARN_ON(!is_fwnode_irqchip(fwnode))) return; fwid = container_of(fwnode, struct irqchip_fwid, fwnode); @@ -123,119 +128,252 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) } EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); -/** - * __irq_domain_add() - Allocate a new irq_domain data structure - * @fwnode: firmware node for the interrupt controller - * @size: Size of linear map; 0 for radix mapping only - * @hwirq_max: Maximum number of interrupts supported by controller - * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no - * direct mapping - * @ops: domain callbacks - * @host_data: Controller private data pointer - * - * Allocates and initializes an irq_domain structure. - * Returns pointer to IRQ domain, or NULL on failure. 
- */ -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, - irq_hw_number_t hwirq_max, int direct_max, - const struct irq_domain_ops *ops, - void *host_data) +static int alloc_name(struct irq_domain *domain, char *base, enum irq_domain_bus_token bus_token) { - struct irqchip_fwid *fwid; - struct irq_domain *domain; + if (bus_token == DOMAIN_BUS_ANY) + domain->name = kasprintf(GFP_KERNEL, "%s", base); + else + domain->name = kasprintf(GFP_KERNEL, "%s-%d", base, bus_token); + if (!domain->name) + return -ENOMEM; + + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + return 0; +} +static int alloc_fwnode_name(struct irq_domain *domain, const struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token, const char *suffix) +{ + const char *sep = suffix ? "-" : ""; + const char *suf = suffix ? : ""; + char *name; + + if (bus_token == DOMAIN_BUS_ANY) + name = kasprintf(GFP_KERNEL, "%pfw%s%s", fwnode, sep, suf); + else + name = kasprintf(GFP_KERNEL, "%pfw%s%s-%d", fwnode, sep, suf, bus_token); + if (!name) + return -ENOMEM; + + /* + * fwnode paths contain '/', which debugfs is legitimately unhappy + * about. Replace them with ':', which does the trick and is not as + * offensive as '\'... 
+ */ + domain->name = strreplace(name, '/', ':'); + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + return 0; +} + +static int alloc_unknown_name(struct irq_domain *domain, enum irq_domain_bus_token bus_token) +{ static atomic_t unknown_domains; + int id = atomic_inc_return(&unknown_domains); - if (WARN_ON((size && direct_max) || - (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max))) - return NULL; + if (bus_token == DOMAIN_BUS_ANY) + domain->name = kasprintf(GFP_KERNEL, "unknown-%d", id); + else + domain->name = kasprintf(GFP_KERNEL, "unknown-%d-%d", id, bus_token); + if (!domain->name) + return -ENOMEM; - domain = kzalloc_node(struct_size(domain, revmap, size), - GFP_KERNEL, of_node_to_nid(to_of_node(fwnode))); - if (!domain) - return NULL; + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + return 0; +} + +static int irq_domain_set_name(struct irq_domain *domain, const struct irq_domain_info *info) +{ + enum irq_domain_bus_token bus_token = info->bus_token; + const struct fwnode_handle *fwnode = info->fwnode; if (is_fwnode_irqchip(fwnode)) { - fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + + /* + * The name_suffix is only intended to be used to avoid a name + * collision when multiple domains are created for a single + * device and the name is picked using a real device node. + * (Typical use-case is regmap-IRQ controllers for devices + * providing more than one physical IRQ.) There should be no + * need to use name_suffix with irqchip-fwnode. 
+ */ + if (info->name_suffix) + return -EINVAL; switch (fwid->type) { case IRQCHIP_FWNODE_NAMED: case IRQCHIP_FWNODE_NAMED_ID: - domain->fwnode = fwnode; - domain->name = kstrdup(fwid->name, GFP_KERNEL); - if (!domain->name) { - kfree(domain); - return NULL; - } - domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; - break; + return alloc_name(domain, fwid->name, bus_token); default: - domain->fwnode = fwnode; domain->name = fwid->name; - break; + if (bus_token != DOMAIN_BUS_ANY) + return alloc_name(domain, fwid->name, bus_token); } - } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) || - is_software_node(fwnode)) { - char *name; - /* - * fwnode paths contain '/', which debugfs is legitimately - * unhappy about. Replace them with ':', which does - * the trick and is not as offensive as '\'... - */ - name = kasprintf(GFP_KERNEL, "%pfw", fwnode); - if (!name) { - kfree(domain); - return NULL; - } + } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) || is_software_node(fwnode)) { + return alloc_fwnode_name(domain, fwnode, bus_token, info->name_suffix); + } - strreplace(name, '/', ':'); + if (domain->name) + return 0; - domain->name = name; - domain->fwnode = fwnode; - domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; - } + if (fwnode) + pr_err("Invalid fwnode type for irqdomain\n"); + return alloc_unknown_name(domain, bus_token); +} - if (!domain->name) { - if (fwnode) - pr_err("Invalid fwnode type for irqdomain\n"); - domain->name = kasprintf(GFP_KERNEL, "unknown-%d", - atomic_inc_return(&unknown_domains)); - if (!domain->name) { - kfree(domain); - return NULL; - } - domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; +static struct irq_domain *__irq_domain_create(const struct irq_domain_info *info) +{ + struct irq_domain *domain; + int err; + + if (WARN_ON((info->size && info->direct_max) || + (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && info->direct_max) || + (info->direct_max && info->direct_max != info->hwirq_max))) + return ERR_PTR(-EINVAL); + + domain = 
kzalloc_node(struct_size(domain, revmap, info->size), + GFP_KERNEL, of_node_to_nid(to_of_node(info->fwnode))); + if (!domain) + return ERR_PTR(-ENOMEM); + + err = irq_domain_set_name(domain, info); + if (err) { + kfree(domain); + return ERR_PTR(err); } - fwnode_handle_get(fwnode); - fwnode_dev_initialized(fwnode, true); + domain->fwnode = fwnode_handle_get(info->fwnode); + fwnode_dev_initialized(domain->fwnode, true); /* Fill structure */ INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); - mutex_init(&domain->revmap_mutex); - domain->ops = ops; - domain->host_data = host_data; - domain->hwirq_max = hwirq_max; + domain->ops = info->ops; + domain->host_data = info->host_data; + domain->bus_token = info->bus_token; + domain->hwirq_max = info->hwirq_max; - if (direct_max) { - size = direct_max; + if (info->direct_max) domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP; - } - domain->revmap_size = size; + domain->revmap_size = info->size; + + /* + * Hierarchical domains use the domain lock of the root domain + * (innermost domain). + * + * For non-hierarchical domains (as for root domains), the root + * pointer is set to the domain itself so that &domain->root->mutex + * always points to the right lock. 
+ */ + mutex_init(&domain->mutex); + domain->root = domain; irq_domain_check_hierarchy(domain); + return domain; +} + +static void __irq_domain_publish(struct irq_domain *domain) +{ mutex_lock(&irq_domain_mutex); debugfs_add_domain_dir(domain); list_add(&domain->link, &irq_domain_list); mutex_unlock(&irq_domain_mutex); pr_debug("Added domain %s\n", domain->name); +} + +static void irq_domain_free(struct irq_domain *domain) +{ + fwnode_dev_initialized(domain->fwnode, false); + fwnode_handle_put(domain->fwnode); + if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) + kfree(domain->name); + kfree(domain); +} + +static void irq_domain_instantiate_descs(const struct irq_domain_info *info) +{ + if (!IS_ENABLED(CONFIG_SPARSE_IRQ)) + return; + + if (irq_alloc_descs(info->virq_base, info->virq_base, info->size, + of_node_to_nid(to_of_node(info->fwnode))) < 0) { + pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", + info->virq_base); + } +} + +static struct irq_domain *__irq_domain_instantiate(const struct irq_domain_info *info, + bool cond_alloc_descs, bool force_associate) +{ + struct irq_domain *domain; + int err; + + domain = __irq_domain_create(info); + if (IS_ERR(domain)) + return domain; + + domain->flags |= info->domain_flags; + domain->exit = info->exit; + domain->dev = info->dev; + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + if (info->parent) { + domain->root = info->parent->root; + domain->parent = info->parent; + } +#endif + + if (info->dgc_info) { + err = irq_domain_alloc_generic_chips(domain, info->dgc_info); + if (err) + goto err_domain_free; + } + + if (info->init) { + err = info->init(domain); + if (err) + goto err_domain_gc_remove; + } + + __irq_domain_publish(domain); + + if (cond_alloc_descs && info->virq_base > 0) + irq_domain_instantiate_descs(info); + + /* + * Legacy interrupt domains have a fixed Linux interrupt number + * associated. Other interrupt domains can request association by + * providing a Linux interrupt number > 0. 
+ */ + if (force_associate || info->virq_base > 0) { + irq_domain_associate_many(domain, info->virq_base, info->hwirq_base, + info->size - info->hwirq_base); + } + return domain; + +err_domain_gc_remove: + if (info->dgc_info) + irq_domain_remove_generic_chips(domain); +err_domain_free: + irq_domain_free(domain); + return ERR_PTR(err); } -EXPORT_SYMBOL_GPL(__irq_domain_add); + +/** + * irq_domain_instantiate() - Instantiate a new irq domain data structure + * @info: Domain information pointer pointing to the information for this domain + * + * Return: A pointer to the instantiated irq domain or an ERR_PTR value. + */ +struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info) +{ + return __irq_domain_instantiate(info, false, false); +} +EXPORT_SYMBOL_GPL(irq_domain_instantiate); /** * irq_domain_remove() - Remove an irq domain. @@ -247,6 +385,9 @@ EXPORT_SYMBOL_GPL(__irq_domain_add); */ void irq_domain_remove(struct irq_domain *domain) { + if (domain->exit) + domain->exit(domain); + mutex_lock(&irq_domain_mutex); debugfs_remove_domain_dir(domain); @@ -258,17 +399,15 @@ void irq_domain_remove(struct irq_domain *domain) * If the going away domain is the default one, reset it. 
*/ if (unlikely(irq_default_domain == domain)) - irq_set_default_host(NULL); + irq_set_default_domain(NULL); mutex_unlock(&irq_domain_mutex); - pr_debug("Removed domain %s\n", domain->name); + if (domain->flags & IRQ_DOMAIN_FLAG_DESTROY_GC) + irq_domain_remove_generic_chips(domain); - fwnode_dev_initialized(domain->fwnode, false); - fwnode_handle_put(domain->fwnode); - if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) - kfree(domain->name); - kfree(domain); + pr_debug("Removed domain %s\n", domain->name); + irq_domain_free(domain); } EXPORT_SYMBOL_GPL(irq_domain_remove); @@ -328,55 +467,20 @@ struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data) { - struct irq_domain *domain; - - domain = __irq_domain_add(fwnode, size, size, 0, ops, host_data); - if (!domain) - return NULL; - - if (first_irq > 0) { - if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { - /* attempt to allocated irq_descs */ - int rc = irq_alloc_descs(first_irq, first_irq, size, - of_node_to_nid(to_of_node(fwnode))); - if (rc < 0) - pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", - first_irq); - } - irq_domain_associate_many(domain, first_irq, 0, size); - } + struct irq_domain_info info = { + .fwnode = fwnode, + .size = size, + .hwirq_max = size, + .virq_base = first_irq, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *domain = __irq_domain_instantiate(&info, true, false); - return domain; + return IS_ERR(domain) ? NULL : domain; } EXPORT_SYMBOL_GPL(irq_domain_create_simple); -/** - * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. - * @of_node: pointer to interrupt controller's device tree node. - * @size: total number of irqs in legacy mapping - * @first_irq: first number of irq block assigned to the domain - * @first_hwirq: first hwirq number to use for the translation. 
Should normally - * be '0', but a positive integer can be used if the effective - * hwirqs numbering does not begin at zero. - * @ops: map/unmap domain callbacks - * @host_data: Controller private data pointer - * - * Note: the map() callback will be called before this function returns - * for all legacy interrupts except 0 (which is always the invalid irq for - * a legacy controller). - */ -struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, - unsigned int size, - unsigned int first_irq, - irq_hw_number_t first_hwirq, - const struct irq_domain_ops *ops, - void *host_data) -{ - return irq_domain_create_legacy(of_node_to_fwnode(of_node), size, - first_irq, first_hwirq, ops, host_data); -} -EXPORT_SYMBOL_GPL(irq_domain_add_legacy); - struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, @@ -384,13 +488,18 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data) { - struct irq_domain *domain; - - domain = __irq_domain_add(fwnode, first_hwirq + size, first_hwirq + size, 0, ops, host_data); - if (domain) - irq_domain_associate_many(domain, first_irq, first_hwirq, size); + struct irq_domain_info info = { + .fwnode = fwnode, + .size = first_hwirq + size, + .hwirq_max = first_hwirq + size, + .hwirq_base = first_hwirq, + .virq_base = first_irq, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *domain = __irq_domain_instantiate(&info, false, true); - return domain; + return IS_ERR(domain) ? NULL : domain; } EXPORT_SYMBOL_GPL(irq_domain_create_legacy); @@ -406,7 +515,8 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, struct fwnode_handle *fwnode = fwspec->fwnode; int rc; - /* We might want to match the legacy controller last since + /* + * We might want to match the legacy controller last since * it might potentially be set to match all interrupts in * the absence of a device node. 
This isn't a problem so far * yet though... @@ -417,7 +527,7 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, */ mutex_lock(&irq_domain_mutex); list_for_each_entry(h, &irq_domain_list, link) { - if (h->ops->select && fwspec->param_count) + if (h->ops->select && bus_token != DOMAIN_BUS_ANY) rc = h->ops->select(h, fwspec, bus_token); else if (h->ops->match) rc = h->ops->match(h, to_of_node(fwnode), bus_token); @@ -437,32 +547,7 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, EXPORT_SYMBOL_GPL(irq_find_matching_fwspec); /** - * irq_domain_check_msi_remap - Check whether all MSI irq domains implement - * IRQ remapping - * - * Return: false if any MSI irq domain does not support IRQ remapping, - * true otherwise (including if there is no MSI irq domain) - */ -bool irq_domain_check_msi_remap(void) -{ - struct irq_domain *h; - bool ret = true; - - mutex_lock(&irq_domain_mutex); - list_for_each_entry(h, &irq_domain_list, link) { - if (irq_domain_is_msi(h) && - !irq_domain_hierarchical_is_msi_remap(h)) { - ret = false; - break; - } - } - mutex_unlock(&irq_domain_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(irq_domain_check_msi_remap); - -/** - * irq_set_default_host() - Set a "default" irq domain + * irq_set_default_domain() - Set a "default" irq domain * @domain: default domain pointer * * For convenience, it's possible to set a "default" domain that will be used @@ -470,16 +555,16 @@ EXPORT_SYMBOL_GPL(irq_domain_check_msi_remap); * platforms that want to manipulate a few hard coded interrupt numbers that * aren't properly represented in the device-tree. 
*/ -void irq_set_default_host(struct irq_domain *domain) +void irq_set_default_domain(struct irq_domain *domain) { pr_debug("Default domain set to @0x%p\n", domain); irq_default_domain = domain; } -EXPORT_SYMBOL_GPL(irq_set_default_host); +EXPORT_SYMBOL_GPL(irq_set_default_domain); /** - * irq_get_default_host() - Retrieve the "default" irq domain + * irq_get_default_domain() - Retrieve the "default" irq domain * * Returns: the default domain, if any. * @@ -487,11 +572,11 @@ EXPORT_SYMBOL_GPL(irq_set_default_host); * systems that cannot implement a firmware->fwnode mapping (which * both DT and ACPI provide). */ -struct irq_domain *irq_get_default_host(void) +struct irq_domain *irq_get_default_domain(void) { return irq_default_domain; } -EXPORT_SYMBOL_GPL(irq_get_default_host); +EXPORT_SYMBOL_GPL(irq_get_default_domain); static bool irq_domain_is_nomap(struct irq_domain *domain) { @@ -502,30 +587,34 @@ static bool irq_domain_is_nomap(struct irq_domain *domain) static void irq_domain_clear_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { + lockdep_assert_held(&domain->root->mutex); + if (irq_domain_is_nomap(domain)) return; - mutex_lock(&domain->revmap_mutex); if (hwirq < domain->revmap_size) rcu_assign_pointer(domain->revmap[hwirq], NULL); else radix_tree_delete(&domain->revmap_tree, hwirq); - mutex_unlock(&domain->revmap_mutex); } static void irq_domain_set_mapping(struct irq_domain *domain, irq_hw_number_t hwirq, struct irq_data *irq_data) { + /* + * This also makes sure that all domains point to the same root when + * called from irq_domain_insert_irq() for each domain in a hierarchy. 
+ */ + lockdep_assert_held(&domain->root->mutex); + if (irq_domain_is_nomap(domain)) return; - mutex_lock(&domain->revmap_mutex); if (hwirq < domain->revmap_size) rcu_assign_pointer(domain->revmap[hwirq], irq_data); else radix_tree_insert(&domain->revmap_tree, hwirq, irq_data); - mutex_unlock(&domain->revmap_mutex); } static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) @@ -538,6 +627,9 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) return; hwirq = irq_data->hwirq; + + mutex_lock(&domain->root->mutex); + irq_set_status_flags(irq, IRQ_NOREQUEST); /* remove chip and handler */ @@ -557,10 +649,12 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) /* Clear reverse map for this hwirq */ irq_domain_clear_mapping(domain, hwirq); + + mutex_unlock(&domain->root->mutex); } -int irq_domain_associate(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq) +static int irq_domain_associate_locked(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) { struct irq_data *irq_data = irq_get_irq_data(virq); int ret; @@ -573,7 +667,6 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq, if (WARN(irq_data->domain, "error: virq%i is already associated", virq)) return -EINVAL; - mutex_lock(&irq_domain_mutex); irq_data->hwirq = hwirq; irq_data->domain = domain; if (domain->ops->map) { @@ -590,23 +683,29 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq, } irq_data->domain = NULL; irq_data->hwirq = 0; - mutex_unlock(&irq_domain_mutex); return ret; } - - /* If not already assigned, give the domain the chip's name */ - if (!domain->name && irq_data->chip) - domain->name = irq_data->chip->name; } domain->mapcount++; irq_domain_set_mapping(domain, hwirq, irq_data); - mutex_unlock(&irq_domain_mutex); irq_clear_status_flags(virq, IRQ_NOREQUEST); return 0; } + +int irq_domain_associate(struct irq_domain *domain, unsigned int 
virq, + irq_hw_number_t hwirq) +{ + int ret; + + mutex_lock(&domain->root->mutex); + ret = irq_domain_associate_locked(domain, virq, hwirq); + mutex_unlock(&domain->root->mutex); + + return ret; +} EXPORT_SYMBOL_GPL(irq_domain_associate); void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, @@ -619,9 +718,8 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, of_node_full_name(of_node), irq_base, (int)hwirq_base, count); - for (i = 0; i < count; i++) { + for (i = 0; i < count; i++) irq_domain_associate(domain, irq_base + i, hwirq_base + i); - } } EXPORT_SYMBOL_GPL(irq_domain_associate_many); @@ -650,9 +748,9 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) pr_debug("create_direct virq allocation failed\n"); return 0; } - if (virq >= domain->revmap_size) { - pr_err("ERROR: no free irqs available below %i maximum\n", - domain->revmap_size); + if (virq >= domain->hwirq_max) { + pr_err("ERROR: no free irqs available below %lu maximum\n", + domain->hwirq_max); irq_free_desc(virq); return 0; } @@ -668,6 +766,34 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) EXPORT_SYMBOL_GPL(irq_create_direct_mapping); #endif +static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain, + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity) +{ + struct device_node *of_node = irq_domain_get_of_node(domain); + int virq; + + pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); + + /* Allocate a virtual interrupt number */ + virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), + affinity); + if (virq <= 0) { + pr_debug("-> virq allocation failed\n"); + return 0; + } + + if (irq_domain_associate_locked(domain, virq, hwirq)) { + irq_free_desc(virq); + return 0; + } + + pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", + hwirq, of_node_full_name(of_node), virq); + 
+ return virq; +} + /** * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space * @domain: domain owning this hardware interrupt or NULL for default domain @@ -680,14 +806,11 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping); * on the number returned from that call. */ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, - irq_hw_number_t hwirq, - const struct irq_affinity_desc *affinity) + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity) { - struct device_node *of_node; int virq; - pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); - /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; @@ -695,32 +818,19 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); return 0; } - pr_debug("-> using domain @%p\n", domain); - of_node = irq_domain_get_of_node(domain); + mutex_lock(&domain->root->mutex); /* Check if mapping already exists */ virq = irq_find_mapping(domain, hwirq); if (virq) { - pr_debug("-> existing mapping on virq %d\n", virq); - return virq; + pr_debug("existing mapping on virq %d\n", virq); + goto out; } - /* Allocate a virtual interrupt number */ - virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), - affinity); - if (virq <= 0) { - pr_debug("-> virq allocation failed\n"); - return 0; - } - - if (irq_domain_associate(domain, virq, hwirq)) { - irq_free_desc(virq); - return 0; - } - - pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", - hwirq, of_node_full_name(of_node), virq); + virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity); +out: + mutex_unlock(&domain->root->mutex); return virq; } @@ -749,7 +859,7 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, { int i; - fwspec->fwnode = of_node_to_fwnode(np); + fwspec->fwnode = of_fwnode_handle(np); fwspec->param_count = count; for (i = 0; i < count; i++) @@ 
-757,13 +867,9 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, } EXPORT_SYMBOL_GPL(of_phandle_args_to_fwspec); -unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) +static struct irq_domain *fwspec_to_domain(struct irq_fwspec *fwspec) { struct irq_domain *domain; - struct irq_data *irq_data; - irq_hw_number_t hwirq; - unsigned int type = IRQ_TYPE_NONE; - int virq; if (fwspec->fwnode) { domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED); @@ -773,6 +879,32 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) domain = irq_default_domain; } + return domain; +} + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +int irq_populate_fwspec_info(struct irq_fwspec *fwspec, struct irq_fwspec_info *info) +{ + struct irq_domain *domain = fwspec_to_domain(fwspec); + + memset(info, 0, sizeof(*info)); + + if (!domain || !domain->ops->get_fwspec_info) + return 0; + + return domain->ops->get_fwspec_info(fwspec, info); +} +#endif + +unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) +{ + unsigned int type = IRQ_TYPE_NONE; + struct irq_domain *domain; + struct irq_data *irq_data; + irq_hw_number_t hwirq; + int virq; + + domain = fwspec_to_domain(fwspec); if (!domain) { pr_warn("no irq domain found for %s !\n", of_node_full_name(to_of_node(fwspec->fwnode))); @@ -789,6 +921,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK)) type &= IRQ_TYPE_SENSE_MASK; + mutex_lock(&domain->root->mutex); + /* * If we've already configured this interrupt, * don't do it again, or hell will break loose. @@ -801,7 +935,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) * interrupt number. 
*/ if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) - return virq; + goto out; /* * If the trigger type has not been set yet, then set @@ -809,40 +943,50 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) */ if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) { irq_data = irq_get_irq_data(virq); - if (!irq_data) - return 0; + if (!irq_data) { + virq = 0; + goto out; + } irqd_set_trigger_type(irq_data, type); - return virq; + goto out; } pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n", hwirq, of_node_full_name(to_of_node(fwspec->fwnode))); - return 0; + virq = 0; + goto out; } if (irq_domain_is_hierarchy(domain)) { - virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec); - if (virq <= 0) - return 0; + if (irq_domain_is_msi_device(domain)) { + mutex_unlock(&domain->root->mutex); + virq = msi_device_domain_alloc_wired(domain, hwirq, type); + mutex_lock(&domain->root->mutex); + } else + virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE, + fwspec, false, NULL); + if (virq <= 0) { + virq = 0; + goto out; + } } else { /* Create mapping */ - virq = irq_create_mapping(domain, hwirq); + virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL); if (!virq) - return virq; + goto out; } irq_data = irq_get_irq_data(virq); - if (!irq_data) { - if (irq_domain_is_hierarchy(domain)) - irq_domain_free_irqs(virq, 1); - else - irq_dispose_mapping(virq); - return 0; + if (WARN_ON(!irq_data)) { + virq = 0; + goto out; } /* Store trigger type */ irqd_set_trigger_type(irq_data, type); +out: + mutex_unlock(&domain->root->mutex); return virq; } @@ -865,10 +1009,11 @@ EXPORT_SYMBOL_GPL(irq_create_of_mapping); */ void irq_dispose_mapping(unsigned int virq) { - struct irq_data *irq_data = irq_get_irq_data(virq); + struct irq_data *irq_data; struct irq_domain *domain; - if (!virq || !irq_data) + irq_data = virq ? 
irq_get_irq_data(virq) : NULL; + if (!irq_data) return; domain = irq_data->domain; @@ -876,7 +1021,7 @@ void irq_dispose_mapping(unsigned int virq) return; if (irq_domain_is_hierarchy(domain)) { - irq_domain_free_irqs(virq, 1); + irq_domain_free_one_irq(domain, virq); } else { irq_domain_disassociate(domain, virq); irq_free_desc(virq); @@ -906,10 +1051,12 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, return desc; if (irq_domain_is_nomap(domain)) { - if (hwirq < domain->revmap_size) { + if (hwirq < domain->hwirq_max) { data = irq_domain_get_irq_data(domain, hwirq); if (data && data->hwirq == hwirq) desc = irq_data_to_desc(data); + if (irq && desc) + *irq = hwirq; } return desc; @@ -935,6 +1082,12 @@ EXPORT_SYMBOL_GPL(__irq_resolve_mapping); /** * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings + * @d: Interrupt domain involved in the translation + * @ctrlr: The device tree node for the device whose interrupt is translated + * @intspec: The interrupt specifier data from the device tree + * @intsize: The number of entries in @intspec + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with one cell * bindings where the cell value maps directly to the hwirq number. 
@@ -953,6 +1106,12 @@ EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell); /** * irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings + * @d: Interrupt domain involved in the translation + * @ctrlr: The device tree node for the device whose interrupt is translated + * @intspec: The interrupt specifier data from the device tree + * @intsize: The number of entries in @intspec + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with two cell * bindings where the cell values map directly to the hwirq number @@ -970,7 +1129,38 @@ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell); /** + * irq_domain_xlate_twothreecell() - Generic xlate for direct two or three cell bindings + * @d: Interrupt domain involved in the translation + * @ctrlr: The device tree node for the device whose interrupt is translated + * @intspec: The interrupt specifier data from the device tree + * @intsize: The number of entries in @intspec + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type + * + * Device Tree interrupt specifier translation function for two or three + * cell bindings, where the cell values map directly to the hardware + * interrupt number and the type specifier. 
+ */ +int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) +{ + struct irq_fwspec fwspec; + + of_phandle_args_to_fwspec(ctrlr, intspec, intsize, &fwspec); + + return irq_domain_translate_twothreecell(d, &fwspec, out_hwirq, out_type); +} +EXPORT_SYMBOL_GPL(irq_domain_xlate_twothreecell); + +/** * irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings + * @d: Interrupt domain involved in the translation + * @ctrlr: The device tree node for the device whose interrupt is translated + * @intspec: The interrupt specifier data from the device tree + * @intsize: The number of entries in @intspec + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with either one * or two cell bindings where the cell values map directly to the hwirq number @@ -1004,6 +1194,10 @@ EXPORT_SYMBOL_GPL(irq_domain_simple_ops); /** * irq_domain_translate_onecell() - Generic translate for direct one cell * bindings + * @d: Interrupt domain involved in the translation + * @fwspec: The firmware interrupt specifier to translate + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type */ int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec, @@ -1021,6 +1215,10 @@ EXPORT_SYMBOL_GPL(irq_domain_translate_onecell); /** * irq_domain_translate_twocell() - Generic translate for direct two cell * bindings + * @d: Interrupt domain involved in the translation + * @fwspec: The firmware interrupt specifier to translate + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with two cell * bindings where the 
cell values map directly to the hwirq number @@ -1039,6 +1237,37 @@ int irq_domain_translate_twocell(struct irq_domain *d, } EXPORT_SYMBOL_GPL(irq_domain_translate_twocell); +/** + * irq_domain_translate_twothreecell() - Generic translate for direct two or three cell + * bindings + * @d: Interrupt domain involved in the translation + * @fwspec: The firmware interrupt specifier to translate + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type + * + * Firmware interrupt specifier translation function for two or three cell + * specifications, where the parameter values map directly to the hardware + * interrupt number and the type specifier. + */ +int irq_domain_translate_twothreecell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type) +{ + if (fwspec->param_count == 2) { + *out_hwirq = fwspec->param[0]; + *out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; + return 0; + } + + if (fwspec->param_count == 3) { + *out_hwirq = fwspec->param[1]; + *out_type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(irq_domain_translate_twothreecell); + int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity) { @@ -1048,7 +1277,7 @@ int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, virq = __irq_alloc_descs(virq, virq, cnt, node, THIS_MODULE, affinity); } else { - hint = hwirq % nr_irqs; + hint = hwirq % irq_get_nr_irqs(); if (hint == 0) hint++; virq = __irq_alloc_descs(-1, hint, cnt, node, THIS_MODULE, @@ -1075,43 +1304,6 @@ void irq_domain_reset_irq_data(struct irq_data *irq_data) EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -/** - * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy - * @parent: Parent irq domain to associate with the new domain - * @flags: Irq 
domain flags associated to the domain - * @size: Size of the domain. See below - * @fwnode: Optional fwnode of the interrupt controller - * @ops: Pointer to the interrupt domain callbacks - * @host_data: Controller private data pointer - * - * If @size is 0 a tree domain is created, otherwise a linear domain. - * - * If successful the parent is associated to the new domain and the - * domain flags are set. - * Returns pointer to IRQ domain, or NULL on failure. - */ -struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, - unsigned int flags, - unsigned int size, - struct fwnode_handle *fwnode, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain *domain; - - if (size) - domain = irq_domain_create_linear(fwnode, size, ops, host_data); - else - domain = irq_domain_create_tree(fwnode, ops, host_data); - if (domain) { - domain->parent = parent; - domain->flags |= flags; - } - - return domain; -} -EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy); - static void irq_domain_insert_irq(int virq) { struct irq_data *data; @@ -1121,10 +1313,6 @@ static void irq_domain_insert_irq(int virq) domain->mapcount++; irq_domain_set_mapping(domain, data->hwirq, data); - - /* If not already assigned, give the domain the chip's name */ - if (!domain->name && data->chip) - domain->name = data->chip->name; } irq_clear_status_flags(virq, IRQ_NOREQUEST); @@ -1226,7 +1414,7 @@ static int irq_domain_trim_hierarchy(unsigned int virq) tail = NULL; /* The first entry must have a valid irqchip */ - if (!irq_data->chip || IS_ERR(irq_data->chip)) + if (IS_ERR_OR_NULL(irq_data->chip)) return -EINVAL; /* @@ -1319,7 +1507,8 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); * @chip_data: The associated chip data */ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data) { struct irq_data *irq_data = 
irq_domain_get_irq_data(domain, virq); @@ -1328,7 +1517,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, return -ENOENT; irq_data->hwirq = hwirq; - irq_data->chip = chip ? chip : &no_irq_chip; + irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip); irq_data->chip_data = chip_data; return 0; @@ -1347,7 +1536,7 @@ EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip); * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { @@ -1395,6 +1584,7 @@ void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, } irq_domain_free_irqs_common(domain, virq, nr_irqs); } +EXPORT_SYMBOL_GPL(irq_domain_free_irqs_top); static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, @@ -1411,9 +1601,8 @@ static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain, } } -int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, - unsigned int irq_base, - unsigned int nr_irqs, void *arg) +static int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, + unsigned int nr_irqs, void *arg) { if (!domain->ops->alloc) { pr_debug("domain->ops->alloc() is NULL\n"); @@ -1423,40 +1612,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, return domain->ops->alloc(domain, irq_base, nr_irqs, arg); } -/** - * __irq_domain_alloc_irqs - Allocate IRQs from domain - * @domain: domain to allocate from - * @irq_base: allocate specified IRQ number if irq_base >= 0 - * @nr_irqs: number of IRQs to allocate - * @node: NUMA node id for memory allocation - * @arg: domain specific argument - * @realloc: IRQ descriptors have already been allocated if true - * @affinity: Optional irq affinity mask for multiqueue 
devices - * - * Allocate IRQ numbers and initialized all data structures to support - * hierarchy IRQ domains. - * Parameter @realloc is mainly to support legacy IRQs. - * Returns error code or allocated IRQ number - * - * The whole process to setup an IRQ has been split into two steps. - * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ - * descriptor and required hardware resources. The second step, - * irq_domain_activate_irq(), is to program the hardware with preallocated - * resources. In this way, it's easier to rollback when failing to - * allocate resources. - */ -int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, - unsigned int nr_irqs, int node, void *arg, - bool realloc, const struct irq_affinity_desc *affinity) +static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity) { int i, ret, virq; - if (domain == NULL) { - domain = irq_default_domain; - if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) - return -EINVAL; - } - if (realloc && irq_base >= 0) { virq = irq_base; } else { @@ -1475,24 +1636,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, goto out_free_desc; } - mutex_lock(&irq_domain_mutex); ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); - if (ret < 0) { - mutex_unlock(&irq_domain_mutex); + if (ret < 0) goto out_free_irq_data; - } for (i = 0; i < nr_irqs; i++) { ret = irq_domain_trim_hierarchy(virq + i); - if (ret) { - mutex_unlock(&irq_domain_mutex); + if (ret) goto out_free_irq_data; - } } - + for (i = 0; i < nr_irqs; i++) irq_domain_insert_irq(virq + i); - mutex_unlock(&irq_domain_mutex); return virq; @@ -1502,6 +1657,48 @@ out_free_desc: irq_free_descs(virq, nr_irqs); return ret; } + +/** + * __irq_domain_alloc_irqs - Allocate IRQs from domain + * @domain: domain to allocate from + * @irq_base: allocate specified IRQ number if irq_base 
>= 0 + * @nr_irqs: number of IRQs to allocate + * @node: NUMA node id for memory allocation + * @arg: domain specific argument + * @realloc: IRQ descriptors have already been allocated if true + * @affinity: Optional irq affinity mask for multiqueue devices + * + * Allocate IRQ numbers and initialized all data structures to support + * hierarchy IRQ domains. + * Parameter @realloc is mainly to support legacy IRQs. + * Returns error code or allocated IRQ number + * + * The whole process to setup an IRQ has been split into two steps. + * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ + * descriptor and required hardware resources. The second step, + * irq_domain_activate_irq(), is to program the hardware with preallocated + * resources. In this way, it's easier to rollback when failing to + * allocate resources. + */ +int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity) +{ + int ret; + + if (domain == NULL) { + domain = irq_default_domain; + if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) + return -EINVAL; + } + + mutex_lock(&domain->root->mutex); + ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg, + realloc, affinity); + mutex_unlock(&domain->root->mutex); + + return ret; +} EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs); /* The irq_data was moved, fix the revmap to refer to the new location */ @@ -1509,11 +1706,12 @@ static void irq_domain_fix_revmap(struct irq_data *d) { void __rcu **slot; + lockdep_assert_held(&d->domain->root->mutex); + if (irq_domain_is_nomap(d->domain)) return; /* Fix up the revmap. 
*/ - mutex_lock(&d->domain->revmap_mutex); if (d->hwirq < d->domain->revmap_size) { /* Not using radix tree */ rcu_assign_pointer(d->domain->revmap[d->hwirq], d); @@ -1522,7 +1720,6 @@ static void irq_domain_fix_revmap(struct irq_data *d) if (slot) radix_tree_replace_slot(&d->domain->revmap_tree, slot, d); } - mutex_unlock(&d->domain->revmap_mutex); } /** @@ -1538,8 +1735,8 @@ static void irq_domain_fix_revmap(struct irq_data *d) */ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) { - struct irq_data *child_irq_data; - struct irq_data *root_irq_data = irq_get_irq_data(virq); + struct irq_data *irq_data = irq_get_irq_data(virq); + struct irq_data *parent_irq_data; struct irq_desc *desc; int rv = 0; @@ -1564,47 +1761,46 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) if (WARN_ON(!irq_domain_is_hierarchy(domain))) return -EINVAL; - if (!root_irq_data) + if (!irq_data) return -EINVAL; - if (domain->parent != root_irq_data->domain) + if (domain->parent != irq_data->domain) return -EINVAL; - child_irq_data = kzalloc_node(sizeof(*child_irq_data), GFP_KERNEL, - irq_data_get_node(root_irq_data)); - if (!child_irq_data) + parent_irq_data = kzalloc_node(sizeof(*parent_irq_data), GFP_KERNEL, + irq_data_get_node(irq_data)); + if (!parent_irq_data) return -ENOMEM; - mutex_lock(&irq_domain_mutex); + mutex_lock(&domain->root->mutex); /* Copy the original irq_data. */ - *child_irq_data = *root_irq_data; + *parent_irq_data = *irq_data; /* - * Overwrite the root_irq_data, which is embedded in struct - * irq_desc, with values for this domain. + * Overwrite the irq_data, which is embedded in struct irq_desc, with + * values for this domain. 
*/ - root_irq_data->parent_data = child_irq_data; - root_irq_data->domain = domain; - root_irq_data->mask = 0; - root_irq_data->hwirq = 0; - root_irq_data->chip = NULL; - root_irq_data->chip_data = NULL; + irq_data->parent_data = parent_irq_data; + irq_data->domain = domain; + irq_data->mask = 0; + irq_data->hwirq = 0; + irq_data->chip = NULL; + irq_data->chip_data = NULL; /* May (probably does) set hwirq, chip, etc. */ rv = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg); if (rv) { /* Restore the original irq_data. */ - *root_irq_data = *child_irq_data; - kfree(child_irq_data); + *irq_data = *parent_irq_data; + kfree(parent_irq_data); goto error; } - irq_domain_fix_revmap(child_irq_data); - irq_domain_set_mapping(domain, root_irq_data->hwirq, root_irq_data); - + irq_domain_fix_revmap(parent_irq_data); + irq_domain_set_mapping(domain, irq_data->hwirq, irq_data); error: - mutex_unlock(&irq_domain_mutex); + mutex_unlock(&domain->root->mutex); return rv; } @@ -1620,8 +1816,8 @@ EXPORT_SYMBOL_GPL(irq_domain_push_irq); */ int irq_domain_pop_irq(struct irq_domain *domain, int virq) { - struct irq_data *root_irq_data = irq_get_irq_data(virq); - struct irq_data *child_irq_data; + struct irq_data *irq_data = irq_get_irq_data(virq); + struct irq_data *parent_irq_data; struct irq_data *tmp_irq_data; struct irq_desc *desc; @@ -1643,37 +1839,37 @@ int irq_domain_pop_irq(struct irq_domain *domain, int virq) if (domain == NULL) return -EINVAL; - if (!root_irq_data) + if (!irq_data) return -EINVAL; tmp_irq_data = irq_domain_get_irq_data(domain, virq); /* We can only "pop" if this domain is at the top of the list */ - if (WARN_ON(root_irq_data != tmp_irq_data)) + if (WARN_ON(irq_data != tmp_irq_data)) return -EINVAL; - if (WARN_ON(root_irq_data->domain != domain)) + if (WARN_ON(irq_data->domain != domain)) return -EINVAL; - child_irq_data = root_irq_data->parent_data; - if (WARN_ON(!child_irq_data)) + parent_irq_data = irq_data->parent_data; + if (WARN_ON(!parent_irq_data)) 
return -EINVAL; - mutex_lock(&irq_domain_mutex); + mutex_lock(&domain->root->mutex); - root_irq_data->parent_data = NULL; + irq_data->parent_data = NULL; - irq_domain_clear_mapping(domain, root_irq_data->hwirq); + irq_domain_clear_mapping(domain, irq_data->hwirq); irq_domain_free_irqs_hierarchy(domain, virq, 1); /* Restore the original irq_data. */ - *root_irq_data = *child_irq_data; + *irq_data = *parent_irq_data; - irq_domain_fix_revmap(root_irq_data); + irq_domain_fix_revmap(irq_data); - mutex_unlock(&irq_domain_mutex); + mutex_unlock(&domain->root->mutex); - kfree(child_irq_data); + kfree(parent_irq_data); return 0; } @@ -1687,22 +1883,33 @@ EXPORT_SYMBOL_GPL(irq_domain_pop_irq); void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { struct irq_data *data = irq_get_irq_data(virq); + struct irq_domain *domain; int i; if (WARN(!data || !data->domain || !data->domain->ops->free, "NULL pointer, cannot free irq\n")) return; - mutex_lock(&irq_domain_mutex); + domain = data->domain; + + mutex_lock(&domain->root->mutex); for (i = 0; i < nr_irqs; i++) irq_domain_remove_irq(virq + i); - irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs); - mutex_unlock(&irq_domain_mutex); + irq_domain_free_irqs_hierarchy(domain, virq, nr_irqs); + mutex_unlock(&domain->root->mutex); irq_domain_free_irq_data(virq, nr_irqs); irq_free_descs(virq, nr_irqs); } +static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq) +{ + if (irq_domain_is_msi_device(domain)) + msi_device_domain_free_wired(domain, virq); + else + irq_domain_free_irqs(virq, 1); +} + /** * irq_domain_alloc_irqs_parent - Allocate interrupts from parent domain * @domain: Domain below which interrupts must be allocated @@ -1812,22 +2019,8 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain) if (domain->ops->alloc) domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY; } - -/** - * irq_domain_hierarchical_is_msi_remap - Check if the domain or any - * parent has MSI remapping 
support - * @domain: domain pointer - */ -bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) -{ - for (; domain; domain = domain->parent) { - if (irq_domain_is_msi_remap(domain)) - return true; - } - return false; -} #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -/** +/* * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain * @domain: domain to match * @virq: IRQ number to get irq_data @@ -1841,7 +2034,7 @@ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, } EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); -/** +/* * irq_domain_set_info - Set the complete data for a @virq in @domain * @domain: Interrupt domain to match * @virq: IRQ number @@ -1853,7 +2046,7 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { @@ -1862,21 +2055,43 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_set_handler_data(virq, handler_data); } -static void irq_domain_check_hierarchy(struct irq_domain *domain) +static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity) { + return -EINVAL; } + +static void irq_domain_check_hierarchy(struct irq_domain *domain) { } +static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq) { } + #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #ifdef CONFIG_GENERIC_IRQ_DEBUGFS +#include "internals.h" + static struct dentry *domain_dir; -static void -irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind) +static const struct irq_bit_descr irqdomain_flags[] = { + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_HIERARCHY), + 
BIT_MASK_DESCR(IRQ_DOMAIN_NAME_ALLOCATED), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_IPI_PER_CPU), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_IPI_SINGLE), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_ISOLATED_MSI), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_NO_MAP), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI_PARENT), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI_DEVICE), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_NONCORE), +}; + +static void irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind) { seq_printf(m, "%*sname: %s\n", ind, "", d->name); seq_printf(m, "%*ssize: %u\n", ind + 1, "", d->revmap_size); seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount); seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags); + irq_debug_show_bits(m, ind, d->flags, irqdomain_flags, ARRAY_SIZE(irqdomain_flags)); if (d->ops && d->ops->debug_show) d->ops->debug_show(m, d, NULL, ind + 1); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY @@ -1912,7 +2127,7 @@ static void debugfs_add_domain_dir(struct irq_domain *d) static void debugfs_remove_domain_dir(struct irq_domain *d) { - debugfs_remove(debugfs_lookup(d->name, domain_dir)); + debugfs_lookup_and_remove(d->name, domain_dir); } void __init irq_domain_debugfs_init(struct dentry *root) diff --git a/kernel/irq/kexec.c b/kernel/irq/kexec.c new file mode 100644 index 000000000000..1a3deffe6b5b --- /dev/null +++ b/kernel/irq/kexec.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> +#include <linux/irqnr.h> + +#include "internals.h" + +void machine_kexec_mask_interrupts(void) +{ + struct irq_desc *desc; + unsigned int i; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int check_eoi = 1; + + chip = irq_desc_get_chip(desc); + if (!chip || !irqd_is_started(&desc->irq_data)) + continue; + + if (IS_ENABLED(CONFIG_GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD)) { + /* + * First try to remove the active state from an interrupt which is forwarded + * to a VM. 
If the interrupt is not forwarded, try to EOI the interrupt. + */ + check_eoi = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + } + + if (check_eoi && chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) + chip->irq_eoi(&desc->irq_data); + + irq_shutdown(desc); + } +} diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f23ffd30385b..0bb29316b436 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -35,14 +35,14 @@ static int __init setup_forced_irqthreads(char *arg) early_param("threadirqs", setup_forced_irqthreads); #endif +static int __irq_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *state); + static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) { struct irq_data *irqd = irq_desc_get_irq_data(desc); bool inprogress; do { - unsigned long flags; - /* * Wait until we're out of the critical section. This might * give the wrong answer due to the lack of memory barriers. @@ -51,7 +51,7 @@ static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) cpu_relax(); /* Ok, that indicated we're done: double-check carefully. */ - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irqsave)(&desc->lock); inprogress = irqd_irq_inprogress(&desc->irq_data); /* @@ -67,33 +67,30 @@ static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE, &inprogress); } - raw_spin_unlock_irqrestore(&desc->lock, flags); - /* Oops, that failed? */ } while (inprogress); } /** - * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs) - * @irq: interrupt number to wait for + * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for * - * This function waits for any pending hard IRQ handlers for this - * interrupt to complete before returning. If you use this - * function while holding a resource the IRQ handler may need you - * will deadlock. 
It does not take associated threaded handlers - * into account. + * This function waits for any pending hard IRQ handlers for this interrupt + * to complete before returning. If you use this function while holding a + * resource the IRQ handler may need you will deadlock. It does not take + * associated threaded handlers into account. * - * Do not use this for shutdown scenarios where you must be sure - * that all parts (hardirq and threaded handler) have completed. + * Do not use this for shutdown scenarios where you must be sure that all + * parts (hardirq and threaded handler) have completed. * - * Returns: false if a threaded handler is active. + * Returns: false if a threaded handler is active. * - * This function may be called - with care - from IRQ context. + * This function may be called - with care - from IRQ context. * - * It does not check whether there is an interrupt in flight at the - * hardware level, but not serviced yet, as this might deadlock when - * called with interrupts disabled and the target CPU of the interrupt - * is the current CPU. + * It does not check whether there is an interrupt in flight at the + * hardware level, but not serviced yet, as this might deadlock when called + * with interrupts disabled and the target CPU of the interrupt is the + * current CPU. */ bool synchronize_hardirq(unsigned int irq) { @@ -108,35 +105,37 @@ bool synchronize_hardirq(unsigned int irq) } EXPORT_SYMBOL(synchronize_hardirq); +static void __synchronize_irq(struct irq_desc *desc) +{ + __synchronize_hardirq(desc, true); + /* + * We made sure that no hardirq handler is running. Now verify that no + * threaded handlers are active. 
+ */ + wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active)); +} + /** - * synchronize_irq - wait for pending IRQ handlers (on other CPUs) - * @irq: interrupt number to wait for + * synchronize_irq - wait for pending IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for * - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. + * This function waits for any pending IRQ handlers for this interrupt to + * complete before returning. If you use this function while holding a + * resource the IRQ handler may need you will deadlock. * - * Can only be called from preemptible code as it might sleep when - * an interrupt thread is associated to @irq. + * Can only be called from preemptible code as it might sleep when + * an interrupt thread is associated to @irq. * - * It optionally makes sure (when the irq chip supports that method) - * that the interrupt is not pending in any CPU and waiting for - * service. + * It optionally makes sure (when the irq chip supports that method) + * that the interrupt is not pending in any CPU and waiting for + * service. */ void synchronize_irq(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - if (desc) { - __synchronize_hardirq(desc, true); - /* - * We made sure that no hardirq handler is - * running. Now verify that no threaded handlers are - * active. 
- */ - wait_event(desc->wait_for_threads, - !atomic_read(&desc->threads_active)); - } + if (desc) + __synchronize_irq(desc); } EXPORT_SYMBOL(synchronize_irq); @@ -152,8 +151,8 @@ static bool __irq_can_set_affinity(struct irq_desc *desc) } /** - * irq_can_set_affinity - Check if the affinity of a given irq can be set - * @irq: Interrupt to check + * irq_can_set_affinity - Check if the affinity of a given irq can be set + * @irq: Interrupt to check * */ int irq_can_set_affinity(unsigned int irq) @@ -177,21 +176,28 @@ bool irq_can_set_affinity_usr(unsigned int irq) } /** - * irq_set_thread_affinity - Notify irq threads to adjust affinity - * @desc: irq descriptor which has affinity changed + * irq_set_thread_affinity - Notify irq threads to adjust affinity + * @desc: irq descriptor which has affinity changed * - * We just set IRQTF_AFFINITY and delegate the affinity setting - * to the interrupt thread itself. We can not call - * set_cpus_allowed_ptr() here as we hold desc->lock and this - * code can be called from hard interrupt context. + * Just set IRQTF_AFFINITY and delegate the affinity setting to the + * interrupt thread itself. We can not call set_cpus_allowed_ptr() here as + * we hold desc->lock and this code can be called from hard interrupt + * context. */ -void irq_set_thread_affinity(struct irq_desc *desc) +static void irq_set_thread_affinity(struct irq_desc *desc) { struct irqaction *action; - for_each_action_of_desc(desc, action) - if (action->thread) + for_each_action_of_desc(desc, action) { + if (action->thread) { set_bit(IRQTF_AFFINITY, &action->thread_flags); + wake_up_process(action->thread); + } + if (action->secondary && action->secondary->thread) { + set_bit(IRQTF_AFFINITY, &action->secondary->thread_flags); + wake_up_process(action->secondary->thread); + } + } } #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK @@ -205,23 +211,19 @@ static void irq_validate_effective_affinity(struct irq_data *data) pr_warn_once("irq_chip %s did not update eff. 
affinity mask of irq %u\n", chip->name, data->irq); } - -static inline void irq_init_effective_affinity(struct irq_data *data, - const struct cpumask *mask) -{ - cpumask_copy(irq_data_get_effective_affinity_mask(data), mask); -} #else static inline void irq_validate_effective_affinity(struct irq_data *data) { } -static inline void irq_init_effective_affinity(struct irq_data *data, - const struct cpumask *mask) { } #endif +static DEFINE_PER_CPU(struct cpumask, __tmp_mask); + int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { + struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask); struct irq_desc *desc = irq_data_to_desc(data); struct irq_chip *chip = irq_data_get_irq_chip(data); + const struct cpumask *prog_mask; int ret; if (!chip || !chip->irq_set_affinity) @@ -247,25 +249,33 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, * online. */ if (irqd_affinity_is_managed(data) && - housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) { - const struct cpumask *hk_mask, *prog_mask; + housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) { + const struct cpumask *hk_mask; - static DEFINE_RAW_SPINLOCK(tmp_mask_lock); - static struct cpumask tmp_mask; + hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); - hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ); - - raw_spin_lock(&tmp_mask_lock); - cpumask_and(&tmp_mask, mask, hk_mask); - if (!cpumask_intersects(&tmp_mask, cpu_online_mask)) + cpumask_and(tmp_mask, mask, hk_mask); + if (!cpumask_intersects(tmp_mask, cpu_online_mask)) prog_mask = mask; else - prog_mask = &tmp_mask; - ret = chip->irq_set_affinity(data, prog_mask, force); - raw_spin_unlock(&tmp_mask_lock); + prog_mask = tmp_mask; } else { - ret = chip->irq_set_affinity(data, mask, force); + prog_mask = mask; } + + /* + * Make sure we only provide online CPUs to the irqchip, + * unless we are being asked to force the affinity (in which + * case we do as we are told). 
+ */ + cpumask_and(tmp_mask, prog_mask, cpu_online_mask); + if (!force && !cpumask_empty(tmp_mask)) + ret = chip->irq_set_affinity(data, tmp_mask, force); + else if (force) + ret = chip->irq_set_affinity(data, mask, force); + else + ret = -EINVAL; + switch (ret) { case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: @@ -314,7 +324,7 @@ static int irq_try_set_affinity(struct irq_data *data, } static bool irq_set_affinity_deactivated(struct irq_data *data, - const struct cpumask *mask, bool force) + const struct cpumask *mask) { struct irq_desc *desc = irq_data_to_desc(data); @@ -332,7 +342,7 @@ static bool irq_set_affinity_deactivated(struct irq_data *data, return false; cpumask_copy(desc->irq_common_data.affinity, mask); - irq_init_effective_affinity(data, mask); + irq_data_update_effective_affinity(data, mask); irqd_set(data, IRQD_AFFINITY_SET); return true; } @@ -347,7 +357,7 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, if (!chip || !chip->irq_set_affinity) return -EINVAL; - if (irq_set_affinity_deactivated(data, mask, force)) + if (irq_set_affinity_deactivated(data, mask)) return 0; if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) { @@ -385,14 +395,8 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, * an interrupt which is already started or which has already been configured * as managed will also fail, as these mean invalid init state or double init. */ -int irq_update_affinity_desc(unsigned int irq, - struct irq_affinity_desc *affinity) +int irq_update_affinity_desc(unsigned int irq, struct irq_affinity_desc *affinity) { - struct irq_desc *desc; - unsigned long flags; - bool activated; - int ret = 0; - /* * Supporting this with the reservation scheme used by x86 needs * some more thought. Fail it for now. 
@@ -400,60 +404,50 @@ int irq_update_affinity_desc(unsigned int irq, if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) return -EOPNOTSUPP; - desc = irq_get_desc_buslock(irq, &flags, 0); - if (!desc) - return -EINVAL; + scoped_irqdesc_get_and_buslock(irq, 0) { + struct irq_desc *desc = scoped_irqdesc; + bool activated; - /* Requires the interrupt to be shut down */ - if (irqd_is_started(&desc->irq_data)) { - ret = -EBUSY; - goto out_unlock; - } - - /* Interrupts which are already managed cannot be modified */ - if (irqd_affinity_is_managed(&desc->irq_data)) { - ret = -EBUSY; - goto out_unlock; - } + /* Requires the interrupt to be shut down */ + if (irqd_is_started(&desc->irq_data)) + return -EBUSY; - /* - * Deactivate the interrupt. That's required to undo - * anything an earlier activation has established. - */ - activated = irqd_is_activated(&desc->irq_data); - if (activated) - irq_domain_deactivate_irq(&desc->irq_data); - - if (affinity->is_managed) { - irqd_set(&desc->irq_data, IRQD_AFFINITY_MANAGED); - irqd_set(&desc->irq_data, IRQD_MANAGED_SHUTDOWN); - } + /* Interrupts which are already managed cannot be modified */ + if (irqd_affinity_is_managed(&desc->irq_data)) + return -EBUSY; + /* + * Deactivate the interrupt. That's required to undo + * anything an earlier activation has established. 
+ */ + activated = irqd_is_activated(&desc->irq_data); + if (activated) + irq_domain_deactivate_irq(&desc->irq_data); - cpumask_copy(desc->irq_common_data.affinity, &affinity->mask); + if (affinity->is_managed) { + irqd_set(&desc->irq_data, IRQD_AFFINITY_MANAGED); + irqd_set(&desc->irq_data, IRQD_MANAGED_SHUTDOWN); + } - /* Restore the activation state */ - if (activated) - irq_domain_activate_irq(&desc->irq_data, false); + cpumask_copy(desc->irq_common_data.affinity, &affinity->mask); -out_unlock: - irq_put_desc_busunlock(desc, flags); - return ret; + /* Restore the activation state */ + if (activated) + irq_domain_activate_irq(&desc->irq_data, false); + return 0; + } + return -EINVAL; } static int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) { struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - int ret; if (!desc) return -EINVAL; - raw_spin_lock_irqsave(&desc->lock, flags); - ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; + guard(raw_spinlock_irqsave)(&desc->lock); + return irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); } /** @@ -486,39 +480,36 @@ int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) } EXPORT_SYMBOL_GPL(irq_force_affinity); -int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, - bool setaffinity) +int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, bool setaffinity) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + int ret = -EINVAL; - if (!desc) - return -EINVAL; - desc->affinity_hint = m; - irq_put_desc_unlock(desc, flags); - if (m && setaffinity) + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + scoped_irqdesc->affinity_hint = m; + ret = 0; + } + + if (!ret && m && setaffinity) __irq_set_affinity(irq, m, false); - return 0; + return ret; } 
EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint); static void irq_affinity_notify(struct work_struct *work) { - struct irq_affinity_notify *notify = - container_of(work, struct irq_affinity_notify, work); + struct irq_affinity_notify *notify = container_of(work, struct irq_affinity_notify, work); struct irq_desc *desc = irq_to_desc(notify->irq); cpumask_var_t cpumask; - unsigned long flags; if (!desc || !alloc_cpumask_var(&cpumask, GFP_KERNEL)) goto out; - raw_spin_lock_irqsave(&desc->lock, flags); - if (irq_move_pending(&desc->irq_data)) - irq_get_pending(cpumask, desc); - else - cpumask_copy(cpumask, desc->irq_common_data.affinity); - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) { + if (irq_move_pending(&desc->irq_data)) + irq_get_pending(cpumask, desc); + else + cpumask_copy(cpumask, desc->irq_common_data.affinity); + } notify->notify(notify, cpumask); @@ -528,27 +519,25 @@ out: } /** - * irq_set_affinity_notifier - control notification of IRQ affinity changes - * @irq: Interrupt for which to enable/disable notification - * @notify: Context for notification, or %NULL to disable - * notification. Function pointers must be initialised; - * the other fields will be initialised by this function. - * - * Must be called in process context. Notification may only be enabled - * after the IRQ is allocated and must be disabled before the IRQ is - * freed using free_irq(). + * irq_set_affinity_notifier - control notification of IRQ affinity changes + * @irq: Interrupt for which to enable/disable notification + * @notify: Context for notification, or %NULL to disable + * notification. Function pointers must be initialised; + * the other fields will be initialised by this function. + * + * Must be called in process context. Notification may only be enabled + * after the IRQ is allocated and must be disabled before the IRQ is freed + * using free_irq(). 
*/ -int -irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) +int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) { struct irq_desc *desc = irq_to_desc(irq); struct irq_affinity_notify *old_notify; - unsigned long flags; /* The release function is promised process context */ might_sleep(); - if (!desc || desc->istate & IRQS_NMI) + if (!desc || irq_is_nmi(desc)) return -EINVAL; /* Complete initialisation of *notify */ @@ -558,10 +547,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) INIT_WORK(&notify->work, irq_affinity_notify); } - raw_spin_lock_irqsave(&desc->lock, flags); - old_notify = desc->affinity_notify; - desc->affinity_notify = notify; - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irq, &desc->lock) { + old_notify = desc->affinity_notify; + desc->affinity_notify = notify; + } if (old_notify) { if (cancel_work_sync(&old_notify->work)) { @@ -582,7 +571,8 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); int irq_setup_affinity(struct irq_desc *desc) { struct cpumask *set = irq_default_affinity; - int ret, node = irq_desc_get_node(desc); + int node = irq_desc_get_node(desc); + static DEFINE_RAW_SPINLOCK(mask_lock); static struct cpumask mask; @@ -590,7 +580,7 @@ int irq_setup_affinity(struct irq_desc *desc) if (!__irq_can_set_affinity(desc)) return 0; - raw_spin_lock(&mask_lock); + guard(raw_spinlock)(&mask_lock); /* * Preserve the managed affinity setting and a userspace affinity * setup, but make sure that one of the targets is online. 
@@ -615,9 +605,7 @@ int irq_setup_affinity(struct irq_desc *desc) if (cpumask_intersects(&mask, nodemask)) cpumask_and(&mask, &mask, nodemask); } - ret = irq_do_set_affinity(&desc->irq_data, &mask, false); - raw_spin_unlock(&mask_lock); - return ret; + return irq_do_set_affinity(&desc->irq_data, &mask, false); } #else /* Wrapper for ALPHA specific affinity selector magic */ @@ -630,44 +618,36 @@ int irq_setup_affinity(struct irq_desc *desc) /** - * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt - * @irq: interrupt number to set affinity - * @vcpu_info: vCPU specific data or pointer to a percpu array of vCPU - * specific data for percpu_devid interrupts - * - * This function uses the vCPU specific data to set the vCPU - * affinity for an irq. The vCPU specific data is passed from - * outside, such as KVM. One example code path is as below: - * KVM -> IOMMU -> irq_set_vcpu_affinity(). + * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt + * @irq: interrupt number to set affinity + * @vcpu_info: vCPU specific data or pointer to a percpu array of vCPU + * specific data for percpu_devid interrupts + * + * This function uses the vCPU specific data to set the vCPU affinity for + * an irq. The vCPU specific data is passed from outside, such as KVM. One + * example code path is as below: KVM -> IOMMU -> irq_set_vcpu_affinity(). 
*/ int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - struct irq_data *data; - struct irq_chip *chip; - int ret = -ENOSYS; + scoped_irqdesc_get_and_lock(irq, 0) { + struct irq_desc *desc = scoped_irqdesc; + struct irq_data *data; + struct irq_chip *chip; - if (!desc) - return -EINVAL; - - data = irq_desc_get_irq_data(desc); - do { - chip = irq_data_get_irq_chip(data); - if (chip && chip->irq_set_vcpu_affinity) - break; -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - data = data->parent_data; -#else - data = NULL; -#endif - } while (data); + data = irq_desc_get_irq_data(desc); + do { + chip = irq_data_get_irq_chip(data); + if (chip && chip->irq_set_vcpu_affinity) + break; - if (data) - ret = chip->irq_set_vcpu_affinity(data, vcpu_info); - irq_put_desc_unlock(desc, flags); + data = irqd_get_parent_data(data); + } while (data); - return ret; + if (!data) + return -ENOSYS; + return chip->irq_set_vcpu_affinity(data, vcpu_info); + } + return -EINVAL; } EXPORT_SYMBOL_GPL(irq_set_vcpu_affinity); @@ -679,26 +659,23 @@ void __disable_irq(struct irq_desc *desc) static int __disable_irq_nosync(unsigned int irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); - - if (!desc) - return -EINVAL; - __disable_irq(desc); - irq_put_desc_busunlock(desc, flags); - return 0; + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + __disable_irq(scoped_irqdesc); + return 0; + } + return -EINVAL; } /** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. + * Disable the selected interrupt line. 
Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. * - * This function may be called from IRQ context. + * This function may be called from IRQ context. */ void disable_irq_nosync(unsigned int irq) { @@ -707,59 +684,61 @@ void disable_irq_nosync(unsigned int irq) EXPORT_SYMBOL(disable_irq_nosync); /** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are nested. * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. + * This function waits for any pending IRQ handlers for this interrupt to + * complete before returning. If you use this function while holding a + * resource the IRQ handler may need you will deadlock. + * + * Can only be called from preemptible code as it might sleep when an + * interrupt thread is associated to @irq. * - * This function may be called - with care - from IRQ context. */ void disable_irq(unsigned int irq) { + might_sleep(); if (!__disable_irq_nosync(irq)) synchronize_irq(irq); } EXPORT_SYMBOL(disable_irq); /** - * disable_hardirq - disables an irq and waits for hardirq completion - * @irq: Interrupt to disable + * disable_hardirq - disables an irq and waits for hardirq completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are nested. * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending hard IRQ handlers for this - * interrupt to complete before returning. 
If you use this function while - * holding a resource the hard IRQ handler may need you will deadlock. + * This function waits for any pending hard IRQ handlers for this interrupt + * to complete before returning. If you use this function while holding a + * resource the hard IRQ handler may need you will deadlock. * - * When used to optimistically disable an interrupt from atomic context - * the return value must be checked. + * When used to optimistically disable an interrupt from atomic context the + * return value must be checked. * - * Returns: false if a threaded handler is active. + * Returns: false if a threaded handler is active. * - * This function may be called - with care - from IRQ context. + * This function may be called - with care - from IRQ context. */ bool disable_hardirq(unsigned int irq) { if (!__disable_irq_nosync(irq)) return synchronize_hardirq(irq); - return false; } EXPORT_SYMBOL_GPL(disable_hardirq); /** - * disable_nmi_nosync - disable an nmi without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables and enables are - * nested. - * The interrupt to disable must have been requested through request_nmi. - * Unlike disable_nmi(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. + * disable_nmi_nosync - disable an nmi without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and enables are nested. + * + * The interrupt to disable must have been requested through request_nmi. + * Unlike disable_nmi(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. */ void disable_nmi_nosync(unsigned int irq) { @@ -781,10 +760,14 @@ void __enable_irq(struct irq_desc *desc) irq_settings_set_noprobe(desc); /* * Call irq_startup() not irq_enable() here because the - * interrupt might be marked NOAUTOEN. 
So irq_startup() - * needs to be invoked when it gets enabled the first - * time. If it was already started up, then irq_startup() - * will invoke irq_enable() under the hood. + * interrupt might be marked NOAUTOEN so irq_startup() + * needs to be invoked when it gets enabled the first time. + * This is also required when __enable_irq() is invoked for + * a managed and shutdown interrupt from the S3 resume + * path. + * + * If it was already started up, then irq_startup() will + * invoke irq_enable() under the hood. */ irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); break; @@ -795,41 +778,34 @@ void __enable_irq(struct irq_desc *desc) } /** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. + * Undoes the effect of one call to disable_irq(). If this matches the + * last disable, processing of interrupts on this IRQ line is re-enabled. * - * This function may be called from IRQ context only when - * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! + * This function may be called from IRQ context only when + * desc->irq_data.chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! 
*/ void enable_irq(unsigned int irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); - - if (!desc) - return; - if (WARN(!desc->irq_data.chip, - KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq)) - goto out; + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; - __enable_irq(desc); -out: - irq_put_desc_busunlock(desc, flags); + if (WARN(!desc->irq_data.chip, "enable_irq before setup/request_irq: irq %u\n", irq)) + return; + __enable_irq(desc); + } } EXPORT_SYMBOL(enable_irq); /** - * enable_nmi - enable handling of an nmi - * @irq: Interrupt to enable + * enable_nmi - enable handling of an nmi + * @irq: Interrupt to enable * - * The interrupt to enable must have been requested through request_nmi. - * Undoes the effect of one call to disable_nmi(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. + * The interrupt to enable must have been requested through request_nmi. + * Undoes the effect of one call to disable_nmi(). If this matches the last + * disable, processing of interrupts on this IRQ line is re-enabled. */ void enable_nmi(unsigned int irq) { @@ -851,65 +827,59 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on) } /** - * irq_set_irq_wake - control irq power management wakeup - * @irq: interrupt to control - * @on: enable/disable power management wakeup - * - * Enable/disable power management wakeup mode, which is - * disabled by default. Enables and disables must match, - * just as they match for non-wakeup mode support. - * - * Wakeup mode lets this IRQ wake the system from sleep - * states like "suspend to RAM". - * - * Note: irq enable/disable state is completely orthogonal - * to the enable/disable state of irq wake. An irq can be - * disabled with disable_irq() and still wake the system as - * long as the irq has wake enabled. 
If this does not hold, - * then the underlying irq chip and the related driver need - * to be investigated. + * irq_set_irq_wake - control irq power management wakeup + * @irq: interrupt to control + * @on: enable/disable power management wakeup + * + * Enable/disable power management wakeup mode, which is disabled by + * default. Enables and disables must match, just as they match for + * non-wakeup mode support. + * + * Wakeup mode lets this IRQ wake the system from sleep states like + * "suspend to RAM". + * + * Note: irq enable/disable state is completely orthogonal to the + * enable/disable state of irq wake. An irq can be disabled with + * disable_irq() and still wake the system as long as the irq has wake + * enabled. If this does not hold, then the underlying irq chip and the + * related driver need to be investigated. */ int irq_set_irq_wake(unsigned int irq, unsigned int on) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); - int ret = 0; + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; + int ret = 0; - if (!desc) - return -EINVAL; - - /* Don't use NMIs as wake up interrupts please */ - if (desc->istate & IRQS_NMI) { - ret = -EINVAL; - goto out_unlock; - } + /* Don't use NMIs as wake up interrupts please */ + if (irq_is_nmi(desc)) + return -EINVAL; - /* wakeup-capable irqs can be shared between drivers that - * don't need to have the same sleep mode behaviors. 
- */ - if (on) { - if (desc->wake_depth++ == 0) { - ret = set_irq_wake_real(irq, on); - if (ret) - desc->wake_depth = 0; - else - irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE); - } - } else { - if (desc->wake_depth == 0) { - WARN(1, "Unbalanced IRQ %d wake disable\n", irq); - } else if (--desc->wake_depth == 0) { - ret = set_irq_wake_real(irq, on); - if (ret) - desc->wake_depth = 1; - else - irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); + /* + * wakeup-capable irqs can be shared between drivers that + * don't need to have the same sleep mode behaviors. + */ + if (on) { + if (desc->wake_depth++ == 0) { + ret = set_irq_wake_real(irq, on); + if (ret) + desc->wake_depth = 0; + else + irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE); + } + } else { + if (desc->wake_depth == 0) { + WARN(1, "Unbalanced IRQ %d wake disable\n", irq); + } else if (--desc->wake_depth == 0) { + ret = set_irq_wake_real(irq, on); + if (ret) + desc->wake_depth = 1; + else + irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); + } } + return ret; } - -out_unlock: - irq_put_desc_busunlock(desc, flags); - return ret; + return -EINVAL; } EXPORT_SYMBOL(irq_set_irq_wake); @@ -918,22 +888,17 @@ EXPORT_SYMBOL(irq_set_irq_wake); * particular irq has been exclusively allocated or is available * for driver use. 
*/ -int can_request_irq(unsigned int irq, unsigned long irqflags) +bool can_request_irq(unsigned int irq, unsigned long irqflags) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - int canrequest = 0; + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; - if (!desc) - return 0; - - if (irq_settings_can_request(desc)) { - if (!desc->action || - irqflags & desc->action->flags & IRQF_SHARED) - canrequest = 1; + if (irq_settings_can_request(desc)) { + if (!desc->action || irqflags & desc->action->flags & IRQF_SHARED) + return true; + } } - irq_put_desc_unlock(desc, flags); - return canrequest; + return false; } int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) @@ -994,16 +959,11 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) #ifdef CONFIG_HARDIRQS_SW_RESEND int irq_set_parent(int irq, int parent_irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); - - if (!desc) - return -EINVAL; - - desc->parent_irq = parent_irq; - - irq_put_desc_unlock(desc, flags); - return 0; + scoped_irqdesc_get_and_lock(irq, 0) { + scoped_irqdesc->parent_irq = parent_irq; + return 0; + } + return -EINVAL; } EXPORT_SYMBOL_GPL(irq_set_parent); #endif @@ -1034,10 +994,48 @@ static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id) return IRQ_NONE; } -static int irq_wait_for_interrupt(struct irqaction *action) +#ifdef CONFIG_SMP +/* + * Check whether we need to change the affinity of the interrupt thread. 
+ */ +static void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) +{ + cpumask_var_t mask; + + if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) + return; + + __set_current_state(TASK_RUNNING); + + /* + * In case we are out of memory we set IRQTF_AFFINITY again and + * try again next time + */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { + set_bit(IRQTF_AFFINITY, &action->thread_flags); + return; + } + + scoped_guard(raw_spinlock_irq, &desc->lock) { + const struct cpumask *m; + + m = irq_data_get_effective_affinity_mask(&desc->irq_data); + cpumask_copy(mask, m); + } + + set_cpus_allowed_ptr(current, mask); + free_cpumask_var(mask); +} +#else +static inline void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } +#endif + +static int irq_wait_for_interrupt(struct irq_desc *desc, + struct irqaction *action) { for (;;) { set_current_state(TASK_INTERRUPTIBLE); + irq_thread_check_affinity(desc, action); if (kthread_should_stop()) { /* may need to run one last time */ @@ -1114,51 +1112,21 @@ out_unlock: chip_bus_sync_unlock(desc); } -#ifdef CONFIG_SMP /* - * Check whether we need to change the affinity of the interrupt thread. + * Interrupts explicitly requested as threaded interrupts want to be + * preemptible - many of them need to sleep and wait for slow busses to + * complete. 
*/ -static void -irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_thread_fn(struct irq_desc *desc, struct irqaction *action) { - cpumask_var_t mask; - bool valid = true; - - if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) - return; - - /* - * In case we are out of memory we set IRQTF_AFFINITY again and - * try again next time - */ - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { - set_bit(IRQTF_AFFINITY, &action->thread_flags); - return; - } + irqreturn_t ret = action->thread_fn(action->irq, action->dev_id); - raw_spin_lock_irq(&desc->lock); - /* - * This code is triggered unconditionally. Check the affinity - * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. - */ - if (cpumask_available(desc->irq_common_data.affinity)) { - const struct cpumask *m; - - m = irq_data_get_effective_affinity_mask(&desc->irq_data); - cpumask_copy(mask, m); - } else { - valid = false; - } - raw_spin_unlock_irq(&desc->lock); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); - if (valid) - set_cpus_allowed_ptr(current, mask); - free_cpumask_var(mask); + irq_finalize_oneshot(desc, action); + return ret; } -#else -static inline void -irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } -#endif /* * Interrupts which are not explicitly requested as threaded @@ -1166,44 +1134,21 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } * context. So we need to disable bh here to avoid deadlocks and other * side effects. 
*/ -static irqreturn_t -irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) { irqreturn_t ret; local_bh_disable(); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_disable(); - ret = action->thread_fn(action->irq, action->dev_id); - if (ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); - - irq_finalize_oneshot(desc, action); + ret = irq_thread_fn(desc, action); if (!IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_enable(); local_bh_enable(); return ret; } -/* - * Interrupts explicitly requested as threaded interrupts want to be - * preemptible - many of them need to sleep and wait for slow busses to - * complete. - */ -static irqreturn_t irq_thread_fn(struct irq_desc *desc, - struct irqaction *action) -{ - irqreturn_t ret; - - ret = action->thread_fn(action->irq, action->dev_id); - if (ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); - - irq_finalize_oneshot(desc, action); - return ret; -} - -static void wake_threads_waitq(struct irq_desc *desc) +void wake_threads_waitq(struct irq_desc *desc) { if (atomic_dec_and_test(&desc->threads_active)) wake_up(&desc->wait_for_threads); @@ -1243,9 +1188,33 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) if (WARN_ON_ONCE(!secondary)) return; - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); __irq_wake_thread(desc, secondary); - raw_spin_unlock_irq(&desc->lock); +} + +/* + * Internal function to notify that a interrupt thread is ready. + */ +static void irq_thread_set_ready(struct irq_desc *desc, + struct irqaction *action) +{ + set_bit(IRQTF_READY, &action->thread_flags); + wake_up(&desc->wait_for_threads); +} + +/* + * Internal function to wake up a interrupt thread and wait until it is + * ready. 
+ */ +static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc, + struct irqaction *action) +{ + if (!action || !action->thread) + return; + + wake_up_process(action->thread); + wait_event(desc->wait_for_threads, + test_bit(IRQTF_READY, &action->thread_flags)); } /* @@ -1259,7 +1228,12 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); - sched_set_fifo(current); + irq_thread_set_ready(desc, action); + + if (action->handler == irq_forced_secondary_handler) + sched_set_fifo_secondary(current); + else + sched_set_fifo(current); if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, &action->thread_flags)) @@ -1270,13 +1244,9 @@ static int irq_thread(void *data) init_task_work(&on_exit_work, irq_thread_dtor); task_work_add(current, &on_exit_work, TWA_NONE); - irq_thread_check_affinity(desc, action); - - while (!irq_wait_for_interrupt(action)) { + while (!irq_wait_for_interrupt(desc, action)) { irqreturn_t action_ret; - irq_thread_check_affinity(desc, action); - action_ret = handler_fn(desc, action); if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); @@ -1290,26 +1260,24 @@ static int irq_thread(void *data) * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the * oneshot mask bit can be set. 
*/ - task_work_cancel(current, irq_thread_dtor); + task_work_cancel_func(current, irq_thread_dtor); return 0; } /** - * irq_wake_thread - wake the irq thread for the action identified by dev_id - * @irq: Interrupt line - * @dev_id: Device identity for which the thread should be woken - * + * irq_wake_thread - wake the irq thread for the action identified by dev_id + * @irq: Interrupt line + * @dev_id: Device identity for which the thread should be woken */ void irq_wake_thread(unsigned int irq, void *dev_id) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action; - unsigned long flags; if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) return; - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irqsave)(&desc->lock); for_each_action_of_desc(desc, action) { if (action->dev_id == dev_id) { if (action->thread) @@ -1317,7 +1285,6 @@ void irq_wake_thread(unsigned int irq, void *dev_id) break; } } - raw_spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL_GPL(irq_wake_thread); @@ -1432,19 +1399,39 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) * references an already freed task_struct. */ new->thread = get_task_struct(t); + /* - * Tell the thread to set its affinity. This is - * important for shared interrupt handlers as we do - * not invoke setup_affinity() for the secondary - * handlers as everything is already set up. Even for - * interrupts marked with IRQF_NO_BALANCE this is - * correct as we want the thread to move to the cpu(s) - * on which the requesting code placed the interrupt. + * The affinity can not be established yet, but it will be once the + * interrupt is enabled. Delay and defer the actual setting to the + * thread itself once it is ready to run. In the meantime, prevent + * it from ever being re-affined directly by cpuset or + * housekeeping. The proper way to do it is to re-affine the whole + * vector. 
*/ - set_bit(IRQTF_AFFINITY, &new->thread_flags); + kthread_bind_mask(t, cpu_possible_mask); + + /* + * Ensure the thread adjusts the affinity once it reaches the + * thread function. + */ + new->thread_flags = BIT(IRQTF_AFFINITY); + return 0; } +static bool valid_percpu_irqaction(struct irqaction *old, struct irqaction *new) +{ + do { + if (cpumask_intersects(old->affinity, new->affinity) || + old->percpu_dev_id == new->percpu_dev_id) + return false; + + old = old->next; + } while (old); + + return true; +} + /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. @@ -1465,6 +1452,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) struct irqaction *old, **old_ptr; unsigned long flags, thread_mask = 0; int ret, nested, shared = 0; + bool per_cpu_devid; if (!desc) return -EINVAL; @@ -1474,6 +1462,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (!try_module_get(desc->owner)) return -ENODEV; + per_cpu_devid = irq_settings_is_per_cpu_devid(desc); + new->irq = irq; /* @@ -1581,13 +1571,20 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ unsigned int oldtype; - if (desc->istate & IRQS_NMI) { + if (irq_is_nmi(desc) && !per_cpu_devid) { pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", new->name, irq, desc->irq_data.chip->name); ret = -EINVAL; goto out_unlock; } + if (per_cpu_devid && !valid_percpu_irqaction(old, new)) { + pr_err("Overlapping affinities for %s (irq %d) on irqchip %s.\n", + new->name, irq, desc->irq_data.chip->name); + ret = -EINVAL; + goto out_unlock; + } + /* * If nobody did set the configuration before, inherit * the one provided by the requester. 
@@ -1600,8 +1597,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!((old->flags & new->flags) & IRQF_SHARED) || - (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || - ((old->flags ^ new->flags) & IRQF_ONESHOT)) + (oldtype != (new->flags & IRQF_TRIGGER_MASK))) + goto mismatch; + + if ((old->flags & IRQF_ONESHOT) && + (new->flags & IRQF_COND_ONESHOT)) + new->flags |= IRQF_ONESHOT; + else if ((old->flags ^ new->flags) & IRQF_ONESHOT) goto mismatch; /* All handlers must agree on per-cpuness */ @@ -1683,8 +1685,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!shared) { - init_waitqueue_head(&desc->wait_for_threads); - /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, @@ -1735,7 +1735,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (!(new->flags & IRQF_NO_AUTOEN) && irq_settings_can_autoenable(desc)) { irq_startup(desc, IRQ_RESEND, IRQ_START_COND); - } else { + } else if (!per_cpu_devid) { /* * Shared interrupts do not go well with disabling * auto enable. The sharing interrupt might request @@ -1780,14 +1780,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irq_setup_timings(desc, new); - /* - * Strictly no need to wake it up, but hung_task complains - * when no hard interrupt wakes the thread up. 
- */ - if (new->thread) - wake_up_process(new->thread); - if (new->secondary) - wake_up_process(new->secondary->thread); + wake_up_and_wait_for_irq_thread_ready(desc, new); + wake_up_and_wait_for_irq_thread_ready(desc, new->secondary); register_irq_proc(irq, desc); new->dir = NULL; @@ -1818,15 +1812,13 @@ out_thread: struct task_struct *t = new->thread; new->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); } if (new->secondary && new->secondary->thread) { struct task_struct *t = new->secondary->thread; new->secondary->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); } out_mput: module_put(desc->owner); @@ -1912,7 +1904,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) * supports it also make sure that there is no (not yet serviced) * interrupt in flight at the hardware level. */ - __synchronize_hardirq(desc, true); + __synchronize_irq(desc); #ifdef CONFIG_DEBUG_SHIRQ /* @@ -1937,12 +1929,9 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) * the same bit to a newly requested action. */ if (action->thread) { - kthread_stop(action->thread); - put_task_struct(action->thread); - if (action->secondary && action->secondary->thread) { - kthread_stop(action->secondary->thread); - put_task_struct(action->secondary->thread); - } + kthread_stop_put(action->thread); + if (action->secondary && action->secondary->thread) + kthread_stop_put(action->secondary->thread); } /* Last action releases resources */ @@ -1956,9 +1945,8 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) * There is no interrupt on the fly anymore. Deactivate it * completely. 
*/ - raw_spin_lock_irqsave(&desc->lock, flags); - irq_domain_deactivate_irq(&desc->irq_data); - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) + irq_domain_deactivate_irq(&desc->irq_data); irq_release_resources(desc); chip_bus_sync_unlock(desc); @@ -1974,20 +1962,19 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) } /** - * free_irq - free an interrupt allocated with request_irq - * @irq: Interrupt line to free - * @dev_id: Device identity to free + * free_irq - free an interrupt allocated with request_irq + * @irq: Interrupt line to free + * @dev_id: Device identity to free * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. + * Remove an interrupt handler. The handler is removed and if the interrupt + * line is no longer in use by any driver it is disabled. On a shared IRQ + * the caller must ensure the interrupt is disabled on the card it drives + * before calling this function. The function does not return until any + * executing interrupts for this IRQ have completed. * - * This function must not be called from interrupt context. + * This function must not be called from interrupt context. * - * Returns the devname argument passed to request_irq. + * Returns the devname argument passed to request_irq. 
*/ const void *free_irq(unsigned int irq, void *dev_id) { @@ -2044,10 +2031,8 @@ static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc) const void *free_nmi(unsigned int irq, void *dev_id) { struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - const void *devname; - if (!desc || WARN_ON(!(desc->istate & IRQS_NMI))) + if (!desc || WARN_ON(!irq_is_nmi(desc))) return NULL; if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) @@ -2057,53 +2042,46 @@ const void *free_nmi(unsigned int irq, void *dev_id) if (WARN_ON(desc->depth == 0)) disable_nmi_nosync(irq); - raw_spin_lock_irqsave(&desc->lock, flags); - + guard(raw_spinlock_irqsave)(&desc->lock); irq_nmi_teardown(desc); - devname = __cleanup_nmi(irq, desc); - - raw_spin_unlock_irqrestore(&desc->lock, flags); - - return devname; + return __cleanup_nmi(irq, desc); } /** - * request_threaded_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * Primary handler for threaded interrupts. - * If handler is NULL and thread_fn != NULL - * the default primary handler is installed. - * @thread_fn: Function called from the irq handler thread - * If NULL, no irq thread is created - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * If you want to set up a threaded irq handler for your device - * then you need to supply @handler and @thread_fn. @handler is - * still called in hard interrupt context and has to check - * whether the interrupt originates from the device. 
If yes it - * needs to disable the interrupt on the device and return - * IRQ_WAKE_THREAD which will wake up the handler thread and run - * @thread_fn. This split handler design is necessary to support - * shared interrupts. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: + * request_threaded_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Primary handler for threaded interrupts. + * If handler is NULL and thread_fn != NULL + * the default primary handler is installed. + * @thread_fn: Function called from the irq handler thread + * If NULL, no irq thread is created + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the interrupt line + * and IRQ handling. From the point this call is made your handler function + * may be invoked. Since your handler function must clear any interrupt the + * board raises, you must take care both to initialise your hardware and to + * set up the interrupt handler in the right order. + * + * If you want to set up a threaded irq handler for your device then you + * need to supply @handler and @thread_fn. @handler is still called in hard + * interrupt context and has to check whether the interrupt originates from + * the device. If yes it needs to disable the interrupt on the device and + * return IRQ_WAKE_THREAD which will wake up the handler thread and run + * @thread_fn. This split handler design is necessary to support shared + * interrupts. + * + * @dev_id must be globally unique. 
Normally the address of the device data + * structure is used as the cookie. Since the handler receives this value + * it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id as this is + * required when freeing the interrupt. + * + * Flags: * * IRQF_SHARED Interrupt is shared * IRQF_TRIGGER_* Specify active edge(s) or level @@ -2201,21 +2179,20 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, EXPORT_SYMBOL(request_threaded_irq); /** - * request_any_context_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * Threaded handler for threaded interrupts. - * @flags: Interrupt type flags - * @name: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. It selects either a - * hardirq or threaded handling method depending on the - * context. - * - * On failure, it returns a negative value. On success, - * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. + * request_any_context_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @flags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the interrupt line + * and IRQ handling. It selects either a hardirq or threaded handling + * method depending on the context. + * + * Returns: On failure, it returns a negative value. On success, it returns either + * IRQC_IS_HARDIRQ or IRQC_IS_NESTED. 
*/ int request_any_context_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev_id) @@ -2242,37 +2219,35 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler, EXPORT_SYMBOL_GPL(request_any_context_irq); /** - * request_nmi - allocate an interrupt line for NMI delivery - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * Threaded handler for threaded interrupts. - * @irqflags: Interrupt type flags - * @name: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. It sets up the IRQ line - * to be handled as an NMI. - * - * An interrupt line delivering NMIs cannot be shared and IRQ handling - * cannot be threaded. - * - * Interrupt lines requested for NMI delivering must produce per cpu - * interrupts and have auto enabling setting disabled. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If the interrupt line cannot be used to deliver NMIs, function - * will fail and return a negative value. + * request_nmi - allocate an interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @irqflags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the interrupt line + * and IRQ handling. It sets up the IRQ line to be handled as an NMI. + * + * An interrupt line delivering NMIs cannot be shared and IRQ handling + * cannot be threaded. 
+ * + * Interrupt lines requested for NMI delivering must produce per cpu + * interrupts and have auto enabling setting disabled. + * + * @dev_id must be globally unique. Normally the address of the device data + * structure is used as the cookie. Since the handler receives this value + * it makes sense to use it. + * + * If the interrupt line cannot be used to deliver NMIs, function will fail + * and return a negative value. */ int request_nmi(unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char *name, void *dev_id) { struct irqaction *action; struct irq_desc *desc; - unsigned long flags; int retval; if (irq == IRQ_NOTCONNECTED) @@ -2314,21 +2289,17 @@ int request_nmi(unsigned int irq, irq_handler_t handler, if (retval) goto err_irq_setup; - raw_spin_lock_irqsave(&desc->lock, flags); - - /* Setup NMI state */ - desc->istate |= IRQS_NMI; - retval = irq_nmi_setup(desc); - if (retval) { - __cleanup_nmi(irq, desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return -EINVAL; + scoped_guard(raw_spinlock_irqsave, &desc->lock) { + /* Setup NMI state */ + desc->istate |= IRQS_NMI; + retval = irq_nmi_setup(desc); + if (retval) { + __cleanup_nmi(irq, desc); + return -EINVAL; + } + return 0; } - raw_spin_unlock_irqrestore(&desc->lock, flags); - - return 0; - err_irq_setup: irq_chip_pm_put(&desc->irq_data); err_out: @@ -2339,35 +2310,25 @@ err_out: void enable_percpu_irq(unsigned int irq, unsigned int type) { - unsigned int cpu = smp_processor_id(); - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); - - if (!desc) - return; - - /* - * If the trigger type is not specified by the caller, then - * use the default for this interrupt. 
- */ - type &= IRQ_TYPE_SENSE_MASK; - if (type == IRQ_TYPE_NONE) - type = irqd_get_trigger_type(&desc->irq_data); - - if (type != IRQ_TYPE_NONE) { - int ret; + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) { + struct irq_desc *desc = scoped_irqdesc; - ret = __irq_set_trigger(desc, type); - - if (ret) { - WARN(1, "failed to set type for IRQ%d\n", irq); - goto out; + /* + * If the trigger type is not specified by the caller, then + * use the default for this interrupt. + */ + type &= IRQ_TYPE_SENSE_MASK; + if (type == IRQ_TYPE_NONE) + type = irqd_get_trigger_type(&desc->irq_data); + + if (type != IRQ_TYPE_NONE) { + if (__irq_set_trigger(desc, type)) { + WARN(1, "failed to set type for IRQ%d\n", irq); + return; + } } + irq_percpu_enable(desc, smp_processor_id()); } - - irq_percpu_enable(desc, cpu); -out: - irq_put_desc_unlock(desc, flags); } EXPORT_SYMBOL_GPL(enable_percpu_irq); @@ -2385,33 +2346,16 @@ void enable_percpu_nmi(unsigned int irq, unsigned int type) */ bool irq_percpu_is_enabled(unsigned int irq) { - unsigned int cpu = smp_processor_id(); - struct irq_desc *desc; - unsigned long flags; - bool is_enabled; - - desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); - if (!desc) - return false; - - is_enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); - irq_put_desc_unlock(desc, flags); - - return is_enabled; + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) + return cpumask_test_cpu(smp_processor_id(), scoped_irqdesc->percpu_enabled); + return false; } EXPORT_SYMBOL_GPL(irq_percpu_is_enabled); void disable_percpu_irq(unsigned int irq) { - unsigned int cpu = smp_processor_id(); - unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_PERCPU); - - if (!desc) - return; - - irq_percpu_disable(desc, cpu); - irq_put_desc_unlock(desc, flags); + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) + irq_percpu_disable(scoped_irqdesc, smp_processor_id()); } 
EXPORT_SYMBOL_GPL(disable_percpu_irq); @@ -2426,72 +2370,60 @@ void disable_percpu_nmi(unsigned int irq) static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_id) { struct irq_desc *desc = irq_to_desc(irq); - struct irqaction *action; - unsigned long flags; + struct irqaction *action, **action_ptr; WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); if (!desc) return NULL; - raw_spin_lock_irqsave(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) { + action_ptr = &desc->action; + for (;;) { + action = *action_ptr; - action = desc->action; - if (!action || action->percpu_dev_id != dev_id) { - WARN(1, "Trying to free already-free IRQ %d\n", irq); - goto bad; - } + if (!action) { + WARN(1, "Trying to free already-free IRQ %d\n", irq); + return NULL; + } - if (!cpumask_empty(desc->percpu_enabled)) { - WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", - irq, cpumask_first(desc->percpu_enabled)); - goto bad; - } + if (action->percpu_dev_id == dev_id) + break; - /* Found it - now remove it from the list of entries: */ - desc->action = NULL; + action_ptr = &action->next; + } - desc->istate &= ~IRQS_NMI; + if (cpumask_intersects(desc->percpu_enabled, action->affinity)) { + WARN(1, "percpu IRQ %d still enabled on CPU%d!\n", irq, + cpumask_first_and(desc->percpu_enabled, action->affinity)); + return NULL; + } - raw_spin_unlock_irqrestore(&desc->lock, flags); + /* Found it - now remove it from the list of entries: */ + *action_ptr = action->next; - unregister_handler_proc(irq, action); + /* Demote from NMI if we killed the last action */ + if (!desc->action) + desc->istate &= ~IRQS_NMI; + } + unregister_handler_proc(irq, action); irq_chip_pm_put(&desc->irq_data); module_put(desc->owner); return action; - -bad: - raw_spin_unlock_irqrestore(&desc->lock, flags); - return NULL; } /** - * remove_percpu_irq - free a per-cpu interrupt - * @irq: Interrupt line to free - * @act: irqaction for the interrupt + * 
free_percpu_irq - free an interrupt allocated with request_percpu_irq + * @irq: Interrupt line to free + * @dev_id: Device identity to free * - * Used to remove interrupts statically setup by the early boot process. - */ -void remove_percpu_irq(unsigned int irq, struct irqaction *act) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (desc && irq_settings_is_per_cpu_devid(desc)) - __free_percpu_irq(irq, act->percpu_dev_id); -} - -/** - * free_percpu_irq - free an interrupt allocated with request_percpu_irq - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove a percpu interrupt handler. The handler is removed, but - * the interrupt line is not disabled. This must be done on each - * CPU before calling this function. The function does not return - * until any executing interrupts for this IRQ have completed. + * Remove a percpu interrupt handler. The handler is removed, but the + * interrupt line is not disabled. This must be done on each CPU before + * calling this function. The function does not return until any executing + * interrupts for this IRQ have completed. * - * This function must not be called from interrupt context. + * This function must not be called from interrupt context. */ void free_percpu_irq(unsigned int irq, void __percpu *dev_id) { @@ -2513,16 +2445,16 @@ void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) if (!desc || !irq_settings_is_per_cpu_devid(desc)) return; - if (WARN_ON(!(desc->istate & IRQS_NMI))) + if (WARN_ON(!irq_is_nmi(desc))) return; kfree(__free_percpu_irq(irq, dev_id)); } /** - * setup_percpu_irq - setup a per-cpu interrupt - * @irq: Interrupt line to setup - * @act: irqaction for the interrupt + * setup_percpu_irq - setup a per-cpu interrupt + * @irq: Interrupt line to setup + * @act: irqaction for the interrupt * * Used to statically setup per-cpu interrupts in the early boot process. 
*/ @@ -2546,26 +2478,57 @@ int setup_percpu_irq(unsigned int irq, struct irqaction *act) return retval; } +static +struct irqaction *create_percpu_irqaction(irq_handler_t handler, unsigned long flags, + const char *devname, const cpumask_t *affinity, + void __percpu *dev_id) +{ + struct irqaction *action; + + if (!affinity) + affinity = cpu_possible_mask; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return NULL; + + action->handler = handler; + action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; + action->name = devname; + action->percpu_dev_id = dev_id; + action->affinity = affinity; + + /* + * We allow some form of sharing for non-overlapping affinity + * masks. Obviously, covering all CPUs prevents any sharing in + * the first place. + */ + if (!cpumask_equal(affinity, cpu_possible_mask)) + action->flags |= IRQF_SHARED; + + return action; +} + /** - * __request_percpu_irq - allocate a percpu interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * @flags: Interrupt type flags (IRQF_TIMER only) - * @devname: An ascii name for the claiming device - * @dev_id: A percpu cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt on the local CPU. If the interrupt is supposed to be - * enabled on other CPUs, it has to be done on each CPU using - * enable_percpu_irq(). - * - * Dev_id must be globally unique. It is a per-cpu variable, and - * the handler gets called with the interrupted CPU's instance of - * that variable. + * __request_percpu_irq - allocate a percpu interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. 
+ * @flags: Interrupt type flags (IRQF_TIMER only) + * @devname: An ascii name for the claiming device + * @affinity: A cpumask describing the target CPUs for this interrupt + * @dev_id: A percpu cookie passed back to the handler function + * + * This call allocates interrupt resources, but doesn't enable the interrupt + * on any CPU, as all percpu-devid interrupts are flagged with IRQ_NOAUTOEN. + * It has to be done on each CPU using enable_percpu_irq(). + * + * @dev_id must be globally unique. It is a per-cpu variable, and + * the handler gets called with the interrupted CPU's instance of + * that variable. */ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *devname, - void __percpu *dev_id) + const cpumask_t *affinity, void __percpu *dev_id) { struct irqaction *action; struct irq_desc *desc; @@ -2582,15 +2545,10 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, if (flags && flags != IRQF_TIMER) return -EINVAL; - action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + action = create_percpu_irqaction(handler, flags, devname, affinity, dev_id); if (!action) return -ENOMEM; - action->handler = handler; - action->flags = flags | IRQF_PERCPU | IRQF_NO_SUSPEND; - action->name = devname; - action->percpu_dev_id = dev_id; - retval = irq_chip_pm_get(&desc->irq_data); if (retval < 0) { kfree(action); @@ -2609,32 +2567,32 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, EXPORT_SYMBOL_GPL(__request_percpu_irq); /** - * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs. - * @name: An ascii name for the claiming device - * @dev_id: A percpu cookie passed back to the handler function - * - * This call allocates interrupt resources for a per CPU NMI. 
Per CPU NMIs - * have to be setup on each CPU by calling prepare_percpu_nmi() before - * being enabled on the same CPU by using enable_percpu_nmi(). - * - * Dev_id must be globally unique. It is a per-cpu variable, and - * the handler gets called with the interrupted CPU's instance of - * that variable. - * - * Interrupt lines requested for NMI delivering should have auto enabling - * setting disabled. - * - * If the interrupt line cannot be used to deliver NMIs, function - * will fail returning a negative value. + * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * @name: An ascii name for the claiming device + * @affinity: A cpumask describing the target CPUs for this interrupt + * @dev_id: A percpu cookie passed back to the handler function + * + * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs + * have to be setup on each CPU by calling prepare_percpu_nmi() before + * being enabled on the same CPU by using enable_percpu_nmi(). + * + * @dev_id must be globally unique. It is a per-cpu variable, and the + * handler gets called with the interrupted CPU's instance of that + * variable. + * + * Interrupt lines requested for NMI delivering should have auto enabling + * setting disabled. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail returning a negative value. 
*/ -int request_percpu_nmi(unsigned int irq, irq_handler_t handler, - const char *name, void __percpu *dev_id) +int request_percpu_nmi(unsigned int irq, irq_handler_t handler, const char *name, + const struct cpumask *affinity, void __percpu *dev_id) { struct irqaction *action; struct irq_desc *desc; - unsigned long flags; int retval; if (!handler) @@ -2648,20 +2606,16 @@ int request_percpu_nmi(unsigned int irq, irq_handler_t handler, !irq_supports_nmi(desc)) return -EINVAL; - /* The line cannot already be NMI */ - if (desc->istate & IRQS_NMI) + /* The line cannot be NMI already if the new request covers all CPUs */ + if (irq_is_nmi(desc) && + (!affinity || cpumask_equal(affinity, cpu_possible_mask))) return -EINVAL; - action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + action = create_percpu_irqaction(handler, IRQF_NO_THREAD | IRQF_NOBALANCING, + name, affinity, dev_id); if (!action) return -ENOMEM; - action->handler = handler; - action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND | IRQF_NO_THREAD - | IRQF_NOBALANCING; - action->name = name; - action->percpu_dev_id = dev_id; - retval = irq_chip_pm_get(&desc->irq_data); if (retval < 0) goto err_out; @@ -2670,10 +2624,8 @@ int request_percpu_nmi(unsigned int irq, irq_handler_t handler, if (retval) goto err_irq_setup; - raw_spin_lock_irqsave(&desc->lock, flags); - desc->istate |= IRQS_NMI; - raw_spin_unlock_irqrestore(&desc->lock, flags); - + scoped_guard(raw_spinlock_irqsave, &desc->lock) + desc->istate |= IRQS_NMI; return 0; err_irq_setup: @@ -2685,83 +2637,58 @@ err_out: } /** - * prepare_percpu_nmi - performs CPU local setup for NMI delivery - * @irq: Interrupt line to prepare for NMI delivery + * prepare_percpu_nmi - performs CPU local setup for NMI delivery + * @irq: Interrupt line to prepare for NMI delivery * - * This call prepares an interrupt line to deliver NMI on the current CPU, - * before that interrupt line gets enabled with enable_percpu_nmi(). 
+ * This call prepares an interrupt line to deliver NMI on the current CPU, + * before that interrupt line gets enabled with enable_percpu_nmi(). * - * As a CPU local operation, this should be called from non-preemptible - * context. + * As a CPU local operation, this should be called from non-preemptible + * context. * - * If the interrupt line cannot be used to deliver NMIs, function - * will fail returning a negative value. + * If the interrupt line cannot be used to deliver NMIs, function will fail + * returning a negative value. */ int prepare_percpu_nmi(unsigned int irq) { - unsigned long flags; - struct irq_desc *desc; - int ret = 0; + int ret = -EINVAL; WARN_ON(preemptible()); - desc = irq_get_desc_lock(irq, &flags, - IRQ_GET_DESC_CHECK_PERCPU); - if (!desc) - return -EINVAL; - - if (WARN(!(desc->istate & IRQS_NMI), - KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", - irq)) { - ret = -EINVAL; - goto out; - } + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) { + if (WARN(!irq_is_nmi(scoped_irqdesc), + "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", irq)) + return -EINVAL; - ret = irq_nmi_setup(desc); - if (ret) { - pr_err("Failed to setup NMI delivery: irq %u\n", irq); - goto out; + ret = irq_nmi_setup(scoped_irqdesc); + if (ret) + pr_err("Failed to setup NMI delivery: irq %u\n", irq); } - -out: - irq_put_desc_unlock(desc, flags); return ret; } /** - * teardown_percpu_nmi - undoes NMI setup of IRQ line - * @irq: Interrupt line from which CPU local NMI configuration should be - * removed - * - * This call undoes the setup done by prepare_percpu_nmi(). + * teardown_percpu_nmi - undoes NMI setup of IRQ line + * @irq: Interrupt line from which CPU local NMI configuration should be removed * - * IRQ line should not be enabled for the current CPU. + * This call undoes the setup done by prepare_percpu_nmi(). * - * As a CPU local operation, this should be called from non-preemptible - * context. 
+ * IRQ line should not be enabled for the current CPU. + * As a CPU local operation, this should be called from non-preemptible + * context. */ void teardown_percpu_nmi(unsigned int irq) { - unsigned long flags; - struct irq_desc *desc; - WARN_ON(preemptible()); - desc = irq_get_desc_lock(irq, &flags, - IRQ_GET_DESC_CHECK_PERCPU); - if (!desc) - return; - - if (WARN_ON(!(desc->istate & IRQS_NMI))) - goto out; - - irq_nmi_teardown(desc); -out: - irq_put_desc_unlock(desc, flags); + scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_PERCPU) { + if (WARN_ON(!irq_is_nmi(scoped_irqdesc))) + return; + irq_nmi_teardown(scoped_irqdesc); + } } -int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, - bool *state) +static int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state) { struct irq_chip *chip; int err = -EINVAL; @@ -2785,87 +2712,62 @@ int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, } /** - * irq_get_irqchip_state - returns the irqchip state of a interrupt. - * @irq: Interrupt line that is forwarded to a VM - * @which: One of IRQCHIP_STATE_* the caller wants to know about - * @state: a pointer to a boolean where the state is to be stored + * irq_get_irqchip_state - returns the irqchip state of a interrupt. + * @irq: Interrupt line that is forwarded to a VM + * @which: One of IRQCHIP_STATE_* the caller wants to know about + * @state: a pointer to a boolean where the state is to be stored * - * This call snapshots the internal irqchip state of an - * interrupt, returning into @state the bit corresponding to - * stage @which + * This call snapshots the internal irqchip state of an interrupt, + * returning into @state the bit corresponding to stage @which * - * This function should be called with preemption disabled if the - * interrupt controller has per-cpu registers. 
+ * This function should be called with preemption disabled if the interrupt + * controller has per-cpu registers. */ -int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, - bool *state) +int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool *state) { - struct irq_desc *desc; - struct irq_data *data; - unsigned long flags; - int err = -EINVAL; - - desc = irq_get_desc_buslock(irq, &flags, 0); - if (!desc) - return err; + scoped_irqdesc_get_and_buslock(irq, 0) { + struct irq_data *data = irq_desc_get_irq_data(scoped_irqdesc); - data = irq_desc_get_irq_data(desc); - - err = __irq_get_irqchip_state(data, which, state); - - irq_put_desc_busunlock(desc, flags); - return err; + return __irq_get_irqchip_state(data, which, state); + } + return -EINVAL; } EXPORT_SYMBOL_GPL(irq_get_irqchip_state); /** - * irq_set_irqchip_state - set the state of a forwarded interrupt. - * @irq: Interrupt line that is forwarded to a VM - * @which: State to be restored (one of IRQCHIP_STATE_*) - * @val: Value corresponding to @which + * irq_set_irqchip_state - set the state of a forwarded interrupt. + * @irq: Interrupt line that is forwarded to a VM + * @which: State to be restored (one of IRQCHIP_STATE_*) + * @val: Value corresponding to @which * - * This call sets the internal irqchip state of an interrupt, - * depending on the value of @which. + * This call sets the internal irqchip state of an interrupt, depending on + * the value of @which. * - * This function should be called with migration disabled if the - * interrupt controller has per-cpu registers. + * This function should be called with migration disabled if the interrupt + * controller has per-cpu registers. 
*/ -int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, - bool val) +int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool val) { - struct irq_desc *desc; - struct irq_data *data; - struct irq_chip *chip; - unsigned long flags; - int err = -EINVAL; + scoped_irqdesc_get_and_buslock(irq, 0) { + struct irq_data *data = irq_desc_get_irq_data(scoped_irqdesc); + struct irq_chip *chip; - desc = irq_get_desc_buslock(irq, &flags, 0); - if (!desc) - return err; + do { + chip = irq_data_get_irq_chip(data); - data = irq_desc_get_irq_data(desc); + if (WARN_ON_ONCE(!chip)) + return -ENODEV; - do { - chip = irq_data_get_irq_chip(data); - if (WARN_ON_ONCE(!chip)) { - err = -ENODEV; - goto out_unlock; - } - if (chip->irq_set_irqchip_state) - break; -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - data = data->parent_data; -#else - data = NULL; -#endif - } while (data); + if (chip->irq_set_irqchip_state) + break; - if (data) - err = chip->irq_set_irqchip_state(data, which, val); + data = irqd_get_parent_data(data); + } while (data); -out_unlock: - irq_put_desc_busunlock(desc, flags); - return err; + if (data) + return chip->irq_set_irqchip_state(data, which, val); + } + return -EINVAL; } EXPORT_SYMBOL_GPL(irq_set_irqchip_state); diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index bbfb26489aa1..8f222d1cccec 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -8,8 +8,6 @@ #include <linux/cpu.h> #include <linux/irq.h> -#define IRQ_MATRIX_SIZE (BITS_TO_LONGS(IRQ_MATRIX_BITS)) - struct cpumap { unsigned int available; unsigned int allocated; @@ -17,8 +15,8 @@ struct cpumap { unsigned int managed_allocated; bool initialized; bool online; - unsigned long alloc_map[IRQ_MATRIX_SIZE]; - unsigned long managed_map[IRQ_MATRIX_SIZE]; + unsigned long *managed_map; + unsigned long alloc_map[]; }; struct irq_matrix { @@ -32,8 +30,8 @@ struct irq_matrix { unsigned int total_allocated; unsigned int online_maps; struct cpumap __percpu *maps; - 
unsigned long scratch_map[IRQ_MATRIX_SIZE]; - unsigned long system_map[IRQ_MATRIX_SIZE]; + unsigned long *system_map; + unsigned long scratch_map[]; }; #define CREATE_TRACE_POINTS @@ -50,24 +48,32 @@ __init struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits, unsigned int alloc_start, unsigned int alloc_end) { + unsigned int cpu, matrix_size = BITS_TO_LONGS(matrix_bits); struct irq_matrix *m; - if (matrix_bits > IRQ_MATRIX_BITS) - return NULL; - - m = kzalloc(sizeof(*m), GFP_KERNEL); + m = kzalloc(struct_size(m, scratch_map, matrix_size * 2), GFP_KERNEL); if (!m) return NULL; + m->system_map = &m->scratch_map[matrix_size]; + m->matrix_bits = matrix_bits; m->alloc_start = alloc_start; m->alloc_end = alloc_end; m->alloc_size = alloc_end - alloc_start; - m->maps = alloc_percpu(*m->maps); + m->maps = __alloc_percpu(struct_size(m->maps, alloc_map, matrix_size * 2), + __alignof__(*m->maps)); if (!m->maps) { kfree(m); return NULL; } + + for_each_possible_cpu(cpu) { + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + + cm->managed_map = &cm->alloc_map[matrix_size]; + } + return m; } @@ -286,7 +292,7 @@ void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk) int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk, unsigned int *mapped_cpu) { - unsigned int bit, cpu, end = m->alloc_end; + unsigned int bit, cpu, end; struct cpumap *cm; if (cpumask_empty(msk)) @@ -466,16 +472,16 @@ unsigned int irq_matrix_reserved(struct irq_matrix *m) } /** - * irq_matrix_allocated - Get the number of allocated irqs on the local cpu + * irq_matrix_allocated - Get the number of allocated non-managed irqs on the local CPU * @m: Pointer to the matrix to search * - * This returns number of allocated irqs + * This returns number of allocated non-managed interrupts. 
*/ unsigned int irq_matrix_allocated(struct irq_matrix *m) { struct cpumap *cm = this_cpu_ptr(m->maps); - return cm->allocated; + return cm->allocated - cm->managed_allocated; } #ifdef CONFIG_GENERIC_IRQ_DEBUGFS diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 61ca924ef4b4..f2b2929986ff 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -26,7 +26,7 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear) * The outgoing CPU might be the last online target in a pending * interrupt move. If that's the case clear the pending move bit. */ - if (cpumask_any_and(desc->pending_mask, cpu_online_mask) >= nr_cpu_ids) { + if (!cpumask_intersects(desc->pending_mask, cpu_online_mask)) { irqd_clr_move_pending(data); return false; } @@ -35,6 +35,16 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear) return true; } +void irq_force_complete_move(struct irq_desc *desc) +{ + for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = irqd_get_parent_data(d)) { + if (d->chip && d->chip->irq_force_complete_move) { + d->chip->irq_force_complete_move(d); + return; + } + } +} + void irq_move_masked_irq(struct irq_data *idata) { struct irq_desc *desc = irq_data_to_desc(idata); @@ -74,7 +84,7 @@ void irq_move_masked_irq(struct irq_data *idata) * For correct operation this depends on the caller * masking the irqs. */ - if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids) { + if (cpumask_intersects(desc->pending_mask, cpu_online_mask)) { int ret; ret = irq_do_set_affinity(data, desc->pending_mask, false); @@ -117,3 +127,13 @@ void __irq_move_irq(struct irq_data *idata) if (!masked) idata->chip->irq_unmask(idata); } + +bool irq_can_move_in_process_context(struct irq_data *data) +{ + /* + * Get the top level irq_data in the hierarchy, which is optimized + * away when CONFIG_IRQ_DOMAIN_HIERARCHY is disabled. 
+ */ + data = irq_desc_get_irq_data(irq_data_to_desc(data)); + return irq_can_move_pcntxt(data); +} diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 2bdfce5edafd..68886881fe10 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -8,18 +8,59 @@ * This file contains common code to support Message Signaled Interrupts for * PCI compatible and non PCI compatible devices. */ -#include <linux/types.h> #include <linux/device.h> #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/msi.h> +#include <linux/mutex.h> +#include <linux/pci.h> #include <linux/slab.h> +#include <linux/seq_file.h> #include <linux/sysfs.h> -#include <linux/pci.h> +#include <linux/types.h> +#include <linux/xarray.h> #include "internals.h" +/** + * struct msi_device_data - MSI per device data + * @properties: MSI properties which are interesting to drivers + * @mutex: Mutex protecting the MSI descriptor store + * @__domains: Internal data for per device MSI domains + * @__iter_idx: Index to search the next entry for iterators + */ +struct msi_device_data { + unsigned long properties; + struct mutex mutex; + struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS]; + unsigned long __iter_idx; +}; + +/** + * struct msi_ctrl - MSI internal management control structure + * @domid: ID of the domain on which management operations should be done + * @first: First (hardware) slot index to operate on + * @last: Last (hardware) slot index to operate on + * @nirqs: The number of Linux interrupts to allocate. Can be larger + * than the range due to PCI/multi-MSI. 
+ */ +struct msi_ctrl { + unsigned int domid; + unsigned int first; + unsigned int last; + unsigned int nirqs; +}; + +/* Invalid Xarray index which is outside of any searchable range */ +#define MSI_XA_MAX_INDEX (ULONG_MAX - 1) +/* The maximum domain size */ +#define MSI_XA_DOMAIN_SIZE (MSI_MAX_INDEX + 1) + +static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl); +static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid); static inline int msi_sysfs_create_group(struct device *dev); +static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg); /** * msi_alloc_desc - Allocate an initialized msi_desc @@ -33,7 +74,7 @@ static inline int msi_sysfs_create_group(struct device *dev); * Return: pointer to allocated &msi_desc on success or %NULL on failure */ static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec, - const struct irq_affinity_desc *affinity) + const struct irq_affinity_desc *affinity) { struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); @@ -43,7 +84,7 @@ static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec, desc->dev = dev; desc->nvec_used = nvec; if (affinity) { - desc->affinity = kmemdup(affinity, nvec * sizeof(*desc->affinity), GFP_KERNEL); + desc->affinity = kmemdup_array(affinity, nvec, sizeof(*desc->affinity), GFP_KERNEL); if (!desc->affinity) { kfree(desc); return NULL; @@ -58,25 +99,56 @@ static void msi_free_desc(struct msi_desc *desc) kfree(desc); } -static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, unsigned int index) +static int msi_insert_desc(struct device *dev, struct msi_desc *desc, + unsigned int domid, unsigned int index) { + struct msi_device_data *md = dev->msi.data; + struct xarray *xa = &md->__domains[domid].store; + unsigned int hwsize; int ret; - desc->msi_index = index; - ret = xa_insert(&md->__store, index, desc, GFP_KERNEL); - if (ret) - msi_free_desc(desc); + 
hwsize = msi_domain_get_hwsize(dev, domid); + + if (index == MSI_ANY_INDEX) { + struct xa_limit limit = { .min = 0, .max = hwsize - 1 }; + unsigned int index; + + /* Let the xarray allocate a free index within the limit */ + ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL); + if (ret) + goto fail; + + desc->msi_index = index; + return 0; + } else { + if (index >= hwsize) { + ret = -ERANGE; + goto fail; + } + + desc->msi_index = index; + ret = xa_insert(xa, index, desc, GFP_KERNEL); + if (ret) + goto fail; + return 0; + } +fail: + msi_free_desc(desc); return ret; } /** - * msi_add_msi_desc - Allocate and initialize a MSI descriptor + * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and + * insert it at @init_desc->msi_index + * * @dev: Pointer to the device for which the descriptor is allocated + * @domid: The id of the interrupt domain to which the desriptor is added * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor * * Return: 0 on success or an appropriate failure code. */ -int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc) +int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, + struct msi_desc *init_desc) { struct msi_desc *desc; @@ -88,40 +160,8 @@ int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc) /* Copy type specific data to the new descriptor. */ desc->pci = init_desc->pci; - return msi_insert_desc(dev->msi.data, desc, init_desc->msi_index); -} - -/** - * msi_add_simple_msi_descs - Allocate and initialize MSI descriptors - * @dev: Pointer to the device for which the descriptors are allocated - * @index: Index for the first MSI descriptor - * @ndesc: Number of descriptors to allocate - * - * Return: 0 on success or an appropriate failure code. 
- */ -static int msi_add_simple_msi_descs(struct device *dev, unsigned int index, unsigned int ndesc) -{ - unsigned int idx, last = index + ndesc - 1; - struct msi_desc *desc; - int ret; - - lockdep_assert_held(&dev->msi.data->mutex); - for (idx = index; idx <= last; idx++) { - desc = msi_alloc_desc(dev, 1, NULL); - if (!desc) - goto fail_mem; - ret = msi_insert_desc(dev->msi.data, desc, idx); - if (ret) - goto fail; - } - return 0; - -fail_mem: - ret = -ENOMEM; -fail: - msi_free_msi_descs_range(dev, MSI_DESC_NOTASSOCIATED, index, last); - return ret; + return msi_insert_desc(dev, desc, domid, init_desc->msi_index); } static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) @@ -138,28 +178,97 @@ static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) return false; } +static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl) +{ + unsigned int hwsize; + + if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS || + (dev->msi.domain && + !dev->msi.data->__domains[ctrl->domid].domain))) + return false; + + hwsize = msi_domain_get_hwsize(dev, ctrl->domid); + if (WARN_ON_ONCE(ctrl->first > ctrl->last || + ctrl->first >= hwsize || + ctrl->last >= hwsize)) + return false; + return true; +} + +static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_desc *desc; + struct xarray *xa; + unsigned long idx; + + lockdep_assert_held(&dev->msi.data->mutex); + + if (!msi_ctrl_valid(dev, ctrl)) + return; + + xa = &dev->msi.data->__domains[ctrl->domid].store; + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + xa_erase(xa, idx); + + /* Leak the descriptor when it is still referenced */ + if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED))) + continue; + msi_free_desc(desc); + } +} + +/** + * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain + * @dev: Device for which to free the descriptors + * @domid: Id of the domain to 
operate on + * @first: Index to start freeing from (inclusive) + * @last: Last index to be freed (inclusive) + */ +void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + }; + + msi_domain_free_descs(dev, &ctrl); +} + /** - * msi_free_msi_descs_range - Free MSI descriptors of a device - * @dev: Device to free the descriptors - * @filter: Descriptor state filter - * @first_index: Index to start freeing from - * @last_index: Last index to be freed + * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors + * @dev: Pointer to the device for which the descriptors are allocated + * @ctrl: Allocation control struct + * + * Return: 0 on success or an appropriate failure code. */ -void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter, - unsigned int first_index, unsigned int last_index) +static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl) { - struct xarray *xa = &dev->msi.data->__store; struct msi_desc *desc; - unsigned long idx; + unsigned int idx; + int ret; lockdep_assert_held(&dev->msi.data->mutex); - xa_for_each_range(xa, idx, desc, first_index, last_index) { - if (msi_desc_match(desc, filter)) { - xa_erase(xa, idx); - msi_free_desc(desc); - } + if (!msi_ctrl_valid(dev, ctrl)) + return -EINVAL; + + for (idx = ctrl->first; idx <= ctrl->last; idx++) { + desc = msi_alloc_desc(dev, 1, NULL); + if (!desc) + goto fail_mem; + ret = msi_insert_desc(dev, desc, ctrl->domid, idx); + if (ret) + goto fail; } + return 0; + +fail_mem: + ret = -ENOMEM; +fail: + msi_domain_free_descs(dev, ctrl); + return ret; } void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) @@ -178,9 +287,13 @@ EXPORT_SYMBOL_GPL(get_cached_msi_msg); static void msi_device_data_release(struct device *dev, void *res) { struct msi_device_data *md = res; + int i; - 
WARN_ON_ONCE(!xa_empty(&md->__store)); - xa_destroy(&md->__store); + for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) { + msi_remove_device_irq_domain(dev, i); + WARN_ON_ONCE(!xa_empty(&md->__domains[i].store)); + xa_destroy(&md->__domains[i].store); + } dev->msi.data = NULL; } @@ -197,7 +310,7 @@ static void msi_device_data_release(struct device *dev, void *res) int msi_setup_device_data(struct device *dev) { struct msi_device_data *md; - int ret; + int ret, i; if (dev->msi.data) return 0; @@ -212,7 +325,18 @@ int msi_setup_device_data(struct device *dev) return ret; } - xa_init(&md->__store); + for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) + xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC); + + /* + * If @dev::msi::domain is set and is a global MSI domain, copy the + * pointer into the domain array so all code can operate on domain + * ids. The NULL pointer check is required to keep the legacy + * architecture specific PCI/MSI support working. + */ + if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain)) + md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain; + mutex_init(&md->mutex); dev->msi.data = md; devres_add(dev, md); @@ -220,42 +344,49 @@ int msi_setup_device_data(struct device *dev) } /** - * msi_lock_descs - Lock the MSI descriptor storage of a device + * __msi_lock_descs - Lock the MSI descriptor storage of a device * @dev: Device to operate on + * + * Internal function for guard(msi_descs_lock). Don't use in code. */ -void msi_lock_descs(struct device *dev) +void __msi_lock_descs(struct device *dev) { mutex_lock(&dev->msi.data->mutex); } -EXPORT_SYMBOL_GPL(msi_lock_descs); +EXPORT_SYMBOL_GPL(__msi_lock_descs); /** - * msi_unlock_descs - Unlock the MSI descriptor storage of a device + * __msi_unlock_descs - Unlock the MSI descriptor storage of a device * @dev: Device to operate on + * + * Internal function for guard(msi_descs_lock). Don't use in code. 
*/ -void msi_unlock_descs(struct device *dev) +void __msi_unlock_descs(struct device *dev) { - /* Invalidate the index wich was cached by the iterator */ - dev->msi.data->__iter_idx = MSI_MAX_INDEX; + /* Invalidate the index which was cached by the iterator */ + dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX; mutex_unlock(&dev->msi.data->mutex); } -EXPORT_SYMBOL_GPL(msi_unlock_descs); +EXPORT_SYMBOL_GPL(__msi_unlock_descs); -static struct msi_desc *msi_find_desc(struct msi_device_data *md, enum msi_desc_filter filter) +static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid, + enum msi_desc_filter filter) { + struct xarray *xa = &md->__domains[domid].store; struct msi_desc *desc; - xa_for_each_start(&md->__store, md->__iter_idx, desc, md->__iter_idx) { + xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) { if (msi_desc_match(desc, filter)) return desc; } - md->__iter_idx = MSI_MAX_INDEX; + md->__iter_idx = MSI_XA_MAX_INDEX; return NULL; } /** - * msi_first_desc - Get the first MSI descriptor of a device + * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device * @dev: Device to operate on + * @domid: The id of the interrupt domain which should be walked. * @filter: Descriptor state filter * * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs() @@ -264,23 +395,26 @@ static struct msi_desc *msi_find_desc(struct msi_device_data *md, enum msi_desc_ * Return: Pointer to the first MSI descriptor matching the search * criteria, NULL if none found. 
*/ -struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter) +struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter) { struct msi_device_data *md = dev->msi.data; - if (WARN_ON_ONCE(!md)) + if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) return NULL; lockdep_assert_held(&md->mutex); md->__iter_idx = 0; - return msi_find_desc(md, filter); + return msi_find_desc(md, domid, filter); } -EXPORT_SYMBOL_GPL(msi_first_desc); +EXPORT_SYMBOL_GPL(msi_domain_first_desc); /** * msi_next_desc - Get the next MSI descriptor of a device * @dev: Device to operate on + * @domid: The id of the interrupt domain which should be walked. + * @filter: Descriptor state filter * * The first invocation of msi_next_desc() has to be preceeded by a * successful invocation of __msi_first_desc(). Consecutive invocations are @@ -290,11 +424,12 @@ EXPORT_SYMBOL_GPL(msi_first_desc); * Return: Pointer to the next MSI descriptor matching the search * criteria, NULL if none found. 
*/ -struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter) +struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter) { struct msi_device_data *md = dev->msi.data; - if (WARN_ON_ONCE(!md)) + if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) return NULL; lockdep_assert_held(&md->mutex); @@ -303,47 +438,51 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter) return NULL; md->__iter_idx++; - return msi_find_desc(md, filter); + return msi_find_desc(md, domid, filter); } EXPORT_SYMBOL_GPL(msi_next_desc); /** - * msi_get_virq - Return Linux interrupt number of a MSI interrupt + * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain * @dev: Device to operate on + * @domid: Domain ID of the interrupt domain associated to the device * @index: MSI interrupt index to look for (0-based) * * Return: The Linux interrupt number on success (> 0), 0 if not found */ -unsigned int msi_get_virq(struct device *dev, unsigned int index) +unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index) { struct msi_desc *desc; - unsigned int ret = 0; - bool pcimsi; + bool pcimsi = false; + struct xarray *xa; if (!dev->msi.data) return 0; - pcimsi = dev_is_pci(dev) ? to_pci_dev(dev)->msi_enabled : false; + if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return 0; + + /* This check is only valid for the PCI default MSI domain */ + if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN) + pcimsi = to_pci_dev(dev)->msi_enabled; - msi_lock_descs(dev); - desc = xa_load(&dev->msi.data->__store, pcimsi ? 0 : index); + guard(msi_descs_lock)(dev); + xa = &dev->msi.data->__domains[domid].store; + desc = xa_load(xa, pcimsi ? 0 : index); if (desc && desc->irq) { /* * PCI-MSI has only one descriptor for multiple interrupts. * PCI-MSIX and platform MSI use a descriptor per * interrupt. 
*/ - if (pcimsi) { - if (index < desc->nvec_used) - ret = desc->irq + index; - } else { - ret = desc->irq; - } + if (!pcimsi) + return desc->irq; + if (index < desc->nvec_used) + return desc->irq + index; } - msi_unlock_descs(dev); - return ret; + return 0; } -EXPORT_SYMBOL_GPL(msi_get_virq); +EXPORT_SYMBOL_GPL(msi_domain_get_virq); #ifdef CONFIG_SYSFS static struct attribute *msi_dev_attrs[] = { @@ -420,7 +559,7 @@ fail: return ret; } -#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS +#if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN) /** * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device * @dev: The device (PCI, platform etc) which will get sysfs entries @@ -452,14 +591,46 @@ void msi_device_destroy_sysfs(struct device *dev) msi_for_each_desc(desc, dev, MSI_DESC_ALL) msi_sysfs_remove_desc(dev, desc); } -#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK */ +#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK || CONFIG_PCI_XEN */ #else /* CONFIG_SYSFS */ static inline int msi_sysfs_create_group(struct device *dev) { return 0; } static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; } static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { } #endif /* !CONFIG_SYSFS */ -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid) +{ + struct irq_domain *domain; + + lockdep_assert_held(&dev->msi.data->mutex); + + if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return NULL; + + domain = dev->msi.data->__domains[domid].domain; + if (!domain) + return NULL; + + if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain))) + return NULL; + + return domain; +} + +static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid) +{ + struct msi_domain_info *info; + struct irq_domain *domain; + + domain = msi_get_device_domain(dev, domid); + if (domain) { + info = domain->host_data; + return info->hwsize; + } + /* No 
domain, default to MSI_XA_DOMAIN_SIZE */ + return MSI_XA_DOMAIN_SIZE; +} + static inline void irq_chip_write_msi_msg(struct irq_data *data, struct msi_msg *msg) { @@ -535,7 +706,7 @@ static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq = ops->get_hwirq(info, arg); int i, ret; - if (irq_find_mapping(domain, hwirq) > 0) + if (irq_resolve_mapping(domain, hwirq)) return -EEXIST; if (domain->parent) { @@ -548,7 +719,7 @@ static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq, ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg); if (ret < 0) { if (ops->msi_free) { - for (i--; i > 0; i--) + for (i--; i >= 0; i--) ops->msi_free(domain, info, virq + i); } irq_domain_free_irqs_top(domain, virq, nr_irqs); @@ -572,11 +743,44 @@ static void msi_domain_free(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_top(domain, virq, nr_irqs); } +static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, unsigned int *type) +{ + struct msi_domain_info *info = domain->host_data; + + /* + * This will catch allocations through the regular irqdomain path except + * for MSI domains which really support this, e.g. MBIGEN. + */ + if (!info->ops->msi_translate) + return -ENOTSUPP; + return info->ops->msi_translate(domain, fwspec, hwirq, type); +} + +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +static void msi_domain_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) +{ + struct msi_desc *desc = irqd ? 
irq_data_get_msi_desc(irqd) : NULL; + + if (!desc) + return; + + seq_printf(m, "\n%*saddress_hi: 0x%08x", ind + 1, "", desc->msg.address_hi); + seq_printf(m, "\n%*saddress_lo: 0x%08x", ind + 1, "", desc->msg.address_lo); + seq_printf(m, "\n%*smsg_data: 0x%08x\n", ind + 1, "", desc->msg.data); +} +#endif + static const struct irq_domain_ops msi_domain_ops = { .alloc = msi_domain_alloc, .free = msi_domain_free, .activate = msi_domain_activate, .deactivate = msi_domain_deactivate, + .translate = msi_domain_translate, +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + .debug_show = msi_domain_debug_show, +#endif }; static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info, @@ -592,6 +796,10 @@ static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev, return 0; } +static void msi_domain_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) +{ +} + static void msi_domain_ops_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) { @@ -613,21 +821,12 @@ static int msi_domain_ops_init(struct irq_domain *domain, return 0; } -static int msi_domain_ops_check(struct irq_domain *domain, - struct msi_domain_info *info, - struct device *dev) -{ - return 0; -} - static struct msi_domain_ops msi_domain_ops_default = { .get_hwirq = msi_domain_ops_get_hwirq, .msi_init = msi_domain_ops_init, - .msi_check = msi_domain_ops_check, .msi_prepare = msi_domain_ops_prepare, + .msi_teardown = msi_domain_ops_teardown, .set_desc = msi_domain_ops_set_desc, - .domain_alloc_irqs = __msi_domain_alloc_irqs, - .domain_free_irqs = __msi_domain_free_irqs, }; static void msi_domain_update_dom_ops(struct msi_domain_info *info) @@ -639,11 +838,6 @@ static void msi_domain_update_dom_ops(struct msi_domain_info *info) return; } - if (ops->domain_alloc_irqs == NULL) - ops->domain_alloc_irqs = msi_domain_ops_default.domain_alloc_irqs; - if (ops->domain_free_irqs == NULL) - ops->domain_free_irqs = msi_domain_ops_default.domain_free_irqs; - if (!(info->flags & 
MSI_FLAG_USE_DEF_DOM_OPS)) return; @@ -651,10 +845,10 @@ static void msi_domain_update_dom_ops(struct msi_domain_info *info) ops->get_hwirq = msi_domain_ops_default.get_hwirq; if (ops->msi_init == NULL) ops->msi_init = msi_domain_ops_default.msi_init; - if (ops->msi_check == NULL) - ops->msi_check = msi_domain_ops_default.msi_check; if (ops->msi_prepare == NULL) ops->msi_prepare = msi_domain_ops_default.msi_prepare; + if (ops->msi_teardown == NULL) + ops->msi_teardown = msi_domain_ops_default.msi_teardown; if (ops->set_desc == NULL) ops->set_desc = msi_domain_ops_default.set_desc; } @@ -664,10 +858,45 @@ static void msi_domain_update_chip_ops(struct msi_domain_info *info) struct irq_chip *chip = info->chip; BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask); - if (!chip->irq_set_affinity) + if (!chip->irq_set_affinity && !(info->flags & MSI_FLAG_NO_AFFINITY)) chip->irq_set_affinity = msi_domain_set_affinity; } +static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode, + struct msi_domain_info *info, + unsigned int flags, + struct irq_domain *parent) +{ + struct irq_domain *domain; + + if (info->hwsize > MSI_XA_DOMAIN_SIZE) + return NULL; + + /* + * Hardware size 0 is valid for backwards compatibility and for + * domains which are not backed by a hardware table. Grant the + * maximum index space. 
+ */ + if (!info->hwsize) + info->hwsize = MSI_XA_DOMAIN_SIZE; + + msi_domain_update_dom_ops(info); + if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) + msi_domain_update_chip_ops(info); + + domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0, + fwnode, &msi_domain_ops, info); + + if (domain) { + irq_domain_update_bus_token(domain, info->bus_token); + domain->dev = info->dev; + if (info->flags & MSI_FLAG_PARENT_PM_DEV) + domain->pm_dev = parent->pm_dev; + } + + return domain; +} + /** * msi_create_irq_domain - Create an MSI interrupt domain * @fwnode: Optional fwnode of the interrupt controller @@ -680,69 +909,260 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent) { - struct irq_domain *domain; + return __msi_create_irq_domain(fwnode, info, 0, parent); +} - msi_domain_update_dom_ops(info); - if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) - msi_domain_update_chip_ops(info); +/** + * msi_create_parent_irq_domain - Create an MSI-parent interrupt domain + * @info: MSI irqdomain creation info + * @msi_parent_ops: MSI parent callbacks and configuration + * + * Return: pointer to the created &struct irq_domain or %NULL on failure + */ +struct irq_domain *msi_create_parent_irq_domain(struct irq_domain_info *info, + const struct msi_parent_ops *msi_parent_ops) +{ + struct irq_domain *d; - domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0, - fwnode, &msi_domain_ops, info); + info->hwirq_max = max(info->hwirq_max, info->size); + info->size = info->hwirq_max; + info->domain_flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; + info->bus_token = msi_parent_ops->bus_select_token; - if (domain && !domain->name && info->chip) - domain->name = info->chip->name; + d = irq_domain_instantiate(info); + if (IS_ERR(d)) + return NULL; - return domain; + d->msi_parent_ops = msi_parent_ops; + return d; } +EXPORT_SYMBOL_GPL(msi_create_parent_irq_domain); -int msi_domain_prepare_irqs(struct 
irq_domain *domain, struct device *dev, - int nvec, msi_alloc_info_t *arg) +/** + * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down + * in the domain hierarchy + * @dev: The device for which the domain should be created + * @domain: The domain in the hierarchy this op is being called on + * @msi_parent_domain: The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to + * be created + * @msi_child_info: The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE + * domain to be created + * + * Return: true on success, false otherwise + * + * This is the most complex problem of per device MSI domains and the + * underlying interrupt domain hierarchy: + * + * The device domain to be initialized requests the broadest feature set + * possible and the underlying domain hierarchy puts restrictions on it. + * + * That's trivial for a simple parent->child relationship, but it gets + * interesting with an intermediate domain: root->parent->child. The + * intermediate 'parent' can expand the capabilities which the 'root' + * domain is providing. So that creates a classic hen and egg problem: + * Which entity is doing the restrictions/expansions? + * + * One solution is to let the root domain handle the initialization that's + * why there is the @domain and the @msi_parent_domain pointer. 
+ */ +bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *msi_parent_domain, + struct msi_domain_info *msi_child_info) { - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; - int ret; + struct irq_domain *parent = domain->parent; + + if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops || + !parent->msi_parent_ops->init_dev_msi_info)) + return false; - ret = ops->msi_check(domain, info, dev); - if (ret == 0) - ret = ops->msi_prepare(domain, dev, nvec, arg); + return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain, + msi_child_info); +} - return ret; +/** + * msi_create_device_irq_domain - Create a device MSI interrupt domain + * @dev: Pointer to the device + * @domid: Domain id + * @template: MSI domain info bundle used as template + * @hwsize: Maximum number of MSI table entries (0 if unknown or unlimited) + * @domain_data: Optional pointer to domain specific data which is set in + * msi_domain_info::data + * @chip_data: Optional pointer to chip specific data which is set in + * msi_domain_info::chip_data + * + * Return: True on success, false otherwise + * + * There is no firmware node required for this interface because the per + * device domains are software constructs which are actually closer to the + * hardware reality than any firmware can describe them. + * + * The domain name and the irq chip name for a MSI device domain are + * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)" + * + * $PREFIX: Optional prefix provided by the underlying MSI parent domain + * via msi_parent_ops::prefix. If that pointer is NULL the prefix + * is empty. + * $CHIPNAME: The name of the irq_chip in @template + * $DEVNAME: The name of the device + * + * This results in understandable chip names and hardware interrupt numbers + * in e.g. 
/proc/interrupts + * + * PCI-MSI-0000:00:1c.0 0-edge Parent domain has no prefix + * IR-PCI-MSI-0000:00:1c.4 0-edge Same with interrupt remapping prefix 'IR-' + * + * IR-PCI-MSIX-0000:3d:00.0 0-edge Hardware interrupt numbers reflect + * IR-PCI-MSIX-0000:3d:00.0 1-edge the real MSI-X index on that device + * IR-PCI-MSIX-0000:3d:00.0 2-edge + * + * On IMS domains the hardware interrupt number is either a table entry + * index or a purely software managed index but it is guaranteed to be + * unique. + * + * The domain pointer is stored in @dev::msi::data::__irqdomains[]. All + * subsequent operations on the domain depend on the domain id. + * + * The domain is automatically freed when the device is removed via devres + * in the context of @dev::msi::data freeing, but it can also be + * independently removed via @msi_remove_device_irq_domain(). + */ +bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, + const struct msi_domain_template *template, + unsigned int hwsize, void *domain_data, + void *chip_data) +{ + struct irq_domain *domain, *parent = dev->msi.domain; + const struct msi_parent_ops *pops; + struct fwnode_handle *fwnode; + + if (!irq_domain_is_msi_parent(parent)) + return false; + + if (domid >= MSI_MAX_DEVICE_IRQDOMAINS) + return false; + + struct msi_domain_template *bundle __free(kfree) = + kmemdup(template, sizeof(*bundle), GFP_KERNEL); + if (!bundle) + return false; + + bundle->info.hwsize = hwsize; + bundle->info.chip = &bundle->chip; + bundle->info.ops = &bundle->ops; + bundle->info.data = domain_data; + bundle->info.chip_data = chip_data; + bundle->info.alloc_data = &bundle->alloc_info; + bundle->info.dev = dev; + + pops = parent->msi_parent_ops; + snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s", + pops->prefix ? 
: "", bundle->chip.name, dev_name(dev)); + bundle->chip.name = bundle->name; + + /* + * Using the device firmware node is required for wire to MSI + * device domains so that the existing firmware results in a domain + * match. + * All other device domains like PCI/MSI use the named firmware + * node as they are not guaranteed to have a fwnode. They are never + * looked up and always handled in the context of the device. + */ + struct fwnode_handle *fwnode_alloced __free(irq_domain_free_fwnode) = NULL; + + if (!(bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE)) + fwnode = fwnode_alloced = irq_domain_alloc_named_fwnode(bundle->name); + else + fwnode = dev->fwnode; + + if (!fwnode) + return false; + + if (msi_setup_device_data(dev)) + return false; + + guard(msi_descs_lock)(dev); + if (WARN_ON_ONCE(msi_get_device_domain(dev, domid))) + return false; + + if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info)) + return false; + + domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent); + if (!domain) + return false; + + dev->msi.data->__domains[domid].domain = domain; + + if (msi_domain_prepare_irqs(domain, dev, hwsize, &bundle->alloc_info)) { + dev->msi.data->__domains[domid].domain = NULL; + irq_domain_remove(domain); + return false; + } + + /* @bundle and @fwnode_alloced are now in use. 
Prevent cleanup */ + retain_and_null_ptr(bundle); + retain_and_null_ptr(fwnode_alloced); + return true; } -int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, - int virq_base, int nvec, msi_alloc_info_t *arg) +/** + * msi_remove_device_irq_domain - Free a device MSI interrupt domain + * @dev: Pointer to the device + * @domid: Domain id + */ +void msi_remove_device_irq_domain(struct device *dev, unsigned int domid) { - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; - struct msi_desc *desc; - int ret, virq; + struct fwnode_handle *fwnode = NULL; + struct msi_domain_info *info; + struct irq_domain *domain; - msi_lock_descs(dev); - ret = msi_add_simple_msi_descs(dev, virq_base, nvec); - if (ret) - goto unlock; + guard(msi_descs_lock)(dev); + domain = msi_get_device_domain(dev, domid); + if (!domain || !irq_domain_is_msi_device(domain)) + return; - for (virq = virq_base; virq < virq_base + nvec; virq++) { - desc = xa_load(&dev->msi.data->__store, virq); - desc->irq = virq; + dev->msi.data->__domains[domid].domain = NULL; + info = domain->host_data; - ops->set_desc(arg, desc); - ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg); - if (ret) - goto fail; + info->ops->msi_teardown(domain, info->alloc_data); + + if (irq_domain_is_msi_device(domain)) + fwnode = domain->fwnode; + irq_domain_remove(domain); + irq_domain_free_fwnode(fwnode); + kfree(container_of(info, struct msi_domain_template, info)); +} + +/** + * msi_match_device_irq_domain - Match a device irq domain against a bus token + * @dev: Pointer to the device + * @domid: Domain id + * @bus_token: Bus token to match against the domain bus token + * + * Return: True if device domain exists and bus tokens match. 
+ */ +bool msi_match_device_irq_domain(struct device *dev, unsigned int domid, + enum irq_domain_bus_token bus_token) +{ + struct msi_domain_info *info; + struct irq_domain *domain; - irq_set_msi_desc(virq, desc); + guard(msi_descs_lock)(dev); + domain = msi_get_device_domain(dev, domid); + if (domain && irq_domain_is_msi_device(domain)) { + info = domain->host_data; + return info->bus_token == bus_token; } - msi_unlock_descs(dev); - return 0; + return false; +} -fail: - for (--virq; virq >= virq_base; virq--) - irq_domain_free_irqs_common(domain, virq, 1); - msi_free_msi_descs_range(dev, MSI_DESC_ALL, virq_base, virq_base + nvec - 1); -unlock: - msi_unlock_descs(dev); - return ret; +static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct msi_domain_info *info = domain->host_data; + struct msi_domain_ops *ops = info->ops; + + return ops->msi_prepare(domain, dev, nvec, arg); } /* @@ -764,6 +1184,8 @@ static bool msi_check_reservation_mode(struct irq_domain *domain, switch(domain->bus_token) { case DOMAIN_BUS_PCI_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: case DOMAIN_BUS_VMD_MSI: break; default: @@ -773,7 +1195,7 @@ static bool msi_check_reservation_mode(struct irq_domain *domain, if (!(info->flags & MSI_FLAG_MUST_REACTIVATE)) return false; - if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask) + if (info->flags & MSI_FLAG_NO_MASK) return false; /* @@ -789,6 +1211,8 @@ static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc, { switch(domain->bus_token) { case DOMAIN_BUS_PCI_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: case DOMAIN_BUS_VMD_MSI: if (IS_ENABLED(CONFIG_PCI_MSI)) break; @@ -807,7 +1231,6 @@ static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc, #define VIRQ_CAN_RESERVE 0x01 #define VIRQ_ACTIVATE 0x02 -#define VIRQ_NOMASK_QUIRK 0x04 static int msi_init_virq(struct irq_domain 
*domain, int virq, unsigned int vflags) { @@ -816,8 +1239,21 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag if (!(vflags & VIRQ_CAN_RESERVE)) { irqd_clr_can_reserve(irqd); - if (vflags & VIRQ_NOMASK_QUIRK) - irqd_set_msi_nomask_quirk(irqd); + + /* + * If the interrupt is managed but no CPU is available to + * service it, shut it down until better times. Note that + * we only do this on the !RESERVE path as x86 (the only + * architecture using this flag) deals with this in a + * different way by using a catch-all vector. + */ + if ((vflags & VIRQ_ACTIVATE) && + irqd_affinity_is_managed(irqd) && + !cpumask_intersects(irq_data_get_affinity_mask(irqd), + cpu_online_mask)) { + irqd_set_managed_shutdown(irqd); + return 0; + } } if (!(vflags & VIRQ_ACTIVATE)) @@ -835,18 +1271,37 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag return 0; } -int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec) +static int populate_alloc_info(struct irq_domain *domain, struct device *dev, + unsigned int nirqs, msi_alloc_info_t *arg) { struct msi_domain_info *info = domain->host_data; + + /* + * If the caller has provided a template alloc info, use that. Once + * all users of msi_create_irq_domain() have been eliminated, this + * should be the only source of allocation information, and the + * prepare call below should be finally removed. 
+ */ + if (!info->alloc_data) + return msi_domain_prepare_irqs(domain, dev, nirqs, arg); + + *arg = *info->alloc_data; + return 0; +} + +static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain, + struct msi_ctrl *ctrl) +{ + struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store; + struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; + unsigned int vflags = 0, allocated = 0; msi_alloc_info_t arg = { }; - unsigned int vflags = 0; struct msi_desc *desc; - int allocated = 0; + unsigned long idx; int i, ret, virq; - ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); + ret = populate_alloc_info(domain, dev, ctrl->nirqs, &arg); if (ret) return ret; @@ -862,17 +1317,20 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, * Interrupt can use a reserved vector and will not occupy * a real device vector until the interrupt is requested. */ - if (msi_check_reservation_mode(domain, info, dev)) { + if (msi_check_reservation_mode(domain, info, dev)) vflags |= VIRQ_CAN_RESERVE; - /* - * MSI affinity setting requires a special quirk (X86) when - * reservation mode is active. - */ - if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) - vflags |= VIRQ_NOMASK_QUIRK; - } - msi_for_each_desc(desc, dev, MSI_DESC_NOTASSOCIATED) { + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED)) + continue; + + /* This should return -ECONFUSED... 
*/ + if (WARN_ON_ONCE(allocated >= ctrl->nirqs)) + return -EINVAL; + + if (ops->prepare_desc) + ops->prepare_desc(domain, &arg, desc); + ops->set_desc(&arg, desc); virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used, @@ -898,76 +1356,259 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, return 0; } -static int msi_domain_add_simple_msi_descs(struct msi_domain_info *info, - struct device *dev, - unsigned int num_descs) +static int msi_domain_alloc_simple_msi_descs(struct device *dev, + struct msi_domain_info *info, + struct msi_ctrl *ctrl) { if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS)) return 0; - return msi_add_simple_msi_descs(dev, 0, num_descs); + return msi_domain_add_simple_msi_descs(dev, ctrl); +} + +static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_domain_info *info; + struct msi_domain_ops *ops; + struct irq_domain *domain; + int ret; + + if (!msi_ctrl_valid(dev, ctrl)) + return -EINVAL; + + domain = msi_get_device_domain(dev, ctrl->domid); + if (!domain) + return -ENODEV; + + info = domain->host_data; + + ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl); + if (ret) + return ret; + + ops = info->ops; + if (ops->domain_alloc_irqs) + return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs); + + return __msi_domain_alloc_irqs(dev, domain, ctrl); +} + +static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl) +{ + int ret = __msi_domain_alloc_locked(dev, ctrl); + + if (ret) + msi_domain_free_locked(dev, ctrl); + return ret; } /** - * msi_domain_alloc_irqs_descs_locked - Allocate interrupts from a MSI interrupt domain - * @domain: The domain to allocate from + * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain * @dev: Pointer to device struct of the device for which the interrupts * are allocated - * @nvec: The number of interrupts to allocate + * @domid: Id of the interrupt domain to operate on + * @first: First 
index to allocate (inclusive) + * @last: Last index to allocate (inclusive) * * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() - * pair. Use this for MSI irqdomains which implement their own vector + * pair. Use this for MSI irqdomains which implement their own descriptor * allocation/free. * * Return: %0 on success or an error code. */ -int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev, - int nvec) +int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) { - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; - int ret; - - lockdep_assert_held(&dev->msi.data->mutex); + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + .nirqs = last + 1 - first, + }; + + return msi_domain_alloc_locked(dev, &ctrl); +} - ret = msi_domain_add_simple_msi_descs(info, dev, nvec); - if (ret) - return ret; +/** + * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @first: First index to allocate (inclusive) + * @last: Last index to allocate (inclusive) + * + * Return: %0 on success or an error code. 
+ */ +int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ - ret = ops->domain_alloc_irqs(domain, dev, nvec); - if (ret) - msi_domain_free_irqs_descs_locked(domain, dev); - return ret; + guard(msi_descs_lock)(dev); + return msi_domain_alloc_irqs_range_locked(dev, domid, first, last); } +EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range); /** - * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain - * @domain: The domain to allocate from + * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain + * * @dev: Pointer to device struct of the device for which the interrupts * are allocated - * @nvec: The number of interrupts to allocate + * @domid: Id of the interrupt domain to operate on + * @nirqs: The number of interrupts to allocate + * + * This function scans all MSI descriptors of the MSI domain and allocates interrupts + * for all unassigned ones. That function is to be used for MSI domain usage where + * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X]. * * Return: %0 on success or an error code. 
*/ -int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec) +int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs) { + struct msi_ctrl ctrl = { + .domid = domid, + .first = 0, + .last = msi_domain_get_hwsize(dev, domid) - 1, + .nirqs = nirqs, + }; + + return msi_domain_alloc_locked(dev, &ctrl); +} + +static struct msi_map __msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, + unsigned int index, + const struct irq_affinity_desc *affdesc, + union msi_instance_cookie *icookie) +{ + struct msi_ctrl ctrl = { .domid = domid, .nirqs = 1, }; + struct irq_domain *domain; + struct msi_map map = { }; + struct msi_desc *desc; int ret; - msi_lock_descs(dev); - ret = msi_domain_alloc_irqs_descs_locked(domain, dev, nvec); - msi_unlock_descs(dev); - return ret; + domain = msi_get_device_domain(dev, domid); + if (!domain) { + map.index = -ENODEV; + return map; + } + + desc = msi_alloc_desc(dev, 1, affdesc); + if (!desc) { + map.index = -ENOMEM; + return map; + } + + if (icookie) + desc->data.icookie = *icookie; + + ret = msi_insert_desc(dev, desc, domid, index); + if (ret) { + map.index = ret; + return map; + } + + ctrl.first = ctrl.last = desc->msi_index; + + ret = __msi_domain_alloc_irqs(dev, domain, &ctrl); + if (ret) { + map.index = ret; + msi_domain_free_locked(dev, &ctrl); + } else { + map.index = desc->msi_index; + map.virq = desc->irq; + } + return map; } -void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) +/** + * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at + * a given index - or at the next free index + * + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation + * uses the next free index. 
+ * @affdesc: Optional pointer to an interrupt affinity descriptor structure + * @icookie: Optional pointer to a domain specific per instance cookie. If + * non-NULL the content of the cookie is stored in msi_desc::data. + * Must be NULL for MSI-X allocations + * + * This requires a MSI interrupt domain which lets the core code manage the + * MSI descriptors. + * + * Return: struct msi_map + * + * On success msi_map::index contains the allocated index number and + * msi_map::virq the corresponding Linux interrupt number + * + * On failure msi_map::index contains the error code and msi_map::virq + * is %0. + */ +struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index, + const struct irq_affinity_desc *affdesc, + union msi_instance_cookie *icookie) { + guard(msi_descs_lock)(dev); + return __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie); +} + +/** + * msi_device_domain_alloc_wired - Allocate a "wired" interrupt on @domain + * @domain: The domain to allocate on + * @hwirq: The hardware interrupt number to allocate for + * @type: The interrupt type + * + * This weirdness supports wire to MSI controllers like MBIGEN. + * + * @hwirq is the hardware interrupt number which is handed in from + * irq_create_fwspec_mapping(). As the wire to MSI domain is sparse, but + * sized in firmware, the hardware interrupt number cannot be used as MSI + * index. For the underlying irq chip the MSI index is irrelevant and + * all it needs is the hardware interrupt number. + * + * To handle this the MSI index is allocated with MSI_ANY_INDEX and the + * hardware interrupt number is stored along with the type information in + * msi_desc::cookie so the underlying interrupt chip and domain code can + * retrieve it. 
+ * + * Return: The Linux interrupt number (> 0) or an error code + */ +int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, + unsigned int type) +{ + unsigned int domid = MSI_DEFAULT_DOMAIN; + union msi_instance_cookie icookie = { }; + struct device *dev = domain->dev; + struct msi_map map = { }; + + if (WARN_ON_ONCE(!dev || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI)) + return -EINVAL; + + icookie.value = ((u64)type << 32) | hwirq; + + guard(msi_descs_lock)(dev); + if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain)) + map.index = -EINVAL; + else + map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie); + return map.index >= 0 ? map.virq : map.index; +} + +static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain, + struct msi_ctrl *ctrl) +{ + struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store; struct msi_domain_info *info = domain->host_data; struct irq_data *irqd; struct msi_desc *desc; + unsigned long idx; int i; - /* Only handle MSI entries which have an interrupt associated */ - msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) { + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + /* Only handle MSI entries which have an interrupt associated */ + if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED)) + continue; + /* Make sure all interrupts are deactivated */ for (i = 0; i < desc->nvec_used; i++) { irqd = irq_domain_get_irq_data(domain, desc->irq + i); @@ -982,45 +1623,119 @@ void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) } } -static void msi_domain_free_msi_descs(struct msi_domain_info *info, - struct device *dev) +static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl) { + struct msi_domain_info *info; + struct msi_domain_ops *ops; + struct irq_domain *domain; + + if (!msi_ctrl_valid(dev, ctrl)) + return; + + domain = msi_get_device_domain(dev, ctrl->domid); + if (!domain) + return; + + info = 
domain->host_data; + ops = info->ops; + + if (ops->domain_free_irqs) + ops->domain_free_irqs(domain, dev); + else + __msi_domain_free_irqs(dev, domain, ctrl); + if (info->flags & MSI_FLAG_FREE_MSI_DESCS) - msi_free_msi_descs(dev); + msi_domain_free_descs(dev, ctrl); } /** - * msi_domain_free_irqs_descs_locked - Free interrupts from a MSI interrupt @domain associated to @dev - * @domain: The domain to managing the interrupts + * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain + * associated to @dev with msi_lock held * @dev: Pointer to device struct of the device for which the interrupts - * are free + * are freed + * @domid: Id of the interrupt domain to operate on + * @first: First index to free (inclusive) + * @last: Last index to free (inclusive) + */ +void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + }; + msi_domain_free_locked(dev, &ctrl); +} + +/** + * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain + * associated to @dev + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: Id of the interrupt domain to operate on + * @first: First index to free (inclusive) + * @last: Last index to free (inclusive) + */ +void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + guard(msi_descs_lock)(dev); + msi_domain_free_irqs_range_locked(dev, domid, first, last); +} +EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all); + +/** + * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain + * associated to a device + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: The id of the domain to operate on * * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() * pair. 
Use this for MSI irqdomains which implement their own vector * allocation. */ -void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev) +void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid) { - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; - - lockdep_assert_held(&dev->msi.data->mutex); - - ops->domain_free_irqs(domain, dev); - msi_domain_free_msi_descs(info, dev); + msi_domain_free_irqs_range_locked(dev, domid, 0, + msi_domain_get_hwsize(dev, domid) - 1); } /** - * msi_domain_free_irqs - Free interrupts from a MSI interrupt @domain associated to @dev - * @domain: The domain to managing the interrupts + * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain + * associated to a device * @dev: Pointer to device struct of the device for which the interrupts - * are free + * are freed + * @domid: The id of the domain to operate on */ -void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) +void msi_domain_free_irqs_all(struct device *dev, unsigned int domid) { - msi_lock_descs(dev); - msi_domain_free_irqs_descs_locked(domain, dev); - msi_unlock_descs(dev); + guard(msi_descs_lock)(dev); + msi_domain_free_irqs_all_locked(dev, domid); +} + +/** + * msi_device_domain_free_wired - Free a wired interrupt in @domain + * @domain: The domain to free the interrupt on + * @virq: The Linux interrupt number to free + * + * This is the counterpart of msi_device_domain_alloc_wired() for the + * weird wired to MSI converting domains. 
+ */ +void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq) +{ + struct msi_desc *desc = irq_get_msi_desc(virq); + struct device *dev = domain->dev; + + if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI)) + return; + + guard(msi_descs_lock)(dev); + if (WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain)) + return; + msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index, + desc->msi_index); } /** @@ -1034,4 +1749,29 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain) return (struct msi_domain_info *)domain->host_data; } -#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ +/** + * msi_device_has_isolated_msi - True if the device has isolated MSI + * @dev: The device to check + * + * Isolated MSI means that HW modeled by an irq_domain on the path from the + * initiating device to the CPU will validate that the MSI message specifies an + * interrupt number that the device is authorized to trigger. This must block + * devices from triggering interrupts they are not authorized to trigger. + * Currently authorization means the MSI vector is one assigned to the device. + * + * This is interesting for securing VFIO use cases where a rogue MSI (eg created + * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to + * impact outside its security domain, eg userspace triggering interrupts on + * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM + * triggering interrupts on another VM. 
+ */ +bool msi_device_has_isolated_msi(struct device *dev) +{ + struct irq_domain *domain = dev_get_msi_domain(dev); + + for (; domain; domain = domain->parent) + if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI) + return true; + return arch_is_isolated_msi(); +} +EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index ca71123a6130..99ff65466d87 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -13,17 +13,13 @@ #include "internals.h" -bool irq_pm_check_wakeup(struct irq_desc *desc) +void irq_pm_handle_wakeup(struct irq_desc *desc) { - if (irqd_is_wakeup_armed(&desc->irq_data)) { - irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); - desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; - desc->depth++; - irq_disable(desc); - pm_system_irq_wakeup(irq_desc_get_irq(desc)); - return true; - } - return false; + irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); + desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; + desc->depth++; + irq_disable(desc); + pm_system_irq_wakeup(irq_desc_get_irq(desc)); } /* @@ -46,8 +42,7 @@ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) desc->cond_suspend_depth++; WARN_ON_ONCE(desc->no_suspend_depth && - (desc->no_suspend_depth + - desc->cond_suspend_depth) != desc->nr_actions); + (desc->no_suspend_depth + desc->cond_suspend_depth) != desc->nr_actions); } /* @@ -134,20 +129,17 @@ void suspend_device_irqs(void) int irq; for_each_irq_desc(irq, desc) { - unsigned long flags; bool sync; if (irq_settings_is_nested_thread(desc)) continue; - raw_spin_lock_irqsave(&desc->lock, flags); - sync = suspend_device_irq(desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) + sync = suspend_device_irq(desc); if (sync) synchronize_irq(irq); } } -EXPORT_SYMBOL_GPL(suspend_device_irqs); static void resume_irq(struct irq_desc *desc) { @@ -187,18 +179,15 @@ static void resume_irqs(bool want_early) int irq; for_each_irq_desc(irq, desc) { - unsigned 
long flags; - bool is_early = desc->action && - desc->action->flags & IRQF_EARLY_RESUME; + bool is_early = desc->action && desc->action->flags & IRQF_EARLY_RESUME; if (!is_early && want_early) continue; if (irq_settings_is_nested_thread(desc)) continue; - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irqsave)(&desc->lock); resume_irq(desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -208,41 +197,40 @@ static void resume_irqs(bool want_early) */ void rearm_wake_irq(unsigned int irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; - if (!desc) - return; - - if (!(desc->istate & IRQS_SUSPENDED) || - !irqd_is_wakeup_set(&desc->irq_data)) - goto unlock; + if (!(desc->istate & IRQS_SUSPENDED) || !irqd_is_wakeup_set(&desc->irq_data)) + return; - desc->istate &= ~IRQS_SUSPENDED; - irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); - __enable_irq(desc); - -unlock: - irq_put_desc_busunlock(desc, flags); + desc->istate &= ~IRQS_SUSPENDED; + irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + __enable_irq(desc); + } } /** * irq_pm_syscore_resume - enable interrupt lines early + * @data: syscore context * * Enable all interrupt lines with %IRQF_EARLY_RESUME set. 
*/ -static void irq_pm_syscore_resume(void) +static void irq_pm_syscore_resume(void *data) { resume_irqs(true); } -static struct syscore_ops irq_pm_syscore_ops = { +static const struct syscore_ops irq_pm_syscore_ops = { .resume = irq_pm_syscore_resume, }; +static struct syscore irq_pm_syscore = { + .ops = &irq_pm_syscore_ops, +}; + static int __init irq_pm_init_ops(void) { - register_syscore_ops(&irq_pm_syscore_ops); + register_syscore(&irq_pm_syscore); return 0; } @@ -259,4 +247,3 @@ void resume_device_irqs(void) { resume_irqs(false); } -EXPORT_SYMBOL_GPL(resume_device_irqs); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 623b8136e9af..77258eafbf63 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -48,14 +48,14 @@ static int show_irq_affinity(int type, struct seq_file *m) struct irq_desc *desc = irq_to_desc((long)m->private); const struct cpumask *mask; + guard(raw_spinlock_irq)(&desc->lock); + switch (type) { case AFFINITY: case AFFINITY_LIST: mask = desc->irq_common_data.affinity; -#ifdef CONFIG_GENERIC_PENDING_IRQ - if (irqd_is_setaffinity_pending(&desc->irq_data)) - mask = desc->pending_mask; -#endif + if (irq_move_pending(&desc->irq_data)) + mask = irq_desc_get_pending_mask(desc); break; case EFFECTIVE: case EFFECTIVE_LIST: @@ -83,20 +83,18 @@ static int show_irq_affinity(int type, struct seq_file *m) static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); - unsigned long flags; cpumask_var_t mask; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; - raw_spin_lock_irqsave(&desc->lock, flags); - if (desc->affinity_hint) - cpumask_copy(mask, desc->affinity_hint); - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irq, &desc->lock) { + if (desc->affinity_hint) + cpumask_copy(mask, desc->affinity_hint); + } seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); free_cpumask_var(mask); - return 0; } @@ -142,7 +140,7 @@ static ssize_t 
write_irq_affinity(int type, struct file *file, int err; if (!irq_can_set_affinity_usr(irq) || no_irq_affinity) - return -EIO; + return -EPERM; if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; @@ -297,32 +295,26 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v) #define MAX_NAMELEN 128 -static int name_unique(unsigned int irq, struct irqaction *new_action) +static bool name_unique(unsigned int irq, struct irqaction *new_action) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action; - unsigned long flags; - int ret = 1; - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irq)(&desc->lock); for_each_action_of_desc(desc, action) { if ((action != new_action) && action->name && - !strcmp(new_action->name, action->name)) { - ret = 0; - break; - } + !strcmp(new_action->name, action->name)) + return false; } - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; + return true; } void register_handler_proc(unsigned int irq, struct irqaction *action) { - char name [MAX_NAMELEN]; + char name[MAX_NAMELEN]; struct irq_desc *desc = irq_to_desc(irq); - if (!desc->dir || action->dir || !action->name || - !name_unique(irq, action)) + if (!desc->dir || action->dir || !action->name || !name_unique(irq, action)) return; snprintf(name, MAX_NAMELEN, "%s", action->name); @@ -349,45 +341,45 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) * added, not when the descriptor is created, so multiple * tasks might try to register at the same time. 
+ */ - mutex_lock(&register_lock); + guard(mutex)(&register_lock); if (desc->dir) - goto out_unlock; - - sprintf(name, "%d", irq); + return; /* create /proc/irq/1234 */ + sprintf(name, "%u", irq); desc->dir = proc_mkdir(name, root_irq_dir); if (!desc->dir) - goto out_unlock; + return; #ifdef CONFIG_SMP + umode_t umode = S_IRUGO; + + if (irq_can_set_affinity_usr(desc->irq_data.irq)) + umode |= S_IWUSR; + /* create /proc/irq/<irq>/smp_affinity */ - proc_create_data("smp_affinity", 0644, desc->dir, - &irq_affinity_proc_ops, irqp); + proc_create_data("smp_affinity", umode, desc->dir, &irq_affinity_proc_ops, irqp); /* create /proc/irq/<irq>/affinity_hint */ proc_create_single_data("affinity_hint", 0444, desc->dir, - irq_affinity_hint_proc_show, irqp); + irq_affinity_hint_proc_show, irqp); /* create /proc/irq/<irq>/smp_affinity_list */ - proc_create_data("smp_affinity_list", 0644, desc->dir, + proc_create_data("smp_affinity_list", umode, desc->dir, &irq_affinity_list_proc_ops, irqp); - proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show, - irqp); + proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show, irqp); # ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK proc_create_single_data("effective_affinity", 0444, desc->dir, - irq_effective_aff_proc_show, irqp); + irq_effective_aff_proc_show, irqp); proc_create_single_data("effective_affinity_list", 0444, desc->dir, - irq_effective_aff_list_proc_show, irqp); + irq_effective_aff_list_proc_show, irqp); # endif #endif proc_create_single_data("spurious", 0444, desc->dir, - irq_spurious_proc_show, (void *)(long)irq); + irq_spurious_proc_show, (void *)(long)irq); -out_unlock: - mutex_unlock(&register_lock); } void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) @@ -454,14 +446,14 @@ int __weak arch_show_interrupts(struct seq_file *p, int prec) } #ifndef ACTUAL_NR_IRQS -# define ACTUAL_NR_IRQS nr_irqs +# define ACTUAL_NR_IRQS irq_get_nr_irqs() #endif int show_interrupts(struct seq_file *p, void *v) { + 
const unsigned int nr_irqs = irq_get_nr_irqs(); static int prec; - unsigned long flags, any_count = 0; int i = *(loff_t *) v, j; struct irqaction *action; struct irq_desc *desc; @@ -483,34 +475,32 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - rcu_read_lock(); + guard(rcu)(); desc = irq_to_desc(i); if (!desc || irq_settings_is_hidden(desc)) - goto outsparse; + return 0; - if (desc->kstat_irqs) { - for_each_online_cpu(j) - any_count |= data_race(*per_cpu_ptr(desc->kstat_irqs, j)); - } + if (!desc->action || irq_desc_is_chained(desc) || !desc->kstat_irqs) + return 0; - if ((!desc->action || irq_desc_is_chained(desc)) && !any_count) - goto outsparse; + seq_printf(p, "%*d:", prec, i); + for_each_online_cpu(j) { + unsigned int cnt = desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, j) : 0; - seq_printf(p, "%*d: ", prec, i); - for_each_online_cpu(j) - seq_printf(p, "%10u ", desc->kstat_irqs ? - *per_cpu_ptr(desc->kstat_irqs, j) : 0); + seq_put_decimal_ull_width(p, " ", cnt, 10); + } + seq_putc(p, ' '); - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irq)(&desc->lock); if (desc->irq_data.chip) { if (desc->irq_data.chip->irq_print_chip) desc->irq_data.chip->irq_print_chip(&desc->irq_data, p); else if (desc->irq_data.chip->name) - seq_printf(p, " %8s", desc->irq_data.chip->name); + seq_printf(p, "%8s", desc->irq_data.chip->name); else - seq_printf(p, " %8s", "-"); + seq_printf(p, "%8s", "-"); } else { - seq_printf(p, " %8s", "None"); + seq_printf(p, "%8s", "None"); } if (desc->irq_data.domain) seq_printf(p, " %*lu", prec, desc->irq_data.hwirq); @@ -530,9 +520,6 @@ int show_interrupts(struct seq_file *p, void *v) } seq_putc(p, '\n'); - raw_spin_unlock_irqrestore(&desc->lock, flags); -outsparse: - rcu_read_unlock(); return 0; } #endif diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 0c46e9fe3a89..ca9cc1b806a9 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -21,26 +21,25 @@ #ifdef CONFIG_HARDIRQS_SW_RESEND 
-/* Bitmap to handle software resend of interrupts: */ -static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS); +/* hlist_head to handle software resend of interrupts: */ +static HLIST_HEAD(irq_resend_list); +static DEFINE_RAW_SPINLOCK(irq_resend_lock); /* * Run software resends of IRQ's */ static void resend_irqs(struct tasklet_struct *unused) { - struct irq_desc *desc; - int irq; + guard(raw_spinlock_irq)(&irq_resend_lock); + while (!hlist_empty(&irq_resend_list)) { + struct irq_desc *desc; - while (!bitmap_empty(irqs_resend, nr_irqs)) { - irq = find_first_bit(irqs_resend, nr_irqs); - clear_bit(irq, irqs_resend); - desc = irq_to_desc(irq); - if (!desc) - continue; - local_irq_disable(); + desc = hlist_entry(irq_resend_list.first, struct irq_desc, resend_node); + hlist_del_init(&desc->resend_node); + + raw_spin_unlock(&irq_resend_lock); desc->handle_irq(desc); - local_irq_enable(); + raw_spin_lock(&irq_resend_lock); } } @@ -49,13 +48,11 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs); static int irq_sw_resend(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); - /* * Validate whether this interrupt can be safely injected from * non interrupt context */ - if (handle_enforce_irqctx(&desc->irq_data)) + if (irqd_is_handle_enforce_irqctx(&desc->irq_data)) return -EINVAL; /* @@ -70,16 +67,35 @@ static int irq_sw_resend(struct irq_desc *desc) */ if (!desc->parent_irq) return -EINVAL; - irq = desc->parent_irq; + + desc = irq_to_desc(desc->parent_irq); + if (!desc) + return -EINVAL; } - /* Set it pending and activate the softirq: */ - set_bit(irq, irqs_resend); + /* Add to resend_list and activate the softirq: */ + scoped_guard(raw_spinlock, &irq_resend_lock) { + if (hlist_unhashed(&desc->resend_node)) + hlist_add_head(&desc->resend_node, &irq_resend_list); + } tasklet_schedule(&resend_tasklet); return 0; } +void clear_irq_resend(struct irq_desc *desc) +{ + guard(raw_spinlock)(&irq_resend_lock); + hlist_del_init(&desc->resend_node); +} + +void 
irq_resend_init(struct irq_desc *desc) +{ + INIT_HLIST_NODE(&desc->resend_node); +} #else +void clear_irq_resend(struct irq_desc *desc) {} +void irq_resend_init(struct irq_desc *desc) {} + static int irq_sw_resend(struct irq_desc *desc) { return -EINVAL; @@ -154,30 +170,24 @@ int check_irq_resend(struct irq_desc *desc, bool inject) */ int irq_inject_interrupt(unsigned int irq) { - struct irq_desc *desc; - unsigned long flags; - int err; + int err = -EINVAL; /* Try the state injection hardware interface first */ if (!irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, true)) return 0; /* That failed, try via the resend mechanism */ - desc = irq_get_desc_buslock(irq, &flags, 0); - if (!desc) - return -EINVAL; - - /* - * Only try to inject when the interrupt is: - * - not NMI type - * - activated - */ - if ((desc->istate & IRQS_NMI) || !irqd_is_activated(&desc->irq_data)) - err = -EINVAL; - else - err = check_irq_resend(desc, true); + scoped_irqdesc_get_and_buslock(irq, 0) { + struct irq_desc *desc = scoped_irqdesc; - irq_put_desc_busunlock(desc, flags); + /* + * Only try to inject when the interrupt is: + * - not NMI type + * - activated + */ + if (!irq_is_nmi(desc) && irqd_is_activated(&desc->irq_data)) + err = check_irq_resend(desc, true); + } return err; } EXPORT_SYMBOL_GPL(irq_inject_interrupt); diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index 7b7efb1a114b..00b3bd127692 100644 --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h @@ -11,7 +11,6 @@ enum { _IRQ_NOREQUEST = IRQ_NOREQUEST, _IRQ_NOTHREAD = IRQ_NOTHREAD, _IRQ_NOAUTOEN = IRQ_NOAUTOEN, - _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, _IRQ_NO_BALANCING = IRQ_NO_BALANCING, _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD, _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, @@ -142,11 +141,6 @@ static inline void irq_settings_set_noprobe(struct irq_desc *desc) desc->status_use_accessors |= _IRQ_NOPROBE; } -static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) -{ - return desc->status_use_accessors & 
_IRQ_MOVE_PCNTXT; -} - static inline bool irq_settings_can_autoenable(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOAUTOEN); diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 02b2daf07441..73280ccb74b0 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -19,77 +19,41 @@ static int irqfixup __read_mostly; #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) static void poll_spurious_irqs(struct timer_list *unused); static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs); -static int irq_poll_cpu; +int irq_poll_cpu; static atomic_t irq_poll_active; /* - * We wait here for a poller to finish. - * - * If the poll runs on this CPU, then we yell loudly and return - * false. That will leave the interrupt line disabled in the worst - * case, but it should never happen. - * - * We wait until the poller is done and then recheck disabled and - * action (about to be disabled). Only if it's still active, we return - * true and let the handler run. - */ -bool irq_wait_for_poll(struct irq_desc *desc) - __must_hold(&desc->lock) -{ - if (WARN_ONCE(irq_poll_cpu == smp_processor_id(), - "irq poll in progress on cpu %d for irq %d\n", - smp_processor_id(), desc->irq_data.irq)) - return false; - -#ifdef CONFIG_SMP - do { - raw_spin_unlock(&desc->lock); - while (irqd_irq_inprogress(&desc->irq_data)) - cpu_relax(); - raw_spin_lock(&desc->lock); - } while (irqd_irq_inprogress(&desc->irq_data)); - /* Might have been disabled in meantime */ - return !irqd_irq_disabled(&desc->irq_data) && desc->action; -#else - return false; -#endif -} - - -/* * Recovery handler for misrouted interrupts. 
*/ -static int try_one_irq(struct irq_desc *desc, bool force) +static bool try_one_irq(struct irq_desc *desc, bool force) { - irqreturn_t ret = IRQ_NONE; struct irqaction *action; + bool ret = false; - raw_spin_lock(&desc->lock); + guard(raw_spinlock)(&desc->lock); /* * PER_CPU, nested thread interrupts and interrupts explicitly * marked polled are excluded from polling. */ - if (irq_settings_is_per_cpu(desc) || - irq_settings_is_nested_thread(desc) || + if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc) || irq_settings_is_polled(desc)) - goto out; + return false; /* * Do not poll disabled interrupts unless the spurious * disabled poller asks explicitly. */ if (irqd_irq_disabled(&desc->irq_data) && !force) - goto out; + return false; /* * All handlers must agree on IRQF_SHARED, so we test just the * first. */ action = desc->action; - if (!action || !(action->flags & IRQF_SHARED) || - (action->flags & __IRQF_TIMER)) - goto out; + if (!action || !(action->flags & IRQF_SHARED) || (action->flags & __IRQF_TIMER)) + return false; /* Already running on another processor */ if (irqd_irq_inprogress(&desc->irq_data)) { @@ -98,21 +62,19 @@ static int try_one_irq(struct irq_desc *desc, bool force) * CPU to go looking for our mystery interrupt too */ desc->istate |= IRQS_PENDING; - goto out; + return false; } /* Mark it poll in progress */ desc->istate |= IRQS_POLL_INPROGRESS; do { if (handle_irq_event(desc) == IRQ_HANDLED) - ret = IRQ_HANDLED; + ret = true; /* Make sure that there is still a valid action */ action = desc->action; } while ((desc->istate & IRQS_PENDING) && action); desc->istate &= ~IRQS_POLL_INPROGRESS; -out: - raw_spin_unlock(&desc->lock); - return ret == IRQ_HANDLED; + return ret; } static int misrouted_irq(int irq) @@ -157,8 +119,7 @@ static void poll_spurious_irqs(struct timer_list *unused) continue; /* Racy but it doesn't matter */ - state = desc->istate; - barrier(); + state = READ_ONCE(desc->istate); if (!(state & 
IRQS_SPURIOUS_DISABLED)) continue; @@ -168,8 +129,7 @@ static void poll_spurious_irqs(struct timer_list *unused) } out: atomic_dec(&irq_poll_active); - mod_timer(&poll_spurious_irq_timer, - jiffies + POLL_SPURIOUS_IRQ_INTERVAL); + mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL); } static inline int bad_action_ret(irqreturn_t action_ret) @@ -193,17 +153,13 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) { unsigned int irq = irq_desc_get_irq(desc); struct irqaction *action; - unsigned long flags; - if (bad_action_ret(action_ret)) { - printk(KERN_ERR "irq event %d: bogus return value %x\n", - irq, action_ret); - } else { - printk(KERN_ERR "irq %d: nobody cared (try booting with " - "the \"irqpoll\" option)\n", irq); - } + if (bad_action_ret(action_ret)) + pr_err("irq event %d: bogus return value %x\n", irq, action_ret); + else + pr_err("irq %d: nobody cared (try booting with the \"irqpoll\" option)\n", irq); dump_stack(); - printk(KERN_ERR "handlers:\n"); + pr_err("handlers:\n"); /* * We need to take desc->lock here. note_interrupt() is called @@ -211,15 +167,13 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) * with something else removing an action. It's ok to take * desc->lock here. See synchronize_irq(). 
*/ - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irqsave)(&desc->lock); for_each_action_of_desc(desc, action) { - printk(KERN_ERR "[<%p>] %ps", action->handler, action->handler); + pr_err("[<%p>] %ps", action->handler, action->handler); if (action->thread_fn) - printk(KERN_CONT " threaded [<%p>] %ps", - action->thread_fn, action->thread_fn); - printk(KERN_CONT "\n"); + pr_cont(" threaded [<%p>] %ps", action->thread_fn, action->thread_fn); + pr_cont("\n"); } - raw_spin_unlock_irqrestore(&desc->lock, flags); } static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) @@ -232,18 +186,17 @@ static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) } } -static inline int -try_misrouted_irq(unsigned int irq, struct irq_desc *desc, - irqreturn_t action_ret) +static inline bool try_misrouted_irq(unsigned int irq, struct irq_desc *desc, + irqreturn_t action_ret) { struct irqaction *action; if (!irqfixup) - return 0; + return false; /* We didn't actually handle the IRQ - see if it was misrouted? */ if (action_ret == IRQ_NONE) - return 1; + return true; /* * But for 'irqfixup == 2' we also do it for handled interrupts if @@ -251,19 +204,16 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc, * traditional PC timer interrupt.. Legacy) */ if (irqfixup < 2) - return 0; + return false; if (!irq) - return 1; + return true; /* * Since we don't get the descriptor lock, "action" can - * change under us. We don't really care, but we don't - * want to follow a NULL pointer. So tell the compiler to - * just load it once by using a barrier. + * change under us. 
*/ - action = desc->action; - barrier(); + action = READ_ONCE(desc->action); return action && (action->flags & IRQF_IRQPOLL); } @@ -273,8 +223,7 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) { unsigned int irq; - if (desc->istate & IRQS_POLL_INPROGRESS || - irq_settings_is_polled(desc)) + if (desc->istate & IRQS_POLL_INPROGRESS || irq_settings_is_polled(desc)) return; if (bad_action_ret(action_ret)) { @@ -420,13 +369,12 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) /* * Now kill the IRQ */ - printk(KERN_EMERG "Disabling IRQ #%d\n", irq); + pr_emerg("Disabling IRQ #%d\n", irq); desc->istate |= IRQS_SPURIOUS_DISABLED; desc->depth++; irq_disable(desc); - mod_timer(&poll_spurious_irq_timer, - jiffies + POLL_SPURIOUS_IRQ_INTERVAL); + mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL); } desc->irqs_unhandled = 0; } @@ -436,11 +384,9 @@ bool noirqdebug __read_mostly; int noirqdebug_setup(char *str) { noirqdebug = 1; - printk(KERN_INFO "IRQ lockup detection disabled\n"); - + pr_info("IRQ lockup detection disabled\n"); return 1; } - __setup("noirqdebug", noirqdebug_setup); module_param(noirqdebug, bool, 0644); MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); @@ -452,12 +398,10 @@ static int __init irqfixup_setup(char *str) return 1; } irqfixup = 1; - printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); - printk(KERN_WARNING "This may impact system performance.\n"); - + pr_warn("Misrouted IRQ fixup support enabled.\n"); + pr_warn("This may impact system performance.\n"); return 1; } - __setup("irqfixup", irqfixup_setup); module_param(irqfixup, int, 0644); @@ -468,11 +412,8 @@ static int __init irqpoll_setup(char *str) return 1; } irqfixup = 2; - printk(KERN_WARNING "Misrouted IRQ fixup and polling support " - "enabled\n"); - printk(KERN_WARNING "This may significantly impact system " - "performance\n"); + pr_warn("Misrouted IRQ fixup and polling support enabled\n"); + 
pr_warn("This may significantly impact system performance\n"); return 1; } - __setup("irqpoll", irqpoll_setup); diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index c43e2ac2f8de..4b7315e99bd6 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -509,6 +509,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) /** * irq_timings_next_event - Return when the next event is supposed to arrive + * @now: current time * * During the last busy cycle, the number of interrupts is incremented * and stored in the irq_timings structure. This information is |
