diff options
Diffstat (limited to 'kernel/irq')
-rw-r--r-- | kernel/irq/Kconfig | 35 | ||||
-rw-r--r-- | kernel/irq/Makefile | 3 | ||||
-rw-r--r-- | kernel/irq/affinity.c | 408 | ||||
-rw-r--r-- | kernel/irq/chip.c | 107 | ||||
-rw-r--r-- | kernel/irq/cpuhotplug.c | 53 | ||||
-rw-r--r-- | kernel/irq/debugfs.c | 26 | ||||
-rw-r--r-- | kernel/irq/devres.c | 44 | ||||
-rw-r--r-- | kernel/irq/dummychip.c | 2 | ||||
-rw-r--r-- | kernel/irq/generic-chip.c | 166 | ||||
-rw-r--r-- | kernel/irq/handle.c | 31 | ||||
-rw-r--r-- | kernel/irq/internals.h | 52 | ||||
-rw-r--r-- | kernel/irq/ipi-mux.c | 206 | ||||
-rw-r--r-- | kernel/irq/ipi.c | 56 | ||||
-rw-r--r-- | kernel/irq/irq_sim.c | 122 | ||||
-rw-r--r-- | kernel/irq/irqdesc.c | 448 | ||||
-rw-r--r-- | kernel/irq/irqdomain.c | 1001 | ||||
-rw-r--r-- | kernel/irq/kexec.c | 36 | ||||
-rw-r--r-- | kernel/irq/manage.c | 413 | ||||
-rw-r--r-- | kernel/irq/matrix.c | 50 | ||||
-rw-r--r-- | kernel/irq/migration.c | 6 | ||||
-rw-r--r-- | kernel/irq/msi.c | 1525 | ||||
-rw-r--r-- | kernel/irq/pm.c | 4 | ||||
-rw-r--r-- | kernel/irq/proc.c | 60 | ||||
-rw-r--r-- | kernel/irq/resend.c | 60 | ||||
-rw-r--r-- | kernel/irq/settings.h | 18 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 12 | ||||
-rw-r--r-- | kernel/irq/timings.c | 16 |
27 files changed, 3347 insertions, 1613 deletions
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index d79ef2493a28..875f25ed6f71 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -24,6 +24,7 @@ config GENERIC_IRQ_SHOW_LEVEL # Supports effective affinity mask config GENERIC_IRQ_EFFECTIVE_AFF_MASK + depends on SMP bool # Support for delayed migration from interrupt context @@ -70,6 +71,11 @@ config IRQ_DOMAIN_HIERARCHY bool select IRQ_DOMAIN +# Support for obsolete non-mapping irq domains +config IRQ_DOMAIN_NOMAP + bool + select IRQ_DOMAIN + # Support for hierarchical fasteoi+edge and fasteoi+level handlers config IRQ_FASTEOI_HIERARCHY_HANDLERS bool @@ -77,24 +83,22 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS # Generic IRQ IPI support config GENERIC_IRQ_IPI bool + depends on SMP select IRQ_DOMAIN_HIERARCHY -# Generic MSI interrupt support -config GENERIC_MSI_IRQ +# Generic IRQ IPI Mux support +config GENERIC_IRQ_IPI_MUX bool + depends on SMP # Generic MSI hierarchical interrupt domain support -config GENERIC_MSI_IRQ_DOMAIN +config GENERIC_MSI_IRQ bool select IRQ_DOMAIN_HIERARCHY - select GENERIC_MSI_IRQ config IRQ_MSI_IOMMU bool -config HANDLE_DOMAIN_IRQ - bool - config IRQ_TIMINGS bool @@ -104,6 +108,10 @@ config GENERIC_IRQ_MATRIX_ALLOCATOR config GENERIC_IRQ_RESERVATION_MODE bool +# Snapshot for interrupt statistics +config GENERIC_IRQ_STAT_SNAPSHOT + bool + # Support forced irq threading config IRQ_FORCED_THREADING bool @@ -133,9 +141,22 @@ config GENERIC_IRQ_DEBUGFS If you don't know what to do here, say N. +# Clear forwarded VM interrupts during kexec. +# This option ensures the kernel clears active states for interrupts +# forwarded to virtual machines (VMs) during a machine kexec. +config GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD + bool + endmenu config GENERIC_IRQ_MULTI_HANDLER bool help Allow to specify the low level IRQ handler at run time. + +# Cavium Octeon is the last system to use this deprecated option +# Do not even think of enabling this on any new platform +config DEPRECATED_IRQ_CPU_ONOFFLINE + bool + depends on CAVIUM_OCTEON_SOC + default CAVIUM_OCTEON_SOC diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index b4f53717d143..c0f44c06d69d 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o +obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o kexec.o obj-$(CONFIG_IRQ_TIMINGS) += timings.o ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y) CFLAGS_timings.o += -DDEBUG @@ -15,6 +15,7 @@ obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o obj-$(CONFIG_PM_SLEEP) += pm.o obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o +obj-$(CONFIG_GENERIC_IRQ_IPI_MUX) += ipi-mux.o obj-$(CONFIG_SMP) += affinity.o obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o obj-$(CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR) += matrix.o diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 4d89ad4fae3b..44a4eba80315 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -7,397 +7,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/cpu.h> -#include <linux/sort.h> - -static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, - unsigned int cpus_per_vec) -{ - const struct cpumask *siblmsk; - int cpu, sibl; - - for ( ; cpus_per_vec > 0; ) { - cpu = cpumask_first(nmsk); - - /* Should not happen, but I'm too lazy to think about it */ - if (cpu >= nr_cpu_ids) - return; - - cpumask_clear_cpu(cpu, nmsk); - cpumask_set_cpu(cpu, irqmsk); - cpus_per_vec--; - - /* If the cpu has siblings, use them first */ - siblmsk = topology_sibling_cpumask(cpu); - for (sibl = -1; cpus_per_vec > 0; ) { - sibl = cpumask_next(sibl, siblmsk); - if (sibl >= nr_cpu_ids) - break; - if (!cpumask_test_and_clear_cpu(sibl, nmsk)) - continue; - cpumask_set_cpu(sibl, irqmsk); - cpus_per_vec--; - } - } -} - -static cpumask_var_t *alloc_node_to_cpumask(void) -{ - cpumask_var_t *masks; - int node; - - masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL); - if (!masks) - return NULL; - - for (node = 0; node < nr_node_ids; node++) { - if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL)) - goto out_unwind; - } - - return masks; - -out_unwind: - while (--node >= 0) - free_cpumask_var(masks[node]); - kfree(masks); - return NULL; -} - -static void free_node_to_cpumask(cpumask_var_t *masks) -{ - int node; - - for (node = 0; node < nr_node_ids; node++) - free_cpumask_var(masks[node]); - kfree(masks); -} - -static void build_node_to_cpumask(cpumask_var_t *masks) -{ - int cpu; - - for_each_possible_cpu(cpu) - cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); -} - -static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, - const struct cpumask *mask, nodemask_t *nodemsk) -{ - int n, nodes = 0; - - /* Calculate the number of nodes in the supplied affinity mask */ - for_each_node(n) { - if (cpumask_intersects(mask, node_to_cpumask[n])) { - node_set(n, *nodemsk); - nodes++; - } - } - return nodes; -} - -struct node_vectors { - unsigned id; - - union { - unsigned nvectors; - unsigned ncpus; - }; -}; - -static int ncpus_cmp_func(const void *l, const void *r) -{ - const struct node_vectors *ln = l; - const struct node_vectors *rn = r; - - return ln->ncpus - rn->ncpus; -} - -/* - * Allocate vector number for each node, so that for each node: - * - * 1) the allocated number is >= 1 - * - * 2) the allocated numbver is <= active CPU number of this node - * - * The actual allocated total vectors may be less than @numvecs when - * active total CPU number is less than @numvecs. - * - * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' - * for each node. - */ -static void alloc_nodes_vectors(unsigned int numvecs, - cpumask_var_t *node_to_cpumask, - const struct cpumask *cpu_mask, - const nodemask_t nodemsk, - struct cpumask *nmsk, - struct node_vectors *node_vectors) -{ - unsigned n, remaining_ncpus = 0; - - for (n = 0; n < nr_node_ids; n++) { - node_vectors[n].id = n; - node_vectors[n].ncpus = UINT_MAX; - } - - for_each_node_mask(n, nodemsk) { - unsigned ncpus; - - cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); - ncpus = cpumask_weight(nmsk); - - if (!ncpus) - continue; - remaining_ncpus += ncpus; - node_vectors[n].ncpus = ncpus; - } - - numvecs = min_t(unsigned, remaining_ncpus, numvecs); - - sort(node_vectors, nr_node_ids, sizeof(node_vectors[0]), - ncpus_cmp_func, NULL); - - /* - * Allocate vectors for each node according to the ratio of this - * node's nr_cpus to remaining un-assigned ncpus. 'numvecs' is - * bigger than number of active numa nodes. Always start the - * allocation from the node with minimized nr_cpus. - * - * This way guarantees that each active node gets allocated at - * least one vector, and the theory is simple: over-allocation - * is only done when this node is assigned by one vector, so - * other nodes will be allocated >= 1 vector, since 'numvecs' is - * bigger than number of numa nodes. - * - * One perfect invariant is that number of allocated vectors for - * each node is <= CPU count of this node: - * - * 1) suppose there are two nodes: A and B - * ncpu(X) is CPU count of node X - * vecs(X) is the vector count allocated to node X via this - * algorithm - * - * ncpu(A) <= ncpu(B) - * ncpu(A) + ncpu(B) = N - * vecs(A) + vecs(B) = V - * - * vecs(A) = max(1, round_down(V * ncpu(A) / N)) - * vecs(B) = V - vecs(A) - * - * both N and V are integer, and 2 <= V <= N, suppose - * V = N - delta, and 0 <= delta <= N - 2 - * - * 2) obviously vecs(A) <= ncpu(A) because: - * - * if vecs(A) is 1, then vecs(A) <= ncpu(A) given - * ncpu(A) >= 1 - * - * otherwise, - * vecs(A) <= V * ncpu(A) / N <= ncpu(A), given V <= N - * - * 3) prove how vecs(B) <= ncpu(B): - * - * if round_down(V * ncpu(A) / N) == 0, vecs(B) won't be - * over-allocated, so vecs(B) <= ncpu(B), - * - * otherwise: - * - * vecs(A) = - * round_down(V * ncpu(A) / N) = - * round_down((N - delta) * ncpu(A) / N) = - * round_down((N * ncpu(A) - delta * ncpu(A)) / N) >= - * round_down((N * ncpu(A) - delta * N) / N) = - * cpu(A) - delta - * - * then: - * - * vecs(A) - V >= ncpu(A) - delta - V - * => - * V - vecs(A) <= V + delta - ncpu(A) - * => - * vecs(B) <= N - ncpu(A) - * => - * vecs(B) <= cpu(B) - * - * For nodes >= 3, it can be thought as one node and another big - * node given that is exactly what this algorithm is implemented, - * and we always re-calculate 'remaining_ncpus' & 'numvecs', and - * finally for each node X: vecs(X) <= ncpu(X). - * - */ - for (n = 0; n < nr_node_ids; n++) { - unsigned nvectors, ncpus; - - if (node_vectors[n].ncpus == UINT_MAX) - continue; - - WARN_ON_ONCE(numvecs == 0); - - ncpus = node_vectors[n].ncpus; - nvectors = max_t(unsigned, 1, - numvecs * ncpus / remaining_ncpus); - WARN_ON_ONCE(nvectors > ncpus); - - node_vectors[n].nvectors = nvectors; - - remaining_ncpus -= ncpus; - numvecs -= nvectors; - } -} - -static int __irq_build_affinity_masks(unsigned int startvec, - unsigned int numvecs, - unsigned int firstvec, - cpumask_var_t *node_to_cpumask, - const struct cpumask *cpu_mask, - struct cpumask *nmsk, - struct irq_affinity_desc *masks) -{ - unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0; - unsigned int last_affv = firstvec + numvecs; - unsigned int curvec = startvec; - nodemask_t nodemsk = NODE_MASK_NONE; - struct node_vectors *node_vectors; - - if (!cpumask_weight(cpu_mask)) - return 0; - - nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk); - - /* - * If the number of nodes in the mask is greater than or equal the - * number of vectors we just spread the vectors across the nodes. - */ - if (numvecs <= nodes) { - for_each_node_mask(n, nodemsk) { - cpumask_or(&masks[curvec].mask, &masks[curvec].mask, - node_to_cpumask[n]); - if (++curvec == last_affv) - curvec = firstvec; - } - return numvecs; - } - - node_vectors = kcalloc(nr_node_ids, - sizeof(struct node_vectors), - GFP_KERNEL); - if (!node_vectors) - return -ENOMEM; - - /* allocate vector number for each node */ - alloc_nodes_vectors(numvecs, node_to_cpumask, cpu_mask, - nodemsk, nmsk, node_vectors); - - for (i = 0; i < nr_node_ids; i++) { - unsigned int ncpus, v; - struct node_vectors *nv = &node_vectors[i]; - - if (nv->nvectors == UINT_MAX) - continue; - - /* Get the cpus on this node which are in the mask */ - cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]); - ncpus = cpumask_weight(nmsk); - if (!ncpus) - continue; - - WARN_ON_ONCE(nv->nvectors > ncpus); - - /* Account for rounding errors */ - extra_vecs = ncpus - nv->nvectors * (ncpus / nv->nvectors); - - /* Spread allocated vectors on CPUs of the current node */ - for (v = 0; v < nv->nvectors; v++, curvec++) { - cpus_per_vec = ncpus / nv->nvectors; - - /* Account for extra vectors to compensate rounding errors */ - if (extra_vecs) { - cpus_per_vec++; - --extra_vecs; - } - - /* - * wrapping has to be considered given 'startvec' - * may start anywhere - */ - if (curvec >= last_affv) - curvec = firstvec; - irq_spread_init_one(&masks[curvec].mask, nmsk, - cpus_per_vec); - } - done += nv->nvectors; - } - kfree(node_vectors); - return done; -} - -/* - * build affinity in two stages: - * 1) spread present CPU on these vectors - * 2) spread other possible CPUs on these vectors - */ -static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs, - unsigned int firstvec, - struct irq_affinity_desc *masks) -{ - unsigned int curvec = startvec, nr_present = 0, nr_others = 0; - cpumask_var_t *node_to_cpumask; - cpumask_var_t nmsk, npresmsk; - int ret = -ENOMEM; - - if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) - return ret; - - if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) - goto fail_nmsk; - - node_to_cpumask = alloc_node_to_cpumask(); - if (!node_to_cpumask) - goto fail_npresmsk; - - /* Stabilize the cpumasks */ - get_online_cpus(); - build_node_to_cpumask(node_to_cpumask); - - /* Spread on present CPUs starting from affd->pre_vectors */ - ret = __irq_build_affinity_masks(curvec, numvecs, firstvec, - node_to_cpumask, cpu_present_mask, - nmsk, masks); - if (ret < 0) - goto fail_build_affinity; - nr_present = ret; - - /* - * Spread on non present CPUs starting from the next vector to be - * handled. If the spreading of present CPUs already exhausted the - * vector space, assign the non present CPUs to the already spread - * out vectors. - */ - if (nr_present >= numvecs) - curvec = firstvec; - else - curvec = firstvec + nr_present; - cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); - ret = __irq_build_affinity_masks(curvec, numvecs, firstvec, - node_to_cpumask, npresmsk, nmsk, - masks); - if (ret >= 0) - nr_others = ret; - - fail_build_affinity: - put_online_cpus(); - - if (ret >= 0) - WARN_ON(nr_present + nr_others < numvecs); - - free_node_to_cpumask(node_to_cpumask); - - fail_npresmsk: - free_cpumask_var(npresmsk); - - fail_nmsk: - free_cpumask_var(nmsk); - return ret < 0 ? ret : 0; -} +#include <linux/group_cpus.h> static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs) { @@ -460,14 +70,18 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) */ for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) { unsigned int this_vecs = affd->set_size[i]; - int ret; + int j; + struct cpumask *result = group_cpus_evenly(this_vecs); - ret = irq_build_affinity_masks(curvec, this_vecs, - curvec, masks); - if (ret) { + if (!result) { kfree(masks); return NULL; } + + for (j = 0; j < this_vecs; j++) + cpumask_copy(&masks[curvec + j].mask, &result[j]); + kfree(result); + curvec += this_vecs; usedvecs += this_vecs; } @@ -505,9 +119,9 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, if (affd->calc_sets) { set_vecs = maxvec - resv; } else { - get_online_cpus(); + cpus_read_lock(); set_vecs = cpumask_weight(cpu_possible_mask); - put_online_cpus(); + cpus_read_unlock(); } return resv + min(set_vecs, maxvec - resv); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6d89e33fe3aa..c901436ebd9f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -38,7 +38,7 @@ struct irqaction chained_action = { * @irq: irq number * @chip: pointer to irq chip description structure */ -int irq_set_chip(unsigned int irq, struct irq_chip *chip) +int irq_set_chip(unsigned int irq, const struct irq_chip *chip) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); @@ -46,10 +46,7 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip) if (!desc) return -EINVAL; - if (!chip) - chip = &no_irq_chip; - - desc->irq_data.chip = chip; + desc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip); irq_put_desc_unlock(desc, flags); /* * For !CONFIG_SPARSE_IRQ make the irq show up in @@ -191,7 +188,8 @@ enum { #ifdef CONFIG_SMP static int -__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) +__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, + bool force) { struct irq_data *d = irq_desc_get_irq_data(desc); @@ -200,7 +198,7 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) irqd_clr_managed_shutdown(d); - if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) { + if (!cpumask_intersects(aff, cpu_online_mask)) { /* * Catch code which fiddles with enable_irq() on a managed * and potentially shutdown IRQ. Chained interrupt @@ -227,7 +225,8 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) } #else static __always_inline int -__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) +__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, + bool force) { return IRQ_STARTUP_NORMAL; } @@ -255,7 +254,7 @@ static int __irq_startup(struct irq_desc *desc) int irq_startup(struct irq_desc *desc, bool resend, bool force) { struct irq_data *d = irq_desc_get_irq_data(desc); - struct cpumask *aff = irq_data_get_affinity_mask(d); + const struct cpumask *aff = irq_data_get_affinity_mask(d); int ret = 0; desc->depth = 0; @@ -265,8 +264,11 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force) } else { switch (__irq_startup_managed(desc, aff, force)) { case IRQ_STARTUP_NORMAL: + if (d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP) + irq_setup_affinity(desc); ret = __irq_startup(desc); - irq_setup_affinity(desc); + if (!(d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP)) + irq_setup_affinity(desc); break; case IRQ_STARTUP_MANAGED: irq_do_set_affinity(d, aff, false); @@ -304,6 +306,7 @@ static void __irq_disable(struct irq_desc *desc, bool mask); void irq_shutdown(struct irq_desc *desc) { if (irqd_is_started(&desc->irq_data)) { + clear_irq_resend(desc); desc->depth = 1; if (desc->irq_data.chip->irq_shutdown) { desc->irq_data.chip->irq_shutdown(&desc->irq_data); @@ -470,25 +473,22 @@ void handle_nested_irq(unsigned int irq) action = desc->action; if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) { desc->istate |= IRQS_PENDING; - goto out_unlock; + raw_spin_unlock_irq(&desc->lock); + return; } kstat_incr_irqs_this_cpu(desc); - irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); + atomic_inc(&desc->threads_active); raw_spin_unlock_irq(&desc->lock); action_ret = IRQ_NONE; for_each_action_of_desc(desc, action) action_ret |= action->thread_fn(action->irq, action->dev_id); - if (!noirqdebug) + if (!irq_settings_no_debug(desc)) note_interrupt(desc, action_ret); - raw_spin_lock_irq(&desc->lock); - irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); - -out_unlock: - raw_spin_unlock_irq(&desc->lock); + wake_threads_waitq(desc); } EXPORT_SYMBOL_GPL(handle_nested_irq); @@ -572,8 +572,6 @@ EXPORT_SYMBOL_GPL(handle_simple_irq); */ void handle_untracked_irq(struct irq_desc *desc) { - unsigned int flags = 0; - raw_spin_lock(&desc->lock); if (!irq_may_run(desc)) @@ -590,7 +588,7 @@ void handle_untracked_irq(struct irq_desc *desc) irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); raw_spin_unlock(&desc->lock); - __handle_irq_event_percpu(desc, &flags); + __handle_irq_event_percpu(desc); raw_spin_lock(&desc->lock); irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); @@ -692,8 +690,16 @@ void handle_fasteoi_irq(struct irq_desc *desc) raw_spin_lock(&desc->lock); - if (!irq_may_run(desc)) + /* + * When an affinity change races with IRQ handling, the next interrupt + * can arrive on the new CPU before the original CPU has completed + * handling the previous one - it may need to be resent. + */ + if (!irq_may_run(desc)) { + if (irqd_needs_resend_when_in_progress(&desc->irq_data)) + desc->istate |= IRQS_PENDING; goto out; + } desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); @@ -715,6 +721,12 @@ void handle_fasteoi_irq(struct irq_desc *desc) cond_unmask_eoi_irq(desc, chip); + /* + * When the race described above happens this will resend the interrupt. + */ + if (unlikely(desc->istate & IRQS_PENDING)) + check_irq_resend(desc, false); + raw_spin_unlock(&desc->lock); return; out: @@ -761,7 +773,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); * handle_edge_irq - edge type IRQ handler * @desc: the interrupt description structure for this irq * - * Interrupt occures on the falling and/or rising edge of a hardware + * Interrupt occurs on the falling and/or rising edge of a hardware * signal. The occurrence is latched into the irq controller hardware * and must be acked in order to be reenabled. After the ack another * interrupt can happen on the same source even before the first one @@ -808,7 +820,7 @@ void handle_edge_irq(struct irq_desc *desc) /* * When another irq arrived while we were handling * one, we could have masked the irq. - * Renable it, if it was not disabled in meantime. + * Reenable it, if it was not disabled in meantime. */ if (unlikely(desc->istate & IRQS_PENDING)) { if (!irqd_irq_disabled(&desc->irq_data) && @@ -1008,8 +1020,10 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, if (desc->irq_data.chip != &no_irq_chip) mask_ack_irq(desc); irq_state_set_disabled(desc); - if (is_chained) + if (is_chained) { desc->action = NULL; + WARN_ON(irq_chip_pm_put(irq_desc_get_irq_data(desc))); + } desc->depth = 1; } desc->handle_irq = handle; @@ -1035,6 +1049,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); desc->action = &chained_action; + WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc))); irq_activate_and_startup(desc, IRQ_RESEND); } } @@ -1072,7 +1087,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data); void -irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, +irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip, irq_flow_handler_t handle, const char *name) { irq_set_chip(irq, chip); @@ -1099,13 +1114,11 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) trigger = irqd_get_trigger_type(&desc->irq_data); irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | - IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT); + IRQD_TRIGGER_MASK | IRQD_LEVEL); if (irq_settings_has_no_balance_set(desc)) irqd_set(&desc->irq_data, IRQD_NO_BALANCING); if (irq_settings_is_per_cpu(desc)) irqd_set(&desc->irq_data, IRQD_PER_CPU); - if (irq_settings_can_move_pcntxt(desc)) - irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); if (irq_settings_is_level(desc)) irqd_set(&desc->irq_data, IRQD_LEVEL); @@ -1119,6 +1132,7 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) } EXPORT_SYMBOL_GPL(irq_modify_status); +#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE /** * irq_cpu_online - Invoke all irq_cpu_online functions. * @@ -1178,6 +1192,7 @@ void irq_cpu_offline(void) raw_spin_unlock_irqrestore(&desc->lock, flags); } } +#endif #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY @@ -1419,7 +1434,7 @@ EXPORT_SYMBOL_GPL(irq_chip_eoi_parent); * @dest: The affinity mask to set * @force: Flag to enforce setting (disable online checks) * - * Conditinal, as the underlying parent chip might not implement it. + * Conditional, as the underlying parent chip might not implement it. */ int irq_chip_set_affinity_parent(struct irq_data *data, const struct cpumask *dest, bool force) @@ -1513,7 +1528,8 @@ int irq_chip_request_resources_parent(struct irq_data *data) if (data->chip->irq_request_resources) return data->chip->irq_request_resources(data); - return -ENOSYS; + /* no error on missing optional irq_chip::irq_request_resources */ + return 0; } EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent); @@ -1531,7 +1547,7 @@ EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent); #endif /** - * irq_chip_compose_msi_msg - Componse msi message for a irq chip + * irq_chip_compose_msi_msg - Compose msi message for a irq chip * @data: Pointer to interrupt specific data * @msg: Pointer to the MSI message * @@ -1555,6 +1571,14 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return 0; } +static struct device *irq_get_pm_device(struct irq_data *data) +{ + if (data->domain) + return data->domain->pm_dev; + + return NULL; +} + /** * irq_chip_pm_get - Enable power for an IRQ chip * @data: Pointer to interrupt specific data @@ -1564,17 +1588,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) */ int irq_chip_pm_get(struct irq_data *data) { - int retval; + struct device *dev = irq_get_pm_device(data); + int retval = 0; - if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) { - retval = pm_runtime_get_sync(data->chip->parent_device); - if (retval < 0) { - pm_runtime_put_noidle(data->chip->parent_device); - return retval; - } - } + if (IS_ENABLED(CONFIG_PM) && dev) + retval = pm_runtime_resume_and_get(dev); - return 0; + return retval; } /** @@ -1587,10 +1607,11 @@ int irq_chip_pm_get(struct irq_data *data) */ int irq_chip_pm_put(struct irq_data *data) { + struct device *dev = irq_get_pm_device(data); int retval = 0; - if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) - retval = pm_runtime_put(data->chip->parent_device); + if (IS_ENABLED(CONFIG_PM) && dev) + retval = pm_runtime_put(dev); return (retval < 0) ? retval : 0; } diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 02236b13b359..15a7654eff68 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -37,7 +37,7 @@ static inline bool irq_needs_fixup(struct irq_data *d) * has been removed from the online mask already. */ if (cpumask_any_but(m, cpu) < nr_cpu_ids && - cpumask_any_and(m, cpu_online_mask) >= nr_cpu_ids) { + !cpumask_intersects(m, cpu_online_mask)) { /* * If this happens then there was a missed IRQ fixup at some * point. Warn about it and enforce fixup. @@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_desc *desc) } /* + * Complete an eventually pending irq move cleanup. If this + * interrupt was moved in hard irq context, then the vectors need + * to be cleaned up. It can't wait until this interrupt actually + * happens and this CPU was involved. + */ + irq_force_complete_move(desc); + + /* * No move required, if: * - Interrupt is per cpu * - Interrupt is not started @@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_desc *desc) } /* - * Complete an eventually pending irq move cleanup. If this - * interrupt was moved in hard irq context, then the vectors need - * to be cleaned up. It can't wait until this interrupt actually - * happens and this CPU was involved. - */ - irq_force_complete_move(desc); - - /* * If there is a setaffinity pending, then try to reuse the pending * mask, so the last change of the affinity does not get lost. If * there is no move pending or the pending mask does not contain @@ -110,7 +110,7 @@ static bool migrate_one_irq(struct irq_desc *desc) if (maskchip && chip->irq_mask) chip->irq_mask(d); - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + if (!cpumask_intersects(affinity, cpu_online_mask)) { /* * If the interrupt is managed, then shut it down and leave * the affinity untouched. @@ -130,6 +130,22 @@ static bool migrate_one_irq(struct irq_desc *desc) * CPU. */ err = irq_do_set_affinity(d, affinity, false); + + /* + * If there are online CPUs in the affinity mask, but they have no + * vectors left to make the migration work, try to break the + * affinity by migrating to any online CPU. + */ + if (err == -ENOSPC && !irqd_affinity_is_managed(d) && affinity != cpu_online_mask) { + pr_debug("IRQ%u: set affinity failed for %*pbl, re-try with online CPUs\n", + d->irq, cpumask_pr_args(affinity)); + + affinity = cpu_online_mask; + brokeaff = true; + + err = irq_do_set_affinity(d, affinity, false); + } + if (err) { pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", d->irq, err); @@ -166,7 +182,7 @@ void irq_migrate_all_off_this_cpu(void) raw_spin_unlock(&desc->lock); if (affinity_broken) { - pr_warn_ratelimited("IRQ %u: no longer affine to CPU%u\n", + pr_debug_ratelimited("IRQ %u: no longer affine to CPU%u\n", irq, smp_processor_id()); } } @@ -176,10 +192,10 @@ static bool hk_should_isolate(struct irq_data *data, unsigned int cpu) { const struct cpumask *hk_mask; - if (!housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) + if (!housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) return false; - hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ); + hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); if (cpumask_subset(irq_data_get_effective_affinity_mask(data), hk_mask)) return false; @@ -195,10 +211,15 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu) !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) return; - if (irqd_is_managed_and_shutdown(data)) { - irq_startup(desc, IRQ_RESEND, IRQ_START_COND); + /* + * Don't restore suspended interrupts here when a system comes back + * from S3. They are reenabled via resume_device_irqs(). + */ + if (desc->istate & IRQS_SUSPENDED) return; - } + + if (irqd_is_managed_and_shutdown(data)) + irq_startup(desc, IRQ_RESEND, IRQ_START_COND); /* * If the interrupt can only be directed to a single target diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index e4cff358b437..ca142b9a4db3 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -9,14 +9,8 @@ static struct dentry *irq_dir; -struct irq_bit_descr { - unsigned int mask; - char *name; -}; -#define BIT_MASK_DESCR(m) { .mask = m, .name = #m } - -static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state, - const struct irq_bit_descr *sd, int size) +void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state, + const struct irq_bit_descr *sd, int size) { int i; @@ -30,7 +24,7 @@ static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state, static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); - struct cpumask *msk; + const struct cpumask *msk; msk = irq_data_get_affinity_mask(data); seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk)); @@ -58,6 +52,8 @@ static const struct irq_bit_descr irqchip_flags[] = { BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI), BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI), BIT_MASK_DESCR(IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND), + BIT_MASK_DESCR(IRQCHIP_IMMUTABLE), + BIT_MASK_DESCR(IRQCHIP_MOVE_DEFERRED), }; static void @@ -69,8 +65,12 @@ irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind) seq_printf(m, "chip: None\n"); return; } - seq_printf(m, "%*schip: %s\n", ind, "", chip->name); - seq_printf(m, "%*sflags: 0x%lx\n", ind + 1, "", chip->flags); + seq_printf(m, "%*schip: ", ind, ""); + if (chip->irq_print_chip) + chip->irq_print_chip(data, m); + else + seq_printf(m, "%s", chip->name); + seq_printf(m, "\n%*sflags: 0x%lx\n", ind + 1, "", chip->flags); irq_debug_show_bits(m, ind, chip->flags, irqchip_flags, ARRAY_SIZE(irqchip_flags)); } @@ -109,14 +109,12 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_NO_BALANCING), BIT_MASK_DESCR(IRQD_SINGLE_TARGET), - BIT_MASK_DESCR(IRQD_MOVE_PCNTXT), BIT_MASK_DESCR(IRQD_AFFINITY_SET), BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), BIT_MASK_DESCR(IRQD_AFFINITY_ON_ACTIVATE), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), BIT_MASK_DESCR(IRQD_CAN_RESERVE), - BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK), BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), @@ -128,6 +126,8 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX), BIT_MASK_DESCR(IRQD_IRQ_ENABLED_ON_SUSPEND), + + BIT_MASK_DESCR(IRQD_RESEND_WHEN_IN_PROGRESS), }; static const struct irq_bit_descr irqdesc_states[] = { diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index f6e5515ee077..eb16a58e0322 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/module.h> #include <linux/interrupt.h> +#include <linux/irqdomain.h> #include <linux/device.h> #include <linux/gfp.h> #include <linux/irq.h> @@ -140,9 +141,8 @@ void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id) { struct irq_devres match_data = { irq, dev_id }; - WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match, + WARN_ON(devres_release(dev, devm_irq_release, devm_irq_match, &match_data)); - free_irq(irq, dev_id); } EXPORT_SYMBOL(devm_free_irq); @@ -282,3 +282,43 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, } EXPORT_SYMBOL_GPL(devm_irq_setup_generic_chip); #endif /* CONFIG_GENERIC_IRQ_CHIP */ + +#ifdef CONFIG_IRQ_DOMAIN +static void devm_irq_domain_remove(struct device *dev, void *res) +{ + struct irq_domain **domain = res; + + irq_domain_remove(*domain); +} + +/** + * devm_irq_domain_instantiate() - Instantiate a new irq domain data for a + * managed device. + * @dev: Device to instantiate the domain for + * @info: Domain information pointer pointing to the information for this + * domain + * + * Return: A pointer to the instantiated irq domain or an ERR_PTR value. + */ +struct irq_domain *devm_irq_domain_instantiate(struct device *dev, + const struct irq_domain_info *info) +{ + struct irq_domain *domain; + struct irq_domain **dr; + + dr = devres_alloc(devm_irq_domain_remove, sizeof(*dr), GFP_KERNEL); + if (!dr) + return ERR_PTR(-ENOMEM); + + domain = irq_domain_instantiate(info); + if (!IS_ERR(domain)) { + *dr = domain; + devres_add(dev, dr); + } else { + devres_free(dr); + } + + return domain; +} +EXPORT_SYMBOL_GPL(devm_irq_domain_instantiate); +#endif /* CONFIG_IRQ_DOMAIN */ diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c index 0b0cdf206dc4..7fe6cffe7d0d 100644 --- a/kernel/irq/dummychip.c +++ b/kernel/irq/dummychip.c @@ -13,7 +13,7 @@ /* * What should we do if we get a hw irq event on an illegal vector? - * Each architecture has to answer this themself. + * Each architecture has to answer this themselves. */ static void ack_bad(struct irq_data *data) { diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index a23ac2bbf433..c4a8bca5f2b0 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -25,6 +25,7 @@ static DEFINE_RAW_SPINLOCK(gc_lock); void irq_gc_noop(struct irq_data *d) { } +EXPORT_SYMBOL_GPL(irq_gc_noop); /** * irq_gc_mask_disable_reg - Mask chip via disable register @@ -44,6 +45,7 @@ void irq_gc_mask_disable_reg(struct irq_data *d) *ct->mask_cache &= ~mask; irq_gc_unlock(gc); } +EXPORT_SYMBOL_GPL(irq_gc_mask_disable_reg); /** * irq_gc_mask_set_bit - Mask chip via setting bit in mask register @@ -103,6 +105,7 @@ void irq_gc_unmask_enable_reg(struct irq_data *d) *ct->mask_cache |= mask; irq_gc_unlock(gc); } +EXPORT_SYMBOL_GPL(irq_gc_unmask_enable_reg); /** * irq_gc_ack_set_bit - Ack pending interrupt via setting bit @@ -159,6 +162,7 @@ void irq_gc_mask_disable_and_ack_set(struct irq_data *d) irq_reg_writel(gc, mask, ct->regs.ack); irq_gc_unlock(gc); } +EXPORT_SYMBOL_GPL(irq_gc_mask_disable_and_ack_set); /** * irq_gc_eoi - EOI interrupt @@ -200,6 +204,7 @@ int irq_gc_set_wake(struct irq_data *d, unsigned int on) irq_gc_unlock(gc); return 0; } +EXPORT_SYMBOL_GPL(irq_gc_set_wake); static u32 irq_readl_be(void __iomem *addr) { @@ -215,11 +220,15 @@ void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, int num_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler) { + struct irq_chip_type *ct = gc->chip_types; + int i; + raw_spin_lock_init(&gc->lock); gc->num_ct = num_ct; gc->irq_base = irq_base; gc->reg_base = reg_base; - gc->chip_types->chip.name = name; + for (i = 0; i < num_ct; i++) + ct[i].chip.name = name; gc->chip_types->handler = handler; } @@ -239,9 +248,8 @@ irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base, void __iomem *reg_base, irq_flow_handler_t handler) { struct irq_chip_generic *gc; - unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); - gc = kzalloc(sz, GFP_KERNEL); + gc = kzalloc(struct_size(gc, chip_types, num_ct), GFP_KERNEL); if (gc) { irq_init_generic_chip(gc, name, num_ct, irq_base, reg_base, handler); @@ -269,70 +277,139 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags) } /** - * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain - * @d: irq domain for which to allocate chips - * @irqs_per_chip: Number of interrupts each chip handles (max 32) - * @num_ct: Number of irq_chip_type instances associated with this - * @name: Name of the irq chip - * @handler: Default flow handler associated with these chips - * @clr: IRQ_* bits to clear in the mapping function - * @set: IRQ_* bits to set in the mapping function - * @gcflags: Generic chip specific setup flags + * irq_domain_alloc_generic_chips - Allocate generic chips for an irq domain + * @d: irq domain for which to allocate chips + * @info: Generic chip information + * + * Return: 0 on success, negative error code on failure */ -int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, - int num_ct, const char *name, - irq_flow_handler_t handler, - unsigned int clr, unsigned int set, - enum irq_gc_flags gcflags) +int irq_domain_alloc_generic_chips(struct irq_domain *d, + const struct irq_domain_chip_generic_info *info) { struct irq_domain_chip_generic *dgc; struct irq_chip_generic *gc; - int numchips, sz, i; unsigned long flags; + int numchips, i; + size_t dgc_sz; + size_t gc_sz; + size_t sz; void *tmp; + int ret; if (d->gc) return -EBUSY; - numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip); + numchips = DIV_ROUND_UP(d->revmap_size, info->irqs_per_chip); if (!numchips) return -EINVAL; /* Allocate a pointer, generic chip and chiptypes for each chip */ - sz = sizeof(*dgc) + numchips * sizeof(gc); - sz += numchips * (sizeof(*gc) + num_ct * sizeof(struct irq_chip_type)); + gc_sz = struct_size(gc, chip_types, info->num_ct); + dgc_sz = struct_size(dgc, gc, numchips); + sz = dgc_sz + numchips * gc_sz; tmp = dgc = kzalloc(sz, GFP_KERNEL); if (!dgc) return -ENOMEM; - dgc->irqs_per_chip = irqs_per_chip; + dgc->irqs_per_chip = info->irqs_per_chip; dgc->num_chips = numchips; - dgc->irq_flags_to_set = set; - dgc->irq_flags_to_clear = clr; - dgc->gc_flags = gcflags; + dgc->irq_flags_to_set = info->irq_flags_to_set; + dgc->irq_flags_to_clear = info->irq_flags_to_clear; + dgc->gc_flags = info->gc_flags; + dgc->exit = info->exit; d->gc = dgc; /* Calc pointer to the first generic chip */ - tmp += sizeof(*dgc) + numchips * sizeof(gc); + tmp += dgc_sz; for (i = 0; i < numchips; i++) { /* Store the pointer to the generic chip */ dgc->gc[i] = gc = tmp; - irq_init_generic_chip(gc, name, num_ct, i * irqs_per_chip, - NULL, handler); + irq_init_generic_chip(gc, info->name, info->num_ct, + i * dgc->irqs_per_chip, NULL, + info->handler); gc->domain = d; - if (gcflags & IRQ_GC_BE_IO) { + if (dgc->gc_flags & IRQ_GC_BE_IO) { gc->reg_readl = &irq_readl_be; gc->reg_writel = &irq_writel_be; } + if (info->init) { + ret = info->init(gc); + if (ret) + goto err; + } + raw_spin_lock_irqsave(&gc_lock, flags); list_add_tail(&gc->list, &gc_list); raw_spin_unlock_irqrestore(&gc_lock, flags); /* Calc pointer to the next generic chip */ - tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); + tmp += gc_sz; } return 0; + +err: + while (i--) { + if (dgc->exit) + dgc->exit(dgc->gc[i]); + irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0); + } + d->gc = NULL; + kfree(dgc); + return ret; +} +EXPORT_SYMBOL_GPL(irq_domain_alloc_generic_chips); + +/** + * irq_domain_remove_generic_chips - Remove generic chips from an irq domain + * @d: irq domain for which generic chips are to be removed + */ +void irq_domain_remove_generic_chips(struct irq_domain *d) +{ + struct irq_domain_chip_generic *dgc = d->gc; + unsigned int i; + + if (!dgc) + return; + + for (i = 0; i < dgc->num_chips; i++) { + if (dgc->exit) + dgc->exit(dgc->gc[i]); + irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0); + } + d->gc = NULL; + kfree(dgc); +} +EXPORT_SYMBOL_GPL(irq_domain_remove_generic_chips); + +/** + * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain + * @d: irq domain for which to allocate chips + * @irqs_per_chip: Number of interrupts each chip handles (max 32) + * @num_ct: Number of irq_chip_type instances associated with this + * @name: Name of the irq chip + * @handler: Default flow handler associated with these chips + * @clr: IRQ_* bits to clear in the mapping function + * @set: IRQ_* bits to set in the mapping function + * @gcflags: Generic chip specific setup flags + */ +int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, + int num_ct, const char *name, + irq_flow_handler_t handler, + unsigned int clr, unsigned int set, + enum irq_gc_flags gcflags) +{ + struct irq_domain_chip_generic_info info = { + .irqs_per_chip = irqs_per_chip, + .num_ct = num_ct, + .name = name, + .handler = handler, + .irq_flags_to_clear = clr, + .irq_flags_to_set = set, + .gc_flags = gcflags, + }; + + return irq_domain_alloc_generic_chips(d, &info); } EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips); @@ -424,7 +501,7 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, return 0; } -static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq) +void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq) { struct irq_data *data = irq_domain_get_irq_data(d, virq); struct irq_domain_chip_generic *dgc = d->gc; @@ -444,7 +521,7 @@ static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq) } -struct irq_domain_ops irq_generic_chip_ops = { +const struct irq_domain_ops irq_generic_chip_ops = { .map = irq_map_generic_chip, .unmap = irq_unmap_generic_chip, .xlate = irq_domain_xlate_onetwocell, @@ -537,21 +614,34 @@ EXPORT_SYMBOL_GPL(irq_setup_alt_chip); void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, unsigned int clr, unsigned int set) { - unsigned int i = gc->irq_base; + unsigned int i, virq; raw_spin_lock(&gc_lock); list_del(&gc->list); raw_spin_unlock(&gc_lock); - for (; msk; msk >>= 1, i++) { + for (i = 0; msk; msk >>= 1, i++) { if (!(msk & 0x01)) continue; + /* + * Interrupt domain based chips store the base hardware + * interrupt number in gc::irq_base. Otherwise gc::irq_base + * contains the base Linux interrupt number. + */ + if (gc->domain) { + virq = irq_find_mapping(gc->domain, gc->irq_base + i); + if (!virq) + continue; + } else { + virq = gc->irq_base + i; + } + /* Remove handler first. That will mask the irq line */ - irq_set_handler(i, NULL); - irq_set_chip(i, &no_irq_chip); - irq_set_chip_data(i, NULL); - irq_modify_status(i, clr, set); + irq_set_handler(virq, NULL); + irq_set_chip(virq, &no_irq_chip); + irq_set_chip_data(virq, NULL); + irq_modify_status(virq, clr, set); } } EXPORT_SYMBOL_GPL(irq_remove_generic_chip); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 762a928e18f9..9489f93b3db3 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -14,6 +14,8 @@ #include <linux/interrupt.h> #include <linux/kernel_stat.h> +#include <asm/irq_regs.h> + #include <trace/events/irq.h> #include "internals.h" @@ -134,7 +136,7 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) wake_up_process(action->thread); } -irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags) +irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval = IRQ_NONE; unsigned int irq = desc->irq_data.irq; @@ -172,10 +174,6 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags } __irq_wake_thread(desc, action); - - fallthrough; /* to add to randomness */ - case IRQ_HANDLED: - *flags |= action->flags; break; default: @@ -191,13 +189,12 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) { irqreturn_t retval; - unsigned int flags = 0; - retval = __handle_irq_event_percpu(desc, &flags); + retval = __handle_irq_event_percpu(desc); - add_interrupt_randomness(desc->irq_data.irq, flags); + add_interrupt_randomness(desc->irq_data.irq); - if (!noirqdebug) + if (!irq_settings_no_debug(desc)) note_interrupt(desc, retval); return retval; } @@ -226,4 +223,20 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) handle_arch_irq = handle_irq; return 0; } + +/** + * generic_handle_arch_irq - root irq handler for architectures which do no + * entry accounting themselves + * @regs: Register file coming from the low-level handling code + */ +asmlinkage void noinstr generic_handle_arch_irq(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + + irq_enter(); + old_regs = set_irq_regs(regs); + handle_arch_irq(regs); + set_irq_regs(old_regs); + irq_exit(); +} #endif diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 54363527feea..a979523640d0 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -12,9 +12,9 @@ #include <linux/sched/clock.h> #ifdef CONFIG_SPARSE_IRQ -# define IRQ_BITMAP_BITS (NR_IRQS + 8196) +# define MAX_SPARSE_IRQS INT_MAX #else -# define IRQ_BITMAP_BITS NR_IRQS +# define MAX_SPARSE_IRQS NR_IRQS #endif #define istate core_internal_state__do_not_mess_with_it @@ -29,12 +29,14 @@ extern struct irqaction chained_action; * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed * IRQTF_AFFINITY - irq thread is requested to adjust affinity * IRQTF_FORCED_THREAD - irq action is force threaded + * IRQTF_READY - signals that irq thread is ready */ enum { IRQTF_RUNTHREAD, IRQTF_WARNED, IRQTF_AFFINITY, IRQTF_FORCED_THREAD, + IRQTF_READY, }; /* @@ -45,11 +47,15 @@ enum { * detection * IRQS_POLL_INPROGRESS - polling in progress * IRQS_ONESHOT - irq is not unmasked in primary handler - * IRQS_REPLAY - irq is replayed + * IRQS_REPLAY - irq has been resent and will not be resent + * again until the handler has run and cleared + * this flag. * IRQS_WAITING - irq is waiting - * IRQS_PENDING - irq is pending and replayed later + * IRQS_PENDING - irq needs to be resent and should be resent + * at the next available opportunity. * IRQS_SUSPENDED - irq is suspended * IRQS_NMI - irq line is used to deliver NMIs + * IRQS_SYSFS - descriptor has been added to sysfs */ enum { IRQS_AUTODETECT = 0x00000001, @@ -62,6 +68,7 @@ enum { IRQS_SUSPENDED = 0x00000800, IRQS_TIMINGS = 0x00001000, IRQS_NMI = 0x00002000, + IRQS_SYSFS = 0x00004000, }; #include "debug.h" @@ -91,6 +98,8 @@ extern void mask_irq(struct irq_desc *desc); extern void unmask_irq(struct irq_desc *desc); extern void unmask_threaded_irq(struct irq_desc *desc); +extern unsigned int kstat_irqs_desc(struct irq_desc *desc, const struct cpumask *cpumask); + #ifdef CONFIG_SPARSE_IRQ static inline void irq_mark_irq(unsigned int irq) { } #else @@ -101,17 +110,19 @@ extern int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state); -extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); - -irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags); +irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc); irqreturn_t handle_irq_event_percpu(struct irq_desc *desc); irqreturn_t handle_irq_event(struct irq_desc *desc); /* Resending of interrupts :*/ int check_irq_resend(struct irq_desc *desc, bool inject); +void clear_irq_resend(struct irq_desc *desc); +void irq_resend_init(struct irq_desc *desc); bool irq_wait_for_poll(struct irq_desc *desc); void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action); +void wake_threads_waitq(struct irq_desc *desc); + #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc); @@ -249,7 +260,7 @@ static inline void irq_state_set_masked(struct irq_desc *desc) static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc) { - __this_cpu_inc(*desc->kstat_irqs); + __this_cpu_inc(desc->kstat_irqs->cnt); __this_cpu_inc(kstat.irqs_sum); } @@ -269,6 +280,11 @@ static inline int irq_desc_is_chained(struct irq_desc *desc) return (desc->action && desc->action == &chained_action); } +static inline bool irq_is_nmi(struct irq_desc *desc) +{ + return desc->istate & IRQS_NMI; +} + #ifdef CONFIG_PM_SLEEP bool irq_pm_check_wakeup(struct irq_desc *desc); void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); @@ -405,7 +421,7 @@ irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, #ifdef CONFIG_GENERIC_PENDING_IRQ static inline bool irq_can_move_pcntxt(struct irq_data *data) { - return irqd_can_move_in_process_context(data); + return !(data->chip->flags & IRQCHIP_MOVE_DEFERRED); } static inline bool irq_move_pending(struct irq_data *data) { @@ -425,10 +441,6 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) { return desc->pending_mask; } -static inline bool handle_enforce_irqctx(struct irq_data *data) -{ - return irqd_is_handle_enforce_irqctx(data); -} bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); #else /* CONFIG_GENERIC_PENDING_IRQ */ static inline bool irq_can_move_pcntxt(struct irq_data *data) @@ -455,10 +467,6 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) { return false; } -static inline bool handle_enforce_irqctx(struct irq_data *data) -{ - return false; -} #endif /* !CONFIG_GENERIC_PENDING_IRQ */ #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) @@ -485,6 +493,16 @@ static inline struct irq_data *irqd_get_parent_data(struct irq_data *irqd) #ifdef CONFIG_GENERIC_IRQ_DEBUGFS #include <linux/debugfs.h> +struct irq_bit_descr { + unsigned int mask; + char *name; +}; + +#define BIT_MASK_DESCR(m) { .mask = m, .name = #m } + +void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state, + const struct irq_bit_descr *sd, int size); + void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc); static inline void irq_remove_debugfs_entry(struct irq_desc *desc) { diff --git a/kernel/irq/ipi-mux.c b/kernel/irq/ipi-mux.c new file mode 100644 index 000000000000..fa4fc18c6131 --- /dev/null +++ b/kernel/irq/ipi-mux.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Multiplex several virtual IPIs over a single HW IPI. + * + * Copyright The Asahi Linux Contributors + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#define pr_fmt(fmt) "ipi-mux: " fmt +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/irqchip.h> +#include <linux/irqchip/chained_irq.h> +#include <linux/irqdomain.h> +#include <linux/jump_label.h> +#include <linux/percpu.h> +#include <linux/smp.h> + +struct ipi_mux_cpu { + atomic_t enable; + atomic_t bits; +}; + +static struct ipi_mux_cpu __percpu *ipi_mux_pcpu; +static struct irq_domain *ipi_mux_domain; +static void (*ipi_mux_send)(unsigned int cpu); + +static void ipi_mux_mask(struct irq_data *d) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + + atomic_andnot(BIT(irqd_to_hwirq(d)), &icpu->enable); +} + +static void ipi_mux_unmask(struct irq_data *d) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + u32 ibit = BIT(irqd_to_hwirq(d)); + + atomic_or(ibit, &icpu->enable); + + /* + * The atomic_or() above must complete before the atomic_read() + * below to avoid racing ipi_mux_send_mask(). + */ + smp_mb__after_atomic(); + + /* If a pending IPI was unmasked, raise a parent IPI immediately. */ + if (atomic_read(&icpu->bits) & ibit) + ipi_mux_send(smp_processor_id()); +} + +static void ipi_mux_send_mask(struct irq_data *d, const struct cpumask *mask) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + u32 ibit = BIT(irqd_to_hwirq(d)); + unsigned long pending; + int cpu; + + for_each_cpu(cpu, mask) { + icpu = per_cpu_ptr(ipi_mux_pcpu, cpu); + + /* + * This sequence is the mirror of the one in ipi_mux_unmask(); + * see the comment there. Additionally, release semantics + * ensure that the vIPI flag set is ordered after any shared + * memory accesses that precede it. This therefore also pairs + * with the atomic_fetch_andnot in ipi_mux_process(). + */ + pending = atomic_fetch_or_release(ibit, &icpu->bits); + + /* + * The atomic_fetch_or_release() above must complete + * before the atomic_read() below to avoid racing with + * ipi_mux_unmask(). + */ + smp_mb__after_atomic(); + + /* + * The flag writes must complete before the physical IPI is + * issued to another CPU. This is implied by the control + * dependency on the result of atomic_read() below, which is + * itself already ordered after the vIPI flag write. + */ + if (!(pending & ibit) && (atomic_read(&icpu->enable) & ibit)) + ipi_mux_send(cpu); + } +} + +static const struct irq_chip ipi_mux_chip = { + .name = "IPI Mux", + .irq_mask = ipi_mux_mask, + .irq_unmask = ipi_mux_unmask, + .ipi_send_mask = ipi_mux_send_mask, +}; + +static int ipi_mux_domain_alloc(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_set_percpu_devid(virq + i); + irq_domain_set_info(d, virq + i, i, &ipi_mux_chip, NULL, + handle_percpu_devid_irq, NULL, NULL); + } + + return 0; +} + +static const struct irq_domain_ops ipi_mux_domain_ops = { + .alloc = ipi_mux_domain_alloc, + .free = irq_domain_free_irqs_top, +}; + +/** + * ipi_mux_process - Process multiplexed virtual IPIs + */ +void ipi_mux_process(void) +{ + struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu); + irq_hw_number_t hwirq; + unsigned long ipis; + unsigned int en; + + /* + * Reading enable mask does not need to be ordered as long as + * this function is called from interrupt handler because only + * the CPU itself can change it's own enable mask. + */ + en = atomic_read(&icpu->enable); + + /* + * Clear the IPIs we are about to handle. This pairs with the + * atomic_fetch_or_release() in ipi_mux_send_mask(). + */ + ipis = atomic_fetch_andnot(en, &icpu->bits) & en; + + for_each_set_bit(hwirq, &ipis, BITS_PER_TYPE(int)) + generic_handle_domain_irq(ipi_mux_domain, hwirq); +} + +/** + * ipi_mux_create - Create virtual IPIs multiplexed on top of a single + * parent IPI. + * @nr_ipi: number of virtual IPIs to create. This should + * be <= BITS_PER_TYPE(int) + * @mux_send: callback to trigger parent IPI for a particular CPU + * + * Returns first virq of the newly created virtual IPIs upon success + * or <=0 upon failure + */ +int ipi_mux_create(unsigned int nr_ipi, void (*mux_send)(unsigned int cpu)) +{ + struct fwnode_handle *fwnode; + struct irq_domain *domain; + int rc; + + if (ipi_mux_domain) + return -EEXIST; + + if (BITS_PER_TYPE(int) < nr_ipi || !mux_send) + return -EINVAL; + + ipi_mux_pcpu = alloc_percpu(typeof(*ipi_mux_pcpu)); + if (!ipi_mux_pcpu) + return -ENOMEM; + + fwnode = irq_domain_alloc_named_fwnode("IPI-Mux"); + if (!fwnode) { + pr_err("unable to create IPI Mux fwnode\n"); + rc = -ENOMEM; + goto fail_free_cpu; + } + + domain = irq_domain_create_linear(fwnode, nr_ipi, + &ipi_mux_domain_ops, NULL); + if (!domain) { + pr_err("unable to add IPI Mux domain\n"); + rc = -ENOMEM; + goto fail_free_fwnode; + } + + domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE; + irq_domain_update_bus_token(domain, DOMAIN_BUS_IPI); + + rc = irq_domain_alloc_irqs(domain, nr_ipi, NUMA_NO_NODE, NULL); + if (rc <= 0) { + pr_err("unable to alloc IRQs from IPI Mux domain\n"); + goto fail_free_domain; + } + + ipi_mux_domain = domain; + ipi_mux_send = mux_send; + + return rc; + +fail_free_domain: + irq_domain_remove(domain); +fail_free_fwnode: + irq_domain_free_fwnode(fwnode); +fail_free_cpu: + free_percpu(ipi_mux_pcpu); + return rc; +} diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 43e3d1be622c..961d4af76af3 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -14,11 +14,11 @@ /** * irq_reserve_ipi() - Setup an IPI to destination cpumask * @domain: IPI domain - * @dest: cpumask of cpus which can receive the IPI + * @dest: cpumask of CPUs which can receive the IPI * * Allocate a virq that can be used to send IPI to any CPU in dest mask. * - * On success it'll return linux irq number and error code on failure + * Return: Linux IRQ number on success or error code on failure */ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest) @@ -104,22 +104,22 @@ free_descs: /** * irq_destroy_ipi() - unreserve an IPI that was previously allocated - * @irq: linux irq number to be destroyed - * @dest: cpumask of cpus which should have the IPI removed + * @irq: Linux IRQ number to be destroyed + * @dest: cpumask of CPUs which should have the IPI removed * - * The IPIs allocated with irq_reserve_ipi() are retuerned to the system + * The IPIs allocated with irq_reserve_ipi() are returned to the system * destroying all virqs associated with them. * - * Return 0 on success or error code on failure. + * Return: %0 on success or error code on failure. */ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest) { struct irq_data *data = irq_get_irq_data(irq); - struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL; + const struct cpumask *ipimask; struct irq_domain *domain; unsigned int nr_irqs; - if (!irq || !data || !ipimask) + if (!irq || !data) return -EINVAL; domain = data->domain; @@ -131,7 +131,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest) return -EINVAL; } - if (WARN_ON(!cpumask_subset(dest, ipimask))) + ipimask = irq_data_get_affinity_mask(data); + if (!ipimask || WARN_ON(!cpumask_subset(dest, ipimask))) /* * Must be destroying a subset of CPUs to which this IPI * was set up to target @@ -150,24 +151,25 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest) } /** - * ipi_get_hwirq - Get the hwirq associated with an IPI to a cpu - * @irq: linux irq number - * @cpu: the target cpu + * ipi_get_hwirq - Get the hwirq associated with an IPI to a CPU + * @irq: Linux IRQ number + * @cpu: the target CPU * * When dealing with coprocessors IPI, we need to inform the coprocessor of * the hwirq it needs to use to receive and send IPIs. * - * Returns hwirq value on success and INVALID_HWIRQ on failure. + * Return: hwirq value on success or INVALID_HWIRQ on failure. */ irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu) { struct irq_data *data = irq_get_irq_data(irq); - struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL; + const struct cpumask *ipimask; - if (!data || !ipimask || cpu >= nr_cpu_ids) + if (!data || cpu >= nr_cpu_ids) return INVALID_HWIRQ; - if (!cpumask_test_cpu(cpu, ipimask)) + ipimask = irq_data_get_affinity_mask(data); + if (!ipimask || !cpumask_test_cpu(cpu, ipimask)) return INVALID_HWIRQ; /* @@ -186,9 +188,9 @@ EXPORT_SYMBOL_GPL(ipi_get_hwirq); static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data, const struct cpumask *dest, unsigned int cpu) { - struct cpumask *ipimask = irq_data_get_affinity_mask(data); + const struct cpumask *ipimask; - if (!chip || !ipimask) + if (!chip || !data) return -EINVAL; if (!chip->ipi_send_single && !chip->ipi_send_mask) @@ -197,6 +199,10 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data, if (cpu >= nr_cpu_ids) return -EINVAL; + ipimask = irq_data_get_affinity_mask(data); + if (!ipimask) + return -EINVAL; + if (dest) { if (!cpumask_subset(dest, ipimask)) return -EINVAL; @@ -216,7 +222,7 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data, * This function is for architecture or core code to speed up IPI sending. Not * usable from driver code. * - * Returns zero on success and negative error number on failure. + * Return: %0 on success or negative error number on failure. */ int __ipi_send_single(struct irq_desc *desc, unsigned int cpu) { @@ -250,7 +256,7 @@ int __ipi_send_single(struct irq_desc *desc, unsigned int cpu) } /** - * ipi_send_mask - send an IPI to target Linux SMP CPU(s) + * __ipi_send_mask - send an IPI to target Linux SMP CPU(s) * @desc: pointer to irq_desc of the IRQ * @dest: dest CPU(s), must be a subset of the mask passed to * irq_reserve_ipi() @@ -258,7 +264,7 @@ int __ipi_send_single(struct irq_desc *desc, unsigned int cpu) * This function is for architecture or core code to speed up IPI sending. Not * usable from driver code. * - * Returns zero on success and negative error number on failure. + * Return: %0 on success or negative error number on failure. */ int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest) { @@ -298,11 +304,11 @@ int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest) /** * ipi_send_single - Send an IPI to a single CPU - * @virq: linux irq number from irq_reserve_ipi() + * @virq: Linux IRQ number from irq_reserve_ipi() * @cpu: destination CPU, must in the destination mask passed to * irq_reserve_ipi() * - * Returns zero on success and negative error number on failure. + * Return: %0 on success or negative error number on failure. */ int ipi_send_single(unsigned int virq, unsigned int cpu) { @@ -319,11 +325,11 @@ EXPORT_SYMBOL_GPL(ipi_send_single); /** * ipi_send_mask - Send an IPI to target CPU(s) - * @virq: linux irq number from irq_reserve_ipi() + * @virq: Linux IRQ number from irq_reserve_ipi() * @dest: dest CPU(s), must be a subset of the mask passed to * irq_reserve_ipi() * - * Returns zero on success and negative error number on failure. + * Return: %0 on success or negative error number on failure. */ int ipi_send_mask(unsigned int virq, const struct cpumask *dest) { diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 48006608baf0..1a3d483548e2 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -4,30 +4,27 @@ * Copyright (C) 2020 Bartosz Golaszewski <bgolaszewski@baylibre.com> */ +#include <linux/cleanup.h> +#include <linux/interrupt.h> #include <linux/irq.h> #include <linux/irq_sim.h> #include <linux/irq_work.h> -#include <linux/interrupt.h> #include <linux/slab.h> struct irq_sim_work_ctx { struct irq_work work; - int irq_base; unsigned int irq_count; unsigned long *pending; struct irq_domain *domain; + struct irq_sim_ops ops; + void *user_data; }; struct irq_sim_irq_ctx { - int irqnum; bool enabled; struct irq_sim_work_ctx *work_ctx; }; -struct irq_sim_devres { - struct irq_domain *domain; -}; - static void irq_sim_irqmask(struct irq_data *data) { struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); @@ -92,6 +89,31 @@ static int irq_sim_set_irqchip_state(struct irq_data *data, return 0; } +static int irq_sim_request_resources(struct irq_data *data) +{ + struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); + struct irq_sim_work_ctx *work_ctx = irq_ctx->work_ctx; + irq_hw_number_t hwirq = irqd_to_hwirq(data); + + if (work_ctx->ops.irq_sim_irq_requested) + return work_ctx->ops.irq_sim_irq_requested(work_ctx->domain, + hwirq, + work_ctx->user_data); + + return 0; +} + +static void irq_sim_release_resources(struct irq_data *data) +{ + struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data); + struct irq_sim_work_ctx *work_ctx = irq_ctx->work_ctx; + irq_hw_number_t hwirq = irqd_to_hwirq(data); + + if (work_ctx->ops.irq_sim_irq_released) + work_ctx->ops.irq_sim_irq_released(work_ctx->domain, hwirq, + work_ctx->user_data); +} + static struct irq_chip irq_sim_irqchip = { .name = "irq_sim", .irq_mask = irq_sim_irqmask, @@ -99,6 +121,8 @@ static struct irq_chip irq_sim_irqchip = { .irq_set_type = irq_sim_set_type, .irq_get_irqchip_state = irq_sim_get_irqchip_state, .irq_set_irqchip_state = irq_sim_set_irqchip_state, + .irq_request_resources = irq_sim_request_resources, + .irq_release_resources = irq_sim_release_resources, }; static void irq_sim_handle_irq(struct irq_work *work) @@ -159,7 +183,7 @@ static const struct irq_domain_ops irq_sim_domain_ops = { * irq_domain_create_sim - Create a new interrupt simulator irq_domain and * allocate a range of dummy interrupts. * - * @fnode: struct fwnode_handle to be associated with this domain. + * @fwnode: struct fwnode_handle to be associated with this domain. * @num_irqs: Number of interrupts to allocate. * * On success: return a new irq_domain object. @@ -168,35 +192,42 @@ static const struct irq_domain_ops irq_sim_domain_ops = { struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode, unsigned int num_irqs) { - struct irq_sim_work_ctx *work_ctx; + return irq_domain_create_sim_full(fwnode, num_irqs, NULL, NULL); +} +EXPORT_SYMBOL_GPL(irq_domain_create_sim); + +struct irq_domain *irq_domain_create_sim_full(struct fwnode_handle *fwnode, + unsigned int num_irqs, + const struct irq_sim_ops *ops, + void *data) +{ + struct irq_sim_work_ctx *work_ctx __free(kfree) = + kmalloc(sizeof(*work_ctx), GFP_KERNEL); - work_ctx = kmalloc(sizeof(*work_ctx), GFP_KERNEL); if (!work_ctx) - goto err_out; + return ERR_PTR(-ENOMEM); - work_ctx->pending = bitmap_zalloc(num_irqs, GFP_KERNEL); - if (!work_ctx->pending) - goto err_free_work_ctx; + unsigned long *pending __free(bitmap) = bitmap_zalloc(num_irqs, GFP_KERNEL); + if (!pending) + return ERR_PTR(-ENOMEM); work_ctx->domain = irq_domain_create_linear(fwnode, num_irqs, &irq_sim_domain_ops, work_ctx); if (!work_ctx->domain) - goto err_free_bitmap; + return ERR_PTR(-ENOMEM); work_ctx->irq_count = num_irqs; - init_irq_work(&work_ctx->work, irq_sim_handle_irq); + work_ctx->work = IRQ_WORK_INIT_HARD(irq_sim_handle_irq); + work_ctx->pending = no_free_ptr(pending); + work_ctx->user_data = data; - return work_ctx->domain; + if (ops) + memcpy(&work_ctx->ops, ops, sizeof(*ops)); -err_free_bitmap: - bitmap_free(work_ctx->pending); -err_free_work_ctx: - kfree(work_ctx); -err_out: - return ERR_PTR(-ENOMEM); + return no_free_ptr(work_ctx)->domain; } -EXPORT_SYMBOL_GPL(irq_domain_create_sim); +EXPORT_SYMBOL_GPL(irq_domain_create_sim_full); /** * irq_domain_remove_sim - Deinitialize the interrupt simulator domain: free @@ -216,11 +247,11 @@ void irq_domain_remove_sim(struct irq_domain *domain) } EXPORT_SYMBOL_GPL(irq_domain_remove_sim); -static void devm_irq_domain_release_sim(struct device *dev, void *res) +static void devm_irq_domain_remove_sim(void *data) { - struct irq_sim_devres *this = res; + struct irq_domain *domain = data; - irq_domain_remove_sim(this->domain); + irq_domain_remove_sim(domain); } /** @@ -228,7 +259,7 @@ static void devm_irq_domain_release_sim(struct device *dev, void *res) * a managed device. * * @dev: Device to initialize the simulator object for. - * @fnode: struct fwnode_handle to be associated with this domain. + * @fwnode: struct fwnode_handle to be associated with this domain. * @num_irqs: Number of interrupts to allocate * * On success: return a new irq_domain object. @@ -238,20 +269,29 @@ struct irq_domain *devm_irq_domain_create_sim(struct device *dev, struct fwnode_handle *fwnode, unsigned int num_irqs) { - struct irq_sim_devres *dr; + return devm_irq_domain_create_sim_full(dev, fwnode, num_irqs, + NULL, NULL); +} +EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim); - dr = devres_alloc(devm_irq_domain_release_sim, - sizeof(*dr), GFP_KERNEL); - if (!dr) - return ERR_PTR(-ENOMEM); +struct irq_domain * +devm_irq_domain_create_sim_full(struct device *dev, + struct fwnode_handle *fwnode, + unsigned int num_irqs, + const struct irq_sim_ops *ops, + void *data) +{ + struct irq_domain *domain; + int ret; - dr->domain = irq_domain_create_sim(fwnode, num_irqs); - if (IS_ERR(dr->domain)) { - devres_free(dr); - return dr->domain; - } + domain = irq_domain_create_sim_full(fwnode, num_irqs, ops, data); + if (IS_ERR(domain)) + return domain; - devres_add(dev, dr); - return dr->domain; + ret = devm_add_action_or_reset(dev, devm_irq_domain_remove_sim, domain); + if (ret) + return ERR_PTR(ret); + + return domain; } -EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim); +EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim_full); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index cc1a09406c6e..287830739783 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -12,10 +12,10 @@ #include <linux/export.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> -#include <linux/radix-tree.h> -#include <linux/bitmap.h> +#include <linux/maple_tree.h> #include <linux/irqdomain.h> #include <linux/sysfs.h> +#include <linux/string_choices.h> #include "internals.h" @@ -31,7 +31,7 @@ static int __init irq_affinity_setup(char *str) cpulist_parse(str, irq_default_affinity); /* * Set at least the boot cpu. We don't want to end up with - * bugreports caused by random comandline masks + * bugreports caused by random commandline masks */ cpumask_set_cpu(smp_processor_id(), irq_default_affinity); return 1; @@ -93,11 +93,23 @@ static void desc_smp_init(struct irq_desc *desc, int node, #endif } +static void free_masks(struct irq_desc *desc) +{ +#ifdef CONFIG_GENERIC_PENDING_IRQ + free_cpumask_var(desc->pending_mask); +#endif + free_cpumask_var(desc->irq_common_data.affinity); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + free_cpumask_var(desc->irq_common_data.effective_affinity); +#endif +} + #else static inline int alloc_masks(struct irq_desc *desc, int node) { return 0; } static inline void desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { } +static inline void free_masks(struct irq_desc *desc) { } #endif static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, @@ -123,15 +135,106 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, desc->name = NULL; desc->owner = owner; for_each_possible_cpu(cpu) - *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; + *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { }; desc_smp_init(desc, node, affinity); } -int nr_irqs = NR_IRQS; -EXPORT_SYMBOL_GPL(nr_irqs); +static unsigned int nr_irqs = NR_IRQS; + +/** + * irq_get_nr_irqs() - Number of interrupts supported by the system. + */ +unsigned int irq_get_nr_irqs(void) +{ + return nr_irqs; +} +EXPORT_SYMBOL_GPL(irq_get_nr_irqs); + +/** + * irq_set_nr_irqs() - Set the number of interrupts supported by the system. + * @nr: New number of interrupts. + * + * Return: @nr. + */ +unsigned int irq_set_nr_irqs(unsigned int nr) +{ + nr_irqs = nr; + + return nr; +} +EXPORT_SYMBOL_GPL(irq_set_nr_irqs); static DEFINE_MUTEX(sparse_irq_lock); -static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS); +static struct maple_tree sparse_irqs = MTREE_INIT_EXT(sparse_irqs, + MT_FLAGS_ALLOC_RANGE | + MT_FLAGS_LOCK_EXTERN | + MT_FLAGS_USE_RCU, + sparse_irq_lock); + +static int irq_find_free_area(unsigned int from, unsigned int cnt) +{ + MA_STATE(mas, &sparse_irqs, 0, 0); + + if (mas_empty_area(&mas, from, MAX_SPARSE_IRQS, cnt)) + return -ENOSPC; + return mas.index; +} + +static unsigned int irq_find_at_or_after(unsigned int offset) +{ + unsigned long index = offset; + struct irq_desc *desc; + + guard(rcu)(); + desc = mt_find(&sparse_irqs, &index, nr_irqs); + + return desc ? irq_desc_get_irq(desc) : nr_irqs; +} + +static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) +{ + MA_STATE(mas, &sparse_irqs, irq, irq); + WARN_ON(mas_store_gfp(&mas, desc, GFP_KERNEL) != 0); +} + +static void delete_irq_desc(unsigned int irq) +{ + MA_STATE(mas, &sparse_irqs, irq, irq); + mas_erase(&mas); +} + +#ifdef CONFIG_SPARSE_IRQ +static const struct kobj_type irq_kobj_type; +#endif + +static int init_desc(struct irq_desc *desc, int irq, int node, + unsigned int flags, + const struct cpumask *affinity, + struct module *owner) +{ + desc->kstat_irqs = alloc_percpu(struct irqstat); + if (!desc->kstat_irqs) + return -ENOMEM; + + if (alloc_masks(desc, node)) { + free_percpu(desc->kstat_irqs); + return -ENOMEM; + } + + raw_spin_lock_init(&desc->lock); + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + mutex_init(&desc->request_mutex); + init_waitqueue_head(&desc->wait_for_threads); + desc_set_defaults(irq, desc, node, affinity, owner); + irqd_set(&desc->irq_data, flags); + irq_resend_init(desc); +#ifdef CONFIG_SPARSE_IRQ + kobject_init(&desc->kobj, &irq_kobj_type); + init_rcu_head(&desc->rcu); +#endif + + return 0; +} #ifdef CONFIG_SPARSE_IRQ @@ -188,7 +291,7 @@ static ssize_t hwirq_show(struct kobject *kobj, raw_spin_lock_irq(&desc->lock); if (desc->irq_data.domain) - ret = sprintf(buf, "%d\n", (int)desc->irq_data.hwirq); + ret = sprintf(buf, "%lu\n", desc->irq_data.hwirq); raw_spin_unlock_irq(&desc->lock); return ret; @@ -218,8 +321,7 @@ static ssize_t wakeup_show(struct kobject *kobj, ssize_t ret = 0; raw_spin_lock_irq(&desc->lock); - ret = sprintf(buf, "%s\n", - irqd_is_wakeup_set(&desc->irq_data) ? "enabled" : "disabled"); + ret = sprintf(buf, "%s\n", str_enabled_disabled(irqd_is_wakeup_set(&desc->irq_data))); raw_spin_unlock_irq(&desc->lock); return ret; @@ -251,7 +353,7 @@ static ssize_t actions_show(struct kobject *kobj, char *p = ""; raw_spin_lock_irq(&desc->lock); - for (action = desc->action; action != NULL; action = action->next) { + for_each_action_of_desc(desc, action) { ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s", p, action->name); p = ","; @@ -277,7 +379,7 @@ static struct attribute *irq_attrs[] = { }; ATTRIBUTE_GROUPS(irq); -static struct kobj_type irq_kobj_type = { +static const struct kobj_type irq_kobj_type = { .release = irq_kobj_release, .sysfs_ops = &kobj_sysfs_ops, .default_groups = irq_groups, @@ -288,22 +390,25 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc) if (irq_kobj_base) { /* * Continue even in case of failure as this is nothing - * crucial. + * crucial and failures in the late irq_sysfs_init() + * cannot be rolled back. */ if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq)) pr_warn("Failed to add kobject for irq %d\n", irq); + else + desc->istate |= IRQS_SYSFS; } } static void irq_sysfs_del(struct irq_desc *desc) { /* - * If irq_sysfs_init() has not yet been invoked (early boot), then - * irq_kobj_base is NULL and the descriptor was never added. - * kobject_del() complains about a object with no parent, so make - * it conditional. + * Only invoke kobject_del() when kobject_add() was successfully + * invoked for the descriptor. This covers both early boot, where + * sysfs is not initialized yet, and the case of a failed + * kobject_add() invocation. */ - if (irq_kobj_base) + if (desc->istate & IRQS_SYSFS) kobject_del(&desc->kobj); } @@ -332,7 +437,7 @@ postcore_initcall(irq_sysfs_init); #else /* !CONFIG_SYSFS */ -static struct kobj_type irq_kobj_type = { +static const struct kobj_type irq_kobj_type = { .release = irq_kobj_release, }; @@ -341,41 +446,14 @@ static void irq_sysfs_del(struct irq_desc *desc) {} #endif /* CONFIG_SYSFS */ -static RADIX_TREE(irq_desc_tree, GFP_KERNEL); - -static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) -{ - radix_tree_insert(&irq_desc_tree, irq, desc); -} - struct irq_desc *irq_to_desc(unsigned int irq) { - return radix_tree_lookup(&irq_desc_tree, irq); + return mtree_load(&sparse_irqs, irq); } #ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE EXPORT_SYMBOL_GPL(irq_to_desc); #endif -static void delete_irq_desc(unsigned int irq) -{ - radix_tree_delete(&irq_desc_tree, irq); -} - -#ifdef CONFIG_SMP -static void free_masks(struct irq_desc *desc) -{ -#ifdef CONFIG_GENERIC_PENDING_IRQ - free_cpumask_var(desc->pending_mask); -#endif - free_cpumask_var(desc->irq_common_data.affinity); -#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK - free_cpumask_var(desc->irq_common_data.effective_affinity); -#endif -} -#else -static inline void free_masks(struct irq_desc *desc) { } -#endif - void irq_lock_sparse(void) { mutex_lock(&sparse_irq_lock); @@ -391,34 +469,19 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, struct module *owner) { struct irq_desc *desc; + int ret; desc = kzalloc_node(sizeof(*desc), GFP_KERNEL, node); if (!desc) return NULL; - /* allocate based on nr_cpu_ids */ - desc->kstat_irqs = alloc_percpu(unsigned int); - if (!desc->kstat_irqs) - goto err_desc; - if (alloc_masks(desc, node)) - goto err_kstat; - - raw_spin_lock_init(&desc->lock); - lockdep_set_class(&desc->lock, &irq_desc_lock_class); - mutex_init(&desc->request_mutex); - init_rcu_head(&desc->rcu); - - desc_set_defaults(irq, desc, node, affinity, owner); - irqd_set(&desc->irq_data, flags); - kobject_init(&desc->kobj, &irq_kobj_type); + ret = init_desc(desc, irq, node, flags, affinity, owner); + if (unlikely(ret)) { + kfree(desc); + return NULL; + } return desc; - -err_kstat: - free_percpu(desc->kstat_irqs); -err_desc: - kfree(desc); - return NULL; } static void irq_kobj_release(struct kobject *kobj) @@ -489,6 +552,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, flags = IRQD_AFFINITY_MANAGED | IRQD_MANAGED_SHUTDOWN; } + flags |= IRQD_AFFINITY_SET; mask = &affinity->mask; node = cpu_to_node(cpumask_first(mask)); affinity++; @@ -501,7 +565,6 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, irq_sysfs_add(start + i, desc); irq_add_debugfs_entry(start + i, desc); } - bitmap_set(allocated_irqs, start, cnt); return start; err: @@ -512,7 +575,7 @@ err: static int irq_expand_nr_irqs(unsigned int nr) { - if (nr > IRQ_BITMAP_BITS) + if (nr > MAX_SPARSE_IRQS) return -ENOMEM; nr_irqs = nr; return 0; @@ -530,18 +593,17 @@ int __init early_irq_init(void) printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n", NR_IRQS, nr_irqs, initcnt); - if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS)) - nr_irqs = IRQ_BITMAP_BITS; + if (WARN_ON(nr_irqs > MAX_SPARSE_IRQS)) + nr_irqs = MAX_SPARSE_IRQS; - if (WARN_ON(initcnt > IRQ_BITMAP_BITS)) - initcnt = IRQ_BITMAP_BITS; + if (WARN_ON(initcnt > MAX_SPARSE_IRQS)) + initcnt = MAX_SPARSE_IRQS; if (initcnt > nr_irqs) nr_irqs = initcnt; for (i = 0; i < initcnt; i++) { desc = alloc_desc(i, node, 0, NULL, NULL); - set_bit(i, allocated_irqs); irq_insert_desc(i, desc); } return arch_early_irq_init(); @@ -560,24 +622,29 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { int __init early_irq_init(void) { int count, i, node = first_online_node; - struct irq_desc *desc; + int ret; init_irq_default_affinity(); printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS); - desc = irq_desc; count = ARRAY_SIZE(irq_desc); for (i = 0; i < count; i++) { - desc[i].kstat_irqs = alloc_percpu(unsigned int); - alloc_masks(&desc[i], node); - raw_spin_lock_init(&desc[i].lock); - lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - mutex_init(&desc[i].request_mutex); - desc_set_defaults(i, &desc[i], node, NULL, NULL); + ret = init_desc(irq_desc + i, i, node, 0, NULL, NULL); + if (unlikely(ret)) + goto __free_desc_res; } + return arch_early_irq_init(); + +__free_desc_res: + while (--i >= 0) { + free_masks(irq_desc + i); + free_percpu(irq_desc[i].kstat_irqs); + } + + return ret; } struct irq_desc *irq_to_desc(unsigned int irq) @@ -594,6 +661,7 @@ static void free_desc(unsigned int irq) raw_spin_lock_irqsave(&desc->lock, flags); desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL); raw_spin_unlock_irqrestore(&desc->lock, flags); + delete_irq_desc(irq); } static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, @@ -606,8 +674,8 @@ static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, struct irq_desc *desc = irq_to_desc(start + i); desc->owner = owner; + irq_insert_desc(start + i, desc); } - bitmap_set(allocated_irqs, start, cnt); return start; } @@ -619,7 +687,7 @@ static int irq_expand_nr_irqs(unsigned int nr) void irq_mark_irq(unsigned int irq) { mutex_lock(&sparse_irq_lock); - bitmap_set(allocated_irqs, irq, 1); + irq_insert_desc(irq, irq_desc + irq); mutex_unlock(&sparse_irq_lock); } @@ -632,106 +700,117 @@ void irq_init_desc(unsigned int irq) #endif /* !CONFIG_SPARSE_IRQ */ -/** - * generic_handle_irq - Invoke the handler for a particular irq - * @irq: The irq number to handle - * - */ -int generic_handle_irq(unsigned int irq) +int handle_irq_desc(struct irq_desc *desc) { - struct irq_desc *desc = irq_to_desc(irq); struct irq_data *data; if (!desc) return -EINVAL; data = irq_desc_get_irq_data(desc); - if (WARN_ON_ONCE(!in_irq() && handle_enforce_irqctx(data))) + if (WARN_ON_ONCE(!in_hardirq() && irqd_is_handle_enforce_irqctx(data))) return -EPERM; generic_handle_irq_desc(desc); return 0; } -EXPORT_SYMBOL_GPL(generic_handle_irq); -#ifdef CONFIG_HANDLE_DOMAIN_IRQ /** - * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain - * @domain: The domain where to perform the lookup - * @hwirq: The HW irq number to convert to a logical one - * @lookup: Whether to perform the domain lookup or not - * @regs: Register file coming from the low-level handling code + * generic_handle_irq - Invoke the handler for a particular irq + * @irq: The irq number to handle * * Returns: 0 on success, or -EINVAL if conversion has failed - */ -int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, - bool lookup, struct pt_regs *regs) + * + * This function must be called from an IRQ context with irq regs + * initialized. + */ +int generic_handle_irq(unsigned int irq) { - struct pt_regs *old_regs = set_irq_regs(regs); - unsigned int irq = hwirq; - int ret = 0; - - irq_enter(); - -#ifdef CONFIG_IRQ_DOMAIN - if (lookup) - irq = irq_find_mapping(domain, hwirq); -#endif + return handle_irq_desc(irq_to_desc(irq)); +} +EXPORT_SYMBOL_GPL(generic_handle_irq); - /* - * Some hardware gives randomly wrong interrupts. Rather - * than crashing, do something sensible. - */ - if (unlikely(!irq || irq >= nr_irqs)) { - ack_bad_irq(irq); - ret = -EINVAL; - } else { - generic_handle_irq(irq); - } +/** + * generic_handle_irq_safe - Invoke the handler for a particular irq from any + * context. + * @irq: The irq number to handle + * + * Returns: 0 on success, a negative value on error. + * + * This function can be called from any context (IRQ or process context). It + * will report an error if not invoked from IRQ context and the irq has been + * marked to enforce IRQ-context only. + */ +int generic_handle_irq_safe(unsigned int irq) +{ + unsigned long flags; + int ret; - irq_exit(); - set_irq_regs(old_regs); + local_irq_save(flags); + ret = handle_irq_desc(irq_to_desc(irq)); + local_irq_restore(flags); return ret; } +EXPORT_SYMBOL_GPL(generic_handle_irq_safe); #ifdef CONFIG_IRQ_DOMAIN /** - * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain + * generic_handle_domain_irq - Invoke the handler for a HW irq belonging + * to a domain. * @domain: The domain where to perform the lookup * @hwirq: The HW irq number to convert to a logical one - * @regs: Register file coming from the low-level handling code - * - * This function must be called from an NMI context. * * Returns: 0 on success, or -EINVAL if conversion has failed + * + * This function must be called from an IRQ context with irq regs + * initialized. */ -int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, - struct pt_regs *regs) +int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq) { - struct pt_regs *old_regs = set_irq_regs(regs); - unsigned int irq; - int ret = 0; - - /* - * NMI context needs to be setup earlier in order to deal with tracing. - */ - WARN_ON(!in_nmi()); - - irq = irq_find_mapping(domain, hwirq); + return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); +} +EXPORT_SYMBOL_GPL(generic_handle_domain_irq); - /* - * ack_bad_irq is not NMI-safe, just report - * an invalid interrupt. - */ - if (likely(irq)) - generic_handle_irq(irq); - else - ret = -EINVAL; + /** + * generic_handle_irq_safe - Invoke the handler for a HW irq belonging + * to a domain from any context. + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * + * Returns: 0 on success, a negative value on error. + * + * This function can be called from any context (IRQ or process + * context). If the interrupt is marked as 'enforce IRQ-context only' then + * the function must be invoked from hard interrupt context. + */ +int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq) +{ + unsigned long flags; + int ret; - set_irq_regs(old_regs); + local_irq_save(flags); + ret = handle_irq_desc(irq_resolve_mapping(domain, hwirq)); + local_irq_restore(flags); return ret; } -#endif +EXPORT_SYMBOL_GPL(generic_handle_domain_irq_safe); + +/** + * generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging + * to a domain. + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * + * Returns: 0 on success, or -EINVAL if conversion has failed + * + * This function must be called from an NMI context with irq regs + * initialized. + **/ +int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq) +{ + WARN_ON_ONCE(!in_nmi()); + return handle_irq_desc(irq_resolve_mapping(domain, hwirq)); +} #endif /* Dynamic interrupt handling */ @@ -752,7 +831,6 @@ void irq_free_descs(unsigned int from, unsigned int cnt) for (i = 0; i < cnt; i++) free_desc(from + i); - bitmap_clear(allocated_irqs, from, cnt); mutex_unlock(&sparse_irq_lock); } EXPORT_SYMBOL_GPL(irq_free_descs); @@ -794,8 +872,7 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, mutex_lock(&sparse_irq_lock); - start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS, - from, cnt, 0); + start = irq_find_free_area(from, cnt); ret = -EEXIST; if (irq >=0 && start != irq) goto unlock; @@ -820,7 +897,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs); */ unsigned int irq_get_next_irq(unsigned int offset) { - return find_next_bit(allocated_irqs, nr_irqs, offset); + return irq_find_at_or_after(offset); } struct irq_desc * @@ -860,10 +937,7 @@ int irq_set_percpu_devid_partition(unsigned int irq, { struct irq_desc *desc = irq_to_desc(irq); - if (!desc) - return -EINVAL; - - if (desc->percpu_enabled) + if (!desc || desc->percpu_enabled) return -EINVAL; desc->percpu_enabled = kzalloc(sizeof(*desc->percpu_enabled), GFP_KERNEL); @@ -871,10 +945,7 @@ int irq_set_percpu_devid_partition(unsigned int irq, if (!desc->percpu_enabled) return -ENOMEM; - if (affinity) - desc->percpu_affinity = affinity; - else - desc->percpu_affinity = cpu_possible_mask; + desc->percpu_affinity = affinity ? : cpu_possible_mask; irq_set_percpu_devid_flags(irq); return 0; @@ -917,33 +988,58 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { struct irq_desc *desc = irq_to_desc(irq); - return desc && desc->kstat_irqs ? - *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; + return desc && desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, cpu) : 0; } -static bool irq_is_nmi(struct irq_desc *desc) +unsigned int kstat_irqs_desc(struct irq_desc *desc, const struct cpumask *cpumask) { - return desc->istate & IRQS_NMI; -} - -static unsigned int kstat_irqs(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); unsigned int sum = 0; int cpu; - if (!desc || !desc->kstat_irqs) - return 0; if (!irq_settings_is_per_cpu_devid(desc) && !irq_settings_is_per_cpu(desc) && !irq_is_nmi(desc)) return data_race(desc->tot_count); - for_each_possible_cpu(cpu) - sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu)); + for_each_cpu(cpu, cpumask) + sum += data_race(per_cpu(desc->kstat_irqs->cnt, cpu)); return sum; } +static unsigned int kstat_irqs(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !desc->kstat_irqs) + return 0; + return kstat_irqs_desc(desc, cpu_possible_mask); +} + +#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT + +void kstat_snapshot_irqs(void) +{ + struct irq_desc *desc; + unsigned int irq; + + for_each_irq_desc(irq, desc) { + if (!desc->kstat_irqs) + continue; + this_cpu_write(desc->kstat_irqs->ref, this_cpu_read(desc->kstat_irqs->cnt)); + } +} + +unsigned int kstat_get_irq_since_snapshot(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !desc->kstat_irqs) + return 0; + return this_cpu_read(desc->kstat_irqs->cnt) - this_cpu_read(desc->kstat_irqs->ref); +} + +#endif + /** * kstat_irqs_usr - Get the statistics for an interrupt from thread context * @irq: The interrupt number diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 6aacd342cd14..ec6d8e72d980 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -25,7 +25,11 @@ static DEFINE_MUTEX(irq_domain_mutex); static struct irq_domain *irq_default_domain; +static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity); static void irq_domain_check_hierarchy(struct irq_domain *domain); +static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq); struct irqchip_fwid { struct fwnode_handle fwnode; @@ -62,7 +66,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops); * @name: Optional user provided domain name * @pa: Optional user-provided physical address * - * Allocate a struct irqchip_fwid, and return a poiner to the embedded + * Allocate a struct irqchip_fwid, and return a pointer to the embedded * fwnode_handle (or NULL on failure). * * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are @@ -107,6 +111,7 @@ EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode); /** * irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle + * @fwnode: fwnode_handle to free * * Free a fwnode_handle allocated with irq_domain_alloc_fwnode. */ @@ -114,7 +119,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) { struct irqchip_fwid *fwid; - if (WARN_ON(!is_fwnode_irqchip(fwnode))) + if (!fwnode || WARN_ON(!is_fwnode_irqchip(fwnode))) return; fwid = container_of(fwnode, struct irqchip_fwid, fwnode); @@ -123,108 +128,251 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) } EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); -/** - * __irq_domain_add() - Allocate a new irq_domain data structure - * @fwnode: firmware node for the interrupt controller - * @size: Size of linear map; 0 for radix mapping only - * @hwirq_max: Maximum number of interrupts supported by controller - * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no - * direct mapping - * @ops: domain callbacks - * @host_data: Controller private data pointer - * - * Allocates and initializes an irq_domain structure. - * Returns pointer to IRQ domain, or NULL on failure. - */ -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, - irq_hw_number_t hwirq_max, int direct_max, - const struct irq_domain_ops *ops, - void *host_data) +static int alloc_name(struct irq_domain *domain, char *base, enum irq_domain_bus_token bus_token) { - struct irqchip_fwid *fwid; - struct irq_domain *domain; + if (bus_token == DOMAIN_BUS_ANY) + domain->name = kasprintf(GFP_KERNEL, "%s", base); + else + domain->name = kasprintf(GFP_KERNEL, "%s-%d", base, bus_token); + if (!domain->name) + return -ENOMEM; + + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + return 0; +} + +static int alloc_fwnode_name(struct irq_domain *domain, const struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token, const char *suffix) +{ + const char *sep = suffix ? "-" : ""; + const char *suf = suffix ? : ""; + char *name; + + if (bus_token == DOMAIN_BUS_ANY) + name = kasprintf(GFP_KERNEL, "%pfw%s%s", fwnode, sep, suf); + else + name = kasprintf(GFP_KERNEL, "%pfw%s%s-%d", fwnode, sep, suf, bus_token); + if (!name) + return -ENOMEM; + + /* + * fwnode paths contain '/', which debugfs is legitimately unhappy + * about. Replace them with ':', which does the trick and is not as + * offensive as '\'... + */ + domain->name = strreplace(name, '/', ':'); + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + return 0; +} +static int alloc_unknown_name(struct irq_domain *domain, enum irq_domain_bus_token bus_token) +{ static atomic_t unknown_domains; + int id = atomic_inc_return(&unknown_domains); - domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), - GFP_KERNEL, of_node_to_nid(to_of_node(fwnode))); - if (!domain) - return NULL; + if (bus_token == DOMAIN_BUS_ANY) + domain->name = kasprintf(GFP_KERNEL, "unknown-%d", id); + else + domain->name = kasprintf(GFP_KERNEL, "unknown-%d-%d", id, bus_token); + if (!domain->name) + return -ENOMEM; + + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + return 0; +} + +static int irq_domain_set_name(struct irq_domain *domain, const struct irq_domain_info *info) +{ + enum irq_domain_bus_token bus_token = info->bus_token; + const struct fwnode_handle *fwnode = info->fwnode; if (is_fwnode_irqchip(fwnode)) { - fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode); + + /* + * The name_suffix is only intended to be used to avoid a name + * collision when multiple domains are created for a single + * device and the name is picked using a real device node. + * (Typical use-case is regmap-IRQ controllers for devices + * providing more than one physical IRQ.) There should be no + * need to use name_suffix with irqchip-fwnode. + */ + if (info->name_suffix) + return -EINVAL; switch (fwid->type) { case IRQCHIP_FWNODE_NAMED: case IRQCHIP_FWNODE_NAMED_ID: - domain->fwnode = fwnode; - domain->name = kstrdup(fwid->name, GFP_KERNEL); - if (!domain->name) { - kfree(domain); - return NULL; - } - domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; - break; + return alloc_name(domain, fwid->name, bus_token); default: - domain->fwnode = fwnode; domain->name = fwid->name; - break; + if (bus_token != DOMAIN_BUS_ANY) + return alloc_name(domain, fwid->name, bus_token); } - } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) || - is_software_node(fwnode)) { - char *name; - /* - * fwnode paths contain '/', which debugfs is legitimately - * unhappy about. Replace them with ':', which does - * the trick and is not as offensive as '\'... - */ - name = kasprintf(GFP_KERNEL, "%pfw", fwnode); - if (!name) { - kfree(domain); - return NULL; - } + } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) || is_software_node(fwnode)) { + return alloc_fwnode_name(domain, fwnode, bus_token, info->name_suffix); + } - strreplace(name, '/', ':'); + if (domain->name) + return 0; - domain->name = name; - domain->fwnode = fwnode; - domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; - } + if (fwnode) + pr_err("Invalid fwnode type for irqdomain\n"); + return alloc_unknown_name(domain, bus_token); +} - if (!domain->name) { - if (fwnode) - pr_err("Invalid fwnode type for irqdomain\n"); - domain->name = kasprintf(GFP_KERNEL, "unknown-%d", - atomic_inc_return(&unknown_domains)); - if (!domain->name) { - kfree(domain); - return NULL; - } - domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; +static struct irq_domain *__irq_domain_create(const struct irq_domain_info *info) +{ + struct irq_domain *domain; + int err; + + if (WARN_ON((info->size && info->direct_max) || + (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && info->direct_max) || + (info->direct_max && info->direct_max != info->hwirq_max))) + return ERR_PTR(-EINVAL); + + domain = kzalloc_node(struct_size(domain, revmap, info->size), + GFP_KERNEL, of_node_to_nid(to_of_node(info->fwnode))); + if (!domain) + return ERR_PTR(-ENOMEM); + + err = irq_domain_set_name(domain, info); + if (err) { + kfree(domain); + return ERR_PTR(err); } - fwnode_handle_get(fwnode); + domain->fwnode = fwnode_handle_get(info->fwnode); + fwnode_dev_initialized(domain->fwnode, true); /* Fill structure */ INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); - mutex_init(&domain->revmap_tree_mutex); - domain->ops = ops; - domain->host_data = host_data; - domain->hwirq_max = hwirq_max; - domain->revmap_size = size; - domain->revmap_direct_max_irq = direct_max; + domain->ops = info->ops; + domain->host_data = info->host_data; + domain->bus_token = info->bus_token; + domain->hwirq_max = info->hwirq_max; + + if (info->direct_max) + domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP; + + domain->revmap_size = info->size; + + /* + * Hierarchical domains use the domain lock of the root domain + * (innermost domain). + * + * For non-hierarchical domains (as for root domains), the root + * pointer is set to the domain itself so that &domain->root->mutex + * always points to the right lock. + */ + mutex_init(&domain->mutex); + domain->root = domain; + irq_domain_check_hierarchy(domain); + return domain; +} + +static void __irq_domain_publish(struct irq_domain *domain) +{ mutex_lock(&irq_domain_mutex); debugfs_add_domain_dir(domain); list_add(&domain->link, &irq_domain_list); mutex_unlock(&irq_domain_mutex); pr_debug("Added domain %s\n", domain->name); +} + +static void irq_domain_free(struct irq_domain *domain) +{ + fwnode_dev_initialized(domain->fwnode, false); + fwnode_handle_put(domain->fwnode); + if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) + kfree(domain->name); + kfree(domain); +} + +static void irq_domain_instantiate_descs(const struct irq_domain_info *info) +{ + if (!IS_ENABLED(CONFIG_SPARSE_IRQ)) + return; + + if (irq_alloc_descs(info->virq_base, info->virq_base, info->size, + of_node_to_nid(to_of_node(info->fwnode))) < 0) { + pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", + info->virq_base); + } +} + +static struct irq_domain *__irq_domain_instantiate(const struct irq_domain_info *info, + bool cond_alloc_descs, bool force_associate) +{ + struct irq_domain *domain; + int err; + + domain = __irq_domain_create(info); + if (IS_ERR(domain)) + return domain; + + domain->flags |= info->domain_flags; + domain->exit = info->exit; + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + if (info->parent) { + domain->root = info->parent->root; + domain->parent = info->parent; + } +#endif + + if (info->dgc_info) { + err = irq_domain_alloc_generic_chips(domain, info->dgc_info); + if (err) + goto err_domain_free; + } + + if (info->init) { + err = info->init(domain); + if (err) + goto err_domain_gc_remove; + } + + __irq_domain_publish(domain); + + if (cond_alloc_descs && info->virq_base > 0) + irq_domain_instantiate_descs(info); + + /* + * Legacy interrupt domains have a fixed Linux interrupt number + * associated. Other interrupt domains can request association by + * providing a Linux interrupt number > 0. + */ + if (force_associate || info->virq_base > 0) { + irq_domain_associate_many(domain, info->virq_base, info->hwirq_base, + info->size - info->hwirq_base); + } + return domain; + +err_domain_gc_remove: + if (info->dgc_info) + irq_domain_remove_generic_chips(domain); +err_domain_free: + irq_domain_free(domain); + return ERR_PTR(err); } -EXPORT_SYMBOL_GPL(__irq_domain_add); + +/** + * irq_domain_instantiate() - Instantiate a new irq domain data structure + * @info: Domain information pointer pointing to the information for this domain + * + * Return: A pointer to the instantiated irq domain or an ERR_PTR value. + */ +struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info) +{ + return __irq_domain_instantiate(info, false, false); +} +EXPORT_SYMBOL_GPL(irq_domain_instantiate); /** * irq_domain_remove() - Remove an irq domain. @@ -236,6 +384,9 @@ EXPORT_SYMBOL_GPL(__irq_domain_add); */ void irq_domain_remove(struct irq_domain *domain) { + if (domain->exit) + domain->exit(domain); + mutex_lock(&irq_domain_mutex); debugfs_remove_domain_dir(domain); @@ -251,12 +402,11 @@ void irq_domain_remove(struct irq_domain *domain) mutex_unlock(&irq_domain_mutex); - pr_debug("Removed domain %s\n", domain->name); + if (domain->flags & IRQ_DOMAIN_FLAG_DESTROY_GC) + irq_domain_remove_generic_chips(domain); - fwnode_handle_put(domain->fwnode); - if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) - kfree(domain->name); - kfree(domain); + pr_debug("Removed domain %s\n", domain->name); + irq_domain_free(domain); } EXPORT_SYMBOL_GPL(irq_domain_remove); @@ -293,8 +443,8 @@ void irq_domain_update_bus_token(struct irq_domain *domain, EXPORT_SYMBOL_GPL(irq_domain_update_bus_token); /** - * irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs - * @of_node: pointer to interrupt controller's device tree node. + * irq_domain_create_simple() - Register an irq_domain and optionally map a range of irqs + * @fwnode: firmware node for the interrupt controller * @size: total number of irqs in mapping * @first_irq: first number of irq block assigned to the domain, * pass zero to assign irqs on-the-fly. If first_irq is non-zero, then @@ -310,33 +460,25 @@ EXPORT_SYMBOL_GPL(irq_domain_update_bus_token); * irqs get mapped dynamically on the fly. However, if the controller requires * static virq assignments (non-DT boot) then it will set that up correctly. */ -struct irq_domain *irq_domain_add_simple(struct device_node *of_node, - unsigned int size, - unsigned int first_irq, - const struct irq_domain_ops *ops, - void *host_data) +struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, + unsigned int size, + unsigned int first_irq, + const struct irq_domain_ops *ops, + void *host_data) { - struct irq_domain *domain; - - domain = __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data); - if (!domain) - return NULL; - - if (first_irq > 0) { - if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { - /* attempt to allocated irq_descs */ - int rc = irq_alloc_descs(first_irq, first_irq, size, - of_node_to_nid(of_node)); - if (rc < 0) - pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", - first_irq); - } - irq_domain_associate_many(domain, first_irq, 0, size); - } + struct irq_domain_info info = { + .fwnode = fwnode, + .size = size, + .hwirq_max = size, + .virq_base = first_irq, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *domain = __irq_domain_instantiate(&info, true, false); - return domain; + return IS_ERR(domain) ? NULL : domain; } -EXPORT_SYMBOL_GPL(irq_domain_add_simple); +EXPORT_SYMBOL_GPL(irq_domain_create_simple); /** * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. @@ -372,13 +514,18 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data) { - struct irq_domain *domain; + struct irq_domain_info info = { + .fwnode = fwnode, + .size = first_hwirq + size, + .hwirq_max = first_hwirq + size, + .hwirq_base = first_hwirq, + .virq_base = first_irq, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *domain = __irq_domain_instantiate(&info, false, true); - domain = __irq_domain_add(fwnode, first_hwirq + size, first_hwirq + size, 0, ops, host_data); - if (domain) - irq_domain_associate_many(domain, first_irq, first_hwirq, size); - - return domain; + return IS_ERR(domain) ? NULL : domain; } EXPORT_SYMBOL_GPL(irq_domain_create_legacy); @@ -394,7 +541,8 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, struct fwnode_handle *fwnode = fwspec->fwnode; int rc; - /* We might want to match the legacy controller last since + /* + * We might want to match the legacy controller last since * it might potentially be set to match all interrupts in * the absence of a device node. This isn't a problem so far * yet though... @@ -405,7 +553,7 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, */ mutex_lock(&irq_domain_mutex); list_for_each_entry(h, &irq_domain_list, link) { - if (h->ops->select && fwspec->param_count) + if (h->ops->select && bus_token != DOMAIN_BUS_ANY) rc = h->ops->select(h, fwspec, bus_token); else if (h->ops->match) rc = h->ops->match(h, to_of_node(fwnode), bus_token); @@ -425,31 +573,6 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, EXPORT_SYMBOL_GPL(irq_find_matching_fwspec); /** - * irq_domain_check_msi_remap - Check whether all MSI irq domains implement - * IRQ remapping - * - * Return: false if any MSI irq domain does not support IRQ remapping, - * true otherwise (including if there is no MSI irq domain) - */ -bool irq_domain_check_msi_remap(void) -{ - struct irq_domain *h; - bool ret = true; - - mutex_lock(&irq_domain_mutex); - list_for_each_entry(h, &irq_domain_list, link) { - if (irq_domain_is_msi(h) && - !irq_domain_hierarchical_is_msi_remap(h)) { - ret = false; - break; - } - } - mutex_unlock(&irq_domain_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(irq_domain_check_msi_remap); - -/** * irq_set_default_host() - Set a "default" irq domain * @domain: default domain pointer * @@ -479,30 +602,45 @@ struct irq_domain *irq_get_default_host(void) { return irq_default_domain; } +EXPORT_SYMBOL_GPL(irq_get_default_host); + +static bool irq_domain_is_nomap(struct irq_domain *domain) +{ + return IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && + (domain->flags & IRQ_DOMAIN_FLAG_NO_MAP); +} static void irq_domain_clear_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { - if (hwirq < domain->revmap_size) { - domain->linear_revmap[hwirq] = 0; - } else { - mutex_lock(&domain->revmap_tree_mutex); + lockdep_assert_held(&domain->root->mutex); + + if (irq_domain_is_nomap(domain)) + return; + + if (hwirq < domain->revmap_size) + rcu_assign_pointer(domain->revmap[hwirq], NULL); + else radix_tree_delete(&domain->revmap_tree, hwirq); - mutex_unlock(&domain->revmap_tree_mutex); - } } static void irq_domain_set_mapping(struct irq_domain *domain, irq_hw_number_t hwirq, struct irq_data *irq_data) { - if (hwirq < domain->revmap_size) { - domain->linear_revmap[hwirq] = irq_data->irq; - } else { - mutex_lock(&domain->revmap_tree_mutex); + /* + * This also makes sure that all domains point to the same root when + * called from irq_domain_insert_irq() for each domain in a hierarchy. + */ + lockdep_assert_held(&domain->root->mutex); + + if (irq_domain_is_nomap(domain)) + return; + + if (hwirq < domain->revmap_size) + rcu_assign_pointer(domain->revmap[hwirq], irq_data); + else radix_tree_insert(&domain->revmap_tree, hwirq, irq_data); - mutex_unlock(&domain->revmap_tree_mutex); - } } static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) @@ -515,6 +653,9 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) return; hwirq = irq_data->hwirq; + + mutex_lock(&domain->root->mutex); + irq_set_status_flags(irq, IRQ_NOREQUEST); /* remove chip and handler */ @@ -534,10 +675,12 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) /* Clear reverse map for this hwirq */ irq_domain_clear_mapping(domain, hwirq); + + mutex_unlock(&domain->root->mutex); } -int irq_domain_associate(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq) +static int irq_domain_associate_locked(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) { struct irq_data *irq_data = irq_get_irq_data(virq); int ret; @@ -550,7 +693,6 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq, if (WARN(irq_data->domain, "error: virq%i is already associated", virq)) return -EINVAL; - mutex_lock(&irq_domain_mutex); irq_data->hwirq = hwirq; irq_data->domain = domain; if (domain->ops->map) { @@ -567,23 +709,29 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq, } irq_data->domain = NULL; irq_data->hwirq = 0; - mutex_unlock(&irq_domain_mutex); return ret; } - - /* If not already assigned, give the domain the chip's name */ - if (!domain->name && irq_data->chip) - domain->name = irq_data->chip->name; } domain->mapcount++; irq_domain_set_mapping(domain, hwirq, irq_data); - mutex_unlock(&irq_domain_mutex); irq_clear_status_flags(virq, IRQ_NOREQUEST); return 0; } + +int irq_domain_associate(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + int ret; + + mutex_lock(&domain->root->mutex); + ret = irq_domain_associate_locked(domain, virq, hwirq); + mutex_unlock(&domain->root->mutex); + + return ret; +} EXPORT_SYMBOL_GPL(irq_domain_associate); void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, @@ -596,12 +744,12 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, of_node_full_name(of_node), irq_base, (int)hwirq_base, count); - for (i = 0; i < count; i++) { + for (i = 0; i < count; i++) irq_domain_associate(domain, irq_base + i, hwirq_base + i); - } } EXPORT_SYMBOL_GPL(irq_domain_associate_many); +#ifdef CONFIG_IRQ_DOMAIN_NOMAP /** * irq_create_direct_mapping() - Allocate an irq for direct mapping * @domain: domain to allocate the irq for or NULL for default domain @@ -626,9 +774,9 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) pr_debug("create_direct virq allocation failed\n"); return 0; } - if (virq >= domain->revmap_direct_max_irq) { - pr_err("ERROR: no free irqs available below %i maximum\n", - domain->revmap_direct_max_irq); + if (virq >= domain->hwirq_max) { + pr_err("ERROR: no free irqs available below %lu maximum\n", + domain->hwirq_max); irq_free_desc(virq); return 0; } @@ -642,6 +790,35 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) return virq; } EXPORT_SYMBOL_GPL(irq_create_direct_mapping); +#endif + +static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain, + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity) +{ + struct device_node *of_node = irq_domain_get_of_node(domain); + int virq; + + pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); + + /* Allocate a virtual interrupt number */ + virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), + affinity); + if (virq <= 0) { + pr_debug("-> virq allocation failed\n"); + return 0; + } + + if (irq_domain_associate_locked(domain, virq, hwirq)) { + irq_free_desc(virq); + return 0; + } + + pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", + hwirq, of_node_full_name(of_node), virq); + + return virq; +} /** * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space @@ -655,87 +832,36 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping); * on the number returned from that call. */ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, - irq_hw_number_t hwirq, - const struct irq_affinity_desc *affinity) + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity) { - struct device_node *of_node; int virq; - pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); - - /* Look for default domain if nececssary */ + /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; if (domain == NULL) { WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); return 0; } - pr_debug("-> using domain @%p\n", domain); - of_node = irq_domain_get_of_node(domain); + mutex_lock(&domain->root->mutex); /* Check if mapping already exists */ virq = irq_find_mapping(domain, hwirq); if (virq) { - pr_debug("-> existing mapping on virq %d\n", virq); - return virq; + pr_debug("existing mapping on virq %d\n", virq); + goto out; } - /* Allocate a virtual interrupt number */ - virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), - affinity); - if (virq <= 0) { - pr_debug("-> virq allocation failed\n"); - return 0; - } - - if (irq_domain_associate(domain, virq, hwirq)) { - irq_free_desc(virq); - return 0; - } - - pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", - hwirq, of_node_full_name(of_node), virq); + virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity); +out: + mutex_unlock(&domain->root->mutex); return virq; } EXPORT_SYMBOL_GPL(irq_create_mapping_affinity); -/** - * irq_create_strict_mappings() - Map a range of hw irqs to fixed linux irqs - * @domain: domain owning the interrupt range - * @irq_base: beginning of linux IRQ range - * @hwirq_base: beginning of hardware IRQ range - * @count: Number of interrupts to map - * - * This routine is used for allocating and mapping a range of hardware - * irqs to linux irqs where the linux irq numbers are at pre-defined - * locations. For use by controllers that already have static mappings - * to insert in to the domain. - * - * Non-linear users can use irq_create_identity_mapping() for IRQ-at-a-time - * domain insertion. - * - * 0 is returned upon success, while any failure to establish a static - * mapping is treated as an error. - */ -int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base, - irq_hw_number_t hwirq_base, int count) -{ - struct device_node *of_node; - int ret; - - of_node = irq_domain_get_of_node(domain); - ret = irq_alloc_descs(irq_base, irq_base, count, - of_node_to_nid(of_node)); - if (unlikely(ret < 0)) - return ret; - - irq_domain_associate_many(domain, irq_base, hwirq_base, count); - return 0; -} -EXPORT_SYMBOL_GPL(irq_create_strict_mappings); - static int irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, irq_hw_number_t *hwirq, unsigned int *type) @@ -754,9 +880,8 @@ static int irq_domain_translate(struct irq_domain *d, return 0; } -static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, - unsigned int count, - struct irq_fwspec *fwspec) +void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, + unsigned int count, struct irq_fwspec *fwspec) { int i; @@ -766,6 +891,7 @@ static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, for (i = 0; i < count; i++) fwspec->param[i] = args[i]; } +EXPORT_SYMBOL_GPL(of_phandle_args_to_fwspec); unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) { @@ -799,6 +925,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK)) type &= IRQ_TYPE_SENSE_MASK; + mutex_lock(&domain->root->mutex); + /* * If we've already configured this interrupt, * don't do it again, or hell will break loose. @@ -811,7 +939,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) * interrupt number. */ if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) - return virq; + goto out; /* * If the trigger type has not been set yet, then set @@ -819,40 +947,50 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) */ if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) { irq_data = irq_get_irq_data(virq); - if (!irq_data) - return 0; + if (!irq_data) { + virq = 0; + goto out; + } irqd_set_trigger_type(irq_data, type); - return virq; + goto out; } pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n", hwirq, of_node_full_name(to_of_node(fwspec->fwnode))); - return 0; + virq = 0; + goto out; } if (irq_domain_is_hierarchy(domain)) { - virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec); - if (virq <= 0) - return 0; + if (irq_domain_is_msi_device(domain)) { + mutex_unlock(&domain->root->mutex); + virq = msi_device_domain_alloc_wired(domain, hwirq, type); + mutex_lock(&domain->root->mutex); + } else + virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE, + fwspec, false, NULL); + if (virq <= 0) { + virq = 0; + goto out; + } } else { /* Create mapping */ - virq = irq_create_mapping(domain, hwirq); + virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL); if (!virq) - return virq; + goto out; } irq_data = irq_get_irq_data(virq); - if (!irq_data) { - if (irq_domain_is_hierarchy(domain)) - irq_domain_free_irqs(virq, 1); - else - irq_dispose_mapping(virq); - return 0; + if (WARN_ON(!irq_data)) { + virq = 0; + goto out; } /* Store trigger type */ irqd_set_trigger_type(irq_data, type); +out: + mutex_unlock(&domain->root->mutex); return virq; } @@ -875,10 +1013,11 @@ EXPORT_SYMBOL_GPL(irq_create_of_mapping); */ void irq_dispose_mapping(unsigned int virq) { - struct irq_data *irq_data = irq_get_irq_data(virq); + struct irq_data *irq_data; struct irq_domain *domain; - if (!virq || !irq_data) + irq_data = virq ? irq_get_irq_data(virq) : NULL; + if (!irq_data) return; domain = irq_data->domain; @@ -886,7 +1025,7 @@ void irq_dispose_mapping(unsigned int virq) return; if (irq_domain_is_hierarchy(domain)) { - irq_domain_free_irqs(virq, 1); + irq_domain_free_one_irq(domain, virq); } else { irq_domain_disassociate(domain, virq); irq_free_desc(virq); @@ -895,40 +1034,64 @@ void irq_dispose_mapping(unsigned int virq) EXPORT_SYMBOL_GPL(irq_dispose_mapping); /** - * irq_find_mapping() - Find a linux irq from a hw irq number. + * __irq_resolve_mapping() - Find a linux irq from a hw irq number. * @domain: domain owning this hardware interrupt * @hwirq: hardware irq number in that domain space + * @irq: optional pointer to return the Linux irq if required + * + * Returns the interrupt descriptor. */ -unsigned int irq_find_mapping(struct irq_domain *domain, - irq_hw_number_t hwirq) +struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, + irq_hw_number_t hwirq, + unsigned int *irq) { + struct irq_desc *desc = NULL; struct irq_data *data; - /* Look for default domain if nececssary */ + /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; if (domain == NULL) - return 0; + return desc; + + if (irq_domain_is_nomap(domain)) { + if (hwirq < domain->hwirq_max) { + data = irq_domain_get_irq_data(domain, hwirq); + if (data && data->hwirq == hwirq) + desc = irq_data_to_desc(data); + if (irq && desc) + *irq = hwirq; + } - if (hwirq < domain->revmap_direct_max_irq) { - data = irq_domain_get_irq_data(domain, hwirq); - if (data && data->hwirq == hwirq) - return hwirq; + return desc; } + rcu_read_lock(); /* Check if the hwirq is in the linear revmap. */ if (hwirq < domain->revmap_size) - return domain->linear_revmap[hwirq]; + data = rcu_dereference(domain->revmap[hwirq]); + else + data = radix_tree_lookup(&domain->revmap_tree, hwirq); + + if (likely(data)) { + desc = irq_data_to_desc(data); + if (irq) + *irq = data->irq; + } - rcu_read_lock(); - data = radix_tree_lookup(&domain->revmap_tree, hwirq); rcu_read_unlock(); - return data ? data->irq : 0; + return desc; } -EXPORT_SYMBOL_GPL(irq_find_mapping); +EXPORT_SYMBOL_GPL(__irq_resolve_mapping); /** * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings + * @d: Interrupt domain involved in the translation + * @ctrlr: The device tree node for the device whose interrupt is translated + * @intspec: The interrupt specifier data from the device tree + * @intsize: The number of entries in @intspec + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with one cell * bindings where the cell value maps directly to the hwirq number. @@ -947,6 +1110,12 @@ EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell); /** * irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings + * @d: Interrupt domain involved in the translation + * @ctrlr: The device tree node for the device whose interrupt is translated + * @intspec: The interrupt specifier data from the device tree + * @intsize: The number of entries in @intspec + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with two cell * bindings where the cell values map directly to the hwirq number @@ -965,6 +1134,12 @@ EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell); /** * irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings + * @d: Interrupt domain involved in the translation + * @ctrlr: The device tree node for the device whose interrupt is translated + * @intspec: The interrupt specifier data from the device tree + * @intsize: The number of entries in @intspec + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with either one * or two cell bindings where the cell values map directly to the hwirq number @@ -998,6 +1173,10 @@ EXPORT_SYMBOL_GPL(irq_domain_simple_ops); /** * irq_domain_translate_onecell() - Generic translate for direct one cell * bindings + * @d: Interrupt domain involved in the translation + * @fwspec: The firmware interrupt specifier to translate + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type */ int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec, @@ -1015,6 +1194,10 @@ EXPORT_SYMBOL_GPL(irq_domain_translate_onecell); /** * irq_domain_translate_twocell() - Generic translate for direct two cell * bindings + * @d: Interrupt domain involved in the translation + * @fwspec: The firmware interrupt specifier to translate + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with two cell * bindings where the cell values map directly to the hwirq number @@ -1042,7 +1225,7 @@ int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, virq = __irq_alloc_descs(virq, virq, cnt, node, THIS_MODULE, affinity); } else { - hint = hwirq % nr_irqs; + hint = hwirq % irq_get_nr_irqs(); if (hint == 0) hint++; virq = __irq_alloc_descs(-1, hint, cnt, node, THIS_MODULE, @@ -1091,18 +1274,22 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, const struct irq_domain_ops *ops, void *host_data) { - struct irq_domain *domain; + struct irq_domain_info info = { + .fwnode = fwnode, + .size = size, + .hwirq_max = size, + .ops = ops, + .host_data = host_data, + .domain_flags = flags, + .parent = parent, + }; + struct irq_domain *d; - if (size) - domain = irq_domain_create_linear(fwnode, size, ops, host_data); - else - domain = irq_domain_create_tree(fwnode, ops, host_data); - if (domain) { - domain->parent = parent; - domain->flags |= flags; - } + if (!info.size) + info.hwirq_max = ~0U; - return domain; + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy); @@ -1115,10 +1302,6 @@ static void irq_domain_insert_irq(int virq) domain->mapcount++; irq_domain_set_mapping(domain, data->hwirq, data); - - /* If not already assigned, give the domain the chip's name */ - if (!domain->name && data->chip) - domain->name = data->chip->name; } irq_clear_status_flags(virq, IRQ_NOREQUEST); @@ -1210,6 +1393,7 @@ int irq_domain_disconnect_hierarchy(struct irq_domain *domain, irqd->chip = ERR_PTR(-ENOTCONN); return 0; } +EXPORT_SYMBOL_GPL(irq_domain_disconnect_hierarchy); static int irq_domain_trim_hierarchy(unsigned int virq) { @@ -1219,7 +1403,7 @@ static int irq_domain_trim_hierarchy(unsigned int virq) tail = NULL; /* The first entry must have a valid irqchip */ - if (!irq_data->chip || IS_ERR(irq_data->chip)) + if (IS_ERR_OR_NULL(irq_data->chip)) return -EINVAL; /* @@ -1312,7 +1496,8 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); * @chip_data: The associated chip data */ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data) { struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); @@ -1321,7 +1506,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, return -ENOENT; irq_data->hwirq = hwirq; - irq_data->chip = chip ? chip : &no_irq_chip; + irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip); irq_data->chip_data = chip_data; return 0; @@ -1340,7 +1525,7 @@ EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip); * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { @@ -1416,40 +1601,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, return domain->ops->alloc(domain, irq_base, nr_irqs, arg); } -/** - * __irq_domain_alloc_irqs - Allocate IRQs from domain - * @domain: domain to allocate from - * @irq_base: allocate specified IRQ number if irq_base >= 0 - * @nr_irqs: number of IRQs to allocate - * @node: NUMA node id for memory allocation - * @arg: domain specific argument - * @realloc: IRQ descriptors have already been allocated if true - * @affinity: Optional irq affinity mask for multiqueue devices - * - * Allocate IRQ numbers and initialized all data structures to support - * hierarchy IRQ domains. - * Parameter @realloc is mainly to support legacy IRQs. - * Returns error code or allocated IRQ number - * - * The whole process to setup an IRQ has been split into two steps. - * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ - * descriptor and required hardware resources. The second step, - * irq_domain_activate_irq(), is to program hardwares with preallocated - * resources. In this way, it's easier to rollback when failing to - * allocate resources. - */ -int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, - unsigned int nr_irqs, int node, void *arg, - bool realloc, const struct irq_affinity_desc *affinity) +static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity) { int i, ret, virq; - if (domain == NULL) { - domain = irq_default_domain; - if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) - return -EINVAL; - } - if (realloc && irq_base >= 0) { virq = irq_base; } else { @@ -1468,24 +1625,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, goto out_free_desc; } - mutex_lock(&irq_domain_mutex); ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); - if (ret < 0) { - mutex_unlock(&irq_domain_mutex); + if (ret < 0) goto out_free_irq_data; - } for (i = 0; i < nr_irqs; i++) { ret = irq_domain_trim_hierarchy(virq + i); - if (ret) { - mutex_unlock(&irq_domain_mutex); + if (ret) goto out_free_irq_data; - } } - + for (i = 0; i < nr_irqs; i++) irq_domain_insert_irq(virq + i); - mutex_unlock(&irq_domain_mutex); return virq; @@ -1496,20 +1647,68 @@ out_free_desc: return ret; } +/** + * __irq_domain_alloc_irqs - Allocate IRQs from domain + * @domain: domain to allocate from + * @irq_base: allocate specified IRQ number if irq_base >= 0 + * @nr_irqs: number of IRQs to allocate + * @node: NUMA node id for memory allocation + * @arg: domain specific argument + * @realloc: IRQ descriptors have already been allocated if true + * @affinity: Optional irq affinity mask for multiqueue devices + * + * Allocate IRQ numbers and initialized all data structures to support + * hierarchy IRQ domains. + * Parameter @realloc is mainly to support legacy IRQs. + * Returns error code or allocated IRQ number + * + * The whole process to setup an IRQ has been split into two steps. + * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ + * descriptor and required hardware resources. The second step, + * irq_domain_activate_irq(), is to program the hardware with preallocated + * resources. In this way, it's easier to rollback when failing to + * allocate resources. + */ +int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity) +{ + int ret; + + if (domain == NULL) { + domain = irq_default_domain; + if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) + return -EINVAL; + } + + mutex_lock(&domain->root->mutex); + ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg, + realloc, affinity); + mutex_unlock(&domain->root->mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs); + /* The irq_data was moved, fix the revmap to refer to the new location */ static void irq_domain_fix_revmap(struct irq_data *d) { void __rcu **slot; - if (d->hwirq < d->domain->revmap_size) - return; /* Not using radix tree. */ + lockdep_assert_held(&d->domain->root->mutex); + + if (irq_domain_is_nomap(d->domain)) + return; /* Fix up the revmap. */ - mutex_lock(&d->domain->revmap_tree_mutex); - slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq); - if (slot) - radix_tree_replace_slot(&d->domain->revmap_tree, slot, d); - mutex_unlock(&d->domain->revmap_tree_mutex); + if (d->hwirq < d->domain->revmap_size) { + /* Not using radix tree */ + rcu_assign_pointer(d->domain->revmap[d->hwirq], d); + } else { + slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq); + if (slot) + radix_tree_replace_slot(&d->domain->revmap_tree, slot, d); + } } /** @@ -1525,8 +1724,8 @@ static void irq_domain_fix_revmap(struct irq_data *d) */ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) { - struct irq_data *child_irq_data; - struct irq_data *root_irq_data = irq_get_irq_data(virq); + struct irq_data *irq_data = irq_get_irq_data(virq); + struct irq_data *parent_irq_data; struct irq_desc *desc; int rv = 0; @@ -1551,47 +1750,46 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) if (WARN_ON(!irq_domain_is_hierarchy(domain))) return -EINVAL; - if (!root_irq_data) + if (!irq_data) return -EINVAL; - if (domain->parent != root_irq_data->domain) + if (domain->parent != irq_data->domain) return -EINVAL; - child_irq_data = kzalloc_node(sizeof(*child_irq_data), GFP_KERNEL, - irq_data_get_node(root_irq_data)); - if (!child_irq_data) + parent_irq_data = kzalloc_node(sizeof(*parent_irq_data), GFP_KERNEL, + irq_data_get_node(irq_data)); + if (!parent_irq_data) return -ENOMEM; - mutex_lock(&irq_domain_mutex); + mutex_lock(&domain->root->mutex); /* Copy the original irq_data. */ - *child_irq_data = *root_irq_data; + *parent_irq_data = *irq_data; /* - * Overwrite the root_irq_data, which is embedded in struct - * irq_desc, with values for this domain. + * Overwrite the irq_data, which is embedded in struct irq_desc, with + * values for this domain. */ - root_irq_data->parent_data = child_irq_data; - root_irq_data->domain = domain; - root_irq_data->mask = 0; - root_irq_data->hwirq = 0; - root_irq_data->chip = NULL; - root_irq_data->chip_data = NULL; + irq_data->parent_data = parent_irq_data; + irq_data->domain = domain; + irq_data->mask = 0; + irq_data->hwirq = 0; + irq_data->chip = NULL; + irq_data->chip_data = NULL; /* May (probably does) set hwirq, chip, etc. */ rv = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg); if (rv) { /* Restore the original irq_data. */ - *root_irq_data = *child_irq_data; - kfree(child_irq_data); + *irq_data = *parent_irq_data; + kfree(parent_irq_data); goto error; } - irq_domain_fix_revmap(child_irq_data); - irq_domain_set_mapping(domain, root_irq_data->hwirq, root_irq_data); - + irq_domain_fix_revmap(parent_irq_data); + irq_domain_set_mapping(domain, irq_data->hwirq, irq_data); error: - mutex_unlock(&irq_domain_mutex); + mutex_unlock(&domain->root->mutex); return rv; } @@ -1607,8 +1805,8 @@ EXPORT_SYMBOL_GPL(irq_domain_push_irq); */ int irq_domain_pop_irq(struct irq_domain *domain, int virq) { - struct irq_data *root_irq_data = irq_get_irq_data(virq); - struct irq_data *child_irq_data; + struct irq_data *irq_data = irq_get_irq_data(virq); + struct irq_data *parent_irq_data; struct irq_data *tmp_irq_data; struct irq_desc *desc; @@ -1630,37 +1828,37 @@ int irq_domain_pop_irq(struct irq_domain *domain, int virq) if (domain == NULL) return -EINVAL; - if (!root_irq_data) + if (!irq_data) return -EINVAL; tmp_irq_data = irq_domain_get_irq_data(domain, virq); /* We can only "pop" if this domain is at the top of the list */ - if (WARN_ON(root_irq_data != tmp_irq_data)) + if (WARN_ON(irq_data != tmp_irq_data)) return -EINVAL; - if (WARN_ON(root_irq_data->domain != domain)) + if (WARN_ON(irq_data->domain != domain)) return -EINVAL; - child_irq_data = root_irq_data->parent_data; - if (WARN_ON(!child_irq_data)) + parent_irq_data = irq_data->parent_data; + if (WARN_ON(!parent_irq_data)) return -EINVAL; - mutex_lock(&irq_domain_mutex); + mutex_lock(&domain->root->mutex); - root_irq_data->parent_data = NULL; + irq_data->parent_data = NULL; - irq_domain_clear_mapping(domain, root_irq_data->hwirq); + irq_domain_clear_mapping(domain, irq_data->hwirq); irq_domain_free_irqs_hierarchy(domain, virq, 1); /* Restore the original irq_data. */ - *root_irq_data = *child_irq_data; + *irq_data = *parent_irq_data; - irq_domain_fix_revmap(root_irq_data); + irq_domain_fix_revmap(irq_data); - mutex_unlock(&irq_domain_mutex); + mutex_unlock(&domain->root->mutex); - kfree(child_irq_data); + kfree(parent_irq_data); return 0; } @@ -1674,30 +1872,39 @@ EXPORT_SYMBOL_GPL(irq_domain_pop_irq); void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { struct irq_data *data = irq_get_irq_data(virq); + struct irq_domain *domain; int i; if (WARN(!data || !data->domain || !data->domain->ops->free, "NULL pointer, cannot free irq\n")) return; - mutex_lock(&irq_domain_mutex); + domain = data->domain; + + mutex_lock(&domain->root->mutex); for (i = 0; i < nr_irqs; i++) irq_domain_remove_irq(virq + i); - irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs); - mutex_unlock(&irq_domain_mutex); + irq_domain_free_irqs_hierarchy(domain, virq, nr_irqs); + mutex_unlock(&domain->root->mutex); irq_domain_free_irq_data(virq, nr_irqs); irq_free_descs(virq, nr_irqs); } +static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq) +{ + if (irq_domain_is_msi_device(domain)) + msi_device_domain_free_wired(domain, virq); + else + irq_domain_free_irqs(virq, 1); +} + /** * irq_domain_alloc_irqs_parent - Allocate interrupts from parent domain + * @domain: Domain below which interrupts must be allocated * @irq_base: Base IRQ number * @nr_irqs: Number of IRQs to allocate * @arg: Allocation data (arch/domain specific) - * - * Check whether the domain has been setup recursive. If not allocate - * through the parent domain. */ int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, @@ -1713,11 +1920,9 @@ EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent); /** * irq_domain_free_irqs_parent - Free interrupts from parent domain + * @domain: Domain below which interrupts must be freed * @irq_base: Base IRQ number * @nr_irqs: Number of IRQs to free - * - * Check whether the domain has been setup recursive. If not free - * through the parent domain. */ void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs) @@ -1803,20 +2008,6 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain) if (domain->ops->alloc) domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY; } - -/** - * irq_domain_hierarchical_is_msi_remap - Check if the domain or any - * parent has MSI remapping support - * @domain: domain pointer - */ -bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) -{ - for (; domain; domain = domain->parent) { - if (irq_domain_is_msi_remap(domain)) - return true; - } - return false; -} #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ /** * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain @@ -1844,7 +2035,7 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, struct irq_chip *chip, + irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { @@ -1853,22 +2044,43 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_set_handler_data(virq, handler_data); } -static void irq_domain_check_hierarchy(struct irq_domain *domain) +static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, const struct irq_affinity_desc *affinity) { + return -EINVAL; } + +static void irq_domain_check_hierarchy(struct irq_domain *domain) { } +static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq) { } + #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #ifdef CONFIG_GENERIC_IRQ_DEBUGFS +#include "internals.h" + static struct dentry *domain_dir; -static void -irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind) +static const struct irq_bit_descr irqdomain_flags[] = { + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_HIERARCHY), + BIT_MASK_DESCR(IRQ_DOMAIN_NAME_ALLOCATED), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_IPI_PER_CPU), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_IPI_SINGLE), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_ISOLATED_MSI), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_NO_MAP), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI_PARENT), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI_DEVICE), + BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_NONCORE), +}; + +static void irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind) { seq_printf(m, "%*sname: %s\n", ind, "", d->name); - seq_printf(m, "%*ssize: %u\n", ind + 1, "", - d->revmap_size + d->revmap_direct_max_irq); + seq_printf(m, "%*ssize: %u\n", ind + 1, "", d->revmap_size); seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount); seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags); + irq_debug_show_bits(m, ind, d->flags, irqdomain_flags, ARRAY_SIZE(irqdomain_flags)); if (d->ops && d->ops->debug_show) d->ops->debug_show(m, d, NULL, ind + 1); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY @@ -1896,16 +2108,15 @@ DEFINE_SHOW_ATTRIBUTE(irq_domain_debug); static void debugfs_add_domain_dir(struct irq_domain *d) { - if (!d->name || !domain_dir || d->debugfs_file) + if (!d->name || !domain_dir) return; - d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d, - &irq_domain_debug_fops); + debugfs_create_file(d->name, 0444, domain_dir, d, + &irq_domain_debug_fops); } static void debugfs_remove_domain_dir(struct irq_domain *d) { - debugfs_remove(d->debugfs_file); - d->debugfs_file = NULL; + debugfs_lookup_and_remove(d->name, domain_dir); } void __init irq_domain_debugfs_init(struct dentry *root) diff --git a/kernel/irq/kexec.c b/kernel/irq/kexec.c new file mode 100644 index 000000000000..1a3deffe6b5b --- /dev/null +++ b/kernel/irq/kexec.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> +#include <linux/irqnr.h> + +#include "internals.h" + +void machine_kexec_mask_interrupts(void) +{ + struct irq_desc *desc; + unsigned int i; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int check_eoi = 1; + + chip = irq_desc_get_chip(desc); + if (!chip || !irqd_is_started(&desc->irq_data)) + continue; + + if (IS_ENABLED(CONFIG_GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD)) { + /* + * First try to remove the active state from an interrupt which is forwarded + * to a VM. If the interrupt is not forwarded, try to EOI the interrupt. + */ + check_eoi = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + } + + if (check_eoi && chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) + chip->irq_eoi(&desc->irq_data); + + irq_shutdown(desc); + } +} diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index dec3f73e8db9..f300bb6be3bd 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -25,12 +25,11 @@ #include "internals.h" #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT) -__read_mostly bool force_irqthreads; -EXPORT_SYMBOL_GPL(force_irqthreads); +DEFINE_STATIC_KEY_FALSE(force_irqthreads_key); static int __init setup_forced_irqthreads(char *arg) { - force_irqthreads = true; + static_branch_enable(&force_irqthreads_key); return 0; } early_param("threadirqs", setup_forced_irqthreads); @@ -109,6 +108,16 @@ bool synchronize_hardirq(unsigned int irq) } EXPORT_SYMBOL(synchronize_hardirq); +static void __synchronize_irq(struct irq_desc *desc) +{ + __synchronize_hardirq(desc, true); + /* + * We made sure that no hardirq handler is running. Now verify that no + * threaded handlers are active. + */ + wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active)); +} + /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) * @irq: interrupt number to wait for @@ -128,16 +137,8 @@ void synchronize_irq(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - if (desc) { - __synchronize_hardirq(desc, true); - /* - * We made sure that no hardirq handler is - * running. Now verify that no threaded handlers are - * active. - */ - wait_event(desc->wait_for_threads, - !atomic_read(&desc->threads_active)); - } + if (desc) + __synchronize_irq(desc); } EXPORT_SYMBOL(synchronize_irq); @@ -179,7 +180,7 @@ bool irq_can_set_affinity_usr(unsigned int irq) /** * irq_set_thread_affinity - Notify irq threads to adjust affinity - * @desc: irq descriptor which has affitnity changed + * @desc: irq descriptor which has affinity changed * * We just set IRQTF_AFFINITY and delegate the affinity setting * to the interrupt thread itself. We can not call @@ -190,9 +191,16 @@ void irq_set_thread_affinity(struct irq_desc *desc) { struct irqaction *action; - for_each_action_of_desc(desc, action) - if (action->thread) + for_each_action_of_desc(desc, action) { + if (action->thread) { set_bit(IRQTF_AFFINITY, &action->thread_flags); + wake_up_process(action->thread); + } + if (action->secondary && action->secondary->thread) { + set_bit(IRQTF_AFFINITY, &action->secondary->thread_flags); + wake_up_process(action->secondary->thread); + } + } } #ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK @@ -206,23 +214,19 @@ static void irq_validate_effective_affinity(struct irq_data *data) pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n", chip->name, data->irq); } - -static inline void irq_init_effective_affinity(struct irq_data *data, - const struct cpumask *mask) -{ - cpumask_copy(irq_data_get_effective_affinity_mask(data), mask); -} #else static inline void irq_validate_effective_affinity(struct irq_data *data) { } -static inline void irq_init_effective_affinity(struct irq_data *data, - const struct cpumask *mask) { } #endif +static DEFINE_PER_CPU(struct cpumask, __tmp_mask); + int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { + struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask); struct irq_desc *desc = irq_data_to_desc(data); struct irq_chip *chip = irq_data_get_irq_chip(data); + const struct cpumask *prog_mask; int ret; if (!chip || !chip->irq_set_affinity) @@ -248,25 +252,33 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, * online. */ if (irqd_affinity_is_managed(data) && - housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) { - const struct cpumask *hk_mask, *prog_mask; - - static DEFINE_RAW_SPINLOCK(tmp_mask_lock); - static struct cpumask tmp_mask; + housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) { + const struct cpumask *hk_mask; - hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ); + hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); - raw_spin_lock(&tmp_mask_lock); - cpumask_and(&tmp_mask, mask, hk_mask); - if (!cpumask_intersects(&tmp_mask, cpu_online_mask)) + cpumask_and(tmp_mask, mask, hk_mask); + if (!cpumask_intersects(tmp_mask, cpu_online_mask)) prog_mask = mask; else - prog_mask = &tmp_mask; - ret = chip->irq_set_affinity(data, prog_mask, force); - raw_spin_unlock(&tmp_mask_lock); + prog_mask = tmp_mask; } else { - ret = chip->irq_set_affinity(data, mask, force); + prog_mask = mask; } + + /* + * Make sure we only provide online CPUs to the irqchip, + * unless we are being asked to force the affinity (in which + * case we do as we are told). + */ + cpumask_and(tmp_mask, prog_mask, cpu_online_mask); + if (!force && !cpumask_empty(tmp_mask)) + ret = chip->irq_set_affinity(data, tmp_mask, force); + else if (force) + ret = chip->irq_set_affinity(data, mask, force); + else + ret = -EINVAL; + switch (ret) { case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: @@ -315,7 +327,7 @@ static int irq_try_set_affinity(struct irq_data *data, } static bool irq_set_affinity_deactivated(struct irq_data *data, - const struct cpumask *mask, bool force) + const struct cpumask *mask) { struct irq_desc *desc = irq_data_to_desc(data); @@ -326,14 +338,14 @@ static bool irq_set_affinity_deactivated(struct irq_data *data, * If the interrupt is not yet activated, just store the affinity * mask and do not call the chip driver at all. On activation the * driver has to make sure anyway that the interrupt is in a - * useable state so startup works. + * usable state so startup works. */ if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data) || !irqd_affinity_on_activate(data)) return false; cpumask_copy(desc->irq_common_data.affinity, mask); - irq_init_effective_affinity(data, mask); + irq_data_update_effective_affinity(data, mask); irqd_set(data, IRQD_AFFINITY_SET); return true; } @@ -348,7 +360,7 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, if (!chip || !chip->irq_set_affinity) return -EINVAL; - if (irq_set_affinity_deactivated(data, mask, force)) + if (irq_set_affinity_deactivated(data, mask)) return 0; if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) { @@ -441,7 +453,8 @@ out_unlock: return ret; } -int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) +static int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, + bool force) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -456,7 +469,38 @@ int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) return ret; } -int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) +/** + * irq_set_affinity - Set the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @cpumask: cpumask + * + * Fails if cpumask does not contain an online CPU + */ +int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, false); +} +EXPORT_SYMBOL_GPL(irq_set_affinity); + +/** + * irq_force_affinity - Force the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @cpumask: cpumask + * + * Same as irq_set_affinity, but without checking the mask against + * online cpus. + * + * Solely for low level cpu hotplug code, where we need to make per + * cpu interrupts affine before the cpu becomes online. + */ +int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, true); +} +EXPORT_SYMBOL_GPL(irq_force_affinity); + +int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m, + bool setaffinity) { unsigned long flags; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); @@ -465,12 +509,11 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) return -EINVAL; desc->affinity_hint = m; irq_put_desc_unlock(desc, flags); - /* set the initial affinity to prevent every interrupt being on CPU0 */ - if (m) + if (m && setaffinity) __irq_set_affinity(irq, m, false); return 0; } -EXPORT_SYMBOL_GPL(irq_set_affinity_hint); +EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint); static void irq_affinity_notify(struct work_struct *work) { @@ -518,7 +561,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) /* The release function is promised process context */ might_sleep(); - if (!desc || desc->istate & IRQS_NMI) + if (!desc || irq_is_nmi(desc)) return -EINVAL; /* Complete initialisation of *notify */ @@ -686,10 +729,13 @@ EXPORT_SYMBOL(disable_irq_nosync); * to complete before returning. If you use this function while * holding a resource the IRQ handler may need you will deadlock. * - * This function may be called - with care - from IRQ context. + * Can only be called from preemptible code as it might sleep when + * an interrupt thread is associated to @irq. + * */ void disable_irq(unsigned int irq) { + might_sleep(); if (!__disable_irq_nosync(irq)) synchronize_irq(irq); } @@ -751,10 +797,14 @@ void __enable_irq(struct irq_desc *desc) irq_settings_set_noprobe(desc); /* * Call irq_startup() not irq_enable() here because the - * interrupt might be marked NOAUTOEN. So irq_startup() - * needs to be invoked when it gets enabled the first - * time. If it was already started up, then irq_startup() - * will invoke irq_enable() under the hood. + * interrupt might be marked NOAUTOEN so irq_startup() + * needs to be invoked when it gets enabled the first time. + * This is also required when __enable_irq() is invoked for + * a managed and shutdown interrupt from the S3 resume + * path. + * + * If it was already started up, then irq_startup() will + * invoke irq_enable() under the hood. */ irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE); break; @@ -849,7 +899,7 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on) return -EINVAL; /* Don't use NMIs as wake up interrupts please */ - if (desc->istate & IRQS_NMI) { + if (irq_is_nmi(desc)) { ret = -EINVAL; goto out_unlock; } @@ -1004,10 +1054,57 @@ static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id) return IRQ_NONE; } -static int irq_wait_for_interrupt(struct irqaction *action) +#ifdef CONFIG_SMP +/* + * Check whether we need to change the affinity of the interrupt thread. + */ +static void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) +{ + cpumask_var_t mask; + bool valid = false; + + if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) + return; + + __set_current_state(TASK_RUNNING); + + /* + * In case we are out of memory we set IRQTF_AFFINITY again and + * try again next time + */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { + set_bit(IRQTF_AFFINITY, &action->thread_flags); + return; + } + + raw_spin_lock_irq(&desc->lock); + /* + * This code is triggered unconditionally. Check the affinity + * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. + */ + if (cpumask_available(desc->irq_common_data.affinity)) { + const struct cpumask *m; + + m = irq_data_get_effective_affinity_mask(&desc->irq_data); + cpumask_copy(mask, m); + valid = true; + } + raw_spin_unlock_irq(&desc->lock); + + if (valid) + set_cpus_allowed_ptr(current, mask); + free_cpumask_var(mask); +} +#else +static inline void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } +#endif + +static int irq_wait_for_interrupt(struct irq_desc *desc, + struct irqaction *action) { for (;;) { set_current_state(TASK_INTERRUPTIBLE); + irq_thread_check_affinity(desc, action); if (kthread_should_stop()) { /* may need to run one last time */ @@ -1054,7 +1151,7 @@ again: * to IRQS_INPROGRESS and the irq line is masked forever. * * This also serializes the state of shared oneshot handlers - * versus "desc->threads_onehsot |= action->thread_mask;" in + * versus "desc->threads_oneshot |= action->thread_mask;" in * irq_wake_thread(). See the comment there which explains the * serialization. */ @@ -1084,51 +1181,21 @@ out_unlock: chip_bus_sync_unlock(desc); } -#ifdef CONFIG_SMP /* - * Check whether we need to change the affinity of the interrupt thread. + * Interrupts explicitly requested as threaded interrupts want to be + * preemptible - many of them need to sleep and wait for slow busses to + * complete. */ -static void -irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_thread_fn(struct irq_desc *desc, struct irqaction *action) { - cpumask_var_t mask; - bool valid = true; - - if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) - return; - - /* - * In case we are out of memory we set IRQTF_AFFINITY again and - * try again next time - */ - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { - set_bit(IRQTF_AFFINITY, &action->thread_flags); - return; - } - - raw_spin_lock_irq(&desc->lock); - /* - * This code is triggered unconditionally. Check the affinity - * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. - */ - if (cpumask_available(desc->irq_common_data.affinity)) { - const struct cpumask *m; + irqreturn_t ret = action->thread_fn(action->irq, action->dev_id); - m = irq_data_get_effective_affinity_mask(&desc->irq_data); - cpumask_copy(mask, m); - } else { - valid = false; - } - raw_spin_unlock_irq(&desc->lock); + if (ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); - if (valid) - set_cpus_allowed_ptr(current, mask); - free_cpumask_var(mask); + irq_finalize_oneshot(desc, action); + return ret; } -#else -static inline void -irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } -#endif /* * Interrupts which are not explicitly requested as threaded @@ -1136,40 +1203,21 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { } * context. So we need to disable bh here to avoid deadlocks and other * side effects. */ -static irqreturn_t -irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) +static irqreturn_t irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) { irqreturn_t ret; local_bh_disable(); - ret = action->thread_fn(action->irq, action->dev_id); - if (ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); - - irq_finalize_oneshot(desc, action); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); + ret = irq_thread_fn(desc, action); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); local_bh_enable(); return ret; } -/* - * Interrupts explicitly requested as threaded interrupts want to be - * preemtible - many of them need to sleep and wait for slow busses to - * complete. - */ -static irqreturn_t irq_thread_fn(struct irq_desc *desc, - struct irqaction *action) -{ - irqreturn_t ret; - - ret = action->thread_fn(action->irq, action->dev_id); - if (ret == IRQ_HANDLED) - atomic_inc(&desc->threads_handled); - - irq_finalize_oneshot(desc, action); - return ret; -} - -static void wake_threads_waitq(struct irq_desc *desc) +void wake_threads_waitq(struct irq_desc *desc) { if (atomic_dec_and_test(&desc->threads_active)) wake_up(&desc->wait_for_threads); @@ -1215,6 +1263,31 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action) } /* + * Internal function to notify that a interrupt thread is ready. + */ +static void irq_thread_set_ready(struct irq_desc *desc, + struct irqaction *action) +{ + set_bit(IRQTF_READY, &action->thread_flags); + wake_up(&desc->wait_for_threads); +} + +/* + * Internal function to wake up a interrupt thread and wait until it is + * ready. + */ +static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc, + struct irqaction *action) +{ + if (!action || !action->thread) + return; + + wake_up_process(action->thread); + wait_event(desc->wait_for_threads, + test_bit(IRQTF_READY, &action->thread_flags)); +} + +/* * Interrupt handler thread */ static int irq_thread(void *data) @@ -1225,8 +1298,12 @@ static int irq_thread(void *data) irqreturn_t (*handler_fn)(struct irq_desc *desc, struct irqaction *action); - if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, - &action->thread_flags)) + irq_thread_set_ready(desc, action); + + sched_set_fifo(current); + + if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, + &action->thread_flags)) handler_fn = irq_forced_thread_fn; else handler_fn = irq_thread_fn; @@ -1234,13 +1311,9 @@ static int irq_thread(void *data) init_task_work(&on_exit_work, irq_thread_dtor); task_work_add(current, &on_exit_work, TWA_NONE); - irq_thread_check_affinity(desc, action); - - while (!irq_wait_for_interrupt(action)) { + while (!irq_wait_for_interrupt(desc, action)) { irqreturn_t action_ret; - irq_thread_check_affinity(desc, action); - action_ret = handler_fn(desc, action); if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); @@ -1254,7 +1327,7 @@ static int irq_thread(void *data) * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the * oneshot mask bit can be set. */ - task_work_cancel(current, irq_thread_dtor); + task_work_cancel_func(current, irq_thread_dtor); return 0; } @@ -1287,7 +1360,7 @@ EXPORT_SYMBOL_GPL(irq_wake_thread); static int irq_setup_forced_threading(struct irqaction *new) { - if (!force_irqthreads) + if (!force_irqthreads()) return 0; if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)) return 0; @@ -1390,8 +1463,6 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) if (IS_ERR(t)) return PTR_ERR(t); - sched_set_fifo(t); - /* * We keep the reference to the task struct even if * the thread dies to avoid that the interrupt code @@ -1547,7 +1618,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ unsigned int oldtype; - if (desc->istate & IRQS_NMI) { + if (irq_is_nmi(desc)) { pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", new->name, irq, desc->irq_data.chip->name); ret = -EINVAL; @@ -1566,8 +1637,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!((old->flags & new->flags) & IRQF_SHARED) || - (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || - ((old->flags ^ new->flags) & IRQF_ONESHOT)) + (oldtype != (new->flags & IRQF_TRIGGER_MASK))) + goto mismatch; + + if ((old->flags & IRQF_ONESHOT) && + (new->flags & IRQF_COND_ONESHOT)) + new->flags |= IRQF_ONESHOT; + else if ((old->flags ^ new->flags) & IRQF_ONESHOT) goto mismatch; /* All handlers must agree on per-cpuness */ @@ -1649,8 +1725,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!shared) { - init_waitqueue_head(&desc->wait_for_threads); - /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, @@ -1682,8 +1756,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (new->flags & IRQF_PERCPU) { irqd_set(&desc->irq_data, IRQD_PER_CPU); irq_settings_set_per_cpu(desc); + if (new->flags & IRQF_NO_DEBUG) + irq_settings_set_no_debug(desc); } + if (noirqdebug) + irq_settings_set_no_debug(desc); + if (new->flags & IRQF_ONESHOT) desc->istate |= IRQS_ONESHOT; @@ -1693,7 +1772,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irqd_set(&desc->irq_data, IRQD_NO_BALANCING); } - if (irq_settings_can_autoenable(desc)) { + if (!(new->flags & IRQF_NO_AUTOEN) && + irq_settings_can_autoenable(desc)) { irq_startup(desc, IRQ_RESEND, IRQ_START_COND); } else { /* @@ -1740,14 +1820,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irq_setup_timings(desc, new); - /* - * Strictly no need to wake it up, but hung_task complains - * when no hard interrupt wakes the thread up. - */ - if (new->thread) - wake_up_process(new->thread); - if (new->secondary) - wake_up_process(new->secondary->thread); + wake_up_and_wait_for_irq_thread_ready(desc, new); + wake_up_and_wait_for_irq_thread_ready(desc, new->secondary); register_irq_proc(irq, desc); new->dir = NULL; @@ -1778,15 +1852,13 @@ out_thread: struct task_struct *t = new->thread; new->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); } if (new->secondary && new->secondary->thread) { struct task_struct *t = new->secondary->thread; new->secondary->thread = NULL; - kthread_stop(t); - put_task_struct(t); + kthread_stop_put(t); } out_mput: module_put(desc->owner); @@ -1872,7 +1944,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) * supports it also make sure that there is no (not yet serviced) * interrupt in flight at the hardware level. */ - __synchronize_hardirq(desc, true); + __synchronize_irq(desc); #ifdef CONFIG_DEBUG_SHIRQ /* @@ -1897,18 +1969,15 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) * the same bit to a newly requested action. */ if (action->thread) { - kthread_stop(action->thread); - put_task_struct(action->thread); - if (action->secondary && action->secondary->thread) { - kthread_stop(action->secondary->thread); - put_task_struct(action->secondary->thread); - } + kthread_stop_put(action->thread); + if (action->secondary && action->secondary->thread) + kthread_stop_put(action->secondary->thread); } /* Last action releases resources */ if (!desc->action) { /* - * Reaquire bus lock as irq_release_resources() might + * Reacquire bus lock as irq_release_resources() might * require it to deallocate resources over the slow bus. */ chip_bus_lock(desc); @@ -2007,7 +2076,7 @@ const void *free_nmi(unsigned int irq, void *dev_id) unsigned long flags; const void *devname; - if (!desc || WARN_ON(!(desc->istate & IRQS_NMI))) + if (!desc || WARN_ON(!irq_is_nmi(desc))) return NULL; if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) @@ -2031,9 +2100,9 @@ const void *free_nmi(unsigned int irq, void *dev_id) * request_threaded_irq - allocate an interrupt line * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs. - * Primary handler for threaded interrupts - * If NULL and thread_fn != NULL the default - * primary handler is installed + * Primary handler for threaded interrupts. + * If handler is NULL and thread_fn != NULL + * the default primary handler is installed. * @thread_fn: Function called from the irq handler thread * If NULL, no irq thread is created * @irqflags: Interrupt type flags @@ -2067,7 +2136,7 @@ const void *free_nmi(unsigned int irq, void *dev_id) * * IRQF_SHARED Interrupt is shared * IRQF_TRIGGER_* Specify active edge(s) or level - * + * IRQF_ONESHOT Run thread_fn with interrupt line masked */ int request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long irqflags, @@ -2086,10 +2155,15 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, * which interrupt is which (messes up the interrupt freeing * logic etc). * + * Also shared interrupts do not go well with disabling auto enable. + * The sharing interrupt might request it while it's still disabled + * and then wait for interrupts forever. + * * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and * it cannot be set along with IRQF_NO_SUSPEND. */ if (((irqflags & IRQF_SHARED) && !dev_id) || + ((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN)) || (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) return -EINVAL; @@ -2245,7 +2319,8 @@ int request_nmi(unsigned int irq, irq_handler_t handler, desc = irq_to_desc(irq); - if (!desc || irq_settings_can_autoenable(desc) || + if (!desc || (irq_settings_can_autoenable(desc) && + !(irqflags & IRQF_NO_AUTOEN)) || !irq_settings_can_request(desc) || WARN_ON(irq_settings_is_per_cpu_devid(desc)) || !irq_supports_nmi(desc)) @@ -2467,7 +2542,7 @@ void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) if (!desc || !irq_settings_is_per_cpu_devid(desc)) return; - if (WARN_ON(!(desc->istate & IRQS_NMI))) + if (WARN_ON(!irq_is_nmi(desc))) return; kfree(__free_percpu_irq(irq, dev_id)); @@ -2603,7 +2678,7 @@ int request_percpu_nmi(unsigned int irq, irq_handler_t handler, return -EINVAL; /* The line cannot already be NMI */ - if (desc->istate & IRQS_NMI) + if (irq_is_nmi(desc)) return -EINVAL; action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); @@ -2664,7 +2739,7 @@ int prepare_percpu_nmi(unsigned int irq) if (!desc) return -EINVAL; - if (WARN(!(desc->istate & IRQS_NMI), + if (WARN(!irq_is_nmi(desc), KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", irq)) { ret = -EINVAL; @@ -2706,7 +2781,7 @@ void teardown_percpu_nmi(unsigned int irq) if (!desc) return; - if (WARN_ON(!(desc->istate & IRQS_NMI))) + if (WARN_ON(!irq_is_nmi(desc))) goto out; irq_nmi_teardown(desc); @@ -2742,7 +2817,7 @@ int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, * irq_get_irqchip_state - returns the irqchip state of a interrupt. * @irq: Interrupt line that is forwarded to a VM * @which: One of IRQCHIP_STATE_* the caller wants to know about - * @state: a pointer to a boolean where the state is to be storeed + * @state: a pointer to a boolean where the state is to be stored * * This call snapshots the internal irqchip state of an * interrupt, returning into @state the bit corresponding to @@ -2781,7 +2856,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); * This call sets the internal irqchip state of an interrupt, * depending on the value of @which. * - * This function should be called with preemption disabled if the + * This function should be called with migration disabled if the * interrupt controller has per-cpu registers. */ int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 651a4ad6d711..8f222d1cccec 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -8,8 +8,6 @@ #include <linux/cpu.h> #include <linux/irq.h> -#define IRQ_MATRIX_SIZE (BITS_TO_LONGS(IRQ_MATRIX_BITS)) - struct cpumap { unsigned int available; unsigned int allocated; @@ -17,8 +15,8 @@ struct cpumap { unsigned int managed_allocated; bool initialized; bool online; - unsigned long alloc_map[IRQ_MATRIX_SIZE]; - unsigned long managed_map[IRQ_MATRIX_SIZE]; + unsigned long *managed_map; + unsigned long alloc_map[]; }; struct irq_matrix { @@ -32,8 +30,8 @@ struct irq_matrix { unsigned int total_allocated; unsigned int online_maps; struct cpumap __percpu *maps; - unsigned long scratch_map[IRQ_MATRIX_SIZE]; - unsigned long system_map[IRQ_MATRIX_SIZE]; + unsigned long *system_map; + unsigned long scratch_map[]; }; #define CREATE_TRACE_POINTS @@ -50,24 +48,32 @@ __init struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits, unsigned int alloc_start, unsigned int alloc_end) { + unsigned int cpu, matrix_size = BITS_TO_LONGS(matrix_bits); struct irq_matrix *m; - if (matrix_bits > IRQ_MATRIX_BITS) - return NULL; - - m = kzalloc(sizeof(*m), GFP_KERNEL); + m = kzalloc(struct_size(m, scratch_map, matrix_size * 2), GFP_KERNEL); if (!m) return NULL; + m->system_map = &m->scratch_map[matrix_size]; + m->matrix_bits = matrix_bits; m->alloc_start = alloc_start; m->alloc_end = alloc_end; m->alloc_size = alloc_end - alloc_start; - m->maps = alloc_percpu(*m->maps); + m->maps = __alloc_percpu(struct_size(m->maps, alloc_map, matrix_size * 2), + __alignof__(*m->maps)); if (!m->maps) { kfree(m); return NULL; } + + for_each_possible_cpu(cpu) { + struct cpumap *cm = per_cpu_ptr(m->maps, cpu); + + cm->managed_map = &cm->alloc_map[matrix_size]; + } + return m; } @@ -280,12 +286,13 @@ void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk) /** * irq_matrix_alloc_managed - Allocate a managed interrupt in a CPU map * @m: Matrix pointer - * @cpu: On which CPU the interrupt should be allocated + * @msk: Which CPUs to search in + * @mapped_cpu: Pointer to store the CPU for which the irq was allocated */ int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk, unsigned int *mapped_cpu) { - unsigned int bit, cpu, end = m->alloc_end; + unsigned int bit, cpu, end; struct cpumap *cm; if (cpumask_empty(msk)) @@ -337,15 +344,14 @@ void irq_matrix_assign(struct irq_matrix *m, unsigned int bit) * irq_matrix_reserve - Reserve interrupts * @m: Matrix pointer * - * This is merily a book keeping call. It increments the number of globally + * This is merely a book keeping call. It increments the number of globally * reserved interrupt bits w/o actually allocating them. This allows to * setup interrupt descriptors w/o assigning low level resources to it. * The actual allocation happens when the interrupt gets activated. */ void irq_matrix_reserve(struct irq_matrix *m) { - if (m->global_reserved <= m->global_available && - m->global_reserved + 1 > m->global_available) + if (m->global_reserved == m->global_available) pr_warn("Interrupt reservation exceeds available resources\n"); m->global_reserved++; @@ -356,7 +362,7 @@ void irq_matrix_reserve(struct irq_matrix *m) * irq_matrix_remove_reserved - Remove interrupt reservation * @m: Matrix pointer * - * This is merily a book keeping call. It decrements the number of globally + * This is merely a book keeping call. It decrements the number of globally * reserved interrupt bits. This is used to undo irq_matrix_reserve() when the * interrupt was never in use and a real vector allocated, which undid the * reservation. @@ -423,7 +429,9 @@ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end)) return; - clear_bit(bit, cm->alloc_map); + if (WARN_ON_ONCE(!test_and_clear_bit(bit, cm->alloc_map))) + return; + cm->allocated--; if(managed) cm->managed_allocated--; @@ -464,16 +472,16 @@ unsigned int irq_matrix_reserved(struct irq_matrix *m) } /** - * irq_matrix_allocated - Get the number of allocated irqs on the local cpu + * irq_matrix_allocated - Get the number of allocated non-managed irqs on the local CPU * @m: Pointer to the matrix to search * - * This returns number of allocated irqs + * This returns number of allocated non-managed interrupts. */ unsigned int irq_matrix_allocated(struct irq_matrix *m) { struct cpumap *cm = this_cpu_ptr(m->maps); - return cm->allocated; + return cm->allocated - cm->managed_allocated; } #ifdef CONFIG_GENERIC_IRQ_DEBUGFS diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index def48589ea48..eb150afd671f 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -7,7 +7,7 @@ /** * irq_fixup_move_pending - Cleanup irq move pending from a dying CPU - * @desc: Interrupt descpriptor to clean up + * @desc: Interrupt descriptor to clean up * @force_clear: If set clear the move pending bit unconditionally. * If not set, clear it only when the dying CPU is the * last one in the pending mask. @@ -26,7 +26,7 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear) * The outgoing CPU might be the last online target in a pending * interrupt move. If that's the case clear the pending move bit. */ - if (cpumask_any_and(desc->pending_mask, cpu_online_mask) >= nr_cpu_ids) { + if (!cpumask_intersects(desc->pending_mask, cpu_online_mask)) { irqd_clr_move_pending(data); return false; } @@ -74,7 +74,7 @@ void irq_move_masked_irq(struct irq_data *idata) * For correct operation this depends on the caller * masking the irqs. */ - if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids) { + if (cpumask_intersects(desc->pending_mask, cpu_online_mask)) { int ret; ret = irq_do_set_affinity(data, desc->pending_mask, false); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index b338d622f26e..396a067a8a56 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -5,55 +5,268 @@ * * This file is licensed under GPLv2. * - * This file contains common code to support Message Signalled Interrupt for + * This file contains common code to support Message Signaled Interrupts for * PCI compatible and non PCI compatible devices. */ -#include <linux/types.h> #include <linux/device.h> #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/msi.h> +#include <linux/mutex.h> +#include <linux/pci.h> #include <linux/slab.h> +#include <linux/sysfs.h> +#include <linux/types.h> +#include <linux/xarray.h> #include "internals.h" /** - * alloc_msi_entry - Allocate an initialize msi_entry + * struct msi_device_data - MSI per device data + * @properties: MSI properties which are interesting to drivers + * @mutex: Mutex protecting the MSI descriptor store + * @__domains: Internal data for per device MSI domains + * @__iter_idx: Index to search the next entry for iterators + */ +struct msi_device_data { + unsigned long properties; + struct mutex mutex; + struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS]; + unsigned long __iter_idx; +}; + +/** + * struct msi_ctrl - MSI internal management control structure + * @domid: ID of the domain on which management operations should be done + * @first: First (hardware) slot index to operate on + * @last: Last (hardware) slot index to operate on + * @nirqs: The number of Linux interrupts to allocate. Can be larger + * than the range due to PCI/multi-MSI. + */ +struct msi_ctrl { + unsigned int domid; + unsigned int first; + unsigned int last; + unsigned int nirqs; +}; + +/* Invalid Xarray index which is outside of any searchable range */ +#define MSI_XA_MAX_INDEX (ULONG_MAX - 1) +/* The maximum domain size */ +#define MSI_XA_DOMAIN_SIZE (MSI_MAX_INDEX + 1) + +static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl); +static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid); +static inline int msi_sysfs_create_group(struct device *dev); + + +/** + * msi_alloc_desc - Allocate an initialized msi_desc * @dev: Pointer to the device for which this is allocated * @nvec: The number of vectors used in this entry * @affinity: Optional pointer to an affinity mask array size of @nvec * - * If @affinity is not NULL then an affinity array[@nvec] is allocated + * If @affinity is not %NULL then an affinity array[@nvec] is allocated * and the affinity masks and flags from @affinity are copied. + * + * Return: pointer to allocated &msi_desc on success or %NULL on failure */ -struct msi_desc *alloc_msi_entry(struct device *dev, int nvec, - const struct irq_affinity_desc *affinity) +static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec, + const struct irq_affinity_desc *affinity) { - struct msi_desc *desc; + struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); - desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) return NULL; - INIT_LIST_HEAD(&desc->list); desc->dev = dev; desc->nvec_used = nvec; if (affinity) { - desc->affinity = kmemdup(affinity, - nvec * sizeof(*desc->affinity), GFP_KERNEL); + desc->affinity = kmemdup_array(affinity, nvec, sizeof(*desc->affinity), GFP_KERNEL); if (!desc->affinity) { kfree(desc); return NULL; } } - return desc; } -void free_msi_entry(struct msi_desc *entry) +static void msi_free_desc(struct msi_desc *desc) { - kfree(entry->affinity); - kfree(entry); + kfree(desc->affinity); + kfree(desc); +} + +static int msi_insert_desc(struct device *dev, struct msi_desc *desc, + unsigned int domid, unsigned int index) +{ + struct msi_device_data *md = dev->msi.data; + struct xarray *xa = &md->__domains[domid].store; + unsigned int hwsize; + int ret; + + hwsize = msi_domain_get_hwsize(dev, domid); + + if (index == MSI_ANY_INDEX) { + struct xa_limit limit = { .min = 0, .max = hwsize - 1 }; + unsigned int index; + + /* Let the xarray allocate a free index within the limit */ + ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL); + if (ret) + goto fail; + + desc->msi_index = index; + return 0; + } else { + if (index >= hwsize) { + ret = -ERANGE; + goto fail; + } + + desc->msi_index = index; + ret = xa_insert(xa, index, desc, GFP_KERNEL); + if (ret) + goto fail; + return 0; + } +fail: + msi_free_desc(desc); + return ret; +} + +/** + * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and + * insert it at @init_desc->msi_index + * + * @dev: Pointer to the device for which the descriptor is allocated + * @domid: The id of the interrupt domain to which the desriptor is added + * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor + * + * Return: 0 on success or an appropriate failure code. + */ +int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, + struct msi_desc *init_desc) +{ + struct msi_desc *desc; + + lockdep_assert_held(&dev->msi.data->mutex); + + desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity); + if (!desc) + return -ENOMEM; + + /* Copy type specific data to the new descriptor. */ + desc->pci = init_desc->pci; + + return msi_insert_desc(dev, desc, domid, init_desc->msi_index); +} + +static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) +{ + switch (filter) { + case MSI_DESC_ALL: + return true; + case MSI_DESC_NOTASSOCIATED: + return !desc->irq; + case MSI_DESC_ASSOCIATED: + return !!desc->irq; + } + WARN_ON_ONCE(1); + return false; +} + +static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl) +{ + unsigned int hwsize; + + if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS || + (dev->msi.domain && + !dev->msi.data->__domains[ctrl->domid].domain))) + return false; + + hwsize = msi_domain_get_hwsize(dev, ctrl->domid); + if (WARN_ON_ONCE(ctrl->first > ctrl->last || + ctrl->first >= hwsize || + ctrl->last >= hwsize)) + return false; + return true; +} + +static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_desc *desc; + struct xarray *xa; + unsigned long idx; + + lockdep_assert_held(&dev->msi.data->mutex); + + if (!msi_ctrl_valid(dev, ctrl)) + return; + + xa = &dev->msi.data->__domains[ctrl->domid].store; + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + xa_erase(xa, idx); + + /* Leak the descriptor when it is still referenced */ + if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED))) + continue; + msi_free_desc(desc); + } +} + +/** + * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain + * @dev: Device for which to free the descriptors + * @domid: Id of the domain to operate on + * @first: Index to start freeing from (inclusive) + * @last: Last index to be freed (inclusive) + */ +void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + }; + + msi_domain_free_descs(dev, &ctrl); +} + +/** + * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors + * @dev: Pointer to the device for which the descriptors are allocated + * @ctrl: Allocation control struct + * + * Return: 0 on success or an appropriate failure code. + */ +static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_desc *desc; + unsigned int idx; + int ret; + + lockdep_assert_held(&dev->msi.data->mutex); + + if (!msi_ctrl_valid(dev, ctrl)) + return -EINVAL; + + for (idx = ctrl->first; idx <= ctrl->last; idx++) { + desc = msi_alloc_desc(dev, 1, NULL); + if (!desc) + goto fail_mem; + ret = msi_insert_desc(dev, desc, ctrl->domid, idx); + if (ret) + goto fail; + } + return 0; + +fail_mem: + ret = -ENOMEM; +fail: + msi_domain_free_descs(dev, ctrl); + return ret; } void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) @@ -69,7 +282,354 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) } EXPORT_SYMBOL_GPL(get_cached_msi_msg); -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +static void msi_device_data_release(struct device *dev, void *res) +{ + struct msi_device_data *md = res; + int i; + + for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) { + msi_remove_device_irq_domain(dev, i); + WARN_ON_ONCE(!xa_empty(&md->__domains[i].store)); + xa_destroy(&md->__domains[i].store); + } + dev->msi.data = NULL; +} + +/** + * msi_setup_device_data - Setup MSI device data + * @dev: Device for which MSI device data should be set up + * + * Return: 0 on success, appropriate error code otherwise + * + * This can be called more than once for @dev. If the MSI device data is + * already allocated the call succeeds. The allocated memory is + * automatically released when the device is destroyed. + */ +int msi_setup_device_data(struct device *dev) +{ + struct msi_device_data *md; + int ret, i; + + if (dev->msi.data) + return 0; + + md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL); + if (!md) + return -ENOMEM; + + ret = msi_sysfs_create_group(dev); + if (ret) { + devres_free(md); + return ret; + } + + for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) + xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC); + + /* + * If @dev::msi::domain is set and is a global MSI domain, copy the + * pointer into the domain array so all code can operate on domain + * ids. The NULL pointer check is required to keep the legacy + * architecture specific PCI/MSI support working. + */ + if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain)) + md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain; + + mutex_init(&md->mutex); + dev->msi.data = md; + devres_add(dev, md); + return 0; +} + +/** + * msi_lock_descs - Lock the MSI descriptor storage of a device + * @dev: Device to operate on + */ +void msi_lock_descs(struct device *dev) +{ + mutex_lock(&dev->msi.data->mutex); +} +EXPORT_SYMBOL_GPL(msi_lock_descs); + +/** + * msi_unlock_descs - Unlock the MSI descriptor storage of a device + * @dev: Device to operate on + */ +void msi_unlock_descs(struct device *dev) +{ + /* Invalidate the index which was cached by the iterator */ + dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX; + mutex_unlock(&dev->msi.data->mutex); +} +EXPORT_SYMBOL_GPL(msi_unlock_descs); + +static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid, + enum msi_desc_filter filter) +{ + struct xarray *xa = &md->__domains[domid].store; + struct msi_desc *desc; + + xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) { + if (msi_desc_match(desc, filter)) + return desc; + } + md->__iter_idx = MSI_XA_MAX_INDEX; + return NULL; +} + +/** + * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device + * @dev: Device to operate on + * @domid: The id of the interrupt domain which should be walked. + * @filter: Descriptor state filter + * + * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs() + * must be invoked before the call. + * + * Return: Pointer to the first MSI descriptor matching the search + * criteria, NULL if none found. + */ +struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter) +{ + struct msi_device_data *md = dev->msi.data; + + if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return NULL; + + lockdep_assert_held(&md->mutex); + + md->__iter_idx = 0; + return msi_find_desc(md, domid, filter); +} +EXPORT_SYMBOL_GPL(msi_domain_first_desc); + +/** + * msi_next_desc - Get the next MSI descriptor of a device + * @dev: Device to operate on + * @domid: The id of the interrupt domain which should be walked. + * @filter: Descriptor state filter + * + * The first invocation of msi_next_desc() has to be preceeded by a + * successful invocation of __msi_first_desc(). Consecutive invocations are + * only valid if the previous one was successful. All these operations have + * to be done within the same MSI mutex held region. + * + * Return: Pointer to the next MSI descriptor matching the search + * criteria, NULL if none found. + */ +struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter) +{ + struct msi_device_data *md = dev->msi.data; + + if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return NULL; + + lockdep_assert_held(&md->mutex); + + if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX) + return NULL; + + md->__iter_idx++; + return msi_find_desc(md, domid, filter); +} +EXPORT_SYMBOL_GPL(msi_next_desc); + +/** + * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain + * @dev: Device to operate on + * @domid: Domain ID of the interrupt domain associated to the device + * @index: MSI interrupt index to look for (0-based) + * + * Return: The Linux interrupt number on success (> 0), 0 if not found + */ +unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index) +{ + struct msi_desc *desc; + unsigned int ret = 0; + bool pcimsi = false; + struct xarray *xa; + + if (!dev->msi.data) + return 0; + + if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return 0; + + /* This check is only valid for the PCI default MSI domain */ + if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN) + pcimsi = to_pci_dev(dev)->msi_enabled; + + msi_lock_descs(dev); + xa = &dev->msi.data->__domains[domid].store; + desc = xa_load(xa, pcimsi ? 0 : index); + if (desc && desc->irq) { + /* + * PCI-MSI has only one descriptor for multiple interrupts. + * PCI-MSIX and platform MSI use a descriptor per + * interrupt. + */ + if (pcimsi) { + if (index < desc->nvec_used) + ret = desc->irq + index; + } else { + ret = desc->irq; + } + } + + msi_unlock_descs(dev); + return ret; +} +EXPORT_SYMBOL_GPL(msi_domain_get_virq); + +#ifdef CONFIG_SYSFS +static struct attribute *msi_dev_attrs[] = { + NULL +}; + +static const struct attribute_group msi_irqs_group = { + .name = "msi_irqs", + .attrs = msi_dev_attrs, +}; + +static inline int msi_sysfs_create_group(struct device *dev) +{ + return devm_device_add_group(dev, &msi_irqs_group); +} + +static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + /* MSI vs. MSIX is per device not per interrupt */ + bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false; + + return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi"); +} + +static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) +{ + struct device_attribute *attrs = desc->sysfs_attrs; + int i; + + if (!attrs) + return; + + desc->sysfs_attrs = NULL; + for (i = 0; i < desc->nvec_used; i++) { + if (attrs[i].show) + sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name); + kfree(attrs[i].attr.name); + } + kfree(attrs); +} + +static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) +{ + struct device_attribute *attrs; + int ret, i; + + attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + desc->sysfs_attrs = attrs; + for (i = 0; i < desc->nvec_used; i++) { + sysfs_attr_init(&attrs[i].attr); + attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i); + if (!attrs[i].attr.name) { + ret = -ENOMEM; + goto fail; + } + + attrs[i].attr.mode = 0444; + attrs[i].show = msi_mode_show; + + ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name); + if (ret) { + attrs[i].show = NULL; + goto fail; + } + } + return 0; + +fail: + msi_sysfs_remove_desc(dev, desc); + return ret; +} + +#if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN) +/** + * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device + * @dev: The device (PCI, platform etc) which will get sysfs entries + */ +int msi_device_populate_sysfs(struct device *dev) +{ + struct msi_desc *desc; + int ret; + + msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) { + if (desc->sysfs_attrs) + continue; + ret = msi_sysfs_populate_desc(dev, desc); + if (ret) + return ret; + } + return 0; +} + +/** + * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device + * @dev: The device (PCI, platform etc) for which to remove + * sysfs entries + */ +void msi_device_destroy_sysfs(struct device *dev) +{ + struct msi_desc *desc; + + msi_for_each_desc(desc, dev, MSI_DESC_ALL) + msi_sysfs_remove_desc(dev, desc); +} +#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK || CONFIG_PCI_XEN */ +#else /* CONFIG_SYSFS */ +static inline int msi_sysfs_create_group(struct device *dev) { return 0; } +static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; } +static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { } +#endif /* !CONFIG_SYSFS */ + +static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid) +{ + struct irq_domain *domain; + + lockdep_assert_held(&dev->msi.data->mutex); + + if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return NULL; + + domain = dev->msi.data->__domains[domid].domain; + if (!domain) + return NULL; + + if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain))) + return NULL; + + return domain; +} + +static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid) +{ + struct msi_domain_info *info; + struct irq_domain *domain; + + domain = msi_get_device_domain(dev, domid); + if (domain) { + info = domain->host_data; + return info->hwsize; + } + /* No domain, default to MSI_XA_DOMAIN_SIZE */ + return MSI_XA_DOMAIN_SIZE; +} + static inline void irq_chip_write_msi_msg(struct irq_data *data, struct msi_msg *msg) { @@ -97,6 +657,8 @@ static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg) * * Intended to be used by MSI interrupt controllers which are * implemented with hierarchical domains. + * + * Return: IRQ_SET_MASK_* result code */ int msi_domain_set_affinity(struct irq_data *irq_data, const struct cpumask *mask, bool force) @@ -156,7 +718,7 @@ static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq, ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg); if (ret < 0) { if (ops->msi_free) { - for (i--; i > 0; i--) + for (i--; i >= 0; i--) ops->msi_free(domain, info, virq + i); } irq_domain_free_irqs_top(domain, virq, nr_irqs); @@ -180,11 +742,26 @@ static void msi_domain_free(struct irq_domain *domain, unsigned int virq, irq_domain_free_irqs_top(domain, virq, nr_irqs); } +static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, unsigned int *type) +{ + struct msi_domain_info *info = domain->host_data; + + /* + * This will catch allocations through the regular irqdomain path except + * for MSI domains which really support this, e.g. MBIGEN. + */ + if (!info->ops->msi_translate) + return -ENOTSUPP; + return info->ops->msi_translate(domain, fwspec, hwirq, type); +} + static const struct irq_domain_ops msi_domain_ops = { .alloc = msi_domain_alloc, .free = msi_domain_free, .activate = msi_domain_activate, .deactivate = msi_domain_deactivate, + .translate = msi_domain_translate, }; static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info, @@ -221,21 +798,11 @@ static int msi_domain_ops_init(struct irq_domain *domain, return 0; } -static int msi_domain_ops_check(struct irq_domain *domain, - struct msi_domain_info *info, - struct device *dev) -{ - return 0; -} - static struct msi_domain_ops msi_domain_ops_default = { .get_hwirq = msi_domain_ops_get_hwirq, .msi_init = msi_domain_ops_init, - .msi_check = msi_domain_ops_check, .msi_prepare = msi_domain_ops_prepare, .set_desc = msi_domain_ops_set_desc, - .domain_alloc_irqs = __msi_domain_alloc_irqs, - .domain_free_irqs = __msi_domain_free_irqs, }; static void msi_domain_update_dom_ops(struct msi_domain_info *info) @@ -247,11 +814,6 @@ static void msi_domain_update_dom_ops(struct msi_domain_info *info) return; } - if (ops->domain_alloc_irqs == NULL) - ops->domain_alloc_irqs = msi_domain_ops_default.domain_alloc_irqs; - if (ops->domain_free_irqs == NULL) - ops->domain_free_irqs = msi_domain_ops_default.domain_free_irqs; - if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS)) return; @@ -259,8 +821,6 @@ static void msi_domain_update_dom_ops(struct msi_domain_info *info) ops->get_hwirq = msi_domain_ops_default.get_hwirq; if (ops->msi_init == NULL) ops->msi_init = msi_domain_ops_default.msi_init; - if (ops->msi_check == NULL) - ops->msi_check = msi_domain_ops_default.msi_check; if (ops->msi_prepare == NULL) ops->msi_prepare = msi_domain_ops_default.msi_prepare; if (ops->set_desc == NULL) @@ -272,90 +832,287 @@ static void msi_domain_update_chip_ops(struct msi_domain_info *info) struct irq_chip *chip = info->chip; BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask); - if (!chip->irq_set_affinity) + if (!chip->irq_set_affinity && !(info->flags & MSI_FLAG_NO_AFFINITY)) chip->irq_set_affinity = msi_domain_set_affinity; } +static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode, + struct msi_domain_info *info, + unsigned int flags, + struct irq_domain *parent) +{ + struct irq_domain *domain; + + if (info->hwsize > MSI_XA_DOMAIN_SIZE) + return NULL; + + /* + * Hardware size 0 is valid for backwards compatibility and for + * domains which are not backed by a hardware table. Grant the + * maximum index space. + */ + if (!info->hwsize) + info->hwsize = MSI_XA_DOMAIN_SIZE; + + msi_domain_update_dom_ops(info); + if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) + msi_domain_update_chip_ops(info); + + domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0, + fwnode, &msi_domain_ops, info); + + if (domain) { + irq_domain_update_bus_token(domain, info->bus_token); + if (info->flags & MSI_FLAG_PARENT_PM_DEV) + domain->pm_dev = parent->pm_dev; + } + + return domain; +} + /** - * msi_create_irq_domain - Create a MSI interrupt domain + * msi_create_irq_domain - Create an MSI interrupt domain * @fwnode: Optional fwnode of the interrupt controller * @info: MSI domain info * @parent: Parent irq domain + * + * Return: pointer to the created &struct irq_domain or %NULL on failure */ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent) { - struct irq_domain *domain; - - msi_domain_update_dom_ops(info); - if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) - msi_domain_update_chip_ops(info); + return __msi_create_irq_domain(fwnode, info, 0, parent); +} - domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0, - fwnode, &msi_domain_ops, info); +/** + * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down + * in the domain hierarchy + * @dev: The device for which the domain should be created + * @domain: The domain in the hierarchy this op is being called on + * @msi_parent_domain: The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to + * be created + * @msi_child_info: The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE + * domain to be created + * + * Return: true on success, false otherwise + * + * This is the most complex problem of per device MSI domains and the + * underlying interrupt domain hierarchy: + * + * The device domain to be initialized requests the broadest feature set + * possible and the underlying domain hierarchy puts restrictions on it. + * + * That's trivial for a simple parent->child relationship, but it gets + * interesting with an intermediate domain: root->parent->child. The + * intermediate 'parent' can expand the capabilities which the 'root' + * domain is providing. So that creates a classic hen and egg problem: + * Which entity is doing the restrictions/expansions? + * + * One solution is to let the root domain handle the initialization that's + * why there is the @domain and the @msi_parent_domain pointer. + */ +bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *msi_parent_domain, + struct msi_domain_info *msi_child_info) +{ + struct irq_domain *parent = domain->parent; - if (domain && !domain->name && info->chip) - domain->name = info->chip->name; + if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops || + !parent->msi_parent_ops->init_dev_msi_info)) + return false; - return domain; + return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain, + msi_child_info); } -int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, - int nvec, msi_alloc_info_t *arg) +/** + * msi_create_device_irq_domain - Create a device MSI interrupt domain + * @dev: Pointer to the device + * @domid: Domain id + * @template: MSI domain info bundle used as template + * @hwsize: Maximum number of MSI table entries (0 if unknown or unlimited) + * @domain_data: Optional pointer to domain specific data which is set in + * msi_domain_info::data + * @chip_data: Optional pointer to chip specific data which is set in + * msi_domain_info::chip_data + * + * Return: True on success, false otherwise + * + * There is no firmware node required for this interface because the per + * device domains are software constructs which are actually closer to the + * hardware reality than any firmware can describe them. + * + * The domain name and the irq chip name for a MSI device domain are + * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)" + * + * $PREFIX: Optional prefix provided by the underlying MSI parent domain + * via msi_parent_ops::prefix. If that pointer is NULL the prefix + * is empty. + * $CHIPNAME: The name of the irq_chip in @template + * $DEVNAME: The name of the device + * + * This results in understandable chip names and hardware interrupt numbers + * in e.g. /proc/interrupts + * + * PCI-MSI-0000:00:1c.0 0-edge Parent domain has no prefix + * IR-PCI-MSI-0000:00:1c.4 0-edge Same with interrupt remapping prefix 'IR-' + * + * IR-PCI-MSIX-0000:3d:00.0 0-edge Hardware interrupt numbers reflect + * IR-PCI-MSIX-0000:3d:00.0 1-edge the real MSI-X index on that device + * IR-PCI-MSIX-0000:3d:00.0 2-edge + * + * On IMS domains the hardware interrupt number is either a table entry + * index or a purely software managed index but it is guaranteed to be + * unique. + * + * The domain pointer is stored in @dev::msi::data::__irqdomains[]. All + * subsequent operations on the domain depend on the domain id. + * + * The domain is automatically freed when the device is removed via devres + * in the context of @dev::msi::data freeing, but it can also be + * independently removed via @msi_remove_device_irq_domain(). + */ +bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, + const struct msi_domain_template *template, + unsigned int hwsize, void *domain_data, + void *chip_data) { - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; - int ret; + struct irq_domain *domain, *parent = dev->msi.domain; + struct fwnode_handle *fwnode, *fwnalloced = NULL; + struct msi_domain_template *bundle; + const struct msi_parent_ops *pops; - ret = ops->msi_check(domain, info, dev); - if (ret == 0) - ret = ops->msi_prepare(domain, dev, nvec, arg); + if (!irq_domain_is_msi_parent(parent)) + return false; - return ret; + if (domid >= MSI_MAX_DEVICE_IRQDOMAINS) + return false; + + bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL); + if (!bundle) + return false; + + bundle->info.hwsize = hwsize; + bundle->info.chip = &bundle->chip; + bundle->info.ops = &bundle->ops; + bundle->info.data = domain_data; + bundle->info.chip_data = chip_data; + + pops = parent->msi_parent_ops; + snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s", + pops->prefix ? : "", bundle->chip.name, dev_name(dev)); + bundle->chip.name = bundle->name; + + /* + * Using the device firmware node is required for wire to MSI + * device domains so that the existing firmware results in a domain + * match. + * All other device domains like PCI/MSI use the named firmware + * node as they are not guaranteed to have a fwnode. They are never + * looked up and always handled in the context of the device. + */ + if (bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE) + fwnode = dev->fwnode; + else + fwnode = fwnalloced = irq_domain_alloc_named_fwnode(bundle->name); + + if (!fwnode) + goto free_bundle; + + if (msi_setup_device_data(dev)) + goto free_fwnode; + + msi_lock_descs(dev); + + if (WARN_ON_ONCE(msi_get_device_domain(dev, domid))) + goto fail; + + if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info)) + goto fail; + + domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent); + if (!domain) + goto fail; + + domain->dev = dev; + dev->msi.data->__domains[domid].domain = domain; + msi_unlock_descs(dev); + return true; + +fail: + msi_unlock_descs(dev); +free_fwnode: + irq_domain_free_fwnode(fwnalloced); +free_bundle: + kfree(bundle); + return false; } -int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, - int virq, int nvec, msi_alloc_info_t *arg) +/** + * msi_remove_device_irq_domain - Free a device MSI interrupt domain + * @dev: Pointer to the device + * @domid: Domain id + */ +void msi_remove_device_irq_domain(struct device *dev, unsigned int domid) { - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; - struct msi_desc *desc; - int ret = 0; + struct fwnode_handle *fwnode = NULL; + struct msi_domain_info *info; + struct irq_domain *domain; - for_each_msi_entry(desc, dev) { - /* Don't even try the multi-MSI brain damage. */ - if (WARN_ON(!desc->irq || desc->nvec_used != 1)) { - ret = -EINVAL; - break; - } + msi_lock_descs(dev); - if (!(desc->irq >= virq && desc->irq < (virq + nvec))) - continue; + domain = msi_get_device_domain(dev, domid); - ops->set_desc(arg, desc); - /* Assumes the domain mutex is held! */ - ret = irq_domain_alloc_irqs_hierarchy(domain, desc->irq, 1, - arg); - if (ret) - break; + if (!domain || !irq_domain_is_msi_device(domain)) + goto unlock; - irq_set_msi_desc_off(desc->irq, 0, desc); - } + dev->msi.data->__domains[domid].domain = NULL; + info = domain->host_data; + if (irq_domain_is_msi_device(domain)) + fwnode = domain->fwnode; + irq_domain_remove(domain); + irq_domain_free_fwnode(fwnode); + kfree(container_of(info, struct msi_domain_template, info)); - if (ret) { - /* Mop up the damage */ - for_each_msi_entry(desc, dev) { - if (!(desc->irq >= virq && desc->irq < (virq + nvec))) - continue; +unlock: + msi_unlock_descs(dev); +} - irq_domain_free_irqs_common(domain, desc->irq, 1); - } - } +/** + * msi_match_device_irq_domain - Match a device irq domain against a bus token + * @dev: Pointer to the device + * @domid: Domain id + * @bus_token: Bus token to match against the domain bus token + * + * Return: True if device domain exists and bus tokens match. + */ +bool msi_match_device_irq_domain(struct device *dev, unsigned int domid, + enum irq_domain_bus_token bus_token) +{ + struct msi_domain_info *info; + struct irq_domain *domain; + bool ret = false; + msi_lock_descs(dev); + domain = msi_get_device_domain(dev, domid); + if (domain && irq_domain_is_msi_device(domain)) { + info = domain->host_data; + ret = info->bus_token == bus_token; + } + msi_unlock_descs(dev); return ret; } +static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct msi_domain_info *info = domain->host_data; + struct msi_domain_ops *ops = info->ops; + + return ops->msi_prepare(domain, dev, nvec, arg); +} + /* * Carefully check whether the device can use reservation mode. If * reservation mode is enabled then the early activation will assign a @@ -375,6 +1132,8 @@ static bool msi_check_reservation_mode(struct irq_domain *domain, switch(domain->bus_token) { case DOMAIN_BUS_PCI_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: case DOMAIN_BUS_VMD_MSI: break; default: @@ -389,161 +1148,575 @@ static bool msi_check_reservation_mode(struct irq_domain *domain, /* * Checking the first MSI descriptor is sufficient. MSIX supports - * masking and MSI does so when the maskbit is set. + * masking and MSI does so when the can_mask attribute is set. + */ + desc = msi_first_desc(dev, MSI_DESC_ALL); + return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask; +} + +static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc, + int allocated) +{ + switch(domain->bus_token) { + case DOMAIN_BUS_PCI_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: + case DOMAIN_BUS_VMD_MSI: + if (IS_ENABLED(CONFIG_PCI_MSI)) + break; + fallthrough; + default: + return -ENOSPC; + } + + /* Let a failed PCI multi MSI allocation retry */ + if (desc->nvec_used > 1) + return 1; + + /* If there was a successful allocation let the caller know */ + return allocated ? allocated : -ENOSPC; +} + +#define VIRQ_CAN_RESERVE 0x01 +#define VIRQ_ACTIVATE 0x02 + +static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags) +{ + struct irq_data *irqd = irq_domain_get_irq_data(domain, virq); + int ret; + + if (!(vflags & VIRQ_CAN_RESERVE)) { + irqd_clr_can_reserve(irqd); + + /* + * If the interrupt is managed but no CPU is available to + * service it, shut it down until better times. Note that + * we only do this on the !RESERVE path as x86 (the only + * architecture using this flag) deals with this in a + * different way by using a catch-all vector. + */ + if ((vflags & VIRQ_ACTIVATE) && + irqd_affinity_is_managed(irqd) && + !cpumask_intersects(irq_data_get_affinity_mask(irqd), + cpu_online_mask)) { + irqd_set_managed_shutdown(irqd); + return 0; + } + } + + if (!(vflags & VIRQ_ACTIVATE)) + return 0; + + ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE); + if (ret) + return ret; + /* + * If the interrupt uses reservation mode, clear the activated bit + * so request_irq() will assign the final vector. */ - desc = first_msi_entry(dev); - return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit; + if (vflags & VIRQ_CAN_RESERVE) + irqd_clr_activated(irqd); + return 0; } -int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec) +static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain, + struct msi_ctrl *ctrl) { + struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store; struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; - struct irq_data *irq_data; - struct msi_desc *desc; + unsigned int vflags = 0, allocated = 0; msi_alloc_info_t arg = { }; + struct msi_desc *desc; + unsigned long idx; int i, ret, virq; - bool can_reserve; - ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); + ret = msi_domain_prepare_irqs(domain, dev, ctrl->nirqs, &arg); if (ret) return ret; - for_each_msi_entry(desc, dev) { + /* + * This flag is set by the PCI layer as we need to activate + * the MSI entries before the PCI layer enables MSI in the + * card. Otherwise the card latches a random msi message. + */ + if (info->flags & MSI_FLAG_ACTIVATE_EARLY) + vflags |= VIRQ_ACTIVATE; + + /* + * Interrupt can use a reserved vector and will not occupy + * a real device vector until the interrupt is requested. + */ + if (msi_check_reservation_mode(domain, info, dev)) + vflags |= VIRQ_CAN_RESERVE; + + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED)) + continue; + + /* This should return -ECONFUSED... */ + if (WARN_ON_ONCE(allocated >= ctrl->nirqs)) + return -EINVAL; + + if (ops->prepare_desc) + ops->prepare_desc(domain, &arg, desc); + ops->set_desc(&arg, desc); virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used, dev_to_node(dev), &arg, false, desc->affinity); - if (virq < 0) { - ret = -ENOSPC; - if (ops->handle_error) - ret = ops->handle_error(domain, desc, ret); - if (ops->msi_finish) - ops->msi_finish(&arg, ret); - return ret; - } + if (virq < 0) + return msi_handle_pci_fail(domain, desc, allocated); for (i = 0; i < desc->nvec_used; i++) { irq_set_msi_desc_off(virq, i, desc); irq_debugfs_copy_devname(virq + i, dev); + ret = msi_init_virq(domain, virq + i, vflags); + if (ret) + return ret; + } + if (info->flags & MSI_FLAG_DEV_SYSFS) { + ret = msi_sysfs_populate_desc(dev, desc); + if (ret) + return ret; } + allocated++; } + return 0; +} - if (ops->msi_finish) - ops->msi_finish(&arg, 0); +static int msi_domain_alloc_simple_msi_descs(struct device *dev, + struct msi_domain_info *info, + struct msi_ctrl *ctrl) +{ + if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS)) + return 0; - can_reserve = msi_check_reservation_mode(domain, info, dev); + return msi_domain_add_simple_msi_descs(dev, ctrl); +} - /* - * This flag is set by the PCI layer as we need to activate - * the MSI entries before the PCI layer enables MSI in the - * card. Otherwise the card latches a random msi message. - */ - if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY)) - goto skip_activate; - - for_each_msi_vector(desc, i, dev) { - if (desc->irq == i) { - virq = desc->irq; - dev_dbg(dev, "irq [%d-%d] for MSI\n", - virq, virq + desc->nvec_used - 1); - } +static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_domain_info *info; + struct msi_domain_ops *ops; + struct irq_domain *domain; + int ret; - irq_data = irq_domain_get_irq_data(domain, i); - if (!can_reserve) { - irqd_clr_can_reserve(irq_data); - if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) - irqd_set_msi_nomask_quirk(irq_data); - } - ret = irq_domain_activate_irq(irq_data, can_reserve); - if (ret) - goto cleanup; + if (!msi_ctrl_valid(dev, ctrl)) + return -EINVAL; + + domain = msi_get_device_domain(dev, ctrl->domid); + if (!domain) + return -ENODEV; + + info = domain->host_data; + + ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl); + if (ret) + return ret; + + ops = info->ops; + if (ops->domain_alloc_irqs) + return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs); + + return __msi_domain_alloc_irqs(dev, domain, ctrl); +} + +static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl) +{ + int ret = __msi_domain_alloc_locked(dev, ctrl); + + if (ret) + msi_domain_free_locked(dev, ctrl); + return ret; +} + +/** + * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @first: First index to allocate (inclusive) + * @last: Last index to allocate (inclusive) + * + * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() + * pair. Use this for MSI irqdomains which implement their own descriptor + * allocation/free. + * + * Return: %0 on success or an error code. + */ +int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + .nirqs = last + 1 - first, + }; + + return msi_domain_alloc_locked(dev, &ctrl); +} + +/** + * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @first: First index to allocate (inclusive) + * @last: Last index to allocate (inclusive) + * + * Return: %0 on success or an error code. + */ +int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + int ret; + + msi_lock_descs(dev); + ret = msi_domain_alloc_irqs_range_locked(dev, domid, first, last); + msi_unlock_descs(dev); + return ret; +} +EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range); + +/** + * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain + * + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @nirqs: The number of interrupts to allocate + * + * This function scans all MSI descriptors of the MSI domain and allocates interrupts + * for all unassigned ones. That function is to be used for MSI domain usage where + * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X]. + * + * Return: %0 on success or an error code. + */ +int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = 0, + .last = msi_domain_get_hwsize(dev, domid) - 1, + .nirqs = nirqs, + }; + + return msi_domain_alloc_locked(dev, &ctrl); +} + +static struct msi_map __msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, + unsigned int index, + const struct irq_affinity_desc *affdesc, + union msi_instance_cookie *icookie) +{ + struct msi_ctrl ctrl = { .domid = domid, .nirqs = 1, }; + struct irq_domain *domain; + struct msi_map map = { }; + struct msi_desc *desc; + int ret; + + domain = msi_get_device_domain(dev, domid); + if (!domain) { + map.index = -ENODEV; + return map; } -skip_activate: - /* - * If these interrupts use reservation mode, clear the activated bit - * so request_irq() will assign the final vector. - */ - if (can_reserve) { - for_each_msi_vector(desc, i, dev) { - irq_data = irq_domain_get_irq_data(domain, i); - irqd_clr_activated(irq_data); - } + desc = msi_alloc_desc(dev, 1, affdesc); + if (!desc) { + map.index = -ENOMEM; + return map; } - return 0; -cleanup: - for_each_msi_vector(desc, i, dev) { - irq_data = irq_domain_get_irq_data(domain, i); - if (irqd_is_activated(irq_data)) - irq_domain_deactivate_irq(irq_data); + if (icookie) + desc->data.icookie = *icookie; + + ret = msi_insert_desc(dev, desc, domid, index); + if (ret) { + map.index = ret; + return map; } - msi_domain_free_irqs(domain, dev); - return ret; + + ctrl.first = ctrl.last = desc->msi_index; + + ret = __msi_domain_alloc_irqs(dev, domain, &ctrl); + if (ret) { + map.index = ret; + msi_domain_free_locked(dev, &ctrl); + } else { + map.index = desc->msi_index; + map.virq = desc->irq; + } + return map; } /** - * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain - * @domain: The domain to allocate from + * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at + * a given index - or at the next free index + * * @dev: Pointer to device struct of the device for which the interrupts * are allocated - * @nvec: The number of interrupts to allocate + * @domid: Id of the interrupt domain to operate on + * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation + * uses the next free index. + * @affdesc: Optional pointer to an interrupt affinity descriptor structure + * @icookie: Optional pointer to a domain specific per instance cookie. If + * non-NULL the content of the cookie is stored in msi_desc::data. + * Must be NULL for MSI-X allocations + * + * This requires a MSI interrupt domain which lets the core code manage the + * MSI descriptors. + * + * Return: struct msi_map + * + * On success msi_map::index contains the allocated index number and + * msi_map::virq the corresponding Linux interrupt number * - * Returns 0 on success or an error code. + * On failure msi_map::index contains the error code and msi_map::virq + * is %0. */ -int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec) +struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index, + const struct irq_affinity_desc *affdesc, + union msi_instance_cookie *icookie) { - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; + struct msi_map map; + + msi_lock_descs(dev); + map = __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie); + msi_unlock_descs(dev); + return map; +} - return ops->domain_alloc_irqs(domain, dev, nvec); +/** + * msi_device_domain_alloc_wired - Allocate a "wired" interrupt on @domain + * @domain: The domain to allocate on + * @hwirq: The hardware interrupt number to allocate for + * @type: The interrupt type + * + * This weirdness supports wire to MSI controllers like MBIGEN. + * + * @hwirq is the hardware interrupt number which is handed in from + * irq_create_fwspec_mapping(). As the wire to MSI domain is sparse, but + * sized in firmware, the hardware interrupt number cannot be used as MSI + * index. For the underlying irq chip the MSI index is irrelevant and + * all it needs is the hardware interrupt number. + * + * To handle this the MSI index is allocated with MSI_ANY_INDEX and the + * hardware interrupt number is stored along with the type information in + * msi_desc::cookie so the underlying interrupt chip and domain code can + * retrieve it. + * + * Return: The Linux interrupt number (> 0) or an error code + */ +int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, + unsigned int type) +{ + unsigned int domid = MSI_DEFAULT_DOMAIN; + union msi_instance_cookie icookie = { }; + struct device *dev = domain->dev; + struct msi_map map = { }; + + if (WARN_ON_ONCE(!dev || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI)) + return -EINVAL; + + icookie.value = ((u64)type << 32) | hwirq; + + msi_lock_descs(dev); + if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain)) + map.index = -EINVAL; + else + map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie); + msi_unlock_descs(dev); + + return map.index >= 0 ? map.virq : map.index; } -void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) +static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain, + struct msi_ctrl *ctrl) { + struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store; + struct msi_domain_info *info = domain->host_data; + struct irq_data *irqd; struct msi_desc *desc; + unsigned long idx; + int i; - for_each_msi_entry(desc, dev) { - /* - * We might have failed to allocate an MSI early - * enough that there is no IRQ associated to this - * entry. If that's the case, don't do anything. - */ - if (desc->irq) { - irq_domain_free_irqs(desc->irq, desc->nvec_used); - desc->irq = 0; + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + /* Only handle MSI entries which have an interrupt associated */ + if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED)) + continue; + + /* Make sure all interrupts are deactivated */ + for (i = 0; i < desc->nvec_used; i++) { + irqd = irq_domain_get_irq_data(domain, desc->irq + i); + if (irqd && irqd_is_activated(irqd)) + irq_domain_deactivate_irq(irqd); } + + irq_domain_free_irqs(desc->irq, desc->nvec_used); + if (info->flags & MSI_FLAG_DEV_SYSFS) + msi_sysfs_remove_desc(dev, desc); + desc->irq = 0; } } +static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_domain_info *info; + struct msi_domain_ops *ops; + struct irq_domain *domain; + + if (!msi_ctrl_valid(dev, ctrl)) + return; + + domain = msi_get_device_domain(dev, ctrl->domid); + if (!domain) + return; + + info = domain->host_data; + ops = info->ops; + + if (ops->domain_free_irqs) + ops->domain_free_irqs(domain, dev); + else + __msi_domain_free_irqs(dev, domain, ctrl); + + if (ops->msi_post_free) + ops->msi_post_free(domain, dev); + + if (info->flags & MSI_FLAG_FREE_MSI_DESCS) + msi_domain_free_descs(dev, ctrl); +} + /** - * __msi_domain_free_irqs - Free interrupts from a MSI interrupt @domain associated tp @dev - * @domain: The domain to managing the interrupts + * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain + * associated to @dev with msi_lock held * @dev: Pointer to device struct of the device for which the interrupts - * are free + * are freed + * @domid: Id of the interrupt domain to operate on + * @first: First index to free (inclusive) + * @last: Last index to free (inclusive) */ -void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) +void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) { - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + }; + msi_domain_free_locked(dev, &ctrl); +} - return ops->domain_free_irqs(domain, dev); +/** + * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain + * associated to @dev + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: Id of the interrupt domain to operate on + * @first: First index to free (inclusive) + * @last: Last index to free (inclusive) + */ +void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + msi_lock_descs(dev); + msi_domain_free_irqs_range_locked(dev, domid, first, last); + msi_unlock_descs(dev); +} +EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all); + +/** + * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain + * associated to a device + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: The id of the domain to operate on + * + * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() + * pair. Use this for MSI irqdomains which implement their own vector + * allocation. + */ +void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid) +{ + msi_domain_free_irqs_range_locked(dev, domid, 0, + msi_domain_get_hwsize(dev, domid) - 1); +} + +/** + * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain + * associated to a device + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: The id of the domain to operate on + */ +void msi_domain_free_irqs_all(struct device *dev, unsigned int domid) +{ + msi_lock_descs(dev); + msi_domain_free_irqs_all_locked(dev, domid); + msi_unlock_descs(dev); +} + +/** + * msi_device_domain_free_wired - Free a wired interrupt in @domain + * @domain: The domain to free the interrupt on + * @virq: The Linux interrupt number to free + * + * This is the counterpart of msi_device_domain_alloc_wired() for the + * weird wired to MSI converting domains. + */ +void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq) +{ + struct msi_desc *desc = irq_get_msi_desc(virq); + struct device *dev = domain->dev; + + if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI)) + return; + + msi_lock_descs(dev); + if (!WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain)) { + msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index, + desc->msi_index); + } + msi_unlock_descs(dev); } /** * msi_get_domain_info - Get the MSI interrupt domain info for @domain * @domain: The interrupt domain to retrieve data from * - * Returns the pointer to the msi_domain_info stored in - * @domain->host_data. + * Return: the pointer to the msi_domain_info stored in @domain->host_data. */ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain) { return (struct msi_domain_info *)domain->host_data; } -#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ +/** + * msi_device_has_isolated_msi - True if the device has isolated MSI + * @dev: The device to check + * + * Isolated MSI means that HW modeled by an irq_domain on the path from the + * initiating device to the CPU will validate that the MSI message specifies an + * interrupt number that the device is authorized to trigger. This must block + * devices from triggering interrupts they are not authorized to trigger. + * Currently authorization means the MSI vector is one assigned to the device. + * + * This is interesting for securing VFIO use cases where a rouge MSI (eg created + * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to + * impact outside its security domain, eg userspace triggering interrupts on + * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM + * triggering interrupts on another VM. + */ +bool msi_device_has_isolated_msi(struct device *dev) +{ + struct irq_domain *domain = dev_get_msi_domain(dev); + + for (; domain; domain = domain->parent) + if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI) + return true; + return arch_is_isolated_msi(); +} +EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index ce0adb22ee96..c556bc49d213 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -147,7 +147,6 @@ void suspend_device_irqs(void) synchronize_irq(irq); } } -EXPORT_SYMBOL_GPL(suspend_device_irqs); static void resume_irq(struct irq_desc *desc) { @@ -227,7 +226,7 @@ unlock: } /** - * irq_pm_syscore_ops - enable interrupt lines early + * irq_pm_syscore_resume - enable interrupt lines early * * Enable all interrupt lines with %IRQF_EARLY_RESUME set. */ @@ -259,4 +258,3 @@ void resume_device_irqs(void) { resume_irqs(false); } -EXPORT_SYMBOL_GPL(resume_device_irqs); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 98138788cb04..8e29809de38d 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -52,10 +52,8 @@ static int show_irq_affinity(int type, struct seq_file *m) case AFFINITY: case AFFINITY_LIST: mask = desc->irq_common_data.affinity; -#ifdef CONFIG_GENERIC_PENDING_IRQ - if (irqd_is_setaffinity_pending(&desc->irq_data)) - mask = desc->pending_mask; -#endif + if (irq_move_pending(&desc->irq_data)) + mask = irq_desc_get_pending_mask(desc); break; case EFFECTIVE: case EFFECTIVE_LIST: @@ -137,14 +135,14 @@ static inline int irq_select_affinity_usr(unsigned int irq) static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - unsigned int irq = (int)(long)PDE_DATA(file_inode(file)); + unsigned int irq = (int)(long)pde_data(file_inode(file)); cpumask_var_t new_value; int err; if (!irq_can_set_affinity_usr(irq) || no_irq_affinity) - return -EIO; + return -EPERM; - if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) + if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; if (type) @@ -190,12 +188,12 @@ static ssize_t irq_affinity_list_proc_write(struct file *file, static int irq_affinity_proc_open(struct inode *inode, struct file *file) { - return single_open(file, irq_affinity_proc_show, PDE_DATA(inode)); + return single_open(file, irq_affinity_proc_show, pde_data(inode)); } static int irq_affinity_list_proc_open(struct inode *inode, struct file *file) { - return single_open(file, irq_affinity_list_proc_show, PDE_DATA(inode)); + return single_open(file, irq_affinity_list_proc_show, pde_data(inode)); } static const struct proc_ops irq_affinity_proc_ops = { @@ -238,7 +236,7 @@ static ssize_t default_affinity_write(struct file *file, cpumask_var_t new_value; int err; - if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) + if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) return -ENOMEM; err = cpumask_parse_user(buffer, count, new_value); @@ -265,7 +263,7 @@ out: static int default_affinity_open(struct inode *inode, struct file *file) { - return single_open(file, default_affinity_show, PDE_DATA(inode)); + return single_open(file, default_affinity_show, pde_data(inode)); } static const struct proc_ops default_affinity_proc_ops = { @@ -362,8 +360,13 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) goto out_unlock; #ifdef CONFIG_SMP + umode_t umode = S_IRUGO; + + if (irq_can_set_affinity_usr(desc->irq_data.irq)) + umode |= S_IWUSR; + /* create /proc/irq/<irq>/smp_affinity */ - proc_create_data("smp_affinity", 0644, desc->dir, + proc_create_data("smp_affinity", umode, desc->dir, &irq_affinity_proc_ops, irqp); /* create /proc/irq/<irq>/affinity_hint */ @@ -371,7 +374,7 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) irq_affinity_hint_proc_show, irqp); /* create /proc/irq/<irq>/smp_affinity_list */ - proc_create_data("smp_affinity_list", 0644, desc->dir, + proc_create_data("smp_affinity_list", umode, desc->dir, &irq_affinity_list_proc_ops, irqp); proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show, @@ -454,17 +457,18 @@ int __weak arch_show_interrupts(struct seq_file *p, int prec) } #ifndef ACTUAL_NR_IRQS -# define ACTUAL_NR_IRQS nr_irqs +# define ACTUAL_NR_IRQS irq_get_nr_irqs() #endif int show_interrupts(struct seq_file *p, void *v) { + const unsigned int nr_irqs = irq_get_nr_irqs(); static int prec; - unsigned long flags, any_count = 0; int i = *(loff_t *) v, j; struct irqaction *action; struct irq_desc *desc; + unsigned long flags; if (i > ACTUAL_NR_IRQS) return 0; @@ -488,32 +492,30 @@ int show_interrupts(struct seq_file *p, void *v) if (!desc || irq_settings_is_hidden(desc)) goto outsparse; - if (desc->kstat_irqs) { - for_each_online_cpu(j) - any_count |= data_race(*per_cpu_ptr(desc->kstat_irqs, j)); - } - - if ((!desc->action || irq_desc_is_chained(desc)) && !any_count) + if (!desc->action || irq_desc_is_chained(desc) || !desc->kstat_irqs) goto outsparse; - seq_printf(p, "%*d: ", prec, i); - for_each_online_cpu(j) - seq_printf(p, "%10u ", desc->kstat_irqs ? - *per_cpu_ptr(desc->kstat_irqs, j) : 0); + seq_printf(p, "%*d:", prec, i); + for_each_online_cpu(j) { + unsigned int cnt = desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, j) : 0; + + seq_put_decimal_ull_width(p, " ", cnt, 10); + } + seq_putc(p, ' '); raw_spin_lock_irqsave(&desc->lock, flags); if (desc->irq_data.chip) { if (desc->irq_data.chip->irq_print_chip) desc->irq_data.chip->irq_print_chip(&desc->irq_data, p); else if (desc->irq_data.chip->name) - seq_printf(p, " %8s", desc->irq_data.chip->name); + seq_printf(p, "%8s", desc->irq_data.chip->name); else - seq_printf(p, " %8s", "-"); + seq_printf(p, "%8s", "-"); } else { - seq_printf(p, " %8s", "None"); + seq_printf(p, "%8s", "None"); } if (desc->irq_data.domain) - seq_printf(p, " %*d", prec, (int) desc->irq_data.hwirq); + seq_printf(p, " %*lu", prec, desc->irq_data.hwirq); else seq_printf(p, " %*s", prec, ""); #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 8ccd32a0cc80..1b7fa72968bd 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -21,41 +21,39 @@ #ifdef CONFIG_HARDIRQS_SW_RESEND -/* Bitmap to handle software resend of interrupts: */ -static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS); +/* hlist_head to handle software resend of interrupts: */ +static HLIST_HEAD(irq_resend_list); +static DEFINE_RAW_SPINLOCK(irq_resend_lock); /* * Run software resends of IRQ's */ -static void resend_irqs(unsigned long arg) +static void resend_irqs(struct tasklet_struct *unused) { struct irq_desc *desc; - int irq; - while (!bitmap_empty(irqs_resend, nr_irqs)) { - irq = find_first_bit(irqs_resend, nr_irqs); - clear_bit(irq, irqs_resend); - desc = irq_to_desc(irq); - if (!desc) - continue; - local_irq_disable(); + raw_spin_lock_irq(&irq_resend_lock); + while (!hlist_empty(&irq_resend_list)) { + desc = hlist_entry(irq_resend_list.first, struct irq_desc, + resend_node); + hlist_del_init(&desc->resend_node); + raw_spin_unlock(&irq_resend_lock); desc->handle_irq(desc); - local_irq_enable(); + raw_spin_lock(&irq_resend_lock); } + raw_spin_unlock_irq(&irq_resend_lock); } /* Tasklet to handle resend: */ -static DECLARE_TASKLET_OLD(resend_tasklet, resend_irqs); +static DECLARE_TASKLET(resend_tasklet, resend_irqs); static int irq_sw_resend(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); - /* * Validate whether this interrupt can be safely injected from * non interrupt context */ - if (handle_enforce_irqctx(&desc->irq_data)) + if (irqd_is_handle_enforce_irqctx(&desc->irq_data)) return -EINVAL; /* @@ -70,16 +68,36 @@ static int irq_sw_resend(struct irq_desc *desc) */ if (!desc->parent_irq) return -EINVAL; - irq = desc->parent_irq; + + desc = irq_to_desc(desc->parent_irq); + if (!desc) + return -EINVAL; } - /* Set it pending and activate the softirq: */ - set_bit(irq, irqs_resend); + /* Add to resend_list and activate the softirq: */ + raw_spin_lock(&irq_resend_lock); + if (hlist_unhashed(&desc->resend_node)) + hlist_add_head(&desc->resend_node, &irq_resend_list); + raw_spin_unlock(&irq_resend_lock); tasklet_schedule(&resend_tasklet); return 0; } +void clear_irq_resend(struct irq_desc *desc) +{ + raw_spin_lock(&irq_resend_lock); + hlist_del_init(&desc->resend_node); + raw_spin_unlock(&irq_resend_lock); +} + +void irq_resend_init(struct irq_desc *desc) +{ + INIT_HLIST_NODE(&desc->resend_node); +} #else +void clear_irq_resend(struct irq_desc *desc) {} +void irq_resend_init(struct irq_desc *desc) {} + static int irq_sw_resend(struct irq_desc *desc) { return -EINVAL; @@ -128,7 +146,7 @@ int check_irq_resend(struct irq_desc *desc, bool inject) if (!try_retrigger(desc)) err = irq_sw_resend(desc); - /* If the retrigger was successfull, mark it with the REPLAY bit */ + /* If the retrigger was successful, mark it with the REPLAY bit */ if (!err) desc->istate |= IRQS_REPLAY; return err; @@ -172,7 +190,7 @@ int irq_inject_interrupt(unsigned int irq) * - not NMI type * - activated */ - if ((desc->istate & IRQS_NMI) || !irqd_is_activated(&desc->irq_data)) + if (irq_is_nmi(desc) || !irqd_is_activated(&desc->irq_data)) err = -EINVAL; else err = check_irq_resend(desc, true); diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index 403378b9947b..00b3bd127692 100644 --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h @@ -11,13 +11,13 @@ enum { _IRQ_NOREQUEST = IRQ_NOREQUEST, _IRQ_NOTHREAD = IRQ_NOTHREAD, _IRQ_NOAUTOEN = IRQ_NOAUTOEN, - _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT, _IRQ_NO_BALANCING = IRQ_NO_BALANCING, _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD, _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, _IRQ_IS_POLLED = IRQ_IS_POLLED, _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, _IRQ_HIDDEN = IRQ_HIDDEN, + _IRQ_NO_DEBUG = IRQ_NO_DEBUG, _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, }; @@ -33,6 +33,7 @@ enum { #define IRQ_IS_POLLED GOT_YOU_MORON #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON #define IRQ_HIDDEN GOT_YOU_MORON +#define IRQ_NO_DEBUG GOT_YOU_MORON #undef IRQF_MODIFY_MASK #define IRQF_MODIFY_MASK GOT_YOU_MORON @@ -140,11 +141,6 @@ static inline void irq_settings_set_noprobe(struct irq_desc *desc) desc->status_use_accessors |= _IRQ_NOPROBE; } -static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) -{ - return desc->status_use_accessors & _IRQ_MOVE_PCNTXT; -} - static inline bool irq_settings_can_autoenable(struct irq_desc *desc) { return !(desc->status_use_accessors & _IRQ_NOAUTOEN); @@ -174,3 +170,13 @@ static inline bool irq_settings_is_hidden(struct irq_desc *desc) { return desc->status_use_accessors & _IRQ_HIDDEN; } + +static inline void irq_settings_set_no_debug(struct irq_desc *desc) +{ + desc->status_use_accessors |= _IRQ_NO_DEBUG; +} + +static inline bool irq_settings_no_debug(struct irq_desc *desc) +{ + return desc->status_use_accessors & _IRQ_NO_DEBUG; +} diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index f865e5f4d382..02b2daf07441 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -403,6 +403,10 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) desc->irqs_unhandled -= ok; } + if (likely(!desc->irqs_unhandled)) + return; + + /* Now getting into unhandled irq detection */ desc->irq_count++; if (likely(desc->irq_count < 100000)) return; @@ -443,6 +447,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); static int __init irqfixup_setup(char *str) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + pr_warn("irqfixup boot option not supported with PREEMPT_RT\n"); + return 1; + } irqfixup = 1; printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); printk(KERN_WARNING "This may impact system performance.\n"); @@ -455,6 +463,10 @@ module_param(irqfixup, int, 0644); static int __init irqpoll_setup(char *str) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + pr_warn("irqpoll boot option not supported with PREEMPT_RT\n"); + return 1; + } irqfixup = 2; printk(KERN_WARNING "Misrouted IRQ fixup and polling support " "enabled\n"); diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index 773b6105c4ae..4b7315e99bd6 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -84,7 +84,7 @@ void irq_timings_disable(void) * 2. Log interval * * We saw the irq timings allow to compute the interval of the - * occurrences for a specific interrupt. We can reasonibly assume the + * occurrences for a specific interrupt. We can reasonably assume the * longer is the interval, the higher is the error for the next event * and we can consider storing those interval values into an array * where each slot in the array correspond to an interval at the power @@ -416,7 +416,7 @@ static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) * Copy the content of the circular buffer into another buffer * in order to linearize the buffer instead of dealing with * wrapping indexes and shifted array which will be prone to - * error and extremelly difficult to debug. + * error and extremely difficult to debug. */ for (i = 0; i < count; i++) { int index = (start + i) & IRQ_TIMINGS_MASK; @@ -453,6 +453,11 @@ static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs, */ index = irq_timings_interval_index(interval); + if (index > PREDICTION_BUFFER_SIZE - 1) { + irqs->count = 0; + return; + } + /* * Store the index as an element of the pattern in another * circular array. @@ -485,7 +490,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) /* * The interrupt triggered more than one second apart, that - * ends the sequence as predictible for our purpose. In this + * ends the sequence as predictable for our purpose. In this * case, assume we have the beginning of a sequence and the * timestamp is the first value. As it is impossible to * predict anything at this point, return. @@ -504,6 +509,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) /** * irq_timings_next_event - Return when the next event is supposed to arrive + * @now: current time * * During the last busy cycle, the number of interrupts is incremented * and stored in the irq_timings structure. This information is @@ -514,7 +520,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) * If more than the array size interrupts happened during the * last busy/idle cycle, the index wrapped up and we have to * begin with the next element in the array which is the last one - * in the sequence, otherwise it is a the index 0. + * in the sequence, otherwise it is at the index 0. * * - have an indication of the interrupts activity on this CPU * (eg. irq/sec) @@ -794,12 +800,14 @@ static int __init irq_timings_test_irqs(struct timings_intervals *ti) __irq_timings_store(irq, irqs, ti->intervals[i]); if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) { + ret = -EBADSLT; pr_err("Failed to store in the circular buffer\n"); goto out; } } if (irqs->count != ti->count) { + ret = -ERANGE; pr_err("Count differs\n"); goto out; } |