summaryrefslogtreecommitdiff
path: root/kernel/irq
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/irq')
-rw-r--r--kernel/irq/Kconfig35
-rw-r--r--kernel/irq/Makefile3
-rw-r--r--kernel/irq/affinity.c408
-rw-r--r--kernel/irq/chip.c107
-rw-r--r--kernel/irq/cpuhotplug.c53
-rw-r--r--kernel/irq/debugfs.c26
-rw-r--r--kernel/irq/devres.c44
-rw-r--r--kernel/irq/dummychip.c2
-rw-r--r--kernel/irq/generic-chip.c166
-rw-r--r--kernel/irq/handle.c31
-rw-r--r--kernel/irq/internals.h52
-rw-r--r--kernel/irq/ipi-mux.c206
-rw-r--r--kernel/irq/ipi.c56
-rw-r--r--kernel/irq/irq_sim.c122
-rw-r--r--kernel/irq/irqdesc.c448
-rw-r--r--kernel/irq/irqdomain.c1001
-rw-r--r--kernel/irq/kexec.c36
-rw-r--r--kernel/irq/manage.c413
-rw-r--r--kernel/irq/matrix.c50
-rw-r--r--kernel/irq/migration.c6
-rw-r--r--kernel/irq/msi.c1525
-rw-r--r--kernel/irq/pm.c4
-rw-r--r--kernel/irq/proc.c60
-rw-r--r--kernel/irq/resend.c60
-rw-r--r--kernel/irq/settings.h18
-rw-r--r--kernel/irq/spurious.c12
-rw-r--r--kernel/irq/timings.c16
27 files changed, 3347 insertions, 1613 deletions
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index d79ef2493a28..875f25ed6f71 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -24,6 +24,7 @@ config GENERIC_IRQ_SHOW_LEVEL
# Supports effective affinity mask
config GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ depends on SMP
bool
# Support for delayed migration from interrupt context
@@ -70,6 +71,11 @@ config IRQ_DOMAIN_HIERARCHY
bool
select IRQ_DOMAIN
+# Support for obsolete non-mapping irq domains
+config IRQ_DOMAIN_NOMAP
+ bool
+ select IRQ_DOMAIN
+
# Support for hierarchical fasteoi+edge and fasteoi+level handlers
config IRQ_FASTEOI_HIERARCHY_HANDLERS
bool
@@ -77,24 +83,22 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS
# Generic IRQ IPI support
config GENERIC_IRQ_IPI
bool
+ depends on SMP
select IRQ_DOMAIN_HIERARCHY
-# Generic MSI interrupt support
-config GENERIC_MSI_IRQ
+# Generic IRQ IPI Mux support
+config GENERIC_IRQ_IPI_MUX
bool
+ depends on SMP
# Generic MSI hierarchical interrupt domain support
-config GENERIC_MSI_IRQ_DOMAIN
+config GENERIC_MSI_IRQ
bool
select IRQ_DOMAIN_HIERARCHY
- select GENERIC_MSI_IRQ
config IRQ_MSI_IOMMU
bool
-config HANDLE_DOMAIN_IRQ
- bool
-
config IRQ_TIMINGS
bool
@@ -104,6 +108,10 @@ config GENERIC_IRQ_MATRIX_ALLOCATOR
config GENERIC_IRQ_RESERVATION_MODE
bool
+# Snapshot for interrupt statistics
+config GENERIC_IRQ_STAT_SNAPSHOT
+ bool
+
# Support forced irq threading
config IRQ_FORCED_THREADING
bool
@@ -133,9 +141,22 @@ config GENERIC_IRQ_DEBUGFS
If you don't know what to do here, say N.
+# Clear forwarded VM interrupts during kexec.
+# This option ensures the kernel clears active states for interrupts
+# forwarded to virtual machines (VMs) during a machine kexec.
+config GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD
+ bool
+
endmenu
config GENERIC_IRQ_MULTI_HANDLER
bool
help
Allow to specify the low level IRQ handler at run time.
+
+# Cavium Octeon is the last system to use this deprecated option
+# Do not even think of enabling this on any new platform
+config DEPRECATED_IRQ_CPU_ONOFFLINE
+ bool
+ depends on CAVIUM_OCTEON_SOC
+ default CAVIUM_OCTEON_SOC
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index b4f53717d143..c0f44c06d69d 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
+obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o kexec.o
obj-$(CONFIG_IRQ_TIMINGS) += timings.o
ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y)
CFLAGS_timings.o += -DDEBUG
@@ -15,6 +15,7 @@ obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o
obj-$(CONFIG_PM_SLEEP) += pm.o
obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o
+obj-$(CONFIG_GENERIC_IRQ_IPI_MUX) += ipi-mux.o
obj-$(CONFIG_SMP) += affinity.o
obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o
obj-$(CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR) += matrix.o
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 4d89ad4fae3b..44a4eba80315 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -7,397 +7,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>
-#include <linux/sort.h>
-
-static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
- unsigned int cpus_per_vec)
-{
- const struct cpumask *siblmsk;
- int cpu, sibl;
-
- for ( ; cpus_per_vec > 0; ) {
- cpu = cpumask_first(nmsk);
-
- /* Should not happen, but I'm too lazy to think about it */
- if (cpu >= nr_cpu_ids)
- return;
-
- cpumask_clear_cpu(cpu, nmsk);
- cpumask_set_cpu(cpu, irqmsk);
- cpus_per_vec--;
-
- /* If the cpu has siblings, use them first */
- siblmsk = topology_sibling_cpumask(cpu);
- for (sibl = -1; cpus_per_vec > 0; ) {
- sibl = cpumask_next(sibl, siblmsk);
- if (sibl >= nr_cpu_ids)
- break;
- if (!cpumask_test_and_clear_cpu(sibl, nmsk))
- continue;
- cpumask_set_cpu(sibl, irqmsk);
- cpus_per_vec--;
- }
- }
-}
-
-static cpumask_var_t *alloc_node_to_cpumask(void)
-{
- cpumask_var_t *masks;
- int node;
-
- masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
- if (!masks)
- return NULL;
-
- for (node = 0; node < nr_node_ids; node++) {
- if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
- goto out_unwind;
- }
-
- return masks;
-
-out_unwind:
- while (--node >= 0)
- free_cpumask_var(masks[node]);
- kfree(masks);
- return NULL;
-}
-
-static void free_node_to_cpumask(cpumask_var_t *masks)
-{
- int node;
-
- for (node = 0; node < nr_node_ids; node++)
- free_cpumask_var(masks[node]);
- kfree(masks);
-}
-
-static void build_node_to_cpumask(cpumask_var_t *masks)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
-}
-
-static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
- const struct cpumask *mask, nodemask_t *nodemsk)
-{
- int n, nodes = 0;
-
- /* Calculate the number of nodes in the supplied affinity mask */
- for_each_node(n) {
- if (cpumask_intersects(mask, node_to_cpumask[n])) {
- node_set(n, *nodemsk);
- nodes++;
- }
- }
- return nodes;
-}
-
-struct node_vectors {
- unsigned id;
-
- union {
- unsigned nvectors;
- unsigned ncpus;
- };
-};
-
-static int ncpus_cmp_func(const void *l, const void *r)
-{
- const struct node_vectors *ln = l;
- const struct node_vectors *rn = r;
-
- return ln->ncpus - rn->ncpus;
-}
-
-/*
- * Allocate vector number for each node, so that for each node:
- *
- * 1) the allocated number is >= 1
- *
- * 2) the allocated numbver is <= active CPU number of this node
- *
- * The actual allocated total vectors may be less than @numvecs when
- * active total CPU number is less than @numvecs.
- *
- * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
- * for each node.
- */
-static void alloc_nodes_vectors(unsigned int numvecs,
- cpumask_var_t *node_to_cpumask,
- const struct cpumask *cpu_mask,
- const nodemask_t nodemsk,
- struct cpumask *nmsk,
- struct node_vectors *node_vectors)
-{
- unsigned n, remaining_ncpus = 0;
-
- for (n = 0; n < nr_node_ids; n++) {
- node_vectors[n].id = n;
- node_vectors[n].ncpus = UINT_MAX;
- }
-
- for_each_node_mask(n, nodemsk) {
- unsigned ncpus;
-
- cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
- ncpus = cpumask_weight(nmsk);
-
- if (!ncpus)
- continue;
- remaining_ncpus += ncpus;
- node_vectors[n].ncpus = ncpus;
- }
-
- numvecs = min_t(unsigned, remaining_ncpus, numvecs);
-
- sort(node_vectors, nr_node_ids, sizeof(node_vectors[0]),
- ncpus_cmp_func, NULL);
-
- /*
- * Allocate vectors for each node according to the ratio of this
- * node's nr_cpus to remaining un-assigned ncpus. 'numvecs' is
- * bigger than number of active numa nodes. Always start the
- * allocation from the node with minimized nr_cpus.
- *
- * This way guarantees that each active node gets allocated at
- * least one vector, and the theory is simple: over-allocation
- * is only done when this node is assigned by one vector, so
- * other nodes will be allocated >= 1 vector, since 'numvecs' is
- * bigger than number of numa nodes.
- *
- * One perfect invariant is that number of allocated vectors for
- * each node is <= CPU count of this node:
- *
- * 1) suppose there are two nodes: A and B
- * ncpu(X) is CPU count of node X
- * vecs(X) is the vector count allocated to node X via this
- * algorithm
- *
- * ncpu(A) <= ncpu(B)
- * ncpu(A) + ncpu(B) = N
- * vecs(A) + vecs(B) = V
- *
- * vecs(A) = max(1, round_down(V * ncpu(A) / N))
- * vecs(B) = V - vecs(A)
- *
- * both N and V are integer, and 2 <= V <= N, suppose
- * V = N - delta, and 0 <= delta <= N - 2
- *
- * 2) obviously vecs(A) <= ncpu(A) because:
- *
- * if vecs(A) is 1, then vecs(A) <= ncpu(A) given
- * ncpu(A) >= 1
- *
- * otherwise,
- * vecs(A) <= V * ncpu(A) / N <= ncpu(A), given V <= N
- *
- * 3) prove how vecs(B) <= ncpu(B):
- *
- * if round_down(V * ncpu(A) / N) == 0, vecs(B) won't be
- * over-allocated, so vecs(B) <= ncpu(B),
- *
- * otherwise:
- *
- * vecs(A) =
- * round_down(V * ncpu(A) / N) =
- * round_down((N - delta) * ncpu(A) / N) =
- * round_down((N * ncpu(A) - delta * ncpu(A)) / N) >=
- * round_down((N * ncpu(A) - delta * N) / N) =
- * cpu(A) - delta
- *
- * then:
- *
- * vecs(A) - V >= ncpu(A) - delta - V
- * =>
- * V - vecs(A) <= V + delta - ncpu(A)
- * =>
- * vecs(B) <= N - ncpu(A)
- * =>
- * vecs(B) <= cpu(B)
- *
- * For nodes >= 3, it can be thought as one node and another big
- * node given that is exactly what this algorithm is implemented,
- * and we always re-calculate 'remaining_ncpus' & 'numvecs', and
- * finally for each node X: vecs(X) <= ncpu(X).
- *
- */
- for (n = 0; n < nr_node_ids; n++) {
- unsigned nvectors, ncpus;
-
- if (node_vectors[n].ncpus == UINT_MAX)
- continue;
-
- WARN_ON_ONCE(numvecs == 0);
-
- ncpus = node_vectors[n].ncpus;
- nvectors = max_t(unsigned, 1,
- numvecs * ncpus / remaining_ncpus);
- WARN_ON_ONCE(nvectors > ncpus);
-
- node_vectors[n].nvectors = nvectors;
-
- remaining_ncpus -= ncpus;
- numvecs -= nvectors;
- }
-}
-
-static int __irq_build_affinity_masks(unsigned int startvec,
- unsigned int numvecs,
- unsigned int firstvec,
- cpumask_var_t *node_to_cpumask,
- const struct cpumask *cpu_mask,
- struct cpumask *nmsk,
- struct irq_affinity_desc *masks)
-{
- unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0;
- unsigned int last_affv = firstvec + numvecs;
- unsigned int curvec = startvec;
- nodemask_t nodemsk = NODE_MASK_NONE;
- struct node_vectors *node_vectors;
-
- if (!cpumask_weight(cpu_mask))
- return 0;
-
- nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
-
- /*
- * If the number of nodes in the mask is greater than or equal the
- * number of vectors we just spread the vectors across the nodes.
- */
- if (numvecs <= nodes) {
- for_each_node_mask(n, nodemsk) {
- cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
- node_to_cpumask[n]);
- if (++curvec == last_affv)
- curvec = firstvec;
- }
- return numvecs;
- }
-
- node_vectors = kcalloc(nr_node_ids,
- sizeof(struct node_vectors),
- GFP_KERNEL);
- if (!node_vectors)
- return -ENOMEM;
-
- /* allocate vector number for each node */
- alloc_nodes_vectors(numvecs, node_to_cpumask, cpu_mask,
- nodemsk, nmsk, node_vectors);
-
- for (i = 0; i < nr_node_ids; i++) {
- unsigned int ncpus, v;
- struct node_vectors *nv = &node_vectors[i];
-
- if (nv->nvectors == UINT_MAX)
- continue;
-
- /* Get the cpus on this node which are in the mask */
- cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]);
- ncpus = cpumask_weight(nmsk);
- if (!ncpus)
- continue;
-
- WARN_ON_ONCE(nv->nvectors > ncpus);
-
- /* Account for rounding errors */
- extra_vecs = ncpus - nv->nvectors * (ncpus / nv->nvectors);
-
- /* Spread allocated vectors on CPUs of the current node */
- for (v = 0; v < nv->nvectors; v++, curvec++) {
- cpus_per_vec = ncpus / nv->nvectors;
-
- /* Account for extra vectors to compensate rounding errors */
- if (extra_vecs) {
- cpus_per_vec++;
- --extra_vecs;
- }
-
- /*
- * wrapping has to be considered given 'startvec'
- * may start anywhere
- */
- if (curvec >= last_affv)
- curvec = firstvec;
- irq_spread_init_one(&masks[curvec].mask, nmsk,
- cpus_per_vec);
- }
- done += nv->nvectors;
- }
- kfree(node_vectors);
- return done;
-}
-
-/*
- * build affinity in two stages:
- * 1) spread present CPU on these vectors
- * 2) spread other possible CPUs on these vectors
- */
-static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
- unsigned int firstvec,
- struct irq_affinity_desc *masks)
-{
- unsigned int curvec = startvec, nr_present = 0, nr_others = 0;
- cpumask_var_t *node_to_cpumask;
- cpumask_var_t nmsk, npresmsk;
- int ret = -ENOMEM;
-
- if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
- return ret;
-
- if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
- goto fail_nmsk;
-
- node_to_cpumask = alloc_node_to_cpumask();
- if (!node_to_cpumask)
- goto fail_npresmsk;
-
- /* Stabilize the cpumasks */
- get_online_cpus();
- build_node_to_cpumask(node_to_cpumask);
-
- /* Spread on present CPUs starting from affd->pre_vectors */
- ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
- node_to_cpumask, cpu_present_mask,
- nmsk, masks);
- if (ret < 0)
- goto fail_build_affinity;
- nr_present = ret;
-
- /*
- * Spread on non present CPUs starting from the next vector to be
- * handled. If the spreading of present CPUs already exhausted the
- * vector space, assign the non present CPUs to the already spread
- * out vectors.
- */
- if (nr_present >= numvecs)
- curvec = firstvec;
- else
- curvec = firstvec + nr_present;
- cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
- ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
- node_to_cpumask, npresmsk, nmsk,
- masks);
- if (ret >= 0)
- nr_others = ret;
-
- fail_build_affinity:
- put_online_cpus();
-
- if (ret >= 0)
- WARN_ON(nr_present + nr_others < numvecs);
-
- free_node_to_cpumask(node_to_cpumask);
-
- fail_npresmsk:
- free_cpumask_var(npresmsk);
-
- fail_nmsk:
- free_cpumask_var(nmsk);
- return ret < 0 ? ret : 0;
-}
+#include <linux/group_cpus.h>
static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
{
@@ -460,14 +70,18 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
*/
for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
unsigned int this_vecs = affd->set_size[i];
- int ret;
+ int j;
+ struct cpumask *result = group_cpus_evenly(this_vecs);
- ret = irq_build_affinity_masks(curvec, this_vecs,
- curvec, masks);
- if (ret) {
+ if (!result) {
kfree(masks);
return NULL;
}
+
+ for (j = 0; j < this_vecs; j++)
+ cpumask_copy(&masks[curvec + j].mask, &result[j]);
+ kfree(result);
+
curvec += this_vecs;
usedvecs += this_vecs;
}
@@ -505,9 +119,9 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
if (affd->calc_sets) {
set_vecs = maxvec - resv;
} else {
- get_online_cpus();
+ cpus_read_lock();
set_vecs = cpumask_weight(cpu_possible_mask);
- put_online_cpus();
+ cpus_read_unlock();
}
return resv + min(set_vecs, maxvec - resv);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6d89e33fe3aa..c901436ebd9f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -38,7 +38,7 @@ struct irqaction chained_action = {
* @irq: irq number
* @chip: pointer to irq chip description structure
*/
-int irq_set_chip(unsigned int irq, struct irq_chip *chip)
+int irq_set_chip(unsigned int irq, const struct irq_chip *chip)
{
unsigned long flags;
struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0);
@@ -46,10 +46,7 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip)
if (!desc)
return -EINVAL;
- if (!chip)
- chip = &no_irq_chip;
-
- desc->irq_data.chip = chip;
+ desc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip);
irq_put_desc_unlock(desc, flags);
/*
* For !CONFIG_SPARSE_IRQ make the irq show up in
@@ -191,7 +188,8 @@ enum {
#ifdef CONFIG_SMP
static int
-__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
+__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff,
+ bool force)
{
struct irq_data *d = irq_desc_get_irq_data(desc);
@@ -200,7 +198,7 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
irqd_clr_managed_shutdown(d);
- if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) {
+ if (!cpumask_intersects(aff, cpu_online_mask)) {
/*
* Catch code which fiddles with enable_irq() on a managed
* and potentially shutdown IRQ. Chained interrupt
@@ -227,7 +225,8 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
}
#else
static __always_inline int
-__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
+__irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff,
+ bool force)
{
return IRQ_STARTUP_NORMAL;
}
@@ -255,7 +254,7 @@ static int __irq_startup(struct irq_desc *desc)
int irq_startup(struct irq_desc *desc, bool resend, bool force)
{
struct irq_data *d = irq_desc_get_irq_data(desc);
- struct cpumask *aff = irq_data_get_affinity_mask(d);
+ const struct cpumask *aff = irq_data_get_affinity_mask(d);
int ret = 0;
desc->depth = 0;
@@ -265,8 +264,11 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force)
} else {
switch (__irq_startup_managed(desc, aff, force)) {
case IRQ_STARTUP_NORMAL:
+ if (d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP)
+ irq_setup_affinity(desc);
ret = __irq_startup(desc);
- irq_setup_affinity(desc);
+ if (!(d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP))
+ irq_setup_affinity(desc);
break;
case IRQ_STARTUP_MANAGED:
irq_do_set_affinity(d, aff, false);
@@ -304,6 +306,7 @@ static void __irq_disable(struct irq_desc *desc, bool mask);
void irq_shutdown(struct irq_desc *desc)
{
if (irqd_is_started(&desc->irq_data)) {
+ clear_irq_resend(desc);
desc->depth = 1;
if (desc->irq_data.chip->irq_shutdown) {
desc->irq_data.chip->irq_shutdown(&desc->irq_data);
@@ -470,25 +473,22 @@ void handle_nested_irq(unsigned int irq)
action = desc->action;
if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
desc->istate |= IRQS_PENDING;
- goto out_unlock;
+ raw_spin_unlock_irq(&desc->lock);
+ return;
}
kstat_incr_irqs_this_cpu(desc);
- irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
+ atomic_inc(&desc->threads_active);
raw_spin_unlock_irq(&desc->lock);
action_ret = IRQ_NONE;
for_each_action_of_desc(desc, action)
action_ret |= action->thread_fn(action->irq, action->dev_id);
- if (!noirqdebug)
+ if (!irq_settings_no_debug(desc))
note_interrupt(desc, action_ret);
- raw_spin_lock_irq(&desc->lock);
- irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
-
-out_unlock:
- raw_spin_unlock_irq(&desc->lock);
+ wake_threads_waitq(desc);
}
EXPORT_SYMBOL_GPL(handle_nested_irq);
@@ -572,8 +572,6 @@ EXPORT_SYMBOL_GPL(handle_simple_irq);
*/
void handle_untracked_irq(struct irq_desc *desc)
{
- unsigned int flags = 0;
-
raw_spin_lock(&desc->lock);
if (!irq_may_run(desc))
@@ -590,7 +588,7 @@ void handle_untracked_irq(struct irq_desc *desc)
irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock(&desc->lock);
- __handle_irq_event_percpu(desc, &flags);
+ __handle_irq_event_percpu(desc);
raw_spin_lock(&desc->lock);
irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
@@ -692,8 +690,16 @@ void handle_fasteoi_irq(struct irq_desc *desc)
raw_spin_lock(&desc->lock);
- if (!irq_may_run(desc))
+ /*
+ * When an affinity change races with IRQ handling, the next interrupt
+ * can arrive on the new CPU before the original CPU has completed
+ * handling the previous one - it may need to be resent.
+ */
+ if (!irq_may_run(desc)) {
+ if (irqd_needs_resend_when_in_progress(&desc->irq_data))
+ desc->istate |= IRQS_PENDING;
goto out;
+ }
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
@@ -715,6 +721,12 @@ void handle_fasteoi_irq(struct irq_desc *desc)
cond_unmask_eoi_irq(desc, chip);
+ /*
+ * When the race described above happens this will resend the interrupt.
+ */
+ if (unlikely(desc->istate & IRQS_PENDING))
+ check_irq_resend(desc, false);
+
raw_spin_unlock(&desc->lock);
return;
out:
@@ -761,7 +773,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_nmi);
* handle_edge_irq - edge type IRQ handler
* @desc: the interrupt description structure for this irq
*
- * Interrupt occures on the falling and/or rising edge of a hardware
+ * Interrupt occurs on the falling and/or rising edge of a hardware
* signal. The occurrence is latched into the irq controller hardware
* and must be acked in order to be reenabled. After the ack another
* interrupt can happen on the same source even before the first one
@@ -808,7 +820,7 @@ void handle_edge_irq(struct irq_desc *desc)
/*
* When another irq arrived while we were handling
* one, we could have masked the irq.
- * Renable it, if it was not disabled in meantime.
+ * Reenable it, if it was not disabled in meantime.
*/
if (unlikely(desc->istate & IRQS_PENDING)) {
if (!irqd_irq_disabled(&desc->irq_data) &&
@@ -1008,8 +1020,10 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
if (desc->irq_data.chip != &no_irq_chip)
mask_ack_irq(desc);
irq_state_set_disabled(desc);
- if (is_chained)
+ if (is_chained) {
desc->action = NULL;
+ WARN_ON(irq_chip_pm_put(irq_desc_get_irq_data(desc)));
+ }
desc->depth = 1;
}
desc->handle_irq = handle;
@@ -1035,6 +1049,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
irq_settings_set_norequest(desc);
irq_settings_set_nothread(desc);
desc->action = &chained_action;
+ WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc)));
irq_activate_and_startup(desc, IRQ_RESEND);
}
}
@@ -1072,7 +1087,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
EXPORT_SYMBOL_GPL(irq_set_chained_handler_and_data);
void
-irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, const struct irq_chip *chip,
irq_flow_handler_t handle, const char *name)
{
irq_set_chip(irq, chip);
@@ -1099,13 +1114,11 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
trigger = irqd_get_trigger_type(&desc->irq_data);
irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
- IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT);
+ IRQD_TRIGGER_MASK | IRQD_LEVEL);
if (irq_settings_has_no_balance_set(desc))
irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
if (irq_settings_is_per_cpu(desc))
irqd_set(&desc->irq_data, IRQD_PER_CPU);
- if (irq_settings_can_move_pcntxt(desc))
- irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT);
if (irq_settings_is_level(desc))
irqd_set(&desc->irq_data, IRQD_LEVEL);
@@ -1119,6 +1132,7 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
}
EXPORT_SYMBOL_GPL(irq_modify_status);
+#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE
/**
* irq_cpu_online - Invoke all irq_cpu_online functions.
*
@@ -1178,6 +1192,7 @@ void irq_cpu_offline(void)
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
}
+#endif
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
@@ -1419,7 +1434,7 @@ EXPORT_SYMBOL_GPL(irq_chip_eoi_parent);
* @dest: The affinity mask to set
* @force: Flag to enforce setting (disable online checks)
*
- * Conditinal, as the underlying parent chip might not implement it.
+ * Conditional, as the underlying parent chip might not implement it.
*/
int irq_chip_set_affinity_parent(struct irq_data *data,
const struct cpumask *dest, bool force)
@@ -1513,7 +1528,8 @@ int irq_chip_request_resources_parent(struct irq_data *data)
if (data->chip->irq_request_resources)
return data->chip->irq_request_resources(data);
- return -ENOSYS;
+ /* no error on missing optional irq_chip::irq_request_resources */
+ return 0;
}
EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent);
@@ -1531,7 +1547,7 @@ EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent);
#endif
/**
- * irq_chip_compose_msi_msg - Componse msi message for a irq chip
+ * irq_chip_compose_msi_msg - Compose msi message for a irq chip
* @data: Pointer to interrupt specific data
* @msg: Pointer to the MSI message
*
@@ -1555,6 +1571,14 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
return 0;
}
+static struct device *irq_get_pm_device(struct irq_data *data)
+{
+ if (data->domain)
+ return data->domain->pm_dev;
+
+ return NULL;
+}
+
/**
* irq_chip_pm_get - Enable power for an IRQ chip
* @data: Pointer to interrupt specific data
@@ -1564,17 +1588,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
*/
int irq_chip_pm_get(struct irq_data *data)
{
- int retval;
+ struct device *dev = irq_get_pm_device(data);
+ int retval = 0;
- if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) {
- retval = pm_runtime_get_sync(data->chip->parent_device);
- if (retval < 0) {
- pm_runtime_put_noidle(data->chip->parent_device);
- return retval;
- }
- }
+ if (IS_ENABLED(CONFIG_PM) && dev)
+ retval = pm_runtime_resume_and_get(dev);
- return 0;
+ return retval;
}
/**
@@ -1587,10 +1607,11 @@ int irq_chip_pm_get(struct irq_data *data)
*/
int irq_chip_pm_put(struct irq_data *data)
{
+ struct device *dev = irq_get_pm_device(data);
int retval = 0;
- if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device)
- retval = pm_runtime_put(data->chip->parent_device);
+ if (IS_ENABLED(CONFIG_PM) && dev)
+ retval = pm_runtime_put(dev);
return (retval < 0) ? retval : 0;
}
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 02236b13b359..15a7654eff68 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -37,7 +37,7 @@ static inline bool irq_needs_fixup(struct irq_data *d)
* has been removed from the online mask already.
*/
if (cpumask_any_but(m, cpu) < nr_cpu_ids &&
- cpumask_any_and(m, cpu_online_mask) >= nr_cpu_ids) {
+ !cpumask_intersects(m, cpu_online_mask)) {
/*
* If this happens then there was a missed IRQ fixup at some
* point. Warn about it and enforce fixup.
@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_desc *desc)
}
/*
+ * Complete an eventually pending irq move cleanup. If this
+ * interrupt was moved in hard irq context, then the vectors need
+ * to be cleaned up. It can't wait until this interrupt actually
+ * happens and this CPU was involved.
+ */
+ irq_force_complete_move(desc);
+
+ /*
* No move required, if:
* - Interrupt is per cpu
* - Interrupt is not started
@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_desc *desc)
}
/*
- * Complete an eventually pending irq move cleanup. If this
- * interrupt was moved in hard irq context, then the vectors need
- * to be cleaned up. It can't wait until this interrupt actually
- * happens and this CPU was involved.
- */
- irq_force_complete_move(desc);
-
- /*
* If there is a setaffinity pending, then try to reuse the pending
* mask, so the last change of the affinity does not get lost. If
* there is no move pending or the pending mask does not contain
@@ -110,7 +110,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
if (maskchip && chip->irq_mask)
chip->irq_mask(d);
- if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+ if (!cpumask_intersects(affinity, cpu_online_mask)) {
/*
* If the interrupt is managed, then shut it down and leave
* the affinity untouched.
@@ -130,6 +130,22 @@ static bool migrate_one_irq(struct irq_desc *desc)
* CPU.
*/
err = irq_do_set_affinity(d, affinity, false);
+
+ /*
+ * If there are online CPUs in the affinity mask, but they have no
+ * vectors left to make the migration work, try to break the
+ * affinity by migrating to any online CPU.
+ */
+ if (err == -ENOSPC && !irqd_affinity_is_managed(d) && affinity != cpu_online_mask) {
+ pr_debug("IRQ%u: set affinity failed for %*pbl, re-try with online CPUs\n",
+ d->irq, cpumask_pr_args(affinity));
+
+ affinity = cpu_online_mask;
+ brokeaff = true;
+
+ err = irq_do_set_affinity(d, affinity, false);
+ }
+
if (err) {
pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
d->irq, err);
@@ -166,7 +182,7 @@ void irq_migrate_all_off_this_cpu(void)
raw_spin_unlock(&desc->lock);
if (affinity_broken) {
- pr_warn_ratelimited("IRQ %u: no longer affine to CPU%u\n",
+ pr_debug_ratelimited("IRQ %u: no longer affine to CPU%u\n",
irq, smp_processor_id());
}
}
@@ -176,10 +192,10 @@ static bool hk_should_isolate(struct irq_data *data, unsigned int cpu)
{
const struct cpumask *hk_mask;
- if (!housekeeping_enabled(HK_FLAG_MANAGED_IRQ))
+ if (!housekeeping_enabled(HK_TYPE_MANAGED_IRQ))
return false;
- hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
+ hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
if (cpumask_subset(irq_data_get_effective_affinity_mask(data), hk_mask))
return false;
@@ -195,10 +211,15 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
!irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity))
return;
- if (irqd_is_managed_and_shutdown(data)) {
- irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+ /*
+ * Don't restore suspended interrupts here when a system comes back
+ * from S3. They are reenabled via resume_device_irqs().
+ */
+ if (desc->istate & IRQS_SUSPENDED)
return;
- }
+
+ if (irqd_is_managed_and_shutdown(data))
+ irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
/*
* If the interrupt can only be directed to a single target
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index e4cff358b437..ca142b9a4db3 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -9,14 +9,8 @@
static struct dentry *irq_dir;
-struct irq_bit_descr {
- unsigned int mask;
- char *name;
-};
-#define BIT_MASK_DESCR(m) { .mask = m, .name = #m }
-
-static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
- const struct irq_bit_descr *sd, int size)
+void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
+ const struct irq_bit_descr *sd, int size)
{
int i;
@@ -30,7 +24,7 @@ static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc)
{
struct irq_data *data = irq_desc_get_irq_data(desc);
- struct cpumask *msk;
+ const struct cpumask *msk;
msk = irq_data_get_affinity_mask(data);
seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk));
@@ -58,6 +52,8 @@ static const struct irq_bit_descr irqchip_flags[] = {
BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI),
BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI),
BIT_MASK_DESCR(IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND),
+ BIT_MASK_DESCR(IRQCHIP_IMMUTABLE),
+ BIT_MASK_DESCR(IRQCHIP_MOVE_DEFERRED),
};
static void
@@ -69,8 +65,12 @@ irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind)
seq_printf(m, "chip: None\n");
return;
}
- seq_printf(m, "%*schip: %s\n", ind, "", chip->name);
- seq_printf(m, "%*sflags: 0x%lx\n", ind + 1, "", chip->flags);
+ seq_printf(m, "%*schip: ", ind, "");
+ if (chip->irq_print_chip)
+ chip->irq_print_chip(data, m);
+ else
+ seq_printf(m, "%s", chip->name);
+ seq_printf(m, "\n%*sflags: 0x%lx\n", ind + 1, "", chip->flags);
irq_debug_show_bits(m, ind, chip->flags, irqchip_flags,
ARRAY_SIZE(irqchip_flags));
}
@@ -109,14 +109,12 @@ static const struct irq_bit_descr irqdata_states[] = {
BIT_MASK_DESCR(IRQD_NO_BALANCING),
BIT_MASK_DESCR(IRQD_SINGLE_TARGET),
- BIT_MASK_DESCR(IRQD_MOVE_PCNTXT),
BIT_MASK_DESCR(IRQD_AFFINITY_SET),
BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
BIT_MASK_DESCR(IRQD_AFFINITY_ON_ACTIVATE),
BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
BIT_MASK_DESCR(IRQD_CAN_RESERVE),
- BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK),
BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
@@ -128,6 +126,8 @@ static const struct irq_bit_descr irqdata_states[] = {
BIT_MASK_DESCR(IRQD_HANDLE_ENFORCE_IRQCTX),
BIT_MASK_DESCR(IRQD_IRQ_ENABLED_ON_SUSPEND),
+
+ BIT_MASK_DESCR(IRQD_RESEND_WHEN_IN_PROGRESS),
};
static const struct irq_bit_descr irqdesc_states[] = {
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index f6e5515ee077..eb16a58e0322 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/irq.h>
@@ -140,9 +141,8 @@ void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id)
{
struct irq_devres match_data = { irq, dev_id };
- WARN_ON(devres_destroy(dev, devm_irq_release, devm_irq_match,
+ WARN_ON(devres_release(dev, devm_irq_release, devm_irq_match,
&match_data));
- free_irq(irq, dev_id);
}
EXPORT_SYMBOL(devm_free_irq);
@@ -282,3 +282,43 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc,
}
EXPORT_SYMBOL_GPL(devm_irq_setup_generic_chip);
#endif /* CONFIG_GENERIC_IRQ_CHIP */
+
+#ifdef CONFIG_IRQ_DOMAIN
+static void devm_irq_domain_remove(struct device *dev, void *res)
+{
+ struct irq_domain **domain = res;
+
+ irq_domain_remove(*domain);
+}
+
+/**
+ * devm_irq_domain_instantiate() - Instantiate a new irq domain data for a
+ * managed device.
+ * @dev: Device to instantiate the domain for
+ * @info: Domain information pointer pointing to the information for this
+ * domain
+ *
+ * Return: A pointer to the instantiated irq domain or an ERR_PTR value.
+ */
+struct irq_domain *devm_irq_domain_instantiate(struct device *dev,
+ const struct irq_domain_info *info)
+{
+ struct irq_domain *domain;
+ struct irq_domain **dr;
+
+ dr = devres_alloc(devm_irq_domain_remove, sizeof(*dr), GFP_KERNEL);
+ if (!dr)
+ return ERR_PTR(-ENOMEM);
+
+ domain = irq_domain_instantiate(info);
+ if (!IS_ERR(domain)) {
+ *dr = domain;
+ devres_add(dev, dr);
+ } else {
+ devres_free(dr);
+ }
+
+ return domain;
+}
+EXPORT_SYMBOL_GPL(devm_irq_domain_instantiate);
+#endif /* CONFIG_IRQ_DOMAIN */
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c
index 0b0cdf206dc4..7fe6cffe7d0d 100644
--- a/kernel/irq/dummychip.c
+++ b/kernel/irq/dummychip.c
@@ -13,7 +13,7 @@
/*
* What should we do if we get a hw irq event on an illegal vector?
- * Each architecture has to answer this themself.
+ * Each architecture has to answer this themselves.
*/
static void ack_bad(struct irq_data *data)
{
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index a23ac2bbf433..c4a8bca5f2b0 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -25,6 +25,7 @@ static DEFINE_RAW_SPINLOCK(gc_lock);
void irq_gc_noop(struct irq_data *d)
{
}
+EXPORT_SYMBOL_GPL(irq_gc_noop);
/**
* irq_gc_mask_disable_reg - Mask chip via disable register
@@ -44,6 +45,7 @@ void irq_gc_mask_disable_reg(struct irq_data *d)
*ct->mask_cache &= ~mask;
irq_gc_unlock(gc);
}
+EXPORT_SYMBOL_GPL(irq_gc_mask_disable_reg);
/**
* irq_gc_mask_set_bit - Mask chip via setting bit in mask register
@@ -103,6 +105,7 @@ void irq_gc_unmask_enable_reg(struct irq_data *d)
*ct->mask_cache |= mask;
irq_gc_unlock(gc);
}
+EXPORT_SYMBOL_GPL(irq_gc_unmask_enable_reg);
/**
* irq_gc_ack_set_bit - Ack pending interrupt via setting bit
@@ -159,6 +162,7 @@ void irq_gc_mask_disable_and_ack_set(struct irq_data *d)
irq_reg_writel(gc, mask, ct->regs.ack);
irq_gc_unlock(gc);
}
+EXPORT_SYMBOL_GPL(irq_gc_mask_disable_and_ack_set);
/**
* irq_gc_eoi - EOI interrupt
@@ -200,6 +204,7 @@ int irq_gc_set_wake(struct irq_data *d, unsigned int on)
irq_gc_unlock(gc);
return 0;
}
+EXPORT_SYMBOL_GPL(irq_gc_set_wake);
static u32 irq_readl_be(void __iomem *addr)
{
@@ -215,11 +220,15 @@ void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
int num_ct, unsigned int irq_base,
void __iomem *reg_base, irq_flow_handler_t handler)
{
+ struct irq_chip_type *ct = gc->chip_types;
+ int i;
+
raw_spin_lock_init(&gc->lock);
gc->num_ct = num_ct;
gc->irq_base = irq_base;
gc->reg_base = reg_base;
- gc->chip_types->chip.name = name;
+ for (i = 0; i < num_ct; i++)
+ ct[i].chip.name = name;
gc->chip_types->handler = handler;
}
@@ -239,9 +248,8 @@ irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base,
void __iomem *reg_base, irq_flow_handler_t handler)
{
struct irq_chip_generic *gc;
- unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
- gc = kzalloc(sz, GFP_KERNEL);
+ gc = kzalloc(struct_size(gc, chip_types, num_ct), GFP_KERNEL);
if (gc) {
irq_init_generic_chip(gc, name, num_ct, irq_base, reg_base,
handler);
@@ -269,70 +277,139 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags)
}
/**
- * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain
- * @d: irq domain for which to allocate chips
- * @irqs_per_chip: Number of interrupts each chip handles (max 32)
- * @num_ct: Number of irq_chip_type instances associated with this
- * @name: Name of the irq chip
- * @handler: Default flow handler associated with these chips
- * @clr: IRQ_* bits to clear in the mapping function
- * @set: IRQ_* bits to set in the mapping function
- * @gcflags: Generic chip specific setup flags
+ * irq_domain_alloc_generic_chips - Allocate generic chips for an irq domain
+ * @d: irq domain for which to allocate chips
+ * @info: Generic chip information
+ *
+ * Return: 0 on success, negative error code on failure
*/
-int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
- int num_ct, const char *name,
- irq_flow_handler_t handler,
- unsigned int clr, unsigned int set,
- enum irq_gc_flags gcflags)
+int irq_domain_alloc_generic_chips(struct irq_domain *d,
+ const struct irq_domain_chip_generic_info *info)
{
struct irq_domain_chip_generic *dgc;
struct irq_chip_generic *gc;
- int numchips, sz, i;
unsigned long flags;
+ int numchips, i;
+ size_t dgc_sz;
+ size_t gc_sz;
+ size_t sz;
void *tmp;
+ int ret;
if (d->gc)
return -EBUSY;
- numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip);
+ numchips = DIV_ROUND_UP(d->revmap_size, info->irqs_per_chip);
if (!numchips)
return -EINVAL;
/* Allocate a pointer, generic chip and chiptypes for each chip */
- sz = sizeof(*dgc) + numchips * sizeof(gc);
- sz += numchips * (sizeof(*gc) + num_ct * sizeof(struct irq_chip_type));
+ gc_sz = struct_size(gc, chip_types, info->num_ct);
+ dgc_sz = struct_size(dgc, gc, numchips);
+ sz = dgc_sz + numchips * gc_sz;
tmp = dgc = kzalloc(sz, GFP_KERNEL);
if (!dgc)
return -ENOMEM;
- dgc->irqs_per_chip = irqs_per_chip;
+ dgc->irqs_per_chip = info->irqs_per_chip;
dgc->num_chips = numchips;
- dgc->irq_flags_to_set = set;
- dgc->irq_flags_to_clear = clr;
- dgc->gc_flags = gcflags;
+ dgc->irq_flags_to_set = info->irq_flags_to_set;
+ dgc->irq_flags_to_clear = info->irq_flags_to_clear;
+ dgc->gc_flags = info->gc_flags;
+ dgc->exit = info->exit;
d->gc = dgc;
/* Calc pointer to the first generic chip */
- tmp += sizeof(*dgc) + numchips * sizeof(gc);
+ tmp += dgc_sz;
for (i = 0; i < numchips; i++) {
/* Store the pointer to the generic chip */
dgc->gc[i] = gc = tmp;
- irq_init_generic_chip(gc, name, num_ct, i * irqs_per_chip,
- NULL, handler);
+ irq_init_generic_chip(gc, info->name, info->num_ct,
+ i * dgc->irqs_per_chip, NULL,
+ info->handler);
gc->domain = d;
- if (gcflags & IRQ_GC_BE_IO) {
+ if (dgc->gc_flags & IRQ_GC_BE_IO) {
gc->reg_readl = &irq_readl_be;
gc->reg_writel = &irq_writel_be;
}
+ if (info->init) {
+ ret = info->init(gc);
+ if (ret)
+ goto err;
+ }
+
raw_spin_lock_irqsave(&gc_lock, flags);
list_add_tail(&gc->list, &gc_list);
raw_spin_unlock_irqrestore(&gc_lock, flags);
/* Calc pointer to the next generic chip */
- tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
+ tmp += gc_sz;
}
return 0;
+
+err:
+ while (i--) {
+ if (dgc->exit)
+ dgc->exit(dgc->gc[i]);
+ irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0);
+ }
+ d->gc = NULL;
+ kfree(dgc);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(irq_domain_alloc_generic_chips);
+
+/**
+ * irq_domain_remove_generic_chips - Remove generic chips from an irq domain
+ * @d: irq domain for which generic chips are to be removed
+ */
+void irq_domain_remove_generic_chips(struct irq_domain *d)
+{
+ struct irq_domain_chip_generic *dgc = d->gc;
+ unsigned int i;
+
+ if (!dgc)
+ return;
+
+ for (i = 0; i < dgc->num_chips; i++) {
+ if (dgc->exit)
+ dgc->exit(dgc->gc[i]);
+ irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0);
+ }
+ d->gc = NULL;
+ kfree(dgc);
+}
+EXPORT_SYMBOL_GPL(irq_domain_remove_generic_chips);
+
+/**
+ * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain
+ * @d: irq domain for which to allocate chips
+ * @irqs_per_chip: Number of interrupts each chip handles (max 32)
+ * @num_ct: Number of irq_chip_type instances associated with this
+ * @name: Name of the irq chip
+ * @handler: Default flow handler associated with these chips
+ * @clr: IRQ_* bits to clear in the mapping function
+ * @set: IRQ_* bits to set in the mapping function
+ * @gcflags: Generic chip specific setup flags
+ */
+int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
+ int num_ct, const char *name,
+ irq_flow_handler_t handler,
+ unsigned int clr, unsigned int set,
+ enum irq_gc_flags gcflags)
+{
+ struct irq_domain_chip_generic_info info = {
+ .irqs_per_chip = irqs_per_chip,
+ .num_ct = num_ct,
+ .name = name,
+ .handler = handler,
+ .irq_flags_to_clear = clr,
+ .irq_flags_to_set = set,
+ .gc_flags = gcflags,
+ };
+
+ return irq_domain_alloc_generic_chips(d, &info);
}
EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips);
@@ -424,7 +501,7 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
return 0;
}
-static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq)
+void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq)
{
struct irq_data *data = irq_domain_get_irq_data(d, virq);
struct irq_domain_chip_generic *dgc = d->gc;
@@ -444,7 +521,7 @@ static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq)
}
-struct irq_domain_ops irq_generic_chip_ops = {
+const struct irq_domain_ops irq_generic_chip_ops = {
.map = irq_map_generic_chip,
.unmap = irq_unmap_generic_chip,
.xlate = irq_domain_xlate_onetwocell,
@@ -537,21 +614,34 @@ EXPORT_SYMBOL_GPL(irq_setup_alt_chip);
void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
unsigned int clr, unsigned int set)
{
- unsigned int i = gc->irq_base;
+ unsigned int i, virq;
raw_spin_lock(&gc_lock);
list_del(&gc->list);
raw_spin_unlock(&gc_lock);
- for (; msk; msk >>= 1, i++) {
+ for (i = 0; msk; msk >>= 1, i++) {
if (!(msk & 0x01))
continue;
+ /*
+ * Interrupt domain based chips store the base hardware
+ * interrupt number in gc::irq_base. Otherwise gc::irq_base
+ * contains the base Linux interrupt number.
+ */
+ if (gc->domain) {
+ virq = irq_find_mapping(gc->domain, gc->irq_base + i);
+ if (!virq)
+ continue;
+ } else {
+ virq = gc->irq_base + i;
+ }
+
/* Remove handler first. That will mask the irq line */
- irq_set_handler(i, NULL);
- irq_set_chip(i, &no_irq_chip);
- irq_set_chip_data(i, NULL);
- irq_modify_status(i, clr, set);
+ irq_set_handler(virq, NULL);
+ irq_set_chip(virq, &no_irq_chip);
+ irq_set_chip_data(virq, NULL);
+ irq_modify_status(virq, clr, set);
}
}
EXPORT_SYMBOL_GPL(irq_remove_generic_chip);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 762a928e18f9..9489f93b3db3 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -14,6 +14,8 @@
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <asm/irq_regs.h>
+
#include <trace/events/irq.h>
#include "internals.h"
@@ -134,7 +136,7 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
wake_up_process(action->thread);
}
-irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags)
+irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc)
{
irqreturn_t retval = IRQ_NONE;
unsigned int irq = desc->irq_data.irq;
@@ -172,10 +174,6 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
}
__irq_wake_thread(desc, action);
-
- fallthrough; /* to add to randomness */
- case IRQ_HANDLED:
- *flags |= action->flags;
break;
default:
@@ -191,13 +189,12 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
{
irqreturn_t retval;
- unsigned int flags = 0;
- retval = __handle_irq_event_percpu(desc, &flags);
+ retval = __handle_irq_event_percpu(desc);
- add_interrupt_randomness(desc->irq_data.irq, flags);
+ add_interrupt_randomness(desc->irq_data.irq);
- if (!noirqdebug)
+ if (!irq_settings_no_debug(desc))
note_interrupt(desc, retval);
return retval;
}
@@ -226,4 +223,20 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
handle_arch_irq = handle_irq;
return 0;
}
+
+/**
+ * generic_handle_arch_irq - root irq handler for architectures which do no
+ * entry accounting themselves
+ * @regs: Register file coming from the low-level handling code
+ */
+asmlinkage void noinstr generic_handle_arch_irq(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs;
+
+ irq_enter();
+ old_regs = set_irq_regs(regs);
+ handle_arch_irq(regs);
+ set_irq_regs(old_regs);
+ irq_exit();
+}
#endif
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 54363527feea..a979523640d0 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -12,9 +12,9 @@
#include <linux/sched/clock.h>
#ifdef CONFIG_SPARSE_IRQ
-# define IRQ_BITMAP_BITS (NR_IRQS + 8196)
+# define MAX_SPARSE_IRQS INT_MAX
#else
-# define IRQ_BITMAP_BITS NR_IRQS
+# define MAX_SPARSE_IRQS NR_IRQS
#endif
#define istate core_internal_state__do_not_mess_with_it
@@ -29,12 +29,14 @@ extern struct irqaction chained_action;
* IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
* IRQTF_AFFINITY - irq thread is requested to adjust affinity
* IRQTF_FORCED_THREAD - irq action is force threaded
+ * IRQTF_READY - signals that irq thread is ready
*/
enum {
IRQTF_RUNTHREAD,
IRQTF_WARNED,
IRQTF_AFFINITY,
IRQTF_FORCED_THREAD,
+ IRQTF_READY,
};
/*
@@ -45,11 +47,15 @@ enum {
* detection
* IRQS_POLL_INPROGRESS - polling in progress
* IRQS_ONESHOT - irq is not unmasked in primary handler
- * IRQS_REPLAY - irq is replayed
+ * IRQS_REPLAY - irq has been resent and will not be resent
+ * again until the handler has run and cleared
+ * this flag.
* IRQS_WAITING - irq is waiting
- * IRQS_PENDING - irq is pending and replayed later
+ * IRQS_PENDING - irq needs to be resent and should be resent
+ * at the next available opportunity.
* IRQS_SUSPENDED - irq is suspended
* IRQS_NMI - irq line is used to deliver NMIs
+ * IRQS_SYSFS - descriptor has been added to sysfs
*/
enum {
IRQS_AUTODETECT = 0x00000001,
@@ -62,6 +68,7 @@ enum {
IRQS_SUSPENDED = 0x00000800,
IRQS_TIMINGS = 0x00001000,
IRQS_NMI = 0x00002000,
+ IRQS_SYSFS = 0x00004000,
};
#include "debug.h"
@@ -91,6 +98,8 @@ extern void mask_irq(struct irq_desc *desc);
extern void unmask_irq(struct irq_desc *desc);
extern void unmask_threaded_irq(struct irq_desc *desc);
+extern unsigned int kstat_irqs_desc(struct irq_desc *desc, const struct cpumask *cpumask);
+
#ifdef CONFIG_SPARSE_IRQ
static inline void irq_mark_irq(unsigned int irq) { }
#else
@@ -101,17 +110,19 @@ extern int __irq_get_irqchip_state(struct irq_data *data,
enum irqchip_irq_state which,
bool *state);
-extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
-
-irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags);
+irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc);
irqreturn_t handle_irq_event_percpu(struct irq_desc *desc);
irqreturn_t handle_irq_event(struct irq_desc *desc);
/* Resending of interrupts :*/
int check_irq_resend(struct irq_desc *desc, bool inject);
+void clear_irq_resend(struct irq_desc *desc);
+void irq_resend_init(struct irq_desc *desc);
bool irq_wait_for_poll(struct irq_desc *desc);
void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
+void wake_threads_waitq(struct irq_desc *desc);
+
#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc);
@@ -249,7 +260,7 @@ static inline void irq_state_set_masked(struct irq_desc *desc)
static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc)
{
- __this_cpu_inc(*desc->kstat_irqs);
+ __this_cpu_inc(desc->kstat_irqs->cnt);
__this_cpu_inc(kstat.irqs_sum);
}
@@ -269,6 +280,11 @@ static inline int irq_desc_is_chained(struct irq_desc *desc)
return (desc->action && desc->action == &chained_action);
}
+static inline bool irq_is_nmi(struct irq_desc *desc)
+{
+ return desc->istate & IRQS_NMI;
+}
+
#ifdef CONFIG_PM_SLEEP
bool irq_pm_check_wakeup(struct irq_desc *desc);
void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action);
@@ -405,7 +421,7 @@ irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
#ifdef CONFIG_GENERIC_PENDING_IRQ
static inline bool irq_can_move_pcntxt(struct irq_data *data)
{
- return irqd_can_move_in_process_context(data);
+ return !(data->chip->flags & IRQCHIP_MOVE_DEFERRED);
}
static inline bool irq_move_pending(struct irq_data *data)
{
@@ -425,10 +441,6 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
{
return desc->pending_mask;
}
-static inline bool handle_enforce_irqctx(struct irq_data *data)
-{
- return irqd_is_handle_enforce_irqctx(data);
-}
bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
#else /* CONFIG_GENERIC_PENDING_IRQ */
static inline bool irq_can_move_pcntxt(struct irq_data *data)
@@ -455,10 +467,6 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
{
return false;
}
-static inline bool handle_enforce_irqctx(struct irq_data *data)
-{
- return false;
-}
#endif /* !CONFIG_GENERIC_PENDING_IRQ */
#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
@@ -485,6 +493,16 @@ static inline struct irq_data *irqd_get_parent_data(struct irq_data *irqd)
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
#include <linux/debugfs.h>
+struct irq_bit_descr {
+ unsigned int mask;
+ char *name;
+};
+
+#define BIT_MASK_DESCR(m) { .mask = m, .name = #m }
+
+void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
+ const struct irq_bit_descr *sd, int size);
+
void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
static inline void irq_remove_debugfs_entry(struct irq_desc *desc)
{
diff --git a/kernel/irq/ipi-mux.c b/kernel/irq/ipi-mux.c
new file mode 100644
index 000000000000..fa4fc18c6131
--- /dev/null
+++ b/kernel/irq/ipi-mux.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Multiplex several virtual IPIs over a single HW IPI.
+ *
+ * Copyright The Asahi Linux Contributors
+ * Copyright (c) 2022 Ventana Micro Systems Inc.
+ */
+
+#define pr_fmt(fmt) "ipi-mux: " fmt
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/jump_label.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+
+struct ipi_mux_cpu {
+ atomic_t enable;
+ atomic_t bits;
+};
+
+static struct ipi_mux_cpu __percpu *ipi_mux_pcpu;
+static struct irq_domain *ipi_mux_domain;
+static void (*ipi_mux_send)(unsigned int cpu);
+
+static void ipi_mux_mask(struct irq_data *d)
+{
+ struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu);
+
+ atomic_andnot(BIT(irqd_to_hwirq(d)), &icpu->enable);
+}
+
+static void ipi_mux_unmask(struct irq_data *d)
+{
+ struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu);
+ u32 ibit = BIT(irqd_to_hwirq(d));
+
+ atomic_or(ibit, &icpu->enable);
+
+ /*
+ * The atomic_or() above must complete before the atomic_read()
+ * below to avoid racing ipi_mux_send_mask().
+ */
+ smp_mb__after_atomic();
+
+ /* If a pending IPI was unmasked, raise a parent IPI immediately. */
+ if (atomic_read(&icpu->bits) & ibit)
+ ipi_mux_send(smp_processor_id());
+}
+
+static void ipi_mux_send_mask(struct irq_data *d, const struct cpumask *mask)
+{
+ struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu);
+ u32 ibit = BIT(irqd_to_hwirq(d));
+ unsigned long pending;
+ int cpu;
+
+ for_each_cpu(cpu, mask) {
+ icpu = per_cpu_ptr(ipi_mux_pcpu, cpu);
+
+ /*
+ * This sequence is the mirror of the one in ipi_mux_unmask();
+ * see the comment there. Additionally, release semantics
+ * ensure that the vIPI flag set is ordered after any shared
+ * memory accesses that precede it. This therefore also pairs
+ * with the atomic_fetch_andnot in ipi_mux_process().
+ */
+ pending = atomic_fetch_or_release(ibit, &icpu->bits);
+
+ /*
+ * The atomic_fetch_or_release() above must complete
+ * before the atomic_read() below to avoid racing with
+ * ipi_mux_unmask().
+ */
+ smp_mb__after_atomic();
+
+ /*
+ * The flag writes must complete before the physical IPI is
+ * issued to another CPU. This is implied by the control
+ * dependency on the result of atomic_read() below, which is
+ * itself already ordered after the vIPI flag write.
+ */
+ if (!(pending & ibit) && (atomic_read(&icpu->enable) & ibit))
+ ipi_mux_send(cpu);
+ }
+}
+
+static const struct irq_chip ipi_mux_chip = {
+ .name = "IPI Mux",
+ .irq_mask = ipi_mux_mask,
+ .irq_unmask = ipi_mux_unmask,
+ .ipi_send_mask = ipi_mux_send_mask,
+};
+
+static int ipi_mux_domain_alloc(struct irq_domain *d, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ int i;
+
+ for (i = 0; i < nr_irqs; i++) {
+ irq_set_percpu_devid(virq + i);
+ irq_domain_set_info(d, virq + i, i, &ipi_mux_chip, NULL,
+ handle_percpu_devid_irq, NULL, NULL);
+ }
+
+ return 0;
+}
+
+static const struct irq_domain_ops ipi_mux_domain_ops = {
+ .alloc = ipi_mux_domain_alloc,
+ .free = irq_domain_free_irqs_top,
+};
+
+/**
+ * ipi_mux_process - Process multiplexed virtual IPIs
+ */
+void ipi_mux_process(void)
+{
+ struct ipi_mux_cpu *icpu = this_cpu_ptr(ipi_mux_pcpu);
+ irq_hw_number_t hwirq;
+ unsigned long ipis;
+ unsigned int en;
+
+ /*
+ * Reading enable mask does not need to be ordered as long as
+ * this function is called from interrupt handler because only
+ * the CPU itself can change it's own enable mask.
+ */
+ en = atomic_read(&icpu->enable);
+
+ /*
+ * Clear the IPIs we are about to handle. This pairs with the
+ * atomic_fetch_or_release() in ipi_mux_send_mask().
+ */
+ ipis = atomic_fetch_andnot(en, &icpu->bits) & en;
+
+ for_each_set_bit(hwirq, &ipis, BITS_PER_TYPE(int))
+ generic_handle_domain_irq(ipi_mux_domain, hwirq);
+}
+
+/**
+ * ipi_mux_create - Create virtual IPIs multiplexed on top of a single
+ * parent IPI.
+ * @nr_ipi: number of virtual IPIs to create. This should
+ * be <= BITS_PER_TYPE(int)
+ * @mux_send: callback to trigger parent IPI for a particular CPU
+ *
+ * Returns first virq of the newly created virtual IPIs upon success
+ * or <=0 upon failure
+ */
+int ipi_mux_create(unsigned int nr_ipi, void (*mux_send)(unsigned int cpu))
+{
+ struct fwnode_handle *fwnode;
+ struct irq_domain *domain;
+ int rc;
+
+ if (ipi_mux_domain)
+ return -EEXIST;
+
+ if (BITS_PER_TYPE(int) < nr_ipi || !mux_send)
+ return -EINVAL;
+
+ ipi_mux_pcpu = alloc_percpu(typeof(*ipi_mux_pcpu));
+ if (!ipi_mux_pcpu)
+ return -ENOMEM;
+
+ fwnode = irq_domain_alloc_named_fwnode("IPI-Mux");
+ if (!fwnode) {
+ pr_err("unable to create IPI Mux fwnode\n");
+ rc = -ENOMEM;
+ goto fail_free_cpu;
+ }
+
+ domain = irq_domain_create_linear(fwnode, nr_ipi,
+ &ipi_mux_domain_ops, NULL);
+ if (!domain) {
+ pr_err("unable to add IPI Mux domain\n");
+ rc = -ENOMEM;
+ goto fail_free_fwnode;
+ }
+
+ domain->flags |= IRQ_DOMAIN_FLAG_IPI_SINGLE;
+ irq_domain_update_bus_token(domain, DOMAIN_BUS_IPI);
+
+ rc = irq_domain_alloc_irqs(domain, nr_ipi, NUMA_NO_NODE, NULL);
+ if (rc <= 0) {
+ pr_err("unable to alloc IRQs from IPI Mux domain\n");
+ goto fail_free_domain;
+ }
+
+ ipi_mux_domain = domain;
+ ipi_mux_send = mux_send;
+
+ return rc;
+
+fail_free_domain:
+ irq_domain_remove(domain);
+fail_free_fwnode:
+ irq_domain_free_fwnode(fwnode);
+fail_free_cpu:
+ free_percpu(ipi_mux_pcpu);
+ return rc;
+}
diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c
index 43e3d1be622c..961d4af76af3 100644
--- a/kernel/irq/ipi.c
+++ b/kernel/irq/ipi.c
@@ -14,11 +14,11 @@
/**
* irq_reserve_ipi() - Setup an IPI to destination cpumask
* @domain: IPI domain
- * @dest: cpumask of cpus which can receive the IPI
+ * @dest: cpumask of CPUs which can receive the IPI
*
* Allocate a virq that can be used to send IPI to any CPU in dest mask.
*
- * On success it'll return linux irq number and error code on failure
+ * Return: Linux IRQ number on success or error code on failure
*/
int irq_reserve_ipi(struct irq_domain *domain,
const struct cpumask *dest)
@@ -104,22 +104,22 @@ free_descs:
/**
* irq_destroy_ipi() - unreserve an IPI that was previously allocated
- * @irq: linux irq number to be destroyed
- * @dest: cpumask of cpus which should have the IPI removed
+ * @irq: Linux IRQ number to be destroyed
+ * @dest: cpumask of CPUs which should have the IPI removed
*
- * The IPIs allocated with irq_reserve_ipi() are retuerned to the system
+ * The IPIs allocated with irq_reserve_ipi() are returned to the system
* destroying all virqs associated with them.
*
- * Return 0 on success or error code on failure.
+ * Return: %0 on success or error code on failure.
*/
int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest)
{
struct irq_data *data = irq_get_irq_data(irq);
- struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
+ const struct cpumask *ipimask;
struct irq_domain *domain;
unsigned int nr_irqs;
- if (!irq || !data || !ipimask)
+ if (!irq || !data)
return -EINVAL;
domain = data->domain;
@@ -131,7 +131,8 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest)
return -EINVAL;
}
- if (WARN_ON(!cpumask_subset(dest, ipimask)))
+ ipimask = irq_data_get_affinity_mask(data);
+ if (!ipimask || WARN_ON(!cpumask_subset(dest, ipimask)))
/*
* Must be destroying a subset of CPUs to which this IPI
* was set up to target
@@ -150,24 +151,25 @@ int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest)
}
/**
- * ipi_get_hwirq - Get the hwirq associated with an IPI to a cpu
- * @irq: linux irq number
- * @cpu: the target cpu
+ * ipi_get_hwirq - Get the hwirq associated with an IPI to a CPU
+ * @irq: Linux IRQ number
+ * @cpu: the target CPU
*
* When dealing with coprocessors IPI, we need to inform the coprocessor of
* the hwirq it needs to use to receive and send IPIs.
*
- * Returns hwirq value on success and INVALID_HWIRQ on failure.
+ * Return: hwirq value on success or INVALID_HWIRQ on failure.
*/
irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu)
{
struct irq_data *data = irq_get_irq_data(irq);
- struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
+ const struct cpumask *ipimask;
- if (!data || !ipimask || cpu >= nr_cpu_ids)
+ if (!data || cpu >= nr_cpu_ids)
return INVALID_HWIRQ;
- if (!cpumask_test_cpu(cpu, ipimask))
+ ipimask = irq_data_get_affinity_mask(data);
+ if (!ipimask || !cpumask_test_cpu(cpu, ipimask))
return INVALID_HWIRQ;
/*
@@ -186,9 +188,9 @@ EXPORT_SYMBOL_GPL(ipi_get_hwirq);
static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
const struct cpumask *dest, unsigned int cpu)
{
- struct cpumask *ipimask = irq_data_get_affinity_mask(data);
+ const struct cpumask *ipimask;
- if (!chip || !ipimask)
+ if (!chip || !data)
return -EINVAL;
if (!chip->ipi_send_single && !chip->ipi_send_mask)
@@ -197,6 +199,10 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
if (cpu >= nr_cpu_ids)
return -EINVAL;
+ ipimask = irq_data_get_affinity_mask(data);
+ if (!ipimask)
+ return -EINVAL;
+
if (dest) {
if (!cpumask_subset(dest, ipimask))
return -EINVAL;
@@ -216,7 +222,7 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
* This function is for architecture or core code to speed up IPI sending. Not
* usable from driver code.
*
- * Returns zero on success and negative error number on failure.
+ * Return: %0 on success or negative error number on failure.
*/
int __ipi_send_single(struct irq_desc *desc, unsigned int cpu)
{
@@ -250,7 +256,7 @@ int __ipi_send_single(struct irq_desc *desc, unsigned int cpu)
}
/**
- * ipi_send_mask - send an IPI to target Linux SMP CPU(s)
+ * __ipi_send_mask - send an IPI to target Linux SMP CPU(s)
* @desc: pointer to irq_desc of the IRQ
* @dest: dest CPU(s), must be a subset of the mask passed to
* irq_reserve_ipi()
@@ -258,7 +264,7 @@ int __ipi_send_single(struct irq_desc *desc, unsigned int cpu)
* This function is for architecture or core code to speed up IPI sending. Not
* usable from driver code.
*
- * Returns zero on success and negative error number on failure.
+ * Return: %0 on success or negative error number on failure.
*/
int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest)
{
@@ -298,11 +304,11 @@ int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest)
/**
* ipi_send_single - Send an IPI to a single CPU
- * @virq: linux irq number from irq_reserve_ipi()
+ * @virq: Linux IRQ number from irq_reserve_ipi()
* @cpu: destination CPU, must in the destination mask passed to
* irq_reserve_ipi()
*
- * Returns zero on success and negative error number on failure.
+ * Return: %0 on success or negative error number on failure.
*/
int ipi_send_single(unsigned int virq, unsigned int cpu)
{
@@ -319,11 +325,11 @@ EXPORT_SYMBOL_GPL(ipi_send_single);
/**
* ipi_send_mask - Send an IPI to target CPU(s)
- * @virq: linux irq number from irq_reserve_ipi()
+ * @virq: Linux IRQ number from irq_reserve_ipi()
* @dest: dest CPU(s), must be a subset of the mask passed to
* irq_reserve_ipi()
*
- * Returns zero on success and negative error number on failure.
+ * Return: %0 on success or negative error number on failure.
*/
int ipi_send_mask(unsigned int virq, const struct cpumask *dest)
{
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index 48006608baf0..1a3d483548e2 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -4,30 +4,27 @@
* Copyright (C) 2020 Bartosz Golaszewski <bgolaszewski@baylibre.com>
*/
+#include <linux/cleanup.h>
+#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irq_sim.h>
#include <linux/irq_work.h>
-#include <linux/interrupt.h>
#include <linux/slab.h>
struct irq_sim_work_ctx {
struct irq_work work;
- int irq_base;
unsigned int irq_count;
unsigned long *pending;
struct irq_domain *domain;
+ struct irq_sim_ops ops;
+ void *user_data;
};
struct irq_sim_irq_ctx {
- int irqnum;
bool enabled;
struct irq_sim_work_ctx *work_ctx;
};
-struct irq_sim_devres {
- struct irq_domain *domain;
-};
-
static void irq_sim_irqmask(struct irq_data *data)
{
struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data);
@@ -92,6 +89,31 @@ static int irq_sim_set_irqchip_state(struct irq_data *data,
return 0;
}
+static int irq_sim_request_resources(struct irq_data *data)
+{
+ struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data);
+ struct irq_sim_work_ctx *work_ctx = irq_ctx->work_ctx;
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
+
+ if (work_ctx->ops.irq_sim_irq_requested)
+ return work_ctx->ops.irq_sim_irq_requested(work_ctx->domain,
+ hwirq,
+ work_ctx->user_data);
+
+ return 0;
+}
+
+static void irq_sim_release_resources(struct irq_data *data)
+{
+ struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data);
+ struct irq_sim_work_ctx *work_ctx = irq_ctx->work_ctx;
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
+
+ if (work_ctx->ops.irq_sim_irq_released)
+ work_ctx->ops.irq_sim_irq_released(work_ctx->domain, hwirq,
+ work_ctx->user_data);
+}
+
static struct irq_chip irq_sim_irqchip = {
.name = "irq_sim",
.irq_mask = irq_sim_irqmask,
@@ -99,6 +121,8 @@ static struct irq_chip irq_sim_irqchip = {
.irq_set_type = irq_sim_set_type,
.irq_get_irqchip_state = irq_sim_get_irqchip_state,
.irq_set_irqchip_state = irq_sim_set_irqchip_state,
+ .irq_request_resources = irq_sim_request_resources,
+ .irq_release_resources = irq_sim_release_resources,
};
static void irq_sim_handle_irq(struct irq_work *work)
@@ -159,7 +183,7 @@ static const struct irq_domain_ops irq_sim_domain_ops = {
* irq_domain_create_sim - Create a new interrupt simulator irq_domain and
* allocate a range of dummy interrupts.
*
- * @fnode: struct fwnode_handle to be associated with this domain.
+ * @fwnode: struct fwnode_handle to be associated with this domain.
* @num_irqs: Number of interrupts to allocate.
*
* On success: return a new irq_domain object.
@@ -168,35 +192,42 @@ static const struct irq_domain_ops irq_sim_domain_ops = {
struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode,
unsigned int num_irqs)
{
- struct irq_sim_work_ctx *work_ctx;
+ return irq_domain_create_sim_full(fwnode, num_irqs, NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(irq_domain_create_sim);
+
+struct irq_domain *irq_domain_create_sim_full(struct fwnode_handle *fwnode,
+ unsigned int num_irqs,
+ const struct irq_sim_ops *ops,
+ void *data)
+{
+ struct irq_sim_work_ctx *work_ctx __free(kfree) =
+ kmalloc(sizeof(*work_ctx), GFP_KERNEL);
- work_ctx = kmalloc(sizeof(*work_ctx), GFP_KERNEL);
if (!work_ctx)
- goto err_out;
+ return ERR_PTR(-ENOMEM);
- work_ctx->pending = bitmap_zalloc(num_irqs, GFP_KERNEL);
- if (!work_ctx->pending)
- goto err_free_work_ctx;
+ unsigned long *pending __free(bitmap) = bitmap_zalloc(num_irqs, GFP_KERNEL);
+ if (!pending)
+ return ERR_PTR(-ENOMEM);
work_ctx->domain = irq_domain_create_linear(fwnode, num_irqs,
&irq_sim_domain_ops,
work_ctx);
if (!work_ctx->domain)
- goto err_free_bitmap;
+ return ERR_PTR(-ENOMEM);
work_ctx->irq_count = num_irqs;
- init_irq_work(&work_ctx->work, irq_sim_handle_irq);
+ work_ctx->work = IRQ_WORK_INIT_HARD(irq_sim_handle_irq);
+ work_ctx->pending = no_free_ptr(pending);
+ work_ctx->user_data = data;
- return work_ctx->domain;
+ if (ops)
+ memcpy(&work_ctx->ops, ops, sizeof(*ops));
-err_free_bitmap:
- bitmap_free(work_ctx->pending);
-err_free_work_ctx:
- kfree(work_ctx);
-err_out:
- return ERR_PTR(-ENOMEM);
+ return no_free_ptr(work_ctx)->domain;
}
-EXPORT_SYMBOL_GPL(irq_domain_create_sim);
+EXPORT_SYMBOL_GPL(irq_domain_create_sim_full);
/**
* irq_domain_remove_sim - Deinitialize the interrupt simulator domain: free
@@ -216,11 +247,11 @@ void irq_domain_remove_sim(struct irq_domain *domain)
}
EXPORT_SYMBOL_GPL(irq_domain_remove_sim);
-static void devm_irq_domain_release_sim(struct device *dev, void *res)
+static void devm_irq_domain_remove_sim(void *data)
{
- struct irq_sim_devres *this = res;
+ struct irq_domain *domain = data;
- irq_domain_remove_sim(this->domain);
+ irq_domain_remove_sim(domain);
}
/**
@@ -228,7 +259,7 @@ static void devm_irq_domain_release_sim(struct device *dev, void *res)
* a managed device.
*
* @dev: Device to initialize the simulator object for.
- * @fnode: struct fwnode_handle to be associated with this domain.
+ * @fwnode: struct fwnode_handle to be associated with this domain.
* @num_irqs: Number of interrupts to allocate
*
* On success: return a new irq_domain object.
@@ -238,20 +269,29 @@ struct irq_domain *devm_irq_domain_create_sim(struct device *dev,
struct fwnode_handle *fwnode,
unsigned int num_irqs)
{
- struct irq_sim_devres *dr;
+ return devm_irq_domain_create_sim_full(dev, fwnode, num_irqs,
+ NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim);
- dr = devres_alloc(devm_irq_domain_release_sim,
- sizeof(*dr), GFP_KERNEL);
- if (!dr)
- return ERR_PTR(-ENOMEM);
+struct irq_domain *
+devm_irq_domain_create_sim_full(struct device *dev,
+ struct fwnode_handle *fwnode,
+ unsigned int num_irqs,
+ const struct irq_sim_ops *ops,
+ void *data)
+{
+ struct irq_domain *domain;
+ int ret;
- dr->domain = irq_domain_create_sim(fwnode, num_irqs);
- if (IS_ERR(dr->domain)) {
- devres_free(dr);
- return dr->domain;
- }
+ domain = irq_domain_create_sim_full(fwnode, num_irqs, ops, data);
+ if (IS_ERR(domain))
+ return domain;
- devres_add(dev, dr);
- return dr->domain;
+ ret = devm_add_action_or_reset(dev, devm_irq_domain_remove_sim, domain);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return domain;
}
-EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim);
+EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim_full);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index cc1a09406c6e..287830739783 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -12,10 +12,10 @@
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
-#include <linux/radix-tree.h>
-#include <linux/bitmap.h>
+#include <linux/maple_tree.h>
#include <linux/irqdomain.h>
#include <linux/sysfs.h>
+#include <linux/string_choices.h>
#include "internals.h"
@@ -31,7 +31,7 @@ static int __init irq_affinity_setup(char *str)
cpulist_parse(str, irq_default_affinity);
/*
* Set at least the boot cpu. We don't want to end up with
- * bugreports caused by random comandline masks
+ * bugreports caused by random commandline masks
*/
cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
return 1;
@@ -93,11 +93,23 @@ static void desc_smp_init(struct irq_desc *desc, int node,
#endif
}
+static void free_masks(struct irq_desc *desc)
+{
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+ free_cpumask_var(desc->pending_mask);
+#endif
+ free_cpumask_var(desc->irq_common_data.affinity);
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
+}
+
#else
static inline int
alloc_masks(struct irq_desc *desc, int node) { return 0; }
static inline void
desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { }
+static inline void free_masks(struct irq_desc *desc) { }
#endif
static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
@@ -123,15 +135,106 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
desc->name = NULL;
desc->owner = owner;
for_each_possible_cpu(cpu)
- *per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
+ *per_cpu_ptr(desc->kstat_irqs, cpu) = (struct irqstat) { };
desc_smp_init(desc, node, affinity);
}
-int nr_irqs = NR_IRQS;
-EXPORT_SYMBOL_GPL(nr_irqs);
+static unsigned int nr_irqs = NR_IRQS;
+
+/**
+ * irq_get_nr_irqs() - Number of interrupts supported by the system.
+ */
+unsigned int irq_get_nr_irqs(void)
+{
+ return nr_irqs;
+}
+EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
+
+/**
+ * irq_set_nr_irqs() - Set the number of interrupts supported by the system.
+ * @nr: New number of interrupts.
+ *
+ * Return: @nr.
+ */
+unsigned int irq_set_nr_irqs(unsigned int nr)
+{
+ nr_irqs = nr;
+
+ return nr;
+}
+EXPORT_SYMBOL_GPL(irq_set_nr_irqs);
static DEFINE_MUTEX(sparse_irq_lock);
-static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
+static struct maple_tree sparse_irqs = MTREE_INIT_EXT(sparse_irqs,
+ MT_FLAGS_ALLOC_RANGE |
+ MT_FLAGS_LOCK_EXTERN |
+ MT_FLAGS_USE_RCU,
+ sparse_irq_lock);
+
+static int irq_find_free_area(unsigned int from, unsigned int cnt)
+{
+ MA_STATE(mas, &sparse_irqs, 0, 0);
+
+ if (mas_empty_area(&mas, from, MAX_SPARSE_IRQS, cnt))
+ return -ENOSPC;
+ return mas.index;
+}
+
+static unsigned int irq_find_at_or_after(unsigned int offset)
+{
+ unsigned long index = offset;
+ struct irq_desc *desc;
+
+ guard(rcu)();
+ desc = mt_find(&sparse_irqs, &index, nr_irqs);
+
+ return desc ? irq_desc_get_irq(desc) : nr_irqs;
+}
+
+static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
+{
+ MA_STATE(mas, &sparse_irqs, irq, irq);
+ WARN_ON(mas_store_gfp(&mas, desc, GFP_KERNEL) != 0);
+}
+
+static void delete_irq_desc(unsigned int irq)
+{
+ MA_STATE(mas, &sparse_irqs, irq, irq);
+ mas_erase(&mas);
+}
+
+#ifdef CONFIG_SPARSE_IRQ
+static const struct kobj_type irq_kobj_type;
+#endif
+
+static int init_desc(struct irq_desc *desc, int irq, int node,
+ unsigned int flags,
+ const struct cpumask *affinity,
+ struct module *owner)
+{
+ desc->kstat_irqs = alloc_percpu(struct irqstat);
+ if (!desc->kstat_irqs)
+ return -ENOMEM;
+
+ if (alloc_masks(desc, node)) {
+ free_percpu(desc->kstat_irqs);
+ return -ENOMEM;
+ }
+
+ raw_spin_lock_init(&desc->lock);
+ lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+ mutex_init(&desc->request_mutex);
+ init_waitqueue_head(&desc->wait_for_threads);
+ desc_set_defaults(irq, desc, node, affinity, owner);
+ irqd_set(&desc->irq_data, flags);
+ irq_resend_init(desc);
+#ifdef CONFIG_SPARSE_IRQ
+ kobject_init(&desc->kobj, &irq_kobj_type);
+ init_rcu_head(&desc->rcu);
+#endif
+
+ return 0;
+}
#ifdef CONFIG_SPARSE_IRQ
@@ -188,7 +291,7 @@ static ssize_t hwirq_show(struct kobject *kobj,
raw_spin_lock_irq(&desc->lock);
if (desc->irq_data.domain)
- ret = sprintf(buf, "%d\n", (int)desc->irq_data.hwirq);
+ ret = sprintf(buf, "%lu\n", desc->irq_data.hwirq);
raw_spin_unlock_irq(&desc->lock);
return ret;
@@ -218,8 +321,7 @@ static ssize_t wakeup_show(struct kobject *kobj,
ssize_t ret = 0;
raw_spin_lock_irq(&desc->lock);
- ret = sprintf(buf, "%s\n",
- irqd_is_wakeup_set(&desc->irq_data) ? "enabled" : "disabled");
+ ret = sprintf(buf, "%s\n", str_enabled_disabled(irqd_is_wakeup_set(&desc->irq_data)));
raw_spin_unlock_irq(&desc->lock);
return ret;
@@ -251,7 +353,7 @@ static ssize_t actions_show(struct kobject *kobj,
char *p = "";
raw_spin_lock_irq(&desc->lock);
- for (action = desc->action; action != NULL; action = action->next) {
+ for_each_action_of_desc(desc, action) {
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
p, action->name);
p = ",";
@@ -277,7 +379,7 @@ static struct attribute *irq_attrs[] = {
};
ATTRIBUTE_GROUPS(irq);
-static struct kobj_type irq_kobj_type = {
+static const struct kobj_type irq_kobj_type = {
.release = irq_kobj_release,
.sysfs_ops = &kobj_sysfs_ops,
.default_groups = irq_groups,
@@ -288,22 +390,25 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc)
if (irq_kobj_base) {
/*
* Continue even in case of failure as this is nothing
- * crucial.
+ * crucial and failures in the late irq_sysfs_init()
+ * cannot be rolled back.
*/
if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq))
pr_warn("Failed to add kobject for irq %d\n", irq);
+ else
+ desc->istate |= IRQS_SYSFS;
}
}
static void irq_sysfs_del(struct irq_desc *desc)
{
/*
- * If irq_sysfs_init() has not yet been invoked (early boot), then
- * irq_kobj_base is NULL and the descriptor was never added.
- * kobject_del() complains about a object with no parent, so make
- * it conditional.
+ * Only invoke kobject_del() when kobject_add() was successfully
+ * invoked for the descriptor. This covers both early boot, where
+ * sysfs is not initialized yet, and the case of a failed
+ * kobject_add() invocation.
*/
- if (irq_kobj_base)
+ if (desc->istate & IRQS_SYSFS)
kobject_del(&desc->kobj);
}
@@ -332,7 +437,7 @@ postcore_initcall(irq_sysfs_init);
#else /* !CONFIG_SYSFS */
-static struct kobj_type irq_kobj_type = {
+static const struct kobj_type irq_kobj_type = {
.release = irq_kobj_release,
};
@@ -341,41 +446,14 @@ static void irq_sysfs_del(struct irq_desc *desc) {}
#endif /* CONFIG_SYSFS */
-static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
-
-static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
-{
- radix_tree_insert(&irq_desc_tree, irq, desc);
-}
-
struct irq_desc *irq_to_desc(unsigned int irq)
{
- return radix_tree_lookup(&irq_desc_tree, irq);
+ return mtree_load(&sparse_irqs, irq);
}
#ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE
EXPORT_SYMBOL_GPL(irq_to_desc);
#endif
-static void delete_irq_desc(unsigned int irq)
-{
- radix_tree_delete(&irq_desc_tree, irq);
-}
-
-#ifdef CONFIG_SMP
-static void free_masks(struct irq_desc *desc)
-{
-#ifdef CONFIG_GENERIC_PENDING_IRQ
- free_cpumask_var(desc->pending_mask);
-#endif
- free_cpumask_var(desc->irq_common_data.affinity);
-#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
- free_cpumask_var(desc->irq_common_data.effective_affinity);
-#endif
-}
-#else
-static inline void free_masks(struct irq_desc *desc) { }
-#endif
-
void irq_lock_sparse(void)
{
mutex_lock(&sparse_irq_lock);
@@ -391,34 +469,19 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
struct module *owner)
{
struct irq_desc *desc;
+ int ret;
desc = kzalloc_node(sizeof(*desc), GFP_KERNEL, node);
if (!desc)
return NULL;
- /* allocate based on nr_cpu_ids */
- desc->kstat_irqs = alloc_percpu(unsigned int);
- if (!desc->kstat_irqs)
- goto err_desc;
- if (alloc_masks(desc, node))
- goto err_kstat;
-
- raw_spin_lock_init(&desc->lock);
- lockdep_set_class(&desc->lock, &irq_desc_lock_class);
- mutex_init(&desc->request_mutex);
- init_rcu_head(&desc->rcu);
-
- desc_set_defaults(irq, desc, node, affinity, owner);
- irqd_set(&desc->irq_data, flags);
- kobject_init(&desc->kobj, &irq_kobj_type);
+ ret = init_desc(desc, irq, node, flags, affinity, owner);
+ if (unlikely(ret)) {
+ kfree(desc);
+ return NULL;
+ }
return desc;
-
-err_kstat:
- free_percpu(desc->kstat_irqs);
-err_desc:
- kfree(desc);
- return NULL;
}
static void irq_kobj_release(struct kobject *kobj)
@@ -489,6 +552,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
flags = IRQD_AFFINITY_MANAGED |
IRQD_MANAGED_SHUTDOWN;
}
+ flags |= IRQD_AFFINITY_SET;
mask = &affinity->mask;
node = cpu_to_node(cpumask_first(mask));
affinity++;
@@ -501,7 +565,6 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
irq_sysfs_add(start + i, desc);
irq_add_debugfs_entry(start + i, desc);
}
- bitmap_set(allocated_irqs, start, cnt);
return start;
err:
@@ -512,7 +575,7 @@ err:
static int irq_expand_nr_irqs(unsigned int nr)
{
- if (nr > IRQ_BITMAP_BITS)
+ if (nr > MAX_SPARSE_IRQS)
return -ENOMEM;
nr_irqs = nr;
return 0;
@@ -530,18 +593,17 @@ int __init early_irq_init(void)
printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n",
NR_IRQS, nr_irqs, initcnt);
- if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS))
- nr_irqs = IRQ_BITMAP_BITS;
+ if (WARN_ON(nr_irqs > MAX_SPARSE_IRQS))
+ nr_irqs = MAX_SPARSE_IRQS;
- if (WARN_ON(initcnt > IRQ_BITMAP_BITS))
- initcnt = IRQ_BITMAP_BITS;
+ if (WARN_ON(initcnt > MAX_SPARSE_IRQS))
+ initcnt = MAX_SPARSE_IRQS;
if (initcnt > nr_irqs)
nr_irqs = initcnt;
for (i = 0; i < initcnt; i++) {
desc = alloc_desc(i, node, 0, NULL, NULL);
- set_bit(i, allocated_irqs);
irq_insert_desc(i, desc);
}
return arch_early_irq_init();
@@ -560,24 +622,29 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
int __init early_irq_init(void)
{
int count, i, node = first_online_node;
- struct irq_desc *desc;
+ int ret;
init_irq_default_affinity();
printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS);
- desc = irq_desc;
count = ARRAY_SIZE(irq_desc);
for (i = 0; i < count; i++) {
- desc[i].kstat_irqs = alloc_percpu(unsigned int);
- alloc_masks(&desc[i], node);
- raw_spin_lock_init(&desc[i].lock);
- lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
- mutex_init(&desc[i].request_mutex);
- desc_set_defaults(i, &desc[i], node, NULL, NULL);
+ ret = init_desc(irq_desc + i, i, node, 0, NULL, NULL);
+ if (unlikely(ret))
+ goto __free_desc_res;
}
+
return arch_early_irq_init();
+
+__free_desc_res:
+ while (--i >= 0) {
+ free_masks(irq_desc + i);
+ free_percpu(irq_desc[i].kstat_irqs);
+ }
+
+ return ret;
}
struct irq_desc *irq_to_desc(unsigned int irq)
@@ -594,6 +661,7 @@ static void free_desc(unsigned int irq)
raw_spin_lock_irqsave(&desc->lock, flags);
desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL);
raw_spin_unlock_irqrestore(&desc->lock, flags);
+ delete_irq_desc(irq);
}
static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
@@ -606,8 +674,8 @@ static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
struct irq_desc *desc = irq_to_desc(start + i);
desc->owner = owner;
+ irq_insert_desc(start + i, desc);
}
- bitmap_set(allocated_irqs, start, cnt);
return start;
}
@@ -619,7 +687,7 @@ static int irq_expand_nr_irqs(unsigned int nr)
void irq_mark_irq(unsigned int irq)
{
mutex_lock(&sparse_irq_lock);
- bitmap_set(allocated_irqs, irq, 1);
+ irq_insert_desc(irq, irq_desc + irq);
mutex_unlock(&sparse_irq_lock);
}
@@ -632,106 +700,117 @@ void irq_init_desc(unsigned int irq)
#endif /* !CONFIG_SPARSE_IRQ */
-/**
- * generic_handle_irq - Invoke the handler for a particular irq
- * @irq: The irq number to handle
- *
- */
-int generic_handle_irq(unsigned int irq)
+int handle_irq_desc(struct irq_desc *desc)
{
- struct irq_desc *desc = irq_to_desc(irq);
struct irq_data *data;
if (!desc)
return -EINVAL;
data = irq_desc_get_irq_data(desc);
- if (WARN_ON_ONCE(!in_irq() && handle_enforce_irqctx(data)))
+ if (WARN_ON_ONCE(!in_hardirq() && irqd_is_handle_enforce_irqctx(data)))
return -EPERM;
generic_handle_irq_desc(desc);
return 0;
}
-EXPORT_SYMBOL_GPL(generic_handle_irq);
-#ifdef CONFIG_HANDLE_DOMAIN_IRQ
/**
- * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain
- * @domain: The domain where to perform the lookup
- * @hwirq: The HW irq number to convert to a logical one
- * @lookup: Whether to perform the domain lookup or not
- * @regs: Register file coming from the low-level handling code
+ * generic_handle_irq - Invoke the handler for a particular irq
+ * @irq: The irq number to handle
*
* Returns: 0 on success, or -EINVAL if conversion has failed
- */
-int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
- bool lookup, struct pt_regs *regs)
+ *
+ * This function must be called from an IRQ context with irq regs
+ * initialized.
+ */
+int generic_handle_irq(unsigned int irq)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
- unsigned int irq = hwirq;
- int ret = 0;
-
- irq_enter();
-
-#ifdef CONFIG_IRQ_DOMAIN
- if (lookup)
- irq = irq_find_mapping(domain, hwirq);
-#endif
+ return handle_irq_desc(irq_to_desc(irq));
+}
+EXPORT_SYMBOL_GPL(generic_handle_irq);
- /*
- * Some hardware gives randomly wrong interrupts. Rather
- * than crashing, do something sensible.
- */
- if (unlikely(!irq || irq >= nr_irqs)) {
- ack_bad_irq(irq);
- ret = -EINVAL;
- } else {
- generic_handle_irq(irq);
- }
+/**
+ * generic_handle_irq_safe - Invoke the handler for a particular irq from any
+ * context.
+ * @irq: The irq number to handle
+ *
+ * Returns: 0 on success, a negative value on error.
+ *
+ * This function can be called from any context (IRQ or process context). It
+ * will report an error if not invoked from IRQ context and the irq has been
+ * marked to enforce IRQ-context only.
+ */
+int generic_handle_irq_safe(unsigned int irq)
+{
+ unsigned long flags;
+ int ret;
- irq_exit();
- set_irq_regs(old_regs);
+ local_irq_save(flags);
+ ret = handle_irq_desc(irq_to_desc(irq));
+ local_irq_restore(flags);
return ret;
}
+EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
#ifdef CONFIG_IRQ_DOMAIN
/**
- * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain
+ * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
+ * to a domain.
* @domain: The domain where to perform the lookup
* @hwirq: The HW irq number to convert to a logical one
- * @regs: Register file coming from the low-level handling code
- *
- * This function must be called from an NMI context.
*
* Returns: 0 on success, or -EINVAL if conversion has failed
+ *
+ * This function must be called from an IRQ context with irq regs
+ * initialized.
*/
-int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
- struct pt_regs *regs)
+int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
- unsigned int irq;
- int ret = 0;
-
- /*
- * NMI context needs to be setup earlier in order to deal with tracing.
- */
- WARN_ON(!in_nmi());
-
- irq = irq_find_mapping(domain, hwirq);
+ return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
+}
+EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
- /*
- * ack_bad_irq is not NMI-safe, just report
- * an invalid interrupt.
- */
- if (likely(irq))
- generic_handle_irq(irq);
- else
- ret = -EINVAL;
+ /**
+ * generic_handle_irq_safe - Invoke the handler for a HW irq belonging
+ * to a domain from any context.
+ * @domain: The domain where to perform the lookup
+ * @hwirq: The HW irq number to convert to a logical one
+ *
+ * Returns: 0 on success, a negative value on error.
+ *
+ * This function can be called from any context (IRQ or process
+ * context). If the interrupt is marked as 'enforce IRQ-context only' then
+ * the function must be invoked from hard interrupt context.
+ */
+int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq)
+{
+ unsigned long flags;
+ int ret;
- set_irq_regs(old_regs);
+ local_irq_save(flags);
+ ret = handle_irq_desc(irq_resolve_mapping(domain, hwirq));
+ local_irq_restore(flags);
return ret;
}
-#endif
+EXPORT_SYMBOL_GPL(generic_handle_domain_irq_safe);
+
+/**
+ * generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging
+ * to a domain.
+ * @domain: The domain where to perform the lookup
+ * @hwirq: The HW irq number to convert to a logical one
+ *
+ * Returns: 0 on success, or -EINVAL if conversion has failed
+ *
+ * This function must be called from an NMI context with irq regs
+ * initialized.
+ **/
+int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq)
+{
+ WARN_ON_ONCE(!in_nmi());
+ return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
+}
#endif
/* Dynamic interrupt handling */
@@ -752,7 +831,6 @@ void irq_free_descs(unsigned int from, unsigned int cnt)
for (i = 0; i < cnt; i++)
free_desc(from + i);
- bitmap_clear(allocated_irqs, from, cnt);
mutex_unlock(&sparse_irq_lock);
}
EXPORT_SYMBOL_GPL(irq_free_descs);
@@ -794,8 +872,7 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
mutex_lock(&sparse_irq_lock);
- start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
- from, cnt, 0);
+ start = irq_find_free_area(from, cnt);
ret = -EEXIST;
if (irq >=0 && start != irq)
goto unlock;
@@ -820,7 +897,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs);
*/
unsigned int irq_get_next_irq(unsigned int offset)
{
- return find_next_bit(allocated_irqs, nr_irqs, offset);
+ return irq_find_at_or_after(offset);
}
struct irq_desc *
@@ -860,10 +937,7 @@ int irq_set_percpu_devid_partition(unsigned int irq,
{
struct irq_desc *desc = irq_to_desc(irq);
- if (!desc)
- return -EINVAL;
-
- if (desc->percpu_enabled)
+ if (!desc || desc->percpu_enabled)
return -EINVAL;
desc->percpu_enabled = kzalloc(sizeof(*desc->percpu_enabled), GFP_KERNEL);
@@ -871,10 +945,7 @@ int irq_set_percpu_devid_partition(unsigned int irq,
if (!desc->percpu_enabled)
return -ENOMEM;
- if (affinity)
- desc->percpu_affinity = affinity;
- else
- desc->percpu_affinity = cpu_possible_mask;
+ desc->percpu_affinity = affinity ? : cpu_possible_mask;
irq_set_percpu_devid_flags(irq);
return 0;
@@ -917,33 +988,58 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
{
struct irq_desc *desc = irq_to_desc(irq);
- return desc && desc->kstat_irqs ?
- *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
+ return desc && desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, cpu) : 0;
}
-static bool irq_is_nmi(struct irq_desc *desc)
+unsigned int kstat_irqs_desc(struct irq_desc *desc, const struct cpumask *cpumask)
{
- return desc->istate & IRQS_NMI;
-}
-
-static unsigned int kstat_irqs(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned int sum = 0;
int cpu;
- if (!desc || !desc->kstat_irqs)
- return 0;
if (!irq_settings_is_per_cpu_devid(desc) &&
!irq_settings_is_per_cpu(desc) &&
!irq_is_nmi(desc))
return data_race(desc->tot_count);
- for_each_possible_cpu(cpu)
- sum += data_race(*per_cpu_ptr(desc->kstat_irqs, cpu));
+ for_each_cpu(cpu, cpumask)
+ sum += data_race(per_cpu(desc->kstat_irqs->cnt, cpu));
return sum;
}
+static unsigned int kstat_irqs(unsigned int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ if (!desc || !desc->kstat_irqs)
+ return 0;
+ return kstat_irqs_desc(desc, cpu_possible_mask);
+}
+
+#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT
+
+void kstat_snapshot_irqs(void)
+{
+ struct irq_desc *desc;
+ unsigned int irq;
+
+ for_each_irq_desc(irq, desc) {
+ if (!desc->kstat_irqs)
+ continue;
+ this_cpu_write(desc->kstat_irqs->ref, this_cpu_read(desc->kstat_irqs->cnt));
+ }
+}
+
+unsigned int kstat_get_irq_since_snapshot(unsigned int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ if (!desc || !desc->kstat_irqs)
+ return 0;
+ return this_cpu_read(desc->kstat_irqs->cnt) - this_cpu_read(desc->kstat_irqs->ref);
+}
+
+#endif
+
/**
* kstat_irqs_usr - Get the statistics for an interrupt from thread context
* @irq: The interrupt number
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 6aacd342cd14..ec6d8e72d980 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -25,7 +25,11 @@ static DEFINE_MUTEX(irq_domain_mutex);
static struct irq_domain *irq_default_domain;
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity);
static void irq_domain_check_hierarchy(struct irq_domain *domain);
+static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq);
struct irqchip_fwid {
struct fwnode_handle fwnode;
@@ -62,7 +66,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops);
* @name: Optional user provided domain name
* @pa: Optional user-provided physical address
*
- * Allocate a struct irqchip_fwid, and return a poiner to the embedded
+ * Allocate a struct irqchip_fwid, and return a pointer to the embedded
* fwnode_handle (or NULL on failure).
*
* Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are
@@ -107,6 +111,7 @@ EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode);
/**
* irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle
+ * @fwnode: fwnode_handle to free
*
* Free a fwnode_handle allocated with irq_domain_alloc_fwnode.
*/
@@ -114,7 +119,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
{
struct irqchip_fwid *fwid;
- if (WARN_ON(!is_fwnode_irqchip(fwnode)))
+ if (!fwnode || WARN_ON(!is_fwnode_irqchip(fwnode)))
return;
fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
@@ -123,108 +128,251 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
-/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- * direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
- *
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+static int alloc_name(struct irq_domain *domain, char *base, enum irq_domain_bus_token bus_token)
{
- struct irqchip_fwid *fwid;
- struct irq_domain *domain;
+ if (bus_token == DOMAIN_BUS_ANY)
+ domain->name = kasprintf(GFP_KERNEL, "%s", base);
+ else
+ domain->name = kasprintf(GFP_KERNEL, "%s-%d", base, bus_token);
+ if (!domain->name)
+ return -ENOMEM;
+
+ domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+ return 0;
+}
+
+static int alloc_fwnode_name(struct irq_domain *domain, const struct fwnode_handle *fwnode,
+ enum irq_domain_bus_token bus_token, const char *suffix)
+{
+ const char *sep = suffix ? "-" : "";
+ const char *suf = suffix ? : "";
+ char *name;
+
+ if (bus_token == DOMAIN_BUS_ANY)
+ name = kasprintf(GFP_KERNEL, "%pfw%s%s", fwnode, sep, suf);
+ else
+ name = kasprintf(GFP_KERNEL, "%pfw%s%s-%d", fwnode, sep, suf, bus_token);
+ if (!name)
+ return -ENOMEM;
+
+ /*
+ * fwnode paths contain '/', which debugfs is legitimately unhappy
+ * about. Replace them with ':', which does the trick and is not as
+ * offensive as '\'...
+ */
+ domain->name = strreplace(name, '/', ':');
+ domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+ return 0;
+}
+static int alloc_unknown_name(struct irq_domain *domain, enum irq_domain_bus_token bus_token)
+{
static atomic_t unknown_domains;
+ int id = atomic_inc_return(&unknown_domains);
- domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
- GFP_KERNEL, of_node_to_nid(to_of_node(fwnode)));
- if (!domain)
- return NULL;
+ if (bus_token == DOMAIN_BUS_ANY)
+ domain->name = kasprintf(GFP_KERNEL, "unknown-%d", id);
+ else
+ domain->name = kasprintf(GFP_KERNEL, "unknown-%d-%d", id, bus_token);
+ if (!domain->name)
+ return -ENOMEM;
+
+ domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+ return 0;
+}
+
+static int irq_domain_set_name(struct irq_domain *domain, const struct irq_domain_info *info)
+{
+ enum irq_domain_bus_token bus_token = info->bus_token;
+ const struct fwnode_handle *fwnode = info->fwnode;
if (is_fwnode_irqchip(fwnode)) {
- fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
+ struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
+
+ /*
+ * The name_suffix is only intended to be used to avoid a name
+ * collision when multiple domains are created for a single
+ * device and the name is picked using a real device node.
+ * (Typical use-case is regmap-IRQ controllers for devices
+ * providing more than one physical IRQ.) There should be no
+ * need to use name_suffix with irqchip-fwnode.
+ */
+ if (info->name_suffix)
+ return -EINVAL;
switch (fwid->type) {
case IRQCHIP_FWNODE_NAMED:
case IRQCHIP_FWNODE_NAMED_ID:
- domain->fwnode = fwnode;
- domain->name = kstrdup(fwid->name, GFP_KERNEL);
- if (!domain->name) {
- kfree(domain);
- return NULL;
- }
- domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
- break;
+ return alloc_name(domain, fwid->name, bus_token);
default:
- domain->fwnode = fwnode;
domain->name = fwid->name;
- break;
+ if (bus_token != DOMAIN_BUS_ANY)
+ return alloc_name(domain, fwid->name, bus_token);
}
- } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) ||
- is_software_node(fwnode)) {
- char *name;
- /*
- * fwnode paths contain '/', which debugfs is legitimately
- * unhappy about. Replace them with ':', which does
- * the trick and is not as offensive as '\'...
- */
- name = kasprintf(GFP_KERNEL, "%pfw", fwnode);
- if (!name) {
- kfree(domain);
- return NULL;
- }
+ } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) || is_software_node(fwnode)) {
+ return alloc_fwnode_name(domain, fwnode, bus_token, info->name_suffix);
+ }
- strreplace(name, '/', ':');
+ if (domain->name)
+ return 0;
- domain->name = name;
- domain->fwnode = fwnode;
- domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
- }
+ if (fwnode)
+ pr_err("Invalid fwnode type for irqdomain\n");
+ return alloc_unknown_name(domain, bus_token);
+}
- if (!domain->name) {
- if (fwnode)
- pr_err("Invalid fwnode type for irqdomain\n");
- domain->name = kasprintf(GFP_KERNEL, "unknown-%d",
- atomic_inc_return(&unknown_domains));
- if (!domain->name) {
- kfree(domain);
- return NULL;
- }
- domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+static struct irq_domain *__irq_domain_create(const struct irq_domain_info *info)
+{
+ struct irq_domain *domain;
+ int err;
+
+ if (WARN_ON((info->size && info->direct_max) ||
+ (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && info->direct_max) ||
+ (info->direct_max && info->direct_max != info->hwirq_max)))
+ return ERR_PTR(-EINVAL);
+
+ domain = kzalloc_node(struct_size(domain, revmap, info->size),
+ GFP_KERNEL, of_node_to_nid(to_of_node(info->fwnode)));
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ err = irq_domain_set_name(domain, info);
+ if (err) {
+ kfree(domain);
+ return ERR_PTR(err);
}
- fwnode_handle_get(fwnode);
+ domain->fwnode = fwnode_handle_get(info->fwnode);
+ fwnode_dev_initialized(domain->fwnode, true);
/* Fill structure */
INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
- mutex_init(&domain->revmap_tree_mutex);
- domain->ops = ops;
- domain->host_data = host_data;
- domain->hwirq_max = hwirq_max;
- domain->revmap_size = size;
- domain->revmap_direct_max_irq = direct_max;
+ domain->ops = info->ops;
+ domain->host_data = info->host_data;
+ domain->bus_token = info->bus_token;
+ domain->hwirq_max = info->hwirq_max;
+
+ if (info->direct_max)
+ domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP;
+
+ domain->revmap_size = info->size;
+
+ /*
+ * Hierarchical domains use the domain lock of the root domain
+ * (innermost domain).
+ *
+ * For non-hierarchical domains (as for root domains), the root
+ * pointer is set to the domain itself so that &domain->root->mutex
+ * always points to the right lock.
+ */
+ mutex_init(&domain->mutex);
+ domain->root = domain;
+
irq_domain_check_hierarchy(domain);
+ return domain;
+}
+
+static void __irq_domain_publish(struct irq_domain *domain)
+{
mutex_lock(&irq_domain_mutex);
debugfs_add_domain_dir(domain);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
pr_debug("Added domain %s\n", domain->name);
+}
+
+static void irq_domain_free(struct irq_domain *domain)
+{
+ fwnode_dev_initialized(domain->fwnode, false);
+ fwnode_handle_put(domain->fwnode);
+ if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
+ kfree(domain->name);
+ kfree(domain);
+}
+
+static void irq_domain_instantiate_descs(const struct irq_domain_info *info)
+{
+ if (!IS_ENABLED(CONFIG_SPARSE_IRQ))
+ return;
+
+ if (irq_alloc_descs(info->virq_base, info->virq_base, info->size,
+ of_node_to_nid(to_of_node(info->fwnode))) < 0) {
+ pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
+ info->virq_base);
+ }
+}
+
+static struct irq_domain *__irq_domain_instantiate(const struct irq_domain_info *info,
+ bool cond_alloc_descs, bool force_associate)
+{
+ struct irq_domain *domain;
+ int err;
+
+ domain = __irq_domain_create(info);
+ if (IS_ERR(domain))
+ return domain;
+
+ domain->flags |= info->domain_flags;
+ domain->exit = info->exit;
+
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ if (info->parent) {
+ domain->root = info->parent->root;
+ domain->parent = info->parent;
+ }
+#endif
+
+ if (info->dgc_info) {
+ err = irq_domain_alloc_generic_chips(domain, info->dgc_info);
+ if (err)
+ goto err_domain_free;
+ }
+
+ if (info->init) {
+ err = info->init(domain);
+ if (err)
+ goto err_domain_gc_remove;
+ }
+
+ __irq_domain_publish(domain);
+
+ if (cond_alloc_descs && info->virq_base > 0)
+ irq_domain_instantiate_descs(info);
+
+ /*
+ * Legacy interrupt domains have a fixed Linux interrupt number
+ * associated. Other interrupt domains can request association by
+ * providing a Linux interrupt number > 0.
+ */
+ if (force_associate || info->virq_base > 0) {
+ irq_domain_associate_many(domain, info->virq_base, info->hwirq_base,
+ info->size - info->hwirq_base);
+ }
+
return domain;
+
+err_domain_gc_remove:
+ if (info->dgc_info)
+ irq_domain_remove_generic_chips(domain);
+err_domain_free:
+ irq_domain_free(domain);
+ return ERR_PTR(err);
}
-EXPORT_SYMBOL_GPL(__irq_domain_add);
+
+/**
+ * irq_domain_instantiate() - Instantiate a new irq domain data structure
+ * @info: Domain information pointer pointing to the information for this domain
+ *
+ * Return: A pointer to the instantiated irq domain or an ERR_PTR value.
+ */
+struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info)
+{
+ return __irq_domain_instantiate(info, false, false);
+}
+EXPORT_SYMBOL_GPL(irq_domain_instantiate);
/**
* irq_domain_remove() - Remove an irq domain.
@@ -236,6 +384,9 @@ EXPORT_SYMBOL_GPL(__irq_domain_add);
*/
void irq_domain_remove(struct irq_domain *domain)
{
+ if (domain->exit)
+ domain->exit(domain);
+
mutex_lock(&irq_domain_mutex);
debugfs_remove_domain_dir(domain);
@@ -251,12 +402,11 @@ void irq_domain_remove(struct irq_domain *domain)
mutex_unlock(&irq_domain_mutex);
- pr_debug("Removed domain %s\n", domain->name);
+ if (domain->flags & IRQ_DOMAIN_FLAG_DESTROY_GC)
+ irq_domain_remove_generic_chips(domain);
- fwnode_handle_put(domain->fwnode);
- if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
- kfree(domain->name);
- kfree(domain);
+ pr_debug("Removed domain %s\n", domain->name);
+ irq_domain_free(domain);
}
EXPORT_SYMBOL_GPL(irq_domain_remove);
@@ -293,8 +443,8 @@ void irq_domain_update_bus_token(struct irq_domain *domain,
EXPORT_SYMBOL_GPL(irq_domain_update_bus_token);
/**
- * irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs
- * @of_node: pointer to interrupt controller's device tree node.
+ * irq_domain_create_simple() - Register an irq_domain and optionally map a range of irqs
+ * @fwnode: firmware node for the interrupt controller
* @size: total number of irqs in mapping
* @first_irq: first number of irq block assigned to the domain,
* pass zero to assign irqs on-the-fly. If first_irq is non-zero, then
@@ -310,33 +460,25 @@ EXPORT_SYMBOL_GPL(irq_domain_update_bus_token);
* irqs get mapped dynamically on the fly. However, if the controller requires
* static virq assignments (non-DT boot) then it will set that up correctly.
*/
-struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
- unsigned int size,
- unsigned int first_irq,
- const struct irq_domain_ops *ops,
- void *host_data)
+struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode,
+ unsigned int size,
+ unsigned int first_irq,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
- struct irq_domain *domain;
-
- domain = __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data);
- if (!domain)
- return NULL;
-
- if (first_irq > 0) {
- if (IS_ENABLED(CONFIG_SPARSE_IRQ)) {
- /* attempt to allocated irq_descs */
- int rc = irq_alloc_descs(first_irq, first_irq, size,
- of_node_to_nid(of_node));
- if (rc < 0)
- pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
- first_irq);
- }
- irq_domain_associate_many(domain, first_irq, 0, size);
- }
+ struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .size = size,
+ .hwirq_max = size,
+ .virq_base = first_irq,
+ .ops = ops,
+ .host_data = host_data,
+ };
+ struct irq_domain *domain = __irq_domain_instantiate(&info, true, false);
- return domain;
+ return IS_ERR(domain) ? NULL : domain;
}
-EXPORT_SYMBOL_GPL(irq_domain_add_simple);
+EXPORT_SYMBOL_GPL(irq_domain_create_simple);
/**
* irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain.
@@ -372,13 +514,18 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
const struct irq_domain_ops *ops,
void *host_data)
{
- struct irq_domain *domain;
+ struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .size = first_hwirq + size,
+ .hwirq_max = first_hwirq + size,
+ .hwirq_base = first_hwirq,
+ .virq_base = first_irq,
+ .ops = ops,
+ .host_data = host_data,
+ };
+ struct irq_domain *domain = __irq_domain_instantiate(&info, false, true);
- domain = __irq_domain_add(fwnode, first_hwirq + size, first_hwirq + size, 0, ops, host_data);
- if (domain)
- irq_domain_associate_many(domain, first_irq, first_hwirq, size);
-
- return domain;
+ return IS_ERR(domain) ? NULL : domain;
}
EXPORT_SYMBOL_GPL(irq_domain_create_legacy);
@@ -394,7 +541,8 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
struct fwnode_handle *fwnode = fwspec->fwnode;
int rc;
- /* We might want to match the legacy controller last since
+ /*
+ * We might want to match the legacy controller last since
* it might potentially be set to match all interrupts in
* the absence of a device node. This isn't a problem so far
* yet though...
@@ -405,7 +553,7 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
*/
mutex_lock(&irq_domain_mutex);
list_for_each_entry(h, &irq_domain_list, link) {
- if (h->ops->select && fwspec->param_count)
+ if (h->ops->select && bus_token != DOMAIN_BUS_ANY)
rc = h->ops->select(h, fwspec, bus_token);
else if (h->ops->match)
rc = h->ops->match(h, to_of_node(fwnode), bus_token);
@@ -425,31 +573,6 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
/**
- * irq_domain_check_msi_remap - Check whether all MSI irq domains implement
- * IRQ remapping
- *
- * Return: false if any MSI irq domain does not support IRQ remapping,
- * true otherwise (including if there is no MSI irq domain)
- */
-bool irq_domain_check_msi_remap(void)
-{
- struct irq_domain *h;
- bool ret = true;
-
- mutex_lock(&irq_domain_mutex);
- list_for_each_entry(h, &irq_domain_list, link) {
- if (irq_domain_is_msi(h) &&
- !irq_domain_hierarchical_is_msi_remap(h)) {
- ret = false;
- break;
- }
- }
- mutex_unlock(&irq_domain_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(irq_domain_check_msi_remap);
-
-/**
* irq_set_default_host() - Set a "default" irq domain
* @domain: default domain pointer
*
@@ -479,30 +602,45 @@ struct irq_domain *irq_get_default_host(void)
{
return irq_default_domain;
}
+EXPORT_SYMBOL_GPL(irq_get_default_host);
+
+static bool irq_domain_is_nomap(struct irq_domain *domain)
+{
+ return IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) &&
+ (domain->flags & IRQ_DOMAIN_FLAG_NO_MAP);
+}
static void irq_domain_clear_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq)
{
- if (hwirq < domain->revmap_size) {
- domain->linear_revmap[hwirq] = 0;
- } else {
- mutex_lock(&domain->revmap_tree_mutex);
+ lockdep_assert_held(&domain->root->mutex);
+
+ if (irq_domain_is_nomap(domain))
+ return;
+
+ if (hwirq < domain->revmap_size)
+ rcu_assign_pointer(domain->revmap[hwirq], NULL);
+ else
radix_tree_delete(&domain->revmap_tree, hwirq);
- mutex_unlock(&domain->revmap_tree_mutex);
- }
}
static void irq_domain_set_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq,
struct irq_data *irq_data)
{
- if (hwirq < domain->revmap_size) {
- domain->linear_revmap[hwirq] = irq_data->irq;
- } else {
- mutex_lock(&domain->revmap_tree_mutex);
+ /*
+ * This also makes sure that all domains point to the same root when
+ * called from irq_domain_insert_irq() for each domain in a hierarchy.
+ */
+ lockdep_assert_held(&domain->root->mutex);
+
+ if (irq_domain_is_nomap(domain))
+ return;
+
+ if (hwirq < domain->revmap_size)
+ rcu_assign_pointer(domain->revmap[hwirq], irq_data);
+ else
radix_tree_insert(&domain->revmap_tree, hwirq, irq_data);
- mutex_unlock(&domain->revmap_tree_mutex);
- }
}
static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
@@ -515,6 +653,9 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
return;
hwirq = irq_data->hwirq;
+
+ mutex_lock(&domain->root->mutex);
+
irq_set_status_flags(irq, IRQ_NOREQUEST);
/* remove chip and handler */
@@ -534,10 +675,12 @@ static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
/* Clear reverse map for this hwirq */
irq_domain_clear_mapping(domain, hwirq);
+
+ mutex_unlock(&domain->root->mutex);
}
-int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq)
+static int irq_domain_associate_locked(struct irq_domain *domain, unsigned int virq,
+ irq_hw_number_t hwirq)
{
struct irq_data *irq_data = irq_get_irq_data(virq);
int ret;
@@ -550,7 +693,6 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
if (WARN(irq_data->domain, "error: virq%i is already associated", virq))
return -EINVAL;
- mutex_lock(&irq_domain_mutex);
irq_data->hwirq = hwirq;
irq_data->domain = domain;
if (domain->ops->map) {
@@ -567,23 +709,29 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
}
irq_data->domain = NULL;
irq_data->hwirq = 0;
- mutex_unlock(&irq_domain_mutex);
return ret;
}
-
- /* If not already assigned, give the domain the chip's name */
- if (!domain->name && irq_data->chip)
- domain->name = irq_data->chip->name;
}
domain->mapcount++;
irq_domain_set_mapping(domain, hwirq, irq_data);
- mutex_unlock(&irq_domain_mutex);
irq_clear_status_flags(virq, IRQ_NOREQUEST);
return 0;
}
+
+int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ int ret;
+
+ mutex_lock(&domain->root->mutex);
+ ret = irq_domain_associate_locked(domain, virq, hwirq);
+ mutex_unlock(&domain->root->mutex);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(irq_domain_associate);
void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
@@ -596,12 +744,12 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__,
of_node_full_name(of_node), irq_base, (int)hwirq_base, count);
- for (i = 0; i < count; i++) {
+ for (i = 0; i < count; i++)
irq_domain_associate(domain, irq_base + i, hwirq_base + i);
- }
}
EXPORT_SYMBOL_GPL(irq_domain_associate_many);
+#ifdef CONFIG_IRQ_DOMAIN_NOMAP
/**
* irq_create_direct_mapping() - Allocate an irq for direct mapping
* @domain: domain to allocate the irq for or NULL for default domain
@@ -626,9 +774,9 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
pr_debug("create_direct virq allocation failed\n");
return 0;
}
- if (virq >= domain->revmap_direct_max_irq) {
- pr_err("ERROR: no free irqs available below %i maximum\n",
- domain->revmap_direct_max_irq);
+ if (virq >= domain->hwirq_max) {
+ pr_err("ERROR: no free irqs available below %lu maximum\n",
+ domain->hwirq_max);
irq_free_desc(virq);
return 0;
}
@@ -642,6 +790,35 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
return virq;
}
EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
+#endif
+
+static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain,
+ irq_hw_number_t hwirq,
+ const struct irq_affinity_desc *affinity)
+{
+ struct device_node *of_node = irq_domain_get_of_node(domain);
+ int virq;
+
+ pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
+
+ /* Allocate a virtual interrupt number */
+ virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node),
+ affinity);
+ if (virq <= 0) {
+ pr_debug("-> virq allocation failed\n");
+ return 0;
+ }
+
+ if (irq_domain_associate_locked(domain, virq, hwirq)) {
+ irq_free_desc(virq);
+ return 0;
+ }
+
+ pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
+ hwirq, of_node_full_name(of_node), virq);
+
+ return virq;
+}
/**
* irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
@@ -655,87 +832,36 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
* on the number returned from that call.
*/
unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
- irq_hw_number_t hwirq,
- const struct irq_affinity_desc *affinity)
+ irq_hw_number_t hwirq,
+ const struct irq_affinity_desc *affinity)
{
- struct device_node *of_node;
int virq;
- pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
-
- /* Look for default domain if nececssary */
+ /* Look for default domain if necessary */
if (domain == NULL)
domain = irq_default_domain;
if (domain == NULL) {
WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq);
return 0;
}
- pr_debug("-> using domain @%p\n", domain);
- of_node = irq_domain_get_of_node(domain);
+ mutex_lock(&domain->root->mutex);
/* Check if mapping already exists */
virq = irq_find_mapping(domain, hwirq);
if (virq) {
- pr_debug("-> existing mapping on virq %d\n", virq);
- return virq;
+ pr_debug("existing mapping on virq %d\n", virq);
+ goto out;
}
- /* Allocate a virtual interrupt number */
- virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node),
- affinity);
- if (virq <= 0) {
- pr_debug("-> virq allocation failed\n");
- return 0;
- }
-
- if (irq_domain_associate(domain, virq, hwirq)) {
- irq_free_desc(virq);
- return 0;
- }
-
- pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
- hwirq, of_node_full_name(of_node), virq);
+ virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity);
+out:
+ mutex_unlock(&domain->root->mutex);
return virq;
}
EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
-/**
- * irq_create_strict_mappings() - Map a range of hw irqs to fixed linux irqs
- * @domain: domain owning the interrupt range
- * @irq_base: beginning of linux IRQ range
- * @hwirq_base: beginning of hardware IRQ range
- * @count: Number of interrupts to map
- *
- * This routine is used for allocating and mapping a range of hardware
- * irqs to linux irqs where the linux irq numbers are at pre-defined
- * locations. For use by controllers that already have static mappings
- * to insert in to the domain.
- *
- * Non-linear users can use irq_create_identity_mapping() for IRQ-at-a-time
- * domain insertion.
- *
- * 0 is returned upon success, while any failure to establish a static
- * mapping is treated as an error.
- */
-int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base,
- irq_hw_number_t hwirq_base, int count)
-{
- struct device_node *of_node;
- int ret;
-
- of_node = irq_domain_get_of_node(domain);
- ret = irq_alloc_descs(irq_base, irq_base, count,
- of_node_to_nid(of_node));
- if (unlikely(ret < 0))
- return ret;
-
- irq_domain_associate_many(domain, irq_base, hwirq_base, count);
- return 0;
-}
-EXPORT_SYMBOL_GPL(irq_create_strict_mappings);
-
static int irq_domain_translate(struct irq_domain *d,
struct irq_fwspec *fwspec,
irq_hw_number_t *hwirq, unsigned int *type)
@@ -754,9 +880,8 @@ static int irq_domain_translate(struct irq_domain *d,
return 0;
}
-static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
- unsigned int count,
- struct irq_fwspec *fwspec)
+void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
+ unsigned int count, struct irq_fwspec *fwspec)
{
int i;
@@ -766,6 +891,7 @@ static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
for (i = 0; i < count; i++)
fwspec->param[i] = args[i];
}
+EXPORT_SYMBOL_GPL(of_phandle_args_to_fwspec);
unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
{
@@ -799,6 +925,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
type &= IRQ_TYPE_SENSE_MASK;
+ mutex_lock(&domain->root->mutex);
+
/*
* If we've already configured this interrupt,
* don't do it again, or hell will break loose.
@@ -811,7 +939,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
* interrupt number.
*/
if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
- return virq;
+ goto out;
/*
* If the trigger type has not been set yet, then set
@@ -819,40 +947,50 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
*/
if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
irq_data = irq_get_irq_data(virq);
- if (!irq_data)
- return 0;
+ if (!irq_data) {
+ virq = 0;
+ goto out;
+ }
irqd_set_trigger_type(irq_data, type);
- return virq;
+ goto out;
}
pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
- return 0;
+ virq = 0;
+ goto out;
}
if (irq_domain_is_hierarchy(domain)) {
- virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
- if (virq <= 0)
- return 0;
+ if (irq_domain_is_msi_device(domain)) {
+ mutex_unlock(&domain->root->mutex);
+ virq = msi_device_domain_alloc_wired(domain, hwirq, type);
+ mutex_lock(&domain->root->mutex);
+ } else
+ virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE,
+ fwspec, false, NULL);
+ if (virq <= 0) {
+ virq = 0;
+ goto out;
+ }
} else {
/* Create mapping */
- virq = irq_create_mapping(domain, hwirq);
+ virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL);
if (!virq)
- return virq;
+ goto out;
}
irq_data = irq_get_irq_data(virq);
- if (!irq_data) {
- if (irq_domain_is_hierarchy(domain))
- irq_domain_free_irqs(virq, 1);
- else
- irq_dispose_mapping(virq);
- return 0;
+ if (WARN_ON(!irq_data)) {
+ virq = 0;
+ goto out;
}
/* Store trigger type */
irqd_set_trigger_type(irq_data, type);
+out:
+ mutex_unlock(&domain->root->mutex);
return virq;
}
@@ -875,10 +1013,11 @@ EXPORT_SYMBOL_GPL(irq_create_of_mapping);
*/
void irq_dispose_mapping(unsigned int virq)
{
- struct irq_data *irq_data = irq_get_irq_data(virq);
+ struct irq_data *irq_data;
struct irq_domain *domain;
- if (!virq || !irq_data)
+ irq_data = virq ? irq_get_irq_data(virq) : NULL;
+ if (!irq_data)
return;
domain = irq_data->domain;
@@ -886,7 +1025,7 @@ void irq_dispose_mapping(unsigned int virq)
return;
if (irq_domain_is_hierarchy(domain)) {
- irq_domain_free_irqs(virq, 1);
+ irq_domain_free_one_irq(domain, virq);
} else {
irq_domain_disassociate(domain, virq);
irq_free_desc(virq);
@@ -895,40 +1034,64 @@ void irq_dispose_mapping(unsigned int virq)
EXPORT_SYMBOL_GPL(irq_dispose_mapping);
/**
- * irq_find_mapping() - Find a linux irq from a hw irq number.
+ * __irq_resolve_mapping() - Find a linux irq from a hw irq number.
* @domain: domain owning this hardware interrupt
* @hwirq: hardware irq number in that domain space
+ * @irq: optional pointer to return the Linux irq if required
+ *
+ * Returns the interrupt descriptor.
*/
-unsigned int irq_find_mapping(struct irq_domain *domain,
- irq_hw_number_t hwirq)
+struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
+ irq_hw_number_t hwirq,
+ unsigned int *irq)
{
+ struct irq_desc *desc = NULL;
struct irq_data *data;
- /* Look for default domain if nececssary */
+ /* Look for default domain if necessary */
if (domain == NULL)
domain = irq_default_domain;
if (domain == NULL)
- return 0;
+ return desc;
+
+ if (irq_domain_is_nomap(domain)) {
+ if (hwirq < domain->hwirq_max) {
+ data = irq_domain_get_irq_data(domain, hwirq);
+ if (data && data->hwirq == hwirq)
+ desc = irq_data_to_desc(data);
+ if (irq && desc)
+ *irq = hwirq;
+ }
- if (hwirq < domain->revmap_direct_max_irq) {
- data = irq_domain_get_irq_data(domain, hwirq);
- if (data && data->hwirq == hwirq)
- return hwirq;
+ return desc;
}
+ rcu_read_lock();
/* Check if the hwirq is in the linear revmap. */
if (hwirq < domain->revmap_size)
- return domain->linear_revmap[hwirq];
+ data = rcu_dereference(domain->revmap[hwirq]);
+ else
+ data = radix_tree_lookup(&domain->revmap_tree, hwirq);
+
+ if (likely(data)) {
+ desc = irq_data_to_desc(data);
+ if (irq)
+ *irq = data->irq;
+ }
- rcu_read_lock();
- data = radix_tree_lookup(&domain->revmap_tree, hwirq);
rcu_read_unlock();
- return data ? data->irq : 0;
+ return desc;
}
-EXPORT_SYMBOL_GPL(irq_find_mapping);
+EXPORT_SYMBOL_GPL(__irq_resolve_mapping);
/**
* irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings
+ * @d: Interrupt domain involved in the translation
+ * @ctrlr: The device tree node for the device whose interrupt is translated
+ * @intspec: The interrupt specifier data from the device tree
+ * @intsize: The number of entries in @intspec
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
*
* Device Tree IRQ specifier translation function which works with one cell
* bindings where the cell value maps directly to the hwirq number.
@@ -947,6 +1110,12 @@ EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell);
/**
* irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings
+ * @d: Interrupt domain involved in the translation
+ * @ctrlr: The device tree node for the device whose interrupt is translated
+ * @intspec: The interrupt specifier data from the device tree
+ * @intsize: The number of entries in @intspec
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
*
* Device Tree IRQ specifier translation function which works with two cell
* bindings where the cell values map directly to the hwirq number
@@ -965,6 +1134,12 @@ EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell);
/**
* irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings
+ * @d: Interrupt domain involved in the translation
+ * @ctrlr: The device tree node for the device whose interrupt is translated
+ * @intspec: The interrupt specifier data from the device tree
+ * @intsize: The number of entries in @intspec
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
*
* Device Tree IRQ specifier translation function which works with either one
* or two cell bindings where the cell values map directly to the hwirq number
@@ -998,6 +1173,10 @@ EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
/**
* irq_domain_translate_onecell() - Generic translate for direct one cell
* bindings
+ * @d: Interrupt domain involved in the translation
+ * @fwspec: The firmware interrupt specifier to translate
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
*/
int irq_domain_translate_onecell(struct irq_domain *d,
struct irq_fwspec *fwspec,
@@ -1015,6 +1194,10 @@ EXPORT_SYMBOL_GPL(irq_domain_translate_onecell);
/**
* irq_domain_translate_twocell() - Generic translate for direct two cell
* bindings
+ * @d: Interrupt domain involved in the translation
+ * @fwspec: The firmware interrupt specifier to translate
+ * @out_hwirq: Pointer to storage for the hardware interrupt number
+ * @out_type: Pointer to storage for the interrupt type
*
* Device Tree IRQ specifier translation function which works with two cell
* bindings where the cell values map directly to the hwirq number
@@ -1042,7 +1225,7 @@ int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq,
virq = __irq_alloc_descs(virq, virq, cnt, node, THIS_MODULE,
affinity);
} else {
- hint = hwirq % nr_irqs;
+ hint = hwirq % irq_get_nr_irqs();
if (hint == 0)
hint++;
virq = __irq_alloc_descs(-1, hint, cnt, node, THIS_MODULE,
@@ -1091,18 +1274,22 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
const struct irq_domain_ops *ops,
void *host_data)
{
- struct irq_domain *domain;
+ struct irq_domain_info info = {
+ .fwnode = fwnode,
+ .size = size,
+ .hwirq_max = size,
+ .ops = ops,
+ .host_data = host_data,
+ .domain_flags = flags,
+ .parent = parent,
+ };
+ struct irq_domain *d;
- if (size)
- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
- else
- domain = irq_domain_create_tree(fwnode, ops, host_data);
- if (domain) {
- domain->parent = parent;
- domain->flags |= flags;
- }
+ if (!info.size)
+ info.hwirq_max = ~0U;
- return domain;
+ d = irq_domain_instantiate(&info);
+ return IS_ERR(d) ? NULL : d;
}
EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy);
@@ -1115,10 +1302,6 @@ static void irq_domain_insert_irq(int virq)
domain->mapcount++;
irq_domain_set_mapping(domain, data->hwirq, data);
-
- /* If not already assigned, give the domain the chip's name */
- if (!domain->name && data->chip)
- domain->name = data->chip->name;
}
irq_clear_status_flags(virq, IRQ_NOREQUEST);
@@ -1210,6 +1393,7 @@ int irq_domain_disconnect_hierarchy(struct irq_domain *domain,
irqd->chip = ERR_PTR(-ENOTCONN);
return 0;
}
+EXPORT_SYMBOL_GPL(irq_domain_disconnect_hierarchy);
static int irq_domain_trim_hierarchy(unsigned int virq)
{
@@ -1219,7 +1403,7 @@ static int irq_domain_trim_hierarchy(unsigned int virq)
tail = NULL;
/* The first entry must have a valid irqchip */
- if (!irq_data->chip || IS_ERR(irq_data->chip))
+ if (IS_ERR_OR_NULL(irq_data->chip))
return -EINVAL;
/*
@@ -1312,7 +1496,8 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
* @chip_data: The associated chip data
*/
int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq, struct irq_chip *chip,
+ irq_hw_number_t hwirq,
+ const struct irq_chip *chip,
void *chip_data)
{
struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
@@ -1321,7 +1506,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
return -ENOENT;
irq_data->hwirq = hwirq;
- irq_data->chip = chip ? chip : &no_irq_chip;
+ irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip);
irq_data->chip_data = chip_data;
return 0;
@@ -1340,7 +1525,7 @@ EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip);
* @handler_name: The interrupt handler name
*/
void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq, struct irq_chip *chip,
+ irq_hw_number_t hwirq, const struct irq_chip *chip,
void *chip_data, irq_flow_handler_t handler,
void *handler_data, const char *handler_name)
{
@@ -1416,40 +1601,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
-/**
- * __irq_domain_alloc_irqs - Allocate IRQs from domain
- * @domain: domain to allocate from
- * @irq_base: allocate specified IRQ number if irq_base >= 0
- * @nr_irqs: number of IRQs to allocate
- * @node: NUMA node id for memory allocation
- * @arg: domain specific argument
- * @realloc: IRQ descriptors have already been allocated if true
- * @affinity: Optional irq affinity mask for multiqueue devices
- *
- * Allocate IRQ numbers and initialized all data structures to support
- * hierarchy IRQ domains.
- * Parameter @realloc is mainly to support legacy IRQs.
- * Returns error code or allocated IRQ number
- *
- * The whole process to setup an IRQ has been split into two steps.
- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
- * descriptor and required hardware resources. The second step,
- * irq_domain_activate_irq(), is to program hardwares with preallocated
- * resources. In this way, it's easier to rollback when failing to
- * allocate resources.
- */
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc, const struct irq_affinity_desc *affinity)
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
- if (domain == NULL) {
- domain = irq_default_domain;
- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
- return -EINVAL;
- }
-
if (realloc && irq_base >= 0) {
virq = irq_base;
} else {
@@ -1468,24 +1625,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
goto out_free_desc;
}
- mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
- if (ret < 0) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret < 0)
goto out_free_irq_data;
- }
for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i);
- if (ret) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret)
goto out_free_irq_data;
- }
}
-
+
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
- mutex_unlock(&irq_domain_mutex);
return virq;
@@ -1496,20 +1647,68 @@ out_free_desc:
return ret;
}
+/**
+ * __irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain: domain to allocate from
+ * @irq_base: allocate specified IRQ number if irq_base >= 0
+ * @nr_irqs: number of IRQs to allocate
+ * @node: NUMA node id for memory allocation
+ * @arg: domain specific argument
+ * @realloc: IRQ descriptors have already been allocated if true
+ * @affinity: Optional irq affinity mask for multiqueue devices
+ *
+ * Allocate IRQ numbers and initialized all data structures to support
+ * hierarchy IRQ domains.
+ * Parameter @realloc is mainly to support legacy IRQs.
+ * Returns error code or allocated IRQ number
+ *
+ * The whole process to setup an IRQ has been split into two steps.
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+ * descriptor and required hardware resources. The second step,
+ * irq_domain_activate_irq(), is to program the hardware with preallocated
+ * resources. In this way, it's easier to rollback when failing to
+ * allocate resources.
+ */
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
+{
+ int ret;
+
+ if (domain == NULL) {
+ domain = irq_default_domain;
+ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+ return -EINVAL;
+ }
+
+ mutex_lock(&domain->root->mutex);
+ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
+ realloc, affinity);
+ mutex_unlock(&domain->root->mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs);
+
/* The irq_data was moved, fix the revmap to refer to the new location */
static void irq_domain_fix_revmap(struct irq_data *d)
{
void __rcu **slot;
- if (d->hwirq < d->domain->revmap_size)
- return; /* Not using radix tree. */
+ lockdep_assert_held(&d->domain->root->mutex);
+
+ if (irq_domain_is_nomap(d->domain))
+ return;
/* Fix up the revmap. */
- mutex_lock(&d->domain->revmap_tree_mutex);
- slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq);
- if (slot)
- radix_tree_replace_slot(&d->domain->revmap_tree, slot, d);
- mutex_unlock(&d->domain->revmap_tree_mutex);
+ if (d->hwirq < d->domain->revmap_size) {
+ /* Not using radix tree */
+ rcu_assign_pointer(d->domain->revmap[d->hwirq], d);
+ } else {
+ slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq);
+ if (slot)
+ radix_tree_replace_slot(&d->domain->revmap_tree, slot, d);
+ }
}
/**
@@ -1525,8 +1724,8 @@ static void irq_domain_fix_revmap(struct irq_data *d)
*/
int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg)
{
- struct irq_data *child_irq_data;
- struct irq_data *root_irq_data = irq_get_irq_data(virq);
+ struct irq_data *irq_data = irq_get_irq_data(virq);
+ struct irq_data *parent_irq_data;
struct irq_desc *desc;
int rv = 0;
@@ -1551,47 +1750,46 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg)
if (WARN_ON(!irq_domain_is_hierarchy(domain)))
return -EINVAL;
- if (!root_irq_data)
+ if (!irq_data)
return -EINVAL;
- if (domain->parent != root_irq_data->domain)
+ if (domain->parent != irq_data->domain)
return -EINVAL;
- child_irq_data = kzalloc_node(sizeof(*child_irq_data), GFP_KERNEL,
- irq_data_get_node(root_irq_data));
- if (!child_irq_data)
+ parent_irq_data = kzalloc_node(sizeof(*parent_irq_data), GFP_KERNEL,
+ irq_data_get_node(irq_data));
+ if (!parent_irq_data)
return -ENOMEM;
- mutex_lock(&irq_domain_mutex);
+ mutex_lock(&domain->root->mutex);
/* Copy the original irq_data. */
- *child_irq_data = *root_irq_data;
+ *parent_irq_data = *irq_data;
/*
- * Overwrite the root_irq_data, which is embedded in struct
- * irq_desc, with values for this domain.
+ * Overwrite the irq_data, which is embedded in struct irq_desc, with
+ * values for this domain.
*/
- root_irq_data->parent_data = child_irq_data;
- root_irq_data->domain = domain;
- root_irq_data->mask = 0;
- root_irq_data->hwirq = 0;
- root_irq_data->chip = NULL;
- root_irq_data->chip_data = NULL;
+ irq_data->parent_data = parent_irq_data;
+ irq_data->domain = domain;
+ irq_data->mask = 0;
+ irq_data->hwirq = 0;
+ irq_data->chip = NULL;
+ irq_data->chip_data = NULL;
/* May (probably does) set hwirq, chip, etc. */
rv = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
if (rv) {
/* Restore the original irq_data. */
- *root_irq_data = *child_irq_data;
- kfree(child_irq_data);
+ *irq_data = *parent_irq_data;
+ kfree(parent_irq_data);
goto error;
}
- irq_domain_fix_revmap(child_irq_data);
- irq_domain_set_mapping(domain, root_irq_data->hwirq, root_irq_data);
-
+ irq_domain_fix_revmap(parent_irq_data);
+ irq_domain_set_mapping(domain, irq_data->hwirq, irq_data);
error:
- mutex_unlock(&irq_domain_mutex);
+ mutex_unlock(&domain->root->mutex);
return rv;
}
@@ -1607,8 +1805,8 @@ EXPORT_SYMBOL_GPL(irq_domain_push_irq);
*/
int irq_domain_pop_irq(struct irq_domain *domain, int virq)
{
- struct irq_data *root_irq_data = irq_get_irq_data(virq);
- struct irq_data *child_irq_data;
+ struct irq_data *irq_data = irq_get_irq_data(virq);
+ struct irq_data *parent_irq_data;
struct irq_data *tmp_irq_data;
struct irq_desc *desc;
@@ -1630,37 +1828,37 @@ int irq_domain_pop_irq(struct irq_domain *domain, int virq)
if (domain == NULL)
return -EINVAL;
- if (!root_irq_data)
+ if (!irq_data)
return -EINVAL;
tmp_irq_data = irq_domain_get_irq_data(domain, virq);
/* We can only "pop" if this domain is at the top of the list */
- if (WARN_ON(root_irq_data != tmp_irq_data))
+ if (WARN_ON(irq_data != tmp_irq_data))
return -EINVAL;
- if (WARN_ON(root_irq_data->domain != domain))
+ if (WARN_ON(irq_data->domain != domain))
return -EINVAL;
- child_irq_data = root_irq_data->parent_data;
- if (WARN_ON(!child_irq_data))
+ parent_irq_data = irq_data->parent_data;
+ if (WARN_ON(!parent_irq_data))
return -EINVAL;
- mutex_lock(&irq_domain_mutex);
+ mutex_lock(&domain->root->mutex);
- root_irq_data->parent_data = NULL;
+ irq_data->parent_data = NULL;
- irq_domain_clear_mapping(domain, root_irq_data->hwirq);
+ irq_domain_clear_mapping(domain, irq_data->hwirq);
irq_domain_free_irqs_hierarchy(domain, virq, 1);
/* Restore the original irq_data. */
- *root_irq_data = *child_irq_data;
+ *irq_data = *parent_irq_data;
- irq_domain_fix_revmap(root_irq_data);
+ irq_domain_fix_revmap(irq_data);
- mutex_unlock(&irq_domain_mutex);
+ mutex_unlock(&domain->root->mutex);
- kfree(child_irq_data);
+ kfree(parent_irq_data);
return 0;
}
@@ -1674,30 +1872,39 @@ EXPORT_SYMBOL_GPL(irq_domain_pop_irq);
void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs)
{
struct irq_data *data = irq_get_irq_data(virq);
+ struct irq_domain *domain;
int i;
if (WARN(!data || !data->domain || !data->domain->ops->free,
"NULL pointer, cannot free irq\n"))
return;
- mutex_lock(&irq_domain_mutex);
+ domain = data->domain;
+
+ mutex_lock(&domain->root->mutex);
for (i = 0; i < nr_irqs; i++)
irq_domain_remove_irq(virq + i);
- irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs);
- mutex_unlock(&irq_domain_mutex);
+ irq_domain_free_irqs_hierarchy(domain, virq, nr_irqs);
+ mutex_unlock(&domain->root->mutex);
irq_domain_free_irq_data(virq, nr_irqs);
irq_free_descs(virq, nr_irqs);
}
+static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq)
+{
+ if (irq_domain_is_msi_device(domain))
+ msi_device_domain_free_wired(domain, virq);
+ else
+ irq_domain_free_irqs(virq, 1);
+}
+
/**
* irq_domain_alloc_irqs_parent - Allocate interrupts from parent domain
+ * @domain: Domain below which interrupts must be allocated
* @irq_base: Base IRQ number
* @nr_irqs: Number of IRQs to allocate
* @arg: Allocation data (arch/domain specific)
- *
- * Check whether the domain has been setup recursive. If not allocate
- * through the parent domain.
*/
int irq_domain_alloc_irqs_parent(struct irq_domain *domain,
unsigned int irq_base, unsigned int nr_irqs,
@@ -1713,11 +1920,9 @@ EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent);
/**
* irq_domain_free_irqs_parent - Free interrupts from parent domain
+ * @domain: Domain below which interrupts must be freed
* @irq_base: Base IRQ number
* @nr_irqs: Number of IRQs to free
- *
- * Check whether the domain has been setup recursive. If not free
- * through the parent domain.
*/
void irq_domain_free_irqs_parent(struct irq_domain *domain,
unsigned int irq_base, unsigned int nr_irqs)
@@ -1803,20 +2008,6 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain)
if (domain->ops->alloc)
domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY;
}
-
-/**
- * irq_domain_hierarchical_is_msi_remap - Check if the domain or any
- * parent has MSI remapping support
- * @domain: domain pointer
- */
-bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
-{
- for (; domain; domain = domain->parent) {
- if (irq_domain_is_msi_remap(domain))
- return true;
- }
- return false;
-}
#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
/**
* irq_domain_get_irq_data - Get irq_data associated with @virq and @domain
@@ -1844,7 +2035,7 @@ EXPORT_SYMBOL_GPL(irq_domain_get_irq_data);
* @handler_name: The interrupt handler name
*/
void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
- irq_hw_number_t hwirq, struct irq_chip *chip,
+ irq_hw_number_t hwirq, const struct irq_chip *chip,
void *chip_data, irq_flow_handler_t handler,
void *handler_data, const char *handler_name)
{
@@ -1853,22 +2044,43 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
irq_set_handler_data(virq, handler_data);
}
-static void irq_domain_check_hierarchy(struct irq_domain *domain)
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
{
+ return -EINVAL;
}
+
+static void irq_domain_check_hierarchy(struct irq_domain *domain) { }
+static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq) { }
+
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+#include "internals.h"
+
static struct dentry *domain_dir;
-static void
-irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
+static const struct irq_bit_descr irqdomain_flags[] = {
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_HIERARCHY),
+ BIT_MASK_DESCR(IRQ_DOMAIN_NAME_ALLOCATED),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_IPI_PER_CPU),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_IPI_SINGLE),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_ISOLATED_MSI),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_NO_MAP),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI_PARENT),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI_DEVICE),
+ BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_NONCORE),
+};
+
+static void irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
{
seq_printf(m, "%*sname: %s\n", ind, "", d->name);
- seq_printf(m, "%*ssize: %u\n", ind + 1, "",
- d->revmap_size + d->revmap_direct_max_irq);
+ seq_printf(m, "%*ssize: %u\n", ind + 1, "", d->revmap_size);
seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount);
seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags);
+ irq_debug_show_bits(m, ind, d->flags, irqdomain_flags, ARRAY_SIZE(irqdomain_flags));
if (d->ops && d->ops->debug_show)
d->ops->debug_show(m, d, NULL, ind + 1);
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
@@ -1896,16 +2108,15 @@ DEFINE_SHOW_ATTRIBUTE(irq_domain_debug);
static void debugfs_add_domain_dir(struct irq_domain *d)
{
- if (!d->name || !domain_dir || d->debugfs_file)
+ if (!d->name || !domain_dir)
return;
- d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
- &irq_domain_debug_fops);
+ debugfs_create_file(d->name, 0444, domain_dir, d,
+ &irq_domain_debug_fops);
}
static void debugfs_remove_domain_dir(struct irq_domain *d)
{
- debugfs_remove(d->debugfs_file);
- d->debugfs_file = NULL;
+ debugfs_lookup_and_remove(d->name, domain_dir);
}
void __init irq_domain_debugfs_init(struct dentry *root)
diff --git a/kernel/irq/kexec.c b/kernel/irq/kexec.c
new file mode 100644
index 000000000000..1a3deffe6b5b
--- /dev/null
+++ b/kernel/irq/kexec.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/irqnr.h>
+
+#include "internals.h"
+
+void machine_kexec_mask_interrupts(void)
+{
+ struct irq_desc *desc;
+ unsigned int i;
+
+ for_each_irq_desc(i, desc) {
+ struct irq_chip *chip;
+ int check_eoi = 1;
+
+ chip = irq_desc_get_chip(desc);
+ if (!chip || !irqd_is_started(&desc->irq_data))
+ continue;
+
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD)) {
+ /*
+ * First try to remove the active state from an interrupt which is forwarded
+ * to a VM. If the interrupt is not forwarded, try to EOI the interrupt.
+ */
+ check_eoi = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
+ }
+
+ if (check_eoi && chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
+ chip->irq_eoi(&desc->irq_data);
+
+ irq_shutdown(desc);
+ }
+}
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index dec3f73e8db9..f300bb6be3bd 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -25,12 +25,11 @@
#include "internals.h"
#if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT)
-__read_mostly bool force_irqthreads;
-EXPORT_SYMBOL_GPL(force_irqthreads);
+DEFINE_STATIC_KEY_FALSE(force_irqthreads_key);
static int __init setup_forced_irqthreads(char *arg)
{
- force_irqthreads = true;
+ static_branch_enable(&force_irqthreads_key);
return 0;
}
early_param("threadirqs", setup_forced_irqthreads);
@@ -109,6 +108,16 @@ bool synchronize_hardirq(unsigned int irq)
}
EXPORT_SYMBOL(synchronize_hardirq);
+static void __synchronize_irq(struct irq_desc *desc)
+{
+ __synchronize_hardirq(desc, true);
+ /*
+ * We made sure that no hardirq handler is running. Now verify that no
+ * threaded handlers are active.
+ */
+ wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
+}
+
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
@@ -128,16 +137,8 @@ void synchronize_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
- if (desc) {
- __synchronize_hardirq(desc, true);
- /*
- * We made sure that no hardirq handler is
- * running. Now verify that no threaded handlers are
- * active.
- */
- wait_event(desc->wait_for_threads,
- !atomic_read(&desc->threads_active));
- }
+ if (desc)
+ __synchronize_irq(desc);
}
EXPORT_SYMBOL(synchronize_irq);
@@ -179,7 +180,7 @@ bool irq_can_set_affinity_usr(unsigned int irq)
/**
* irq_set_thread_affinity - Notify irq threads to adjust affinity
- * @desc: irq descriptor which has affitnity changed
+ * @desc: irq descriptor which has affinity changed
*
* We just set IRQTF_AFFINITY and delegate the affinity setting
* to the interrupt thread itself. We can not call
@@ -190,9 +191,16 @@ void irq_set_thread_affinity(struct irq_desc *desc)
{
struct irqaction *action;
- for_each_action_of_desc(desc, action)
- if (action->thread)
+ for_each_action_of_desc(desc, action) {
+ if (action->thread) {
set_bit(IRQTF_AFFINITY, &action->thread_flags);
+ wake_up_process(action->thread);
+ }
+ if (action->secondary && action->secondary->thread) {
+ set_bit(IRQTF_AFFINITY, &action->secondary->thread_flags);
+ wake_up_process(action->secondary->thread);
+ }
+ }
}
#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
@@ -206,23 +214,19 @@ static void irq_validate_effective_affinity(struct irq_data *data)
pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
chip->name, data->irq);
}
-
-static inline void irq_init_effective_affinity(struct irq_data *data,
- const struct cpumask *mask)
-{
- cpumask_copy(irq_data_get_effective_affinity_mask(data), mask);
-}
#else
static inline void irq_validate_effective_affinity(struct irq_data *data) { }
-static inline void irq_init_effective_affinity(struct irq_data *data,
- const struct cpumask *mask) { }
#endif
+static DEFINE_PER_CPU(struct cpumask, __tmp_mask);
+
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
{
+ struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask);
struct irq_desc *desc = irq_data_to_desc(data);
struct irq_chip *chip = irq_data_get_irq_chip(data);
+ const struct cpumask *prog_mask;
int ret;
if (!chip || !chip->irq_set_affinity)
@@ -248,25 +252,33 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
* online.
*/
if (irqd_affinity_is_managed(data) &&
- housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) {
- const struct cpumask *hk_mask, *prog_mask;
-
- static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
- static struct cpumask tmp_mask;
+ housekeeping_enabled(HK_TYPE_MANAGED_IRQ)) {
+ const struct cpumask *hk_mask;
- hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
+ hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
- raw_spin_lock(&tmp_mask_lock);
- cpumask_and(&tmp_mask, mask, hk_mask);
- if (!cpumask_intersects(&tmp_mask, cpu_online_mask))
+ cpumask_and(tmp_mask, mask, hk_mask);
+ if (!cpumask_intersects(tmp_mask, cpu_online_mask))
prog_mask = mask;
else
- prog_mask = &tmp_mask;
- ret = chip->irq_set_affinity(data, prog_mask, force);
- raw_spin_unlock(&tmp_mask_lock);
+ prog_mask = tmp_mask;
} else {
- ret = chip->irq_set_affinity(data, mask, force);
+ prog_mask = mask;
}
+
+ /*
+ * Make sure we only provide online CPUs to the irqchip,
+ * unless we are being asked to force the affinity (in which
+ * case we do as we are told).
+ */
+ cpumask_and(tmp_mask, prog_mask, cpu_online_mask);
+ if (!force && !cpumask_empty(tmp_mask))
+ ret = chip->irq_set_affinity(data, tmp_mask, force);
+ else if (force)
+ ret = chip->irq_set_affinity(data, mask, force);
+ else
+ ret = -EINVAL;
+
switch (ret) {
case IRQ_SET_MASK_OK:
case IRQ_SET_MASK_OK_DONE:
@@ -315,7 +327,7 @@ static int irq_try_set_affinity(struct irq_data *data,
}
static bool irq_set_affinity_deactivated(struct irq_data *data,
- const struct cpumask *mask, bool force)
+ const struct cpumask *mask)
{
struct irq_desc *desc = irq_data_to_desc(data);
@@ -326,14 +338,14 @@ static bool irq_set_affinity_deactivated(struct irq_data *data,
* If the interrupt is not yet activated, just store the affinity
* mask and do not call the chip driver at all. On activation the
* driver has to make sure anyway that the interrupt is in a
- * useable state so startup works.
+ * usable state so startup works.
*/
if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) ||
irqd_is_activated(data) || !irqd_affinity_on_activate(data))
return false;
cpumask_copy(desc->irq_common_data.affinity, mask);
- irq_init_effective_affinity(data, mask);
+ irq_data_update_effective_affinity(data, mask);
irqd_set(data, IRQD_AFFINITY_SET);
return true;
}
@@ -348,7 +360,7 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
if (!chip || !chip->irq_set_affinity)
return -EINVAL;
- if (irq_set_affinity_deactivated(data, mask, force))
+ if (irq_set_affinity_deactivated(data, mask))
return 0;
if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) {
@@ -441,7 +453,8 @@ out_unlock:
return ret;
}
-int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force)
+static int __irq_set_affinity(unsigned int irq, const struct cpumask *mask,
+ bool force)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
@@ -456,7 +469,38 @@ int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force)
return ret;
}
-int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
+/**
+ * irq_set_affinity - Set the irq affinity of a given irq
+ * @irq: Interrupt to set affinity
+ * @cpumask: cpumask
+ *
+ * Fails if cpumask does not contain an online CPU
+ */
+int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+{
+ return __irq_set_affinity(irq, cpumask, false);
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity);
+
+/**
+ * irq_force_affinity - Force the irq affinity of a given irq
+ * @irq: Interrupt to set affinity
+ * @cpumask: cpumask
+ *
+ * Same as irq_set_affinity, but without checking the mask against
+ * online cpus.
+ *
+ * Solely for low level cpu hotplug code, where we need to make per
+ * cpu interrupts affine before the cpu becomes online.
+ */
+int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask)
+{
+ return __irq_set_affinity(irq, cpumask, true);
+}
+EXPORT_SYMBOL_GPL(irq_force_affinity);
+
+int __irq_apply_affinity_hint(unsigned int irq, const struct cpumask *m,
+ bool setaffinity)
{
unsigned long flags;
struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
@@ -465,12 +509,11 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
return -EINVAL;
desc->affinity_hint = m;
irq_put_desc_unlock(desc, flags);
- /* set the initial affinity to prevent every interrupt being on CPU0 */
- if (m)
+ if (m && setaffinity)
__irq_set_affinity(irq, m, false);
return 0;
}
-EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+EXPORT_SYMBOL_GPL(__irq_apply_affinity_hint);
static void irq_affinity_notify(struct work_struct *work)
{
@@ -518,7 +561,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
/* The release function is promised process context */
might_sleep();
- if (!desc || desc->istate & IRQS_NMI)
+ if (!desc || irq_is_nmi(desc))
return -EINVAL;
/* Complete initialisation of *notify */
@@ -686,10 +729,13 @@ EXPORT_SYMBOL(disable_irq_nosync);
* to complete before returning. If you use this function while
* holding a resource the IRQ handler may need you will deadlock.
*
- * This function may be called - with care - from IRQ context.
+ * Can only be called from preemptible code as it might sleep when
+ * an interrupt thread is associated to @irq.
+ *
*/
void disable_irq(unsigned int irq)
{
+ might_sleep();
if (!__disable_irq_nosync(irq))
synchronize_irq(irq);
}
@@ -751,10 +797,14 @@ void __enable_irq(struct irq_desc *desc)
irq_settings_set_noprobe(desc);
/*
* Call irq_startup() not irq_enable() here because the
- * interrupt might be marked NOAUTOEN. So irq_startup()
- * needs to be invoked when it gets enabled the first
- * time. If it was already started up, then irq_startup()
- * will invoke irq_enable() under the hood.
+ * interrupt might be marked NOAUTOEN so irq_startup()
+ * needs to be invoked when it gets enabled the first time.
+ * This is also required when __enable_irq() is invoked for
+ * a managed and shutdown interrupt from the S3 resume
+ * path.
+ *
+ * If it was already started up, then irq_startup() will
+ * invoke irq_enable() under the hood.
*/
irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE);
break;
@@ -849,7 +899,7 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on)
return -EINVAL;
/* Don't use NMIs as wake up interrupts please */
- if (desc->istate & IRQS_NMI) {
+ if (irq_is_nmi(desc)) {
ret = -EINVAL;
goto out_unlock;
}
@@ -1004,10 +1054,57 @@ static irqreturn_t irq_forced_secondary_handler(int irq, void *dev_id)
return IRQ_NONE;
}
-static int irq_wait_for_interrupt(struct irqaction *action)
+#ifdef CONFIG_SMP
+/*
+ * Check whether we need to change the affinity of the interrupt thread.
+ */
+static void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
+{
+ cpumask_var_t mask;
+ bool valid = false;
+
+ if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags))
+ return;
+
+ __set_current_state(TASK_RUNNING);
+
+ /*
+ * In case we are out of memory we set IRQTF_AFFINITY again and
+ * try again next time
+ */
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
+ set_bit(IRQTF_AFFINITY, &action->thread_flags);
+ return;
+ }
+
+ raw_spin_lock_irq(&desc->lock);
+ /*
+ * This code is triggered unconditionally. Check the affinity
+ * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out.
+ */
+ if (cpumask_available(desc->irq_common_data.affinity)) {
+ const struct cpumask *m;
+
+ m = irq_data_get_effective_affinity_mask(&desc->irq_data);
+ cpumask_copy(mask, m);
+ valid = true;
+ }
+ raw_spin_unlock_irq(&desc->lock);
+
+ if (valid)
+ set_cpus_allowed_ptr(current, mask);
+ free_cpumask_var(mask);
+}
+#else
+static inline void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
+#endif
+
+static int irq_wait_for_interrupt(struct irq_desc *desc,
+ struct irqaction *action)
{
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
+ irq_thread_check_affinity(desc, action);
if (kthread_should_stop()) {
/* may need to run one last time */
@@ -1054,7 +1151,7 @@ again:
* to IRQS_INPROGRESS and the irq line is masked forever.
*
* This also serializes the state of shared oneshot handlers
- * versus "desc->threads_onehsot |= action->thread_mask;" in
+ * versus "desc->threads_oneshot |= action->thread_mask;" in
* irq_wake_thread(). See the comment there which explains the
* serialization.
*/
@@ -1084,51 +1181,21 @@ out_unlock:
chip_bus_sync_unlock(desc);
}
-#ifdef CONFIG_SMP
/*
- * Check whether we need to change the affinity of the interrupt thread.
+ * Interrupts explicitly requested as threaded interrupts want to be
+ * preemptible - many of them need to sleep and wait for slow busses to
+ * complete.
*/
-static void
-irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
+static irqreturn_t irq_thread_fn(struct irq_desc *desc, struct irqaction *action)
{
- cpumask_var_t mask;
- bool valid = true;
-
- if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags))
- return;
-
- /*
- * In case we are out of memory we set IRQTF_AFFINITY again and
- * try again next time
- */
- if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
- set_bit(IRQTF_AFFINITY, &action->thread_flags);
- return;
- }
-
- raw_spin_lock_irq(&desc->lock);
- /*
- * This code is triggered unconditionally. Check the affinity
- * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out.
- */
- if (cpumask_available(desc->irq_common_data.affinity)) {
- const struct cpumask *m;
+ irqreturn_t ret = action->thread_fn(action->irq, action->dev_id);
- m = irq_data_get_effective_affinity_mask(&desc->irq_data);
- cpumask_copy(mask, m);
- } else {
- valid = false;
- }
- raw_spin_unlock_irq(&desc->lock);
+ if (ret == IRQ_HANDLED)
+ atomic_inc(&desc->threads_handled);
- if (valid)
- set_cpus_allowed_ptr(current, mask);
- free_cpumask_var(mask);
+ irq_finalize_oneshot(desc, action);
+ return ret;
}
-#else
-static inline void
-irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
-#endif
/*
* Interrupts which are not explicitly requested as threaded
@@ -1136,40 +1203,21 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
* context. So we need to disable bh here to avoid deadlocks and other
* side effects.
*/
-static irqreturn_t
-irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
+static irqreturn_t irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
{
irqreturn_t ret;
local_bh_disable();
- ret = action->thread_fn(action->irq, action->dev_id);
- if (ret == IRQ_HANDLED)
- atomic_inc(&desc->threads_handled);
-
- irq_finalize_oneshot(desc, action);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
+ ret = irq_thread_fn(desc, action);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
local_bh_enable();
return ret;
}
-/*
- * Interrupts explicitly requested as threaded interrupts want to be
- * preemtible - many of them need to sleep and wait for slow busses to
- * complete.
- */
-static irqreturn_t irq_thread_fn(struct irq_desc *desc,
- struct irqaction *action)
-{
- irqreturn_t ret;
-
- ret = action->thread_fn(action->irq, action->dev_id);
- if (ret == IRQ_HANDLED)
- atomic_inc(&desc->threads_handled);
-
- irq_finalize_oneshot(desc, action);
- return ret;
-}
-
-static void wake_threads_waitq(struct irq_desc *desc)
+void wake_threads_waitq(struct irq_desc *desc)
{
if (atomic_dec_and_test(&desc->threads_active))
wake_up(&desc->wait_for_threads);
@@ -1215,6 +1263,31 @@ static void irq_wake_secondary(struct irq_desc *desc, struct irqaction *action)
}
/*
+ * Internal function to notify that a interrupt thread is ready.
+ */
+static void irq_thread_set_ready(struct irq_desc *desc,
+ struct irqaction *action)
+{
+ set_bit(IRQTF_READY, &action->thread_flags);
+ wake_up(&desc->wait_for_threads);
+}
+
+/*
+ * Internal function to wake up a interrupt thread and wait until it is
+ * ready.
+ */
+static void wake_up_and_wait_for_irq_thread_ready(struct irq_desc *desc,
+ struct irqaction *action)
+{
+ if (!action || !action->thread)
+ return;
+
+ wake_up_process(action->thread);
+ wait_event(desc->wait_for_threads,
+ test_bit(IRQTF_READY, &action->thread_flags));
+}
+
+/*
* Interrupt handler thread
*/
static int irq_thread(void *data)
@@ -1225,8 +1298,12 @@ static int irq_thread(void *data)
irqreturn_t (*handler_fn)(struct irq_desc *desc,
struct irqaction *action);
- if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD,
- &action->thread_flags))
+ irq_thread_set_ready(desc, action);
+
+ sched_set_fifo(current);
+
+ if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD,
+ &action->thread_flags))
handler_fn = irq_forced_thread_fn;
else
handler_fn = irq_thread_fn;
@@ -1234,13 +1311,9 @@ static int irq_thread(void *data)
init_task_work(&on_exit_work, irq_thread_dtor);
task_work_add(current, &on_exit_work, TWA_NONE);
- irq_thread_check_affinity(desc, action);
-
- while (!irq_wait_for_interrupt(action)) {
+ while (!irq_wait_for_interrupt(desc, action)) {
irqreturn_t action_ret;
- irq_thread_check_affinity(desc, action);
-
action_ret = handler_fn(desc, action);
if (action_ret == IRQ_WAKE_THREAD)
irq_wake_secondary(desc, action);
@@ -1254,7 +1327,7 @@ static int irq_thread(void *data)
* synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the
* oneshot mask bit can be set.
*/
- task_work_cancel(current, irq_thread_dtor);
+ task_work_cancel_func(current, irq_thread_dtor);
return 0;
}
@@ -1287,7 +1360,7 @@ EXPORT_SYMBOL_GPL(irq_wake_thread);
static int irq_setup_forced_threading(struct irqaction *new)
{
- if (!force_irqthreads)
+ if (!force_irqthreads())
return 0;
if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
return 0;
@@ -1390,8 +1463,6 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
if (IS_ERR(t))
return PTR_ERR(t);
- sched_set_fifo(t);
-
/*
* We keep the reference to the task struct even if
* the thread dies to avoid that the interrupt code
@@ -1547,7 +1618,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
*/
unsigned int oldtype;
- if (desc->istate & IRQS_NMI) {
+ if (irq_is_nmi(desc)) {
pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n",
new->name, irq, desc->irq_data.chip->name);
ret = -EINVAL;
@@ -1566,8 +1637,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
if (!((old->flags & new->flags) & IRQF_SHARED) ||
- (oldtype != (new->flags & IRQF_TRIGGER_MASK)) ||
- ((old->flags ^ new->flags) & IRQF_ONESHOT))
+ (oldtype != (new->flags & IRQF_TRIGGER_MASK)))
+ goto mismatch;
+
+ if ((old->flags & IRQF_ONESHOT) &&
+ (new->flags & IRQF_COND_ONESHOT))
+ new->flags |= IRQF_ONESHOT;
+ else if ((old->flags ^ new->flags) & IRQF_ONESHOT)
goto mismatch;
/* All handlers must agree on per-cpuness */
@@ -1649,8 +1725,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
if (!shared) {
- init_waitqueue_head(&desc->wait_for_threads);
-
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
ret = __irq_set_trigger(desc,
@@ -1682,8 +1756,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (new->flags & IRQF_PERCPU) {
irqd_set(&desc->irq_data, IRQD_PER_CPU);
irq_settings_set_per_cpu(desc);
+ if (new->flags & IRQF_NO_DEBUG)
+ irq_settings_set_no_debug(desc);
}
+ if (noirqdebug)
+ irq_settings_set_no_debug(desc);
+
if (new->flags & IRQF_ONESHOT)
desc->istate |= IRQS_ONESHOT;
@@ -1693,7 +1772,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
}
- if (irq_settings_can_autoenable(desc)) {
+ if (!(new->flags & IRQF_NO_AUTOEN) &&
+ irq_settings_can_autoenable(desc)) {
irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
} else {
/*
@@ -1740,14 +1820,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
irq_setup_timings(desc, new);
- /*
- * Strictly no need to wake it up, but hung_task complains
- * when no hard interrupt wakes the thread up.
- */
- if (new->thread)
- wake_up_process(new->thread);
- if (new->secondary)
- wake_up_process(new->secondary->thread);
+ wake_up_and_wait_for_irq_thread_ready(desc, new);
+ wake_up_and_wait_for_irq_thread_ready(desc, new->secondary);
register_irq_proc(irq, desc);
new->dir = NULL;
@@ -1778,15 +1852,13 @@ out_thread:
struct task_struct *t = new->thread;
new->thread = NULL;
- kthread_stop(t);
- put_task_struct(t);
+ kthread_stop_put(t);
}
if (new->secondary && new->secondary->thread) {
struct task_struct *t = new->secondary->thread;
new->secondary->thread = NULL;
- kthread_stop(t);
- put_task_struct(t);
+ kthread_stop_put(t);
}
out_mput:
module_put(desc->owner);
@@ -1872,7 +1944,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
* supports it also make sure that there is no (not yet serviced)
* interrupt in flight at the hardware level.
*/
- __synchronize_hardirq(desc, true);
+ __synchronize_irq(desc);
#ifdef CONFIG_DEBUG_SHIRQ
/*
@@ -1897,18 +1969,15 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
* the same bit to a newly requested action.
*/
if (action->thread) {
- kthread_stop(action->thread);
- put_task_struct(action->thread);
- if (action->secondary && action->secondary->thread) {
- kthread_stop(action->secondary->thread);
- put_task_struct(action->secondary->thread);
- }
+ kthread_stop_put(action->thread);
+ if (action->secondary && action->secondary->thread)
+ kthread_stop_put(action->secondary->thread);
}
/* Last action releases resources */
if (!desc->action) {
/*
- * Reaquire bus lock as irq_release_resources() might
+ * Reacquire bus lock as irq_release_resources() might
* require it to deallocate resources over the slow bus.
*/
chip_bus_lock(desc);
@@ -2007,7 +2076,7 @@ const void *free_nmi(unsigned int irq, void *dev_id)
unsigned long flags;
const void *devname;
- if (!desc || WARN_ON(!(desc->istate & IRQS_NMI)))
+ if (!desc || WARN_ON(!irq_is_nmi(desc)))
return NULL;
if (WARN_ON(irq_settings_is_per_cpu_devid(desc)))
@@ -2031,9 +2100,9 @@ const void *free_nmi(unsigned int irq, void *dev_id)
* request_threaded_irq - allocate an interrupt line
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs.
- * Primary handler for threaded interrupts
- * If NULL and thread_fn != NULL the default
- * primary handler is installed
+ * Primary handler for threaded interrupts.
+ * If handler is NULL and thread_fn != NULL
+ * the default primary handler is installed.
* @thread_fn: Function called from the irq handler thread
* If NULL, no irq thread is created
* @irqflags: Interrupt type flags
@@ -2067,7 +2136,7 @@ const void *free_nmi(unsigned int irq, void *dev_id)
*
* IRQF_SHARED Interrupt is shared
* IRQF_TRIGGER_* Specify active edge(s) or level
- *
+ * IRQF_ONESHOT Run thread_fn with interrupt line masked
*/
int request_threaded_irq(unsigned int irq, irq_handler_t handler,
irq_handler_t thread_fn, unsigned long irqflags,
@@ -2086,10 +2155,15 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
* which interrupt is which (messes up the interrupt freeing
* logic etc).
*
+ * Also shared interrupts do not go well with disabling auto enable.
+ * The sharing interrupt might request it while it's still disabled
+ * and then wait for interrupts forever.
+ *
* Also IRQF_COND_SUSPEND only makes sense for shared interrupts and
* it cannot be set along with IRQF_NO_SUSPEND.
*/
if (((irqflags & IRQF_SHARED) && !dev_id) ||
+ ((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN)) ||
(!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) ||
((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND)))
return -EINVAL;
@@ -2245,7 +2319,8 @@ int request_nmi(unsigned int irq, irq_handler_t handler,
desc = irq_to_desc(irq);
- if (!desc || irq_settings_can_autoenable(desc) ||
+ if (!desc || (irq_settings_can_autoenable(desc) &&
+ !(irqflags & IRQF_NO_AUTOEN)) ||
!irq_settings_can_request(desc) ||
WARN_ON(irq_settings_is_per_cpu_devid(desc)) ||
!irq_supports_nmi(desc))
@@ -2467,7 +2542,7 @@ void free_percpu_nmi(unsigned int irq, void __percpu *dev_id)
if (!desc || !irq_settings_is_per_cpu_devid(desc))
return;
- if (WARN_ON(!(desc->istate & IRQS_NMI)))
+ if (WARN_ON(!irq_is_nmi(desc)))
return;
kfree(__free_percpu_irq(irq, dev_id));
@@ -2603,7 +2678,7 @@ int request_percpu_nmi(unsigned int irq, irq_handler_t handler,
return -EINVAL;
/* The line cannot already be NMI */
- if (desc->istate & IRQS_NMI)
+ if (irq_is_nmi(desc))
return -EINVAL;
action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
@@ -2664,7 +2739,7 @@ int prepare_percpu_nmi(unsigned int irq)
if (!desc)
return -EINVAL;
- if (WARN(!(desc->istate & IRQS_NMI),
+ if (WARN(!irq_is_nmi(desc),
KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n",
irq)) {
ret = -EINVAL;
@@ -2706,7 +2781,7 @@ void teardown_percpu_nmi(unsigned int irq)
if (!desc)
return;
- if (WARN_ON(!(desc->istate & IRQS_NMI)))
+ if (WARN_ON(!irq_is_nmi(desc)))
goto out;
irq_nmi_teardown(desc);
@@ -2742,7 +2817,7 @@ int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which,
* irq_get_irqchip_state - returns the irqchip state of a interrupt.
* @irq: Interrupt line that is forwarded to a VM
* @which: One of IRQCHIP_STATE_* the caller wants to know about
- * @state: a pointer to a boolean where the state is to be storeed
+ * @state: a pointer to a boolean where the state is to be stored
*
* This call snapshots the internal irqchip state of an
* interrupt, returning into @state the bit corresponding to
@@ -2781,7 +2856,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
* This call sets the internal irqchip state of an interrupt,
* depending on the value of @which.
*
- * This function should be called with preemption disabled if the
+ * This function should be called with migration disabled if the
* interrupt controller has per-cpu registers.
*/
int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 651a4ad6d711..8f222d1cccec 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -8,8 +8,6 @@
#include <linux/cpu.h>
#include <linux/irq.h>
-#define IRQ_MATRIX_SIZE (BITS_TO_LONGS(IRQ_MATRIX_BITS))
-
struct cpumap {
unsigned int available;
unsigned int allocated;
@@ -17,8 +15,8 @@ struct cpumap {
unsigned int managed_allocated;
bool initialized;
bool online;
- unsigned long alloc_map[IRQ_MATRIX_SIZE];
- unsigned long managed_map[IRQ_MATRIX_SIZE];
+ unsigned long *managed_map;
+ unsigned long alloc_map[];
};
struct irq_matrix {
@@ -32,8 +30,8 @@ struct irq_matrix {
unsigned int total_allocated;
unsigned int online_maps;
struct cpumap __percpu *maps;
- unsigned long scratch_map[IRQ_MATRIX_SIZE];
- unsigned long system_map[IRQ_MATRIX_SIZE];
+ unsigned long *system_map;
+ unsigned long scratch_map[];
};
#define CREATE_TRACE_POINTS
@@ -50,24 +48,32 @@ __init struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits,
unsigned int alloc_start,
unsigned int alloc_end)
{
+ unsigned int cpu, matrix_size = BITS_TO_LONGS(matrix_bits);
struct irq_matrix *m;
- if (matrix_bits > IRQ_MATRIX_BITS)
- return NULL;
-
- m = kzalloc(sizeof(*m), GFP_KERNEL);
+ m = kzalloc(struct_size(m, scratch_map, matrix_size * 2), GFP_KERNEL);
if (!m)
return NULL;
+ m->system_map = &m->scratch_map[matrix_size];
+
m->matrix_bits = matrix_bits;
m->alloc_start = alloc_start;
m->alloc_end = alloc_end;
m->alloc_size = alloc_end - alloc_start;
- m->maps = alloc_percpu(*m->maps);
+ m->maps = __alloc_percpu(struct_size(m->maps, alloc_map, matrix_size * 2),
+ __alignof__(*m->maps));
if (!m->maps) {
kfree(m);
return NULL;
}
+
+ for_each_possible_cpu(cpu) {
+ struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
+
+ cm->managed_map = &cm->alloc_map[matrix_size];
+ }
+
return m;
}
@@ -280,12 +286,13 @@ void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk)
/**
* irq_matrix_alloc_managed - Allocate a managed interrupt in a CPU map
* @m: Matrix pointer
- * @cpu: On which CPU the interrupt should be allocated
+ * @msk: Which CPUs to search in
+ * @mapped_cpu: Pointer to store the CPU for which the irq was allocated
*/
int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk,
unsigned int *mapped_cpu)
{
- unsigned int bit, cpu, end = m->alloc_end;
+ unsigned int bit, cpu, end;
struct cpumap *cm;
if (cpumask_empty(msk))
@@ -337,15 +344,14 @@ void irq_matrix_assign(struct irq_matrix *m, unsigned int bit)
* irq_matrix_reserve - Reserve interrupts
* @m: Matrix pointer
*
- * This is merily a book keeping call. It increments the number of globally
+ * This is merely a book keeping call. It increments the number of globally
* reserved interrupt bits w/o actually allocating them. This allows to
* setup interrupt descriptors w/o assigning low level resources to it.
* The actual allocation happens when the interrupt gets activated.
*/
void irq_matrix_reserve(struct irq_matrix *m)
{
- if (m->global_reserved <= m->global_available &&
- m->global_reserved + 1 > m->global_available)
+ if (m->global_reserved == m->global_available)
pr_warn("Interrupt reservation exceeds available resources\n");
m->global_reserved++;
@@ -356,7 +362,7 @@ void irq_matrix_reserve(struct irq_matrix *m)
* irq_matrix_remove_reserved - Remove interrupt reservation
* @m: Matrix pointer
*
- * This is merily a book keeping call. It decrements the number of globally
+ * This is merely a book keeping call. It decrements the number of globally
* reserved interrupt bits. This is used to undo irq_matrix_reserve() when the
* interrupt was never in use and a real vector allocated, which undid the
* reservation.
@@ -423,7 +429,9 @@ void irq_matrix_free(struct irq_matrix *m, unsigned int cpu,
if (WARN_ON_ONCE(bit < m->alloc_start || bit >= m->alloc_end))
return;
- clear_bit(bit, cm->alloc_map);
+ if (WARN_ON_ONCE(!test_and_clear_bit(bit, cm->alloc_map)))
+ return;
+
cm->allocated--;
if(managed)
cm->managed_allocated--;
@@ -464,16 +472,16 @@ unsigned int irq_matrix_reserved(struct irq_matrix *m)
}
/**
- * irq_matrix_allocated - Get the number of allocated irqs on the local cpu
+ * irq_matrix_allocated - Get the number of allocated non-managed irqs on the local CPU
* @m: Pointer to the matrix to search
*
- * This returns number of allocated irqs
+ * This returns number of allocated non-managed interrupts.
*/
unsigned int irq_matrix_allocated(struct irq_matrix *m)
{
struct cpumap *cm = this_cpu_ptr(m->maps);
- return cm->allocated;
+ return cm->allocated - cm->managed_allocated;
}
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index def48589ea48..eb150afd671f 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -7,7 +7,7 @@
/**
* irq_fixup_move_pending - Cleanup irq move pending from a dying CPU
- * @desc: Interrupt descpriptor to clean up
+ * @desc: Interrupt descriptor to clean up
* @force_clear: If set clear the move pending bit unconditionally.
* If not set, clear it only when the dying CPU is the
* last one in the pending mask.
@@ -26,7 +26,7 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
* The outgoing CPU might be the last online target in a pending
* interrupt move. If that's the case clear the pending move bit.
*/
- if (cpumask_any_and(desc->pending_mask, cpu_online_mask) >= nr_cpu_ids) {
+ if (!cpumask_intersects(desc->pending_mask, cpu_online_mask)) {
irqd_clr_move_pending(data);
return false;
}
@@ -74,7 +74,7 @@ void irq_move_masked_irq(struct irq_data *idata)
* For correct operation this depends on the caller
* masking the irqs.
*/
- if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids) {
+ if (cpumask_intersects(desc->pending_mask, cpu_online_mask)) {
int ret;
ret = irq_do_set_affinity(data, desc->pending_mask, false);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index b338d622f26e..396a067a8a56 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -5,55 +5,268 @@
*
* This file is licensed under GPLv2.
*
- * This file contains common code to support Message Signalled Interrupt for
+ * This file contains common code to support Message Signaled Interrupts for
* PCI compatible and non PCI compatible devices.
*/
-#include <linux/types.h>
#include <linux/device.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/msi.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
#include "internals.h"
/**
- * alloc_msi_entry - Allocate an initialize msi_entry
+ * struct msi_device_data - MSI per device data
+ * @properties: MSI properties which are interesting to drivers
+ * @mutex: Mutex protecting the MSI descriptor store
+ * @__domains: Internal data for per device MSI domains
+ * @__iter_idx: Index to search the next entry for iterators
+ */
+struct msi_device_data {
+ unsigned long properties;
+ struct mutex mutex;
+ struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS];
+ unsigned long __iter_idx;
+};
+
+/**
+ * struct msi_ctrl - MSI internal management control structure
+ * @domid: ID of the domain on which management operations should be done
+ * @first: First (hardware) slot index to operate on
+ * @last: Last (hardware) slot index to operate on
+ * @nirqs: The number of Linux interrupts to allocate. Can be larger
+ * than the range due to PCI/multi-MSI.
+ */
+struct msi_ctrl {
+ unsigned int domid;
+ unsigned int first;
+ unsigned int last;
+ unsigned int nirqs;
+};
+
+/* Invalid Xarray index which is outside of any searchable range */
+#define MSI_XA_MAX_INDEX (ULONG_MAX - 1)
+/* The maximum domain size */
+#define MSI_XA_DOMAIN_SIZE (MSI_MAX_INDEX + 1)
+
+static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
+static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid);
+static inline int msi_sysfs_create_group(struct device *dev);
+
+
+/**
+ * msi_alloc_desc - Allocate an initialized msi_desc
* @dev: Pointer to the device for which this is allocated
* @nvec: The number of vectors used in this entry
* @affinity: Optional pointer to an affinity mask array size of @nvec
*
- * If @affinity is not NULL then an affinity array[@nvec] is allocated
+ * If @affinity is not %NULL then an affinity array[@nvec] is allocated
* and the affinity masks and flags from @affinity are copied.
+ *
+ * Return: pointer to allocated &msi_desc on success or %NULL on failure
*/
-struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
- const struct irq_affinity_desc *affinity)
+static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
+ const struct irq_affinity_desc *affinity)
{
- struct msi_desc *desc;
+ struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
- desc = kzalloc(sizeof(*desc), GFP_KERNEL);
if (!desc)
return NULL;
- INIT_LIST_HEAD(&desc->list);
desc->dev = dev;
desc->nvec_used = nvec;
if (affinity) {
- desc->affinity = kmemdup(affinity,
- nvec * sizeof(*desc->affinity), GFP_KERNEL);
+ desc->affinity = kmemdup_array(affinity, nvec, sizeof(*desc->affinity), GFP_KERNEL);
if (!desc->affinity) {
kfree(desc);
return NULL;
}
}
-
return desc;
}
-void free_msi_entry(struct msi_desc *entry)
+static void msi_free_desc(struct msi_desc *desc)
{
- kfree(entry->affinity);
- kfree(entry);
+ kfree(desc->affinity);
+ kfree(desc);
+}
+
+static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
+ unsigned int domid, unsigned int index)
+{
+ struct msi_device_data *md = dev->msi.data;
+ struct xarray *xa = &md->__domains[domid].store;
+ unsigned int hwsize;
+ int ret;
+
+ hwsize = msi_domain_get_hwsize(dev, domid);
+
+ if (index == MSI_ANY_INDEX) {
+ struct xa_limit limit = { .min = 0, .max = hwsize - 1 };
+ unsigned int index;
+
+ /* Let the xarray allocate a free index within the limit */
+ ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL);
+ if (ret)
+ goto fail;
+
+ desc->msi_index = index;
+ return 0;
+ } else {
+ if (index >= hwsize) {
+ ret = -ERANGE;
+ goto fail;
+ }
+
+ desc->msi_index = index;
+ ret = xa_insert(xa, index, desc, GFP_KERNEL);
+ if (ret)
+ goto fail;
+ return 0;
+ }
+fail:
+ msi_free_desc(desc);
+ return ret;
+}
+
+/**
+ * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and
+ * insert it at @init_desc->msi_index
+ *
+ * @dev: Pointer to the device for which the descriptor is allocated
+ * @domid: The id of the interrupt domain to which the desriptor is added
+ * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor
+ *
+ * Return: 0 on success or an appropriate failure code.
+ */
+int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
+ struct msi_desc *init_desc)
+{
+ struct msi_desc *desc;
+
+ lockdep_assert_held(&dev->msi.data->mutex);
+
+ desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity);
+ if (!desc)
+ return -ENOMEM;
+
+ /* Copy type specific data to the new descriptor. */
+ desc->pci = init_desc->pci;
+
+ return msi_insert_desc(dev, desc, domid, init_desc->msi_index);
+}
+
+static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
+{
+ switch (filter) {
+ case MSI_DESC_ALL:
+ return true;
+ case MSI_DESC_NOTASSOCIATED:
+ return !desc->irq;
+ case MSI_DESC_ASSOCIATED:
+ return !!desc->irq;
+ }
+ WARN_ON_ONCE(1);
+ return false;
+}
+
+static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
+{
+ unsigned int hwsize;
+
+ if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
+ (dev->msi.domain &&
+ !dev->msi.data->__domains[ctrl->domid].domain)))
+ return false;
+
+ hwsize = msi_domain_get_hwsize(dev, ctrl->domid);
+ if (WARN_ON_ONCE(ctrl->first > ctrl->last ||
+ ctrl->first >= hwsize ||
+ ctrl->last >= hwsize))
+ return false;
+ return true;
+}
+
+static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl)
+{
+ struct msi_desc *desc;
+ struct xarray *xa;
+ unsigned long idx;
+
+ lockdep_assert_held(&dev->msi.data->mutex);
+
+ if (!msi_ctrl_valid(dev, ctrl))
+ return;
+
+ xa = &dev->msi.data->__domains[ctrl->domid].store;
+ xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
+ xa_erase(xa, idx);
+
+ /* Leak the descriptor when it is still referenced */
+ if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
+ continue;
+ msi_free_desc(desc);
+ }
+}
+
+/**
+ * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain
+ * @dev: Device for which to free the descriptors
+ * @domid: Id of the domain to operate on
+ * @first: Index to start freeing from (inclusive)
+ * @last: Last index to be freed (inclusive)
+ */
+void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last)
+{
+ struct msi_ctrl ctrl = {
+ .domid = domid,
+ .first = first,
+ .last = last,
+ };
+
+ msi_domain_free_descs(dev, &ctrl);
+}
+
+/**
+ * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors
+ * @dev: Pointer to the device for which the descriptors are allocated
+ * @ctrl: Allocation control struct
+ *
+ * Return: 0 on success or an appropriate failure code.
+ */
+static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl)
+{
+ struct msi_desc *desc;
+ unsigned int idx;
+ int ret;
+
+ lockdep_assert_held(&dev->msi.data->mutex);
+
+ if (!msi_ctrl_valid(dev, ctrl))
+ return -EINVAL;
+
+ for (idx = ctrl->first; idx <= ctrl->last; idx++) {
+ desc = msi_alloc_desc(dev, 1, NULL);
+ if (!desc)
+ goto fail_mem;
+ ret = msi_insert_desc(dev, desc, ctrl->domid, idx);
+ if (ret)
+ goto fail;
+ }
+ return 0;
+
+fail_mem:
+ ret = -ENOMEM;
+fail:
+ msi_domain_free_descs(dev, ctrl);
+ return ret;
}
void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
@@ -69,7 +282,354 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
}
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
-#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+static void msi_device_data_release(struct device *dev, void *res)
+{
+ struct msi_device_data *md = res;
+ int i;
+
+ for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
+ msi_remove_device_irq_domain(dev, i);
+ WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
+ xa_destroy(&md->__domains[i].store);
+ }
+ dev->msi.data = NULL;
+}
+
+/**
+ * msi_setup_device_data - Setup MSI device data
+ * @dev: Device for which MSI device data should be set up
+ *
+ * Return: 0 on success, appropriate error code otherwise
+ *
+ * This can be called more than once for @dev. If the MSI device data is
+ * already allocated the call succeeds. The allocated memory is
+ * automatically released when the device is destroyed.
+ */
+int msi_setup_device_data(struct device *dev)
+{
+ struct msi_device_data *md;
+ int ret, i;
+
+ if (dev->msi.data)
+ return 0;
+
+ md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
+ if (!md)
+ return -ENOMEM;
+
+ ret = msi_sysfs_create_group(dev);
+ if (ret) {
+ devres_free(md);
+ return ret;
+ }
+
+ for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
+ xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC);
+
+ /*
+ * If @dev::msi::domain is set and is a global MSI domain, copy the
+ * pointer into the domain array so all code can operate on domain
+ * ids. The NULL pointer check is required to keep the legacy
+ * architecture specific PCI/MSI support working.
+ */
+ if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain))
+ md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
+
+ mutex_init(&md->mutex);
+ dev->msi.data = md;
+ devres_add(dev, md);
+ return 0;
+}
+
+/**
+ * msi_lock_descs - Lock the MSI descriptor storage of a device
+ * @dev: Device to operate on
+ */
+void msi_lock_descs(struct device *dev)
+{
+ mutex_lock(&dev->msi.data->mutex);
+}
+EXPORT_SYMBOL_GPL(msi_lock_descs);
+
+/**
+ * msi_unlock_descs - Unlock the MSI descriptor storage of a device
+ * @dev: Device to operate on
+ */
+void msi_unlock_descs(struct device *dev)
+{
+ /* Invalidate the index which was cached by the iterator */
+ dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
+ mutex_unlock(&dev->msi.data->mutex);
+}
+EXPORT_SYMBOL_GPL(msi_unlock_descs);
+
+static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
+ enum msi_desc_filter filter)
+{
+ struct xarray *xa = &md->__domains[domid].store;
+ struct msi_desc *desc;
+
+ xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) {
+ if (msi_desc_match(desc, filter))
+ return desc;
+ }
+ md->__iter_idx = MSI_XA_MAX_INDEX;
+ return NULL;
+}
+
+/**
+ * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device
+ * @dev: Device to operate on
+ * @domid: The id of the interrupt domain which should be walked.
+ * @filter: Descriptor state filter
+ *
+ * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
+ * must be invoked before the call.
+ *
+ * Return: Pointer to the first MSI descriptor matching the search
+ * criteria, NULL if none found.
+ */
+struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
+ enum msi_desc_filter filter)
+{
+ struct msi_device_data *md = dev->msi.data;
+
+ if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
+ return NULL;
+
+ lockdep_assert_held(&md->mutex);
+
+ md->__iter_idx = 0;
+ return msi_find_desc(md, domid, filter);
+}
+EXPORT_SYMBOL_GPL(msi_domain_first_desc);
+
+/**
+ * msi_next_desc - Get the next MSI descriptor of a device
+ * @dev: Device to operate on
+ * @domid: The id of the interrupt domain which should be walked.
+ * @filter: Descriptor state filter
+ *
+ * The first invocation of msi_next_desc() has to be preceeded by a
+ * successful invocation of __msi_first_desc(). Consecutive invocations are
+ * only valid if the previous one was successful. All these operations have
+ * to be done within the same MSI mutex held region.
+ *
+ * Return: Pointer to the next MSI descriptor matching the search
+ * criteria, NULL if none found.
+ */
+struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
+ enum msi_desc_filter filter)
+{
+ struct msi_device_data *md = dev->msi.data;
+
+ if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
+ return NULL;
+
+ lockdep_assert_held(&md->mutex);
+
+ if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
+ return NULL;
+
+ md->__iter_idx++;
+ return msi_find_desc(md, domid, filter);
+}
+EXPORT_SYMBOL_GPL(msi_next_desc);
+
+/**
+ * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain
+ * @dev: Device to operate on
+ * @domid: Domain ID of the interrupt domain associated to the device
+ * @index: MSI interrupt index to look for (0-based)
+ *
+ * Return: The Linux interrupt number on success (> 0), 0 if not found
+ */
+unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
+{
+ struct msi_desc *desc;
+ unsigned int ret = 0;
+ bool pcimsi = false;
+ struct xarray *xa;
+
+ if (!dev->msi.data)
+ return 0;
+
+ if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
+ return 0;
+
+ /* This check is only valid for the PCI default MSI domain */
+ if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
+ pcimsi = to_pci_dev(dev)->msi_enabled;
+
+ msi_lock_descs(dev);
+ xa = &dev->msi.data->__domains[domid].store;
+ desc = xa_load(xa, pcimsi ? 0 : index);
+ if (desc && desc->irq) {
+ /*
+ * PCI-MSI has only one descriptor for multiple interrupts.
+ * PCI-MSIX and platform MSI use a descriptor per
+ * interrupt.
+ */
+ if (pcimsi) {
+ if (index < desc->nvec_used)
+ ret = desc->irq + index;
+ } else {
+ ret = desc->irq;
+ }
+ }
+
+ msi_unlock_descs(dev);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(msi_domain_get_virq);
+
+#ifdef CONFIG_SYSFS
+static struct attribute *msi_dev_attrs[] = {
+ NULL
+};
+
+static const struct attribute_group msi_irqs_group = {
+ .name = "msi_irqs",
+ .attrs = msi_dev_attrs,
+};
+
+static inline int msi_sysfs_create_group(struct device *dev)
+{
+ return devm_device_add_group(dev, &msi_irqs_group);
+}
+
+static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ /* MSI vs. MSIX is per device not per interrupt */
+ bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
+
+ return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
+}
+
+static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
+{
+ struct device_attribute *attrs = desc->sysfs_attrs;
+ int i;
+
+ if (!attrs)
+ return;
+
+ desc->sysfs_attrs = NULL;
+ for (i = 0; i < desc->nvec_used; i++) {
+ if (attrs[i].show)
+ sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
+ kfree(attrs[i].attr.name);
+ }
+ kfree(attrs);
+}
+
+static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
+{
+ struct device_attribute *attrs;
+ int ret, i;
+
+ attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+
+ desc->sysfs_attrs = attrs;
+ for (i = 0; i < desc->nvec_used; i++) {
+ sysfs_attr_init(&attrs[i].attr);
+ attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i);
+ if (!attrs[i].attr.name) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ attrs[i].attr.mode = 0444;
+ attrs[i].show = msi_mode_show;
+
+ ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
+ if (ret) {
+ attrs[i].show = NULL;
+ goto fail;
+ }
+ }
+ return 0;
+
+fail:
+ msi_sysfs_remove_desc(dev, desc);
+ return ret;
+}
+
+#if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN)
+/**
+ * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
+ * @dev: The device (PCI, platform etc) which will get sysfs entries
+ */
+int msi_device_populate_sysfs(struct device *dev)
+{
+ struct msi_desc *desc;
+ int ret;
+
+ msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
+ if (desc->sysfs_attrs)
+ continue;
+ ret = msi_sysfs_populate_desc(dev, desc);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+/**
+ * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
+ * @dev: The device (PCI, platform etc) for which to remove
+ * sysfs entries
+ */
+void msi_device_destroy_sysfs(struct device *dev)
+{
+ struct msi_desc *desc;
+
+ msi_for_each_desc(desc, dev, MSI_DESC_ALL)
+ msi_sysfs_remove_desc(dev, desc);
+}
+#endif /* CONFIG_PCI_MSI_ARCH_FALLBACK || CONFIG_PCI_XEN */
+#else /* CONFIG_SYSFS */
+static inline int msi_sysfs_create_group(struct device *dev) { return 0; }
+static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; }
+static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { }
+#endif /* !CONFIG_SYSFS */
+
+static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid)
+{
+ struct irq_domain *domain;
+
+ lockdep_assert_held(&dev->msi.data->mutex);
+
+ if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
+ return NULL;
+
+ domain = dev->msi.data->__domains[domid].domain;
+ if (!domain)
+ return NULL;
+
+ if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain)))
+ return NULL;
+
+ return domain;
+}
+
+static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid)
+{
+ struct msi_domain_info *info;
+ struct irq_domain *domain;
+
+ domain = msi_get_device_domain(dev, domid);
+ if (domain) {
+ info = domain->host_data;
+ return info->hwsize;
+ }
+ /* No domain, default to MSI_XA_DOMAIN_SIZE */
+ return MSI_XA_DOMAIN_SIZE;
+}
+
static inline void irq_chip_write_msi_msg(struct irq_data *data,
struct msi_msg *msg)
{
@@ -97,6 +657,8 @@ static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
*
* Intended to be used by MSI interrupt controllers which are
* implemented with hierarchical domains.
+ *
+ * Return: IRQ_SET_MASK_* result code
*/
int msi_domain_set_affinity(struct irq_data *irq_data,
const struct cpumask *mask, bool force)
@@ -156,7 +718,7 @@ static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
if (ret < 0) {
if (ops->msi_free) {
- for (i--; i > 0; i--)
+ for (i--; i >= 0; i--)
ops->msi_free(domain, info, virq + i);
}
irq_domain_free_irqs_top(domain, virq, nr_irqs);
@@ -180,11 +742,26 @@ static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
+static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec,
+ irq_hw_number_t *hwirq, unsigned int *type)
+{
+ struct msi_domain_info *info = domain->host_data;
+
+ /*
+ * This will catch allocations through the regular irqdomain path except
+ * for MSI domains which really support this, e.g. MBIGEN.
+ */
+ if (!info->ops->msi_translate)
+ return -ENOTSUPP;
+ return info->ops->msi_translate(domain, fwspec, hwirq, type);
+}
+
static const struct irq_domain_ops msi_domain_ops = {
.alloc = msi_domain_alloc,
.free = msi_domain_free,
.activate = msi_domain_activate,
.deactivate = msi_domain_deactivate,
+ .translate = msi_domain_translate,
};
static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
@@ -221,21 +798,11 @@ static int msi_domain_ops_init(struct irq_domain *domain,
return 0;
}
-static int msi_domain_ops_check(struct irq_domain *domain,
- struct msi_domain_info *info,
- struct device *dev)
-{
- return 0;
-}
-
static struct msi_domain_ops msi_domain_ops_default = {
.get_hwirq = msi_domain_ops_get_hwirq,
.msi_init = msi_domain_ops_init,
- .msi_check = msi_domain_ops_check,
.msi_prepare = msi_domain_ops_prepare,
.set_desc = msi_domain_ops_set_desc,
- .domain_alloc_irqs = __msi_domain_alloc_irqs,
- .domain_free_irqs = __msi_domain_free_irqs,
};
static void msi_domain_update_dom_ops(struct msi_domain_info *info)
@@ -247,11 +814,6 @@ static void msi_domain_update_dom_ops(struct msi_domain_info *info)
return;
}
- if (ops->domain_alloc_irqs == NULL)
- ops->domain_alloc_irqs = msi_domain_ops_default.domain_alloc_irqs;
- if (ops->domain_free_irqs == NULL)
- ops->domain_free_irqs = msi_domain_ops_default.domain_free_irqs;
-
if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
return;
@@ -259,8 +821,6 @@ static void msi_domain_update_dom_ops(struct msi_domain_info *info)
ops->get_hwirq = msi_domain_ops_default.get_hwirq;
if (ops->msi_init == NULL)
ops->msi_init = msi_domain_ops_default.msi_init;
- if (ops->msi_check == NULL)
- ops->msi_check = msi_domain_ops_default.msi_check;
if (ops->msi_prepare == NULL)
ops->msi_prepare = msi_domain_ops_default.msi_prepare;
if (ops->set_desc == NULL)
@@ -272,90 +832,287 @@ static void msi_domain_update_chip_ops(struct msi_domain_info *info)
struct irq_chip *chip = info->chip;
BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
- if (!chip->irq_set_affinity)
+ if (!chip->irq_set_affinity && !(info->flags & MSI_FLAG_NO_AFFINITY))
chip->irq_set_affinity = msi_domain_set_affinity;
}
+static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
+ struct msi_domain_info *info,
+ unsigned int flags,
+ struct irq_domain *parent)
+{
+ struct irq_domain *domain;
+
+ if (info->hwsize > MSI_XA_DOMAIN_SIZE)
+ return NULL;
+
+ /*
+ * Hardware size 0 is valid for backwards compatibility and for
+ * domains which are not backed by a hardware table. Grant the
+ * maximum index space.
+ */
+ if (!info->hwsize)
+ info->hwsize = MSI_XA_DOMAIN_SIZE;
+
+ msi_domain_update_dom_ops(info);
+ if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
+ msi_domain_update_chip_ops(info);
+
+ domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0,
+ fwnode, &msi_domain_ops, info);
+
+ if (domain) {
+ irq_domain_update_bus_token(domain, info->bus_token);
+ if (info->flags & MSI_FLAG_PARENT_PM_DEV)
+ domain->pm_dev = parent->pm_dev;
+ }
+
+ return domain;
+}
+
/**
- * msi_create_irq_domain - Create a MSI interrupt domain
+ * msi_create_irq_domain - Create an MSI interrupt domain
* @fwnode: Optional fwnode of the interrupt controller
* @info: MSI domain info
* @parent: Parent irq domain
+ *
+ * Return: pointer to the created &struct irq_domain or %NULL on failure
*/
struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
struct msi_domain_info *info,
struct irq_domain *parent)
{
- struct irq_domain *domain;
-
- msi_domain_update_dom_ops(info);
- if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
- msi_domain_update_chip_ops(info);
+ return __msi_create_irq_domain(fwnode, info, 0, parent);
+}
- domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0,
- fwnode, &msi_domain_ops, info);
+/**
+ * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down
+ * in the domain hierarchy
+ * @dev: The device for which the domain should be created
+ * @domain: The domain in the hierarchy this op is being called on
+ * @msi_parent_domain: The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to
+ * be created
+ * @msi_child_info: The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE
+ * domain to be created
+ *
+ * Return: true on success, false otherwise
+ *
+ * This is the most complex problem of per device MSI domains and the
+ * underlying interrupt domain hierarchy:
+ *
+ * The device domain to be initialized requests the broadest feature set
+ * possible and the underlying domain hierarchy puts restrictions on it.
+ *
+ * That's trivial for a simple parent->child relationship, but it gets
+ * interesting with an intermediate domain: root->parent->child. The
+ * intermediate 'parent' can expand the capabilities which the 'root'
+ * domain is providing. So that creates a classic hen and egg problem:
+ * Which entity is doing the restrictions/expansions?
+ *
+ * One solution is to let the root domain handle the initialization that's
+ * why there is the @domain and the @msi_parent_domain pointer.
+ */
+bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *msi_parent_domain,
+ struct msi_domain_info *msi_child_info)
+{
+ struct irq_domain *parent = domain->parent;
- if (domain && !domain->name && info->chip)
- domain->name = info->chip->name;
+ if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops ||
+ !parent->msi_parent_ops->init_dev_msi_info))
+ return false;
- return domain;
+ return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain,
+ msi_child_info);
}
-int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
- int nvec, msi_alloc_info_t *arg)
+/**
+ * msi_create_device_irq_domain - Create a device MSI interrupt domain
+ * @dev: Pointer to the device
+ * @domid: Domain id
+ * @template: MSI domain info bundle used as template
+ * @hwsize: Maximum number of MSI table entries (0 if unknown or unlimited)
+ * @domain_data: Optional pointer to domain specific data which is set in
+ * msi_domain_info::data
+ * @chip_data: Optional pointer to chip specific data which is set in
+ * msi_domain_info::chip_data
+ *
+ * Return: True on success, false otherwise
+ *
+ * There is no firmware node required for this interface because the per
+ * device domains are software constructs which are actually closer to the
+ * hardware reality than any firmware can describe them.
+ *
+ * The domain name and the irq chip name for a MSI device domain are
+ * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)"
+ *
+ * $PREFIX: Optional prefix provided by the underlying MSI parent domain
+ * via msi_parent_ops::prefix. If that pointer is NULL the prefix
+ * is empty.
+ * $CHIPNAME: The name of the irq_chip in @template
+ * $DEVNAME: The name of the device
+ *
+ * This results in understandable chip names and hardware interrupt numbers
+ * in e.g. /proc/interrupts
+ *
+ * PCI-MSI-0000:00:1c.0 0-edge Parent domain has no prefix
+ * IR-PCI-MSI-0000:00:1c.4 0-edge Same with interrupt remapping prefix 'IR-'
+ *
+ * IR-PCI-MSIX-0000:3d:00.0 0-edge Hardware interrupt numbers reflect
+ * IR-PCI-MSIX-0000:3d:00.0 1-edge the real MSI-X index on that device
+ * IR-PCI-MSIX-0000:3d:00.0 2-edge
+ *
+ * On IMS domains the hardware interrupt number is either a table entry
+ * index or a purely software managed index but it is guaranteed to be
+ * unique.
+ *
+ * The domain pointer is stored in @dev::msi::data::__irqdomains[]. All
+ * subsequent operations on the domain depend on the domain id.
+ *
+ * The domain is automatically freed when the device is removed via devres
+ * in the context of @dev::msi::data freeing, but it can also be
+ * independently removed via @msi_remove_device_irq_domain().
+ */
+bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
+ const struct msi_domain_template *template,
+ unsigned int hwsize, void *domain_data,
+ void *chip_data)
{
- struct msi_domain_info *info = domain->host_data;
- struct msi_domain_ops *ops = info->ops;
- int ret;
+ struct irq_domain *domain, *parent = dev->msi.domain;
+ struct fwnode_handle *fwnode, *fwnalloced = NULL;
+ struct msi_domain_template *bundle;
+ const struct msi_parent_ops *pops;
- ret = ops->msi_check(domain, info, dev);
- if (ret == 0)
- ret = ops->msi_prepare(domain, dev, nvec, arg);
+ if (!irq_domain_is_msi_parent(parent))
+ return false;
- return ret;
+ if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
+ return false;
+
+ bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL);
+ if (!bundle)
+ return false;
+
+ bundle->info.hwsize = hwsize;
+ bundle->info.chip = &bundle->chip;
+ bundle->info.ops = &bundle->ops;
+ bundle->info.data = domain_data;
+ bundle->info.chip_data = chip_data;
+
+ pops = parent->msi_parent_ops;
+ snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s",
+ pops->prefix ? : "", bundle->chip.name, dev_name(dev));
+ bundle->chip.name = bundle->name;
+
+ /*
+ * Using the device firmware node is required for wire to MSI
+ * device domains so that the existing firmware results in a domain
+ * match.
+ * All other device domains like PCI/MSI use the named firmware
+ * node as they are not guaranteed to have a fwnode. They are never
+ * looked up and always handled in the context of the device.
+ */
+ if (bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE)
+ fwnode = dev->fwnode;
+ else
+ fwnode = fwnalloced = irq_domain_alloc_named_fwnode(bundle->name);
+
+ if (!fwnode)
+ goto free_bundle;
+
+ if (msi_setup_device_data(dev))
+ goto free_fwnode;
+
+ msi_lock_descs(dev);
+
+ if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
+ goto fail;
+
+ if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
+ goto fail;
+
+ domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
+ if (!domain)
+ goto fail;
+
+ domain->dev = dev;
+ dev->msi.data->__domains[domid].domain = domain;
+ msi_unlock_descs(dev);
+ return true;
+
+fail:
+ msi_unlock_descs(dev);
+free_fwnode:
+ irq_domain_free_fwnode(fwnalloced);
+free_bundle:
+ kfree(bundle);
+ return false;
}
-int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
- int virq, int nvec, msi_alloc_info_t *arg)
+/**
+ * msi_remove_device_irq_domain - Free a device MSI interrupt domain
+ * @dev: Pointer to the device
+ * @domid: Domain id
+ */
+void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
{
- struct msi_domain_info *info = domain->host_data;
- struct msi_domain_ops *ops = info->ops;
- struct msi_desc *desc;
- int ret = 0;
+ struct fwnode_handle *fwnode = NULL;
+ struct msi_domain_info *info;
+ struct irq_domain *domain;
- for_each_msi_entry(desc, dev) {
- /* Don't even try the multi-MSI brain damage. */
- if (WARN_ON(!desc->irq || desc->nvec_used != 1)) {
- ret = -EINVAL;
- break;
- }
+ msi_lock_descs(dev);
- if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
- continue;
+ domain = msi_get_device_domain(dev, domid);
- ops->set_desc(arg, desc);
- /* Assumes the domain mutex is held! */
- ret = irq_domain_alloc_irqs_hierarchy(domain, desc->irq, 1,
- arg);
- if (ret)
- break;
+ if (!domain || !irq_domain_is_msi_device(domain))
+ goto unlock;
- irq_set_msi_desc_off(desc->irq, 0, desc);
- }
+ dev->msi.data->__domains[domid].domain = NULL;
+ info = domain->host_data;
+ if (irq_domain_is_msi_device(domain))
+ fwnode = domain->fwnode;
+ irq_domain_remove(domain);
+ irq_domain_free_fwnode(fwnode);
+ kfree(container_of(info, struct msi_domain_template, info));
- if (ret) {
- /* Mop up the damage */
- for_each_msi_entry(desc, dev) {
- if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
- continue;
+unlock:
+ msi_unlock_descs(dev);
+}
- irq_domain_free_irqs_common(domain, desc->irq, 1);
- }
- }
+/**
+ * msi_match_device_irq_domain - Match a device irq domain against a bus token
+ * @dev: Pointer to the device
+ * @domid: Domain id
+ * @bus_token: Bus token to match against the domain bus token
+ *
+ * Return: True if device domain exists and bus tokens match.
+ */
+bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
+ enum irq_domain_bus_token bus_token)
+{
+ struct msi_domain_info *info;
+ struct irq_domain *domain;
+ bool ret = false;
+ msi_lock_descs(dev);
+ domain = msi_get_device_domain(dev, domid);
+ if (domain && irq_domain_is_msi_device(domain)) {
+ info = domain->host_data;
+ ret = info->bus_token == bus_token;
+ }
+ msi_unlock_descs(dev);
return ret;
}
+static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *arg)
+{
+ struct msi_domain_info *info = domain->host_data;
+ struct msi_domain_ops *ops = info->ops;
+
+ return ops->msi_prepare(domain, dev, nvec, arg);
+}
+
/*
* Carefully check whether the device can use reservation mode. If
* reservation mode is enabled then the early activation will assign a
@@ -375,6 +1132,8 @@ static bool msi_check_reservation_mode(struct irq_domain *domain,
switch(domain->bus_token) {
case DOMAIN_BUS_PCI_MSI:
+ case DOMAIN_BUS_PCI_DEVICE_MSI:
+ case DOMAIN_BUS_PCI_DEVICE_MSIX:
case DOMAIN_BUS_VMD_MSI:
break;
default:
@@ -389,161 +1148,575 @@ static bool msi_check_reservation_mode(struct irq_domain *domain,
/*
* Checking the first MSI descriptor is sufficient. MSIX supports
- * masking and MSI does so when the maskbit is set.
+ * masking and MSI does so when the can_mask attribute is set.
+ */
+ desc = msi_first_desc(dev, MSI_DESC_ALL);
+ return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
+}
+
+static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
+ int allocated)
+{
+ switch(domain->bus_token) {
+ case DOMAIN_BUS_PCI_MSI:
+ case DOMAIN_BUS_PCI_DEVICE_MSI:
+ case DOMAIN_BUS_PCI_DEVICE_MSIX:
+ case DOMAIN_BUS_VMD_MSI:
+ if (IS_ENABLED(CONFIG_PCI_MSI))
+ break;
+ fallthrough;
+ default:
+ return -ENOSPC;
+ }
+
+ /* Let a failed PCI multi MSI allocation retry */
+ if (desc->nvec_used > 1)
+ return 1;
+
+ /* If there was a successful allocation let the caller know */
+ return allocated ? allocated : -ENOSPC;
+}
+
+#define VIRQ_CAN_RESERVE 0x01
+#define VIRQ_ACTIVATE 0x02
+
+static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
+{
+ struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
+ int ret;
+
+ if (!(vflags & VIRQ_CAN_RESERVE)) {
+ irqd_clr_can_reserve(irqd);
+
+ /*
+ * If the interrupt is managed but no CPU is available to
+ * service it, shut it down until better times. Note that
+ * we only do this on the !RESERVE path as x86 (the only
+ * architecture using this flag) deals with this in a
+ * different way by using a catch-all vector.
+ */
+ if ((vflags & VIRQ_ACTIVATE) &&
+ irqd_affinity_is_managed(irqd) &&
+ !cpumask_intersects(irq_data_get_affinity_mask(irqd),
+ cpu_online_mask)) {
+ irqd_set_managed_shutdown(irqd);
+ return 0;
+ }
+ }
+
+ if (!(vflags & VIRQ_ACTIVATE))
+ return 0;
+
+ ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE);
+ if (ret)
+ return ret;
+ /*
+ * If the interrupt uses reservation mode, clear the activated bit
+ * so request_irq() will assign the final vector.
*/
- desc = first_msi_entry(dev);
- return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
+ if (vflags & VIRQ_CAN_RESERVE)
+ irqd_clr_activated(irqd);
+ return 0;
}
-int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
- int nvec)
+static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
+ struct msi_ctrl *ctrl)
{
+ struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
struct msi_domain_info *info = domain->host_data;
struct msi_domain_ops *ops = info->ops;
- struct irq_data *irq_data;
- struct msi_desc *desc;
+ unsigned int vflags = 0, allocated = 0;
msi_alloc_info_t arg = { };
+ struct msi_desc *desc;
+ unsigned long idx;
int i, ret, virq;
- bool can_reserve;
- ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
+ ret = msi_domain_prepare_irqs(domain, dev, ctrl->nirqs, &arg);
if (ret)
return ret;
- for_each_msi_entry(desc, dev) {
+ /*
+ * This flag is set by the PCI layer as we need to activate
+ * the MSI entries before the PCI layer enables MSI in the
+ * card. Otherwise the card latches a random msi message.
+ */
+ if (info->flags & MSI_FLAG_ACTIVATE_EARLY)
+ vflags |= VIRQ_ACTIVATE;
+
+ /*
+ * Interrupt can use a reserved vector and will not occupy
+ * a real device vector until the interrupt is requested.
+ */
+ if (msi_check_reservation_mode(domain, info, dev))
+ vflags |= VIRQ_CAN_RESERVE;
+
+ xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
+ if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED))
+ continue;
+
+ /* This should return -ECONFUSED... */
+ if (WARN_ON_ONCE(allocated >= ctrl->nirqs))
+ return -EINVAL;
+
+ if (ops->prepare_desc)
+ ops->prepare_desc(domain, &arg, desc);
+
ops->set_desc(&arg, desc);
virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
dev_to_node(dev), &arg, false,
desc->affinity);
- if (virq < 0) {
- ret = -ENOSPC;
- if (ops->handle_error)
- ret = ops->handle_error(domain, desc, ret);
- if (ops->msi_finish)
- ops->msi_finish(&arg, ret);
- return ret;
- }
+ if (virq < 0)
+ return msi_handle_pci_fail(domain, desc, allocated);
for (i = 0; i < desc->nvec_used; i++) {
irq_set_msi_desc_off(virq, i, desc);
irq_debugfs_copy_devname(virq + i, dev);
+ ret = msi_init_virq(domain, virq + i, vflags);
+ if (ret)
+ return ret;
+ }
+ if (info->flags & MSI_FLAG_DEV_SYSFS) {
+ ret = msi_sysfs_populate_desc(dev, desc);
+ if (ret)
+ return ret;
}
+ allocated++;
}
+ return 0;
+}
- if (ops->msi_finish)
- ops->msi_finish(&arg, 0);
+static int msi_domain_alloc_simple_msi_descs(struct device *dev,
+ struct msi_domain_info *info,
+ struct msi_ctrl *ctrl)
+{
+ if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS))
+ return 0;
- can_reserve = msi_check_reservation_mode(domain, info, dev);
+ return msi_domain_add_simple_msi_descs(dev, ctrl);
+}
- /*
- * This flag is set by the PCI layer as we need to activate
- * the MSI entries before the PCI layer enables MSI in the
- * card. Otherwise the card latches a random msi message.
- */
- if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
- goto skip_activate;
-
- for_each_msi_vector(desc, i, dev) {
- if (desc->irq == i) {
- virq = desc->irq;
- dev_dbg(dev, "irq [%d-%d] for MSI\n",
- virq, virq + desc->nvec_used - 1);
- }
+static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
+{
+ struct msi_domain_info *info;
+ struct msi_domain_ops *ops;
+ struct irq_domain *domain;
+ int ret;
- irq_data = irq_domain_get_irq_data(domain, i);
- if (!can_reserve) {
- irqd_clr_can_reserve(irq_data);
- if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
- irqd_set_msi_nomask_quirk(irq_data);
- }
- ret = irq_domain_activate_irq(irq_data, can_reserve);
- if (ret)
- goto cleanup;
+ if (!msi_ctrl_valid(dev, ctrl))
+ return -EINVAL;
+
+ domain = msi_get_device_domain(dev, ctrl->domid);
+ if (!domain)
+ return -ENODEV;
+
+ info = domain->host_data;
+
+ ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl);
+ if (ret)
+ return ret;
+
+ ops = info->ops;
+ if (ops->domain_alloc_irqs)
+ return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs);
+
+ return __msi_domain_alloc_irqs(dev, domain, ctrl);
+}
+
+static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
+{
+ int ret = __msi_domain_alloc_locked(dev, ctrl);
+
+ if (ret)
+ msi_domain_free_locked(dev, ctrl);
+ return ret;
+}
+
+/**
+ * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain
+ * @dev: Pointer to device struct of the device for which the interrupts
+ * are allocated
+ * @domid: Id of the interrupt domain to operate on
+ * @first: First index to allocate (inclusive)
+ * @last: Last index to allocate (inclusive)
+ *
+ * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
+ * pair. Use this for MSI irqdomains which implement their own descriptor
+ * allocation/free.
+ *
+ * Return: %0 on success or an error code.
+ */
+int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last)
+{
+ struct msi_ctrl ctrl = {
+ .domid = domid,
+ .first = first,
+ .last = last,
+ .nirqs = last + 1 - first,
+ };
+
+ return msi_domain_alloc_locked(dev, &ctrl);
+}
+
+/**
+ * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain
+ * @dev: Pointer to device struct of the device for which the interrupts
+ * are allocated
+ * @domid: Id of the interrupt domain to operate on
+ * @first: First index to allocate (inclusive)
+ * @last: Last index to allocate (inclusive)
+ *
+ * Return: %0 on success or an error code.
+ */
+int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last)
+{
+ int ret;
+
+ msi_lock_descs(dev);
+ ret = msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
+ msi_unlock_descs(dev);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range);
+
+/**
+ * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
+ *
+ * @dev: Pointer to device struct of the device for which the interrupts
+ * are allocated
+ * @domid: Id of the interrupt domain to operate on
+ * @nirqs: The number of interrupts to allocate
+ *
+ * This function scans all MSI descriptors of the MSI domain and allocates interrupts
+ * for all unassigned ones. That function is to be used for MSI domain usage where
+ * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X].
+ *
+ * Return: %0 on success or an error code.
+ */
+int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs)
+{
+ struct msi_ctrl ctrl = {
+ .domid = domid,
+ .first = 0,
+ .last = msi_domain_get_hwsize(dev, domid) - 1,
+ .nirqs = nirqs,
+ };
+
+ return msi_domain_alloc_locked(dev, &ctrl);
+}
+
+static struct msi_map __msi_domain_alloc_irq_at(struct device *dev, unsigned int domid,
+ unsigned int index,
+ const struct irq_affinity_desc *affdesc,
+ union msi_instance_cookie *icookie)
+{
+ struct msi_ctrl ctrl = { .domid = domid, .nirqs = 1, };
+ struct irq_domain *domain;
+ struct msi_map map = { };
+ struct msi_desc *desc;
+ int ret;
+
+ domain = msi_get_device_domain(dev, domid);
+ if (!domain) {
+ map.index = -ENODEV;
+ return map;
}
-skip_activate:
- /*
- * If these interrupts use reservation mode, clear the activated bit
- * so request_irq() will assign the final vector.
- */
- if (can_reserve) {
- for_each_msi_vector(desc, i, dev) {
- irq_data = irq_domain_get_irq_data(domain, i);
- irqd_clr_activated(irq_data);
- }
+ desc = msi_alloc_desc(dev, 1, affdesc);
+ if (!desc) {
+ map.index = -ENOMEM;
+ return map;
}
- return 0;
-cleanup:
- for_each_msi_vector(desc, i, dev) {
- irq_data = irq_domain_get_irq_data(domain, i);
- if (irqd_is_activated(irq_data))
- irq_domain_deactivate_irq(irq_data);
+ if (icookie)
+ desc->data.icookie = *icookie;
+
+ ret = msi_insert_desc(dev, desc, domid, index);
+ if (ret) {
+ map.index = ret;
+ return map;
}
- msi_domain_free_irqs(domain, dev);
- return ret;
+
+ ctrl.first = ctrl.last = desc->msi_index;
+
+ ret = __msi_domain_alloc_irqs(dev, domain, &ctrl);
+ if (ret) {
+ map.index = ret;
+ msi_domain_free_locked(dev, &ctrl);
+ } else {
+ map.index = desc->msi_index;
+ map.virq = desc->irq;
+ }
+ return map;
}
/**
- * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
- * @domain: The domain to allocate from
+ * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
+ * a given index - or at the next free index
+ *
* @dev: Pointer to device struct of the device for which the interrupts
* are allocated
- * @nvec: The number of interrupts to allocate
+ * @domid: Id of the interrupt domain to operate on
+ * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation
+ * uses the next free index.
+ * @affdesc: Optional pointer to an interrupt affinity descriptor structure
+ * @icookie: Optional pointer to a domain specific per instance cookie. If
+ * non-NULL the content of the cookie is stored in msi_desc::data.
+ * Must be NULL for MSI-X allocations
+ *
+ * This requires a MSI interrupt domain which lets the core code manage the
+ * MSI descriptors.
+ *
+ * Return: struct msi_map
+ *
+ * On success msi_map::index contains the allocated index number and
+ * msi_map::virq the corresponding Linux interrupt number
*
- * Returns 0 on success or an error code.
+ * On failure msi_map::index contains the error code and msi_map::virq
+ * is %0.
*/
-int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
- int nvec)
+struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
+ const struct irq_affinity_desc *affdesc,
+ union msi_instance_cookie *icookie)
{
- struct msi_domain_info *info = domain->host_data;
- struct msi_domain_ops *ops = info->ops;
+ struct msi_map map;
+
+ msi_lock_descs(dev);
+ map = __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
+ msi_unlock_descs(dev);
+ return map;
+}
- return ops->domain_alloc_irqs(domain, dev, nvec);
+/**
+ * msi_device_domain_alloc_wired - Allocate a "wired" interrupt on @domain
+ * @domain: The domain to allocate on
+ * @hwirq: The hardware interrupt number to allocate for
+ * @type: The interrupt type
+ *
+ * This weirdness supports wire to MSI controllers like MBIGEN.
+ *
+ * @hwirq is the hardware interrupt number which is handed in from
+ * irq_create_fwspec_mapping(). As the wire to MSI domain is sparse, but
+ * sized in firmware, the hardware interrupt number cannot be used as MSI
+ * index. For the underlying irq chip the MSI index is irrelevant and
+ * all it needs is the hardware interrupt number.
+ *
+ * To handle this the MSI index is allocated with MSI_ANY_INDEX and the
+ * hardware interrupt number is stored along with the type information in
+ * msi_desc::cookie so the underlying interrupt chip and domain code can
+ * retrieve it.
+ *
+ * Return: The Linux interrupt number (> 0) or an error code
+ */
+int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
+ unsigned int type)
+{
+ unsigned int domid = MSI_DEFAULT_DOMAIN;
+ union msi_instance_cookie icookie = { };
+ struct device *dev = domain->dev;
+ struct msi_map map = { };
+
+ if (WARN_ON_ONCE(!dev || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
+ return -EINVAL;
+
+ icookie.value = ((u64)type << 32) | hwirq;
+
+ msi_lock_descs(dev);
+ if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain))
+ map.index = -EINVAL;
+ else
+ map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie);
+ msi_unlock_descs(dev);
+
+ return map.index >= 0 ? map.virq : map.index;
}
-void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
+static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
+ struct msi_ctrl *ctrl)
{
+ struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
+ struct msi_domain_info *info = domain->host_data;
+ struct irq_data *irqd;
struct msi_desc *desc;
+ unsigned long idx;
+ int i;
- for_each_msi_entry(desc, dev) {
- /*
- * We might have failed to allocate an MSI early
- * enough that there is no IRQ associated to this
- * entry. If that's the case, don't do anything.
- */
- if (desc->irq) {
- irq_domain_free_irqs(desc->irq, desc->nvec_used);
- desc->irq = 0;
+ xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
+ /* Only handle MSI entries which have an interrupt associated */
+ if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED))
+ continue;
+
+ /* Make sure all interrupts are deactivated */
+ for (i = 0; i < desc->nvec_used; i++) {
+ irqd = irq_domain_get_irq_data(domain, desc->irq + i);
+ if (irqd && irqd_is_activated(irqd))
+ irq_domain_deactivate_irq(irqd);
}
+
+ irq_domain_free_irqs(desc->irq, desc->nvec_used);
+ if (info->flags & MSI_FLAG_DEV_SYSFS)
+ msi_sysfs_remove_desc(dev, desc);
+ desc->irq = 0;
}
}
+static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
+{
+ struct msi_domain_info *info;
+ struct msi_domain_ops *ops;
+ struct irq_domain *domain;
+
+ if (!msi_ctrl_valid(dev, ctrl))
+ return;
+
+ domain = msi_get_device_domain(dev, ctrl->domid);
+ if (!domain)
+ return;
+
+ info = domain->host_data;
+ ops = info->ops;
+
+ if (ops->domain_free_irqs)
+ ops->domain_free_irqs(domain, dev);
+ else
+ __msi_domain_free_irqs(dev, domain, ctrl);
+
+ if (ops->msi_post_free)
+ ops->msi_post_free(domain, dev);
+
+ if (info->flags & MSI_FLAG_FREE_MSI_DESCS)
+ msi_domain_free_descs(dev, ctrl);
+}
+
/**
- * __msi_domain_free_irqs - Free interrupts from a MSI interrupt @domain associated tp @dev
- * @domain: The domain to managing the interrupts
+ * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain
+ * associated to @dev with msi_lock held
* @dev: Pointer to device struct of the device for which the interrupts
- * are free
+ * are freed
+ * @domid: Id of the interrupt domain to operate on
+ * @first: First index to free (inclusive)
+ * @last: Last index to free (inclusive)
*/
-void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
+void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last)
{
- struct msi_domain_info *info = domain->host_data;
- struct msi_domain_ops *ops = info->ops;
+ struct msi_ctrl ctrl = {
+ .domid = domid,
+ .first = first,
+ .last = last,
+ };
+ msi_domain_free_locked(dev, &ctrl);
+}
- return ops->domain_free_irqs(domain, dev);
+/**
+ * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain
+ * associated to @dev
+ * @dev: Pointer to device struct of the device for which the interrupts
+ * are freed
+ * @domid: Id of the interrupt domain to operate on
+ * @first: First index to free (inclusive)
+ * @last: Last index to free (inclusive)
+ */
+void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
+ unsigned int first, unsigned int last)
+{
+ msi_lock_descs(dev);
+ msi_domain_free_irqs_range_locked(dev, domid, first, last);
+ msi_unlock_descs(dev);
+}
+EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all);
+
+/**
+ * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain
+ * associated to a device
+ * @dev: Pointer to device struct of the device for which the interrupts
+ * are freed
+ * @domid: The id of the domain to operate on
+ *
+ * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
+ * pair. Use this for MSI irqdomains which implement their own vector
+ * allocation.
+ */
+void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
+{
+ msi_domain_free_irqs_range_locked(dev, domid, 0,
+ msi_domain_get_hwsize(dev, domid) - 1);
+}
+
+/**
+ * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain
+ * associated to a device
+ * @dev: Pointer to device struct of the device for which the interrupts
+ * are freed
+ * @domid: The id of the domain to operate on
+ */
+void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
+{
+ msi_lock_descs(dev);
+ msi_domain_free_irqs_all_locked(dev, domid);
+ msi_unlock_descs(dev);
+}
+
+/**
+ * msi_device_domain_free_wired - Free a wired interrupt in @domain
+ * @domain: The domain to free the interrupt on
+ * @virq: The Linux interrupt number to free
+ *
+ * This is the counterpart of msi_device_domain_alloc_wired() for the
+ * weird wired to MSI converting domains.
+ */
+void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
+{
+ struct msi_desc *desc = irq_get_msi_desc(virq);
+ struct device *dev = domain->dev;
+
+ if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
+ return;
+
+ msi_lock_descs(dev);
+ if (!WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain)) {
+ msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index,
+ desc->msi_index);
+ }
+ msi_unlock_descs(dev);
}
/**
* msi_get_domain_info - Get the MSI interrupt domain info for @domain
* @domain: The interrupt domain to retrieve data from
*
- * Returns the pointer to the msi_domain_info stored in
- * @domain->host_data.
+ * Return: the pointer to the msi_domain_info stored in @domain->host_data.
*/
struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
{
return (struct msi_domain_info *)domain->host_data;
}
-#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
+/**
+ * msi_device_has_isolated_msi - True if the device has isolated MSI
+ * @dev: The device to check
+ *
+ * Isolated MSI means that HW modeled by an irq_domain on the path from the
+ * initiating device to the CPU will validate that the MSI message specifies an
+ * interrupt number that the device is authorized to trigger. This must block
+ * devices from triggering interrupts they are not authorized to trigger.
+ * Currently authorization means the MSI vector is one assigned to the device.
+ *
+ * This is interesting for securing VFIO use cases where a rouge MSI (eg created
+ * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to
+ * impact outside its security domain, eg userspace triggering interrupts on
+ * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM
+ * triggering interrupts on another VM.
+ */
+bool msi_device_has_isolated_msi(struct device *dev)
+{
+ struct irq_domain *domain = dev_get_msi_domain(dev);
+
+ for (; domain; domain = domain->parent)
+ if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI)
+ return true;
+ return arch_is_isolated_msi();
+}
+EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index ce0adb22ee96..c556bc49d213 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -147,7 +147,6 @@ void suspend_device_irqs(void)
synchronize_irq(irq);
}
}
-EXPORT_SYMBOL_GPL(suspend_device_irqs);
static void resume_irq(struct irq_desc *desc)
{
@@ -227,7 +226,7 @@ unlock:
}
/**
- * irq_pm_syscore_ops - enable interrupt lines early
+ * irq_pm_syscore_resume - enable interrupt lines early
*
* Enable all interrupt lines with %IRQF_EARLY_RESUME set.
*/
@@ -259,4 +258,3 @@ void resume_device_irqs(void)
{
resume_irqs(false);
}
-EXPORT_SYMBOL_GPL(resume_device_irqs);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 98138788cb04..8e29809de38d 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -52,10 +52,8 @@ static int show_irq_affinity(int type, struct seq_file *m)
case AFFINITY:
case AFFINITY_LIST:
mask = desc->irq_common_data.affinity;
-#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (irqd_is_setaffinity_pending(&desc->irq_data))
- mask = desc->pending_mask;
-#endif
+ if (irq_move_pending(&desc->irq_data))
+ mask = irq_desc_get_pending_mask(desc);
break;
case EFFECTIVE:
case EFFECTIVE_LIST:
@@ -137,14 +135,14 @@ static inline int irq_select_affinity_usr(unsigned int irq)
static ssize_t write_irq_affinity(int type, struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
- unsigned int irq = (int)(long)PDE_DATA(file_inode(file));
+ unsigned int irq = (int)(long)pde_data(file_inode(file));
cpumask_var_t new_value;
int err;
if (!irq_can_set_affinity_usr(irq) || no_irq_affinity)
- return -EIO;
+ return -EPERM;
- if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
if (type)
@@ -190,12 +188,12 @@ static ssize_t irq_affinity_list_proc_write(struct file *file,
static int irq_affinity_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, irq_affinity_proc_show, PDE_DATA(inode));
+ return single_open(file, irq_affinity_proc_show, pde_data(inode));
}
static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, irq_affinity_list_proc_show, PDE_DATA(inode));
+ return single_open(file, irq_affinity_list_proc_show, pde_data(inode));
}
static const struct proc_ops irq_affinity_proc_ops = {
@@ -238,7 +236,7 @@ static ssize_t default_affinity_write(struct file *file,
cpumask_var_t new_value;
int err;
- if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);
@@ -265,7 +263,7 @@ out:
static int default_affinity_open(struct inode *inode, struct file *file)
{
- return single_open(file, default_affinity_show, PDE_DATA(inode));
+ return single_open(file, default_affinity_show, pde_data(inode));
}
static const struct proc_ops default_affinity_proc_ops = {
@@ -362,8 +360,13 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
goto out_unlock;
#ifdef CONFIG_SMP
+ umode_t umode = S_IRUGO;
+
+ if (irq_can_set_affinity_usr(desc->irq_data.irq))
+ umode |= S_IWUSR;
+
/* create /proc/irq/<irq>/smp_affinity */
- proc_create_data("smp_affinity", 0644, desc->dir,
+ proc_create_data("smp_affinity", umode, desc->dir,
&irq_affinity_proc_ops, irqp);
/* create /proc/irq/<irq>/affinity_hint */
@@ -371,7 +374,7 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
irq_affinity_hint_proc_show, irqp);
/* create /proc/irq/<irq>/smp_affinity_list */
- proc_create_data("smp_affinity_list", 0644, desc->dir,
+ proc_create_data("smp_affinity_list", umode, desc->dir,
&irq_affinity_list_proc_ops, irqp);
proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show,
@@ -454,17 +457,18 @@ int __weak arch_show_interrupts(struct seq_file *p, int prec)
}
#ifndef ACTUAL_NR_IRQS
-# define ACTUAL_NR_IRQS nr_irqs
+# define ACTUAL_NR_IRQS irq_get_nr_irqs()
#endif
int show_interrupts(struct seq_file *p, void *v)
{
+ const unsigned int nr_irqs = irq_get_nr_irqs();
static int prec;
- unsigned long flags, any_count = 0;
int i = *(loff_t *) v, j;
struct irqaction *action;
struct irq_desc *desc;
+ unsigned long flags;
if (i > ACTUAL_NR_IRQS)
return 0;
@@ -488,32 +492,30 @@ int show_interrupts(struct seq_file *p, void *v)
if (!desc || irq_settings_is_hidden(desc))
goto outsparse;
- if (desc->kstat_irqs) {
- for_each_online_cpu(j)
- any_count |= data_race(*per_cpu_ptr(desc->kstat_irqs, j));
- }
-
- if ((!desc->action || irq_desc_is_chained(desc)) && !any_count)
+ if (!desc->action || irq_desc_is_chained(desc) || !desc->kstat_irqs)
goto outsparse;
- seq_printf(p, "%*d: ", prec, i);
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", desc->kstat_irqs ?
- *per_cpu_ptr(desc->kstat_irqs, j) : 0);
+ seq_printf(p, "%*d:", prec, i);
+ for_each_online_cpu(j) {
+ unsigned int cnt = desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, j) : 0;
+
+ seq_put_decimal_ull_width(p, " ", cnt, 10);
+ }
+ seq_putc(p, ' ');
raw_spin_lock_irqsave(&desc->lock, flags);
if (desc->irq_data.chip) {
if (desc->irq_data.chip->irq_print_chip)
desc->irq_data.chip->irq_print_chip(&desc->irq_data, p);
else if (desc->irq_data.chip->name)
- seq_printf(p, " %8s", desc->irq_data.chip->name);
+ seq_printf(p, "%8s", desc->irq_data.chip->name);
else
- seq_printf(p, " %8s", "-");
+ seq_printf(p, "%8s", "-");
} else {
- seq_printf(p, " %8s", "None");
+ seq_printf(p, "%8s", "None");
}
if (desc->irq_data.domain)
- seq_printf(p, " %*d", prec, (int) desc->irq_data.hwirq);
+ seq_printf(p, " %*lu", prec, desc->irq_data.hwirq);
else
seq_printf(p, " %*s", prec, "");
#ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 8ccd32a0cc80..1b7fa72968bd 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -21,41 +21,39 @@
#ifdef CONFIG_HARDIRQS_SW_RESEND
-/* Bitmap to handle software resend of interrupts: */
-static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
+/* hlist_head to handle software resend of interrupts: */
+static HLIST_HEAD(irq_resend_list);
+static DEFINE_RAW_SPINLOCK(irq_resend_lock);
/*
* Run software resends of IRQ's
*/
-static void resend_irqs(unsigned long arg)
+static void resend_irqs(struct tasklet_struct *unused)
{
struct irq_desc *desc;
- int irq;
- while (!bitmap_empty(irqs_resend, nr_irqs)) {
- irq = find_first_bit(irqs_resend, nr_irqs);
- clear_bit(irq, irqs_resend);
- desc = irq_to_desc(irq);
- if (!desc)
- continue;
- local_irq_disable();
+ raw_spin_lock_irq(&irq_resend_lock);
+ while (!hlist_empty(&irq_resend_list)) {
+ desc = hlist_entry(irq_resend_list.first, struct irq_desc,
+ resend_node);
+ hlist_del_init(&desc->resend_node);
+ raw_spin_unlock(&irq_resend_lock);
desc->handle_irq(desc);
- local_irq_enable();
+ raw_spin_lock(&irq_resend_lock);
}
+ raw_spin_unlock_irq(&irq_resend_lock);
}
/* Tasklet to handle resend: */
-static DECLARE_TASKLET_OLD(resend_tasklet, resend_irqs);
+static DECLARE_TASKLET(resend_tasklet, resend_irqs);
static int irq_sw_resend(struct irq_desc *desc)
{
- unsigned int irq = irq_desc_get_irq(desc);
-
/*
* Validate whether this interrupt can be safely injected from
* non interrupt context
*/
- if (handle_enforce_irqctx(&desc->irq_data))
+ if (irqd_is_handle_enforce_irqctx(&desc->irq_data))
return -EINVAL;
/*
@@ -70,16 +68,36 @@ static int irq_sw_resend(struct irq_desc *desc)
*/
if (!desc->parent_irq)
return -EINVAL;
- irq = desc->parent_irq;
+
+ desc = irq_to_desc(desc->parent_irq);
+ if (!desc)
+ return -EINVAL;
}
- /* Set it pending and activate the softirq: */
- set_bit(irq, irqs_resend);
+ /* Add to resend_list and activate the softirq: */
+ raw_spin_lock(&irq_resend_lock);
+ if (hlist_unhashed(&desc->resend_node))
+ hlist_add_head(&desc->resend_node, &irq_resend_list);
+ raw_spin_unlock(&irq_resend_lock);
tasklet_schedule(&resend_tasklet);
return 0;
}
+void clear_irq_resend(struct irq_desc *desc)
+{
+ raw_spin_lock(&irq_resend_lock);
+ hlist_del_init(&desc->resend_node);
+ raw_spin_unlock(&irq_resend_lock);
+}
+
+void irq_resend_init(struct irq_desc *desc)
+{
+ INIT_HLIST_NODE(&desc->resend_node);
+}
#else
+void clear_irq_resend(struct irq_desc *desc) {}
+void irq_resend_init(struct irq_desc *desc) {}
+
static int irq_sw_resend(struct irq_desc *desc)
{
return -EINVAL;
@@ -128,7 +146,7 @@ int check_irq_resend(struct irq_desc *desc, bool inject)
if (!try_retrigger(desc))
err = irq_sw_resend(desc);
- /* If the retrigger was successfull, mark it with the REPLAY bit */
+ /* If the retrigger was successful, mark it with the REPLAY bit */
if (!err)
desc->istate |= IRQS_REPLAY;
return err;
@@ -172,7 +190,7 @@ int irq_inject_interrupt(unsigned int irq)
* - not NMI type
* - activated
*/
- if ((desc->istate & IRQS_NMI) || !irqd_is_activated(&desc->irq_data))
+ if (irq_is_nmi(desc) || !irqd_is_activated(&desc->irq_data))
err = -EINVAL;
else
err = check_irq_resend(desc, true);
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 403378b9947b..00b3bd127692 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -11,13 +11,13 @@ enum {
_IRQ_NOREQUEST = IRQ_NOREQUEST,
_IRQ_NOTHREAD = IRQ_NOTHREAD,
_IRQ_NOAUTOEN = IRQ_NOAUTOEN,
- _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT,
_IRQ_NO_BALANCING = IRQ_NO_BALANCING,
_IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
_IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID,
_IRQ_IS_POLLED = IRQ_IS_POLLED,
_IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY,
_IRQ_HIDDEN = IRQ_HIDDEN,
+ _IRQ_NO_DEBUG = IRQ_NO_DEBUG,
_IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
};
@@ -33,6 +33,7 @@ enum {
#define IRQ_IS_POLLED GOT_YOU_MORON
#define IRQ_DISABLE_UNLAZY GOT_YOU_MORON
#define IRQ_HIDDEN GOT_YOU_MORON
+#define IRQ_NO_DEBUG GOT_YOU_MORON
#undef IRQF_MODIFY_MASK
#define IRQF_MODIFY_MASK GOT_YOU_MORON
@@ -140,11 +141,6 @@ static inline void irq_settings_set_noprobe(struct irq_desc *desc)
desc->status_use_accessors |= _IRQ_NOPROBE;
}
-static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc)
-{
- return desc->status_use_accessors & _IRQ_MOVE_PCNTXT;
-}
-
static inline bool irq_settings_can_autoenable(struct irq_desc *desc)
{
return !(desc->status_use_accessors & _IRQ_NOAUTOEN);
@@ -174,3 +170,13 @@ static inline bool irq_settings_is_hidden(struct irq_desc *desc)
{
return desc->status_use_accessors & _IRQ_HIDDEN;
}
+
+static inline void irq_settings_set_no_debug(struct irq_desc *desc)
+{
+ desc->status_use_accessors |= _IRQ_NO_DEBUG;
+}
+
+static inline bool irq_settings_no_debug(struct irq_desc *desc)
+{
+ return desc->status_use_accessors & _IRQ_NO_DEBUG;
+}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index f865e5f4d382..02b2daf07441 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -403,6 +403,10 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
desc->irqs_unhandled -= ok;
}
+ if (likely(!desc->irqs_unhandled))
+ return;
+
+ /* Now getting into unhandled irq detection */
desc->irq_count++;
if (likely(desc->irq_count < 100000))
return;
@@ -443,6 +447,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
static int __init irqfixup_setup(char *str)
{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ pr_warn("irqfixup boot option not supported with PREEMPT_RT\n");
+ return 1;
+ }
irqfixup = 1;
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
printk(KERN_WARNING "This may impact system performance.\n");
@@ -455,6 +463,10 @@ module_param(irqfixup, int, 0644);
static int __init irqpoll_setup(char *str)
{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ pr_warn("irqpoll boot option not supported with PREEMPT_RT\n");
+ return 1;
+ }
irqfixup = 2;
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
"enabled\n");
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index 773b6105c4ae..4b7315e99bd6 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -84,7 +84,7 @@ void irq_timings_disable(void)
* 2. Log interval
*
* We saw the irq timings allow to compute the interval of the
- * occurrences for a specific interrupt. We can reasonibly assume the
+ * occurrences for a specific interrupt. We can reasonably assume the
* longer is the interval, the higher is the error for the next event
* and we can consider storing those interval values into an array
* where each slot in the array correspond to an interval at the power
@@ -416,7 +416,7 @@ static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
* Copy the content of the circular buffer into another buffer
* in order to linearize the buffer instead of dealing with
* wrapping indexes and shifted array which will be prone to
- * error and extremelly difficult to debug.
+ * error and extremely difficult to debug.
*/
for (i = 0; i < count; i++) {
int index = (start + i) & IRQ_TIMINGS_MASK;
@@ -453,6 +453,11 @@ static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs,
*/
index = irq_timings_interval_index(interval);
+ if (index > PREDICTION_BUFFER_SIZE - 1) {
+ irqs->count = 0;
+ return;
+ }
+
/*
* Store the index as an element of the pattern in another
* circular array.
@@ -485,7 +490,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
/*
* The interrupt triggered more than one second apart, that
- * ends the sequence as predictible for our purpose. In this
+ * ends the sequence as predictable for our purpose. In this
* case, assume we have the beginning of a sequence and the
* timestamp is the first value. As it is impossible to
* predict anything at this point, return.
@@ -504,6 +509,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
/**
* irq_timings_next_event - Return when the next event is supposed to arrive
+ * @now: current time
*
* During the last busy cycle, the number of interrupts is incremented
* and stored in the irq_timings structure. This information is
@@ -514,7 +520,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
* If more than the array size interrupts happened during the
* last busy/idle cycle, the index wrapped up and we have to
* begin with the next element in the array which is the last one
- * in the sequence, otherwise it is a the index 0.
+ * in the sequence, otherwise it is at the index 0.
*
* - have an indication of the interrupts activity on this CPU
* (eg. irq/sec)
@@ -794,12 +800,14 @@ static int __init irq_timings_test_irqs(struct timings_intervals *ti)
__irq_timings_store(irq, irqs, ti->intervals[i]);
if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) {
+ ret = -EBADSLT;
pr_err("Failed to store in the circular buffer\n");
goto out;
}
}
if (irqs->count != ti->count) {
+ ret = -ERANGE;
pr_err("Count differs\n");
goto out;
}