Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull irq updates from Thomas Gleixner: "The irq departement proudly presents: - A rework of the core infrastructure to optimally spread interrupt for multiqueue devices. The first version was a bit naive and failed to take thread siblings and other details into account. Developed in cooperation with Christoph and Keith. - Proper delegation of softirqs to ksoftirqd, so if ksoftirqd is active then no further softirq processsing on interrupt return happens. Otherwise we try to delegate and still run another batch of network packets in the irq return path, which then tries to delegate to ksoftirqd ..... - A proper machine parseable sysfs based alternative for /proc/interrupts. - ACPI support for the GICV3-ITS and ARM interrupt remapping - Two new irq chips from the ARM SoC zoo: STM32-EXTI and MVEBU-PIC - A new irq chip for the JCore (SuperH) - The usual pile of small fixlets in core and irqchip drivers" * 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (42 commits) softirq: Let ksoftirqd do its job genirq: Make function __irq_do_set_handler() static ARM/dts: Add EXTI controller node to stm32f429 ARM/STM32: Select external interrupts controller drivers/irqchip: Add STM32 external interrupts support Documentation/dt-bindings: Document STM32 EXTI controller bindings irqchip/mips-gic: Use for_each_set_bit to iterate over local IRQs pci/msi: Retrieve affinity for a vector genirq/affinity: Remove old irq spread infrastructure genirq/msi: Switch to new irq spreading infrastructure genirq/affinity: Provide smarter irq spreading infrastructure genirq/msi: Add cpumask allocation to alloc_msi_entry genirq: Expose interrupt information through sysfs irqchip/gicv3-its: Use MADT ITS subtable to do PCI/MSI domain initialization irqchip/gicv3-its: Factor out PCI-MSI part that might be reused for ACPI irqchip/gicv3-its: Probe ITS in the ACPI way irqchip/gicv3-its: Refactor ITS DT init code to prepare for ACPI irqchip/gicv3-its: Cleanup for ITS domain initialization PCI/MSI: Setup MSI domain on a per-device basis using IORT ACPI table ACPI: Add new IORT functions to support MSI domain handling ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-10-03 19:10:15 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-10-03 19:10:15 -0700
commit: 999dcbe2414e15e19cdc1f91497d01f262c6e1cf (patch)
tree: 5ad7cfa7d337f4a38be8045130c7f1a213bbe925 /kernel
parent: 5e1b834b27fb2c27cde33a0752425f11d10c0b2d (diff)
parent: 4cd13c21b207e80ddb1144c576500098f2d5f882 (diff)
8 files changed, 445 insertions, 97 deletions
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 32f6cfcff212..17f51d63da56 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -4,60 +4,151 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 
-static int get_first_sibling(unsigned int cpu)
+static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
+				int cpus_per_vec)
 {
-	unsigned int ret;
+	const struct cpumask *siblmsk;
+	int cpu, sibl;
 
-	ret = cpumask_first(topology_sibling_cpumask(cpu));
-	if (ret < nr_cpu_ids)
-		return ret;
-	return cpu;
+	for ( ; cpus_per_vec > 0; ) {
+		cpu = cpumask_first(nmsk);
+
+		/* Should not happen, but I'm too lazy to think about it */
+		if (cpu >= nr_cpu_ids)
+			return;
+
+		cpumask_clear_cpu(cpu, nmsk);
+		cpumask_set_cpu(cpu, irqmsk);
+		cpus_per_vec--;
+
+		/* If the cpu has siblings, use them first */
+		siblmsk = topology_sibling_cpumask(cpu);
+		for (sibl = -1; cpus_per_vec > 0; ) {
+			sibl = cpumask_next(sibl, siblmsk);
+			if (sibl >= nr_cpu_ids)
+				break;
+			if (!cpumask_test_and_clear_cpu(sibl, nmsk))
+				continue;
+			cpumask_set_cpu(sibl, irqmsk);
+			cpus_per_vec--;
+		}
+	}
+}
+
+static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
+{
+	int n, nodes;
+
+	/* Calculate the number of nodes in the supplied affinity mask */
+	for (n = 0, nodes = 0; n < num_online_nodes(); n++) {
+		if (cpumask_intersects(mask, cpumask_of_node(n))) {
+			node_set(n, *nodemsk);
+			nodes++;
+		}
+	}
+	return nodes;
 }
 
-/*
- * Take a map of online CPUs and the number of available interrupt vectors
- * and generate an output cpumask suitable for spreading MSI/MSI-X vectors
- * so that they are distributed as good as possible around the CPUs.  If
- * more vectors than CPUs are available we'll map one to each CPU,
- * otherwise we map one to the first sibling of each socket.
+/**
+ * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
+ * @affinity:		The affinity mask to spread. If NULL cpu_online_mask
+ *			is used
+ * @nvecs:		The number of vectors
  *
- * If there are more vectors than CPUs we will still only have one bit
- * set per CPU, but interrupt code will keep on assigning the vectors from
- * the start of the bitmap until we run out of vectors.
+ * Returns the masks pointer or NULL if allocation failed.
  */
-struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
+struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
+					  int nvec)
 {
-	struct cpumask *affinity_mask;
-	unsigned int max_vecs = *nr_vecs;
+	int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0;
+	nodemask_t nodemsk = NODE_MASK_NONE;
+	struct cpumask *masks;
+	cpumask_var_t nmsk;
 
-	if (max_vecs == 1)
+	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
 		return NULL;
 
-	affinity_mask = kzalloc(cpumask_size(), GFP_KERNEL);
-	if (!affinity_mask) {
-		*nr_vecs = 1;
-		return NULL;
-	}
+	masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		goto out;
 
+	/* Stabilize the cpumasks */
 	get_online_cpus();
-	if (max_vecs >= num_online_cpus()) {
-		cpumask_copy(affinity_mask, cpu_online_mask);
-		*nr_vecs = num_online_cpus();
-	} else {
-		unsigned int vecs = 0, cpu;
-
-		for_each_online_cpu(cpu) {
-			if (cpu == get_first_sibling(cpu)) {
-				cpumask_set_cpu(cpu, affinity_mask);
-				vecs++;
-			}
+	/* If the supplied affinity mask is NULL, use cpu online mask */
+	if (!affinity)
+		affinity = cpu_online_mask;
+
+	nodes = get_nodes_in_cpumask(affinity, &nodemsk);
 
-			if (--max_vecs == 0)
+	/*
+	 * If the number of nodes in the mask is less than or equal the
+	 * number of vectors we just spread the vectors across the nodes.
+	 */
+	if (nvec <= nodes) {
+		for_each_node_mask(n, nodemsk) {
+			cpumask_copy(masks + curvec, cpumask_of_node(n));
+			if (++curvec == nvec)
 				break;
 		}
-		*nr_vecs = vecs;
+		goto outonl;
 	}
+
+	/* Spread the vectors per node */
+	vecs_per_node = nvec / nodes;
+	/* Account for rounding errors */
+	extra_vecs = nvec - (nodes * vecs_per_node);
+
+	for_each_node_mask(n, nodemsk) {
+		int ncpus, v, vecs_to_assign = vecs_per_node;
+
+		/* Get the cpus on this node which are in the mask */
+		cpumask_and(nmsk, affinity, cpumask_of_node(n));
+
+		/* Calculate the number of cpus per vector */
+		ncpus = cpumask_weight(nmsk);
+
+		for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) {
+			cpus_per_vec = ncpus / vecs_to_assign;
+
+			/* Account for extra vectors to compensate rounding errors */
+			if (extra_vecs) {
+				cpus_per_vec++;
+				if (!--extra_vecs)
+					vecs_per_node++;
+			}
+			irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
+		}
+
+		if (curvec >= nvec)
+			break;
+	}
+
+outonl:
 	put_online_cpus();
+out:
+	free_cpumask_var(nmsk);
+	return masks;
+}
+
+/**
+ * irq_calc_affinity_vectors - Calculate to optimal number of vectors for a given affinity mask
+ * @affinity:		The affinity mask to spread. If NULL cpu_online_mask
+ *			is used
+ * @maxvec:		The maximum number of vectors available
+ */
+int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+{
+	int cpus, ret;
 
-	return affinity_mask;
+	/* Stabilize the cpumasks */
+	get_online_cpus();
+	/* If the supplied affinity mask is NULL, use cpu online mask */
+	if (!affinity)
+		affinity = cpu_online_mask;
+
+	cpus = cpumask_weight(affinity);
+	ret = (cpus < maxvec) ? cpus : maxvec;
+
+	put_online_cpus();
+	return ret;
 }
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 26ba5654d9d5..be3c34e4f2ac 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -76,7 +76,6 @@ int irq_set_irq_type(unsigned int irq, unsigned int type)
 	if (!desc)
 		return -EINVAL;
 
-	type &= IRQ_TYPE_SENSE_MASK;
 	ret = __irq_set_trigger(desc, type);
 	irq_put_desc_busunlock(desc, flags);
 	return ret;
@@ -756,7 +755,6 @@ void handle_percpu_devid_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct irqaction *action = desc->action;
-	void *dev_id = raw_cpu_ptr(action->percpu_dev_id);
 	unsigned int irq = irq_desc_get_irq(desc);
 	irqreturn_t res;
 
@@ -765,15 +763,26 @@ void handle_percpu_devid_irq(struct irq_desc *desc)
 	if (chip->irq_ack)
 		chip->irq_ack(&desc->irq_data);
 
-	trace_irq_handler_entry(irq, action);
-	res = action->handler(irq, dev_id);
-	trace_irq_handler_exit(irq, action, res);
+	if (likely(action)) {
+		trace_irq_handler_entry(irq, action);
+		res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
+		trace_irq_handler_exit(irq, action, res);
+	} else {
+		unsigned int cpu = smp_processor_id();
+		bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled);
+
+		if (enabled)
+			irq_percpu_disable(desc, cpu);
+
+		pr_err_once("Spurious%s percpu IRQ%u on CPU%u\n",
+			    enabled ? " and unmasked" : "", irq, cpu);
+	}
 
 	if (chip->irq_eoi)
 		chip->irq_eoi(&desc->irq_data);
 }
 
-void
+static void
 __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
 		     int is_chained, const char *name)
 {
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index abd286afbd27..ee32870079c9 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -260,9 +260,9 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags)
 }
 
 /**
- * irq_alloc_domain_generic_chip - Allocate generic chips for an irq domain
+ * __irq_alloc_domain_generic_chip - Allocate generic chips for an irq domain
  * @d:			irq domain for which to allocate chips
- * @irqs_per_chip:	Number of interrupts each chip handles
+ * @irqs_per_chip:	Number of interrupts each chip handles (max 32)
  * @num_ct:		Number of irq_chip_type instances associated with this
  * @name:		Name of the irq chip
  * @handler:		Default flow handler associated with these chips
@@ -270,11 +270,11 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags)
  * @set:		IRQ_* bits to set in the mapping function
  * @gcflags:		Generic chip specific setup flags
  */
-int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
-				   int num_ct, const char *name,
-				   irq_flow_handler_t handler,
-				   unsigned int clr, unsigned int set,
-				   enum irq_gc_flags gcflags)
+int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
+				     int num_ct, const char *name,
+				     irq_flow_handler_t handler,
+				     unsigned int clr, unsigned int set,
+				     enum irq_gc_flags gcflags)
 {
 	struct irq_domain_chip_generic *dgc;
 	struct irq_chip_generic *gc;
@@ -326,7 +326,21 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
 	d->name = name;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips);
+EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips);
+
+static struct irq_chip_generic *
+__irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
+{
+	struct irq_domain_chip_generic *dgc = d->gc;
+	int idx;
+
+	if (!dgc)
+		return ERR_PTR(-ENODEV);
+	idx = hw_irq / dgc->irqs_per_chip;
+	if (idx >= dgc->num_chips)
+		return ERR_PTR(-EINVAL);
+	return dgc->gc[idx];
+}
 
 /**
  * irq_get_domain_generic_chip - Get a pointer to the generic chip of a hw_irq
@@ -336,15 +350,9 @@ EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips);
 struct irq_chip_generic *
 irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
 {
-	struct irq_domain_chip_generic *dgc = d->gc;
-	int idx;
+	struct irq_chip_generic *gc = __irq_get_domain_generic_chip(d, hw_irq);
 
-	if (!dgc)
-		return NULL;
-	idx = hw_irq / dgc->irqs_per_chip;
-	if (idx >= dgc->num_chips)
-		return NULL;
-	return dgc->gc[idx];
+	return !IS_ERR(gc) ? gc : NULL;
 }
 EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip);
 
@@ -368,13 +376,9 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
 	unsigned long flags;
 	int idx;
 
-	if (!d->gc)
-		return -ENODEV;
-
-	idx = hw_irq / dgc->irqs_per_chip;
-	if (idx >= dgc->num_chips)
-		return -EINVAL;
-	gc = dgc->gc[idx];
+	gc = __irq_get_domain_generic_chip(d, hw_irq);
+	if (IS_ERR(gc))
+		return PTR_ERR(gc);
 
 	idx = hw_irq % dgc->irqs_per_chip;
 
@@ -409,10 +413,30 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
 	irq_modify_status(virq, dgc->irq_flags_to_clear, dgc->irq_flags_to_set);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(irq_map_generic_chip);
+
+static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq)
+{
+	struct irq_data *data = irq_domain_get_irq_data(d, virq);
+	struct irq_domain_chip_generic *dgc = d->gc;
+	unsigned int hw_irq = data->hwirq;
+	struct irq_chip_generic *gc;
+	int irq_idx;
+
+	gc = irq_get_domain_generic_chip(d, hw_irq);
+	if (!gc)
+		return;
+
+	irq_idx = hw_irq % dgc->irqs_per_chip;
+
+	clear_bit(irq_idx, &gc->installed);
+	irq_domain_set_info(d, virq, hw_irq, &no_irq_chip, NULL, NULL, NULL,
+			    NULL);
+
+}
 
 struct irq_domain_ops irq_generic_chip_ops = {
 	.map	= irq_map_generic_chip,
+	.unmap  = irq_unmap_generic_chip,
 	.xlate	= irq_domain_xlate_onetwocell,
 };
 EXPORT_SYMBOL_GPL(irq_generic_chip_ops);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a623b44f2d4b..00bb0aeea1d0 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -15,6 +15,7 @@
 #include <linux/radix-tree.h>
 #include <linux/bitmap.h>
 #include <linux/irqdomain.h>
+#include <linux/sysfs.h>
 
 #include "internals.h"
 
@@ -123,6 +124,181 @@ static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
 
 #ifdef CONFIG_SPARSE_IRQ
 
+static void irq_kobj_release(struct kobject *kobj);
+
+#ifdef CONFIG_SYSFS
+static struct kobject *irq_kobj_base;
+
+#define IRQ_ATTR_RO(_name) \
+static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+static ssize_t per_cpu_count_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	int cpu, irq = desc->irq_data.irq;
+	ssize_t ret = 0;
+	char *p = "";
+
+	for_each_possible_cpu(cpu) {
+		unsigned int c = kstat_irqs_cpu(irq, cpu);
+
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c);
+		p = ",";
+	}
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	return ret;
+}
+IRQ_ATTR_RO(per_cpu_count);
+
+static ssize_t chip_name_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	ssize_t ret = 0;
+
+	raw_spin_lock_irq(&desc->lock);
+	if (desc->irq_data.chip && desc->irq_data.chip->name) {
+		ret = scnprintf(buf, PAGE_SIZE, "%s\n",
+				desc->irq_data.chip->name);
+	}
+	raw_spin_unlock_irq(&desc->lock);
+
+	return ret;
+}
+IRQ_ATTR_RO(chip_name);
+
+static ssize_t hwirq_show(struct kobject *kobj,
+			  struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	ssize_t ret = 0;
+
+	raw_spin_lock_irq(&desc->lock);
+	if (desc->irq_data.domain)
+		ret = sprintf(buf, "%d\n", (int)desc->irq_data.hwirq);
+	raw_spin_unlock_irq(&desc->lock);
+
+	return ret;
+}
+IRQ_ATTR_RO(hwirq);
+
+static ssize_t type_show(struct kobject *kobj,
+			 struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	ssize_t ret = 0;
+
+	raw_spin_lock_irq(&desc->lock);
+	ret = sprintf(buf, "%s\n",
+		      irqd_is_level_type(&desc->irq_data) ? "level" : "edge");
+	raw_spin_unlock_irq(&desc->lock);
+
+	return ret;
+
+}
+IRQ_ATTR_RO(type);
+
+static ssize_t name_show(struct kobject *kobj,
+			 struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	ssize_t ret = 0;
+
+	raw_spin_lock_irq(&desc->lock);
+	if (desc->name)
+		ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->name);
+	raw_spin_unlock_irq(&desc->lock);
+
+	return ret;
+}
+IRQ_ATTR_RO(name);
+
+static ssize_t actions_show(struct kobject *kobj,
+			    struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	struct irqaction *action;
+	ssize_t ret = 0;
+	char *p = "";
+
+	raw_spin_lock_irq(&desc->lock);
+	for (action = desc->action; action != NULL; action = action->next) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+				 p, action->name);
+		p = ",";
+	}
+	raw_spin_unlock_irq(&desc->lock);
+
+	if (ret)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+
+	return ret;
+}
+IRQ_ATTR_RO(actions);
+
+static struct attribute *irq_attrs[] = {
+	&per_cpu_count_attr.attr,
+	&chip_name_attr.attr,
+	&hwirq_attr.attr,
+	&type_attr.attr,
+	&name_attr.attr,
+	&actions_attr.attr,
+	NULL
+};
+
+static struct kobj_type irq_kobj_type = {
+	.release	= irq_kobj_release,
+	.sysfs_ops	= &kobj_sysfs_ops,
+	.default_attrs	= irq_attrs,
+};
+
+static void irq_sysfs_add(int irq, struct irq_desc *desc)
+{
+	if (irq_kobj_base) {
+		/*
+		 * Continue even in case of failure as this is nothing
+		 * crucial.
+		 */
+		if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq))
+			pr_warn("Failed to add kobject for irq %d\n", irq);
+	}
+}
+
+static int __init irq_sysfs_init(void)
+{
+	struct irq_desc *desc;
+	int irq;
+
+	/* Prevent concurrent irq alloc/free */
+	irq_lock_sparse();
+
+	irq_kobj_base = kobject_create_and_add("irq", kernel_kobj);
+	if (!irq_kobj_base) {
+		irq_unlock_sparse();
+		return -ENOMEM;
+	}
+
+	/* Add the already allocated interrupts */
+	for_each_irq_desc(irq, desc)
+		irq_sysfs_add(irq, desc);
+	irq_unlock_sparse();
+
+	return 0;
+}
+postcore_initcall(irq_sysfs_init);
+
+#else /* !CONFIG_SYSFS */
+
+static struct kobj_type irq_kobj_type = {
+	.release	= irq_kobj_release,
+};
+
+static void irq_sysfs_add(int irq, struct irq_desc *desc) {}
+
+#endif /* CONFIG_SYSFS */
+
 static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
 
 static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
@@ -187,6 +363,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
 
 	desc_set_defaults(irq, desc, node, affinity, owner);
 	irqd_set(&desc->irq_data, flags);
+	kobject_init(&desc->kobj, &irq_kobj_type);
 
 	return desc;
 
@@ -197,15 +374,22 @@ err_desc:
 	return NULL;
 }
 
-static void delayed_free_desc(struct rcu_head *rhp)
+static void irq_kobj_release(struct kobject *kobj)
 {
-	struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
 
 	free_masks(desc);
 	free_percpu(desc->kstat_irqs);
 	kfree(desc);
 }
 
+static void delayed_free_desc(struct rcu_head *rhp)
+{
+	struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
+
+	kobject_put(&desc->kobj);
+}
+
 static void free_desc(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -217,8 +401,12 @@ static void free_desc(unsigned int irq)
 	 * kstat_irq_usr(). Once we deleted the descriptor from the
 	 * sparse tree we can free it. Access in proc will fail to
 	 * lookup the descriptor.
+	 *
+	 * The sysfs entry must be serialized against a concurrent
+	 * irq_sysfs_init() as well.
 	 */
 	mutex_lock(&sparse_irq_lock);
+	kobject_del(&desc->kobj);
 	delete_irq_desc(irq);
 	mutex_unlock(&sparse_irq_lock);
 
@@ -236,31 +424,31 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
 	const struct cpumask *mask = NULL;
 	struct irq_desc *desc;
 	unsigned int flags;
-	int i, cpu = -1;
+	int i;
 
-	if (affinity && cpumask_empty(affinity))
-		return -EINVAL;
+	/* Validate affinity mask(s) */
+	if (affinity) {
+		for (i = 0, mask = affinity; i < cnt; i++, mask++) {
+			if (cpumask_empty(mask))
+				return -EINVAL;
+		}
+	}
 
 	flags = affinity ? IRQD_AFFINITY_MANAGED : 0;
+	mask = NULL;
 
 	for (i = 0; i < cnt; i++) {
 		if (affinity) {
-			cpu = cpumask_next(cpu, affinity);
-			if (cpu >= nr_cpu_ids)
-				cpu = cpumask_first(affinity);
-			node = cpu_to_node(cpu);
-
-			/*
-			 * For single allocations we use the caller provided
-			 * mask otherwise we use the mask of the target cpu
-			 */
-			mask = cnt == 1 ? affinity : cpumask_of(cpu);
+			node = cpu_to_node(cpumask_first(affinity));
+			mask = affinity;
+			affinity++;
 		}
 		desc = alloc_desc(start + i, node, flags, mask, owner);
 		if (!desc)
 			goto err;
 		mutex_lock(&sparse_irq_lock);
 		irq_insert_desc(start + i, desc);
+		irq_sysfs_add(start + i, desc);
 		mutex_unlock(&sparse_irq_lock);
 	}
 	return start;
@@ -481,9 +669,9 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
  * @cnt:	Number of consecutive irqs to allocate.
  * @node:	Preferred node on which the irq descriptor should be allocated
  * @owner:	Owning module (can be NULL)
- * @affinity:	Optional pointer to an affinity mask which hints where the
- *		irq descriptors should be allocated and which default
- *		affinities to use
+ * @affinity:	Optional pointer to an affinity mask array of size @cnt which
+ *		hints where the irq descriptors should be allocated and which
+ *		default affinities to use
  *
  * Returns the first irq number or error code
  */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 4752b43662e0..8c0a0ae43521 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -80,7 +80,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
 
 /**
  * __irq_domain_add() - Allocate a new irq_domain data structure
- * @of_node: optional device-tree node of the interrupt controller
+ * @fwnode: firmware node for the interrupt controller
  * @size: Size of linear map; 0 for radix mapping only
  * @hwirq_max: Maximum number of interrupts supported by controller
  * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
@@ -96,10 +96,8 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 				    const struct irq_domain_ops *ops,
 				    void *host_data)
 {
+	struct device_node *of_node = to_of_node(fwnode);
 	struct irq_domain *domain;
-	struct device_node *of_node;
-
-	of_node = to_of_node(fwnode);
 
 	domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
 			      GFP_KERNEL, of_node_to_nid(of_node));
@@ -868,7 +866,10 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d,
 	if (WARN_ON(intsize < 1))
 		return -EINVAL;
 	*out_hwirq = intspec[0];
-	*out_type = (intsize > 1) ? intspec[1] : IRQ_TYPE_NONE;
+	if (intsize > 1)
+		*out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
+	else
+		*out_type = IRQ_TYPE_NONE;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9530fcd27704..0c5f1a5db654 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -669,8 +669,6 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags)
 		return 0;
 	}
 
-	flags &= IRQ_TYPE_SENSE_MASK;
-
 	if (chip->flags & IRQCHIP_SET_TYPE_MASKED) {
 		if (!irqd_irq_masked(&desc->irq_data))
 			mask_irq(desc);
@@ -678,7 +676,8 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags)
 			unmask = 1;
 	}
 
-	/* caller masked out all except trigger mode flags */
+	/* Mask all flags except trigger mode */
+	flags &= IRQ_TYPE_SENSE_MASK;
 	ret = chip->irq_set_type(&desc->irq_data, flags);
 
 	switch (ret) {
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 19e9dfbe97fa..8a3e872798f3 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -18,20 +18,42 @@
 /* Temparory solution for building, will be removed later */
 #include <linux/pci.h>
 
-struct msi_desc *alloc_msi_entry(struct device *dev)
+/**
+ * alloc_msi_entry - Allocate an initialize msi_entry
+ * @dev:	Pointer to the device for which this is allocated
+ * @nvec:	The number of vectors used in this entry
+ * @affinity:	Optional pointer to an affinity mask array size of @nvec
+ *
+ * If @affinity is not NULL then a an affinity array[@nvec] is allocated
+ * and the affinity masks from @affinity are copied.
+ */
+struct msi_desc *
+alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity)
 {
-	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	struct msi_desc *desc;
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
 	if (!desc)
 		return NULL;
 
 	INIT_LIST_HEAD(&desc->list);
 	desc->dev = dev;
+	desc->nvec_used = nvec;
+	if (affinity) {
+		desc->affinity = kmemdup(affinity,
+			nvec * sizeof(*desc->affinity), GFP_KERNEL);
+		if (!desc->affinity) {
+			kfree(desc);
+			return NULL;
+		}
+	}
 
 	return desc;
 }
 
 void free_msi_entry(struct msi_desc *entry)
 {
+	kfree(entry->affinity);
 	kfree(entry);
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 17caf4b63342..8ed90e3a88d6 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -78,6 +78,17 @@ static void wakeup_softirqd(void)
 }
 
 /*
+ * If ksoftirqd is scheduled, we do not want to process pending softirqs
+ * right now. Let ksoftirqd handle this at its own rate, to get fairness.
+ */
+static bool ksoftirqd_running(void)
+{
+	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
+
+	return tsk && (tsk->state == TASK_RUNNING);
+}
+
+/*
  * preempt_count and SOFTIRQ_OFFSET usage:
  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
  *   softirq processing.
@@ -313,7 +324,7 @@ asmlinkage __visible void do_softirq(void)
 
 	pending = local_softirq_pending();
 
-	if (pending)
+	if (pending && !ksoftirqd_running())
 		do_softirq_own_stack();
 
 	local_irq_restore(flags);
@@ -340,6 +351,9 @@ void irq_enter(void)
 
 static inline void invoke_softirq(void)
 {
+	if (ksoftirqd_running())
+		return;
+
 	if (!force_irqthreads) {
 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
 		/*
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-10-03 19:10:15 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-10-03 19:10:15 -0700
commit	999dcbe2414e15e19cdc1f91497d01f262c6e1cf (patch)
tree	5ad7cfa7d337f4a38be8045130c7f1a213bbe925 /kernel
parent	5e1b834b27fb2c27cde33a0752425f11d10c0b2d (diff)
parent	4cd13c21b207e80ddb1144c576500098f2d5f882 (diff)