From 28f4b04143c56135b1ca742fc64b664ed04de6a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:47 +0200 Subject: genirq/msi: Add cpumask allocation to alloc_msi_entry For irq spreading want to store affinity masks in the msi_entry. Add the infrastructure for it. We allocate an array of cpumasks with an array size of the number of used vectors in the entry, so we can hand in the information per linux interrupt later. As we hand in the number of used vectors, we assign them right away. Convert all the call sites. Signed-off-by: Thomas Gleixner Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/1473862739-15032-2-git-send-email-hch@lst.de --- kernel/irq/msi.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'kernel/irq') diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 19e9dfbe97fa..8a3e872798f3 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -18,20 +18,42 @@ /* Temparory solution for building, will be removed later */ #include -struct msi_desc *alloc_msi_entry(struct device *dev) +/** + * alloc_msi_entry - Allocate an initialize msi_entry + * @dev: Pointer to the device for which this is allocated + * @nvec: The number of vectors used in this entry + * @affinity: Optional pointer to an affinity mask array size of @nvec + * + * If @affinity is not NULL then a an affinity array[@nvec] is allocated + * and the affinity masks from @affinity are copied. + */ +struct msi_desc * +alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity) { - struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); + struct msi_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) return NULL; INIT_LIST_HEAD(&desc->list); desc->dev = dev; + desc->nvec_used = nvec; + if (affinity) { + desc->affinity = kmemdup(affinity, + nvec * sizeof(*desc->affinity), GFP_KERNEL); + if (!desc->affinity) { + kfree(desc); + return NULL; + } + } return desc; } void free_msi_entry(struct msi_desc *entry) { + kfree(entry->affinity); kfree(entry); } -- cgit From 34c3d9819fda464be4f1bec59b63353814f76c73 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:48 +0200 Subject: genirq/affinity: Provide smarter irq spreading infrastructure The current irq spreading infrastructure is just looking at a cpumask and tries to spread the interrupts over the mask. Thats suboptimal as it does not take numa nodes into account. Change the logic so the interrupts are spread across numa nodes and inside the nodes. If there are more cpus than vectors per node, then we set the affinity to several cpus. If HT siblings are available we take that into account and try to set all siblings to a single vector. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Link: http://lkml.kernel.org/r/1473862739-15032-3-git-send-email-hch@lst.de --- kernel/irq/affinity.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) (limited to 'kernel/irq') diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 32f6cfcff212..7812fecc6e2f 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -4,6 +4,155 @@ #include #include +static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, + int cpus_per_vec) +{ + const struct cpumask *siblmsk; + int cpu, sibl; + + for ( ; cpus_per_vec > 0; ) { + cpu = cpumask_first(nmsk); + + /* Should not happen, but I'm too lazy to think about it */ + if (cpu >= nr_cpu_ids) + return; + + cpumask_clear_cpu(cpu, nmsk); + cpumask_set_cpu(cpu, irqmsk); + cpus_per_vec--; + + /* If the cpu has siblings, use them first */ + siblmsk = topology_sibling_cpumask(cpu); + for (sibl = -1; cpus_per_vec > 0; ) { + sibl = cpumask_next(sibl, siblmsk); + if (sibl >= nr_cpu_ids) + break; + if (!cpumask_test_and_clear_cpu(sibl, nmsk)) + continue; + cpumask_set_cpu(sibl, irqmsk); + cpus_per_vec--; + } + } +} + +static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk) +{ + int n, nodes; + + /* Calculate the number of nodes in the supplied affinity mask */ + for (n = 0, nodes = 0; n < num_online_nodes(); n++) { + if (cpumask_intersects(mask, cpumask_of_node(n))) { + node_set(n, *nodemsk); + nodes++; + } + } + return nodes; +} + +/** + * irq_create_affinity_masks - Create affinity masks for multiqueue spreading + * @affinity: The affinity mask to spread. If NULL cpu_online_mask + * is used + * @nvecs: The number of vectors + * + * Returns the masks pointer or NULL if allocation failed. + */ +struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, + int nvec) +{ + int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0; + nodemask_t nodemsk = NODE_MASK_NONE; + struct cpumask *masks; + cpumask_var_t nmsk; + + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) + return NULL; + + masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL); + if (!masks) + goto out; + + /* Stabilize the cpumasks */ + get_online_cpus(); + /* If the supplied affinity mask is NULL, use cpu online mask */ + if (!affinity) + affinity = cpu_online_mask; + + nodes = get_nodes_in_cpumask(affinity, &nodemsk); + + /* + * If the number of nodes in the mask is less than or equal the + * number of vectors we just spread the vectors across the nodes. + */ + if (nvec <= nodes) { + for_each_node_mask(n, nodemsk) { + cpumask_copy(masks + curvec, cpumask_of_node(n)); + if (++curvec == nvec) + break; + } + goto outonl; + } + + /* Spread the vectors per node */ + vecs_per_node = nvec / nodes; + /* Account for rounding errors */ + extra_vecs = nvec - (nodes * vecs_per_node); + + for_each_node_mask(n, nodemsk) { + int ncpus, v, vecs_to_assign = vecs_per_node; + + /* Get the cpus on this node which are in the mask */ + cpumask_and(nmsk, affinity, cpumask_of_node(n)); + + /* Calculate the number of cpus per vector */ + ncpus = cpumask_weight(nmsk); + + for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) { + cpus_per_vec = ncpus / vecs_to_assign; + + /* Account for extra vectors to compensate rounding errors */ + if (extra_vecs) { + cpus_per_vec++; + if (!--extra_vecs) + vecs_per_node++; + } + irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec); + } + + if (curvec >= nvec) + break; + } + +outonl: + put_online_cpus(); +out: + free_cpumask_var(nmsk); + return masks; +} + +/** + * irq_calc_affinity_vectors - Calculate to optimal number of vectors for a given affinity mask + * @affinity: The affinity mask to spread. If NULL cpu_online_mask + * is used + * @maxvec: The maximum number of vectors available + */ +int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec) +{ + int cpus, ret; + + /* Stabilize the cpumasks */ + get_online_cpus(); + /* If the supplied affinity mask is NULL, use cpu online mask */ + if (!affinity) + affinity = cpu_online_mask; + + cpus = cpumask_weight(affinity); + ret = (cpus < maxvec) ? cpus : maxvec; + + put_online_cpus(); + return ret; +} + static int get_first_sibling(unsigned int cpu) { unsigned int ret; -- cgit From e75eafb9b0395c338230b0eef2cc92ca8d20dee2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:49 +0200 Subject: genirq/msi: Switch to new irq spreading infrastructure Switch MSI over to the new spreading code. If a pci device contains a valid pointer to a cpumask, then this mask is used for spreading otherwise the online cpu mask is used. This allows a driver to restrict the spread to a subset of CPUs, e.g. cpus on a particular node. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Link: http://lkml.kernel.org/r/1473862739-15032-4-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- kernel/irq/irqdesc.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'kernel/irq') diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a623b44f2d4b..5a5a685aba33 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -236,25 +236,24 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, const struct cpumask *mask = NULL; struct irq_desc *desc; unsigned int flags; - int i, cpu = -1; + int i; - if (affinity && cpumask_empty(affinity)) - return -EINVAL; + /* Validate affinity mask(s) */ + if (affinity) { + for (i = 0, mask = affinity; i < cnt; i++, mask++) { + if (cpumask_empty(mask)) + return -EINVAL; + } + } flags = affinity ? IRQD_AFFINITY_MANAGED : 0; + mask = NULL; for (i = 0; i < cnt; i++) { if (affinity) { - cpu = cpumask_next(cpu, affinity); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(affinity); - node = cpu_to_node(cpu); - - /* - * For single allocations we use the caller provided - * mask otherwise we use the mask of the target cpu - */ - mask = cnt == 1 ? affinity : cpumask_of(cpu); + node = cpu_to_node(cpumask_first(affinity)); + mask = affinity; + affinity++; } desc = alloc_desc(start + i, node, flags, mask, owner); if (!desc) @@ -481,9 +480,9 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * @cnt: Number of consecutive irqs to allocate. * @node: Preferred node on which the irq descriptor should be allocated * @owner: Owning module (can be NULL) - * @affinity: Optional pointer to an affinity mask which hints where the - * irq descriptors should be allocated and which default - * affinities to use + * @affinity: Optional pointer to an affinity mask array of size @cnt which + * hints where the irq descriptors should be allocated and which + * default affinities to use * * Returns the first irq number or error code */ -- cgit From 44082fd6702fb12020967fd375f8bf6dd7c111bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:50 +0200 Subject: genirq/affinity: Remove old irq spread infrastructure No more users. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Link: http://lkml.kernel.org/r/1473862739-15032-5-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- kernel/irq/affinity.c | 58 --------------------------------------------------- 1 file changed, 58 deletions(-) (limited to 'kernel/irq') diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 7812fecc6e2f..17f51d63da56 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -152,61 +152,3 @@ int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec) put_online_cpus(); return ret; } - -static int get_first_sibling(unsigned int cpu) -{ - unsigned int ret; - - ret = cpumask_first(topology_sibling_cpumask(cpu)); - if (ret < nr_cpu_ids) - return ret; - return cpu; -} - -/* - * Take a map of online CPUs and the number of available interrupt vectors - * and generate an output cpumask suitable for spreading MSI/MSI-X vectors - * so that they are distributed as good as possible around the CPUs. If - * more vectors than CPUs are available we'll map one to each CPU, - * otherwise we map one to the first sibling of each socket. - * - * If there are more vectors than CPUs we will still only have one bit - * set per CPU, but interrupt code will keep on assigning the vectors from - * the start of the bitmap until we run out of vectors. - */ -struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) -{ - struct cpumask *affinity_mask; - unsigned int max_vecs = *nr_vecs; - - if (max_vecs == 1) - return NULL; - - affinity_mask = kzalloc(cpumask_size(), GFP_KERNEL); - if (!affinity_mask) { - *nr_vecs = 1; - return NULL; - } - - get_online_cpus(); - if (max_vecs >= num_online_cpus()) { - cpumask_copy(affinity_mask, cpu_online_mask); - *nr_vecs = num_online_cpus(); - } else { - unsigned int vecs = 0, cpu; - - for_each_online_cpu(cpu) { - if (cpu == get_first_sibling(cpu)) { - cpumask_set_cpu(cpu, affinity_mask); - vecs++; - } - - if (--max_vecs == 0) - break; - } - *nr_vecs = vecs; - } - put_online_cpus(); - - return affinity_mask; -} -- cgit