summaryrefslogtreecommitdiff
path: root/drivers/nvme
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-05 12:21:47 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-05 12:21:47 -0800
commit78f860135433a8bba406352fbdcea8e8980583bf (patch)
tree0b7a9ba320e38b5d6eb0fb982bc2d9449aaf57f3 /drivers/nvme
parent18483190e7a2a6761b67c6824a31adf5b2b7be51 (diff)
parenta324ca9cad4736252c33c1e28cffe1d87f262d03 (diff)
Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq updates from Thomas Gleixner: "The interrupt departement delivers this time: - New infrastructure to manage NMIs on platforms which have a sane NMI delivery, i.e. identifiable NMI vectors instead of a single lump. - Simplification of the interrupt affinity management so drivers don't have to implement ugly loops around the PCI/MSI enablement. - Speedup for interrupt statistics in /proc/stat - Provide a function to retrieve the default irq domain - A new interrupt controller for the Loongson LS1X platform - Affinity support for the SiFive PLIC - Better support for the iMX irqsteer driver - NUMA aware memory allocations for GICv3 - The usual small fixes, improvements and cleanups all over the place" * 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits) irqchip/imx-irqsteer: Add multi output interrupts support irqchip/imx-irqsteer: Change to use reg_num instead of irq_group dt-bindings: irq: imx-irqsteer: Add multi output interrupts support dt-binding: irq: imx-irqsteer: Use irq number instead of group number irqchip/brcmstb-l2: Use _irqsave locking variants in non-interrupt code irqchip/gicv3-its: Use NUMA aware memory allocation for ITS tables irqdomain: Allow the default irq domain to be retrieved irqchip/sifive-plic: Implement irq_set_affinity() for SMP host irqchip/sifive-plic: Differentiate between PLIC handler and context irqchip/sifive-plic: Add warning in plic_init() if handler already present irqchip/sifive-plic: Pre-compute context hart base and enable base PCI/MSI: Remove obsolete sanity checks for multiple interrupt sets genirq/affinity: Remove the leftovers of the original set support nvme-pci: Simplify interrupt allocation genirq/affinity: Add new callback for (re)calculating interrupt sets genirq/affinity: Store interrupt sets size in struct irq_affinity genirq/affinity: Code consolidation irqchip/irq-sifive-plic: Check and continue in case of an invalid cpuid. irqchip/i8259: Fix shutdown order by moving syscore_ops registration dt-bindings: interrupt-controller: loongson ls1x intc ...
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/pci.c117
1 files changed, 39 insertions, 78 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 7fee665ec45e..e905861186e3 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2041,53 +2041,52 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
return ret;
}
-/* irq_queues covers admin queue */
-static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
+/*
+ * nirqs is the number of interrupts available for write and read
+ * queues. The core already reserved an interrupt for the admin queue.
+ */
+static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
{
- unsigned int this_w_queues = write_queues;
-
- WARN_ON(!irq_queues);
-
- /*
- * Setup read/write queue split, assign admin queue one independent
- * irq vector if irq_queues is > 1.
- */
- if (irq_queues <= 2) {
- dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
- dev->io_queues[HCTX_TYPE_READ] = 0;
- return;
- }
-
- /*
- * If 'write_queues' is set, ensure it leaves room for at least
- * one read queue and one admin queue
- */
- if (this_w_queues >= irq_queues)
- this_w_queues = irq_queues - 2;
+ struct nvme_dev *dev = affd->priv;
+ unsigned int nr_read_queues;
/*
- * If 'write_queues' is set to zero, reads and writes will share
- * a queue set.
+ * If there is no interupt available for queues, ensure that
+ * the default queue is set to 1. The affinity set size is
+ * also set to one, but the irq core ignores it for this case.
+ *
+ * If only one interrupt is available or 'write_queue' == 0, combine
+ * write and read queues.
+ *
+ * If 'write_queues' > 0, ensure it leaves room for at least one read
+ * queue.
*/
- if (!this_w_queues) {
- dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues - 1;
- dev->io_queues[HCTX_TYPE_READ] = 0;
+ if (!nrirqs) {
+ nrirqs = 1;
+ nr_read_queues = 0;
+ } else if (nrirqs == 1 || !write_queues) {
+ nr_read_queues = 0;
+ } else if (write_queues >= nrirqs) {
+ nr_read_queues = 1;
} else {
- dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues;
- dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues - 1;
+ nr_read_queues = nrirqs - write_queues;
}
+
+ dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
+ affd->set_size[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
+ dev->io_queues[HCTX_TYPE_READ] = nr_read_queues;
+ affd->set_size[HCTX_TYPE_READ] = nr_read_queues;
+ affd->nr_sets = nr_read_queues ? 2 : 1;
}
static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int irq_sets[2];
struct irq_affinity affd = {
- .pre_vectors = 1,
- .nr_sets = ARRAY_SIZE(irq_sets),
- .sets = irq_sets,
+ .pre_vectors = 1,
+ .calc_sets = nvme_calc_irq_sets,
+ .priv = dev,
};
- int result = 0;
unsigned int irq_queues, this_p_queues;
/*
@@ -2103,51 +2102,12 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
}
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
- /*
- * For irq sets, we have to ask for minvec == maxvec. This passes
- * any reduction back to us, so we can adjust our queue counts and
- * IRQ vector needs.
- */
- do {
- nvme_calc_io_queues(dev, irq_queues);
- irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT];
- irq_sets[1] = dev->io_queues[HCTX_TYPE_READ];
- if (!irq_sets[1])
- affd.nr_sets = 1;
-
- /*
- * If we got a failure and we're down to asking for just
- * 1 + 1 queues, just ask for a single vector. We'll share
- * that between the single IO queue and the admin queue.
- * Otherwise, we assign one independent vector to admin queue.
- */
- if (irq_queues > 1)
- irq_queues = irq_sets[0] + irq_sets[1] + 1;
+ /* Initialize for the single interrupt case */
+ dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
+ dev->io_queues[HCTX_TYPE_READ] = 0;
- result = pci_alloc_irq_vectors_affinity(pdev, irq_queues,
- irq_queues,
- PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
-
- /*
- * Need to reduce our vec counts. If we get ENOSPC, the
- * platform should support mulitple vecs, we just need
- * to decrease our ask. If we get EINVAL, the platform
- * likely does not. Back down to ask for just one vector.
- */
- if (result == -ENOSPC) {
- irq_queues--;
- if (!irq_queues)
- return result;
- continue;
- } else if (result == -EINVAL) {
- irq_queues = 1;
- continue;
- } else if (result <= 0)
- return -EIO;
- break;
- } while (1);
-
- return result;
+ return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues,
+ PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
}
static void nvme_disable_io_queues(struct nvme_dev *dev)
@@ -3024,6 +2984,7 @@ static struct pci_driver nvme_driver = {
static int __init nvme_init(void)
{
+ BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
return pci_register_driver(&nvme_driver);
}