From 8b078c603249239f597dc395ac182667c8e0af9c Mon Sep 17 00:00:00 2001 From: Mathias Koehrer Date: Tue, 9 Aug 2016 10:33:31 +0200 Subject: PCI: Update "pci=resource_alignment" documentation Some uio based PCI drivers, e.g., uio_cif, do not work if the assigned PCI memory resources are not page aligned. By using the kernel option "pci=resource_alignment=@:." it is possible to request page alignment for memory resources of devices. However, this is cumbersome when using several devices, and the bus/slot/func addresses may change if devices are added to or removed from the system. Extend the "pci=resource_alignment" option so we can specify the relevant devices via PCI vendor, device, subvendor, and subdevice IDs. The specification of the devices via IDs is indicated by a leading string "pci:" as argument to "pci=resource_alignment". The format of the specification is pci::[::] Examples: pci=resource_alignment=4096@pci:8086:9c22:103c:198f pci=resource_alignment=pci:8086:9c22 # defaults to PAGE_SIZE align [bhelgaas: changelog, use actual vendor/device IDs in examples] Signed-off-by: Mathias Koehrer Signed-off-by: Bjorn Helgaas --- Documentation/kernel-parameters.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 46c030a49186..a4f4d693e2c1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3032,6 +3032,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. PAGE_SIZE is used as alignment. PCI-PCI bridge can be specified, if resource windows need to be expanded. + To specify the alignment for several + instances of a device, the PCI vendor, + device, subvendor, and subdevice may be + specified, e.g., 4096@pci:8086:9c22:103c:198f ecrc= Enable/disable PCIe ECRC (transaction layer end-to-end CRC checking). bios: Use BIOS/firmware settings. This is the -- cgit From 4fe0d154880bb6eb833cbe84fa6f385f400f0b9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Aug 2016 07:11:04 -0700 Subject: PCI: Use positive flags in pci_alloc_irq_vectors() Instead of passing negative flags like PCI_IRQ_NOMSI to prevent use of certain interrupt types, pass positive flags like PCI_IRQ_LEGACY, PCI_IRQ_MSI, etc., to specify the acceptable interrupt types. This is based on a number of pending driver conversions that just happend to be a whole more obvious to read this way, and given that we have no users in the tree yet it can still easily be done. I've also added a PCI_IRQ_ALL_TYPES catchall to keep the case of accepting all interrupt types very simple. [bhelgaas: changelog, fix PCI_IRQ_AFFINITY doc typo, remove mention of PCI_IRQ_NOLEGACY] Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Alexander Gordeev --- Documentation/PCI/MSI-HOWTO.txt | 24 ++++++++++-------------- drivers/pci/msi.c | 15 +++++++-------- include/linux/pci.h | 10 ++++++---- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index c55df2911136..cd9c9f6a7cd9 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -94,14 +94,11 @@ has a requirements for a minimum number of vectors the driver can pass a min_vecs argument set to this limit, and the PCI core will return -ENOSPC if it can't meet the minimum number of vectors. -The flags argument should normally be set to 0, but can be used to pass the -PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support -MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in -case the device does not support legacy interrupt lines. - -By default this function will spread the interrupts around the available -CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY -flag. +The flags argument is used to specify which type of interrupt can be used +by the device and the driver (PCI_IRQ_LEGACY, PCI_IRQ_MSI, PCI_IRQ_MSIX). +A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for +any possible kind of interrupt. If the PCI_IRQ_AFFINITY flag is set, +pci_alloc_irq_vectors() will spread the interrupts around the available CPUs. To get the Linux IRQ numbers passed to request_irq() and free_irq() and the vectors, use the following function: @@ -131,7 +128,7 @@ larger than the number supported by the device it will automatically be capped to the supported limit, so there is no need to query the number of vectors supported beforehand: - nvec = pci_alloc_irq_vectors(pdev, 1, nvec, 0); + nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES) if (nvec < 0) goto out_err; @@ -140,7 +137,7 @@ interrupts it can request a particular number of interrupts by passing that number to pci_alloc_irq_vectors() function as both 'min_vecs' and 'max_vecs' parameters: - ret = pci_alloc_irq_vectors(pdev, nvec, nvec, 0); + ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES); if (ret < 0) goto out_err; @@ -148,15 +145,14 @@ The most notorious example of the request type described above is enabling the single MSI mode for a device. It could be done by passing two 1s as 'min_vecs' and 'max_vecs': - ret = pci_alloc_irq_vectors(pdev, 1, 1, 0); + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); if (ret < 0) goto out_err; Some devices might not support using legacy line interrupts, in which case -the PCI_IRQ_NOLEGACY flag can be used to fail the request if the platform -can't provide MSI or MSI-X interrupts: +the driver can specify that only MSI or MSI-X is acceptable: - nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_NOLEGACY); + nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (nvec < 0) goto out_err; diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a02981efdad5..9233e7f62f47 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1069,7 +1069,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, nvec = maxvec; for (;;) { - if (!(flags & PCI_IRQ_NOAFFINITY)) { + if (flags & PCI_IRQ_AFFINITY) { dev->irq_affinity = irq_create_affinity_mask(&nvec); if (nvec < minvec) return -ENOSPC; @@ -1105,7 +1105,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, **/ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) { - return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY); + return __pci_enable_msi_range(dev, minvec, maxvec, 0); } EXPORT_SYMBOL(pci_enable_msi_range); @@ -1120,7 +1120,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, return -ERANGE; for (;;) { - if (!(flags & PCI_IRQ_NOAFFINITY)) { + if (flags & PCI_IRQ_AFFINITY) { dev->irq_affinity = irq_create_affinity_mask(&nvec); if (nvec < minvec) return -ENOSPC; @@ -1160,8 +1160,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec) { - return __pci_enable_msix_range(dev, entries, minvec, maxvec, - PCI_IRQ_NOAFFINITY); + return __pci_enable_msix_range(dev, entries, minvec, maxvec, 0); } EXPORT_SYMBOL(pci_enable_msix_range); @@ -1187,21 +1186,21 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, { int vecs = -ENOSPC; - if (!(flags & PCI_IRQ_NOMSIX)) { + if (flags & PCI_IRQ_MSIX) { vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, flags); if (vecs > 0) return vecs; } - if (!(flags & PCI_IRQ_NOMSI)) { + if (flags & PCI_IRQ_MSI) { vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags); if (vecs > 0) return vecs; } /* use legacy irq if allowed */ - if (!(flags & PCI_IRQ_NOLEGACY) && min_vecs == 1) + if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1) return 1; return vecs; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 2599a980340f..fbc1fa625c3e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1251,10 +1251,12 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno); int pci_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags); -#define PCI_IRQ_NOLEGACY (1 << 0) /* don't use legacy interrupts */ -#define PCI_IRQ_NOMSI (1 << 1) /* don't use MSI interrupts */ -#define PCI_IRQ_NOMSIX (1 << 2) /* don't use MSI-X interrupts */ -#define PCI_IRQ_NOAFFINITY (1 << 3) /* don't auto-assign affinity */ +#define PCI_IRQ_LEGACY (1 << 0) /* allow legacy interrupts */ +#define PCI_IRQ_MSI (1 << 1) /* allow MSI interrupts */ +#define PCI_IRQ_MSIX (1 << 2) /* allow MSI-X interrupts */ +#define PCI_IRQ_AFFINITY (1 << 3) /* auto-assign affinity */ +#define PCI_IRQ_ALL_TYPES \ + (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX) /* kmem_cache style wrapper around pci_alloc_consistent() */ -- cgit From c68db51589052ef9adee4dd699462681737849a2 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Mon, 29 Aug 2016 11:19:01 -0600 Subject: x86/PCI: VMD: Allocate IRQ lists with correct MSI-X count To reduce the amount of memory required for IRQ lists, only allocate their space after calling pci_msix_enable_range() which may reduce the number of MSI-X vectors allocated. Signed-off-by: Jon Derrick Signed-off-by: Bjorn Helgaas Reviewed-by: Keith Busch --- arch/x86/pci/vmd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index b814ca675131..7a8538da6289 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -671,11 +671,6 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) if (vmd->msix_count < 0) return -ENODEV; - vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs), - GFP_KERNEL); - if (!vmd->irqs) - return -ENOMEM; - vmd->msix_entries = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->msix_entries), GFP_KERNEL); @@ -689,6 +684,11 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) if (vmd->msix_count < 0) return vmd->msix_count; + vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs), + GFP_KERNEL); + if (!vmd->irqs) + return -ENOMEM; + for (i = 0; i < vmd->msix_count; i++) { INIT_LIST_HEAD(&vmd->irqs[i].irq_list); vmd->irqs[i].vmd_vector = vmd->msix_entries[i].vector; -- cgit From 75de9b4cd3119eaddce6616776f62a5b30fb0fae Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Mon, 29 Aug 2016 11:19:02 -0600 Subject: x86/PCI: VMD: Convert to use pci_alloc_irq_vectors() API Convert to use the pci_alloc_irq_vectors() API. Signed-off-by: Jon Derrick Signed-off-by: Bjorn Helgaas Reviewed-by: Keith Busch --- arch/x86/pci/vmd.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index 7a8538da6289..f576dcba2175 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -76,7 +76,6 @@ struct vmd_dev { char __iomem *cfgbar; int msix_count; - struct msix_entry *msix_entries; struct vmd_irq_list *irqs; struct pci_sysdata sysdata; @@ -671,16 +670,8 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) if (vmd->msix_count < 0) return -ENODEV; - vmd->msix_entries = devm_kcalloc(&dev->dev, vmd->msix_count, - sizeof(*vmd->msix_entries), - GFP_KERNEL); - if (!vmd->msix_entries) - return -ENOMEM; - for (i = 0; i < vmd->msix_count; i++) - vmd->msix_entries[i].entry = i; - - vmd->msix_count = pci_enable_msix_range(vmd->dev, vmd->msix_entries, 1, - vmd->msix_count); + vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count, + PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); if (vmd->msix_count < 0) return vmd->msix_count; @@ -691,7 +682,7 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) for (i = 0; i < vmd->msix_count; i++) { INIT_LIST_HEAD(&vmd->irqs[i].irq_list); - vmd->irqs[i].vmd_vector = vmd->msix_entries[i].vector; + vmd->irqs[i].vmd_vector = pci_irq_vector(dev, i); vmd->irqs[i].index = i; err = devm_request_irq(&dev->dev, vmd->irqs[i].vmd_vector, -- cgit From 53db86adc2c9b3ce0454bece4487e8eca96e2614 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Fri, 2 Sep 2016 11:53:04 -0600 Subject: x86/PCI: VMD: Eliminate vmd_vector member from list type Eliminate unused vmd and vector members from vmd_irq_list and discover the vector using pci_irq_vector(). Signed-off-by: Jon Derrick Signed-off-by: Bjorn Helgaas Acked-by: Keith Busch --- arch/x86/pci/vmd.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index f576dcba2175..61a97ff36514 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -56,15 +56,12 @@ struct vmd_irq { /** * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector * @irq_list: the list of irq's the VMD one demuxes to. - * @vmd_vector: the h/w IRQ assigned to the VMD. * @index: index into the VMD MSI-X table; used for message routing. * @count: number of child IRQs assigned to this vector; used to track * sharing. */ struct vmd_irq_list { struct list_head irq_list; - struct vmd_dev *vmd; - unsigned int vmd_vector; unsigned int index; unsigned int count; }; @@ -201,8 +198,10 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, vmdirq->irq = vmd_next_irq(vmd, desc); vmdirq->virq = virq; - irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip, - vmdirq, handle_untracked_irq, vmd, NULL); + irq_domain_set_info(domain, virq, + pci_irq_vector(vmd->dev, vmdirq->irq->index), + info->chip, vmdirq, + handle_untracked_irq, vmd, NULL); return 0; } @@ -682,10 +681,8 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) for (i = 0; i < vmd->msix_count; i++) { INIT_LIST_HEAD(&vmd->irqs[i].irq_list); - vmd->irqs[i].vmd_vector = pci_irq_vector(dev, i); vmd->irqs[i].index = i; - - err = devm_request_irq(&dev->dev, vmd->irqs[i].vmd_vector, + err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i), vmd_irq, 0, "vmd", &vmd->irqs[i]); if (err) return err; -- cgit From b31822277abcd7c83d1c1c0af876da9ccdf3b7d6 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Fri, 2 Sep 2016 11:53:05 -0600 Subject: x86/PCI: VMD: Eliminate index member from IRQ list Use math to discover the IRQ list index number relative to the IRQ list head. Signed-off-by: Jon Derrick Signed-off-by: Bjorn Helgaas Acked-by: Keith Busch --- arch/x86/pci/vmd.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index 61a97ff36514..e785907acb79 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -56,13 +56,11 @@ struct vmd_irq { /** * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector * @irq_list: the list of irq's the VMD one demuxes to. - * @index: index into the VMD MSI-X table; used for message routing. * @count: number of child IRQs assigned to this vector; used to track * sharing. */ struct vmd_irq_list { struct list_head irq_list; - unsigned int index; unsigned int count; }; @@ -91,6 +89,12 @@ static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus) return container_of(bus->sysdata, struct vmd_dev, sysdata); } +static inline unsigned int index_from_irqs(struct vmd_dev *vmd, + struct vmd_irq_list *irqs) +{ + return irqs - vmd->irqs; +} + /* * Drivers managing a device in a VMD domain allocate their own IRQs as before, * but the MSI entry for the hardware it's driving will be programmed with a @@ -103,9 +107,11 @@ static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { struct vmd_irq *vmdirq = data->chip_data; struct vmd_irq_list *irq = vmdirq->irq; + struct vmd_dev *vmd = irq_data_get_irq_handler_data(data); msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_DEST_ID(irq->index); + msg->address_lo = MSI_ADDR_BASE_LO | + MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq)); msg->data = 0; } @@ -190,6 +196,7 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, struct msi_desc *desc = arg->desc; struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); + unsigned int index, vector; if (!vmdirq) return -ENOMEM; @@ -197,10 +204,10 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, INIT_LIST_HEAD(&vmdirq->node); vmdirq->irq = vmd_next_irq(vmd, desc); vmdirq->virq = virq; + index = index_from_irqs(vmd, vmdirq->irq); + vector = pci_irq_vector(vmd->dev, index); - irq_domain_set_info(domain, virq, - pci_irq_vector(vmd->dev, vmdirq->irq->index), - info->chip, vmdirq, + irq_domain_set_info(domain, virq, vector, info->chip, vmdirq, handle_untracked_irq, vmd, NULL); return 0; } @@ -681,7 +688,6 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) for (i = 0; i < vmd->msix_count; i++) { INIT_LIST_HEAD(&vmd->irqs[i].irq_list); - vmd->irqs[i].index = i; err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i), vmd_irq, 0, "vmd", &vmd->irqs[i]); if (err) -- cgit From ee6ee49fd09fa17c92aadf07961d0ff406fceab8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 4 Aug 2016 16:09:09 -0600 Subject: x86/PCI: VMD: Synchronize with RCU freeing MSI IRQ descs Fix a potential race when disabling MSI/MSI-X on a VMD domain device. If the VMD interrupt service is running, it may see a disabled IRQ. We can synchronize RCU just before freeing the MSI descriptor. This is safe since the irq_desc lock isn't held, and the descriptor is valid even though it is disabled. After vmd_msi_free(), though, the handler is reinitialized to handle_bad_irq(), so we can't let the VMD ISR's list iteration see the disabled IRQ after this. Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Acked-by Jon Derrick: --- arch/x86/pci/vmd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index e785907acb79..57058520f219 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -218,6 +218,8 @@ static void vmd_msi_free(struct irq_domain *domain, struct vmd_irq *vmdirq = irq_get_chip_data(virq); unsigned long flags; + synchronize_rcu(); + /* XXX: Potential optimization to rebalance */ raw_spin_lock_irqsave(&list_lock, flags); vmdirq->irq->count--; -- cgit From 181ffd19cc9898f795646bf9a897072a419a51ed Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 4 Oct 2016 12:26:37 -0500 Subject: x86/PCI: VMD: Move VMD driver to drivers/pci/host Move the driver source and Kconfig to the PCI host bridge drivers directory and move the config option to a more appropriate sub-menu instead of occupying the top-level location. Update the Kconfig option with the X86_64 dependency that was implicitly included from the previous location, and add information about the module name when built as a loadable module. Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas CC: Jon Derrick --- arch/x86/Kconfig | 13 - arch/x86/pci/Makefile | 2 - arch/x86/pci/vmd.c | 761 ---------------------------------------------- drivers/pci/host/Kconfig | 16 + drivers/pci/host/Makefile | 1 + drivers/pci/host/vmd.c | 761 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 778 insertions(+), 776 deletions(-) delete mode 100644 arch/x86/pci/vmd.c create mode 100644 drivers/pci/host/vmd.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5c6e7471b732..c32083808f27 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2744,19 +2744,6 @@ config PMC_ATOM def_bool y depends on PCI -config VMD - depends on PCI_MSI - tristate "Volume Management Device Driver" - default N - ---help--- - Adds support for the Intel Volume Management Device (VMD). VMD is a - secondary PCI host bridge that allows PCI Express root ports, - and devices attached to them, to be removed from the default - PCI domain and placed within the VMD domain. This provides - more bus resources than are otherwise possible with a - single domain. If you know your system provides one of these and - has devices attached to it, say Y; if you are not sure, say N. - source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 97062a635b77..5c6fc3577a49 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -23,8 +23,6 @@ obj-y += bus_numa.o obj-$(CONFIG_AMD_NB) += amd_bus.o obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o -obj-$(CONFIG_VMD) += vmd.o - ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c deleted file mode 100644 index 57058520f219..000000000000 --- a/arch/x86/pci/vmd.c +++ /dev/null @@ -1,761 +0,0 @@ -/* - * Volume Management Device driver - * Copyright (c) 2015, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#define VMD_CFGBAR 0 -#define VMD_MEMBAR1 2 -#define VMD_MEMBAR2 4 - -/* - * Lock for manipulating VMD IRQ lists. - */ -static DEFINE_RAW_SPINLOCK(list_lock); - -/** - * struct vmd_irq - private data to map driver IRQ to the VMD shared vector - * @node: list item for parent traversal. - * @rcu: RCU callback item for freeing. - * @irq: back pointer to parent. - * @virq: the virtual IRQ value provided to the requesting driver. - * - * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to - * a VMD IRQ using this structure. - */ -struct vmd_irq { - struct list_head node; - struct rcu_head rcu; - struct vmd_irq_list *irq; - unsigned int virq; -}; - -/** - * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector - * @irq_list: the list of irq's the VMD one demuxes to. - * @count: number of child IRQs assigned to this vector; used to track - * sharing. - */ -struct vmd_irq_list { - struct list_head irq_list; - unsigned int count; -}; - -struct vmd_dev { - struct pci_dev *dev; - - spinlock_t cfg_lock; - char __iomem *cfgbar; - - int msix_count; - struct vmd_irq_list *irqs; - - struct pci_sysdata sysdata; - struct resource resources[3]; - struct irq_domain *irq_domain; - struct pci_bus *bus; - -#ifdef CONFIG_X86_DEV_DMA_OPS - struct dma_map_ops dma_ops; - struct dma_domain dma_domain; -#endif -}; - -static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus) -{ - return container_of(bus->sysdata, struct vmd_dev, sysdata); -} - -static inline unsigned int index_from_irqs(struct vmd_dev *vmd, - struct vmd_irq_list *irqs) -{ - return irqs - vmd->irqs; -} - -/* - * Drivers managing a device in a VMD domain allocate their own IRQs as before, - * but the MSI entry for the hardware it's driving will be programmed with a - * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its - * domain into one of its own, and the VMD driver de-muxes these for the - * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations - * and irq_chip to set this up. - */ -static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) -{ - struct vmd_irq *vmdirq = data->chip_data; - struct vmd_irq_list *irq = vmdirq->irq; - struct vmd_dev *vmd = irq_data_get_irq_handler_data(data); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = MSI_ADDR_BASE_LO | - MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq)); - msg->data = 0; -} - -/* - * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops. - */ -static void vmd_irq_enable(struct irq_data *data) -{ - struct vmd_irq *vmdirq = data->chip_data; - unsigned long flags; - - raw_spin_lock_irqsave(&list_lock, flags); - list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); - raw_spin_unlock_irqrestore(&list_lock, flags); - - data->chip->irq_unmask(data); -} - -static void vmd_irq_disable(struct irq_data *data) -{ - struct vmd_irq *vmdirq = data->chip_data; - unsigned long flags; - - data->chip->irq_mask(data); - - raw_spin_lock_irqsave(&list_lock, flags); - list_del_rcu(&vmdirq->node); - INIT_LIST_HEAD_RCU(&vmdirq->node); - raw_spin_unlock_irqrestore(&list_lock, flags); -} - -/* - * XXX: Stubbed until we develop acceptable way to not create conflicts with - * other devices sharing the same vector. - */ -static int vmd_irq_set_affinity(struct irq_data *data, - const struct cpumask *dest, bool force) -{ - return -EINVAL; -} - -static struct irq_chip vmd_msi_controller = { - .name = "VMD-MSI", - .irq_enable = vmd_irq_enable, - .irq_disable = vmd_irq_disable, - .irq_compose_msi_msg = vmd_compose_msi_msg, - .irq_set_affinity = vmd_irq_set_affinity, -}; - -static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info, - msi_alloc_info_t *arg) -{ - return 0; -} - -/* - * XXX: We can be even smarter selecting the best IRQ once we solve the - * affinity problem. - */ -static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc) -{ - int i, best = 1; - unsigned long flags; - - if (!desc->msi_attrib.is_msix || vmd->msix_count == 1) - return &vmd->irqs[0]; - - raw_spin_lock_irqsave(&list_lock, flags); - for (i = 1; i < vmd->msix_count; i++) - if (vmd->irqs[i].count < vmd->irqs[best].count) - best = i; - vmd->irqs[best].count++; - raw_spin_unlock_irqrestore(&list_lock, flags); - - return &vmd->irqs[best]; -} - -static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, - unsigned int virq, irq_hw_number_t hwirq, - msi_alloc_info_t *arg) -{ - struct msi_desc *desc = arg->desc; - struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); - struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); - unsigned int index, vector; - - if (!vmdirq) - return -ENOMEM; - - INIT_LIST_HEAD(&vmdirq->node); - vmdirq->irq = vmd_next_irq(vmd, desc); - vmdirq->virq = virq; - index = index_from_irqs(vmd, vmdirq->irq); - vector = pci_irq_vector(vmd->dev, index); - - irq_domain_set_info(domain, virq, vector, info->chip, vmdirq, - handle_untracked_irq, vmd, NULL); - return 0; -} - -static void vmd_msi_free(struct irq_domain *domain, - struct msi_domain_info *info, unsigned int virq) -{ - struct vmd_irq *vmdirq = irq_get_chip_data(virq); - unsigned long flags; - - synchronize_rcu(); - - /* XXX: Potential optimization to rebalance */ - raw_spin_lock_irqsave(&list_lock, flags); - vmdirq->irq->count--; - raw_spin_unlock_irqrestore(&list_lock, flags); - - kfree_rcu(vmdirq, rcu); -} - -static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev, - int nvec, msi_alloc_info_t *arg) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct vmd_dev *vmd = vmd_from_bus(pdev->bus); - - if (nvec > vmd->msix_count) - return vmd->msix_count; - - memset(arg, 0, sizeof(*arg)); - return 0; -} - -static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) -{ - arg->desc = desc; -} - -static struct msi_domain_ops vmd_msi_domain_ops = { - .get_hwirq = vmd_get_hwirq, - .msi_init = vmd_msi_init, - .msi_free = vmd_msi_free, - .msi_prepare = vmd_msi_prepare, - .set_desc = vmd_set_desc, -}; - -static struct msi_domain_info vmd_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_PCI_MSIX, - .ops = &vmd_msi_domain_ops, - .chip = &vmd_msi_controller, -}; - -#ifdef CONFIG_X86_DEV_DMA_OPS -/* - * VMD replaces the requester ID with its own. DMA mappings for devices in a - * VMD domain need to be mapped for the VMD, not the device requiring - * the mapping. - */ -static struct device *to_vmd_dev(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct vmd_dev *vmd = vmd_from_bus(pdev->bus); - - return &vmd->dev->dev; -} - -static struct dma_map_ops *vmd_dma_ops(struct device *dev) -{ - return get_dma_ops(to_vmd_dev(dev)); -} - -static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr, - gfp_t flag, unsigned long attrs) -{ - return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag, - attrs); -} - -static void vmd_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t addr, unsigned long attrs) -{ - return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr, - attrs); -} - -static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t addr, size_t size, - unsigned long attrs) -{ - return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr, - size, attrs); -} - -static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t addr, size_t size, - unsigned long attrs) -{ - return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr, - addr, size, attrs); -} - -static dma_addr_t vmd_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size, - dir, attrs); -} - -static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs); -} - -static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs); -} - -static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs); -} - -static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); -} - -static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size, - dir); -} - -static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); -} - -static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); -} - -static int vmd_mapping_error(struct device *dev, dma_addr_t addr) -{ - return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr); -} - -static int vmd_dma_supported(struct device *dev, u64 mask) -{ - return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); -} - -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK -static u64 vmd_get_required_mask(struct device *dev) -{ - return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); -} -#endif - -static void vmd_teardown_dma_ops(struct vmd_dev *vmd) -{ - struct dma_domain *domain = &vmd->dma_domain; - - if (get_dma_ops(&vmd->dev->dev)) - del_dma_domain(domain); -} - -#define ASSIGN_VMD_DMA_OPS(source, dest, fn) \ - do { \ - if (source->fn) \ - dest->fn = vmd_##fn; \ - } while (0) - -static void vmd_setup_dma_ops(struct vmd_dev *vmd) -{ - const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev); - struct dma_map_ops *dest = &vmd->dma_ops; - struct dma_domain *domain = &vmd->dma_domain; - - domain->domain_nr = vmd->sysdata.domain; - domain->dma_ops = dest; - - if (!source) - return; - ASSIGN_VMD_DMA_OPS(source, dest, alloc); - ASSIGN_VMD_DMA_OPS(source, dest, free); - ASSIGN_VMD_DMA_OPS(source, dest, mmap); - ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable); - ASSIGN_VMD_DMA_OPS(source, dest, map_page); - ASSIGN_VMD_DMA_OPS(source, dest, unmap_page); - ASSIGN_VMD_DMA_OPS(source, dest, map_sg); - ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg); - ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu); - ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device); - ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu); - ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); - ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); - ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK - ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); -#endif - add_dma_domain(domain); -} -#undef ASSIGN_VMD_DMA_OPS -#else -static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {} -static void vmd_setup_dma_ops(struct vmd_dev *vmd) {} -#endif - -static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus, - unsigned int devfn, int reg, int len) -{ - char __iomem *addr = vmd->cfgbar + - (bus->number << 20) + (devfn << 12) + reg; - - if ((addr - vmd->cfgbar) + len >= - resource_size(&vmd->dev->resource[VMD_CFGBAR])) - return NULL; - - return addr; -} - -/* - * CPU may deadlock if config space is not serialized on some versions of this - * hardware, so all config space access is done under a spinlock. - */ -static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg, - int len, u32 *value) -{ - struct vmd_dev *vmd = vmd_from_bus(bus); - char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); - unsigned long flags; - int ret = 0; - - if (!addr) - return -EFAULT; - - spin_lock_irqsave(&vmd->cfg_lock, flags); - switch (len) { - case 1: - *value = readb(addr); - break; - case 2: - *value = readw(addr); - break; - case 4: - *value = readl(addr); - break; - default: - ret = -EINVAL; - break; - } - spin_unlock_irqrestore(&vmd->cfg_lock, flags); - return ret; -} - -/* - * VMD h/w converts non-posted config writes to posted memory writes. The - * read-back in this function forces the completion so it returns only after - * the config space was written, as expected. - */ -static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg, - int len, u32 value) -{ - struct vmd_dev *vmd = vmd_from_bus(bus); - char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); - unsigned long flags; - int ret = 0; - - if (!addr) - return -EFAULT; - - spin_lock_irqsave(&vmd->cfg_lock, flags); - switch (len) { - case 1: - writeb(value, addr); - readb(addr); - break; - case 2: - writew(value, addr); - readw(addr); - break; - case 4: - writel(value, addr); - readl(addr); - break; - default: - ret = -EINVAL; - break; - } - spin_unlock_irqrestore(&vmd->cfg_lock, flags); - return ret; -} - -static struct pci_ops vmd_ops = { - .read = vmd_pci_read, - .write = vmd_pci_write, -}; - -static void vmd_attach_resources(struct vmd_dev *vmd) -{ - vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; - vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2]; -} - -static void vmd_detach_resources(struct vmd_dev *vmd) -{ - vmd->dev->resource[VMD_MEMBAR1].child = NULL; - vmd->dev->resource[VMD_MEMBAR2].child = NULL; -} - -/* - * VMD domains start at 0x1000 to not clash with ACPI _SEG domains. - */ -static int vmd_find_free_domain(void) -{ - int domain = 0xffff; - struct pci_bus *bus = NULL; - - while ((bus = pci_find_next_bus(bus)) != NULL) - domain = max_t(int, domain, pci_domain_nr(bus)); - return domain + 1; -} - -static int vmd_enable_domain(struct vmd_dev *vmd) -{ - struct pci_sysdata *sd = &vmd->sysdata; - struct resource *res; - u32 upper_bits; - unsigned long flags; - LIST_HEAD(resources); - - res = &vmd->dev->resource[VMD_CFGBAR]; - vmd->resources[0] = (struct resource) { - .name = "VMD CFGBAR", - .start = 0, - .end = (resource_size(res) >> 20) - 1, - .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED, - }; - - /* - * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can - * put 32-bit resources in the window. - * - * There's no hardware reason why a 64-bit window *couldn't* - * contain a 32-bit resource, but pbus_size_mem() computes the - * bridge window size assuming a 64-bit window will contain no - * 32-bit resources. __pci_assign_resource() enforces that - * artificial restriction to make sure everything will fit. - * - * The only way we could use a 64-bit non-prefechable MEMBAR is - * if its address is <4GB so that we can convert it to a 32-bit - * resource. To be visible to the host OS, all VMD endpoints must - * be initially configured by platform BIOS, which includes setting - * up these resources. We can assume the device is configured - * according to the platform needs. - */ - res = &vmd->dev->resource[VMD_MEMBAR1]; - upper_bits = upper_32_bits(res->end); - flags = res->flags & ~IORESOURCE_SIZEALIGN; - if (!upper_bits) - flags &= ~IORESOURCE_MEM_64; - vmd->resources[1] = (struct resource) { - .name = "VMD MEMBAR1", - .start = res->start, - .end = res->end, - .flags = flags, - .parent = res, - }; - - res = &vmd->dev->resource[VMD_MEMBAR2]; - upper_bits = upper_32_bits(res->end); - flags = res->flags & ~IORESOURCE_SIZEALIGN; - if (!upper_bits) - flags &= ~IORESOURCE_MEM_64; - vmd->resources[2] = (struct resource) { - .name = "VMD MEMBAR2", - .start = res->start + 0x2000, - .end = res->end, - .flags = flags, - .parent = res, - }; - - sd->domain = vmd_find_free_domain(); - if (sd->domain < 0) - return sd->domain; - - sd->node = pcibus_to_node(vmd->dev->bus); - - vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info, - x86_vector_domain); - if (!vmd->irq_domain) - return -ENODEV; - - pci_add_resource(&resources, &vmd->resources[0]); - pci_add_resource(&resources, &vmd->resources[1]); - pci_add_resource(&resources, &vmd->resources[2]); - vmd->bus = pci_create_root_bus(&vmd->dev->dev, 0, &vmd_ops, sd, - &resources); - if (!vmd->bus) { - pci_free_resource_list(&resources); - irq_domain_remove(vmd->irq_domain); - return -ENODEV; - } - - vmd_attach_resources(vmd); - vmd_setup_dma_ops(vmd); - dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain); - pci_rescan_bus(vmd->bus); - - WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj, - "domain"), "Can't create symlink to domain\n"); - return 0; -} - -static irqreturn_t vmd_irq(int irq, void *data) -{ - struct vmd_irq_list *irqs = data; - struct vmd_irq *vmdirq; - - rcu_read_lock(); - list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node) - generic_handle_irq(vmdirq->virq); - rcu_read_unlock(); - - return IRQ_HANDLED; -} - -static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) -{ - struct vmd_dev *vmd; - int i, err; - - if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20)) - return -ENOMEM; - - vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL); - if (!vmd) - return -ENOMEM; - - vmd->dev = dev; - err = pcim_enable_device(dev); - if (err < 0) - return err; - - vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0); - if (!vmd->cfgbar) - return -ENOMEM; - - pci_set_master(dev); - if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) && - dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32))) - return -ENODEV; - - vmd->msix_count = pci_msix_vec_count(dev); - if (vmd->msix_count < 0) - return -ENODEV; - - vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count, - PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); - if (vmd->msix_count < 0) - return vmd->msix_count; - - vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs), - GFP_KERNEL); - if (!vmd->irqs) - return -ENOMEM; - - for (i = 0; i < vmd->msix_count; i++) { - INIT_LIST_HEAD(&vmd->irqs[i].irq_list); - err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i), - vmd_irq, 0, "vmd", &vmd->irqs[i]); - if (err) - return err; - } - - spin_lock_init(&vmd->cfg_lock); - pci_set_drvdata(dev, vmd); - err = vmd_enable_domain(vmd); - if (err) - return err; - - dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n", - vmd->sysdata.domain); - return 0; -} - -static void vmd_remove(struct pci_dev *dev) -{ - struct vmd_dev *vmd = pci_get_drvdata(dev); - - vmd_detach_resources(vmd); - pci_set_drvdata(dev, NULL); - sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); - pci_stop_root_bus(vmd->bus); - pci_remove_root_bus(vmd->bus); - vmd_teardown_dma_ops(vmd); - irq_domain_remove(vmd->irq_domain); -} - -#ifdef CONFIG_PM -static int vmd_suspend(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - pci_save_state(pdev); - return 0; -} - -static int vmd_resume(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - pci_restore_state(pdev); - return 0; -} -#endif -static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume); - -static const struct pci_device_id vmd_ids[] = { - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x201d),}, - {0,} -}; -MODULE_DEVICE_TABLE(pci, vmd_ids); - -static struct pci_driver vmd_drv = { - .name = "vmd", - .id_table = vmd_ids, - .probe = vmd_probe, - .remove = vmd_remove, - .driver = { - .pm = &vmd_dev_pm_ops, - }, -}; -module_pci_driver(vmd_drv); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL v2"); -MODULE_VERSION("0.6"); diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig index 9b485d873b0d..93865eb2c4c8 100644 --- a/drivers/pci/host/Kconfig +++ b/drivers/pci/host/Kconfig @@ -274,4 +274,20 @@ config PCIE_ARTPEC6 Say Y here to enable PCIe controller support on Axis ARTPEC-6 SoCs. This PCIe controller uses the DesignWare core. +config VMD + depends on PCI_MSI && X86_64 + tristate "Intel Volume Management Device Driver" + default N + ---help--- + Adds support for the Intel Volume Management Device (VMD). VMD is a + secondary PCI host bridge that allows PCI Express root ports, + and devices attached to them, to be removed from the default + PCI domain and placed within the VMD domain. This provides + more bus resources than are otherwise possible with a + single domain. If you know your system provides one of these and + has devices attached to it, say Y; if you are not sure, say N. + + To compile this driver as a module, choose M here: the + module will be called vmd. + endmenu diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile index 88434101e4c4..afea1c61e45a 100644 --- a/drivers/pci/host/Makefile +++ b/drivers/pci/host/Makefile @@ -31,3 +31,4 @@ obj-$(CONFIG_PCI_HOST_THUNDER_ECAM) += pci-thunder-ecam.o obj-$(CONFIG_PCI_HOST_THUNDER_PEM) += pci-thunder-pem.o obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o +obj-$(CONFIG_VMD) += vmd.o diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c new file mode 100644 index 000000000000..57058520f219 --- /dev/null +++ b/drivers/pci/host/vmd.c @@ -0,0 +1,761 @@ +/* + * Volume Management Device driver + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define VMD_CFGBAR 0 +#define VMD_MEMBAR1 2 +#define VMD_MEMBAR2 4 + +/* + * Lock for manipulating VMD IRQ lists. + */ +static DEFINE_RAW_SPINLOCK(list_lock); + +/** + * struct vmd_irq - private data to map driver IRQ to the VMD shared vector + * @node: list item for parent traversal. + * @rcu: RCU callback item for freeing. + * @irq: back pointer to parent. + * @virq: the virtual IRQ value provided to the requesting driver. + * + * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to + * a VMD IRQ using this structure. + */ +struct vmd_irq { + struct list_head node; + struct rcu_head rcu; + struct vmd_irq_list *irq; + unsigned int virq; +}; + +/** + * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector + * @irq_list: the list of irq's the VMD one demuxes to. + * @count: number of child IRQs assigned to this vector; used to track + * sharing. + */ +struct vmd_irq_list { + struct list_head irq_list; + unsigned int count; +}; + +struct vmd_dev { + struct pci_dev *dev; + + spinlock_t cfg_lock; + char __iomem *cfgbar; + + int msix_count; + struct vmd_irq_list *irqs; + + struct pci_sysdata sysdata; + struct resource resources[3]; + struct irq_domain *irq_domain; + struct pci_bus *bus; + +#ifdef CONFIG_X86_DEV_DMA_OPS + struct dma_map_ops dma_ops; + struct dma_domain dma_domain; +#endif +}; + +static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus) +{ + return container_of(bus->sysdata, struct vmd_dev, sysdata); +} + +static inline unsigned int index_from_irqs(struct vmd_dev *vmd, + struct vmd_irq_list *irqs) +{ + return irqs - vmd->irqs; +} + +/* + * Drivers managing a device in a VMD domain allocate their own IRQs as before, + * but the MSI entry for the hardware it's driving will be programmed with a + * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its + * domain into one of its own, and the VMD driver de-muxes these for the + * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations + * and irq_chip to set this up. + */ +static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct vmd_irq *vmdirq = data->chip_data; + struct vmd_irq_list *irq = vmdirq->irq; + struct vmd_dev *vmd = irq_data_get_irq_handler_data(data); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO | + MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq)); + msg->data = 0; +} + +/* + * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops. + */ +static void vmd_irq_enable(struct irq_data *data) +{ + struct vmd_irq *vmdirq = data->chip_data; + unsigned long flags; + + raw_spin_lock_irqsave(&list_lock, flags); + list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); + raw_spin_unlock_irqrestore(&list_lock, flags); + + data->chip->irq_unmask(data); +} + +static void vmd_irq_disable(struct irq_data *data) +{ + struct vmd_irq *vmdirq = data->chip_data; + unsigned long flags; + + data->chip->irq_mask(data); + + raw_spin_lock_irqsave(&list_lock, flags); + list_del_rcu(&vmdirq->node); + INIT_LIST_HEAD_RCU(&vmdirq->node); + raw_spin_unlock_irqrestore(&list_lock, flags); +} + +/* + * XXX: Stubbed until we develop acceptable way to not create conflicts with + * other devices sharing the same vector. + */ +static int vmd_irq_set_affinity(struct irq_data *data, + const struct cpumask *dest, bool force) +{ + return -EINVAL; +} + +static struct irq_chip vmd_msi_controller = { + .name = "VMD-MSI", + .irq_enable = vmd_irq_enable, + .irq_disable = vmd_irq_disable, + .irq_compose_msi_msg = vmd_compose_msi_msg, + .irq_set_affinity = vmd_irq_set_affinity, +}; + +static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return 0; +} + +/* + * XXX: We can be even smarter selecting the best IRQ once we solve the + * affinity problem. + */ +static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc) +{ + int i, best = 1; + unsigned long flags; + + if (!desc->msi_attrib.is_msix || vmd->msix_count == 1) + return &vmd->irqs[0]; + + raw_spin_lock_irqsave(&list_lock, flags); + for (i = 1; i < vmd->msix_count; i++) + if (vmd->irqs[i].count < vmd->irqs[best].count) + best = i; + vmd->irqs[best].count++; + raw_spin_unlock_irqrestore(&list_lock, flags); + + return &vmd->irqs[best]; +} + +static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, + unsigned int virq, irq_hw_number_t hwirq, + msi_alloc_info_t *arg) +{ + struct msi_desc *desc = arg->desc; + struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); + struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); + unsigned int index, vector; + + if (!vmdirq) + return -ENOMEM; + + INIT_LIST_HEAD(&vmdirq->node); + vmdirq->irq = vmd_next_irq(vmd, desc); + vmdirq->virq = virq; + index = index_from_irqs(vmd, vmdirq->irq); + vector = pci_irq_vector(vmd->dev, index); + + irq_domain_set_info(domain, virq, vector, info->chip, vmdirq, + handle_untracked_irq, vmd, NULL); + return 0; +} + +static void vmd_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq) +{ + struct vmd_irq *vmdirq = irq_get_chip_data(virq); + unsigned long flags; + + synchronize_rcu(); + + /* XXX: Potential optimization to rebalance */ + raw_spin_lock_irqsave(&list_lock, flags); + vmdirq->irq->count--; + raw_spin_unlock_irqrestore(&list_lock, flags); + + kfree_rcu(vmdirq, rcu); +} + +static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct vmd_dev *vmd = vmd_from_bus(pdev->bus); + + if (nvec > vmd->msix_count) + return vmd->msix_count; + + memset(arg, 0, sizeof(*arg)); + return 0; +} + +static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +{ + arg->desc = desc; +} + +static struct msi_domain_ops vmd_msi_domain_ops = { + .get_hwirq = vmd_get_hwirq, + .msi_init = vmd_msi_init, + .msi_free = vmd_msi_free, + .msi_prepare = vmd_msi_prepare, + .set_desc = vmd_set_desc, +}; + +static struct msi_domain_info vmd_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSIX, + .ops = &vmd_msi_domain_ops, + .chip = &vmd_msi_controller, +}; + +#ifdef CONFIG_X86_DEV_DMA_OPS +/* + * VMD replaces the requester ID with its own. DMA mappings for devices in a + * VMD domain need to be mapped for the VMD, not the device requiring + * the mapping. + */ +static struct device *to_vmd_dev(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct vmd_dev *vmd = vmd_from_bus(pdev->bus); + + return &vmd->dev->dev; +} + +static struct dma_map_ops *vmd_dma_ops(struct device *dev) +{ + return get_dma_ops(to_vmd_dev(dev)); +} + +static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr, + gfp_t flag, unsigned long attrs) +{ + return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag, + attrs); +} + +static void vmd_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t addr, unsigned long attrs) +{ + return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr, + attrs); +} + +static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t addr, size_t size, + unsigned long attrs) +{ + return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr, + size, attrs); +} + +static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t addr, size_t size, + unsigned long attrs) +{ + return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr, + addr, size, attrs); +} + +static dma_addr_t vmd_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size, + dir, attrs); +} + +static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs); +} + +static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs); +} + +static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs); +} + +static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); +} + +static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size, + dir); +} + +static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); +} + +static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); +} + +static int vmd_mapping_error(struct device *dev, dma_addr_t addr) +{ + return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr); +} + +static int vmd_dma_supported(struct device *dev, u64 mask) +{ + return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); +} + +#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK +static u64 vmd_get_required_mask(struct device *dev) +{ + return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); +} +#endif + +static void vmd_teardown_dma_ops(struct vmd_dev *vmd) +{ + struct dma_domain *domain = &vmd->dma_domain; + + if (get_dma_ops(&vmd->dev->dev)) + del_dma_domain(domain); +} + +#define ASSIGN_VMD_DMA_OPS(source, dest, fn) \ + do { \ + if (source->fn) \ + dest->fn = vmd_##fn; \ + } while (0) + +static void vmd_setup_dma_ops(struct vmd_dev *vmd) +{ + const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev); + struct dma_map_ops *dest = &vmd->dma_ops; + struct dma_domain *domain = &vmd->dma_domain; + + domain->domain_nr = vmd->sysdata.domain; + domain->dma_ops = dest; + + if (!source) + return; + ASSIGN_VMD_DMA_OPS(source, dest, alloc); + ASSIGN_VMD_DMA_OPS(source, dest, free); + ASSIGN_VMD_DMA_OPS(source, dest, mmap); + ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable); + ASSIGN_VMD_DMA_OPS(source, dest, map_page); + ASSIGN_VMD_DMA_OPS(source, dest, unmap_page); + ASSIGN_VMD_DMA_OPS(source, dest, map_sg); + ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg); + ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu); + ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device); + ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu); + ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); + ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); + ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); +#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK + ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); +#endif + add_dma_domain(domain); +} +#undef ASSIGN_VMD_DMA_OPS +#else +static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {} +static void vmd_setup_dma_ops(struct vmd_dev *vmd) {} +#endif + +static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus, + unsigned int devfn, int reg, int len) +{ + char __iomem *addr = vmd->cfgbar + + (bus->number << 20) + (devfn << 12) + reg; + + if ((addr - vmd->cfgbar) + len >= + resource_size(&vmd->dev->resource[VMD_CFGBAR])) + return NULL; + + return addr; +} + +/* + * CPU may deadlock if config space is not serialized on some versions of this + * hardware, so all config space access is done under a spinlock. + */ +static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg, + int len, u32 *value) +{ + struct vmd_dev *vmd = vmd_from_bus(bus); + char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); + unsigned long flags; + int ret = 0; + + if (!addr) + return -EFAULT; + + spin_lock_irqsave(&vmd->cfg_lock, flags); + switch (len) { + case 1: + *value = readb(addr); + break; + case 2: + *value = readw(addr); + break; + case 4: + *value = readl(addr); + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&vmd->cfg_lock, flags); + return ret; +} + +/* + * VMD h/w converts non-posted config writes to posted memory writes. The + * read-back in this function forces the completion so it returns only after + * the config space was written, as expected. + */ +static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg, + int len, u32 value) +{ + struct vmd_dev *vmd = vmd_from_bus(bus); + char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); + unsigned long flags; + int ret = 0; + + if (!addr) + return -EFAULT; + + spin_lock_irqsave(&vmd->cfg_lock, flags); + switch (len) { + case 1: + writeb(value, addr); + readb(addr); + break; + case 2: + writew(value, addr); + readw(addr); + break; + case 4: + writel(value, addr); + readl(addr); + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&vmd->cfg_lock, flags); + return ret; +} + +static struct pci_ops vmd_ops = { + .read = vmd_pci_read, + .write = vmd_pci_write, +}; + +static void vmd_attach_resources(struct vmd_dev *vmd) +{ + vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; + vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2]; +} + +static void vmd_detach_resources(struct vmd_dev *vmd) +{ + vmd->dev->resource[VMD_MEMBAR1].child = NULL; + vmd->dev->resource[VMD_MEMBAR2].child = NULL; +} + +/* + * VMD domains start at 0x1000 to not clash with ACPI _SEG domains. + */ +static int vmd_find_free_domain(void) +{ + int domain = 0xffff; + struct pci_bus *bus = NULL; + + while ((bus = pci_find_next_bus(bus)) != NULL) + domain = max_t(int, domain, pci_domain_nr(bus)); + return domain + 1; +} + +static int vmd_enable_domain(struct vmd_dev *vmd) +{ + struct pci_sysdata *sd = &vmd->sysdata; + struct resource *res; + u32 upper_bits; + unsigned long flags; + LIST_HEAD(resources); + + res = &vmd->dev->resource[VMD_CFGBAR]; + vmd->resources[0] = (struct resource) { + .name = "VMD CFGBAR", + .start = 0, + .end = (resource_size(res) >> 20) - 1, + .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED, + }; + + /* + * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can + * put 32-bit resources in the window. + * + * There's no hardware reason why a 64-bit window *couldn't* + * contain a 32-bit resource, but pbus_size_mem() computes the + * bridge window size assuming a 64-bit window will contain no + * 32-bit resources. __pci_assign_resource() enforces that + * artificial restriction to make sure everything will fit. + * + * The only way we could use a 64-bit non-prefechable MEMBAR is + * if its address is <4GB so that we can convert it to a 32-bit + * resource. To be visible to the host OS, all VMD endpoints must + * be initially configured by platform BIOS, which includes setting + * up these resources. We can assume the device is configured + * according to the platform needs. + */ + res = &vmd->dev->resource[VMD_MEMBAR1]; + upper_bits = upper_32_bits(res->end); + flags = res->flags & ~IORESOURCE_SIZEALIGN; + if (!upper_bits) + flags &= ~IORESOURCE_MEM_64; + vmd->resources[1] = (struct resource) { + .name = "VMD MEMBAR1", + .start = res->start, + .end = res->end, + .flags = flags, + .parent = res, + }; + + res = &vmd->dev->resource[VMD_MEMBAR2]; + upper_bits = upper_32_bits(res->end); + flags = res->flags & ~IORESOURCE_SIZEALIGN; + if (!upper_bits) + flags &= ~IORESOURCE_MEM_64; + vmd->resources[2] = (struct resource) { + .name = "VMD MEMBAR2", + .start = res->start + 0x2000, + .end = res->end, + .flags = flags, + .parent = res, + }; + + sd->domain = vmd_find_free_domain(); + if (sd->domain < 0) + return sd->domain; + + sd->node = pcibus_to_node(vmd->dev->bus); + + vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info, + x86_vector_domain); + if (!vmd->irq_domain) + return -ENODEV; + + pci_add_resource(&resources, &vmd->resources[0]); + pci_add_resource(&resources, &vmd->resources[1]); + pci_add_resource(&resources, &vmd->resources[2]); + vmd->bus = pci_create_root_bus(&vmd->dev->dev, 0, &vmd_ops, sd, + &resources); + if (!vmd->bus) { + pci_free_resource_list(&resources); + irq_domain_remove(vmd->irq_domain); + return -ENODEV; + } + + vmd_attach_resources(vmd); + vmd_setup_dma_ops(vmd); + dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain); + pci_rescan_bus(vmd->bus); + + WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj, + "domain"), "Can't create symlink to domain\n"); + return 0; +} + +static irqreturn_t vmd_irq(int irq, void *data) +{ + struct vmd_irq_list *irqs = data; + struct vmd_irq *vmdirq; + + rcu_read_lock(); + list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node) + generic_handle_irq(vmdirq->virq); + rcu_read_unlock(); + + return IRQ_HANDLED; +} + +static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct vmd_dev *vmd; + int i, err; + + if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20)) + return -ENOMEM; + + vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL); + if (!vmd) + return -ENOMEM; + + vmd->dev = dev; + err = pcim_enable_device(dev); + if (err < 0) + return err; + + vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0); + if (!vmd->cfgbar) + return -ENOMEM; + + pci_set_master(dev); + if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) && + dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32))) + return -ENODEV; + + vmd->msix_count = pci_msix_vec_count(dev); + if (vmd->msix_count < 0) + return -ENODEV; + + vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count, + PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); + if (vmd->msix_count < 0) + return vmd->msix_count; + + vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs), + GFP_KERNEL); + if (!vmd->irqs) + return -ENOMEM; + + for (i = 0; i < vmd->msix_count; i++) { + INIT_LIST_HEAD(&vmd->irqs[i].irq_list); + err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i), + vmd_irq, 0, "vmd", &vmd->irqs[i]); + if (err) + return err; + } + + spin_lock_init(&vmd->cfg_lock); + pci_set_drvdata(dev, vmd); + err = vmd_enable_domain(vmd); + if (err) + return err; + + dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n", + vmd->sysdata.domain); + return 0; +} + +static void vmd_remove(struct pci_dev *dev) +{ + struct vmd_dev *vmd = pci_get_drvdata(dev); + + vmd_detach_resources(vmd); + pci_set_drvdata(dev, NULL); + sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); + pci_stop_root_bus(vmd->bus); + pci_remove_root_bus(vmd->bus); + vmd_teardown_dma_ops(vmd); + irq_domain_remove(vmd->irq_domain); +} + +#ifdef CONFIG_PM +static int vmd_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + pci_save_state(pdev); + return 0; +} + +static int vmd_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + pci_restore_state(pdev); + return 0; +} +#endif +static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume); + +static const struct pci_device_id vmd_ids[] = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x201d),}, + {0,} +}; +MODULE_DEVICE_TABLE(pci, vmd_ids); + +static struct pci_driver vmd_drv = { + .name = "vmd", + .id_table = vmd_ids, + .probe = vmd_probe, + .remove = vmd_remove, + .driver = { + .pm = &vmd_dev_pm_ops, + }, +}; +module_pci_driver(vmd_drv); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION("0.6"); -- cgit