From aa667c6408d20a84c7637420bc3b7aa0abab59a2 Mon Sep 17 00:00:00 2001 From: James Puthukattukaran Date: Mon, 9 Jul 2018 11:31:25 -0400 Subject: PCI: Workaround IDT switch ACS Source Validation erratum Some IDT switches incorrectly flag an ACS Source Validation error on completions for config read requests even though PCIe r4.0, sec 6.12.1.1, says that completions are never affected by ACS Source Validation. Here's the text of IDT 89H32H8G3-YC, erratum #36: Item #36 - Downstream port applies ACS Source Validation to Completions Section 6.12.1.1 of the PCI Express Base Specification 3.1 states that completions are never affected by ACS Source Validation. However, completions received by a downstream port of the PCIe switch from a device that has not yet captured a PCIe bus number are incorrectly dropped by ACS Source Validation by the switch downstream port. Workaround: Issue a CfgWr1 to the downstream device before issuing the first CfgRd1 to the device. This allows the downstream device to capture its bus number; ACS Source Validation no longer stops completions from being forwarded by the downstream port. It has been observed that Microsoft Windows implements this workaround already; however, some versions of Linux and other operating systems may not. When doing the first config read to probe for a device, if the device is behind an IDT switch with this erratum: 1. Disable ACS Source Validation if enabled 2. Wait for device to become ready to accept config accesses (by using the Config Request Retry Status mechanism) 3. Do a config write to the endpoint 4. Enable ACS Source Validation (if it was enabled to begin with) The workaround suggested by IDT is basically only step 3, but we don't know when the device is ready to accept config requests. That means we need to do config reads until we receive a non-Config Request Retry Status, which means we need to disable ACS SV temporarily. Signed-off-by: James Puthukattukaran [bhelgaas: changelog, clean up whitespace, fold in unused variable fix from Anders Roxell ] Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Williamson --- drivers/pci/probe.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ac876e32de4b..7c0c8ab94bcf 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2156,8 +2156,8 @@ static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l, return true; } -bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, - int timeout) +bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, + int timeout) { if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) return false; @@ -2172,6 +2172,24 @@ bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, return true; } + +bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, + int timeout) +{ +#ifdef CONFIG_PCI_QUIRKS + struct pci_dev *bridge = bus->self; + + /* + * Certain IDT switches have an issue where they improperly trigger + * ACS Source Validation errors on completions for config reads. + */ + if (bridge && bridge->vendor == PCI_VENDOR_ID_IDT && + bridge->device == 0x80b5) + return pci_idt_bus_quirk(bus, devfn, l, timeout); +#endif + + return pci_bus_generic_read_dev_vendor_id(bus, devfn, l, timeout); +} EXPORT_SYMBOL(pci_bus_read_dev_vendor_id); /* -- cgit From 2d1ce5ec2117d16047334a1aa4b62e0cfb5a0605 Mon Sep 17 00:00:00 2001 From: Alexandru Gagniuc Date: Mon, 6 Aug 2018 18:25:35 -0500 Subject: PCI: Check for PCIe Link downtraining When both ends of a PCIe Link are capable of a higher bandwidth than is currently in use, the Link is said to be "downtrained". A downtrained Link may indicate hardware or configuration problems in the system, but it's hard to identify such Links from userspace. Refactor pcie_print_link_status() so it continues to always print PCIe bandwidth information, as several NIC drivers desire. Add a new internal __pcie_print_link_status() to emit a message only when a device's bandwidth is constrained by the fabric and call it from the PCI core for all devices, which identifies all downtrained Links. It also emits messages for a few cases that are technically not downtrained, such as a x4 device in an open-ended x1 slot. Signed-off-by: Alexandru Gagniuc [bhelgaas: changelog, move __pcie_print_link_status() declaration to drivers/pci/, rename pcie_check_upstream_link() to pcie_report_downtraining()] Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 7c0c8ab94bcf..71412db3cbeb 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2223,6 +2223,25 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) return dev; } +static void pcie_report_downtraining(struct pci_dev *dev) +{ + if (!pci_is_pcie(dev)) + return; + + /* Look from the device up to avoid downstream ports with no devices */ + if ((pci_pcie_type(dev) != PCI_EXP_TYPE_ENDPOINT) && + (pci_pcie_type(dev) != PCI_EXP_TYPE_LEG_END) && + (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM)) + return; + + /* Multi-function PCIe devices share the same link/status */ + if (PCI_FUNC(dev->devfn) != 0 || dev->is_virtfn) + return; + + /* Print link status only if the device is constrained by the fabric */ + __pcie_print_link_status(dev, false); +} + static void pci_init_capabilities(struct pci_dev *dev) { /* Enhanced Allocation */ @@ -2258,6 +2277,8 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Advanced Error Reporting */ pci_aer_init(dev); + pcie_report_downtraining(dev); + if (pci_probe_reset_function(dev) == 0) dev->reset_fn = 1; } -- cgit From 3dbe97efe8bf450b183d6dee2305cbc032e6b8a4 Mon Sep 17 00:00:00 2001 From: Myron Stowe Date: Mon, 13 Aug 2018 12:19:39 -0600 Subject: PCI: Skip MPS logic for Virtual Functions (VFs) PCIe r4.0, sec 9.3.5.4, "Device Control Register", shows both Max_Payload_Size (MPS) and Max_Read_request_Size (MRRS) to be 'RsvdP' for VFs. Just prior to the table it states: "PF and VF functionality is defined in Section 7.5.3.4 except where noted in Table 9-16. For VF fields marked 'RsvdP', the PF setting applies to the VF." All of which implies that with respect to Max_Payload_Size Supported (MPSS), MPS, and MRRS values, we should not be paying any attention to the VF's fields, but rather only to the PF's. Only looking at the PF's fields also logically makes sense as it's the sole physical interface to the PCIe bus. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200527 Fixes: 27d868b5e6cf ("PCI: Set MPS to match upstream bridge") Signed-off-by: Myron Stowe Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # 4.3+ Cc: Keith Busch Cc: Sinan Kaya Cc: Dongdong Liu Cc: Jon Mason --- drivers/pci/probe.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 71412db3cbeb..c2533ed45fff 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1730,6 +1730,10 @@ static void pci_configure_mps(struct pci_dev *dev) if (!pci_is_pcie(dev) || !bridge || !pci_is_pcie(bridge)) return; + /* MPS and MRRS fields are of type 'RsvdP' for VFs, short-circuit out */ + if (dev->is_virtfn) + return; + mps = pcie_get_mps(dev); p_mps = pcie_get_mps(bridge); -- cgit From 9f0e89359775ee21fe1ea732e34edb52aef5addf Mon Sep 17 00:00:00 2001 From: Myron Stowe Date: Mon, 13 Aug 2018 12:19:46 -0600 Subject: PCI: Match Root Port's MPS to endpoint's MPSS as necessary In commit 27d868b5e6cf ("PCI: Set MPS to match upstream bridge"), we made sure every device's MPS setting matches its upstream bridge, making it more likely that a hot-added device will work in a system with an optimized MPS configuration. Recently I've started encountering systems where the endpoint device's MPSS capability is less than its Root Port's current MPS value, thus the endpoint is not capable of matching its upstream bridge's MPS setting (see: bugzilla via "Link:" below). This leaves the system vulnerable - the upstream Root Port could respond with larger TLPs than the device can handle, and the device will consider them to be 'Malformed'. One could use the "pci=pcie_bus_safe" kernel parameter to work around the issue, but that forces a user to supply a kernel parameter to get the system to function reliably and may end up limiting MPS settings of other unrelated, sub-topologies which could benefit from maintaining their larger values. Augment Keith's approach to include tuning down a Root Port's MPS setting when its hot-added endpoint device is not capable of matching it. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200527 Signed-off-by: Myron Stowe Signed-off-by: Bjorn Helgaas Acked-by: Jon Mason Cc: Keith Busch Cc: Sinan Kaya Cc: Dongdong Liu --- drivers/pci/probe.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/pci/probe.c') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c2533ed45fff..bca6d2741969 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1725,7 +1725,7 @@ int pci_setup_device(struct pci_dev *dev) static void pci_configure_mps(struct pci_dev *dev) { struct pci_dev *bridge = pci_upstream_bridge(dev); - int mps, p_mps, rc; + int mps, mpss, p_mps, rc; if (!pci_is_pcie(dev) || !bridge || !pci_is_pcie(bridge)) return; @@ -1753,6 +1753,14 @@ static void pci_configure_mps(struct pci_dev *dev) if (pcie_bus_config != PCIE_BUS_DEFAULT) return; + mpss = 128 << dev->pcie_mpss; + if (mpss < p_mps && pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) { + pcie_set_mps(bridge, mpss); + pci_info(dev, "Upstream bridge's Max Payload Size set to %d (was %d, max %d)\n", + mpss, p_mps, 128 << bridge->pcie_mpss); + p_mps = pcie_get_mps(bridge); + } + rc = pcie_set_mps(dev, p_mps); if (rc) { pci_warn(dev, "can't set Max Payload Size to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n", @@ -1761,7 +1769,7 @@ static void pci_configure_mps(struct pci_dev *dev) } pci_info(dev, "Max Payload Size set to %d (was %d, max %d)\n", - p_mps, mps, 128 << dev->pcie_mpss); + p_mps, mps, mpss); } static struct hpp_type0 pci_default_type0 = { -- cgit