From 6568d82512b0a64809acff3d7a747362fa4288c8 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 18 Jan 2024 13:08:15 +0200 Subject: PCI/DPC: Print all TLP Prefixes, not just the first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TLP Prefix Log Register consists of multiple DWORDs (PCIe r6.1 sec 7.9.14.13) but the loop in dpc_process_rp_pio_error() keeps reading from the first DWORD, so we print only the first PIO TLP Prefix (duplicated several times), and we never print the second, third, etc., Prefixes. Add the iteration count based offset calculation into the config read. Fixes: f20c4ea49ec4 ("PCI/DPC: Add eDPC support") Link: https://lore.kernel.org/r/20240118110815.3867-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Ilpo Järvinen [bhelgaas: add user-visible details to commit log] Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/dpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 94111e438241..e5d7c12854fa 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -234,7 +234,7 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev) for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) { pci_read_config_dword(pdev, - cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix); + cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG + i * 4, &prefix); pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); } clear_status: -- cgit From 2ae8fbbe1cd42a6624d345151859982cba89bbd3 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Wed, 7 Feb 2024 18:18:54 +0000 Subject: PCI/DPC: Ignore Surprise Down error on hot removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to PCIe r6.0 sec 6.7.6 [1], async removal with DPC may result in surprise down error. This error is expected and is just a side-effect of async remove. Ignore surprise down error generated as a side-effect of async remove. Typically, this error is benign as the pciehp handler invoked by PDC or/and DLLSC alongside DPC, de-enumerates and brings down the device appropriately, but the error messages might confuse users. Get rid of these irritating log messages with a 1s delay while pciehp waits for DPC recovery. The implementation is as follows: On an async remove a DPC is triggered along with a Presence Detect State change and/or DLL State Change. Determine it's an async remove by checking for DPC Trigger Status in DPC Status Register and Surprise Down Error Status in AER Uncorrected Error Status to be non-zero. If true, treat the DPC event as a side-effect of async remove, clear the error status registers and continue with hot-plug tear down routines. If not, follow the existing routine to handle AER and DPC errors. Masking Surprise Down Errors was explored as an alternative approach, but discarded due to the odd behavior that masking only avoids the interrupt, but still records an error per PCIe r6.0, sec 6.2.3.2.2. That stale error would be reported the next time some error other than Surprise Down is handled. Dmesg before: pcieport 0000:00:01.4: DPC: containment event, status:0x1f01 source:0x0000 pcieport 0000:00:01.4: DPC: unmasked uncorrectable error detected pcieport 0000:00:01.4: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, (Receiver ID) pcieport 0000:00:01.4: device [1022:14ab] error status/mask=00000020/04004000 pcieport 0000:00:01.4: [ 5] SDES (First) nvme nvme2: frozen state error detected, reset controller pcieport 0000:00:01.4: DPC: Data Link Layer Link Active not set in 1000 msec pcieport 0000:00:01.4: AER: subordinate device reset failed pcieport 0000:00:01.4: AER: device recovery failed pcieport 0000:00:01.4: pciehp: Slot(16): Link Down nvme2n1: detected capacity change from 1953525168 to 0 pci 0000:04:00.0: Removing from iommu group 49 Dmesg after: pcieport 0000:00:01.4: pciehp: Slot(16): Link Down nvme1n1: detected capacity change from 1953525168 to 0 pci 0000:04:00.0: Removing from iommu group 37 [1] PCI Express Base Specification Revision 6.0, Dec 16 2021. https://members.pcisig.com/wg/PCI-SIG/document/16609 Link: https://lore.kernel.org/r/20240207181854.121335-1-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Smita Koralahalli Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ilpo Järvinen --- drivers/pci/pcie/dpc.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index e5d7c12854fa..98b42e425bb9 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -303,10 +303,70 @@ void dpc_process_error(struct pci_dev *pdev) } } +static void pci_clear_surpdn_errors(struct pci_dev *pdev) +{ + if (pdev->dpc_rp_extensions) + pci_write_config_dword(pdev, pdev->dpc_cap + + PCI_EXP_DPC_RP_PIO_STATUS, ~0); + + /* + * In practice, Surprise Down errors have been observed to also set + * error bits in the Status Register as well as the Fatal Error + * Detected bit in the Device Status Register. + */ + pci_write_config_word(pdev, PCI_STATUS, 0xffff); + + pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_FED); +} + +static void dpc_handle_surprise_removal(struct pci_dev *pdev) +{ + if (!pcie_wait_for_link(pdev, false)) { + pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n"); + goto out; + } + + if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) + goto out; + + pci_aer_raw_clear_status(pdev); + pci_clear_surpdn_errors(pdev); + + pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_TRIGGER); + +out: + clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + wake_up_all(&dpc_completed_waitqueue); +} + +static bool dpc_is_surprise_removal(struct pci_dev *pdev) +{ + u16 status; + + if (!pdev->is_hotplug_bridge) + return false; + + if (pci_read_config_word(pdev, pdev->aer_cap + PCI_ERR_UNCOR_STATUS, + &status)) + return false; + + return status & PCI_ERR_UNC_SURPDN; +} + static irqreturn_t dpc_handler(int irq, void *context) { struct pci_dev *pdev = context; + /* + * According to PCIe r6.0 sec 6.7.6, errors are an expected side effect + * of async removal and should be ignored by software. + */ + if (dpc_is_surprise_removal(pdev)) { + dpc_handle_surprise_removal(pdev); + return IRQ_HANDLED; + } + dpc_process_error(pdev); /* We configure DPC so it only triggers on ERR_FATAL */ -- cgit From 627c6db20703b5d18d928464f411d0d4ec327508 Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Tue, 5 Mar 2024 12:30:56 +0100 Subject: PCI/DPC: Quirk PIO log size for Intel Raptor Lake Root Ports Commit 5459c0b70467 ("PCI/DPC: Quirk PIO log size for certain Intel Root Ports") and commit 3b8803494a06 ("PCI/DPC: Quirk PIO log size for Intel Ice Lake Root Ports") add quirks for Ice, Tiger and Alder Lake Root Ports. System firmware for Raptor Lake still has the bug, so Linux logs the warning below on several Raptor Lake systems like Dell Precision 3581 with Intel Raptor Lake processor (0W18NX) system firmware/BIOS version 1.10.1. pci 0000:00:07.0: [8086:a76e] type 01 class 0x060400 pci 0000:00:07.0: DPC: RP PIO log size 0 is invalid pci 0000:00:07.1: [8086:a73f] type 01 class 0x060400 pci 0000:00:07.1: DPC: RP PIO log size 0 is invalid Apply the quirk for Raptor Lake Root Ports as well. This also enables the DPC driver to dump the RP PIO Log registers when DPC is triggered. Link: https://lore.kernel.org/r/20240305113057.56468-1-pmenzel@molgen.mpg.de Reported-by: Niels van Aert Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218560 Signed-off-by: Paul Menzel Signed-off-by: Bjorn Helgaas Cc: Cc: Mika Westerberg Cc: Niels van Aert --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d797df6e5f3e..663d838fa861 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -6225,6 +6225,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2b, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2d, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a2f, dpc_log_size); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa73f, dpc_log_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0xa76e, dpc_log_size); #endif /* -- cgit