From 02a06f5f1a6aa2e12b2046b51ed3462c2c340037 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 6 Dec 2023 16:42:29 -0600 Subject: PCI/AER: Use 'Correctable' and 'Uncorrectable' spec terms for errors The PCIe spec classifies errors as either "Correctable" or "Uncorrectable". Previously we printed these as "Corrected" or "Uncorrected". To avoid confusion, use the same terms as the spec. One confusing situation is when one agent detects an error, but another agent is responsible for recovery, e.g., by re-attempting the operation. The first agent may log a "correctable" error but it has not yet been corrected. The recovery agent must report an uncorrectable error if it is unable to recover. If we print the first agent's error as "Corrected", it gives the false impression that it has already been resolved. Sample message change: - pcieport 0000:00:1c.5: AER: Corrected error received: 0000:00:1c.5 + pcieport 0000:00:1c.5: AER: Correctable error received: 0000:00:1c.5 Link: https://lore.kernel.org/r/20231206224231.732765-2-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan --- drivers/pci/pcie/aer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 42a3bd35a3e1..4e39b64a58d4 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -435,10 +435,10 @@ void pci_aer_exit(struct pci_dev *dev) /* * AER error strings */ -static const char *aer_error_severity_string[] = { - "Uncorrected (Non-Fatal)", - "Uncorrected (Fatal)", - "Corrected" +static const char * const aer_error_severity_string[] = { + "Uncorrectable (Non-Fatal)", + "Uncorrectable (Fatal)", + "Correctable" }; static const char *aer_error_layer[] = { -- cgit From 1291b716bbf969e101d517bfb8ba18d958f758b8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 6 Dec 2023 16:42:30 -0600 Subject: PCI/AER: Decode Requester ID when no error info found When a device with AER detects an error, it logs error information in its own AER Error Status registers. It may send an Error Message to the Root Port (RCEC in the case of an RCiEP), which logs the fact that an Error Message was received (Root Error Status) and the Requester ID of the message source (Error Source Identification). aer_print_port_info() prints the Requester ID from the Root Port Error Source in the usual Linux "bb:dd.f" format, but when find_source_device() finds no error details in the hierarchy below the Root Port, it printed the raw Requester ID without decoding it. Decode the Requester ID in the usual Linux format so it matches other messages. Sample message changes: - pcieport 0000:00:1c.5: AER: Correctable error received: 0000:00:1c.5 - pcieport 0000:00:1c.5: AER: can't find device of ID00e5 + pcieport 0000:00:1c.5: AER: Correctable error message received from 0000:00:1c.5 + pcieport 0000:00:1c.5: AER: found no error details for 0000:00:1c.5 Link: https://lore.kernel.org/r/20231206224231.732765-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan --- drivers/pci/pcie/aer.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 4e39b64a58d4..412b00bdf918 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -740,7 +740,7 @@ static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) u8 bus = info->id >> 8; u8 devfn = info->id & 0xff; - pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n", + pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d\n", info->multi_error_valid ? "Multiple " : "", aer_error_severity_string[info->severity], pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), @@ -929,7 +929,12 @@ static bool find_source_device(struct pci_dev *parent, pci_walk_bus(parent->subordinate, find_device_iter, e_info); if (!e_info->error_dev_num) { - pci_info(parent, "can't find device of ID%04x\n", e_info->id); + u8 bus = e_info->id >> 8; + u8 devfn = e_info->id & 0xff; + + pci_info(parent, "found no error details for %04x:%02x:%02x.%d\n", + pci_domain_nr(parent->bus), bus, PCI_SLOT(devfn), + PCI_FUNC(devfn)); return false; } return true; -- cgit From db02e176f597a14eb696141ffa008c2429453a15 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 6 Dec 2023 16:42:31 -0600 Subject: PCI/AER: Use explicit register sizes for struct members aer_irq() reads the AER Root Error Status and Error Source Identification (PCI_ERR_ROOT_STATUS and PCI_ERR_ROOT_ERR_SRC) registers directly into struct aer_err_source. Both registers are 32 bits, so declare the members explicitly as "u32" instead of "unsigned int". Similarly, aer_get_device_error_info() reads the AER Header Log (PCI_ERR_HEADER_LOG) registers, which are also 32 bits, into struct aer_header_log_regs. Declare those members as "u32" as well. No functional changes intended. Link: https://lore.kernel.org/r/20231206224231.732765-4-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron --- drivers/pci/pcie/aer.c | 4 ++-- include/linux/aer.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 412b00bdf918..05fc30bb5134 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -41,8 +41,8 @@ #define AER_MAX_TYPEOF_UNCOR_ERRS 27 /* as per PCI_ERR_UNCOR_STATUS*/ struct aer_err_source { - unsigned int status; - unsigned int id; + u32 status; /* PCI_ERR_ROOT_STATUS */ + u32 id; /* PCI_ERR_ROOT_ERR_SRC */ }; struct aer_rpc { diff --git a/include/linux/aer.h b/include/linux/aer.h index f6ea2f57d808..ae0fae70d4bd 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -19,10 +19,10 @@ struct pci_dev; struct aer_header_log_regs { - unsigned int dw0; - unsigned int dw1; - unsigned int dw2; - unsigned int dw3; + u32 dw0; + u32 dw1; + u32 dw2; + u32 dw3; }; struct aer_capability_regs { -- cgit