From 032c3d86b4acc4c21e435c85c454eac670c15851 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Thu, 25 Aug 2016 17:26:10 -0600 Subject: PCI/AER: Add bus flag to skip source ID matching Allow root port buses to choose to skip source id matching when finding the faulting device. Certain root port devices may return an incorrect source ID and recommend to scan child device registers for AER notifications. Signed-off-by: Jon Derrick Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer/aerdrv_core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/pci/pcie') diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 521e39c1b66d..8f5e14cac600 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -132,7 +132,9 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) * When bus id is equal to 0, it might be a bad id * reported by root port. */ - if (!nosourceid && (PCI_BUS_NUM(e_info->id) != 0)) { + if (!nosourceid && + (PCI_BUS_NUM(e_info->id) != 0) && + !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) { /* Device ID match? */ if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) return true; @@ -147,7 +149,8 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) * 1) nosourceid==y; * 2) bus id is equal to 0. Some ports might lose the bus * id of error source id; - * 3) There are multiple errors and prior id comparing fails; + * 3) bus flag PCI_BUS_FLAGS_NO_AERSID is set + * 4) There are multiple errors and prior ID comparing fails; * We check AER status registers to find possible reporter. */ if (atomic_read(&dev->enable_cnt) == 0) -- cgit From 9ff25e6b3eb0012297288dfa87930c7b62ef6ab1 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 6 Sep 2016 16:09:19 -0500 Subject: PCI/AER: Remove aerdriver.nosourceid kernel parameter The aerdriver.nosourceid kernel parameter was intended for working around broken chipsets don't supply the source ID for AER events. We recently added PCI_BUS_FLAGS_NO_AERSID, which can be set by quirks for the same purpose. Remove the aerdriver.nosourceid kernel parameter. For anything other than debugging, asking users to find and use kernel parameters is a poor user experience. Instead, we should add PCI_BUS_FLAGS_NO_AERSID quirks for any hardware that needs it. Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer/aerdrv_core.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/pci/pcie') diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 8f5e14cac600..f8a9b17da949 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -28,9 +28,7 @@ #include "aerdrv.h" static bool forceload; -static bool nosourceid; module_param(forceload, bool, 0); -module_param(nosourceid, bool, 0); #define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) @@ -132,8 +130,7 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) * When bus id is equal to 0, it might be a bad id * reported by root port. */ - if (!nosourceid && - (PCI_BUS_NUM(e_info->id) != 0) && + if ((PCI_BUS_NUM(e_info->id) != 0) && !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) { /* Device ID match? */ if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) @@ -146,11 +143,10 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) /* * When either - * 1) nosourceid==y; - * 2) bus id is equal to 0. Some ports might lose the bus + * 1) bus id is equal to 0. Some ports might lose the bus * id of error source id; - * 3) bus flag PCI_BUS_FLAGS_NO_AERSID is set - * 4) There are multiple errors and prior ID comparing fails; + * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set + * 3) There are multiple errors and prior ID comparing fails; * We check AER status registers to find possible reporter. */ if (atomic_read(&dev->enable_cnt) == 0) -- cgit From 7ece14175376051b18a9b97f0e6125cb8b864155 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 6 Sep 2016 16:24:37 -0500 Subject: PCI/AER: Remove aerdriver.forceload kernel parameter Per the PCI Firmware spec, r3.0, sec 4.5.1, on ACPI systems, the OS must not use AER unless _OSC is present and _OSC grants AER control to the OS. The aerdriver.forceload kernel parameter was a way to enable Linux AER support on ACPI systems that lack _OSC or fail to grant control the the OS. Enabling Linux AER support when the firmware doesn't want us to is a recipe for problems, e.g., the firmware might be handling AER itself. Remove the aerdriver.forceload kernel parameter and related supporting code. Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer/aerdrv.c | 7 +------ drivers/pci/pcie/aer/aerdrv.h | 8 -------- drivers/pci/pcie/aer/aerdrv_core.c | 19 ------------------- 3 files changed, 1 insertion(+), 33 deletions(-) (limited to 'drivers/pci/pcie') diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 48d21e0edd56..08ce257077db 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -70,7 +70,7 @@ static int pcie_aer_disable; void pci_no_aer(void) { - pcie_aer_disable = 1; /* has priority over 'forceload' */ + pcie_aer_disable = 1; } bool pci_aer_available(void) @@ -304,11 +304,6 @@ static int aer_probe(struct pcie_device *dev) struct aer_rpc *rpc; struct device *device = &dev->device; - /* Init */ - status = aer_init(dev); - if (status) - return status; - /* Alloc rpc data structure */ rpc = aer_alloc_rpc(dev); if (!rpc) { diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 945c939a86c5..f15ca8dc3882 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -105,7 +105,6 @@ static inline pci_ers_result_t merge_result(enum pci_ers_result orig, } extern struct bus_type pcie_port_bus_type; -int aer_init(struct pcie_device *dev); void aer_isr(struct work_struct *work); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info); @@ -121,11 +120,4 @@ static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) return 0; } #endif - -static inline void pcie_aer_force_firmware_first(struct pci_dev *pci_dev, - int enable) -{ - pci_dev->__aer_firmware_first = !!enable; - pci_dev->__aer_firmware_first_valid = 1; -} #endif /* _AERDRV_H_ */ diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index f8a9b17da949..8262527e7fed 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -27,9 +27,6 @@ #include #include "aerdrv.h" -static bool forceload; -module_param(forceload, bool, 0); - #define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) @@ -811,19 +808,3 @@ void aer_isr(struct work_struct *work) aer_isr_one_error(p_device, &e_src); mutex_unlock(&rpc->rpc_mutex); } - -/** - * aer_init - provide AER initialization - * @dev: pointer to AER pcie device - * - * Invoked when AER service driver is loaded. - */ -int aer_init(struct pcie_device *dev) -{ - if (forceload) { - dev_printk(KERN_DEBUG, &dev->device, - "aerdrv forceload requested.\n"); - pcie_aer_force_firmware_first(dev->port, 0); - } - return 0; -} -- cgit From 95c35491f663962e476179076d24d0d2c45a8fb5 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 14 Sep 2016 15:14:45 -0600 Subject: PCI/AER: Remove duplicate AER severity translation Currently the AER severity is being translated twice in the code flow for PCIe errors. It is first translated in ghes_do_proc() before calling into the AER driver. Then it is translated again when the AER driver calls cper_print_aer(). This causes the severity that is used in cper_print_aer() to be incorrect. Remove the second translation that is in cper_print_aer() since this function is already receiving the correct AER severity. Signed-off-by: Tyler Baicar Signed-off-by: Bjorn Helgaas Reviewed-by: Borislav Petkov --- drivers/pci/pcie/aer/aerdrv_errprint.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/pci/pcie') diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 167fe411ce2e..54c4b691e51f 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -219,15 +219,13 @@ int cper_severity_to_aer(int cper_severity) } EXPORT_SYMBOL_GPL(cper_severity_to_aer); -void cper_print_aer(struct pci_dev *dev, int cper_severity, +void cper_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer) { - int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0; + int layer, agent, status_strs_size, tlp_header_valid = 0; u32 status, mask; const char **status_strs; - aer_severity = cper_severity_to_aer(cper_severity); - if (aer_severity == AER_CORRECTABLE) { status = aer->cor_status; mask = aer->cor_mask; -- cgit From 4b202b716e4e282c26c4a95952ea33e318c363ab Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 14 Sep 2016 10:38:55 -0600 Subject: PCI/AER: Avoid memory allocation in interrupt handling path When handling AER events, we previously allocated a struct aer_err_info, processed the error, and freed the struct. But aer_isr_one_error() is serialized by rpc_mutex, so we never need more than one copy of the struct, and the struct is only about 70 bytes, so we're not saving much by allocating it dynamically. Embed a struct aer_err_info directly in struct aer_rpc, which is allocated at probe-time by aer_probe(). [bhelgaas: changelog] Suggested-by: Bjorn Helgaas Signed-off-by: Jon Derrick Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer/aerdrv.h | 1 + drivers/pci/pcie/aer/aerdrv_core.c | 13 ++----------- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers/pci/pcie') diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index f15ca8dc3882..d51e4a57b190 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -60,6 +60,7 @@ struct aer_rpc { struct pcie_device *rpd; /* Root Port device */ struct work_struct dpc_handler; struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX]; + struct aer_err_info e_info; unsigned short prod_idx; /* Error Producer Index */ unsigned short cons_idx; /* Error Consumer Index */ int isr; diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 8262527e7fed..9fd18a08b23e 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -711,15 +711,8 @@ static inline void aer_process_err_devices(struct pcie_device *p_device, static void aer_isr_one_error(struct pcie_device *p_device, struct aer_err_source *e_src) { - struct aer_err_info *e_info; - - /* struct aer_err_info might be big, so we allocate it with slab */ - e_info = kmalloc(sizeof(struct aer_err_info), GFP_KERNEL); - if (!e_info) { - dev_printk(KERN_DEBUG, &p_device->port->dev, - "Can't allocate mem when processing AER errors\n"); - return; - } + struct aer_rpc *rpc = get_service_data(p_device); + struct aer_err_info *e_info = &rpc->e_info; /* * There is a possibility that both correctable error and @@ -758,8 +751,6 @@ static void aer_isr_one_error(struct pcie_device *p_device, if (find_source_device(p_device->port, e_info)) aer_process_err_devices(p_device, e_info); } - - kfree(e_info); } /** -- cgit From 66b808099146166c44157600a166c8372172cd76 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 27 Sep 2016 16:23:34 -0400 Subject: PCI/AER: Cache capability position Save the position of the error reporting capability so it doesn't need to be rediscovered during error handling. Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas CC: Lukas Wunner --- drivers/pci/pcie/aer/aerdrv.c | 10 +++++----- drivers/pci/pcie/aer/aerdrv_core.c | 18 ++++++++++++------ 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'drivers/pci/pcie') diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 08ce257077db..e99efaa723d5 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -134,7 +134,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc) pcie_capability_clear_word(pdev, PCI_EXP_RTCTL, SYSTEM_ERROR_INTR_ON_MESG_MASK); - aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + aer_pos = pdev->aer_cap; /* Clear error status */ pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, ®32); pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); @@ -173,7 +173,7 @@ static void aer_disable_rootport(struct aer_rpc *rpc) */ set_downstream_devices_error_reporting(pdev, false); - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + pos = pdev->aer_cap; /* Disable Root's interrupt in response to error messages */ pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, ®32); reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; @@ -200,7 +200,7 @@ irqreturn_t aer_irq(int irq, void *context) unsigned long flags; int pos; - pos = pci_find_ext_capability(pdev->port, PCI_EXT_CAP_ID_ERR); + pos = pdev->port->aer_cap; /* * Must lock access to Root Error Status Reg, Root Error ID Reg, * and Root error producer/consumer index @@ -338,7 +338,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) u32 reg32; int pos; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; /* Disable Root's interrupt in response to error messages */ pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); @@ -391,7 +391,7 @@ static void aer_error_resume(struct pci_dev *dev) pcie_capability_write_word(dev, PCI_EXP_DEVSTA, reg16); /* Clean AER Root Error Status */ - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); if (dev->error_state == pci_channel_io_normal) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 9fd18a08b23e..b1303b32053f 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -35,7 +35,7 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev) if (pcie_aer_get_firmware_first(dev)) return -EIO; - if (!pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)) + if (!dev->aer_cap) return -EIO; return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); @@ -57,7 +57,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) int pos; u32 status; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; if (!pos) return -EIO; @@ -78,7 +78,7 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) if (!pci_is_pcie(dev)) return -ENODEV; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; if (!pos) return -EIO; @@ -97,6 +97,12 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) return 0; } +int pci_aer_init(struct pci_dev *dev) +{ + dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + return pci_cleanup_aer_error_status_regs(dev); +} + /** * add_error_device - list device to be handled * @e_info: pointer to error info @@ -154,7 +160,7 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) if (!(reg16 & PCI_EXP_AER_FLAGS)) return false; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; if (!pos) return false; @@ -551,7 +557,7 @@ static void handle_error_source(struct pcie_device *aerdev, * Correctable error does not need software intervention. * No need to go through error recovery process. */ - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; if (pos) pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, info->status); @@ -643,7 +649,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) info->status = 0; info->tlp_header_valid = 0; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; /* The device might not support AER */ if (!pos) -- cgit From 6b20f728549030056402d7f68ea670eea1eb8198 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Fri, 30 Sep 2016 10:06:23 -0500 Subject: PCI/AER: Fix aer_probe() kernel-doc comment 0516c8bcd252 ("PCI: PCIe portdrv: Simplily probe callback of service drivers") removed the "id" argument of aer_probe() but neglected to remove the kernel-doc comment. Update the comment. [bhelgaas: changelog] Signed-off-by: Cao jin Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/aer/aerdrv.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/pci/pcie') diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index e99efaa723d5..b2992e60eeeb 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -294,7 +294,6 @@ static void aer_remove(struct pcie_device *dev) /** * aer_probe - initialize resources * @dev: pointer to the pcie_dev data structure - * @id: pointer to the service id data structure * * Invoked when PCI Express bus loads AER service driver. */ -- cgit