summaryrefslogtreecommitdiff
path: root/drivers/pci/pcie/aer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci/pcie/aer.c')
-rw-r--r--drivers/pci/pcie/aer.c884
1 files changed, 613 insertions, 271 deletions
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 625f7b2cafe4..e0bcaa896803 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -2,7 +2,7 @@
/*
* Implement the AER root port service driver. The driver registers an IRQ
* handler. When a root port triggers an AER interrupt, the IRQ handler
- * collects root port status and schedules work.
+ * collects Root Port status and schedules work.
*
* Copyright (C) 2006 Intel Corp.
* Tom Long Nguyen (tom.l.nguyen@intel.com)
@@ -17,6 +17,7 @@
#include <linux/bitops.h>
#include <linux/cper.h>
+#include <linux/dev_printk.h>
#include <linux/pci.h>
#include <linux/pci-acpi.h>
#include <linux/sched.h>
@@ -27,21 +28,27 @@
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
+#include <linux/ratelimit.h>
#include <linux/slab.h>
+#include <linux/vmcore_info.h>
#include <acpi/apei.h>
+#include <acpi/ghes.h>
#include <ras/ras_event.h>
#include "../pci.h"
#include "portdrv.h"
+#define aer_printk(level, pdev, fmt, arg...) \
+ dev_printk(level, &(pdev)->dev, fmt, ##arg)
+
#define AER_ERROR_SOURCES_MAX 128
#define AER_MAX_TYPEOF_COR_ERRS 16 /* as per PCI_ERR_COR_STATUS */
-#define AER_MAX_TYPEOF_UNCOR_ERRS 27 /* as per PCI_ERR_UNCOR_STATUS*/
+#define AER_MAX_TYPEOF_UNCOR_ERRS 32 /* as per PCI_ERR_UNCOR_STATUS*/
struct aer_err_source {
- unsigned int status;
- unsigned int id;
+ u32 status; /* PCI_ERR_ROOT_STATUS */
+ u32 id; /* PCI_ERR_ROOT_ERR_SRC */
};
struct aer_rpc {
@@ -49,15 +56,15 @@ struct aer_rpc {
DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
};
-/* AER stats for the device */
-struct aer_stats {
+/* AER info for the device */
+struct aer_info {
/*
* Fields for all AER capable devices. They indicate the errors
* "as seen by this device". Note that this may mean that if an
- * end point is causing problems, the AER counters may increment
- * at its link partner (e.g. root port) because the errors will be
- * "seen" by the link partner and not the problematic end point
+ * Endpoint is causing problems, the AER counters may increment
+ * at its link partner (e.g. Root Port) because the errors will be
+ * "seen" by the link partner and not the problematic Endpoint
* itself (which may report all counters as 0 as it never saw any
* problems).
*/
@@ -75,22 +82,36 @@ struct aer_stats {
u64 dev_total_nonfatal_errs;
/*
- * Fields for Root ports & root complex event collectors only, these
+ * Fields for Root Ports & Root Complex Event Collectors only; these
* indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
- * messages received by the root port / event collector, INCLUDING the
- * ones that are generated internally (by the rootport itself)
+ * messages received by the Root Port / Event Collector, INCLUDING the
+ * ones that are generated internally (by the Root Port itself)
*/
u64 rootport_total_cor_errs;
u64 rootport_total_fatal_errs;
u64 rootport_total_nonfatal_errs;
+
+ /* Ratelimits for errors */
+ struct ratelimit_state correctable_ratelimit;
+ struct ratelimit_state nonfatal_ratelimit;
};
#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \
+ PCI_ERR_UNC_POISON_BLK | \
PCI_ERR_UNC_ECRC| \
PCI_ERR_UNC_UNSUP| \
PCI_ERR_UNC_COMP_ABORT| \
PCI_ERR_UNC_UNX_COMP| \
- PCI_ERR_UNC_MALF_TLP)
+ PCI_ERR_UNC_ACSV | \
+ PCI_ERR_UNC_MCBTLP | \
+ PCI_ERR_UNC_ATOMEG | \
+ PCI_ERR_UNC_DMWR_BLK | \
+ PCI_ERR_UNC_XLAT_BLK | \
+ PCI_ERR_UNC_TLPPRE | \
+ PCI_ERR_UNC_MALF_TLP | \
+ PCI_ERR_UNC_IDE_CHECK | \
+ PCI_ERR_UNC_MISR_IDE | \
+ PCI_ERR_UNC_PCRC_CHECK)
#define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \
PCI_EXP_RTCTL_SENFEE| \
@@ -106,12 +127,12 @@ struct aer_stats {
PCI_ERR_ROOT_MULTI_COR_RCV | \
PCI_ERR_ROOT_MULTI_UNCOR_RCV)
-static int pcie_aer_disable;
+static bool pcie_aer_disable;
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
void pci_no_aer(void)
{
- pcie_aer_disable = 1;
+ pcie_aer_disable = true;
}
bool pci_aer_available(void)
@@ -137,7 +158,7 @@ static const char * const ecrc_policy_str[] = {
* enable_ecrc_checking - enable PCIe ECRC checking for a device
* @dev: the PCI device
*
- * Returns 0 on success, or negative on failure.
+ * Return: 0 on success, or negative on failure.
*/
static int enable_ecrc_checking(struct pci_dev *dev)
{
@@ -158,10 +179,10 @@ static int enable_ecrc_checking(struct pci_dev *dev)
}
/**
- * disable_ecrc_checking - disables PCIe ECRC checking for a device
+ * disable_ecrc_checking - disable PCIe ECRC checking for a device
* @dev: the PCI device
*
- * Returns 0 on success, or negative on failure.
+ * Return: 0 on success, or negative on failure.
*/
static int disable_ecrc_checking(struct pci_dev *dev)
{
@@ -179,11 +200,15 @@ static int disable_ecrc_checking(struct pci_dev *dev)
}
/**
- * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy
+ * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based
+ * on global policy
* @dev: the PCI device
*/
void pcie_set_ecrc_checking(struct pci_dev *dev)
{
+ if (!pcie_aer_is_native(dev))
+ return;
+
switch (ecrc_policy) {
case ECRC_POLICY_DEFAULT:
return;
@@ -226,8 +251,9 @@ int pcie_aer_is_native(struct pci_dev *dev)
return pcie_ports_native || host->native_aer;
}
+EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, "CXL");
-int pci_enable_pcie_error_reporting(struct pci_dev *dev)
+static int pci_enable_pcie_error_reporting(struct pci_dev *dev)
{
int rc;
@@ -237,19 +263,6 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev)
rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
return pcibios_err_to_errno(rc);
}
-EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
-
-int pci_disable_pcie_error_reporting(struct pci_dev *dev)
-{
- int rc;
-
- if (!pcie_aer_is_native(dev))
- return -EIO;
-
- rc = pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
- return pcibios_err_to_errno(rc);
-}
-EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
{
@@ -290,10 +303,10 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev)
* pci_aer_raw_clear_status - Clear AER error registers.
* @dev: the PCI device
*
- * Clearing AER error status registers unconditionally, regardless of
+ * Clear AER error status registers unconditionally, regardless of
* whether they're owned by firmware or the OS.
*
- * Returns 0 on success, or negative on failure.
+ * Return: 0 on success, or negative on failure.
*/
int pci_aer_raw_clear_status(struct pci_dev *dev)
{
@@ -380,13 +393,22 @@ void pci_aer_init(struct pci_dev *dev)
if (!dev->aer_cap)
return;
- dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
+ dev->aer_info = kzalloc(sizeof(*dev->aer_info), GFP_KERNEL);
+ if (!dev->aer_info) {
+ dev->aer_cap = 0;
+ return;
+ }
+
+ ratelimit_state_init(&dev->aer_info->correctable_ratelimit,
+ DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
+ ratelimit_state_init(&dev->aer_info->nonfatal_ratelimit,
+ DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
/*
* We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER,
* PCI_ERR_COR_MASK, and PCI_ERR_CAP. Root and Root Complex Event
- * Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r5.0, sec
- * 7.8.4).
+ * Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r6.0, sec
+ * 7.8.4.9).
*/
n = pcie_cap_has_rtctl(dev) ? 5 : 4;
pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n);
@@ -401,8 +423,8 @@ void pci_aer_init(struct pci_dev *dev)
void pci_aer_exit(struct pci_dev *dev)
{
- kfree(dev->aer_stats);
- dev->aer_stats = NULL;
+ kfree(dev->aer_info);
+ dev->aer_info = NULL;
}
#define AER_AGENT_RECEIVER 0
@@ -443,10 +465,10 @@ void pci_aer_exit(struct pci_dev *dev)
/*
* AER error strings
*/
-static const char *aer_error_severity_string[] = {
- "Uncorrected (Non-Fatal)",
- "Uncorrected (Fatal)",
- "Corrected"
+static const char * const aer_error_severity_string[] = {
+ "Uncorrectable (Non-Fatal)",
+ "Uncorrectable (Fatal)",
+ "Correctable"
};
static const char *aer_error_layer[] = {
@@ -518,11 +540,11 @@ static const char *aer_uncorrectable_error_string[] = {
"AtomicOpBlocked", /* Bit Position 24 */
"TLPBlockedErr", /* Bit Position 25 */
"PoisonTLPBlocked", /* Bit Position 26 */
- NULL, /* Bit Position 27 */
- NULL, /* Bit Position 28 */
- NULL, /* Bit Position 29 */
- NULL, /* Bit Position 30 */
- NULL, /* Bit Position 31 */
+ "DMWrReqBlocked", /* Bit Position 27 */
+ "IDECheck", /* Bit Position 28 */
+ "MisIDETLP", /* Bit Position 29 */
+ "PCRC_CHECK", /* Bit Position 30 */
+ "TLPXlatBlocked", /* Bit Position 31 */
};
static const char *aer_agent_string[] = {
@@ -540,10 +562,10 @@ static const char *aer_agent_string[] = {
{ \
unsigned int i; \
struct pci_dev *pdev = to_pci_dev(dev); \
- u64 *stats = pdev->aer_stats->stats_array; \
+ u64 *stats = pdev->aer_info->stats_array; \
size_t len = 0; \
\
- for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\
+ for (i = 0; i < ARRAY_SIZE(pdev->aer_info->stats_array); i++) { \
if (strings_array[i]) \
len += sysfs_emit_at(buf, len, "%s %llu\n", \
strings_array[i], \
@@ -554,7 +576,7 @@ static const char *aer_agent_string[] = {
i, stats[i]); \
} \
len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string, \
- pdev->aer_stats->total_field); \
+ pdev->aer_info->total_field); \
return len; \
} \
static DEVICE_ATTR_RO(name)
@@ -575,7 +597,7 @@ aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
char *buf) \
{ \
struct pci_dev *pdev = to_pci_dev(dev); \
- return sysfs_emit(buf, "%llu\n", pdev->aer_stats->field); \
+ return sysfs_emit(buf, "%llu\n", pdev->aer_info->field); \
} \
static DEVICE_ATTR_RO(name)
@@ -602,7 +624,7 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
struct device *dev = kobj_to_dev(kobj);
struct pci_dev *pdev = to_pci_dev(dev);
- if (!pdev->aer_stats)
+ if (!pdev->aer_info)
return 0;
if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
@@ -620,31 +642,137 @@ const struct attribute_group aer_stats_attr_group = {
.is_visible = aer_stats_attrs_are_visible,
};
+/*
+ * Ratelimit interval
+ * <=0: disabled with ratelimit.interval = 0
+ * >0: enabled with ratelimit.interval in ms
+ */
+#define aer_ratelimit_interval_attr(name, ratelimit) \
+ static ssize_t \
+ name##_show(struct device *dev, struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ \
+ return sysfs_emit(buf, "%d\n", \
+ pdev->aer_info->ratelimit.interval); \
+ } \
+ \
+ static ssize_t \
+ name##_store(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t count) \
+ { \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ int interval; \
+ \
+ if (!capable(CAP_SYS_ADMIN)) \
+ return -EPERM; \
+ \
+ if (kstrtoint(buf, 0, &interval) < 0) \
+ return -EINVAL; \
+ \
+ if (interval <= 0) \
+ interval = 0; \
+ else \
+ interval = msecs_to_jiffies(interval); \
+ \
+ pdev->aer_info->ratelimit.interval = interval; \
+ \
+ return count; \
+ } \
+ static DEVICE_ATTR_RW(name);
+
+#define aer_ratelimit_burst_attr(name, ratelimit) \
+ static ssize_t \
+ name##_show(struct device *dev, struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ \
+ return sysfs_emit(buf, "%d\n", \
+ pdev->aer_info->ratelimit.burst); \
+ } \
+ \
+ static ssize_t \
+ name##_store(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t count) \
+ { \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ int burst; \
+ \
+ if (!capable(CAP_SYS_ADMIN)) \
+ return -EPERM; \
+ \
+ if (kstrtoint(buf, 0, &burst) < 0) \
+ return -EINVAL; \
+ \
+ pdev->aer_info->ratelimit.burst = burst; \
+ \
+ return count; \
+ } \
+ static DEVICE_ATTR_RW(name);
+
+#define aer_ratelimit_attrs(name) \
+ aer_ratelimit_interval_attr(name##_ratelimit_interval_ms, \
+ name##_ratelimit) \
+ aer_ratelimit_burst_attr(name##_ratelimit_burst, \
+ name##_ratelimit)
+
+aer_ratelimit_attrs(correctable)
+aer_ratelimit_attrs(nonfatal)
+
+static struct attribute *aer_attrs[] = {
+ &dev_attr_correctable_ratelimit_interval_ms.attr,
+ &dev_attr_correctable_ratelimit_burst.attr,
+ &dev_attr_nonfatal_ratelimit_interval_ms.attr,
+ &dev_attr_nonfatal_ratelimit_burst.attr,
+ NULL
+};
+
+static umode_t aer_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (!pdev->aer_info)
+ return 0;
+
+ return a->mode;
+}
+
+const struct attribute_group aer_attr_group = {
+ .name = "aer",
+ .attrs = aer_attrs,
+ .is_visible = aer_attrs_are_visible,
+};
+
static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
struct aer_err_info *info)
{
unsigned long status = info->status & ~info->mask;
int i, max = -1;
u64 *counter = NULL;
- struct aer_stats *aer_stats = pdev->aer_stats;
+ struct aer_info *aer_info = pdev->aer_info;
- if (!aer_stats)
+ if (!aer_info)
return;
switch (info->severity) {
case AER_CORRECTABLE:
- aer_stats->dev_total_cor_errs++;
- counter = &aer_stats->dev_cor_errs[0];
+ aer_info->dev_total_cor_errs++;
+ counter = &aer_info->dev_cor_errs[0];
max = AER_MAX_TYPEOF_COR_ERRS;
break;
case AER_NONFATAL:
- aer_stats->dev_total_nonfatal_errs++;
- counter = &aer_stats->dev_nonfatal_errs[0];
+ aer_info->dev_total_nonfatal_errs++;
+ hwerr_log_error_type(HWERR_RECOV_PCI);
+ counter = &aer_info->dev_nonfatal_errs[0];
max = AER_MAX_TYPEOF_UNCOR_ERRS;
break;
case AER_FATAL:
- aer_stats->dev_total_fatal_errs++;
- counter = &aer_stats->dev_fatal_errs[0];
+ aer_info->dev_total_fatal_errs++;
+ counter = &aer_info->dev_fatal_errs[0];
max = AER_MAX_TYPEOF_UNCOR_ERRS;
break;
}
@@ -656,61 +784,105 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
struct aer_err_source *e_src)
{
- struct aer_stats *aer_stats = pdev->aer_stats;
+ struct aer_info *aer_info = pdev->aer_info;
- if (!aer_stats)
+ if (!aer_info)
return;
if (e_src->status & PCI_ERR_ROOT_COR_RCV)
- aer_stats->rootport_total_cor_errs++;
+ aer_info->rootport_total_cor_errs++;
if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
- aer_stats->rootport_total_fatal_errs++;
+ aer_info->rootport_total_fatal_errs++;
else
- aer_stats->rootport_total_nonfatal_errs++;
+ aer_info->rootport_total_nonfatal_errs++;
+ }
+}
+
+static int aer_ratelimit(struct pci_dev *dev, unsigned int severity)
+{
+ if (!dev->aer_info)
+ return 1;
+
+ switch (severity) {
+ case AER_NONFATAL:
+ return __ratelimit(&dev->aer_info->nonfatal_ratelimit);
+ case AER_CORRECTABLE:
+ return __ratelimit(&dev->aer_info->correctable_ratelimit);
+ default:
+ return 1; /* Don't ratelimit fatal errors */
}
}
-static void __print_tlp_header(struct pci_dev *dev,
- struct aer_header_log_regs *t)
+static bool tlp_header_logged(u32 status, u32 capctl)
{
- pci_err(dev, " TLP Header: %08x %08x %08x %08x\n",
- t->dw0, t->dw1, t->dw2, t->dw3);
+ /* Errors for which a header is always logged (PCIe r7.0 sec 6.2.7) */
+ if (status & AER_LOG_TLP_MASKS)
+ return true;
+
+ /* Completion Timeout header is only logged on capable devices */
+ if (status & PCI_ERR_UNC_COMP_TIME &&
+ capctl & PCI_ERR_CAP_COMP_TIME_LOG)
+ return true;
+
+ return false;
}
-static void __aer_print_error(struct pci_dev *dev,
- struct aer_err_info *info)
+static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
const char **strings;
unsigned long status = info->status & ~info->mask;
- const char *level, *errmsg;
+ const char *level = info->level;
+ const char *errmsg;
int i;
- if (info->severity == AER_CORRECTABLE) {
+ if (info->severity == AER_CORRECTABLE)
strings = aer_correctable_error_string;
- level = KERN_WARNING;
- } else {
+ else
strings = aer_uncorrectable_error_string;
- level = KERN_ERR;
- }
for_each_set_bit(i, &status, 32) {
errmsg = strings[i];
if (!errmsg)
errmsg = "Unknown Error Bit";
- pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg,
+ aer_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg,
info->first_error == i ? " (First)" : "");
}
- pci_dev_aer_stats_incr(dev, info);
}
-void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
+static void aer_print_source(struct pci_dev *dev, struct aer_err_info *info,
+ bool found)
{
- int layer, agent;
- int id = ((dev->bus->number << 8) | dev->devfn);
- const char *level;
+ u16 source = info->id;
+
+ pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d%s\n",
+ info->multi_error_valid ? "Multiple " : "",
+ aer_error_severity_string[info->severity],
+ pci_domain_nr(dev->bus), PCI_BUS_NUM(source),
+ PCI_SLOT(source), PCI_FUNC(source),
+ found ? "" : " (no details found");
+}
+
+void aer_print_error(struct aer_err_info *info, int i)
+{
+ struct pci_dev *dev;
+ int layer, agent, id;
+ const char *level = info->level;
+
+ if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES))
+ return;
+
+ dev = info->dev[i];
+ id = pci_dev_id(dev);
+
+ pci_dev_aer_stats_incr(dev, info);
+ trace_aer_event(pci_name(dev), (info->status & ~info->mask),
+ info->severity, info->tlp_header_valid, &info->tlp);
+
+ if (!info->ratelimit_print[i])
+ return;
if (!info->status) {
pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
@@ -721,38 +893,21 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
agent = AER_GET_AGENT(info->severity, info->status);
- level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
-
- pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
+ aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
aer_error_severity_string[info->severity],
aer_error_layer[layer], aer_agent_string[agent]);
- pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
+ aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
dev->vendor, dev->device, info->status, info->mask);
__aer_print_error(dev, info);
if (info->tlp_header_valid)
- __print_tlp_header(dev, &info->tlp);
+ pcie_print_tlp_log(dev, &info->tlp, level, dev_fmt(" "));
out:
if (info->id && info->error_dev_num > 1 && info->id == id)
pci_err(dev, " Error of this Agent is reported first\n");
-
- trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
- info->severity, info->tlp_header_valid, &info->tlp);
-}
-
-static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
-{
- u8 bus = info->id >> 8;
- u8 devfn = info->id & 0xff;
-
- pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n",
- info->multi_error_valid ? "Multiple " : "",
- aer_error_severity_string[info->severity],
- pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn),
- PCI_FUNC(devfn));
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
@@ -768,48 +923,57 @@ int cper_severity_to_aer(int cper_severity)
}
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);
+#endif
-void cper_print_aer(struct pci_dev *dev, int aer_severity,
- struct aer_capability_regs *aer)
+void pci_print_aer(struct pci_dev *dev, int aer_severity,
+ struct aer_capability_regs *aer)
{
int layer, agent, tlp_header_valid = 0;
u32 status, mask;
- struct aer_err_info info;
+ struct aer_err_info info = {
+ .severity = aer_severity,
+ .first_error = PCI_ERR_CAP_FEP(aer->cap_control),
+ };
if (aer_severity == AER_CORRECTABLE) {
status = aer->cor_status;
mask = aer->cor_mask;
+ info.level = KERN_WARNING;
} else {
status = aer->uncor_status;
mask = aer->uncor_mask;
- tlp_header_valid = status & AER_LOG_TLP_MASKS;
+ info.level = KERN_ERR;
+ tlp_header_valid = tlp_header_logged(status, aer->cap_control);
}
- layer = AER_GET_LAYER_ERROR(aer_severity, status);
- agent = AER_GET_AGENT(aer_severity, status);
-
- memset(&info, 0, sizeof(info));
- info.severity = aer_severity;
info.status = status;
info.mask = mask;
- info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
- pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
+ pci_dev_aer_stats_incr(dev, &info);
+ trace_aer_event(pci_name(dev), (status & ~mask),
+ aer_severity, tlp_header_valid, &aer->header_log);
+
+ if (!aer_ratelimit(dev, info.severity))
+ return;
+
+ layer = AER_GET_LAYER_ERROR(aer_severity, status);
+ agent = AER_GET_AGENT(aer_severity, status);
+
+ aer_printk(info.level, dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
+ status, mask);
__aer_print_error(dev, &info);
- pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
- aer_error_layer[layer], aer_agent_string[agent]);
+ aer_printk(info.level, dev, "aer_layer=%s, aer_agent=%s\n",
+ aer_error_layer[layer], aer_agent_string[agent]);
if (aer_severity != AER_CORRECTABLE)
- pci_err(dev, "aer_uncor_severity: 0x%08x\n",
- aer->uncor_severity);
+ aer_printk(info.level, dev, "aer_uncor_severity: 0x%08x\n",
+ aer->uncor_severity);
if (tlp_header_valid)
- __print_tlp_header(dev, &aer->header_log);
-
- trace_aer_event(dev_name(&dev->dev), (status & ~mask),
- aer_severity, tlp_header_valid, &aer->header_log);
+ pcie_print_tlp_log(dev, &aer->header_log, info.level,
+ dev_fmt(" "));
}
-#endif
+EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
/**
* add_error_device - list device to be handled
@@ -818,12 +982,27 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity,
*/
static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
{
- if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
- e_info->dev[e_info->error_dev_num] = pci_dev_get(dev);
- e_info->error_dev_num++;
- return 0;
+ int i = e_info->error_dev_num;
+
+ if (i >= AER_MAX_MULTI_ERR_DEVICES)
+ return -ENOSPC;
+
+ e_info->dev[i] = pci_dev_get(dev);
+ e_info->error_dev_num++;
+
+ /*
+ * Ratelimit AER log messages. "dev" is either the source
+ * identified by the root's Error Source ID or it has an unmasked
+ * error logged in its own AER Capability. Messages are emitted
+ * when "ratelimit_print[i]" is non-zero. If we will print detail
+ * for a downstream device, make sure we print the Error Source ID
+ * from the root as well.
+ */
+ if (aer_ratelimit(dev, e_info->severity)) {
+ e_info->ratelimit_print[i] = 1;
+ e_info->root_ratelimit_print = 1;
}
- return -ENOSPC;
+ return 0;
}
/**
@@ -838,24 +1017,24 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
u16 reg16;
/*
- * When bus id is equal to 0, it might be a bad id
- * reported by root port.
+ * When bus ID is equal to 0, it might be a bad ID
+ * reported by Root Port.
*/
if ((PCI_BUS_NUM(e_info->id) != 0) &&
!(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
/* Device ID match? */
- if (e_info->id == ((dev->bus->number << 8) | dev->devfn))
+ if (e_info->id == pci_dev_id(dev))
return true;
- /* Continue id comparing if there is no multiple error */
+ /* Continue ID comparing if there is no multiple error */
if (!e_info->multi_error_valid)
return false;
}
/*
* When either
- * 1) bus id is equal to 0. Some ports might lose the bus
- * id of error source id;
+ * 1) bus ID is equal to 0. Some ports might lose the bus
+ * ID of error source id;
* 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set
* 3) There are multiple errors and prior ID comparing fails;
* We check AER status registers to find possible reporter.
@@ -893,7 +1072,8 @@ static int find_device_iter(struct pci_dev *dev, void *data)
/* List this device */
if (add_error_device(e_info, dev)) {
/* We cannot handle more... Stop iteration */
- /* TODO: Should print error message here? */
+ pci_err(dev, "Exceeded max supported (%d) devices with errors logged\n",
+ AER_MAX_MULTI_ERR_DEVICES);
return 1;
}
@@ -907,9 +1087,9 @@ static int find_device_iter(struct pci_dev *dev, void *data)
/**
* find_source_device - search through device hierarchy for source device
* @parent: pointer to Root Port pci_dev data structure
- * @e_info: including detailed error information such like id
+ * @e_info: including detailed error information such as ID
*
- * Return true if found.
+ * Return: true if found.
*
* Invoked by DPC when error is detected at the Root Port.
* Caller of this function must set id, severity, and multi_error_valid of
@@ -917,7 +1097,7 @@ static int find_device_iter(struct pci_dev *dev, void *data)
* e_info->error_dev_num and e_info->dev[], based on the given information.
*/
static bool find_source_device(struct pci_dev *parent,
- struct aer_err_info *e_info)
+ struct aer_err_info *e_info)
{
struct pci_dev *dev = parent;
int result;
@@ -935,21 +1115,158 @@ static bool find_source_device(struct pci_dev *parent,
else
pci_walk_bus(parent->subordinate, find_device_iter, e_info);
- if (!e_info->error_dev_num) {
- pci_info(parent, "can't find device of ID%04x\n", e_info->id);
+ if (!e_info->error_dev_num)
return false;
- }
return true;
}
+#ifdef CONFIG_PCIEAER_CXL
+
/**
- * handle_error_source - handle logging error into an event log
+ * pci_aer_unmask_internal_errors - unmask internal errors
+ * @dev: pointer to the pci_dev data structure
+ *
+ * Unmask internal errors in the Uncorrectable and Correctable Error
+ * Mask registers.
+ *
+ * Note: AER must be enabled and supported by the device which must be
+ * checked in advance, e.g. with pcie_aer_is_native().
+ */
+static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
+{
+ int aer = dev->aer_cap;
+ u32 mask;
+
+ pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
+ mask &= ~PCI_ERR_UNC_INTN;
+ pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask);
+
+ pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
+ mask &= ~PCI_ERR_COR_INTERNAL;
+ pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
+}
+
+static bool is_cxl_mem_dev(struct pci_dev *dev)
+{
+ /*
+ * The capability, status, and control fields in Device 0,
+ * Function 0 DVSEC control the CXL functionality of the
+ * entire device (CXL 3.0, 8.1.3).
+ */
+ if (dev->devfn != PCI_DEVFN(0, 0))
+ return false;
+
+ /*
+ * CXL Memory Devices must have the 502h class code set (CXL
+ * 3.0, 8.1.12.1).
+ */
+ if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
+ return false;
+
+ return true;
+}
+
+static bool cxl_error_is_native(struct pci_dev *dev)
+{
+ struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+
+ return (pcie_ports_native || host->native_aer);
+}
+
+static bool is_internal_error(struct aer_err_info *info)
+{
+ if (info->severity == AER_CORRECTABLE)
+ return info->status & PCI_ERR_COR_INTERNAL;
+
+ return info->status & PCI_ERR_UNC_INTN;
+}
+
+static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
+{
+ struct aer_err_info *info = (struct aer_err_info *)data;
+ const struct pci_error_handlers *err_handler;
+
+ if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
+ return 0;
+
+ /* Protect dev->driver */
+ device_lock(&dev->dev);
+
+ err_handler = dev->driver ? dev->driver->err_handler : NULL;
+ if (!err_handler)
+ goto out;
+
+ if (info->severity == AER_CORRECTABLE) {
+ if (err_handler->cor_error_detected)
+ err_handler->cor_error_detected(dev);
+ } else if (err_handler->error_detected) {
+ if (info->severity == AER_NONFATAL)
+ err_handler->error_detected(dev, pci_channel_io_normal);
+ else if (info->severity == AER_FATAL)
+ err_handler->error_detected(dev, pci_channel_io_frozen);
+ }
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
+{
+ /*
+ * Internal errors of an RCEC indicate an AER error in an
+ * RCH's downstream port. Check and handle them in the CXL.mem
+ * device driver.
+ */
+ if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
+ is_internal_error(info))
+ pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
+}
+
+static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
+{
+ bool *handles_cxl = data;
+
+ if (!*handles_cxl)
+ *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
+
+ /* Non-zero terminates iteration */
+ return *handles_cxl;
+}
+
+static bool handles_cxl_errors(struct pci_dev *rcec)
+{
+ bool handles_cxl = false;
+
+ if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
+ pcie_aer_is_native(rcec))
+ pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
+
+ return handles_cxl;
+}
+
+static void cxl_rch_enable_rcec(struct pci_dev *rcec)
+{
+ if (!handles_cxl_errors(rcec))
+ return;
+
+ pci_aer_unmask_internal_errors(rcec);
+ pci_info(rcec, "CXL: Internal errors unmasked");
+}
+
+#else
+static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { }
+static inline void cxl_rch_handle_error(struct pci_dev *dev,
+ struct aer_err_info *info) { }
+#endif
+
+/**
+ * pci_aer_handle_error - handle logging error into an event log
* @dev: pointer to pci_dev data structure of error source device
* @info: comprehensive error information
*
* Invoked when an error being detected by Root Port.
*/
-static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
+static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info)
{
int aer = dev->aer_cap;
@@ -973,13 +1290,18 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
else if (info->severity == AER_FATAL)
pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
+}
+
+static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
+{
+ cxl_rch_handle_error(dev, info);
+ pci_aer_handle_error(dev, info);
pci_dev_put(dev);
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
-#define AER_RECOVER_RING_ORDER 4
-#define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER)
+#define AER_RECOVER_RING_SIZE 16
struct aer_recover_entry {
u8 bus;
@@ -1001,12 +1323,24 @@ static void aer_recover_work_func(struct work_struct *work)
pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
entry.devfn);
if (!pdev) {
- pr_err("no pci_dev for %04x:%02x:%02x.%x\n",
- entry.domain, entry.bus,
- PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
+ pr_err_ratelimited("%04x:%02x:%02x.%x: no pci_dev found\n",
+ entry.domain, entry.bus,
+ PCI_SLOT(entry.devfn),
+ PCI_FUNC(entry.devfn));
continue;
}
- cper_print_aer(pdev, entry.severity, entry.regs);
+ pci_print_aer(pdev, entry.severity, entry.regs);
+
+ /*
+ * Memory for aer_capability_regs(entry.regs) is being
+ * allocated from the ghes_estatus_pool to protect it from
+ * overwriting when multiple sections are present in the
+ * error status. Thus free the same after processing the
+ * data.
+ */
+ ghes_estatus_pool_region_free((unsigned long)entry.regs,
+ sizeof(struct aer_capability_regs));
+
if (entry.severity == AER_NONFATAL)
pcie_do_recovery(pdev, pci_channel_io_normal,
aer_root_reset);
@@ -1048,18 +1382,25 @@ EXPORT_SYMBOL_GPL(aer_recover_queue);
/**
* aer_get_device_error_info - read error status from dev and store it to info
- * @dev: pointer to the device expected to have a error record
* @info: pointer to structure to store the error record
+ * @i: index into info->dev[]
*
- * Return 1 on success, 0 on error.
+ * Return: 1 on success, 0 on error.
*
* Note that @info is reused among all error devices. Clear fields properly.
*/
-int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
+int aer_get_device_error_info(struct aer_err_info *info, int i)
{
- int type = pci_pcie_type(dev);
- int aer = dev->aer_cap;
- int temp;
+ struct pci_dev *dev;
+ int type, aer;
+ u32 aercc;
+
+ if (i >= AER_MAX_MULTI_ERR_DEVICES)
+ return 0;
+
+ dev = info->dev[i];
+ aer = dev->aer_cap;
+ type = pci_pcie_type(dev);
/* Must reset in this function */
info->status = 0;
@@ -1090,19 +1431,16 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
return 0;
/* Get First Error Pointer */
- pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp);
- info->first_error = PCI_ERR_CAP_FEP(temp);
+ pci_read_config_dword(dev, aer + PCI_ERR_CAP, &aercc);
+ info->first_error = PCI_ERR_CAP_FEP(aercc);
- if (info->status & AER_LOG_TLP_MASKS) {
+ if (tlp_header_logged(info->status, aercc)) {
info->tlp_header_valid = 1;
- pci_read_config_dword(dev,
- aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
- pci_read_config_dword(dev,
- aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
- pci_read_config_dword(dev,
- aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
- pci_read_config_dword(dev,
- aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
+ pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG,
+ aer + PCI_ERR_PREFIX_LOG,
+ aer_tlp_log_len(dev, aercc),
+ aercc & PCI_ERR_CAP_TLP_LOG_FLIT,
+ &info->tlp);
}
}
@@ -1113,74 +1451,98 @@ static inline void aer_process_err_devices(struct aer_err_info *e_info)
{
int i;
- /* Report all before handle them, not to lost records by reset etc. */
+ /* Report all before handling them, to not lose records by reset etc. */
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
- if (aer_get_device_error_info(e_info->dev[i], e_info))
- aer_print_error(e_info->dev[i], e_info);
+ if (aer_get_device_error_info(e_info, i))
+ aer_print_error(e_info, i);
}
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
- if (aer_get_device_error_info(e_info->dev[i], e_info))
+ if (aer_get_device_error_info(e_info, i))
handle_error_source(e_info->dev[i], e_info);
}
}
/**
- * aer_isr_one_error - consume an error detected by root port
- * @rpc: pointer to the root port which holds an error
- * @e_src: pointer to an error source
+ * aer_isr_one_error_type - consume a Correctable or Uncorrectable Error
+ * detected by Root Port or RCEC
+ * @root: pointer to Root Port or RCEC that signaled AER interrupt
+ * @info: pointer to AER error info
*/
-static void aer_isr_one_error(struct aer_rpc *rpc,
- struct aer_err_source *e_src)
+static void aer_isr_one_error_type(struct pci_dev *root,
+ struct aer_err_info *info)
{
- struct pci_dev *pdev = rpc->rpd;
- struct aer_err_info e_info;
+ bool found;
- pci_rootport_aer_stats_incr(pdev, e_src);
+ found = find_source_device(root, info);
/*
- * There is a possibility that both correctable error and
- * uncorrectable error being logged. Report correctable error first.
+ * If we're going to log error messages, we've already set
+ * "info->root_ratelimit_print" and "info->ratelimit_print[i]" to
+ * non-zero (which enables printing) because this is either an
+ * ERR_FATAL or we found a device with an error logged in its AER
+ * Capability.
+ *
+ * If we didn't find the Error Source device, at least log the
+ * Requester ID from the ERR_* Message received by the Root Port or
+ * RCEC, ratelimited by the RP or RCEC.
*/
- if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
- e_info.id = ERR_COR_ID(e_src->id);
- e_info.severity = AER_CORRECTABLE;
-
- if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
- e_info.multi_error_valid = 1;
- else
- e_info.multi_error_valid = 0;
- aer_print_port_info(pdev, &e_info);
+ if (info->root_ratelimit_print ||
+ (!found && aer_ratelimit(root, info->severity)))
+ aer_print_source(root, info, found);
- if (find_source_device(pdev, &e_info))
- aer_process_err_devices(&e_info);
- }
-
- if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
- e_info.id = ERR_UNCOR_ID(e_src->id);
+ if (found)
+ aer_process_err_devices(info);
+}
- if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
- e_info.severity = AER_FATAL;
- else
- e_info.severity = AER_NONFATAL;
+/**
+ * aer_isr_one_error - consume error(s) signaled by an AER interrupt from
+ * Root Port or RCEC
+ * @root: pointer to Root Port or RCEC that signaled AER interrupt
+ * @e_src: pointer to an error source
+ */
+static void aer_isr_one_error(struct pci_dev *root,
+ struct aer_err_source *e_src)
+{
+ u32 status = e_src->status;
- if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV)
- e_info.multi_error_valid = 1;
- else
- e_info.multi_error_valid = 0;
+ pci_rootport_aer_stats_incr(root, e_src);
- aer_print_port_info(pdev, &e_info);
+ /*
+ * There is a possibility that both correctable error and
+ * uncorrectable error being logged. Report correctable error first.
+ */
+ if (status & PCI_ERR_ROOT_COR_RCV) {
+ int multi = status & PCI_ERR_ROOT_MULTI_COR_RCV;
+ struct aer_err_info e_info = {
+ .id = ERR_COR_ID(e_src->id),
+ .severity = AER_CORRECTABLE,
+ .level = KERN_WARNING,
+ .multi_error_valid = multi ? 1 : 0,
+ };
+
+ aer_isr_one_error_type(root, &e_info);
+ }
- if (find_source_device(pdev, &e_info))
- aer_process_err_devices(&e_info);
+ if (status & PCI_ERR_ROOT_UNCOR_RCV) {
+ int fatal = status & PCI_ERR_ROOT_FATAL_RCV;
+ int multi = status & PCI_ERR_ROOT_MULTI_UNCOR_RCV;
+ struct aer_err_info e_info = {
+ .id = ERR_UNCOR_ID(e_src->id),
+ .severity = fatal ? AER_FATAL : AER_NONFATAL,
+ .level = KERN_ERR,
+ .multi_error_valid = multi ? 1 : 0,
+ };
+
+ aer_isr_one_error_type(root, &e_info);
}
}
/**
- * aer_isr - consume errors detected by root port
+ * aer_isr - consume errors detected by Root Port
* @irq: IRQ assigned to Root Port
* @context: pointer to Root Port data structure
*
- * Invoked, as DPC, when root port records new detected error
+ * Invoked, as DPC, when Root Port records new detected error
*/
static irqreturn_t aer_isr(int irq, void *context)
{
@@ -1192,7 +1554,7 @@ static irqreturn_t aer_isr(int irq, void *context)
return IRQ_NONE;
while (kfifo_get(&rpc->aer_fifo, &e_src))
- aer_isr_one_error(rpc, &e_src);
+ aer_isr_one_error(rpc->rpd, &e_src);
return IRQ_HANDLED;
}
@@ -1224,40 +1586,26 @@ static irqreturn_t aer_irq(int irq, void *context)
return IRQ_WAKE_THREAD;
}
-static int set_device_error_reporting(struct pci_dev *dev, void *data)
+static void aer_enable_irq(struct pci_dev *pdev)
{
- bool enable = *((bool *)data);
- int type = pci_pcie_type(dev);
-
- if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
- (type == PCI_EXP_TYPE_RC_EC) ||
- (type == PCI_EXP_TYPE_UPSTREAM) ||
- (type == PCI_EXP_TYPE_DOWNSTREAM)) {
- if (enable)
- pci_enable_pcie_error_reporting(dev);
- else
- pci_disable_pcie_error_reporting(dev);
- }
+ int aer = pdev->aer_cap;
+ u32 reg32;
- return 0;
+ /* Enable Root Port's interrupt in response to error messages */
+ pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+ reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
+ pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
}
-/**
- * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports.
- * @dev: pointer to root port's pci_dev data structure
- * @enable: true = enable error reporting, false = disable error reporting.
- */
-static void set_downstream_devices_error_reporting(struct pci_dev *dev,
- bool enable)
+static void aer_disable_irq(struct pci_dev *pdev)
{
- set_device_error_reporting(dev, &enable);
-
- if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
- pcie_walk_rcec(dev, set_device_error_reporting, &enable);
- else if (dev->subordinate)
- pci_walk_bus(dev->subordinate, set_device_error_reporting,
- &enable);
+ int aer = pdev->aer_cap;
+ u32 reg32;
+ /* Disable Root Port's interrupt in response to error messages */
+ pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
+ reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
+ pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
}
/**
@@ -1289,16 +1637,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc)
pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, &reg32);
pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32);
- /*
- * Enable error reporting for the root port device and downstream port
- * devices.
- */
- set_downstream_devices_error_reporting(pdev, true);
-
- /* Enable Root Port's interrupt in response to error messages */
- pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
- reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
- pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+ aer_enable_irq(pdev);
}
/**
@@ -1313,16 +1652,7 @@ static void aer_disable_rootport(struct aer_rpc *rpc)
int aer = pdev->aer_cap;
u32 reg32;
- /*
- * Disable error reporting for the root port device and downstream port
- * devices.
- */
- set_downstream_devices_error_reporting(pdev, false);
-
- /* Disable Root's interrupt in response to error messages */
- pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
- reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
- pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
+ aer_disable_irq(pdev);
/* Clear Root's error status reg */
pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32);
@@ -1380,11 +1710,28 @@ static int aer_probe(struct pcie_device *dev)
return status;
}
+ cxl_rch_enable_rcec(port);
aer_enable_rootport(rpc);
pci_info(port, "enabled with IRQ %d\n", dev->irq);
return 0;
}
+static int aer_suspend(struct pcie_device *dev)
+{
+ struct aer_rpc *rpc = get_service_data(dev);
+
+ aer_disable_rootport(rpc);
+ return 0;
+}
+
+static int aer_resume(struct pcie_device *dev)
+{
+ struct aer_rpc *rpc = get_service_data(dev);
+
+ aer_enable_rootport(rpc);
+ return 0;
+}
+
/**
* aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP
* @dev: pointer to Root Port, RCEC, or RCiEP
@@ -1417,12 +1764,8 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
*/
aer = root ? root->aer_cap : 0;
- if ((host->native_aer || pcie_ports_native) && aer) {
- /* Disable Root's interrupt in response to error messages */
- pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
- reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
- pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
- }
+ if ((host->native_aer || pcie_ports_native) && aer)
+ aer_disable_irq(root);
if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
rc = pcie_reset_flr(dev, PCI_RESET_DO_RESET);
@@ -1441,10 +1784,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);
- /* Enable Root Port's interrupt in response to error messages */
- pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
- reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
- pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
+ aer_enable_irq(root);
}
return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
@@ -1456,13 +1796,15 @@ static struct pcie_port_service_driver aerdriver = {
.service = PCIE_PORT_SERVICE_AER,
.probe = aer_probe,
+ .suspend = aer_suspend,
+ .resume = aer_resume,
.remove = aer_remove,
};
/**
- * pcie_aer_init - register AER root service driver
+ * pcie_aer_init - register AER service driver
*
- * Invoked when AER root service driver is loaded.
+ * Invoked when AER service driver is loaded.
*/
int __init pcie_aer_init(void)
{