diff options
Diffstat (limited to 'drivers/pci/pcie')
-rw-r--r-- | drivers/pci/pcie/Kconfig | 2 | ||||
-rw-r--r-- | drivers/pci/pcie/Makefile | 4 | ||||
-rw-r--r-- | drivers/pci/pcie/aer.c | 557 | ||||
-rw-r--r-- | drivers/pci/pcie/aer_inject.c | 6 | ||||
-rw-r--r-- | drivers/pci/pcie/aspm.c | 317 | ||||
-rw-r--r-- | drivers/pci/pcie/bwctrl.c | 329 | ||||
-rw-r--r-- | drivers/pci/pcie/dpc.c | 169 | ||||
-rw-r--r-- | drivers/pci/pcie/edr.c | 28 | ||||
-rw-r--r-- | drivers/pci/pcie/err.c | 13 | ||||
-rw-r--r-- | drivers/pci/pcie/portdrv.c | 31 | ||||
-rw-r--r-- | drivers/pci/pcie/portdrv.h | 6 | ||||
-rw-r--r-- | drivers/pci/pcie/ptm.c | 302 | ||||
-rw-r--r-- | drivers/pci/pcie/tlp.c | 137 |
13 files changed, 1484 insertions, 417 deletions
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 8999fcebde6a..17919b99fa66 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -47,7 +47,7 @@ config PCIEAER_INJECT error injection can fake almost all kinds of errors with the help of a user space helper tool aer-inject, which can be gotten from: - https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ + https://github.com/intel/aer-inject.git config PCIEAER_CXL bool "PCI Express CXL RAS support" diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 6461aa93fe76..173829aa02e6 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -4,10 +4,10 @@ pcieportdrv-y := portdrv.o rcec.o -obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o +obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o obj-y += aspm.o -obj-$(CONFIG_PCIEAER) += aer.o err.o +obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index ac6293c24976..70ac66188367 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -2,7 +2,7 @@ /* * Implement the AER root port service driver. The driver registers an IRQ * handler. When a root port triggers an AER interrupt, the IRQ handler - * collects root port status and schedules work. + * collects Root Port status and schedules work. * * Copyright (C) 2006 Intel Corp. * Tom Long Nguyen (tom.l.nguyen@intel.com) @@ -17,6 +17,7 @@ #include <linux/bitops.h> #include <linux/cper.h> +#include <linux/dev_printk.h> #include <linux/pci.h> #include <linux/pci-acpi.h> #include <linux/sched.h> @@ -27,6 +28,7 @@ #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/kfifo.h> +#include <linux/ratelimit.h> #include <linux/slab.h> #include <acpi/apei.h> #include <acpi/ghes.h> @@ -35,6 +37,9 @@ #include "../pci.h" #include "portdrv.h" +#define aer_printk(level, pdev, fmt, arg...) \ + dev_printk(level, &(pdev)->dev, fmt, ##arg) + #define AER_ERROR_SOURCES_MAX 128 #define AER_MAX_TYPEOF_COR_ERRS 16 /* as per PCI_ERR_COR_STATUS */ @@ -50,15 +55,15 @@ struct aer_rpc { DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX); }; -/* AER stats for the device */ -struct aer_stats { +/* AER info for the device */ +struct aer_info { /* * Fields for all AER capable devices. They indicate the errors * "as seen by this device". Note that this may mean that if an - * end point is causing problems, the AER counters may increment - * at its link partner (e.g. root port) because the errors will be - * "seen" by the link partner and not the problematic end point + * Endpoint is causing problems, the AER counters may increment + * at its link partner (e.g. Root Port) because the errors will be + * "seen" by the link partner and not the problematic Endpoint * itself (which may report all counters as 0 as it never saw any * problems). */ @@ -76,14 +81,18 @@ struct aer_stats { u64 dev_total_nonfatal_errs; /* - * Fields for Root ports & root complex event collectors only, these + * Fields for Root Ports & Root Complex Event Collectors only; these * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL - * messages received by the root port / event collector, INCLUDING the - * ones that are generated internally (by the rootport itself) + * messages received by the Root Port / Event Collector, INCLUDING the + * ones that are generated internally (by the Root Port itself) */ u64 rootport_total_cor_errs; u64 rootport_total_fatal_errs; u64 rootport_total_nonfatal_errs; + + /* Ratelimits for errors */ + struct ratelimit_state correctable_ratelimit; + struct ratelimit_state nonfatal_ratelimit; }; #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ @@ -138,7 +147,7 @@ static const char * const ecrc_policy_str[] = { * enable_ecrc_checking - enable PCIe ECRC checking for a device * @dev: the PCI device * - * Returns 0 on success, or negative on failure. + * Return: 0 on success, or negative on failure. */ static int enable_ecrc_checking(struct pci_dev *dev) { @@ -159,10 +168,10 @@ static int enable_ecrc_checking(struct pci_dev *dev) } /** - * disable_ecrc_checking - disables PCIe ECRC checking for a device + * disable_ecrc_checking - disable PCIe ECRC checking for a device * @dev: the PCI device * - * Returns 0 on success, or negative on failure. + * Return: 0 on success, or negative on failure. */ static int disable_ecrc_checking(struct pci_dev *dev) { @@ -180,7 +189,8 @@ static int disable_ecrc_checking(struct pci_dev *dev) } /** - * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy + * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based + * on global policy * @dev: the PCI device */ void pcie_set_ecrc_checking(struct pci_dev *dev) @@ -230,7 +240,7 @@ int pcie_aer_is_native(struct pci_dev *dev) return pcie_ports_native || host->native_aer; } -EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, CXL); +EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, "CXL"); static int pci_enable_pcie_error_reporting(struct pci_dev *dev) { @@ -282,10 +292,10 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) * pci_aer_raw_clear_status - Clear AER error registers. * @dev: the PCI device * - * Clearing AER error status registers unconditionally, regardless of + * Clear AER error status registers unconditionally, regardless of * whether they're owned by firmware or the OS. * - * Returns 0 on success, or negative on failure. + * Return: 0 on success, or negative on failure. */ int pci_aer_raw_clear_status(struct pci_dev *dev) { @@ -372,13 +382,18 @@ void pci_aer_init(struct pci_dev *dev) if (!dev->aer_cap) return; - dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL); + dev->aer_info = kzalloc(sizeof(*dev->aer_info), GFP_KERNEL); + + ratelimit_state_init(&dev->aer_info->correctable_ratelimit, + DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); + ratelimit_state_init(&dev->aer_info->nonfatal_ratelimit, + DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); /* * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER, * PCI_ERR_COR_MASK, and PCI_ERR_CAP. Root and Root Complex Event - * Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r5.0, sec - * 7.8.4). + * Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r6.0, sec + * 7.8.4.9). */ n = pcie_cap_has_rtctl(dev) ? 5 : 4; pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n); @@ -393,8 +408,8 @@ void pci_aer_init(struct pci_dev *dev) void pci_aer_exit(struct pci_dev *dev) { - kfree(dev->aer_stats); - dev->aer_stats = NULL; + kfree(dev->aer_info); + dev->aer_info = NULL; } #define AER_AGENT_RECEIVER 0 @@ -532,10 +547,10 @@ static const char *aer_agent_string[] = { { \ unsigned int i; \ struct pci_dev *pdev = to_pci_dev(dev); \ - u64 *stats = pdev->aer_stats->stats_array; \ + u64 *stats = pdev->aer_info->stats_array; \ size_t len = 0; \ \ - for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\ + for (i = 0; i < ARRAY_SIZE(pdev->aer_info->stats_array); i++) { \ if (strings_array[i]) \ len += sysfs_emit_at(buf, len, "%s %llu\n", \ strings_array[i], \ @@ -546,7 +561,7 @@ static const char *aer_agent_string[] = { i, stats[i]); \ } \ len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string, \ - pdev->aer_stats->total_field); \ + pdev->aer_info->total_field); \ return len; \ } \ static DEVICE_ATTR_RO(name) @@ -567,7 +582,7 @@ aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs, char *buf) \ { \ struct pci_dev *pdev = to_pci_dev(dev); \ - return sysfs_emit(buf, "%llu\n", pdev->aer_stats->field); \ + return sysfs_emit(buf, "%llu\n", pdev->aer_info->field); \ } \ static DEVICE_ATTR_RO(name) @@ -594,7 +609,7 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct pci_dev *pdev = to_pci_dev(dev); - if (!pdev->aer_stats) + if (!pdev->aer_info) return 0; if ((a == &dev_attr_aer_rootport_total_err_cor.attr || @@ -612,31 +627,136 @@ const struct attribute_group aer_stats_attr_group = { .is_visible = aer_stats_attrs_are_visible, }; +/* + * Ratelimit interval + * <=0: disabled with ratelimit.interval = 0 + * >0: enabled with ratelimit.interval in ms + */ +#define aer_ratelimit_interval_attr(name, ratelimit) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *attr, \ + char *buf) \ + { \ + struct pci_dev *pdev = to_pci_dev(dev); \ + \ + return sysfs_emit(buf, "%d\n", \ + pdev->aer_info->ratelimit.interval); \ + } \ + \ + static ssize_t \ + name##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + struct pci_dev *pdev = to_pci_dev(dev); \ + int interval; \ + \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + \ + if (kstrtoint(buf, 0, &interval) < 0) \ + return -EINVAL; \ + \ + if (interval <= 0) \ + interval = 0; \ + else \ + interval = msecs_to_jiffies(interval); \ + \ + pdev->aer_info->ratelimit.interval = interval; \ + \ + return count; \ + } \ + static DEVICE_ATTR_RW(name); + +#define aer_ratelimit_burst_attr(name, ratelimit) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *attr, \ + char *buf) \ + { \ + struct pci_dev *pdev = to_pci_dev(dev); \ + \ + return sysfs_emit(buf, "%d\n", \ + pdev->aer_info->ratelimit.burst); \ + } \ + \ + static ssize_t \ + name##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + struct pci_dev *pdev = to_pci_dev(dev); \ + int burst; \ + \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + \ + if (kstrtoint(buf, 0, &burst) < 0) \ + return -EINVAL; \ + \ + pdev->aer_info->ratelimit.burst = burst; \ + \ + return count; \ + } \ + static DEVICE_ATTR_RW(name); + +#define aer_ratelimit_attrs(name) \ + aer_ratelimit_interval_attr(name##_ratelimit_interval_ms, \ + name##_ratelimit) \ + aer_ratelimit_burst_attr(name##_ratelimit_burst, \ + name##_ratelimit) + +aer_ratelimit_attrs(correctable) +aer_ratelimit_attrs(nonfatal) + +static struct attribute *aer_attrs[] = { + &dev_attr_correctable_ratelimit_interval_ms.attr, + &dev_attr_correctable_ratelimit_burst.attr, + &dev_attr_nonfatal_ratelimit_interval_ms.attr, + &dev_attr_nonfatal_ratelimit_burst.attr, + NULL +}; + +static umode_t aer_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + + if (!pdev->aer_info) + return 0; + + return a->mode; +} + +const struct attribute_group aer_attr_group = { + .name = "aer", + .attrs = aer_attrs, + .is_visible = aer_attrs_are_visible, +}; + static void pci_dev_aer_stats_incr(struct pci_dev *pdev, struct aer_err_info *info) { unsigned long status = info->status & ~info->mask; int i, max = -1; u64 *counter = NULL; - struct aer_stats *aer_stats = pdev->aer_stats; + struct aer_info *aer_info = pdev->aer_info; - if (!aer_stats) + if (!aer_info) return; switch (info->severity) { case AER_CORRECTABLE: - aer_stats->dev_total_cor_errs++; - counter = &aer_stats->dev_cor_errs[0]; + aer_info->dev_total_cor_errs++; + counter = &aer_info->dev_cor_errs[0]; max = AER_MAX_TYPEOF_COR_ERRS; break; case AER_NONFATAL: - aer_stats->dev_total_nonfatal_errs++; - counter = &aer_stats->dev_nonfatal_errs[0]; + aer_info->dev_total_nonfatal_errs++; + counter = &aer_info->dev_nonfatal_errs[0]; max = AER_MAX_TYPEOF_UNCOR_ERRS; break; case AER_FATAL: - aer_stats->dev_total_fatal_errs++; - counter = &aer_stats->dev_fatal_errs[0]; + aer_info->dev_total_fatal_errs++; + counter = &aer_info->dev_fatal_errs[0]; max = AER_MAX_TYPEOF_UNCOR_ERRS; break; } @@ -648,60 +768,88 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev, static void pci_rootport_aer_stats_incr(struct pci_dev *pdev, struct aer_err_source *e_src) { - struct aer_stats *aer_stats = pdev->aer_stats; + struct aer_info *aer_info = pdev->aer_info; - if (!aer_stats) + if (!aer_info) return; if (e_src->status & PCI_ERR_ROOT_COR_RCV) - aer_stats->rootport_total_cor_errs++; + aer_info->rootport_total_cor_errs++; if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) - aer_stats->rootport_total_fatal_errs++; + aer_info->rootport_total_fatal_errs++; else - aer_stats->rootport_total_nonfatal_errs++; + aer_info->rootport_total_nonfatal_errs++; } } -static void __print_tlp_header(struct pci_dev *dev, struct pcie_tlp_log *t) +static int aer_ratelimit(struct pci_dev *dev, unsigned int severity) { - pci_err(dev, " TLP Header: %08x %08x %08x %08x\n", - t->dw[0], t->dw[1], t->dw[2], t->dw[3]); + switch (severity) { + case AER_NONFATAL: + return __ratelimit(&dev->aer_info->nonfatal_ratelimit); + case AER_CORRECTABLE: + return __ratelimit(&dev->aer_info->correctable_ratelimit); + default: + return 1; /* Don't ratelimit fatal errors */ + } } -static void __aer_print_error(struct pci_dev *dev, - struct aer_err_info *info) +static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info) { const char **strings; unsigned long status = info->status & ~info->mask; - const char *level, *errmsg; + const char *level = info->level; + const char *errmsg; int i; - if (info->severity == AER_CORRECTABLE) { + if (info->severity == AER_CORRECTABLE) strings = aer_correctable_error_string; - level = KERN_WARNING; - } else { + else strings = aer_uncorrectable_error_string; - level = KERN_ERR; - } for_each_set_bit(i, &status, 32) { errmsg = strings[i]; if (!errmsg) errmsg = "Unknown Error Bit"; - pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg, + aer_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg, info->first_error == i ? " (First)" : ""); } - pci_dev_aer_stats_incr(dev, info); } -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) +static void aer_print_source(struct pci_dev *dev, struct aer_err_info *info, + bool found) +{ + u16 source = info->id; + + pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d%s\n", + info->multi_error_valid ? "Multiple " : "", + aer_error_severity_string[info->severity], + pci_domain_nr(dev->bus), PCI_BUS_NUM(source), + PCI_SLOT(source), PCI_FUNC(source), + found ? "" : " (no details found"); +} + +void aer_print_error(struct aer_err_info *info, int i) { - int layer, agent; - int id = pci_dev_id(dev); - const char *level; + struct pci_dev *dev; + int layer, agent, id; + const char *level = info->level; + + if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES)) + return; + + dev = info->dev[i]; + id = pci_dev_id(dev); + + pci_dev_aer_stats_incr(dev, info); + trace_aer_event(pci_name(dev), (info->status & ~info->mask), + info->severity, info->tlp_header_valid, &info->tlp); + + if (!info->ratelimit_print[i]) + return; if (!info->status) { pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", @@ -712,38 +860,21 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR; - - pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", + aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", aer_error_severity_string[info->severity], aer_error_layer[layer], aer_agent_string[agent]); - pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", + aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", dev->vendor, dev->device, info->status, info->mask); __aer_print_error(dev, info); if (info->tlp_header_valid) - __print_tlp_header(dev, &info->tlp); + pcie_print_tlp_log(dev, &info->tlp, level, dev_fmt(" ")); out: if (info->id && info->error_dev_num > 1 && info->id == id) pci_err(dev, " Error of this Agent is reported first\n"); - - trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), - info->severity, info->tlp_header_valid, &info->tlp); -} - -static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) -{ - u8 bus = info->id >> 8; - u8 devfn = info->id & 0xff; - - pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d\n", - info->multi_error_valid ? "Multiple " : "", - aer_error_severity_string[info->severity], - pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), - PCI_FUNC(devfn)); } #ifdef CONFIG_ACPI_APEI_PCIEAER @@ -766,42 +897,50 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, { int layer, agent, tlp_header_valid = 0; u32 status, mask; - struct aer_err_info info; + struct aer_err_info info = { + .severity = aer_severity, + .first_error = PCI_ERR_CAP_FEP(aer->cap_control), + }; if (aer_severity == AER_CORRECTABLE) { status = aer->cor_status; mask = aer->cor_mask; + info.level = KERN_WARNING; } else { status = aer->uncor_status; mask = aer->uncor_mask; + info.level = KERN_ERR; tlp_header_valid = status & AER_LOG_TLP_MASKS; } - layer = AER_GET_LAYER_ERROR(aer_severity, status); - agent = AER_GET_AGENT(aer_severity, status); - - memset(&info, 0, sizeof(info)); - info.severity = aer_severity; info.status = status; info.mask = mask; - info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); - pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); + pci_dev_aer_stats_incr(dev, &info); + trace_aer_event(pci_name(dev), (status & ~mask), + aer_severity, tlp_header_valid, &aer->header_log); + + if (!aer_ratelimit(dev, info.severity)) + return; + + layer = AER_GET_LAYER_ERROR(aer_severity, status); + agent = AER_GET_AGENT(aer_severity, status); + + aer_printk(info.level, dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", + status, mask); __aer_print_error(dev, &info); - pci_err(dev, "aer_layer=%s, aer_agent=%s\n", - aer_error_layer[layer], aer_agent_string[agent]); + aer_printk(info.level, dev, "aer_layer=%s, aer_agent=%s\n", + aer_error_layer[layer], aer_agent_string[agent]); if (aer_severity != AER_CORRECTABLE) - pci_err(dev, "aer_uncor_severity: 0x%08x\n", - aer->uncor_severity); + aer_printk(info.level, dev, "aer_uncor_severity: 0x%08x\n", + aer->uncor_severity); if (tlp_header_valid) - __print_tlp_header(dev, &aer->header_log); - - trace_aer_event(dev_name(&dev->dev), (status & ~mask), - aer_severity, tlp_header_valid, &aer->header_log); + pcie_print_tlp_log(dev, &aer->header_log, info.level, + dev_fmt(" ")); } -EXPORT_SYMBOL_NS_GPL(pci_print_aer, CXL); +EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL"); /** * add_error_device - list device to be handled @@ -810,12 +949,27 @@ EXPORT_SYMBOL_NS_GPL(pci_print_aer, CXL); */ static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) { - if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { - e_info->dev[e_info->error_dev_num] = pci_dev_get(dev); - e_info->error_dev_num++; - return 0; + int i = e_info->error_dev_num; + + if (i >= AER_MAX_MULTI_ERR_DEVICES) + return -ENOSPC; + + e_info->dev[i] = pci_dev_get(dev); + e_info->error_dev_num++; + + /* + * Ratelimit AER log messages. "dev" is either the source + * identified by the root's Error Source ID or it has an unmasked + * error logged in its own AER Capability. Messages are emitted + * when "ratelimit_print[i]" is non-zero. If we will print detail + * for a downstream device, make sure we print the Error Source ID + * from the root as well. + */ + if (aer_ratelimit(dev, e_info->severity)) { + e_info->ratelimit_print[i] = 1; + e_info->root_ratelimit_print = 1; } - return -ENOSPC; + return 0; } /** @@ -830,8 +984,8 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) u16 reg16; /* - * When bus id is equal to 0, it might be a bad id - * reported by root port. + * When bus ID is equal to 0, it might be a bad ID + * reported by Root Port. */ if ((PCI_BUS_NUM(e_info->id) != 0) && !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) { @@ -839,15 +993,15 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) if (e_info->id == pci_dev_id(dev)) return true; - /* Continue id comparing if there is no multiple error */ + /* Continue ID comparing if there is no multiple error */ if (!e_info->multi_error_valid) return false; } /* * When either - * 1) bus id is equal to 0. Some ports might lose the bus - * id of error source id; + * 1) bus ID is equal to 0. Some ports might lose the bus + * ID of error source id; * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set * 3) There are multiple errors and prior ID comparing fails; * We check AER status registers to find possible reporter. @@ -899,9 +1053,9 @@ static int find_device_iter(struct pci_dev *dev, void *data) /** * find_source_device - search through device hierarchy for source device * @parent: pointer to Root Port pci_dev data structure - * @e_info: including detailed error information such like id + * @e_info: including detailed error information such as ID * - * Return true if found. + * Return: true if found. * * Invoked by DPC when error is detected at the Root Port. * Caller of this function must set id, severity, and multi_error_valid of @@ -909,7 +1063,7 @@ static int find_device_iter(struct pci_dev *dev, void *data) * e_info->error_dev_num and e_info->dev[], based on the given information. */ static bool find_source_device(struct pci_dev *parent, - struct aer_err_info *e_info) + struct aer_err_info *e_info) { struct pci_dev *dev = parent; int result; @@ -927,15 +1081,8 @@ static bool find_source_device(struct pci_dev *parent, else pci_walk_bus(parent->subordinate, find_device_iter, e_info); - if (!e_info->error_dev_num) { - u8 bus = e_info->id >> 8; - u8 devfn = e_info->id & 0xff; - - pci_info(parent, "found no error details for %04x:%02x:%02x.%d\n", - pci_domain_nr(parent->bus), bus, PCI_SLOT(devfn), - PCI_FUNC(devfn)); + if (!e_info->error_dev_num) return false; - } return true; } @@ -943,9 +1090,9 @@ static bool find_source_device(struct pci_dev *parent, /** * pci_aer_unmask_internal_errors - unmask internal errors - * @dev: pointer to the pcie_dev data structure + * @dev: pointer to the pci_dev data structure * - * Unmasks internal errors in the Uncorrectable and Correctable Error + * Unmask internal errors in the Uncorrectable and Correctable Error * Mask registers. * * Note: AER must be enabled and supported by the device which must be @@ -1008,7 +1155,7 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) return 0; - /* protect dev->driver */ + /* Protect dev->driver */ device_lock(&dev->dev); err_handler = dev->driver ? dev->driver->err_handler : NULL; @@ -1142,20 +1289,23 @@ static void aer_recover_work_func(struct work_struct *work) pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, entry.devfn); if (!pdev) { - pr_err("no pci_dev for %04x:%02x:%02x.%x\n", - entry.domain, entry.bus, - PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); + pr_err_ratelimited("%04x:%02x:%02x.%x: no pci_dev found\n", + entry.domain, entry.bus, + PCI_SLOT(entry.devfn), + PCI_FUNC(entry.devfn)); continue; } pci_print_aer(pdev, entry.severity, entry.regs); + /* - * Memory for aer_capability_regs(entry.regs) is being allocated from the - * ghes_estatus_pool to protect it from overwriting when multiple sections - * are present in the error status. Thus free the same after processing - * the data. + * Memory for aer_capability_regs(entry.regs) is being + * allocated from the ghes_estatus_pool to protect it from + * overwriting when multiple sections are present in the + * error status. Thus free the same after processing the + * data. */ ghes_estatus_pool_region_free((unsigned long)entry.regs, - sizeof(struct aer_capability_regs)); + sizeof(struct aer_capability_regs)); if (entry.severity == AER_NONFATAL) pcie_do_recovery(pdev, pci_channel_io_normal, @@ -1198,19 +1348,26 @@ EXPORT_SYMBOL_GPL(aer_recover_queue); /** * aer_get_device_error_info - read error status from dev and store it to info - * @dev: pointer to the device expected to have a error record * @info: pointer to structure to store the error record + * @i: index into info->dev[] * - * Return 1 on success, 0 on error. + * Return: 1 on success, 0 on error. * * Note that @info is reused among all error devices. Clear fields properly. */ -int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) +int aer_get_device_error_info(struct aer_err_info *info, int i) { - int type = pci_pcie_type(dev); - int aer = dev->aer_cap; + struct pci_dev *dev; + int type, aer; u32 aercc; + if (i >= AER_MAX_MULTI_ERR_DEVICES) + return 0; + + dev = info->dev[i]; + aer = dev->aer_cap; + type = pci_pcie_type(dev); + /* Must reset in this function */ info->status = 0; info->tlp_header_valid = 0; @@ -1245,7 +1402,11 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) if (info->status & AER_LOG_TLP_MASKS) { info->tlp_header_valid = 1; - pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG, &info->tlp); + pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG, + aer + PCI_ERR_PREFIX_LOG, + aer_tlp_log_len(dev, aercc), + aercc & PCI_ERR_CAP_TLP_LOG_FLIT, + &info->tlp); } } @@ -1256,74 +1417,98 @@ static inline void aer_process_err_devices(struct aer_err_info *e_info) { int i; - /* Report all before handle them, not to lost records by reset etc. */ + /* Report all before handling them, to not lose records by reset etc. */ for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { - if (aer_get_device_error_info(e_info->dev[i], e_info)) - aer_print_error(e_info->dev[i], e_info); + if (aer_get_device_error_info(e_info, i)) + aer_print_error(e_info, i); } for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { - if (aer_get_device_error_info(e_info->dev[i], e_info)) + if (aer_get_device_error_info(e_info, i)) handle_error_source(e_info->dev[i], e_info); } } /** - * aer_isr_one_error - consume an error detected by root port - * @rpc: pointer to the root port which holds an error - * @e_src: pointer to an error source + * aer_isr_one_error_type - consume a Correctable or Uncorrectable Error + * detected by Root Port or RCEC + * @root: pointer to Root Port or RCEC that signaled AER interrupt + * @info: pointer to AER error info */ -static void aer_isr_one_error(struct aer_rpc *rpc, - struct aer_err_source *e_src) +static void aer_isr_one_error_type(struct pci_dev *root, + struct aer_err_info *info) { - struct pci_dev *pdev = rpc->rpd; - struct aer_err_info e_info; + bool found; - pci_rootport_aer_stats_incr(pdev, e_src); + found = find_source_device(root, info); /* - * There is a possibility that both correctable error and - * uncorrectable error being logged. Report correctable error first. + * If we're going to log error messages, we've already set + * "info->root_ratelimit_print" and "info->ratelimit_print[i]" to + * non-zero (which enables printing) because this is either an + * ERR_FATAL or we found a device with an error logged in its AER + * Capability. + * + * If we didn't find the Error Source device, at least log the + * Requester ID from the ERR_* Message received by the Root Port or + * RCEC, ratelimited by the RP or RCEC. */ - if (e_src->status & PCI_ERR_ROOT_COR_RCV) { - e_info.id = ERR_COR_ID(e_src->id); - e_info.severity = AER_CORRECTABLE; - - if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) - e_info.multi_error_valid = 1; - else - e_info.multi_error_valid = 0; - aer_print_port_info(pdev, &e_info); + if (info->root_ratelimit_print || + (!found && aer_ratelimit(root, info->severity))) + aer_print_source(root, info, found); - if (find_source_device(pdev, &e_info)) - aer_process_err_devices(&e_info); - } - - if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { - e_info.id = ERR_UNCOR_ID(e_src->id); + if (found) + aer_process_err_devices(info); +} - if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) - e_info.severity = AER_FATAL; - else - e_info.severity = AER_NONFATAL; +/** + * aer_isr_one_error - consume error(s) signaled by an AER interrupt from + * Root Port or RCEC + * @root: pointer to Root Port or RCEC that signaled AER interrupt + * @e_src: pointer to an error source + */ +static void aer_isr_one_error(struct pci_dev *root, + struct aer_err_source *e_src) +{ + u32 status = e_src->status; - if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV) - e_info.multi_error_valid = 1; - else - e_info.multi_error_valid = 0; + pci_rootport_aer_stats_incr(root, e_src); - aer_print_port_info(pdev, &e_info); + /* + * There is a possibility that both correctable error and + * uncorrectable error being logged. Report correctable error first. + */ + if (status & PCI_ERR_ROOT_COR_RCV) { + int multi = status & PCI_ERR_ROOT_MULTI_COR_RCV; + struct aer_err_info e_info = { + .id = ERR_COR_ID(e_src->id), + .severity = AER_CORRECTABLE, + .level = KERN_WARNING, + .multi_error_valid = multi ? 1 : 0, + }; + + aer_isr_one_error_type(root, &e_info); + } - if (find_source_device(pdev, &e_info)) - aer_process_err_devices(&e_info); + if (status & PCI_ERR_ROOT_UNCOR_RCV) { + int fatal = status & PCI_ERR_ROOT_FATAL_RCV; + int multi = status & PCI_ERR_ROOT_MULTI_UNCOR_RCV; + struct aer_err_info e_info = { + .id = ERR_UNCOR_ID(e_src->id), + .severity = fatal ? AER_FATAL : AER_NONFATAL, + .level = KERN_ERR, + .multi_error_valid = multi ? 1 : 0, + }; + + aer_isr_one_error_type(root, &e_info); } } /** - * aer_isr - consume errors detected by root port + * aer_isr - consume errors detected by Root Port * @irq: IRQ assigned to Root Port * @context: pointer to Root Port data structure * - * Invoked, as DPC, when root port records new detected error + * Invoked, as DPC, when Root Port records new detected error */ static irqreturn_t aer_isr(int irq, void *context) { @@ -1335,7 +1520,7 @@ static irqreturn_t aer_isr(int irq, void *context) return IRQ_NONE; while (kfifo_get(&rpc->aer_fifo, &e_src)) - aer_isr_one_error(rpc, &e_src); + aer_isr_one_error(rpc->rpd, &e_src); return IRQ_HANDLED; } @@ -1383,7 +1568,7 @@ static void aer_disable_irq(struct pci_dev *pdev) int aer = pdev->aer_cap; u32 reg32; - /* Disable Root's interrupt in response to error messages */ + /* Disable Root Port's interrupt in response to error messages */ pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, ®32); reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32); @@ -1497,6 +1682,22 @@ static int aer_probe(struct pcie_device *dev) return 0; } +static int aer_suspend(struct pcie_device *dev) +{ + struct aer_rpc *rpc = get_service_data(dev); + + aer_disable_rootport(rpc); + return 0; +} + +static int aer_resume(struct pcie_device *dev) +{ + struct aer_rpc *rpc = get_service_data(dev); + + aer_enable_rootport(rpc); + return 0; +} + /** * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP * @dev: pointer to Root Port, RCEC, or RCiEP @@ -1561,13 +1762,15 @@ static struct pcie_port_service_driver aerdriver = { .service = PCIE_PORT_SERVICE_AER, .probe = aer_probe, + .suspend = aer_suspend, + .resume = aer_resume, .remove = aer_remove, }; /** - * pcie_aer_init - register AER root service driver + * pcie_aer_init - register AER service driver * - * Invoked when AER root service driver is loaded. + * Invoked when AER service driver is loaded. */ int __init pcie_aer_init(void) { diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c index 2dab275d252f..91acc7b17f68 100644 --- a/drivers/pci/pcie/aer_inject.c +++ b/drivers/pci/pcie/aer_inject.c @@ -6,7 +6,7 @@ * trigger various real hardware errors. Software based error * injection can fake almost all kinds of errors with the help of a * user space helper tool aer-inject, which can be gotten from: - * https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ + * https://github.com/intel/aer-inject.git * * Copyright 2009 Intel Corporation. * Huang Ying <ying.huang@intel.com> @@ -430,7 +430,7 @@ static int aer_inject(struct aer_error_inj *einj) else rperr->root_status |= PCI_ERR_ROOT_COR_RCV; rperr->source_id &= 0xffff0000; - rperr->source_id |= (einj->bus << 8) | devfn; + rperr->source_id |= PCI_DEVID(einj->bus, devfn); } if (einj->uncor_status) { if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV) @@ -443,7 +443,7 @@ static int aer_inject(struct aer_error_inj *einj) rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV; rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV; rperr->source_id &= 0x0000ffff; - rperr->source_id |= ((einj->bus << 8) | devfn) << 16; + rperr->source_id |= PCI_DEVID(einj->bus, devfn) << 16; } spin_unlock_irqrestore(&inject_lock, flags); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 2428d278e015..29fcb0689a91 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -8,6 +8,8 @@ */ #include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/build_bug.h> #include <linux/kernel.h> #include <linux/limits.h> #include <linux/math.h> @@ -79,24 +81,44 @@ void pci_configure_aspm_l1ss(struct pci_dev *pdev) void pci_save_aspm_l1ss_state(struct pci_dev *pdev) { + struct pci_dev *parent = pdev->bus->self; struct pci_cap_saved_state *save_state; - u16 l1ss = pdev->l1ss; u32 *cap; /* + * If this is a Downstream Port, we never restore the L1SS state + * directly; we only restore it when we restore the state of the + * Upstream Port below it. + */ + if (pcie_downstream_port(pdev) || !parent) + return; + + if (!pdev->l1ss || !parent->l1ss) + return; + + /* * Save L1 substate configuration. The ASPM L0s/L1 configuration * in PCI_EXP_LNKCTL_ASPMC is saved by pci_save_pcie_state(). */ - if (!l1ss) + save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_L1SS); + if (!save_state) return; - save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_L1SS); + cap = &save_state->cap.data[0]; + pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL2, cap++); + pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL1, cap++); + + /* + * Save parent's L1 substate configuration so we have it for + * pci_restore_aspm_l1ss_state(pdev) to restore. + */ + save_state = pci_find_saved_ext_cap(parent, PCI_EXT_CAP_ID_L1SS); if (!save_state) return; cap = &save_state->cap.data[0]; - pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL2, cap++); - pci_read_config_dword(pdev, l1ss + PCI_L1SS_CTL1, cap++); + pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, cap++); + pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, cap++); } void pci_restore_aspm_l1ss_state(struct pci_dev *pdev) @@ -177,8 +199,8 @@ void pci_restore_aspm_l1ss_state(struct pci_dev *pdev) /* Restore L0s/L1 if they were enabled */ if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, clnkctl) || FIELD_GET(PCI_EXP_LNKCTL_ASPMC, plnkctl)) { - pcie_capability_write_word(parent, PCI_EXP_LNKCTL, clnkctl); - pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, plnkctl); + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, plnkctl); + pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, clnkctl); } } @@ -189,21 +211,18 @@ void pci_restore_aspm_l1ss_state(struct pci_dev *pdev) #endif #define MODULE_PARAM_PREFIX "pcie_aspm." -/* Note: those are not register definitions */ -#define ASPM_STATE_L0S_UP (1) /* Upstream direction L0s state */ -#define ASPM_STATE_L0S_DW (2) /* Downstream direction L0s state */ -#define ASPM_STATE_L1 (4) /* L1 state */ -#define ASPM_STATE_L1_1 (8) /* ASPM L1.1 state */ -#define ASPM_STATE_L1_2 (0x10) /* ASPM L1.2 state */ -#define ASPM_STATE_L1_1_PCIPM (0x20) /* PCI PM L1.1 state */ -#define ASPM_STATE_L1_2_PCIPM (0x40) /* PCI PM L1.2 state */ -#define ASPM_STATE_L1_SS_PCIPM (ASPM_STATE_L1_1_PCIPM | ASPM_STATE_L1_2_PCIPM) -#define ASPM_STATE_L1_2_MASK (ASPM_STATE_L1_2 | ASPM_STATE_L1_2_PCIPM) -#define ASPM_STATE_L1SS (ASPM_STATE_L1_1 | ASPM_STATE_L1_1_PCIPM |\ - ASPM_STATE_L1_2_MASK) -#define ASPM_STATE_L0S (ASPM_STATE_L0S_UP | ASPM_STATE_L0S_DW) -#define ASPM_STATE_ALL (ASPM_STATE_L0S | ASPM_STATE_L1 | \ - ASPM_STATE_L1SS) +/* Note: these are not register definitions */ +#define PCIE_LINK_STATE_L0S_UP BIT(0) /* Upstream direction L0s state */ +#define PCIE_LINK_STATE_L0S_DW BIT(1) /* Downstream direction L0s state */ +static_assert(PCIE_LINK_STATE_L0S == (PCIE_LINK_STATE_L0S_UP | PCIE_LINK_STATE_L0S_DW)); + +#define PCIE_LINK_STATE_L1_SS_PCIPM (PCIE_LINK_STATE_L1_1_PCIPM |\ + PCIE_LINK_STATE_L1_2_PCIPM) +#define PCIE_LINK_STATE_L1_2_MASK (PCIE_LINK_STATE_L1_2 |\ + PCIE_LINK_STATE_L1_2_PCIPM) +#define PCIE_LINK_STATE_L1SS (PCIE_LINK_STATE_L1_1 |\ + PCIE_LINK_STATE_L1_1_PCIPM |\ + PCIE_LINK_STATE_L1_2_MASK) struct pcie_link_state { struct pci_dev *pdev; /* Upstream component of the Link */ @@ -275,10 +294,10 @@ static int policy_to_aspm_state(struct pcie_link_state *link) return 0; case POLICY_POWERSAVE: /* Enable ASPM L0s/L1 */ - return (ASPM_STATE_L0S | ASPM_STATE_L1); + return PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1; case POLICY_POWER_SUPERSAVE: /* Enable Everything */ - return ASPM_STATE_ALL; + return PCIE_LINK_STATE_ASPM_ALL; case POLICY_DEFAULT: return link->aspm_default; } @@ -581,14 +600,14 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint) latency_dw_l1 = calc_l1_latency(lnkcap_dw); /* Check upstream direction L0s latency */ - if ((link->aspm_capable & ASPM_STATE_L0S_UP) && + if ((link->aspm_capable & PCIE_LINK_STATE_L0S_UP) && (latency_up_l0s > acceptable_l0s)) - link->aspm_capable &= ~ASPM_STATE_L0S_UP; + link->aspm_capable &= ~PCIE_LINK_STATE_L0S_UP; /* Check downstream direction L0s latency */ - if ((link->aspm_capable & ASPM_STATE_L0S_DW) && + if ((link->aspm_capable & PCIE_LINK_STATE_L0S_DW) && (latency_dw_l0s > acceptable_l0s)) - link->aspm_capable &= ~ASPM_STATE_L0S_DW; + link->aspm_capable &= ~PCIE_LINK_STATE_L0S_DW; /* * Check L1 latency. * Every switch on the path to root complex need 1 @@ -603,9 +622,9 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint) * substate latencies (and hence do not do any check). */ latency = max_t(u32, latency_up_l1, latency_dw_l1); - if ((link->aspm_capable & ASPM_STATE_L1) && + if ((link->aspm_capable & PCIE_LINK_STATE_L1) && (latency + l1_switch_latency > acceptable_l1)) - link->aspm_capable &= ~ASPM_STATE_L1; + link->aspm_capable &= ~PCIE_LINK_STATE_L1; l1_switch_latency += NSEC_PER_USEC; link = link->parent; @@ -741,13 +760,13 @@ static void aspm_l1ss_init(struct pcie_link_state *link) child_l1ss_cap &= ~PCI_L1SS_CAP_ASPM_L1_2; if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_ASPM_L1_1) - link->aspm_support |= ASPM_STATE_L1_1; + link->aspm_support |= PCIE_LINK_STATE_L1_1; if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_ASPM_L1_2) - link->aspm_support |= ASPM_STATE_L1_2; + link->aspm_support |= PCIE_LINK_STATE_L1_2; if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_1) - link->aspm_support |= ASPM_STATE_L1_1_PCIPM; + link->aspm_support |= PCIE_LINK_STATE_L1_1_PCIPM; if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_2) - link->aspm_support |= ASPM_STATE_L1_2_PCIPM; + link->aspm_support |= PCIE_LINK_STATE_L1_2_PCIPM; if (parent_l1ss_cap) pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, @@ -757,15 +776,15 @@ static void aspm_l1ss_init(struct pcie_link_state *link) &child_l1ss_ctl1); if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_1) - link->aspm_enabled |= ASPM_STATE_L1_1; + link->aspm_enabled |= PCIE_LINK_STATE_L1_1; if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_2) - link->aspm_enabled |= ASPM_STATE_L1_2; + link->aspm_enabled |= PCIE_LINK_STATE_L1_2; if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_1) - link->aspm_enabled |= ASPM_STATE_L1_1_PCIPM; + link->aspm_enabled |= PCIE_LINK_STATE_L1_1_PCIPM; if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_2) - link->aspm_enabled |= ASPM_STATE_L1_2_PCIPM; + link->aspm_enabled |= PCIE_LINK_STATE_L1_2_PCIPM; - if (link->aspm_support & ASPM_STATE_L1_2_MASK) + if (link->aspm_support & PCIE_LINK_STATE_L1_2_MASK) aspm_calc_l12_info(link, parent_l1ss_cap, child_l1ss_cap); } @@ -778,8 +797,8 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) if (blacklist) { /* Set enabled/disable so that we will disable ASPM later */ - link->aspm_enabled = ASPM_STATE_ALL; - link->aspm_disable = ASPM_STATE_ALL; + link->aspm_enabled = PCIE_LINK_STATE_ASPM_ALL; + link->aspm_disable = PCIE_LINK_STATE_ASPM_ALL; return; } @@ -806,6 +825,15 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &parent_lnkctl); pcie_capability_read_word(child, PCI_EXP_LNKCTL, &child_lnkctl); + /* Disable L0s/L1 before updating L1SS config */ + if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, child_lnkctl) || + FIELD_GET(PCI_EXP_LNKCTL_ASPMC, parent_lnkctl)) { + pcie_capability_write_word(child, PCI_EXP_LNKCTL, + child_lnkctl & ~PCI_EXP_LNKCTL_ASPMC); + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, + parent_lnkctl & ~PCI_EXP_LNKCTL_ASPMC); + } + /* * Setup L0s state * @@ -814,22 +842,29 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) * support L0s. */ if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L0S) - link->aspm_support |= ASPM_STATE_L0S; + link->aspm_support |= PCIE_LINK_STATE_L0S; if (child_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) - link->aspm_enabled |= ASPM_STATE_L0S_UP; + link->aspm_enabled |= PCIE_LINK_STATE_L0S_UP; if (parent_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) - link->aspm_enabled |= ASPM_STATE_L0S_DW; + link->aspm_enabled |= PCIE_LINK_STATE_L0S_DW; /* Setup L1 state */ if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1) - link->aspm_support |= ASPM_STATE_L1; + link->aspm_support |= PCIE_LINK_STATE_L1; if (parent_lnkctl & child_lnkctl & PCI_EXP_LNKCTL_ASPM_L1) - link->aspm_enabled |= ASPM_STATE_L1; + link->aspm_enabled |= PCIE_LINK_STATE_L1; aspm_l1ss_init(link); + /* Restore L0s/L1 if they were enabled */ + if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, child_lnkctl) || + FIELD_GET(PCI_EXP_LNKCTL_ASPMC, parent_lnkctl)) { + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_lnkctl); + pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl); + } + /* Save default state */ link->aspm_default = link->aspm_enabled; @@ -846,25 +881,28 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) } } -/* Configure the ASPM L1 substates */ +/* Configure the ASPM L1 substates. Caller must disable L1 first. */ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) { - u32 val, enable_req; + u32 val; struct pci_dev *child = link->downstream, *parent = link->pdev; - enable_req = (link->aspm_enabled ^ state) & state; + val = 0; + if (state & PCIE_LINK_STATE_L1_1) + val |= PCI_L1SS_CTL1_ASPM_L1_1; + if (state & PCIE_LINK_STATE_L1_2) + val |= PCI_L1SS_CTL1_ASPM_L1_2; + if (state & PCIE_LINK_STATE_L1_1_PCIPM) + val |= PCI_L1SS_CTL1_PCIPM_L1_1; + if (state & PCIE_LINK_STATE_L1_2_PCIPM) + val |= PCI_L1SS_CTL1_PCIPM_L1_2; /* - * Here are the rules specified in the PCIe spec for enabling L1SS: - * - When enabling L1.x, enable bit at parent first, then at child - * - When disabling L1.x, disable bit at child first, then at parent - * - When enabling ASPM L1.x, need to disable L1 - * (at child followed by parent). - * - The ASPM/PCIPM L1.2 must be disabled while programming timing + * PCIe r6.2, sec 5.5.4, rules for enabling L1 PM Substates: + * - Clear L1.x enable bits at child first, then at parent + * - Set L1.x enable bits at parent first, then at child + * - ASPM/PCIPM L1.2 must be disabled while programming timing * parameters - * - * To keep it simple, disable all L1SS bits first, and later enable - * what is needed. */ /* Disable all L1 substates */ @@ -872,26 +910,6 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) PCI_L1SS_CTL1_L1SS_MASK, 0); pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, PCI_L1SS_CTL1_L1SS_MASK, 0); - /* - * If needed, disable L1, and it gets enabled later - * in pcie_config_aspm_link(). - */ - if (enable_req & (ASPM_STATE_L1_1 | ASPM_STATE_L1_2)) { - pcie_capability_clear_word(child, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPM_L1); - pcie_capability_clear_word(parent, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPM_L1); - } - - val = 0; - if (state & ASPM_STATE_L1_1) - val |= PCI_L1SS_CTL1_ASPM_L1_1; - if (state & ASPM_STATE_L1_2) - val |= PCI_L1SS_CTL1_ASPM_L1_2; - if (state & ASPM_STATE_L1_1_PCIPM) - val |= PCI_L1SS_CTL1_PCIPM_L1_1; - if (state & ASPM_STATE_L1_2_PCIPM) - val |= PCI_L1SS_CTL1_PCIPM_L1_2; /* Enable what we need to enable */ pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, @@ -916,43 +934,52 @@ static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state) state &= (link->aspm_capable & ~link->aspm_disable); /* Can't enable any substates if L1 is not enabled */ - if (!(state & ASPM_STATE_L1)) - state &= ~ASPM_STATE_L1SS; + if (!(state & PCIE_LINK_STATE_L1)) + state &= ~PCIE_LINK_STATE_L1SS; /* Spec says both ports must be in D0 before enabling PCI PM substates*/ if (parent->current_state != PCI_D0 || child->current_state != PCI_D0) { - state &= ~ASPM_STATE_L1_SS_PCIPM; - state |= (link->aspm_enabled & ASPM_STATE_L1_SS_PCIPM); + state &= ~PCIE_LINK_STATE_L1_SS_PCIPM; + state |= (link->aspm_enabled & PCIE_LINK_STATE_L1_SS_PCIPM); } /* Nothing to do if the link is already in the requested state */ if (link->aspm_enabled == state) return; /* Convert ASPM state to upstream/downstream ASPM register state */ - if (state & ASPM_STATE_L0S_UP) + if (state & PCIE_LINK_STATE_L0S_UP) dwstream |= PCI_EXP_LNKCTL_ASPM_L0S; - if (state & ASPM_STATE_L0S_DW) + if (state & PCIE_LINK_STATE_L0S_DW) upstream |= PCI_EXP_LNKCTL_ASPM_L0S; - if (state & ASPM_STATE_L1) { + if (state & PCIE_LINK_STATE_L1) { upstream |= PCI_EXP_LNKCTL_ASPM_L1; dwstream |= PCI_EXP_LNKCTL_ASPM_L1; } - if (link->aspm_capable & ASPM_STATE_L1SS) - pcie_config_aspm_l1ss(link, state); - /* - * Spec 2.0 suggests all functions should be configured the - * same setting for ASPM. Enabling ASPM L1 should be done in - * upstream component first and then downstream, and vice - * versa for disabling ASPM L1. Spec doesn't mention L0S. + * Per PCIe r6.2, sec 5.5.4, setting either or both of the enable + * bits for ASPM L1 PM Substates must be done while ASPM L1 is + * disabled. Disable L1 here and apply new configuration after L1SS + * configuration has been completed. + * + * Per sec 7.5.3.7, when disabling ASPM L1, software must disable + * it in the Downstream component prior to disabling it in the + * Upstream component, and ASPM L1 must be enabled in the Upstream + * component prior to enabling it in the Downstream component. + * + * Sec 7.5.3.7 also recommends programming the same ASPM Control + * value for all functions of a multi-function device. */ - if (state & ASPM_STATE_L1) - pcie_config_aspm_dev(parent, upstream); + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_config_aspm_dev(child, 0); + pcie_config_aspm_dev(parent, 0); + + if (link->aspm_capable & PCIE_LINK_STATE_L1SS) + pcie_config_aspm_l1ss(link, state); + + pcie_config_aspm_dev(parent, upstream); list_for_each_entry(child, &linkbus->devices, bus_list) pcie_config_aspm_dev(child, dwstream); - if (!(state & ASPM_STATE_L1)) - pcie_config_aspm_dev(parent, upstream); link->aspm_enabled = state; @@ -1243,16 +1270,16 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) parent_link = link->parent; /* - * link->downstream is a pointer to the pci_dev of function 0. If - * we remove that function, the pci_dev is about to be deallocated, - * so we can't use link->downstream again. Free the link state to - * avoid this. + * Free the parent link state, no later than function 0 (i.e. + * link->downstream) being removed. * - * If we're removing a non-0 function, it's possible we could - * retain the link state, but PCIe r6.0, sec 7.5.3.7, recommends - * programming the same ASPM Control value for all functions of - * multi-function devices, so disable ASPM for all of them. + * Do not free the link state any earlier. If function 0 is a + * switch upstream port, this link state is parent_link to all + * subordinate ones. */ + if (pdev != link->downstream) + goto out; + pcie_config_aspm_link(link, 0); list_del(&link->sibling); free_link_state(link); @@ -1263,6 +1290,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) pcie_config_aspm_path(parent_link); } + out: mutex_unlock(&aspm_lock); up_read(&pci_bus_sem); } @@ -1324,6 +1352,28 @@ static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev) return bridge->link_state; } +static u8 pci_calc_aspm_disable_mask(int state) +{ + state &= ~PCIE_LINK_STATE_CLKPM; + + /* L1 PM substates require L1 */ + if (state & PCIE_LINK_STATE_L1) + state |= PCIE_LINK_STATE_L1SS; + + return state; +} + +static u8 pci_calc_aspm_enable_mask(int state) +{ + state &= ~PCIE_LINK_STATE_CLKPM; + + /* L1 PM substates require L1 */ + if (state & PCIE_LINK_STATE_L1SS) + state |= PCIE_LINK_STATE_L1; + + return state; +} + static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool locked) { struct pcie_link_state *link = pcie_aspm_get_link(pdev); @@ -1346,19 +1396,7 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool locked if (!locked) down_read(&pci_bus_sem); mutex_lock(&aspm_lock); - if (state & PCIE_LINK_STATE_L0S) - link->aspm_disable |= ASPM_STATE_L0S; - if (state & PCIE_LINK_STATE_L1) - /* L1 PM substates require L1 */ - link->aspm_disable |= ASPM_STATE_L1 | ASPM_STATE_L1SS; - if (state & PCIE_LINK_STATE_L1_1) - link->aspm_disable |= ASPM_STATE_L1_1; - if (state & PCIE_LINK_STATE_L1_2) - link->aspm_disable |= ASPM_STATE_L1_2; - if (state & PCIE_LINK_STATE_L1_1_PCIPM) - link->aspm_disable |= ASPM_STATE_L1_1_PCIPM; - if (state & PCIE_LINK_STATE_L1_2_PCIPM) - link->aspm_disable |= ASPM_STATE_L1_2_PCIPM; + link->aspm_disable |= pci_calc_aspm_disable_mask(state); pcie_config_aspm_link(link, policy_to_aspm_state(link)); if (state & PCIE_LINK_STATE_CLKPM) @@ -1414,20 +1452,7 @@ static int __pci_enable_link_state(struct pci_dev *pdev, int state, bool locked) if (!locked) down_read(&pci_bus_sem); mutex_lock(&aspm_lock); - link->aspm_default = 0; - if (state & PCIE_LINK_STATE_L0S) - link->aspm_default |= ASPM_STATE_L0S; - if (state & PCIE_LINK_STATE_L1) - link->aspm_default |= ASPM_STATE_L1; - /* L1 PM substates require L1 */ - if (state & PCIE_LINK_STATE_L1_1) - link->aspm_default |= ASPM_STATE_L1_1 | ASPM_STATE_L1; - if (state & PCIE_LINK_STATE_L1_2) - link->aspm_default |= ASPM_STATE_L1_2 | ASPM_STATE_L1; - if (state & PCIE_LINK_STATE_L1_1_PCIPM) - link->aspm_default |= ASPM_STATE_L1_1_PCIPM | ASPM_STATE_L1; - if (state & PCIE_LINK_STATE_L1_2_PCIPM) - link->aspm_default |= ASPM_STATE_L1_2_PCIPM | ASPM_STATE_L1; + link->aspm_default = pci_calc_aspm_enable_mask(state); pcie_config_aspm_link(link, policy_to_aspm_state(link)); link->clkpm_default = (state & PCIE_LINK_STATE_CLKPM) ? 1 : 0; @@ -1446,6 +1471,9 @@ static int __pci_enable_link_state(struct pci_dev *pdev, int state, bool locked) * touch the LNKCTL register. Also note that this does not enable states * disabled by pci_disable_link_state(). Return 0 or a negative errno. * + * Note: Ensure devices are in D0 before enabling PCI-PM L1 PM Substates, per + * PCIe r6.0, sec 5.5.4. + * * @pdev: PCI device * @state: Mask of ASPM link states to enable */ @@ -1462,6 +1490,9 @@ EXPORT_SYMBOL(pci_enable_link_state); * can't touch the LNKCTL register. Also note that this does not enable states * disabled by pci_disable_link_state(). Return 0 or a negative errno. * + * Note: Ensure devices are in D0 before enabling PCI-PM L1 PM Substates, per + * PCIe r6.0, sec 5.5.4. + * * @pdev: PCI device * @state: Mask of ASPM link states to enable * @@ -1563,12 +1594,12 @@ static ssize_t aspm_attr_store_common(struct device *dev, if (state_enable) { link->aspm_disable &= ~state; /* need to enable L1 for substates */ - if (state & ASPM_STATE_L1SS) - link->aspm_disable &= ~ASPM_STATE_L1; + if (state & PCIE_LINK_STATE_L1SS) + link->aspm_disable &= ~PCIE_LINK_STATE_L1; } else { link->aspm_disable |= state; - if (state & ASPM_STATE_L1) - link->aspm_disable |= ASPM_STATE_L1SS; + if (state & PCIE_LINK_STATE_L1) + link->aspm_disable |= PCIE_LINK_STATE_L1SS; } pcie_config_aspm_link(link, policy_to_aspm_state(link)); @@ -1582,12 +1613,12 @@ static ssize_t aspm_attr_store_common(struct device *dev, #define ASPM_ATTR(_f, _s) \ static ssize_t _f##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ -{ return aspm_attr_show_common(dev, attr, buf, ASPM_STATE_##_s); } \ +{ return aspm_attr_show_common(dev, attr, buf, PCIE_LINK_STATE_##_s); } \ \ static ssize_t _f##_store(struct device *dev, \ struct device_attribute *attr, \ const char *buf, size_t len) \ -{ return aspm_attr_store_common(dev, attr, buf, len, ASPM_STATE_##_s); } +{ return aspm_attr_store_common(dev, attr, buf, len, PCIE_LINK_STATE_##_s); } ASPM_ATTR(l0s_aspm, L0S) ASPM_ATTR(l1_aspm, L1) @@ -1654,12 +1685,12 @@ static umode_t aspm_ctrl_attrs_are_visible(struct kobject *kobj, struct pci_dev *pdev = to_pci_dev(dev); struct pcie_link_state *link = pcie_aspm_get_link(pdev); static const u8 aspm_state_map[] = { - ASPM_STATE_L0S, - ASPM_STATE_L1, - ASPM_STATE_L1_1, - ASPM_STATE_L1_2, - ASPM_STATE_L1_1_PCIPM, - ASPM_STATE_L1_2_PCIPM, + PCIE_LINK_STATE_L0S, + PCIE_LINK_STATE_L1, + PCIE_LINK_STATE_L1_1, + PCIE_LINK_STATE_L1_2, + PCIE_LINK_STATE_L1_1_PCIPM, + PCIE_LINK_STATE_L1_2_PCIPM, }; if (aspm_disabled || !link) diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c new file mode 100644 index 000000000000..36f939f23d34 --- /dev/null +++ b/drivers/pci/pcie/bwctrl.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * PCIe bandwidth controller + * + * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com> + * + * Copyright (C) 2019 Dell Inc + * Copyright (C) 2023-2024 Intel Corporation + * + * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds + * and notify the operating system when the Link Width or Speed changes. The + * notification capability is required for all Root Ports and Downstream + * Ports supporting Link Width wider than x1 and/or multiple Link Speeds. + * + * This service port driver hooks into the Bandwidth Notification interrupt + * watching for changes or links becoming degraded in operation. It updates + * the cached Current Link Speed that is exposed to user space through sysfs. + */ + +#define dev_fmt(fmt) "bwctrl: " fmt + +#include <linux/atomic.h> +#include <linux/bitops.h> +#include <linux/bits.h> +#include <linux/cleanup.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/pci-bwctrl.h> +#include <linux/rwsem.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include "../pci.h" +#include "portdrv.h" + +/** + * struct pcie_bwctrl_data - PCIe bandwidth controller + * @set_speed_mutex: Serializes link speed changes + * @cdev: Thermal cooling device associated with the port + */ +struct pcie_bwctrl_data { + struct mutex set_speed_mutex; + struct thermal_cooling_device *cdev; +}; + +/* Prevent port removal during Link Speed changes. */ +static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem); + +static bool pcie_valid_speed(enum pci_bus_speed speed) +{ + return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT); +} + +static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed) +{ + static const u8 speed_conv[] = { + [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, + [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, + [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, + [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, + [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, + [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, + }; + + if (WARN_ON_ONCE(!pcie_valid_speed(speed))) + return 0; + + return speed_conv[speed]; +} + +static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds) +{ + return __fls(supported_speeds); +} + +/** + * pcie_bwctrl_select_speed - Select Target Link Speed + * @port: PCIe Port + * @speed_req: Requested PCIe Link Speed + * + * Select Target Link Speed by take into account Supported Link Speeds of + * both the Root Port and the Endpoint. + * + * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.) + */ +static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req) +{ + struct pci_bus *bus = port->subordinate; + u8 desired_speeds, supported_speeds; + struct pci_dev *dev; + + desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req), + __fls(PCI_EXP_LNKCAP2_SLS_2_5GB)); + + supported_speeds = port->supported_speeds; + if (bus) { + down_read(&pci_bus_sem); + dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list); + if (dev) + supported_speeds &= dev->supported_speeds; + up_read(&pci_bus_sem); + } + if (!supported_speeds) + supported_speeds = PCI_EXP_LNKCAP2_SLS_2_5GB; + + return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds); +} + +static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt) +{ + int ret; + + ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, target_speed); + if (ret != PCIBIOS_SUCCESSFUL) + return pcibios_err_to_errno(ret); + + return pcie_retrain_link(port, use_lt); +} + +/** + * pcie_set_target_speed - Set downstream Link Speed for PCIe Port + * @port: PCIe Port + * @speed_req: Requested PCIe Link Speed + * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training + * + * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be + * adjusted downwards to the best speed supported by both the Port and PCIe + * Device underneath it. + * + * Return: + * * 0 - on success + * * -EINVAL - @speed_req is not a PCIe Link Speed + * * -ENODEV - @port is not controllable + * * -ETIMEDOUT - changing Link Speed took too long + * * -EAGAIN - Link Speed was changed but @speed_req was not achieved + */ +int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, + bool use_lt) +{ + struct pci_bus *bus = port->subordinate; + u16 target_speed; + int ret; + + if (WARN_ON_ONCE(!pcie_valid_speed(speed_req))) + return -EINVAL; + + if (bus && bus->cur_bus_speed == speed_req) + return 0; + + target_speed = pcie_bwctrl_select_speed(port, speed_req); + + scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) { + struct pcie_bwctrl_data *data = port->link_bwctrl; + + /* + * port->link_bwctrl is NULL during initial scan when called + * e.g. from the Target Speed quirk. + */ + if (data) + mutex_lock(&data->set_speed_mutex); + + ret = pcie_bwctrl_change_speed(port, target_speed, use_lt); + + if (data) + mutex_unlock(&data->set_speed_mutex); + } + + /* + * Despite setting higher speed into the Target Link Speed, empty + * bus won't train to 5GT+ speeds. + */ + if (!ret && bus && bus->cur_bus_speed != speed_req && + !list_empty(&bus->devices)) + ret = -EAGAIN; + + return ret; +} + +static void pcie_bwnotif_enable(struct pcie_device *srv) +{ + struct pci_dev *port = srv->port; + u16 link_status; + int ret; + + /* Note if LBMS has been seen so far */ + ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) + set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); + + pcie_capability_set_word(port, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); + pcie_capability_write_word(port, PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); + + /* + * Update after enabling notifications & clearing status bits ensures + * link speed is up to date. + */ + pcie_update_link_speed(port->subordinate); +} + +static void pcie_bwnotif_disable(struct pci_dev *port) +{ + pcie_capability_clear_word(port, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); +} + +static irqreturn_t pcie_bwnotif_irq(int irq, void *context) +{ + struct pcie_device *srv = context; + struct pci_dev *port = srv->port; + u16 link_status, events; + int ret; + + ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + if (ret != PCIBIOS_SUCCESSFUL) + return IRQ_NONE; + + events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); + if (!events) + return IRQ_NONE; + + if (events & PCI_EXP_LNKSTA_LBMS) + set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); + + pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); + + /* + * Interrupts will not be triggered from any further Link Speed + * change until LBMS is cleared by the write. Therefore, re-read the + * speed (inside pcie_update_link_speed()) after LBMS has been + * cleared to avoid missing link speed changes. + */ + pcie_update_link_speed(port->subordinate); + + return IRQ_HANDLED; +} + +void pcie_reset_lbms(struct pci_dev *port) +{ + clear_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); + pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); +} + +static int pcie_bwnotif_probe(struct pcie_device *srv) +{ + struct pci_dev *port = srv->port; + int ret; + + /* Can happen if we run out of bus numbers during enumeration. */ + if (!port->subordinate) + return -ENODEV; + + struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device, + sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + ret = devm_mutex_init(&srv->device, &data->set_speed_mutex); + if (ret) + return ret; + + scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { + port->link_bwctrl = data; + + ret = request_irq(srv->irq, pcie_bwnotif_irq, + IRQF_SHARED, "PCIe bwctrl", srv); + if (ret) { + port->link_bwctrl = NULL; + return ret; + } + + pcie_bwnotif_enable(srv); + } + + pci_dbg(port, "enabled with IRQ %d\n", srv->irq); + + /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */ + port->link_bwctrl->cdev = pcie_cooling_device_register(port); + if (IS_ERR(port->link_bwctrl->cdev)) + port->link_bwctrl->cdev = NULL; + + return 0; +} + +static void pcie_bwnotif_remove(struct pcie_device *srv) +{ + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + + pcie_cooling_device_unregister(data->cdev); + + scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { + pcie_bwnotif_disable(srv->port); + + free_irq(srv->irq, srv); + + srv->port->link_bwctrl = NULL; + } +} + +static int pcie_bwnotif_suspend(struct pcie_device *srv) +{ + pcie_bwnotif_disable(srv->port); + return 0; +} + +static int pcie_bwnotif_resume(struct pcie_device *srv) +{ + pcie_bwnotif_enable(srv); + return 0; +} + +static struct pcie_port_service_driver pcie_bwctrl_driver = { + .name = "pcie_bwctrl", + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_BWCTRL, + .probe = pcie_bwnotif_probe, + .suspend = pcie_bwnotif_suspend, + .resume = pcie_bwnotif_resume, + .remove = pcie_bwnotif_remove, +}; + +int __init pcie_bwctrl_init(void) +{ + return pcie_port_service_register(&pcie_bwctrl_driver); +} diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index a668820696dc..fc18349614d7 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -190,7 +190,7 @@ out: static void dpc_process_rp_pio_error(struct pci_dev *pdev) { u16 cap = pdev->dpc_cap, dpc_status, first_error; - u32 status, mask, sev, syserr, exc, log, prefix; + u32 status, mask, sev, syserr, exc, log; struct pcie_tlp_log tlp_log; int i; @@ -215,22 +215,20 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev) first_error == i ? " (First)" : ""); } - if (pdev->dpc_rp_log_size < 4) + if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG) goto clear_status; - pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, &tlp_log); - pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n", - tlp_log.dw[0], tlp_log.dw[1], tlp_log.dw[2], tlp_log.dw[3]); - - if (pdev->dpc_rp_log_size < 5) + pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, + cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, + dpc_tlp_log_len(pdev), + pdev->subordinate->flit_mode, + &tlp_log); + pcie_print_tlp_log(pdev, &tlp_log, KERN_ERR, dev_fmt("")); + + if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG + 1) goto clear_status; pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log); pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log); - for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) { - pci_read_config_dword(pdev, - cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG + i * 4, &prefix); - pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); - } clear_status: pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status); } @@ -254,46 +252,59 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, else info->severity = AER_NONFATAL; + info->level = KERN_ERR; + + info->dev[0] = dev; + info->error_dev_num = 1; + return 1; } void dpc_process_error(struct pci_dev *pdev) { u16 cap = pdev->dpc_cap, status, source, reason, ext_reason; - struct aer_err_info info; + struct aer_err_info info = {}; pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); - pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source); - - pci_info(pdev, "containment event, status:%#06x source:%#06x\n", - status, source); reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN; - ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT; - pci_warn(pdev, "%s detected\n", - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR) ? - "unmasked uncorrectable error" : - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE) ? - "ERR_NONFATAL" : - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ? - "ERR_FATAL" : - (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ? - "RP PIO error" : - (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ? - "software trigger" : - "reserved error"); - - /* show RP PIO error detail information */ - if (pdev->dpc_rp_extensions && - reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT && - ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) - dpc_process_rp_pio_error(pdev); - else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR && - dpc_get_aer_uncorrect_severity(pdev, &info) && - aer_get_device_error_info(pdev, &info)) { - aer_print_error(pdev, &info); - pci_aer_clear_nonfatal_status(pdev); - pci_aer_clear_fatal_status(pdev); + + switch (reason) { + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR: + pci_warn(pdev, "containment event, status:%#06x: unmasked uncorrectable error detected\n", + status); + if (dpc_get_aer_uncorrect_severity(pdev, &info) && + aer_get_device_error_info(&info, 0)) { + aer_print_error(&info, 0); + pci_aer_clear_nonfatal_status(pdev); + pci_aer_clear_fatal_status(pdev); + } + break; + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE: + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE: + pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, + &source); + pci_warn(pdev, "containment event, status:%#06x, %s received from %04x:%02x:%02x.%d\n", + status, + (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ? + "ERR_FATAL" : "ERR_NONFATAL", + pci_domain_nr(pdev->bus), PCI_BUS_NUM(source), + PCI_SLOT(source), PCI_FUNC(source)); + break; + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT: + ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT; + pci_warn(pdev, "containment event, status:%#06x: %s detected\n", + status, + (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ? + "RP PIO error" : + (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ? + "software trigger" : + "reserved error"); + /* show RP PIO error detail information */ + if (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO && + pdev->dpc_rp_extensions) + dpc_process_rp_pio_error(pdev); + break; } } @@ -402,9 +413,21 @@ void pci_dpc_init(struct pci_dev *pdev) /* Quirks may set dpc_rp_log_size if device or firmware is buggy */ if (!pdev->dpc_rp_log_size) { + u16 flags; + int ret; + + ret = pcie_capability_read_word(pdev, PCI_EXP_FLAGS, &flags); + if (ret) + return; + pdev->dpc_rp_log_size = FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE, cap); - if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) { + if (FIELD_GET(PCI_EXP_FLAGS_FLIT, flags)) + pdev->dpc_rp_log_size += FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE4, + cap) << 4; + + if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG || + pdev->dpc_rp_log_size > PCIE_STD_MAX_TLP_HEADERLOG + 1) { pci_err(pdev, "RP PIO log size %u is invalid\n", pdev->dpc_rp_log_size); pdev->dpc_rp_log_size = 0; @@ -412,13 +435,44 @@ void pci_dpc_init(struct pci_dev *pdev) } } +static void dpc_enable(struct pcie_device *dev) +{ + struct pci_dev *pdev = dev->port; + int dpc = pdev->dpc_cap; + u16 ctl; + + /* + * Clear DPC Interrupt Status so we don't get an interrupt for an + * old event when setting DPC Interrupt Enable. + */ + pci_write_config_word(pdev, dpc + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_INTERRUPT); + + pci_read_config_word(pdev, dpc + PCI_EXP_DPC_CTL, &ctl); + ctl &= ~PCI_EXP_DPC_CTL_EN_MASK; + ctl |= PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; + pci_write_config_word(pdev, dpc + PCI_EXP_DPC_CTL, ctl); +} + +static void dpc_disable(struct pcie_device *dev) +{ + struct pci_dev *pdev = dev->port; + int dpc = pdev->dpc_cap; + u16 ctl; + + /* Disable DPC triggering and DPC interrupts */ + pci_read_config_word(pdev, dpc + PCI_EXP_DPC_CTL, &ctl); + ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); + pci_write_config_word(pdev, dpc + PCI_EXP_DPC_CTL, ctl); +} + #define FLAG(x, y) (((x) & (y)) ? '+' : '-') static int dpc_probe(struct pcie_device *dev) { struct pci_dev *pdev = dev->port; struct device *device = &dev->device; int status; - u16 ctl, cap; + u16 cap; if (!pcie_aer_is_native(pdev) && !pcie_ports_dpc_native) return -ENOTSUPP; @@ -433,11 +487,7 @@ static int dpc_probe(struct pcie_device *dev) } pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap); - - pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl); - ctl &= ~PCI_EXP_DPC_CTL_EN_MASK; - ctl |= PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; - pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl); + dpc_enable(dev); pci_info(pdev, "enabled with IRQ %d\n", dev->irq); pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", @@ -450,14 +500,21 @@ static int dpc_probe(struct pcie_device *dev) return status; } -static void dpc_remove(struct pcie_device *dev) +static int dpc_suspend(struct pcie_device *dev) { - struct pci_dev *pdev = dev->port; - u16 ctl; + dpc_disable(dev); + return 0; +} - pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl); - ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); - pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl); +static int dpc_resume(struct pcie_device *dev) +{ + dpc_enable(dev); + return 0; +} + +static void dpc_remove(struct pcie_device *dev) +{ + dpc_disable(dev); } static struct pcie_port_service_driver dpcdriver = { @@ -465,6 +522,8 @@ static struct pcie_port_service_driver dpcdriver = { .port_type = PCIE_ANY_PORT, .service = PCIE_PORT_SERVICE_DPC, .probe = dpc_probe, + .suspend = dpc_suspend, + .resume = dpc_resume, .remove = dpc_remove, }; diff --git a/drivers/pci/pcie/edr.c b/drivers/pci/pcie/edr.c index 5f4914d313a1..e86298dbbcff 100644 --- a/drivers/pci/pcie/edr.c +++ b/drivers/pci/pcie/edr.c @@ -32,10 +32,10 @@ static int acpi_enable_dpc(struct pci_dev *pdev) int status = 0; /* - * Behavior when calling unsupported _DSM functions is undefined, - * so check whether EDR_PORT_DPC_ENABLE_DSM is supported. + * Per PCI Firmware r3.3, sec 4.6.12, EDR_PORT_DPC_ENABLE_DSM is + * optional. Return success if it's not implemented. */ - if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 5, + if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 6, 1ULL << EDR_PORT_DPC_ENABLE_DSM)) return 0; @@ -46,12 +46,7 @@ static int acpi_enable_dpc(struct pci_dev *pdev) argv4.package.count = 1; argv4.package.elements = &req; - /* - * Per Downstream Port Containment Related Enhancements ECN to PCI - * Firmware Specification r3.2, sec 4.6.12, EDR_PORT_DPC_ENABLE_DSM is - * optional. Return success if it's not implemented. - */ - obj = acpi_evaluate_dsm(adev->handle, &pci_acpi_dsm_guid, 5, + obj = acpi_evaluate_dsm(adev->handle, &pci_acpi_dsm_guid, 6, EDR_PORT_DPC_ENABLE_DSM, &argv4); if (!obj) return 0; @@ -85,8 +80,9 @@ static struct pci_dev *acpi_dpc_port_get(struct pci_dev *pdev) u16 port; /* - * Behavior when calling unsupported _DSM functions is undefined, - * so check whether EDR_PORT_DPC_ENABLE_DSM is supported. + * If EDR_PORT_LOCATE_DSM is not implemented under the target of + * EDR, the target is the port that experienced the containment + * event (PCI Firmware r3.3, sec 4.6.13). */ if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 5, 1ULL << EDR_PORT_LOCATE_DSM)) @@ -104,6 +100,16 @@ static struct pci_dev *acpi_dpc_port_get(struct pci_dev *pdev) } /* + * Bit 31 represents the success/failure of the operation. If bit + * 31 is set, the operation failed. + */ + if (obj->integer.value & BIT(31)) { + ACPI_FREE(obj); + pci_err(pdev, "Locate Port _DSM failed\n"); + return NULL; + } + + /* * Firmware returns DPC port BDF details in following format: * 15:8 = bus * 7:3 = device diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 705893b5f7b0..de6381c690f5 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -116,9 +116,7 @@ static int report_mmio_enabled(struct pci_dev *dev, void *data) device_lock(&dev->dev); pdrv = dev->driver; - if (!pdrv || - !pdrv->err_handler || - !pdrv->err_handler->mmio_enabled) + if (!pdrv || !pdrv->err_handler || !pdrv->err_handler->mmio_enabled) goto out; err_handler = pdrv->err_handler; @@ -137,9 +135,7 @@ static int report_slot_reset(struct pci_dev *dev, void *data) device_lock(&dev->dev); pdrv = dev->driver; - if (!pdrv || - !pdrv->err_handler || - !pdrv->err_handler->slot_reset) + if (!pdrv || !pdrv->err_handler || !pdrv->err_handler->slot_reset) goto out; err_handler = pdrv->err_handler; @@ -158,9 +154,7 @@ static int report_resume(struct pci_dev *dev, void *data) device_lock(&dev->dev); pdrv = dev->driver; if (!pci_dev_set_io_state(dev, pci_channel_io_normal) || - !pdrv || - !pdrv->err_handler || - !pdrv->err_handler->resume) + !pdrv || !pdrv->err_handler || !pdrv->err_handler->resume) goto out; err_handler = pdrv->err_handler; @@ -277,7 +271,6 @@ failed: pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); - /* TODO: Should kernel panic here? */ pci_info(bridge, "device recovery failed\n"); return status; diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c index 14a4b89a3b83..e8318fd5f6ed 100644 --- a/drivers/pci/pcie/portdrv.c +++ b/drivers/pci/pcie/portdrv.c @@ -68,7 +68,7 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask, */ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | - PCIE_PORT_SERVICE_BWNOTIF)) { + PCIE_PORT_SERVICE_BWCTRL)) { pcie_capability_read_word(dev, PCI_EXP_FLAGS, ®16); *pme = FIELD_GET(PCI_EXP_FLAGS_IRQ, reg16); nvec = *pme + 1; @@ -150,11 +150,11 @@ static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask) /* PME, hotplug and bandwidth notification share an MSI/MSI-X vector */ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | - PCIE_PORT_SERVICE_BWNOTIF)) { + PCIE_PORT_SERVICE_BWCTRL)) { pcie_irq = pci_irq_vector(dev, pme); irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pcie_irq; irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pcie_irq; - irqs[PCIE_PORT_SERVICE_BWNOTIF_SHIFT] = pcie_irq; + irqs[PCIE_PORT_SERVICE_BWCTRL_SHIFT] = pcie_irq; } if (mask & PCIE_PORT_SERVICE_AER) @@ -187,15 +187,15 @@ static int pcie_init_service_irqs(struct pci_dev *dev, int *irqs, int mask) * interrupt. */ if ((mask & PCIE_PORT_SERVICE_PME) && pcie_pme_no_msi()) - goto legacy_irq; + goto intx_irq; /* Try to use MSI-X or MSI if supported */ if (pcie_port_enable_irq_vec(dev, irqs, mask) == 0) return 0; -legacy_irq: - /* fall back to legacy IRQ */ - ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY); +intx_irq: + /* fall back to INTX IRQ */ + ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_INTX); if (ret < 0) return -ENODEV; @@ -228,10 +228,12 @@ static int get_port_device_capability(struct pci_dev *dev) /* * Disable hot-plug interrupts in case they have been enabled - * by the BIOS and the hot-plug service driver is not loaded. + * by the BIOS and the hot-plug service driver won't be loaded + * to handle them. */ - pcie_capability_clear_word(dev, PCI_EXP_SLTCTL, - PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_HPIE); + if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + pcie_capability_clear_word(dev, PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_HPIE); } #ifdef CONFIG_PCIEAER @@ -265,13 +267,15 @@ static int get_port_device_capability(struct pci_dev *dev) (pcie_ports_dpc_native || (services & PCIE_PORT_SERVICE_AER))) services |= PCIE_PORT_SERVICE_DPC; + /* Enable bandwidth control if more than one speed is supported. */ if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) { u32 linkcap; pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap); - if (linkcap & PCI_EXP_LNKCAP_LBNC) - services |= PCIE_PORT_SERVICE_BWNOTIF; + if (linkcap & PCI_EXP_LNKCAP_LBNC && + hweight8(dev->supported_speeds) > 1) + services |= PCIE_PORT_SERVICE_BWCTRL; } return services; @@ -786,7 +790,7 @@ static const struct pci_error_handlers pcie_portdrv_err_handler = { static struct pci_driver pcie_portdriver = { .name = "pcieport", - .id_table = &port_pci_ids[0], + .id_table = port_pci_ids, .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, @@ -828,6 +832,7 @@ static void __init pcie_init_services(void) pcie_aer_init(); pcie_pme_init(); pcie_dpc_init(); + pcie_bwctrl_init(); pcie_hp_init(); } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 12c89ea0313b..bd29d1cc7b8b 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -20,8 +20,8 @@ #define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) #define PCIE_PORT_SERVICE_DPC_SHIFT 3 /* Downstream Port Containment */ #define PCIE_PORT_SERVICE_DPC (1 << PCIE_PORT_SERVICE_DPC_SHIFT) -#define PCIE_PORT_SERVICE_BWNOTIF_SHIFT 4 /* Bandwidth notification */ -#define PCIE_PORT_SERVICE_BWNOTIF (1 << PCIE_PORT_SERVICE_BWNOTIF_SHIFT) +#define PCIE_PORT_SERVICE_BWCTRL_SHIFT 4 /* Bandwidth Controller (notifications) */ +#define PCIE_PORT_SERVICE_BWCTRL (1 << PCIE_PORT_SERVICE_BWCTRL_SHIFT) #define PCIE_PORT_DEVICE_MAXSERVICES 5 @@ -51,6 +51,8 @@ int pcie_dpc_init(void); static inline int pcie_dpc_init(void) { return 0; } #endif +int pcie_bwctrl_init(void); + /* Port Type */ #define PCIE_ANY_PORT (~0) diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c index 7cfb6c0d5dcb..4bd73f038ffb 100644 --- a/drivers/pci/pcie/ptm.c +++ b/drivers/pci/pcie/ptm.c @@ -5,6 +5,7 @@ */ #include <linux/bitfield.h> +#include <linux/debugfs.h> #include <linux/module.h> #include <linux/init.h> #include <linux/pci.h> @@ -252,3 +253,304 @@ bool pcie_ptm_enabled(struct pci_dev *dev) return dev->ptm_enabled; } EXPORT_SYMBOL(pcie_ptm_enabled); + +#if IS_ENABLED(CONFIG_DEBUG_FS) +static ssize_t context_update_write(struct file *file, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct pci_ptm_debugfs *ptm_debugfs = file->private_data; + char buf[7]; + int ret; + u8 mode; + + if (!ptm_debugfs->ops->context_update_write) + return -EOPNOTSUPP; + + if (count < 1 || count >= sizeof(buf)) + return -EINVAL; + + ret = copy_from_user(buf, ubuf, count); + if (ret) + return -EFAULT; + + buf[count] = '\0'; + + if (sysfs_streq(buf, "auto")) + mode = PCIE_PTM_CONTEXT_UPDATE_AUTO; + else if (sysfs_streq(buf, "manual")) + mode = PCIE_PTM_CONTEXT_UPDATE_MANUAL; + else + return -EINVAL; + + mutex_lock(&ptm_debugfs->lock); + ret = ptm_debugfs->ops->context_update_write(ptm_debugfs->pdata, mode); + mutex_unlock(&ptm_debugfs->lock); + if (ret) + return ret; + + return count; +} + +static ssize_t context_update_read(struct file *file, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct pci_ptm_debugfs *ptm_debugfs = file->private_data; + char buf[8]; /* Extra space for NULL termination at the end */ + ssize_t pos; + u8 mode; + + if (!ptm_debugfs->ops->context_update_read) + return -EOPNOTSUPP; + + mutex_lock(&ptm_debugfs->lock); + ptm_debugfs->ops->context_update_read(ptm_debugfs->pdata, &mode); + mutex_unlock(&ptm_debugfs->lock); + + if (mode == PCIE_PTM_CONTEXT_UPDATE_AUTO) + pos = scnprintf(buf, sizeof(buf), "auto\n"); + else + pos = scnprintf(buf, sizeof(buf), "manual\n"); + + return simple_read_from_buffer(ubuf, count, ppos, buf, pos); +} + +static const struct file_operations context_update_fops = { + .open = simple_open, + .read = context_update_read, + .write = context_update_write, +}; + +static int context_valid_get(void *data, u64 *val) +{ + struct pci_ptm_debugfs *ptm_debugfs = data; + bool valid; + int ret; + + if (!ptm_debugfs->ops->context_valid_read) + return -EOPNOTSUPP; + + mutex_lock(&ptm_debugfs->lock); + ret = ptm_debugfs->ops->context_valid_read(ptm_debugfs->pdata, &valid); + mutex_unlock(&ptm_debugfs->lock); + if (ret) + return ret; + + *val = valid; + + return 0; +} + +static int context_valid_set(void *data, u64 val) +{ + struct pci_ptm_debugfs *ptm_debugfs = data; + int ret; + + if (!ptm_debugfs->ops->context_valid_write) + return -EOPNOTSUPP; + + mutex_lock(&ptm_debugfs->lock); + ret = ptm_debugfs->ops->context_valid_write(ptm_debugfs->pdata, !!val); + mutex_unlock(&ptm_debugfs->lock); + + return ret; +} + +DEFINE_DEBUGFS_ATTRIBUTE(context_valid_fops, context_valid_get, + context_valid_set, "%llu\n"); + +static int local_clock_get(void *data, u64 *val) +{ + struct pci_ptm_debugfs *ptm_debugfs = data; + u64 clock; + int ret; + + if (!ptm_debugfs->ops->local_clock_read) + return -EOPNOTSUPP; + + ret = ptm_debugfs->ops->local_clock_read(ptm_debugfs->pdata, &clock); + if (ret) + return ret; + + *val = clock; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(local_clock_fops, local_clock_get, NULL, "%llu\n"); + +static int master_clock_get(void *data, u64 *val) +{ + struct pci_ptm_debugfs *ptm_debugfs = data; + u64 clock; + int ret; + + if (!ptm_debugfs->ops->master_clock_read) + return -EOPNOTSUPP; + + ret = ptm_debugfs->ops->master_clock_read(ptm_debugfs->pdata, &clock); + if (ret) + return ret; + + *val = clock; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(master_clock_fops, master_clock_get, NULL, "%llu\n"); + +static int t1_get(void *data, u64 *val) +{ + struct pci_ptm_debugfs *ptm_debugfs = data; + u64 clock; + int ret; + + if (!ptm_debugfs->ops->t1_read) + return -EOPNOTSUPP; + + ret = ptm_debugfs->ops->t1_read(ptm_debugfs->pdata, &clock); + if (ret) + return ret; + + *val = clock; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(t1_fops, t1_get, NULL, "%llu\n"); + +static int t2_get(void *data, u64 *val) +{ + struct pci_ptm_debugfs *ptm_debugfs = data; + u64 clock; + int ret; + + if (!ptm_debugfs->ops->t2_read) + return -EOPNOTSUPP; + + ret = ptm_debugfs->ops->t2_read(ptm_debugfs->pdata, &clock); + if (ret) + return ret; + + *val = clock; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(t2_fops, t2_get, NULL, "%llu\n"); + +static int t3_get(void *data, u64 *val) +{ + struct pci_ptm_debugfs *ptm_debugfs = data; + u64 clock; + int ret; + + if (!ptm_debugfs->ops->t3_read) + return -EOPNOTSUPP; + + ret = ptm_debugfs->ops->t3_read(ptm_debugfs->pdata, &clock); + if (ret) + return ret; + + *val = clock; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(t3_fops, t3_get, NULL, "%llu\n"); + +static int t4_get(void *data, u64 *val) +{ + struct pci_ptm_debugfs *ptm_debugfs = data; + u64 clock; + int ret; + + if (!ptm_debugfs->ops->t4_read) + return -EOPNOTSUPP; + + ret = ptm_debugfs->ops->t4_read(ptm_debugfs->pdata, &clock); + if (ret) + return ret; + + *val = clock; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(t4_fops, t4_get, NULL, "%llu\n"); + +#define pcie_ptm_create_debugfs_file(pdata, mode, attr) \ + do { \ + if (ops->attr##_visible && ops->attr##_visible(pdata)) \ + debugfs_create_file(#attr, mode, ptm_debugfs->debugfs, \ + ptm_debugfs, &attr##_fops); \ + } while (0) + +/* + * pcie_ptm_create_debugfs() - Create debugfs entries for the PTM context + * @dev: PTM capable component device + * @pdata: Private data of the PTM capable component device + * @ops: PTM callback structure + * + * Create debugfs entries for exposing the PTM context of the PTM capable + * components such as Root Complex and Endpoint controllers. + * + * Return: Pointer to 'struct pci_ptm_debugfs' if success, NULL otherwise. + */ +struct pci_ptm_debugfs *pcie_ptm_create_debugfs(struct device *dev, void *pdata, + const struct pcie_ptm_ops *ops) +{ + struct pci_ptm_debugfs *ptm_debugfs; + char *dirname; + int ret; + + /* Caller must provide check_capability() callback */ + if (!ops->check_capability) + return NULL; + + /* Check for PTM capability before creating debugfs attrbutes */ + ret = ops->check_capability(pdata); + if (!ret) { + dev_dbg(dev, "PTM capability not present\n"); + return NULL; + } + + ptm_debugfs = kzalloc(sizeof(*ptm_debugfs), GFP_KERNEL); + if (!ptm_debugfs) + return NULL; + + dirname = devm_kasprintf(dev, GFP_KERNEL, "pcie_ptm_%s", dev_name(dev)); + if (!dirname) + return NULL; + + ptm_debugfs->debugfs = debugfs_create_dir(dirname, NULL); + ptm_debugfs->pdata = pdata; + ptm_debugfs->ops = ops; + mutex_init(&ptm_debugfs->lock); + + pcie_ptm_create_debugfs_file(pdata, 0644, context_update); + pcie_ptm_create_debugfs_file(pdata, 0644, context_valid); + pcie_ptm_create_debugfs_file(pdata, 0444, local_clock); + pcie_ptm_create_debugfs_file(pdata, 0444, master_clock); + pcie_ptm_create_debugfs_file(pdata, 0444, t1); + pcie_ptm_create_debugfs_file(pdata, 0444, t2); + pcie_ptm_create_debugfs_file(pdata, 0444, t3); + pcie_ptm_create_debugfs_file(pdata, 0444, t4); + + return ptm_debugfs; +} +EXPORT_SYMBOL_GPL(pcie_ptm_create_debugfs); + +/* + * pcie_ptm_destroy_debugfs() - Destroy debugfs entries for the PTM context + * @ptm_debugfs: Pointer to the PTM debugfs struct + */ +void pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs) +{ + if (!ptm_debugfs) + return; + + mutex_destroy(&ptm_debugfs->lock); + debugfs_remove_recursive(ptm_debugfs->debugfs); +} +EXPORT_SYMBOL_GPL(pcie_ptm_destroy_debugfs); +#endif diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c new file mode 100644 index 000000000000..71f8fc9ea2ed --- /dev/null +++ b/drivers/pci/pcie/tlp.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCIe TLP Log handling + * + * Copyright (C) 2024 Intel Corporation + */ + +#include <linux/aer.h> +#include <linux/array_size.h> +#include <linux/bitfield.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include "../pci.h" + +/** + * aer_tlp_log_len - Calculate AER Capability TLP Header/Prefix Log length + * @dev: PCIe device + * @aercc: AER Capabilities and Control register value + * + * Return: TLP Header/Prefix Log length + */ +unsigned int aer_tlp_log_len(struct pci_dev *dev, u32 aercc) +{ + if (aercc & PCI_ERR_CAP_TLP_LOG_FLIT) + return FIELD_GET(PCI_ERR_CAP_TLP_LOG_SIZE, aercc); + + return PCIE_STD_NUM_TLP_HEADERLOG + + ((aercc & PCI_ERR_CAP_PREFIX_LOG_PRESENT) ? + dev->eetlp_prefix_max : 0); +} + +#ifdef CONFIG_PCIE_DPC +/** + * dpc_tlp_log_len - Calculate DPC RP PIO TLP Header/Prefix Log length + * @dev: PCIe device + * + * Return: TLP Header/Prefix Log length + */ +unsigned int dpc_tlp_log_len(struct pci_dev *dev) +{ + /* Remove ImpSpec Log register from the count */ + if (dev->dpc_rp_log_size >= PCIE_STD_NUM_TLP_HEADERLOG + 1) + return dev->dpc_rp_log_size - 1; + + return dev->dpc_rp_log_size; +} +#endif + +/** + * pcie_read_tlp_log - read TLP Header Log + * @dev: PCIe device + * @where: PCI Config offset of TLP Header Log + * @where2: PCI Config offset of TLP Prefix Log + * @tlp_len: TLP Log length (Header Log + TLP Prefix Log in DWORDs) + * @flit: TLP Logged in Flit mode + * @log: TLP Log structure to fill + * + * Fill @log from TLP Header Log registers, e.g., AER or DPC. + * + * Return: 0 on success and filled TLP Log structure, <0 on error. + */ +int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, + unsigned int tlp_len, bool flit, struct pcie_tlp_log *log) +{ + unsigned int i; + int off, ret; + + if (tlp_len > ARRAY_SIZE(log->dw)) + tlp_len = ARRAY_SIZE(log->dw); + + memset(log, 0, sizeof(*log)); + + for (i = 0; i < tlp_len; i++) { + if (i < PCIE_STD_NUM_TLP_HEADERLOG) + off = where + i * 4; + else + off = where2 + (i - PCIE_STD_NUM_TLP_HEADERLOG) * 4; + + ret = pci_read_config_dword(dev, off, &log->dw[i]); + if (ret) + return pcibios_err_to_errno(ret); + } + + /* + * Hard-code non-Flit mode to 4 DWORDs, for now. The exact length + * can only be known if the TLP is parsed. + */ + log->header_len = flit ? tlp_len : 4; + log->flit = flit; + + return 0; +} + +#define EE_PREFIX_STR " E-E Prefixes:" + +/** + * pcie_print_tlp_log - Print TLP Header / Prefix Log contents + * @dev: PCIe device + * @log: TLP Log structure + * @level: Printk log level + * @pfx: String prefix + * + * Prints TLP Header and Prefix Log information held by @log. + */ +void pcie_print_tlp_log(const struct pci_dev *dev, + const struct pcie_tlp_log *log, const char *level, + const char *pfx) +{ + /* EE_PREFIX_STR fits the extended DW space needed for the Flit mode */ + char buf[11 * PCIE_STD_MAX_TLP_HEADERLOG + 1]; + unsigned int i; + int len; + + len = scnprintf(buf, sizeof(buf), "%#010x %#010x %#010x %#010x", + log->dw[0], log->dw[1], log->dw[2], log->dw[3]); + + if (log->flit) { + for (i = PCIE_STD_NUM_TLP_HEADERLOG; i < log->header_len; i++) { + len += scnprintf(buf + len, sizeof(buf) - len, + " %#010x", log->dw[i]); + } + } else { + if (log->prefix[0]) + len += scnprintf(buf + len, sizeof(buf) - len, + EE_PREFIX_STR); + for (i = 0; i < ARRAY_SIZE(log->prefix); i++) { + if (!log->prefix[i]) + break; + len += scnprintf(buf + len, sizeof(buf) - len, + " %#010x", log->prefix[i]); + } + } + + dev_printk(level, &dev->dev, "%sTLP Header%s: %s\n", pfx, + log->flit ? " (Flit)" : "", buf); +} |