diff options
Diffstat (limited to 'drivers/pci/pcie')
-rw-r--r-- | drivers/pci/pcie/Kconfig | 2 | ||||
-rw-r--r-- | drivers/pci/pcie/Makefile | 6 | ||||
-rw-r--r-- | drivers/pci/pcie/aer.c | 66 | ||||
-rw-r--r-- | drivers/pci/pcie/aer_inject.c | 6 | ||||
-rw-r--r-- | drivers/pci/pcie/aspm.c | 554 | ||||
-rw-r--r-- | drivers/pci/pcie/bwctrl.c | 373 | ||||
-rw-r--r-- | drivers/pci/pcie/dpc.c | 152 | ||||
-rw-r--r-- | drivers/pci/pcie/edr.c | 28 | ||||
-rw-r--r-- | drivers/pci/pcie/err.c | 32 | ||||
-rw-r--r-- | drivers/pci/pcie/portdrv.c | 23 | ||||
-rw-r--r-- | drivers/pci/pcie/portdrv.h | 8 | ||||
-rw-r--r-- | drivers/pci/pcie/tlp.c | 115 |
12 files changed, 1136 insertions, 229 deletions
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 8999fcebde6a..17919b99fa66 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -47,7 +47,7 @@ config PCIEAER_INJECT error injection can fake almost all kinds of errors with the help of a user space helper tool aer-inject, which can be gotten from: - https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ + https://github.com/intel/aer-inject.git config PCIEAER_CXL bool "PCI Express CXL RAS support" diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 8de4ed5f98f1..173829aa02e6 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -4,10 +4,10 @@ pcieportdrv-y := portdrv.o rcec.o -obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o +obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o -obj-$(CONFIG_PCIEASPM) += aspm.o -obj-$(CONFIG_PCIEAER) += aer.o err.o +obj-y += aspm.o +obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 05fc30bb5134..508474e17183 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -180,7 +180,8 @@ static int disable_ecrc_checking(struct pci_dev *dev) } /** - * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy + * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based + * on global policy * @dev: the PCI device */ void pcie_set_ecrc_checking(struct pci_dev *dev) @@ -230,7 +231,7 @@ int pcie_aer_is_native(struct pci_dev *dev) return pcie_ports_native || host->native_aer; } -EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, CXL); +EXPORT_SYMBOL_NS_GPL(pcie_aer_is_native, "CXL"); static int pci_enable_pcie_error_reporting(struct pci_dev *dev) { @@ -664,13 +665,6 @@ static void pci_rootport_aer_stats_incr(struct pci_dev *pdev, } } -static void __print_tlp_header(struct pci_dev *dev, - struct aer_header_log_regs *t) -{ - pci_err(dev, " TLP Header: %08x %08x %08x %08x\n", - t->dw0, t->dw1, t->dw2, t->dw3); -} - static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info) { @@ -725,7 +719,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) __aer_print_error(dev, info); if (info->tlp_header_valid) - __print_tlp_header(dev, &info->tlp); + pcie_print_tlp_log(dev, &info->tlp, dev_fmt(" ")); out: if (info->id && info->error_dev_num > 1 && info->id == id) @@ -797,12 +791,12 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, aer->uncor_severity); if (tlp_header_valid) - __print_tlp_header(dev, &aer->header_log); + pcie_print_tlp_log(dev, &aer->header_log, dev_fmt(" ")); trace_aer_event(dev_name(&dev->dev), (status & ~mask), aer_severity, tlp_header_valid, &aer->header_log); } -EXPORT_SYMBOL_NS_GPL(pci_print_aer, CXL); +EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL"); /** * add_error_device - list device to be handled @@ -1149,14 +1143,16 @@ static void aer_recover_work_func(struct work_struct *work) continue; } pci_print_aer(pdev, entry.severity, entry.regs); + /* - * Memory for aer_capability_regs(entry.regs) is being allocated from the - * ghes_estatus_pool to protect it from overwriting when multiple sections - * are present in the error status. Thus free the same after processing - * the data. + * Memory for aer_capability_regs(entry.regs) is being + * allocated from the ghes_estatus_pool to protect it from + * overwriting when multiple sections are present in the + * error status. Thus free the same after processing the + * data. */ ghes_estatus_pool_region_free((unsigned long)entry.regs, - sizeof(struct aer_capability_regs)); + sizeof(struct aer_capability_regs)); if (entry.severity == AER_NONFATAL) pcie_do_recovery(pdev, pci_channel_io_normal, @@ -1210,7 +1206,7 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) { int type = pci_pcie_type(dev); int aer = dev->aer_cap; - int temp; + u32 aercc; /* Must reset in this function */ info->status = 0; @@ -1241,19 +1237,15 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) return 0; /* Get First Error Pointer */ - pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp); - info->first_error = PCI_ERR_CAP_FEP(temp); + pci_read_config_dword(dev, aer + PCI_ERR_CAP, &aercc); + info->first_error = PCI_ERR_CAP_FEP(aercc); if (info->status & AER_LOG_TLP_MASKS) { info->tlp_header_valid = 1; - pci_read_config_dword(dev, - aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0); - pci_read_config_dword(dev, - aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); - pci_read_config_dword(dev, - aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); - pci_read_config_dword(dev, - aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); + pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG, + aer + PCI_ERR_PREFIX_LOG, + aer_tlp_log_len(dev, aercc), + &info->tlp); } } @@ -1505,6 +1497,22 @@ static int aer_probe(struct pcie_device *dev) return 0; } +static int aer_suspend(struct pcie_device *dev) +{ + struct aer_rpc *rpc = get_service_data(dev); + + aer_disable_rootport(rpc); + return 0; +} + +static int aer_resume(struct pcie_device *dev) +{ + struct aer_rpc *rpc = get_service_data(dev); + + aer_enable_rootport(rpc); + return 0; +} + /** * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP * @dev: pointer to Root Port, RCEC, or RCiEP @@ -1569,6 +1577,8 @@ static struct pcie_port_service_driver aerdriver = { .service = PCIE_PORT_SERVICE_AER, .probe = aer_probe, + .suspend = aer_suspend, + .resume = aer_resume, .remove = aer_remove, }; diff --git a/drivers/pci/pcie/aer_inject.c b/drivers/pci/pcie/aer_inject.c index 2dab275d252f..91acc7b17f68 100644 --- a/drivers/pci/pcie/aer_inject.c +++ b/drivers/pci/pcie/aer_inject.c @@ -6,7 +6,7 @@ * trigger various real hardware errors. Software based error * injection can fake almost all kinds of errors with the help of a * user space helper tool aer-inject, which can be gotten from: - * https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ + * https://github.com/intel/aer-inject.git * * Copyright 2009 Intel Corporation. * Huang Ying <ying.huang@intel.com> @@ -430,7 +430,7 @@ static int aer_inject(struct aer_error_inj *einj) else rperr->root_status |= PCI_ERR_ROOT_COR_RCV; rperr->source_id &= 0xffff0000; - rperr->source_id |= (einj->bus << 8) | devfn; + rperr->source_id |= PCI_DEVID(einj->bus, devfn); } if (einj->uncor_status) { if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV) @@ -443,7 +443,7 @@ static int aer_inject(struct aer_error_inj *einj) rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV; rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV; rperr->source_id &= 0x0000ffff; - rperr->source_id |= ((einj->bus << 8) | devfn) << 16; + rperr->source_id |= PCI_DEVID(einj->bus, devfn) << 16; } spin_unlock_irqrestore(&inject_lock, flags); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index bc0bd86695ec..da3e7edcf49d 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -8,6 +8,8 @@ */ #include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/build_bug.h> #include <linux/kernel.h> #include <linux/limits.h> #include <linux/math.h> @@ -24,26 +26,203 @@ #include "../pci.h" +void pci_save_ltr_state(struct pci_dev *dev) +{ + int ltr; + struct pci_cap_saved_state *save_state; + u32 *cap; + + if (!pci_is_pcie(dev)) + return; + + ltr = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR); + if (!ltr) + return; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_LTR); + if (!save_state) { + pci_err(dev, "no suspend buffer for LTR; ASPM issues possible after resume\n"); + return; + } + + /* Some broken devices only support dword access to LTR */ + cap = &save_state->cap.data[0]; + pci_read_config_dword(dev, ltr + PCI_LTR_MAX_SNOOP_LAT, cap); +} + +void pci_restore_ltr_state(struct pci_dev *dev) +{ + struct pci_cap_saved_state *save_state; + int ltr; + u32 *cap; + + save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_LTR); + ltr = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_LTR); + if (!save_state || !ltr) + return; + + /* Some broken devices only support dword access to LTR */ + cap = &save_state->cap.data[0]; + pci_write_config_dword(dev, ltr + PCI_LTR_MAX_SNOOP_LAT, *cap); +} + +void pci_configure_aspm_l1ss(struct pci_dev *pdev) +{ + int rc; + + pdev->l1ss = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_L1SS); + + rc = pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_L1SS, + 2 * sizeof(u32)); + if (rc) + pci_err(pdev, "unable to allocate ASPM L1SS save buffer (%pe)\n", + ERR_PTR(rc)); +} + +void pci_save_aspm_l1ss_state(struct pci_dev *pdev) +{ + struct pci_dev *parent = pdev->bus->self; + struct pci_cap_saved_state *save_state; + u32 *cap; + + /* + * If this is a Downstream Port, we never restore the L1SS state + * directly; we only restore it when we restore the state of the + * Upstream Port below it. + */ + if (pcie_downstream_port(pdev) || !parent) + return; + + if (!pdev->l1ss || !parent->l1ss) + return; + + /* + * Save L1 substate configuration. The ASPM L0s/L1 configuration + * in PCI_EXP_LNKCTL_ASPMC is saved by pci_save_pcie_state(). + */ + save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_L1SS); + if (!save_state) + return; + + cap = &save_state->cap.data[0]; + pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL2, cap++); + pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL1, cap++); + + /* + * Save parent's L1 substate configuration so we have it for + * pci_restore_aspm_l1ss_state(pdev) to restore. + */ + save_state = pci_find_saved_ext_cap(parent, PCI_EXT_CAP_ID_L1SS); + if (!save_state) + return; + + cap = &save_state->cap.data[0]; + pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, cap++); + pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, cap++); +} + +void pci_restore_aspm_l1ss_state(struct pci_dev *pdev) +{ + struct pci_cap_saved_state *pl_save_state, *cl_save_state; + struct pci_dev *parent = pdev->bus->self; + u32 *cap, pl_ctl1, pl_ctl2, pl_l1_2_enable; + u32 cl_ctl1, cl_ctl2, cl_l1_2_enable; + u16 clnkctl, plnkctl; + + /* + * In case BIOS enabled L1.2 when resuming, we need to disable it first + * on the downstream component before the upstream. So, don't attempt to + * restore either until we are at the downstream component. + */ + if (pcie_downstream_port(pdev) || !parent) + return; + + if (!pdev->l1ss || !parent->l1ss) + return; + + cl_save_state = pci_find_saved_ext_cap(pdev, PCI_EXT_CAP_ID_L1SS); + pl_save_state = pci_find_saved_ext_cap(parent, PCI_EXT_CAP_ID_L1SS); + if (!cl_save_state || !pl_save_state) + return; + + cap = &cl_save_state->cap.data[0]; + cl_ctl2 = *cap++; + cl_ctl1 = *cap; + cap = &pl_save_state->cap.data[0]; + pl_ctl2 = *cap++; + pl_ctl1 = *cap; + + /* Make sure L0s/L1 are disabled before updating L1SS config */ + pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &clnkctl); + pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &plnkctl); + if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, clnkctl) || + FIELD_GET(PCI_EXP_LNKCTL_ASPMC, plnkctl)) { + pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, + clnkctl & ~PCI_EXP_LNKCTL_ASPMC); + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, + plnkctl & ~PCI_EXP_LNKCTL_ASPMC); + } + + /* + * Disable L1.2 on this downstream endpoint device first, followed + * by the upstream + */ + pci_clear_and_set_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1_2_MASK, 0); + pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + PCI_L1SS_CTL1_L1_2_MASK, 0); + + /* + * In addition, Common_Mode_Restore_Time and LTR_L1.2_THRESHOLD + * in PCI_L1SS_CTL1 must be programmed *before* setting the L1.2 + * enable bits, even though they're all in PCI_L1SS_CTL1. + */ + pl_l1_2_enable = pl_ctl1 & PCI_L1SS_CTL1_L1_2_MASK; + pl_ctl1 &= ~PCI_L1SS_CTL1_L1_2_MASK; + cl_l1_2_enable = cl_ctl1 & PCI_L1SS_CTL1_L1_2_MASK; + cl_ctl1 &= ~PCI_L1SS_CTL1_L1_2_MASK; + + /* Write back without enables first (above we cleared them in ctl1) */ + pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL2, pl_ctl2); + pci_write_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL2, cl_ctl2); + pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, pl_ctl1); + pci_write_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL1, cl_ctl1); + + /* Then write back the enables */ + if (pl_l1_2_enable || cl_l1_2_enable) { + pci_write_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, + pl_ctl1 | pl_l1_2_enable); + pci_write_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL1, + cl_ctl1 | cl_l1_2_enable); + } + + /* Restore L0s/L1 if they were enabled */ + if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, clnkctl) || + FIELD_GET(PCI_EXP_LNKCTL_ASPMC, plnkctl)) { + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, plnkctl); + pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, clnkctl); + } +} + +#ifdef CONFIG_PCIEASPM + #ifdef MODULE_PARAM_PREFIX #undef MODULE_PARAM_PREFIX #endif #define MODULE_PARAM_PREFIX "pcie_aspm." -/* Note: those are not register definitions */ -#define ASPM_STATE_L0S_UP (1) /* Upstream direction L0s state */ -#define ASPM_STATE_L0S_DW (2) /* Downstream direction L0s state */ -#define ASPM_STATE_L1 (4) /* L1 state */ -#define ASPM_STATE_L1_1 (8) /* ASPM L1.1 state */ -#define ASPM_STATE_L1_2 (0x10) /* ASPM L1.2 state */ -#define ASPM_STATE_L1_1_PCIPM (0x20) /* PCI PM L1.1 state */ -#define ASPM_STATE_L1_2_PCIPM (0x40) /* PCI PM L1.2 state */ -#define ASPM_STATE_L1_SS_PCIPM (ASPM_STATE_L1_1_PCIPM | ASPM_STATE_L1_2_PCIPM) -#define ASPM_STATE_L1_2_MASK (ASPM_STATE_L1_2 | ASPM_STATE_L1_2_PCIPM) -#define ASPM_STATE_L1SS (ASPM_STATE_L1_1 | ASPM_STATE_L1_1_PCIPM |\ - ASPM_STATE_L1_2_MASK) -#define ASPM_STATE_L0S (ASPM_STATE_L0S_UP | ASPM_STATE_L0S_DW) -#define ASPM_STATE_ALL (ASPM_STATE_L0S | ASPM_STATE_L1 | \ - ASPM_STATE_L1SS) +/* Note: these are not register definitions */ +#define PCIE_LINK_STATE_L0S_UP BIT(0) /* Upstream direction L0s state */ +#define PCIE_LINK_STATE_L0S_DW BIT(1) /* Downstream direction L0s state */ +static_assert(PCIE_LINK_STATE_L0S == (PCIE_LINK_STATE_L0S_UP | PCIE_LINK_STATE_L0S_DW)); + +#define PCIE_LINK_STATE_L1_SS_PCIPM (PCIE_LINK_STATE_L1_1_PCIPM |\ + PCIE_LINK_STATE_L1_2_PCIPM) +#define PCIE_LINK_STATE_L1_2_MASK (PCIE_LINK_STATE_L1_2 |\ + PCIE_LINK_STATE_L1_2_PCIPM) +#define PCIE_LINK_STATE_L1SS (PCIE_LINK_STATE_L1_1 |\ + PCIE_LINK_STATE_L1_1_PCIPM |\ + PCIE_LINK_STATE_L1_2_MASK) struct pcie_link_state { struct pci_dev *pdev; /* Upstream component of the Link */ @@ -115,10 +294,10 @@ static int policy_to_aspm_state(struct pcie_link_state *link) return 0; case POLICY_POWERSAVE: /* Enable ASPM L0s/L1 */ - return (ASPM_STATE_L0S | ASPM_STATE_L1); + return PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1; case POLICY_POWER_SUPERSAVE: /* Enable Everything */ - return ASPM_STATE_ALL; + return PCIE_LINK_STATE_ASPM_ALL; case POLICY_DEFAULT: return link->aspm_default; } @@ -141,16 +320,42 @@ static int policy_to_clkpm_state(struct pcie_link_state *link) return 0; } +static void pci_update_aspm_saved_state(struct pci_dev *dev) +{ + struct pci_cap_saved_state *save_state; + u16 *cap, lnkctl, aspm_ctl; + + save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP); + if (!save_state) + return; + + pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnkctl); + + /* + * Update ASPM and CLKREQ bits of LNKCTL in save_state. We only + * write PCI_EXP_LNKCTL_CCC during enumeration, so it shouldn't + * change after being captured in save_state. + */ + aspm_ctl = lnkctl & (PCI_EXP_LNKCTL_ASPMC | PCI_EXP_LNKCTL_CLKREQ_EN); + lnkctl &= ~(PCI_EXP_LNKCTL_ASPMC | PCI_EXP_LNKCTL_CLKREQ_EN); + + /* Depends on pci_save_pcie_state(): cap[1] is LNKCTL */ + cap = (u16 *)&save_state->cap.data[0]; + cap[1] = lnkctl | aspm_ctl; +} + static void pcie_set_clkpm_nocheck(struct pcie_link_state *link, int enable) { struct pci_dev *child; struct pci_bus *linkbus = link->pdev->subordinate; u32 val = enable ? PCI_EXP_LNKCTL_CLKREQ_EN : 0; - list_for_each_entry(child, &linkbus->devices, bus_list) + list_for_each_entry(child, &linkbus->devices, bus_list) { pcie_capability_clear_and_set_word(child, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_CLKREQ_EN, val); + pci_update_aspm_saved_state(child); + } link->clkpm_enabled = !!enable; } @@ -395,14 +600,14 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint) latency_dw_l1 = calc_l1_latency(lnkcap_dw); /* Check upstream direction L0s latency */ - if ((link->aspm_capable & ASPM_STATE_L0S_UP) && + if ((link->aspm_capable & PCIE_LINK_STATE_L0S_UP) && (latency_up_l0s > acceptable_l0s)) - link->aspm_capable &= ~ASPM_STATE_L0S_UP; + link->aspm_capable &= ~PCIE_LINK_STATE_L0S_UP; /* Check downstream direction L0s latency */ - if ((link->aspm_capable & ASPM_STATE_L0S_DW) && + if ((link->aspm_capable & PCIE_LINK_STATE_L0S_DW) && (latency_dw_l0s > acceptable_l0s)) - link->aspm_capable &= ~ASPM_STATE_L0S_DW; + link->aspm_capable &= ~PCIE_LINK_STATE_L0S_DW; /* * Check L1 latency. * Every switch on the path to root complex need 1 @@ -417,9 +622,9 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint) * substate latencies (and hence do not do any check). */ latency = max_t(u32, latency_up_l1, latency_dw_l1); - if ((link->aspm_capable & ASPM_STATE_L1) && + if ((link->aspm_capable & PCIE_LINK_STATE_L1) && (latency + l1_switch_latency > acceptable_l1)) - link->aspm_capable &= ~ASPM_STATE_L1; + link->aspm_capable &= ~PCIE_LINK_STATE_L1; l1_switch_latency += NSEC_PER_USEC; link = link->parent; @@ -555,13 +760,13 @@ static void aspm_l1ss_init(struct pcie_link_state *link) child_l1ss_cap &= ~PCI_L1SS_CAP_ASPM_L1_2; if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_ASPM_L1_1) - link->aspm_support |= ASPM_STATE_L1_1; + link->aspm_support |= PCIE_LINK_STATE_L1_1; if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_ASPM_L1_2) - link->aspm_support |= ASPM_STATE_L1_2; + link->aspm_support |= PCIE_LINK_STATE_L1_2; if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_1) - link->aspm_support |= ASPM_STATE_L1_1_PCIPM; + link->aspm_support |= PCIE_LINK_STATE_L1_1_PCIPM; if (parent_l1ss_cap & child_l1ss_cap & PCI_L1SS_CAP_PCIPM_L1_2) - link->aspm_support |= ASPM_STATE_L1_2_PCIPM; + link->aspm_support |= PCIE_LINK_STATE_L1_2_PCIPM; if (parent_l1ss_cap) pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, @@ -571,15 +776,15 @@ static void aspm_l1ss_init(struct pcie_link_state *link) &child_l1ss_ctl1); if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_1) - link->aspm_enabled |= ASPM_STATE_L1_1; + link->aspm_enabled |= PCIE_LINK_STATE_L1_1; if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_ASPM_L1_2) - link->aspm_enabled |= ASPM_STATE_L1_2; + link->aspm_enabled |= PCIE_LINK_STATE_L1_2; if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_1) - link->aspm_enabled |= ASPM_STATE_L1_1_PCIPM; + link->aspm_enabled |= PCIE_LINK_STATE_L1_1_PCIPM; if (parent_l1ss_ctl1 & child_l1ss_ctl1 & PCI_L1SS_CTL1_PCIPM_L1_2) - link->aspm_enabled |= ASPM_STATE_L1_2_PCIPM; + link->aspm_enabled |= PCIE_LINK_STATE_L1_2_PCIPM; - if (link->aspm_support & ASPM_STATE_L1_2_MASK) + if (link->aspm_support & PCIE_LINK_STATE_L1_2_MASK) aspm_calc_l12_info(link, parent_l1ss_cap, child_l1ss_cap); } @@ -592,8 +797,8 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) if (blacklist) { /* Set enabled/disable so that we will disable ASPM later */ - link->aspm_enabled = ASPM_STATE_ALL; - link->aspm_disable = ASPM_STATE_ALL; + link->aspm_enabled = PCIE_LINK_STATE_ASPM_ALL; + link->aspm_disable = PCIE_LINK_STATE_ASPM_ALL; return; } @@ -620,6 +825,15 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &parent_lnkctl); pcie_capability_read_word(child, PCI_EXP_LNKCTL, &child_lnkctl); + /* Disable L0s/L1 before updating L1SS config */ + if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, child_lnkctl) || + FIELD_GET(PCI_EXP_LNKCTL_ASPMC, parent_lnkctl)) { + pcie_capability_write_word(child, PCI_EXP_LNKCTL, + child_lnkctl & ~PCI_EXP_LNKCTL_ASPMC); + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, + parent_lnkctl & ~PCI_EXP_LNKCTL_ASPMC); + } + /* * Setup L0s state * @@ -628,22 +842,29 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) * support L0s. */ if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L0S) - link->aspm_support |= ASPM_STATE_L0S; + link->aspm_support |= PCIE_LINK_STATE_L0S; if (child_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) - link->aspm_enabled |= ASPM_STATE_L0S_UP; + link->aspm_enabled |= PCIE_LINK_STATE_L0S_UP; if (parent_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) - link->aspm_enabled |= ASPM_STATE_L0S_DW; + link->aspm_enabled |= PCIE_LINK_STATE_L0S_DW; /* Setup L1 state */ if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1) - link->aspm_support |= ASPM_STATE_L1; + link->aspm_support |= PCIE_LINK_STATE_L1; if (parent_lnkctl & child_lnkctl & PCI_EXP_LNKCTL_ASPM_L1) - link->aspm_enabled |= ASPM_STATE_L1; + link->aspm_enabled |= PCIE_LINK_STATE_L1; aspm_l1ss_init(link); + /* Restore L0s/L1 if they were enabled */ + if (FIELD_GET(PCI_EXP_LNKCTL_ASPMC, child_lnkctl) || + FIELD_GET(PCI_EXP_LNKCTL_ASPMC, parent_lnkctl)) { + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_lnkctl); + pcie_capability_write_word(child, PCI_EXP_LNKCTL, child_lnkctl); + } + /* Save default state */ link->aspm_default = link->aspm_enabled; @@ -660,25 +881,28 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) } } -/* Configure the ASPM L1 substates */ +/* Configure the ASPM L1 substates. Caller must disable L1 first. */ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) { - u32 val, enable_req; + u32 val; struct pci_dev *child = link->downstream, *parent = link->pdev; - enable_req = (link->aspm_enabled ^ state) & state; + val = 0; + if (state & PCIE_LINK_STATE_L1_1) + val |= PCI_L1SS_CTL1_ASPM_L1_1; + if (state & PCIE_LINK_STATE_L1_2) + val |= PCI_L1SS_CTL1_ASPM_L1_2; + if (state & PCIE_LINK_STATE_L1_1_PCIPM) + val |= PCI_L1SS_CTL1_PCIPM_L1_1; + if (state & PCIE_LINK_STATE_L1_2_PCIPM) + val |= PCI_L1SS_CTL1_PCIPM_L1_2; /* - * Here are the rules specified in the PCIe spec for enabling L1SS: - * - When enabling L1.x, enable bit at parent first, then at child - * - When disabling L1.x, disable bit at child first, then at parent - * - When enabling ASPM L1.x, need to disable L1 - * (at child followed by parent). - * - The ASPM/PCIPM L1.2 must be disabled while programming timing + * PCIe r6.2, sec 5.5.4, rules for enabling L1 PM Substates: + * - Clear L1.x enable bits at child first, then at parent + * - Set L1.x enable bits at parent first, then at child + * - ASPM/PCIPM L1.2 must be disabled while programming timing * parameters - * - * To keep it simple, disable all L1SS bits first, and later enable - * what is needed. */ /* Disable all L1 substates */ @@ -686,26 +910,6 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) PCI_L1SS_CTL1_L1SS_MASK, 0); pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, PCI_L1SS_CTL1_L1SS_MASK, 0); - /* - * If needed, disable L1, and it gets enabled later - * in pcie_config_aspm_link(). - */ - if (enable_req & (ASPM_STATE_L1_1 | ASPM_STATE_L1_2)) { - pcie_capability_clear_word(child, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPM_L1); - pcie_capability_clear_word(parent, PCI_EXP_LNKCTL, - PCI_EXP_LNKCTL_ASPM_L1); - } - - val = 0; - if (state & ASPM_STATE_L1_1) - val |= PCI_L1SS_CTL1_ASPM_L1_1; - if (state & ASPM_STATE_L1_2) - val |= PCI_L1SS_CTL1_ASPM_L1_2; - if (state & ASPM_STATE_L1_1_PCIPM) - val |= PCI_L1SS_CTL1_PCIPM_L1_1; - if (state & ASPM_STATE_L1_2_PCIPM) - val |= PCI_L1SS_CTL1_PCIPM_L1_2; /* Enable what we need to enable */ pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1, @@ -730,45 +934,60 @@ static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state) state &= (link->aspm_capable & ~link->aspm_disable); /* Can't enable any substates if L1 is not enabled */ - if (!(state & ASPM_STATE_L1)) - state &= ~ASPM_STATE_L1SS; + if (!(state & PCIE_LINK_STATE_L1)) + state &= ~PCIE_LINK_STATE_L1SS; /* Spec says both ports must be in D0 before enabling PCI PM substates*/ if (parent->current_state != PCI_D0 || child->current_state != PCI_D0) { - state &= ~ASPM_STATE_L1_SS_PCIPM; - state |= (link->aspm_enabled & ASPM_STATE_L1_SS_PCIPM); + state &= ~PCIE_LINK_STATE_L1_SS_PCIPM; + state |= (link->aspm_enabled & PCIE_LINK_STATE_L1_SS_PCIPM); } /* Nothing to do if the link is already in the requested state */ if (link->aspm_enabled == state) return; /* Convert ASPM state to upstream/downstream ASPM register state */ - if (state & ASPM_STATE_L0S_UP) + if (state & PCIE_LINK_STATE_L0S_UP) dwstream |= PCI_EXP_LNKCTL_ASPM_L0S; - if (state & ASPM_STATE_L0S_DW) + if (state & PCIE_LINK_STATE_L0S_DW) upstream |= PCI_EXP_LNKCTL_ASPM_L0S; - if (state & ASPM_STATE_L1) { + if (state & PCIE_LINK_STATE_L1) { upstream |= PCI_EXP_LNKCTL_ASPM_L1; dwstream |= PCI_EXP_LNKCTL_ASPM_L1; } - if (link->aspm_capable & ASPM_STATE_L1SS) - pcie_config_aspm_l1ss(link, state); - /* - * Spec 2.0 suggests all functions should be configured the - * same setting for ASPM. Enabling ASPM L1 should be done in - * upstream component first and then downstream, and vice - * versa for disabling ASPM L1. Spec doesn't mention L0S. + * Per PCIe r6.2, sec 5.5.4, setting either or both of the enable + * bits for ASPM L1 PM Substates must be done while ASPM L1 is + * disabled. Disable L1 here and apply new configuration after L1SS + * configuration has been completed. + * + * Per sec 7.5.3.7, when disabling ASPM L1, software must disable + * it in the Downstream component prior to disabling it in the + * Upstream component, and ASPM L1 must be enabled in the Upstream + * component prior to enabling it in the Downstream component. + * + * Sec 7.5.3.7 also recommends programming the same ASPM Control + * value for all functions of a multi-function device. */ - if (state & ASPM_STATE_L1) - pcie_config_aspm_dev(parent, upstream); + list_for_each_entry(child, &linkbus->devices, bus_list) + pcie_config_aspm_dev(child, 0); + pcie_config_aspm_dev(parent, 0); + + if (link->aspm_capable & PCIE_LINK_STATE_L1SS) + pcie_config_aspm_l1ss(link, state); + + pcie_config_aspm_dev(parent, upstream); list_for_each_entry(child, &linkbus->devices, bus_list) pcie_config_aspm_dev(child, dwstream); - if (!(state & ASPM_STATE_L1)) - pcie_config_aspm_dev(parent, upstream); link->aspm_enabled = state; + + /* Update latest ASPM configuration in saved context */ + pci_save_aspm_l1ss_state(link->downstream); + pci_update_aspm_saved_state(link->downstream); + pci_save_aspm_l1ss_state(parent); + pci_update_aspm_saved_state(parent); } static void pcie_config_aspm_path(struct pcie_link_state *link) @@ -938,6 +1157,78 @@ out: up_read(&pci_bus_sem); } +void pci_bridge_reconfigure_ltr(struct pci_dev *pdev) +{ + struct pci_dev *bridge; + u32 ctl; + + bridge = pci_upstream_bridge(pdev); + if (bridge && bridge->ltr_path) { + pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2, &ctl); + if (!(ctl & PCI_EXP_DEVCTL2_LTR_EN)) { + pci_dbg(bridge, "re-enabling LTR\n"); + pcie_capability_set_word(bridge, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_LTR_EN); + } + } +} + +void pci_configure_ltr(struct pci_dev *pdev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(pdev->bus); + struct pci_dev *bridge; + u32 cap, ctl; + + if (!pci_is_pcie(pdev)) + return; + + pcie_capability_read_dword(pdev, PCI_EXP_DEVCAP2, &cap); + if (!(cap & PCI_EXP_DEVCAP2_LTR)) + return; + + pcie_capability_read_dword(pdev, PCI_EXP_DEVCTL2, &ctl); + if (ctl & PCI_EXP_DEVCTL2_LTR_EN) { + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) { + pdev->ltr_path = 1; + return; + } + + bridge = pci_upstream_bridge(pdev); + if (bridge && bridge->ltr_path) + pdev->ltr_path = 1; + + return; + } + + if (!host->native_ltr) + return; + + /* + * Software must not enable LTR in an Endpoint unless the Root + * Complex and all intermediate Switches indicate support for LTR. + * PCIe r4.0, sec 6.18. + */ + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) { + pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_LTR_EN); + pdev->ltr_path = 1; + return; + } + + /* + * If we're configuring a hot-added device, LTR was likely + * disabled in the upstream bridge, so re-enable it before enabling + * it in the new device. + */ + bridge = pci_upstream_bridge(pdev); + if (bridge && bridge->ltr_path) { + pci_bridge_reconfigure_ltr(pdev); + pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_LTR_EN); + pdev->ltr_path = 1; + } +} + /* Recheck latencies and update aspm_capable for links under the root */ static void pcie_update_aspm_capable(struct pcie_link_state *root) { @@ -1060,6 +1351,28 @@ static struct pcie_link_state *pcie_aspm_get_link(struct pci_dev *pdev) return bridge->link_state; } +static u8 pci_calc_aspm_disable_mask(int state) +{ + state &= ~PCIE_LINK_STATE_CLKPM; + + /* L1 PM substates require L1 */ + if (state & PCIE_LINK_STATE_L1) + state |= PCIE_LINK_STATE_L1SS; + + return state; +} + +static u8 pci_calc_aspm_enable_mask(int state) +{ + state &= ~PCIE_LINK_STATE_CLKPM; + + /* L1 PM substates require L1 */ + if (state & PCIE_LINK_STATE_L1SS) + state |= PCIE_LINK_STATE_L1; + + return state; +} + static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool locked) { struct pcie_link_state *link = pcie_aspm_get_link(pdev); @@ -1082,19 +1395,7 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool locked if (!locked) down_read(&pci_bus_sem); mutex_lock(&aspm_lock); - if (state & PCIE_LINK_STATE_L0S) - link->aspm_disable |= ASPM_STATE_L0S; - if (state & PCIE_LINK_STATE_L1) - /* L1 PM substates require L1 */ - link->aspm_disable |= ASPM_STATE_L1 | ASPM_STATE_L1SS; - if (state & PCIE_LINK_STATE_L1_1) - link->aspm_disable |= ASPM_STATE_L1_1; - if (state & PCIE_LINK_STATE_L1_2) - link->aspm_disable |= ASPM_STATE_L1_2; - if (state & PCIE_LINK_STATE_L1_1_PCIPM) - link->aspm_disable |= ASPM_STATE_L1_1_PCIPM; - if (state & PCIE_LINK_STATE_L1_2_PCIPM) - link->aspm_disable |= ASPM_STATE_L1_2_PCIPM; + link->aspm_disable |= pci_calc_aspm_disable_mask(state); pcie_config_aspm_link(link, policy_to_aspm_state(link)); if (state & PCIE_LINK_STATE_CLKPM) @@ -1150,20 +1451,7 @@ static int __pci_enable_link_state(struct pci_dev *pdev, int state, bool locked) if (!locked) down_read(&pci_bus_sem); mutex_lock(&aspm_lock); - link->aspm_default = 0; - if (state & PCIE_LINK_STATE_L0S) - link->aspm_default |= ASPM_STATE_L0S; - if (state & PCIE_LINK_STATE_L1) - link->aspm_default |= ASPM_STATE_L1; - /* L1 PM substates require L1 */ - if (state & PCIE_LINK_STATE_L1_1) - link->aspm_default |= ASPM_STATE_L1_1 | ASPM_STATE_L1; - if (state & PCIE_LINK_STATE_L1_2) - link->aspm_default |= ASPM_STATE_L1_2 | ASPM_STATE_L1; - if (state & PCIE_LINK_STATE_L1_1_PCIPM) - link->aspm_default |= ASPM_STATE_L1_1_PCIPM | ASPM_STATE_L1; - if (state & PCIE_LINK_STATE_L1_2_PCIPM) - link->aspm_default |= ASPM_STATE_L1_2_PCIPM | ASPM_STATE_L1; + link->aspm_default = pci_calc_aspm_enable_mask(state); pcie_config_aspm_link(link, policy_to_aspm_state(link)); link->clkpm_default = (state & PCIE_LINK_STATE_CLKPM) ? 1 : 0; @@ -1182,6 +1470,9 @@ static int __pci_enable_link_state(struct pci_dev *pdev, int state, bool locked) * touch the LNKCTL register. Also note that this does not enable states * disabled by pci_disable_link_state(). Return 0 or a negative errno. * + * Note: Ensure devices are in D0 before enabling PCI-PM L1 PM Substates, per + * PCIe r6.0, sec 5.5.4. + * * @pdev: PCI device * @state: Mask of ASPM link states to enable */ @@ -1198,6 +1489,9 @@ EXPORT_SYMBOL(pci_enable_link_state); * can't touch the LNKCTL register. Also note that this does not enable states * disabled by pci_disable_link_state(). Return 0 or a negative errno. * + * Note: Ensure devices are in D0 before enabling PCI-PM L1 PM Substates, per + * PCIe r6.0, sec 5.5.4. + * * @pdev: PCI device * @state: Mask of ASPM link states to enable * @@ -1299,12 +1593,12 @@ static ssize_t aspm_attr_store_common(struct device *dev, if (state_enable) { link->aspm_disable &= ~state; /* need to enable L1 for substates */ - if (state & ASPM_STATE_L1SS) - link->aspm_disable &= ~ASPM_STATE_L1; + if (state & PCIE_LINK_STATE_L1SS) + link->aspm_disable &= ~PCIE_LINK_STATE_L1; } else { link->aspm_disable |= state; - if (state & ASPM_STATE_L1) - link->aspm_disable |= ASPM_STATE_L1SS; + if (state & PCIE_LINK_STATE_L1) + link->aspm_disable |= PCIE_LINK_STATE_L1SS; } pcie_config_aspm_link(link, policy_to_aspm_state(link)); @@ -1318,12 +1612,12 @@ static ssize_t aspm_attr_store_common(struct device *dev, #define ASPM_ATTR(_f, _s) \ static ssize_t _f##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ -{ return aspm_attr_show_common(dev, attr, buf, ASPM_STATE_##_s); } \ +{ return aspm_attr_show_common(dev, attr, buf, PCIE_LINK_STATE_##_s); } \ \ static ssize_t _f##_store(struct device *dev, \ struct device_attribute *attr, \ const char *buf, size_t len) \ -{ return aspm_attr_store_common(dev, attr, buf, len, ASPM_STATE_##_s); } +{ return aspm_attr_store_common(dev, attr, buf, len, PCIE_LINK_STATE_##_s); } ASPM_ATTR(l0s_aspm, L0S) ASPM_ATTR(l1_aspm, L1) @@ -1390,12 +1684,12 @@ static umode_t aspm_ctrl_attrs_are_visible(struct kobject *kobj, struct pci_dev *pdev = to_pci_dev(dev); struct pcie_link_state *link = pcie_aspm_get_link(pdev); static const u8 aspm_state_map[] = { - ASPM_STATE_L0S, - ASPM_STATE_L1, - ASPM_STATE_L1_1, - ASPM_STATE_L1_2, - ASPM_STATE_L1_1_PCIPM, - ASPM_STATE_L1_2_PCIPM, + PCIE_LINK_STATE_L0S, + PCIE_LINK_STATE_L1, + PCIE_LINK_STATE_L1_1, + PCIE_LINK_STATE_L1_2, + PCIE_LINK_STATE_L1_1_PCIPM, + PCIE_LINK_STATE_L1_2_PCIPM, }; if (aspm_disabled || !link) @@ -1447,3 +1741,5 @@ bool pcie_aspm_support_enabled(void) { return aspm_support_enabled; } + +#endif /* CONFIG_PCIEASPM */ diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c new file mode 100644 index 000000000000..0a5e7efbce2c --- /dev/null +++ b/drivers/pci/pcie/bwctrl.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * PCIe bandwidth controller + * + * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com> + * + * Copyright (C) 2019 Dell Inc + * Copyright (C) 2023-2024 Intel Corporation + * + * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds + * and notify the operating system when the Link Width or Speed changes. The + * notification capability is required for all Root Ports and Downstream + * Ports supporting Link Width wider than x1 and/or multiple Link Speeds. + * + * This service port driver hooks into the Bandwidth Notification interrupt + * watching for changes or links becoming degraded in operation. It updates + * the cached Current Link Speed that is exposed to user space through sysfs. + */ + +#define dev_fmt(fmt) "bwctrl: " fmt + +#include <linux/atomic.h> +#include <linux/bitops.h> +#include <linux/bits.h> +#include <linux/cleanup.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/pci-bwctrl.h> +#include <linux/rwsem.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include "../pci.h" +#include "portdrv.h" + +/** + * struct pcie_bwctrl_data - PCIe bandwidth controller + * @set_speed_mutex: Serializes link speed changes + * @lbms_count: Count for LBMS (since last reset) + * @cdev: Thermal cooling device associated with the port + */ +struct pcie_bwctrl_data { + struct mutex set_speed_mutex; + atomic_t lbms_count; + struct thermal_cooling_device *cdev; +}; + +/* + * Prevent port removal during LBMS count accessors and Link Speed changes. + * + * These have to be differentiated because pcie_bwctrl_change_speed() calls + * pcie_retrain_link() which uses LBMS count reset accessor on success + * (using just one rwsem triggers "possible recursive locking detected" + * warning). + */ +static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem); +static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem); + +static bool pcie_valid_speed(enum pci_bus_speed speed) +{ + return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT); +} + +static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed) +{ + static const u8 speed_conv[] = { + [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, + [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, + [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, + [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, + [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, + [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, + }; + + if (WARN_ON_ONCE(!pcie_valid_speed(speed))) + return 0; + + return speed_conv[speed]; +} + +static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds) +{ + return __fls(supported_speeds); +} + +/** + * pcie_bwctrl_select_speed - Select Target Link Speed + * @port: PCIe Port + * @speed_req: Requested PCIe Link Speed + * + * Select Target Link Speed by take into account Supported Link Speeds of + * both the Root Port and the Endpoint. + * + * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.) + */ +static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req) +{ + struct pci_bus *bus = port->subordinate; + u8 desired_speeds, supported_speeds; + struct pci_dev *dev; + + desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req), + __fls(PCI_EXP_LNKCAP2_SLS_2_5GB)); + + supported_speeds = port->supported_speeds; + if (bus) { + down_read(&pci_bus_sem); + dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list); + if (dev) + supported_speeds &= dev->supported_speeds; + up_read(&pci_bus_sem); + } + if (!supported_speeds) + return PCI_EXP_LNKCAP2_SLS_2_5GB; + + return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds); +} + +static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt) +{ + int ret; + + ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, target_speed); + if (ret != PCIBIOS_SUCCESSFUL) + return pcibios_err_to_errno(ret); + + ret = pcie_retrain_link(port, use_lt); + if (ret < 0) + return ret; + + /* + * Ensure link speed updates also with platforms that have problems + * with notifications. + */ + if (port->subordinate) + pcie_update_link_speed(port->subordinate); + + return 0; +} + +/** + * pcie_set_target_speed - Set downstream Link Speed for PCIe Port + * @port: PCIe Port + * @speed_req: Requested PCIe Link Speed + * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training + * + * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be + * adjusted downwards to the best speed supported by both the Port and PCIe + * Device underneath it. + * + * Return: + * * 0 - on success + * * -EINVAL - @speed_req is not a PCIe Link Speed + * * -ENODEV - @port is not controllable + * * -ETIMEDOUT - changing Link Speed took too long + * * -EAGAIN - Link Speed was changed but @speed_req was not achieved + */ +int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, + bool use_lt) +{ + struct pci_bus *bus = port->subordinate; + u16 target_speed; + int ret; + + if (WARN_ON_ONCE(!pcie_valid_speed(speed_req))) + return -EINVAL; + + if (bus && bus->cur_bus_speed == speed_req) + return 0; + + target_speed = pcie_bwctrl_select_speed(port, speed_req); + + scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) { + struct pcie_bwctrl_data *data = port->link_bwctrl; + + /* + * port->link_bwctrl is NULL during initial scan when called + * e.g. from the Target Speed quirk. + */ + if (data) + mutex_lock(&data->set_speed_mutex); + + ret = pcie_bwctrl_change_speed(port, target_speed, use_lt); + + if (data) + mutex_unlock(&data->set_speed_mutex); + } + + /* + * Despite setting higher speed into the Target Link Speed, empty + * bus won't train to 5GT+ speeds. + */ + if (!ret && bus && bus->cur_bus_speed != speed_req && + !list_empty(&bus->devices)) + ret = -EAGAIN; + + return ret; +} + +static void pcie_bwnotif_enable(struct pcie_device *srv) +{ + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + struct pci_dev *port = srv->port; + u16 link_status; + int ret; + + /* Count LBMS seen so far as one */ + ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) + atomic_inc(&data->lbms_count); + + pcie_capability_set_word(port, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); + pcie_capability_write_word(port, PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); + + /* + * Update after enabling notifications & clearing status bits ensures + * link speed is up to date. + */ + pcie_update_link_speed(port->subordinate); +} + +static void pcie_bwnotif_disable(struct pci_dev *port) +{ + pcie_capability_clear_word(port, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); +} + +static irqreturn_t pcie_bwnotif_irq(int irq, void *context) +{ + struct pcie_device *srv = context; + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + struct pci_dev *port = srv->port; + u16 link_status, events; + int ret; + + ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); + if (ret != PCIBIOS_SUCCESSFUL) + return IRQ_NONE; + + events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); + if (!events) + return IRQ_NONE; + + if (events & PCI_EXP_LNKSTA_LBMS) + atomic_inc(&data->lbms_count); + + pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); + + /* + * Interrupts will not be triggered from any further Link Speed + * change until LBMS is cleared by the write. Therefore, re-read the + * speed (inside pcie_update_link_speed()) after LBMS has been + * cleared to avoid missing link speed changes. + */ + pcie_update_link_speed(port->subordinate); + + return IRQ_HANDLED; +} + +void pcie_reset_lbms_count(struct pci_dev *port) +{ + struct pcie_bwctrl_data *data; + + guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); + data = port->link_bwctrl; + if (data) + atomic_set(&data->lbms_count, 0); + else + pcie_capability_write_word(port, PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_LBMS); +} + +int pcie_lbms_count(struct pci_dev *port, unsigned long *val) +{ + struct pcie_bwctrl_data *data; + + guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); + data = port->link_bwctrl; + if (!data) + return -ENOTTY; + + *val = atomic_read(&data->lbms_count); + + return 0; +} + +static int pcie_bwnotif_probe(struct pcie_device *srv) +{ + struct pci_dev *port = srv->port; + int ret; + + struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device, + sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + ret = devm_mutex_init(&srv->device, &data->set_speed_mutex); + if (ret) + return ret; + + scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { + port->link_bwctrl = data; + + ret = request_irq(srv->irq, pcie_bwnotif_irq, + IRQF_SHARED, "PCIe bwctrl", srv); + if (ret) { + port->link_bwctrl = NULL; + return ret; + } + + pcie_bwnotif_enable(srv); + } + } + + pci_dbg(port, "enabled with IRQ %d\n", srv->irq); + + /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */ + port->link_bwctrl->cdev = pcie_cooling_device_register(port); + if (IS_ERR(port->link_bwctrl->cdev)) + port->link_bwctrl->cdev = NULL; + + return 0; +} + +static void pcie_bwnotif_remove(struct pcie_device *srv) +{ + struct pcie_bwctrl_data *data = srv->port->link_bwctrl; + + pcie_cooling_device_unregister(data->cdev); + + scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { + scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { + pcie_bwnotif_disable(srv->port); + + free_irq(srv->irq, srv); + + srv->port->link_bwctrl = NULL; + } + } +} + +static int pcie_bwnotif_suspend(struct pcie_device *srv) +{ + pcie_bwnotif_disable(srv->port); + return 0; +} + +static int pcie_bwnotif_resume(struct pcie_device *srv) +{ + pcie_bwnotif_enable(srv); + return 0; +} + +static struct pcie_port_service_driver pcie_bwctrl_driver = { + .name = "pcie_bwctrl", + .port_type = PCIE_ANY_PORT, + .service = PCIE_PORT_SERVICE_BWCTRL, + .probe = pcie_bwnotif_probe, + .suspend = pcie_bwnotif_suspend, + .resume = pcie_bwnotif_resume, + .remove = pcie_bwnotif_remove, +}; + +int __init pcie_bwctrl_init(void) +{ + return pcie_port_service_register(&pcie_bwctrl_driver); +} diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 94111e438241..242cabd5eeeb 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -190,7 +190,8 @@ out: static void dpc_process_rp_pio_error(struct pci_dev *pdev) { u16 cap = pdev->dpc_cap, dpc_status, first_error; - u32 status, mask, sev, syserr, exc, dw0, dw1, dw2, dw3, log, prefix; + u32 status, mask, sev, syserr, exc, log; + struct pcie_tlp_log tlp_log; int i; pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, &status); @@ -214,29 +215,18 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev) first_error == i ? " (First)" : ""); } - if (pdev->dpc_rp_log_size < 4) + if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG) goto clear_status; - pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, - &dw0); - pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 4, - &dw1); - pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 8, - &dw2); - pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 12, - &dw3); - pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n", - dw0, dw1, dw2, dw3); - - if (pdev->dpc_rp_log_size < 5) + pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, + cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, + dpc_tlp_log_len(pdev), &tlp_log); + pcie_print_tlp_log(pdev, &tlp_log, dev_fmt("")); + + if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG + 1) goto clear_status; pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log); pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log); - for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) { - pci_read_config_dword(pdev, - cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix); - pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix); - } clear_status: pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status); } @@ -303,10 +293,70 @@ void dpc_process_error(struct pci_dev *pdev) } } +static void pci_clear_surpdn_errors(struct pci_dev *pdev) +{ + if (pdev->dpc_rp_extensions) + pci_write_config_dword(pdev, pdev->dpc_cap + + PCI_EXP_DPC_RP_PIO_STATUS, ~0); + + /* + * In practice, Surprise Down errors have been observed to also set + * error bits in the Status Register as well as the Fatal Error + * Detected bit in the Device Status Register. + */ + pci_write_config_word(pdev, PCI_STATUS, 0xffff); + + pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_FED); +} + +static void dpc_handle_surprise_removal(struct pci_dev *pdev) +{ + if (!pcie_wait_for_link(pdev, false)) { + pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n"); + goto out; + } + + if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) + goto out; + + pci_aer_raw_clear_status(pdev); + pci_clear_surpdn_errors(pdev); + + pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_TRIGGER); + +out: + clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + wake_up_all(&dpc_completed_waitqueue); +} + +static bool dpc_is_surprise_removal(struct pci_dev *pdev) +{ + u16 status; + + if (!pdev->is_hotplug_bridge) + return false; + + if (pci_read_config_word(pdev, pdev->aer_cap + PCI_ERR_UNCOR_STATUS, + &status)) + return false; + + return status & PCI_ERR_UNC_SURPDN; +} + static irqreturn_t dpc_handler(int irq, void *context) { struct pci_dev *pdev = context; + /* + * According to PCIe r6.0 sec 6.7.6, errors are an expected side effect + * of async removal and should be ignored by software. + */ + if (dpc_is_surprise_removal(pdev)) { + dpc_handle_surprise_removal(pdev); + return IRQ_HANDLED; + } + dpc_process_error(pdev); /* We configure DPC so it only triggers on ERR_FATAL */ @@ -350,7 +400,9 @@ void pci_dpc_init(struct pci_dev *pdev) if (!pdev->dpc_rp_log_size) { pdev->dpc_rp_log_size = FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE, cap); - if (pdev->dpc_rp_log_size < 4 || pdev->dpc_rp_log_size > 9) { + if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG || + pdev->dpc_rp_log_size > PCIE_STD_NUM_TLP_HEADERLOG + 1 + + PCIE_STD_MAX_TLP_PREFIXLOG) { pci_err(pdev, "RP PIO log size %u is invalid\n", pdev->dpc_rp_log_size); pdev->dpc_rp_log_size = 0; @@ -358,13 +410,44 @@ void pci_dpc_init(struct pci_dev *pdev) } } +static void dpc_enable(struct pcie_device *dev) +{ + struct pci_dev *pdev = dev->port; + int dpc = pdev->dpc_cap; + u16 ctl; + + /* + * Clear DPC Interrupt Status so we don't get an interrupt for an + * old event when setting DPC Interrupt Enable. + */ + pci_write_config_word(pdev, dpc + PCI_EXP_DPC_STATUS, + PCI_EXP_DPC_STATUS_INTERRUPT); + + pci_read_config_word(pdev, dpc + PCI_EXP_DPC_CTL, &ctl); + ctl &= ~PCI_EXP_DPC_CTL_EN_MASK; + ctl |= PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; + pci_write_config_word(pdev, dpc + PCI_EXP_DPC_CTL, ctl); +} + +static void dpc_disable(struct pcie_device *dev) +{ + struct pci_dev *pdev = dev->port; + int dpc = pdev->dpc_cap; + u16 ctl; + + /* Disable DPC triggering and DPC interrupts */ + pci_read_config_word(pdev, dpc + PCI_EXP_DPC_CTL, &ctl); + ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); + pci_write_config_word(pdev, dpc + PCI_EXP_DPC_CTL, ctl); +} + #define FLAG(x, y) (((x) & (y)) ? '+' : '-') static int dpc_probe(struct pcie_device *dev) { struct pci_dev *pdev = dev->port; struct device *device = &dev->device; int status; - u16 ctl, cap; + u16 cap; if (!pcie_aer_is_native(pdev) && !pcie_ports_dpc_native) return -ENOTSUPP; @@ -379,11 +462,7 @@ static int dpc_probe(struct pcie_device *dev) } pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap); - - pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl); - ctl &= ~PCI_EXP_DPC_CTL_EN_MASK; - ctl |= PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; - pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl); + dpc_enable(dev); pci_info(pdev, "enabled with IRQ %d\n", dev->irq); pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", @@ -396,14 +475,21 @@ static int dpc_probe(struct pcie_device *dev) return status; } -static void dpc_remove(struct pcie_device *dev) +static int dpc_suspend(struct pcie_device *dev) { - struct pci_dev *pdev = dev->port; - u16 ctl; + dpc_disable(dev); + return 0; +} - pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, &ctl); - ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); - pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CTL, ctl); +static int dpc_resume(struct pcie_device *dev) +{ + dpc_enable(dev); + return 0; +} + +static void dpc_remove(struct pcie_device *dev) +{ + dpc_disable(dev); } static struct pcie_port_service_driver dpcdriver = { @@ -411,6 +497,8 @@ static struct pcie_port_service_driver dpcdriver = { .port_type = PCIE_ANY_PORT, .service = PCIE_PORT_SERVICE_DPC, .probe = dpc_probe, + .suspend = dpc_suspend, + .resume = dpc_resume, .remove = dpc_remove, }; diff --git a/drivers/pci/pcie/edr.c b/drivers/pci/pcie/edr.c index 5f4914d313a1..e86298dbbcff 100644 --- a/drivers/pci/pcie/edr.c +++ b/drivers/pci/pcie/edr.c @@ -32,10 +32,10 @@ static int acpi_enable_dpc(struct pci_dev *pdev) int status = 0; /* - * Behavior when calling unsupported _DSM functions is undefined, - * so check whether EDR_PORT_DPC_ENABLE_DSM is supported. + * Per PCI Firmware r3.3, sec 4.6.12, EDR_PORT_DPC_ENABLE_DSM is + * optional. Return success if it's not implemented. */ - if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 5, + if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 6, 1ULL << EDR_PORT_DPC_ENABLE_DSM)) return 0; @@ -46,12 +46,7 @@ static int acpi_enable_dpc(struct pci_dev *pdev) argv4.package.count = 1; argv4.package.elements = &req; - /* - * Per Downstream Port Containment Related Enhancements ECN to PCI - * Firmware Specification r3.2, sec 4.6.12, EDR_PORT_DPC_ENABLE_DSM is - * optional. Return success if it's not implemented. - */ - obj = acpi_evaluate_dsm(adev->handle, &pci_acpi_dsm_guid, 5, + obj = acpi_evaluate_dsm(adev->handle, &pci_acpi_dsm_guid, 6, EDR_PORT_DPC_ENABLE_DSM, &argv4); if (!obj) return 0; @@ -85,8 +80,9 @@ static struct pci_dev *acpi_dpc_port_get(struct pci_dev *pdev) u16 port; /* - * Behavior when calling unsupported _DSM functions is undefined, - * so check whether EDR_PORT_DPC_ENABLE_DSM is supported. + * If EDR_PORT_LOCATE_DSM is not implemented under the target of + * EDR, the target is the port that experienced the containment + * event (PCI Firmware r3.3, sec 4.6.13). */ if (!acpi_check_dsm(adev->handle, &pci_acpi_dsm_guid, 5, 1ULL << EDR_PORT_LOCATE_DSM)) @@ -104,6 +100,16 @@ static struct pci_dev *acpi_dpc_port_get(struct pci_dev *pdev) } /* + * Bit 31 represents the success/failure of the operation. If bit + * 31 is set, the operation failed. + */ + if (obj->integer.value & BIT(31)) { + ACPI_FREE(obj); + pci_err(pdev, "Locate Port _DSM failed\n"); + return NULL; + } + + /* * Firmware returns DPC port BDF details in following format: * 15:8 = bus * 7:3 = device diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 59c90d04a609..31090770fffc 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -13,6 +13,7 @@ #define dev_fmt(fmt) "AER: " fmt #include <linux/pci.h> +#include <linux/pm_runtime.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/errno.h> @@ -85,6 +86,18 @@ static int report_error_detected(struct pci_dev *dev, return 0; } +static int pci_pm_runtime_get_sync(struct pci_dev *pdev, void *data) +{ + pm_runtime_get_sync(&pdev->dev); + return 0; +} + +static int pci_pm_runtime_put(struct pci_dev *pdev, void *data) +{ + pm_runtime_put(&pdev->dev); + return 0; +} + static int report_frozen_detected(struct pci_dev *dev, void *data) { return report_error_detected(dev, pci_channel_io_frozen, data); @@ -103,9 +116,7 @@ static int report_mmio_enabled(struct pci_dev *dev, void *data) device_lock(&dev->dev); pdrv = dev->driver; - if (!pdrv || - !pdrv->err_handler || - !pdrv->err_handler->mmio_enabled) + if (!pdrv || !pdrv->err_handler || !pdrv->err_handler->mmio_enabled) goto out; err_handler = pdrv->err_handler; @@ -124,9 +135,7 @@ static int report_slot_reset(struct pci_dev *dev, void *data) device_lock(&dev->dev); pdrv = dev->driver; - if (!pdrv || - !pdrv->err_handler || - !pdrv->err_handler->slot_reset) + if (!pdrv || !pdrv->err_handler || !pdrv->err_handler->slot_reset) goto out; err_handler = pdrv->err_handler; @@ -145,9 +154,7 @@ static int report_resume(struct pci_dev *dev, void *data) device_lock(&dev->dev); pdrv = dev->driver; if (!pci_dev_set_io_state(dev, pci_channel_io_normal) || - !pdrv || - !pdrv->err_handler || - !pdrv->err_handler->resume) + !pdrv || !pdrv->err_handler || !pdrv->err_handler->resume) goto out; err_handler = pdrv->err_handler; @@ -207,6 +214,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, else bridge = pci_upstream_bridge(dev); + pci_walk_bridge(bridge, pci_pm_runtime_get_sync, NULL); + pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bridge(bridge, report_frozen_detected, &status); @@ -251,10 +260,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pcie_clear_device_status(dev); pci_aer_clear_nonfatal_status(dev); } + + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_info(bridge, "device recovery successful\n"); return status; failed: + pci_walk_bridge(bridge, pci_pm_runtime_put, NULL); + pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); /* TODO: Should kernel panic here? */ diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c index 14a4b89a3b83..02e73099bad0 100644 --- a/drivers/pci/pcie/portdrv.c +++ b/drivers/pci/pcie/portdrv.c @@ -68,7 +68,7 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask, */ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | - PCIE_PORT_SERVICE_BWNOTIF)) { + PCIE_PORT_SERVICE_BWCTRL)) { pcie_capability_read_word(dev, PCI_EXP_FLAGS, ®16); *pme = FIELD_GET(PCI_EXP_FLAGS_IRQ, reg16); nvec = *pme + 1; @@ -150,11 +150,11 @@ static int pcie_port_enable_irq_vec(struct pci_dev *dev, int *irqs, int mask) /* PME, hotplug and bandwidth notification share an MSI/MSI-X vector */ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP | - PCIE_PORT_SERVICE_BWNOTIF)) { + PCIE_PORT_SERVICE_BWCTRL)) { pcie_irq = pci_irq_vector(dev, pme); irqs[PCIE_PORT_SERVICE_PME_SHIFT] = pcie_irq; irqs[PCIE_PORT_SERVICE_HP_SHIFT] = pcie_irq; - irqs[PCIE_PORT_SERVICE_BWNOTIF_SHIFT] = pcie_irq; + irqs[PCIE_PORT_SERVICE_BWCTRL_SHIFT] = pcie_irq; } if (mask & PCIE_PORT_SERVICE_AER) @@ -187,15 +187,15 @@ static int pcie_init_service_irqs(struct pci_dev *dev, int *irqs, int mask) * interrupt. */ if ((mask & PCIE_PORT_SERVICE_PME) && pcie_pme_no_msi()) - goto legacy_irq; + goto intx_irq; /* Try to use MSI-X or MSI if supported */ if (pcie_port_enable_irq_vec(dev, irqs, mask) == 0) return 0; -legacy_irq: - /* fall back to legacy IRQ */ - ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY); +intx_irq: + /* fall back to INTX IRQ */ + ret = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_INTX); if (ret < 0) return -ENODEV; @@ -265,13 +265,15 @@ static int get_port_device_capability(struct pci_dev *dev) (pcie_ports_dpc_native || (services & PCIE_PORT_SERVICE_AER))) services |= PCIE_PORT_SERVICE_DPC; + /* Enable bandwidth control if more than one speed is supported. */ if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) { u32 linkcap; pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &linkcap); - if (linkcap & PCI_EXP_LNKCAP_LBNC) - services |= PCIE_PORT_SERVICE_BWNOTIF; + if (linkcap & PCI_EXP_LNKCAP_LBNC && + hweight8(dev->supported_speeds) > 1) + services |= PCIE_PORT_SERVICE_BWCTRL; } return services; @@ -786,7 +788,7 @@ static const struct pci_error_handlers pcie_portdrv_err_handler = { static struct pci_driver pcie_portdriver = { .name = "pcieport", - .id_table = &port_pci_ids[0], + .id_table = port_pci_ids, .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, @@ -828,6 +830,7 @@ static void __init pcie_init_services(void) pcie_aer_init(); pcie_pme_init(); pcie_dpc_init(); + pcie_bwctrl_init(); pcie_hp_init(); } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 1f3803bde7ee..bd29d1cc7b8b 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -20,8 +20,8 @@ #define PCIE_PORT_SERVICE_HP (1 << PCIE_PORT_SERVICE_HP_SHIFT) #define PCIE_PORT_SERVICE_DPC_SHIFT 3 /* Downstream Port Containment */ #define PCIE_PORT_SERVICE_DPC (1 << PCIE_PORT_SERVICE_DPC_SHIFT) -#define PCIE_PORT_SERVICE_BWNOTIF_SHIFT 4 /* Bandwidth notification */ -#define PCIE_PORT_SERVICE_BWNOTIF (1 << PCIE_PORT_SERVICE_BWNOTIF_SHIFT) +#define PCIE_PORT_SERVICE_BWCTRL_SHIFT 4 /* Bandwidth Controller (notifications) */ +#define PCIE_PORT_SERVICE_BWCTRL (1 << PCIE_PORT_SERVICE_BWCTRL_SHIFT) #define PCIE_PORT_DEVICE_MAXSERVICES 5 @@ -51,6 +51,8 @@ int pcie_dpc_init(void); static inline int pcie_dpc_init(void) { return 0; } #endif +int pcie_bwctrl_init(void); + /* Port Type */ #define PCIE_ANY_PORT (~0) @@ -96,7 +98,7 @@ struct pcie_port_service_driver { int pcie_port_service_register(struct pcie_port_service_driver *new); void pcie_port_service_unregister(struct pcie_port_service_driver *new); -extern struct bus_type pcie_port_bus_type; +extern const struct bus_type pcie_port_bus_type; struct pci_dev; diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c new file mode 100644 index 000000000000..0860b5da837f --- /dev/null +++ b/drivers/pci/pcie/tlp.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCIe TLP Log handling + * + * Copyright (C) 2024 Intel Corporation + */ + +#include <linux/aer.h> +#include <linux/array_size.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include "../pci.h" + +/** + * aer_tlp_log_len - Calculate AER Capability TLP Header/Prefix Log length + * @dev: PCIe device + * @aercc: AER Capabilities and Control register value + * + * Return: TLP Header/Prefix Log length + */ +unsigned int aer_tlp_log_len(struct pci_dev *dev, u32 aercc) +{ + return PCIE_STD_NUM_TLP_HEADERLOG + + ((aercc & PCI_ERR_CAP_PREFIX_LOG_PRESENT) ? + dev->eetlp_prefix_max : 0); +} + +#ifdef CONFIG_PCIE_DPC +/** + * dpc_tlp_log_len - Calculate DPC RP PIO TLP Header/Prefix Log length + * @dev: PCIe device + * + * Return: TLP Header/Prefix Log length + */ +unsigned int dpc_tlp_log_len(struct pci_dev *dev) +{ + /* Remove ImpSpec Log register from the count */ + if (dev->dpc_rp_log_size >= PCIE_STD_NUM_TLP_HEADERLOG + 1) + return dev->dpc_rp_log_size - 1; + + return dev->dpc_rp_log_size; +} +#endif + +/** + * pcie_read_tlp_log - read TLP Header Log + * @dev: PCIe device + * @where: PCI Config offset of TLP Header Log + * @where2: PCI Config offset of TLP Prefix Log + * @tlp_len: TLP Log length (Header Log + TLP Prefix Log in DWORDs) + * @log: TLP Log structure to fill + * + * Fill @log from TLP Header Log registers, e.g., AER or DPC. + * + * Return: 0 on success and filled TLP Log structure, <0 on error. + */ +int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, + unsigned int tlp_len, struct pcie_tlp_log *log) +{ + unsigned int i; + int off, ret; + u32 *to; + + memset(log, 0, sizeof(*log)); + + for (i = 0; i < tlp_len; i++) { + if (i < PCIE_STD_NUM_TLP_HEADERLOG) { + off = where + i * 4; + to = &log->dw[i]; + } else { + off = where2 + (i - PCIE_STD_NUM_TLP_HEADERLOG) * 4; + to = &log->prefix[i - PCIE_STD_NUM_TLP_HEADERLOG]; + } + + ret = pci_read_config_dword(dev, off, to); + if (ret) + return pcibios_err_to_errno(ret); + } + + return 0; +} + +#define EE_PREFIX_STR " E-E Prefixes:" + +/** + * pcie_print_tlp_log - Print TLP Header / Prefix Log contents + * @dev: PCIe device + * @log: TLP Log structure + * @pfx: String prefix + * + * Prints TLP Header and Prefix Log information held by @log. + */ +void pcie_print_tlp_log(const struct pci_dev *dev, + const struct pcie_tlp_log *log, const char *pfx) +{ + char buf[11 * (PCIE_STD_NUM_TLP_HEADERLOG + ARRAY_SIZE(log->prefix)) + + sizeof(EE_PREFIX_STR)]; + unsigned int i; + int len; + + len = scnprintf(buf, sizeof(buf), "%#010x %#010x %#010x %#010x", + log->dw[0], log->dw[1], log->dw[2], log->dw[3]); + + if (log->prefix[0]) + len += scnprintf(buf + len, sizeof(buf) - len, EE_PREFIX_STR); + for (i = 0; i < ARRAY_SIZE(log->prefix); i++) { + if (!log->prefix[i]) + break; + len += scnprintf(buf + len, sizeof(buf) - len, + " %#010x", log->prefix[i]); + } + + pci_err(dev, "%sTLP Header: %s\n", pfx, buf); +} |