diff options
Diffstat (limited to 'arch/powerpc/kernel/eeh.c')
| -rw-r--r-- | arch/powerpc/kernel/eeh.c | 1802 |
1 files changed, 1330 insertions, 472 deletions
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 39954fe941b8..bb836f02101c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright IBM Corporation 2001, 2005, 2006 * Copyright Dave Engebretsen & Todd Inglett 2001 * Copyright Linas Vepstas 2005, 2006 * Copyright 2001-2012 IBM Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> */ @@ -26,24 +13,29 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/pci.h> +#include <linux/iommu.h> #include <linux/proc_fs.h> #include <linux/rbtree.h> +#include <linux/reboot.h> #include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/export.h> #include <linux/of.h> +#include <linux/debugfs.h> #include <linux/atomic.h> #include <asm/eeh.h> #include <asm/eeh_event.h> #include <asm/io.h> +#include <asm/iommu.h> #include <asm/machdep.h> #include <asm/ppc-pci.h> #include <asm/rtas.h> +#include <asm/pte-walk.h> /** Overview: - * EEH, or "Extended Error Handling" is a PCI bridge technology for + * EEH, or "Enhanced Error Handling" is a PCI bridge technology for * dealing with PCI bus errors that can't be dealt with within the * usual PCI framework, except by check-stopping the CPU. Systems * that are designed for high-availability/reliability cannot afford @@ -84,33 +76,50 @@ #define EEH_MAX_FAILS 2100000 /* Time to wait for a PCI slot to report status, in milliseconds */ -#define PCI_BUS_RESET_WAIT_MSEC (60*1000) +#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) -/* Platform dependent EEH operations */ -struct eeh_ops *eeh_ops = NULL; +/* + * EEH probe mode support, which is part of the flags, + * is to support multiple platforms for EEH. Some platforms + * like pSeries do PCI emunation based on device tree. + * However, other platforms like powernv probe PCI devices + * from hardware. The flag is used to distinguish that. + * In addition, struct eeh_ops::probe would be invoked for + * particular OF node or PCI device so that the corresponding + * PE would be created there. + */ +int eeh_subsystem_flags; +EXPORT_SYMBOL(eeh_subsystem_flags); -int eeh_subsystem_enabled; -EXPORT_SYMBOL(eeh_subsystem_enabled); +/* + * EEH allowed maximal frozen times. If one particular PE's + * frozen count in last hour exceeds this limit, the PE will + * be forced to be offline permanently. + */ +u32 eeh_max_freezes = 5; /* - * EEH probe mode support. The intention is to support multiple - * platforms for EEH. Some platforms like pSeries do PCI emunation - * based on device tree. However, other platforms like powernv probe - * PCI devices from hardware. The flag is used to distinguish that. - * In addition, struct eeh_ops::probe would be invoked for particular - * OF node or PCI device so that the corresponding PE would be created - * there. + * Controls whether a recovery event should be scheduled when an + * isolated device is discovered. This is only really useful for + * debugging problems with the EEH core. */ -int eeh_probe_mode; +bool eeh_debugfs_no_recover; + +/* Platform dependent EEH operations */ +struct eeh_ops *eeh_ops = NULL; /* Lock to avoid races due to multiple reports of an error */ DEFINE_RAW_SPINLOCK(confirm_error_lock); +EXPORT_SYMBOL_GPL(confirm_error_lock); + +/* Lock to protect passed flags */ +static DEFINE_MUTEX(eeh_dev_mutex); /* Buffer for reporting pci register dumps. Its here in BSS, and * not dynamically alloced, so that it ends up in RMO where RTAS * can access it. */ -#define EEH_PCI_REGS_LOG_LEN 4096 +#define EEH_PCI_REGS_LOG_LEN 8192 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; /* @@ -130,94 +139,145 @@ struct eeh_stats { static struct eeh_stats eeh_stats; -#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) +static int __init eeh_setup(char *str) +{ + if (!strcmp(str, "off")) + eeh_add_flag(EEH_FORCE_DISABLED); + else if (!strcmp(str, "early_log")) + eeh_add_flag(EEH_EARLY_DUMP_LOG); -/** - * eeh_gather_pci_data - Copy assorted PCI config space registers to buff - * @edev: device to report data for - * @buf: point to buffer in which to log - * @len: amount of room in buffer - * - * This routine captures assorted PCI configuration space data, - * and puts them into a buffer for RTAS error logging. + return 1; +} +__setup("eeh=", eeh_setup); + +void eeh_show_enabled(void) +{ + if (eeh_has_flag(EEH_FORCE_DISABLED)) + pr_info("EEH: Recovery disabled by kernel parameter.\n"); + else if (eeh_has_flag(EEH_ENABLED)) + pr_info("EEH: Capable adapter found: recovery enabled.\n"); + else + pr_info("EEH: No capable adapters found: recovery disabled.\n"); +} + +/* + * This routine captures assorted PCI configuration space data + * for the indicated PCI device, and puts them into a buffer + * for RTAS error logging. */ -static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) +static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { - struct device_node *dn = eeh_dev_to_of_node(edev); - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); u32 cfg; int cap, i; - int n = 0; + int n = 0, l = 0; + char buffer[128]; - n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); - printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name); + n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); + pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); - eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); + eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); - printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); + pr_warn("EEH: PCI device/vendor: %08x\n", cfg); - eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); + eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); - - if (!dev) { - printk(KERN_WARNING "EEH: no PCI device for this of node\n"); - return n; - } + pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); /* Gather bridge-specific registers */ - if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { - eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); + if (edev->mode & EEH_DEV_BRIDGE) { + eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); - printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); + pr_warn("EEH: Bridge secondary status: %04x\n", cfg); - eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); - printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); + pr_warn("EEH: Bridge control: %04x\n", cfg); } /* Dump out the PCI-X command and status regs */ - cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); + cap = edev->pcix_cap; if (cap) { - eeh_ops->read_config(dn, cap, 4, &cfg); + eeh_ops->read_config(edev, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); + pr_warn("EEH: PCI-X cmd: %08x\n", cfg); - eeh_ops->read_config(dn, cap+4, 4, &cfg); + eeh_ops->read_config(edev, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); + pr_warn("EEH: PCI-X status: %08x\n", cfg); } - /* If PCI-E capable, dump PCI-E cap 10, and the AER */ - cap = pci_find_capability(dev, PCI_CAP_ID_EXP); + /* If PCI-E capable, dump PCI-E cap 10 */ + cap = edev->pcie_cap; if (cap) { n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); - printk(KERN_WARNING - "EEH: PCI-E capabilities and status follow:\n"); + pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - eeh_ops->read_config(dn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); + + if ((i % 4) == 0) { + if (i != 0) + pr_warn("%s\n", buffer); + + l = scnprintf(buffer, sizeof(buffer), + "EEH: PCI-E %02x: %08x ", + 4*i, cfg); + } else { + l += scnprintf(buffer+l, sizeof(buffer)-l, + "%08x ", cfg); + } + } - cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); - if (cap) { - n += scnprintf(buf+n, len-n, "pci-e AER:\n"); - printk(KERN_WARNING - "EEH: PCI-E AER capability register set follows:\n"); + pr_warn("%s\n", buffer); + } + + /* If AER capable, dump it */ + cap = edev->aer_cap; + if (cap) { + n += scnprintf(buf+n, len-n, "pci-e AER:\n"); + pr_warn("EEH: PCI-E AER capability register set follows:\n"); + + for (i=0; i<=13; i++) { + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); + n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); + + if ((i % 4) == 0) { + if (i != 0) + pr_warn("%s\n", buffer); - for (i=0; i<14; i++) { - eeh_ops->read_config(dn, cap+4*i, 4, &cfg); - n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); + l = scnprintf(buffer, sizeof(buffer), + "EEH: PCI-E AER %02x: %08x ", + 4*i, cfg); + } else { + l += scnprintf(buffer+l, sizeof(buffer)-l, + "%08x ", cfg); } } + + pr_warn("%s\n", buffer); } return n; } +static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag) +{ + struct eeh_dev *edev, *tmp; + size_t *plen = flag; + + eeh_pe_for_each_dev(pe, edev, tmp) + *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, + EEH_PCI_REGS_LOG_LEN - *plen); + + return NULL; +} + /** * eeh_slot_error_detail - Generate combined log including driver log and error log * @pe: EEH PE @@ -231,29 +291,46 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; - struct eeh_dev *edev; - bool valid_cfg_log = true; /* * When the PHB is fenced or dead, it's pointless to collect * the data from PCI config space because it should return * 0xFF's. For ER, we still retrieve the data from the PCI * config space. + * + * For pHyp, we have to enable IO for log retrieval. Otherwise, + * 0xFF's is always returned from PCI config space. + * + * When the @severity is EEH_LOG_PERM, the PE is going to be + * removed. Prior to that, the drivers for devices included in + * the PE will be closed. The drivers rely on working IO path + * to bring the devices to quiet state. Otherwise, PCI traffic + * from those devices after they are removed is like to cause + * another unexpected EEH error. */ - if (eeh_probe_mode_dev() && - (pe->type & EEH_PE_PHB) && - (pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD))) - valid_cfg_log = false; - - if (valid_cfg_log) { - eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (!(pe->type & EEH_PE_PHB)) { + if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) || + severity == EEH_LOG_PERM) + eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + + /* + * The config space of some PCI devices can't be accessed + * when their PEs are in frozen state. Otherwise, fenced + * PHB might be seen. Those PEs are identified with flag + * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED + * is set automatically when the PE is put to EEH_PE_ISOLATED. + * + * Restoring BARs possibly triggers PCI config access in + * (OPAL) firmware and then causes fenced PHB. If the + * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's + * pointless to restore BARs and dump config space. + */ eeh_ops->configure_bridge(pe); - eeh_pe_restore_bars(pe); + if (!(pe->state & EEH_PE_CFG_BLOCKED)) { + eeh_pe_restore_bars(pe); - pci_regs_buf[0] = 0; - eeh_pe_for_each_dev(pe, edev) { - loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen, - EEH_PCI_REGS_LOG_LEN - loglen); + pci_regs_buf[0] = 0; + eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); } } @@ -269,20 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) */ static inline unsigned long eeh_token_to_phys(unsigned long token) { - pte_t *ptep; - unsigned long pa; - int hugepage_shift; - - /* - * We won't find hugepages here, iomem - */ - ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); - if (!ptep) - return token; - WARN_ON(hugepage_shift); - pa = pte_pfn(*ptep) << PAGE_SHIFT; - - return pa | (token & (PAGE_SIZE-1)); + return ppc_find_vmap_phys(token); } /* @@ -296,20 +360,20 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) unsigned long flags; int ret; - if (!eeh_probe_mode_dev()) + if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) return -EPERM; /* Find the PHB PE */ phb_pe = eeh_phb_pe_get(pe->phb); if (!phb_pe) { - pr_warning("%s Can't find PE for PHB#%d\n", - __func__, pe->phb->global_number); + pr_warn("%s Can't find PE for PHB#%x\n", + __func__, pe->phb->global_number); return -EEXIST; } /* If the PHB has been in problematic state */ eeh_serialize_lock(&flags); - if (phb_pe->state & (EEH_PE_ISOLATED | EEH_PE_PHB_DEAD)) { + if (phb_pe->state & EEH_PE_ISOLATED) { ret = 0; goto out; } @@ -317,28 +381,32 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* Check PHB state */ ret = eeh_ops->get_state(phb_pe, NULL); if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { ret = 0; goto out; } /* Isolate the PHB and send event */ - eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(phb_pe); eeh_serialize_unlock(flags); - eeh_send_failure_event(phb_pe); - - pr_err("EEH: PHB#%x failure detected\n", - phb_pe->phb->global_number); - dump_stack(); + pr_debug("EEH: PHB#%x failure detected, location: %s\n", + phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); + eeh_send_failure_event(phb_pe); return 1; out: eeh_serialize_unlock(flags); return ret; } +static inline const char *eeh_driver_name(struct pci_dev *pdev) +{ + if (pdev) + return dev_driver_string(&pdev->dev); + + return "<null>"; +} + /** * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze * @edev: eeh device @@ -359,33 +427,26 @@ int eeh_dev_check_failure(struct eeh_dev *edev) unsigned long flags; struct device_node *dn; struct pci_dev *dev; - struct eeh_pe *pe; + struct eeh_pe *pe, *parent_pe; int rc = 0; - const char *location; + const char *location = NULL; eeh_stats.total_mmio_ffs++; - if (!eeh_subsystem_enabled) + if (!eeh_enabled()) return 0; if (!edev) { eeh_stats.no_dn++; return 0; } - dn = eeh_dev_to_of_node(edev); dev = eeh_dev_to_pci_dev(edev); - pe = edev->pe; + pe = eeh_dev_to_pe(edev); /* Access to IO BARs might get this far and still not want checking. */ if (!pe) { eeh_stats.ignored_check++; - pr_debug("EEH: Ignored check for %s %s\n", - eeh_pci_name(dev), dn->full_name); - return 0; - } - - if (!pe->addr && !pe->config_addr) { - eeh_stats.no_cfg_addr++; + eeh_edev_dbg(edev, "Ignored check\n"); return 0; } @@ -397,6 +458,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev) if (ret > 0) return ret; + /* + * If the PE isn't owned by us, we shouldn't check the + * state. Instead, let the owner handle it if the PE has + * been frozen. + */ + if (eeh_pe_passed(pe)) + return 0; + /* If we already have a pending isolation event for this * slot, we know it's bad already, we don't need to check. * Do this checking under a lock; as multiple PCI devices @@ -407,13 +476,16 @@ int eeh_dev_check_failure(struct eeh_dev *edev) rc = 1; if (pe->state & EEH_PE_ISOLATED) { pe->check_count++; - if (pe->check_count % EEH_MAX_FAILS == 0) { - location = of_get_property(dn, "ibm,loc-code", NULL); - printk(KERN_ERR "EEH: %d reads ignored for recovering device at " - "location=%s driver=%s pci addr=%s\n", - pe->check_count, location, - eeh_driver_name(dev), eeh_pci_name(dev)); - printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", + if (pe->check_count == EEH_MAX_FAILS) { + dn = pci_device_to_OF_node(dev); + if (dn) + location = of_get_property(dn, "ibm,loc-code", + NULL); + eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n", + pe->check_count, + location ? location : "unknown", + eeh_driver_name(dev)); + eeh_edev_err(edev, "Might be infinite loop in %s driver\n", eeh_driver_name(dev)); dump_stack(); } @@ -434,35 +506,64 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * We will punt with the following conditions: Failure to get * PE's state, EEH not support and Permanently unavailable * state, PE is in good state. + * + * On the pSeries, after reaching the threshold, get_state might + * return EEH_STATE_NOT_SUPPORT. However, it's possible that the + * device state remains uncleared if the device is not marked + * pci_channel_io_perm_failure. Therefore, consider logging the + * event to let device removal happen. + * */ if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + (ret == EEH_STATE_NOT_SUPPORT && + dev->error_state == pci_channel_io_perm_failure) || + eeh_state_active(ret)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; goto dn_unlock; } + /* + * It should be corner case that the parent PE has been + * put into frozen state as well. We should take care + * that at first. + */ + parent_pe = pe->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + ret = eeh_ops->get_state(parent_pe, NULL); + if (ret > 0 && !eeh_state_active(ret)) { + pe = parent_pe; + pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n", + pe->phb->global_number, pe->addr, + pe->phb->global_number, parent_pe->addr); + } + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + eeh_stats.slot_resets++; /* Avoid repeated reports of this failure, including problems * with other functions on this device, and functions under * bridges. */ - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(pe); eeh_serialize_unlock(flags); - eeh_send_failure_event(pe); - /* Most EEH events are due to device driver bugs. Having * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ - pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", - pe->addr, pe->phb->global_number); - dump_stack(); + pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n", + __func__, pe->phb->global_number, pe->addr); + eeh_send_failure_event(pe); return 1; @@ -475,17 +576,16 @@ EXPORT_SYMBOL_GPL(eeh_dev_check_failure); /** * eeh_check_failure - Check if all 1's data is due to EEH slot freeze - * @token: I/O token, should be address in the form 0xA.... - * @val: value, should be all 1's (XXX why do we need this arg??) + * @token: I/O address * - * Check for an EEH failure at the given token address. Call this + * Check for an EEH failure at the given I/O address. Call this * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze event. This routine + * find out if this is due to an EEH slot freeze event. This routine * will query firmware for the EEH status. * * Note this routine is safe to call in an interrupt context. */ -unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) +int eeh_check_failure(const volatile void __iomem *token) { unsigned long addr; struct eeh_dev *edev; @@ -495,21 +595,18 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon edev = eeh_addr_cache_get_dev(addr); if (!edev) { eeh_stats.no_device++; - return val; + return 0; } - eeh_dev_check_failure(edev); - - pci_dev_put(eeh_dev_to_pci_dev(edev)); - return val; + return eeh_dev_check_failure(edev); } - EXPORT_SYMBOL(eeh_check_failure); /** * eeh_pci_enable - Enable MMIO or DMA transfers for this slot * @pe: EEH PE + * @function: EEH option * * This routine should be called to reenable frozen MMIO or DMA * so that it would work correctly again. It's useful while doing @@ -517,23 +614,118 @@ EXPORT_SYMBOL(eeh_check_failure); */ int eeh_pci_enable(struct eeh_pe *pe, int function) { - int rc; + int active_flag, rc; + + /* + * pHyp doesn't allow to enable IO or DMA on unfrozen PE. + * Also, it's pointless to enable them on unfrozen PE. So + * we have to check before enabling IO or DMA. + */ + switch (function) { + case EEH_OPT_THAW_MMIO: + active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED; + break; + case EEH_OPT_THAW_DMA: + active_flag = EEH_STATE_DMA_ACTIVE; + break; + case EEH_OPT_DISABLE: + case EEH_OPT_ENABLE: + case EEH_OPT_FREEZE_PE: + active_flag = 0; + break; + default: + pr_warn("%s: Invalid function %d\n", + __func__, function); + return -EINVAL; + } + + /* + * Check if IO or DMA has been enabled before + * enabling them. + */ + if (active_flag) { + rc = eeh_ops->get_state(pe, NULL); + if (rc < 0) + return rc; + + /* Needn't enable it at all */ + if (rc == EEH_STATE_NOT_SUPPORT) + return 0; + + /* It's already enabled */ + if (rc & active_flag) + return 0; + } + + /* Issue the request */ rc = eeh_ops->set_option(pe, function); if (rc) - pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n", - __func__, function, pe->phb->global_number, pe->addr, rc); + pr_warn("%s: Unexpected state change %d on " + "PHB#%x-PE#%x, err=%d\n", + __func__, function, pe->phb->global_number, + pe->addr, rc); + + /* Check if the request is finished successfully */ + if (active_flag) { + rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if (rc < 0) + return rc; + + if (rc & active_flag) + return 0; - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && - (function == EEH_OPT_THAW_MMIO)) - return 0; + return -EIO; + } return rc; } +static void eeh_disable_and_save_dev_state(struct eeh_dev *edev, + void *userdata) +{ + struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); + struct pci_dev *dev = userdata; + + /* + * The caller should have disabled and saved the + * state for the specified device + */ + if (!pdev || pdev == dev) + return; + + /* Ensure we have D0 power state */ + pci_set_power_state(pdev, PCI_D0); + + /* Save device state */ + pci_save_state(pdev); + + /* + * Disable device to avoid any DMA traffic and + * interrupt from the device + */ + pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); +} + +static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) +{ + struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); + struct pci_dev *dev = userdata; + + if (!pdev) + return; + + /* Apply customization from firmware */ + if (eeh_ops->restore_config) + eeh_ops->restore_config(edev); + + /* The caller should restore state for the specified device */ + if (pdev != dev) + pci_restore_state(pdev); +} + /** - * pcibios_set_pcie_slot_reset - Set PCI-E reset state + * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct * @state: reset state to enter * @@ -543,7 +735,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - struct eeh_pe *pe = edev->pe; + struct eeh_pe *pe = eeh_dev_to_pe(edev); if (!pe) { pr_err("%s: No PE found on PCI device %s\n", @@ -554,23 +746,41 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat switch (state) { case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); + eeh_unfreeze_pe(pe); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); + eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); break; case pcie_hot_reset: + eeh_pe_mark_isolated(pe); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); + eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); + eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: + eeh_pe_mark_isolated(pe); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true); + eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); + eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: + eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true); return -EINVAL; - }; + } return 0; } /** - * eeh_set_pe_freset - Check the required reset for the indicated device - * @data: EEH device + * eeh_set_dev_freset - Check the required reset for the indicated device + * @edev: EEH device * @flag: return value * * Each device might have its preferred reset type: fundamental or @@ -578,95 +788,106 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat * the indicated device and its children so that the bunch of the * devices could be reset properly. */ -static void *eeh_set_dev_freset(void *data, void *flag) +static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag) { struct pci_dev *dev; unsigned int *freset = (unsigned int *)flag; - struct eeh_dev *edev = (struct eeh_dev *)data; dev = eeh_dev_to_pci_dev(edev); if (dev) *freset |= dev->needs_freset; +} - return NULL; +static void eeh_pe_refreeze_passed(struct eeh_pe *root) +{ + struct eeh_pe *pe; + int state; + + eeh_for_each_pe(root, pe) { + if (eeh_pe_passed(pe)) { + state = eeh_ops->get_state(pe, NULL); + if (state & + (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) { + pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n", + pe->phb->global_number, pe->addr); + eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE); + } + } + } } /** - * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second + * eeh_pe_reset_full - Complete a full reset process on the indicated PE * @pe: EEH PE + * @include_passed: include passed-through devices? * - * Assert the PCI #RST line for 1/4 second. + * This function executes a full reset procedure on a PE, including setting + * the appropriate flags, performing a fundamental or hot reset, and then + * deactivating the reset status. It is designed to be used within the EEH + * subsystem, as opposed to eeh_pe_reset which is exported to drivers and + * only performs a single operation at a time. + * + * This function will attempt to reset a PE three times before failing. */ -static void eeh_reset_pe_once(struct eeh_pe *pe) +int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed) { + int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + int type = EEH_RESET_HOT; unsigned int freset = 0; + int i, state = 0, ret; - /* Determine type of EEH reset required for - * Partitionable Endpoint, a hot-reset (1) - * or a fundamental reset (3). - * A fundamental reset required by any device under - * Partitionable Endpoint trumps hot-reset. + /* + * Determine the type of reset to perform - hot or fundamental. + * Hot reset is the default operation, unless any device under the + * PE requires a fundamental reset. */ eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); if (freset) - eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); - else - eeh_ops->reset(pe, EEH_RESET_HOT); - - /* The PCI bus requires that the reset be held high for at least - * a 100 milliseconds. We wait a bit longer 'just in case'. - */ -#define PCI_BUS_RST_HOLD_TIME_MSEC 250 - msleep(PCI_BUS_RST_HOLD_TIME_MSEC); - - /* We might get hit with another EEH freeze as soon as the - * pci slot reset line is dropped. Make sure we don't miss - * these, and clear the flag now. - */ - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - - /* After a PCI slot has been reset, the PCI Express spec requires - * a 1.5 second idle time for the bus to stabilize, before starting - * up traffic. - */ -#define PCI_BUS_SETTLE_TIME_MSEC 1800 - msleep(PCI_BUS_SETTLE_TIME_MSEC); -} - -/** - * eeh_reset_pe - Reset the indicated PE - * @pe: EEH PE - * - * This routine should be called to reset indicated device, including - * PE. A PE might include multiple PCI devices and sometimes PCI bridges - * might be involved as well. - */ -int eeh_reset_pe(struct eeh_pe *pe) -{ - int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); - int i, rc; - - /* Take three shots at resetting the bus */ - for (i=0; i<3; i++) { - eeh_reset_pe_once(pe); - - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((rc & flags) == flags) - return 0; - - if (rc < 0) { - pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x", - __func__, pe->phb->global_number, pe->addr); - return -1; + type = EEH_RESET_FUNDAMENTAL; + + /* Mark the PE as in reset state and block config space accesses */ + eeh_pe_state_mark(pe, reset_state); + + /* Make three attempts at resetting the bus */ + for (i = 0; i < 3; i++) { + ret = eeh_pe_reset(pe, type, include_passed); + if (!ret) + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, + include_passed); + if (ret) { + ret = -EIO; + pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n", + state, pe->phb->global_number, pe->addr, i + 1); + continue; + } + if (i) + pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n", + pe->phb->global_number, pe->addr, i + 1); + + /* Wait until the PE is in a functioning state */ + state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + if (state < 0) { + pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x", + pe->phb->global_number, pe->addr); + ret = -ENOTRECOVERABLE; + break; } - pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n", - i+1, pe->phb->global_number, pe->addr, rc); + if (eeh_state_active(state)) + break; + else + pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n", + pe->phb->global_number, pe->addr, state, i + 1); } - return -1; + /* Resetting the PE may have unfrozen child PEs. If those PEs have been + * (potentially) passed through to a guest, re-freeze them: + */ + if (!include_passed) + eeh_pe_refreeze_passed(pe); + + eeh_pe_state_clear(pe, reset_state, true); + return ret; } /** @@ -681,350 +902,650 @@ int eeh_reset_pe(struct eeh_pe *pe) void eeh_save_bars(struct eeh_dev *edev) { int i; - struct device_node *dn; if (!edev) return; - dn = eeh_dev_to_of_node(edev); for (i = 0; i < 16; i++) - eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); + eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]); + + /* + * For PCI bridges including root port, we need enable bus + * master explicitly. Otherwise, it can't fetch IODA table + * entries correctly. So we cache the bit in advance so that + * we can restore it after reset, either PHB range or PE range. + */ + if (edev->mode & EEH_DEV_BRIDGE) + edev->config_space[1] |= PCI_COMMAND_MASTER; +} + +static int eeh_reboot_notifier(struct notifier_block *nb, + unsigned long action, void *unused) +{ + eeh_clear_flag(EEH_ENABLED); + return NOTIFY_DONE; } +static struct notifier_block eeh_reboot_nb = { + .notifier_call = eeh_reboot_notifier, +}; + +static int eeh_device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + /* + * Note: It's not possible to perform EEH device addition (i.e. + * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on + * the device's resources, which have not yet been set up. + */ + case BUS_NOTIFY_DEL_DEVICE: + eeh_remove_device(to_pci_dev(dev)); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block eeh_device_nb = { + .notifier_call = eeh_device_notifier, +}; + /** - * eeh_ops_register - Register platform dependent EEH operations - * @ops: platform dependent EEH operations + * eeh_init - System wide EEH initialization + * @ops: struct to trace EEH operation callback functions * - * Register the platform dependent EEH operation callback - * functions. The platform should call this function before - * any other EEH operations. + * It's the platform's job to call this from an arch_initcall(). */ -int __init eeh_ops_register(struct eeh_ops *ops) +int eeh_init(struct eeh_ops *ops) { - if (!ops->name) { - pr_warning("%s: Invalid EEH ops name for %p\n", - __func__, ops); - return -EINVAL; - } + struct pci_controller *hose, *tmp; + int ret = 0; - if (eeh_ops && eeh_ops != ops) { - pr_warning("%s: EEH ops of platform %s already existing (%s)\n", - __func__, eeh_ops->name, ops->name); + /* the platform should only initialise EEH once */ + if (WARN_ON(eeh_ops)) return -EEXIST; + if (WARN_ON(!ops)) + return -ENOENT; + eeh_ops = ops; + + /* Register reboot notifier */ + ret = register_reboot_notifier(&eeh_reboot_nb); + if (ret) { + pr_warn("%s: Failed to register reboot notifier (%d)\n", + __func__, ret); + return ret; } - eeh_ops = ops; + ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb); + if (ret) { + pr_warn("%s: Failed to register bus notifier (%d)\n", + __func__, ret); + return ret; + } - return 0; + /* Initialize PHB PEs */ + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + eeh_phb_pe_create(hose); + + eeh_addr_cache_init(); + + /* Initialize EEH event */ + return eeh_event_init(); } /** - * eeh_ops_unregister - Unreigster platform dependent EEH operations - * @name: name of EEH platform operations + * eeh_probe_device() - Perform EEH initialization for the indicated pci device + * @dev: pci device for which to set up EEH * - * Unregister the platform dependent EEH operation callback - * functions. + * This routine must be used to complete EEH initialization for PCI + * devices that were added after system boot (e.g. hotplug, dlpar). */ -int __exit eeh_ops_unregister(const char *name) +void eeh_probe_device(struct pci_dev *dev) { - if (!name || !strlen(name)) { - pr_warning("%s: Invalid EEH ops name\n", - __func__); - return -EINVAL; + struct eeh_dev *edev; + + pr_debug("EEH: Adding device %s\n", pci_name(dev)); + + /* + * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was + * already called for this device. + */ + if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) { + pci_dbg(dev, "Already bound to an eeh_dev!\n"); + return; } - if (eeh_ops && !strcmp(eeh_ops->name, name)) { - eeh_ops = NULL; - return 0; + edev = eeh_ops->probe(dev); + if (!edev) { + pr_debug("EEH: Adding device failed\n"); + return; + } + + /* + * FIXME: We rely on pcibios_release_device() to remove the + * existing EEH state. The release function is only called if + * the pci_dev's refcount drops to zero so if something is + * keeping a ref to a device (e.g. a filesystem) we need to + * remove the old EEH state. + * + * FIXME: HEY MA, LOOK AT ME, NO LOCKING! + */ + if (edev->pdev && edev->pdev != dev) { + eeh_pe_tree_remove(edev); + eeh_addr_cache_rmv_dev(edev->pdev); + eeh_sysfs_remove_device(edev->pdev); + + /* + * We definitely should have the PCI device removed + * though it wasn't correctly. So we needn't call + * into error handler afterwards. + */ + edev->mode |= EEH_DEV_NO_HANDLER; } - return -EEXIST; + /* bind the pdev and the edev together */ + edev->pdev = dev; + dev->dev.archdata.edev = edev; + eeh_addr_cache_insert_dev(dev); + eeh_sysfs_add_device(dev); } /** - * eeh_init - EEH initialization - * - * Initialize EEH by trying to enable it for all of the adapters in the system. - * As a side effect we can determine here if eeh is supported at all. - * Note that we leave EEH on so failed config cycles won't cause a machine - * check. If a user turns off EEH for a particular adapter they are really - * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't - * grant access to a slot if EEH isn't enabled, and so we always enable - * EEH for all slots/all devices. + * eeh_remove_device - Undo EEH setup for the indicated pci device + * @dev: pci device to be removed * - * The eeh-force-off option disables EEH checking globally, for all slots. - * Even if force-off is set, the EEH hardware is still enabled, so that - * newer systems can boot. + * This routine should be called when a device is removed from + * a running system (e.g. by hotplug or dlpar). It unregisters + * the PCI device from the EEH subsystem. I/O errors affecting + * this device will no longer be detected after this call; thus, + * i/o errors affecting this slot may leave this device unusable. */ -int eeh_init(void) +void eeh_remove_device(struct pci_dev *dev) { - struct pci_controller *hose, *tmp; - struct device_node *phb; - static int cnt = 0; - int ret = 0; + struct eeh_dev *edev; + + if (!dev || !eeh_enabled()) + return; + edev = pci_dev_to_eeh_dev(dev); + + /* Unregister the device with the EEH/PCI address search system */ + dev_dbg(&dev->dev, "EEH: Removing device\n"); + + if (!edev || !edev->pdev || !edev->pe) { + dev_dbg(&dev->dev, "EEH: Device not referenced!\n"); + return; + } /* - * We have to delay the initialization on PowerNV after - * the PCI hierarchy tree has been built because the PEs - * are figured out based on PCI devices instead of device - * tree nodes + * During the hotplug for EEH error recovery, we need the EEH + * device attached to the parent PE in order for BAR restore + * a bit later. So we keep it for BAR restore and remove it + * from the parent PE during the BAR resotre. */ - if (machine_is(powernv) && cnt++ <= 0) - return ret; + edev->pdev = NULL; - /* call platform initialization function */ - if (!eeh_ops) { - pr_warning("%s: Platform EEH operation not found\n", - __func__); - return -EEXIST; - } else if ((ret = eeh_ops->init())) { - pr_warning("%s: Failed to call platform init function (%d)\n", - __func__, ret); - return ret; - } + /* + * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to + * remove the sysfs files before clearing dev.archdata.edev + */ + if (edev->mode & EEH_DEV_SYSFS) + eeh_sysfs_remove_device(dev); - /* Initialize EEH event */ - ret = eeh_event_init(); - if (ret) - return ret; + /* + * We're removing from the PCI subsystem, that means + * the PCI device driver can't support EEH or not + * well. So we rely on hotplug completely to do recovery + * for the specific PCI device. + */ + edev->mode |= EEH_DEV_NO_HANDLER; - /* Enable EEH for all adapters */ - if (eeh_probe_mode_devtree()) { - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { - phb = hose->dn; - traverse_pci_devices(phb, eeh_ops->of_probe, NULL); - } - } else if (eeh_probe_mode_dev()) { - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) - pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL); - } else { - pr_warning("%s: Invalid probe mode %d\n", - __func__, eeh_probe_mode); - return -EINVAL; - } + eeh_addr_cache_rmv_dev(dev); /* - * Call platform post-initialization. Actually, It's good chance - * to inform platform that EEH is ready to supply service if the - * I/O cache stuff has been built up. + * The flag "in_error" is used to trace EEH devices for VFs + * in error state or not. It's set in eeh_report_error(). If + * it's not set, eeh_report_{reset,resume}() won't be called + * for the VF EEH device. */ - if (eeh_ops->post_init) { - ret = eeh_ops->post_init(); - if (ret) - return ret; + edev->in_error = false; + dev->dev.archdata.edev = NULL; + if (!(edev->pe->state & EEH_PE_KEEP)) + eeh_pe_tree_remove(edev); + else + edev->mode |= EEH_DEV_DISCONNECTED; +} + +int eeh_unfreeze_pe(struct eeh_pe *pe) +{ + int ret; + + ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (ret) { + pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; } - if (eeh_subsystem_enabled) - pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); - else - pr_warning("EEH: No capable adapters found\n"); + ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (ret) { + pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n", + __func__, ret, pe->phb->global_number, pe->addr); + return ret; + } return ret; } +EXPORT_SYMBOL_GPL(eeh_unfreeze_pe); -core_initcall_sync(eeh_init); + +static struct pci_device_id eeh_reset_ids[] = { + { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ + { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ + { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */ + { 0 } +}; + +static int eeh_pe_change_owner(struct eeh_pe *pe) +{ + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + struct pci_device_id *id; + int ret; + + /* Check PE state */ + ret = eeh_ops->get_state(pe, NULL); + if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) + return 0; + + /* Unfrozen PE, nothing to do */ + if (eeh_state_active(ret)) + return 0; + + /* Frozen PE, check if it needs PE level reset */ + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { + if (id->vendor != PCI_ANY_ID && + id->vendor != pdev->vendor) + continue; + if (id->device != PCI_ANY_ID && + id->device != pdev->device) + continue; + if (id->subvendor != PCI_ANY_ID && + id->subvendor != pdev->subsystem_vendor) + continue; + if (id->subdevice != PCI_ANY_ID && + id->subdevice != pdev->subsystem_device) + continue; + + return eeh_pe_reset_and_recover(pe); + } + } + + ret = eeh_unfreeze_pe(pe); + if (!ret) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true); + return ret; +} /** - * eeh_add_device_early - Enable EEH for the indicated device_node - * @dn: device node for which to set up EEH + * eeh_dev_open - Increase count of pass through devices for PE + * @pdev: PCI device * - * This routine must be used to perform EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - * This routine must be called before any i/o is performed to the - * adapter (inluding any config-space i/o). - * Whether this actually enables EEH or not for this device depends - * on the CEC architecture, type of the device, on earlier boot - * command-line arguments & etc. + * Increase count of passed through devices for the indicated + * PE. In the result, the EEH errors detected on the PE won't be + * reported. The PE owner will be responsible for detection + * and recovery. */ -static void eeh_add_device_early(struct device_node *dn) +int eeh_dev_open(struct pci_dev *pdev) { - struct pci_controller *phb; + struct eeh_dev *edev; + int ret = -ENODEV; + + guard(mutex)(&eeh_dev_mutex); + + /* No PCI device ? */ + if (!pdev) + return ret; + + /* No EEH device or PE ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe) + return ret; /* - * If we're doing EEH probe based on PCI device, we - * would delay the probe until late stage because - * the PCI device isn't available this moment. + * The PE might have been put into frozen state, but we + * didn't detect that yet. The passed through PCI devices + * in frozen PE won't work properly. Clear the frozen state + * in advance. */ - if (!eeh_probe_mode_devtree()) - return; - - if (!of_node_to_eeh_dev(dn)) - return; - phb = of_node_to_eeh_dev(dn)->phb; + ret = eeh_pe_change_owner(edev->pe); + if (ret) + return ret; - /* USB Bus children of PCI devices will not have BUID's */ - if (NULL == phb || 0 == phb->buid) - return; + /* Increase PE's pass through count */ + atomic_inc(&edev->pe->pass_dev_cnt); - eeh_ops->of_probe(dn, NULL); + return 0; } +EXPORT_SYMBOL_GPL(eeh_dev_open); /** - * eeh_add_device_tree_early - Enable EEH for the indicated device - * @dn: device node + * eeh_dev_release - Decrease count of pass through devices for PE + * @pdev: PCI device * - * This routine must be used to perform EEH initialization for the - * indicated PCI device that was added after system boot (e.g. - * hotplug, dlpar). + * Decrease count of pass through devices for the indicated PE. If + * there is no passed through device in PE, the EEH errors detected + * on the PE will be reported and handled as usual. */ -void eeh_add_device_tree_early(struct device_node *dn) +void eeh_dev_release(struct pci_dev *pdev) { - struct device_node *sib; + struct eeh_dev *edev; - for_each_child_of_node(dn, sib) - eeh_add_device_tree_early(sib); - eeh_add_device_early(dn); + guard(mutex)(&eeh_dev_mutex); + + /* No PCI device ? */ + if (!pdev) + return; + + /* No EEH device ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) + return; + + /* Decrease PE's pass through count */ + WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0); + eeh_pe_change_owner(edev->pe); } -EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); +EXPORT_SYMBOL(eeh_dev_release); + +#ifdef CONFIG_IOMMU_API /** - * eeh_add_device_late - Perform EEH initialization for the indicated pci device - * @dev: pci device for which to set up EEH + * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE + * @group: IOMMU group * - * This routine must be used to complete EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). + * The routine is called to convert IOMMU group to EEH PE. */ -static void eeh_add_device_late(struct pci_dev *dev) +struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group) { - struct device_node *dn; + struct pci_dev *pdev = NULL; struct eeh_dev *edev; + int ret; - if (!dev || !eeh_subsystem_enabled) - return; - - pr_debug("EEH: Adding device %s\n", pci_name(dev)); - - dn = pci_device_to_OF_node(dev); - edev = of_node_to_eeh_dev(dn); - if (edev->pdev == dev) { - pr_debug("EEH: Already referenced !\n"); - return; - } - WARN_ON(edev->pdev); + /* No IOMMU group ? */ + if (!group) + return NULL; - pci_dev_get(dev); - edev->pdev = dev; - dev->dev.archdata.edev = edev; + ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); + if (!ret || !pdev) + return NULL; - /* - * We have to do the EEH probe here because the PCI device - * hasn't been created yet in the early stage. - */ - if (eeh_probe_mode_dev()) - eeh_ops->dev_probe(dev, NULL); + /* No EEH device or PE ? */ + edev = pci_dev_to_eeh_dev(pdev); + if (!edev || !edev->pe) + return NULL; - eeh_addr_cache_insert_dev(dev); + return edev->pe; } +EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe); + +#endif /* CONFIG_IOMMU_API */ /** - * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus - * @bus: PCI bus + * eeh_pe_set_option - Set options for the indicated PE + * @pe: EEH PE + * @option: requested option * - * This routine must be used to perform EEH initialization for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. + * The routine is called to enable or disable EEH functionality + * on the indicated PE, to enable IO or DMA for the frozen PE. */ -void eeh_add_device_tree_late(struct pci_bus *bus) +int eeh_pe_set_option(struct eeh_pe *pe, int option) { - struct pci_dev *dev; + int ret = 0; + + /* Invalid PE ? */ + if (!pe) + return -ENODEV; - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_add_device_late(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_device_tree_late(subbus); + /* + * EEH functionality could possibly be disabled, just + * return error for the case. And the EEH functionality + * isn't expected to be disabled on one specific PE. + */ + switch (option) { + case EEH_OPT_ENABLE: + if (eeh_enabled()) { + ret = eeh_pe_change_owner(pe); + break; } + ret = -EIO; + break; + case EEH_OPT_DISABLE: + break; + case EEH_OPT_THAW_MMIO: + case EEH_OPT_THAW_DMA: + case EEH_OPT_FREEZE_PE: + if (!eeh_ops || !eeh_ops->set_option) { + ret = -ENOENT; + break; + } + + ret = eeh_pci_enable(pe, option); + break; + default: + pr_debug("%s: Option %d out of range (%d, %d)\n", + __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); + ret = -EINVAL; } + + return ret; } -EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); +EXPORT_SYMBOL_GPL(eeh_pe_set_option); /** - * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus - * @bus: PCI bus + * eeh_pe_get_state - Retrieve PE's state + * @pe: EEH PE * - * This routine must be used to add EEH sysfs files for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. + * Retrieve the PE's state, which includes 3 aspects: enabled + * DMA, enabled IO and asserted reset. */ -void eeh_add_sysfs_files(struct pci_bus *bus) +int eeh_pe_get_state(struct eeh_pe *pe) { - struct pci_dev *dev; + int result, ret = 0; + bool rst_active, dma_en, mmio_en; + + /* Existing PE ? */ + if (!pe) + return -ENODEV; + + if (!eeh_ops || !eeh_ops->get_state) + return -ENOENT; + + /* + * If the parent PE is owned by the host kernel and is undergoing + * error recovery, we should return the PE state as temporarily + * unavailable so that the error recovery on the guest is suspended + * until the recovery completes on the host. + */ + if (pe->parent && + !(pe->state & EEH_PE_REMOVED) && + (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) + return EEH_PE_STATE_UNAVAIL; + + result = eeh_ops->get_state(pe, NULL); + rst_active = !!(result & EEH_STATE_RESET_ACTIVE); + dma_en = !!(result & EEH_STATE_DMA_ENABLED); + mmio_en = !!(result & EEH_STATE_MMIO_ENABLED); + + if (rst_active) + ret = EEH_PE_STATE_RESET; + else if (dma_en && mmio_en) + ret = EEH_PE_STATE_NORMAL; + else if (!dma_en && !mmio_en) + ret = EEH_PE_STATE_STOPPED_IO_DMA; + else if (!dma_en && mmio_en) + ret = EEH_PE_STATE_STOPPED_DMA; + else + ret = EEH_PE_STATE_UNAVAIL; + + return ret; +} +EXPORT_SYMBOL_GPL(eeh_pe_get_state); - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_sysfs_add_device(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_sysfs_files(subbus); +static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed) +{ + struct eeh_dev *edev, *tmp; + struct pci_dev *pdev; + int ret = 0; + + eeh_pe_restore_bars(pe); + + /* + * Reenable PCI devices as the devices passed + * through are always enabled before the reset. + */ + eeh_pe_for_each_dev(pe, edev, tmp) { + pdev = eeh_dev_to_pci_dev(edev); + if (!pdev) + continue; + + ret = pci_reenable_device(pdev); + if (ret) { + pr_warn("%s: Failure %d reenabling %s\n", + __func__, ret, pci_name(pdev)); + return ret; } } + + /* The PE is still in frozen state */ + if (include_passed || !eeh_pe_passed(pe)) { + ret = eeh_unfreeze_pe(pe); + } else + pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n", + pe->phb->global_number, pe->addr); + if (!ret) + eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed); + return ret; } -EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); + /** - * eeh_remove_device - Undo EEH setup for the indicated pci device - * @dev: pci device to be removed - * @purge_pe: remove the PE or not + * eeh_pe_reset - Issue PE reset according to specified type + * @pe: EEH PE + * @option: reset type + * @include_passed: include passed-through devices? * - * This routine should be called when a device is removed from - * a running system (e.g. by hotplug or dlpar). It unregisters - * the PCI device from the EEH subsystem. I/O errors affecting - * this device will no longer be detected after this call; thus, - * i/o errors affecting this slot may leave this device unusable. + * The routine is called to reset the specified PE with the + * indicated type, either fundamental reset or hot reset. + * PE reset is the most important part for error recovery. */ -static void eeh_remove_device(struct pci_dev *dev, int purge_pe) +int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed) { - struct eeh_dev *edev; + int ret = 0; - if (!dev || !eeh_subsystem_enabled) - return; - edev = pci_dev_to_eeh_dev(dev); + /* Invalid PE ? */ + if (!pe) + return -ENODEV; - /* Unregister the device with the EEH/PCI address search system */ - pr_debug("EEH: Removing device %s\n", pci_name(dev)); + if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) + return -ENOENT; - if (!edev || !edev->pdev) { - pr_debug("EEH: Not referenced !\n"); - return; + switch (option) { + case EEH_RESET_DEACTIVATE: + ret = eeh_ops->reset(pe, option); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed); + if (ret) + break; + + ret = eeh_pe_reenable_devices(pe, include_passed); + break; + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + /* + * Proactively freeze the PE to drop all MMIO access + * during reset, which should be banned as it's always + * cause recursive EEH error. + */ + eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); + + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); + ret = eeh_ops->reset(pe, option); + break; + default: + pr_debug("%s: Unsupported option %d\n", + __func__, option); + ret = -EINVAL; } - edev->pdev = NULL; - dev->dev.archdata.edev = NULL; - pci_dev_put(dev); - eeh_rmv_from_parent_pe(edev, purge_pe); - eeh_addr_cache_rmv_dev(dev); - eeh_sysfs_remove_device(dev); + return ret; } +EXPORT_SYMBOL_GPL(eeh_pe_reset); /** - * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device - * @dev: PCI device - * @purge_pe: remove the corresponding PE or not + * eeh_pe_configure - Configure PCI bridges after PE reset + * @pe: EEH PE * - * This routine must be called when a device is removed from the - * running system through hotplug or dlpar. The corresponding - * PCI address cache will be removed. + * The routine is called to restore the PCI config space for + * those PCI devices, especially PCI bridges affected by PE + * reset issued previously. */ -void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) +int eeh_pe_configure(struct eeh_pe *pe) { - struct pci_bus *bus = dev->subordinate; - struct pci_dev *child, *tmp; + int ret = 0; - eeh_remove_device(dev, purge_pe); + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + else + ret = eeh_ops->configure_bridge(pe); - if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) - eeh_remove_bus_device(child, purge_pe); - } + return ret; } -EXPORT_SYMBOL_GPL(eeh_remove_bus_device); +EXPORT_SYMBOL_GPL(eeh_pe_configure); + +/** + * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE + * @pe: the indicated PE + * @type: error type + * @func: error function + * @addr: address + * @mask: address mask + * + * The routine is called to inject the specified PCI error, which + * is determined by @type and @func, to the indicated PE for + * testing purpose. + */ +int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + /* Unsupported operation ? */ + if (!eeh_ops || !eeh_ops->err_inject) + return -ENOENT; + + /* Check on PCI error function */ + if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) + return -EINVAL; + + return eeh_ops->err_inject(pe, type, func, addr, mask); +} +EXPORT_SYMBOL_GPL(eeh_pe_inject_err); + +#ifdef CONFIG_PROC_FS static int proc_eeh_show(struct seq_file *m, void *v) { - if (0 == eeh_subsystem_enabled) { + if (!eeh_enabled()) { seq_printf(m, "EEH Subsystem is globally disabled\n"); seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); } else { @@ -1048,23 +1569,360 @@ static int proc_eeh_show(struct seq_file *m, void *v) return 0; } +#endif /* CONFIG_PROC_FS */ + +static int eeh_break_device(struct pci_dev *pdev) +{ + struct resource *bar = NULL; + void __iomem *mapped; + u16 old, bit; + int i, pos; + + /* Do we have an MMIO BAR to disable? */ + for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { + struct resource *r = &pdev->resource[i]; + + if (!r->flags || !r->start) + continue; + if (r->flags & IORESOURCE_IO) + continue; + if (r->flags & IORESOURCE_UNSET) + continue; + + bar = r; + break; + } + + if (!bar) { + pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n"); + return -ENXIO; + } + + pci_err(pdev, "Going to break: %pR\n", bar); + + if (pdev->is_virtfn) { +#ifndef CONFIG_PCI_IOV + return -ENXIO; +#else + /* + * VFs don't have a per-function COMMAND register, so the best + * we can do is clear the Memory Space Enable bit in the PF's + * SRIOV control reg. + * + * Unfortunately, this requires that we have a PF (i.e doesn't + * work for a passed-through VF) and it has the potential side + * effect of also causing an EEH on every other VF under the + * PF. Oh well. + */ + pdev = pdev->physfn; + if (!pdev) + return -ENXIO; /* passed through VFs have no PF */ + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + pos += PCI_SRIOV_CTRL; + bit = PCI_SRIOV_CTRL_MSE; +#endif /* !CONFIG_PCI_IOV */ + } else { + bit = PCI_COMMAND_MEMORY; + pos = PCI_COMMAND; + } + + /* + * Process here is: + * + * 1. Disable Memory space. + * + * 2. Perform an MMIO to the device. This should result in an error + * (CA / UR) being raised by the device which results in an EEH + * PE freeze. Using the in_8() accessor skips the eeh detection hook + * so the freeze hook so the EEH Detection machinery won't be + * triggered here. This is to match the usual behaviour of EEH + * where the HW will asynchronously freeze a PE and it's up to + * the kernel to notice and deal with it. + * + * 3. Turn Memory space back on. This is more important for VFs + * since recovery will probably fail if we don't. For normal + * the COMMAND register is reset as a part of re-initialising + * the device. + * + * Breaking stuff is the point so who cares if it's racy ;) + */ + pci_read_config_word(pdev, pos, &old); + + mapped = ioremap(bar->start, PAGE_SIZE); + if (!mapped) { + pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar); + return -ENXIO; + } + + pci_write_config_word(pdev, pos, old & ~bit); + in_8(mapped); + pci_write_config_word(pdev, pos, old); + + iounmap(mapped); + + return 0; +} + +int eeh_pe_inject_mmio_error(struct pci_dev *pdev) +{ + return eeh_break_device(pdev); +} -static int proc_eeh_open(struct inode *inode, struct file *file) +#ifdef CONFIG_DEBUG_FS + + +static struct pci_dev *eeh_debug_lookup_pdev(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) { - return single_open(file, proc_eeh_show, NULL); + uint32_t domain, bus, dev, fn; + struct pci_dev *pdev; + char buf[20]; + int ret; + + memset(buf, 0, sizeof(buf)); + ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count); + if (!ret) + return ERR_PTR(-EFAULT); + + ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn); + if (ret != 4) { + pr_err("%s: expected 4 args, got %d\n", __func__, ret); + return ERR_PTR(-EINVAL); + } + + pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn); + if (!pdev) + return ERR_PTR(-ENODEV); + + return pdev; } -static const struct file_operations proc_eeh_operations = { - .open = proc_eeh_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, +static int eeh_enable_dbgfs_set(void *data, u64 val) +{ + if (val) + eeh_clear_flag(EEH_FORCE_DISABLED); + else + eeh_add_flag(EEH_FORCE_DISABLED); + + return 0; +} + +static int eeh_enable_dbgfs_get(void *data, u64 *val) +{ + if (eeh_enabled()) + *val = 0x1ul; + else + *val = 0x0ul; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, + eeh_enable_dbgfs_set, "0x%llx\n"); + +static ssize_t eeh_force_recover_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_controller *hose; + uint32_t phbid, pe_no; + struct eeh_pe *pe; + char buf[20]; + int ret; + + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* + * When PE is NULL the event is a "special" event. Rather than + * recovering a specific PE it forces the EEH core to scan for failed + * PHBs and recovers each. This needs to be done before any device + * recoveries can occur. + */ + if (!strncmp(buf, "hwcheck", 7)) { + __eeh_send_failure_event(NULL); + return count; + } + + ret = sscanf(buf, "%x:%x", &phbid, &pe_no); + if (ret != 2) + return -EINVAL; + + hose = pci_find_controller_for_domain(phbid); + if (!hose) + return -ENODEV; + + /* Retrieve PE */ + pe = eeh_pe_get(hose, pe_no); + if (!pe) + return -ENODEV; + + /* + * We don't do any state checking here since the detection + * process is async to the recovery process. The recovery + * thread *should* not break even if we schedule a recovery + * from an odd state (e.g. PE removed, or recovery of a + * non-isolated PE) + */ + __eeh_send_failure_event(pe); + + return ret < 0 ? ret : count; +} + +static const struct file_operations eeh_force_recover_fops = { + .open = simple_open, + .write = eeh_force_recover_write, }; +static ssize_t eeh_debugfs_dev_usage(struct file *filp, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n"; + + return simple_read_from_buffer(user_buf, count, ppos, + usage, sizeof(usage) - 1); +} + +static ssize_t eeh_dev_check_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_dev *pdev; + struct eeh_dev *edev; + int ret; + + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + edev = pci_dev_to_eeh_dev(pdev); + if (!edev) { + pci_err(pdev, "No eeh_dev for this device!\n"); + pci_dev_put(pdev); + return -ENODEV; + } + + ret = eeh_dev_check_failure(edev); + pci_info(pdev, "eeh_dev_check_failure(%s) = %d\n", + pci_name(pdev), ret); + + pci_dev_put(pdev); + + return count; +} + +static const struct file_operations eeh_dev_check_fops = { + .open = simple_open, + .write = eeh_dev_check_write, + .read = eeh_debugfs_dev_usage, +}; + +static ssize_t eeh_dev_break_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_dev *pdev; + int ret; + + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + ret = eeh_break_device(pdev); + pci_dev_put(pdev); + + if (ret < 0) + return ret; + + return count; +} + +static const struct file_operations eeh_dev_break_fops = { + .open = simple_open, + .write = eeh_dev_break_write, + .read = eeh_debugfs_dev_usage, +}; + +static ssize_t eeh_dev_can_recover(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_driver *drv; + struct pci_dev *pdev; + size_t ret; + + pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + /* + * In order for error recovery to work the driver needs to implement + * .error_detected(), so it can quiesce IO to the device, and + * .slot_reset() so it can re-initialise the device after a reset. + * + * Ideally they'd implement .resume() too, but some drivers which + * we need to support (notably IPR) don't so I guess we can tolerate + * that. + * + * .mmio_enabled() is mostly there as a work-around for devices which + * take forever to re-init after a hot reset. Implementing that is + * strictly optional. + */ + drv = pci_dev_driver(pdev); + if (drv && + drv->err_handler && + drv->err_handler->error_detected && + drv->err_handler->slot_reset) { + ret = count; + } else { + ret = -EOPNOTSUPP; + } + + pci_dev_put(pdev); + + return ret; +} + +static const struct file_operations eeh_dev_can_recover_fops = { + .open = simple_open, + .write = eeh_dev_can_recover, + .read = eeh_debugfs_dev_usage, +}; + +#endif + static int __init eeh_init_proc(void) { - if (machine_is(pseries)) - proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); + if (machine_is(pseries) || machine_is(powernv)) { + proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show); +#ifdef CONFIG_DEBUG_FS + debugfs_create_file_unsafe("eeh_enable", 0600, + arch_debugfs_dir, NULL, + &eeh_enable_dbgfs_ops); + debugfs_create_u32("eeh_max_freezes", 0600, + arch_debugfs_dir, &eeh_max_freezes); + debugfs_create_bool("eeh_disable_recovery", 0600, + arch_debugfs_dir, + &eeh_debugfs_no_recover); + debugfs_create_file_unsafe("eeh_dev_check", 0600, + arch_debugfs_dir, NULL, + &eeh_dev_check_fops); + debugfs_create_file_unsafe("eeh_dev_break", 0600, + arch_debugfs_dir, NULL, + &eeh_dev_break_fops); + debugfs_create_file_unsafe("eeh_force_recover", 0600, + arch_debugfs_dir, NULL, + &eeh_force_recover_fops); + debugfs_create_file_unsafe("eeh_dev_can_recover", 0600, + arch_debugfs_dir, NULL, + &eeh_dev_can_recover_fops); + eeh_cache_debugfs_init(); +#endif + } + return 0; } __initcall(eeh_init_proc); |
