summaryrefslogtreecommitdiff
path: root/arch/s390/pci
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/pci')
-rw-r--r--arch/s390/pci/Makefile6
-rw-r--r--arch/s390/pci/pci.c697
-rw-r--r--arch/s390/pci/pci_bus.c374
-rw-r--r--arch/s390/pci/pci_bus.h42
-rw-r--r--arch/s390/pci/pci_clp.c304
-rw-r--r--arch/s390/pci/pci_debug.c24
-rw-r--r--arch/s390/pci/pci_dma.c684
-rw-r--r--arch/s390/pci/pci_event.c355
-rw-r--r--arch/s390/pci/pci_insn.c177
-rw-r--r--arch/s390/pci/pci_iov.c99
-rw-r--r--arch/s390/pci/pci_iov.h30
-rw-r--r--arch/s390/pci/pci_irq.c150
-rw-r--r--arch/s390/pci/pci_kvm_hook.c11
-rw-r--r--arch/s390/pci/pci_mmio.c292
-rw-r--r--arch/s390/pci/pci_sysfs.c137
15 files changed, 2065 insertions, 1317 deletions
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 748626a33028..0547a10406e7 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,5 +3,7 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
- pci_event.o pci_debug.o pci_insn.o pci_mmio.o
+obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \
+ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
+ pci_bus.o pci_kvm_hook.o
+obj-$(CONFIG_PCI_IOV) += pci_iov.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 8e872951c07b..26afde0d1ed3 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -28,6 +28,7 @@
#include <linux/jump_label.h>
#include <linux/pci.h>
#include <linux/printk.h>
+#include <linux/lockdep.h>
#include <asm/isc.h>
#include <asm/airq.h>
@@ -36,17 +37,22 @@
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
+#include "pci_bus.h"
+#include "pci_iov.h"
+
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
-static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
+static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
static DEFINE_SPINLOCK(zpci_domain_lock);
#define ZPCI_IOMAP_ENTRIES \
min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2), \
ZPCI_IOMAP_MAX_ENTRIES)
+unsigned int s390_pci_no_rid;
+
static DEFINE_SPINLOCK(zpci_iomap_lock);
static unsigned long *zpci_iomap_bitmap;
struct zpci_iomap_entry *zpci_iomap_start;
@@ -56,6 +62,12 @@ DEFINE_STATIC_KEY_FALSE(have_mio);
static struct kmem_cache *zdev_fmb_cache;
+/* AEN structures that must be preserved over KVM module re-insertion */
+union zpci_sic_iib *zpci_aipb;
+EXPORT_SYMBOL_GPL(zpci_aipb);
+struct airq_iv *zpci_aif_sbv;
+EXPORT_SYMBOL_GPL(zpci_aif_sbv);
+
struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
@@ -64,6 +76,7 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
list_for_each_entry(tmp, &zpci_list, entry) {
if (tmp->fid == fid) {
zdev = tmp;
+ zpci_zdev_get(zdev);
break;
}
}
@@ -87,17 +100,12 @@ void zpci_remove_reserved_devices(void)
spin_unlock(&zpci_list_lock);
list_for_each_entry_safe(zdev, tmp, &remove, entry)
- zpci_remove_device(zdev);
-}
-
-static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
-{
- return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
+ zpci_device_reserved(zdev);
}
int pci_domain_nr(struct pci_bus *bus)
{
- return ((struct zpci_dev *) bus->sysdata)->domain;
+ return ((struct zpci_bus *) bus->sysdata)->domain_nr;
}
EXPORT_SYMBOL_GPL(pci_domain_nr);
@@ -109,18 +117,27 @@ EXPORT_SYMBOL_GPL(pci_proc_domain);
/* Modify PCI: Register I/O address translation parameters */
int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
- u64 base, u64 limit, u64 iota)
+ u64 base, u64 limit, u64 iota, u8 *status)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
struct zpci_fib fib = {0};
- u8 status;
+ u8 cc;
WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
- fib.pal = limit;
+ /* Work around off by one in ISM virt device */
+ if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
+ fib.pal = limit + (1 << 12);
+ else
+ fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
- return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+ fib.gd = zdev->gisa;
+ cc = zpci_mod_fc(req, &fib, status);
+ if (cc)
+ zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status);
+ return cc;
}
+EXPORT_SYMBOL_GPL(zpci_register_ioat);
/* Modify PCI: Unregister I/O address translation parameters */
int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
@@ -129,16 +146,19 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
struct zpci_fib fib = {0};
u8 cc, status;
+ fib.gd = zdev->gisa;
+
cc = zpci_mod_fc(req, &fib, &status);
- if (cc == 3) /* Function already gone. */
- cc = 0;
- return cc ? -EIO : 0;
+ if (cc)
+ zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
+ return cc;
}
/* Modify PCI: Set PCI function measurement parameters */
int zpci_fmb_enable_device(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+ struct zpci_iommu_ctrs *ctrs;
struct zpci_fib fib = {0};
u8 cc, status;
@@ -151,11 +171,18 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
WARN_ON((u64) zdev->fmb & 0xf);
/* reset software counters */
- atomic64_set(&zdev->allocated_pages, 0);
- atomic64_set(&zdev->mapped_pages, 0);
- atomic64_set(&zdev->unmapped_pages, 0);
+ ctrs = zpci_get_iommu_ctrs(zdev);
+ if (ctrs) {
+ atomic64_set(&ctrs->mapped_pages, 0);
+ atomic64_set(&ctrs->unmapped_pages, 0);
+ atomic64_set(&ctrs->global_rpcits, 0);
+ atomic64_set(&ctrs->sync_map_rpcits, 0);
+ atomic64_set(&ctrs->sync_rpcits, 0);
+ }
+
fib.fmb_addr = virt_to_phys(zdev->fmb);
+ fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc) {
kmem_cache_free(zdev_fmb_cache, zdev->fmb);
@@ -174,6 +201,8 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev)
if (!zdev->fmb)
return -EINVAL;
+ fib.gd = zdev->gisa;
+
/* Function measurement is disabled if fmb address is zero */
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3) /* Function already gone. */
@@ -224,41 +253,28 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
/* combine single writes by using store-block insn */
void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
{
- zpci_memcpy_toio(to, from, count);
+ zpci_memcpy_toio(to, from, count * 8);
}
-void __iomem *ioremap(unsigned long ioaddr, unsigned long size)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ unsigned long prot)
{
- struct vm_struct *area;
- unsigned long offset;
-
- if (!size)
- return NULL;
-
+ /*
+ * When PCI MIO instructions are unavailable the "physical" address
+ * encodes a hint for accessing the PCI memory space it represents.
+ * Just pass it unchanged such that ioread/iowrite can decode it.
+ */
if (!static_branch_unlikely(&have_mio))
- return (void __iomem *) ioaddr;
+ return (void __iomem *)phys_addr;
- offset = ioaddr & ~PAGE_MASK;
- ioaddr &= PAGE_MASK;
- size = PAGE_ALIGN(size + offset);
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
-
- if (ioremap_page_range((unsigned long) area->addr,
- (unsigned long) area->addr + size,
- ioaddr, PAGE_KERNEL)) {
- vunmap(area->addr);
- return NULL;
- }
- return (void __iomem *) ((unsigned long) area->addr + offset);
+ return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
}
-EXPORT_SYMBOL(ioremap);
+EXPORT_SYMBOL(ioremap_prot);
void iounmap(volatile void __iomem *addr)
{
if (static_branch_likely(&have_mio))
- vunmap((__force void *) ((unsigned long) addr & PAGE_MASK));
+ generic_iounmap(addr);
}
EXPORT_SYMBOL(iounmap);
@@ -372,29 +388,17 @@ EXPORT_SYMBOL(pci_iounmap);
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 *val)
{
- struct zpci_dev *zdev = get_zdev_by_bus(bus);
- int ret;
-
- if (!zdev || devfn != ZPCI_DEVFN)
- ret = -ENODEV;
- else
- ret = zpci_cfg_load(zdev, where, val, size);
+ struct zpci_dev *zdev = zdev_from_bus(bus, devfn);
- return ret;
+ return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV;
}
static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 val)
{
- struct zpci_dev *zdev = get_zdev_by_bus(bus);
- int ret;
+ struct zpci_dev *zdev = zdev_from_bus(bus, devfn);
- if (!zdev || devfn != ZPCI_DEVFN)
- ret = -ENODEV;
- else
- ret = zpci_cfg_store(zdev, where, val, size);
-
- return ret;
+ return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV;
}
static struct pci_ops pci_root_ops = {
@@ -402,15 +406,6 @@ static struct pci_ops pci_root_ops = {
.write = pci_write,
};
-#ifdef CONFIG_PCI_IOV
-static struct resource iov_res = {
- .name = "PCI IOV res",
- .start = 0,
- .end = -1,
- .flags = IORESOURCE_MEM,
-};
-#endif
-
static void zpci_map_resources(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
@@ -424,23 +419,14 @@ static void zpci_map_resources(struct pci_dev *pdev)
if (zpci_use_mio(zdev))
pdev->resource[i].start =
- (resource_size_t __force) zdev->bars[i].mio_wb;
+ (resource_size_t __force) zdev->bars[i].mio_wt;
else
pdev->resource[i].start = (resource_size_t __force)
pci_iomap_range_fh(pdev, i, 0, 0);
pdev->resource[i].end = pdev->resource[i].start + len - 1;
}
-#ifdef CONFIG_PCI_IOV
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
- int bar = i + PCI_IOV_RESOURCES;
-
- len = pci_resource_len(pdev, bar);
- if (!len)
- continue;
- pdev->resource[bar].parent = &iov_res;
- }
-#endif
+ zpci_iov_map_resources(pdev);
}
static void zpci_unmap_resources(struct pci_dev *pdev)
@@ -484,6 +470,34 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
spin_unlock(&zpci_iomap_lock);
}
+static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh)
+{
+ int bar, idx;
+
+ spin_lock(&zpci_iomap_lock);
+ for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
+ if (!zdev->bars[bar].size)
+ continue;
+ idx = zdev->bars[bar].map_idx;
+ if (!zpci_iomap_start[idx].count)
+ continue;
+ WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh);
+ }
+ spin_unlock(&zpci_iomap_lock);
+}
+
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh)
+{
+ if (!fh || zdev->fh == fh)
+ return;
+
+ zdev->fh = fh;
+ if (zpci_use_mio(zdev))
+ return;
+ if (zdev->has_resources && zdev_enabled(zdev))
+ zpci_do_update_iomap_fh(zdev, fh);
+}
+
static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
unsigned long size, unsigned long flags)
{
@@ -505,15 +519,14 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
return r;
}
-static int zpci_setup_bus_resources(struct zpci_dev *zdev,
- struct list_head *resources)
+int zpci_setup_bus_resources(struct zpci_dev *zdev)
{
unsigned long addr, size, flags;
struct resource *res;
int i, entry;
snprintf(zdev->res_name, sizeof(zdev->res_name),
- "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR);
+ "PCI Bus %04x:%02x", zdev->uid, ZPCI_BUS_NR);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
if (!zdev->bars[i].size)
@@ -531,7 +544,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
flags |= IORESOURCE_MEM_64;
if (zpci_use_mio(zdev))
- addr = (unsigned long) zdev->bars[i].mio_wb;
+ addr = (unsigned long) zdev->bars[i].mio_wt;
else
addr = ZPCI_ADDR(entry);
size = 1UL << zdev->bars[i].size;
@@ -542,36 +555,45 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
return -ENOMEM;
}
zdev->bars[i].res = res;
- pci_add_resource(resources, res);
}
+ zdev->has_resources = 1;
return 0;
}
static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
{
+ struct resource *res;
int i;
+ pci_lock_rescan_remove();
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
- if (!zdev->bars[i].size || !zdev->bars[i].res)
+ res = zdev->bars[i].res;
+ if (!res)
continue;
+ release_resource(res);
+ pci_bus_remove_resource(zdev->zbus->bus, res);
zpci_free_iomap(zdev, zdev->bars[i].map_idx);
- release_resource(zdev->bars[i].res);
- kfree(zdev->bars[i].res);
+ zdev->bars[i].res = NULL;
+ kfree(res);
}
+ zdev->has_resources = 0;
+ pci_unlock_rescan_remove();
}
-int pcibios_add_device(struct pci_dev *pdev)
+int pcibios_device_add(struct pci_dev *pdev)
{
+ struct zpci_dev *zdev = to_zpci(pdev);
struct resource *res;
int i;
+ /* The pdev has a reference to the zdev via its bus */
+ zpci_zdev_get(zdev);
if (pdev->is_physfn)
pdev->no_vf_scan = 1;
pdev->dev.groups = zpci_attr_groups;
- pdev->dev.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
@@ -586,7 +608,10 @@ int pcibios_add_device(struct pci_dev *pdev)
void pcibios_release_device(struct pci_dev *pdev)
{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
zpci_unmap_resources(pdev);
+ zpci_zdev_put(zdev);
}
int pcibios_enable_device(struct pci_dev *pdev, int mask)
@@ -607,210 +632,330 @@ void pcibios_disable_device(struct pci_dev *pdev)
zpci_debug_exit_device(zdev);
}
-#ifdef CONFIG_HIBERNATE_CALLBACKS
-static int zpci_restore(struct device *dev)
+static int __zpci_register_domain(int domain)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = to_zpci(pdev);
- int ret = 0;
-
- if (zdev->state != ZPCI_FN_STATE_ONLINE)
- goto out;
-
- ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
- if (ret)
- goto out;
-
- zpci_map_resources(pdev);
- zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- (u64) zdev->dma_table);
-
-out:
- return ret;
+ spin_lock(&zpci_domain_lock);
+ if (test_bit(domain, zpci_domain)) {
+ spin_unlock(&zpci_domain_lock);
+ pr_err("Domain %04x is already assigned\n", domain);
+ return -EEXIST;
+ }
+ set_bit(domain, zpci_domain);
+ spin_unlock(&zpci_domain_lock);
+ return domain;
}
-static int zpci_freeze(struct device *dev)
+static int __zpci_alloc_domain(void)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = to_zpci(pdev);
+ int domain;
- if (zdev->state != ZPCI_FN_STATE_ONLINE)
- return 0;
-
- zpci_unregister_ioat(zdev, 0);
- zpci_unmap_resources(pdev);
- return clp_disable_fh(zdev);
+ spin_lock(&zpci_domain_lock);
+ /*
+ * We can always auto allocate domains below ZPCI_NR_DEVICES.
+ * There is either a free domain or we have reached the maximum in
+ * which case we would have bailed earlier.
+ */
+ domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
+ set_bit(domain, zpci_domain);
+ spin_unlock(&zpci_domain_lock);
+ return domain;
}
-struct dev_pm_ops pcibios_pm_ops = {
- .thaw_noirq = zpci_restore,
- .freeze_noirq = zpci_freeze,
- .restore_noirq = zpci_restore,
- .poweroff_noirq = zpci_freeze,
-};
-#endif /* CONFIG_HIBERNATE_CALLBACKS */
-
-static int zpci_alloc_domain(struct zpci_dev *zdev)
+int zpci_alloc_domain(int domain)
{
if (zpci_unique_uid) {
- zdev->domain = (u16) zdev->uid;
- if (zdev->domain >= ZPCI_NR_DEVICES)
- return 0;
-
- spin_lock(&zpci_domain_lock);
- if (test_bit(zdev->domain, zpci_domain)) {
- spin_unlock(&zpci_domain_lock);
- pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n",
- zdev->fid, zdev->domain);
- return -EEXIST;
- }
- set_bit(zdev->domain, zpci_domain);
- spin_unlock(&zpci_domain_lock);
- return 0;
- }
-
- spin_lock(&zpci_domain_lock);
- zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
- if (zdev->domain == ZPCI_NR_DEVICES) {
- spin_unlock(&zpci_domain_lock);
- pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n",
- zdev->fid, ZPCI_NR_DEVICES);
- return -ENOSPC;
+ if (domain)
+ return __zpci_register_domain(domain);
+ pr_warn("UID checking was active but no UID is provided: switching to automatic domain allocation\n");
+ update_uid_checking(false);
}
- set_bit(zdev->domain, zpci_domain);
- spin_unlock(&zpci_domain_lock);
- return 0;
+ return __zpci_alloc_domain();
}
-static void zpci_free_domain(struct zpci_dev *zdev)
+void zpci_free_domain(int domain)
{
- if (zdev->domain >= ZPCI_NR_DEVICES)
- return;
-
spin_lock(&zpci_domain_lock);
- clear_bit(zdev->domain, zpci_domain);
+ clear_bit(domain, zpci_domain);
spin_unlock(&zpci_domain_lock);
}
-void pcibios_remove_bus(struct pci_bus *bus)
+
+int zpci_enable_device(struct zpci_dev *zdev)
{
- struct zpci_dev *zdev = get_zdev_by_bus(bus);
+ u32 fh = zdev->fh;
+ int rc = 0;
- zpci_exit_slot(zdev);
- zpci_cleanup_bus_resources(zdev);
- zpci_destroy_iommu(zdev);
- zpci_free_domain(zdev);
+ if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
+ rc = -EIO;
+ else
+ zpci_update_fh(zdev, fh);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_enable_device);
- spin_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- spin_unlock(&zpci_list_lock);
+int zpci_disable_device(struct zpci_dev *zdev)
+{
+ u32 fh = zdev->fh;
+ int cc, rc = 0;
- zpci_dbg(3, "rem fid:%x\n", zdev->fid);
- kfree(zdev);
+ cc = clp_disable_fh(zdev, &fh);
+ if (!cc) {
+ zpci_update_fh(zdev, fh);
+ } else if (cc == CLP_RC_SETPCIFN_ALRDY) {
+ pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
+ zdev->fid);
+ /* Function is already disabled - update handle */
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (!rc) {
+ zpci_update_fh(zdev, fh);
+ rc = -EINVAL;
+ }
+ } else {
+ rc = -EIO;
+ }
+ return rc;
}
+EXPORT_SYMBOL_GPL(zpci_disable_device);
-static int zpci_scan_bus(struct zpci_dev *zdev)
+/**
+ * zpci_hot_reset_device - perform a reset of the given zPCI function
+ * @zdev: the slot which should be reset
+ *
+ * Performs a low level reset of the zPCI function. The reset is low level in
+ * the sense that the zPCI function can be reset without detaching it from the
+ * common PCI subsystem. The reset may be performed while under control of
+ * either DMA or IOMMU APIs in which case the existing DMA/IOMMU translation
+ * table is reinstated at the end of the reset.
+ *
+ * After the reset the functions internal state is reset to an initial state
+ * equivalent to its state during boot when first probing a driver.
+ * Consequently after reset the PCI function requires re-initialization via the
+ * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
+ * and enabling the function via e.g. pci_enable_device_flags(). The caller
+ * must guard against concurrent reset attempts.
+ *
+ * In most cases this function should not be called directly but through
+ * pci_reset_function() or pci_reset_bus() which handle the save/restore and
+ * locking - asserted by lockdep.
+ *
+ * Return: 0 on success and an error value otherwise
+ */
+int zpci_hot_reset_device(struct zpci_dev *zdev)
{
- LIST_HEAD(resources);
- int ret;
+ u8 status;
+ int rc;
- ret = zpci_setup_bus_resources(zdev, &resources);
- if (ret)
- goto error;
+ lockdep_assert_held(&zdev->state_lock);
+ zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+ if (zdev_enabled(zdev)) {
+ /* Disables device access, DMAs and IRQs (reset state) */
+ rc = zpci_disable_device(zdev);
+ /*
+ * Due to a z/VM vs LPAR inconsistency in the error state the
+ * FH may indicate an enabled device but disable says the
+ * device is already disabled don't treat it as an error here.
+ */
+ if (rc == -EINVAL)
+ rc = 0;
+ if (rc)
+ return rc;
+ }
- zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
- zdev, &resources);
- if (!zdev->bus) {
- ret = -EIO;
- goto error;
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ return rc;
+
+ if (zdev->dma_table)
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table), &status);
+ if (rc) {
+ zpci_disable_device(zdev);
+ return rc;
}
- zdev->bus->max_bus_speed = zdev->max_bus_speed;
- pci_bus_add_devices(zdev->bus);
- return 0;
-error:
- zpci_cleanup_bus_resources(zdev);
- pci_free_resource_list(&resources);
- return ret;
+ return 0;
}
-int zpci_enable_device(struct zpci_dev *zdev)
+/**
+ * zpci_create_device() - Create a new zpci_dev and add it to the zbus
+ * @fid: Function ID of the device to be created
+ * @fh: Current Function Handle of the device to be created
+ * @state: Initial state after creation either Standby or Configured
+ *
+ * Creates a new zpci device and adds it to its, possibly newly created, zbus
+ * as well as zpci_list.
+ *
+ * Returns: the zdev on success or an error pointer otherwise
+ */
+struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
{
+ struct zpci_dev *zdev;
int rc;
- rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
+ zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
+ zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
+ if (!zdev)
+ return ERR_PTR(-ENOMEM);
+
+ /* FID and Function Handle are the static/dynamic identifiers */
+ zdev->fid = fid;
+ zdev->fh = fh;
+
+ /* Query function properties and update zdev */
+ rc = clp_query_pci_fn(zdev);
if (rc)
- goto out;
+ goto error;
+ zdev->state = state;
+
+ kref_init(&zdev->kref);
+ mutex_init(&zdev->state_lock);
+ mutex_init(&zdev->fmb_lock);
+ mutex_init(&zdev->kzdev_lock);
- rc = zpci_dma_init_device(zdev);
+ rc = zpci_init_iommu(zdev);
if (rc)
- goto out_dma;
+ goto error;
- zdev->state = ZPCI_FN_STATE_ONLINE;
- return 0;
+ rc = zpci_bus_device_register(zdev, &pci_root_ops);
+ if (rc)
+ goto error_destroy_iommu;
-out_dma:
- clp_disable_fh(zdev);
-out:
- return rc;
+ spin_lock(&zpci_list_lock);
+ list_add_tail(&zdev->entry, &zpci_list);
+ spin_unlock(&zpci_list_lock);
+
+ return zdev;
+
+error_destroy_iommu:
+ zpci_destroy_iommu(zdev);
+error:
+ zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
+ kfree(zdev);
+ return ERR_PTR(rc);
}
-EXPORT_SYMBOL_GPL(zpci_enable_device);
-int zpci_disable_device(struct zpci_dev *zdev)
+bool zpci_is_device_configured(struct zpci_dev *zdev)
{
- zpci_dma_exit_device(zdev);
- return clp_disable_fh(zdev);
+ enum zpci_state state = zdev->state;
+
+ return state != ZPCI_FN_STATE_RESERVED &&
+ state != ZPCI_FN_STATE_STANDBY;
+}
+
+/**
+ * zpci_scan_configured_device() - Scan a freshly configured zpci_dev
+ * @zdev: The zpci_dev to be configured
+ * @fh: The general function handle supplied by the platform
+ *
+ * Given a device in the configuration state Configured, enables, scans and
+ * adds it to the common code PCI subsystem if possible. If any failure occurs,
+ * the zpci_dev is left disabled.
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
+{
+ zpci_update_fh(zdev, fh);
+ return zpci_bus_scan_device(zdev);
}
-EXPORT_SYMBOL_GPL(zpci_disable_device);
-int zpci_create_device(struct zpci_dev *zdev)
+/**
+ * zpci_deconfigure_device() - Deconfigure a zpci_dev
+ * @zdev: The zpci_dev to configure
+ *
+ * Deconfigure a zPCI function that is currently configured and possibly known
+ * to the common code PCI subsystem.
+ * If any failure occurs the device is left as is.
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int zpci_deconfigure_device(struct zpci_dev *zdev)
{
int rc;
- rc = zpci_alloc_domain(zdev);
- if (rc)
- goto out;
+ lockdep_assert_held(&zdev->state_lock);
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+ return 0;
- rc = zpci_init_iommu(zdev);
- if (rc)
- goto out_free;
+ if (zdev->zbus->bus)
+ zpci_bus_remove_device(zdev, false);
- mutex_init(&zdev->lock);
- if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
- rc = zpci_enable_device(zdev);
+ if (zdev_enabled(zdev)) {
+ rc = zpci_disable_device(zdev);
if (rc)
- goto out_destroy_iommu;
+ return rc;
}
- rc = zpci_scan_bus(zdev);
+
+ rc = sclp_pci_deconfigure(zdev->fid);
+ zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, rc);
if (rc)
- goto out_disable;
+ return rc;
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+ return 0;
+}
+
+/**
+ * zpci_device_reserved() - Mark device as reserved
+ * @zdev: the zpci_dev that was reserved
+ *
+ * Handle the case that a given zPCI function was reserved by another system.
+ * After a call to this function the zpci_dev can not be found via
+ * get_zdev_by_fid() anymore but may still be accessible via existing
+ * references though it will not be functional anymore.
+ */
+void zpci_device_reserved(struct zpci_dev *zdev)
+{
+ /*
+ * Remove device from zpci_list as it is going away. This also
+ * makes sure we ignore subsequent zPCI events for this device.
+ */
spin_lock(&zpci_list_lock);
- list_add_tail(&zdev->entry, &zpci_list);
+ list_del(&zdev->entry);
spin_unlock(&zpci_list_lock);
+ zdev->state = ZPCI_FN_STATE_RESERVED;
+ zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+ zpci_zdev_put(zdev);
+}
- zpci_init_slot(zdev);
+void zpci_release_device(struct kref *kref)
+{
+ struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
+ int ret;
- return 0;
+ if (zdev->has_hp_slot)
+ zpci_exit_slot(zdev);
-out_disable:
- if (zdev->state == ZPCI_FN_STATE_ONLINE)
- zpci_disable_device(zdev);
-out_destroy_iommu:
- zpci_destroy_iommu(zdev);
-out_free:
- zpci_free_domain(zdev);
-out:
- return rc;
-}
+ if (zdev->zbus->bus)
+ zpci_bus_remove_device(zdev, false);
-void zpci_remove_device(struct zpci_dev *zdev)
-{
- if (!zdev->bus)
- return;
+ if (zdev_enabled(zdev))
+ zpci_disable_device(zdev);
- pci_stop_root_bus(zdev->bus);
- pci_remove_root_bus(zdev->bus);
+ switch (zdev->state) {
+ case ZPCI_FN_STATE_CONFIGURED:
+ ret = sclp_pci_deconfigure(zdev->fid);
+ zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
+ fallthrough;
+ case ZPCI_FN_STATE_STANDBY:
+ if (zdev->has_hp_slot)
+ zpci_exit_slot(zdev);
+ spin_lock(&zpci_list_lock);
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
+ zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+ fallthrough;
+ case ZPCI_FN_STATE_RESERVED:
+ if (zdev->has_resources)
+ zpci_cleanup_bus_resources(zdev);
+ zpci_bus_device_unregister(zdev);
+ zpci_destroy_iommu(zdev);
+ fallthrough;
+ default:
+ break;
+ }
+ zpci_dbg(3, "rem fid:%x\n", zdev->fid);
+ kfree_rcu(zdev, rcu);
}
int zpci_report_error(struct pci_dev *pdev,
@@ -822,6 +967,59 @@ int zpci_report_error(struct pci_dev *pdev,
}
EXPORT_SYMBOL(zpci_report_error);
+/**
+ * zpci_clear_error_state() - Clears the zPCI error state of the device
+ * @zdev: The zdev for which the zPCI error state should be reset
+ *
+ * Clear the zPCI error state of the device. If clearing the zPCI error state
+ * fails the device is left in the error state. In this case it may make sense
+ * to call zpci_io_perm_failure() on the associated pdev if it exists.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_clear_error_state(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR);
+ struct zpci_fib fib = {0};
+ u8 status;
+ int cc;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc) {
+ zpci_dbg(3, "ces fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * zpci_reset_load_store_blocked() - Re-enables L/S from error state
+ * @zdev: The zdev for which to unblock load/store access
+ *
+ * Re-enables load/store access for a PCI function in the error state while
+ * keeping DMA blocked. In this state drivers can poke MMIO space to determine
+ * if error recovery is possible while catching any rogue DMA access from the
+ * device.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK);
+ struct zpci_fib fib = {0};
+ u8 status;
+ int cc;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc) {
+ zpci_dbg(3, "rls fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
static int zpci_mem_init(void)
{
BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
@@ -842,6 +1040,9 @@ static int zpci_mem_init(void)
if (!zpci_iomap_bitmap)
goto error_iomap_bitmap;
+ if (static_branch_likely(&have_mio))
+ clp_setup_writeback_mio();
+
return 0;
error_iomap_bitmap:
kfree(zpci_iomap_start);
@@ -859,7 +1060,6 @@ static void zpci_mem_exit(void)
}
static unsigned int s390_pci_probe __initdata = 1;
-static unsigned int s390_pci_no_mio __initdata;
unsigned int s390_pci_force_floating __initdata;
static unsigned int s390_pci_initialized;
@@ -870,13 +1070,17 @@ char * __init pcibios_setup(char *str)
return NULL;
}
if (!strcmp(str, "nomio")) {
- s390_pci_no_mio = 1;
+ S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO;
return NULL;
}
if (!strcmp(str, "force_floating")) {
s390_pci_force_floating = 1;
return NULL;
}
+ if (!strcmp(str, "norid")) {
+ s390_pci_no_rid = 1;
+ return NULL;
+ }
return str;
}
@@ -892,12 +1096,14 @@ static int __init pci_base_init(void)
if (!s390_pci_probe)
return 0;
- if (!test_facility(69) || !test_facility(71))
+ if (!test_facility(69) || !test_facility(71)) {
+ pr_info("PCI is not supported because CPU facilities 69 or 71 are not available\n");
return 0;
+ }
- if (test_facility(153) && !s390_pci_no_mio) {
+ if (MACHINE_HAS_PCI_MIO) {
static_branch_enable(&have_mio);
- ctl_set_bit(2, 5);
+ system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT);
}
rc = zpci_debug_init();
@@ -912,20 +1118,15 @@ static int __init pci_base_init(void)
if (rc)
goto out_irq;
- rc = zpci_dma_init();
- if (rc)
- goto out_dma;
-
rc = clp_scan_pci_devices();
if (rc)
goto out_find;
+ zpci_bus_scan_busses();
s390_pci_initialized = 1;
return 0;
out_find:
- zpci_dma_exit();
-out_dma:
zpci_irq_exit();
out_irq:
zpci_mem_exit();
@@ -935,9 +1136,3 @@ out:
return rc;
}
subsys_initcall_sync(pci_base_init);
-
-void zpci_rescan(void)
-{
- if (zpci_is_enabled())
- clp_rescan_pci_devices_simple();
-}
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
new file mode 100644
index 000000000000..daa5d7450c7d
--- /dev/null
+++ b/arch/s390/pci/pci_bus.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/jump_label.h>
+#include <linux/pci.h>
+#include <linux/printk.h>
+
+#include <asm/pci_clp.h>
+#include <asm/pci_dma.h>
+
+#include "pci_bus.h"
+#include "pci_iov.h"
+
+static LIST_HEAD(zbus_list);
+static DEFINE_MUTEX(zbus_list_lock);
+static int zpci_nb_devices;
+
+/* zpci_bus_prepare_device - Prepare a zPCI function for scanning
+ * @zdev: the zPCI function to be prepared
+ *
+ * The PCI resources for the function are set up and added to its zbus and the
+ * function is enabled. The function must be added to a zbus which must have
+ * a PCI bus created. If an error occurs the zPCI function is not enabled.
+ *
+ * Return: 0 on success, an error code otherwise
+ */
+static int zpci_bus_prepare_device(struct zpci_dev *zdev)
+{
+ int rc, i;
+
+ if (!zdev_enabled(zdev)) {
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ return rc;
+ }
+
+ if (!zdev->has_resources) {
+ zpci_setup_bus_resources(zdev);
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (zdev->bars[i].res)
+ pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0);
+ }
+ }
+
+ return 0;
+}
+
+/* zpci_bus_scan_device - Scan a single device adding it to the PCI core
+ * @zdev: the zdev to be scanned
+ *
+ * Scans the PCI function making it available to the common PCI code.
+ *
+ * Return: 0 on success, an error value otherwise
+ */
+int zpci_bus_scan_device(struct zpci_dev *zdev)
+{
+ struct pci_dev *pdev;
+ int rc;
+
+ rc = zpci_bus_prepare_device(zdev);
+ if (rc)
+ return rc;
+
+ pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn);
+ if (!pdev)
+ return -ENODEV;
+
+ pci_lock_rescan_remove();
+ pci_bus_add_device(pdev);
+ pci_unlock_rescan_remove();
+
+ return 0;
+}
+
+/* zpci_bus_remove_device - Removes the given zdev from the PCI core
+ * @zdev: the zdev to be removed from the PCI core
+ * @set_error: if true the device's error state is set to permanent failure
+ *
+ * Sets a zPCI device to a configured but offline state; the zPCI
+ * device is still accessible through its hotplug slot and the zPCI
+ * API but is removed from the common code PCI bus, making it
+ * no longer available to drivers.
+ */
+void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error)
+{
+ struct zpci_bus *zbus = zdev->zbus;
+ struct pci_dev *pdev;
+
+ if (!zdev->zbus->bus)
+ return;
+
+ pdev = pci_get_slot(zbus->bus, zdev->devfn);
+ if (pdev) {
+ if (set_error)
+ pdev->error_state = pci_channel_io_perm_failure;
+ if (pdev->is_virtfn) {
+ zpci_iov_remove_virtfn(pdev, zdev->vfn);
+ /* balance pci_get_slot */
+ pci_dev_put(pdev);
+ return;
+ }
+ pci_stop_and_remove_bus_device_locked(pdev);
+ /* balance pci_get_slot */
+ pci_dev_put(pdev);
+ }
+}
+
+/* zpci_bus_scan_bus - Scan all configured zPCI functions on the bus
+ * @zbus: the zbus to be scanned
+ *
+ * Enables and scans all PCI functions on the bus making them available to the
+ * common PCI code. If a PCI function fails to be initialized an error will be
+ * returned but attempts will still be made for all other functions on the bus.
+ *
+ * Return: 0 on success, an error value otherwise
+ */
+int zpci_bus_scan_bus(struct zpci_bus *zbus)
+{
+ struct zpci_dev *zdev;
+ int devfn, rc, ret = 0;
+
+ for (devfn = 0; devfn < ZPCI_FUNCTIONS_PER_BUS; devfn++) {
+ zdev = zbus->function[devfn];
+ if (zdev && zdev->state == ZPCI_FN_STATE_CONFIGURED) {
+ rc = zpci_bus_prepare_device(zdev);
+ if (rc)
+ ret = -EIO;
+ }
+ }
+
+ pci_lock_rescan_remove();
+ pci_scan_child_bus(zbus->bus);
+ pci_bus_add_devices(zbus->bus);
+ pci_unlock_rescan_remove();
+
+ return ret;
+}
+
+/* zpci_bus_scan_busses - Scan all registered busses
+ *
+ * Scan all available zbusses
+ *
+ */
+void zpci_bus_scan_busses(void)
+{
+ struct zpci_bus *zbus = NULL;
+
+ mutex_lock(&zbus_list_lock);
+ list_for_each_entry(zbus, &zbus_list, bus_next) {
+ zpci_bus_scan_bus(zbus);
+ cond_resched();
+ }
+ mutex_unlock(&zbus_list_lock);
+}
+
+/* zpci_bus_create_pci_bus - Create the PCI bus associated with this zbus
+ * @zbus: the zbus holding the zdevices
+ * @fr: PCI root function that will determine the bus's domain, and bus speeed
+ * @ops: the pci operations
+ *
+ * The PCI function @fr determines the domain (its UID), multifunction property
+ * and maximum bus speed of the entire bus.
+ *
+ * Return: 0 on success, an error code otherwise
+ */
+static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, struct pci_ops *ops)
+{
+ struct pci_bus *bus;
+ int domain;
+
+ domain = zpci_alloc_domain((u16)fr->uid);
+ if (domain < 0)
+ return domain;
+
+ zbus->domain_nr = domain;
+ zbus->multifunction = fr->rid_available;
+ zbus->max_bus_speed = fr->max_bus_speed;
+
+ /*
+ * Note that the zbus->resources are taken over and zbus->resources
+ * is empty after a successful call
+ */
+ bus = pci_create_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources);
+ if (!bus) {
+ zpci_free_domain(zbus->domain_nr);
+ return -EFAULT;
+ }
+
+ zbus->bus = bus;
+
+ return 0;
+}
+
+static void zpci_bus_release(struct kref *kref)
+{
+ struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
+
+ if (zbus->bus) {
+ pci_lock_rescan_remove();
+ pci_stop_root_bus(zbus->bus);
+
+ zpci_free_domain(zbus->domain_nr);
+ pci_free_resource_list(&zbus->resources);
+
+ pci_remove_root_bus(zbus->bus);
+ pci_unlock_rescan_remove();
+ }
+
+ mutex_lock(&zbus_list_lock);
+ list_del(&zbus->bus_next);
+ mutex_unlock(&zbus_list_lock);
+ kfree(zbus);
+}
+
+static void zpci_bus_put(struct zpci_bus *zbus)
+{
+ kref_put(&zbus->kref, zpci_bus_release);
+}
+
+static struct zpci_bus *zpci_bus_get(int pchid)
+{
+ struct zpci_bus *zbus;
+
+ mutex_lock(&zbus_list_lock);
+ list_for_each_entry(zbus, &zbus_list, bus_next) {
+ if (pchid == zbus->pchid) {
+ kref_get(&zbus->kref);
+ goto out_unlock;
+ }
+ }
+ zbus = NULL;
+out_unlock:
+ mutex_unlock(&zbus_list_lock);
+ return zbus;
+}
+
+static struct zpci_bus *zpci_bus_alloc(int pchid)
+{
+ struct zpci_bus *zbus;
+
+ zbus = kzalloc(sizeof(*zbus), GFP_KERNEL);
+ if (!zbus)
+ return NULL;
+
+ zbus->pchid = pchid;
+ INIT_LIST_HEAD(&zbus->bus_next);
+ mutex_lock(&zbus_list_lock);
+ list_add_tail(&zbus->bus_next, &zbus_list);
+ mutex_unlock(&zbus_list_lock);
+
+ kref_init(&zbus->kref);
+ INIT_LIST_HEAD(&zbus->resources);
+
+ zbus->bus_resource.start = 0;
+ zbus->bus_resource.end = ZPCI_BUS_NR;
+ zbus->bus_resource.flags = IORESOURCE_BUS;
+ pci_add_resource(&zbus->resources, &zbus->bus_resource);
+
+ return zbus;
+}
+
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ /*
+ * With pdev->no_vf_scan the common PCI probing code does not
+ * perform PF/VF linking.
+ */
+ if (zdev->vfn) {
+ zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
+ pdev->no_command_memory = 1;
+ }
+}
+
+static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ int rc = -EINVAL;
+
+ if (zbus->function[zdev->devfn]) {
+ pr_err("devfn %04x is already assigned\n", zdev->devfn);
+ return rc;
+ }
+
+ zdev->zbus = zbus;
+ zbus->function[zdev->devfn] = zdev;
+ zpci_nb_devices++;
+
+ if (zbus->multifunction && !zdev->rid_available) {
+ WARN_ONCE(1, "rid_available not set for multifunction\n");
+ goto error;
+ }
+ rc = zpci_init_slot(zdev);
+ if (rc)
+ goto error;
+ zdev->has_hp_slot = 1;
+
+ return 0;
+
+error:
+ zbus->function[zdev->devfn] = NULL;
+ zdev->zbus = NULL;
+ zpci_nb_devices--;
+ return rc;
+}
+
+int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
+{
+ struct zpci_bus *zbus = NULL;
+ int rc = -EBADF;
+
+ if (zpci_nb_devices == ZPCI_NR_DEVICES) {
+ pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n",
+ zdev->fid, ZPCI_NR_DEVICES);
+ return -ENOSPC;
+ }
+
+ if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS)
+ return -EINVAL;
+
+ if (!s390_pci_no_rid && zdev->rid_available)
+ zbus = zpci_bus_get(zdev->pchid);
+
+ if (!zbus) {
+ zbus = zpci_bus_alloc(zdev->pchid);
+ if (!zbus)
+ return -ENOMEM;
+ }
+
+ if (!zbus->bus) {
+ /* The UID of the first PCI function registered with a zpci_bus
+ * is used as the domain number for that bus. Currently there
+ * is exactly one zpci_bus per domain.
+ */
+ rc = zpci_bus_create_pci_bus(zbus, zdev, ops);
+ if (rc)
+ goto error;
+ }
+
+ rc = zpci_bus_add_device(zbus, zdev);
+ if (rc)
+ goto error;
+
+ return 0;
+
+error:
+ pr_err("Adding PCI function %08x failed\n", zdev->fid);
+ zpci_bus_put(zbus);
+ return rc;
+}
+
+void zpci_bus_device_unregister(struct zpci_dev *zdev)
+{
+ struct zpci_bus *zbus = zdev->zbus;
+
+ zpci_nb_devices--;
+ zbus->function[zdev->devfn] = NULL;
+ zpci_bus_put(zbus);
+}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
new file mode 100644
index 000000000000..af9f0ac79a1b
--- /dev/null
+++ b/arch/s390/pci/pci_bus.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+
+int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
+void zpci_bus_device_unregister(struct zpci_dev *zdev);
+
+int zpci_bus_scan_bus(struct zpci_bus *zbus);
+void zpci_bus_scan_busses(void);
+
+int zpci_bus_scan_device(struct zpci_dev *zdev);
+void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
+
+void zpci_release_device(struct kref *kref);
+static inline void zpci_zdev_put(struct zpci_dev *zdev)
+{
+ if (zdev)
+ kref_put(&zdev->kref, zpci_release_device);
+}
+
+static inline void zpci_zdev_get(struct zpci_dev *zdev)
+{
+ kref_get(&zdev->kref);
+}
+
+int zpci_alloc_domain(int domain);
+void zpci_free_domain(int domain);
+int zpci_setup_bus_resources(struct zpci_dev *zdev);
+
+static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
+ unsigned int devfn)
+{
+ struct zpci_bus *zbus = bus->sysdata;
+
+ return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn];
+}
+
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 4c613e569fe0..ee90a91ed888 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -17,17 +17,20 @@
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
+#include <asm/asm-extable.h>
#include <asm/pci_debug.h>
#include <asm/pci_clp.h>
#include <asm/clp.h>
#include <uapi/asm/clp.h>
+#include "pci_bus.h"
+
bool zpci_unique_uid;
-static void update_uid_checking(bool new)
+void update_uid_checking(bool new)
{
if (zpci_unique_uid != new)
- zpci_dbg(1, "uid checking:%d\n", new);
+ zpci_dbg(3, "uid checking:%d\n", new);
zpci_unique_uid = new;
}
@@ -102,6 +105,9 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->msi_addr = response->msia;
zdev->max_msi = response->noi;
zdev->fmb_update = response->mui;
+ zdev->version = response->version;
+ zdev->maxstbl = response->maxstbl;
+ zdev->dtsm = response->dtsm;
switch (response->version) {
case 1:
@@ -155,13 +161,19 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
zdev->pfgid = response->pfgid;
zdev->pft = response->pft;
zdev->vfn = response->vfn;
+ zdev->port = response->port;
zdev->uid = response->uid;
zdev->fmb_length = sizeof(u32) * response->fmb_len;
+ zdev->rid_available = response->rid_avail;
+ zdev->is_physfn = response->is_physfn;
+ if (!s390_pci_no_rid && zdev->rid_available)
+ zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN;
memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
if (response->util_str_avail) {
memcpy(zdev->util_str, response->util_str,
sizeof(zdev->util_str));
+ zdev->util_str_avail = 1;
}
zdev->mio_capable = response->mio_addr_avail;
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
@@ -174,7 +186,7 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
return 0;
}
-static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+int clp_query_pci_fn(struct zpci_dev *zdev)
{
struct clp_req_rsp_query_pci *rrb;
int rc;
@@ -187,7 +199,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
rrb->request.hdr.len = sizeof(rrb->request);
rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
rrb->response.hdr.len = sizeof(rrb->response);
- rrb->request.fh = fh;
+ rrb->request.fh = zdev->fh;
rc = clp_req(rrb, CLP_LPS_PCI);
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
@@ -205,60 +217,39 @@ out:
return rc;
}
-int clp_add_pci_device(u32 fid, u32 fh, int configured)
-{
- struct zpci_dev *zdev;
- int rc = -ENOMEM;
-
- zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured);
- zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
- if (!zdev)
- goto error;
-
- zdev->fh = fh;
- zdev->fid = fid;
-
- /* Query function properties and update zdev */
- rc = clp_query_pci_fn(zdev, fh);
- if (rc)
- goto error;
-
- if (configured)
- zdev->state = ZPCI_FN_STATE_CONFIGURED;
- else
- zdev->state = ZPCI_FN_STATE_STANDBY;
-
- rc = zpci_create_device(zdev);
- if (rc)
- goto error;
- return 0;
-
-error:
- zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
- kfree(zdev);
- return rc;
-}
-
-/*
- * Enable/Disable a given PCI function defined by its function handle.
+/**
+ * clp_set_pci_fn() - Execute a command on a PCI function
+ * @zdev: Function that will be affected
+ * @fh: Out parameter for updated function handle
+ * @nr_dma_as: DMA address space number
+ * @command: The command code to execute
+ *
+ * Returns: 0 on success, < 0 for Linux errors (e.g. -ENOMEM), and
+ * > 0 for non-success platform responses
*/
-static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
+static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 command)
{
struct clp_req_rsp_set_pci *rrb;
int rc, retries = 100;
+ u32 gisa = 0;
+ *fh = 0;
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
+ if (command != CLP_SET_DISABLE_PCI_FN)
+ gisa = zdev->gisa;
+
do {
memset(rrb, 0, sizeof(*rrb));
rrb->request.hdr.len = sizeof(rrb->request);
rrb->request.hdr.cmd = CLP_SET_PCI_FN;
rrb->response.hdr.len = sizeof(rrb->response);
- rrb->request.fh = *fh;
+ rrb->request.fh = zdev->fh;
rrb->request.oc = command;
rrb->request.ndas = nr_dma_as;
+ rrb->request.gisa = gisa;
rc = clp_req(rrb, CLP_LPS_PCI);
if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
@@ -269,50 +260,107 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
}
} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
- if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+ if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
*fh = rrb->response.fh;
- else {
+ } else {
zpci_err("Set PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
- rc = -EIO;
+ if (!rc)
+ rc = rrb->response.hdr.rsp;
}
clp_free_block(rrb);
return rc;
}
-int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
+int clp_setup_writeback_mio(void)
{
- u32 fh = zdev->fh;
+ struct clp_req_rsp_slpc_pci *rrb;
+ u8 wb_bit_pos;
int rc;
- rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
- zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
- if (rc)
- goto out;
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
+ memset(rrb, 0, sizeof(*rrb));
+ rrb->request.hdr.len = sizeof(rrb->request);
+ rrb->request.hdr.cmd = CLP_SLPC;
+ rrb->response.hdr.len = sizeof(rrb->response);
+
+ rc = clp_req(rrb, CLP_LPS_PCI);
+ if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+ if (rrb->response.vwb) {
+ wb_bit_pos = rrb->response.mio_wb;
+ set_bit_inv(wb_bit_pos, &mio_wb_bit_mask);
+ zpci_dbg(3, "wb bit: %d\n", wb_bit_pos);
+ } else {
+ zpci_dbg(3, "wb bit: n.a.\n");
+ }
+
+ } else {
+ zpci_err("SLPC PCI:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ rc = -EIO;
+ }
+ clp_free_block(rrb);
+ return rc;
+}
+
+int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as)
+{
+ int rc;
- zdev->fh = fh;
- if (zpci_use_mio(zdev)) {
- rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO);
- zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
+ rc = clp_set_pci_fn(zdev, fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
+ zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, *fh, rc);
+ if (!rc && zpci_use_mio(zdev)) {
+ rc = clp_set_pci_fn(zdev, fh, nr_dma_as, CLP_SET_ENABLE_MIO);
+ zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n",
+ zdev->fid, *fh, rc);
if (rc)
- clp_disable_fh(zdev);
+ clp_disable_fh(zdev, fh);
}
-out:
return rc;
}
-int clp_disable_fh(struct zpci_dev *zdev)
+int clp_disable_fh(struct zpci_dev *zdev, u32 *fh)
{
- u32 fh = zdev->fh;
int rc;
if (!zdev_enabled(zdev))
return 0;
- rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN);
- zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
- if (!rc)
- zdev->fh = fh;
+ rc = clp_set_pci_fn(zdev, fh, 0, CLP_SET_DISABLE_PCI_FN);
+ zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, *fh, rc);
+ return rc;
+}
+
+static int clp_list_pci_req(struct clp_req_rsp_list_pci *rrb,
+ u64 *resume_token, int *nentries)
+{
+ int rc;
+
+ memset(rrb, 0, sizeof(*rrb));
+ rrb->request.hdr.len = sizeof(rrb->request);
+ rrb->request.hdr.cmd = CLP_LIST_PCI;
+ /* store as many entries as possible */
+ rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN;
+ rrb->request.resume_token = *resume_token;
+
+ /* Get PCI function handle list */
+ rc = clp_req(rrb, CLP_LPS_PCI);
+ if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+ zpci_err("List PCI FN:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ return -EIO;
+ }
+
+ update_uid_checking(rrb->response.uid_checking);
+ WARN_ON_ONCE(rrb->response.entry_size !=
+ sizeof(struct clp_fh_list_entry));
+
+ *nentries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
+ rrb->response.entry_size;
+ *resume_token = rrb->response.resume_token;
return rc;
}
@@ -321,53 +369,43 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, void *data,
void (*cb)(struct clp_fh_list_entry *, void *))
{
u64 resume_token = 0;
- int entries, i, rc;
+ int nentries, i, rc;
do {
- memset(rrb, 0, sizeof(*rrb));
- rrb->request.hdr.len = sizeof(rrb->request);
- rrb->request.hdr.cmd = CLP_LIST_PCI;
- /* store as many entries as possible */
- rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN;
- rrb->request.resume_token = resume_token;
-
- /* Get PCI function handle list */
- rc = clp_req(rrb, CLP_LPS_PCI);
- if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
- zpci_err("List PCI FN:\n");
- zpci_err_clp(rrb->response.hdr.rsp, rc);
- rc = -EIO;
- goto out;
- }
-
- update_uid_checking(rrb->response.uid_checking);
- WARN_ON_ONCE(rrb->response.entry_size !=
- sizeof(struct clp_fh_list_entry));
-
- entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
- rrb->response.entry_size;
-
- resume_token = rrb->response.resume_token;
- for (i = 0; i < entries; i++)
+ rc = clp_list_pci_req(rrb, &resume_token, &nentries);
+ if (rc)
+ return rc;
+ for (i = 0; i < nentries; i++)
cb(&rrb->response.fh_list[i], data);
} while (resume_token);
-out:
+
return rc;
}
-static void __clp_add(struct clp_fh_list_entry *entry, void *data)
+static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid,
+ struct clp_fh_list_entry *entry)
{
- struct zpci_dev *zdev;
+ struct clp_fh_list_entry *fh_list;
+ u64 resume_token = 0;
+ int nentries, i, rc;
- if (!entry->vendor_id)
- return;
+ do {
+ rc = clp_list_pci_req(rrb, &resume_token, &nentries);
+ if (rc)
+ return rc;
+ fh_list = rrb->response.fh_list;
+ for (i = 0; i < nentries; i++) {
+ if (fh_list[i].fid == fid) {
+ *entry = fh_list[i];
+ return 0;
+ }
+ }
+ } while (resume_token);
- zdev = get_zdev_by_fid(entry->fid);
- if (!zdev)
- clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+ return -ENODEV;
}
-static void __clp_update(struct clp_fh_list_entry *entry, void *data)
+static void __clp_add(struct clp_fh_list_entry *entry, void *data)
{
struct zpci_dev *zdev;
@@ -375,10 +413,11 @@ static void __clp_update(struct clp_fh_list_entry *entry, void *data)
return;
zdev = get_zdev_by_fid(entry->fid);
- if (!zdev)
+ if (zdev) {
+ zpci_zdev_put(zdev);
return;
-
- zdev->fh = entry->fh;
+ }
+ zpci_create_device(entry->fid, entry->fh, entry->config_state);
}
int clp_scan_pci_devices(void)
@@ -396,66 +435,44 @@ int clp_scan_pci_devices(void)
return rc;
}
-int clp_rescan_pci_devices(void)
-{
- struct clp_req_rsp_list_pci *rrb;
- int rc;
-
- zpci_remove_reserved_devices();
-
- rrb = clp_alloc_block(GFP_KERNEL);
- if (!rrb)
- return -ENOMEM;
-
- rc = clp_list_pci(rrb, NULL, __clp_add);
-
- clp_free_block(rrb);
- return rc;
-}
-
-int clp_rescan_pci_devices_simple(void)
+/*
+ * Get the current function handle of the function matching @fid
+ */
+int clp_refresh_fh(u32 fid, u32 *fh)
{
struct clp_req_rsp_list_pci *rrb;
+ struct clp_fh_list_entry entry;
int rc;
rrb = clp_alloc_block(GFP_NOWAIT);
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, NULL, __clp_update);
+ rc = clp_find_pci(rrb, fid, &entry);
+ if (!rc)
+ *fh = entry.fh;
clp_free_block(rrb);
return rc;
}
-struct clp_state_data {
- u32 fid;
- enum zpci_state state;
-};
-
-static void __clp_get_state(struct clp_fh_list_entry *entry, void *data)
-{
- struct clp_state_data *sd = data;
-
- if (entry->fid != sd->fid)
- return;
-
- sd->state = entry->config_state;
-}
-
int clp_get_state(u32 fid, enum zpci_state *state)
{
struct clp_req_rsp_list_pci *rrb;
- struct clp_state_data sd = {fid, ZPCI_FN_STATE_RESERVED};
+ struct clp_fh_list_entry entry;
int rc;
rrb = clp_alloc_block(GFP_ATOMIC);
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, &sd, __clp_get_state);
- if (!rc)
- *state = sd.state;
+ rc = clp_find_pci(rrb, fid, &entry);
+ if (!rc) {
+ *state = entry.config_state;
+ } else if (rc == -ENODEV) {
+ *state = ZPCI_FN_STATE_RESERVED;
+ rc = 0;
+ }
clp_free_block(rrb);
return rc;
@@ -481,7 +498,7 @@ static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb)
}
}
-static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc_pci *lpcb)
{
unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
@@ -649,9 +666,4 @@ static struct miscdevice clp_misc_device = {
.fops = &clp_misc_fops,
};
-static int __init clp_misc_init(void)
-{
- return misc_register(&clp_misc_device);
-}
-
-device_initcall(clp_misc_init);
+builtin_misc_device(clp_misc_device);
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 3408c0df3ebf..2cb5043a997d 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -53,9 +53,11 @@ static char *pci_fmt3_names[] = {
};
static char *pci_sw_names[] = {
- "Allocated pages",
"Mapped pages",
"Unmapped pages",
+ "Global RPCITs",
+ "Sync Map RPCITs",
+ "Sync RPCITs",
};
static void pci_fmb_show(struct seq_file *m, char *name[], int length,
@@ -69,10 +71,14 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,
static void pci_sw_counter_show(struct seq_file *m)
{
- struct zpci_dev *zdev = m->private;
- atomic64_t *counter = &zdev->allocated_pages;
+ struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private);
+ atomic64_t *counter;
int i;
+ if (!ctrs)
+ return;
+
+ counter = &ctrs->mapped_pages;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
atomic64_read(counter));
@@ -85,9 +91,9 @@ static int pci_perf_show(struct seq_file *m, void *v)
if (!zdev)
return 0;
- mutex_lock(&zdev->lock);
+ mutex_lock(&zdev->fmb_lock);
if (!zdev->fmb) {
- mutex_unlock(&zdev->lock);
+ mutex_unlock(&zdev->fmb_lock);
seq_puts(m, "FMB statistics disabled\n");
return 0;
}
@@ -124,7 +130,7 @@ static int pci_perf_show(struct seq_file *m, void *v)
}
pci_sw_counter_show(m);
- mutex_unlock(&zdev->lock);
+ mutex_unlock(&zdev->fmb_lock);
return 0;
}
@@ -142,7 +148,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
if (rc)
return rc;
- mutex_lock(&zdev->lock);
+ mutex_lock(&zdev->fmb_lock);
switch (val) {
case 0:
rc = zpci_fmb_disable_device(zdev);
@@ -151,7 +157,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
rc = zpci_fmb_enable_device(zdev);
break;
}
- mutex_unlock(&zdev->lock);
+ mutex_unlock(&zdev->fmb_lock);
return rc ? rc : count;
}
@@ -196,7 +202,7 @@ int __init zpci_debug_init(void)
if (!pci_debug_err_id)
return -EINVAL;
debug_register_view(pci_debug_err_id, &debug_hex_ascii_view);
- debug_set_level(pci_debug_err_id, 6);
+ debug_set_level(pci_debug_err_id, 3);
debugfs_root = debugfs_create_dir("pci", NULL);
return 0;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
deleted file mode 100644
index 64b1399a73f0..000000000000
--- a/arch/s390/pci/pci_dma.c
+++ /dev/null
@@ -1,684 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- * Jan Glauber <jang@linux.vnet.ibm.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/iommu-helper.h>
-#include <linux/dma-mapping.h>
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <asm/pci_dma.h>
-
-static struct kmem_cache *dma_region_table_cache;
-static struct kmem_cache *dma_page_table_cache;
-static int s390_iommu_strict;
-
-static int zpci_refresh_global(struct zpci_dev *zdev)
-{
- return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
- zdev->iommu_pages * PAGE_SIZE);
-}
-
-unsigned long *dma_alloc_cpu_table(void)
-{
- unsigned long *table, *entry;
-
- table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
- if (!table)
- return NULL;
-
- for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
- *entry = ZPCI_TABLE_INVALID;
- return table;
-}
-
-static void dma_free_cpu_table(void *table)
-{
- kmem_cache_free(dma_region_table_cache, table);
-}
-
-static unsigned long *dma_alloc_page_table(void)
-{
- unsigned long *table, *entry;
-
- table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
- if (!table)
- return NULL;
-
- for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
- *entry = ZPCI_PTE_INVALID;
- return table;
-}
-
-static void dma_free_page_table(void *table)
-{
- kmem_cache_free(dma_page_table_cache, table);
-}
-
-static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
-{
- unsigned long *sto;
-
- if (reg_entry_isvalid(*entry))
- sto = get_rt_sto(*entry);
- else {
- sto = dma_alloc_cpu_table();
- if (!sto)
- return NULL;
-
- set_rt_sto(entry, sto);
- validate_rt_entry(entry);
- entry_clr_protected(entry);
- }
- return sto;
-}
-
-static unsigned long *dma_get_page_table_origin(unsigned long *entry)
-{
- unsigned long *pto;
-
- if (reg_entry_isvalid(*entry))
- pto = get_st_pto(*entry);
- else {
- pto = dma_alloc_page_table();
- if (!pto)
- return NULL;
- set_st_pto(entry, pto);
- validate_st_entry(entry);
- entry_clr_protected(entry);
- }
- return pto;
-}
-
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
-{
- unsigned long *sto, *pto;
- unsigned int rtx, sx, px;
-
- rtx = calc_rtx(dma_addr);
- sto = dma_get_seg_table_origin(&rto[rtx]);
- if (!sto)
- return NULL;
-
- sx = calc_sx(dma_addr);
- pto = dma_get_page_table_origin(&sto[sx]);
- if (!pto)
- return NULL;
-
- px = calc_px(dma_addr);
- return &pto[px];
-}
-
-void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
-{
- if (flags & ZPCI_PTE_INVALID) {
- invalidate_pt_entry(entry);
- } else {
- set_pt_pfaa(entry, page_addr);
- validate_pt_entry(entry);
- }
-
- if (flags & ZPCI_TABLE_PROTECTED)
- entry_set_protected(entry);
- else
- entry_clr_protected(entry);
-}
-
-static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
- dma_addr_t dma_addr, size_t size, int flags)
-{
- unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- u8 *page_addr = (u8 *) (pa & PAGE_MASK);
- unsigned long irq_flags;
- unsigned long *entry;
- int i, rc = 0;
-
- if (!nr_pages)
- return -EINVAL;
-
- spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
- if (!zdev->dma_table) {
- rc = -EINVAL;
- goto out_unlock;
- }
-
- for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
- if (!entry) {
- rc = -ENOMEM;
- goto undo_cpu_trans;
- }
- dma_update_cpu_trans(entry, page_addr, flags);
- page_addr += PAGE_SIZE;
- dma_addr += PAGE_SIZE;
- }
-
-undo_cpu_trans:
- if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
- flags = ZPCI_PTE_INVALID;
- while (i-- > 0) {
- page_addr -= PAGE_SIZE;
- dma_addr -= PAGE_SIZE;
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
- if (!entry)
- break;
- dma_update_cpu_trans(entry, page_addr, flags);
- }
- }
-out_unlock:
- spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
- return rc;
-}
-
-static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
- size_t size, int flags)
-{
- unsigned long irqflags;
- int ret;
-
- /*
- * With zdev->tlb_refresh == 0, rpcit is not required to establish new
- * translations when previously invalid translation-table entries are
- * validated. With lazy unmap, rpcit is skipped for previously valid
- * entries, but a global rpcit is then required before any address can
- * be re-used, i.e. after each iommu bitmap wrap-around.
- */
- if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
- if (!zdev->tlb_refresh)
- return 0;
- } else {
- if (!s390_iommu_strict)
- return 0;
- }
-
- ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
- PAGE_ALIGN(size));
- if (ret == -ENOMEM && !s390_iommu_strict) {
- /* enable the hypervisor to free some resources */
- if (zpci_refresh_global(zdev))
- goto out;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
- bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
- zdev->lazy_bitmap, zdev->iommu_pages);
- bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
- ret = 0;
- }
-out:
- return ret;
-}
-
-static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
- dma_addr_t dma_addr, size_t size, int flags)
-{
- int rc;
-
- rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
- if (rc)
- return rc;
-
- rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
- if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
- __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
-
- return rc;
-}
-
-void dma_free_seg_table(unsigned long entry)
-{
- unsigned long *sto = get_rt_sto(entry);
- int sx;
-
- for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
- if (reg_entry_isvalid(sto[sx]))
- dma_free_page_table(get_st_pto(sto[sx]));
-
- dma_free_cpu_table(sto);
-}
-
-void dma_cleanup_tables(unsigned long *table)
-{
- int rtx;
-
- if (!table)
- return;
-
- for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
- if (reg_entry_isvalid(table[rtx]))
- dma_free_seg_table(table[rtx]);
-
- dma_free_cpu_table(table);
-}
-
-static unsigned long __dma_alloc_iommu(struct device *dev,
- unsigned long start, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long boundary_size;
-
- boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
- PAGE_SIZE) >> PAGE_SHIFT;
- return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
- start, size, zdev->start_dma >> PAGE_SHIFT,
- boundary_size, 0);
-}
-
-static dma_addr_t dma_alloc_address(struct device *dev, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long offset, flags;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
- if (offset == -1) {
- if (!s390_iommu_strict) {
- /* global flush before DMA addresses are reused */
- if (zpci_refresh_global(zdev))
- goto out_error;
-
- bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
- zdev->lazy_bitmap, zdev->iommu_pages);
- bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
- }
- /* wrap-around */
- offset = __dma_alloc_iommu(dev, 0, size);
- if (offset == -1)
- goto out_error;
- }
- zdev->next_bit = offset + size;
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-
- return zdev->start_dma + offset * PAGE_SIZE;
-
-out_error:
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
- return DMA_MAPPING_ERROR;
-}
-
-static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long flags, offset;
-
- offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- if (!zdev->iommu_bitmap)
- goto out;
-
- if (s390_iommu_strict)
- bitmap_clear(zdev->iommu_bitmap, offset, size);
- else
- bitmap_set(zdev->lazy_bitmap, offset, size);
-
-out:
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-}
-
-static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
-{
- struct {
- unsigned long rc;
- unsigned long addr;
- } __packed data = {rc, addr};
-
- zpci_err_hex(&data, sizeof(data));
-}
-
-static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long pa = page_to_phys(page) + offset;
- int flags = ZPCI_PTE_VALID;
- unsigned long nr_pages;
- dma_addr_t dma_addr;
- int ret;
-
- /* This rounds up number of pages based on size and offset */
- nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
- dma_addr = dma_alloc_address(dev, nr_pages);
- if (dma_addr == DMA_MAPPING_ERROR) {
- ret = -ENOSPC;
- goto out_err;
- }
-
- /* Use rounded up size */
- size = nr_pages * PAGE_SIZE;
-
- if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
- flags |= ZPCI_TABLE_PROTECTED;
-
- ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
- if (ret)
- goto out_free;
-
- atomic64_add(nr_pages, &zdev->mapped_pages);
- return dma_addr + (offset & ~PAGE_MASK);
-
-out_free:
- dma_free_address(dev, dma_addr, nr_pages);
-out_err:
- zpci_err("map error:\n");
- zpci_err_dma(ret, pa);
- return DMA_MAPPING_ERROR;
-}
-
-static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- int npages, ret;
-
- npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
- dma_addr = dma_addr & PAGE_MASK;
- ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
- ZPCI_PTE_INVALID);
- if (ret) {
- zpci_err("unmap error:\n");
- zpci_err_dma(ret, dma_addr);
- return;
- }
-
- atomic64_add(npages, &zdev->unmapped_pages);
- dma_free_address(dev, dma_addr, npages);
-}
-
-static void *s390_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- struct page *page;
- unsigned long pa;
- dma_addr_t map;
-
- size = PAGE_ALIGN(size);
- page = alloc_pages(flag | __GFP_ZERO, get_order(size));
- if (!page)
- return NULL;
-
- pa = page_to_phys(page);
- map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
- if (dma_mapping_error(dev, map)) {
- free_pages(pa, get_order(size));
- return NULL;
- }
-
- atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
- if (dma_handle)
- *dma_handle = map;
- return (void *) pa;
-}
-
-static void s390_dma_free(struct device *dev, size_t size,
- void *pa, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
- size = PAGE_ALIGN(size);
- atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
- s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
- free_pages((unsigned long) pa, get_order(size));
-}
-
-/* Map a segment into a contiguous dma address area */
-static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
- size_t size, dma_addr_t *handle,
- enum dma_data_direction dir)
-{
- unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- dma_addr_t dma_addr_base, dma_addr;
- int flags = ZPCI_PTE_VALID;
- struct scatterlist *s;
- unsigned long pa = 0;
- int ret;
-
- dma_addr_base = dma_alloc_address(dev, nr_pages);
- if (dma_addr_base == DMA_MAPPING_ERROR)
- return -ENOMEM;
-
- dma_addr = dma_addr_base;
- if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
- flags |= ZPCI_TABLE_PROTECTED;
-
- for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
- pa = page_to_phys(sg_page(s));
- ret = __dma_update_trans(zdev, pa, dma_addr,
- s->offset + s->length, flags);
- if (ret)
- goto unmap;
-
- dma_addr += s->offset + s->length;
- }
- ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
- if (ret)
- goto unmap;
-
- *handle = dma_addr_base;
- atomic64_add(nr_pages, &zdev->mapped_pages);
-
- return ret;
-
-unmap:
- dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
- ZPCI_PTE_INVALID);
- dma_free_address(dev, dma_addr_base, nr_pages);
- zpci_err("map error:\n");
- zpci_err_dma(ret, pa);
- return ret;
-}
-
-static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nr_elements, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s = sg, *start = sg, *dma = sg;
- unsigned int max = dma_get_max_seg_size(dev);
- unsigned int size = s->offset + s->length;
- unsigned int offset = s->offset;
- int count = 0, i;
-
- for (i = 1; i < nr_elements; i++) {
- s = sg_next(s);
-
- s->dma_address = DMA_MAPPING_ERROR;
- s->dma_length = 0;
-
- if (s->offset || (size & ~PAGE_MASK) ||
- size + s->length > max) {
- if (__s390_dma_map_sg(dev, start, size,
- &dma->dma_address, dir))
- goto unmap;
-
- dma->dma_address += offset;
- dma->dma_length = size - offset;
-
- size = offset = s->offset;
- start = s;
- dma = sg_next(dma);
- count++;
- }
- size += s->length;
- }
- if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir))
- goto unmap;
-
- dma->dma_address += offset;
- dma->dma_length = size - offset;
-
- return count + 1;
-unmap:
- for_each_sg(sg, s, count, i)
- s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
- dir, attrs);
-
- return 0;
-}
-
-static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nr_elements, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nr_elements, i) {
- if (s->dma_length)
- s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
- dir, attrs);
- s->dma_address = 0;
- s->dma_length = 0;
- }
-}
-
-int zpci_dma_init_device(struct zpci_dev *zdev)
-{
- int rc;
-
- /*
- * At this point, if the device is part of an IOMMU domain, this would
- * be a strong hint towards a bug in the IOMMU API (common) code and/or
- * simultaneous access via IOMMU and DMA API. So let's issue a warning.
- */
- WARN_ON(zdev->s390_domain);
-
- spin_lock_init(&zdev->iommu_bitmap_lock);
- spin_lock_init(&zdev->dma_table_lock);
-
- zdev->dma_table = dma_alloc_cpu_table();
- if (!zdev->dma_table) {
- rc = -ENOMEM;
- goto out;
- }
-
- /*
- * Restrict the iommu bitmap size to the minimum of the following:
- * - main memory size
- * - 3-level pagetable address limit minus start_dma offset
- * - DMA address range allowed by the hardware (clp query pci fn)
- *
- * Also set zdev->end_dma to the actual end address of the usable
- * range, instead of the theoretical maximum as reported by hardware.
- */
- zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
- zdev->iommu_size = min3((u64) high_memory,
- ZPCI_TABLE_SIZE_RT - zdev->start_dma,
- zdev->end_dma - zdev->start_dma + 1);
- zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
- zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
- zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
- if (!zdev->iommu_bitmap) {
- rc = -ENOMEM;
- goto free_dma_table;
- }
- if (!s390_iommu_strict) {
- zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
- if (!zdev->lazy_bitmap) {
- rc = -ENOMEM;
- goto free_bitmap;
- }
-
- }
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- (u64) zdev->dma_table);
- if (rc)
- goto free_bitmap;
-
- return 0;
-free_bitmap:
- vfree(zdev->iommu_bitmap);
- zdev->iommu_bitmap = NULL;
- vfree(zdev->lazy_bitmap);
- zdev->lazy_bitmap = NULL;
-free_dma_table:
- dma_free_cpu_table(zdev->dma_table);
- zdev->dma_table = NULL;
-out:
- return rc;
-}
-
-void zpci_dma_exit_device(struct zpci_dev *zdev)
-{
- /*
- * At this point, if the device is part of an IOMMU domain, this would
- * be a strong hint towards a bug in the IOMMU API (common) code and/or
- * simultaneous access via IOMMU and DMA API. So let's issue a warning.
- */
- WARN_ON(zdev->s390_domain);
-
- if (zpci_unregister_ioat(zdev, 0))
- return;
-
- dma_cleanup_tables(zdev->dma_table);
- zdev->dma_table = NULL;
- vfree(zdev->iommu_bitmap);
- zdev->iommu_bitmap = NULL;
- vfree(zdev->lazy_bitmap);
- zdev->lazy_bitmap = NULL;
-
- zdev->next_bit = 0;
-}
-
-static int __init dma_alloc_cpu_table_caches(void)
-{
- dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
- ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
- 0, NULL);
- if (!dma_region_table_cache)
- return -ENOMEM;
-
- dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
- ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
- 0, NULL);
- if (!dma_page_table_cache) {
- kmem_cache_destroy(dma_region_table_cache);
- return -ENOMEM;
- }
- return 0;
-}
-
-int __init zpci_dma_init(void)
-{
- return dma_alloc_cpu_table_caches();
-}
-
-void zpci_dma_exit(void)
-{
- kmem_cache_destroy(dma_page_table_cache);
- kmem_cache_destroy(dma_region_table_cache);
-}
-
-const struct dma_map_ops s390_pci_dma_ops = {
- .alloc = s390_dma_alloc,
- .free = s390_dma_free,
- .map_sg = s390_dma_map_sg,
- .unmap_sg = s390_dma_unmap_sg,
- .map_page = s390_dma_map_pages,
- .unmap_page = s390_dma_unmap_pages,
- .mmap = dma_common_mmap,
- .get_sgtable = dma_common_get_sgtable,
- /* dma_supported is unconditionally true without a callback */
-};
-EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
-
-static int __init s390_iommu_setup(char *str)
-{
- if (!strcmp(str, "strict"))
- s390_iommu_strict = 1;
- return 1;
-}
-
-__setup("s390_iommu=", s390_iommu_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 8d6ee4af4230..dbe95ec5917e 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -12,8 +12,11 @@
#include <linux/kernel.h>
#include <linux/pci.h>
#include <asm/pci_debug.h>
+#include <asm/pci_dma.h>
#include <asm/sclp.h>
+#include "pci_bus.h"
+
/* Content Code Description for PCI Function Error */
struct zpci_ccdf_err {
u32 reserved1;
@@ -44,25 +47,257 @@ struct zpci_ccdf_avail {
u16 pec; /* PCI event code */
} __packed;
+static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
+{
+ switch (ers_res) {
+ case PCI_ERS_RESULT_CAN_RECOVER:
+ case PCI_ERS_RESULT_RECOVERED:
+ case PCI_ERS_RESULT_NEED_RESET:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static bool is_passed_through(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ bool ret;
+
+ mutex_lock(&zdev->kzdev_lock);
+ ret = !!zdev->kzdev;
+ mutex_unlock(&zdev->kzdev_lock);
+
+ return ret;
+}
+
+static bool is_driver_supported(struct pci_driver *driver)
+{
+ if (!driver || !driver->err_handler)
+ return false;
+ if (!driver->err_handler->error_detected)
+ return false;
+ if (!driver->err_handler->slot_reset)
+ return false;
+ if (!driver->err_handler->resume)
+ return false;
+ return true;
+}
+
+static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
+ struct pci_driver *driver)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+
+ ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
+ if (ers_result_indicates_abort(ers_res))
+ pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
+ else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+
+ return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
+ struct pci_driver *driver)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ int rc;
+
+ pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
+ rc = zpci_reset_load_store_blocked(zdev);
+ if (rc) {
+ pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
+ /* Let's try a full reset instead */
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+
+ if (driver->err_handler->mmio_enabled) {
+ ers_res = driver->err_handler->mmio_enabled(pdev);
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+ pci_name(pdev));
+ return ers_res;
+ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+ return ers_res;
+ }
+ }
+
+ pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
+ rc = zpci_clear_error_state(zdev);
+ if (!rc) {
+ pdev->error_state = pci_channel_io_normal;
+ } else {
+ pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
+ /* Let's try a full reset instead */
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+
+ return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
+ struct pci_driver *driver)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+
+ pr_info("%s: Initiating reset\n", pci_name(pdev));
+ if (zpci_hot_reset_device(to_zpci(pdev))) {
+ pr_err("%s: The reset request failed\n", pci_name(pdev));
+ return ers_res;
+ }
+ pdev->error_state = pci_channel_io_normal;
+ ers_res = driver->err_handler->slot_reset(pdev);
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
+ return ers_res;
+ }
+
+ return ers_res;
+}
+
+/* zpci_event_attempt_error_recovery - Try to recover the given PCI function
+ * @pdev: PCI function to recover currently in the error state
+ *
+ * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
+ * With the simplification that recovery always happens per function
+ * and the platform determines which functions are affected for
+ * multi-function devices.
+ */
+static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
+{
+ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct pci_driver *driver;
+
+ /*
+ * Ensure that the PCI function is not removed concurrently, no driver
+ * is unbound or probed and that userspace can't access its
+ * configuration space while we perform recovery.
+ */
+ pci_dev_lock(pdev);
+ if (pdev->error_state == pci_channel_io_perm_failure) {
+ ers_res = PCI_ERS_RESULT_DISCONNECT;
+ goto out_unlock;
+ }
+ pdev->error_state = pci_channel_io_frozen;
+
+ if (is_passed_through(pdev)) {
+ pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
+ pci_name(pdev));
+ goto out_unlock;
+ }
+
+ driver = to_pci_driver(pdev->dev.driver);
+ if (!is_driver_supported(driver)) {
+ if (!driver)
+ pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
+ pci_name(pdev));
+ else
+ pr_info("%s: The %s driver bound to the device does not support error recovery\n",
+ pci_name(pdev),
+ driver->name);
+ goto out_unlock;
+ }
+
+ ers_res = zpci_event_notify_error_detected(pdev, driver);
+ if (ers_result_indicates_abort(ers_res))
+ goto out_unlock;
+
+ if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+ ers_res = zpci_event_do_error_state_clear(pdev, driver);
+ if (ers_result_indicates_abort(ers_res))
+ goto out_unlock;
+ }
+
+ if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+ ers_res = zpci_event_do_reset(pdev, driver);
+
+ if (ers_res != PCI_ERS_RESULT_RECOVERED) {
+ pr_err("%s: Automatic recovery failed; operator intervention is required\n",
+ pci_name(pdev));
+ goto out_unlock;
+ }
+
+ pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
+ if (driver->err_handler->resume)
+ driver->err_handler->resume(pdev);
+out_unlock:
+ pci_dev_unlock(pdev);
+
+ return ers_res;
+}
+
+/* zpci_event_io_failure - Report PCI channel failure state to driver
+ * @pdev: PCI function for which to report
+ * @es: PCI channel failure state to report
+ */
+static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
+{
+ struct pci_driver *driver;
+
+ pci_dev_lock(pdev);
+ pdev->error_state = es;
+ /**
+ * While vfio-pci's error_detected callback notifies user-space QEMU
+ * reacts to this by freezing the guest. In an s390 environment PCI
+ * errors are rarely fatal so this is overkill. Instead in the future
+ * we will inject the error event and let the guest recover the device
+ * itself.
+ */
+ if (is_passed_through(pdev))
+ goto out;
+ driver = to_pci_driver(pdev->dev.driver);
+ if (driver && driver->err_handler && driver->err_handler->error_detected)
+ driver->err_handler->error_detected(pdev, pdev->error_state);
+out:
+ pci_dev_unlock(pdev);
+}
+
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
+ pci_ers_result_t ers_res;
+ zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
+ ccdf->fid, ccdf->fh, ccdf->pec);
zpci_err("error CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
- if (zdev)
- pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+ if (zdev) {
+ mutex_lock(&zdev->state_lock);
+ zpci_update_fh(zdev, ccdf->fh);
+ if (zdev->zbus->bus)
+ pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+ }
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
if (!pdev)
- return;
+ goto no_pdev;
- pdev->error_state = pci_channel_io_perm_failure;
+ switch (ccdf->pec) {
+ case 0x003a: /* Service Action or Error Recovery Successful */
+ ers_res = zpci_event_attempt_error_recovery(pdev);
+ if (ers_res != PCI_ERS_RESULT_RECOVERED)
+ zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
+ break;
+ default:
+ /*
+ * Mark as frozen not permanently failed because the device
+ * could be subsequently recovered by the platform.
+ */
+ zpci_event_io_failure(pdev, pci_channel_io_frozen);
+ break;
+ }
pci_dev_put(pdev);
+no_pdev:
+ if (zdev)
+ mutex_unlock(&zdev->state_lock);
+ zpci_zdev_put(zdev);
}
void zpci_event_error(void *data)
@@ -71,90 +306,94 @@ void zpci_event_error(void *data)
__zpci_event_error(data);
}
+static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
+{
+ zpci_update_fh(zdev, fh);
+ /* Give the driver a hint that the function is
+ * already unusable.
+ */
+ zpci_bus_remove_device(zdev, true);
+ /* Even though the device is already gone we still
+ * need to free zPCI resources as part of the disable.
+ */
+ if (zdev_enabled(zdev))
+ zpci_disable_device(zdev);
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+}
+
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- struct pci_dev *pdev = NULL;
+ bool existing_zdev = !!zdev;
enum zpci_state state;
- int ret;
- if (zdev)
- pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+ zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
+ ccdf->fid, ccdf->fh, ccdf->pec);
- pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
- pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
- zpci_err("avail CCDF:\n");
- zpci_err_hex(ccdf, sizeof(*ccdf));
+ if (existing_zdev)
+ mutex_lock(&zdev->state_lock);
switch (ccdf->pec) {
case 0x0301: /* Reserved|Standby -> Configured */
if (!zdev) {
- ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
- if (ret)
+ zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
+ if (IS_ERR(zdev))
+ break;
+ } else {
+ /* the configuration request may be stale */
+ if (zdev->state != ZPCI_FN_STATE_STANDBY)
break;
- zdev = get_zdev_by_fid(ccdf->fid);
+ zdev->state = ZPCI_FN_STATE_CONFIGURED;
}
- if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
- break;
- zdev->state = ZPCI_FN_STATE_CONFIGURED;
- zdev->fh = ccdf->fh;
- ret = zpci_enable_device(zdev);
- if (ret)
- break;
- pci_lock_rescan_remove();
- pci_rescan_bus(zdev->bus);
- pci_unlock_rescan_remove();
+ zpci_scan_configured_device(zdev, ccdf->fh);
break;
case 0x0302: /* Reserved -> Standby */
if (!zdev)
- clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+ zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
+ else
+ zpci_update_fh(zdev, ccdf->fh);
break;
case 0x0303: /* Deconfiguration requested */
- if (!zdev)
- break;
- if (pdev)
- pci_stop_and_remove_bus_device_locked(pdev);
-
- ret = zpci_disable_device(zdev);
- if (ret)
- break;
-
- ret = sclp_pci_deconfigure(zdev->fid);
- zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
- if (!ret)
- zdev->state = ZPCI_FN_STATE_STANDBY;
-
+ if (zdev) {
+ /* The event may have been queued before we configured
+ * the device.
+ */
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+ break;
+ zpci_update_fh(zdev, ccdf->fh);
+ zpci_deconfigure_device(zdev);
+ }
break;
case 0x0304: /* Configured -> Standby|Reserved */
- if (!zdev)
- break;
- if (pdev) {
- /* Give the driver a hint that the function is
- * already unusable. */
- pdev->error_state = pci_channel_io_perm_failure;
- pci_stop_and_remove_bus_device_locked(pdev);
- }
-
- zdev->fh = ccdf->fh;
- zpci_disable_device(zdev);
- zdev->state = ZPCI_FN_STATE_STANDBY;
- if (!clp_get_state(ccdf->fid, &state) &&
- state == ZPCI_FN_STATE_RESERVED) {
- zpci_remove_device(zdev);
+ if (zdev) {
+ /* The event may have been queued before we configured
+ * the device.:
+ */
+ if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
+ zpci_event_hard_deconfigured(zdev, ccdf->fh);
+ /* The 0x0304 event may immediately reserve the device */
+ if (!clp_get_state(zdev->fid, &state) &&
+ state == ZPCI_FN_STATE_RESERVED) {
+ zpci_device_reserved(zdev);
+ }
}
break;
case 0x0306: /* 0x308 or 0x302 for multiple devices */
- clp_rescan_pci_devices();
+ zpci_remove_reserved_devices();
+ clp_scan_pci_devices();
break;
case 0x0308: /* Standby -> Reserved */
if (!zdev)
break;
- zpci_remove_device(zdev);
+ zpci_device_reserved(zdev);
break;
default:
break;
}
- pci_dev_put(pdev);
+ if (existing_zdev) {
+ mutex_unlock(&zdev->state_lock);
+ zpci_zdev_put(zdev);
+ }
}
void zpci_event_availability(void *data)
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 02f9505c99a8..56480be48244 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/delay.h>
#include <linux/jump_label.h>
+#include <asm/asm-extable.h>
#include <asm/facility.h>
#include <asm/pci_insn.h>
#include <asm/pci_debug.h>
@@ -17,16 +18,40 @@
#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
-static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+struct zpci_err_insn_data {
+ u8 insn;
+ u8 cc;
+ u8 status;
+ union {
+ struct {
+ u64 req;
+ u64 offset;
+ };
+ struct {
+ u64 addr;
+ u64 len;
+ };
+ };
+} __packed;
+
+static inline void zpci_err_insn_req(int lvl, u8 insn, u8 cc, u8 status,
+ u64 req, u64 offset)
{
- struct {
- u64 req;
- u64 offset;
- u8 cc;
- u8 status;
- } __packed data = {req, offset, cc, status};
-
- zpci_err_hex(&data, sizeof(data));
+ struct zpci_err_insn_data data = {
+ .insn = insn, .cc = cc, .status = status,
+ .req = req, .offset = offset};
+
+ zpci_err_hex_level(lvl, &data, sizeof(data));
+}
+
+static inline void zpci_err_insn_addr(int lvl, u8 insn, u8 cc, u8 status,
+ u64 addr, u64 len)
+{
+ struct zpci_err_insn_data data = {
+ .insn = insn, .cc = cc, .status = status,
+ .addr = addr, .len = len};
+
+ zpci_err_hex_level(lvl, &data, sizeof(data));
}
/* Modify PCI Function Controls */
@@ -46,33 +71,41 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
{
+ bool retried = false;
u8 cc;
do {
cc = __mpcifc(req, fib, status);
- if (cc == 2)
+ if (cc == 2) {
msleep(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(1, 'M', cc, *status, req, 0);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, *status, req, 0);
+ zpci_err_insn_req(0, 'M', cc, *status, req, 0);
+ else if (retried)
+ zpci_err_insn_req(1, 'M', cc, *status, req, 0);
return cc;
}
+EXPORT_SYMBOL_GPL(zpci_mod_fc);
/* Refresh PCI Translations */
static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
{
- register u64 __addr asm("2") = addr;
- register u64 __range asm("3") = range;
+ union register_pair addr_range = {.even = addr, .odd = range};
u8 cc;
asm volatile (
- " .insn rre,0xb9d30000,%[fn],%[addr]\n"
+ " .insn rre,0xb9d30000,%[fn],%[addr_range]\n"
" ipm %[cc]\n"
" srl %[cc],28\n"
: [cc] "=d" (cc), [fn] "+d" (fn)
- : [addr] "d" (__addr), "d" (__range)
+ : [addr_range] "d" (addr_range.pair)
: "cc");
*status = fn >> 24 & 0xff;
return cc;
@@ -80,16 +113,24 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
{
+ bool retried = false;
u8 cc, status;
do {
cc = __rpcit(fn, addr, range, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_addr(1, 'R', cc, status, addr, range);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, addr, range);
+ zpci_err_insn_addr(0, 'R', cc, status, addr, range);
+ else if (retried)
+ zpci_err_insn_addr(1, 'R', cc, status, addr, range);
if (cc == 1 && (status == 4 || status == 16))
return -ENOMEM;
@@ -98,7 +139,7 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
}
/* Set Interruption Controls */
-int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
+int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
{
if (!test_facility(72))
return -EIO;
@@ -109,25 +150,24 @@ int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
return 0;
}
+EXPORT_SYMBOL_GPL(zpci_set_irq_ctrl);
/* PCI Load */
static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
{
- register u64 __req asm("2") = req;
- register u64 __offset asm("3") = offset;
+ union register_pair req_off = {.even = req, .odd = offset};
int cc = -ENXIO;
u64 __data;
asm volatile (
- " .insn rre,0xb9d20000,%[data],%[req]\n"
+ " .insn rre,0xb9d20000,%[data],%[req_off]\n"
"0: ipm %[cc]\n"
" srl %[cc],28\n"
"1:\n"
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [data] "=d" (__data), [req] "+d" (__req)
- : "d" (__offset)
- : "cc");
- *status = __req >> 24 & 0xff;
+ : [cc] "+d" (cc), [data] "=d" (__data),
+ [req_off] "+&d" (req_off.pair) :: "cc");
+ *status = req_off.even >> 24 & 0xff;
*data = __data;
return cc;
}
@@ -146,17 +186,25 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
int __zpci_load(u64 *data, u64 req, u64 offset)
{
+ bool retried = false;
u8 status;
int cc;
do {
cc = __pcilg(data, req, offset, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(1, 'l', cc, status, req, offset);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, req, offset);
+ zpci_err_insn_req(0, 'l', cc, status, req, offset);
+ else if (retried)
+ zpci_err_insn_req(1, 'l', cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
@@ -166,28 +214,26 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
unsigned long len)
{
struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+ u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
return __zpci_load(data, req, ZPCI_OFFSET(addr));
}
static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status)
{
- register u64 addr asm("2") = ioaddr;
- register u64 r3 asm("3") = len;
+ union register_pair ioaddr_len = {.even = ioaddr, .odd = len};
int cc = -ENXIO;
u64 __data;
asm volatile (
- " .insn rre,0xb9d60000,%[data],%[ioaddr]\n"
+ " .insn rre,0xb9d60000,%[data],%[ioaddr_len]\n"
"0: ipm %[cc]\n"
" srl %[cc],28\n"
"1:\n"
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [data] "=d" (__data), "+d" (r3)
- : [ioaddr] "d" (addr)
- : "cc");
- *status = r3 >> 24 & 0xff;
+ : [cc] "+d" (cc), [data] "=d" (__data),
+ [ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc");
+ *status = ioaddr_len.odd >> 24 & 0xff;
*data = __data;
return cc;
}
@@ -202,7 +248,7 @@ int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len)
cc = __pcilg_mio(data, (__force u64) addr, len, &status);
if (cc)
- zpci_err_insn(cc, status, 0, (__force u64) addr);
+ zpci_err_insn_addr(0, 'L', cc, status, (__force u64) addr, len);
return (cc > 0) ? -EIO : cc;
}
@@ -211,36 +257,43 @@ EXPORT_SYMBOL_GPL(zpci_load);
/* PCI Store */
static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
{
- register u64 __req asm("2") = req;
- register u64 __offset asm("3") = offset;
+ union register_pair req_off = {.even = req, .odd = offset};
int cc = -ENXIO;
asm volatile (
- " .insn rre,0xb9d00000,%[data],%[req]\n"
+ " .insn rre,0xb9d00000,%[data],%[req_off]\n"
"0: ipm %[cc]\n"
" srl %[cc],28\n"
"1:\n"
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [req] "+d" (__req)
- : "d" (__offset), [data] "d" (data)
+ : [cc] "+d" (cc), [req_off] "+&d" (req_off.pair)
+ : [data] "d" (data)
: "cc");
- *status = __req >> 24 & 0xff;
+ *status = req_off.even >> 24 & 0xff;
return cc;
}
int __zpci_store(u64 data, u64 req, u64 offset)
{
+ bool retried = false;
u8 status;
int cc;
do {
cc = __pcistg(data, req, offset, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(1, 's', cc, status, req, offset);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, req, offset);
+ zpci_err_insn_req(0, 's', cc, status, req, offset);
+ else if (retried)
+ zpci_err_insn_req(1, 's', cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
@@ -250,27 +303,26 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
unsigned long len)
{
struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+ u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
return __zpci_store(data, req, ZPCI_OFFSET(addr));
}
static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status)
{
- register u64 addr asm("2") = ioaddr;
- register u64 r3 asm("3") = len;
+ union register_pair ioaddr_len = {.even = ioaddr, .odd = len};
int cc = -ENXIO;
asm volatile (
- " .insn rre,0xb9d40000,%[data],%[ioaddr]\n"
+ " .insn rre,0xb9d40000,%[data],%[ioaddr_len]\n"
"0: ipm %[cc]\n"
" srl %[cc],28\n"
"1:\n"
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), "+d" (r3)
- : [data] "d" (data), [ioaddr] "d" (addr)
- : "cc");
- *status = r3 >> 24 & 0xff;
+ : [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
+ : [data] "d" (data)
+ : "cc", "memory");
+ *status = ioaddr_len.odd >> 24 & 0xff;
return cc;
}
@@ -284,7 +336,7 @@ int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len)
cc = __pcistg_mio(data, (__force u64) addr, len, &status);
if (cc)
- zpci_err_insn(cc, status, 0, (__force u64) addr);
+ zpci_err_insn_addr(0, 'S', cc, status, (__force u64) addr, len);
return (cc > 0) ? -EIO : cc;
}
@@ -310,17 +362,25 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
int __zpci_store_block(const u64 *data, u64 req, u64 offset)
{
+ bool retried = false;
u8 status;
int cc;
do {
cc = __pcistb(data, req, offset, &status);
- if (cc == 2)
+ if (cc == 2) {
udelay(ZPCI_INSN_BUSY_DELAY);
+ if (!retried) {
+ zpci_err_insn_req(0, 'b', cc, status, req, offset);
+ retried = true;
+ }
+ }
} while (cc == 2);
if (cc)
- zpci_err_insn(cc, status, req, offset);
+ zpci_err_insn_req(0, 'b', cc, status, req, offset);
+ else if (retried)
+ zpci_err_insn_req(1, 'b', cc, status, req, offset);
return (cc > 0) ? -EIO : cc;
}
@@ -364,7 +424,7 @@ int zpci_write_block(volatile void __iomem *dst,
cc = __pcistb_mio(src, (__force u64) dst, len, &status);
if (cc)
- zpci_err_insn(cc, status, 0, (__force u64) dst);
+ zpci_err_insn_addr(0, 'B', cc, status, (__force u64) dst, len);
return (cc > 0) ? -EIO : cc;
}
@@ -372,10 +432,7 @@ EXPORT_SYMBOL_GPL(zpci_write_block);
static inline void __pciwb_mio(void)
{
- unsigned long unused = 0;
-
- asm volatile (".insn rre,0xb9d50000,%[op],%[op]\n"
- : [op] "+d" (unused));
+ asm volatile (".insn rre,0xb9d50000,0,0\n");
}
void zpci_barrier(void)
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
new file mode 100644
index 000000000000..ead062bf2b41
--- /dev/null
+++ b/arch/s390/pci/pci_iov.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include "pci_iov.h"
+
+static struct resource iov_res = {
+ .name = "PCI IOV res",
+ .start = 0,
+ .end = -1,
+ .flags = IORESOURCE_MEM,
+};
+
+void zpci_iov_map_resources(struct pci_dev *pdev)
+{
+ resource_size_t len;
+ int i;
+
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ int bar = i + PCI_IOV_RESOURCES;
+
+ len = pci_resource_len(pdev, bar);
+ if (!len)
+ continue;
+ pdev->resource[bar].parent = &iov_res;
+ }
+}
+
+void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn)
+{
+ pci_lock_rescan_remove();
+ /* Linux' vfid's start at 0 vfn at 1 */
+ pci_iov_remove_virtfn(pdev->physfn, vfn - 1);
+ pci_unlock_rescan_remove();
+}
+
+static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, int vfid)
+{
+ int rc;
+
+ rc = pci_iov_sysfs_link(pdev, virtfn, vfid);
+ if (rc)
+ return rc;
+
+ virtfn->is_virtfn = 1;
+ virtfn->multifunction = 0;
+ virtfn->physfn = pci_dev_get(pdev);
+
+ return 0;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ int i, cand_devfn;
+ struct zpci_dev *zdev;
+ struct pci_dev *pdev;
+ int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
+ int rc = 0;
+
+ if (!zbus->multifunction)
+ return 0;
+
+ /* If the parent PF for the given VF is also configured in the
+ * instance, it must be on the same zbus.
+ * We can then identify the parent PF by checking what
+ * devfn the VF would have if it belonged to that PF using the PF's
+ * stride and offset. Only if this candidate devfn matches the
+ * actual devfn will we link both functions.
+ */
+ for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) {
+ zdev = zbus->function[i];
+ if (zdev && zdev->is_physfn) {
+ pdev = pci_get_slot(zbus->bus, zdev->devfn);
+ if (!pdev)
+ continue;
+ cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
+ if (cand_devfn == virtfn->devfn) {
+ rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
+ /* balance pci_get_slot() */
+ pci_dev_put(pdev);
+ break;
+ }
+ /* balance pci_get_slot() */
+ pci_dev_put(pdev);
+ }
+ }
+ return rc;
+}
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
new file mode 100644
index 000000000000..b2c828003bad
--- /dev/null
+++ b/arch/s390/pci/pci_iov.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#ifndef __S390_PCI_IOV_H
+#define __S390_PCI_IOV_H
+
+#ifdef CONFIG_PCI_IOV
+void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn);
+
+void zpci_iov_map_resources(struct pci_dev *pdev);
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
+
+#else /* CONFIG_PCI_IOV */
+static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
+
+static inline void zpci_iov_map_resources(struct pci_dev *pdev) {}
+
+static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+#endif /* __S390_PCI_IOV_h */
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index fbe97ab2e228..ff8f24854c64 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -11,16 +11,10 @@
#include <asm/isc.h>
#include <asm/airq.h>
+#include <asm/tpi.h>
static enum {FLOATING, DIRECTED} irq_delivery;
-#define SIC_IRQ_MODE_ALL 0
-#define SIC_IRQ_MODE_SINGLE 1
-#define SIC_IRQ_MODE_DIRECT 4
-#define SIC_IRQ_MODE_D_ALL 16
-#define SIC_IRQ_MODE_D_SINGLE 17
-#define SIC_IRQ_MODE_SET_CPU 18
-
/*
* summary bit vector
* FLOATING - summary bit per function
@@ -35,7 +29,7 @@ static struct airq_iv *zpci_sbv;
*/
static struct airq_iv **zpci_ibv;
-/* Modify PCI: Register adapter interruptions */
+/* Modify PCI: Register floating adapter interruptions */
static int zpci_set_airq(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
@@ -45,21 +39,24 @@ static int zpci_set_airq(struct zpci_dev *zdev)
fib.fmt0.isc = PCI_ISC;
fib.fmt0.sum = 1; /* enable summary notifications */
fib.fmt0.noi = airq_iv_end(zdev->aibv);
- fib.fmt0.aibv = (unsigned long) zdev->aibv->vector;
+ fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
fib.fmt0.aibvo = 0; /* each zdev has its own interrupt vector */
- fib.fmt0.aisb = (unsigned long) zpci_sbv->vector + (zdev->aisb/64)*8;
+ fib.fmt0.aisb = virt_to_phys(zpci_sbv->vector) + (zdev->aisb / 64) * 8;
fib.fmt0.aisbo = zdev->aisb & 63;
+ fib.gd = zdev->gisa;
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
-/* Modify PCI: Unregister adapter interruptions */
+/* Modify PCI: Unregister floating adapter interruptions */
static int zpci_clear_airq(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
struct zpci_fib fib = {0};
u8 cc, status;
+ fib.gd = zdev->gisa;
+
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3 || (cc == 1 && status == 24))
/* Function already gone or IRQs already deregistered. */
@@ -78,6 +75,7 @@ static int zpci_set_directed_irq(struct zpci_dev *zdev)
fib.fmt = 1;
fib.fmt1.noi = zdev->msi_nr_irqs;
fib.fmt1.dibvo = zdev->msi_first_bit;
+ fib.gd = zdev->gisa;
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
@@ -90,6 +88,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev)
u8 cc, status;
fib.fmt = 1;
+ fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3 || (cc == 1 && status == 24))
/* Function already gone or IRQs already deregistered. */
@@ -98,14 +97,47 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev)
return cc ? -EIO : 0;
}
+/* Register adapter interruptions */
+static int zpci_set_irq(struct zpci_dev *zdev)
+{
+ int rc;
+
+ if (irq_delivery == DIRECTED)
+ rc = zpci_set_directed_irq(zdev);
+ else
+ rc = zpci_set_airq(zdev);
+
+ if (!rc)
+ zdev->irqs_registered = 1;
+
+ return rc;
+}
+
+/* Clear adapter interruptions */
+static int zpci_clear_irq(struct zpci_dev *zdev)
+{
+ int rc;
+
+ if (irq_delivery == DIRECTED)
+ rc = zpci_clear_directed_irq(zdev);
+ else
+ rc = zpci_clear_airq(zdev);
+
+ if (!rc)
+ zdev->irqs_registered = 0;
+
+ return rc;
+}
+
static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest,
bool force)
{
- struct msi_desc *entry = irq_get_msi_desc(data->irq);
+ struct msi_desc *entry = irq_data_get_msi_desc(data);
struct msi_msg msg = entry->msg;
+ int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest));
msg.address_lo &= 0xff0000ff;
- msg.address_lo |= (cpumask_first(dest) << 8);
+ msg.address_lo |= (cpu_addr << 8);
pci_write_msi_msg(data->irq, &msg);
return IRQ_SET_MASK_OK;
@@ -115,12 +147,12 @@ static struct irq_chip zpci_irq_chip = {
.name = "PCI-MSI",
.irq_unmask = pci_msi_unmask_irq,
.irq_mask = pci_msi_mask_irq,
- .irq_set_affinity = zpci_set_irq_affinity,
};
static void zpci_handle_cpu_local_irq(bool rescan)
{
struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
+ union zpci_sic_iib iib = {{0}};
unsigned long bit;
int irqs_on = 0;
@@ -131,8 +163,8 @@ static void zpci_handle_cpu_local_irq(bool rescan)
if (!rescan || irqs_on++)
/* End of second scan with interrupts on. */
break;
- /* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC))
+ /* First scan complete, re-enable interrupts. */
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib))
break;
bit = 0;
continue;
@@ -160,6 +192,7 @@ static void zpci_handle_remote_irq(void *data)
static void zpci_handle_fallback_irq(void)
{
struct cpu_irq_data *cpu_data;
+ union zpci_sic_iib iib = {{0}};
unsigned long cpu;
int irqs_on = 0;
@@ -169,8 +202,8 @@ static void zpci_handle_fallback_irq(void)
if (irqs_on++)
/* End of second scan with interrupts on. */
break;
- /* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+ /* First scan complete, re-enable interrupts. */
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
cpu = 0;
continue;
@@ -179,15 +212,16 @@ static void zpci_handle_fallback_irq(void)
if (atomic_inc_return(&cpu_data->scheduled) > 1)
continue;
- cpu_data->csd.func = zpci_handle_remote_irq;
- cpu_data->csd.info = &cpu_data->scheduled;
- cpu_data->csd.flags = 0;
+ INIT_CSD(&cpu_data->csd, zpci_handle_remote_irq, &cpu_data->scheduled);
smp_call_function_single_async(cpu, &cpu_data->csd);
}
}
-static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
+static void zpci_directed_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
{
+ bool floating = !tpi_info->directed_irq;
+
if (floating) {
inc_irq_stat(IRQIO_PCF);
zpci_handle_fallback_irq();
@@ -197,8 +231,10 @@ static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
}
}
-static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
+static void zpci_floating_irq_handler(struct airq_struct *airq,
+ struct tpi_info *tpi_info)
{
+ union zpci_sic_iib iib = {{0}};
unsigned long si, ai;
struct airq_iv *aibv;
int irqs_on = 0;
@@ -211,8 +247,8 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
if (irqs_on++)
/* End of second scan with interrupts on. */
break;
- /* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+ /* First scan complete, re-enable interrupts. */
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
si = 0;
continue;
@@ -239,6 +275,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
unsigned long bit;
struct msi_desc *msi;
struct msi_msg msg;
+ int cpu_addr;
int rc, irq;
zdev->aisb = -1UL;
@@ -260,7 +297,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
zdev->aisb = bit;
/* Create adapter interrupt vector */
- zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
if (!zdev->aibv)
return -ENOMEM;
@@ -272,11 +309,13 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
/* Request MSI interrupts */
hwirq = bit;
- for_each_pci_msi_entry(msi, pdev) {
+ msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
rc = -EIO;
if (hwirq - bit >= msi_vecs)
break;
- irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, msi->affinity);
+ irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
+ (irq_delivery == DIRECTED) ?
+ msi->affinity : NULL);
if (irq < 0)
return -ENOMEM;
rc = irq_set_msi_desc(irq, msi);
@@ -286,9 +325,15 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
handle_percpu_irq);
msg.data = hwirq - bit;
if (irq_delivery == DIRECTED) {
+ if (msi->affinity)
+ cpu = cpumask_first(&msi->affinity->mask);
+ else
+ cpu = 0;
+ cpu_addr = smp_cpu_get_cpu_address(cpu);
+
msg.address_lo = zdev->msi_addr & 0xff0000ff;
- msg.address_lo |= msi->affinity ?
- (cpumask_first(&msi->affinity->mask) << 8) : 0;
+ msg.address_lo |= (cpu_addr << 8);
+
for_each_possible_cpu(cpu) {
airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
}
@@ -304,10 +349,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
zdev->msi_first_bit = bit;
zdev->msi_nr_irqs = msi_vecs;
- if (irq_delivery == DIRECTED)
- rc = zpci_set_directed_irq(zdev);
- else
- rc = zpci_set_airq(zdev);
+ rc = zpci_set_irq(zdev);
if (rc)
return rc;
@@ -321,21 +363,12 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
int rc;
/* Disable interrupts */
- if (irq_delivery == DIRECTED)
- rc = zpci_clear_directed_irq(zdev);
- else
- rc = zpci_clear_airq(zdev);
+ rc = zpci_clear_irq(zdev);
if (rc)
return;
/* Release MSI interrupts */
- for_each_pci_msi_entry(msi, pdev) {
- if (!msi->irq)
- continue;
- if (msi->msi_attrib.is_msix)
- __pci_msix_desc_mask_irq(msi, 1);
- else
- __pci_msi_desc_mask_irq(msi, 1, 1);
+ msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
irq_set_msi_desc(msi->irq, NULL);
irq_free_desc(msi->irq);
msi->msg.address_lo = 0;
@@ -358,6 +391,15 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
}
+bool arch_restore_msi_irqs(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ if (!zdev->irqs_registered)
+ zpci_set_irq(zdev);
+ return true;
+}
+
static struct airq_struct zpci_airq = {
.handler = zpci_floating_irq_handler,
.isc = PCI_ISC,
@@ -366,11 +408,12 @@ static struct airq_struct zpci_airq = {
static void __init cpu_enable_directed_irq(void *unused)
{
union zpci_sic_iib iib = {{0}};
+ union zpci_sic_iib ziib = {{0}};
iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
- __zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
- zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
}
static int __init zpci_directed_irq_init(void)
@@ -378,14 +421,14 @@ static int __init zpci_directed_irq_init(void)
union zpci_sic_iib iib = {{0}};
unsigned int cpu;
- zpci_sbv = airq_iv_create(num_possible_cpus(), 0);
+ zpci_sbv = airq_iv_create(num_possible_cpus(), 0, NULL);
if (!zpci_sbv)
return -ENOMEM;
iib.diib.isc = PCI_ISC;
iib.diib.nr_cpus = num_possible_cpus();
- iib.diib.disb_addr = (u64) zpci_sbv->vector;
- __zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
+ iib.diib.disb_addr = virt_to_phys(zpci_sbv->vector);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv),
GFP_KERNEL);
@@ -400,7 +443,7 @@ static int __init zpci_directed_irq_init(void)
zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
AIRQ_IV_DATA |
AIRQ_IV_CACHELINE |
- (!cpu ? AIRQ_IV_ALLOC : 0));
+ (!cpu ? AIRQ_IV_ALLOC : 0), NULL);
if (!zpci_ibv[cpu])
return -ENOMEM;
}
@@ -417,7 +460,7 @@ static int __init zpci_floating_irq_init(void)
if (!zpci_ibv)
return -ENOMEM;
- zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+ zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
if (!zpci_sbv)
goto out_free;
@@ -430,6 +473,7 @@ out_free:
int __init zpci_irq_init(void)
{
+ union zpci_sic_iib iib = {{0}};
int rc;
irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING;
@@ -461,7 +505,7 @@ int __init zpci_irq_init(void)
* Enable floating IRQs (with suppression after one IRQ). When using
* directed IRQs this enables the fallback path.
*/
- zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib);
return 0;
out_airq:
diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c
new file mode 100644
index 000000000000..ff34baf50a3e
--- /dev/null
+++ b/arch/s390/pci/pci_kvm_hook.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2022. All rights reserved.
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ */
+#include <linux/kvm_host.h>
+
+struct zpci_kvm_hook zpci_kvm_hook;
+EXPORT_SYMBOL_GPL(zpci_kvm_hook);
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 7d42a8794f10..a90499c087f0 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -11,25 +11,108 @@
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/pci.h>
+#include <asm/asm-extable.h>
+#include <asm/pci_io.h>
+#include <asm/pci_debug.h>
-static long get_pfn(unsigned long user_addr, unsigned long access,
- unsigned long *pfn)
+static inline void zpci_err_mmio(u8 cc, u8 status, u64 offset)
{
- struct vm_area_struct *vma;
- long ret;
+ struct {
+ u64 offset;
+ u8 cc;
+ u8 status;
+ } data = {offset, cc, status};
- down_read(&current->mm->mmap_sem);
- ret = -EINVAL;
- vma = find_vma(current->mm, user_addr);
- if (!vma)
- goto out;
- ret = -EACCES;
- if (!(vma->vm_flags & access))
- goto out;
- ret = follow_pfn(vma, user_addr, pfn);
-out:
- up_read(&current->mm->mmap_sem);
- return ret;
+ zpci_err_hex(&data, sizeof(data));
+}
+
+static inline int __pcistb_mio_inuser(
+ void __iomem *ioaddr, const void __user *src,
+ u64 len, u8 *status)
+{
+ int cc = -ENXIO;
+
+ asm volatile (
+ " sacf 256\n"
+ "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
+ "1: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "2: sacf 768\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : [cc] "+d" (cc), [len] "+d" (len)
+ : [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src))
+ : "cc", "memory");
+ *status = len >> 24 & 0xff;
+ return cc;
+}
+
+static inline int __pcistg_mio_inuser(
+ void __iomem *ioaddr, const void __user *src,
+ u64 ulen, u8 *status)
+{
+ union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
+ int cc = -ENXIO;
+ u64 val = 0;
+ u64 cnt = ulen;
+ u8 tmp;
+
+ /*
+ * copy 0 < @len <= 8 bytes from @src into the right most bytes of
+ * a register, then store it to PCI at @ioaddr while in secondary
+ * address space. pcistg then uses the user mappings.
+ */
+ asm volatile (
+ " sacf 256\n"
+ "0: llgc %[tmp],0(%[src])\n"
+ "4: sllg %[val],%[val],8\n"
+ " aghi %[src],1\n"
+ " ogr %[val],%[tmp]\n"
+ " brctg %[cnt],0b\n"
+ "1: .insn rre,0xb9d40000,%[val],%[ioaddr_len]\n"
+ "2: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "3: sacf 768\n"
+ EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+ :
+ [src] "+a" (src), [cnt] "+d" (cnt),
+ [val] "+d" (val), [tmp] "=d" (tmp),
+ [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
+ :: "cc", "memory");
+ *status = ioaddr_len.odd >> 24 & 0xff;
+
+ /* did we read everything from user memory? */
+ if (!cc && cnt != 0)
+ cc = -EFAULT;
+
+ return cc;
+}
+
+static inline int __memcpy_toio_inuser(void __iomem *dst,
+ const void __user *src, size_t n)
+{
+ int size, rc = 0;
+ u8 status = 0;
+
+ if (!src)
+ return -EINVAL;
+
+ while (n > 0) {
+ size = zpci_get_max_io_size((u64 __force) dst,
+ (u64 __force) src, n,
+ ZPCI_MAX_WRITE_SIZE);
+ if (size > 8) /* main path */
+ rc = __pcistb_mio_inuser(dst, src, size, &status);
+ else
+ rc = __pcistg_mio_inuser(dst, src, size, &status);
+ if (rc)
+ break;
+ src += size;
+ dst += size;
+ n -= size;
+ }
+ if (rc)
+ zpci_err_mmio(rc, status, (__force u64) dst);
+ return rc;
}
SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
@@ -38,7 +121,9 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
u8 local_buf[64];
void __iomem *io_addr;
void *buf;
- unsigned long pfn;
+ struct vm_area_struct *vma;
+ pte_t *ptep;
+ spinlock_t *ptl;
long ret;
if (!zpci_is_enabled())
@@ -46,6 +131,22 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
return -EINVAL;
+
+ /*
+ * We only support write access to MIO capable devices if we are on
+ * a MIO enabled system. Otherwise we would have to check for every
+ * address if it is a special ZPCI_ADDR and would have to do
+ * a pfn lookup which we don't need for MIO capable devices. Currently
+ * ISM devices are the only devices without MIO support and there is no
+ * known need for accessing these from userspace.
+ */
+ if (static_branch_likely(&have_mio)) {
+ ret = __memcpy_toio_inuser((void __iomem *) mmio_addr,
+ user_buffer,
+ length);
+ return ret;
+ }
+
if (length > 64) {
buf = kmalloc(length, GFP_KERNEL);
if (!buf)
@@ -53,32 +154,118 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
} else
buf = local_buf;
- ret = get_pfn(mmio_addr, VM_WRITE, &pfn);
+ ret = -EFAULT;
+ if (copy_from_user(buf, user_buffer, length))
+ goto out_free;
+
+ mmap_read_lock(current->mm);
+ ret = -EINVAL;
+ vma = vma_lookup(current->mm, mmio_addr);
+ if (!vma)
+ goto out_unlock_mmap;
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+ goto out_unlock_mmap;
+ ret = -EACCES;
+ if (!(vma->vm_flags & VM_WRITE))
+ goto out_unlock_mmap;
+
+ ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
if (ret)
- goto out;
- io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
+ goto out_unlock_mmap;
- ret = -EFAULT;
- if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
- goto out;
+ io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+ (mmio_addr & ~PAGE_MASK));
- if (copy_from_user(buf, user_buffer, length))
- goto out;
+ if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
+ goto out_unlock_pt;
ret = zpci_memcpy_toio(io_addr, buf, length);
-out:
+out_unlock_pt:
+ pte_unmap_unlock(ptep, ptl);
+out_unlock_mmap:
+ mmap_read_unlock(current->mm);
+out_free:
if (buf != local_buf)
kfree(buf);
return ret;
}
+static inline int __pcilg_mio_inuser(
+ void __user *dst, const void __iomem *ioaddr,
+ u64 ulen, u8 *status)
+{
+ union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
+ u64 cnt = ulen;
+ int shift = ulen * 8;
+ int cc = -ENXIO;
+ u64 val, tmp;
+
+ /*
+ * read 0 < @len <= 8 bytes from the PCI memory mapped at @ioaddr (in
+ * user space) into a register using pcilg then store these bytes at
+ * user address @dst
+ */
+ asm volatile (
+ " sacf 256\n"
+ "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
+ "1: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ " ltr %[cc],%[cc]\n"
+ " jne 4f\n"
+ "2: ahi %[shift],-8\n"
+ " srlg %[tmp],%[val],0(%[shift])\n"
+ "3: stc %[tmp],0(%[dst])\n"
+ "5: aghi %[dst],1\n"
+ " brctg %[cnt],2b\n"
+ "4: sacf 768\n"
+ EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b)
+ :
+ [ioaddr_len] "+&d" (ioaddr_len.pair),
+ [cc] "+d" (cc), [val] "=d" (val),
+ [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
+ [shift] "+d" (shift)
+ :: "cc", "memory");
+
+ /* did we write everything to the user space buffer? */
+ if (!cc && cnt != 0)
+ cc = -EFAULT;
+
+ *status = ioaddr_len.odd >> 24 & 0xff;
+ return cc;
+}
+
+static inline int __memcpy_fromio_inuser(void __user *dst,
+ const void __iomem *src,
+ unsigned long n)
+{
+ int size, rc = 0;
+ u8 status;
+
+ while (n > 0) {
+ size = zpci_get_max_io_size((u64 __force) src,
+ (u64 __force) dst, n,
+ ZPCI_MAX_READ_SIZE);
+ rc = __pcilg_mio_inuser(dst, src, size, &status);
+ if (rc)
+ break;
+ src += size;
+ dst += size;
+ n -= size;
+ }
+ if (rc)
+ zpci_err_mmio(rc, status, (__force u64) dst);
+ return rc;
+}
+
SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
void __user *, user_buffer, size_t, length)
{
u8 local_buf[64];
void __iomem *io_addr;
void *buf;
- unsigned long pfn;
+ struct vm_area_struct *vma;
+ pte_t *ptep;
+ spinlock_t *ptl;
long ret;
if (!zpci_is_enabled())
@@ -86,29 +273,62 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
return -EINVAL;
+
+ /*
+ * We only support read access to MIO capable devices if we are on
+ * a MIO enabled system. Otherwise we would have to check for every
+ * address if it is a special ZPCI_ADDR and would have to do
+ * a pfn lookup which we don't need for MIO capable devices. Currently
+ * ISM devices are the only devices without MIO support and there is no
+ * known need for accessing these from userspace.
+ */
+ if (static_branch_likely(&have_mio)) {
+ ret = __memcpy_fromio_inuser(
+ user_buffer, (const void __iomem *)mmio_addr,
+ length);
+ return ret;
+ }
+
if (length > 64) {
buf = kmalloc(length, GFP_KERNEL);
if (!buf)
return -ENOMEM;
- } else
+ } else {
buf = local_buf;
+ }
- ret = get_pfn(mmio_addr, VM_READ, &pfn);
+ mmap_read_lock(current->mm);
+ ret = -EINVAL;
+ vma = vma_lookup(current->mm, mmio_addr);
+ if (!vma)
+ goto out_unlock_mmap;
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+ goto out_unlock_mmap;
+ ret = -EACCES;
+ if (!(vma->vm_flags & VM_WRITE))
+ goto out_unlock_mmap;
+
+ ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
if (ret)
- goto out;
- io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
+ goto out_unlock_mmap;
+
+ io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+ (mmio_addr & ~PAGE_MASK));
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
ret = -EFAULT;
- goto out;
+ goto out_unlock_pt;
}
ret = zpci_memcpy_fromio(buf, io_addr, length);
- if (ret)
- goto out;
- if (copy_to_user(user_buffer, buf, length))
+
+out_unlock_pt:
+ pte_unmap_unlock(ptep, ptl);
+out_unlock_mmap:
+ mmap_read_unlock(current->mm);
+
+ if (!ret && copy_to_user(user_buffer, buf, length))
ret = -EFAULT;
-out:
if (buf != local_buf)
kfree(buf);
return ret;
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index a433ba01a317..a0b872b74fe3 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -13,6 +13,8 @@
#include <linux/stat.h>
#include <linux/pci.h>
+#include "../../../drivers/pci/pci.h"
+
#include <asm/sclp.h>
#define zpci_attr(name, fmt, member) \
@@ -31,6 +33,7 @@ zpci_attr(pchid, "0x%04x\n", pchid);
zpci_attr(pfgid, "0x%02x\n", pfgid);
zpci_attr(vfn, "0x%04x\n", vfn);
zpci_attr(pft, "0x%02x\n", pft);
+zpci_attr(port, "%d\n", port);
zpci_attr(uid, "0x%x\n", uid);
zpci_attr(segment0, "0x%02x\n", pfip[0]);
zpci_attr(segment1, "0x%02x\n", pfip[1]);
@@ -46,34 +49,87 @@ static ssize_t mio_enabled_show(struct device *dev,
}
static DEVICE_ATTR_RO(mio_enabled);
-static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = to_zpci(pdev);
+ u8 status;
int ret;
- if (!device_remove_file_self(dev, attr))
- return count;
-
- pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(pdev);
- ret = zpci_disable_device(zdev);
- if (ret)
- goto error;
+ if (zdev_enabled(zdev)) {
+ ret = zpci_disable_device(zdev);
+ /*
+ * Due to a z/VM vs LPAR inconsistency in the error
+ * state the FH may indicate an enabled device but
+ * disable says the device is already disabled don't
+ * treat it as an error here.
+ */
+ if (ret == -EINVAL)
+ ret = 0;
+ if (ret)
+ return ret;
+ }
ret = zpci_enable_device(zdev);
if (ret)
- goto error;
+ return ret;
+
+ if (zdev->dma_table) {
+ ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table), &status);
+ if (ret)
+ zpci_disable_device(zdev);
+ }
+ return ret;
+}
- pci_rescan_bus(zdev->bus);
+static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct kernfs_node *kn;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = to_zpci(pdev);
+ int ret = 0;
+
+ /* Can't use device_remove_self() here as that would lead us to lock
+ * the pci_rescan_remove_lock while holding the device' kernfs lock.
+ * This would create a possible deadlock with disable_slot() which is
+ * not directly protected by the device' kernfs lock but takes it
+ * during the device removal which happens under
+ * pci_rescan_remove_lock.
+ *
+ * This is analogous to sdev_store_delete() in
+ * drivers/scsi/scsi_sysfs.c
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ WARN_ON_ONCE(!kn);
+
+ /* Device needs to be configured and state must not change */
+ mutex_lock(&zdev->state_lock);
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+ goto out;
+
+ /* device_remove_file() serializes concurrent calls ignoring all but
+ * the first
+ */
+ device_remove_file(dev, attr);
+
+ /* A concurrent call to recover_store() may slip between
+ * sysfs_break_active_protection() and the sysfs file removal.
+ * Once it unblocks from pci_lock_rescan_remove() the original pdev
+ * will already be removed.
+ */
+ pci_lock_rescan_remove();
+ if (pci_dev_is_added(pdev)) {
+ ret = _do_recover(pdev, zdev);
+ }
+ pci_rescan_bus(zdev->zbus->bus);
pci_unlock_rescan_remove();
- return count;
-
-error:
- pci_unlock_rescan_remove();
- return ret;
+out:
+ mutex_unlock(&zdev->state_lock);
+ if (kn)
+ sysfs_unbreak_active_protection(kn);
+ return ret ? ret : count;
}
static DEVICE_ATTR_WO(recover);
@@ -109,6 +165,45 @@ static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
}
static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
+static ssize_t uid_is_unique_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", zpci_unique_uid ? 1 : 0);
+}
+static DEVICE_ATTR_RO(uid_is_unique);
+
+#ifndef CONFIG_DMI
+/* analogous to smbios index */
+static ssize_t index_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+ u32 index = ~0;
+
+ if (zpci_unique_uid)
+ index = zdev->uid;
+
+ return sysfs_emit(buf, "%u\n", index);
+}
+static DEVICE_ATTR_RO(index);
+
+static umode_t zpci_index_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ return zpci_unique_uid ? attr->mode : 0;
+}
+
+static struct attribute *zpci_ident_attrs[] = {
+ &dev_attr_index.attr,
+ NULL,
+};
+
+static struct attribute_group zpci_ident_attr_group = {
+ .attrs = zpci_ident_attrs,
+ .is_visible = zpci_index_is_visible,
+};
+#endif
+
static struct bin_attribute *zpci_bin_attrs[] = {
&bin_attr_util_string,
&bin_attr_report_error,
@@ -121,12 +216,15 @@ static struct attribute *zpci_dev_attrs[] = {
&dev_attr_pchid.attr,
&dev_attr_pfgid.attr,
&dev_attr_pft.attr,
+ &dev_attr_port.attr,
&dev_attr_vfn.attr,
&dev_attr_uid.attr,
&dev_attr_recover.attr,
&dev_attr_mio_enabled.attr,
+ &dev_attr_uid_is_unique.attr,
NULL,
};
+
static struct attribute_group zpci_attr_group = {
.attrs = zpci_dev_attrs,
.bin_attrs = zpci_bin_attrs,
@@ -147,5 +245,8 @@ static struct attribute_group pfip_attr_group = {
const struct attribute_group *zpci_attr_groups[] = {
&zpci_attr_group,
&pfip_attr_group,
+#ifndef CONFIG_DMI
+ &zpci_ident_attr_group,
+#endif
NULL,
};