summaryrefslogtreecommitdiff
path: root/arch/s390/pci
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/pci')
-rw-r--r--arch/s390/pci/Makefile5
-rw-r--r--arch/s390/pci/pci.c365
-rw-r--r--arch/s390/pci/pci_bus.c123
-rw-r--r--arch/s390/pci/pci_bus.h15
-rw-r--r--arch/s390/pci/pci_clp.c68
-rw-r--r--arch/s390/pci/pci_debug.c29
-rw-r--r--arch/s390/pci/pci_dma.c732
-rw-r--r--arch/s390/pci/pci_event.c180
-rw-r--r--arch/s390/pci/pci_fixup.c23
-rw-r--r--arch/s390/pci/pci_insn.c122
-rw-r--r--arch/s390/pci/pci_iov.c59
-rw-r--r--arch/s390/pci/pci_iov.h9
-rw-r--r--arch/s390/pci/pci_irq.c130
-rw-r--r--arch/s390/pci/pci_kvm_hook.c2
-rw-r--r--arch/s390/pci/pci_mmio.c160
-rw-r--r--arch/s390/pci/pci_report.c157
-rw-r--r--arch/s390/pci/pci_report.h16
-rw-r--r--arch/s390/pci/pci_sysfs.c122
18 files changed, 1036 insertions, 1281 deletions
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 5ae31ca9dd44..1810e0944a4e 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,7 +3,8 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
+obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
- pci_bus.o pci_kvm_hook.o
+ pci_bus.o pci_kvm_hook.o pci_report.o pci_fixup.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
+obj-$(CONFIG_SYSFS) += pci_sysfs.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index ef38b1514c77..93d2c9c780fc 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -16,8 +16,7 @@
* Thomas Klein
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -28,7 +27,10 @@
#include <linux/jump_label.h>
#include <linux/pci.h>
#include <linux/printk.h>
+#include <linux/lockdep.h>
+#include <linux/list_sort.h>
+#include <asm/machine.h>
#include <asm/isc.h>
#include <asm/airq.h>
#include <asm/facility.h>
@@ -42,6 +44,7 @@
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
+static DEFINE_MUTEX(zpci_add_remove_lock);
static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
static DEFINE_SPINLOCK(zpci_domain_lock);
@@ -67,6 +70,15 @@ EXPORT_SYMBOL_GPL(zpci_aipb);
struct airq_iv *zpci_aif_sbv;
EXPORT_SYMBOL_GPL(zpci_aif_sbv);
+void zpci_zdev_put(struct zpci_dev *zdev)
+{
+ if (!zdev)
+ return;
+ mutex_lock(&zpci_add_remove_lock);
+ kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock);
+ mutex_unlock(&zpci_add_remove_lock);
+}
+
struct zpci_dev *get_zdev_by_fid(u32 fid)
{
struct zpci_dev *tmp, *zdev = NULL;
@@ -122,10 +134,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
struct zpci_fib fib = {0};
u8 cc;
- WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
- fib.pal = limit;
- fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
+ /* Work around off by one in ISM virt device */
+ if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
+ fib.pal = limit + (1 << 12);
+ else
+ fib.pal = limit;
+ fib.iota = iota;
fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, status);
if (cc)
@@ -153,7 +168,9 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
int zpci_fmb_enable_device(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+ struct zpci_iommu_ctrs *ctrs;
struct zpci_fib fib = {0};
+ unsigned long flags;
u8 cc, status;
if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
@@ -165,9 +182,17 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
WARN_ON((u64) zdev->fmb & 0xf);
/* reset software counters */
- atomic64_set(&zdev->allocated_pages, 0);
- atomic64_set(&zdev->mapped_pages, 0);
- atomic64_set(&zdev->unmapped_pages, 0);
+ spin_lock_irqsave(&zdev->dom_lock, flags);
+ ctrs = zpci_get_iommu_ctrs(zdev);
+ if (ctrs) {
+ atomic64_set(&ctrs->mapped_pages, 0);
+ atomic64_set(&ctrs->unmapped_pages, 0);
+ atomic64_set(&ctrs->global_rpcits, 0);
+ atomic64_set(&ctrs->sync_map_rpcits, 0);
+ atomic64_set(&ctrs->sync_rpcits, 0);
+ }
+ spin_unlock_irqrestore(&zdev->dom_lock, flags);
+
fib.fmb_addr = virt_to_phys(zdev->fmb);
fib.gd = zdev->gisa;
@@ -238,68 +263,25 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
return 0;
}
-/* combine single writes by using store-block insn */
-void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
-{
- zpci_memcpy_toio(to, from, count);
-}
-
-static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot)
+void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
+ pgprot_t prot)
{
- unsigned long offset, vaddr;
- struct vm_struct *area;
- phys_addr_t last_addr;
-
- last_addr = addr + size - 1;
- if (!size || last_addr < addr)
- return NULL;
-
+ /*
+ * When PCI MIO instructions are unavailable the "physical" address
+ * encodes a hint for accessing the PCI memory space it represents.
+ * Just pass it unchanged such that ioread/iowrite can decode it.
+ */
if (!static_branch_unlikely(&have_mio))
- return (void __iomem *) addr;
-
- offset = addr & ~PAGE_MASK;
- addr &= PAGE_MASK;
- size = PAGE_ALIGN(size + offset);
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
+ return (void __iomem *)phys_addr;
- vaddr = (unsigned long) area->addr;
- if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
- free_vm_area(area);
- return NULL;
- }
- return (void __iomem *) ((unsigned long) area->addr + offset);
-}
-
-void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
-{
- return __ioremap(addr, size, __pgprot(prot));
+ return generic_ioremap_prot(phys_addr, size, prot);
}
EXPORT_SYMBOL(ioremap_prot);
-void __iomem *ioremap(phys_addr_t addr, size_t size)
-{
- return __ioremap(addr, size, PAGE_KERNEL);
-}
-EXPORT_SYMBOL(ioremap);
-
-void __iomem *ioremap_wc(phys_addr_t addr, size_t size)
-{
- return __ioremap(addr, size, pgprot_writecombine(PAGE_KERNEL));
-}
-EXPORT_SYMBOL(ioremap_wc);
-
-void __iomem *ioremap_wt(phys_addr_t addr, size_t size)
-{
- return __ioremap(addr, size, pgprot_writethrough(PAGE_KERNEL));
-}
-EXPORT_SYMBOL(ioremap_wt);
-
void iounmap(volatile void __iomem *addr)
{
if (static_branch_likely(&have_mio))
- vunmap((__force void *) ((unsigned long) addr & PAGE_MASK));
+ generic_iounmap(addr);
}
EXPORT_SYMBOL(iounmap);
@@ -544,8 +526,7 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
return r;
}
-int zpci_setup_bus_resources(struct zpci_dev *zdev,
- struct list_head *resources)
+int zpci_setup_bus_resources(struct zpci_dev *zdev)
{
unsigned long addr, size, flags;
struct resource *res;
@@ -581,7 +562,6 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
return -ENOMEM;
}
zdev->bars[i].res = res;
- pci_add_resource(resources, res);
}
zdev->has_resources = 1;
@@ -590,17 +570,23 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
{
+ struct resource *res;
int i;
+ pci_lock_rescan_remove();
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
- if (!zdev->bars[i].size || !zdev->bars[i].res)
+ res = zdev->bars[i].res;
+ if (!res)
continue;
+ release_resource(res);
+ pci_bus_remove_resource(zdev->zbus->bus, res);
zpci_free_iomap(zdev, zdev->bars[i].map_idx);
- release_resource(zdev->bars[i].res);
- kfree(zdev->bars[i].res);
+ zdev->bars[i].res = NULL;
+ kfree(res);
}
zdev->has_resources = 0;
+ pci_unlock_rescan_remove();
}
int pcibios_device_add(struct pci_dev *pdev)
@@ -614,8 +600,6 @@ int pcibios_device_add(struct pci_dev *pdev)
if (pdev->is_physfn)
pdev->no_vf_scan = 1;
- pdev->dev.groups = zpci_attr_groups;
- pdev->dev.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
@@ -715,6 +699,23 @@ int zpci_enable_device(struct zpci_dev *zdev)
}
EXPORT_SYMBOL_GPL(zpci_enable_device);
+int zpci_reenable_device(struct zpci_dev *zdev)
+{
+ u8 status;
+ int rc;
+
+ rc = zpci_enable_device(zdev);
+ if (rc)
+ return rc;
+
+ rc = zpci_iommu_register_ioat(zdev, &status);
+ if (rc)
+ zpci_disable_device(zdev);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(zpci_reenable_device);
+
int zpci_disable_device(struct zpci_dev *zdev)
{
u32 fh = zdev->fh;
@@ -753,20 +754,20 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
* equivalent to its state during boot when first probing a driver.
* Consequently after reset the PCI function requires re-initialization via the
* common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
- * and enabling the function via e.g.pci_enablde_device_flags().The caller
+ * and enabling the function via e.g. pci_enable_device_flags(). The caller
* must guard against concurrent reset attempts.
*
* In most cases this function should not be called directly but through
* pci_reset_function() or pci_reset_bus() which handle the save/restore and
- * locking.
+ * locking - asserted by lockdep.
*
* Return: 0 on success and an error value otherwise
*/
int zpci_hot_reset_device(struct zpci_dev *zdev)
{
- u8 status;
int rc;
+ lockdep_assert_held(&zdev->state_lock);
zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
if (zdev_enabled(zdev)) {
/* Disables device access, DMAs and IRQs (reset state) */
@@ -782,21 +783,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
return rc;
}
- rc = zpci_enable_device(zdev);
- if (rc)
- return rc;
+ rc = zpci_reenable_device(zdev);
- if (zdev->dma_table)
- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status);
- else
- rc = zpci_dma_init_device(zdev);
- if (rc) {
- zpci_disable_device(zdev);
- return rc;
- }
-
- return 0;
+ return rc;
}
/**
@@ -805,8 +794,9 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
* @fh: Current Function Handle of the device to be created
* @state: Initial state after creation either Standby or Configured
*
- * Creates a new zpci device and adds it to its, possibly newly created, zbus
- * as well as zpci_list.
+ * Allocates a new struct zpci_dev and queries the platform for its details.
+ * If successful the device can subsequently be added to the zPCI subsystem
+ * using zpci_add_device().
*
* Returns: the zdev on success or an error pointer otherwise
*/
@@ -815,7 +805,6 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
struct zpci_dev *zdev;
int rc;
- zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
if (!zdev)
return ERR_PTR(-ENOMEM);
@@ -830,10 +819,35 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
goto error;
zdev->state = state;
- kref_init(&zdev->kref);
- mutex_init(&zdev->lock);
+ mutex_init(&zdev->state_lock);
+ mutex_init(&zdev->fmb_lock);
mutex_init(&zdev->kzdev_lock);
+ return zdev;
+
+error:
+ zpci_dbg(0, "crt fid:%x, rc:%d\n", fid, rc);
+ kfree(zdev);
+ return ERR_PTR(rc);
+}
+
+/**
+ * zpci_add_device() - Add a previously created zPCI device to the zPCI subsystem
+ * @zdev: The zPCI device to be added
+ *
+ * A struct zpci_dev is added to the zPCI subsystem and to a virtual PCI bus creating
+ * a new one as necessary. A hotplug slot is created and events start to be handled.
+ * If successful from this point on zpci_zdev_get() and zpci_zdev_put() must be used.
+ * If adding the struct zpci_dev fails the device was not added and should be freed.
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int zpci_add_device(struct zpci_dev *zdev)
+{
+ int rc;
+
+ mutex_lock(&zpci_add_remove_lock);
+ zpci_dbg(1, "add fid:%x, fh:%x, c:%d\n", zdev->fid, zdev->fh, zdev->state);
rc = zpci_init_iommu(zdev);
if (rc)
goto error;
@@ -842,18 +856,19 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
if (rc)
goto error_destroy_iommu;
+ kref_init(&zdev->kref);
spin_lock(&zpci_list_lock);
list_add_tail(&zdev->entry, &zpci_list);
spin_unlock(&zpci_list_lock);
-
- return zdev;
+ mutex_unlock(&zpci_add_remove_lock);
+ return 0;
error_destroy_iommu:
zpci_destroy_iommu(zdev);
error:
- zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
- kfree(zdev);
- return ERR_PTR(rc);
+ zpci_dbg(0, "add fid:%x, rc:%d\n", zdev->fid, rc);
+ mutex_unlock(&zpci_add_remove_lock);
+ return rc;
}
bool zpci_is_device_configured(struct zpci_dev *zdev)
@@ -870,32 +885,15 @@ bool zpci_is_device_configured(struct zpci_dev *zdev)
* @fh: The general function handle supplied by the platform
*
* Given a device in the configuration state Configured, enables, scans and
- * adds it to the common code PCI subsystem if possible. If the PCI device is
- * parked because we can not yet create a PCI bus because we have not seen
- * function 0, it is ignored but will be scanned once function 0 appears.
- * If any failure occurs, the zpci_dev is left disabled.
+ * adds it to the common code PCI subsystem if possible. If any failure occurs,
+ * the zpci_dev is left disabled.
*
* Return: 0 on success, or an error code otherwise
*/
int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
{
- int rc;
-
zpci_update_fh(zdev, fh);
- /* the PCI function will be scanned once function 0 appears */
- if (!zdev->zbus->bus)
- return 0;
-
- /* For function 0 on a multi-function bus scan whole bus as we might
- * have to pick up existing functions waiting for it to allow creating
- * the PCI bus
- */
- if (zdev->devfn == 0 && zdev->zbus->multifunction)
- rc = zpci_bus_scan_bus(zdev->zbus);
- else
- rc = zpci_bus_scan_device(zdev);
-
- return rc;
+ return zpci_bus_scan_device(zdev);
}
/**
@@ -912,14 +910,13 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
{
int rc;
+ lockdep_assert_held(&zdev->state_lock);
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+ return 0;
+
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
- if (zdev->dma_table) {
- rc = zpci_dma_exit_device(zdev);
- if (rc)
- return rc;
- }
if (zdev_enabled(zdev)) {
rc = zpci_disable_device(zdev);
if (rc)
@@ -936,65 +933,48 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
}
/**
- * zpci_device_reserved() - Mark device as resverved
+ * zpci_device_reserved() - Mark device as reserved
* @zdev: the zpci_dev that was reserved
*
* Handle the case that a given zPCI function was reserved by another system.
- * After a call to this function the zpci_dev can not be found via
- * get_zdev_by_fid() anymore but may still be accessible via existing
- * references though it will not be functional anymore.
*/
void zpci_device_reserved(struct zpci_dev *zdev)
{
- if (zdev->has_hp_slot)
- zpci_exit_slot(zdev);
- /*
- * Remove device from zpci_list as it is going away. This also
- * makes sure we ignore subsequent zPCI events for this device.
- */
- spin_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- spin_unlock(&zpci_list_lock);
+ lockdep_assert_held(&zdev->state_lock);
+ /* We may declare the device reserved multiple times */
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ return;
zdev->state = ZPCI_FN_STATE_RESERVED;
zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+ /*
+ * The underlying device is gone. Allow the zdev to be freed
+ * as soon as all other references are gone by accounting for
+ * the removal as a dropped reference.
+ */
zpci_zdev_put(zdev);
}
void zpci_release_device(struct kref *kref)
{
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
- int ret;
- if (zdev->zbus->bus)
- zpci_bus_remove_device(zdev, false);
+ lockdep_assert_held(&zpci_add_remove_lock);
+ WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED);
+ /*
+ * We already hold zpci_list_lock thanks to kref_put_lock().
+ * This makes sure no new reference can be taken from the list.
+ */
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
- if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
- if (zdev_enabled(zdev))
- zpci_disable_device(zdev);
+ if (zdev->has_hp_slot)
+ zpci_exit_slot(zdev);
- switch (zdev->state) {
- case ZPCI_FN_STATE_CONFIGURED:
- ret = sclp_pci_deconfigure(zdev->fid);
- zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
- fallthrough;
- case ZPCI_FN_STATE_STANDBY:
- if (zdev->has_hp_slot)
- zpci_exit_slot(zdev);
- spin_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- spin_unlock(&zpci_list_lock);
- zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
- fallthrough;
- case ZPCI_FN_STATE_RESERVED:
- if (zdev->has_resources)
- zpci_cleanup_bus_resources(zdev);
- zpci_bus_device_unregister(zdev);
- zpci_destroy_iommu(zdev);
- fallthrough;
- default:
- break;
- }
+ if (zdev->has_resources)
+ zpci_cleanup_bus_resources(zdev);
+
+ zpci_bus_device_unregister(zdev);
+ zpci_destroy_iommu(zdev);
zpci_dbg(3, "rem fid:%x\n", zdev->fid);
kfree_rcu(zdev, rcu);
}
@@ -1111,7 +1091,7 @@ char * __init pcibios_setup(char *str)
return NULL;
}
if (!strcmp(str, "nomio")) {
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO;
+ clear_machine_feature(MFEATURE_PCI_MIO);
return NULL;
}
if (!strcmp(str, "force_floating")) {
@@ -1130,6 +1110,50 @@ bool zpci_is_enabled(void)
return s390_pci_initialized;
}
+static int zpci_cmp_rid(void *priv, const struct list_head *a,
+ const struct list_head *b)
+{
+ struct zpci_dev *za = container_of(a, struct zpci_dev, entry);
+ struct zpci_dev *zb = container_of(b, struct zpci_dev, entry);
+
+ /*
+ * PCI functions without RID available maintain original order
+ * between themselves but sort before those with RID.
+ */
+ if (za->rid == zb->rid)
+ return za->rid_available > zb->rid_available;
+ /*
+ * PCI functions with RID sort by RID ascending.
+ */
+ return za->rid > zb->rid;
+}
+
+static void zpci_add_devices(struct list_head *scan_list)
+{
+ struct zpci_dev *zdev, *tmp;
+
+ list_sort(NULL, scan_list, &zpci_cmp_rid);
+ list_for_each_entry_safe(zdev, tmp, scan_list, entry) {
+ list_del_init(&zdev->entry);
+ if (zpci_add_device(zdev))
+ kfree(zdev);
+ }
+}
+
+int zpci_scan_devices(void)
+{
+ LIST_HEAD(scan_list);
+ int rc;
+
+ rc = clp_scan_pci_devices(&scan_list);
+ if (rc)
+ return rc;
+
+ zpci_add_devices(&scan_list);
+ zpci_bus_scan_busses();
+ return 0;
+}
+
static int __init pci_base_init(void)
{
int rc;
@@ -1142,9 +1166,9 @@ static int __init pci_base_init(void)
return 0;
}
- if (MACHINE_HAS_PCI_MIO) {
+ if (test_machine_feature(MFEATURE_PCI_MIO)) {
static_branch_enable(&have_mio);
- ctl_set_bit(2, 5);
+ system_ctl_set_bit(2, CR2_MIO_ADDRESSING_BIT);
}
rc = zpci_debug_init();
@@ -1159,21 +1183,18 @@ static int __init pci_base_init(void)
if (rc)
goto out_irq;
- rc = zpci_dma_init();
+ rc = zpci_scan_devices();
if (rc)
- goto out_dma;
+ goto out_find;
- rc = clp_scan_pci_devices();
+ rc = zpci_fw_sysfs_init();
if (rc)
goto out_find;
- zpci_bus_scan_busses();
s390_pci_initialized = 1;
return 0;
out_find:
- zpci_dma_exit();
-out_dma:
zpci_irq_exit();
out_irq:
zpci_mem_exit();
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 6a8da1b742ae..72adc8f6e94f 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -7,18 +7,17 @@
*
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/err.h>
-#include <linux/export.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/jump_label.h>
#include <linux/pci.h>
#include <linux/printk.h>
+#include <linux/dma-direct.h>
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
@@ -41,26 +40,21 @@ static int zpci_nb_devices;
*/
static int zpci_bus_prepare_device(struct zpci_dev *zdev)
{
- struct resource_entry *window, *n;
- struct resource *res;
- int rc;
+ int rc, i;
if (!zdev_enabled(zdev)) {
rc = zpci_enable_device(zdev);
- if (rc)
- return rc;
- rc = zpci_dma_init_device(zdev);
if (rc) {
- zpci_disable_device(zdev);
+ pr_err("Enabling PCI function %08x failed\n", zdev->fid);
return rc;
}
}
if (!zdev->has_resources) {
- zpci_setup_bus_resources(zdev, &zdev->zbus->resources);
- resource_list_for_each_entry_safe(window, n, &zdev->zbus->resources) {
- res = window->res;
- pci_bus_add_resource(zdev->zbus->bus, res, 0);
+ zpci_setup_bus_resources(zdev);
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (zdev->bars[i].res)
+ pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res);
}
}
@@ -87,9 +81,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev)
if (!pdev)
return -ENODEV;
- pci_bus_add_device(pdev);
pci_lock_rescan_remove();
- pci_bus_add_devices(zdev->zbus->bus);
+ pci_bus_add_device(pdev);
pci_unlock_rescan_remove();
return 0;
@@ -132,11 +125,8 @@ void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error)
* @zbus: the zbus to be scanned
*
* Enables and scans all PCI functions on the bus making them available to the
- * common PCI code. If there is no function 0 on the zbus nothing is scanned. If
- * a function does not have a slot yet because it was added to the zbus before
- * function 0 the slot is created. If a PCI function fails to be initialized
- * an error will be returned but attempts will still be made for all other
- * functions on the bus.
+ * common PCI code. If a PCI function fails to be initialized an error will be
+ * returned but attempts will still be made for all other functions on the bus.
*
* Return: 0 on success, an error value otherwise
*/
@@ -179,9 +169,15 @@ void zpci_bus_scan_busses(void)
mutex_unlock(&zbus_list_lock);
}
+static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
+{
+ return !s390_pci_no_rid && zdev->rid_available &&
+ !zdev->vfn;
+}
+
/* zpci_bus_create_pci_bus - Create the PCI bus associated with this zbus
* @zbus: the zbus holding the zdevices
- * @fr: PCI root function that will determine the bus's domain, and bus speeed
+ * @fr: PCI root function that will determine the bus's domain, and bus speed
* @ops: the pci operations
*
* The PCI function @fr determines the domain (its UID), multifunction property
@@ -199,7 +195,7 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, s
return domain;
zbus->domain_nr = domain;
- zbus->multifunction = fr->rid_available;
+ zbus->multifunction = zpci_bus_is_multifunction_root(fr);
zbus->max_bus_speed = fr->max_bus_speed;
/*
@@ -213,7 +209,6 @@ static int zpci_bus_create_pci_bus(struct zpci_bus *zbus, struct zpci_dev *fr, s
}
zbus->bus = bus;
- pci_bus_add_devices(bus);
return 0;
}
@@ -244,13 +239,15 @@ static void zpci_bus_put(struct zpci_bus *zbus)
kref_put(&zbus->kref, zpci_bus_release);
}
-static struct zpci_bus *zpci_bus_get(int pchid)
+static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
{
struct zpci_bus *zbus;
mutex_lock(&zbus_list_lock);
list_for_each_entry(zbus, &zbus_list, bus_next) {
- if (pchid == zbus->pchid) {
+ if (!zbus->multifunction)
+ continue;
+ if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) {
kref_get(&zbus->kref);
goto out_unlock;
}
@@ -261,7 +258,7 @@ out_unlock:
return zbus;
}
-static struct zpci_bus *zpci_bus_alloc(int pchid)
+static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
{
struct zpci_bus *zbus;
@@ -269,7 +266,8 @@ static struct zpci_bus *zpci_bus_alloc(int pchid)
if (!zbus)
return NULL;
- zbus->pchid = pchid;
+ zbus->topo = topo;
+ zbus->topo_is_tid = topo_is_tid;
INIT_LIST_HEAD(&zbus->bus_next);
mutex_lock(&zbus_list_lock);
list_add_tail(&zbus->bus_next, &zbus_list);
@@ -286,10 +284,32 @@ static struct zpci_bus *zpci_bus_alloc(int pchid)
return zbus;
}
+static void pci_dma_range_setup(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ u64 aligned_end, size;
+ dma_addr_t dma_start;
+ int ret;
+
+ dma_start = PAGE_ALIGN(zdev->start_dma);
+ aligned_end = PAGE_ALIGN_DOWN(zdev->end_dma + 1);
+ if (aligned_end >= dma_start)
+ size = aligned_end - dma_start;
+ else
+ size = 0;
+ WARN_ON_ONCE(size == 0);
+
+ ret = dma_direct_set_offset(&pdev->dev, 0, dma_start, size);
+ if (ret)
+ pr_err("Failed to allocate DMA range map for %s\n", pci_name(pdev));
+}
+
void pcibios_bus_add_device(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
+ pci_dma_range_setup(pdev);
+
/*
* With pdev->no_vf_scan the common PCI probing code does not
* perform PF/VF linking.
@@ -304,19 +324,22 @@ static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
int rc = -EINVAL;
+ if (zbus->multifunction) {
+ if (!zdev->rid_available) {
+ WARN_ONCE(1, "rid_available not set for multifunction\n");
+ return rc;
+ }
+ zdev->devfn = zdev->rid & ZPCI_RID_MASK_DEVFN;
+ }
+
if (zbus->function[zdev->devfn]) {
pr_err("devfn %04x is already assigned\n", zdev->devfn);
return rc;
}
-
zdev->zbus = zbus;
zbus->function[zdev->devfn] = zdev;
zpci_nb_devices++;
- if (zbus->multifunction && !zdev->rid_available) {
- WARN_ONCE(1, "rid_available not set for multifunction\n");
- goto error;
- }
rc = zpci_init_slot(zdev);
if (rc)
goto error;
@@ -331,10 +354,25 @@ error:
return rc;
}
+static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ struct pci_dev *pdev;
+
+ if (!zdev->vfn)
+ return false;
+
+ pdev = zpci_iov_find_parent_pf(zbus, zdev);
+ if (!pdev)
+ return true;
+ pci_dev_put(pdev);
+ return false;
+}
+
int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
{
+ bool topo_is_tid = zdev->tid_avail;
struct zpci_bus *zbus = NULL;
- int rc = -EBADF;
+ int topo, rc = -EBADF;
if (zpci_nb_devices == ZPCI_NR_DEVICES) {
pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n",
@@ -342,14 +380,19 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
return -ENOSPC;
}
- if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS)
- return -EINVAL;
-
- if (!s390_pci_no_rid && zdev->rid_available)
- zbus = zpci_bus_get(zdev->pchid);
+ topo = topo_is_tid ? zdev->tid : zdev->pchid;
+ zbus = zpci_bus_get(topo, topo_is_tid);
+ /*
+ * An isolated VF gets its own domain/bus even if there exists
+ * a matching domain/bus already
+ */
+ if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) {
+ zpci_bus_put(zbus);
+ zbus = NULL;
+ }
if (!zbus) {
- zbus = zpci_bus_alloc(zdev->pchid);
+ zbus = zpci_bus_alloc(topo, topo_is_tid);
if (!zbus)
return -ENOMEM;
}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index e96c9860e064..ae3d7a9159bd 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -6,6 +6,10 @@
* Pierre Morel <pmorel@linux.ibm.com>
*
*/
+#ifndef __S390_PCI_BUS_H
+#define __S390_PCI_BUS_H
+
+#include <linux/pci.h>
int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
void zpci_bus_device_unregister(struct zpci_dev *zdev);
@@ -17,11 +21,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev);
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
void zpci_release_device(struct kref *kref);
-static inline void zpci_zdev_put(struct zpci_dev *zdev)
-{
- if (zdev)
- kref_put(&zdev->kref, zpci_release_device);
-}
+
+void zpci_zdev_put(struct zpci_dev *zdev);
static inline void zpci_zdev_get(struct zpci_dev *zdev)
{
@@ -30,8 +31,7 @@ static inline void zpci_zdev_get(struct zpci_dev *zdev)
int zpci_alloc_domain(int domain);
void zpci_free_domain(int domain);
-int zpci_setup_bus_resources(struct zpci_dev *zdev,
- struct list_head *resources);
+int zpci_setup_bus_resources(struct zpci_dev *zdev);
static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
unsigned int devfn)
@@ -41,3 +41,4 @@ static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn];
}
+#endif /* __S390_PCI_BUS_H */
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index ee367798e388..177aa0214547 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -6,10 +6,8 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
-#include <linux/compat.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
@@ -20,6 +18,7 @@
#include <asm/asm-extable.h>
#include <asm/pci_debug.h>
#include <asm/pci_clp.h>
+#include <asm/asm.h>
#include <asm/clp.h>
#include <uapi/asm/clp.h>
@@ -52,18 +51,20 @@ static inline void zpci_err_clp(unsigned int rsp, int rc)
static inline int clp_get_ilp(unsigned long *ilp)
{
unsigned long mask;
- int cc = 3;
+ int cc, exception;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1)
- : "cc");
+ : CC_OUT(cc, cc), [mask] "=d" (mask), [exc] "+d" (exception)
+ : [cmd] "a" (1)
+ : CC_CLOBBER);
*ilp = mask;
- return cc;
+ return exception ? 3 : CC_TRANSFORM(cc);
}
/*
@@ -72,19 +73,20 @@ static inline int clp_get_ilp(unsigned long *ilp)
static __always_inline int clp_req(void *data, unsigned int lps)
{
struct { u8 _[CLP_BLK_SIZE]; } *req = data;
+ int cc, exception;
u64 ignored;
- int cc = 3;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req)
+ : CC_OUT(cc, cc), [ign] "=d" (ignored), "+m" (*req), [exc] "+d" (exception)
: [req] "a" (req), [lps] "i" (lps)
- : "cc");
- return cc;
+ : CC_CLOBBER);
+ return exception ? 3 : CC_TRANSFORM(cc);
}
static void *clp_alloc_block(gfp_t gfp_mask)
@@ -108,6 +110,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->version = response->version;
zdev->maxstbl = response->maxstbl;
zdev->dtsm = response->dtsm;
+ zdev->rtr_avail = response->rtr;
switch (response->version) {
case 1:
@@ -162,12 +165,16 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
zdev->pft = response->pft;
zdev->vfn = response->vfn;
zdev->port = response->port;
+ zdev->fidparm = response->fidparm;
zdev->uid = response->uid;
zdev->fmb_length = sizeof(u32) * response->fmb_len;
- zdev->rid_available = response->rid_avail;
zdev->is_physfn = response->is_physfn;
- if (!s390_pci_no_rid && zdev->rid_available)
- zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN;
+ zdev->rid_available = response->rid_avail;
+ if (zdev->rid_available)
+ zdev->rid = response->rid;
+ zdev->tid_avail = response->tid_avail;
+ if (zdev->tid_avail)
+ zdev->tid = response->tid;
memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
if (response->util_str_avail) {
@@ -407,6 +414,7 @@ static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid,
static void __clp_add(struct clp_fh_list_entry *entry, void *data)
{
+ struct list_head *scan_list = data;
struct zpci_dev *zdev;
if (!entry->vendor_id)
@@ -417,10 +425,13 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
zpci_zdev_put(zdev);
return;
}
- zpci_create_device(entry->fid, entry->fh, entry->config_state);
+ zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state);
+ if (IS_ERR(zdev))
+ return;
+ list_add_tail(&zdev->entry, scan_list);
}
-int clp_scan_pci_devices(void)
+int clp_scan_pci_devices(struct list_head *scan_list)
{
struct clp_req_rsp_list_pci *rrb;
int rc;
@@ -429,7 +440,7 @@ int clp_scan_pci_devices(void)
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, NULL, __clp_add);
+ rc = clp_list_pci(rrb, scan_list, __clp_add);
clp_free_block(rrb);
return rc;
@@ -638,7 +649,7 @@ static long clp_misc_ioctl(struct file *filp, unsigned int cmd,
if (cmd != CLP_SYNC)
return -EINVAL;
- argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg;
+ argp = (void __user *)arg;
if (copy_from_user(&req, argp, sizeof(req)))
return -EFAULT;
if (req.r != 0)
@@ -656,8 +667,6 @@ static const struct file_operations clp_misc_fops = {
.open = nonseekable_open,
.release = clp_misc_release,
.unlocked_ioctl = clp_misc_ioctl,
- .compat_ioctl = clp_misc_ioctl,
- .llseek = no_llseek,
};
static struct miscdevice clp_misc_device = {
@@ -666,9 +675,4 @@ static struct miscdevice clp_misc_device = {
.fops = &clp_misc_fops,
};
-static int __init clp_misc_init(void)
-{
- return misc_register(&clp_misc_device);
-}
-
-device_initcall(clp_misc_init);
+builtin_misc_device(clp_misc_device);
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index ca6bd98eec13..c7ed7bf254b5 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -6,8 +6,7 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/seq_file.h>
@@ -53,9 +52,11 @@ static char *pci_fmt3_names[] = {
};
static char *pci_sw_names[] = {
- "Allocated pages",
"Mapped pages",
"Unmapped pages",
+ "Global RPCITs",
+ "Sync Map RPCITs",
+ "Sync RPCITs",
};
static void pci_fmb_show(struct seq_file *m, char *name[], int length,
@@ -70,12 +71,22 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,
static void pci_sw_counter_show(struct seq_file *m)
{
struct zpci_dev *zdev = m->private;
- atomic64_t *counter = &zdev->allocated_pages;
+ struct zpci_iommu_ctrs *ctrs;
+ atomic64_t *counter;
+ unsigned long flags;
int i;
+ spin_lock_irqsave(&zdev->dom_lock, flags);
+ ctrs = zpci_get_iommu_ctrs(m->private);
+ if (!ctrs)
+ goto unlock;
+
+ counter = &ctrs->mapped_pages;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
atomic64_read(counter));
+unlock:
+ spin_unlock_irqrestore(&zdev->dom_lock, flags);
}
static int pci_perf_show(struct seq_file *m, void *v)
@@ -85,9 +96,9 @@ static int pci_perf_show(struct seq_file *m, void *v)
if (!zdev)
return 0;
- mutex_lock(&zdev->lock);
+ mutex_lock(&zdev->fmb_lock);
if (!zdev->fmb) {
- mutex_unlock(&zdev->lock);
+ mutex_unlock(&zdev->fmb_lock);
seq_puts(m, "FMB statistics disabled\n");
return 0;
}
@@ -124,7 +135,7 @@ static int pci_perf_show(struct seq_file *m, void *v)
}
pci_sw_counter_show(m);
- mutex_unlock(&zdev->lock);
+ mutex_unlock(&zdev->fmb_lock);
return 0;
}
@@ -142,7 +153,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
if (rc)
return rc;
- mutex_lock(&zdev->lock);
+ mutex_lock(&zdev->fmb_lock);
switch (val) {
case 0:
rc = zpci_fmb_disable_device(zdev);
@@ -151,7 +162,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
rc = zpci_fmb_enable_device(zdev);
break;
}
- mutex_unlock(&zdev->lock);
+ mutex_unlock(&zdev->fmb_lock);
return rc ? rc : count;
}
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
deleted file mode 100644
index ea478d11fbd1..000000000000
--- a/arch/s390/pci/pci_dma.c
+++ /dev/null
@@ -1,732 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- * Jan Glauber <jang@linux.vnet.ibm.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/iommu-helper.h>
-#include <linux/dma-map-ops.h>
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <asm/pci_dma.h>
-
-static struct kmem_cache *dma_region_table_cache;
-static struct kmem_cache *dma_page_table_cache;
-static int s390_iommu_strict;
-static u64 s390_iommu_aperture;
-static u32 s390_iommu_aperture_factor = 1;
-
-static int zpci_refresh_global(struct zpci_dev *zdev)
-{
- return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
- zdev->iommu_pages * PAGE_SIZE);
-}
-
-unsigned long *dma_alloc_cpu_table(void)
-{
- unsigned long *table, *entry;
-
- table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
- if (!table)
- return NULL;
-
- for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
- *entry = ZPCI_TABLE_INVALID;
- return table;
-}
-
-static void dma_free_cpu_table(void *table)
-{
- kmem_cache_free(dma_region_table_cache, table);
-}
-
-static unsigned long *dma_alloc_page_table(void)
-{
- unsigned long *table, *entry;
-
- table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
- if (!table)
- return NULL;
-
- for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
- *entry = ZPCI_PTE_INVALID;
- return table;
-}
-
-static void dma_free_page_table(void *table)
-{
- kmem_cache_free(dma_page_table_cache, table);
-}
-
-static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
-{
- unsigned long old_rte, rte;
- unsigned long *sto;
-
- rte = READ_ONCE(*rtep);
- if (reg_entry_isvalid(rte)) {
- sto = get_rt_sto(rte);
- } else {
- sto = dma_alloc_cpu_table();
- if (!sto)
- return NULL;
-
- set_rt_sto(&rte, virt_to_phys(sto));
- validate_rt_entry(&rte);
- entry_clr_protected(&rte);
-
- old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
- if (old_rte != ZPCI_TABLE_INVALID) {
- /* Somone else was faster, use theirs */
- dma_free_cpu_table(sto);
- sto = get_rt_sto(old_rte);
- }
- }
- return sto;
-}
-
-static unsigned long *dma_get_page_table_origin(unsigned long *step)
-{
- unsigned long old_ste, ste;
- unsigned long *pto;
-
- ste = READ_ONCE(*step);
- if (reg_entry_isvalid(ste)) {
- pto = get_st_pto(ste);
- } else {
- pto = dma_alloc_page_table();
- if (!pto)
- return NULL;
- set_st_pto(&ste, virt_to_phys(pto));
- validate_st_entry(&ste);
- entry_clr_protected(&ste);
-
- old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
- if (old_ste != ZPCI_TABLE_INVALID) {
- /* Somone else was faster, use theirs */
- dma_free_page_table(pto);
- pto = get_st_pto(old_ste);
- }
- }
- return pto;
-}
-
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
-{
- unsigned long *sto, *pto;
- unsigned int rtx, sx, px;
-
- rtx = calc_rtx(dma_addr);
- sto = dma_get_seg_table_origin(&rto[rtx]);
- if (!sto)
- return NULL;
-
- sx = calc_sx(dma_addr);
- pto = dma_get_page_table_origin(&sto[sx]);
- if (!pto)
- return NULL;
-
- px = calc_px(dma_addr);
- return &pto[px];
-}
-
-void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
-{
- unsigned long pte;
-
- pte = READ_ONCE(*ptep);
- if (flags & ZPCI_PTE_INVALID) {
- invalidate_pt_entry(&pte);
- } else {
- set_pt_pfaa(&pte, page_addr);
- validate_pt_entry(&pte);
- }
-
- if (flags & ZPCI_TABLE_PROTECTED)
- entry_set_protected(&pte);
- else
- entry_clr_protected(&pte);
-
- xchg(ptep, pte);
-}
-
-static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
- dma_addr_t dma_addr, size_t size, int flags)
-{
- unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- phys_addr_t page_addr = (pa & PAGE_MASK);
- unsigned long *entry;
- int i, rc = 0;
-
- if (!nr_pages)
- return -EINVAL;
-
- if (!zdev->dma_table)
- return -EINVAL;
-
- for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
- if (!entry) {
- rc = -ENOMEM;
- goto undo_cpu_trans;
- }
- dma_update_cpu_trans(entry, page_addr, flags);
- page_addr += PAGE_SIZE;
- dma_addr += PAGE_SIZE;
- }
-
-undo_cpu_trans:
- if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
- flags = ZPCI_PTE_INVALID;
- while (i-- > 0) {
- page_addr -= PAGE_SIZE;
- dma_addr -= PAGE_SIZE;
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
- if (!entry)
- break;
- dma_update_cpu_trans(entry, page_addr, flags);
- }
- }
- return rc;
-}
-
-static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
- size_t size, int flags)
-{
- unsigned long irqflags;
- int ret;
-
- /*
- * With zdev->tlb_refresh == 0, rpcit is not required to establish new
- * translations when previously invalid translation-table entries are
- * validated. With lazy unmap, rpcit is skipped for previously valid
- * entries, but a global rpcit is then required before any address can
- * be re-used, i.e. after each iommu bitmap wrap-around.
- */
- if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
- if (!zdev->tlb_refresh)
- return 0;
- } else {
- if (!s390_iommu_strict)
- return 0;
- }
-
- ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
- PAGE_ALIGN(size));
- if (ret == -ENOMEM && !s390_iommu_strict) {
- /* enable the hypervisor to free some resources */
- if (zpci_refresh_global(zdev))
- goto out;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
- bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
- zdev->lazy_bitmap, zdev->iommu_pages);
- bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
- ret = 0;
- }
-out:
- return ret;
-}
-
-static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
- dma_addr_t dma_addr, size_t size, int flags)
-{
- int rc;
-
- rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
- if (rc)
- return rc;
-
- rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
- if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
- __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
-
- return rc;
-}
-
-void dma_free_seg_table(unsigned long entry)
-{
- unsigned long *sto = get_rt_sto(entry);
- int sx;
-
- for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
- if (reg_entry_isvalid(sto[sx]))
- dma_free_page_table(get_st_pto(sto[sx]));
-
- dma_free_cpu_table(sto);
-}
-
-void dma_cleanup_tables(unsigned long *table)
-{
- int rtx;
-
- if (!table)
- return;
-
- for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
- if (reg_entry_isvalid(table[rtx]))
- dma_free_seg_table(table[rtx]);
-
- dma_free_cpu_table(table);
-}
-
-static unsigned long __dma_alloc_iommu(struct device *dev,
- unsigned long start, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
- return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
- start, size, zdev->start_dma >> PAGE_SHIFT,
- dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT),
- 0);
-}
-
-static dma_addr_t dma_alloc_address(struct device *dev, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long offset, flags;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
- if (offset == -1) {
- if (!s390_iommu_strict) {
- /* global flush before DMA addresses are reused */
- if (zpci_refresh_global(zdev))
- goto out_error;
-
- bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
- zdev->lazy_bitmap, zdev->iommu_pages);
- bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
- }
- /* wrap-around */
- offset = __dma_alloc_iommu(dev, 0, size);
- if (offset == -1)
- goto out_error;
- }
- zdev->next_bit = offset + size;
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-
- return zdev->start_dma + offset * PAGE_SIZE;
-
-out_error:
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
- return DMA_MAPPING_ERROR;
-}
-
-static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long flags, offset;
-
- offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- if (!zdev->iommu_bitmap)
- goto out;
-
- if (s390_iommu_strict)
- bitmap_clear(zdev->iommu_bitmap, offset, size);
- else
- bitmap_set(zdev->lazy_bitmap, offset, size);
-
-out:
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-}
-
-static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
-{
- struct {
- unsigned long rc;
- unsigned long addr;
- } __packed data = {rc, addr};
-
- zpci_err_hex(&data, sizeof(data));
-}
-
-static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long pa = page_to_phys(page) + offset;
- int flags = ZPCI_PTE_VALID;
- unsigned long nr_pages;
- dma_addr_t dma_addr;
- int ret;
-
- /* This rounds up number of pages based on size and offset */
- nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
- dma_addr = dma_alloc_address(dev, nr_pages);
- if (dma_addr == DMA_MAPPING_ERROR) {
- ret = -ENOSPC;
- goto out_err;
- }
-
- /* Use rounded up size */
- size = nr_pages * PAGE_SIZE;
-
- if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
- flags |= ZPCI_TABLE_PROTECTED;
-
- ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
- if (ret)
- goto out_free;
-
- atomic64_add(nr_pages, &zdev->mapped_pages);
- return dma_addr + (offset & ~PAGE_MASK);
-
-out_free:
- dma_free_address(dev, dma_addr, nr_pages);
-out_err:
- zpci_err("map error:\n");
- zpci_err_dma(ret, pa);
- return DMA_MAPPING_ERROR;
-}
-
-static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- int npages, ret;
-
- npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
- dma_addr = dma_addr & PAGE_MASK;
- ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
- ZPCI_PTE_INVALID);
- if (ret) {
- zpci_err("unmap error:\n");
- zpci_err_dma(ret, dma_addr);
- return;
- }
-
- atomic64_add(npages, &zdev->unmapped_pages);
- dma_free_address(dev, dma_addr, npages);
-}
-
-static void *s390_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- struct page *page;
- phys_addr_t pa;
- dma_addr_t map;
-
- size = PAGE_ALIGN(size);
- page = alloc_pages(flag | __GFP_ZERO, get_order(size));
- if (!page)
- return NULL;
-
- pa = page_to_phys(page);
- map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
- if (dma_mapping_error(dev, map)) {
- __free_pages(page, get_order(size));
- return NULL;
- }
-
- atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
- if (dma_handle)
- *dma_handle = map;
- return phys_to_virt(pa);
-}
-
-static void s390_dma_free(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
- size = PAGE_ALIGN(size);
- atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
- s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
- free_pages((unsigned long)vaddr, get_order(size));
-}
-
-/* Map a segment into a contiguous dma address area */
-static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
- size_t size, dma_addr_t *handle,
- enum dma_data_direction dir)
-{
- unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- dma_addr_t dma_addr_base, dma_addr;
- int flags = ZPCI_PTE_VALID;
- struct scatterlist *s;
- phys_addr_t pa = 0;
- int ret;
-
- dma_addr_base = dma_alloc_address(dev, nr_pages);
- if (dma_addr_base == DMA_MAPPING_ERROR)
- return -ENOMEM;
-
- dma_addr = dma_addr_base;
- if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
- flags |= ZPCI_TABLE_PROTECTED;
-
- for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
- pa = page_to_phys(sg_page(s));
- ret = __dma_update_trans(zdev, pa, dma_addr,
- s->offset + s->length, flags);
- if (ret)
- goto unmap;
-
- dma_addr += s->offset + s->length;
- }
- ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
- if (ret)
- goto unmap;
-
- *handle = dma_addr_base;
- atomic64_add(nr_pages, &zdev->mapped_pages);
-
- return ret;
-
-unmap:
- dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
- ZPCI_PTE_INVALID);
- dma_free_address(dev, dma_addr_base, nr_pages);
- zpci_err("map error:\n");
- zpci_err_dma(ret, pa);
- return ret;
-}
-
-static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nr_elements, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s = sg, *start = sg, *dma = sg;
- unsigned int max = dma_get_max_seg_size(dev);
- unsigned int size = s->offset + s->length;
- unsigned int offset = s->offset;
- int count = 0, i, ret;
-
- for (i = 1; i < nr_elements; i++) {
- s = sg_next(s);
-
- s->dma_length = 0;
-
- if (s->offset || (size & ~PAGE_MASK) ||
- size + s->length > max) {
- ret = __s390_dma_map_sg(dev, start, size,
- &dma->dma_address, dir);
- if (ret)
- goto unmap;
-
- dma->dma_address += offset;
- dma->dma_length = size - offset;
-
- size = offset = s->offset;
- start = s;
- dma = sg_next(dma);
- count++;
- }
- size += s->length;
- }
- ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir);
- if (ret)
- goto unmap;
-
- dma->dma_address += offset;
- dma->dma_length = size - offset;
-
- return count + 1;
-unmap:
- for_each_sg(sg, s, count, i)
- s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
- dir, attrs);
-
- return ret;
-}
-
-static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nr_elements, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nr_elements, i) {
- if (s->dma_length)
- s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
- dir, attrs);
- s->dma_address = 0;
- s->dma_length = 0;
- }
-}
-
-int zpci_dma_init_device(struct zpci_dev *zdev)
-{
- u8 status;
- int rc;
-
- /*
- * At this point, if the device is part of an IOMMU domain, this would
- * be a strong hint towards a bug in the IOMMU API (common) code and/or
- * simultaneous access via IOMMU and DMA API. So let's issue a warning.
- */
- WARN_ON(zdev->s390_domain);
-
- spin_lock_init(&zdev->iommu_bitmap_lock);
-
- zdev->dma_table = dma_alloc_cpu_table();
- if (!zdev->dma_table) {
- rc = -ENOMEM;
- goto out;
- }
-
- /*
- * Restrict the iommu bitmap size to the minimum of the following:
- * - s390_iommu_aperture which defaults to high_memory
- * - 3-level pagetable address limit minus start_dma offset
- * - DMA address range allowed by the hardware (clp query pci fn)
- *
- * Also set zdev->end_dma to the actual end address of the usable
- * range, instead of the theoretical maximum as reported by hardware.
- *
- * This limits the number of concurrently usable DMA mappings since
- * for each DMA mapped memory address we need a DMA address including
- * extra DMA addresses for multiple mappings of the same memory address.
- */
- zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
- zdev->iommu_size = min3(s390_iommu_aperture,
- ZPCI_TABLE_SIZE_RT - zdev->start_dma,
- zdev->end_dma - zdev->start_dma + 1);
- zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
- zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
- zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
- if (!zdev->iommu_bitmap) {
- rc = -ENOMEM;
- goto free_dma_table;
- }
- if (!s390_iommu_strict) {
- zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
- if (!zdev->lazy_bitmap) {
- rc = -ENOMEM;
- goto free_bitmap;
- }
-
- }
- if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status)) {
- rc = -EIO;
- goto free_bitmap;
- }
-
- return 0;
-free_bitmap:
- vfree(zdev->iommu_bitmap);
- zdev->iommu_bitmap = NULL;
- vfree(zdev->lazy_bitmap);
- zdev->lazy_bitmap = NULL;
-free_dma_table:
- dma_free_cpu_table(zdev->dma_table);
- zdev->dma_table = NULL;
-out:
- return rc;
-}
-
-int zpci_dma_exit_device(struct zpci_dev *zdev)
-{
- int cc = 0;
-
- /*
- * At this point, if the device is part of an IOMMU domain, this would
- * be a strong hint towards a bug in the IOMMU API (common) code and/or
- * simultaneous access via IOMMU and DMA API. So let's issue a warning.
- */
- WARN_ON(zdev->s390_domain);
- if (zdev_enabled(zdev))
- cc = zpci_unregister_ioat(zdev, 0);
- /*
- * cc == 3 indicates the function is gone already. This can happen
- * if the function was deconfigured/disabled suddenly and we have not
- * received a new handle yet.
- */
- if (cc && cc != 3)
- return -EIO;
-
- dma_cleanup_tables(zdev->dma_table);
- zdev->dma_table = NULL;
- vfree(zdev->iommu_bitmap);
- zdev->iommu_bitmap = NULL;
- vfree(zdev->lazy_bitmap);
- zdev->lazy_bitmap = NULL;
- zdev->next_bit = 0;
- return 0;
-}
-
-static int __init dma_alloc_cpu_table_caches(void)
-{
- dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
- ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
- 0, NULL);
- if (!dma_region_table_cache)
- return -ENOMEM;
-
- dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
- ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
- 0, NULL);
- if (!dma_page_table_cache) {
- kmem_cache_destroy(dma_region_table_cache);
- return -ENOMEM;
- }
- return 0;
-}
-
-int __init zpci_dma_init(void)
-{
- s390_iommu_aperture = (u64)virt_to_phys(high_memory);
- if (!s390_iommu_aperture_factor)
- s390_iommu_aperture = ULONG_MAX;
- else
- s390_iommu_aperture *= s390_iommu_aperture_factor;
-
- return dma_alloc_cpu_table_caches();
-}
-
-void zpci_dma_exit(void)
-{
- kmem_cache_destroy(dma_page_table_cache);
- kmem_cache_destroy(dma_region_table_cache);
-}
-
-const struct dma_map_ops s390_pci_dma_ops = {
- .alloc = s390_dma_alloc,
- .free = s390_dma_free,
- .map_sg = s390_dma_map_sg,
- .unmap_sg = s390_dma_unmap_sg,
- .map_page = s390_dma_map_pages,
- .unmap_page = s390_dma_unmap_pages,
- .mmap = dma_common_mmap,
- .get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
- .free_pages = dma_common_free_pages,
- /* dma_supported is unconditionally true without a callback */
-};
-EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
-
-static int __init s390_iommu_setup(char *str)
-{
- if (!strcmp(str, "strict"))
- s390_iommu_strict = 1;
- return 1;
-}
-
-__setup("s390_iommu=", s390_iommu_setup);
-
-static int __init s390_iommu_aperture_setup(char *str)
-{
- if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
- s390_iommu_aperture_factor = 1;
- return 1;
-}
-
-__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b9324ca2eb94..839bd91c056e 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -6,8 +6,7 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/pci.h>
@@ -16,6 +15,7 @@
#include <asm/sclp.h>
#include "pci_bus.h"
+#include "pci_report.h"
/* Content Code Description for PCI Function Error */
struct zpci_ccdf_err {
@@ -53,15 +53,23 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
case PCI_ERS_RESULT_CAN_RECOVER:
case PCI_ERS_RESULT_RECOVERED:
case PCI_ERS_RESULT_NEED_RESET:
+ case PCI_ERS_RESULT_NONE:
return false;
default:
return true;
}
}
-static bool is_passed_through(struct zpci_dev *zdev)
+static bool is_passed_through(struct pci_dev *pdev)
{
- return zdev->s390_domain;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ bool ret;
+
+ mutex_lock(&zdev->kzdev_lock);
+ ret = !!zdev->kzdev;
+ mutex_unlock(&zdev->kzdev_lock);
+
+ return ret;
}
static bool is_driver_supported(struct pci_driver *driver)
@@ -70,10 +78,6 @@ static bool is_driver_supported(struct pci_driver *driver)
return false;
if (!driver->err_handler->error_detected)
return false;
- if (!driver->err_handler->slot_reset)
- return false;
- if (!driver->err_handler->resume)
- return false;
return true;
}
@@ -83,6 +87,7 @@ static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
+ pci_uevent_ers(pdev, ers_res);
if (ers_result_indicates_abort(ers_res))
pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
@@ -98,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
struct zpci_dev *zdev = to_zpci(pdev);
int rc;
+ /* The underlying device may have been disabled by the event */
+ if (!zdev_enabled(zdev))
+ return PCI_ERS_RESULT_NEED_RESET;
+
pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
rc = zpci_reset_load_store_blocked(zdev);
if (rc) {
@@ -106,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
return PCI_ERS_RESULT_NEED_RESET;
}
- if (driver->err_handler->mmio_enabled) {
+ if (driver->err_handler->mmio_enabled)
ers_res = driver->err_handler->mmio_enabled(pdev);
- if (ers_result_indicates_abort(ers_res)) {
- pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
- pci_name(pdev));
- return ers_res;
- } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
- pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
- return ers_res;
- }
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
+ if (ers_result_indicates_abort(ers_res)) {
+ pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+ pci_name(pdev));
+ return ers_res;
+ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+ return ers_res;
}
pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
@@ -142,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
return ers_res;
}
pdev->error_state = pci_channel_io_normal;
- ers_res = driver->err_handler->slot_reset(pdev);
+
+ if (driver->err_handler->slot_reset)
+ ers_res = driver->err_handler->slot_reset(pdev);
+ else
+ ers_res = PCI_ERS_RESULT_NONE;
+
if (ers_result_indicates_abort(ers_res)) {
pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
return ers_res;
@@ -162,6 +178,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
{
pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ char *status_str = "success";
struct pci_driver *driver;
/*
@@ -169,55 +187,77 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
* is unbound or probed and that userspace can't access its
* configuration space while we perform recovery.
*/
- pci_dev_lock(pdev);
+ device_lock(&pdev->dev);
if (pdev->error_state == pci_channel_io_perm_failure) {
ers_res = PCI_ERS_RESULT_DISCONNECT;
goto out_unlock;
}
pdev->error_state = pci_channel_io_frozen;
- if (is_passed_through(to_zpci(pdev))) {
+ if (is_passed_through(pdev)) {
pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
pci_name(pdev));
+ status_str = "failed (pass-through)";
goto out_unlock;
}
driver = to_pci_driver(pdev->dev.driver);
if (!is_driver_supported(driver)) {
- if (!driver)
+ if (!driver) {
pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
pci_name(pdev));
- else
+ status_str = "failed (no driver)";
+ } else {
pr_info("%s: The %s driver bound to the device does not support error recovery\n",
pci_name(pdev),
driver->name);
+ status_str = "failed (no driver support)";
+ }
goto out_unlock;
}
ers_res = zpci_event_notify_error_detected(pdev, driver);
- if (ers_result_indicates_abort(ers_res))
+ if (ers_result_indicates_abort(ers_res)) {
+ status_str = "failed (abort on detection)";
goto out_unlock;
+ }
- if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+ if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
ers_res = zpci_event_do_error_state_clear(pdev, driver);
- if (ers_result_indicates_abort(ers_res))
+ if (ers_result_indicates_abort(ers_res)) {
+ status_str = "failed (abort on MMIO enable)";
goto out_unlock;
+ }
}
if (ers_res == PCI_ERS_RESULT_NEED_RESET)
ers_res = zpci_event_do_reset(pdev, driver);
+ /*
+ * ers_res can be PCI_ERS_RESULT_NONE either because the driver
+ * decided to return it, indicating that it abstains from voting
+ * on how to recover, or because it didn't implement the callback.
+ * Both cases assume, that if there is nothing else causing a
+ * disconnect, we recovered successfully.
+ */
+ if (ers_res == PCI_ERS_RESULT_NONE)
+ ers_res = PCI_ERS_RESULT_RECOVERED;
+
if (ers_res != PCI_ERS_RESULT_RECOVERED) {
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
pr_err("%s: Automatic recovery failed; operator intervention is required\n",
pci_name(pdev));
+ status_str = "failed (driver can't recover)";
goto out_unlock;
}
pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
if (driver->err_handler->resume)
driver->err_handler->resume(pdev);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED);
out_unlock:
- pci_dev_unlock(pdev);
+ device_unlock(&pdev->dev);
+ zpci_report_status(zdev, "recovery", status_str);
return ers_res;
}
@@ -239,7 +279,7 @@ static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
* we will inject the error event and let the guest recover the device
* itself.
*/
- if (is_passed_through(to_zpci(pdev)))
+ if (is_passed_through(pdev))
goto out;
driver = to_pci_driver(pdev->dev.driver);
if (driver && driver->err_handler && driver->err_handler->error_detected)
@@ -253,6 +293,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
struct pci_dev *pdev = NULL;
pci_ers_result_t ers_res;
+ u32 fh = 0;
+ int rc;
zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
@@ -260,6 +302,16 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
zpci_err_hex(ccdf, sizeof(*ccdf));
if (zdev) {
+ mutex_lock(&zdev->state_lock);
+ rc = clp_refresh_fh(zdev->fid, &fh);
+ if (rc)
+ goto no_pdev;
+ if (!fh || ccdf->fh != fh) {
+ /* Ignore events with stale handles */
+ zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
+ ccdf->fid, fh, ccdf->fh);
+ goto no_pdev;
+ }
zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
@@ -272,21 +324,24 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
goto no_pdev;
switch (ccdf->pec) {
- case 0x003a: /* Service Action or Error Recovery Successful */
+ case 0x002a: /* Error event concerns FMB */
+ case 0x002b:
+ case 0x002c:
+ break;
+ case 0x0040: /* Service Action or Error Recovery Failed */
+ case 0x003b:
+ zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
+ break;
+ default: /* PCI function left in the error state attempt to recover */
ers_res = zpci_event_attempt_error_recovery(pdev);
if (ers_res != PCI_ERS_RESULT_RECOVERED)
zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
break;
- default:
- /*
- * Mark as frozen not permanently failed because the device
- * could be subsequently recovered by the platform.
- */
- zpci_event_io_failure(pdev, pci_channel_io_frozen);
- break;
}
pci_dev_put(pdev);
no_pdev:
+ if (zdev)
+ mutex_unlock(&zdev->state_lock);
zpci_zdev_put(zdev);
}
@@ -306,13 +361,27 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
/* Even though the device is already gone we still
* need to free zPCI resources as part of the disable.
*/
- if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
if (zdev_enabled(zdev))
zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
}
+static void zpci_event_reappear(struct zpci_dev *zdev)
+{
+ lockdep_assert_held(&zdev->state_lock);
+ /*
+ * The zdev is in the reserved state. This means that it was presumed to
+ * go away but there are still undropped references. Now, the platform
+ * announced its availability again. Bring back the lingering zdev
+ * to standby. This is safe because we hold a temporary reference
+ * now so that it won't go away. Account for the re-appearance of the
+ * underlying device by incrementing the reference count.
+ */
+ zdev->state = ZPCI_FN_STATE_STANDBY;
+ zpci_zdev_get(zdev);
+ zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+}
+
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
@@ -321,29 +390,48 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec);
+
+ if (existing_zdev)
+ mutex_lock(&zdev->state_lock);
+
switch (ccdf->pec) {
case 0x0301: /* Reserved|Standby -> Configured */
if (!zdev) {
zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
if (IS_ERR(zdev))
break;
+ if (zpci_add_device(zdev)) {
+ kfree(zdev);
+ break;
+ }
} else {
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ zpci_event_reappear(zdev);
/* the configuration request may be stale */
- if (zdev->state != ZPCI_FN_STATE_STANDBY)
+ else if (zdev->state != ZPCI_FN_STATE_STANDBY)
break;
zdev->state = ZPCI_FN_STATE_CONFIGURED;
}
zpci_scan_configured_device(zdev, ccdf->fh);
break;
case 0x0302: /* Reserved -> Standby */
- if (!zdev)
- zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
- else
+ if (!zdev) {
+ zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
+ if (IS_ERR(zdev))
+ break;
+ if (zpci_add_device(zdev)) {
+ kfree(zdev);
+ break;
+ }
+ } else {
+ if (zdev->state == ZPCI_FN_STATE_RESERVED)
+ zpci_event_reappear(zdev);
zpci_update_fh(zdev, ccdf->fh);
+ }
break;
case 0x0303: /* Deconfiguration requested */
if (zdev) {
- /* The event may have been queued before we confirgured
+ /* The event may have been queued before we configured
* the device.
*/
if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
@@ -354,7 +442,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
case 0x0304: /* Configured -> Standby|Reserved */
if (zdev) {
- /* The event may have been queued before we confirgured
+ /* The event may have been queued before we configured
* the device.:
*/
if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
@@ -368,7 +456,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
break;
case 0x0306: /* 0x308 or 0x302 for multiple devices */
zpci_remove_reserved_devices();
- clp_scan_pci_devices();
+ zpci_scan_devices();
break;
case 0x0308: /* Standby -> Reserved */
if (!zdev)
@@ -378,8 +466,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
default:
break;
}
- if (existing_zdev)
+ if (existing_zdev) {
+ mutex_unlock(&zdev->state_lock);
zpci_zdev_put(zdev);
+ }
}
void zpci_event_availability(void *data)
diff --git a/arch/s390/pci/pci_fixup.c b/arch/s390/pci/pci_fixup.c
new file mode 100644
index 000000000000..35688b645098
--- /dev/null
+++ b/arch/s390/pci/pci_fixup.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Exceptions for specific devices,
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ */
+#include <linux/pci.h>
+
+static void zpci_ism_bar_no_mmap(struct pci_dev *pdev)
+{
+ /*
+ * ISM's BAR is special. Drivers written for ISM know
+ * how to handle this but others need to be aware of their
+ * special nature e.g. to prevent attempts to mmap() it.
+ */
+ pdev->non_mappable_bars = 1;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM,
+ PCI_DEVICE_ID_IBM_ISM,
+ zpci_ism_bar_no_mmap);
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 56480be48244..35ceb1bea1c6 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -15,6 +15,7 @@
#include <asm/pci_debug.h>
#include <asm/pci_io.h>
#include <asm/processor.h>
+#include <asm/asm.h>
#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
@@ -57,16 +58,16 @@ static inline void zpci_err_insn_addr(int lvl, u8 insn, u8 cc, u8 status,
/* Modify PCI Function Controls */
static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
{
- u8 cc;
+ int cc;
asm volatile (
" .insn rxy,0xe300000000d0,%[req],%[fib]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib)
- : : "cc");
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [req] "+d" (req), [fib] "+Q" (*fib)
+ :
+ : CC_CLOBBER);
*status = req >> 24 & 0xff;
- return cc;
+ return CC_TRANSFORM(cc);
}
u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
@@ -98,17 +99,16 @@ EXPORT_SYMBOL_GPL(zpci_mod_fc);
static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
{
union register_pair addr_range = {.even = addr, .odd = range};
- u8 cc;
+ int cc;
asm volatile (
" .insn rre,0xb9d30000,%[fn],%[addr_range]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=d" (cc), [fn] "+d" (fn)
+ CC_IPM(cc)
+ : CC_OUT(cc, cc), [fn] "+d" (fn)
: [addr_range] "d" (addr_range.pair)
- : "cc");
+ : CC_CLOBBER);
*status = fn >> 24 & 0xff;
- return cc;
+ return CC_TRANSFORM(cc);
}
int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
@@ -145,7 +145,7 @@ int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
return -EIO;
asm volatile(
- ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]\n"
+ ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]"
: : [ctl] "d" (ctl), [isc] "d" (isc << 27), [iib] "Q" (*iib));
return 0;
@@ -156,20 +156,23 @@ EXPORT_SYMBOL_GPL(zpci_set_irq_ctrl);
static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
{
union register_pair req_off = {.even = req, .odd = offset};
- int cc = -ENXIO;
+ int cc, exception;
u64 __data;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rre,0xb9d20000,%[data],%[req_off]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [data] "=d" (__data),
- [req_off] "+&d" (req_off.pair) :: "cc");
+ : CC_OUT(cc, cc), [data] "=d" (__data),
+ [req_off] "+d" (req_off.pair), [exc] "+d" (exception)
+ :
+ : CC_CLOBBER);
*status = req_off.even >> 24 & 0xff;
*data = __data;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
@@ -222,20 +225,23 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status)
{
union register_pair ioaddr_len = {.even = ioaddr, .odd = len};
- int cc = -ENXIO;
+ int cc, exception;
u64 __data;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rre,0xb9d60000,%[data],%[ioaddr_len]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [data] "=d" (__data),
- [ioaddr_len] "+&d" (ioaddr_len.pair) :: "cc");
+ : CC_OUT(cc, cc), [data] "=d" (__data),
+ [ioaddr_len] "+d" (ioaddr_len.pair), [exc] "+d" (exception)
+ :
+ : CC_CLOBBER);
*status = ioaddr_len.odd >> 24 & 0xff;
*data = __data;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len)
@@ -258,19 +264,20 @@ EXPORT_SYMBOL_GPL(zpci_load);
static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
{
union register_pair req_off = {.even = req, .odd = offset};
- int cc = -ENXIO;
+ int cc, exception;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rre,0xb9d00000,%[data],%[req_off]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [req_off] "+&d" (req_off.pair)
+ : CC_OUT(cc, cc), [req_off] "+d" (req_off.pair), [exc] "+d" (exception)
: [data] "d" (data)
- : "cc");
+ : CC_CLOBBER);
*status = req_off.even >> 24 & 0xff;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
int __zpci_store(u64 data, u64 req, u64 offset)
@@ -311,19 +318,20 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status)
{
union register_pair ioaddr_len = {.even = ioaddr, .odd = len};
- int cc = -ENXIO;
+ int cc, exception;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rre,0xb9d40000,%[data],%[ioaddr_len]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
+ : CC_OUT(cc, cc), [ioaddr_len] "+d" (ioaddr_len.pair), [exc] "+d" (exception)
: [data] "d" (data)
- : "cc", "memory");
+ : CC_CLOBBER_LIST("memory"));
*status = ioaddr_len.odd >> 24 & 0xff;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len)
@@ -345,19 +353,20 @@ EXPORT_SYMBOL_GPL(zpci_store);
/* PCI Store Block */
static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
{
- int cc = -ENXIO;
+ int cc, exception;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [req] "+d" (req)
+ : CC_OUT(cc, cc), [req] "+d" (req), [exc] "+d" (exception)
: [offset] "d" (offset), [data] "Q" (*data)
- : "cc");
+ : CC_CLOBBER);
*status = req >> 24 & 0xff;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
int __zpci_store_block(const u64 *data, u64 req, u64 offset)
@@ -398,19 +407,20 @@ static inline int zpci_write_block_fh(volatile void __iomem *dst,
static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status)
{
- int cc = -ENXIO;
+ int cc, exception;
- asm volatile (
+ exception = 1;
+ asm_inline volatile (
" .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n"
- "0: ipm %[cc]\n"
- " srl %[cc],28\n"
+ "0: lhi %[exc],0\n"
"1:\n"
+ CC_IPM(cc)
EX_TABLE(0b, 1b)
- : [cc] "+d" (cc), [len] "+d" (len)
+ : CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception)
: [ioaddr] "d" (ioaddr), [data] "Q" (*data)
- : "cc");
+ : CC_CLOBBER);
*status = len >> 24 & 0xff;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
int zpci_write_block(volatile void __iomem *dst,
@@ -432,7 +442,7 @@ EXPORT_SYMBOL_GPL(zpci_write_block);
static inline void __pciwb_mio(void)
{
- asm volatile (".insn rre,0xb9d50000,0,0\n");
+ asm volatile (".insn rre,0xb9d50000,0,0");
}
void zpci_barrier(void)
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
index ead062bf2b41..13050ce5c3e9 100644
--- a/arch/s390/pci/pci_iov.c
+++ b/arch/s390/pci/pci_iov.c
@@ -7,8 +7,7 @@
*
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/pci.h>
@@ -60,18 +59,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in
return 0;
}
-int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+/**
+ * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function
+ * @zbus: The bus that the PCI function is on, or would be added on
+ * @zdev: The PCI function
+ *
+ * Finds the parent PF, if it exists and is configured, of the given PCI function
+ * and increments its refcount. Th PF is searched for on the provided bus so the
+ * caller has to ensure that this is the correct bus to search. This function may
+ * be used before adding the PCI function to a zbus.
+ *
+ * Return: Pointer to the struct pci_dev of the parent PF or NULL if it not
+ * found. If the function is not a VF or has no RequesterID information,
+ * NULL is returned as well.
+ */
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
{
- int i, cand_devfn;
- struct zpci_dev *zdev;
+ int i, vfid, devfn, cand_devfn;
struct pci_dev *pdev;
- int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
- int rc = 0;
if (!zbus->multifunction)
- return 0;
-
- /* If the parent PF for the given VF is also configured in the
+ return NULL;
+ /* Non-VFs and VFs without RID available don't have a parent */
+ if (!zdev->vfn || !zdev->rid_available)
+ return NULL;
+ /* Linux vfid starts at 0 vfn at 1 */
+ vfid = zdev->vfn - 1;
+ devfn = zdev->rid & ZPCI_RID_MASK_DEVFN;
+ /*
+ * If the parent PF for the given VF is also configured in the
* instance, it must be on the same zbus.
* We can then identify the parent PF by checking what
* devfn the VF would have if it belonged to that PF using the PF's
@@ -85,15 +101,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn
if (!pdev)
continue;
cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
- if (cand_devfn == virtfn->devfn) {
- rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
- /* balance pci_get_slot() */
- pci_dev_put(pdev);
- break;
- }
+ if (cand_devfn == devfn)
+ return pdev;
/* balance pci_get_slot() */
pci_dev_put(pdev);
}
}
+ return NULL;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ struct zpci_dev *zdev = to_zpci(virtfn);
+ struct pci_dev *pdev_pf;
+ int rc = 0;
+
+ pdev_pf = zpci_iov_find_parent_pf(zbus, zdev);
+ if (pdev_pf) {
+ /* Linux' vfids start at 0 while zdev->vfn starts at 1 */
+ rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1);
+ pci_dev_put(pdev_pf);
+ }
return rc;
}
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
index b2c828003bad..d2c2793eb0f3 100644
--- a/arch/s390/pci/pci_iov.h
+++ b/arch/s390/pci/pci_iov.h
@@ -10,6 +10,8 @@
#ifndef __S390_PCI_IOV_H
#define __S390_PCI_IOV_H
+#include <linux/pci.h>
+
#ifdef CONFIG_PCI_IOV
void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn);
@@ -17,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev);
int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
+struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev);
+
#else /* CONFIG_PCI_IOV */
static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
@@ -26,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v
{
return 0;
}
+
+static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ return NULL;
+}
#endif /* CONFIG_PCI_IOV */
#endif /* __S390_PCI_IOV_h */
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 4ab0cf829999..2a06df8c2498 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/irq.h>
@@ -107,9 +106,6 @@ static int zpci_set_irq(struct zpci_dev *zdev)
else
rc = zpci_set_airq(zdev);
- if (!rc)
- zdev->irqs_registered = 1;
-
return rc;
}
@@ -123,9 +119,6 @@ static int zpci_clear_irq(struct zpci_dev *zdev)
else
rc = zpci_clear_airq(zdev);
- if (!rc)
- zdev->irqs_registered = 0;
-
return rc;
}
@@ -163,7 +156,7 @@ static void zpci_handle_cpu_local_irq(bool rescan)
if (!rescan || irqs_on++)
/* End of second scan with interrupts on. */
break;
- /* First scan complete, reenable interrupts. */
+ /* First scan complete, re-enable interrupts. */
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib))
break;
bit = 0;
@@ -202,7 +195,7 @@ static void zpci_handle_fallback_irq(void)
if (irqs_on++)
/* End of second scan with interrupts on. */
break;
- /* First scan complete, reenable interrupts. */
+ /* First scan complete, re-enable interrupts. */
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
cpu = 0;
@@ -247,7 +240,7 @@ static void zpci_floating_irq_handler(struct airq_struct *airq,
if (irqs_on++)
/* End of second scan with interrupts on. */
break;
- /* First scan complete, reenable interrupts. */
+ /* First scan complete, re-enable interrupts. */
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
si = 0;
@@ -268,33 +261,20 @@ static void zpci_floating_irq_handler(struct airq_struct *airq,
}
}
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
+ unsigned long *bit)
{
- struct zpci_dev *zdev = to_zpci(pdev);
- unsigned int hwirq, msi_vecs, cpu;
- unsigned long bit;
- struct msi_desc *msi;
- struct msi_msg msg;
- int cpu_addr;
- int rc, irq;
-
- zdev->aisb = -1UL;
- zdev->msi_first_bit = -1U;
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
- msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
-
if (irq_delivery == DIRECTED) {
/* Allocate cpu vector bits */
- bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
- if (bit == -1UL)
+ *bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
+ if (*bit == -1UL)
return -EIO;
} else {
/* Allocate adapter summary indicator bit */
- bit = airq_iv_alloc_bit(zpci_sbv);
- if (bit == -1UL)
+ *bit = airq_iv_alloc_bit(zpci_sbv);
+ if (*bit == -1UL)
return -EIO;
- zdev->aisb = bit;
+ zdev->aisb = *bit;
/* Create adapter interrupt vector */
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
@@ -302,27 +282,66 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return -ENOMEM;
/* Wire up shortcut pointer */
- zpci_ibv[bit] = zdev->aibv;
+ zpci_ibv[*bit] = zdev->aibv;
/* Each function has its own interrupt vector */
- bit = 0;
+ *bit = 0;
}
+ return 0;
+}
- /* Request MSI interrupts */
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+ unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ struct msi_desc *msi;
+ struct msi_msg msg;
+ unsigned long bit;
+ int cpu_addr;
+ int rc, irq;
+
+ zdev->aisb = -1UL;
+ zdev->msi_first_bit = -1U;
+
+ msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
+ if (msi_vecs < nvec) {
+ pr_info("%s requested %d irqs, allocate system limit of %d",
+ pci_name(pdev), nvec, zdev->max_msi);
+ }
+
+ rc = __alloc_airq(zdev, msi_vecs, &bit);
+ if (rc < 0)
+ return rc;
+
+ /*
+ * Request MSI interrupts:
+ * When using MSI, nvec_used interrupt sources and their irq
+ * descriptors are controlled through one msi descriptor.
+ * Thus the outer loop over msi descriptors shall run only once,
+ * while two inner loops iterate over the interrupt vectors.
+ * When using MSI-X, each interrupt vector/irq descriptor
+ * is bound to exactly one msi descriptor (nvec_used is one).
+ * So the inner loops are executed once, while the outer iterates
+ * over the MSI-X descriptors.
+ */
hwirq = bit;
msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
- rc = -EIO;
if (hwirq - bit >= msi_vecs)
break;
- irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
- (irq_delivery == DIRECTED) ?
- msi->affinity : NULL);
+ irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used);
+ irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE,
+ (irq_delivery == DIRECTED) ?
+ msi->affinity : NULL);
if (irq < 0)
return -ENOMEM;
- rc = irq_set_msi_desc(irq, msi);
- if (rc)
- return rc;
- irq_set_chip_and_handler(irq, &zpci_irq_chip,
- handle_percpu_irq);
+
+ for (i = 0; i < irqs_per_msi; i++) {
+ rc = irq_set_msi_desc_off(irq, i, msi);
+ if (rc)
+ return rc;
+ irq_set_chip_and_handler(irq + i, &zpci_irq_chip,
+ handle_percpu_irq);
+ }
+
msg.data = hwirq - bit;
if (irq_delivery == DIRECTED) {
if (msi->affinity)
@@ -335,31 +354,35 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
msg.address_lo |= (cpu_addr << 8);
for_each_possible_cpu(cpu) {
- airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
+ for (i = 0; i < irqs_per_msi; i++)
+ airq_iv_set_data(zpci_ibv[cpu],
+ hwirq + i, irq + i);
}
} else {
msg.address_lo = zdev->msi_addr & 0xffffffff;
- airq_iv_set_data(zdev->aibv, hwirq, irq);
+ for (i = 0; i < irqs_per_msi; i++)
+ airq_iv_set_data(zdev->aibv, hwirq + i, irq + i);
}
msg.address_hi = zdev->msi_addr >> 32;
pci_write_msi_msg(irq, &msg);
- hwirq++;
+ hwirq += irqs_per_msi;
}
zdev->msi_first_bit = bit;
- zdev->msi_nr_irqs = msi_vecs;
+ zdev->msi_nr_irqs = hwirq - bit;
rc = zpci_set_irq(zdev);
if (rc)
return rc;
- return (msi_vecs == nvec) ? 0 : msi_vecs;
+ return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs;
}
void arch_teardown_msi_irqs(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
struct msi_desc *msi;
+ unsigned int i;
int rc;
/* Disable interrupts */
@@ -369,8 +392,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
/* Release MSI interrupts */
msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
- irq_set_msi_desc(msi->irq, NULL);
- irq_free_desc(msi->irq);
+ for (i = 0; i < msi->nvec_used; i++) {
+ irq_set_msi_desc(msi->irq + i, NULL);
+ irq_free_desc(msi->irq + i);
+ }
msi->msg.address_lo = 0;
msi->msg.address_hi = 0;
msi->msg.data = 0;
@@ -395,8 +420,7 @@ bool arch_restore_msi_irqs(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
- if (!zdev->irqs_registered)
- zpci_set_irq(zdev);
+ zpci_set_irq(zdev);
return true;
}
@@ -410,7 +434,7 @@ static void __init cpu_enable_directed_irq(void *unused)
union zpci_sic_iib iib = {{0}};
union zpci_sic_iib ziib = {{0}};
- iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
+ iib.cdiib.dibv_addr = virt_to_phys(zpci_ibv[smp_processor_id()]->vector);
zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
diff --git a/arch/s390/pci/pci_kvm_hook.c b/arch/s390/pci/pci_kvm_hook.c
index ff34baf50a3e..df5b25dbe9ca 100644
--- a/arch/s390/pci/pci_kvm_hook.c
+++ b/arch/s390/pci/pci_kvm_hook.c
@@ -5,7 +5,9 @@
* Copyright (C) IBM Corp. 2022. All rights reserved.
* Author(s): Pierre Morel <pmorel@linux.ibm.com>
*/
+
#include <linux/kvm_host.h>
+#include <linux/export.h>
struct zpci_kvm_hook zpci_kvm_hook;
EXPORT_SYMBOL_GPL(zpci_kvm_hook);
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 588089332931..51e7a28af899 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -14,6 +14,7 @@
#include <asm/asm-extable.h>
#include <asm/pci_io.h>
#include <asm/pci_debug.h>
+#include <asm/asm.h>
static inline void zpci_err_mmio(u8 cc, u8 status, u64 offset)
{
@@ -30,20 +31,24 @@ static inline int __pcistb_mio_inuser(
void __iomem *ioaddr, const void __user *src,
u64 len, u8 *status)
{
- int cc = -ENXIO;
-
- asm volatile (
- " sacf 256\n"
- "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
- "1: ipm %[cc]\n"
- " srl %[cc],28\n"
- "2: sacf 768\n"
+ int cc, exception;
+ bool sacf_flag;
+
+ exception = 1;
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
+ " sacf 256\n"
+ "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
+ "1: lhi %[exc],0\n"
+ "2: sacf 768\n"
+ CC_IPM(cc)
EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : [cc] "+d" (cc), [len] "+d" (len)
+ : CC_OUT(cc, cc), [len] "+d" (len), [exc] "+d" (exception)
: [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src))
- : "cc", "memory");
+ : CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
*status = len >> 24 & 0xff;
- return cc;
+ return exception ? -ENXIO : CC_TRANSFORM(cc);
}
static inline int __pcistg_mio_inuser(
@@ -51,7 +56,8 @@ static inline int __pcistg_mio_inuser(
u64 ulen, u8 *status)
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
- int cc = -ENXIO;
+ int cc, exception;
+ bool sacf_flag;
u64 val = 0;
u64 cnt = ulen;
u8 tmp;
@@ -61,25 +67,29 @@ static inline int __pcistg_mio_inuser(
* a register, then store it to PCI at @ioaddr while in secondary
* address space. pcistg then uses the user mappings.
*/
- asm volatile (
- " sacf 256\n"
- "0: llgc %[tmp],0(%[src])\n"
+ exception = 1;
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
+ " sacf 256\n"
+ "0: llgc %[tmp],0(%[src])\n"
"4: sllg %[val],%[val],8\n"
- " aghi %[src],1\n"
- " ogr %[val],%[tmp]\n"
- " brctg %[cnt],0b\n"
- "1: .insn rre,0xb9d40000,%[val],%[ioaddr_len]\n"
- "2: ipm %[cc]\n"
- " srl %[cc],28\n"
- "3: sacf 768\n"
+ " aghi %[src],1\n"
+ " ogr %[val],%[tmp]\n"
+ " brctg %[cnt],0b\n"
+ "1: .insn rre,0xb9d40000,%[val],%[ioaddr_len]\n"
+ "2: lhi %[exc],0\n"
+ "3: sacf 768\n"
+ CC_IPM(cc)
EX_TABLE(0b, 3b) EX_TABLE(4b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+ : [src] "+a" (src), [cnt] "+d" (cnt),
+ [val] "+d" (val), [tmp] "=d" (tmp), [exc] "+d" (exception),
+ CC_OUT(cc, cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
:
- [src] "+a" (src), [cnt] "+d" (cnt),
- [val] "+d" (val), [tmp] "=d" (tmp),
- [cc] "+d" (cc), [ioaddr_len] "+&d" (ioaddr_len.pair)
- :: "cc", "memory");
+ : CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
*status = ioaddr_len.odd >> 24 & 0xff;
+ cc = exception ? -ENXIO : CC_TRANSFORM(cc);
/* did we read everything from user memory? */
if (!cc && cnt != 0)
cc = -EFAULT;
@@ -97,9 +107,9 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
return -EINVAL;
while (n > 0) {
- size = zpci_get_max_write_size((u64 __force) dst,
- (u64 __force) src, n,
- ZPCI_MAX_WRITE_SIZE);
+ size = zpci_get_max_io_size((u64 __force) dst,
+ (u64 __force) src, n,
+ ZPCI_MAX_WRITE_SIZE);
if (size > 8) /* main path */
rc = __pcistb_mio_inuser(dst, src, size, &status);
else
@@ -118,12 +128,11 @@ static inline int __memcpy_toio_inuser(void __iomem *dst,
SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
const void __user *, user_buffer, size_t, length)
{
+ struct follow_pfnmap_args args = { };
u8 local_buf[64];
void __iomem *io_addr;
void *buf;
struct vm_area_struct *vma;
- pte_t *ptep;
- spinlock_t *ptl;
long ret;
if (!zpci_is_enabled())
@@ -169,11 +178,17 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
if (!(vma->vm_flags & VM_WRITE))
goto out_unlock_mmap;
- ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
- if (ret)
- goto out_unlock_mmap;
+ args.address = mmio_addr;
+ args.vma = vma;
+ ret = follow_pfnmap_start(&args);
+ if (ret) {
+ fixup_user_fault(current->mm, mmio_addr, FAULT_FLAG_WRITE, NULL);
+ ret = follow_pfnmap_start(&args);
+ if (ret)
+ goto out_unlock_mmap;
+ }
- io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+ io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
(mmio_addr & ~PAGE_MASK));
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
@@ -181,7 +196,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
ret = zpci_memcpy_toio(io_addr, buf, length);
out_unlock_pt:
- pte_unmap_unlock(ptep, ptl);
+ follow_pfnmap_end(&args);
out_unlock_mmap:
mmap_read_unlock(current->mm);
out_free:
@@ -195,9 +210,10 @@ static inline int __pcilg_mio_inuser(
u64 ulen, u8 *status)
{
union register_pair ioaddr_len = {.even = (u64 __force)ioaddr, .odd = ulen};
+ bool sacf_flag;
u64 cnt = ulen;
int shift = ulen * 8;
- int cc = -ENXIO;
+ int cc, exception;
u64 val, tmp;
/*
@@ -205,27 +221,34 @@ static inline int __pcilg_mio_inuser(
* user space) into a register using pcilg then store these bytes at
* user address @dst
*/
- asm volatile (
- " sacf 256\n"
- "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
- "1: ipm %[cc]\n"
- " srl %[cc],28\n"
- " ltr %[cc],%[cc]\n"
- " jne 4f\n"
- "2: ahi %[shift],-8\n"
- " srlg %[tmp],%[val],0(%[shift])\n"
- "3: stc %[tmp],0(%[dst])\n"
+ exception = 1;
+ sacf_flag = enable_sacf_uaccess();
+ asm_inline volatile (
+ " sacf 256\n"
+ "0: .insn rre,0xb9d60000,%[val],%[ioaddr_len]\n"
+ "1: lhi %[exc],0\n"
+ " jne 4f\n"
+ "2: ahi %[shift],-8\n"
+ " srlg %[tmp],%[val],0(%[shift])\n"
+ "3: stc %[tmp],0(%[dst])\n"
"5: aghi %[dst],1\n"
- " brctg %[cnt],2b\n"
- "4: sacf 768\n"
+ " brctg %[cnt],2b\n"
+ /*
+ * Use xr to clear exc and set condition code to zero
+ * to ensure flag output is correct for this branch.
+ */
+ " xr %[exc],%[exc]\n"
+ "4: sacf 768\n"
+ CC_IPM(cc)
EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) EX_TABLE(5b, 4b)
+ : [ioaddr_len] "+&d" (ioaddr_len.pair), [exc] "+d" (exception),
+ CC_OUT(cc, cc), [val] "=d" (val),
+ [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
+ [shift] "+a" (shift)
:
- [ioaddr_len] "+&d" (ioaddr_len.pair),
- [cc] "+d" (cc), [val] "=d" (val),
- [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
- [shift] "+d" (shift)
- :: "cc", "memory");
-
+ : CC_CLOBBER_LIST("memory"));
+ disable_sacf_uaccess(sacf_flag);
+ cc = exception ? -ENXIO : CC_TRANSFORM(cc);
/* did we write everything to the user space buffer? */
if (!cc && cnt != 0)
cc = -EFAULT;
@@ -242,9 +265,9 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
u8 status;
while (n > 0) {
- size = zpci_get_max_write_size((u64 __force) src,
- (u64 __force) dst, n,
- ZPCI_MAX_READ_SIZE);
+ size = zpci_get_max_io_size((u64 __force) src,
+ (u64 __force) dst, n,
+ ZPCI_MAX_READ_SIZE);
rc = __pcilg_mio_inuser(dst, src, size, &status);
if (rc)
break;
@@ -260,12 +283,11 @@ static inline int __memcpy_fromio_inuser(void __user *dst,
SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
void __user *, user_buffer, size_t, length)
{
+ struct follow_pfnmap_args args = { };
u8 local_buf[64];
void __iomem *io_addr;
void *buf;
struct vm_area_struct *vma;
- pte_t *ptep;
- spinlock_t *ptl;
long ret;
if (!zpci_is_enabled())
@@ -305,14 +327,20 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
goto out_unlock_mmap;
ret = -EACCES;
- if (!(vma->vm_flags & VM_WRITE))
+ if (!(vma->vm_flags & VM_READ))
goto out_unlock_mmap;
- ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
- if (ret)
- goto out_unlock_mmap;
+ args.vma = vma;
+ args.address = mmio_addr;
+ ret = follow_pfnmap_start(&args);
+ if (ret) {
+ fixup_user_fault(current->mm, mmio_addr, 0, NULL);
+ ret = follow_pfnmap_start(&args);
+ if (ret)
+ goto out_unlock_mmap;
+ }
- io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
+ io_addr = (void __iomem *)((args.pfn << PAGE_SHIFT) |
(mmio_addr & ~PAGE_MASK));
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
@@ -322,7 +350,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
ret = zpci_memcpy_fromio(buf, io_addr, length);
out_unlock_pt:
- pte_unmap_unlock(ptep, ptl);
+ follow_pfnmap_end(&args);
out_unlock_mmap:
mmap_read_unlock(current->mm);
diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c
new file mode 100644
index 000000000000..7030f7052926
--- /dev/null
+++ b/arch/s390/pci/pci_report.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define pr_fmt(fmt) "zpci: " fmt
+
+#include <linux/kernel.h>
+#include <linux/sprintf.h>
+#include <linux/pci.h>
+
+#include <asm/sclp.h>
+#include <asm/debug.h>
+#include <asm/pci_debug.h>
+
+#include "pci_report.h"
+
+#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714
+
+struct zpci_report_error_data {
+ u64 timestamp;
+ u64 err_log_id;
+ char log_data[];
+} __packed;
+
+#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb))
+#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data))
+
+struct zpci_report_error {
+ struct zpci_report_error_header header;
+ struct zpci_report_error_data data;
+} __packed;
+
+static const char *zpci_state_str(pci_channel_state_t state)
+{
+ switch (state) {
+ case pci_channel_io_normal:
+ return "normal";
+ case pci_channel_io_frozen:
+ return "frozen";
+ case pci_channel_io_perm_failure:
+ return "permanent-failure";
+ default:
+ return "invalid";
+ };
+}
+
+static int debug_log_header_fn(debug_info_t *id, struct debug_view *view,
+ int area, debug_entry_t *entry, char *out_buf,
+ size_t out_buf_size)
+{
+ unsigned long sec, usec;
+ unsigned int level;
+ char *except_str;
+ int rc = 0;
+
+ level = entry->level;
+ sec = entry->clock;
+ usec = do_div(sec, USEC_PER_SEC);
+
+ if (entry->exception)
+ except_str = "*";
+ else
+ except_str = "-";
+ rc += scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u ",
+ sec, usec, level, except_str,
+ entry->cpu);
+ return rc;
+}
+
+static int debug_prolog_header(debug_info_t *id, struct debug_view *view,
+ char *out_buf, size_t out_buf_size)
+{
+ return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu msg\n");
+}
+
+static struct debug_view debug_log_view = {
+ "pci_msg_log",
+ &debug_prolog_header,
+ &debug_log_header_fn,
+ &debug_sprintf_format_fn,
+ NULL,
+ NULL
+};
+
+/**
+ * zpci_report_status - Report the status of operations on a PCI device
+ * @zdev: The PCI device for which to report status
+ * @operation: A string representing the operation reported
+ * @status: A string representing the status of the operation
+ *
+ * This function creates a human readable report about an operation such as
+ * PCI device recovery and forwards this to the platform using the SCLP Write
+ * Event Data mechanism. Besides the operation and status strings the report
+ * also contains additional information about the device deemed useful for
+ * debug such as the currently bound device driver, if any, and error state.
+ * Additionally a string representation of pci_debug_msg_id, or as much as fits,
+ * is also included.
+ *
+ * Return: 0 on success an error code < 0 otherwise.
+ */
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status)
+{
+ struct zpci_report_error *report;
+ struct pci_driver *driver = NULL;
+ struct pci_dev *pdev = NULL;
+ char *buf, *end;
+ int ret;
+
+ if (!zdev || !zdev->zbus)
+ return -ENODEV;
+
+ /* Protected virtualization hosts get nothing from us */
+ if (prot_virt_guest)
+ return -ENODATA;
+
+ report = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!report)
+ return -ENOMEM;
+ if (zdev->zbus->bus)
+ pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+ if (pdev)
+ driver = to_pci_driver(pdev->dev.driver);
+
+ buf = report->data.log_data;
+ end = report->data.log_data + ZPCI_REPORT_DATA_SIZE;
+ buf += scnprintf(buf, end - buf, "report: %s\n", operation);
+ buf += scnprintf(buf, end - buf, "status: %s\n", status);
+ buf += scnprintf(buf, end - buf, "state: %s\n",
+ (pdev) ? zpci_state_str(pdev->error_state) : "n/a");
+ buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a");
+ ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true);
+ if (ret < 0)
+ pr_err("Reading PCI debug messages failed with code %d\n", ret);
+ else
+ buf += ret;
+
+ report->header.version = 1;
+ report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG;
+ report->header.length = buf - (char *)&report->data;
+ report->data.timestamp = ktime_get_clocktai_seconds();
+ report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT;
+
+ ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid);
+ if (ret)
+ pr_err("Reporting PCI status failed with code %d\n", ret);
+ else
+ pr_info("Reported PCI device status\n");
+
+ free_page((unsigned long)report);
+
+ return ret;
+}
diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h
new file mode 100644
index 000000000000..e08003d51a97
--- /dev/null
+++ b/arch/s390/pci/pci_report.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2024
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+#ifndef __S390_PCI_REPORT_H
+#define __S390_PCI_REPORT_H
+
+struct zpci_dev;
+
+int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status);
+
+#endif /* __S390_PCI_REPORT_H */
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index cae280e5c047..c2444a23e26c 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -6,8 +6,7 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define KMSG_COMPONENT "zpci"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "zpci: " fmt
#include <linux/kernel.h>
#include <linux/stat.h>
@@ -23,7 +22,7 @@ static ssize_t name##_show(struct device *dev, \
{ \
struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); \
\
- return sprintf(buf, fmt, zdev->member); \
+ return sysfs_emit(buf, fmt, zdev->member); \
} \
static DEVICE_ATTR_RO(name)
@@ -34,21 +33,49 @@ zpci_attr(pfgid, "0x%02x\n", pfgid);
zpci_attr(vfn, "0x%04x\n", vfn);
zpci_attr(pft, "0x%02x\n", pft);
zpci_attr(port, "%d\n", port);
+zpci_attr(fidparm, "0x%02x\n", fidparm);
zpci_attr(uid, "0x%x\n", uid);
zpci_attr(segment0, "0x%02x\n", pfip[0]);
zpci_attr(segment1, "0x%02x\n", pfip[1]);
zpci_attr(segment2, "0x%02x\n", pfip[2]);
zpci_attr(segment3, "0x%02x\n", pfip[3]);
+#define ZPCI_FW_ATTR_RO(_name) \
+ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
static ssize_t mio_enabled_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- return sprintf(buf, zpci_use_mio(zdev) ? "1\n" : "0\n");
+ return sysfs_emit(buf, zpci_use_mio(zdev) ? "1\n" : "0\n");
}
static DEVICE_ATTR_RO(mio_enabled);
+static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
+{
+ int ret;
+
+ pci_stop_and_remove_bus_device(pdev);
+ if (zdev_enabled(zdev)) {
+ ret = zpci_disable_device(zdev);
+ /*
+ * Due to a z/VM vs LPAR inconsistency in the error
+ * state the FH may indicate an enabled device but
+ * disable says the device is already disabled don't
+ * treat it as an error here.
+ */
+ if (ret == -EINVAL)
+ ret = 0;
+ if (ret)
+ return ret;
+ }
+
+ ret = zpci_reenable_device(zdev);
+
+ return ret;
+}
+
static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
@@ -69,6 +96,12 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
*/
kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
WARN_ON_ONCE(!kn);
+
+ /* Device needs to be configured and state must not change */
+ mutex_lock(&zdev->state_lock);
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+ goto out;
+
/* device_remove_file() serializes concurrent calls ignoring all but
* the first
*/
@@ -81,39 +114,13 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
*/
pci_lock_rescan_remove();
if (pci_dev_is_added(pdev)) {
- pci_stop_and_remove_bus_device(pdev);
- if (zdev->dma_table) {
- ret = zpci_dma_exit_device(zdev);
- if (ret)
- goto out;
- }
-
- if (zdev_enabled(zdev)) {
- ret = zpci_disable_device(zdev);
- /*
- * Due to a z/VM vs LPAR inconsistency in the error
- * state the FH may indicate an enabled device but
- * disable says the device is already disabled don't
- * treat it as an error here.
- */
- if (ret == -EINVAL)
- ret = 0;
- if (ret)
- goto out;
- }
-
- ret = zpci_enable_device(zdev);
- if (ret)
- goto out;
- ret = zpci_dma_init_device(zdev);
- if (ret) {
- zpci_disable_device(zdev);
- goto out;
- }
- pci_rescan_bus(zdev->zbus->bus);
+ ret = _do_recover(pdev, zdev);
}
-out:
+ pci_rescan_bus(zdev->zbus->bus);
pci_unlock_rescan_remove();
+
+out:
+ mutex_unlock(&zdev->state_lock);
if (kn)
sysfs_unbreak_active_protection(kn);
return ret ? ret : count;
@@ -121,7 +128,7 @@ out:
static DEVICE_ATTR_WO(recover);
static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
@@ -131,10 +138,10 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
return memory_read_from_buffer(buf, count, &off, zdev->util_str,
sizeof(zdev->util_str));
}
-static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+static const BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
struct zpci_report_error_header *report = (void *) buf;
@@ -150,7 +157,7 @@ static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
return ret ? ret : count;
}
-static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
+static const BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
static ssize_t uid_is_unique_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -159,7 +166,13 @@ static ssize_t uid_is_unique_show(struct device *dev,
}
static DEVICE_ATTR_RO(uid_is_unique);
-#ifndef CONFIG_DMI
+static ssize_t uid_checking_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", zpci_unique_uid ? 1 : 0);
+}
+ZPCI_FW_ATTR_RO(uid_checking);
+
/* analogous to smbios index */
static ssize_t index_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -185,13 +198,12 @@ static struct attribute *zpci_ident_attrs[] = {
NULL,
};
-static struct attribute_group zpci_ident_attr_group = {
+const struct attribute_group zpci_ident_attr_group = {
.attrs = zpci_ident_attrs,
.is_visible = zpci_index_is_visible,
};
-#endif
-static struct bin_attribute *zpci_bin_attrs[] = {
+static const struct bin_attribute *const zpci_bin_attrs[] = {
&bin_attr_util_string,
&bin_attr_report_error,
NULL,
@@ -204,6 +216,7 @@ static struct attribute *zpci_dev_attrs[] = {
&dev_attr_pfgid.attr,
&dev_attr_pft.attr,
&dev_attr_port.attr,
+ &dev_attr_fidparm.attr,
&dev_attr_vfn.attr,
&dev_attr_uid.attr,
&dev_attr_recover.attr,
@@ -212,7 +225,7 @@ static struct attribute *zpci_dev_attrs[] = {
NULL,
};
-static struct attribute_group zpci_attr_group = {
+const struct attribute_group zpci_attr_group = {
.attrs = zpci_dev_attrs,
.bin_attrs = zpci_bin_attrs,
};
@@ -224,16 +237,23 @@ static struct attribute *pfip_attrs[] = {
&dev_attr_segment3.attr,
NULL,
};
-static struct attribute_group pfip_attr_group = {
+
+const struct attribute_group pfip_attr_group = {
.name = "pfip",
.attrs = pfip_attrs,
};
-const struct attribute_group *zpci_attr_groups[] = {
- &zpci_attr_group,
- &pfip_attr_group,
-#ifndef CONFIG_DMI
- &zpci_ident_attr_group,
-#endif
+static struct attribute *clp_fw_attrs[] = {
+ &uid_checking_attr.attr,
NULL,
};
+
+static struct attribute_group clp_fw_attr_group = {
+ .name = "clp",
+ .attrs = clp_fw_attrs,
+};
+
+int __init __zpci_fw_sysfs_init(void)
+{
+ return sysfs_create_group(firmware_kobj, &clp_fw_attr_group);
+}