Diffstat (limited to 'drivers/vfio/pci/vfio_pci_rdwr.c')
-rw-r--r--  drivers/vfio/pci/vfio_pci_rdwr.c | 419
1 file changed, 343 insertions(+), 76 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 210db24d2204..6192788c8ba3 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* VFIO PCI I/O Port & MMIO access
*
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
* Author: Alex Williamson <alex.williamson@redhat.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Derived from original vfio:
* Copyright 2010 Cisco Systems, Inc. All rights reserved.
* Author: Tom Lyon, pugs@cisco.com
@@ -17,9 +14,117 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
+#include <linux/vfio.h>
#include <linux/vgaarb.h>
-
-#include "vfio_pci_private.h"
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include "vfio_pci_priv.h"
+
+#ifdef __LITTLE_ENDIAN
+#define vfio_ioread64 ioread64
+#define vfio_iowrite64 iowrite64
+#define vfio_ioread32 ioread32
+#define vfio_iowrite32 iowrite32
+#define vfio_ioread16 ioread16
+#define vfio_iowrite16 iowrite16
+#else
+#define vfio_ioread64 ioread64be
+#define vfio_iowrite64 iowrite64be
+#define vfio_ioread32 ioread32be
+#define vfio_iowrite32 iowrite32be
+#define vfio_ioread16 ioread16be
+#define vfio_iowrite16 iowrite16be
+#endif
+#define vfio_ioread8 ioread8
+#define vfio_iowrite8 iowrite8
+
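The ioread64/iowrite64 names used above are guaranteed to exist on every
architecture by the io-64-nonatomic-lo-hi.h include: on platforms without a
native 64-bit MMIO accessor it falls back to two 32-bit accesses, low word
first. A sketch of that fallback, from memory of the header rather than
verbatim:

	/*
	 * Roughly what <linux/io-64-nonatomic-lo-hi.h> provides when the
	 * architecture lacks a native iowrite64; the read side
	 * (ioread64_lo_hi) mirrors it.  Note the combined access is not
	 * atomic on such platforms.
	 */
	static inline void iowrite64_lo_hi(u64 val, void __iomem *addr)
	{
		iowrite32(val, addr);			/* low 32 bits first */
		iowrite32(val >> 32, addr + sizeof(u32));
	}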
+#define VFIO_IOWRITE(size) \
+int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \
+ bool test_mem, u##size val, void __iomem *io) \
+{ \
+ if (test_mem) { \
+ down_read(&vdev->memory_lock); \
+ if (!__vfio_pci_memory_enabled(vdev)) { \
+ up_read(&vdev->memory_lock); \
+ return -EIO; \
+ } \
+ } \
+ \
+ vfio_iowrite##size(val, io); \
+ \
+ if (test_mem) \
+ up_read(&vdev->memory_lock); \
+ \
+ return 0; \
+} \
+EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);
+
+VFIO_IOWRITE(8)
+VFIO_IOWRITE(16)
+VFIO_IOWRITE(32)
+VFIO_IOWRITE(64)
+
+#define VFIO_IOREAD(size) \
+int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \
+ bool test_mem, u##size *val, void __iomem *io) \
+{ \
+ if (test_mem) { \
+ down_read(&vdev->memory_lock); \
+ if (!__vfio_pci_memory_enabled(vdev)) { \
+ up_read(&vdev->memory_lock); \
+ return -EIO; \
+ } \
+ } \
+ \
+ *val = vfio_ioread##size(io); \
+ \
+ if (test_mem) \
+ up_read(&vdev->memory_lock); \
+ \
+ return 0; \
+} \
+EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);
+
+VFIO_IOREAD(8)
+VFIO_IOREAD(16)
+VFIO_IOREAD(32)
+VFIO_IOREAD(64)
+
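Hand-expanding the 32-bit write case shows what each generated helper does:
the memory_lock read side pins the state of the device's memory decode, so a
concurrently disabled decode yields -EIO instead of a potentially fatal bus
access. This is the macro above spelled out for readability, not additional
code:

	int vfio_pci_core_iowrite32(struct vfio_pci_core_device *vdev,
				    bool test_mem, u32 val, void __iomem *io)
	{
		if (test_mem) {
			down_read(&vdev->memory_lock);
			if (!__vfio_pci_memory_enabled(vdev)) {
				up_read(&vdev->memory_lock);
				return -EIO;	/* decode disabled */
			}
		}

		vfio_iowrite32(val, io);  /* iowrite32 or iowrite32be, see above */

		if (test_mem)
			up_read(&vdev->memory_lock);

		return 0;
	}
	EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite32);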
+#define VFIO_IORDWR(size) \
+static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,\
+ bool iswrite, bool test_mem, \
+ void __iomem *io, char __user *buf, \
+ loff_t off, size_t *filled) \
+{ \
+ u##size val; \
+ int ret; \
+ \
+ if (iswrite) { \
+ if (copy_from_user(&val, buf, sizeof(val))) \
+ return -EFAULT; \
+ \
+ ret = vfio_pci_core_iowrite##size(vdev, test_mem, \
+ val, io + off); \
+ if (ret) \
+ return ret; \
+ } else { \
+ ret = vfio_pci_core_ioread##size(vdev, test_mem, \
+ &val, io + off); \
+ if (ret) \
+ return ret; \
+ \
+ if (copy_to_user(buf, &val, sizeof(val))) \
+ return -EFAULT; \
+ } \
+ \
+ *filled = sizeof(val); \
+ return 0; \
+} \
+
+VFIO_IORDWR(8)
+VFIO_IORDWR(16)
+VFIO_IORDWR(32)
+VFIO_IORDWR(64)
/*
* Read or write from an __iomem region (MMIO or I/O port) with an excluded
@@ -27,11 +132,13 @@
 * range which is inaccessible. The excluded range drops writes and fills
 * reads with -1. This is intended for handling MSI-X vector tables and
* leftover space for ROM BARs.
*/
-static ssize_t do_io_rw(void __iomem *io, char __user *buf,
- loff_t off, size_t count, size_t x_start,
- size_t x_end, bool iswrite)
+ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
+ void __iomem *io, char __user *buf,
+ loff_t off, size_t count, size_t x_start,
+ size_t x_end, bool iswrite)
{
ssize_t done = 0;
+ int ret;
while (count) {
size_t fillable, filled;
@@ -43,54 +150,31 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
else
fillable = 0;
- if (fillable >= 4 && !(off % 4)) {
- __le32 val;
-
- if (iswrite) {
- if (copy_from_user(&val, buf, 4))
- return -EFAULT;
-
- iowrite32(le32_to_cpu(val), io + off);
- } else {
- val = cpu_to_le32(ioread32(io + off));
+ if (fillable >= 8 && !(off % 8)) {
+ ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
+ io, buf, off, &filled);
+ if (ret)
+ return ret;
- if (copy_to_user(buf, &val, 4))
- return -EFAULT;
- }
+ } else if (fillable >= 4 && !(off % 4)) {
+ ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
+ io, buf, off, &filled);
+ if (ret)
+ return ret;
- filled = 4;
} else if (fillable >= 2 && !(off % 2)) {
- __le16 val;
-
- if (iswrite) {
- if (copy_from_user(&val, buf, 2))
- return -EFAULT;
-
- iowrite16(le16_to_cpu(val), io + off);
- } else {
- val = cpu_to_le16(ioread16(io + off));
-
- if (copy_to_user(buf, &val, 2))
- return -EFAULT;
- }
+ ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
+ io, buf, off, &filled);
+ if (ret)
+ return ret;
- filled = 2;
} else if (fillable) {
- u8 val;
+ ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
+ io, buf, off, &filled);
+ if (ret)
+ return ret;
- if (iswrite) {
- if (copy_from_user(&val, buf, 1))
- return -EFAULT;
-
- iowrite8(val, io + off);
- } else {
- val = ioread8(io + off);
-
- if (copy_to_user(buf, &val, 1))
- return -EFAULT;
- }
-
- filled = 1;
} else {
/* Fill reads with -1, drop writes */
filled = min(count, (size_t)(x_end - off));
@@ -112,8 +196,34 @@ static ssize_t do_io_rw(void __iomem *io, char __user *buf,
return done;
}
+EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
+
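vfio_pci_core_do_io_rw() splits a transfer into the widest naturally aligned
accesses available; a 10-byte read at BAR offset 2, for instance, becomes one
2-byte access followed by two 4-byte accesses. Userspace reaches this path
with ordinary pread()/pwrite() on the device fd at the region's file offset.
A minimal sketch, assuming an already-opened VFIO device fd (the helper name
is made up; the ioctl and structures are standard VFIO UAPI):

	#include <linux/vfio.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	/* Read a 32-bit register from BAR0; this lands in vfio_pci_bar_rw()
	 * and then vfio_pci_core_do_io_rw(), with test_mem set for MMIO BARs. */
	static int read_bar0_dword(int device_fd, uint64_t bar_off, uint32_t *val)
	{
		struct vfio_region_info info = {
			.argsz = sizeof(info),
			.index = VFIO_PCI_BAR0_REGION_INDEX,
		};

		if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info))
			return -1;

		if (pread(device_fd, val, sizeof(*val),
			  info.offset + bar_off) != sizeof(*val))
			return -1;

		return 0;
	}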
+int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ int ret;
+ void __iomem *io;
+
+ if (vdev->barmap[bar])
+ return 0;
+
+ ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
+ if (ret)
+ return ret;
+
+ io = pci_iomap(pdev, bar, 0);
+ if (!io) {
+ pci_release_selected_regions(pdev, 1 << bar);
+ return -ENOMEM;
+ }
+
+ vdev->barmap[bar] = io;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
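vfio_pci_core_setup_barmap() is idempotent: the first caller maps the whole
BAR (pci_iomap() with a maxlen of 0) and caches it in vdev->barmap[], so later
callers get the cached mapping for free. A hypothetical caller, sketched only
to show the intended pattern for variant drivers (register offset and function
name invented):

	/* Hypothetical: set bits in a 32-bit register at offset 0x10 of BAR2. */
	static int example_set_bits(struct vfio_pci_core_device *vdev, u32 bits)
	{
		void __iomem *io;
		u32 val;
		int ret;

		ret = vfio_pci_core_setup_barmap(vdev, 2);
		if (ret)
			return ret;

		io = vdev->barmap[2];

		ret = vfio_pci_core_ioread32(vdev, true, &val, io + 0x10);
		if (ret)
			return ret;

		return vfio_pci_core_iowrite32(vdev, true, val | bits, io + 0x10);
	}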
-ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
+ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
struct pci_dev *pdev = vdev->pdev;
@@ -122,13 +232,16 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
size_t x_start = 0, x_end = 0;
resource_size_t end;
void __iomem *io;
+ struct resource *res = &vdev->pdev->resource[bar];
ssize_t done;
- if (!pci_resource_start(pdev, bar))
+ if (pci_resource_start(pdev, bar))
+ end = pci_resource_len(pdev, bar);
+ else if (bar == PCI_ROM_RESOURCE && pdev->rom && pdev->romlen)
+ end = roundup_pow_of_two(pdev->romlen);
+ else
return -EINVAL;
- end = pci_resource_len(pdev, bar);
-
if (pos >= end)
return -EINVAL;
@@ -140,44 +253,49 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
* excluded range at the end of the actual ROM. This makes
* filling large ROM BARs much faster.
*/
- io = pci_map_rom(pdev, &x_start);
+ if (pci_resource_start(pdev, bar)) {
+ io = pci_map_rom(pdev, &x_start);
+ } else {
+ io = ioremap(pdev->rom, pdev->romlen);
+ x_start = pdev->romlen;
+ }
if (!io)
return -ENOMEM;
x_end = end;
- } else if (!vdev->barmap[bar]) {
- int ret;
-
- ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
- if (ret)
- return ret;
-
- io = pci_iomap(pdev, bar, 0);
- if (!io) {
- pci_release_selected_regions(pdev, 1 << bar);
- return -ENOMEM;
+ } else {
+ int ret = vfio_pci_core_setup_barmap(vdev, bar);
+ if (ret) {
+ done = ret;
+ goto out;
}
- vdev->barmap[bar] = io;
- } else
io = vdev->barmap[bar];
+ }
if (bar == vdev->msix_bar) {
x_start = vdev->msix_offset;
x_end = vdev->msix_offset + vdev->msix_size;
}
- done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
+ done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
+ count, x_start, x_end, iswrite);
if (done >= 0)
*ppos += done;
- if (bar == PCI_ROM_RESOURCE)
- pci_unmap_rom(pdev, io);
+ if (bar == PCI_ROM_RESOURCE) {
+ if (pci_resource_start(pdev, bar))
+ pci_unmap_rom(pdev, io);
+ else
+ iounmap(io);
+ }
+out:
return done;
}
-ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
+#ifdef CONFIG_VFIO_PCI_VGA
+ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
int ret;
@@ -190,10 +308,13 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
if (!vdev->has_vga)
return -EINVAL;
- switch (pos) {
+ if (pos > 0xbfffful)
+ return -EINVAL;
+
+ switch ((u32)pos) {
case 0xa0000 ... 0xbffff:
count = min(count, (size_t)(0xc0000 - pos));
- iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1);
+ iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
off = pos - 0xa0000;
rsrc = VGA_RSRC_LEGACY_MEM;
is_ioport = false;
@@ -225,7 +346,13 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
return ret;
}
- done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
+ /*
+ * VGA MMIO is a legacy, non-BAR resource that hopefully allows
+ * probing, so we don't currently worry about access in relation
+ * to the memory enable bit in the command register.
+ */
+ done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
+ 0, 0, iswrite);
vga_put(vdev->pdev, rsrc);
@@ -236,3 +363,143 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
return done;
}
+#endif
+
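Userspace reaches vfio_pci_vga_rw() through the optional VGA region, where the
offset within the region selects the legacy address directly (the
0xa0000-0xbffff window in the switch above, plus the 0x3b0/0x3c0 port ranges
in the cases elided from this hunk). A minimal sketch, assuming the device
exposes VFIO_PCI_VGA_REGION_INDEX:

	#include <linux/vfio.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	/* Read one byte of legacy VGA memory at 0xa0000 via the VGA region. */
	static int read_vga_byte(int device_fd, uint8_t *val)
	{
		struct vfio_region_info info = {
			.argsz = sizeof(info),
			.index = VFIO_PCI_VGA_REGION_INDEX,
		};

		if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info) ||
		    !info.size)
			return -1;

		return pread(device_fd, val, 1, info.offset + 0xa0000) == 1
			? 0 : -1;
	}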
+static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
+ bool test_mem)
+{
+ switch (ioeventfd->count) {
+ case 1:
+ vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
+ ioeventfd->data, ioeventfd->addr);
+ break;
+ case 2:
+ vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
+ ioeventfd->data, ioeventfd->addr);
+ break;
+ case 4:
+ vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
+ ioeventfd->data, ioeventfd->addr);
+ break;
+ case 8:
+ vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
+ ioeventfd->data, ioeventfd->addr);
+ break;
+ }
+}
+
+static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
+{
+ struct vfio_pci_ioeventfd *ioeventfd = opaque;
+ struct vfio_pci_core_device *vdev = ioeventfd->vdev;
+
+ if (ioeventfd->test_mem) {
+ if (!down_read_trylock(&vdev->memory_lock))
+ return 1; /* Lock contended, use thread */
+ if (!__vfio_pci_memory_enabled(vdev)) {
+ up_read(&vdev->memory_lock);
+ return 0;
+ }
+ }
+
+ vfio_pci_ioeventfd_do_write(ioeventfd, false);
+
+ if (ioeventfd->test_mem)
+ up_read(&vdev->memory_lock);
+
+ return 0;
+}
+
+static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
+{
+ struct vfio_pci_ioeventfd *ioeventfd = opaque;
+
+ vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
+}
+
+int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
+ uint64_t data, int count, int fd)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
+ int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
+ struct vfio_pci_ioeventfd *ioeventfd;
+
+ /* Only support ioeventfds into BARs */
+ if (bar > VFIO_PCI_BAR5_REGION_INDEX)
+ return -EINVAL;
+
+ if (pos + count > pci_resource_len(pdev, bar))
+ return -EINVAL;
+
+ /* Disallow ioeventfds working around MSI-X table writes */
+ if (bar == vdev->msix_bar &&
+ !(pos + count <= vdev->msix_offset ||
+ pos >= vdev->msix_offset + vdev->msix_size))
+ return -EINVAL;
+
+ if (count == 8)
+ return -EINVAL;
+
+ ret = vfio_pci_core_setup_barmap(vdev, bar);
+ if (ret)
+ return ret;
+
+ mutex_lock(&vdev->ioeventfds_lock);
+
+ list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
+ if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
+ ioeventfd->data == data && ioeventfd->count == count) {
+ if (fd == -1) {
+ vfio_virqfd_disable(&ioeventfd->virqfd);
+ list_del(&ioeventfd->next);
+ vdev->ioeventfds_nr--;
+ kfree(ioeventfd);
+ ret = 0;
+ } else
+ ret = -EEXIST;
+
+ goto out_unlock;
+ }
+ }
+
+ if (fd < 0) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+
+ ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
+ if (!ioeventfd) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ ioeventfd->vdev = vdev;
+ ioeventfd->addr = vdev->barmap[bar] + pos;
+ ioeventfd->data = data;
+ ioeventfd->pos = pos;
+ ioeventfd->bar = bar;
+ ioeventfd->count = count;
+ ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
+
+ ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
+ vfio_pci_ioeventfd_thread, NULL,
+ &ioeventfd->virqfd, fd);
+ if (ret) {
+ kfree(ioeventfd);
+ goto out_unlock;
+ }
+
+ list_add(&ioeventfd->next, &vdev->ioeventfds_list);
+ vdev->ioeventfds_nr++;
+
+out_unlock:
+ mutex_unlock(&vdev->ioeventfds_lock);
+
+ return ret;
+}
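From userspace, ioeventfds are set up and torn down with the
VFIO_DEVICE_IOEVENTFD ioctl: offset, data and a size flag describe the write
the kernel should perform, fd supplies the eventfd to wait on, and fd = -1
with a matching offset/data/size tuple removes it again (the list walk above).
A minimal sketch, assuming a device with a hypothetical 32-bit doorbell
register (the register and its offset are invented; the ioctl and struct are
standard VFIO UAPI):

	#include <linux/vfio.h>
	#include <stdint.h>
	#include <sys/eventfd.h>
	#include <sys/ioctl.h>

	/*
	 * Arm a 4-byte ioeventfd: the kernel writes `data` to the device each
	 * time the returned eventfd is signaled.  `offset` is the device-fd
	 * offset of the doorbell, i.e. the BAR's region offset reported by
	 * VFIO_DEVICE_GET_REGION_INFO plus the register offset within the BAR.
	 */
	static int arm_doorbell(int device_fd, uint64_t offset, uint32_t data)
	{
		struct vfio_device_ioeventfd ioeventfd = {
			.argsz = sizeof(ioeventfd),
			.flags = VFIO_DEVICE_IOEVENTFD_32,
			.offset = offset,
			.data = data,
			.fd = eventfd(0, EFD_CLOEXEC),
		};

		if (ioeventfd.fd < 0)
			return -1;

		if (ioctl(device_fd, VFIO_DEVICE_IOEVENTFD, &ioeventfd))
			return -1;	/* caller should close the eventfd */

		return ioeventfd.fd;	/* tear down later with .fd = -1 */
	}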