diff options
author | Alex Williamson <alex.williamson@redhat.com> | 2018-03-21 12:46:21 -0600 |
---|---|---|
committer | Alex Williamson <alex.williamson@redhat.com> | 2018-03-26 13:22:58 -0600 |
commit | 30656177c4080460b936709ff6648f201d7d2c1a (patch) | |
tree | 736aca0117c845222f0fd44cdcd661f431bd810d /drivers/vfio/pci/vfio_pci_rdwr.c | |
parent | 07fd7ef3a1c25a11015bb5821c9c5982f722d4a2 (diff) |
vfio/pci: Add ioeventfd support
The ioeventfd here is actually irqfd handling of an ioeventfd such as
supported in KVM. A user is able to pre-program a device write to
occur when the eventfd triggers. This is yet another instance of
eventfd-irqfd triggering between KVM and vfio. The impetus for this
is high frequency writes to pages which are virtualized in QEMU.
Enabling this near-direct write path for selected registers within
the virtualized page can improve performance and reduce overhead.
Specifically this is initially targeted at NVIDIA graphics cards where
the driver issues a write to an MMIO register within a virtualized
region in order to allow the MSI interrupt to re-trigger.
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Diffstat (limited to 'drivers/vfio/pci/vfio_pci_rdwr.c')
-rw-r--r-- | drivers/vfio/pci/vfio_pci_rdwr.c | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 925419e0f459..a6029d0a5524 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -17,6 +17,7 @@ #include <linux/pci.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/vfio.h> #include <linux/vgaarb.h> #include "vfio_pci_private.h" @@ -275,3 +276,113 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, return done; } + +static int vfio_pci_ioeventfd_handler(void *opaque, void *unused) +{ + struct vfio_pci_ioeventfd *ioeventfd = opaque; + + switch (ioeventfd->count) { + case 1: + vfio_iowrite8(ioeventfd->data, ioeventfd->addr); + break; + case 2: + vfio_iowrite16(ioeventfd->data, ioeventfd->addr); + break; + case 4: + vfio_iowrite32(ioeventfd->data, ioeventfd->addr); + break; +#ifdef iowrite64 + case 8: + vfio_iowrite64(ioeventfd->data, ioeventfd->addr); + break; +#endif + } + + return 0; +} + +long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset, + uint64_t data, int count, int fd) +{ + struct pci_dev *pdev = vdev->pdev; + loff_t pos = offset & VFIO_PCI_OFFSET_MASK; + int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset); + struct vfio_pci_ioeventfd *ioeventfd; + + /* Only support ioeventfds into BARs */ + if (bar > VFIO_PCI_BAR5_REGION_INDEX) + return -EINVAL; + + if (pos + count > pci_resource_len(pdev, bar)) + return -EINVAL; + + /* Disallow ioeventfds working around MSI-X table writes */ + if (bar == vdev->msix_bar && + !(pos + count <= vdev->msix_offset || + pos >= vdev->msix_offset + vdev->msix_size)) + return -EINVAL; + +#ifndef iowrite64 + if (count == 8) + return -EINVAL; +#endif + + ret = vfio_pci_setup_barmap(vdev, bar); + if (ret) + return ret; + + mutex_lock(&vdev->ioeventfds_lock); + + list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) { + if (ioeventfd->pos == pos && ioeventfd->bar == bar && + ioeventfd->data == data && ioeventfd->count == count) { + if (fd == -1) { + vfio_virqfd_disable(&ioeventfd->virqfd); + list_del(&ioeventfd->next); + vdev->ioeventfds_nr--; + kfree(ioeventfd); + ret = 0; + } else + ret = -EEXIST; + + goto out_unlock; + } + } + + if (fd < 0) { + ret = -ENODEV; + goto out_unlock; + } + + if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) { + ret = -ENOSPC; + goto out_unlock; + } + + ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL); + if (!ioeventfd) { + ret = -ENOMEM; + goto out_unlock; + } + + ioeventfd->addr = vdev->barmap[bar] + pos; + ioeventfd->data = data; + ioeventfd->pos = pos; + ioeventfd->bar = bar; + ioeventfd->count = count; + + ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler, + NULL, NULL, &ioeventfd->virqfd, fd); + if (ret) { + kfree(ioeventfd); + goto out_unlock; + } + + list_add(&ioeventfd->next, &vdev->ioeventfds_list); + vdev->ioeventfds_nr++; + +out_unlock: + mutex_unlock(&vdev->ioeventfds_lock); + + return ret; +} |