summaryrefslogtreecommitdiff
path: root/drivers/xen/privcmd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen/privcmd.c')
-rw-r--r-- drivers/xen/privcmd.c | 724
1 files changed, 711 insertions, 13 deletions
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 1edf45ee9890..f52a457b302d 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -9,11 +9,17 @@
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+#include <linux/eventfd.h>
+#include <linux/file.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/srcu.h>
#include <linux/string.h>
+#include <linux/workqueue.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
@@ -24,22 +30,29 @@
#include <linux/seq_file.h>
#include <linux/miscdevice.h>
#include <linux/moduleparam.h>
+#include <linux/virtio_mmio.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <xen/xen.h>
+#include <xen/events.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/interface/memory.h>
#include <xen/interface/hvm/dm_op.h>
+#include <xen/interface/hvm/ioreq.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>
+#ifdef CONFIG_XEN_ACPI
+#include <xen/acpi.h>
+#endif
#include "privcmd.h"
+MODULE_DESCRIPTION("Xen hypercall passthrough driver");
MODULE_LICENSE("GPL");
#define PRIV_VMA_LOCKED ((void *)1)
@@ -258,7 +271,7 @@ static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
struct mmap_gfn_state state;
/* We only support privcmd_ioctl_mmap_batch for non-auto-translated. */
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
return -ENOSYS;
if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
@@ -340,7 +353,7 @@ static int mmap_batch_fn(void *data, int nr, void *state)
struct page **cur_pages = NULL;
int ret;
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ if (!xen_pv_domain())
cur_pages = &pages[st->index];
BUG_ON(nr < 0);
@@ -522,7 +535,7 @@ static long privcmd_ioctl_mmap_batch(
ret = -EINVAL;
goto out_unlock;
}
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ if (!xen_pv_domain()) {
ret = alloc_empty_pages(vma, nr_pages);
if (ret < 0)
goto out_unlock;
@@ -766,8 +779,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file,
goto out;
}
- if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) &&
- xen_feature(XENFEAT_auto_translated_physmap)) {
+ if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) && !xen_pv_domain()) {
unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE);
struct page **pages;
unsigned int i;
@@ -777,6 +789,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file,
goto out;
pages = vma->vm_private_data;
+
for (i = 0; i < kdata.num; i++) {
xen_pfn_t pfn =
page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]);
@@ -797,8 +810,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file,
if (rc)
goto out;
- if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) &&
- xen_feature(XENFEAT_auto_translated_physmap)) {
+ if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) && !xen_pv_domain()) {
rc = xen_remap_vma_range(vma, kdata.addr, kdata.num << PAGE_SHIFT);
} else {
unsigned int domid =
@@ -833,6 +845,667 @@ out:
return rc;
}
+/*
+ * IOCTL_PRIVCMD_PCIDEV_GET_GSI handler.
+ *
+ * Reads a struct privcmd_pcidev_get_gsi from @udata, asks
+ * xen_acpi_get_gsi_from_sbdf() for the GSI of the given sbdf and writes the
+ * structure back with the gsi field filled in.
+ *
+ * Returns 0 on success, -EFAULT if either user copy fails, or the negative
+ * value returned by the lookup. Without CONFIG_XEN_ACPI it always returns
+ * -EINVAL.
+ */
+static long privcmd_ioctl_pcidev_get_gsi(struct file *file, void __user *udata)
+{
+#ifdef CONFIG_XEN_ACPI
+	struct privcmd_pcidev_get_gsi req;
+	int gsi;
+
+	if (copy_from_user(&req, udata, sizeof(req)) != 0)
+		return -EFAULT;
+
+	gsi = xen_acpi_get_gsi_from_sbdf(req.sbdf);
+	if (gsi < 0)
+		return gsi;
+
+	/* Hand the result back in the same structure the caller passed in */
+	req.gsi = gsi;
+
+	return copy_to_user(udata, &req, sizeof(req)) ? -EFAULT : 0;
+#else
+	return -EINVAL;
+#endif
+}
+
+#ifdef CONFIG_XEN_PRIVCMD_EVENTFD
+/* Irqfd support */
+/* Workqueue on which irqfd_shutdown() runs for deactivated irqfds */
+static struct workqueue_struct *irqfd_cleanup_wq;
+/* Held around every walk and modification of irqfds_list */
+static DEFINE_SPINLOCK(irqfds_lock);
+/*
+ * Read-side held over the tail of privcmd_irqfd_assign(); irqfd_shutdown()
+ * calls synchronize_srcu() on it before tearing an irqfd down.
+ */
+DEFINE_STATIC_SRCU(irqfds_srcu);
+/* All currently assigned irqfds, linked through privcmd_kernel_irqfd.list */
+static LIST_HEAD(irqfds_list);
+
+/*
+ * Kernel-side state of one assigned irqfd. privcmd_irqfd_assign() allocates
+ * it together with the dm_op payload, which is stored directly behind the
+ * structure (at kirqfd + 1).
+ */
+struct privcmd_kernel_irqfd {
+ struct xen_dm_op_buf xbufs; /* size + handle of the dm_op payload */
+ domid_t dom; /* domain passed to HYPERVISOR_dm_op() */
+ bool error; /* true if the last dm_op call failed */
+ struct eventfd_ctx *eventfd; /* reference dropped in irqfd_shutdown() */
+ struct work_struct shutdown; /* runs irqfd_shutdown() */
+ wait_queue_entry_t wait; /* eventfd wait entry, calls irqfd_wakeup() */
+ struct list_head list; /* link in irqfds_list */
+ poll_table pt; /* routes vfs_poll() to irqfd_poll_func() */
+};
+
+/*
+ * Take @kirqfd off irqfds_list and queue its shutdown work on
+ * irqfd_cleanup_wq; the actual teardown happens in irqfd_shutdown().
+ * The caller must hold irqfds_lock.
+ */
+static void irqfd_deactivate(struct privcmd_kernel_irqfd *kirqfd)
+{
+ lockdep_assert_held(&irqfds_lock);
+
+ list_del_init(&kirqfd->list);
+ queue_work(irqfd_cleanup_wq, &kirqfd->shutdown);
+}
+
+/*
+ * Work handler queued by irqfd_deactivate(): final teardown of one irqfd.
+ * Detaches the wait entry from the eventfd, drops the eventfd reference and
+ * frees the structure (the dm_op payload is part of the same allocation).
+ */
+static void irqfd_shutdown(struct work_struct *work)
+{
+ struct privcmd_kernel_irqfd *kirqfd =
+ container_of(work, struct privcmd_kernel_irqfd, shutdown);
+ u64 cnt;
+
+ /* Make sure irqfd has been initialized in assign path */
+ synchronize_srcu(&irqfds_srcu);
+
+ /* cnt receives the eventfd counter value; it is not used afterwards */
+ eventfd_ctx_remove_wait_queue(kirqfd->eventfd, &kirqfd->wait, &cnt);
+ eventfd_ctx_put(kirqfd->eventfd);
+ kfree(kirqfd);
+}
+
+/*
+ * Consume the pending eventfd count and issue the stored dm_op for the
+ * irqfd's domain. The outcome is remembered in kirqfd->error so that a run of
+ * consecutive failures is logged only once.
+ */
+static void irqfd_inject(struct privcmd_kernel_irqfd *kirqfd)
+{
+	bool failed;
+	u64 count;
+	long err;
+
+	eventfd_ctx_do_read(kirqfd->eventfd, &count);
+
+	xen_preemptible_hcall_begin();
+	err = HYPERVISOR_dm_op(kirqfd->dom, 1, &kirqfd->xbufs);
+	xen_preemptible_hcall_end();
+
+	failed = err != 0;
+
+	/* Only the first failure after a success is reported */
+	if (failed && !kirqfd->error)
+		pr_err("Failed to configure irq for guest domain: %d\n",
+		       kirqfd->dom);
+
+	kirqfd->error = failed;
+}
+
+/*
+ * Wait-queue callback installed on the eventfd by privcmd_irqfd_assign().
+ * EPOLLIN triggers the dm_op injection; EPOLLHUP deactivates the irqfd under
+ * irqfds_lock. Always returns 0.
+ */
+static int
+irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
+{
+	struct privcmd_kernel_irqfd *kirqfd =
+		container_of(wait, struct privcmd_kernel_irqfd, wait);
+	const __poll_t events = key_to_poll(key);
+	unsigned long irqflags;
+
+	if (events & EPOLLIN)
+		irqfd_inject(kirqfd);
+
+	if (!(events & EPOLLHUP))
+		return 0;
+
+	spin_lock_irqsave(&irqfds_lock, irqflags);
+	irqfd_deactivate(kirqfd);
+	spin_unlock_irqrestore(&irqfds_lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * poll_table callback registered with init_poll_funcptr() in
+ * privcmd_irqfd_assign(): adds the irqfd's wait entry to the wait queue head
+ * @wqh handed in by the polled file, using add_wait_queue_priority().
+ */
+static void
+irqfd_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
+{
+ struct privcmd_kernel_irqfd *kirqfd =
+ container_of(pt, struct privcmd_kernel_irqfd, pt);
+
+ add_wait_queue_priority(wqh, &kirqfd->wait);
+}
+
+/*
+ * Register a new irqfd described by @irqfd.
+ *
+ * Copies the dm_op payload from user space, takes a reference on the eventfd
+ * behind irqfd->fd, adds the new object to irqfds_list and hooks it into the
+ * eventfd's wait queue so that irqfd_wakeup() runs on every signal.
+ *
+ * Returns 0 on success, -ENOMEM, -EFAULT (payload copy failed), -EBADF (no
+ * file for the fd), the error from eventfd_ctx_fileget(), or -EBUSY if the
+ * same eventfd is already on irqfds_list.
+ */
+static int privcmd_irqfd_assign(struct privcmd_irqfd *irqfd)
+{
+ struct privcmd_kernel_irqfd *kirqfd, *tmp;
+ unsigned long flags;
+ __poll_t events;
+ void *dm_op;
+ int ret, idx;
+
+ /* NOTE(review): presumably a scope-based fd lookup released on return - confirm */
+ CLASS(fd, f)(irqfd->fd);
+
+ /*
+ * One allocation holds the struct and, right behind it, the dm_op
+ * payload. NOTE(review): irqfd->size is used as-is; no upper bound is
+ * applied in this function.
+ */
+ kirqfd = kzalloc(sizeof(*kirqfd) + irqfd->size, GFP_KERNEL);
+ if (!kirqfd)
+ return -ENOMEM;
+ dm_op = kirqfd + 1;
+
+ if (copy_from_user(dm_op, u64_to_user_ptr(irqfd->dm_op), irqfd->size)) {
+ ret = -EFAULT;
+ goto error_kfree;
+ }
+
+ kirqfd->xbufs.size = irqfd->size;
+ set_xen_guest_handle(kirqfd->xbufs.h, dm_op);
+ kirqfd->dom = irqfd->dom;
+ INIT_WORK(&kirqfd->shutdown, irqfd_shutdown);
+
+ if (fd_empty(f)) {
+ ret = -EBADF;
+ goto error_kfree;
+ }
+
+ kirqfd->eventfd = eventfd_ctx_fileget(fd_file(f));
+ if (IS_ERR(kirqfd->eventfd)) {
+ ret = PTR_ERR(kirqfd->eventfd);
+ goto error_kfree;
+ }
+
+ /*
+ * Install our own custom wake-up handling so we are notified via a
+ * callback whenever someone signals the underlying eventfd.
+ */
+ init_waitqueue_func_entry(&kirqfd->wait, irqfd_wakeup);
+ init_poll_funcptr(&kirqfd->pt, irqfd_poll_func);
+
+ spin_lock_irqsave(&irqfds_lock, flags);
+
+ /* An eventfd may back at most one irqfd */
+ list_for_each_entry(tmp, &irqfds_list, list) {
+ if (kirqfd->eventfd == tmp->eventfd) {
+ ret = -EBUSY;
+ spin_unlock_irqrestore(&irqfds_lock, flags);
+ goto error_eventfd;
+ }
+ }
+
+ /*
+ * Enter the SRCU read section before the entry becomes visible on the
+ * list: irqfd_shutdown() calls synchronize_srcu() before freeing, so
+ * kirqfd stays valid until srcu_read_unlock() below.
+ */
+ idx = srcu_read_lock(&irqfds_srcu);
+ list_add_tail(&kirqfd->list, &irqfds_list);
+ spin_unlock_irqrestore(&irqfds_lock, flags);
+
+ /*
+ * Check if there was an event already pending on the eventfd before we
+ * registered, and trigger it as if we didn't miss it.
+ */
+ events = vfs_poll(fd_file(f), &kirqfd->pt);
+ if (events & EPOLLIN)
+ irqfd_inject(kirqfd);
+
+ srcu_read_unlock(&irqfds_srcu, idx);
+ return 0;
+
+error_eventfd:
+ eventfd_ctx_put(kirqfd->eventfd);
+
+error_kfree:
+ kfree(kirqfd);
+ return ret;
+}
+
+/*
+ * Remove the irqfd that uses the eventfd behind irqfd->fd, if any.
+ *
+ * Returns the error from eventfd_ctx_fdget() if the fd cannot be resolved,
+ * otherwise 0 - also when no matching irqfd is on irqfds_list.
+ */
+static int privcmd_irqfd_deassign(struct privcmd_irqfd *irqfd)
+{
+ struct privcmd_kernel_irqfd *kirqfd;
+ struct eventfd_ctx *eventfd;
+ unsigned long flags;
+
+ /* Temporary reference, only used to compare against registered contexts */
+ eventfd = eventfd_ctx_fdget(irqfd->fd);
+ if (IS_ERR(eventfd))
+ return PTR_ERR(eventfd);
+
+ spin_lock_irqsave(&irqfds_lock, flags);
+
+ list_for_each_entry(kirqfd, &irqfds_list, list) {
+ if (kirqfd->eventfd == eventfd) {
+ /* Unlinks the entry, so the walk must stop here */
+ irqfd_deactivate(kirqfd);
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&irqfds_lock, flags);
+
+ eventfd_ctx_put(eventfd);
+
+ /*
+ * Block until we know all outstanding shutdown jobs have completed so
+ * that we guarantee there will not be any more interrupts once this
+ * deassign function returns.
+ */
+ flush_workqueue(irqfd_cleanup_wq);
+
+ return 0;
+}
+
+/*
+ * IOCTL_PRIVCMD_IRQFD handler: validate the request and dispatch it to the
+ * assign or deassign path.
+ *
+ * Returns -EFAULT if the request cannot be copied in, -EINVAL if any flag
+ * other than PRIVCMD_IRQFD_FLAG_DEASSIGN is set, -EPERM if the file is
+ * restricted to a different domain, otherwise the result of the selected path.
+ */
+static long privcmd_ioctl_irqfd(struct file *file, void __user *udata)
+{
+	struct privcmd_data *priv = file->private_data;
+	struct privcmd_irqfd req;
+	bool restricted;
+
+	if (copy_from_user(&req, udata, sizeof(req)))
+		return -EFAULT;
+
+	/* DEASSIGN is the only flag understood here */
+	if (req.flags & ~PRIVCMD_IRQFD_FLAG_DEASSIGN)
+		return -EINVAL;
+
+	/* A restricted file may only act on its own domain */
+	restricted = priv->domid != DOMID_INVALID;
+	if (restricted && priv->domid != req.dom)
+		return -EPERM;
+
+	return (req.flags & PRIVCMD_IRQFD_FLAG_DEASSIGN) ?
+		privcmd_irqfd_deassign(&req) : privcmd_irqfd_assign(&req);
+}
+
+/*
+ * Create the workqueue used for irqfd teardown.
+ * Returns 0 on success or -ENOMEM if the workqueue cannot be allocated.
+ */
+static int privcmd_irqfd_init(void)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("privcmd-irqfd-cleanup", 0, 0);
+	irqfd_cleanup_wq = wq;
+
+	return wq ? 0 : -ENOMEM;
+}
+
+/*
+ * Deactivate every irqfd still on irqfds_list and destroy the cleanup
+ * workqueue on which their shutdown work was queued.
+ */
+static void privcmd_irqfd_exit(void)
+{
+ struct privcmd_kernel_irqfd *kirqfd, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&irqfds_lock, flags);
+
+ /* _safe walk: irqfd_deactivate() unlinks the current entry */
+ list_for_each_entry_safe(kirqfd, tmp, &irqfds_list, list)
+ irqfd_deactivate(kirqfd);
+
+ spin_unlock_irqrestore(&irqfds_lock, flags);
+
+ destroy_workqueue(irqfd_cleanup_wq);
+}
+
+/* Ioeventfd Support */
+/* Bits of the written ioreq data compared against the registered vq number */
+#define QUEUE_NOTIFY_VQ_MASK 0xFFFF
+
+/* Held around lookups, insertions and removals on ioreq_list */
+static DEFINE_MUTEX(ioreq_lock);
+/* All per-guest ioreq objects, linked through privcmd_kernel_ioreq.list */
+static LIST_HEAD(ioreq_list);
+
+/* per-eventfd structure */
+/* One registered ioeventfd: which guest write signals which eventfd */
+struct privcmd_kernel_ioeventfd {
+ struct eventfd_ctx *eventfd; /* signalled on a match, put in ioeventfd_free() */
+ struct list_head list; /* link in privcmd_kernel_ioreq.ioeventfds */
+ u64 addr; /* base address; matched at addr + VIRTIO_MMIO_QUEUE_NOTIFY */
+ unsigned int addr_len; /* access size that must match (1, 2, 4 or 8) */
+ unsigned int vq; /* virtqueue number compared with the written data */
+};
+
+/* per-guest CPU / port structure */
+/* Passed as dev_id to ioeventfd_interrupt(); one instance per guest vCPU */
+struct ioreq_port {
+ int vcpu; /* index into kioreq->ioreq[] */
+ unsigned int port; /* event channel port bound for this vCPU */
+ struct privcmd_kernel_ioreq *kioreq; /* owning per-guest object */
+};
+
+/* per-guest structure */
+/*
+ * Per-guest ioreq state. dom, vcpus and uioreq together identify an object
+ * in get_ioreq() and privcmd_ioeventfd_deassign().
+ */
+struct privcmd_kernel_ioreq {
+ domid_t dom; /* guest domain id */
+ unsigned int vcpus; /* number of entries in ports[] */
+ u64 uioreq; /* user-space address of the ioreq mapping, as passed in */
+ struct ioreq *ioreq; /* kernel address of the ioreq page, set in alloc_ioreq() */
+ spinlock_t lock; /* Protects ioeventfds list */
+ struct list_head ioeventfds; /* privcmd_kernel_ioeventfd entries */
+ struct list_head list; /* link in ioreq_list */
+ struct ioreq_port ports[] __counted_by(vcpus);
+};
+
+/*
+ * Interrupt handler bound to each guest event channel port in alloc_ioreq().
+ * @dev_id is the struct ioreq_port of the vCPU that raised the event.
+ *
+ * Only a ready ioreq that is a copy-type write is considered. If it matches a
+ * registered ioeventfd (address, access size and vq number), that eventfd is
+ * signalled, the ioreq is marked STATE_IORESP_READY and the port is notified;
+ * the handler then returns IRQ_HANDLED. Otherwise the ioreq state is put back
+ * to STATE_IOREQ_READY and IRQ_NONE is returned.
+ */
+static irqreturn_t ioeventfd_interrupt(int irq, void *dev_id)
+{
+ struct ioreq_port *port = dev_id;
+ struct privcmd_kernel_ioreq *kioreq = port->kioreq;
+ struct ioreq *ioreq = &kioreq->ioreq[port->vcpu];
+ struct privcmd_kernel_ioeventfd *kioeventfd;
+ unsigned int state = STATE_IOREQ_READY;
+
+ if (ioreq->state != STATE_IOREQ_READY ||
+ ioreq->type != IOREQ_TYPE_COPY || ioreq->dir != IOREQ_WRITE)
+ return IRQ_NONE;
+
+ /*
+ * We need a barrier, smp_mb(), here to ensure reads are finished before
+ * `state` is updated. Since the lock implementation ensures that
+ * appropriate barrier will be added anyway, we can avoid adding
+ * explicit barrier here.
+ *
+ * Ideally we don't need to update `state` within the locks, but we do
+ * that here to avoid adding explicit barrier.
+ */
+
+ spin_lock(&kioreq->lock);
+ ioreq->state = STATE_IOREQ_INPROCESS;
+
+ list_for_each_entry(kioeventfd, &kioreq->ioeventfds, list) {
+ if (ioreq->addr == kioeventfd->addr + VIRTIO_MMIO_QUEUE_NOTIFY &&
+ ioreq->size == kioeventfd->addr_len &&
+ (ioreq->data & QUEUE_NOTIFY_VQ_MASK) == kioeventfd->vq) {
+ eventfd_signal(kioeventfd->eventfd);
+ state = STATE_IORESP_READY;
+ break;
+ }
+ }
+ spin_unlock(&kioreq->lock);
+
+ /*
+ * We need a barrier, smp_mb(), here to ensure writes are finished
+ * before `state` is updated. Since the lock implementation ensures that
+ * appropriate barrier will be added anyway, we can avoid adding
+ * explicit barrier here.
+ */
+
+ /* Either the response state or, with no match, the original ready state */
+ ioreq->state = state;
+
+ if (state == STATE_IORESP_READY) {
+ notify_remote_via_evtchn(port->port);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+/*
+ * Unlink @kioreq from ioreq_list, unbind the interrupt handler of every
+ * per-vCPU port (last port first) and free the object.
+ * The caller must hold ioreq_lock.
+ */
+static void ioreq_free(struct privcmd_kernel_ioreq *kioreq)
+{
+ struct ioreq_port *ports = kioreq->ports;
+ int i;
+
+ lockdep_assert_held(&ioreq_lock);
+
+ list_del(&kioreq->list);
+
+ /* &ports[i] is the dev_id that was registered for this port in alloc_ioreq() */
+ for (i = kioreq->vcpus - 1; i >= 0; i--)
+ unbind_from_irqhandler(irq_from_evtchn(ports[i].port), &ports[i]);
+
+ kfree(kioreq);
+}
+
+/*
+ * Allocate and register the per-guest ioreq object described by @ioeventfd.
+ *
+ * The ioreq page is resolved from the VMA that contains the user address
+ * ioeventfd->ioreq, the per-vCPU event channel ports are copied from user
+ * space and each one is bound to ioeventfd_interrupt(). On success the object
+ * is appended to ioreq_list. The caller must hold ioreq_lock.
+ *
+ * Returns the new object, or an ERR_PTR():
+ *   -ENOMEM  allocation failed
+ *   -EFAULT  no VMA contains ioeventfd->ioreq
+ *   -EINVAL  the VMA has no page array attached
+ *   other    error from memdup_array_user() or from binding a port
+ */
+static
+struct privcmd_kernel_ioreq *alloc_ioreq(struct privcmd_ioeventfd *ioeventfd)
+{
+	struct privcmd_kernel_ioreq *kioreq;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct page **pages;
+	unsigned int *ports;
+	int ret, size, i;
+
+	lockdep_assert_held(&ioreq_lock);
+
+	size = struct_size(kioreq, ports, ioeventfd->vcpus);
+	kioreq = kzalloc(size, GFP_KERNEL);
+	if (!kioreq)
+		return ERR_PTR(-ENOMEM);
+
+	kioreq->dom = ioeventfd->dom;
+	kioreq->vcpus = ioeventfd->vcpus;
+	kioreq->uioreq = ioeventfd->ioreq;
+	spin_lock_init(&kioreq->lock);
+	INIT_LIST_HEAD(&kioreq->ioeventfds);
+
+	/* The memory for ioreq server must have been mapped earlier */
+	mmap_write_lock(mm);
+
+	/*
+	 * vma_lookup() only returns a VMA that actually contains the address.
+	 * find_vma() would also return the next VMA above it, whose private
+	 * data has nothing to do with the ioreq mapping.
+	 */
+	vma = vma_lookup(mm, (unsigned long)ioeventfd->ioreq);
+	if (!vma) {
+		pr_err("Failed to find vma for ioreq page!\n");
+		mmap_write_unlock(mm);
+		ret = -EFAULT;
+		goto error_kfree;
+	}
+
+	/*
+	 * vm_private_data is only a usable page array once pages have been
+	 * attached to the mapping; it may still be NULL or hold the
+	 * PRIV_VMA_LOCKED marker, neither of which may be dereferenced.
+	 *
+	 * NOTE(review): this does not verify that the VMA is a privcmd
+	 * mapping; consider also checking vma->vm_ops if the ops table is
+	 * declared before this point in the file.
+	 */
+	pages = vma->vm_private_data;
+	if (!pages || pages == PRIV_VMA_LOCKED) {
+		mmap_write_unlock(mm);
+		ret = -EINVAL;
+		goto error_kfree;
+	}
+
+	kioreq->ioreq = (struct ioreq *)(page_to_virt(pages[0]));
+	mmap_write_unlock(mm);
+
+	ports = memdup_array_user(u64_to_user_ptr(ioeventfd->ports),
+				  kioreq->vcpus, sizeof(*ports));
+	if (IS_ERR(ports)) {
+		ret = PTR_ERR(ports);
+		goto error_kfree;
+	}
+
+	for (i = 0; i < kioreq->vcpus; i++) {
+		kioreq->ports[i].vcpu = i;
+		kioreq->ports[i].port = ports[i];
+		kioreq->ports[i].kioreq = kioreq;
+
+		ret = bind_evtchn_to_irqhandler_lateeoi(ports[i],
+				ioeventfd_interrupt, IRQF_SHARED, "ioeventfd",
+				&kioreq->ports[i]);
+		if (ret < 0)
+			goto error_unbind;
+	}
+
+	/* The user-supplied port array was only needed for binding */
+	kfree(ports);
+
+	list_add_tail(&kioreq->list, &ioreq_list);
+
+	return kioreq;
+
+error_unbind:
+	/* Undo only the bindings that succeeded, newest first */
+	while (--i >= 0)
+		unbind_from_irqhandler(irq_from_evtchn(ports[i]), &kioreq->ports[i]);
+
+	kfree(ports);
+error_kfree:
+	kfree(kioreq);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Find the per-guest ioreq object for @ioeventfd, creating it if none exists.
+ *
+ * Objects are matched on the user-space ioreq address. A match whose domain
+ * or vCPU count differs is rejected with -EINVAL, and -EBUSY is returned if
+ * @eventfd is already registered on the matching object. When nothing on
+ * ioreq_list matches, the result of alloc_ioreq() is returned.
+ */
+static struct privcmd_kernel_ioreq *
+get_ioreq(struct privcmd_ioeventfd *ioeventfd, struct eventfd_ctx *eventfd)
+{
+	struct privcmd_kernel_ioreq *cur;
+
+	list_for_each_entry(cur, &ioreq_list, list) {
+		struct privcmd_kernel_ioeventfd *entry;
+		unsigned long irqflags;
+		bool duplicate = false;
+
+		/*
+		 * These fields never change once the object is on ioreq_list,
+		 * so they can be read without taking cur->lock.
+		 */
+		if (cur->uioreq != ioeventfd->ioreq)
+			continue;
+
+		if (cur->dom != ioeventfd->dom ||
+		    cur->vcpus != ioeventfd->vcpus) {
+			pr_err("Invalid ioeventfd configuration mismatch, dom (%u vs %u), vcpus (%u vs %u)\n",
+			       cur->dom, ioeventfd->dom, cur->vcpus,
+			       ioeventfd->vcpus);
+			return ERR_PTR(-EINVAL);
+		}
+
+		/* The same eventfd must not be registered twice for one guest */
+		spin_lock_irqsave(&cur->lock, irqflags);
+		list_for_each_entry(entry, &cur->ioeventfds, list) {
+			if (entry->eventfd == eventfd) {
+				duplicate = true;
+				break;
+			}
+		}
+		spin_unlock_irqrestore(&cur->lock, irqflags);
+
+		if (duplicate)
+			return ERR_PTR(-EBUSY);
+
+		return cur;
+	}
+
+	/* Nothing registered for this ioreq address yet */
+	return alloc_ioreq(ioeventfd);
+}
+
+/*
+ * Unlink @kioeventfd from its list, drop its eventfd reference and free it.
+ * Both callers in this file invoke it with the owning kioreq's lock held.
+ */
+static void ioeventfd_free(struct privcmd_kernel_ioeventfd *kioeventfd)
+{
+ list_del(&kioeventfd->list);
+ eventfd_ctx_put(kioeventfd->eventfd);
+ kfree(kioeventfd);
+}
+
+/*
+ * Register a new ioeventfd described by @ioeventfd.
+ *
+ * Validates the request, takes a reference on the eventfd, then - under
+ * ioreq_lock - finds or creates the per-guest ioreq object and appends the
+ * new entry to its ioeventfds list.
+ *
+ * Returns 0 on success, -EINVAL for a wrapping address range, an unsupported
+ * access length or a vCPU count outside 1..4096, -ENOMEM, the error from
+ * eventfd_ctx_fdget(), or the error reported by get_ioreq().
+ */
+static int privcmd_ioeventfd_assign(struct privcmd_ioeventfd *ioeventfd)
+{
+	struct privcmd_kernel_ioeventfd *entry;
+	struct privcmd_kernel_ioreq *owner;
+	unsigned long irqflags;
+	int err;
+
+	/* Reject an address range that wraps around */
+	if (ioeventfd->addr + ioeventfd->addr_len < ioeventfd->addr)
+		return -EINVAL;
+
+	/* Vhost requires us to support length 1, 2, 4, and 8 */
+	switch (ioeventfd->addr_len) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* At least one vCPU, and no more than 4096 */
+	if (ioeventfd->vcpus == 0 || ioeventfd->vcpus > 4096)
+		return -EINVAL;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->eventfd = eventfd_ctx_fdget(ioeventfd->event_fd);
+	if (IS_ERR(entry->eventfd)) {
+		err = PTR_ERR(entry->eventfd);
+		goto free_entry;
+	}
+
+	entry->addr = ioeventfd->addr;
+	entry->addr_len = ioeventfd->addr_len;
+	entry->vq = ioeventfd->vq;
+
+	mutex_lock(&ioreq_lock);
+
+	owner = get_ioreq(ioeventfd, entry->eventfd);
+	if (IS_ERR(owner)) {
+		mutex_unlock(&ioreq_lock);
+		err = PTR_ERR(owner);
+		goto put_eventfd;
+	}
+
+	/* The interrupt handler walks this list under the same spinlock */
+	spin_lock_irqsave(&owner->lock, irqflags);
+	list_add_tail(&entry->list, &owner->ioeventfds);
+	spin_unlock_irqrestore(&owner->lock, irqflags);
+
+	mutex_unlock(&ioreq_lock);
+
+	return 0;
+
+put_eventfd:
+	eventfd_ctx_put(entry->eventfd);
+
+free_entry:
+	kfree(entry);
+	return err;
+}
+
+/*
+ * Remove the ioeventfd identified by @ioeventfd (domain, ioreq address, vCPU
+ * count and eventfd). When the last ioeventfd of a guest goes away, the
+ * per-guest ioreq object is freed as well.
+ *
+ * Returns 0 on success, the error from eventfd_ctx_fdget(), or -ENODEV if no
+ * matching registration exists.
+ */
+static int privcmd_ioeventfd_deassign(struct privcmd_ioeventfd *ioeventfd)
+{
+ struct privcmd_kernel_ioreq *kioreq, *tkioreq;
+ struct eventfd_ctx *eventfd;
+ unsigned long flags;
+ int ret = 0;
+
+ /* Temporary reference, only used to compare against registered contexts */
+ eventfd = eventfd_ctx_fdget(ioeventfd->event_fd);
+ if (IS_ERR(eventfd))
+ return PTR_ERR(eventfd);
+
+ mutex_lock(&ioreq_lock);
+ list_for_each_entry_safe(kioreq, tkioreq, &ioreq_list, list) {
+ struct privcmd_kernel_ioeventfd *kioeventfd, *tmp;
+ /*
+ * kioreq fields can be accessed here without a lock as they are
+ * never updated after being added to the ioreq_list.
+ */
+ if (kioreq->dom != ioeventfd->dom ||
+ kioreq->uioreq != ioeventfd->ioreq ||
+ kioreq->vcpus != ioeventfd->vcpus)
+ continue;
+
+ spin_lock_irqsave(&kioreq->lock, flags);
+ list_for_each_entry_safe(kioeventfd, tmp, &kioreq->ioeventfds, list) {
+ if (eventfd == kioeventfd->eventfd) {
+ ioeventfd_free(kioeventfd);
+ spin_unlock_irqrestore(&kioreq->lock, flags);
+
+ /* Checked after dropping the spinlock; ioreq_lock is still held */
+ if (list_empty(&kioreq->ioeventfds))
+ ioreq_free(kioreq);
+ goto unlock;
+ }
+ }
+ spin_unlock_irqrestore(&kioreq->lock, flags);
+ /* Right guest, but this eventfd is not registered: report -ENODEV below */
+ break;
+ }
+
+ pr_err("Ioeventfd isn't already assigned, dom: %u, addr: %llu\n",
+ ioeventfd->dom, ioeventfd->addr);
+ ret = -ENODEV;
+
+unlock:
+ mutex_unlock(&ioreq_lock);
+ eventfd_ctx_put(eventfd);
+
+ return ret;
+}
+
+/*
+ * IOCTL_PRIVCMD_IOEVENTFD handler: validate the request and dispatch it to
+ * the assign or deassign path.
+ *
+ * Returns -EFAULT if the request cannot be copied in, -EINVAL if any flag
+ * other than PRIVCMD_IOEVENTFD_FLAG_DEASSIGN is set, -EPERM if the file is
+ * restricted to a different domain, otherwise the result of the selected path.
+ */
+static long privcmd_ioctl_ioeventfd(struct file *file, void __user *udata)
+{
+	struct privcmd_data *priv = file->private_data;
+	struct privcmd_ioeventfd req;
+	bool restricted;
+
+	if (copy_from_user(&req, udata, sizeof(req)))
+		return -EFAULT;
+
+	/* DEASSIGN is the only flag understood here */
+	if (req.flags & ~PRIVCMD_IOEVENTFD_FLAG_DEASSIGN)
+		return -EINVAL;
+
+	/* A restricted file may only act on its own domain */
+	restricted = priv->domid != DOMID_INVALID;
+	if (restricted && priv->domid != req.dom)
+		return -EPERM;
+
+	return (req.flags & PRIVCMD_IOEVENTFD_FLAG_DEASSIGN) ?
+		privcmd_ioeventfd_deassign(&req) :
+		privcmd_ioeventfd_assign(&req);
+}
+
+/*
+ * Tear down every registered ioeventfd and every per-guest ioreq object.
+ * For each object the ioeventfds are freed under its spinlock, then the
+ * object itself is released; the whole walk runs under ioreq_lock.
+ */
+static void privcmd_ioeventfd_exit(void)
+{
+	struct privcmd_kernel_ioreq *kioreq, *next_kioreq;
+	unsigned long irqflags;
+
+	mutex_lock(&ioreq_lock);
+
+	list_for_each_entry_safe(kioreq, next_kioreq, &ioreq_list, list) {
+		struct privcmd_kernel_ioeventfd *kioeventfd, *next_kioeventfd;
+
+		spin_lock_irqsave(&kioreq->lock, irqflags);
+		list_for_each_entry_safe(kioeventfd, next_kioeventfd,
+					 &kioreq->ioeventfds, list)
+			ioeventfd_free(kioeventfd);
+		spin_unlock_irqrestore(&kioreq->lock, irqflags);
+
+		ioreq_free(kioreq);
+	}
+
+	mutex_unlock(&ioreq_lock);
+}
+#else
+/*
+ * Stubs used when CONFIG_XEN_PRIVCMD_EVENTFD is not set: the ioctls report
+ * -EOPNOTSUPP, init succeeds trivially and the exit hooks do nothing.
+ */
+static inline long privcmd_ioctl_irqfd(struct file *file, void __user *udata)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int privcmd_irqfd_init(void)
+{
+ return 0;
+}
+
+static inline void privcmd_irqfd_exit(void)
+{
+}
+
+static inline long privcmd_ioctl_ioeventfd(struct file *file, void __user *udata)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void privcmd_ioeventfd_exit(void)
+{
+}
+#endif /* CONFIG_XEN_PRIVCMD_EVENTFD */
+
static long privcmd_ioctl(struct file *file,
unsigned int cmd, unsigned long data)
{
@@ -868,6 +1541,18 @@ static long privcmd_ioctl(struct file *file,
ret = privcmd_ioctl_mmap_resource(file, udata);
break;
+ case IOCTL_PRIVCMD_IRQFD:
+ ret = privcmd_ioctl_irqfd(file, udata);
+ break;
+
+ case IOCTL_PRIVCMD_IOEVENTFD:
+ ret = privcmd_ioctl_ioeventfd(file, udata);
+ break;
+
+ case IOCTL_PRIVCMD_PCIDEV_GET_GSI:
+ ret = privcmd_ioctl_pcidev_get_gsi(file, udata);
+ break;
+
default:
break;
}
@@ -904,7 +1589,7 @@ static void privcmd_close(struct vm_area_struct *vma)
int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT;
int rc;
- if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
+ if (xen_pv_domain() || !numpgs || !pages)
return;
rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
@@ -934,8 +1619,8 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
/* DONTCOPY is essential for Xen because copy_page_range doesn't know
* how to recreate these mappings */
- vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
- VM_DONTEXPAND | VM_DONTDUMP;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTCOPY |
+ VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_ops = &privcmd_vm_ops;
vma->vm_private_data = NULL;
@@ -949,7 +1634,7 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
*/
static int is_mapped_fn(pte_t *pte, unsigned long addr, void *data)
{
- return pte_none(*pte) ? 0 : -EBUSY;
+ return pte_none(ptep_get(pte)) ? 0 : -EBUSY;
}
static int privcmd_vma_range_is_mapped(
@@ -992,15 +1677,28 @@ static int __init privcmd_init(void)
err = misc_register(&xen_privcmdbuf_dev);
if (err != 0) {
pr_err("Could not register Xen hypercall-buf device\n");
- misc_deregister(&privcmd_dev);
- return err;
+ goto err_privcmdbuf;
+ }
+
+ err = privcmd_irqfd_init();
+ if (err != 0) {
+ pr_err("irqfd init failed\n");
+ goto err_irqfd;
}
return 0;
+
+err_irqfd:
+ misc_deregister(&xen_privcmdbuf_dev);
+err_privcmdbuf:
+ misc_deregister(&privcmd_dev);
+ return err;
}
static void __exit privcmd_exit(void)
{
+ privcmd_ioeventfd_exit();
+ privcmd_irqfd_exit();
misc_deregister(&privcmd_dev);
misc_deregister(&xen_privcmdbuf_dev);
}