diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gvt/kvmgt.c')
| -rw-r--r-- | drivers/gpu/drm/i915/gvt/kvmgt.c | 1688 |
1 files changed, 707 insertions, 981 deletions
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 1ac98f8aba31..3abc9206f1a8 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1,7 +1,7 @@ /* * KVMGT - the implementation of Intel mediated pass-through framework for KVM * - * Copyright(c) 2014-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -26,10 +26,14 @@ * Kevin Tian <kevin.tian@intel.com> * Jike Song <jike.song@intel.com> * Xiaoguang Chen <xiaoguang.chen@intel.com> + * Eddie Dong <eddie.dong@intel.com> + * + * Contributors: + * Niu Bing <bing.niu@intel.com> + * Zhi Wang <zhi.a.wang@intel.com> */ #include <linux/init.h> -#include <linux/device.h> #include <linux/mm.h> #include <linux/kthread.h> #include <linux/sched/mm.h> @@ -38,18 +42,20 @@ #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/eventfd.h> -#include <linux/uuid.h> -#include <linux/kvm_host.h> -#include <linux/vfio.h> #include <linux/mdev.h> #include <linux/debugfs.h> #include <linux/nospec.h> +#include <drm/drm_edid.h> +#include <drm/drm_print.h> + #include "i915_drv.h" +#include "intel_gvt.h" #include "gvt.h" -static const struct intel_gvt_ops *intel_gvt_ops; +MODULE_IMPORT_NS("DMA_BUF"); +MODULE_IMPORT_NS("I915_GVT"); /* helper macros copied from vfio-pci */ #define VFIO_PCI_OFFSET_SHIFT 40 @@ -88,16 +94,6 @@ struct kvmgt_pgfn { struct hlist_node hnode; }; -#define KVMGT_DEBUGFS_FILENAME "kvmgt_nr_cache_entries" -struct kvmgt_guest_info { - struct kvm *kvm; - struct intel_vgpu *vgpu; - struct kvm_page_track_notifier_node track_node; -#define NR_BKT (1 << 18) - struct hlist_head ptable[NR_BKT]; -#undef NR_BKT -}; - struct gvt_dma { struct intel_vgpu *vgpu; struct rb_node gfn_node; @@ -108,211 +104,74 @@ struct gvt_dma { struct kref ref; }; -struct kvmgt_vdev { - struct intel_vgpu *vgpu; - struct mdev_device *mdev; - struct vfio_region *region; - int num_regions; - struct eventfd_ctx *intx_trigger; - struct eventfd_ctx *msi_trigger; - - /* - * Two caches are used to avoid mapping duplicated pages (eg. - * scratch pages). This help to reduce dma setup overhead. - */ - struct rb_root gfn_cache; - struct rb_root dma_addr_cache; - unsigned long nr_cache_entries; - struct mutex cache_lock; - - struct notifier_block iommu_notifier; - struct notifier_block group_notifier; - struct kvm *kvm; - struct work_struct release_work; - atomic_t released; - struct vfio_device *vfio_device; - struct vfio_group *vfio_group; -}; - -static inline struct kvmgt_vdev *kvmgt_vdev(struct intel_vgpu *vgpu) -{ - return intel_vgpu_vdev(vgpu); -} - -static inline bool handle_valid(unsigned long handle) -{ - return !!(handle & ~0xff); -} - -static ssize_t available_instances_show(struct mdev_type *mtype, - struct mdev_type_attribute *attr, - char *buf) -{ - struct intel_vgpu_type *type; - unsigned int num = 0; - struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; - - type = &gvt->types[mtype_get_type_group_id(mtype)]; - if (!type) - num = 0; - else - num = type->avail_instance; - - return sprintf(buf, "%u\n", num); -} +#define vfio_dev_to_vgpu(vfio_dev) \ + container_of((vfio_dev), struct intel_vgpu, vfio_device) -static ssize_t device_api_show(struct mdev_type *mtype, - struct mdev_type_attribute *attr, char *buf) -{ - return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); -} +static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len, + struct kvm_page_track_notifier_node *node); +static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages, + struct kvm_page_track_notifier_node *node); -static ssize_t description_show(struct mdev_type *mtype, - struct mdev_type_attribute *attr, char *buf) +static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf) { - struct intel_vgpu_type *type; - struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt; - - type = &gvt->types[mtype_get_type_group_id(mtype)]; - if (!type) - return 0; + struct intel_vgpu_type *type = + container_of(mtype, struct intel_vgpu_type, type); return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" "fence: %d\nresolution: %s\n" "weight: %d\n", - BYTES_TO_MB(type->low_gm_size), - BYTES_TO_MB(type->high_gm_size), - type->fence, vgpu_edid_str(type->resolution), - type->weight); -} - -static MDEV_TYPE_ATTR_RO(available_instances); -static MDEV_TYPE_ATTR_RO(device_api); -static MDEV_TYPE_ATTR_RO(description); - -static struct attribute *gvt_type_attrs[] = { - &mdev_type_attr_available_instances.attr, - &mdev_type_attr_device_api.attr, - &mdev_type_attr_description.attr, - NULL, -}; - -static struct attribute_group *gvt_vgpu_type_groups[] = { - [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, -}; - -static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) -{ - int i, j; - struct intel_vgpu_type *type; - struct attribute_group *group; - - for (i = 0; i < gvt->num_types; i++) { - type = &gvt->types[i]; - - group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); - if (!group) - goto unwind; - - group->name = type->name; - group->attrs = gvt_type_attrs; - gvt_vgpu_type_groups[i] = group; - } - - return 0; - -unwind: - for (j = 0; j < i; j++) { - group = gvt_vgpu_type_groups[j]; - kfree(group); - } - - return -ENOMEM; -} - -static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) -{ - int i; - struct attribute_group *group; - - for (i = 0; i < gvt->num_types; i++) { - group = gvt_vgpu_type_groups[i]; - gvt_vgpu_type_groups[i] = NULL; - kfree(group); - } + BYTES_TO_MB(type->conf->low_mm), + BYTES_TO_MB(type->conf->high_mm), + type->conf->fence, vgpu_edid_str(type->conf->edid), + type->conf->weight); } -static int kvmgt_guest_init(struct mdev_device *mdev); -static void intel_vgpu_release_work(struct work_struct *work); -static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); - static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size) { - struct drm_i915_private *i915 = vgpu->gvt->gt->i915; - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - int total_pages; - int npage; - int ret; - - total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; - - for (npage = 0; npage < total_pages; npage++) { - unsigned long cur_gfn = gfn + npage; - - ret = vfio_group_unpin_pages(vdev->vfio_group, &cur_gfn, 1); - drm_WARN_ON(&i915->drm, ret != 1); - } + vfio_unpin_pages(&vgpu->vfio_device, gfn << PAGE_SHIFT, + DIV_ROUND_UP(size, PAGE_SIZE)); } /* Pin a normal or compound guest page for dma. */ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, struct page **page) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - unsigned long base_pfn = 0; - int total_pages; + int total_pages = DIV_ROUND_UP(size, PAGE_SIZE); + struct page *base_page = NULL; int npage; int ret; - total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; /* - * We pin the pages one-by-one to avoid allocating a big arrary + * We pin the pages one-by-one to avoid allocating a big array * on stack to hold pfns. */ for (npage = 0; npage < total_pages; npage++) { - unsigned long cur_gfn = gfn + npage; - unsigned long pfn; + dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT; + struct page *cur_page; - ret = vfio_group_pin_pages(vdev->vfio_group, &cur_gfn, 1, - IOMMU_READ | IOMMU_WRITE, &pfn); + ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1, + IOMMU_READ | IOMMU_WRITE, &cur_page); if (ret != 1) { - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", - cur_gfn, ret); - goto err; - } - - if (!pfn_valid(pfn)) { - gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); - npage++; - ret = -EFAULT; + gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n", + &cur_iova, ret); goto err; } if (npage == 0) - base_pfn = pfn; - else if (base_pfn + npage != pfn) { - gvt_vgpu_err("The pages are not continuous\n"); + base_page = cur_page; + else if (page_to_pfn(base_page) + npage != page_to_pfn(cur_page)) { ret = -EINVAL; npage++; goto err; } } - *page = pfn_to_page(base_pfn); + *page = base_page; return 0; err: - gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); + if (npage) + gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); return ret; } @@ -328,7 +187,7 @@ static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, return ret; /* Setup DMA mapping. */ - *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL); + *dma_addr = dma_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, *dma_addr)) { gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n", page_to_pfn(page), ret); @@ -344,14 +203,14 @@ static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, { struct device *dev = vgpu->gvt->gt->i915->drm.dev; - dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL); gvt_unpin_guest_page(vgpu, gfn, size); } static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, dma_addr_t dma_addr) { - struct rb_node *node = kvmgt_vdev(vgpu)->dma_addr_cache.rb_node; + struct rb_node *node = vgpu->dma_addr_cache.rb_node; struct gvt_dma *itr; while (node) { @@ -369,7 +228,7 @@ static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) { - struct rb_node *node = kvmgt_vdev(vgpu)->gfn_cache.rb_node; + struct rb_node *node = vgpu->gfn_cache.rb_node; struct gvt_dma *itr; while (node) { @@ -390,7 +249,6 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, { struct gvt_dma *new, *itr; struct rb_node **link, *parent = NULL; - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); if (!new) @@ -403,7 +261,7 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kref_init(&new->ref); /* gfn_cache maps gfn to struct gvt_dma. */ - link = &vdev->gfn_cache.rb_node; + link = &vgpu->gfn_cache.rb_node; while (*link) { parent = *link; itr = rb_entry(parent, struct gvt_dma, gfn_node); @@ -414,11 +272,11 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, link = &parent->rb_right; } rb_link_node(&new->gfn_node, parent, link); - rb_insert_color(&new->gfn_node, &vdev->gfn_cache); + rb_insert_color(&new->gfn_node, &vgpu->gfn_cache); /* dma_addr_cache maps dma addr to struct gvt_dma. */ parent = NULL; - link = &vdev->dma_addr_cache.rb_node; + link = &vgpu->dma_addr_cache.rb_node; while (*link) { parent = *link; itr = rb_entry(parent, struct gvt_dma, dma_addr_node); @@ -429,59 +287,54 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, link = &parent->rb_right; } rb_link_node(&new->dma_addr_node, parent, link); - rb_insert_color(&new->dma_addr_node, &vdev->dma_addr_cache); + rb_insert_color(&new->dma_addr_node, &vgpu->dma_addr_cache); - vdev->nr_cache_entries++; + vgpu->nr_cache_entries++; return 0; } static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, struct gvt_dma *entry) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - - rb_erase(&entry->gfn_node, &vdev->gfn_cache); - rb_erase(&entry->dma_addr_node, &vdev->dma_addr_cache); + rb_erase(&entry->gfn_node, &vgpu->gfn_cache); + rb_erase(&entry->dma_addr_node, &vgpu->dma_addr_cache); kfree(entry); - vdev->nr_cache_entries--; + vgpu->nr_cache_entries--; } static void gvt_cache_destroy(struct intel_vgpu *vgpu) { struct gvt_dma *dma; struct rb_node *node = NULL; - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); for (;;) { - mutex_lock(&vdev->cache_lock); - node = rb_first(&vdev->gfn_cache); + mutex_lock(&vgpu->cache_lock); + node = rb_first(&vgpu->gfn_cache); if (!node) { - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); break; } dma = rb_entry(node, struct gvt_dma, gfn_node); gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size); __gvt_cache_remove_entry(vgpu, dma); - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); } } static void gvt_cache_init(struct intel_vgpu *vgpu) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - - vdev->gfn_cache = RB_ROOT; - vdev->dma_addr_cache = RB_ROOT; - vdev->nr_cache_entries = 0; - mutex_init(&vdev->cache_lock); + vgpu->gfn_cache = RB_ROOT; + vgpu->dma_addr_cache = RB_ROOT; + vgpu->nr_cache_entries = 0; + mutex_init(&vgpu->cache_lock); } -static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) +static void kvmgt_protect_table_init(struct intel_vgpu *info) { hash_init(info->ptable); } -static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info) +static void kvmgt_protect_table_destroy(struct intel_vgpu *info) { struct kvmgt_pgfn *p; struct hlist_node *tmp; @@ -494,10 +347,12 @@ static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info) } static struct kvmgt_pgfn * -__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn) +__kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p, *res = NULL; + lockdep_assert_held(&info->vgpu_lock); + hash_for_each_possible(info->ptable, p, hnode, gfn) { if (gfn == p->gfn) { res = p; @@ -508,8 +363,7 @@ __kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn) return res; } -static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info, - gfn_t gfn) +static bool kvmgt_gfn_is_write_protected(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p; @@ -517,7 +371,7 @@ static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info, return !!p; } -static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn) +static void kvmgt_protect_table_add(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p; @@ -532,8 +386,7 @@ static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn) hash_add(info->ptable, &p->hnode, gfn); } -static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, - gfn_t gfn) +static void kvmgt_protect_table_del(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p; @@ -547,18 +400,17 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf, size_t count, loff_t *ppos, bool iswrite) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; - void *base = vdev->region[i].data; + void *base = vgpu->region[i].data; loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; - if (pos >= vdev->region[i].size || iswrite) { + if (pos >= vgpu->region[i].size || iswrite) { gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n"); return -EINVAL; } - count = min(count, (size_t)(vdev->region[i].size - pos)); + count = min(count, (size_t)(vgpu->region[i].size - pos)); memcpy(buf, base + pos, count); return count; @@ -574,6 +426,18 @@ static const struct intel_vgpu_regops intel_vgpu_regops_opregion = { .release = intel_vgpu_reg_release_opregion, }; +static bool edid_valid(const void *edid, size_t size) +{ + const struct drm_edid *drm_edid; + bool is_valid; + + drm_edid = drm_edid_alloc(edid, size); + is_valid = drm_edid_valid(drm_edid); + drm_edid_free(drm_edid); + + return is_valid; +} + static int handle_edid_regs(struct intel_vgpu *vgpu, struct vfio_edid_region *region, char *buf, size_t count, u16 offset, bool is_write) @@ -592,17 +456,13 @@ static int handle_edid_regs(struct intel_vgpu *vgpu, switch (offset) { case offsetof(struct vfio_region_gfx_edid, link_state): if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) { - if (!drm_edid_block_valid( - (u8 *)region->edid_blob, - 0, - true, - NULL)) { + if (!edid_valid(region->edid_blob, EDID_SIZE)) { gvt_vgpu_err("invalid EDID blob\n"); return -EINVAL; } - intel_gvt_ops->emulate_hotplug(vgpu, true); + intel_vgpu_emulate_hotplug(vgpu, true); } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN) - intel_gvt_ops->emulate_hotplug(vgpu, false); + intel_vgpu_emulate_hotplug(vgpu, false); else { gvt_vgpu_err("invalid EDID link state %d\n", regs->link_state); @@ -651,8 +511,7 @@ static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf, int ret; unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; - struct vfio_edid_region *region = - (struct vfio_edid_region *)kvmgt_vdev(vgpu)->region[i].data; + struct vfio_edid_region *region = vgpu->region[i].data; loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; if (pos < region->vfio_edid_regs.edid_offset) { @@ -684,44 +543,27 @@ static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, const struct intel_vgpu_regops *ops, size_t size, u32 flags, void *data) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); struct vfio_region *region; - region = krealloc(vdev->region, - (vdev->num_regions + 1) * sizeof(*region), + region = krealloc(vgpu->region, + (vgpu->num_regions + 1) * sizeof(*region), GFP_KERNEL); if (!region) return -ENOMEM; - vdev->region = region; - vdev->region[vdev->num_regions].type = type; - vdev->region[vdev->num_regions].subtype = subtype; - vdev->region[vdev->num_regions].ops = ops; - vdev->region[vdev->num_regions].size = size; - vdev->region[vdev->num_regions].flags = flags; - vdev->region[vdev->num_regions].data = data; - vdev->num_regions++; - return 0; -} - -static int kvmgt_get_vfio_device(void *p_vgpu) -{ - struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - - vdev->vfio_device = vfio_device_get_from_dev( - mdev_dev(vdev->mdev)); - if (!vdev->vfio_device) { - gvt_vgpu_err("failed to get vfio device\n"); - return -ENODEV; - } + vgpu->region = region; + vgpu->region[vgpu->num_regions].type = type; + vgpu->region[vgpu->num_regions].subtype = subtype; + vgpu->region[vgpu->num_regions].ops = ops; + vgpu->region[vgpu->num_regions].size = size; + vgpu->region[vgpu->num_regions].flags = flags; + vgpu->region[vgpu->num_regions].data = data; + vgpu->num_regions++; return 0; } - -static int kvmgt_set_opregion(void *p_vgpu) +int intel_gvt_set_opregion(struct intel_vgpu *vgpu) { - struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; void *base; int ret; @@ -741,15 +583,14 @@ static int kvmgt_set_opregion(void *p_vgpu) ret = intel_vgpu_register_reg(vgpu, PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, - &intel_vgpu_regops_opregion, OPREGION_SIZE, + &intel_vgpu_regops_opregion, INTEL_GVT_OPREGION_SIZE, VFIO_REGION_INFO_FLAG_READ, base); return ret; } -static int kvmgt_set_edid(void *p_vgpu, int port_num) +int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num) { - struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); struct vfio_edid_region *base; int ret; @@ -777,246 +618,108 @@ static int kvmgt_set_edid(void *p_vgpu, int port_num) return ret; } -static void kvmgt_put_vfio_device(void *vgpu) -{ - struct kvmgt_vdev *vdev = kvmgt_vdev((struct intel_vgpu *)vgpu); - - if (WARN_ON(!vdev->vfio_device)) - return; - - vfio_device_put(vdev->vfio_device); -} - -static int intel_vgpu_create(struct mdev_device *mdev) +static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova, + u64 length) { - struct intel_vgpu *vgpu = NULL; - struct intel_vgpu_type *type; - struct device *pdev; - struct intel_gvt *gvt; - int ret; - - pdev = mdev_parent_dev(mdev); - gvt = kdev_to_i915(pdev)->gvt; + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); + struct gvt_dma *entry; + u64 iov_pfn = iova >> PAGE_SHIFT; + u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE; - type = &gvt->types[mdev_get_type_group_id(mdev)]; - if (!type) { - ret = -EINVAL; - goto out; - } + mutex_lock(&vgpu->cache_lock); + for (; iov_pfn < end_iov_pfn; iov_pfn++) { + entry = __gvt_cache_find_gfn(vgpu, iov_pfn); + if (!entry) + continue; - vgpu = intel_gvt_ops->vgpu_create(gvt, type); - if (IS_ERR_OR_NULL(vgpu)) { - ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu); - gvt_err("failed to create intel vgpu: %d\n", ret); - goto out; + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, + entry->size); + __gvt_cache_remove_entry(vgpu, entry); } - - INIT_WORK(&kvmgt_vdev(vgpu)->release_work, intel_vgpu_release_work); - - kvmgt_vdev(vgpu)->mdev = mdev; - mdev_set_drvdata(mdev, vgpu); - - gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n", - dev_name(mdev_dev(mdev))); - ret = 0; - -out: - return ret; -} - -static int intel_vgpu_remove(struct mdev_device *mdev) -{ - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); - - if (handle_valid(vgpu->handle)) - return -EBUSY; - - intel_gvt_ops->vgpu_destroy(vgpu); - return 0; + mutex_unlock(&vgpu->cache_lock); } -static int intel_vgpu_iommu_notifier(struct notifier_block *nb, - unsigned long action, void *data) +static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) { - struct kvmgt_vdev *vdev = container_of(nb, - struct kvmgt_vdev, - iommu_notifier); - struct intel_vgpu *vgpu = vdev->vgpu; - - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - struct gvt_dma *entry; - unsigned long iov_pfn, end_iov_pfn; - - iov_pfn = unmap->iova >> PAGE_SHIFT; - end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; + struct intel_vgpu *itr; + int id; + bool ret = false; - mutex_lock(&vdev->cache_lock); - for (; iov_pfn < end_iov_pfn; iov_pfn++) { - entry = __gvt_cache_find_gfn(vgpu, iov_pfn); - if (!entry) - continue; + mutex_lock(&vgpu->gvt->lock); + for_each_active_vgpu(vgpu->gvt, itr, id) { + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, itr->status)) + continue; - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, - entry->size); - __gvt_cache_remove_entry(vgpu, entry); + if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) { + ret = true; + goto out; } - mutex_unlock(&vdev->cache_lock); - } - - return NOTIFY_OK; -} - -static int intel_vgpu_group_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct kvmgt_vdev *vdev = container_of(nb, - struct kvmgt_vdev, - group_notifier); - - /* the only action we care about */ - if (action == VFIO_GROUP_NOTIFY_SET_KVM) { - vdev->kvm = data; - - if (!data) - schedule_work(&vdev->release_work); } - - return NOTIFY_OK; +out: + mutex_unlock(&vgpu->gvt->lock); + return ret; } -static int intel_vgpu_open(struct mdev_device *mdev) +static int intel_vgpu_open_device(struct vfio_device *vfio_dev) { - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - unsigned long events; + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); int ret; - struct vfio_group *vfio_group; - - vdev->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; - vdev->group_notifier.notifier_call = intel_vgpu_group_notifier; - - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events, - &vdev->iommu_notifier); - if (ret != 0) { - gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", - ret); - goto out; - } - - events = VFIO_GROUP_NOTIFY_SET_KVM; - ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events, - &vdev->group_notifier); - if (ret != 0) { - gvt_vgpu_err("vfio_register_notifier for group failed: %d\n", - ret); - goto undo_iommu; - } - vfio_group = vfio_group_get_external_user_from_dev(mdev_dev(mdev)); - if (IS_ERR_OR_NULL(vfio_group)) { - ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group); - gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n"); - goto undo_register; - } - vdev->vfio_group = vfio_group; + if (__kvmgt_vgpu_exist(vgpu)) + return -EEXIST; - /* Take a module reference as mdev core doesn't take - * a reference for vendor driver. - */ - if (!try_module_get(THIS_MODULE)) { - ret = -ENODEV; - goto undo_group; + vgpu->track_node.track_write = kvmgt_page_track_write; + vgpu->track_node.track_remove_region = kvmgt_page_track_remove_region; + ret = kvm_page_track_register_notifier(vgpu->vfio_device.kvm, + &vgpu->track_node); + if (ret) { + gvt_vgpu_err("KVM is required to use Intel vGPU\n"); + return ret; } - ret = kvmgt_guest_init(mdev); - if (ret) - goto undo_group; - - intel_gvt_ops->vgpu_activate(vgpu); + set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status); - atomic_set(&vdev->released, 0); - return ret; - -undo_group: - vfio_group_put_external_user(vdev->vfio_group); - vdev->vfio_group = NULL; + debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs, + &vgpu->nr_cache_entries); -undo_register: - vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, - &vdev->group_notifier); + intel_gvt_activate_vgpu(vgpu); -undo_iommu: - vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, - &vdev->iommu_notifier); -out: - return ret; + return 0; } static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); struct eventfd_ctx *trigger; - trigger = vdev->msi_trigger; + trigger = vgpu->msi_trigger; if (trigger) { eventfd_ctx_put(trigger); - vdev->msi_trigger = NULL; + vgpu->msi_trigger = NULL; } } -static void __intel_vgpu_release(struct intel_vgpu *vgpu) +static void intel_vgpu_close_device(struct vfio_device *vfio_dev) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - struct drm_i915_private *i915 = vgpu->gvt->gt->i915; - struct kvmgt_guest_info *info; - int ret; + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - if (!handle_valid(vgpu->handle)) - return; + intel_gvt_release_vgpu(vgpu); - if (atomic_cmpxchg(&vdev->released, 0, 1)) - return; + clear_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status); - intel_gvt_ops->vgpu_release(vgpu); + debugfs_lookup_and_remove(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs); - ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_IOMMU_NOTIFY, - &vdev->iommu_notifier); - drm_WARN(&i915->drm, ret, - "vfio_unregister_notifier for iommu failed: %d\n", ret); + kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, + &vgpu->track_node); - ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_GROUP_NOTIFY, - &vdev->group_notifier); - drm_WARN(&i915->drm, ret, - "vfio_unregister_notifier for group failed: %d\n", ret); + kvmgt_protect_table_destroy(vgpu); + gvt_cache_destroy(vgpu); - /* dereference module reference taken at open */ - module_put(THIS_MODULE); + WARN_ON(vgpu->nr_cache_entries); - info = (struct kvmgt_guest_info *)vgpu->handle; - kvmgt_guest_exit(info); + vgpu->gfn_cache = RB_ROOT; + vgpu->dma_addr_cache = RB_ROOT; intel_vgpu_release_msi_eventfd_ctx(vgpu); - vfio_group_put_external_user(vdev->vfio_group); - - vdev->kvm = NULL; - vgpu->handle = 0; -} - -static void intel_vgpu_release(struct mdev_device *mdev) -{ - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); - - __intel_vgpu_release(vgpu); -} - -static void intel_vgpu_release_work(struct work_struct *work) -{ - struct kvmgt_vdev *vdev = container_of(work, struct kvmgt_vdev, - release_work); - - __intel_vgpu_release(vdev->vgpu); } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) @@ -1053,10 +756,10 @@ static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off, int ret; if (is_write) - ret = intel_gvt_ops->emulate_mmio_write(vgpu, + ret = intel_vgpu_emulate_mmio_write(vgpu, bar_start + off, buf, count); else - ret = intel_gvt_ops->emulate_mmio_read(vgpu, + ret = intel_vgpu_emulate_mmio_read(vgpu, bar_start + off, buf, count); return ret; } @@ -1094,17 +797,15 @@ static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off, return 0; } -static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, +static ssize_t intel_vgpu_rw(struct intel_vgpu *vgpu, char *buf, size_t count, loff_t *ppos, bool is_write) { - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); u64 pos = *ppos & VFIO_PCI_OFFSET_MASK; int ret = -EINVAL; - if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) { + if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) { gvt_vgpu_err("invalid index: %u\n", index); return -EINVAL; } @@ -1112,10 +813,10 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, switch (index) { case VFIO_PCI_CONFIG_REGION_INDEX: if (is_write) - ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos, + ret = intel_vgpu_emulate_cfg_write(vgpu, pos, buf, count); else - ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos, + ret = intel_vgpu_emulate_cfg_read(vgpu, pos, buf, count); break; case VFIO_PCI_BAR0_REGION_INDEX: @@ -1133,20 +834,19 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, case VFIO_PCI_ROM_REGION_INDEX: break; default: - if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) + if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) return -EINVAL; index -= VFIO_PCI_NUM_REGIONS; - return vdev->region[index].ops->rw(vgpu, buf, count, + return vgpu->region[index].ops->rw(vgpu, buf, count, ppos, is_write); } return ret == 0 ? count : ret; } -static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos) +static bool gtt_entry(struct intel_vgpu *vgpu, loff_t *ppos) { - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); struct intel_gvt *gvt = vgpu->gvt; int offset; @@ -1163,9 +863,10 @@ static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos) true : false; } -static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, +static ssize_t intel_vgpu_read(struct vfio_device *vfio_dev, char __user *buf, size_t count, loff_t *ppos) { + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); unsigned int done = 0; int ret; @@ -1174,10 +875,10 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, /* Only support GGTT entry 8 bytes read */ if (count >= 8 && !(*ppos % 8) && - gtt_entry(mdev, ppos)) { + gtt_entry(vgpu, ppos)) { u64 val; - ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, false); if (ret <= 0) goto read_err; @@ -1189,7 +890,7 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, } else if (count >= 4 && !(*ppos % 4)) { u32 val; - ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, false); if (ret <= 0) goto read_err; @@ -1201,7 +902,7 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, } else if (count >= 2 && !(*ppos % 2)) { u16 val; - ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, false); if (ret <= 0) goto read_err; @@ -1213,7 +914,7 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, } else { u8 val; - ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos, + ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos, false); if (ret <= 0) goto read_err; @@ -1236,10 +937,11 @@ read_err: return -EFAULT; } -static ssize_t intel_vgpu_write(struct mdev_device *mdev, +static ssize_t intel_vgpu_write(struct vfio_device *vfio_dev, const char __user *buf, size_t count, loff_t *ppos) { + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); unsigned int done = 0; int ret; @@ -1248,13 +950,13 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev, /* Only support GGTT entry 8 bytes write */ if (count >= 8 && !(*ppos % 8) && - gtt_entry(mdev, ppos)) { + gtt_entry(vgpu, ppos)) { u64 val; if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, true); if (ret <= 0) goto write_err; @@ -1266,7 +968,7 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, true); if (ret <= 0) goto write_err; @@ -1278,7 +980,7 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = intel_vgpu_rw(mdev, (char *)&val, + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, true); if (ret <= 0) goto write_err; @@ -1290,7 +992,7 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = intel_vgpu_rw(mdev, &val, sizeof(val), + ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos, true); if (ret <= 0) goto write_err; @@ -1309,13 +1011,14 @@ write_err: return -EFAULT; } -static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) +static int intel_vgpu_mmap(struct vfio_device *vfio_dev, + struct vm_area_struct *vma) { + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); unsigned int index; u64 virtaddr; unsigned long req_size, pgoff, req_start; pgprot_t pg_prot; - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); if (index >= VFIO_PCI_ROM_REGION_INDEX) @@ -1390,7 +1093,7 @@ static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu, gvt_vgpu_err("eventfd_ctx_fdget failed\n"); return PTR_ERR(trigger); } - kvmgt_vdev(vgpu)->msi_trigger = trigger; + vgpu->msi_trigger = trigger; } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count) intel_vgpu_release_msi_eventfd_ctx(vgpu); @@ -1438,11 +1141,126 @@ static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags, return func(vgpu, index, start, count, flags, data); } -static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, +static int intel_vgpu_ioctl_get_region_info(struct vfio_device *vfio_dev, + struct vfio_region_info *info, + struct vfio_info_cap *caps) +{ + struct vfio_region_info_cap_sparse_mmap *sparse = NULL; + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); + int nr_areas = 1; + int cap_type_id; + unsigned int i; + int ret; + + switch (info->index) { + case VFIO_PCI_CONFIG_REGION_INDEX: + info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index); + info->size = vgpu->gvt->device_info.cfg_space_size; + info->flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + break; + case VFIO_PCI_BAR0_REGION_INDEX: + info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index); + info->size = vgpu->cfg_space.bar[info->index].size; + if (!info->size) { + info->flags = 0; + break; + } + + info->flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + break; + case VFIO_PCI_BAR1_REGION_INDEX: + info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index); + info->size = 0; + info->flags = 0; + break; + case VFIO_PCI_BAR2_REGION_INDEX: + info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index); + info->flags = VFIO_REGION_INFO_FLAG_CAPS | + VFIO_REGION_INFO_FLAG_MMAP | + VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + info->size = gvt_aperture_sz(vgpu->gvt); + + sparse = kzalloc(struct_size(sparse, areas, nr_areas), + GFP_KERNEL); + if (!sparse) + return -ENOMEM; + + sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; + sparse->header.version = 1; + sparse->nr_areas = nr_areas; + cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; + sparse->areas[0].offset = + PAGE_ALIGN(vgpu_aperture_offset(vgpu)); + sparse->areas[0].size = vgpu_aperture_sz(vgpu); + break; + + case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX: + info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index); + info->size = 0; + info->flags = 0; + + gvt_dbg_core("get region info bar:%d\n", info->index); + break; + + case VFIO_PCI_ROM_REGION_INDEX: + case VFIO_PCI_VGA_REGION_INDEX: + info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index); + info->size = 0; + info->flags = 0; + + gvt_dbg_core("get region info index:%d\n", info->index); + break; + default: { + struct vfio_region_info_cap_type cap_type = { + .header.id = VFIO_REGION_INFO_CAP_TYPE, + .header.version = 1 + }; + + if (info->index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) + return -EINVAL; + info->index = array_index_nospec( + info->index, VFIO_PCI_NUM_REGIONS + vgpu->num_regions); + + i = info->index - VFIO_PCI_NUM_REGIONS; + + info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index); + info->size = vgpu->region[i].size; + info->flags = vgpu->region[i].flags; + + cap_type.type = vgpu->region[i].type; + cap_type.subtype = vgpu->region[i].subtype; + + ret = vfio_info_add_capability(caps, &cap_type.header, + sizeof(cap_type)); + if (ret) + return ret; + } + } + + if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) { + ret = -EINVAL; + if (cap_type_id == VFIO_REGION_INFO_CAP_SPARSE_MMAP) { + ret = vfio_info_add_capability( + caps, &sparse->header, + struct_size(sparse, areas, sparse->nr_areas)); + } + if (ret) { + kfree(sparse); + return ret; + } + } + + kfree(sparse); + return 0; +} + +static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd, unsigned long arg) { - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); unsigned long minsz; gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd); @@ -1461,163 +1279,12 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, info.flags = VFIO_DEVICE_FLAGS_PCI; info.flags |= VFIO_DEVICE_FLAGS_RESET; info.num_regions = VFIO_PCI_NUM_REGIONS + - vdev->num_regions; + vgpu->num_regions; info.num_irqs = VFIO_PCI_NUM_IRQS; return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; - } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { - struct vfio_region_info info; - struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; - unsigned int i; - int ret; - struct vfio_region_info_cap_sparse_mmap *sparse = NULL; - int nr_areas = 1; - int cap_type_id; - - minsz = offsetofend(struct vfio_region_info, offset); - - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; - - if (info.argsz < minsz) - return -EINVAL; - - switch (info.index) { - case VFIO_PCI_CONFIG_REGION_INDEX: - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.size = vgpu->gvt->device_info.cfg_space_size; - info.flags = VFIO_REGION_INFO_FLAG_READ | - VFIO_REGION_INFO_FLAG_WRITE; - break; - case VFIO_PCI_BAR0_REGION_INDEX: - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.size = vgpu->cfg_space.bar[info.index].size; - if (!info.size) { - info.flags = 0; - break; - } - - info.flags = VFIO_REGION_INFO_FLAG_READ | - VFIO_REGION_INFO_FLAG_WRITE; - break; - case VFIO_PCI_BAR1_REGION_INDEX: - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.size = 0; - info.flags = 0; - break; - case VFIO_PCI_BAR2_REGION_INDEX: - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.flags = VFIO_REGION_INFO_FLAG_CAPS | - VFIO_REGION_INFO_FLAG_MMAP | - VFIO_REGION_INFO_FLAG_READ | - VFIO_REGION_INFO_FLAG_WRITE; - info.size = gvt_aperture_sz(vgpu->gvt); - - sparse = kzalloc(struct_size(sparse, areas, nr_areas), - GFP_KERNEL); - if (!sparse) - return -ENOMEM; - - sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; - sparse->header.version = 1; - sparse->nr_areas = nr_areas; - cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; - sparse->areas[0].offset = - PAGE_ALIGN(vgpu_aperture_offset(vgpu)); - sparse->areas[0].size = vgpu_aperture_sz(vgpu); - break; - - case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX: - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.size = 0; - info.flags = 0; - - gvt_dbg_core("get region info bar:%d\n", info.index); - break; - - case VFIO_PCI_ROM_REGION_INDEX: - case VFIO_PCI_VGA_REGION_INDEX: - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.size = 0; - info.flags = 0; - - gvt_dbg_core("get region info index:%d\n", info.index); - break; - default: - { - struct vfio_region_info_cap_type cap_type = { - .header.id = VFIO_REGION_INFO_CAP_TYPE, - .header.version = 1 }; - - if (info.index >= VFIO_PCI_NUM_REGIONS + - vdev->num_regions) - return -EINVAL; - info.index = - array_index_nospec(info.index, - VFIO_PCI_NUM_REGIONS + - vdev->num_regions); - - i = info.index - VFIO_PCI_NUM_REGIONS; - - info.offset = - VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.size = vdev->region[i].size; - info.flags = vdev->region[i].flags; - - cap_type.type = vdev->region[i].type; - cap_type.subtype = vdev->region[i].subtype; - - ret = vfio_info_add_capability(&caps, - &cap_type.header, - sizeof(cap_type)); - if (ret) - return ret; - } - } - - if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) { - switch (cap_type_id) { - case VFIO_REGION_INFO_CAP_SPARSE_MMAP: - ret = vfio_info_add_capability(&caps, - &sparse->header, - struct_size(sparse, areas, - sparse->nr_areas)); - if (ret) { - kfree(sparse); - return ret; - } - break; - default: - kfree(sparse); - return -EINVAL; - } - } - - if (caps.size) { - info.flags |= VFIO_REGION_INFO_FLAG_CAPS; - if (info.argsz < sizeof(info) + caps.size) { - info.argsz = sizeof(info) + caps.size; - info.cap_offset = 0; - } else { - vfio_info_cap_shift(&caps, sizeof(info)); - if (copy_to_user((void __user *)arg + - sizeof(info), caps.buf, - caps.size)) { - kfree(caps.buf); - kfree(sparse); - return -EFAULT; - } - info.cap_offset = sizeof(info); - } - - kfree(caps.buf); - } - - kfree(sparse); - return copy_to_user((void __user *)arg, &info, minsz) ? - -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { struct vfio_irq_info info; @@ -1660,21 +1327,27 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, if (copy_from_user(&hdr, (void __user *)arg, minsz)) return -EFAULT; + if (!is_power_of_2(hdr.flags & VFIO_IRQ_SET_DATA_TYPE_MASK) || + !is_power_of_2(hdr.flags & VFIO_IRQ_SET_ACTION_TYPE_MASK)) + return -EINVAL; + if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) { int max = intel_vgpu_get_irq_count(vgpu, hdr.index); + if (!hdr.count) + return -EINVAL; + ret = vfio_set_irqs_validate_and_prepare(&hdr, max, VFIO_PCI_NUM_IRQS, &data_size); if (ret) { - gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n"); - return -EINVAL; - } - if (data_size) { - data = memdup_user((void __user *)(arg + minsz), - data_size); - if (IS_ERR(data)) - return PTR_ERR(data); + gvt_vgpu_err("vfio_set_irqs_validate_and_prepare failed\n"); + return ret; } + + data = memdup_user((void __user *)(arg + minsz), + data_size); + if (IS_ERR(data)) + return PTR_ERR(data); } ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index, @@ -1683,10 +1356,10 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, return ret; } else if (cmd == VFIO_DEVICE_RESET) { - intel_gvt_ops->vgpu_reset(vgpu); + intel_gvt_reset_vgpu(vgpu); return 0; } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) { - struct vfio_device_gfx_plane_info dmabuf; + struct vfio_device_gfx_plane_info dmabuf = {}; int ret = 0; minsz = offsetofend(struct vfio_device_gfx_plane_info, @@ -1696,7 +1369,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, if (dmabuf.argsz < minsz) return -EINVAL; - ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf); + ret = intel_vgpu_query_plane(vgpu, &dmabuf); if (ret != 0) return ret; @@ -1704,14 +1377,10 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) { __u32 dmabuf_id; - __s32 dmabuf_fd; if (get_user(dmabuf_id, (__u32 __user *)arg)) return -EFAULT; - - dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id); - return dmabuf_fd; - + return intel_vgpu_get_dmabuf(vgpu, dmabuf_id); } return -ENOTTY; @@ -1721,14 +1390,9 @@ static ssize_t vgpu_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mdev_device *mdev = mdev_from_dev(dev); + struct intel_vgpu *vgpu = dev_get_drvdata(dev); - if (mdev) { - struct intel_vgpu *vgpu = (struct intel_vgpu *) - mdev_get_drvdata(mdev); - return sprintf(buf, "%d\n", vgpu->id); - } - return sprintf(buf, "\n"); + return sprintf(buf, "%d\n", vgpu->id); } static DEVICE_ATTR_RO(vgpu_id); @@ -1748,321 +1412,211 @@ static const struct attribute_group *intel_vgpu_groups[] = { NULL, }; -static struct mdev_parent_ops intel_vgpu_ops = { - .mdev_attr_groups = intel_vgpu_groups, - .create = intel_vgpu_create, - .remove = intel_vgpu_remove, - - .open = intel_vgpu_open, - .release = intel_vgpu_release, - - .read = intel_vgpu_read, - .write = intel_vgpu_write, - .mmap = intel_vgpu_mmap, - .ioctl = intel_vgpu_ioctl, -}; - -static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) +static int intel_vgpu_init_dev(struct vfio_device *vfio_dev) { + struct mdev_device *mdev = to_mdev_device(vfio_dev->dev); + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); + struct intel_vgpu_type *type = + container_of(mdev->type, struct intel_vgpu_type, type); int ret; - ret = intel_gvt_init_vgpu_type_groups((struct intel_gvt *)gvt); + vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt; + ret = intel_gvt_create_vgpu(vgpu, type->conf); if (ret) return ret; - intel_gvt_ops = ops; - intel_vgpu_ops.supported_type_groups = gvt_vgpu_type_groups; - - ret = mdev_register_device(dev, &intel_vgpu_ops); - if (ret) - intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt); - - return ret; -} + kvmgt_protect_table_init(vgpu); + gvt_cache_init(vgpu); -static void kvmgt_host_exit(struct device *dev, void *gvt) -{ - mdev_unregister_device(dev); - intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt); + return 0; } -static int kvmgt_page_track_add(unsigned long handle, u64 gfn) +static void intel_vgpu_release_dev(struct vfio_device *vfio_dev) { - struct kvmgt_guest_info *info; - struct kvm *kvm; - struct kvm_memory_slot *slot; - int idx; - - if (!handle_valid(handle)) - return -ESRCH; - - info = (struct kvmgt_guest_info *)handle; - kvm = info->kvm; - - idx = srcu_read_lock(&kvm->srcu); - slot = gfn_to_memslot(kvm, gfn); - if (!slot) { - srcu_read_unlock(&kvm->srcu, idx); - return -EINVAL; - } - - write_lock(&kvm->mmu_lock); + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - if (kvmgt_gfn_is_write_protected(info, gfn)) - goto out; - - kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); - kvmgt_protect_table_add(info, gfn); - -out: - write_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, idx); - return 0; + intel_gvt_destroy_vgpu(vgpu); } -static int kvmgt_page_track_remove(unsigned long handle, u64 gfn) -{ - struct kvmgt_guest_info *info; - struct kvm *kvm; - struct kvm_memory_slot *slot; - int idx; - - if (!handle_valid(handle)) - return 0; +static const struct vfio_device_ops intel_vgpu_dev_ops = { + .init = intel_vgpu_init_dev, + .release = intel_vgpu_release_dev, + .open_device = intel_vgpu_open_device, + .close_device = intel_vgpu_close_device, + .read = intel_vgpu_read, + .write = intel_vgpu_write, + .mmap = intel_vgpu_mmap, + .ioctl = intel_vgpu_ioctl, + .get_region_info_caps = intel_vgpu_ioctl_get_region_info, + .dma_unmap = intel_vgpu_dma_unmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, + .detach_ioas = vfio_iommufd_emulated_detach_ioas, +}; - info = (struct kvmgt_guest_info *)handle; - kvm = info->kvm; +static int intel_vgpu_probe(struct mdev_device *mdev) +{ + struct intel_vgpu *vgpu; + int ret; - idx = srcu_read_lock(&kvm->srcu); - slot = gfn_to_memslot(kvm, gfn); - if (!slot) { - srcu_read_unlock(&kvm->srcu, idx); - return -EINVAL; + vgpu = vfio_alloc_device(intel_vgpu, vfio_device, &mdev->dev, + &intel_vgpu_dev_ops); + if (IS_ERR(vgpu)) { + gvt_err("failed to create intel vgpu: %ld\n", PTR_ERR(vgpu)); + return PTR_ERR(vgpu); } - write_lock(&kvm->mmu_lock); - - if (!kvmgt_gfn_is_write_protected(info, gfn)) - goto out; - - kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); - kvmgt_protect_table_del(info, gfn); + dev_set_drvdata(&mdev->dev, vgpu); + ret = vfio_register_emulated_iommu_dev(&vgpu->vfio_device); + if (ret) + goto out_put_vdev; -out: - write_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, idx); + gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n", + dev_name(mdev_dev(mdev))); return 0; -} - -static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *val, int len, - struct kvm_page_track_notifier_node *node) -{ - struct kvmgt_guest_info *info = container_of(node, - struct kvmgt_guest_info, track_node); - if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa))) - intel_gvt_ops->write_protect_handler(info->vgpu, gpa, - (void *)val, len); +out_put_vdev: + vfio_put_device(&vgpu->vfio_device); + return ret; } -static void kvmgt_page_track_flush_slot(struct kvm *kvm, - struct kvm_memory_slot *slot, - struct kvm_page_track_notifier_node *node) +static void intel_vgpu_remove(struct mdev_device *mdev) { - int i; - gfn_t gfn; - struct kvmgt_guest_info *info = container_of(node, - struct kvmgt_guest_info, track_node); - - write_lock(&kvm->mmu_lock); - for (i = 0; i < slot->npages; i++) { - gfn = slot->base_gfn + i; - if (kvmgt_gfn_is_write_protected(info, gfn)) { - kvm_slot_page_track_remove_page(kvm, slot, gfn, - KVM_PAGE_TRACK_WRITE); - kvmgt_protect_table_del(info, gfn); - } - } - write_unlock(&kvm->mmu_lock); + struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev); + + vfio_unregister_group_dev(&vgpu->vfio_device); + vfio_put_device(&vgpu->vfio_device); } -static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm) +static unsigned int intel_vgpu_get_available(struct mdev_type *mtype) { - struct intel_vgpu *itr; - struct kvmgt_guest_info *info; - int id; - bool ret = false; + struct intel_vgpu_type *type = + container_of(mtype, struct intel_vgpu_type, type); + struct intel_gvt *gvt = kdev_to_i915(mtype->parent->dev)->gvt; + unsigned int low_gm_avail, high_gm_avail, fence_avail; - mutex_lock(&vgpu->gvt->lock); - for_each_active_vgpu(vgpu->gvt, itr, id) { - if (!handle_valid(itr->handle)) - continue; + mutex_lock(&gvt->lock); + low_gm_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE - + gvt->gm.vgpu_allocated_low_gm_size; + high_gm_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE - + gvt->gm.vgpu_allocated_high_gm_size; + fence_avail = gvt_fence_sz(gvt) - HOST_FENCE - + gvt->fence.vgpu_allocated_fence_num; + mutex_unlock(&gvt->lock); - info = (struct kvmgt_guest_info *)itr->handle; - if (kvm && kvm == info->kvm) { - ret = true; - goto out; - } - } -out: - mutex_unlock(&vgpu->gvt->lock); - return ret; + return min3(low_gm_avail / type->conf->low_mm, + high_gm_avail / type->conf->high_mm, + fence_avail / type->conf->fence); } -static int kvmgt_guest_init(struct mdev_device *mdev) -{ - struct kvmgt_guest_info *info; - struct intel_vgpu *vgpu; - struct kvmgt_vdev *vdev; - struct kvm *kvm; +static struct mdev_driver intel_vgpu_mdev_driver = { + .device_api = VFIO_DEVICE_API_PCI_STRING, + .driver = { + .name = "intel_vgpu_mdev", + .owner = THIS_MODULE, + .dev_groups = intel_vgpu_groups, + }, + .probe = intel_vgpu_probe, + .remove = intel_vgpu_remove, + .get_available = intel_vgpu_get_available, + .show_description = intel_vgpu_show_description, +}; - vgpu = mdev_get_drvdata(mdev); - if (handle_valid(vgpu->handle)) - return -EEXIST; +int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) +{ + int r; - vdev = kvmgt_vdev(vgpu); - kvm = vdev->kvm; - if (!kvm || kvm->mm != current->mm) { - gvt_vgpu_err("KVM is required to use Intel vGPU\n"); + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status)) return -ESRCH; - } - - if (__kvmgt_vgpu_exist(vgpu, kvm)) - return -EEXIST; - - info = vzalloc(sizeof(struct kvmgt_guest_info)); - if (!info) - return -ENOMEM; - - vgpu->handle = (unsigned long)info; - info->vgpu = vgpu; - info->kvm = kvm; - kvm_get_kvm(info->kvm); - kvmgt_protect_table_init(info); - gvt_cache_init(vgpu); + if (kvmgt_gfn_is_write_protected(info, gfn)) + return 0; - info->track_node.track_write = kvmgt_page_track_write; - info->track_node.track_flush_slot = kvmgt_page_track_flush_slot; - kvm_page_track_register_notifier(kvm, &info->track_node); + r = kvm_write_track_add_gfn(info->vfio_device.kvm, gfn); + if (r) + return r; - debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs, - &vdev->nr_cache_entries); + kvmgt_protect_table_add(info, gfn); return 0; } -static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) +int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) { - debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, - info->vgpu->debugfs)); + int r; - kvm_page_track_unregister_notifier(info->kvm, &info->track_node); - kvm_put_kvm(info->kvm); - kvmgt_protect_table_destroy(info); - gvt_cache_destroy(info->vgpu); - vfree(info); - - return true; -} - -static int kvmgt_attach_vgpu(void *p_vgpu, unsigned long *handle) -{ - struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; - - vgpu->vdev = kzalloc(sizeof(struct kvmgt_vdev), GFP_KERNEL); + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status)) + return -ESRCH; - if (!vgpu->vdev) - return -ENOMEM; + if (!kvmgt_gfn_is_write_protected(info, gfn)) + return 0; - kvmgt_vdev(vgpu)->vgpu = vgpu; + r = kvm_write_track_remove_gfn(info->vfio_device.kvm, gfn); + if (r) + return r; + kvmgt_protect_table_del(info, gfn); return 0; } -static void kvmgt_detach_vgpu(void *p_vgpu) +static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len, + struct kvm_page_track_notifier_node *node) { - int i; - struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); + struct intel_vgpu *info = + container_of(node, struct intel_vgpu, track_node); - if (!vdev->region) - return; + mutex_lock(&info->vgpu_lock); - for (i = 0; i < vdev->num_regions; i++) - if (vdev->region[i].ops->release) - vdev->region[i].ops->release(vgpu, - &vdev->region[i]); - vdev->num_regions = 0; - kfree(vdev->region); - vdev->region = NULL; + if (kvmgt_gfn_is_write_protected(info, gpa >> PAGE_SHIFT)) + intel_vgpu_page_track_handler(info, gpa, + (void *)val, len); - kfree(vdev); + mutex_unlock(&info->vgpu_lock); } -static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) +static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages, + struct kvm_page_track_notifier_node *node) { - struct kvmgt_guest_info *info; - struct intel_vgpu *vgpu; - struct kvmgt_vdev *vdev; + unsigned long i; + struct intel_vgpu *info = + container_of(node, struct intel_vgpu, track_node); - if (!handle_valid(handle)) - return -ESRCH; - - info = (struct kvmgt_guest_info *)handle; - vgpu = info->vgpu; - vdev = kvmgt_vdev(vgpu); + mutex_lock(&info->vgpu_lock); - /* - * When guest is poweroff, msi_trigger is set to NULL, but vgpu's - * config and mmio register isn't restored to default during guest - * poweroff. If this vgpu is still used in next vm, this vgpu's pipe - * may be enabled, then once this vgpu is active, it will get inject - * vblank interrupt request. But msi_trigger is null until msi is - * enabled by guest. so if msi_trigger is null, success is still - * returned and don't inject interrupt into guest. - */ - if (vdev->msi_trigger == NULL) - return 0; - - if (eventfd_signal(vdev->msi_trigger, 1) == 1) - return 0; + for (i = 0; i < nr_pages; i++) { + if (kvmgt_gfn_is_write_protected(info, gfn + i)) + kvmgt_protect_table_del(info, gfn + i); + } - return -EFAULT; + mutex_unlock(&info->vgpu_lock); } -static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) +void intel_vgpu_detach_regions(struct intel_vgpu *vgpu) { - struct kvmgt_guest_info *info; - kvm_pfn_t pfn; - - if (!handle_valid(handle)) - return INTEL_GVT_INVALID_ADDR; - - info = (struct kvmgt_guest_info *)handle; + int i; - pfn = gfn_to_pfn(info->kvm, gfn); - if (is_error_noslot_pfn(pfn)) - return INTEL_GVT_INVALID_ADDR; + if (!vgpu->region) + return; - return pfn; + for (i = 0; i < vgpu->num_regions; i++) + if (vgpu->region[i].ops->release) + vgpu->region[i].ops->release(vgpu, + &vgpu->region[i]); + vgpu->num_regions = 0; + kfree(vgpu->region); + vgpu->region = NULL; } -static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, +int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, dma_addr_t *dma_addr) { - struct intel_vgpu *vgpu; - struct kvmgt_vdev *vdev; struct gvt_dma *entry; int ret; - if (!handle_valid(handle)) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return -EINVAL; - vgpu = ((struct kvmgt_guest_info *)handle)->vgpu; - vdev = kvmgt_vdev(vgpu); - - mutex_lock(&vdev->cache_lock); + mutex_lock(&vgpu->cache_lock); entry = __gvt_cache_find_gfn(vgpu, gfn); if (!entry) { @@ -2090,36 +1644,31 @@ static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, *dma_addr = entry->dma_addr; } - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); return 0; err_unmap: gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size); err_unlock: - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); return ret; } -static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr) +int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr) { - struct kvmgt_guest_info *info; - struct kvmgt_vdev *vdev; struct gvt_dma *entry; int ret = 0; - if (!handle_valid(handle)) - return -ENODEV; - - info = (struct kvmgt_guest_info *)handle; - vdev = kvmgt_vdev(info->vgpu); + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) + return -EINVAL; - mutex_lock(&vdev->cache_lock); - entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr); + mutex_lock(&vgpu->cache_lock); + entry = __gvt_cache_find_dma_addr(vgpu, dma_addr); if (entry) kref_get(&entry->ref); else ret = -ENOMEM; - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); return ret; } @@ -2133,113 +1682,290 @@ static void __gvt_dma_release(struct kref *ref) __gvt_cache_remove_entry(entry->vgpu, entry); } -static void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr) +void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu, + dma_addr_t dma_addr) { - struct intel_vgpu *vgpu; - struct kvmgt_vdev *vdev; struct gvt_dma *entry; - if (!handle_valid(handle)) + if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status)) return; - vgpu = ((struct kvmgt_guest_info *)handle)->vgpu; - vdev = kvmgt_vdev(vgpu); - - mutex_lock(&vdev->cache_lock); + mutex_lock(&vgpu->cache_lock); entry = __gvt_cache_find_dma_addr(vgpu, dma_addr); if (entry) kref_put(&entry->ref, __gvt_dma_release); - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); } -static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, - void *buf, unsigned long len, bool write) +static void init_device_info(struct intel_gvt *gvt) { - struct kvmgt_guest_info *info; + struct intel_gvt_device_info *info = &gvt->device_info; + struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev); - if (!handle_valid(handle)) - return -ESRCH; + info->max_support_vgpus = 8; + info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE; + info->mmio_size = 2 * 1024 * 1024; + info->mmio_bar = 0; + info->gtt_start_offset = 8 * 1024 * 1024; + info->gtt_entry_size = 8; + info->gtt_entry_size_shift = 3; + info->gmadr_bytes_in_cmd = 8; + info->max_surface_size = 36 * 1024 * 1024; + info->msi_cap_offset = pdev->msi_cap; +} - info = (struct kvmgt_guest_info *)handle; +static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt) +{ + struct intel_vgpu *vgpu; + int id; - return vfio_dma_rw(kvmgt_vdev(info->vgpu)->vfio_group, - gpa, buf, len, write); + mutex_lock(&gvt->lock); + idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) { + if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id, + (void *)&gvt->service_request)) { + if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status)) + intel_vgpu_emulate_vblank(vgpu); + } + } + mutex_unlock(&gvt->lock); } -static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa, - void *buf, unsigned long len) +static int gvt_service_thread(void *data) { - return kvmgt_rw_gpa(handle, gpa, buf, len, false); + struct intel_gvt *gvt = (struct intel_gvt *)data; + int ret; + + gvt_dbg_core("service thread start\n"); + + while (!kthread_should_stop()) { + ret = wait_event_interruptible(gvt->service_thread_wq, + kthread_should_stop() || gvt->service_request); + + if (kthread_should_stop()) + break; + + if (WARN_ONCE(ret, "service thread is waken up by signal.\n")) + continue; + + intel_gvt_test_and_emulate_vblank(gvt); + + if (test_bit(INTEL_GVT_REQUEST_SCHED, + (void *)&gvt->service_request) || + test_bit(INTEL_GVT_REQUEST_EVENT_SCHED, + (void *)&gvt->service_request)) { + intel_gvt_schedule(gvt); + } + } + + return 0; } -static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa, - void *buf, unsigned long len) +static void clean_service_thread(struct intel_gvt *gvt) { - return kvmgt_rw_gpa(handle, gpa, buf, len, true); + kthread_stop(gvt->service_thread); } -static unsigned long kvmgt_virt_to_pfn(void *addr) +static int init_service_thread(struct intel_gvt *gvt) { - return PFN_DOWN(__pa(addr)); + init_waitqueue_head(&gvt->service_thread_wq); + + gvt->service_thread = kthread_run(gvt_service_thread, + gvt, "gvt_service_thread"); + if (IS_ERR(gvt->service_thread)) { + gvt_err("fail to start service thread.\n"); + return PTR_ERR(gvt->service_thread); + } + return 0; } -static bool kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn) +/** + * intel_gvt_clean_device - clean a GVT device + * @i915: i915 private + * + * This function is called at the driver unloading stage, to free the + * resources owned by a GVT device. + * + */ +static void intel_gvt_clean_device(struct drm_i915_private *i915) { - struct kvmgt_guest_info *info; - struct kvm *kvm; - int idx; - bool ret; + struct intel_gvt *gvt = fetch_and_zero(&i915->gvt); - if (!handle_valid(handle)) - return false; + if (drm_WARN_ON(&i915->drm, !gvt)) + return; - info = (struct kvmgt_guest_info *)handle; - kvm = info->kvm; + mdev_unregister_parent(&gvt->parent); + intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); + intel_gvt_clean_vgpu_types(gvt); - idx = srcu_read_lock(&kvm->srcu); - ret = kvm_is_visible_gfn(kvm, gfn); - srcu_read_unlock(&kvm->srcu, idx); + intel_gvt_debugfs_clean(gvt); + clean_service_thread(gvt); + intel_gvt_clean_cmd_parser(gvt); + intel_gvt_clean_sched_policy(gvt); + intel_gvt_clean_workload_scheduler(gvt); + intel_gvt_clean_gtt(gvt); + intel_gvt_free_firmware(gvt); + intel_gvt_clean_mmio_info(gvt); + idr_destroy(&gvt->vgpu_idr); + kfree(i915->gvt); +} + +/** + * intel_gvt_init_device - initialize a GVT device + * @i915: drm i915 private data + * + * This function is called at the initialization stage, to initialize + * necessary GVT components. + * + * Returns: + * Zero on success, negative error code if failed. + * + */ +static int intel_gvt_init_device(struct drm_i915_private *i915) +{ + struct intel_gvt *gvt; + struct intel_vgpu *vgpu; + int ret; + + if (drm_WARN_ON(&i915->drm, i915->gvt)) + return -EEXIST; + + gvt = kzalloc(sizeof(struct intel_gvt), GFP_KERNEL); + if (!gvt) + return -ENOMEM; + + gvt_dbg_core("init gvt device\n"); + + idr_init_base(&gvt->vgpu_idr, 1); + spin_lock_init(&gvt->scheduler.mmio_context_lock); + mutex_init(&gvt->lock); + mutex_init(&gvt->sched_lock); + gvt->gt = to_gt(i915); + i915->gvt = gvt; + + init_device_info(gvt); + + ret = intel_gvt_setup_mmio_info(gvt); + if (ret) + goto out_clean_idr; + + intel_gvt_init_engine_mmio_context(gvt); + + ret = intel_gvt_load_firmware(gvt); + if (ret) + goto out_clean_mmio_info; + + ret = intel_gvt_init_irq(gvt); + if (ret) + goto out_free_firmware; + + ret = intel_gvt_init_gtt(gvt); + if (ret) + goto out_free_firmware; + + ret = intel_gvt_init_workload_scheduler(gvt); + if (ret) + goto out_clean_gtt; + + ret = intel_gvt_init_sched_policy(gvt); + if (ret) + goto out_clean_workload_scheduler; + + ret = intel_gvt_init_cmd_parser(gvt); + if (ret) + goto out_clean_sched_policy; + + ret = init_service_thread(gvt); + if (ret) + goto out_clean_cmd_parser; + + ret = intel_gvt_init_vgpu_types(gvt); + if (ret) + goto out_clean_thread; + + vgpu = intel_gvt_create_idle_vgpu(gvt); + if (IS_ERR(vgpu)) { + ret = PTR_ERR(vgpu); + gvt_err("failed to create idle vgpu\n"); + goto out_clean_types; + } + gvt->idle_vgpu = vgpu; + + intel_gvt_debugfs_init(gvt); + + ret = mdev_register_parent(&gvt->parent, i915->drm.dev, + &intel_vgpu_mdev_driver, + gvt->mdev_types, gvt->num_types); + if (ret) + goto out_destroy_idle_vgpu; + + gvt_dbg_core("gvt device initialization is done\n"); + return 0; + +out_destroy_idle_vgpu: + intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); + intel_gvt_debugfs_clean(gvt); +out_clean_types: + intel_gvt_clean_vgpu_types(gvt); +out_clean_thread: + clean_service_thread(gvt); +out_clean_cmd_parser: + intel_gvt_clean_cmd_parser(gvt); +out_clean_sched_policy: + intel_gvt_clean_sched_policy(gvt); +out_clean_workload_scheduler: + intel_gvt_clean_workload_scheduler(gvt); +out_clean_gtt: + intel_gvt_clean_gtt(gvt); +out_free_firmware: + intel_gvt_free_firmware(gvt); +out_clean_mmio_info: + intel_gvt_clean_mmio_info(gvt); +out_clean_idr: + idr_destroy(&gvt->vgpu_idr); + kfree(gvt); + i915->gvt = NULL; return ret; } -static const struct intel_gvt_mpt kvmgt_mpt = { - .type = INTEL_GVT_HYPERVISOR_KVM, - .host_init = kvmgt_host_init, - .host_exit = kvmgt_host_exit, - .attach_vgpu = kvmgt_attach_vgpu, - .detach_vgpu = kvmgt_detach_vgpu, - .inject_msi = kvmgt_inject_msi, - .from_virt_to_mfn = kvmgt_virt_to_pfn, - .enable_page_track = kvmgt_page_track_add, - .disable_page_track = kvmgt_page_track_remove, - .read_gpa = kvmgt_read_gpa, - .write_gpa = kvmgt_write_gpa, - .gfn_to_mfn = kvmgt_gfn_to_pfn, - .dma_map_guest_page = kvmgt_dma_map_guest_page, - .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, - .dma_pin_guest_page = kvmgt_dma_pin_guest_page, - .set_opregion = kvmgt_set_opregion, - .set_edid = kvmgt_set_edid, - .get_vfio_device = kvmgt_get_vfio_device, - .put_vfio_device = kvmgt_put_vfio_device, - .is_valid_gfn = kvmgt_is_valid_gfn, +static void intel_gvt_pm_resume(struct drm_i915_private *i915) +{ + struct intel_gvt *gvt = i915->gvt; + + intel_gvt_restore_fence(gvt); + intel_gvt_restore_mmio(gvt); + intel_gvt_restore_ggtt(gvt); +} + +static const struct intel_vgpu_ops intel_gvt_vgpu_ops = { + .init_device = intel_gvt_init_device, + .clean_device = intel_gvt_clean_device, + .pm_resume = intel_gvt_pm_resume, }; static int __init kvmgt_init(void) { - if (intel_gvt_register_hypervisor(&kvmgt_mpt) < 0) - return -ENODEV; - return 0; + int ret; + + ret = intel_gvt_set_ops(&intel_gvt_vgpu_ops); + if (ret) + return ret; + + ret = mdev_register_driver(&intel_vgpu_mdev_driver); + if (ret) + intel_gvt_clear_ops(&intel_gvt_vgpu_ops); + return ret; } static void __exit kvmgt_exit(void) { - intel_gvt_unregister_hypervisor(); + mdev_unregister_driver(&intel_vgpu_mdev_driver); + intel_gvt_clear_ops(&intel_gvt_vgpu_ops); } module_init(kvmgt_init); module_exit(kvmgt_exit); +MODULE_DESCRIPTION("Intel mediated pass-through framework for KVM"); MODULE_LICENSE("GPL and additional rights"); MODULE_AUTHOR("Intel Corporation"); |
