From 5a6b4cc5b7a1892a8d7f63d6cbac6e0ae2a9d031 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 5 Feb 2020 17:34:02 +0100 Subject: virtio-balloon: Switch back to OOM handler for VIRTIO_BALLOON_F_DEFLATE_ON_OOM Commit 71994620bb25 ("virtio_balloon: replace oom notifier with shrinker") changed the behavior when deflation happens automatically. Instead of deflating when called by the OOM handler, the shrinker is used. However, the balloon is not simply some slab cache that should be shrunk when under memory pressure. The shrinker does not have a concept of priorities, so this behavior cannot be configured. There was a report that this results in undesired side effects when inflating the balloon to shrink the page cache. [1] "When inflating the balloon against page cache (i.e. no free memory remains) vmscan.c will both shrink page cache, but also invoke the shrinkers -- including the balloon's shrinker. So the balloon driver allocates memory which requires reclaim, vmscan gets this memory by shrinking the balloon, and then the driver adds the memory back to the balloon. Basically a busy no-op." The name "deflate on OOM" makes it pretty clear when deflation should happen - after other approaches to reclaim memory failed, not while reclaiming. This allows to minimize the footprint of a guest - memory will only be taken out of the balloon when really needed. Especially, a drop_slab() will result in the whole balloon getting deflated - undesired. While handling it via the OOM handler might not be perfect, it keeps existing behavior. If we want a different behavior, then we need a new feature bit and document it properly (although, there should be a clear use case and the intended effects should be well described). Keep using the shrinker for VIRTIO_BALLOON_F_FREE_PAGE_HINT, because this has no such side effects. Always register the shrinker with VIRTIO_BALLOON_F_FREE_PAGE_HINT now. We are always allowed to reuse free pages that are still to be processed by the guest. The hypervisor takes care of identifying and resolving possible races between processing a hinting request and the guest reusing a page. In contrast to pre commit 71994620bb25 ("virtio_balloon: replace oom notifier with shrinker"), don't add a moodule parameter to configure the number of pages to deflate on OOM. Can be re-added if really needed. Also, pay attention that leak_balloon() returns the number of 4k pages - convert it properly in virtio_balloon_oom_notify(). Note1: using the OOM handler is frowned upon, but it really is what we need for this feature. Note2: without VIRTIO_BALLOON_F_MUST_TELL_HOST (iow, always with QEMU) we could actually skip sending deflation requests to our hypervisor, making the OOM path *very* simple. Besically freeing pages and updating the balloon. If the communication with the host ever becomes a problem on this call path. [1] https://www.spinics.net/lists/linux-virtualization/msg40863.html Test report by Tyler Sanderson: Test setup: VM with 16 CPU, 64GB RAM. Running Debian 10. We have a 42 GB file full of random bytes that we continually cat to /dev/null. This fills the page cache as the file is read. Meanwhile we trigger the balloon to inflate, with a target size of 53 GB. This setup causes the balloon inflation to pressure the page cache as the page cache is also trying to grow. Afterwards we shrink the balloon back to zero (so total deflate = total inflate). Without patch (kernel 4.19.0-5): Inflation never reaches the target until we stop the "cat file > /dev/null" process. Total inflation time was 542 seconds. The longest period that made no net forward progress was 315 seconds (see attached graph). Result of "grep balloon /proc/vmstat" after the test: balloon_inflate 154828377 balloon_deflate 154828377 With patch (kernel 5.6.0-rc4+): Total inflation duration was 63 seconds. No deflate-queue activity occurs when pressuring the page-cache. Result of "grep balloon /proc/vmstat" after the test: balloon_inflate 12968539 balloon_deflate 12968539 Conclusion: This patch fixes the issue. In the test it reduced inflate/deflate activity by 12x, and reduced inflation time by 8.6x. But more importantly, if we hadn't killed the "grep balloon /proc/vmstat" process then, without the patch, the inflation process would never reach the target. Attached [1] is a png of a graph showing the problematic behavior without this patch. It shows deflate-queue activity increasing linearly while balloon size stays constant over the course of more than 8 minutes of the test. [1] https://lore.kernel.org/linux-mm/CAJuQAmphPcfew1v_EOgAdSFiprzjiZjmOf3iJDmFX0gD6b9TYQ@mail.gmail.com/2-without_patch.png Full test report and discussion [2]: [2] https://lore.kernel.org/r/CAJuQAmphPcfew1v_EOgAdSFiprzjiZjmOf3iJDmFX0gD6b9TYQ@mail.gmail.com Tested-by: Tyler Sanderson Reported-by: Tyler Sanderson Cc: Michael S. Tsirkin Cc: Wei Wang Cc: Alexander Duyck Cc: David Rientjes Cc: Nadav Amit Cc: Michal Hocko Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200205163402.42627-4-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 107 +++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 63 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 341458fd95ca..44375a22307b 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,9 @@ */ #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 -#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 +/* Maximum number of (4k) pages to deflate on OOM notifications. */ +#define VIRTIO_BALLOON_OOM_NR_PAGES 256 +#define VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY 80 #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \ __GFP_NOMEMALLOC) @@ -112,8 +115,11 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; - /* To register a shrinker to shrink memory upon memory pressure */ + /* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */ struct shrinker shrinker; + + /* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */ + struct notifier_block oom_nb; }; static struct virtio_device_id id_table[] = { @@ -788,50 +794,13 @@ static unsigned long shrink_free_pages(struct virtio_balloon *vb, return blocks_freed * VIRTIO_BALLOON_HINT_BLOCK_PAGES; } -static unsigned long leak_balloon_pages(struct virtio_balloon *vb, - unsigned long pages_to_free) -{ - return leak_balloon(vb, pages_to_free * VIRTIO_BALLOON_PAGES_PER_PAGE) / - VIRTIO_BALLOON_PAGES_PER_PAGE; -} - -static unsigned long shrink_balloon_pages(struct virtio_balloon *vb, - unsigned long pages_to_free) -{ - unsigned long pages_freed = 0; - - /* - * One invocation of leak_balloon can deflate at most - * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it - * multiple times to deflate pages till reaching pages_to_free. - */ - while (vb->num_pages && pages_freed < pages_to_free) - pages_freed += leak_balloon_pages(vb, - pages_to_free - pages_freed); - - update_balloon_size(vb); - - return pages_freed; -} - static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { - unsigned long pages_to_free, pages_freed = 0; struct virtio_balloon *vb = container_of(shrinker, struct virtio_balloon, shrinker); - pages_to_free = sc->nr_to_scan; - - if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) - pages_freed = shrink_free_pages(vb, pages_to_free); - - if (pages_freed >= pages_to_free) - return pages_freed; - - pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed); - - return pages_freed; + return shrink_free_pages(vb, sc->nr_to_scan); } static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, @@ -839,26 +808,22 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, { struct virtio_balloon *vb = container_of(shrinker, struct virtio_balloon, shrinker); - unsigned long count; - - count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE; - count += vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES; - return count; + return vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES; } -static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) +static int virtio_balloon_oom_notify(struct notifier_block *nb, + unsigned long dummy, void *parm) { - unregister_shrinker(&vb->shrinker); -} + struct virtio_balloon *vb = container_of(nb, + struct virtio_balloon, oom_nb); + unsigned long *freed = parm; -static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) -{ - vb->shrinker.scan_objects = virtio_balloon_shrinker_scan; - vb->shrinker.count_objects = virtio_balloon_shrinker_count; - vb->shrinker.seeks = DEFAULT_SEEKS; + *freed += leak_balloon(vb, VIRTIO_BALLOON_OOM_NR_PAGES) / + VIRTIO_BALLOON_PAGES_PER_PAGE; + update_balloon_size(vb); - return register_shrinker(&vb->shrinker); + return NOTIFY_OK; } static int virtballoon_probe(struct virtio_device *vdev) @@ -935,22 +900,35 @@ static int virtballoon_probe(struct virtio_device *vdev) virtio_cwrite(vb->vdev, struct virtio_balloon_config, poison_val, &poison_val); } - } - /* - * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a - * shrinker needs to be registered to relieve memory pressure. - */ - if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { - err = virtio_balloon_register_shrinker(vb); + + /* + * We're allowed to reuse any free pages, even if they are + * still to be processed by the host. + */ + vb->shrinker.scan_objects = virtio_balloon_shrinker_scan; + vb->shrinker.count_objects = virtio_balloon_shrinker_count; + vb->shrinker.seeks = DEFAULT_SEEKS; + err = register_shrinker(&vb->shrinker); if (err) goto out_del_balloon_wq; } + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { + vb->oom_nb.notifier_call = virtio_balloon_oom_notify; + vb->oom_nb.priority = VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY; + err = register_oom_notifier(&vb->oom_nb); + if (err < 0) + goto out_unregister_shrinker; + } + virtio_device_ready(vdev); if (towards_target(vb)) virtballoon_changed(vdev); return 0; +out_unregister_shrinker: + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + unregister_shrinker(&vb->shrinker); out_del_balloon_wq: if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) destroy_workqueue(vb->balloon_wq); @@ -989,8 +967,11 @@ static void virtballoon_remove(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; - if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) - virtio_balloon_unregister_shrinker(vb); + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) + unregister_oom_notifier(&vb->oom_nb); + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + unregister_shrinker(&vb->shrinker); + spin_lock_irq(&vb->stop_update_lock); vb->stop_update = true; spin_unlock_irq(&vb->stop_update_lock); -- cgit From 961e9c84077f6c8579d7a628cbe94a675cb67ae4 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 26 Mar 2020 22:01:21 +0800 Subject: vDPA: introduce vDPA bus vDPA device is a device that uses a datapath which complies with the virtio specifications with vendor specific control path. vDPA devices can be both physically located on the hardware or emulated by software. vDPA hardware devices are usually implemented through PCIE with the following types: - PF (Physical Function) - A single Physical Function - VF (Virtual Function) - Device that supports single root I/O virtualization (SR-IOV). Its Virtual Function (VF) represents a virtualized instance of the device that can be assigned to different partitions - ADI (Assignable Device Interface) and its equivalents - With technologies such as Intel Scalable IOV, a virtual device (VDEV) composed by host OS utilizing one or more ADIs. Or its equivalent like SF (Sub function) from Mellanox. >From a driver's perspective, depends on how and where the DMA translation is done, vDPA devices are split into two types: - Platform specific DMA translation - From the driver's perspective, the device can be used on a platform where device access to data in memory is limited and/or translated. An example is a PCIE vDPA whose DMA request was tagged via a bus (e.g PCIE) specific way. DMA translation and protection are done at PCIE bus IOMMU level. - Device specific DMA translation - The device implements DMA isolation and protection through its own logic. An example is a vDPA device which uses on-chip IOMMU. To hide the differences and complexity of the above types for a vDPA device/IOMMU options and in order to present a generic virtio device to the upper layer, a device agnostic framework is required. This patch introduces a software vDPA bus which abstracts the common attributes of vDPA device, vDPA bus driver and the communication method (vdpa_config_ops) between the vDPA device abstraction and the vDPA bus driver. This allows multiple types of drivers to be used for vDPA device like the virtio_vdpa and vhost_vdpa driver to operate on the bus and allow vDPA device could be used by either kernel virtio driver or userspace vhost drivers as: virtio drivers vhost drivers | | [virtio bus] [vhost uAPI] | | virtio device vhost device virtio_vdpa drv vhost_vdpa drv \ / [vDPA bus] | vDPA device hardware drv | [hardware bus] | vDPA hardware With the abstraction of vDPA bus and vDPA bus operations, the difference and complexity of the under layer hardware is hidden from upper layer. The vDPA bus drivers on top can use a unified vdpa_config_ops to control different types of vDPA device. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200326140125.19794-6-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/Kconfig | 2 + drivers/virtio/Makefile | 1 + drivers/virtio/vdpa/Kconfig | 7 ++ drivers/virtio/vdpa/Makefile | 2 + drivers/virtio/vdpa/vdpa.c | 180 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 192 insertions(+) create mode 100644 drivers/virtio/vdpa/Kconfig create mode 100644 drivers/virtio/vdpa/Makefile create mode 100644 drivers/virtio/vdpa/vdpa.c (limited to 'drivers/virtio') diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 078615cf2afc..9c4fdb64d9ac 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -96,3 +96,5 @@ config VIRTIO_MMIO_CMDLINE_DEVICES If unsure, say 'N'. endif # VIRTIO_MENU + +source "drivers/virtio/vdpa/Kconfig" diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 3a2b5c5dcf46..fdf5eacd0d0a 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -6,3 +6,4 @@ virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o +obj-$(CONFIG_VDPA) += vdpa/ diff --git a/drivers/virtio/vdpa/Kconfig b/drivers/virtio/vdpa/Kconfig new file mode 100644 index 000000000000..351617723d12 --- /dev/null +++ b/drivers/virtio/vdpa/Kconfig @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +config VDPA + tristate + help + Enable this module to support vDPA device that uses a + datapath which complies with virtio specifications with + vendor specific control path. diff --git a/drivers/virtio/vdpa/Makefile b/drivers/virtio/vdpa/Makefile new file mode 100644 index 000000000000..ee6a35e8a4fb --- /dev/null +++ b/drivers/virtio/vdpa/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_VDPA) += vdpa.o diff --git a/drivers/virtio/vdpa/vdpa.c b/drivers/virtio/vdpa/vdpa.c new file mode 100644 index 000000000000..e9ed6a2b635b --- /dev/null +++ b/drivers/virtio/vdpa/vdpa.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vDPA bus. + * + * Copyright (c) 2020, Red Hat. All rights reserved. + * Author: Jason Wang + * + */ + +#include +#include +#include +#include + +static DEFINE_IDA(vdpa_index_ida); + +static int vdpa_dev_probe(struct device *d) +{ + struct vdpa_device *vdev = dev_to_vdpa(d); + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); + int ret = 0; + + if (drv && drv->probe) + ret = drv->probe(vdev); + + return ret; +} + +static int vdpa_dev_remove(struct device *d) +{ + struct vdpa_device *vdev = dev_to_vdpa(d); + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); + + if (drv && drv->remove) + drv->remove(vdev); + + return 0; +} + +static struct bus_type vdpa_bus = { + .name = "vdpa", + .probe = vdpa_dev_probe, + .remove = vdpa_dev_remove, +}; + +static void vdpa_release_dev(struct device *d) +{ + struct vdpa_device *vdev = dev_to_vdpa(d); + const struct vdpa_config_ops *ops = vdev->config; + + if (ops->free) + ops->free(vdev); + + ida_simple_remove(&vdpa_index_ida, vdev->index); + kfree(vdev); +} + +/** + * __vdpa_alloc_device - allocate and initilaize a vDPA device + * This allows driver to some prepartion after device is + * initialized but before registered. + * @parent: the parent device + * @config: the bus operations that is supported by this device + * @size: size of the parent structure that contains private data + * + * Drvier should use vdap_alloc_device() wrapper macro instead of + * using this directly. + * + * Returns an error when parent/config/dma_dev is not set or fail to get + * ida. + */ +struct vdpa_device *__vdpa_alloc_device(struct device *parent, + const struct vdpa_config_ops *config, + size_t size) +{ + struct vdpa_device *vdev; + int err = -EINVAL; + + if (!config) + goto err; + + if (!!config->dma_map != !!config->dma_unmap) + goto err; + + err = -ENOMEM; + vdev = kzalloc(size, GFP_KERNEL); + if (!vdev) + goto err; + + err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL); + if (err < 0) + goto err_ida; + + vdev->dev.bus = &vdpa_bus; + vdev->dev.parent = parent; + vdev->dev.release = vdpa_release_dev; + vdev->index = err; + vdev->config = config; + + err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index); + if (err) + goto err_name; + + device_initialize(&vdev->dev); + + return vdev; + +err_name: + ida_simple_remove(&vdpa_index_ida, vdev->index); +err_ida: + kfree(vdev); +err: + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(__vdpa_alloc_device); + +/** + * vdpa_register_device - register a vDPA device + * Callers must have a succeed call of vdpa_init_device() before. + * @vdev: the vdpa device to be registered to vDPA bus + * + * Returns an error when fail to add to vDPA bus + */ +int vdpa_register_device(struct vdpa_device *vdev) +{ + return device_add(&vdev->dev); +} +EXPORT_SYMBOL_GPL(vdpa_register_device); + +/** + * vdpa_unregister_device - unregister a vDPA device + * @vdev: the vdpa device to be unregisted from vDPA bus + */ +void vdpa_unregister_device(struct vdpa_device *vdev) +{ + device_unregister(&vdev->dev); +} +EXPORT_SYMBOL_GPL(vdpa_unregister_device); + +/** + * __vdpa_register_driver - register a vDPA device driver + * @drv: the vdpa device driver to be registered + * @owner: module owner of the driver + * + * Returns an err when fail to do the registration + */ +int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner) +{ + drv->driver.bus = &vdpa_bus; + drv->driver.owner = owner; + + return driver_register(&drv->driver); +} +EXPORT_SYMBOL_GPL(__vdpa_register_driver); + +/** + * vdpa_unregister_driver - unregister a vDPA device driver + * @drv: the vdpa device driver to be unregistered + */ +void vdpa_unregister_driver(struct vdpa_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL_GPL(vdpa_unregister_driver); + +static int vdpa_init(void) +{ + return bus_register(&vdpa_bus); +} + +static void __exit vdpa_exit(void) +{ + bus_unregister(&vdpa_bus); + ida_destroy(&vdpa_index_ida); +} +core_initcall(vdpa_init); +module_exit(vdpa_exit); + +MODULE_AUTHOR("Jason Wang "); +MODULE_LICENSE("GPL v2"); -- cgit From c043b4a8cf3b16fbdcaec1126841431c33b16e98 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 26 Mar 2020 22:01:22 +0800 Subject: virtio: introduce a vDPA based transport This patch introduces a vDPA transport for virtio. This is used to use kernel virtio driver to drive the vDPA device that is capable of populating virtqueue directly. A new virtio-vdpa driver will be registered to the vDPA bus, when a new virtio-vdpa device is probed, it will register the device with vdpa based config ops. This means it is a software transport between vDPA driver and vDPA device. The transport was implemented through bus_ops of vDPA parent. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200326140125.19794-7-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/Kconfig | 13 ++ drivers/virtio/Makefile | 1 + drivers/virtio/virtio_vdpa.c | 396 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 410 insertions(+) create mode 100644 drivers/virtio/virtio_vdpa.c (limited to 'drivers/virtio') diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 9c4fdb64d9ac..99e424570644 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -43,6 +43,19 @@ config VIRTIO_PCI_LEGACY If unsure, say Y. +config VIRTIO_VDPA + tristate "vDPA driver for virtio devices" + select VDPA + select VIRTIO + help + This driver provides support for virtio based paravirtual + device driver over vDPA bus. For this to be useful, you need + an appropriate vDPA device implementation that operates on a + physical device to allow the datapath of virtio to be + offloaded to hardware. + + If unsure, say M. + config VIRTIO_PMEM tristate "Support for virtio pmem driver" depends on VIRTIO diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index fdf5eacd0d0a..3407ac03fe60 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -6,4 +6,5 @@ virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o +obj-$(CONFIG_VIRTIO_VDPA) += virtio_vdpa.o obj-$(CONFIG_VDPA) += vdpa/ diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c new file mode 100644 index 000000000000..c30eb55030be --- /dev/null +++ b/drivers/virtio/virtio_vdpa.c @@ -0,0 +1,396 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VIRTIO based driver for vDPA device + * + * Copyright (c) 2020, Red Hat. All rights reserved. + * Author: Jason Wang + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MOD_VERSION "0.1" +#define MOD_AUTHOR "Jason Wang " +#define MOD_DESC "vDPA bus driver for virtio devices" +#define MOD_LICENSE "GPL v2" + +struct virtio_vdpa_device { + struct virtio_device vdev; + struct vdpa_device *vdpa; + u64 features; + + /* The lock to protect virtqueue list */ + spinlock_t lock; + /* List of virtio_vdpa_vq_info */ + struct list_head virtqueues; +}; + +struct virtio_vdpa_vq_info { + /* the actual virtqueue */ + struct virtqueue *vq; + + /* the list node for the virtqueues list */ + struct list_head node; +}; + +static inline struct virtio_vdpa_device * +to_virtio_vdpa_device(struct virtio_device *dev) +{ + return container_of(dev, struct virtio_vdpa_device, vdev); +} + +static struct vdpa_device *vd_get_vdpa(struct virtio_device *vdev) +{ + return to_virtio_vdpa_device(vdev)->vdpa; +} + +static void virtio_vdpa_get(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len) +{ + struct vdpa_device *vdpa = vd_get_vdpa(vdev); + const struct vdpa_config_ops *ops = vdpa->config; + + ops->get_config(vdpa, offset, buf, len); +} + +static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset, + const void *buf, unsigned len) +{ + struct vdpa_device *vdpa = vd_get_vdpa(vdev); + const struct vdpa_config_ops *ops = vdpa->config; + + ops->set_config(vdpa, offset, buf, len); +} + +static u32 virtio_vdpa_generation(struct virtio_device *vdev) +{ + struct vdpa_device *vdpa = vd_get_vdpa(vdev); + const struct vdpa_config_ops *ops = vdpa->config; + + if (ops->get_generation) + return ops->get_generation(vdpa); + + return 0; +} + +static u8 virtio_vdpa_get_status(struct virtio_device *vdev) +{ + struct vdpa_device *vdpa = vd_get_vdpa(vdev); + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->get_status(vdpa); +} + +static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status) +{ + struct vdpa_device *vdpa = vd_get_vdpa(vdev); + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->set_status(vdpa, status); +} + +static void virtio_vdpa_reset(struct virtio_device *vdev) +{ + struct vdpa_device *vdpa = vd_get_vdpa(vdev); + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->set_status(vdpa, 0); +} + +static bool virtio_vdpa_notify(struct virtqueue *vq) +{ + struct vdpa_device *vdpa = vd_get_vdpa(vq->vdev); + const struct vdpa_config_ops *ops = vdpa->config; + + ops->kick_vq(vdpa, vq->index); + + return true; +} + +static irqreturn_t virtio_vdpa_config_cb(void *private) +{ + struct virtio_vdpa_device *vd_dev = private; + + virtio_config_changed(&vd_dev->vdev); + + return IRQ_HANDLED; +} + +static irqreturn_t virtio_vdpa_virtqueue_cb(void *private) +{ + struct virtio_vdpa_vq_info *info = private; + + return vring_interrupt(0, info->vq); +} + +static struct virtqueue * +virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, + void (*callback)(struct virtqueue *vq), + const char *name, bool ctx) +{ + struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev); + struct vdpa_device *vdpa = vd_get_vdpa(vdev); + const struct vdpa_config_ops *ops = vdpa->config; + struct virtio_vdpa_vq_info *info; + struct vdpa_callback cb; + struct virtqueue *vq; + u64 desc_addr, driver_addr, device_addr; + unsigned long flags; + u32 align, num; + int err; + + if (!name) + return NULL; + + /* Queue shouldn't already be set up. */ + if (ops->get_vq_ready(vdpa, index)) + return ERR_PTR(-ENOENT); + + /* Allocate and fill out our active queue description */ + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + num = ops->get_vq_num_max(vdpa); + if (num == 0) { + err = -ENOENT; + goto error_new_virtqueue; + } + + /* Create the vring */ + align = ops->get_vq_align(vdpa); + vq = vring_create_virtqueue(index, num, align, vdev, + true, true, ctx, + virtio_vdpa_notify, callback, name); + if (!vq) { + err = -ENOMEM; + goto error_new_virtqueue; + } + + /* Setup virtqueue callback */ + cb.callback = virtio_vdpa_virtqueue_cb; + cb.private = info; + ops->set_vq_cb(vdpa, index, &cb); + ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq)); + + desc_addr = virtqueue_get_desc_addr(vq); + driver_addr = virtqueue_get_avail_addr(vq); + device_addr = virtqueue_get_used_addr(vq); + + if (ops->set_vq_address(vdpa, index, + desc_addr, driver_addr, + device_addr)) { + err = -EINVAL; + goto err_vq; + } + + ops->set_vq_ready(vdpa, index, 1); + + vq->priv = info; + info->vq = vq; + + spin_lock_irqsave(&vd_dev->lock, flags); + list_add(&info->node, &vd_dev->virtqueues); + spin_unlock_irqrestore(&vd_dev->lock, flags); + + return vq; + +err_vq: + vring_del_virtqueue(vq); +error_new_virtqueue: + ops->set_vq_ready(vdpa, index, 0); + /* VDPA driver should make sure vq is stopeed here */ + WARN_ON(ops->get_vq_ready(vdpa, index)); + kfree(info); + return ERR_PTR(err); +} + +static void virtio_vdpa_del_vq(struct virtqueue *vq) +{ + struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vq->vdev); + struct vdpa_device *vdpa = vd_dev->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + struct virtio_vdpa_vq_info *info = vq->priv; + unsigned int index = vq->index; + unsigned long flags; + + spin_lock_irqsave(&vd_dev->lock, flags); + list_del(&info->node); + spin_unlock_irqrestore(&vd_dev->lock, flags); + + /* Select and deactivate the queue */ + ops->set_vq_ready(vdpa, index, 0); + WARN_ON(ops->get_vq_ready(vdpa, index)); + + vring_del_virtqueue(vq); + + kfree(info); +} + +static void virtio_vdpa_del_vqs(struct virtio_device *vdev) +{ + struct virtqueue *vq, *n; + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) + virtio_vdpa_del_vq(vq); +} + +static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char * const names[], + const bool *ctx, + struct irq_affinity *desc) +{ + struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev); + struct vdpa_device *vdpa = vd_get_vdpa(vdev); + const struct vdpa_config_ops *ops = vdpa->config; + struct vdpa_callback cb; + int i, err, queue_idx = 0; + + for (i = 0; i < nvqs; ++i) { + if (!names[i]) { + vqs[i] = NULL; + continue; + } + + vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, + callbacks[i], names[i], ctx ? + ctx[i] : false); + if (IS_ERR(vqs[i])) { + err = PTR_ERR(vqs[i]); + goto err_setup_vq; + } + } + + cb.callback = virtio_vdpa_config_cb; + cb.private = vd_dev; + ops->set_config_cb(vdpa, &cb); + + return 0; + +err_setup_vq: + virtio_vdpa_del_vqs(vdev); + return err; +} + +static u64 virtio_vdpa_get_features(struct virtio_device *vdev) +{ + struct vdpa_device *vdpa = vd_get_vdpa(vdev); + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->get_features(vdpa); +} + +static int virtio_vdpa_finalize_features(struct virtio_device *vdev) +{ + struct vdpa_device *vdpa = vd_get_vdpa(vdev); + const struct vdpa_config_ops *ops = vdpa->config; + + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + + return ops->set_features(vdpa, vdev->features); +} + +static const char *virtio_vdpa_bus_name(struct virtio_device *vdev) +{ + struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev); + struct vdpa_device *vdpa = vd_dev->vdpa; + + return dev_name(&vdpa->dev); +} + +static const struct virtio_config_ops virtio_vdpa_config_ops = { + .get = virtio_vdpa_get, + .set = virtio_vdpa_set, + .generation = virtio_vdpa_generation, + .get_status = virtio_vdpa_get_status, + .set_status = virtio_vdpa_set_status, + .reset = virtio_vdpa_reset, + .find_vqs = virtio_vdpa_find_vqs, + .del_vqs = virtio_vdpa_del_vqs, + .get_features = virtio_vdpa_get_features, + .finalize_features = virtio_vdpa_finalize_features, + .bus_name = virtio_vdpa_bus_name, +}; + +static void virtio_vdpa_release_dev(struct device *_d) +{ + struct virtio_device *vdev = + container_of(_d, struct virtio_device, dev); + struct virtio_vdpa_device *vd_dev = + container_of(vdev, struct virtio_vdpa_device, vdev); + + kfree(vd_dev); +} + +static int virtio_vdpa_probe(struct vdpa_device *vdpa) +{ + const struct vdpa_config_ops *ops = vdpa->config; + struct virtio_vdpa_device *vd_dev, *reg_dev = NULL; + int ret = -EINVAL; + + vd_dev = kzalloc(sizeof(*vd_dev), GFP_KERNEL); + if (!vd_dev) + return -ENOMEM; + + vd_dev->vdev.dev.parent = vdpa_get_dma_dev(vdpa); + vd_dev->vdev.dev.release = virtio_vdpa_release_dev; + vd_dev->vdev.config = &virtio_vdpa_config_ops; + vd_dev->vdpa = vdpa; + INIT_LIST_HEAD(&vd_dev->virtqueues); + spin_lock_init(&vd_dev->lock); + + vd_dev->vdev.id.device = ops->get_device_id(vdpa); + if (vd_dev->vdev.id.device == 0) + goto err; + + vd_dev->vdev.id.vendor = ops->get_vendor_id(vdpa); + ret = register_virtio_device(&vd_dev->vdev); + reg_dev = vd_dev; + if (ret) + goto err; + + vdpa_set_drvdata(vdpa, vd_dev); + + return 0; + +err: + if (reg_dev) + put_device(&vd_dev->vdev.dev); + else + kfree(vd_dev); + return ret; +} + +static void virtio_vdpa_remove(struct vdpa_device *vdpa) +{ + struct virtio_vdpa_device *vd_dev = vdpa_get_drvdata(vdpa); + + unregister_virtio_device(&vd_dev->vdev); +} + +static struct vdpa_driver virtio_vdpa_driver = { + .driver = { + .name = "virtio_vdpa", + }, + .probe = virtio_vdpa_probe, + .remove = virtio_vdpa_remove, +}; + +module_vdpa_driver(virtio_vdpa_driver); + +MODULE_VERSION(MOD_VERSION); +MODULE_LICENSE(MOD_LICENSE); +MODULE_AUTHOR(MOD_AUTHOR); +MODULE_DESCRIPTION(MOD_DESC); -- cgit From 2c53d0f64c06f4580588ef16f50f63a112117a02 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 26 Mar 2020 22:01:24 +0800 Subject: vdpasim: vDPA device simulator This patch implements a software vDPA networking device. The datapath is implemented through vringh and workqueue. The device has an on-chip IOMMU which translates IOVA to PA. For kernel virtio drivers, vDPA simulator driver provides dma_ops. For vhost driers, set_map() methods of vdpa_config_ops is implemented to accept mappings from vhost. Currently, vDPA device simulator will loopback TX traffic to RX. So the main use case for the device is vDPA feature testing, prototyping and development. Note, there's no management API implemented, a vDPA device will be registered once the module is probed. We need to handle this in the future development. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200326140125.19794-9-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/vdpa/Kconfig | 19 + drivers/virtio/vdpa/Makefile | 1 + drivers/virtio/vdpa/vdpa_sim/Makefile | 2 + drivers/virtio/vdpa/vdpa_sim/vdpa_sim.c | 629 ++++++++++++++++++++++++++++++++ 4 files changed, 651 insertions(+) create mode 100644 drivers/virtio/vdpa/vdpa_sim/Makefile create mode 100644 drivers/virtio/vdpa/vdpa_sim/vdpa_sim.c (limited to 'drivers/virtio') diff --git a/drivers/virtio/vdpa/Kconfig b/drivers/virtio/vdpa/Kconfig index 351617723d12..ae204465964e 100644 --- a/drivers/virtio/vdpa/Kconfig +++ b/drivers/virtio/vdpa/Kconfig @@ -5,3 +5,22 @@ config VDPA Enable this module to support vDPA device that uses a datapath which complies with virtio specifications with vendor specific control path. + +menuconfig VDPA_MENU + bool "VDPA drivers" + default n + +if VDPA_MENU + +config VDPA_SIM + tristate "vDPA device simulator" + depends on RUNTIME_TESTING_MENU + select VDPA + select VHOST_RING + default n + help + vDPA networking device simulator which loop TX traffic back + to RX. This device is used for testing, prototyping and + development of vDPA. + +endif # VDPA_MENU diff --git a/drivers/virtio/vdpa/Makefile b/drivers/virtio/vdpa/Makefile index ee6a35e8a4fb..3814af8e097b 100644 --- a/drivers/virtio/vdpa/Makefile +++ b/drivers/virtio/vdpa/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VDPA) += vdpa.o +obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ diff --git a/drivers/virtio/vdpa/vdpa_sim/Makefile b/drivers/virtio/vdpa/vdpa_sim/Makefile new file mode 100644 index 000000000000..b40278f65e04 --- /dev/null +++ b/drivers/virtio/vdpa/vdpa_sim/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o diff --git a/drivers/virtio/vdpa/vdpa_sim/vdpa_sim.c b/drivers/virtio/vdpa/vdpa_sim/vdpa_sim.c new file mode 100644 index 000000000000..6e8a0cf2fdeb --- /dev/null +++ b/drivers/virtio/vdpa/vdpa_sim/vdpa_sim.c @@ -0,0 +1,629 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDPA networking device simulator. + * + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Author: Jason Wang + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_VERSION "0.1" +#define DRV_AUTHOR "Jason Wang " +#define DRV_DESC "vDPA Device Simulator" +#define DRV_LICENSE "GPL v2" + +struct vdpasim_virtqueue { + struct vringh vring; + struct vringh_kiov iov; + unsigned short head; + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u32 num; + void *private; + irqreturn_t (*cb)(void *data); +}; + +#define VDPASIM_QUEUE_ALIGN PAGE_SIZE +#define VDPASIM_QUEUE_MAX 256 +#define VDPASIM_DEVICE_ID 0x1 +#define VDPASIM_VENDOR_ID 0 +#define VDPASIM_VQ_NUM 0x2 +#define VDPASIM_NAME "vdpasim-netdev" + +static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | + (1ULL << VIRTIO_F_VERSION_1) | + (1ULL << VIRTIO_F_IOMMU_PLATFORM); + +/* State of each vdpasim device */ +struct vdpasim { + struct vdpa_device vdpa; + struct vdpasim_virtqueue vqs[2]; + struct work_struct work; + /* spinlock to synchronize virtqueue state */ + spinlock_t lock; + struct virtio_net_config config; + struct vhost_iotlb *iommu; + void *buffer; + u32 status; + u32 generation; + u64 features; +}; + +static struct vdpasim *vdpasim_dev; + +static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) +{ + return container_of(vdpa, struct vdpasim, vdpa); +} + +static struct vdpasim *dev_to_sim(struct device *dev) +{ + struct vdpa_device *vdpa = dev_to_vdpa(dev); + + return vdpa_to_sim(vdpa); +} + +static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) +{ + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + int ret; + + ret = vringh_init_iotlb(&vq->vring, vdpasim_features, + VDPASIM_QUEUE_MAX, false, + (struct vring_desc *)(uintptr_t)vq->desc_addr, + (struct vring_avail *) + (uintptr_t)vq->driver_addr, + (struct vring_used *) + (uintptr_t)vq->device_addr); +} + +static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) +{ + vq->ready = 0; + vq->desc_addr = 0; + vq->driver_addr = 0; + vq->device_addr = 0; + vq->cb = NULL; + vq->private = NULL; + vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX, + false, NULL, NULL, NULL); +} + +static void vdpasim_reset(struct vdpasim *vdpasim) +{ + int i; + + for (i = 0; i < VDPASIM_VQ_NUM; i++) + vdpasim_vq_reset(&vdpasim->vqs[i]); + + vhost_iotlb_reset(vdpasim->iommu); + + vdpasim->features = 0; + vdpasim->status = 0; + ++vdpasim->generation; +} + +static void vdpasim_work(struct work_struct *work) +{ + struct vdpasim *vdpasim = container_of(work, struct + vdpasim, work); + struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; + struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; + size_t read, write, total_write; + int err; + int pkts = 0; + + spin_lock(&vdpasim->lock); + + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto out; + + if (!txq->ready || !rxq->ready) + goto out; + + while (true) { + total_write = 0; + err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL, + &txq->head, GFP_ATOMIC); + if (err <= 0) + break; + + err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov, + &rxq->head, GFP_ATOMIC); + if (err <= 0) { + vringh_complete_iotlb(&txq->vring, txq->head, 0); + break; + } + + while (true) { + read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov, + vdpasim->buffer, + PAGE_SIZE); + if (read <= 0) + break; + + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov, + vdpasim->buffer, read); + if (write <= 0) + break; + + total_write += write; + } + + /* Make sure data is wrote before advancing index */ + smp_wmb(); + + vringh_complete_iotlb(&txq->vring, txq->head, 0); + vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); + + /* Make sure used is visible before rasing the interrupt. */ + smp_wmb(); + + local_bh_disable(); + if (txq->cb) + txq->cb(txq->private); + if (rxq->cb) + rxq->cb(rxq->private); + local_bh_enable(); + + if (++pkts > 4) { + schedule_work(&vdpasim->work); + goto out; + } + } + +out: + spin_unlock(&vdpasim->lock); +} + +static int dir_to_perm(enum dma_data_direction dir) +{ + int perm = -EFAULT; + + switch (dir) { + case DMA_FROM_DEVICE: + perm = VHOST_MAP_WO; + break; + case DMA_TO_DEVICE: + perm = VHOST_MAP_RO; + break; + case DMA_BIDIRECTIONAL: + perm = VHOST_MAP_RW; + break; + default: + break; + } + + return perm; +} + +static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct vdpasim *vdpasim = dev_to_sim(dev); + struct vhost_iotlb *iommu = vdpasim->iommu; + u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset; + int ret, perm = dir_to_perm(dir); + + if (perm < 0) + return DMA_MAPPING_ERROR; + + /* For simplicity, use identical mapping to avoid e.g iova + * allocator. + */ + ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, + pa, dir_to_perm(dir)); + if (ret) + return DMA_MAPPING_ERROR; + + return (dma_addr_t)(pa); +} + +static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct vdpasim *vdpasim = dev_to_sim(dev); + struct vhost_iotlb *iommu = vdpasim->iommu; + + vhost_iotlb_del_range(iommu, (u64)dma_addr, + (u64)dma_addr + size - 1); +} + +static void *vdpasim_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag, + unsigned long attrs) +{ + struct vdpasim *vdpasim = dev_to_sim(dev); + struct vhost_iotlb *iommu = vdpasim->iommu; + void *addr = kmalloc(size, flag); + int ret; + + if (!addr) + *dma_addr = DMA_MAPPING_ERROR; + else { + u64 pa = virt_to_phys(addr); + + ret = vhost_iotlb_add_range(iommu, (u64)pa, + (u64)pa + size - 1, + pa, VHOST_MAP_RW); + if (ret) { + *dma_addr = DMA_MAPPING_ERROR; + kfree(addr); + addr = NULL; + } else + *dma_addr = (dma_addr_t)pa; + } + + return addr; +} + +static void vdpasim_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + unsigned long attrs) +{ + struct vdpasim *vdpasim = dev_to_sim(dev); + struct vhost_iotlb *iommu = vdpasim->iommu; + + vhost_iotlb_del_range(iommu, (u64)dma_addr, + (u64)dma_addr + size - 1); + kfree(phys_to_virt((uintptr_t)dma_addr)); +} + +static const struct dma_map_ops vdpasim_dma_ops = { + .map_page = vdpasim_map_page, + .unmap_page = vdpasim_unmap_page, + .alloc = vdpasim_alloc_coherent, + .free = vdpasim_free_coherent, +}; + +static const struct vdpa_config_ops vdpasim_net_config_ops; + +static struct vdpasim *vdpasim_create(void) +{ + struct virtio_net_config *config; + struct vdpasim *vdpasim; + struct device *dev; + int ret = -ENOMEM; + + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, + &vdpasim_net_config_ops); + if (!vdpasim) + goto err_alloc; + + INIT_WORK(&vdpasim->work, vdpasim_work); + spin_lock_init(&vdpasim->lock); + + dev = &vdpasim->vdpa.dev; + dev->coherent_dma_mask = DMA_BIT_MASK(64); + set_dma_ops(dev, &vdpasim_dma_ops); + + vdpasim->iommu = vhost_iotlb_alloc(2048, 0); + if (!vdpasim->iommu) + goto err_iommu; + + vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vdpasim->buffer) + goto err_iommu; + + config = &vdpasim->config; + config->mtu = 1500; + config->status = VIRTIO_NET_S_LINK_UP; + eth_random_addr(config->mac); + + vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); + vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); + + vdpasim->vdpa.dma_dev = dev; + ret = vdpa_register_device(&vdpasim->vdpa); + if (ret) + goto err_iommu; + + return vdpasim; + +err_iommu: + put_device(dev); +err_alloc: + return ERR_PTR(ret); +} + +static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx, + u64 desc_area, u64 driver_area, + u64 device_area) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + vq->desc_addr = desc_area; + vq->driver_addr = driver_area; + vq->device_addr = device_area; + + return 0; +} + +static void vdpasim_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + vq->num = num; +} + +static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + if (vq->ready) + schedule_work(&vdpasim->work); +} + +static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx, + struct vdpa_callback *cb) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + vq->cb = cb->callback; + vq->private = cb->private; +} + +static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + spin_lock(&vdpasim->lock); + vq->ready = ready; + if (vq->ready) + vdpasim_queue_ready(vdpasim, idx); + spin_unlock(&vdpasim->lock); +} + +static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + return vq->ready; +} + +static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, u64 state) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + struct vringh *vrh = &vq->vring; + + spin_lock(&vdpasim->lock); + vrh->last_avail_idx = state; + spin_unlock(&vdpasim->lock); + + return 0; +} + +static u64 vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + struct vringh *vrh = &vq->vring; + + return vrh->last_avail_idx; +} + +static u16 vdpasim_get_vq_align(struct vdpa_device *vdpa) +{ + return VDPASIM_QUEUE_ALIGN; +} + +static u64 vdpasim_get_features(struct vdpa_device *vdpa) +{ + return vdpasim_features; +} + +static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + /* DMA mapping must be done by driver */ + if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) + return -EINVAL; + + vdpasim->features = features & vdpasim_features; + + return 0; +} + +static void vdpasim_set_config_cb(struct vdpa_device *vdpa, + struct vdpa_callback *cb) +{ + /* We don't support config interrupt */ +} + +static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa) +{ + return VDPASIM_QUEUE_MAX; +} + +static u32 vdpasim_get_device_id(struct vdpa_device *vdpa) +{ + return VDPASIM_DEVICE_ID; +} + +static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa) +{ + return VDPASIM_VENDOR_ID; +} + +static u8 vdpasim_get_status(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + u8 status; + + spin_lock(&vdpasim->lock); + status = vdpasim->status; + spin_unlock(&vdpasim->lock); + + return vdpasim->status; +} + +static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + spin_lock(&vdpasim->lock); + vdpasim->status = status; + if (status == 0) + vdpasim_reset(vdpasim); + spin_unlock(&vdpasim->lock); +} + +static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, + void *buf, unsigned int len) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + if (offset + len < sizeof(struct virtio_net_config)) + memcpy(buf, &vdpasim->config + offset, len); +} + +static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, + const void *buf, unsigned int len) +{ + /* No writable config supportted by vdpasim */ +} + +static u32 vdpasim_get_generation(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->generation; +} + +static int vdpasim_set_map(struct vdpa_device *vdpa, + struct vhost_iotlb *iotlb) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vhost_iotlb_map *map; + u64 start = 0ULL, last = 0ULL - 1; + int ret; + + vhost_iotlb_reset(vdpasim->iommu); + + for (map = vhost_iotlb_itree_first(iotlb, start, last); map; + map = vhost_iotlb_itree_next(map, start, last)) { + ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, + map->last, map->addr, map->perm); + if (ret) + goto err; + } + return 0; + +err: + vhost_iotlb_reset(vdpasim->iommu); + return ret; +} + +static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, + u64 pa, u32 perm) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vhost_iotlb_add_range(vdpasim->iommu, iova, + iova + size - 1, pa, perm); +} + +static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1); + + return 0; +} + +static void vdpasim_free(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + cancel_work_sync(&vdpasim->work); + kfree(vdpasim->buffer); + if (vdpasim->iommu) + vhost_iotlb_free(vdpasim->iommu); +} + +static const struct vdpa_config_ops vdpasim_net_config_ops = { + .set_vq_address = vdpasim_set_vq_address, + .set_vq_num = vdpasim_set_vq_num, + .kick_vq = vdpasim_kick_vq, + .set_vq_cb = vdpasim_set_vq_cb, + .set_vq_ready = vdpasim_set_vq_ready, + .get_vq_ready = vdpasim_get_vq_ready, + .set_vq_state = vdpasim_set_vq_state, + .get_vq_state = vdpasim_get_vq_state, + .get_vq_align = vdpasim_get_vq_align, + .get_features = vdpasim_get_features, + .set_features = vdpasim_set_features, + .set_config_cb = vdpasim_set_config_cb, + .get_vq_num_max = vdpasim_get_vq_num_max, + .get_device_id = vdpasim_get_device_id, + .get_vendor_id = vdpasim_get_vendor_id, + .get_status = vdpasim_get_status, + .set_status = vdpasim_set_status, + .get_config = vdpasim_get_config, + .set_config = vdpasim_set_config, + .get_generation = vdpasim_get_generation, + .set_map = vdpasim_set_map, + .dma_map = vdpasim_dma_map, + .dma_unmap = vdpasim_dma_unmap, + .free = vdpasim_free, +}; + +static int __init vdpasim_dev_init(void) +{ + vdpasim_dev = vdpasim_create(); + + if (!IS_ERR(vdpasim_dev)) + return 0; + + return PTR_ERR(vdpasim_dev); +} + +static void __exit vdpasim_dev_exit(void) +{ + struct vdpa_device *vdpa = &vdpasim_dev->vdpa; + + vdpa_unregister_device(vdpa); +} + +module_init(vdpasim_dev_init) +module_exit(vdpasim_dev_exit) + +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE(DRV_LICENSE); +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); -- cgit From 5a2414bc454e89e0515b47500734a65aa40cf9fe Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Thu, 26 Mar 2020 22:01:25 +0800 Subject: virtio: Intel IFC VF driver for VDPA This commit introduced two layers to drive IFC VF: (1) ifcvf_base layer, which handles IFC VF NIC hardware operations and configurations. (2) ifcvf_main layer, which complies to VDPA bus framework, implemented device operations for VDPA bus, handles device probe, bus attaching, vring operations, etc. Signed-off-by: Zhu Lingshan Signed-off-by: Bie Tiwei Signed-off-by: Wang Xiao Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200326140125.19794-10-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/vdpa/Kconfig | 11 + drivers/virtio/vdpa/Makefile | 1 + drivers/virtio/vdpa/ifcvf/Makefile | 3 + drivers/virtio/vdpa/ifcvf/ifcvf_base.c | 389 +++++++++++++++++++++++++++++ drivers/virtio/vdpa/ifcvf/ifcvf_base.h | 118 +++++++++ drivers/virtio/vdpa/ifcvf/ifcvf_main.c | 435 +++++++++++++++++++++++++++++++++ 6 files changed, 957 insertions(+) create mode 100644 drivers/virtio/vdpa/ifcvf/Makefile create mode 100644 drivers/virtio/vdpa/ifcvf/ifcvf_base.c create mode 100644 drivers/virtio/vdpa/ifcvf/ifcvf_base.h create mode 100644 drivers/virtio/vdpa/ifcvf/ifcvf_main.c (limited to 'drivers/virtio') diff --git a/drivers/virtio/vdpa/Kconfig b/drivers/virtio/vdpa/Kconfig index ae204465964e..7db1460104b7 100644 --- a/drivers/virtio/vdpa/Kconfig +++ b/drivers/virtio/vdpa/Kconfig @@ -23,4 +23,15 @@ config VDPA_SIM to RX. This device is used for testing, prototyping and development of vDPA. +config IFCVF + tristate "Intel IFC VF VDPA driver" + depends on PCI_MSI + select VDPA + default n + help + This kernel module can drive Intel IFC VF NIC to offload + virtio dataplane traffic to hardware. + To compile this driver as a module, choose M here: the module will + be called ifcvf. + endif # VDPA_MENU diff --git a/drivers/virtio/vdpa/Makefile b/drivers/virtio/vdpa/Makefile index 3814af8e097b..8bbb686ca7a2 100644 --- a/drivers/virtio/vdpa/Makefile +++ b/drivers/virtio/vdpa/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VDPA) += vdpa.o obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ +obj-$(CONFIG_IFCVF) += ifcvf/ diff --git a/drivers/virtio/vdpa/ifcvf/Makefile b/drivers/virtio/vdpa/ifcvf/Makefile new file mode 100644 index 000000000000..d709915995ab --- /dev/null +++ b/drivers/virtio/vdpa/ifcvf/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_IFCVF) += ifcvf.o +ifcvf-$(CONFIG_IFCVF) += ifcvf_main.o ifcvf_base.o diff --git a/drivers/virtio/vdpa/ifcvf/ifcvf_base.c b/drivers/virtio/vdpa/ifcvf/ifcvf_base.c new file mode 100644 index 000000000000..b61b06ea26d3 --- /dev/null +++ b/drivers/virtio/vdpa/ifcvf/ifcvf_base.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel IFC VF NIC driver for virtio dataplane offloading + * + * Copyright (C) 2020 Intel Corporation. + * + * Author: Zhu Lingshan + * + */ + +#include "ifcvf_base.h" + +static inline u8 ifc_ioread8(u8 __iomem *addr) +{ + return ioread8(addr); +} +static inline u16 ifc_ioread16 (__le16 __iomem *addr) +{ + return ioread16(addr); +} + +static inline u32 ifc_ioread32(__le32 __iomem *addr) +{ + return ioread32(addr); +} + +static inline void ifc_iowrite8(u8 value, u8 __iomem *addr) +{ + iowrite8(value, addr); +} + +static inline void ifc_iowrite16(u16 value, __le16 __iomem *addr) +{ + iowrite16(value, addr); +} + +static inline void ifc_iowrite32(u32 value, __le32 __iomem *addr) +{ + iowrite32(value, addr); +} + +static void ifc_iowrite64_twopart(u64 val, + __le32 __iomem *lo, __le32 __iomem *hi) +{ + ifc_iowrite32((u32)val, lo); + ifc_iowrite32(val >> 32, hi); +} + +struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw) +{ + return container_of(hw, struct ifcvf_adapter, vf); +} + +static void __iomem *get_cap_addr(struct ifcvf_hw *hw, + struct virtio_pci_cap *cap) +{ + struct ifcvf_adapter *ifcvf; + struct pci_dev *pdev; + u32 length, offset; + u8 bar; + + length = le32_to_cpu(cap->length); + offset = le32_to_cpu(cap->offset); + bar = cap->bar; + + ifcvf= vf_to_adapter(hw); + pdev = ifcvf->pdev; + + if (bar >= IFCVF_PCI_MAX_RESOURCE) { + IFCVF_DBG(pdev, + "Invalid bar number %u to get capabilities\n", bar); + return NULL; + } + + if (offset + length > pci_resource_len(pdev, bar)) { + IFCVF_DBG(pdev, + "offset(%u) + len(%u) overflows bar%u's capability\n", + offset, length, bar); + return NULL; + } + + return hw->base[bar] + offset; +} + +static int ifcvf_read_config_range(struct pci_dev *dev, + uint32_t *val, int size, int where) +{ + int ret, i; + + for (i = 0; i < size; i += 4) { + ret = pci_read_config_dword(dev, where + i, val + i / 4); + if (ret < 0) + return ret; + } + + return 0; +} + +int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) +{ + struct virtio_pci_cap cap; + u16 notify_off; + int ret; + u8 pos; + u32 i; + + ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos); + if (ret < 0) { + IFCVF_ERR(pdev, "Failed to read PCI capability list\n"); + return -EIO; + } + + while (pos) { + ret = ifcvf_read_config_range(pdev, (u32 *)&cap, + sizeof(cap), pos); + if (ret < 0) { + IFCVF_ERR(pdev, + "Failed to get PCI capability at %x\n", pos); + break; + } + + if (cap.cap_vndr != PCI_CAP_ID_VNDR) + goto next; + + switch (cap.cfg_type) { + case VIRTIO_PCI_CAP_COMMON_CFG: + hw->common_cfg = get_cap_addr(hw, &cap); + IFCVF_DBG(pdev, "hw->common_cfg = %p\n", + hw->common_cfg); + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + pci_read_config_dword(pdev, pos + sizeof(cap), + &hw->notify_off_multiplier); + hw->notify_bar = cap.bar; + hw->notify_base = get_cap_addr(hw, &cap); + IFCVF_DBG(pdev, "hw->notify_base = %p\n", + hw->notify_base); + break; + case VIRTIO_PCI_CAP_ISR_CFG: + hw->isr = get_cap_addr(hw, &cap); + IFCVF_DBG(pdev, "hw->isr = %p\n", hw->isr); + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + hw->net_cfg = get_cap_addr(hw, &cap); + IFCVF_DBG(pdev, "hw->net_cfg = %p\n", hw->net_cfg); + break; + } + +next: + pos = cap.cap_next; + } + + if (hw->common_cfg == NULL || hw->notify_base == NULL || + hw->isr == NULL || hw->net_cfg == NULL) { + IFCVF_ERR(pdev, "Incomplete PCI capabilities\n"); + return -EIO; + } + + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { + ifc_iowrite16(i, &hw->common_cfg->queue_select); + notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off); + hw->vring[i].notify_addr = hw->notify_base + + notify_off * hw->notify_off_multiplier; + } + + hw->lm_cfg = hw->base[IFCVF_LM_BAR]; + + IFCVF_DBG(pdev, + "PCI capability mapping: common cfg: %p, notify base: %p\n, isr cfg: %p, device cfg: %p, multiplier: %u\n", + hw->common_cfg, hw->notify_base, hw->isr, + hw->net_cfg, hw->notify_off_multiplier); + + return 0; +} + +u8 ifcvf_get_status(struct ifcvf_hw *hw) +{ + return ifc_ioread8(&hw->common_cfg->device_status); +} + +void ifcvf_set_status(struct ifcvf_hw *hw, u8 status) +{ + ifc_iowrite8(status, &hw->common_cfg->device_status); +} + +void ifcvf_reset(struct ifcvf_hw *hw) +{ + ifcvf_set_status(hw, 0); + /* flush set_status, make sure VF is stopped, reset */ + ifcvf_get_status(hw); +} + +static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status) +{ + if (status != 0) + status |= ifcvf_get_status(hw); + + ifcvf_set_status(hw, status); + ifcvf_get_status(hw); +} + +u64 ifcvf_get_features(struct ifcvf_hw *hw) +{ + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; + u32 features_lo, features_hi; + + ifc_iowrite32(0, &cfg->device_feature_select); + features_lo = ifc_ioread32(&cfg->device_feature); + + ifc_iowrite32(1, &cfg->device_feature_select); + features_hi = ifc_ioread32(&cfg->device_feature); + + return ((u64)features_hi << 32) | features_lo; +} + +void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, + void *dst, int length) +{ + u8 old_gen, new_gen, *p; + int i; + + WARN_ON(offset + length > sizeof(struct virtio_net_config)); + do { + old_gen = ifc_ioread8(&hw->common_cfg->config_generation); + p = dst; + for (i = 0; i < length; i++) + *p++ = ifc_ioread8(hw->net_cfg + offset + i); + + new_gen = ifc_ioread8(&hw->common_cfg->config_generation); + } while (old_gen != new_gen); +} + +void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset, + const void *src, int length) +{ + const u8 *p; + int i; + + p = src; + WARN_ON(offset + length > sizeof(struct virtio_net_config)); + for (i = 0; i < length; i++) + ifc_iowrite8(*p++, hw->net_cfg + offset + i); +} + +static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features) +{ + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; + + ifc_iowrite32(0, &cfg->guest_feature_select); + ifc_iowrite32((u32)features, &cfg->guest_feature); + + ifc_iowrite32(1, &cfg->guest_feature_select); + ifc_iowrite32(features >> 32, &cfg->guest_feature); +} + +static int ifcvf_config_features(struct ifcvf_hw *hw) +{ + struct ifcvf_adapter *ifcvf; + + ifcvf = vf_to_adapter(hw); + ifcvf_set_features(hw, hw->req_features); + ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK); + + if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) { + IFCVF_ERR(ifcvf->pdev, "Failed to set FEATURES_OK status\n"); + return -EIO; + } + + return 0; +} + +u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) +{ + struct ifcvf_lm_cfg __iomem *ifcvf_lm; + void __iomem *avail_idx_addr; + u16 last_avail_idx; + u32 q_pair_id; + + ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; + q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2); + avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; + last_avail_idx = ifc_ioread16(avail_idx_addr); + + return last_avail_idx; +} + +int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num) +{ + struct ifcvf_lm_cfg __iomem *ifcvf_lm; + void __iomem *avail_idx_addr; + u32 q_pair_id; + + ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; + q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2); + avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; + hw->vring[qid].last_avail_idx = num; + ifc_iowrite16(num, avail_idx_addr); + + return 0; +} + +static int ifcvf_hw_enable(struct ifcvf_hw *hw) +{ + struct ifcvf_lm_cfg __iomem *ifcvf_lm; + struct virtio_pci_common_cfg __iomem *cfg; + struct ifcvf_adapter *ifcvf; + u32 i; + + ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; + ifcvf = vf_to_adapter(hw); + cfg = hw->common_cfg; + ifc_iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config); + + if (ifc_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) { + IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n"); + return -EINVAL; + } + + for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].ready) + break; + + ifc_iowrite16(i, &cfg->queue_select); + ifc_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, + &cfg->queue_desc_hi); + ifc_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, + &cfg->queue_avail_hi); + ifc_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo, + &cfg->queue_used_hi); + ifc_iowrite16(hw->vring[i].size, &cfg->queue_size); + ifc_iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector); + + if (ifc_ioread16(&cfg->queue_msix_vector) == + VIRTIO_MSI_NO_VECTOR) { + IFCVF_ERR(ifcvf->pdev, + "No msix vector for queue %u\n", i); + return -EINVAL; + } + + ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx); + ifc_iowrite16(1, &cfg->queue_enable); + } + + return 0; +} + +static void ifcvf_hw_disable(struct ifcvf_hw *hw) +{ + struct virtio_pci_common_cfg __iomem *cfg; + u32 i; + + cfg = hw->common_cfg; + ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config); + + for (i = 0; i < hw->nr_vring; i++) { + ifc_iowrite16(i, &cfg->queue_select); + ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector); + } + + ifc_ioread16(&cfg->queue_msix_vector); +} + +int ifcvf_start_hw(struct ifcvf_hw *hw) +{ + ifcvf_reset(hw); + ifcvf_add_status(hw, VIRTIO_CONFIG_S_ACKNOWLEDGE); + ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER); + + if (ifcvf_config_features(hw) < 0) + return -EINVAL; + + if (ifcvf_hw_enable(hw) < 0) + return -EINVAL; + + ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER_OK); + + return 0; +} + +void ifcvf_stop_hw(struct ifcvf_hw *hw) +{ + ifcvf_hw_disable(hw); + ifcvf_reset(hw); +} + +void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid) +{ + ifc_iowrite16(qid, hw->vring[qid].notify_addr); +} diff --git a/drivers/virtio/vdpa/ifcvf/ifcvf_base.h b/drivers/virtio/vdpa/ifcvf/ifcvf_base.h new file mode 100644 index 000000000000..e80307092351 --- /dev/null +++ b/drivers/virtio/vdpa/ifcvf/ifcvf_base.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Intel IFC VF NIC driver for virtio dataplane offloading + * + * Copyright (C) 2020 Intel Corporation. + * + * Author: Zhu Lingshan + * + */ + +#ifndef _IFCVF_H_ +#define _IFCVF_H_ + +#include +#include +#include +#include +#include +#include + +#define IFCVF_VENDOR_ID 0x1AF4 +#define IFCVF_DEVICE_ID 0x1041 +#define IFCVF_SUBSYS_VENDOR_ID 0x8086 +#define IFCVF_SUBSYS_DEVICE_ID 0x001A + +#define IFCVF_SUPPORTED_FEATURES \ + ((1ULL << VIRTIO_NET_F_MAC) | \ + (1ULL << VIRTIO_F_ANY_LAYOUT) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ + (1ULL << VIRTIO_F_IOMMU_PLATFORM) | \ + (1ULL << VIRTIO_NET_F_MRG_RXBUF)) + +/* Only one queue pair for now. */ +#define IFCVF_MAX_QUEUE_PAIRS 1 + +#define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE +#define IFCVF_QUEUE_MAX 32768 +#define IFCVF_MSI_CONFIG_OFF 0 +#define IFCVF_MSI_QUEUE_OFF 1 +#define IFCVF_PCI_MAX_RESOURCE 6 + +#define IFCVF_LM_CFG_SIZE 0x40 +#define IFCVF_LM_RING_STATE_OFFSET 0x20 +#define IFCVF_LM_BAR 4 + +#define IFCVF_ERR(pdev, fmt, ...) dev_err(&pdev->dev, fmt, ##__VA_ARGS__) +#define IFCVF_DBG(pdev, fmt, ...) dev_dbg(&pdev->dev, fmt, ##__VA_ARGS__) +#define IFCVF_INFO(pdev, fmt, ...) dev_info(&pdev->dev, fmt, ##__VA_ARGS__) + +#define ifcvf_private_to_vf(adapter) \ + (&((struct ifcvf_adapter *)adapter)->vf) + +#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1) + +struct vring_info { + u64 desc; + u64 avail; + u64 used; + u16 size; + u16 last_avail_idx; + bool ready; + void __iomem *notify_addr; + u32 irq; + struct vdpa_callback cb; + char msix_name[256]; +}; + +struct ifcvf_hw { + u8 __iomem *isr; + /* Live migration */ + u8 __iomem *lm_cfg; + u16 nr_vring; + /* Notification bar number */ + u8 notify_bar; + /* Notificaiton bar address */ + void __iomem *notify_base; + u32 notify_off_multiplier; + u64 req_features; + struct virtio_pci_common_cfg __iomem *common_cfg; + void __iomem *net_cfg; + struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2]; + void __iomem * const *base; +}; + +struct ifcvf_adapter { + struct vdpa_device vdpa; + struct pci_dev *pdev; + struct ifcvf_hw vf; +}; + +struct ifcvf_vring_lm_cfg { + u32 idx_addr[2]; + u8 reserved[IFCVF_LM_CFG_SIZE - 8]; +}; + +struct ifcvf_lm_cfg { + u8 reserved[IFCVF_LM_RING_STATE_OFFSET]; + struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS]; +}; + +int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev); +int ifcvf_start_hw(struct ifcvf_hw *hw); +void ifcvf_stop_hw(struct ifcvf_hw *hw); +void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid); +void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, + void *dst, int length); +void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset, + const void *src, int length); +u8 ifcvf_get_status(struct ifcvf_hw *hw); +void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); +void io_write64_twopart(u64 val, u32 *lo, u32 *hi); +void ifcvf_reset(struct ifcvf_hw *hw); +u64 ifcvf_get_features(struct ifcvf_hw *hw); +u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); +int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num); +struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); +#endif /* _IFCVF_H_ */ diff --git a/drivers/virtio/vdpa/ifcvf/ifcvf_main.c b/drivers/virtio/vdpa/ifcvf/ifcvf_main.c new file mode 100644 index 000000000000..8d54dc5b08d2 --- /dev/null +++ b/drivers/virtio/vdpa/ifcvf/ifcvf_main.c @@ -0,0 +1,435 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel IFC VF NIC driver for virtio dataplane offloading + * + * Copyright (C) 2020 Intel Corporation. + * + * Author: Zhu Lingshan + * + */ + +#include +#include +#include +#include +#include "ifcvf_base.h" + +#define VERSION_STRING "0.1" +#define DRIVER_AUTHOR "Intel Corporation" +#define IFCVF_DRIVER_NAME "ifcvf" + +static irqreturn_t ifcvf_intr_handler(int irq, void *arg) +{ + struct vring_info *vring = arg; + + if (vring->cb.callback) + return vring->cb.callback(vring->cb.private); + + return IRQ_HANDLED; +} + +static int ifcvf_start_datapath(void *private) +{ + struct ifcvf_hw *vf = ifcvf_private_to_vf(private); + struct ifcvf_adapter *ifcvf; + u8 status; + int ret; + + ifcvf = vf_to_adapter(vf); + vf->nr_vring = IFCVF_MAX_QUEUE_PAIRS * 2; + ret = ifcvf_start_hw(vf); + if (ret < 0) { + status = ifcvf_get_status(vf); + status |= VIRTIO_CONFIG_S_FAILED; + ifcvf_set_status(vf, status); + } + + return ret; +} + +static int ifcvf_stop_datapath(void *private) +{ + struct ifcvf_hw *vf = ifcvf_private_to_vf(private); + int i; + + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) + vf->vring[i].cb.callback = NULL; + + ifcvf_stop_hw(vf); + + return 0; +} + +static void ifcvf_reset_vring(struct ifcvf_adapter *adapter) +{ + struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter); + int i; + + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { + vf->vring[i].last_avail_idx = 0; + vf->vring[i].desc = 0; + vf->vring[i].avail = 0; + vf->vring[i].used = 0; + vf->vring[i].ready = 0; + vf->vring[i].cb.callback = NULL; + vf->vring[i].cb.private = NULL; + } + + ifcvf_reset(vf); +} + +static struct ifcvf_adapter *vdpa_to_adapter(struct vdpa_device *vdpa_dev) +{ + return container_of(vdpa_dev, struct ifcvf_adapter, vdpa); +} + +static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); + + return &adapter->vf; +} + +static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + u64 features; + + features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES; + + return features; +} + +static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->req_features = features; + + return 0; +} + +static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return ifcvf_get_status(vf); +} + +static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) +{ + struct ifcvf_adapter *adapter; + struct ifcvf_hw *vf; + + vf = vdpa_to_vf(vdpa_dev); + adapter = dev_get_drvdata(vdpa_dev->dev.parent); + + if (status == 0) { + ifcvf_stop_datapath(adapter); + ifcvf_reset_vring(adapter); + return; + } + + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + if (ifcvf_start_datapath(adapter) < 0) + IFCVF_ERR(adapter->pdev, + "Failed to set ifcvf vdpa status %u\n", + status); + } + + ifcvf_set_status(vf, status); +} + +static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) +{ + return IFCVF_QUEUE_MAX; +} + +static u64 ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return ifcvf_get_vq_state(vf, qid); +} + +static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid, + u64 num) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return ifcvf_set_vq_state(vf, qid, num); +} + +static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid, + struct vdpa_callback *cb) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->vring[qid].cb = *cb; +} + +static void ifcvf_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, + u16 qid, bool ready) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->vring[qid].ready = ready; +} + +static bool ifcvf_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return vf->vring[qid].ready; +} + +static void ifcvf_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, + u32 num) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->vring[qid].size = num; +} + +static int ifcvf_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid, + u64 desc_area, u64 driver_area, + u64 device_area) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->vring[qid].desc = desc_area; + vf->vring[qid].avail = driver_area; + vf->vring[qid].used = device_area; + + return 0; +} + +static void ifcvf_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + ifcvf_notify_queue(vf, qid); +} + +static u32 ifcvf_vdpa_get_generation(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return ioread8(&vf->common_cfg->config_generation); +} + +static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev) +{ + return VIRTIO_ID_NET; +} + +static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev) +{ + return IFCVF_SUBSYS_VENDOR_ID; +} + +static u16 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) +{ + return IFCVF_QUEUE_ALIGNMENT; +} + +static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev, + unsigned int offset, + void *buf, unsigned int len) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + WARN_ON(offset + len > sizeof(struct virtio_net_config)); + ifcvf_read_net_config(vf, offset, buf, len); +} + +static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev, + unsigned int offset, const void *buf, + unsigned int len) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + WARN_ON(offset + len > sizeof(struct virtio_net_config)); + ifcvf_write_net_config(vf, offset, buf, len); +} + +static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, + struct vdpa_callback *cb) +{ + /* We don't support config interrupt */ +} + +/* + * IFCVF currently does't have on-chip IOMMU, so not + * implemented set_map()/dma_map()/dma_unmap() + */ +static const struct vdpa_config_ops ifc_vdpa_ops = { + .get_features = ifcvf_vdpa_get_features, + .set_features = ifcvf_vdpa_set_features, + .get_status = ifcvf_vdpa_get_status, + .set_status = ifcvf_vdpa_set_status, + .get_vq_num_max = ifcvf_vdpa_get_vq_num_max, + .get_vq_state = ifcvf_vdpa_get_vq_state, + .set_vq_state = ifcvf_vdpa_set_vq_state, + .set_vq_cb = ifcvf_vdpa_set_vq_cb, + .set_vq_ready = ifcvf_vdpa_set_vq_ready, + .get_vq_ready = ifcvf_vdpa_get_vq_ready, + .set_vq_num = ifcvf_vdpa_set_vq_num, + .set_vq_address = ifcvf_vdpa_set_vq_address, + .kick_vq = ifcvf_vdpa_kick_vq, + .get_generation = ifcvf_vdpa_get_generation, + .get_device_id = ifcvf_vdpa_get_device_id, + .get_vendor_id = ifcvf_vdpa_get_vendor_id, + .get_vq_align = ifcvf_vdpa_get_vq_align, + .get_config = ifcvf_vdpa_get_config, + .set_config = ifcvf_vdpa_set_config, + .set_config_cb = ifcvf_vdpa_set_config_cb, +}; + +static int ifcvf_request_irq(struct ifcvf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + int vector, i, ret, irq; + + + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { + snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n", + pci_name(pdev), i); + vector = i + IFCVF_MSI_QUEUE_OFF; + irq = pci_irq_vector(pdev, vector); + ret = devm_request_irq(&pdev->dev, irq, + ifcvf_intr_handler, 0, + vf->vring[i].msix_name, + &vf->vring[i]); + if (ret) { + IFCVF_ERR(pdev, + "Failed to request irq for vq %d\n", i); + return ret; + } + vf->vring[i].irq = irq; + } + + return 0; +} + +static void ifcvf_free_irq_vectors(void *data) +{ + pci_free_irq_vectors(data); +} + +static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct device *dev = &pdev->dev; + struct ifcvf_adapter *adapter; + struct ifcvf_hw *vf; + int ret; + + ret = pcim_enable_device(pdev); + if (ret) { + IFCVF_ERR(pdev, "Failed to enable device\n"); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4), + IFCVF_DRIVER_NAME); + if (ret) { + IFCVF_ERR(pdev, "Failed to request MMIO region\n"); + return ret; + } + + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret) { + IFCVF_ERR(pdev, "No usable DMA confiugration\n"); + return ret; + } + + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret) { + IFCVF_ERR(pdev, + "No usable coherent DMA confiugration\n"); + return ret; + } + + ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR, + IFCVF_MAX_INTR, PCI_IRQ_MSIX); + if (ret < 0) { + IFCVF_ERR(pdev, "Failed to alloc irq vectors\n"); + return ret; + } + + ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev); + if (ret) { + IFCVF_ERR(pdev, + "Failed for adding devres for freeing irq vectors\n"); + return ret; + } + + adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, + dev, &ifc_vdpa_ops); + if (adapter == NULL) { + IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); + return -ENOMEM; + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, adapter); + + vf = &adapter->vf; + vf->base = pcim_iomap_table(pdev); + + adapter->pdev = pdev; + adapter->vdpa.dma_dev = &pdev->dev; + + ret = ifcvf_request_irq(adapter); + if (ret) { + IFCVF_ERR(pdev, "Failed to request MSI-X irq\n"); + goto err; + } + + ret = ifcvf_init_hw(vf, pdev); + if (ret) { + IFCVF_ERR(pdev, "Failed to init IFCVF hw\n"); + goto err; + } + + ret = vdpa_register_device(&adapter->vdpa); + if (ret) { + IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus"); + goto err; + } + + return 0; + +err: + put_device(&adapter->vdpa.dev); + return ret; +} + +static void ifcvf_remove(struct pci_dev *pdev) +{ + struct ifcvf_adapter *adapter = pci_get_drvdata(pdev); + + vdpa_unregister_device(&adapter->vdpa); +} + +static struct pci_device_id ifcvf_pci_ids[] = { + { PCI_DEVICE_SUB(IFCVF_VENDOR_ID, + IFCVF_DEVICE_ID, + IFCVF_SUBSYS_VENDOR_ID, + IFCVF_SUBSYS_DEVICE_ID) }, + { 0 }, +}; +MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids); + +static struct pci_driver ifcvf_driver = { + .name = IFCVF_DRIVER_NAME, + .id_table = ifcvf_pci_ids, + .probe = ifcvf_probe, + .remove = ifcvf_remove, +}; + +module_pci_driver(ifcvf_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(VERSION_STRING); -- cgit From c9b9f5f8c0f3cdb893cb86c168cdaa3aa5ed7278 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 31 Mar 2020 15:15:14 -0400 Subject: vdpa: move to drivers/vdpa We have both vhost and virtio drivers that depend on vdpa. It's easier to locate it at a top level directory otherwise we run into issues e.g. if vhost is built-in but virtio is modular. Let's just move it up a level. Reported-by: Randy Dunlap Signed-off-by: Michael S. Tsirkin --- drivers/virtio/Kconfig | 2 - drivers/virtio/Makefile | 1 - drivers/virtio/vdpa/Kconfig | 37 -- drivers/virtio/vdpa/Makefile | 4 - drivers/virtio/vdpa/ifcvf/Makefile | 3 - drivers/virtio/vdpa/ifcvf/ifcvf_base.c | 389 -------------------- drivers/virtio/vdpa/ifcvf/ifcvf_base.h | 118 ------ drivers/virtio/vdpa/ifcvf/ifcvf_main.c | 435 ---------------------- drivers/virtio/vdpa/vdpa.c | 180 --------- drivers/virtio/vdpa/vdpa_sim/Makefile | 2 - drivers/virtio/vdpa/vdpa_sim/vdpa_sim.c | 629 -------------------------------- 11 files changed, 1800 deletions(-) delete mode 100644 drivers/virtio/vdpa/Kconfig delete mode 100644 drivers/virtio/vdpa/Makefile delete mode 100644 drivers/virtio/vdpa/ifcvf/Makefile delete mode 100644 drivers/virtio/vdpa/ifcvf/ifcvf_base.c delete mode 100644 drivers/virtio/vdpa/ifcvf/ifcvf_base.h delete mode 100644 drivers/virtio/vdpa/ifcvf/ifcvf_main.c delete mode 100644 drivers/virtio/vdpa/vdpa.c delete mode 100644 drivers/virtio/vdpa/vdpa_sim/Makefile delete mode 100644 drivers/virtio/vdpa/vdpa_sim/vdpa_sim.c (limited to 'drivers/virtio') diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 99e424570644..2aadf398d8cc 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -109,5 +109,3 @@ config VIRTIO_MMIO_CMDLINE_DEVICES If unsure, say 'N'. endif # VIRTIO_MENU - -source "drivers/virtio/vdpa/Kconfig" diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 3407ac03fe60..29a1386ecc03 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -7,4 +7,3 @@ virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o obj-$(CONFIG_VIRTIO_VDPA) += virtio_vdpa.o -obj-$(CONFIG_VDPA) += vdpa/ diff --git a/drivers/virtio/vdpa/Kconfig b/drivers/virtio/vdpa/Kconfig deleted file mode 100644 index 7db1460104b7..000000000000 --- a/drivers/virtio/vdpa/Kconfig +++ /dev/null @@ -1,37 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config VDPA - tristate - help - Enable this module to support vDPA device that uses a - datapath which complies with virtio specifications with - vendor specific control path. - -menuconfig VDPA_MENU - bool "VDPA drivers" - default n - -if VDPA_MENU - -config VDPA_SIM - tristate "vDPA device simulator" - depends on RUNTIME_TESTING_MENU - select VDPA - select VHOST_RING - default n - help - vDPA networking device simulator which loop TX traffic back - to RX. This device is used for testing, prototyping and - development of vDPA. - -config IFCVF - tristate "Intel IFC VF VDPA driver" - depends on PCI_MSI - select VDPA - default n - help - This kernel module can drive Intel IFC VF NIC to offload - virtio dataplane traffic to hardware. - To compile this driver as a module, choose M here: the module will - be called ifcvf. - -endif # VDPA_MENU diff --git a/drivers/virtio/vdpa/Makefile b/drivers/virtio/vdpa/Makefile deleted file mode 100644 index 8bbb686ca7a2..000000000000 --- a/drivers/virtio/vdpa/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_VDPA) += vdpa.o -obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ -obj-$(CONFIG_IFCVF) += ifcvf/ diff --git a/drivers/virtio/vdpa/ifcvf/Makefile b/drivers/virtio/vdpa/ifcvf/Makefile deleted file mode 100644 index d709915995ab..000000000000 --- a/drivers/virtio/vdpa/ifcvf/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_IFCVF) += ifcvf.o -ifcvf-$(CONFIG_IFCVF) += ifcvf_main.o ifcvf_base.o diff --git a/drivers/virtio/vdpa/ifcvf/ifcvf_base.c b/drivers/virtio/vdpa/ifcvf/ifcvf_base.c deleted file mode 100644 index b61b06ea26d3..000000000000 --- a/drivers/virtio/vdpa/ifcvf/ifcvf_base.c +++ /dev/null @@ -1,389 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Intel IFC VF NIC driver for virtio dataplane offloading - * - * Copyright (C) 2020 Intel Corporation. - * - * Author: Zhu Lingshan - * - */ - -#include "ifcvf_base.h" - -static inline u8 ifc_ioread8(u8 __iomem *addr) -{ - return ioread8(addr); -} -static inline u16 ifc_ioread16 (__le16 __iomem *addr) -{ - return ioread16(addr); -} - -static inline u32 ifc_ioread32(__le32 __iomem *addr) -{ - return ioread32(addr); -} - -static inline void ifc_iowrite8(u8 value, u8 __iomem *addr) -{ - iowrite8(value, addr); -} - -static inline void ifc_iowrite16(u16 value, __le16 __iomem *addr) -{ - iowrite16(value, addr); -} - -static inline void ifc_iowrite32(u32 value, __le32 __iomem *addr) -{ - iowrite32(value, addr); -} - -static void ifc_iowrite64_twopart(u64 val, - __le32 __iomem *lo, __le32 __iomem *hi) -{ - ifc_iowrite32((u32)val, lo); - ifc_iowrite32(val >> 32, hi); -} - -struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw) -{ - return container_of(hw, struct ifcvf_adapter, vf); -} - -static void __iomem *get_cap_addr(struct ifcvf_hw *hw, - struct virtio_pci_cap *cap) -{ - struct ifcvf_adapter *ifcvf; - struct pci_dev *pdev; - u32 length, offset; - u8 bar; - - length = le32_to_cpu(cap->length); - offset = le32_to_cpu(cap->offset); - bar = cap->bar; - - ifcvf= vf_to_adapter(hw); - pdev = ifcvf->pdev; - - if (bar >= IFCVF_PCI_MAX_RESOURCE) { - IFCVF_DBG(pdev, - "Invalid bar number %u to get capabilities\n", bar); - return NULL; - } - - if (offset + length > pci_resource_len(pdev, bar)) { - IFCVF_DBG(pdev, - "offset(%u) + len(%u) overflows bar%u's capability\n", - offset, length, bar); - return NULL; - } - - return hw->base[bar] + offset; -} - -static int ifcvf_read_config_range(struct pci_dev *dev, - uint32_t *val, int size, int where) -{ - int ret, i; - - for (i = 0; i < size; i += 4) { - ret = pci_read_config_dword(dev, where + i, val + i / 4); - if (ret < 0) - return ret; - } - - return 0; -} - -int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) -{ - struct virtio_pci_cap cap; - u16 notify_off; - int ret; - u8 pos; - u32 i; - - ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos); - if (ret < 0) { - IFCVF_ERR(pdev, "Failed to read PCI capability list\n"); - return -EIO; - } - - while (pos) { - ret = ifcvf_read_config_range(pdev, (u32 *)&cap, - sizeof(cap), pos); - if (ret < 0) { - IFCVF_ERR(pdev, - "Failed to get PCI capability at %x\n", pos); - break; - } - - if (cap.cap_vndr != PCI_CAP_ID_VNDR) - goto next; - - switch (cap.cfg_type) { - case VIRTIO_PCI_CAP_COMMON_CFG: - hw->common_cfg = get_cap_addr(hw, &cap); - IFCVF_DBG(pdev, "hw->common_cfg = %p\n", - hw->common_cfg); - break; - case VIRTIO_PCI_CAP_NOTIFY_CFG: - pci_read_config_dword(pdev, pos + sizeof(cap), - &hw->notify_off_multiplier); - hw->notify_bar = cap.bar; - hw->notify_base = get_cap_addr(hw, &cap); - IFCVF_DBG(pdev, "hw->notify_base = %p\n", - hw->notify_base); - break; - case VIRTIO_PCI_CAP_ISR_CFG: - hw->isr = get_cap_addr(hw, &cap); - IFCVF_DBG(pdev, "hw->isr = %p\n", hw->isr); - break; - case VIRTIO_PCI_CAP_DEVICE_CFG: - hw->net_cfg = get_cap_addr(hw, &cap); - IFCVF_DBG(pdev, "hw->net_cfg = %p\n", hw->net_cfg); - break; - } - -next: - pos = cap.cap_next; - } - - if (hw->common_cfg == NULL || hw->notify_base == NULL || - hw->isr == NULL || hw->net_cfg == NULL) { - IFCVF_ERR(pdev, "Incomplete PCI capabilities\n"); - return -EIO; - } - - for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { - ifc_iowrite16(i, &hw->common_cfg->queue_select); - notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off); - hw->vring[i].notify_addr = hw->notify_base + - notify_off * hw->notify_off_multiplier; - } - - hw->lm_cfg = hw->base[IFCVF_LM_BAR]; - - IFCVF_DBG(pdev, - "PCI capability mapping: common cfg: %p, notify base: %p\n, isr cfg: %p, device cfg: %p, multiplier: %u\n", - hw->common_cfg, hw->notify_base, hw->isr, - hw->net_cfg, hw->notify_off_multiplier); - - return 0; -} - -u8 ifcvf_get_status(struct ifcvf_hw *hw) -{ - return ifc_ioread8(&hw->common_cfg->device_status); -} - -void ifcvf_set_status(struct ifcvf_hw *hw, u8 status) -{ - ifc_iowrite8(status, &hw->common_cfg->device_status); -} - -void ifcvf_reset(struct ifcvf_hw *hw) -{ - ifcvf_set_status(hw, 0); - /* flush set_status, make sure VF is stopped, reset */ - ifcvf_get_status(hw); -} - -static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status) -{ - if (status != 0) - status |= ifcvf_get_status(hw); - - ifcvf_set_status(hw, status); - ifcvf_get_status(hw); -} - -u64 ifcvf_get_features(struct ifcvf_hw *hw) -{ - struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; - u32 features_lo, features_hi; - - ifc_iowrite32(0, &cfg->device_feature_select); - features_lo = ifc_ioread32(&cfg->device_feature); - - ifc_iowrite32(1, &cfg->device_feature_select); - features_hi = ifc_ioread32(&cfg->device_feature); - - return ((u64)features_hi << 32) | features_lo; -} - -void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, - void *dst, int length) -{ - u8 old_gen, new_gen, *p; - int i; - - WARN_ON(offset + length > sizeof(struct virtio_net_config)); - do { - old_gen = ifc_ioread8(&hw->common_cfg->config_generation); - p = dst; - for (i = 0; i < length; i++) - *p++ = ifc_ioread8(hw->net_cfg + offset + i); - - new_gen = ifc_ioread8(&hw->common_cfg->config_generation); - } while (old_gen != new_gen); -} - -void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset, - const void *src, int length) -{ - const u8 *p; - int i; - - p = src; - WARN_ON(offset + length > sizeof(struct virtio_net_config)); - for (i = 0; i < length; i++) - ifc_iowrite8(*p++, hw->net_cfg + offset + i); -} - -static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features) -{ - struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; - - ifc_iowrite32(0, &cfg->guest_feature_select); - ifc_iowrite32((u32)features, &cfg->guest_feature); - - ifc_iowrite32(1, &cfg->guest_feature_select); - ifc_iowrite32(features >> 32, &cfg->guest_feature); -} - -static int ifcvf_config_features(struct ifcvf_hw *hw) -{ - struct ifcvf_adapter *ifcvf; - - ifcvf = vf_to_adapter(hw); - ifcvf_set_features(hw, hw->req_features); - ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK); - - if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) { - IFCVF_ERR(ifcvf->pdev, "Failed to set FEATURES_OK status\n"); - return -EIO; - } - - return 0; -} - -u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) -{ - struct ifcvf_lm_cfg __iomem *ifcvf_lm; - void __iomem *avail_idx_addr; - u16 last_avail_idx; - u32 q_pair_id; - - ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; - q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2); - avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; - last_avail_idx = ifc_ioread16(avail_idx_addr); - - return last_avail_idx; -} - -int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num) -{ - struct ifcvf_lm_cfg __iomem *ifcvf_lm; - void __iomem *avail_idx_addr; - u32 q_pair_id; - - ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; - q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2); - avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; - hw->vring[qid].last_avail_idx = num; - ifc_iowrite16(num, avail_idx_addr); - - return 0; -} - -static int ifcvf_hw_enable(struct ifcvf_hw *hw) -{ - struct ifcvf_lm_cfg __iomem *ifcvf_lm; - struct virtio_pci_common_cfg __iomem *cfg; - struct ifcvf_adapter *ifcvf; - u32 i; - - ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; - ifcvf = vf_to_adapter(hw); - cfg = hw->common_cfg; - ifc_iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config); - - if (ifc_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) { - IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n"); - return -EINVAL; - } - - for (i = 0; i < hw->nr_vring; i++) { - if (!hw->vring[i].ready) - break; - - ifc_iowrite16(i, &cfg->queue_select); - ifc_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, - &cfg->queue_desc_hi); - ifc_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, - &cfg->queue_avail_hi); - ifc_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo, - &cfg->queue_used_hi); - ifc_iowrite16(hw->vring[i].size, &cfg->queue_size); - ifc_iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector); - - if (ifc_ioread16(&cfg->queue_msix_vector) == - VIRTIO_MSI_NO_VECTOR) { - IFCVF_ERR(ifcvf->pdev, - "No msix vector for queue %u\n", i); - return -EINVAL; - } - - ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx); - ifc_iowrite16(1, &cfg->queue_enable); - } - - return 0; -} - -static void ifcvf_hw_disable(struct ifcvf_hw *hw) -{ - struct virtio_pci_common_cfg __iomem *cfg; - u32 i; - - cfg = hw->common_cfg; - ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config); - - for (i = 0; i < hw->nr_vring; i++) { - ifc_iowrite16(i, &cfg->queue_select); - ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector); - } - - ifc_ioread16(&cfg->queue_msix_vector); -} - -int ifcvf_start_hw(struct ifcvf_hw *hw) -{ - ifcvf_reset(hw); - ifcvf_add_status(hw, VIRTIO_CONFIG_S_ACKNOWLEDGE); - ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER); - - if (ifcvf_config_features(hw) < 0) - return -EINVAL; - - if (ifcvf_hw_enable(hw) < 0) - return -EINVAL; - - ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER_OK); - - return 0; -} - -void ifcvf_stop_hw(struct ifcvf_hw *hw) -{ - ifcvf_hw_disable(hw); - ifcvf_reset(hw); -} - -void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid) -{ - ifc_iowrite16(qid, hw->vring[qid].notify_addr); -} diff --git a/drivers/virtio/vdpa/ifcvf/ifcvf_base.h b/drivers/virtio/vdpa/ifcvf/ifcvf_base.h deleted file mode 100644 index e80307092351..000000000000 --- a/drivers/virtio/vdpa/ifcvf/ifcvf_base.h +++ /dev/null @@ -1,118 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Intel IFC VF NIC driver for virtio dataplane offloading - * - * Copyright (C) 2020 Intel Corporation. - * - * Author: Zhu Lingshan - * - */ - -#ifndef _IFCVF_H_ -#define _IFCVF_H_ - -#include -#include -#include -#include -#include -#include - -#define IFCVF_VENDOR_ID 0x1AF4 -#define IFCVF_DEVICE_ID 0x1041 -#define IFCVF_SUBSYS_VENDOR_ID 0x8086 -#define IFCVF_SUBSYS_DEVICE_ID 0x001A - -#define IFCVF_SUPPORTED_FEATURES \ - ((1ULL << VIRTIO_NET_F_MAC) | \ - (1ULL << VIRTIO_F_ANY_LAYOUT) | \ - (1ULL << VIRTIO_F_VERSION_1) | \ - (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ - (1ULL << VIRTIO_F_IOMMU_PLATFORM) | \ - (1ULL << VIRTIO_NET_F_MRG_RXBUF)) - -/* Only one queue pair for now. */ -#define IFCVF_MAX_QUEUE_PAIRS 1 - -#define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE -#define IFCVF_QUEUE_MAX 32768 -#define IFCVF_MSI_CONFIG_OFF 0 -#define IFCVF_MSI_QUEUE_OFF 1 -#define IFCVF_PCI_MAX_RESOURCE 6 - -#define IFCVF_LM_CFG_SIZE 0x40 -#define IFCVF_LM_RING_STATE_OFFSET 0x20 -#define IFCVF_LM_BAR 4 - -#define IFCVF_ERR(pdev, fmt, ...) dev_err(&pdev->dev, fmt, ##__VA_ARGS__) -#define IFCVF_DBG(pdev, fmt, ...) dev_dbg(&pdev->dev, fmt, ##__VA_ARGS__) -#define IFCVF_INFO(pdev, fmt, ...) dev_info(&pdev->dev, fmt, ##__VA_ARGS__) - -#define ifcvf_private_to_vf(adapter) \ - (&((struct ifcvf_adapter *)adapter)->vf) - -#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1) - -struct vring_info { - u64 desc; - u64 avail; - u64 used; - u16 size; - u16 last_avail_idx; - bool ready; - void __iomem *notify_addr; - u32 irq; - struct vdpa_callback cb; - char msix_name[256]; -}; - -struct ifcvf_hw { - u8 __iomem *isr; - /* Live migration */ - u8 __iomem *lm_cfg; - u16 nr_vring; - /* Notification bar number */ - u8 notify_bar; - /* Notificaiton bar address */ - void __iomem *notify_base; - u32 notify_off_multiplier; - u64 req_features; - struct virtio_pci_common_cfg __iomem *common_cfg; - void __iomem *net_cfg; - struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2]; - void __iomem * const *base; -}; - -struct ifcvf_adapter { - struct vdpa_device vdpa; - struct pci_dev *pdev; - struct ifcvf_hw vf; -}; - -struct ifcvf_vring_lm_cfg { - u32 idx_addr[2]; - u8 reserved[IFCVF_LM_CFG_SIZE - 8]; -}; - -struct ifcvf_lm_cfg { - u8 reserved[IFCVF_LM_RING_STATE_OFFSET]; - struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS]; -}; - -int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev); -int ifcvf_start_hw(struct ifcvf_hw *hw); -void ifcvf_stop_hw(struct ifcvf_hw *hw); -void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid); -void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, - void *dst, int length); -void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset, - const void *src, int length); -u8 ifcvf_get_status(struct ifcvf_hw *hw); -void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); -void io_write64_twopart(u64 val, u32 *lo, u32 *hi); -void ifcvf_reset(struct ifcvf_hw *hw); -u64 ifcvf_get_features(struct ifcvf_hw *hw); -u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); -int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num); -struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); -#endif /* _IFCVF_H_ */ diff --git a/drivers/virtio/vdpa/ifcvf/ifcvf_main.c b/drivers/virtio/vdpa/ifcvf/ifcvf_main.c deleted file mode 100644 index 8d54dc5b08d2..000000000000 --- a/drivers/virtio/vdpa/ifcvf/ifcvf_main.c +++ /dev/null @@ -1,435 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Intel IFC VF NIC driver for virtio dataplane offloading - * - * Copyright (C) 2020 Intel Corporation. - * - * Author: Zhu Lingshan - * - */ - -#include -#include -#include -#include -#include "ifcvf_base.h" - -#define VERSION_STRING "0.1" -#define DRIVER_AUTHOR "Intel Corporation" -#define IFCVF_DRIVER_NAME "ifcvf" - -static irqreturn_t ifcvf_intr_handler(int irq, void *arg) -{ - struct vring_info *vring = arg; - - if (vring->cb.callback) - return vring->cb.callback(vring->cb.private); - - return IRQ_HANDLED; -} - -static int ifcvf_start_datapath(void *private) -{ - struct ifcvf_hw *vf = ifcvf_private_to_vf(private); - struct ifcvf_adapter *ifcvf; - u8 status; - int ret; - - ifcvf = vf_to_adapter(vf); - vf->nr_vring = IFCVF_MAX_QUEUE_PAIRS * 2; - ret = ifcvf_start_hw(vf); - if (ret < 0) { - status = ifcvf_get_status(vf); - status |= VIRTIO_CONFIG_S_FAILED; - ifcvf_set_status(vf, status); - } - - return ret; -} - -static int ifcvf_stop_datapath(void *private) -{ - struct ifcvf_hw *vf = ifcvf_private_to_vf(private); - int i; - - for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) - vf->vring[i].cb.callback = NULL; - - ifcvf_stop_hw(vf); - - return 0; -} - -static void ifcvf_reset_vring(struct ifcvf_adapter *adapter) -{ - struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter); - int i; - - for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { - vf->vring[i].last_avail_idx = 0; - vf->vring[i].desc = 0; - vf->vring[i].avail = 0; - vf->vring[i].used = 0; - vf->vring[i].ready = 0; - vf->vring[i].cb.callback = NULL; - vf->vring[i].cb.private = NULL; - } - - ifcvf_reset(vf); -} - -static struct ifcvf_adapter *vdpa_to_adapter(struct vdpa_device *vdpa_dev) -{ - return container_of(vdpa_dev, struct ifcvf_adapter, vdpa); -} - -static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev) -{ - struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); - - return &adapter->vf; -} - -static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - u64 features; - - features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES; - - return features; -} - -static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - vf->req_features = features; - - return 0; -} - -static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - return ifcvf_get_status(vf); -} - -static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) -{ - struct ifcvf_adapter *adapter; - struct ifcvf_hw *vf; - - vf = vdpa_to_vf(vdpa_dev); - adapter = dev_get_drvdata(vdpa_dev->dev.parent); - - if (status == 0) { - ifcvf_stop_datapath(adapter); - ifcvf_reset_vring(adapter); - return; - } - - if (status & VIRTIO_CONFIG_S_DRIVER_OK) { - if (ifcvf_start_datapath(adapter) < 0) - IFCVF_ERR(adapter->pdev, - "Failed to set ifcvf vdpa status %u\n", - status); - } - - ifcvf_set_status(vf, status); -} - -static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) -{ - return IFCVF_QUEUE_MAX; -} - -static u64 ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - return ifcvf_get_vq_state(vf, qid); -} - -static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid, - u64 num) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - return ifcvf_set_vq_state(vf, qid, num); -} - -static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid, - struct vdpa_callback *cb) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - vf->vring[qid].cb = *cb; -} - -static void ifcvf_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, - u16 qid, bool ready) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - vf->vring[qid].ready = ready; -} - -static bool ifcvf_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - return vf->vring[qid].ready; -} - -static void ifcvf_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, - u32 num) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - vf->vring[qid].size = num; -} - -static int ifcvf_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid, - u64 desc_area, u64 driver_area, - u64 device_area) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - vf->vring[qid].desc = desc_area; - vf->vring[qid].avail = driver_area; - vf->vring[qid].used = device_area; - - return 0; -} - -static void ifcvf_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - ifcvf_notify_queue(vf, qid); -} - -static u32 ifcvf_vdpa_get_generation(struct vdpa_device *vdpa_dev) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - return ioread8(&vf->common_cfg->config_generation); -} - -static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev) -{ - return VIRTIO_ID_NET; -} - -static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev) -{ - return IFCVF_SUBSYS_VENDOR_ID; -} - -static u16 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) -{ - return IFCVF_QUEUE_ALIGNMENT; -} - -static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev, - unsigned int offset, - void *buf, unsigned int len) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - WARN_ON(offset + len > sizeof(struct virtio_net_config)); - ifcvf_read_net_config(vf, offset, buf, len); -} - -static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev, - unsigned int offset, const void *buf, - unsigned int len) -{ - struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - - WARN_ON(offset + len > sizeof(struct virtio_net_config)); - ifcvf_write_net_config(vf, offset, buf, len); -} - -static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, - struct vdpa_callback *cb) -{ - /* We don't support config interrupt */ -} - -/* - * IFCVF currently does't have on-chip IOMMU, so not - * implemented set_map()/dma_map()/dma_unmap() - */ -static const struct vdpa_config_ops ifc_vdpa_ops = { - .get_features = ifcvf_vdpa_get_features, - .set_features = ifcvf_vdpa_set_features, - .get_status = ifcvf_vdpa_get_status, - .set_status = ifcvf_vdpa_set_status, - .get_vq_num_max = ifcvf_vdpa_get_vq_num_max, - .get_vq_state = ifcvf_vdpa_get_vq_state, - .set_vq_state = ifcvf_vdpa_set_vq_state, - .set_vq_cb = ifcvf_vdpa_set_vq_cb, - .set_vq_ready = ifcvf_vdpa_set_vq_ready, - .get_vq_ready = ifcvf_vdpa_get_vq_ready, - .set_vq_num = ifcvf_vdpa_set_vq_num, - .set_vq_address = ifcvf_vdpa_set_vq_address, - .kick_vq = ifcvf_vdpa_kick_vq, - .get_generation = ifcvf_vdpa_get_generation, - .get_device_id = ifcvf_vdpa_get_device_id, - .get_vendor_id = ifcvf_vdpa_get_vendor_id, - .get_vq_align = ifcvf_vdpa_get_vq_align, - .get_config = ifcvf_vdpa_get_config, - .set_config = ifcvf_vdpa_set_config, - .set_config_cb = ifcvf_vdpa_set_config_cb, -}; - -static int ifcvf_request_irq(struct ifcvf_adapter *adapter) -{ - struct pci_dev *pdev = adapter->pdev; - struct ifcvf_hw *vf = &adapter->vf; - int vector, i, ret, irq; - - - for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { - snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n", - pci_name(pdev), i); - vector = i + IFCVF_MSI_QUEUE_OFF; - irq = pci_irq_vector(pdev, vector); - ret = devm_request_irq(&pdev->dev, irq, - ifcvf_intr_handler, 0, - vf->vring[i].msix_name, - &vf->vring[i]); - if (ret) { - IFCVF_ERR(pdev, - "Failed to request irq for vq %d\n", i); - return ret; - } - vf->vring[i].irq = irq; - } - - return 0; -} - -static void ifcvf_free_irq_vectors(void *data) -{ - pci_free_irq_vectors(data); -} - -static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - struct device *dev = &pdev->dev; - struct ifcvf_adapter *adapter; - struct ifcvf_hw *vf; - int ret; - - ret = pcim_enable_device(pdev); - if (ret) { - IFCVF_ERR(pdev, "Failed to enable device\n"); - return ret; - } - - ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4), - IFCVF_DRIVER_NAME); - if (ret) { - IFCVF_ERR(pdev, "Failed to request MMIO region\n"); - return ret; - } - - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - IFCVF_ERR(pdev, "No usable DMA confiugration\n"); - return ret; - } - - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - IFCVF_ERR(pdev, - "No usable coherent DMA confiugration\n"); - return ret; - } - - ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR, - IFCVF_MAX_INTR, PCI_IRQ_MSIX); - if (ret < 0) { - IFCVF_ERR(pdev, "Failed to alloc irq vectors\n"); - return ret; - } - - ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev); - if (ret) { - IFCVF_ERR(pdev, - "Failed for adding devres for freeing irq vectors\n"); - return ret; - } - - adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, - dev, &ifc_vdpa_ops); - if (adapter == NULL) { - IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); - return -ENOMEM; - } - - pci_set_master(pdev); - pci_set_drvdata(pdev, adapter); - - vf = &adapter->vf; - vf->base = pcim_iomap_table(pdev); - - adapter->pdev = pdev; - adapter->vdpa.dma_dev = &pdev->dev; - - ret = ifcvf_request_irq(adapter); - if (ret) { - IFCVF_ERR(pdev, "Failed to request MSI-X irq\n"); - goto err; - } - - ret = ifcvf_init_hw(vf, pdev); - if (ret) { - IFCVF_ERR(pdev, "Failed to init IFCVF hw\n"); - goto err; - } - - ret = vdpa_register_device(&adapter->vdpa); - if (ret) { - IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus"); - goto err; - } - - return 0; - -err: - put_device(&adapter->vdpa.dev); - return ret; -} - -static void ifcvf_remove(struct pci_dev *pdev) -{ - struct ifcvf_adapter *adapter = pci_get_drvdata(pdev); - - vdpa_unregister_device(&adapter->vdpa); -} - -static struct pci_device_id ifcvf_pci_ids[] = { - { PCI_DEVICE_SUB(IFCVF_VENDOR_ID, - IFCVF_DEVICE_ID, - IFCVF_SUBSYS_VENDOR_ID, - IFCVF_SUBSYS_DEVICE_ID) }, - { 0 }, -}; -MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids); - -static struct pci_driver ifcvf_driver = { - .name = IFCVF_DRIVER_NAME, - .id_table = ifcvf_pci_ids, - .probe = ifcvf_probe, - .remove = ifcvf_remove, -}; - -module_pci_driver(ifcvf_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_VERSION(VERSION_STRING); diff --git a/drivers/virtio/vdpa/vdpa.c b/drivers/virtio/vdpa/vdpa.c deleted file mode 100644 index e9ed6a2b635b..000000000000 --- a/drivers/virtio/vdpa/vdpa.c +++ /dev/null @@ -1,180 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vDPA bus. - * - * Copyright (c) 2020, Red Hat. All rights reserved. - * Author: Jason Wang - * - */ - -#include -#include -#include -#include - -static DEFINE_IDA(vdpa_index_ida); - -static int vdpa_dev_probe(struct device *d) -{ - struct vdpa_device *vdev = dev_to_vdpa(d); - struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); - int ret = 0; - - if (drv && drv->probe) - ret = drv->probe(vdev); - - return ret; -} - -static int vdpa_dev_remove(struct device *d) -{ - struct vdpa_device *vdev = dev_to_vdpa(d); - struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); - - if (drv && drv->remove) - drv->remove(vdev); - - return 0; -} - -static struct bus_type vdpa_bus = { - .name = "vdpa", - .probe = vdpa_dev_probe, - .remove = vdpa_dev_remove, -}; - -static void vdpa_release_dev(struct device *d) -{ - struct vdpa_device *vdev = dev_to_vdpa(d); - const struct vdpa_config_ops *ops = vdev->config; - - if (ops->free) - ops->free(vdev); - - ida_simple_remove(&vdpa_index_ida, vdev->index); - kfree(vdev); -} - -/** - * __vdpa_alloc_device - allocate and initilaize a vDPA device - * This allows driver to some prepartion after device is - * initialized but before registered. - * @parent: the parent device - * @config: the bus operations that is supported by this device - * @size: size of the parent structure that contains private data - * - * Drvier should use vdap_alloc_device() wrapper macro instead of - * using this directly. - * - * Returns an error when parent/config/dma_dev is not set or fail to get - * ida. - */ -struct vdpa_device *__vdpa_alloc_device(struct device *parent, - const struct vdpa_config_ops *config, - size_t size) -{ - struct vdpa_device *vdev; - int err = -EINVAL; - - if (!config) - goto err; - - if (!!config->dma_map != !!config->dma_unmap) - goto err; - - err = -ENOMEM; - vdev = kzalloc(size, GFP_KERNEL); - if (!vdev) - goto err; - - err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL); - if (err < 0) - goto err_ida; - - vdev->dev.bus = &vdpa_bus; - vdev->dev.parent = parent; - vdev->dev.release = vdpa_release_dev; - vdev->index = err; - vdev->config = config; - - err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index); - if (err) - goto err_name; - - device_initialize(&vdev->dev); - - return vdev; - -err_name: - ida_simple_remove(&vdpa_index_ida, vdev->index); -err_ida: - kfree(vdev); -err: - return ERR_PTR(err); -} -EXPORT_SYMBOL_GPL(__vdpa_alloc_device); - -/** - * vdpa_register_device - register a vDPA device - * Callers must have a succeed call of vdpa_init_device() before. - * @vdev: the vdpa device to be registered to vDPA bus - * - * Returns an error when fail to add to vDPA bus - */ -int vdpa_register_device(struct vdpa_device *vdev) -{ - return device_add(&vdev->dev); -} -EXPORT_SYMBOL_GPL(vdpa_register_device); - -/** - * vdpa_unregister_device - unregister a vDPA device - * @vdev: the vdpa device to be unregisted from vDPA bus - */ -void vdpa_unregister_device(struct vdpa_device *vdev) -{ - device_unregister(&vdev->dev); -} -EXPORT_SYMBOL_GPL(vdpa_unregister_device); - -/** - * __vdpa_register_driver - register a vDPA device driver - * @drv: the vdpa device driver to be registered - * @owner: module owner of the driver - * - * Returns an err when fail to do the registration - */ -int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner) -{ - drv->driver.bus = &vdpa_bus; - drv->driver.owner = owner; - - return driver_register(&drv->driver); -} -EXPORT_SYMBOL_GPL(__vdpa_register_driver); - -/** - * vdpa_unregister_driver - unregister a vDPA device driver - * @drv: the vdpa device driver to be unregistered - */ -void vdpa_unregister_driver(struct vdpa_driver *drv) -{ - driver_unregister(&drv->driver); -} -EXPORT_SYMBOL_GPL(vdpa_unregister_driver); - -static int vdpa_init(void) -{ - return bus_register(&vdpa_bus); -} - -static void __exit vdpa_exit(void) -{ - bus_unregister(&vdpa_bus); - ida_destroy(&vdpa_index_ida); -} -core_initcall(vdpa_init); -module_exit(vdpa_exit); - -MODULE_AUTHOR("Jason Wang "); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/virtio/vdpa/vdpa_sim/Makefile b/drivers/virtio/vdpa/vdpa_sim/Makefile deleted file mode 100644 index b40278f65e04..000000000000 --- a/drivers/virtio/vdpa/vdpa_sim/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o diff --git a/drivers/virtio/vdpa/vdpa_sim/vdpa_sim.c b/drivers/virtio/vdpa/vdpa_sim/vdpa_sim.c deleted file mode 100644 index 6e8a0cf2fdeb..000000000000 --- a/drivers/virtio/vdpa/vdpa_sim/vdpa_sim.c +++ /dev/null @@ -1,629 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * VDPA networking device simulator. - * - * Copyright (c) 2020, Red Hat Inc. All rights reserved. - * Author: Jason Wang - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DRV_VERSION "0.1" -#define DRV_AUTHOR "Jason Wang " -#define DRV_DESC "vDPA Device Simulator" -#define DRV_LICENSE "GPL v2" - -struct vdpasim_virtqueue { - struct vringh vring; - struct vringh_kiov iov; - unsigned short head; - bool ready; - u64 desc_addr; - u64 device_addr; - u64 driver_addr; - u32 num; - void *private; - irqreturn_t (*cb)(void *data); -}; - -#define VDPASIM_QUEUE_ALIGN PAGE_SIZE -#define VDPASIM_QUEUE_MAX 256 -#define VDPASIM_DEVICE_ID 0x1 -#define VDPASIM_VENDOR_ID 0 -#define VDPASIM_VQ_NUM 0x2 -#define VDPASIM_NAME "vdpasim-netdev" - -static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | - (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_IOMMU_PLATFORM); - -/* State of each vdpasim device */ -struct vdpasim { - struct vdpa_device vdpa; - struct vdpasim_virtqueue vqs[2]; - struct work_struct work; - /* spinlock to synchronize virtqueue state */ - spinlock_t lock; - struct virtio_net_config config; - struct vhost_iotlb *iommu; - void *buffer; - u32 status; - u32 generation; - u64 features; -}; - -static struct vdpasim *vdpasim_dev; - -static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) -{ - return container_of(vdpa, struct vdpasim, vdpa); -} - -static struct vdpasim *dev_to_sim(struct device *dev) -{ - struct vdpa_device *vdpa = dev_to_vdpa(dev); - - return vdpa_to_sim(vdpa); -} - -static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) -{ - struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - int ret; - - ret = vringh_init_iotlb(&vq->vring, vdpasim_features, - VDPASIM_QUEUE_MAX, false, - (struct vring_desc *)(uintptr_t)vq->desc_addr, - (struct vring_avail *) - (uintptr_t)vq->driver_addr, - (struct vring_used *) - (uintptr_t)vq->device_addr); -} - -static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) -{ - vq->ready = 0; - vq->desc_addr = 0; - vq->driver_addr = 0; - vq->device_addr = 0; - vq->cb = NULL; - vq->private = NULL; - vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX, - false, NULL, NULL, NULL); -} - -static void vdpasim_reset(struct vdpasim *vdpasim) -{ - int i; - - for (i = 0; i < VDPASIM_VQ_NUM; i++) - vdpasim_vq_reset(&vdpasim->vqs[i]); - - vhost_iotlb_reset(vdpasim->iommu); - - vdpasim->features = 0; - vdpasim->status = 0; - ++vdpasim->generation; -} - -static void vdpasim_work(struct work_struct *work) -{ - struct vdpasim *vdpasim = container_of(work, struct - vdpasim, work); - struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; - struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; - size_t read, write, total_write; - int err; - int pkts = 0; - - spin_lock(&vdpasim->lock); - - if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) - goto out; - - if (!txq->ready || !rxq->ready) - goto out; - - while (true) { - total_write = 0; - err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL, - &txq->head, GFP_ATOMIC); - if (err <= 0) - break; - - err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov, - &rxq->head, GFP_ATOMIC); - if (err <= 0) { - vringh_complete_iotlb(&txq->vring, txq->head, 0); - break; - } - - while (true) { - read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov, - vdpasim->buffer, - PAGE_SIZE); - if (read <= 0) - break; - - write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov, - vdpasim->buffer, read); - if (write <= 0) - break; - - total_write += write; - } - - /* Make sure data is wrote before advancing index */ - smp_wmb(); - - vringh_complete_iotlb(&txq->vring, txq->head, 0); - vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); - - /* Make sure used is visible before rasing the interrupt. */ - smp_wmb(); - - local_bh_disable(); - if (txq->cb) - txq->cb(txq->private); - if (rxq->cb) - rxq->cb(rxq->private); - local_bh_enable(); - - if (++pkts > 4) { - schedule_work(&vdpasim->work); - goto out; - } - } - -out: - spin_unlock(&vdpasim->lock); -} - -static int dir_to_perm(enum dma_data_direction dir) -{ - int perm = -EFAULT; - - switch (dir) { - case DMA_FROM_DEVICE: - perm = VHOST_MAP_WO; - break; - case DMA_TO_DEVICE: - perm = VHOST_MAP_RO; - break; - case DMA_BIDIRECTIONAL: - perm = VHOST_MAP_RW; - break; - default: - break; - } - - return perm; -} - -static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - struct vdpasim *vdpasim = dev_to_sim(dev); - struct vhost_iotlb *iommu = vdpasim->iommu; - u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset; - int ret, perm = dir_to_perm(dir); - - if (perm < 0) - return DMA_MAPPING_ERROR; - - /* For simplicity, use identical mapping to avoid e.g iova - * allocator. - */ - ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, - pa, dir_to_perm(dir)); - if (ret) - return DMA_MAPPING_ERROR; - - return (dma_addr_t)(pa); -} - -static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - struct vdpasim *vdpasim = dev_to_sim(dev); - struct vhost_iotlb *iommu = vdpasim->iommu; - - vhost_iotlb_del_range(iommu, (u64)dma_addr, - (u64)dma_addr + size - 1); -} - -static void *vdpasim_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) -{ - struct vdpasim *vdpasim = dev_to_sim(dev); - struct vhost_iotlb *iommu = vdpasim->iommu; - void *addr = kmalloc(size, flag); - int ret; - - if (!addr) - *dma_addr = DMA_MAPPING_ERROR; - else { - u64 pa = virt_to_phys(addr); - - ret = vhost_iotlb_add_range(iommu, (u64)pa, - (u64)pa + size - 1, - pa, VHOST_MAP_RW); - if (ret) { - *dma_addr = DMA_MAPPING_ERROR; - kfree(addr); - addr = NULL; - } else - *dma_addr = (dma_addr_t)pa; - } - - return addr; -} - -static void vdpasim_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs) -{ - struct vdpasim *vdpasim = dev_to_sim(dev); - struct vhost_iotlb *iommu = vdpasim->iommu; - - vhost_iotlb_del_range(iommu, (u64)dma_addr, - (u64)dma_addr + size - 1); - kfree(phys_to_virt((uintptr_t)dma_addr)); -} - -static const struct dma_map_ops vdpasim_dma_ops = { - .map_page = vdpasim_map_page, - .unmap_page = vdpasim_unmap_page, - .alloc = vdpasim_alloc_coherent, - .free = vdpasim_free_coherent, -}; - -static const struct vdpa_config_ops vdpasim_net_config_ops; - -static struct vdpasim *vdpasim_create(void) -{ - struct virtio_net_config *config; - struct vdpasim *vdpasim; - struct device *dev; - int ret = -ENOMEM; - - vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, - &vdpasim_net_config_ops); - if (!vdpasim) - goto err_alloc; - - INIT_WORK(&vdpasim->work, vdpasim_work); - spin_lock_init(&vdpasim->lock); - - dev = &vdpasim->vdpa.dev; - dev->coherent_dma_mask = DMA_BIT_MASK(64); - set_dma_ops(dev, &vdpasim_dma_ops); - - vdpasim->iommu = vhost_iotlb_alloc(2048, 0); - if (!vdpasim->iommu) - goto err_iommu; - - vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vdpasim->buffer) - goto err_iommu; - - config = &vdpasim->config; - config->mtu = 1500; - config->status = VIRTIO_NET_S_LINK_UP; - eth_random_addr(config->mac); - - vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); - vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); - - vdpasim->vdpa.dma_dev = dev; - ret = vdpa_register_device(&vdpasim->vdpa); - if (ret) - goto err_iommu; - - return vdpasim; - -err_iommu: - put_device(dev); -err_alloc: - return ERR_PTR(ret); -} - -static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx, - u64 desc_area, u64 driver_area, - u64 device_area) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - - vq->desc_addr = desc_area; - vq->driver_addr = driver_area; - vq->device_addr = device_area; - - return 0; -} - -static void vdpasim_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - - vq->num = num; -} - -static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - - if (vq->ready) - schedule_work(&vdpasim->work); -} - -static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx, - struct vdpa_callback *cb) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - - vq->cb = cb->callback; - vq->private = cb->private; -} - -static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - - spin_lock(&vdpasim->lock); - vq->ready = ready; - if (vq->ready) - vdpasim_queue_ready(vdpasim, idx); - spin_unlock(&vdpasim->lock); -} - -static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - - return vq->ready; -} - -static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, u64 state) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - struct vringh *vrh = &vq->vring; - - spin_lock(&vdpasim->lock); - vrh->last_avail_idx = state; - spin_unlock(&vdpasim->lock); - - return 0; -} - -static u64 vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - struct vringh *vrh = &vq->vring; - - return vrh->last_avail_idx; -} - -static u16 vdpasim_get_vq_align(struct vdpa_device *vdpa) -{ - return VDPASIM_QUEUE_ALIGN; -} - -static u64 vdpasim_get_features(struct vdpa_device *vdpa) -{ - return vdpasim_features; -} - -static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - - /* DMA mapping must be done by driver */ - if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) - return -EINVAL; - - vdpasim->features = features & vdpasim_features; - - return 0; -} - -static void vdpasim_set_config_cb(struct vdpa_device *vdpa, - struct vdpa_callback *cb) -{ - /* We don't support config interrupt */ -} - -static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa) -{ - return VDPASIM_QUEUE_MAX; -} - -static u32 vdpasim_get_device_id(struct vdpa_device *vdpa) -{ - return VDPASIM_DEVICE_ID; -} - -static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa) -{ - return VDPASIM_VENDOR_ID; -} - -static u8 vdpasim_get_status(struct vdpa_device *vdpa) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - u8 status; - - spin_lock(&vdpasim->lock); - status = vdpasim->status; - spin_unlock(&vdpasim->lock); - - return vdpasim->status; -} - -static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - - spin_lock(&vdpasim->lock); - vdpasim->status = status; - if (status == 0) - vdpasim_reset(vdpasim); - spin_unlock(&vdpasim->lock); -} - -static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, - void *buf, unsigned int len) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - - if (offset + len < sizeof(struct virtio_net_config)) - memcpy(buf, &vdpasim->config + offset, len); -} - -static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, - const void *buf, unsigned int len) -{ - /* No writable config supportted by vdpasim */ -} - -static u32 vdpasim_get_generation(struct vdpa_device *vdpa) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - - return vdpasim->generation; -} - -static int vdpasim_set_map(struct vdpa_device *vdpa, - struct vhost_iotlb *iotlb) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct vhost_iotlb_map *map; - u64 start = 0ULL, last = 0ULL - 1; - int ret; - - vhost_iotlb_reset(vdpasim->iommu); - - for (map = vhost_iotlb_itree_first(iotlb, start, last); map; - map = vhost_iotlb_itree_next(map, start, last)) { - ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, - map->last, map->addr, map->perm); - if (ret) - goto err; - } - return 0; - -err: - vhost_iotlb_reset(vdpasim->iommu); - return ret; -} - -static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, - u64 pa, u32 perm) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - - return vhost_iotlb_add_range(vdpasim->iommu, iova, - iova + size - 1, pa, perm); -} - -static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - - vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1); - - return 0; -} - -static void vdpasim_free(struct vdpa_device *vdpa) -{ - struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - - cancel_work_sync(&vdpasim->work); - kfree(vdpasim->buffer); - if (vdpasim->iommu) - vhost_iotlb_free(vdpasim->iommu); -} - -static const struct vdpa_config_ops vdpasim_net_config_ops = { - .set_vq_address = vdpasim_set_vq_address, - .set_vq_num = vdpasim_set_vq_num, - .kick_vq = vdpasim_kick_vq, - .set_vq_cb = vdpasim_set_vq_cb, - .set_vq_ready = vdpasim_set_vq_ready, - .get_vq_ready = vdpasim_get_vq_ready, - .set_vq_state = vdpasim_set_vq_state, - .get_vq_state = vdpasim_get_vq_state, - .get_vq_align = vdpasim_get_vq_align, - .get_features = vdpasim_get_features, - .set_features = vdpasim_set_features, - .set_config_cb = vdpasim_set_config_cb, - .get_vq_num_max = vdpasim_get_vq_num_max, - .get_device_id = vdpasim_get_device_id, - .get_vendor_id = vdpasim_get_vendor_id, - .get_status = vdpasim_get_status, - .set_status = vdpasim_set_status, - .get_config = vdpasim_get_config, - .set_config = vdpasim_set_config, - .get_generation = vdpasim_get_generation, - .set_map = vdpasim_set_map, - .dma_map = vdpasim_dma_map, - .dma_unmap = vdpasim_dma_unmap, - .free = vdpasim_free, -}; - -static int __init vdpasim_dev_init(void) -{ - vdpasim_dev = vdpasim_create(); - - if (!IS_ERR(vdpasim_dev)) - return 0; - - return PTR_ERR(vdpasim_dev); -} - -static void __exit vdpasim_dev_exit(void) -{ - struct vdpa_device *vdpa = &vdpasim_dev->vdpa; - - vdpa_unregister_device(vdpa); -} - -module_init(vdpasim_dev_init) -module_exit(vdpasim_dev_exit) - -MODULE_VERSION(DRV_VERSION); -MODULE_LICENSE(DRV_LICENSE); -MODULE_AUTHOR(DRV_AUTHOR); -MODULE_DESCRIPTION(DRV_DESC); -- cgit From 835a6a649d0dd1b1f46759eb60fff2f63ed253a7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 7 Apr 2020 05:43:30 -0400 Subject: virtio-balloon: Revert "virtio-balloon: Switch back to OOM handler for VIRTIO_BALLOON_F_DEFLATE_ON_OOM" This reverts commit 5a6b4cc5b7a1892a8d7f63d6cbac6e0ae2a9d031. It has been queued properly in the akpm tree, this version is just creating conflicts. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 107 +++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 44 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 44375a22307b..341458fd95ca 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -28,9 +27,7 @@ */ #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 -/* Maximum number of (4k) pages to deflate on OOM notifications. */ -#define VIRTIO_BALLOON_OOM_NR_PAGES 256 -#define VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY 80 +#define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 #define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \ __GFP_NOMEMALLOC) @@ -115,11 +112,8 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; - /* Shrinker to return free pages - VIRTIO_BALLOON_F_FREE_PAGE_HINT */ + /* To register a shrinker to shrink memory upon memory pressure */ struct shrinker shrinker; - - /* OOM notifier to deflate on OOM - VIRTIO_BALLOON_F_DEFLATE_ON_OOM */ - struct notifier_block oom_nb; }; static struct virtio_device_id id_table[] = { @@ -794,13 +788,50 @@ static unsigned long shrink_free_pages(struct virtio_balloon *vb, return blocks_freed * VIRTIO_BALLOON_HINT_BLOCK_PAGES; } +static unsigned long leak_balloon_pages(struct virtio_balloon *vb, + unsigned long pages_to_free) +{ + return leak_balloon(vb, pages_to_free * VIRTIO_BALLOON_PAGES_PER_PAGE) / + VIRTIO_BALLOON_PAGES_PER_PAGE; +} + +static unsigned long shrink_balloon_pages(struct virtio_balloon *vb, + unsigned long pages_to_free) +{ + unsigned long pages_freed = 0; + + /* + * One invocation of leak_balloon can deflate at most + * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it + * multiple times to deflate pages till reaching pages_to_free. + */ + while (vb->num_pages && pages_freed < pages_to_free) + pages_freed += leak_balloon_pages(vb, + pages_to_free - pages_freed); + + update_balloon_size(vb); + + return pages_freed; +} + static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { + unsigned long pages_to_free, pages_freed = 0; struct virtio_balloon *vb = container_of(shrinker, struct virtio_balloon, shrinker); - return shrink_free_pages(vb, sc->nr_to_scan); + pages_to_free = sc->nr_to_scan; + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + pages_freed = shrink_free_pages(vb, pages_to_free); + + if (pages_freed >= pages_to_free) + return pages_freed; + + pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed); + + return pages_freed; } static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, @@ -808,22 +839,26 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, { struct virtio_balloon *vb = container_of(shrinker, struct virtio_balloon, shrinker); + unsigned long count; + + count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE; + count += vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES; - return vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES; + return count; } -static int virtio_balloon_oom_notify(struct notifier_block *nb, - unsigned long dummy, void *parm) +static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) { - struct virtio_balloon *vb = container_of(nb, - struct virtio_balloon, oom_nb); - unsigned long *freed = parm; + unregister_shrinker(&vb->shrinker); +} - *freed += leak_balloon(vb, VIRTIO_BALLOON_OOM_NR_PAGES) / - VIRTIO_BALLOON_PAGES_PER_PAGE; - update_balloon_size(vb); +static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) +{ + vb->shrinker.scan_objects = virtio_balloon_shrinker_scan; + vb->shrinker.count_objects = virtio_balloon_shrinker_count; + vb->shrinker.seeks = DEFAULT_SEEKS; - return NOTIFY_OK; + return register_shrinker(&vb->shrinker); } static int virtballoon_probe(struct virtio_device *vdev) @@ -900,35 +935,22 @@ static int virtballoon_probe(struct virtio_device *vdev) virtio_cwrite(vb->vdev, struct virtio_balloon_config, poison_val, &poison_val); } - - /* - * We're allowed to reuse any free pages, even if they are - * still to be processed by the host. - */ - vb->shrinker.scan_objects = virtio_balloon_shrinker_scan; - vb->shrinker.count_objects = virtio_balloon_shrinker_count; - vb->shrinker.seeks = DEFAULT_SEEKS; - err = register_shrinker(&vb->shrinker); + } + /* + * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a + * shrinker needs to be registered to relieve memory pressure. + */ + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { + err = virtio_balloon_register_shrinker(vb); if (err) goto out_del_balloon_wq; } - if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { - vb->oom_nb.notifier_call = virtio_balloon_oom_notify; - vb->oom_nb.priority = VIRTIO_BALLOON_OOM_NOTIFY_PRIORITY; - err = register_oom_notifier(&vb->oom_nb); - if (err < 0) - goto out_unregister_shrinker; - } - virtio_device_ready(vdev); if (towards_target(vb)) virtballoon_changed(vdev); return 0; -out_unregister_shrinker: - if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) - unregister_shrinker(&vb->shrinker); out_del_balloon_wq: if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) destroy_workqueue(vb->balloon_wq); @@ -967,11 +989,8 @@ static void virtballoon_remove(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; - if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) - unregister_oom_notifier(&vb->oom_nb); - if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) - unregister_shrinker(&vb->shrinker); - + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) + virtio_balloon_unregister_shrinker(vb); spin_lock_irq(&vb->stop_update_lock); vb->stop_update = true; spin_unlock_irq(&vb->stop_update_lock); -- cgit