// SPDX-License-Identifier: GPL-2.0-only /* * VDPA networking device simulator. * * Copyright (c) 2020, Red Hat Inc. All rights reserved. * Author: Jason Wang * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DRV_VERSION "0.1" #define DRV_AUTHOR "Jason Wang " #define DRV_DESC "vDPA Device Simulator" #define DRV_LICENSE "GPL v2" static int batch_mapping = 1; module_param(batch_mapping, int, 0444); MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); struct vdpasim_virtqueue { struct vringh vring; struct vringh_kiov iov; unsigned short head; bool ready; u64 desc_addr; u64 device_addr; u64 driver_addr; u32 num; void *private; irqreturn_t (*cb)(void *data); }; #define VDPASIM_QUEUE_ALIGN PAGE_SIZE #define VDPASIM_QUEUE_MAX 256 #define VDPASIM_DEVICE_ID 0x1 #define VDPASIM_VENDOR_ID 0 #define VDPASIM_VQ_NUM 0x2 #define VDPASIM_NAME "vdpasim-netdev" static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | (1ULL << VIRTIO_F_VERSION_1) | (1ULL << VIRTIO_F_ACCESS_PLATFORM); /* State of each vdpasim device */ struct vdpasim { struct vdpa_device vdpa; struct vdpasim_virtqueue vqs[VDPASIM_VQ_NUM]; struct work_struct work; /* spinlock to synchronize virtqueue state */ spinlock_t lock; struct virtio_net_config config; struct vhost_iotlb *iommu; void *buffer; u32 status; u32 generation; u64 features; /* spinlock to synchronize iommu table */ spinlock_t iommu_lock; }; /* TODO: cross-endian support */ static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) { return virtio_legacy_is_little_endian() || (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); } static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) { return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); } static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) { return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); } static struct vdpasim *vdpasim_dev; static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) { return container_of(vdpa, struct vdpasim, vdpa); } static struct vdpasim *dev_to_sim(struct device *dev) { struct vdpa_device *vdpa = dev_to_vdpa(dev); return vdpa_to_sim(vdpa); } static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) { struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX, false, (struct vring_desc *)(uintptr_t)vq->desc_addr, (struct vring_avail *) (uintptr_t)vq->driver_addr, (struct vring_used *) (uintptr_t)vq->device_addr); } static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) { vq->ready = false; vq->desc_addr = 0; vq->driver_addr = 0; vq->device_addr = 0; vq->cb = NULL; vq->private = NULL; vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL); } static void vdpasim_reset(struct vdpasim *vdpasim) { int i; for (i = 0; i < VDPASIM_VQ_NUM; i++) vdpasim_vq_reset(&vdpasim->vqs[i]); spin_lock(&vdpasim->iommu_lock); vhost_iotlb_reset(vdpasim->iommu); spin_unlock(&vdpasim->iommu_lock); vdpasim->features = 0; vdpasim->status = 0; ++vdpasim->generation; } static void vdpasim_work(struct work_struct *work) { struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; ssize_t read, write; size_t total_write; int pkts = 0; int err; spin_lock(&vdpasim->lock); if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) goto out; if (!txq->ready || !rxq->ready) goto out; while (true) { total_write = 0; err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL, &txq->head, GFP_ATOMIC); if (err <= 0) break; err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov, &rxq->head, GFP_ATOMIC); if (err <= 0) { vringh_complete_iotlb(&txq->vring, txq->head, 0); break; } while (true) { read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov, vdpasim->buffer, PAGE_SIZE); if (read <= 0) break; write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov, vdpasim->buffer, read); if (write <= 0) break; total_write += write; } /* Make sure data is wrote before advancing index */ smp_wmb(); vringh_complete_iotlb(&txq->vring, txq->head, 0); vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); /* Make sure used is visible before rasing the interrupt. */ smp_wmb(); local_bh_disable(); if (txq->cb) txq->cb(txq->private); if (rxq->cb) rxq->cb(rxq->private); local_bh_enable(); if (++pkts > 4) { schedule_work(&vdpasim->work); goto out; } } out: spin_unlock(&vdpasim->lock); } static int dir_to_perm(enum dma_data_direction dir) { int perm = -EFAULT; switch (dir) { case DMA_FROM_DEVICE: perm = VHOST_MAP_WO; break; case DMA_TO_DEVICE: perm = VHOST_MAP_RO; break; case DMA_BIDIRECTIONAL: perm = VHOST_MAP_RW; break; default: break; } return perm; } static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct vdpasim *vdpasim = dev_to_sim(dev); struct vhost_iotlb *iommu = vdpasim->iommu; u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset; int ret, perm = dir_to_perm(dir); if (perm < 0) return DMA_MAPPING_ERROR; /* For simplicity, use identical mapping to avoid e.g iova * allocator. */ spin_lock(&vdpasim->iommu_lock); ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, pa, dir_to_perm(dir)); spin_unlock(&vdpasim->iommu_lock); if (ret) return DMA_MAPPING_ERROR; return (dma_addr_t)(pa); } static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct vdpasim *vdpasim = dev_to_sim(dev); struct vhost_iotlb *iommu = vdpasim->iommu; spin_lock(&vdpasim->iommu_lock); vhost_iotlb_del_range(iommu, (u64)dma_addr, (u64)dma_addr + size - 1); spin_unlock(&vdpasim->iommu_lock); } static void *vdpasim_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, unsigned long attrs) { struct vdpasim *vdpasim = dev_to_sim(dev); struct vhost_iotlb *iommu = vdpasim->iommu; void *addr = kmalloc(size, flag); int ret; spin_lock(&vdpasim->iommu_lock); if (!addr) { *dma_addr = DMA_MAPPING_ERROR; } else { u64 pa = virt_to_phys(addr); ret = vhost_iotlb_add_range(iommu, (u64)pa, (u64)pa + size - 1, pa, VHOST_MAP_RW); if (ret) { *dma_addr = DMA_MAPPING_ERROR; kfree(addr); addr = NULL; } else *dma_addr = (dma_addr_t)pa; } spin_unlock(&vdpasim->iommu_lock); return addr; } static void vdpasim_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { struct vdpasim *vdpasim = dev_to_sim(dev); struct vhost_iotlb *iommu = vdpasim->iommu; spin_lock(&vdpasim->iommu_lock); vhost_iotlb_del_range(iommu, (u64)dma_addr, (u64)dma_addr + size - 1); spin_unlock(&vdpasim->iommu_lock); kfree(phys_to_virt((uintptr_t)dma_addr)); } static const struct dma_map_ops vdpasim_dma_ops = { .map_page = vdpasim_map_page, .unmap_page = vdpasim_unmap_page, .alloc = vdpasim_alloc_coherent, .free = vdpasim_free_coherent, }; static const struct vdpa_config_ops vdpasim_net_config_ops; static const struct vdpa_config_ops vdpasim_net_batch_config_ops; static struct vdpasim *vdpasim_create(void) { const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; struct device *dev; int ret = -ENOMEM; if (batch_mapping) ops = &vdpasim_net_batch_config_ops; else ops = &vdpasim_net_config_ops; vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM); if (!vdpasim) goto err_alloc; INIT_WORK(&vdpasim->work, vdpasim_work); spin_lock_init(&vdpasim->lock); spin_lock_init(&vdpasim->iommu_lock); dev = &vdpasim->vdpa.dev; dev->coherent_dma_mask = DMA_BIT_MASK(64); set_dma_ops(dev, &vdpasim_dma_ops); vdpasim->iommu = vhost_iotlb_alloc(2048, 0); if (!vdpasim->iommu) goto err_iommu; vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!vdpasim->buffer) goto err_iommu; eth_random_addr(vdpasim->config.mac); vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); vdpasim->vdpa.dma_dev = dev; ret = vdpa_register_device(&vdpasim->vdpa); if (ret) goto err_iommu; return vdpasim; err_iommu: put_device(dev); err_alloc: return ERR_PTR(ret); } static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx, u64 desc_area, u64 driver_area, u64 device_area) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; vq->desc_addr = desc_area; vq->driver_addr = driver_area; vq->device_addr = device_area; return 0; } static void vdpasim_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; vq->num = num; } static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; if (vq->ready) schedule_work(&vdpasim->work); } static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx, struct vdpa_callback *cb) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; vq->cb = cb->callback; vq->private = cb->private; } static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; spin_lock(&vdpasim->lock); vq->ready = ready; if (vq->ready) vdpasim_queue_ready(vdpasim, idx); spin_unlock(&vdpasim->lock); } static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; return vq->ready; } static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, const struct vdpa_vq_state *state) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; struct vringh *vrh = &vq->vring; spin_lock(&vdpasim->lock); vrh->last_avail_idx = state->avail_index; spin_unlock(&vdpasim->lock); return 0; } static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx, struct vdpa_vq_state *state) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; struct vringh *vrh = &vq->vring; state->avail_index = vrh->last_avail_idx; return 0; } static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) { return VDPASIM_QUEUE_ALIGN; } static u64 vdpasim_get_features(struct vdpa_device *vdpa) { return vdpasim_features; } static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct virtio_net_config *config = &vdpasim->config; /* DMA mapping must be done by driver */ if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) return -EINVAL; vdpasim->features = features & vdpasim_features; /* We generally only know whether guest is using the legacy interface * here, so generally that's the earliest we can set config fields. * Note: We actually require VIRTIO_F_ACCESS_PLATFORM above which * implies VIRTIO_F_VERSION_1, but let's not try to be clever here. */ config->mtu = cpu_to_vdpasim16(vdpasim, 1500); config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); return 0; } static void vdpasim_set_config_cb(struct vdpa_device *vdpa, struct vdpa_callback *cb) { /* We don't support config interrupt */ } static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa) { return VDPASIM_QUEUE_MAX; } static u32 vdpasim_get_device_id(struct vdpa_device *vdpa) { return VDPASIM_DEVICE_ID; } static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa) { return VDPASIM_VENDOR_ID; } static u8 vdpasim_get_status(struct vdpa_device *vdpa) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); u8 status; spin_lock(&vdpasim->lock); status = vdpasim->status; spin_unlock(&vdpasim->lock); return status; } static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); spin_lock(&vdpasim->lock); vdpasim->status = status; if (status == 0) vdpasim_reset(vdpasim); spin_unlock(&vdpasim->lock); } static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, void *buf, unsigned int len) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); if (offset + len < sizeof(struct virtio_net_config)) memcpy(buf, (u8 *)&vdpasim->config + offset, len); } static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, const void *buf, unsigned int len) { /* No writable config supportted by vdpasim */ } static u32 vdpasim_get_generation(struct vdpa_device *vdpa) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); return vdpasim->generation; } static int vdpasim_set_map(struct vdpa_device *vdpa, struct vhost_iotlb *iotlb) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vhost_iotlb_map *map; u64 start = 0ULL, last = 0ULL - 1; int ret; spin_lock(&vdpasim->iommu_lock); vhost_iotlb_reset(vdpasim->iommu); for (map = vhost_iotlb_itree_first(iotlb, start, last); map; map = vhost_iotlb_itree_next(map, start, last)) { ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, map->last, map->addr, map->perm); if (ret) goto err; } spin_unlock(&vdpasim->iommu_lock); return 0; err: vhost_iotlb_reset(vdpasim->iommu); spin_unlock(&vdpasim->iommu_lock); return ret; } static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, u64 pa, u32 perm) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); int ret; spin_lock(&vdpasim->iommu_lock); ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa, perm); spin_unlock(&vdpasim->iommu_lock); return ret; } static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); spin_lock(&vdpasim->iommu_lock); vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1); spin_unlock(&vdpasim->iommu_lock); return 0; } static void vdpasim_free(struct vdpa_device *vdpa) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); cancel_work_sync(&vdpasim->work); kfree(vdpasim->buffer); if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); } static const struct vdpa_config_ops vdpasim_net_config_ops = { .set_vq_address = vdpasim_set_vq_address, .set_vq_num = vdpasim_set_vq_num, .kick_vq = vdpasim_kick_vq, .set_vq_cb = vdpasim_set_vq_cb, .set_vq_ready = vdpasim_set_vq_ready, .get_vq_ready = vdpasim_get_vq_ready, .set_vq_state = vdpasim_set_vq_state, .get_vq_state = vdpasim_get_vq_state, .get_vq_align = vdpasim_get_vq_align, .get_features = vdpasim_get_features, .set_features = vdpasim_set_features, .set_config_cb = vdpasim_set_config_cb, .get_vq_num_max = vdpasim_get_vq_num_max, .get_device_id = vdpasim_get_device_id, .get_vendor_id = vdpasim_get_vendor_id, .get_status = vdpasim_get_status, .set_status = vdpasim_set_status, .get_config = vdpasim_get_config, .set_config = vdpasim_set_config, .get_generation = vdpasim_get_generation, .dma_map = vdpasim_dma_map, .dma_unmap = vdpasim_dma_unmap, .free = vdpasim_free, }; static const struct vdpa_config_ops vdpasim_net_batch_config_ops = { .set_vq_address = vdpasim_set_vq_address, .set_vq_num = vdpasim_set_vq_num, .kick_vq = vdpasim_kick_vq, .set_vq_cb = vdpasim_set_vq_cb, .set_vq_ready = vdpasim_set_vq_ready, .get_vq_ready = vdpasim_get_vq_ready, .set_vq_state = vdpasim_set_vq_state, .get_vq_state = vdpasim_get_vq_state, .get_vq_align = vdpasim_get_vq_align, .get_features = vdpasim_get_features, .set_features = vdpasim_set_features, .set_config_cb = vdpasim_set_config_cb, .get_vq_num_max = vdpasim_get_vq_num_max, .get_device_id = vdpasim_get_device_id, .get_vendor_id = vdpasim_get_vendor_id, .get_status = vdpasim_get_status, .set_status = vdpasim_set_status, .get_config = vdpasim_get_config, .set_config = vdpasim_set_config, .get_generation = vdpasim_get_generation, .set_map = vdpasim_set_map, .free = vdpasim_free, }; static int __init vdpasim_dev_init(void) { vdpasim_dev = vdpasim_create(); if (!IS_ERR(vdpasim_dev)) return 0; return PTR_ERR(vdpasim_dev); } static void __exit vdpasim_dev_exit(void) { struct vdpa_device *vdpa = &vdpasim_dev->vdpa; vdpa_unregister_device(vdpa); } module_init(vdpasim_dev_init) module_exit(vdpasim_dev_exit) MODULE_VERSION(DRV_VERSION); MODULE_LICENSE(DRV_LICENSE); MODULE_AUTHOR(DRV_AUTHOR); MODULE_DESCRIPTION(DRV_DESC);