diff options
Diffstat (limited to 'tools/virtio')
54 files changed, 2778 insertions, 196 deletions
diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore new file mode 100644 index 000000000000..7e47b281c442 --- /dev/null +++ b/tools/virtio/.gitignore @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +*.d +virtio_test +vhost_net_test +vringh_test +virtio-trace/trace-agent diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index 3187c62d9814..e25e99c1c3b7 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -1,14 +1,56 @@ +# SPDX-License-Identifier: GPL-2.0 all: test mod -test: virtio_test vringh_test +test: virtio_test vringh_test vhost_net_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o +vhost_net_test: virtio_ring.o vhost_net_test.o -CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE +try-run = $(shell set -e; \ + if ($(1)) >/dev/null 2>&1; \ + then echo "$(2)"; \ + else echo "$(3)"; \ + fi) + +__cc-option = $(call try-run,\ + $(1) -Werror $(2) -c -x c /dev/null -o /dev/null,$(2),) +cc-option = $(call __cc-option, $(CC),$(1)) + +CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h $(call cc-option,-mfunction-return=thunk) $(call cc-option,-fcf-protection=none) $(call cc-option,-mindirect-branch-register) + +CFLAGS += -pthread +LDFLAGS += -pthread vpath %.c ../../drivers/virtio ../../drivers/vhost mod: - ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test -.PHONY: all test mod clean + ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} + +#oot: build vhost as an out of tree module for a distro kernel +#no effort is taken to make it actually build or work, but tends to mostly work +#if the distro kernel is very close to upstream +#unsupported! this is a development tool only, don't use the +#resulting modules in production! +OOT_KSRC=/lib/modules/$$(uname -r)/build +OOT_VHOST=`pwd`/../../drivers/vhost +#Everyone depends on vhost +#Tweak the below to enable more modules +OOT_CONFIGS=\ + CONFIG_VHOST=m \ + CONFIG_VHOST_NET=n \ + CONFIG_VHOST_SCSI=n \ + CONFIG_VHOST_VSOCK=n \ + CONFIG_VHOST_RING=n +OOT_BUILD=KCFLAGS="-I "${OOT_VHOST} ${MAKE} -C ${OOT_KSRC} V=${V} +oot-build: + echo "UNSUPPORTED! Don't use the resulting modules in production!" + ${OOT_BUILD} M=`pwd`/vhost_test + ${OOT_BUILD} M=${OOT_VHOST} ${OOT_CONFIGS} + +oot-clean: oot-build +oot: oot-build +oot-clean: OOT_BUILD+=clean + +.PHONY: all test mod clean vhost oot oot-clean oot-build clean: - ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ - vhost_test/Module.symvers vhost_test/modules.order *.d + ${RM} *.o vringh_test virtio_test vhost_net_test vhost_test/*.o \ + vhost_test/.*.cmd vhost_test/Module.symvers \ + vhost_test/modules.order *.d -include *.d diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index aff61e13306c..468435ed64e6 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -1,13 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <stdlib.h> #if defined(__i386__) || defined(__x86_64__) #define barrier() asm volatile("" ::: "memory") -#define mb() __sync_synchronize() - -#define smp_mb() mb() -# define smp_rmb() barrier() -# define smp_wmb() barrier() +#define virt_mb() __sync_synchronize() +#define virt_rmb() barrier() +#define virt_wmb() barrier() +/* Atomic store should be enough, but gcc generates worse code in that case. */ +#define virt_store_mb(var, value) do { \ + typeof(var) virt_store_mb_value = (value); \ + __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \ + __ATOMIC_SEQ_CST); \ + barrier(); \ +} while (0); +/* Weak barriers should be used. If not - it's a bug */ +# define mb() abort() +# define dma_rmb() abort() +# define dma_wmb() abort() +#elif defined(__aarch64__) +#define dmb(opt) asm volatile("dmb " #opt : : : "memory") +#define virt_mb() __sync_synchronize() +#define virt_rmb() dmb(ishld) +#define virt_wmb() dmb(ishst) +#define virt_store_mb(var, value) do { WRITE_ONCE(var, value); dmb(ish); } while (0) /* Weak barriers should be used. If not - it's a bug */ -# define rmb() abort() -# define wmb() abort() +# define mb() abort() +# define dma_rmb() abort() +# define dma_wmb() abort() #else #error Please fill in barrier macros #endif diff --git a/tools/virtio/crypto/hash.h b/tools/virtio/crypto/hash.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/virtio/crypto/hash.h diff --git a/tools/virtio/generated/autoconf.h b/tools/virtio/generated/autoconf.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/virtio/generated/autoconf.h diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h index fb94f0787c47..51a919083d9b 100644 --- a/tools/virtio/linux/bug.h +++ b/tools/virtio/linux/bug.h @@ -1,10 +1,11 @@ -#ifndef BUG_H -#define BUG_H +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BUG_H +#define _LINUX_BUG_H -#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) +#include <asm/bug.h> -#define BUILD_BUG_ON(x) +#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) #define BUG() abort() -#endif /* BUG_H */ +#endif /* _LINUX_BUG_H */ diff --git a/tools/virtio/linux/build_bug.h b/tools/virtio/linux/build_bug.h new file mode 100644 index 000000000000..cdbb75e28a60 --- /dev/null +++ b/tools/virtio/linux/build_bug.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BUILD_BUG_H +#define _LINUX_BUILD_BUG_H + +#define BUILD_BUG_ON(x) + +#endif /* _LINUX_BUILD_BUG_H */ diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h new file mode 100644 index 000000000000..725b93bfeee1 --- /dev/null +++ b/tools/virtio/linux/compiler.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_COMPILER_H +#define LINUX_COMPILER_H + +#include "../../../include/linux/compiler_types.h" + +#define WRITE_ONCE(var, val) \ + (*((volatile typeof(val) *)(&(var))) = (val)) + +#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var)))) + +#define __aligned(x) __attribute((__aligned__(x))) + +/** + * data_race - mark an expression as containing intentional data races + * + * This data_race() macro is useful for situations in which data races + * should be forgiven. One example is diagnostic code that accesses + * shared variables but is not a part of the core synchronization design. + * For example, if accesses to a given variable are protected by a lock, + * except for diagnostic code, then the accesses under the lock should + * be plain C-language accesses and those in the diagnostic code should + * use data_race(). This way, KCSAN will complain if buggy lockless + * accesses to that variable are introduced, even if the buggy accesses + * are protected by READ_ONCE() or WRITE_ONCE(). + * + * This macro *does not* affect normal code generation, but is a hint + * to tooling that data races here are to be ignored. If the access must + * be atomic *and* KCSAN should ignore the access, use both data_race() + * and READ_ONCE(), for example, data_race(READ_ONCE(x)). + */ +#define data_race(expr) \ +({ \ + auto __v = (expr); \ + __v; \ +}) + +#endif diff --git a/tools/virtio/linux/cpumask.h b/tools/virtio/linux/cpumask.h new file mode 100644 index 000000000000..307da69d6b26 --- /dev/null +++ b/tools/virtio/linux/cpumask.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CPUMASK_H +#define _LINUX_CPUMASK_H + +#include <linux/kernel.h> + +#endif /* _LINUX_CPUMASK_H */ diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h new file mode 100644 index 000000000000..095958461788 --- /dev/null +++ b/tools/virtio/linux/dma-mapping.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_DMA_MAPPING_H +#define _LINUX_DMA_MAPPING_H + +#ifdef CONFIG_HAS_DMA +# error Virtio userspace code does not support CONFIG_HAS_DMA +#endif + +enum dma_data_direction { + DMA_BIDIRECTIONAL = 0, + DMA_TO_DEVICE = 1, + DMA_FROM_DEVICE = 2, + DMA_NONE = 3, +}; + +#define dma_alloc_coherent(d, s, hp, f) ({ \ + void *__dma_alloc_coherent_p = kmalloc((s), (f)); \ + *(hp) = (unsigned long)__dma_alloc_coherent_p; \ + __dma_alloc_coherent_p; \ +}) + +#define dma_free_coherent(d, s, p, h) kfree(p) + +#define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o)) + +#define dma_map_single(d, p, s, dir) (virt_to_phys(p)) +#define dma_map_single_attrs(d, p, s, dir, a) (virt_to_phys(p)) +#define dma_mapping_error(...) (0) + +#define dma_unmap_single(d, a, s, r) do { (void)(d); (void)(a); (void)(s); (void)(r); } while (0) +#define dma_unmap_page(d, a, s, r) do { (void)(d); (void)(a); (void)(s); (void)(r); } while (0) + +#define sg_dma_address(sg) (0) +#define sg_dma_len(sg) (0) +#define dma_need_sync(v, a) (0) +#define dma_unmap_single_attrs(d, a, s, r, t) do { \ + (void)(d); (void)(a); (void)(s); (void)(r); (void)(t); \ +} while (0) +#define dma_sync_single_range_for_cpu(d, a, o, s, r) do { \ + (void)(d); (void)(a); (void)(o); (void)(s); (void)(r); \ +} while (0) +#define dma_sync_single_range_for_device(d, a, o, s, r) do { \ + (void)(d); (void)(a); (void)(o); (void)(s); (void)(r); \ +} while (0) +#define dma_max_mapping_size(...) SIZE_MAX + +/* + * A dma_addr_t can hold any valid DMA or bus address for the platform. It can + * be given to a device to use as a DMA source or target. It is specific to a + * given device and there may be a translation between the CPU physical address + * space and the bus address space. + * + * DMA_MAPPING_ERROR is the magic error code if a mapping failed. It should not + * be used directly in drivers, but checked for using dma_mapping_error() + * instead. + */ +#define DMA_MAPPING_ERROR (~(dma_addr_t)0) + +#endif diff --git a/tools/virtio/linux/err.h b/tools/virtio/linux/err.h index e32eff8b2a14..0943c644a701 100644 --- a/tools/virtio/linux/err.h +++ b/tools/virtio/linux/err.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef ERR_H #define ERR_H #define MAX_ERRNO 4095 diff --git a/tools/virtio/linux/export.h b/tools/virtio/linux/export.h index 7311d326894a..416875e29254 100644 --- a/tools/virtio/linux/export.h +++ b/tools/virtio/linux/export.h @@ -1,5 +1,3 @@ -#define EXPORT_SYMBOL(sym) -#define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_SYMBOL_GPL_FUTURE(sym) -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) +#define EXPORT_SYMBOL_GPL(sym) extern typeof(sym) sym +#define EXPORT_SYMBOL(sym) extern typeof(sym) sym + diff --git a/tools/virtio/linux/gfp.h b/tools/virtio/linux/gfp.h new file mode 100644 index 000000000000..43d146f236f1 --- /dev/null +++ b/tools/virtio/linux/gfp.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GFP_H +#define __LINUX_GFP_H + +#include <linux/topology.h> + +#endif diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index fba705963968..6702008f7f5c 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef KERNEL_H #define KERNEL_H #include <stdbool.h> @@ -8,7 +9,12 @@ #include <assert.h> #include <stdarg.h> +#include <linux/compiler.h> +#include "../../../include/linux/container_of.h" +#include <linux/log2.h> #include <linux/types.h> +#include <linux/overflow.h> +#include <linux/list.h> #include <linux/printk.h> #include <linux/bug.h> #include <errno.h> @@ -19,9 +25,15 @@ #define PAGE_SIZE getpagesize() #define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK) + +/* generic data direction definitions */ +#define READ 0 +#define WRITE 1 typedef unsigned long long dma_addr_t; typedef size_t __kernel_size_t; +typedef unsigned int __wsum; struct page { unsigned long long dummy; @@ -38,13 +50,6 @@ struct page { #define __printf(a,b) __attribute__((format(printf,a,b))) -typedef enum { - GFP_KERNEL, - GFP_ATOMIC, - __GFP_HIGHMEM, - __GFP_HIGH -} gfp_t; - #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) extern void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; @@ -54,6 +59,23 @@ static inline void *kmalloc(size_t s, gfp_t gfp) return __kmalloc_fake; return malloc(s); } +static inline void *kmalloc_array(unsigned n, size_t s, gfp_t gfp) +{ + return kmalloc(n * s, gfp); +} + +static inline void *kzalloc(size_t s, gfp_t gfp) +{ + void *p = kmalloc(s, gfp); + + memset(p, 0, s); + return p; +} + +static inline void *alloc_pages_exact(size_t s, gfp_t gfp) +{ + return kmalloc(s, gfp); +} static inline void kfree(void *p) { @@ -62,6 +84,11 @@ static inline void kfree(void *p) free(p); } +static inline void free_pages_exact(void *p, size_t s) +{ + kfree(p); +} + static inline void *krealloc(void *p, size_t s, gfp_t gfp) { return realloc(p, s); @@ -81,12 +108,6 @@ static inline void free_page(unsigned long addr) free((void *)addr); } -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) - -#define uninitialized_var(x) x = x - # ifndef likely # define likely(x) (__builtin_expect(!!(x), 1)) # endif @@ -94,6 +115,16 @@ static inline void free_page(unsigned long addr) # define unlikely(x) (__builtin_expect(!!(x), 0)) # endif +static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t gfp) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return krealloc(p, bytes, gfp); +} + #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) #ifdef DEBUG #define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) @@ -102,6 +133,7 @@ static inline void free_page(unsigned long addr) #endif #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#define dev_warn_once(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ diff --git a/tools/virtio/linux/kmemleak.h b/tools/virtio/linux/kmemleak.h new file mode 100644 index 000000000000..c07072270e2f --- /dev/null +++ b/tools/virtio/linux/kmemleak.h @@ -0,0 +1,3 @@ +static inline void kmemleak_ignore(const void *ptr) +{ +} diff --git a/tools/virtio/linux/kmsan.h b/tools/virtio/linux/kmsan.h new file mode 100644 index 000000000000..6cd2e3efd03d --- /dev/null +++ b/tools/virtio/linux/kmsan.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KMSAN_H +#define _LINUX_KMSAN_H + +#include <linux/gfp.h> + +inline void kmsan_handle_dma(phys_addr_t phys, size_t size, + enum dma_data_direction dir) +{ +} + +#endif /* _LINUX_KMSAN_H */ diff --git a/tools/virtio/linux/mm_types.h b/tools/virtio/linux/mm_types.h new file mode 100644 index 000000000000..356ba4d496f6 --- /dev/null +++ b/tools/virtio/linux/mm_types.h @@ -0,0 +1,3 @@ +struct folio { + struct page page; +}; diff --git a/tools/virtio/linux/module.h b/tools/virtio/linux/module.h index 28ce95a05997..b91681fc1571 100644 --- a/tools/virtio/linux/module.h +++ b/tools/virtio/linux/module.h @@ -1,6 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #include <linux/export.h> #define MODULE_LICENSE(__MODULE_LICENSE_value) \ static __attribute__((unused)) const char *__MODULE_LICENSE_name = \ __MODULE_LICENSE_value +#ifndef MODULE_AUTHOR +#define MODULE_AUTHOR(x) +#endif + +#ifndef MODULE_DESCRIPTION +#define MODULE_DESCRIPTION(x) +#endif diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h index 68c9e2adc996..74d9e1825748 100644 --- a/tools/virtio/linux/scatterlist.h +++ b/tools/virtio/linux/scatterlist.h @@ -1,6 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef SCATTERLIST_H #define SCATTERLIST_H #include <linux/kernel.h> +#include <linux/bug.h> struct scatterlist { unsigned long page_link; @@ -35,7 +37,6 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) */ BUG_ON((unsigned long) page & 0x03); #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif sg->page_link = page_link | (unsigned long) page; @@ -66,7 +67,6 @@ static inline void sg_set_page(struct scatterlist *sg, struct page *page, static inline struct page *sg_page(struct scatterlist *sg) { #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif return (struct page *)((sg)->page_link & ~0x3); @@ -115,9 +115,6 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, **/ static inline void sg_mark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif /* * Set termination bit, clear potential chain bit */ @@ -135,17 +132,11 @@ static inline void sg_mark_end(struct scatterlist *sg) **/ static inline void sg_unmark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif sg->page_link &= ~0x02; } static inline struct scatterlist *sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif if (sg_is_last(sg)) return NULL; @@ -159,13 +150,6 @@ static inline struct scatterlist *sg_next(struct scatterlist *sg) static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) { memset(sgl, 0, sizeof(*sgl) * nents); -#ifdef CONFIG_DEBUG_SG - { - unsigned int i; - for (i = 0; i < nents; i++) - sgl[i].sg_magic = SG_MAGIC; - } -#endif sg_mark_end(&sgl[nents - 1]); } diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h index 81baeac8ae40..319dcaa07755 100644 --- a/tools/virtio/linux/slab.h +++ b/tools/virtio/linux/slab.h @@ -1,2 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef LINUX_SLAB_H +#define GFP_KERNEL 0 +#define GFP_ATOMIC 0 +#define __GFP_NOWARN 0 +#define __GFP_ZERO 0 #endif diff --git a/tools/virtio/linux/spinlock.h b/tools/virtio/linux/spinlock.h new file mode 100644 index 000000000000..028e3cdcc5d3 --- /dev/null +++ b/tools/virtio/linux/spinlock.h @@ -0,0 +1,56 @@ +#ifndef SPINLOCK_H_STUB +#define SPINLOCK_H_STUB + +#include <pthread.h> + +typedef pthread_spinlock_t spinlock_t; + +static inline void spin_lock_init(spinlock_t *lock) +{ + int r = pthread_spin_init(lock, 0); + assert(!r); +} + +static inline void spin_lock(spinlock_t *lock) +{ + int ret = pthread_spin_lock(lock); + assert(!ret); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + int ret = pthread_spin_unlock(lock); + assert(!ret); +} + +static inline void spin_lock_bh(spinlock_t *lock) +{ + spin_lock(lock); +} + +static inline void spin_unlock_bh(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static inline void spin_lock_irq(spinlock_t *lock) +{ + spin_lock(lock); +} + +static inline void spin_unlock_irq(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static inline void spin_lock_irqsave(spinlock_t *lock, unsigned long f) +{ + spin_lock(lock); +} + +static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f) +{ + spin_unlock(lock); +} + +#endif diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h new file mode 100644 index 000000000000..e0f610d08006 --- /dev/null +++ b/tools/virtio/linux/thread_info.h @@ -0,0 +1 @@ +#define check_copy_size(A, B, C) (1) diff --git a/tools/virtio/linux/topology.h b/tools/virtio/linux/topology.h new file mode 100644 index 000000000000..910794afb993 --- /dev/null +++ b/tools/virtio/linux/topology.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TOPOLOGY_H +#define _LINUX_TOPOLOGY_H + +#include <linux/cpumask.h> + +#endif /* _LINUX_TOPOLOGY_H */ diff --git a/tools/virtio/linux/types.h b/tools/virtio/linux/types.h deleted file mode 100644 index f8ebb9a2b3d6..000000000000 --- a/tools/virtio/linux/types.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef TYPES_H -#define TYPES_H -#include <stdint.h> - -#define __force -#define __user -#define __must_check -#define __cold - -typedef uint64_t u64; -typedef int64_t s64; -typedef uint32_t u32; -typedef int32_t s32; -typedef uint16_t u16; -typedef int16_t s16; -typedef uint8_t u8; -typedef int8_t s8; - -typedef uint64_t __u64; -typedef int64_t __s64; -typedef uint32_t __u32; -typedef int32_t __s32; -typedef uint16_t __u16; -typedef int16_t __s16; -typedef uint8_t __u8; -typedef int8_t __s8; - -#endif /* TYPES_H */ diff --git a/tools/virtio/linux/uaccess.h b/tools/virtio/linux/uaccess.h index 0a578fe18653..f13828e0c409 100644 --- a/tools/virtio/linux/uaccess.h +++ b/tools/virtio/linux/uaccess.h @@ -1,27 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef UACCESS_H #define UACCESS_H -extern void *__user_addr_min, *__user_addr_max; -#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) +#include <linux/compiler.h> -static inline void __chk_user_ptr(const volatile void *p, size_t size) -{ - assert(p >= __user_addr_min && p + size <= __user_addr_max); -} +extern void *__user_addr_min, *__user_addr_max; #define put_user(x, ptr) \ ({ \ typeof(ptr) __pu_ptr = (ptr); \ - __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \ - ACCESS_ONCE(*(__pu_ptr)) = x; \ + __chk_user_ptr(__pu_ptr); \ + WRITE_ONCE(*(__pu_ptr), x); \ 0; \ }) #define get_user(x, ptr) \ ({ \ typeof(ptr) __pu_ptr = (ptr); \ - __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \ - x = ACCESS_ONCE(*(__pu_ptr)); \ + __chk_user_ptr(__pu_ptr); \ + x = READ_ONCE(*(__pu_ptr)); \ 0; \ }) @@ -35,7 +32,6 @@ static void volatile_memcpy(volatile char *to, const volatile char *from, static inline int copy_from_user(void *to, const void __user volatile *from, unsigned long n) { - __chk_user_ptr(from, n); volatile_memcpy(to, from, n); return 0; } @@ -43,7 +39,6 @@ static inline int copy_from_user(void *to, const void __user volatile *from, static inline int copy_to_user(void __user volatile *to, const void *from, unsigned long n) { - __chk_user_ptr(to, n); volatile_memcpy(to, from, n); return 0; } diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 844783040703..5d3440f474dd 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -1,48 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef LINUX_VIRTIO_H #define LINUX_VIRTIO_H #include <linux/scatterlist.h> #include <linux/kernel.h> +#include <linux/spinlock.h> -/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */ -#define list_add_tail(a, b) do {} while (0) -#define list_del(a) do {} while (0) - -#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BITS_PER_BYTE 8 -#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE) -#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) - -/* TODO: Not atomic as it should be: - * we don't use this for anything important. */ -static inline void clear_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - - *p &= ~mask; -} - -static inline int test_bit(int nr, const volatile unsigned long *addr) -{ - return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); -} -/* end of stubs */ +struct device { + void *parent; +}; struct virtio_device { - void *dev; - unsigned long features[1]; + struct device dev; + u64 features; + struct list_head vqs; + spinlock_t vqs_list_lock; + const struct virtio_config_ops *config; }; struct virtqueue { - /* TODO: commented as list macros are empty stubs for now. - * Broken but enough for virtio_ring.c - * struct list_head list; */ + struct list_head list; void (*callback)(struct virtqueue *vq); const char *name; struct virtio_device *vdev; unsigned int index; unsigned int num_free; + unsigned int num_max; void *priv; + bool reset; }; /* Interfaces exported by virtio_ring. */ @@ -63,7 +47,7 @@ int virtqueue_add_inbuf(struct virtqueue *vq, void *data, gfp_t gfp); -void virtqueue_kick(struct virtqueue *vq); +bool virtqueue_kick(struct virtqueue *vq); void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); @@ -78,8 +62,9 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, + bool ctx, void *pages, - void (*notify)(struct virtqueue *vq), + bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name); void vring_del_virtqueue(struct virtqueue *vq); diff --git a/tools/virtio/linux/virtio_byteorder.h b/tools/virtio/linux/virtio_byteorder.h new file mode 100644 index 000000000000..5b50f7eebd9c --- /dev/null +++ b/tools/virtio/linux/virtio_byteorder.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_BYTEORDER_STUB_H +#define _LINUX_VIRTIO_BYTEORDER_STUB_H + +#include <asm/byteorder.h> +#include "../../include/linux/byteorder/generic.h" +#include "../../include/linux/virtio_byteorder.h" + +#endif diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index 5049967f99f7..42a564f22f2d 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -1,6 +1,101 @@ -#define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 32 +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VIRTIO_CONFIG_H +#define LINUX_VIRTIO_CONFIG_H +#include <linux/virtio_byteorder.h> +#include <linux/virtio.h> +#include <uapi/linux/virtio_config.h> + +struct virtio_config_ops { + int (*disable_vq_and_reset)(struct virtqueue *vq); + int (*enable_vq_after_reset)(struct virtqueue *vq); +}; + +/* + * __virtio_test_bit - helper to test feature bits. For use by transports. + * Devices should normally use virtio_has_feature, + * which includes more checks. + * @vdev: the device + * @fbit: the feature bit + */ +static inline bool __virtio_test_bit(const struct virtio_device *vdev, + unsigned int fbit) +{ + return vdev->features & (1ULL << fbit); +} + +/** + * __virtio_set_bit - helper to set feature bits. For use by transports. + * @vdev: the device + * @fbit: the feature bit + */ +static inline void __virtio_set_bit(struct virtio_device *vdev, + unsigned int fbit) +{ + vdev->features |= (1ULL << fbit); +} + +/** + * __virtio_clear_bit - helper to clear feature bits. For use by transports. + * @vdev: the device + * @fbit: the feature bit + */ +static inline void __virtio_clear_bit(struct virtio_device *vdev, + unsigned int fbit) +{ + vdev->features &= ~(1ULL << fbit); +} #define virtio_has_feature(dev, feature) \ - test_bit((feature), (dev)->features) + (__virtio_test_bit((dev), feature)) + +/** + * virtio_has_dma_quirk - determine whether this device has the DMA quirk + * @vdev: the device + */ +static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev) +{ + /* + * Note the reverse polarity of the quirk feature (compared to most + * other features), this is for compatibility with legacy systems. + */ + return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM); +} + +static inline bool virtio_is_little_endian(struct virtio_device *vdev) +{ + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) || + virtio_legacy_is_little_endian(); +} + +/* Memory accessors */ +static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val) +{ + return __virtio16_to_cpu(virtio_is_little_endian(vdev), val); +} + +static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val) +{ + return __cpu_to_virtio16(virtio_is_little_endian(vdev), val); +} + +static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val) +{ + return __virtio32_to_cpu(virtio_is_little_endian(vdev), val); +} + +static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val) +{ + return __cpu_to_virtio32(virtio_is_little_endian(vdev), val); +} + +static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val) +{ + return __virtio64_to_cpu(virtio_is_little_endian(vdev), val); +} + +static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) +{ + return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); +} +#endif diff --git a/tools/virtio/linux/vringh.h b/tools/virtio/linux/vringh.h index 9348957be56e..e11c6aece734 100644 --- a/tools/virtio/linux/vringh.h +++ b/tools/virtio/linux/vringh.h @@ -1 +1,2 @@ +#include <limits.h> #include "../../../include/linux/vringh.h" diff --git a/tools/virtio/ringtest/.gitignore b/tools/virtio/ringtest/.gitignore new file mode 100644 index 000000000000..100b9e30c0f4 --- /dev/null +++ b/tools/virtio/ringtest/.gitignore @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +/noring +/ptr_ring +/ring +/virtio_ring_0_9 +/virtio_ring_inorder +/virtio_ring_poll diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile new file mode 100644 index 000000000000..85c98c2810fb --- /dev/null +++ b/tools/virtio/ringtest/Makefile @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 +all: + +all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring noring + +CFLAGS += -Wall +CFLAGS += -pthread -O2 -ggdb -flto -fwhole-program +LDFLAGS += -pthread -O2 -ggdb -flto -fwhole-program + +main.o: main.c main.h +ring.o: ring.c main.h +ptr_ring.o: ptr_ring.c main.h ../../../include/linux/ptr_ring.h +virtio_ring_0_9.o: virtio_ring_0_9.c main.h +virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h +virtio_ring_inorder.o: virtio_ring_inorder.c virtio_ring_0_9.c main.h +ring: ring.o main.o +virtio_ring_0_9: virtio_ring_0_9.o main.o +virtio_ring_poll: virtio_ring_poll.o main.o +virtio_ring_inorder: virtio_ring_inorder.o main.o +ptr_ring: ptr_ring.o main.o +noring: noring.o main.o +clean: + -rm main.o + -rm ring.o ring + -rm virtio_ring_0_9.o virtio_ring_0_9 + -rm virtio_ring_poll.o virtio_ring_poll + -rm virtio_ring_inorder.o virtio_ring_inorder + -rm ptr_ring.o ptr_ring + -rm noring.o noring + +.PHONY: all clean diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README new file mode 100644 index 000000000000..d83707a336c9 --- /dev/null +++ b/tools/virtio/ringtest/README @@ -0,0 +1,6 @@ +Partial implementation of various ring layouts, useful to tune virtio design. +Uses shared memory heavily. + +Typical use: + +# sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c new file mode 100644 index 000000000000..e471d8e7cfaa --- /dev/null +++ b/tools/virtio/ringtest/main.c @@ -0,0 +1,391 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * Command line processing and common functions for ring benchmarking. + */ +#define _GNU_SOURCE +#include <getopt.h> +#include <pthread.h> +#include <assert.h> +#include <sched.h> +#include "main.h" +#include <sys/eventfd.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <limits.h> + +int runcycles = 10000000; +int max_outstanding = INT_MAX; +int batch = 1; +int param = 0; + +bool do_sleep = false; +bool do_relax = false; +bool do_exit = true; + +unsigned ring_size = 256; + +static int kickfd = -1; +static int callfd = -1; + +void notify(int fd) +{ + unsigned long long v = 1; + int r; + + vmexit(); + r = write(fd, &v, sizeof v); + assert(r == sizeof v); + vmentry(); +} + +void wait_for_notify(int fd) +{ + unsigned long long v = 1; + int r; + + vmexit(); + r = read(fd, &v, sizeof v); + assert(r == sizeof v); + vmentry(); +} + +void kick(void) +{ + notify(kickfd); +} + +void wait_for_kick(void) +{ + wait_for_notify(kickfd); +} + +void call(void) +{ + notify(callfd); +} + +void wait_for_call(void) +{ + wait_for_notify(callfd); +} + +void set_affinity(const char *arg) +{ + cpu_set_t cpuset; + int ret; + pthread_t self; + long int cpu; + char *endptr; + + if (!arg) + return; + + cpu = strtol(arg, &endptr, 0); + assert(!*endptr); + + assert(cpu >= 0 && cpu < CPU_SETSIZE); + + self = pthread_self(); + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset); + assert(!ret); +} + +void poll_used(void) +{ + while (used_empty()) + busy_wait(); +} + +static void __attribute__((__flatten__)) run_guest(void) +{ + int completed_before; + int completed = 0; + int started = 0; + int bufs = runcycles; + int spurious = 0; + int r; + unsigned len; + void *buf; + int tokick = batch; + + for (;;) { + if (do_sleep) + disable_call(); + completed_before = completed; + do { + if (started < bufs && + started - completed < max_outstanding) { + r = add_inbuf(0, "Buffer\n", "Hello, world!"); + if (__builtin_expect(r == 0, true)) { + ++started; + if (!--tokick) { + tokick = batch; + if (do_sleep) + kick_available(); + } + + } + } else + r = -1; + + /* Flush out completed bufs if any */ + if (get_buf(&len, &buf)) { + ++completed; + if (__builtin_expect(completed == bufs, false)) + return; + r = 0; + } + } while (r == 0); + if (completed == completed_before) + ++spurious; + assert(completed <= bufs); + assert(started <= bufs); + if (do_sleep) { + if (used_empty() && enable_call()) + wait_for_call(); + } else { + poll_used(); + } + } +} + +void poll_avail(void) +{ + while (avail_empty()) + busy_wait(); +} + +static void __attribute__((__flatten__)) run_host(void) +{ + int completed_before; + int completed = 0; + int spurious = 0; + int bufs = runcycles; + unsigned len; + void *buf; + + for (;;) { + if (do_sleep) { + if (avail_empty() && enable_kick()) + wait_for_kick(); + } else { + poll_avail(); + } + if (do_sleep) + disable_kick(); + completed_before = completed; + while (__builtin_expect(use_buf(&len, &buf), true)) { + if (do_sleep) + call_used(); + ++completed; + if (__builtin_expect(completed == bufs, false)) + return; + } + if (completed == completed_before) + ++spurious; + assert(completed <= bufs); + if (completed == bufs) + break; + } +} + +void *start_guest(void *arg) +{ + set_affinity(arg); + run_guest(); + pthread_exit(NULL); +} + +void *start_host(void *arg) +{ + set_affinity(arg); + run_host(); + pthread_exit(NULL); +} + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "help", + .has_arg = no_argument, + .val = 'h', + }, + { + .name = "host-affinity", + .has_arg = required_argument, + .val = 'H', + }, + { + .name = "guest-affinity", + .has_arg = required_argument, + .val = 'G', + }, + { + .name = "ring-size", + .has_arg = required_argument, + .val = 'R', + }, + { + .name = "run-cycles", + .has_arg = required_argument, + .val = 'C', + }, + { + .name = "outstanding", + .has_arg = required_argument, + .val = 'o', + }, + { + .name = "batch", + .has_arg = required_argument, + .val = 'b', + }, + { + .name = "param", + .has_arg = required_argument, + .val = 'p', + }, + { + .name = "sleep", + .has_arg = no_argument, + .val = 's', + }, + { + .name = "relax", + .has_arg = no_argument, + .val = 'x', + }, + { + .name = "exit", + .has_arg = no_argument, + .val = 'e', + }, + { + } +}; + +static void help(void) +{ + fprintf(stderr, "Usage: <test> [--help]" + " [--host-affinity H]" + " [--guest-affinity G]" + " [--ring-size R (default: %u)]" + " [--run-cycles C (default: %d)]" + " [--batch b]" + " [--outstanding o]" + " [--param p]" + " [--sleep]" + " [--relax]" + " [--exit]" + "\n", + ring_size, + runcycles); +} + +int main(int argc, char **argv) +{ + int ret; + pthread_t host, guest; + void *tret; + char *host_arg = NULL; + char *guest_arg = NULL; + char *endptr; + long int c; + + kickfd = eventfd(0, 0); + assert(kickfd >= 0); + callfd = eventfd(0, 0); + assert(callfd >= 0); + + for (;;) { + int o = getopt_long(argc, argv, optstring, longopts, NULL); + switch (o) { + case -1: + goto done; + case '?': + help(); + exit(2); + case 'H': + host_arg = optarg; + break; + case 'G': + guest_arg = optarg; + break; + case 'R': + ring_size = strtol(optarg, &endptr, 0); + assert(ring_size && !(ring_size & (ring_size - 1))); + assert(!*endptr); + break; + case 'C': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + runcycles = c; + break; + case 'o': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + max_outstanding = c; + break; + case 'p': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + param = c; + break; + case 'b': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + batch = c; + break; + case 's': + do_sleep = true; + break; + case 'x': + do_relax = true; + break; + case 'e': + do_exit = true; + break; + default: + help(); + exit(4); + break; + } + } + + /* does nothing here, used to make sure all smp APIs compile */ + smp_acquire(); + smp_release(); + smp_mb(); +done: + + if (batch > max_outstanding) + batch = max_outstanding; + + if (optind < argc) { + help(); + exit(4); + } + alloc_ring(); + + ret = pthread_create(&host, NULL, start_host, host_arg); + assert(!ret); + ret = pthread_create(&guest, NULL, start_guest, guest_arg); + assert(!ret); + + ret = pthread_join(guest, &tret); + assert(!ret); + ret = pthread_join(host, &tret); + assert(!ret); + return 0; +} diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h new file mode 100644 index 000000000000..d18dd317e27f --- /dev/null +++ b/tools/virtio/ringtest/main.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * Common macros and functions for ring benchmarking. + */ +#ifndef MAIN_H +#define MAIN_H + +#include <assert.h> +#include <stdbool.h> + +extern int param; + +extern bool do_exit; + +#if defined(__x86_64__) || defined(__i386__) +#include "x86intrin.h" + +static inline void wait_cycles(unsigned long long cycles) +{ + unsigned long long t; + + t = __rdtsc(); + while (__rdtsc() - t < cycles) {} +} + +#define VMEXIT_CYCLES 500 +#define VMENTRY_CYCLES 500 + +#elif defined(__s390x__) +static inline void wait_cycles(unsigned long long cycles) +{ + asm volatile("0: brctg %0,0b" : : "d" (cycles)); +} + +/* tweak me */ +#define VMEXIT_CYCLES 200 +#define VMENTRY_CYCLES 200 + +#else +static inline void wait_cycles(unsigned long long cycles) +{ + _Exit(5); +} +#define VMEXIT_CYCLES 0 +#define VMENTRY_CYCLES 0 +#endif + +static inline void vmexit(void) +{ + if (!do_exit) + return; + + wait_cycles(VMEXIT_CYCLES); +} +static inline void vmentry(void) +{ + if (!do_exit) + return; + + wait_cycles(VMENTRY_CYCLES); +} + +/* implemented by ring */ +void alloc_ring(void); +/* guest side */ +int add_inbuf(unsigned, void *, void *); +void *get_buf(unsigned *, void **); +void disable_call(); +bool used_empty(); +bool enable_call(); +void kick_available(); +/* host side */ +void disable_kick(); +bool avail_empty(); +bool enable_kick(); +bool use_buf(unsigned *, void **); +void call_used(); + +/* implemented by main */ +extern bool do_sleep; +void kick(void); +void wait_for_kick(void); +void call(void); +void wait_for_call(void); + +extern unsigned ring_size; + +/* Compiler barrier - similar to what Linux uses */ +#define barrier() asm volatile("" ::: "memory") + +/* Is there a portable way to do this? */ +#if defined(__x86_64__) || defined(__i386__) +#define cpu_relax() asm ("rep; nop" ::: "memory") +#elif defined(__s390x__) +#define cpu_relax() barrier() +#elif defined(__aarch64__) +#define cpu_relax() asm ("yield" ::: "memory") +#else +#define cpu_relax() assert(0) +#endif + +extern bool do_relax; + +static inline void busy_wait(void) +{ + if (do_relax) + cpu_relax(); + else + /* prevent compiler from removing busy loops */ + barrier(); +} + +#if defined(__x86_64__) || defined(__i386__) +#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") +#elif defined(__aarch64__) +#define smp_mb() asm volatile("dmb ish" ::: "memory") +#else +/* + * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized + * with other __ATOMIC_SEQ_CST calls. + */ +#define smp_mb() __sync_synchronize() +#endif + +/* + * This abuses the atomic builtins for thread fences, and + * adds a compiler barrier. + */ +#define smp_release() do { \ + barrier(); \ + __atomic_thread_fence(__ATOMIC_RELEASE); \ +} while (0) + +#define smp_acquire() do { \ + __atomic_thread_fence(__ATOMIC_ACQUIRE); \ + barrier(); \ +} while (0) + +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) +#define smp_wmb() barrier() +#elif defined(__aarch64__) +#define smp_wmb() asm volatile("dmb ishst" ::: "memory") +#else +#define smp_wmb() smp_release() +#endif + +#ifndef __always_inline +#define __always_inline inline __attribute__((always_inline)) +#endif + +static __always_inline +void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; + case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; + case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; + case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + barrier(); + } +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break; + case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break; + case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break; + case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +#ifdef __alpha__ +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + smp_mb(); /* Enforce dependency ordering from x */ \ + __u.__val; \ +}) +#else +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) +#endif + +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (typeof(x)) (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + +#endif diff --git a/tools/virtio/ringtest/noring.c b/tools/virtio/ringtest/noring.c new file mode 100644 index 000000000000..ce2440d5ca93 --- /dev/null +++ b/tools/virtio/ringtest/noring.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include "main.h" +#include <assert.h> + +/* stub implementation: useful for measuring overhead */ +void alloc_ring(void) +{ +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + return 0; +} + +/* + * skb_array API provides no way for producer to find out whether a given + * buffer was consumed. Our tests merely require that a successful get_buf + * implies that add_inbuf succeed in the past, and that add_inbuf will succeed, + * fake it accordingly. + */ +void *get_buf(unsigned *lenp, void **bufp) +{ + return "Buffer"; +} + +bool used_empty() +{ + return false; +} + +void disable_call() +{ + assert(0); +} + +bool enable_call() +{ + assert(0); +} + +void kick_available(void) +{ + assert(0); +} + +/* host side */ +void disable_kick() +{ + assert(0); +} + +bool enable_kick() +{ + assert(0); +} + +bool avail_empty() +{ + return false; +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + return true; +} + +void call_used(void) +{ + assert(0); +} diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c new file mode 100644 index 000000000000..c9b26335f891 --- /dev/null +++ b/tools/virtio/ringtest/ptr_ring.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <pthread.h> +#include <malloc.h> +#include <assert.h> +#include <errno.h> +#include <limits.h> + +#define SMP_CACHE_BYTES 64 +#define cache_line_size() SMP_CACHE_BYTES +#define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES))) +#define unlikely(x) (__builtin_expect(!!(x), 0)) +#define likely(x) (__builtin_expect(!!(x), 1)) +#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a)) +#define SIZE_MAX (~(size_t)0) +#define KMALLOC_MAX_SIZE SIZE_MAX + +typedef pthread_spinlock_t spinlock_t; + +typedef int gfp_t; +#define __GFP_ZERO 0x1 + +static void *kmalloc(unsigned size, gfp_t gfp) +{ + void *p = memalign(64, size); + if (!p) + return p; + + if (gfp & __GFP_ZERO) + memset(p, 0, size); + return p; +} + +static inline void *kzalloc(unsigned size, gfp_t flags) +{ + return kmalloc(size, flags | __GFP_ZERO); +} + +static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) +{ + if (size != 0 && n > SIZE_MAX / size) + return NULL; + return kmalloc(n * size, flags); +} + +static inline void *kcalloc(size_t n, size_t size, gfp_t flags) +{ + return kmalloc_array(n, size, flags | __GFP_ZERO); +} + +static void kfree(void *p) +{ + if (p) + free(p); +} + +#define kvmalloc_array kmalloc_array +#define kvfree kfree + +static void spin_lock_init(spinlock_t *lock) +{ + int r = pthread_spin_init(lock, 0); + assert(!r); +} + +static void spin_lock(spinlock_t *lock) +{ + int ret = pthread_spin_lock(lock); + assert(!ret); +} + +static void spin_unlock(spinlock_t *lock) +{ + int ret = pthread_spin_unlock(lock); + assert(!ret); +} + +static void spin_lock_bh(spinlock_t *lock) +{ + spin_lock(lock); +} + +static void spin_unlock_bh(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static void spin_lock_irq(spinlock_t *lock) +{ + spin_lock(lock); +} + +static void spin_unlock_irq(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static void spin_lock_irqsave(spinlock_t *lock, unsigned long f) +{ + spin_lock(lock); +} + +static void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f) +{ + spin_unlock(lock); +} + +#include "../../../include/linux/ptr_ring.h" + +static unsigned long long headcnt, tailcnt; +static struct ptr_ring array ____cacheline_aligned_in_smp; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret = ptr_ring_init(&array, ring_size, 0); + assert(!ret); + /* Hacky way to poke at ring internals. Useful for testing though. */ + if (param) + array.batch = param; +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + int ret; + + ret = __ptr_ring_produce(&array, buf); + if (ret >= 0) { + ret = 0; + headcnt++; + } + + return ret; +} + +/* + * ptr_ring API provides no way for producer to find out whether a given + * buffer was consumed. Our tests merely require that a successful get_buf + * implies that add_inbuf succeed in the past, and that add_inbuf will succeed, + * fake it accordingly. + */ +void *get_buf(unsigned *lenp, void **bufp) +{ + void *datap; + + if (tailcnt == headcnt || __ptr_ring_full(&array)) + datap = NULL; + else { + datap = "Buffer\n"; + ++tailcnt; + } + + return datap; +} + +bool used_empty() +{ + return (tailcnt == headcnt || __ptr_ring_full(&array)); +} + +void disable_call() +{ + assert(0); +} + +bool enable_call() +{ + assert(0); +} + +void kick_available(void) +{ + assert(0); +} + +/* host side */ +void disable_kick() +{ + assert(0); +} + +bool enable_kick() +{ + assert(0); +} + +bool avail_empty() +{ + return __ptr_ring_empty(&array); +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + void *ptr; + + ptr = __ptr_ring_consume(&array); + + return ptr; +} + +void call_used(void) +{ + assert(0); +} diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c new file mode 100644 index 000000000000..58e7d33bddfc --- /dev/null +++ b/tools/virtio/ringtest/ring.c @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * Simple descriptor-based ring. virtio 0.9 compatible event index is used for + * signalling, unconditionally. + */ +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/* Next - Where next entry will be written. + * Prev - "Next" value when event triggered previously. + * Event - Peer requested event after writing this entry. + */ +static inline bool need_event(unsigned short event, + unsigned short next, + unsigned short prev) +{ + return (unsigned short)(next - event - 1) < (unsigned short)(next - prev); +} + +/* Design: + * Guest adds descriptors with unique index values and DESC_HW in flags. + * Host overwrites used descriptors with correct len, index, and DESC_HW clear. + * Flags are always set last. + */ +#define DESC_HW 0x1 + +struct desc { + unsigned short flags; + unsigned short index; + unsigned len; + unsigned long long addr; +}; + +/* how much padding is needed to avoid false cache sharing */ +#define HOST_GUEST_PADDING 0x80 + +/* Mostly read */ +struct event { + unsigned short kick_index; + unsigned char reserved0[HOST_GUEST_PADDING - 2]; + unsigned short call_index; + unsigned char reserved1[HOST_GUEST_PADDING - 2]; +}; + +struct data { + void *buf; /* descriptor is writeable, we can't get buf from there */ + void *data; +} *data; + +struct desc *ring; +struct event *event; + +struct guest { + unsigned avail_idx; + unsigned last_used_idx; + unsigned num_free; + unsigned kicked_avail_idx; + unsigned char reserved[HOST_GUEST_PADDING - 12]; +} guest; + +struct host { + /* we do not need to track last avail index + * unless we have more than one in flight. + */ + unsigned used_idx; + unsigned called_used_idx; + unsigned char reserved[HOST_GUEST_PADDING - 4]; +} host; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret; + int i; + + ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring); + if (ret) { + perror("Unable to allocate ring buffer.\n"); + exit(3); + } + event = calloc(1, sizeof(*event)); + if (!event) { + perror("Unable to allocate event buffer.\n"); + exit(3); + } + guest.avail_idx = 0; + guest.kicked_avail_idx = -1; + guest.last_used_idx = 0; + host.used_idx = 0; + host.called_used_idx = -1; + for (i = 0; i < ring_size; ++i) { + struct desc desc = { + .index = i, + }; + ring[i] = desc; + } + guest.num_free = ring_size; + data = calloc(ring_size, sizeof(*data)); + if (!data) { + perror("Unable to allocate data buffer.\n"); + exit(3); + } +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + unsigned head, index; + + if (!guest.num_free) + return -1; + + guest.num_free--; + head = (ring_size - 1) & (guest.avail_idx++); + + /* Start with a write. On MESI architectures this helps + * avoid a shared state with consumer that is polling this descriptor. + */ + ring[head].addr = (unsigned long)(void*)buf; + ring[head].len = len; + /* read below might bypass write above. That is OK because it's just an + * optimization. If this happens, we will get the cache line in a + * shared state which is unfortunate, but probably not worth it to + * add an explicit full barrier to avoid this. + */ + barrier(); + index = ring[head].index; + data[index].buf = buf; + data[index].data = datap; + /* Barrier A (for pairing) */ + smp_release(); + ring[head].flags = DESC_HW; + + return 0; +} + +void *get_buf(unsigned *lenp, void **bufp) +{ + unsigned head = (ring_size - 1) & guest.last_used_idx; + unsigned index; + void *datap; + + if (ring[head].flags & DESC_HW) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); + *lenp = ring[head].len; + index = ring[head].index & (ring_size - 1); + datap = data[index].data; + *bufp = data[index].buf; + data[index].buf = NULL; + data[index].data = NULL; + guest.num_free++; + guest.last_used_idx++; + return datap; +} + +bool used_empty() +{ + unsigned head = (ring_size - 1) & guest.last_used_idx; + + return (ring[head].flags & DESC_HW); +} + +void disable_call() +{ + /* Doing nothing to disable calls might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_call() +{ + event->call_index = guest.last_used_idx; + /* Flush call index write */ + /* Barrier D (for pairing) */ + smp_mb(); + return used_empty(); +} + +void kick_available(void) +{ + bool need; + + /* Flush in previous flags write */ + /* Barrier C (for pairing) */ + smp_mb(); + need = need_event(event->kick_index, + guest.avail_idx, + guest.kicked_avail_idx); + + guest.kicked_avail_idx = guest.avail_idx; + if (need) + kick(); +} + +/* host side */ +void disable_kick() +{ + /* Doing nothing to disable kicks might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_kick() +{ + event->kick_index = host.used_idx; + /* Barrier C (for pairing) */ + smp_mb(); + return avail_empty(); +} + +bool avail_empty() +{ + unsigned head = (ring_size - 1) & host.used_idx; + + return !(ring[head].flags & DESC_HW); +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + unsigned head = (ring_size - 1) & host.used_idx; + + if (!(ring[head].flags & DESC_HW)) + return false; + + /* make sure length read below is not speculated */ + /* Barrier A (for pairing) */ + smp_acquire(); + + /* simple in-order completion: we don't need + * to touch index at all. This also means we + * can just modify the descriptor in-place. + */ + ring[head].len--; + /* Make sure len is valid before flags. + * Note: alternative is to write len and flags in one access - + * possible on 64 bit architectures but wmb is free on Intel anyway + * so I have no way to test whether it's a gain. + */ + /* Barrier B (for pairing) */ + smp_release(); + ring[head].flags = 0; + host.used_idx++; + return true; +} + +void call_used(void) +{ + bool need; + + /* Flush in previous flags write */ + /* Barrier D (for pairing) */ + smp_mb(); + + need = need_event(event->call_index, + host.used_idx, + host.called_used_idx); + + host.called_used_idx = host.used_idx; + + if (need) + call(); +} diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh new file mode 100755 index 000000000000..dcc3ea758f48 --- /dev/null +++ b/tools/virtio/ringtest/run-on-all.sh @@ -0,0 +1,26 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +CPUS_ONLINE=$(lscpu --online -p=cpu|grep -v -e '#') +#use last CPU for host. Why not the first? +#many devices tend to use cpu0 by default so +#it tends to be busier +HOST_AFFINITY=$(echo "${CPUS_ONLINE}"|tail -n 1) + +#run command on all cpus +for cpu in $CPUS_ONLINE +do + #Don't run guest and host on same CPU + #It actually works ok if using signalling + if + (echo "$@" | grep -e "--sleep" > /dev/null) || \ + test $HOST_AFFINITY '!=' $cpu + then + echo "GUEST AFFINITY $cpu" + "$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu + fi +done +echo "NO GUEST AFFINITY" +"$@" --host-affinity $HOST_AFFINITY +echo "NO AFFINITY" +"$@" diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c new file mode 100644 index 000000000000..13a035a390e9 --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_0_9.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * Partial implementation of virtio 0.9. event index is used for signalling, + * unconditionally. Design roughly follows linux kernel implementation in order + * to be able to judge its performance. + */ +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <string.h> +#include <linux/virtio_ring.h> + +struct data { + void *data; +} *data; + +struct vring ring; + +/* enabling the below activates experimental ring polling code + * (which skips index reads on consumer in favor of looking at + * high bits of ring id ^ 0x8000). + */ +/* #ifdef RING_POLL */ +/* enabling the below activates experimental in-order code + * (which skips ring updates and reads and writes len in descriptor). + */ +/* #ifdef INORDER */ + +#if defined(RING_POLL) && defined(INORDER) +#error "RING_POLL and INORDER are mutually exclusive" +#endif + +/* how much padding is needed to avoid false cache sharing */ +#define HOST_GUEST_PADDING 0x80 + +struct guest { + unsigned short avail_idx; + unsigned short last_used_idx; + unsigned short num_free; + unsigned short kicked_avail_idx; +#ifndef INORDER + unsigned short free_head; +#else + unsigned short reserved_free_head; +#endif + unsigned char reserved[HOST_GUEST_PADDING - 10]; +} guest; + +struct host { + /* we do not need to track last avail index + * unless we have more than one in flight. + */ + unsigned short used_idx; + unsigned short called_used_idx; + unsigned char reserved[HOST_GUEST_PADDING - 4]; +} host; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret; + int i; + void *p; + + ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000)); + if (ret) { + perror("Unable to allocate ring buffer.\n"); + exit(3); + } + memset(p, 0, vring_size(ring_size, 0x1000)); + vring_init(&ring, ring_size, p, 0x1000); + + guest.avail_idx = 0; + guest.kicked_avail_idx = -1; + guest.last_used_idx = 0; +#ifndef INORDER + /* Put everything in free lists. */ + guest.free_head = 0; +#endif + for (i = 0; i < ring_size - 1; i++) + ring.desc[i].next = i + 1; + host.used_idx = 0; + host.called_used_idx = -1; + guest.num_free = ring_size; + data = malloc(ring_size * sizeof *data); + if (!data) { + perror("Unable to allocate data buffer.\n"); + exit(3); + } + memset(data, 0, ring_size * sizeof *data); +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + unsigned head; +#ifndef INORDER + unsigned avail; +#endif + struct vring_desc *desc; + + if (!guest.num_free) + return -1; + +#ifdef INORDER + head = (ring_size - 1) & (guest.avail_idx++); +#else + head = guest.free_head; +#endif + guest.num_free--; + + desc = ring.desc; + desc[head].flags = VRING_DESC_F_NEXT; + desc[head].addr = (unsigned long)(void *)buf; + desc[head].len = len; + /* We do it like this to simulate the way + * we'd have to flip it if we had multiple + * descriptors. + */ + desc[head].flags &= ~VRING_DESC_F_NEXT; +#ifndef INORDER + guest.free_head = desc[head].next; +#endif + + data[head].data = datap; + +#ifdef RING_POLL + /* Barrier A (for pairing) */ + smp_release(); + avail = guest.avail_idx++; + ring.avail->ring[avail & (ring_size - 1)] = + (head | (avail & ~(ring_size - 1))) ^ 0x8000; +#else +#ifndef INORDER + /* Barrier A (for pairing) */ + smp_release(); + avail = (ring_size - 1) & (guest.avail_idx++); + ring.avail->ring[avail] = head; +#endif + /* Barrier A (for pairing) */ + smp_release(); +#endif + ring.avail->idx = guest.avail_idx; + return 0; +} + +void *get_buf(unsigned *lenp, void **bufp) +{ + unsigned head; + unsigned index; + void *datap; + +#ifdef RING_POLL + head = (ring_size - 1) & guest.last_used_idx; + index = ring.used->ring[head].id; + if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1)) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); + index &= ring_size - 1; +#else + if (ring.used->idx == guest.last_used_idx) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); +#ifdef INORDER + head = (ring_size - 1) & guest.last_used_idx; + index = head; +#else + head = (ring_size - 1) & guest.last_used_idx; + index = ring.used->ring[head].id; +#endif + +#endif +#ifdef INORDER + *lenp = ring.desc[index].len; +#else + *lenp = ring.used->ring[head].len; +#endif + datap = data[index].data; + *bufp = (void*)(unsigned long)ring.desc[index].addr; + data[index].data = NULL; +#ifndef INORDER + ring.desc[index].next = guest.free_head; + guest.free_head = index; +#endif + guest.num_free++; + guest.last_used_idx++; + return datap; +} + +bool used_empty() +{ + unsigned short last_used_idx = guest.last_used_idx; +#ifdef RING_POLL + unsigned short head = last_used_idx & (ring_size - 1); + unsigned index = ring.used->ring[head].id; + + return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1); +#else + return ring.used->idx == last_used_idx; +#endif +} + +void disable_call() +{ + /* Doing nothing to disable calls might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_call() +{ + vring_used_event(&ring) = guest.last_used_idx; + /* Flush call index write */ + /* Barrier D (for pairing) */ + smp_mb(); + return used_empty(); +} + +void kick_available(void) +{ + bool need; + + /* Flush in previous flags write */ + /* Barrier C (for pairing) */ + smp_mb(); + need = vring_need_event(vring_avail_event(&ring), + guest.avail_idx, + guest.kicked_avail_idx); + + guest.kicked_avail_idx = guest.avail_idx; + if (need) + kick(); +} + +/* host side */ +void disable_kick() +{ + /* Doing nothing to disable kicks might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_kick() +{ + vring_avail_event(&ring) = host.used_idx; + /* Barrier C (for pairing) */ + smp_mb(); + return avail_empty(); +} + +bool avail_empty() +{ + unsigned head = host.used_idx; +#ifdef RING_POLL + unsigned index = ring.avail->ring[head & (ring_size - 1)]; + + return ((index ^ head ^ 0x8000) & ~(ring_size - 1)); +#else + return head == ring.avail->idx; +#endif +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + unsigned used_idx = host.used_idx; + struct vring_desc *desc; + unsigned head; + +#ifdef RING_POLL + head = ring.avail->ring[used_idx & (ring_size - 1)]; + if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1)) + return false; + /* Barrier A (for pairing) */ + smp_acquire(); + + used_idx &= ring_size - 1; + desc = &ring.desc[head & (ring_size - 1)]; +#else + if (used_idx == ring.avail->idx) + return false; + + /* Barrier A (for pairing) */ + smp_acquire(); + + used_idx &= ring_size - 1; +#ifdef INORDER + head = used_idx; +#else + head = ring.avail->ring[used_idx]; +#endif + desc = &ring.desc[head]; +#endif + + *lenp = desc->len; + *bufp = (void *)(unsigned long)desc->addr; + +#ifdef INORDER + desc->len = desc->len - 1; +#else + /* now update used ring */ + ring.used->ring[used_idx].id = head; + ring.used->ring[used_idx].len = desc->len - 1; +#endif + /* Barrier B (for pairing) */ + smp_release(); + host.used_idx++; + ring.used->idx = host.used_idx; + + return true; +} + +void call_used(void) +{ + bool need; + + /* Flush in previous flags write */ + /* Barrier D (for pairing) */ + smp_mb(); + need = vring_need_event(vring_used_event(&ring), + host.used_idx, + host.called_used_idx); + + host.called_used_idx = host.used_idx; + if (need) + call(); +} diff --git a/tools/virtio/ringtest/virtio_ring_inorder.c b/tools/virtio/ringtest/virtio_ring_inorder.c new file mode 100644 index 000000000000..2438ca58a2ad --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_inorder.c @@ -0,0 +1,2 @@ +#define INORDER 1 +#include "virtio_ring_0_9.c" diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c new file mode 100644 index 000000000000..84fc2c557aaa --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_poll.c @@ -0,0 +1,2 @@ +#define RING_POLL 1 +#include "virtio_ring_0_9.c" diff --git a/tools/virtio/uapi/linux/virtio_ring.h b/tools/virtio/uapi/linux/virtio_ring.h index 4d99c78234d3..cf50b2e5ff02 100644 --- a/tools/virtio/uapi/linux/virtio_ring.h +++ b/tools/virtio/uapi/linux/virtio_ring.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef VIRTIO_RING_H #define VIRTIO_RING_H #include "../../../../include/uapi/linux/virtio_ring.h" diff --git a/tools/virtio/uapi/linux/virtio_types.h b/tools/virtio/uapi/linux/virtio_types.h new file mode 100644 index 000000000000..e7a1096e7c97 --- /dev/null +++ b/tools/virtio/uapi/linux/virtio_types.h @@ -0,0 +1 @@ +#include "../../include/uapi/linux/virtio_types.h" diff --git a/tools/virtio/vhost_net_test.c b/tools/virtio/vhost_net_test.c new file mode 100644 index 000000000000..389d99a6d7c7 --- /dev/null +++ b/tools/virtio/vhost_net_test.c @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <getopt.h> +#include <limits.h> +#include <string.h> +#include <poll.h> +#include <sys/eventfd.h> +#include <stdlib.h> +#include <assert.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> +#include <stdbool.h> +#include <linux/vhost.h> +#include <linux/if.h> +#include <linux/if_tun.h> +#include <linux/in.h> +#include <linux/if_packet.h> +#include <linux/virtio_net.h> +#include <netinet/ether.h> + +#define HDR_LEN sizeof(struct virtio_net_hdr_mrg_rxbuf) +#define TEST_BUF_LEN 256 +#define TEST_PTYPE ETH_P_LOOPBACK +#define DESC_NUM 256 + +/* Used by implementation of kmalloc() in tools/virtio/linux/kernel.h */ +void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; + +struct vq_info { + int kick; + int call; + int idx; + long started; + long completed; + struct pollfd fds; + void *ring; + /* copy used for control */ + struct vring vring; + struct virtqueue *vq; +}; + +struct vdev_info { + struct virtio_device vdev; + int control; + struct vq_info vqs[2]; + int nvqs; + void *buf; + size_t buf_size; + char *test_buf; + char *res_buf; + struct vhost_memory *mem; + int sock; + int ifindex; + unsigned char mac[ETHER_ADDR_LEN]; +}; + +static int tun_alloc(struct vdev_info *dev, char *tun_name) +{ + struct ifreq ifr; + int len = HDR_LEN; + int fd, e; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + perror("Cannot open /dev/net/tun"); + return fd; + } + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ); + + e = ioctl(fd, TUNSETIFF, &ifr); + if (e < 0) { + perror("ioctl[TUNSETIFF]"); + close(fd); + return e; + } + + e = ioctl(fd, TUNSETVNETHDRSZ, &len); + if (e < 0) { + perror("ioctl[TUNSETVNETHDRSZ]"); + close(fd); + return e; + } + + e = ioctl(fd, SIOCGIFHWADDR, &ifr); + if (e < 0) { + perror("ioctl[SIOCGIFHWADDR]"); + close(fd); + return e; + } + + memcpy(dev->mac, &ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); + return fd; +} + +static void vdev_create_socket(struct vdev_info *dev, char *tun_name) +{ + struct ifreq ifr; + + dev->sock = socket(AF_PACKET, SOCK_RAW, htons(TEST_PTYPE)); + assert(dev->sock != -1); + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ); + assert(ioctl(dev->sock, SIOCGIFINDEX, &ifr) >= 0); + + dev->ifindex = ifr.ifr_ifindex; + + /* Set the flags that bring the device up */ + assert(ioctl(dev->sock, SIOCGIFFLAGS, &ifr) >= 0); + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + assert(ioctl(dev->sock, SIOCSIFFLAGS, &ifr) >= 0); +} + +static void vdev_send_packet(struct vdev_info *dev) +{ + char *sendbuf = dev->test_buf + HDR_LEN; + struct sockaddr_ll saddrll = {0}; + int sockfd = dev->sock; + int ret; + + saddrll.sll_family = PF_PACKET; + saddrll.sll_ifindex = dev->ifindex; + saddrll.sll_halen = ETH_ALEN; + saddrll.sll_protocol = htons(TEST_PTYPE); + + ret = sendto(sockfd, sendbuf, TEST_BUF_LEN, 0, + (struct sockaddr *)&saddrll, + sizeof(struct sockaddr_ll)); + assert(ret >= 0); +} + +static bool vq_notify(struct virtqueue *vq) +{ + struct vq_info *info = vq->priv; + unsigned long long v = 1; + int r; + + r = write(info->kick, &v, sizeof(v)); + assert(r == sizeof(v)); + + return true; +} + +static void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) +{ + struct vhost_vring_addr addr = { + .index = info->idx, + .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc, + .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail, + .used_user_addr = (uint64_t)(unsigned long)info->vring.used, + }; + struct vhost_vring_state state = { .index = info->idx }; + struct vhost_vring_file file = { .index = info->idx }; + int r; + + state.num = info->vring.num; + r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); + assert(r >= 0); + + state.num = 0; + r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); + assert(r >= 0); + + r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); + assert(r >= 0); + + file.fd = info->kick; + r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); + assert(r >= 0); +} + +static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) +{ + if (info->vq) + vring_del_virtqueue(info->vq); + + memset(info->ring, 0, vring_size(num, 4096)); + vring_init(&info->vring, num, info->ring, 4096); + info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false, + info->ring, vq_notify, NULL, "test"); + assert(info->vq); + info->vq->priv = info; +} + +static void vq_info_add(struct vdev_info *dev, int idx, int num, int fd) +{ + struct vhost_vring_file backend = { .index = idx, .fd = fd }; + struct vq_info *info = &dev->vqs[idx]; + int r; + + info->idx = idx; + info->kick = eventfd(0, EFD_NONBLOCK); + r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); + assert(r >= 0); + vq_reset(info, num, &dev->vdev); + vhost_vq_setup(dev, info); + + r = ioctl(dev->control, VHOST_NET_SET_BACKEND, &backend); + assert(!r); +} + +static void vdev_info_init(struct vdev_info *dev, unsigned long long features) +{ + struct ether_header *eh; + int i, r; + + dev->vdev.features = features; + INIT_LIST_HEAD(&dev->vdev.vqs); + spin_lock_init(&dev->vdev.vqs_list_lock); + + dev->buf_size = (HDR_LEN + TEST_BUF_LEN) * 2; + dev->buf = malloc(dev->buf_size); + assert(dev->buf); + dev->test_buf = dev->buf; + dev->res_buf = dev->test_buf + HDR_LEN + TEST_BUF_LEN; + + memset(dev->test_buf, 0, HDR_LEN + TEST_BUF_LEN); + eh = (struct ether_header *)(dev->test_buf + HDR_LEN); + eh->ether_type = htons(TEST_PTYPE); + memcpy(eh->ether_dhost, dev->mac, ETHER_ADDR_LEN); + memcpy(eh->ether_shost, dev->mac, ETHER_ADDR_LEN); + + for (i = sizeof(*eh); i < TEST_BUF_LEN; i++) + dev->test_buf[i + HDR_LEN] = (char)i; + + dev->control = open("/dev/vhost-net", O_RDWR); + assert(dev->control >= 0); + + r = ioctl(dev->control, VHOST_SET_OWNER, NULL); + assert(r >= 0); + + dev->mem = malloc(offsetof(struct vhost_memory, regions) + + sizeof(dev->mem->regions[0])); + assert(dev->mem); + memset(dev->mem, 0, offsetof(struct vhost_memory, regions) + + sizeof(dev->mem->regions[0])); + dev->mem->nregions = 1; + dev->mem->regions[0].guest_phys_addr = (long)dev->buf; + dev->mem->regions[0].userspace_addr = (long)dev->buf; + dev->mem->regions[0].memory_size = dev->buf_size; + + r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + assert(r >= 0); + + r = ioctl(dev->control, VHOST_SET_FEATURES, &features); + assert(r >= 0); + + dev->nvqs = 2; +} + +static void wait_for_interrupt(struct vq_info *vq) +{ + unsigned long long val; + + poll(&vq->fds, 1, 100); + + if (vq->fds.revents & POLLIN) + read(vq->fds.fd, &val, sizeof(val)); +} + +static void verify_res_buf(char *res_buf) +{ + int i; + + for (i = ETHER_HDR_LEN; i < TEST_BUF_LEN; i++) + assert(res_buf[i] == (char)i); +} + +static void run_tx_test(struct vdev_info *dev, struct vq_info *vq, + bool delayed, int bufs) +{ + long long spurious = 0; + struct scatterlist sl; + unsigned int len; + int r; + + for (;;) { + long started_before = vq->started; + long completed_before = vq->completed; + + virtqueue_disable_cb(vq->vq); + do { + while (vq->started < bufs && + (vq->started - vq->completed) < 1) { + sg_init_one(&sl, dev->test_buf, HDR_LEN + TEST_BUF_LEN); + r = virtqueue_add_outbuf(vq->vq, &sl, 1, + dev->test_buf + vq->started, + GFP_ATOMIC); + if (unlikely(r != 0)) + break; + + ++vq->started; + + if (unlikely(!virtqueue_kick(vq->vq))) { + r = -1; + break; + } + } + + if (vq->started >= bufs) + r = -1; + + /* Flush out completed bufs if any */ + while (virtqueue_get_buf(vq->vq, &len)) { + int n; + + n = recvfrom(dev->sock, dev->res_buf, TEST_BUF_LEN, 0, NULL, NULL); + assert(n == TEST_BUF_LEN); + verify_res_buf(dev->res_buf); + + ++vq->completed; + r = 0; + } + } while (r == 0); + + if (vq->completed == completed_before && vq->started == started_before) + ++spurious; + + assert(vq->completed <= bufs); + assert(vq->started <= bufs); + if (vq->completed == bufs) + break; + + if (delayed) { + if (virtqueue_enable_cb_delayed(vq->vq)) + wait_for_interrupt(vq); + } else { + if (virtqueue_enable_cb(vq->vq)) + wait_for_interrupt(vq); + } + } + printf("TX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", + spurious, vq->started, vq->completed); +} + +static void run_rx_test(struct vdev_info *dev, struct vq_info *vq, + bool delayed, int bufs) +{ + long long spurious = 0; + struct scatterlist sl; + unsigned int len; + int r; + + for (;;) { + long started_before = vq->started; + long completed_before = vq->completed; + + do { + while (vq->started < bufs && + (vq->started - vq->completed) < 1) { + sg_init_one(&sl, dev->res_buf, HDR_LEN + TEST_BUF_LEN); + + r = virtqueue_add_inbuf(vq->vq, &sl, 1, + dev->res_buf + vq->started, + GFP_ATOMIC); + if (unlikely(r != 0)) + break; + + ++vq->started; + + vdev_send_packet(dev); + + if (unlikely(!virtqueue_kick(vq->vq))) { + r = -1; + break; + } + } + + if (vq->started >= bufs) + r = -1; + + /* Flush out completed bufs if any */ + while (virtqueue_get_buf(vq->vq, &len)) { + struct ether_header *eh; + + eh = (struct ether_header *)(dev->res_buf + HDR_LEN); + + /* tun netdev is up and running, only handle the + * TEST_PTYPE packet. + */ + if (eh->ether_type == htons(TEST_PTYPE)) { + assert(len == TEST_BUF_LEN + HDR_LEN); + verify_res_buf(dev->res_buf + HDR_LEN); + } + + ++vq->completed; + r = 0; + } + } while (r == 0); + + if (vq->completed == completed_before && vq->started == started_before) + ++spurious; + + assert(vq->completed <= bufs); + assert(vq->started <= bufs); + if (vq->completed == bufs) + break; + } + + printf("RX spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", + spurious, vq->started, vq->completed); +} + +static const char optstring[] = "h"; +static const struct option longopts[] = { + { + .name = "help", + .val = 'h', + }, + { + .name = "event-idx", + .val = 'E', + }, + { + .name = "no-event-idx", + .val = 'e', + }, + { + .name = "indirect", + .val = 'I', + }, + { + .name = "no-indirect", + .val = 'i', + }, + { + .name = "virtio-1", + .val = '1', + }, + { + .name = "no-virtio-1", + .val = '0', + }, + { + .name = "delayed-interrupt", + .val = 'D', + }, + { + .name = "no-delayed-interrupt", + .val = 'd', + }, + { + .name = "buf-num", + .val = 'n', + .has_arg = required_argument, + }, + { + .name = "batch", + .val = 'b', + .has_arg = required_argument, + }, + { + } +}; + +static void help(int status) +{ + fprintf(stderr, "Usage: vhost_net_test [--help]" + " [--no-indirect]" + " [--no-event-idx]" + " [--no-virtio-1]" + " [--delayed-interrupt]" + " [--buf-num]" + "\n"); + + exit(status); +} + +int main(int argc, char **argv) +{ + unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1); + char tun_name[IFNAMSIZ]; + long nbufs = 0x100000; + struct vdev_info dev; + bool delayed = false; + int o, fd; + + for (;;) { + o = getopt_long(argc, argv, optstring, longopts, NULL); + switch (o) { + case -1: + goto done; + case '?': + help(2); + case 'e': + features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); + break; + case 'h': + help(0); + case 'i': + features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); + break; + case '0': + features &= ~(1ULL << VIRTIO_F_VERSION_1); + break; + case 'D': + delayed = true; + break; + case 'n': + nbufs = strtol(optarg, NULL, 10); + assert(nbufs > 0); + break; + default: + assert(0); + break; + } + } + +done: + memset(&dev, 0, sizeof(dev)); + snprintf(tun_name, IFNAMSIZ, "tun_%d", getpid()); + + fd = tun_alloc(&dev, tun_name); + assert(fd >= 0); + + vdev_info_init(&dev, features); + vq_info_add(&dev, 0, DESC_NUM, fd); + vq_info_add(&dev, 1, DESC_NUM, fd); + vdev_create_socket(&dev, tun_name); + + run_rx_test(&dev, &dev.vqs[0], delayed, nbufs); + run_tx_test(&dev, &dev.vqs[1], delayed, nbufs); + + return 0; +} diff --git a/tools/virtio/vhost_test/Makefile b/tools/virtio/vhost_test/Makefile index a1d35b81b314..94d3aff987dc 100644 --- a/tools/virtio/vhost_test/Makefile +++ b/tools/virtio/vhost_test/Makefile @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-m += vhost_test.o EXTRA_CFLAGS += -Idrivers/vhost diff --git a/tools/virtio/virtio-trace/Makefile b/tools/virtio/virtio-trace/Makefile index 0d2381633475..7843ebcda71d 100644 --- a/tools/virtio/virtio-trace/Makefile +++ b/tools/virtio/virtio-trace/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 CC = gcc CFLAGS = -O2 -Wall -pthread diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README index b64845b823ab..0127ff0c54b0 100644 --- a/tools/virtio/virtio-trace/README +++ b/tools/virtio/virtio-trace/README @@ -61,7 +61,7 @@ and id=channel0,name=agent-ctl-path\ ##data path## -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\ - -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel0,\ + -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\ id=channel1,name=trace-path-cpu0\ ... @@ -95,7 +95,7 @@ Run 1) Enable ftrace in the guest <Example> - # echo 1 > /sys/kernel/debug/tracing/events/sched/enable + # echo 1 > /sys/kernel/tracing/events/sched/enable 2) Run trace agent in the guest This agent must be operated as root. diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c index a2d0403c4f94..39860be6e2d8 100644 --- a/tools/virtio/virtio-trace/trace-agent-ctl.c +++ b/tools/virtio/virtio-trace/trace-agent-ctl.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Controller of read/write threads for virtio-trace * * Copyright (C) 2012 Hitachi, Ltd. * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> - * - * Licensed under GPL version 2 only. - * */ #define _GNU_SOURCE @@ -77,7 +75,7 @@ static int wait_order(int ctl_fd) if (ret) break; - }; + } return ret; diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c index 3aace5ea4842..ddfe7875eb16 100644 --- a/tools/virtio/virtio-trace/trace-agent-rw.c +++ b/tools/virtio/virtio-trace/trace-agent-rw.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Read/write thread of a guest agent for virtio-trace * * Copyright (C) 2012 Hitachi, Ltd. * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> - * - * Licensed under GPL version 2 only. - * */ #define _GNU_SOURCE diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c index 0a0a7dd4eff7..7e2d9bbf0b84 100644 --- a/tools/virtio/virtio-trace/trace-agent.c +++ b/tools/virtio/virtio-trace/trace-agent.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Guest agent for virtio-trace * * Copyright (C) 2012 Hitachi, Ltd. * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> - * - * Licensed under GPL version 2 only. - * */ #define _GNU_SOURCE @@ -20,8 +18,9 @@ #define PIPE_DEF_BUFS 16 #define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS) #define PIPE_MAX_SIZE (1024*1024) -#define READ_PATH_FMT \ - "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw" +#define TRACEFS "/sys/kernel/tracing" +#define DEBUGFS "/sys/kernel/debug/tracing" +#define READ_PATH_FMT "%s/per_cpu/cpu%d/trace_pipe_raw" #define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d" #define CTL_PATH "/dev/virtio-ports/agent-ctl-path" @@ -122,9 +121,12 @@ static const char *make_path(int cpu_num, bool this_is_write_path) if (this_is_write_path) /* write(output) path */ ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num); - else + else { /* read(input) path */ - ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num); + ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, TRACEFS, cpu_num); + if (ret > 0 && access(buf, F_OK) != 0) + ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, DEBUGFS, cpu_num); + } if (ret <= 0) { pr_err("Failed to generate %s path(CPU#%d):%d\n", diff --git a/tools/virtio/virtio-trace/trace-agent.h b/tools/virtio/virtio-trace/trace-agent.h index 8de79bfeaa73..e67885969f0e 100644 --- a/tools/virtio/virtio-trace/trace-agent.h +++ b/tools/virtio/virtio-trace/trace-agent.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __TRACE_AGENT_H__ #define __TRACE_AGENT_H__ #include <pthread.h> diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index da7a19558281..028f54e6854a 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -1,5 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #include <getopt.h> +#include <limits.h> #include <string.h> #include <poll.h> #include <sys/eventfd.h> @@ -11,11 +13,14 @@ #include <sys/types.h> #include <fcntl.h> #include <stdbool.h> +#include <linux/virtio_types.h> #include <linux/vhost.h> #include <linux/virtio.h> #include <linux/virtio_ring.h> #include "../../drivers/vhost/test.h" +#define RANDOM_BATCH -1 + /* Unused */ void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end; @@ -41,13 +46,18 @@ struct vdev_info { struct vhost_memory *mem; }; -void vq_notify(struct virtqueue *vq) +static const struct vhost_vring_file no_backend = { .fd = -1 }, + backend = { .fd = 1 }; +static const struct vhost_vring_state null_state = {}; + +bool vq_notify(struct virtqueue *vq) { struct vq_info *info = vq->priv; unsigned long long v = 1; int r; r = write(info->kick, &v, sizeof v); assert(r == sizeof v); + return true; } void vq_callback(struct virtqueue *vq) @@ -59,7 +69,7 @@ void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) { struct vhost_vring_state state = { .index = info->idx }; struct vhost_vring_file file = { .index = info->idx }; - unsigned long long features = dev->vdev.features[0]; + unsigned long long features = dev->vdev.features; struct vhost_vring_addr addr = { .index = info->idx, .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc, @@ -85,6 +95,19 @@ void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) assert(r >= 0); } +static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) +{ + if (info->vq) + vring_del_virtqueue(info->vq); + + memset(info->ring, 0, vring_size(num, 4096)); + vring_init(&info->vring, num, info->ring, 4096); + info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false, + info->ring, vq_notify, vq_callback, "test"); + assert(info->vq); + info->vq->priv = info; +} + static void vq_info_add(struct vdev_info *dev, int num) { struct vq_info *info = &dev->vqs[dev->nvqs]; @@ -94,14 +117,7 @@ static void vq_info_add(struct vdev_info *dev, int num) info->call = eventfd(0, EFD_NONBLOCK); r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); assert(r >= 0); - memset(info->ring, 0, vring_size(num, 4096)); - vring_init(&info->vring, num, info->ring, 4096); - info->vq = vring_new_virtqueue(info->idx, - info->vring.num, 4096, &dev->vdev, - true, info->ring, - vq_notify, vq_callback, "test"); - assert(info->vq); - info->vq->priv = info; + vq_reset(info, num, &dev->vdev); vhost_vq_setup(dev, info); dev->fds[info->idx].fd = info->call; dev->fds[info->idx].events = POLLIN; @@ -112,12 +128,13 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features) { int r; memset(dev, 0, sizeof *dev); - dev->vdev.features[0] = features; - dev->vdev.features[1] = features >> 32; + dev->vdev.features = features; + INIT_LIST_HEAD(&dev->vdev.vqs); + spin_lock_init(&dev->vdev.vqs_list_lock); dev->buf_size = 1024; dev->buf = malloc(dev->buf_size); assert(dev->buf); - dev->control = open("/dev/vhost-test", O_RDWR); + dev->control = open("/dev/vhost-test", O_RDWR); assert(dev->control >= 0); r = ioctl(dev->control, VHOST_SET_OWNER, NULL); assert(r >= 0); @@ -150,40 +167,93 @@ static void wait_for_interrupt(struct vdev_info *dev) } static void run_test(struct vdev_info *dev, struct vq_info *vq, - bool delayed, int bufs) + bool delayed, int batch, int reset_n, int bufs) { struct scatterlist sl; - long started = 0, completed = 0; - long completed_before; + long started = 0, completed = 0, next_reset = reset_n; + long completed_before, started_before; int r, test = 1; - unsigned len; + unsigned int len; long long spurious = 0; + const bool random_batch = batch == RANDOM_BATCH; + r = ioctl(dev->control, VHOST_TEST_RUN, &test); assert(r >= 0); + if (!reset_n) { + next_reset = INT_MAX; + } + for (;;) { virtqueue_disable_cb(vq->vq); completed_before = completed; + started_before = started; do { - if (started < bufs) { + const bool reset = completed > next_reset; + if (random_batch) + batch = (random() % vq->vring.num) + 1; + + while (started < bufs && + (started - completed) < batch) { sg_init_one(&sl, dev->buf, dev->buf_size); r = virtqueue_add_outbuf(vq->vq, &sl, 1, dev->buf + started, GFP_ATOMIC); - if (likely(r == 0)) { - ++started; - virtqueue_kick(vq->vq); + if (unlikely(r != 0)) { + if (r == -ENOSPC && + started > started_before) + r = 0; + else + r = -1; + break; } - } else + + ++started; + + if (unlikely(!virtqueue_kick(vq->vq))) { + r = -1; + break; + } + } + + if (started >= bufs) r = -1; + if (reset) { + r = ioctl(dev->control, VHOST_TEST_SET_BACKEND, + &no_backend); + assert(!r); + } + /* Flush out completed bufs if any */ - if (virtqueue_get_buf(vq->vq, &len)) { + while (virtqueue_get_buf(vq->vq, &len)) { ++completed; r = 0; } + if (reset) { + struct vhost_vring_state s = { .index = 0 }; + + vq_reset(vq, vq->vring.num, &dev->vdev); + + r = ioctl(dev->control, VHOST_GET_VRING_BASE, + &s); + assert(!r); + + s.num = 0; + r = ioctl(dev->control, VHOST_SET_VRING_BASE, + &null_state); + assert(!r); + + r = ioctl(dev->control, VHOST_TEST_SET_BACKEND, + &backend); + assert(!r); + + started = completed; + while (completed > next_reset) + next_reset += completed; + } } while (r == 0); - if (completed == completed_before) + if (completed == completed_before && started == started_before) ++spurious; assert(completed <= bufs); assert(started <= bufs); @@ -200,7 +270,9 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, test = 0; r = ioctl(dev->control, VHOST_TEST_RUN, &test); assert(r >= 0); - fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious); + fprintf(stderr, + "spurious wakeups: 0x%llx started=0x%lx completed=0x%lx\n", + spurious, started, completed); } const char optstring[] = "h"; @@ -226,6 +298,14 @@ const struct option longopts[] = { .val = 'i', }, { + .name = "virtio-1", + .val = '1', + }, + { + .name = "no-virtio-1", + .val = '0', + }, + { .name = "delayed-interrupt", .val = 'D', }, @@ -234,23 +314,39 @@ const struct option longopts[] = { .val = 'd', }, { + .name = "batch", + .val = 'b', + .has_arg = required_argument, + }, + { + .name = "reset", + .val = 'r', + .has_arg = optional_argument, + }, + { } }; -static void help(void) +static void help(int status) { fprintf(stderr, "Usage: virtio_test [--help]" " [--no-indirect]" " [--no-event-idx]" + " [--no-virtio-1]" " [--delayed-interrupt]" + " [--batch=random/N]" + " [--reset=N]" "\n"); + + exit(status); } int main(int argc, char **argv) { struct vdev_info dev; unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | - (1ULL << VIRTIO_RING_F_EVENT_IDX); + (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1); + long batch = 1, reset = 0; int o; bool delayed = false; @@ -260,20 +356,39 @@ int main(int argc, char **argv) case -1: goto done; case '?': - help(); - exit(2); + help(2); case 'e': features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); break; case 'h': - help(); - goto done; + help(0); case 'i': features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); break; + case '0': + features &= ~(1ULL << VIRTIO_F_VERSION_1); + break; case 'D': delayed = true; break; + case 'b': + if (0 == strcmp(optarg, "random")) { + batch = RANDOM_BATCH; + } else { + batch = strtol(optarg, NULL, 10); + assert(batch > 0); + assert(batch < (long)INT_MAX + 1); + } + break; + case 'r': + if (!optarg) { + reset = 1; + } else { + reset = strtol(optarg, NULL, 10); + assert(reset > 0); + assert(reset < (long)INT_MAX + 1); + } + break; default: assert(0); break; @@ -283,6 +398,6 @@ int main(int argc, char **argv) done: vdev_info_init(&dev, features); vq_info_add(&dev, 256); - run_test(&dev, &dev.vqs[0], delayed, 0x100000); + run_test(&dev, &dev.vqs[0], delayed, batch, reset, 0x100000); return 0; } diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index d053ea40c001..b9591223437a 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Simple test of virtio code, entirely in userpsace. */ #define _GNU_SOURCE #include <sched.h> @@ -7,6 +8,7 @@ #include <linux/virtio.h> #include <linux/vringh.h> #include <linux/virtio_ring.h> +#include <linux/virtio_config.h> #include <linux/uaccess.h> #include <sys/types.h> #include <sys/stat.h> @@ -22,7 +24,7 @@ static u64 user_addr_offset; #define RINGSIZE 256 #define ALIGN 4096 -static void never_notify_host(struct virtqueue *vq) +static bool never_notify_host(struct virtqueue *vq) { abort(); } @@ -65,17 +67,22 @@ struct guest_virtio_device { unsigned long notifies; }; -static void parallel_notify_host(struct virtqueue *vq) +static bool parallel_notify_host(struct virtqueue *vq) { + int rc; struct guest_virtio_device *gvdev; gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev); - write(gvdev->to_host_fd, "", 1); + rc = write(gvdev->to_host_fd, "", 1); + if (rc < 0) + return false; gvdev->notifies++; + return true; } -static void no_notify_host(struct virtqueue *vq) +static bool no_notify_host(struct virtqueue *vq) { + return true; } #define NUM_XFERS (10000000) @@ -126,13 +133,13 @@ static inline int vringh_get_head(struct vringh *vrh, u16 *head) return 1; } -static int parallel_test(unsigned long features, +static int parallel_test(u64 features, bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r), bool fast_vringh) { void *host_map, *guest_map; - int fd, mapsize, to_guest[2], to_host[2]; + int pipe_ret, fd, mapsize, to_guest[2], to_host[2]; unsigned long xfers = 0, notifies = 0, receives = 0; unsigned int first_cpu, last_cpu; cpu_set_t cpu_set; @@ -154,8 +161,11 @@ static int parallel_test(unsigned long features, host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - pipe(to_guest); - pipe(to_host); + pipe_ret = pipe(to_guest); + assert(!pipe_ret); + + pipe_ret = pipe(to_host); + assert(!pipe_ret); CPU_ZERO(&cpu_set); find_cpus(&first_cpu, &last_cpu); @@ -299,7 +309,9 @@ static int parallel_test(unsigned long features, close(to_guest[1]); close(to_host[0]); - gvdev.vdev.features[0] = features; + gvdev.vdev.features = features; + INIT_LIST_HEAD(&gvdev.vdev.vqs); + spin_lock_init(&gvdev.vdev.vqs_list_lock); gvdev.to_host_fd = to_host[1]; gvdev.notifies = 0; @@ -308,7 +320,8 @@ static int parallel_test(unsigned long features, err(1, "Could not set affinity to cpu %u", first_cpu); vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true, - guest_map, fast_vringh ? no_notify_host + false, guest_map, + fast_vringh ? no_notify_host : parallel_notify_host, never_callback_guest, "guest vq"); @@ -444,13 +457,17 @@ int main(int argc, char *argv[]) bool fast_vringh = false, parallel = false; getrange = getrange_iov; - vdev.features[0] = 0; + vdev.features = 0; + INIT_LIST_HEAD(&vdev.vqs); + spin_lock_init(&vdev.vqs_list_lock); while (argv[1]) { if (strcmp(argv[1], "--indirect") == 0) - vdev.features[0] |= (1 << VIRTIO_RING_F_INDIRECT_DESC); + __virtio_set_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC); else if (strcmp(argv[1], "--eventidx") == 0) - vdev.features[0] |= (1 << VIRTIO_RING_F_EVENT_IDX); + __virtio_set_bit(&vdev, VIRTIO_RING_F_EVENT_IDX); + else if (strcmp(argv[1], "--virtio-1") == 0) + __virtio_set_bit(&vdev, VIRTIO_F_VERSION_1); else if (strcmp(argv[1], "--slow-range") == 0) getrange = getrange_slow; else if (strcmp(argv[1], "--fast-vringh") == 0) @@ -463,7 +480,7 @@ int main(int argc, char *argv[]) } if (parallel) - return parallel_test(vdev.features[0], getrange, fast_vringh); + return parallel_test(vdev.features, getrange, fast_vringh); if (posix_memalign(&__user_addr_min, PAGE_SIZE, USER_MEM) != 0) abort(); @@ -471,14 +488,14 @@ int main(int argc, char *argv[]) memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN)); /* Set up guest side. */ - vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, + vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false, __user_addr_min, never_notify_host, never_callback_guest, "guest vq"); /* Set up host side. */ vring_init(&vrh.vring, RINGSIZE, __user_addr_min, ALIGN); - vringh_init_user(&vrh, vdev.features[0], RINGSIZE, true, + vringh_init_user(&vrh, vdev.features, RINGSIZE, true, vrh.vring.desc, vrh.vring.avail, vrh.vring.used); /* No descriptor to get yet... */ @@ -502,7 +519,7 @@ int main(int argc, char *argv[]) errx(1, "virtqueue_add_sgs: %i", err); __kmalloc_fake = NULL; - /* Host retreives it. */ + /* Host retrieves it. */ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); @@ -647,15 +664,15 @@ int main(int argc, char *argv[]) } /* Test weird (but legal!) indirect. */ - if (vdev.features[0] & (1 << VIRTIO_RING_F_INDIRECT_DESC)) { + if (__virtio_test_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC)) { char *data = __user_addr_max - USER_MEM/4; struct vring_desc *d = __user_addr_max - USER_MEM/2; struct vring vring; /* Force creation of direct, which we modify. */ - vdev.features[0] &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); + __virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC); vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, - __user_addr_min, + false, __user_addr_min, never_notify_host, never_callback_guest, "guest vq"); diff --git a/tools/virtio/xen/xen.h b/tools/virtio/xen/xen.h new file mode 100644 index 000000000000..f569387d1403 --- /dev/null +++ b/tools/virtio/xen/xen.h @@ -0,0 +1,6 @@ +#ifndef XEN_XEN_STUB_H +#define XEN_XEN_STUB_H + +#define xen_domain() 0 + +#endif |
