diff options
| author | David S. Miller <davem@davemloft.net> | 2023-10-15 13:19:43 +0100 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2023-10-15 13:19:43 +0100 |
| commit | 4b714fd1a05b26665badf19e484878eb80f0417a (patch) | |
| tree | 64d420ecfc7eec85eaf44c9435fd1918670dd588 /tools | |
| parent | 85605fb694f084ba017c93c150e668882445ce73 (diff) | |
| parent | 8d211285c6d48baca4934c54b1965d4e75ce35e2 (diff) | |
Merge branch 'vsock-virtio-vhost-zerocopy'
Arseniy Krasnov says:
====================
vsock/virtio/vhost: MSG_ZEROCOPY preparations
this patchset is first of three parts of another big patchset for
MSG_ZEROCOPY flag support:
https://lore.kernel.org/netdev/20230701063947.3422088-1-AVKrasnov@sberdevices.ru/
During review of this series, Stefano Garzarella <sgarzare@redhat.com>
suggested to split it for three parts to simplify review and merging:
1) virtio and vhost updates (for fragged skbs) <--- this patchset
2) AF_VSOCK updates (allows to enable MSG_ZEROCOPY mode and read
tx completions) and update for Documentation/.
3) Updates for tests and utils.
This series enables handling of fragged skbs in virtio and vhost parts.
Newly logic won't be triggered, because SO_ZEROCOPY options is still
impossible to enable at this moment (next bunch of patches from big
set above will enable it).
I've included changelog to some patches anyway, because there were some
comments during review of last big patchset from the link above.
Head for this patchset is:
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=f2fa1c812c91e99d0317d1fc7d845e1e05f39716
Link to v1:
https://lore.kernel.org/netdev/20230717210051.856388-1-AVKrasnov@sberdevices.ru/
Link to v2:
https://lore.kernel.org/netdev/20230718180237.3248179-1-AVKrasnov@sberdevices.ru/
Link to v3:
https://lore.kernel.org/netdev/20230720214245.457298-1-AVKrasnov@sberdevices.ru/
Link to v4:
https://lore.kernel.org/netdev/20230727222627.1895355-1-AVKrasnov@sberdevices.ru/
Link to v5:
https://lore.kernel.org/netdev/20230730085905.3420811-1-AVKrasnov@sberdevices.ru/
Link to v6:
https://lore.kernel.org/netdev/20230814212720.3679058-1-AVKrasnov@sberdevices.ru/
Link to v7:
https://lore.kernel.org/netdev/20230827085436.941183-1-avkrasnov@salutedevices.com/
Link to v8:
https://lore.kernel.org/netdev/20230911202234.1932024-1-avkrasnov@salutedevices.com/
Changelog:
v3 -> v4:
* Patchset rebased and tested on new HEAD of net-next (see hash above).
v4 -> v5:
* See per-patch changelog after ---.
v5 -> v6:
* Patchset rebased and tested on new HEAD of net-next (see hash above).
* See per-patch changelog after ---.
v6 -> v7:
* Patchset rebased and tested on new HEAD of net-next (see hash above).
* See per-patch changelog after ---.
v7 -> v8:
* Patchset rebased and tested on new HEAD of net-next (see hash above).
* See per-patch changelog after ---.
v8 -> v9:
* Patchset rebased and tested on new HEAD of net-next (see hash above).
* See per-patch changelog after ---.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/testing/vsock/.gitignore | 1 | ||||
| -rw-r--r-- | tools/testing/vsock/Makefile | 11 | ||||
| -rw-r--r-- | tools/testing/vsock/msg_zerocopy_common.c | 87 | ||||
| -rw-r--r-- | tools/testing/vsock/msg_zerocopy_common.h | 18 | ||||
| -rw-r--r-- | tools/testing/vsock/util.c | 133 | ||||
| -rw-r--r-- | tools/testing/vsock/util.h | 5 | ||||
| -rw-r--r-- | tools/testing/vsock/vsock_perf.c | 80 | ||||
| -rw-r--r-- | tools/testing/vsock/vsock_test.c | 16 | ||||
| -rw-r--r-- | tools/testing/vsock/vsock_test_zerocopy.c | 358 | ||||
| -rw-r--r-- | tools/testing/vsock/vsock_test_zerocopy.h | 15 | ||||
| -rw-r--r-- | tools/testing/vsock/vsock_uring_test.c | 342 |
11 files changed, 1053 insertions, 13 deletions
diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore index a8adcfdc292b..d9f798713cd7 100644 --- a/tools/testing/vsock/.gitignore +++ b/tools/testing/vsock/.gitignore @@ -3,3 +3,4 @@ vsock_test vsock_diag_test vsock_perf +vsock_uring_test diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index 21a98ba565ab..a7f56a09ca9f 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -1,12 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-only all: test vsock_perf -test: vsock_test vsock_diag_test -vsock_test: vsock_test.o timeout.o control.o util.o +test: vsock_test vsock_diag_test vsock_uring_test +vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o msg_zerocopy_common.o vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o -vsock_perf: vsock_perf.o +vsock_perf: vsock_perf.o msg_zerocopy_common.o + +vsock_uring_test: LDLIBS = -luring +vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE .PHONY: all test clean clean: - ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf + ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf vsock_uring_test -include *.d diff --git a/tools/testing/vsock/msg_zerocopy_common.c b/tools/testing/vsock/msg_zerocopy_common.c new file mode 100644 index 000000000000..5a4bdf7b5132 --- /dev/null +++ b/tools/testing/vsock/msg_zerocopy_common.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Some common code for MSG_ZEROCOPY logic + * + * Copyright (C) 2023 SberDevices. + * + * Author: Arseniy Krasnov <avkrasnov@salutedevices.com> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <linux/errqueue.h> + +#include "msg_zerocopy_common.h" + +void enable_so_zerocopy(int fd) +{ + int val = 1; + + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { + perror("setsockopt"); + exit(EXIT_FAILURE); + } +} + +void vsock_recv_completion(int fd, const bool *zerocopied) +{ + struct sock_extended_err *serr; + struct msghdr msg = { 0 }; + char cmsg_data[128]; + struct cmsghdr *cm; + ssize_t res; + + msg.msg_control = cmsg_data; + msg.msg_controllen = sizeof(cmsg_data); + + res = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (res) { + fprintf(stderr, "failed to read error queue: %zi\n", res); + exit(EXIT_FAILURE); + } + + cm = CMSG_FIRSTHDR(&msg); + if (!cm) { + fprintf(stderr, "cmsg: no cmsg\n"); + exit(EXIT_FAILURE); + } + + if (cm->cmsg_level != SOL_VSOCK) { + fprintf(stderr, "cmsg: unexpected 'cmsg_level'\n"); + exit(EXIT_FAILURE); + } + + if (cm->cmsg_type != VSOCK_RECVERR) { + fprintf(stderr, "cmsg: unexpected 'cmsg_type'\n"); + exit(EXIT_FAILURE); + } + + serr = (void *)CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + fprintf(stderr, "serr: wrong origin: %u\n", serr->ee_origin); + exit(EXIT_FAILURE); + } + + if (serr->ee_errno) { + fprintf(stderr, "serr: wrong error code: %u\n", serr->ee_errno); + exit(EXIT_FAILURE); + } + + /* This flag is used for tests, to check that transmission was + * performed as expected: zerocopy or fallback to copy. If NULL + * - don't care. + */ + if (!zerocopied) + return; + + if (*zerocopied && (serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)) { + fprintf(stderr, "serr: was copy instead of zerocopy\n"); + exit(EXIT_FAILURE); + } + + if (!*zerocopied && !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)) { + fprintf(stderr, "serr: was zerocopy instead of copy\n"); + exit(EXIT_FAILURE); + } +} diff --git a/tools/testing/vsock/msg_zerocopy_common.h b/tools/testing/vsock/msg_zerocopy_common.h new file mode 100644 index 000000000000..3763c5ccedb9 --- /dev/null +++ b/tools/testing/vsock/msg_zerocopy_common.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef MSG_ZEROCOPY_COMMON_H +#define MSG_ZEROCOPY_COMMON_H + +#include <stdbool.h> + +#ifndef SOL_VSOCK +#define SOL_VSOCK 287 +#endif + +#ifndef VSOCK_RECVERR +#define VSOCK_RECVERR 1 +#endif + +void enable_so_zerocopy(int fd); +void vsock_recv_completion(int fd, const bool *zerocopied); + +#endif /* MSG_ZEROCOPY_COMMON_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 6779d5008b27..92336721321a 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -11,10 +11,12 @@ #include <stdio.h> #include <stdint.h> #include <stdlib.h> +#include <string.h> #include <signal.h> #include <unistd.h> #include <assert.h> #include <sys/epoll.h> +#include <sys/mman.h> #include "timeout.h" #include "control.h" @@ -444,3 +446,134 @@ unsigned long hash_djb2(const void *data, size_t len) return hash; } + +size_t iovec_bytes(const struct iovec *iov, size_t iovnum) +{ + size_t bytes; + int i; + + for (bytes = 0, i = 0; i < iovnum; i++) + bytes += iov[i].iov_len; + + return bytes; +} + +unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum) +{ + unsigned long hash; + size_t iov_bytes; + size_t offs; + void *tmp; + int i; + + iov_bytes = iovec_bytes(iov, iovnum); + + tmp = malloc(iov_bytes); + if (!tmp) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + for (offs = 0, i = 0; i < iovnum; i++) { + memcpy(tmp + offs, iov[i].iov_base, iov[i].iov_len); + offs += iov[i].iov_len; + } + + hash = hash_djb2(tmp, iov_bytes); + free(tmp); + + return hash; +} + +/* Allocates and returns new 'struct iovec *' according pattern + * in the 'test_iovec'. For each element in the 'test_iovec' it + * allocates new element in the resulting 'iovec'. 'iov_len' + * of the new element is copied from 'test_iovec'. 'iov_base' is + * allocated depending on the 'iov_base' of 'test_iovec': + * + * 'iov_base' == NULL -> valid buf: mmap('iov_len'). + * + * 'iov_base' == MAP_FAILED -> invalid buf: + * mmap('iov_len'), then munmap('iov_len'). + * 'iov_base' still contains result of + * mmap(). + * + * 'iov_base' == number -> unaligned valid buf: + * mmap('iov_len') + number. + * + * 'iovnum' is number of elements in 'test_iovec'. + * + * Returns new 'iovec' or calls 'exit()' on error. + */ +struct iovec *alloc_test_iovec(const struct iovec *test_iovec, int iovnum) +{ + struct iovec *iovec; + int i; + + iovec = malloc(sizeof(*iovec) * iovnum); + if (!iovec) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + for (i = 0; i < iovnum; i++) { + iovec[i].iov_len = test_iovec[i].iov_len; + + iovec[i].iov_base = mmap(NULL, iovec[i].iov_len, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, + -1, 0); + if (iovec[i].iov_base == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + + if (test_iovec[i].iov_base != MAP_FAILED) + iovec[i].iov_base += (uintptr_t)test_iovec[i].iov_base; + } + + /* Unmap "invalid" elements. */ + for (i = 0; i < iovnum; i++) { + if (test_iovec[i].iov_base == MAP_FAILED) { + if (munmap(iovec[i].iov_base, iovec[i].iov_len)) { + perror("munmap"); + exit(EXIT_FAILURE); + } + } + } + + for (i = 0; i < iovnum; i++) { + int j; + + if (test_iovec[i].iov_base == MAP_FAILED) + continue; + + for (j = 0; j < iovec[i].iov_len; j++) + ((uint8_t *)iovec[i].iov_base)[j] = rand() & 0xff; + } + + return iovec; +} + +/* Frees 'iovec *', previously allocated by 'alloc_test_iovec()'. + * On error calls 'exit()'. + */ +void free_test_iovec(const struct iovec *test_iovec, + struct iovec *iovec, int iovnum) +{ + int i; + + for (i = 0; i < iovnum; i++) { + if (test_iovec[i].iov_base != MAP_FAILED) { + if (test_iovec[i].iov_base) + iovec[i].iov_base -= (uintptr_t)test_iovec[i].iov_base; + + if (munmap(iovec[i].iov_base, iovec[i].iov_len)) { + perror("munmap"); + exit(EXIT_FAILURE); + } + } + } + + free(iovec); +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index e5407677ce05..a77175d25864 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -53,4 +53,9 @@ void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); unsigned long hash_djb2(const void *data, size_t len); +size_t iovec_bytes(const struct iovec *iov, size_t iovnum); +unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum); +struct iovec *alloc_test_iovec(const struct iovec *test_iovec, int iovnum); +void free_test_iovec(const struct iovec *test_iovec, + struct iovec *iovec, int iovnum); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c index a72520338f84..4e8578f815e0 100644 --- a/tools/testing/vsock/vsock_perf.c +++ b/tools/testing/vsock/vsock_perf.c @@ -18,6 +18,9 @@ #include <poll.h> #include <sys/socket.h> #include <linux/vm_sockets.h> +#include <sys/mman.h> + +#include "msg_zerocopy_common.h" #define DEFAULT_BUF_SIZE_BYTES (128 * 1024) #define DEFAULT_TO_SEND_BYTES (64 * 1024) @@ -31,6 +34,7 @@ static unsigned int port = DEFAULT_PORT; static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; +static bool zerocopy; static void error(const char *s) { @@ -252,10 +256,15 @@ static void run_sender(int peer_cid, unsigned long to_send_bytes) time_t tx_begin_ns; time_t tx_total_ns; size_t total_send; + time_t time_in_send; void *data; int fd; - printf("Run as sender\n"); + if (zerocopy) + printf("Run as sender MSG_ZEROCOPY\n"); + else + printf("Run as sender\n"); + printf("Connect to %i:%u\n", peer_cid, port); printf("Send %lu bytes\n", to_send_bytes); printf("TX buffer %lu bytes\n", buf_size_bytes); @@ -265,38 +274,82 @@ static void run_sender(int peer_cid, unsigned long to_send_bytes) if (fd < 0) exit(EXIT_FAILURE); - data = malloc(buf_size_bytes); + if (zerocopy) { + enable_so_zerocopy(fd); - if (!data) { - fprintf(stderr, "'malloc()' failed\n"); - exit(EXIT_FAILURE); + data = mmap(NULL, buf_size_bytes, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (data == MAP_FAILED) { + perror("mmap"); + exit(EXIT_FAILURE); + } + } else { + data = malloc(buf_size_bytes); + + if (!data) { + fprintf(stderr, "'malloc()' failed\n"); + exit(EXIT_FAILURE); + } } memset(data, 0, buf_size_bytes); total_send = 0; + time_in_send = 0; tx_begin_ns = current_nsec(); while (total_send < to_send_bytes) { ssize_t sent; + size_t rest_bytes; + time_t before; - sent = write(fd, data, buf_size_bytes); + rest_bytes = to_send_bytes - total_send; + + before = current_nsec(); + sent = send(fd, data, (rest_bytes > buf_size_bytes) ? + buf_size_bytes : rest_bytes, + zerocopy ? MSG_ZEROCOPY : 0); + time_in_send += (current_nsec() - before); if (sent <= 0) error("write"); total_send += sent; + + if (zerocopy) { + struct pollfd fds = { 0 }; + + fds.fd = fd; + + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + if (!(fds.revents & POLLERR)) { + fprintf(stderr, "POLLERR expected\n"); + exit(EXIT_FAILURE); + } + + vsock_recv_completion(fd, NULL); + } } tx_total_ns = current_nsec() - tx_begin_ns; printf("total bytes sent: %zu\n", total_send); printf("tx performance: %f Gbits/s\n", - get_gbps(total_send * 8, tx_total_ns)); - printf("total time in 'write()': %f sec\n", + get_gbps(total_send * 8, time_in_send)); + printf("total time in tx loop: %f sec\n", (float)tx_total_ns / NSEC_PER_SEC); + printf("time in 'send()': %f sec\n", + (float)time_in_send / NSEC_PER_SEC); close(fd); - free(data); + + if (zerocopy) + munmap(data, buf_size_bytes); + else + free(data); } static const char optstring[] = ""; @@ -336,6 +389,11 @@ static const struct option longopts[] = { .has_arg = required_argument, .val = 'R', }, + { + .name = "zerocopy", + .has_arg = no_argument, + .val = 'Z', + }, {}, }; @@ -351,6 +409,7 @@ static void usage(void) " --help This message\n" " --sender <cid> Sender mode (receiver default)\n" " <cid> of the receiver to connect to\n" + " --zerocopy Enable zerocopy (for sender mode only)\n" " --port <port> Port (default %d)\n" " --bytes <bytes>KMG Bytes to send (default %d)\n" " --buf-size <bytes>KMG Data buffer size (default %d). In sender mode\n" @@ -413,6 +472,9 @@ int main(int argc, char **argv) case 'H': /* Help. */ usage(); break; + case 'Z': /* Zerocopy. */ + zerocopy = true; + break; default: usage(); } diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index da4cb819a183..c1f7bc9abd22 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -21,6 +21,7 @@ #include <poll.h> #include <signal.h> +#include "vsock_test_zerocopy.h" #include "timeout.h" #include "control.h" #include "util.h" @@ -1269,6 +1270,21 @@ static struct test_case test_cases[] = { .run_client = test_stream_shutrd_client, .run_server = test_stream_shutrd_server, }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY", + .run_client = test_stream_msgzcopy_client, + .run_server = test_stream_msgzcopy_server, + }, + { + .name = "SOCK_SEQPACKET MSG_ZEROCOPY", + .run_client = test_seqpacket_msgzcopy_client, + .run_server = test_seqpacket_msgzcopy_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY empty MSG_ERRQUEUE", + .run_client = test_stream_msgzcopy_empty_errq_client, + .run_server = test_stream_msgzcopy_empty_errq_server, + }, {}, }; diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c new file mode 100644 index 000000000000..a16ff76484e6 --- /dev/null +++ b/tools/testing/vsock/vsock_test_zerocopy.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* MSG_ZEROCOPY feature tests for vsock + * + * Copyright (C) 2023 SberDevices. + * + * Author: Arseniy Krasnov <avkrasnov@salutedevices.com> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> +#include <poll.h> +#include <linux/errqueue.h> +#include <linux/kernel.h> +#include <errno.h> + +#include "control.h" +#include "vsock_test_zerocopy.h" +#include "msg_zerocopy_common.h" + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#define VSOCK_TEST_DATA_MAX_IOV 3 + +struct vsock_test_data { + /* This test case if for SOCK_STREAM only. */ + bool stream_only; + /* Data must be zerocopied. This field is checked against + * field 'ee_code' of the 'struct sock_extended_err', which + * contains bit to detect that zerocopy transmission was + * fallbacked to copy mode. + */ + bool zerocopied; + /* Enable SO_ZEROCOPY option on the socket. Without enabled + * SO_ZEROCOPY, every MSG_ZEROCOPY transmission will behave + * like without MSG_ZEROCOPY flag. + */ + bool so_zerocopy; + /* 'errno' after 'sendmsg()' call. */ + int sendmsg_errno; + /* Number of valid elements in 'vecs'. */ + int vecs_cnt; + struct iovec vecs[VSOCK_TEST_DATA_MAX_IOV]; +}; + +static struct vsock_test_data test_data_array[] = { + /* Last element has non-page aligned size. */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { NULL, PAGE_SIZE }, + { NULL, 200 } + } + }, + /* All elements have page aligned base and size. */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { NULL, PAGE_SIZE * 2 }, + { NULL, PAGE_SIZE * 3 } + } + }, + /* All elements have page aligned base and size. But + * data length is bigger than 64Kb. + */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE * 16 }, + { NULL, PAGE_SIZE * 16 }, + { NULL, PAGE_SIZE * 16 } + } + }, + /* Middle element has both non-page aligned base and size. */ + { + .zerocopied = true, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { (void *)1, 100 }, + { NULL, PAGE_SIZE } + } + }, + /* Middle element is unmapped. */ + { + .zerocopied = false, + .so_zerocopy = true, + .sendmsg_errno = ENOMEM, + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { MAP_FAILED, PAGE_SIZE }, + { NULL, PAGE_SIZE } + } + }, + /* Valid data, but SO_ZEROCOPY is off. This + * will trigger fallback to copy. + */ + { + .zerocopied = false, + .so_zerocopy = false, + .sendmsg_errno = 0, + .vecs_cnt = 1, + { + { NULL, PAGE_SIZE } + } + }, + /* Valid data, but message is bigger than peer's + * buffer, so this will trigger fallback to copy. + * This test is for SOCK_STREAM only, because + * for SOCK_SEQPACKET, 'sendmsg()' returns EMSGSIZE. + */ + { + .stream_only = true, + .zerocopied = false, + .so_zerocopy = true, + .sendmsg_errno = 0, + .vecs_cnt = 1, + { + { NULL, 100 * PAGE_SIZE } + } + }, +}; + +#define POLL_TIMEOUT_MS 100 + +static void test_client(const struct test_opts *opts, + const struct vsock_test_data *test_data, + bool sock_seqpacket) +{ + struct pollfd fds = { 0 }; + struct msghdr msg = { 0 }; + ssize_t sendmsg_res; + struct iovec *iovec; + int fd; + + if (sock_seqpacket) + fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + else + fd = vsock_stream_connect(opts->peer_cid, 1234); + + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (test_data->so_zerocopy) + enable_so_zerocopy(fd); + + iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); + + msg.msg_iov = iovec; + msg.msg_iovlen = test_data->vecs_cnt; + + errno = 0; + + sendmsg_res = sendmsg(fd, &msg, MSG_ZEROCOPY); + if (errno != test_data->sendmsg_errno) { + fprintf(stderr, "expected 'errno' == %i, got %i\n", + test_data->sendmsg_errno, errno); + exit(EXIT_FAILURE); + } + + if (!errno) { + if (sendmsg_res != iovec_bytes(iovec, test_data->vecs_cnt)) { + fprintf(stderr, "expected 'sendmsg()' == %li, got %li\n", + iovec_bytes(iovec, test_data->vecs_cnt), + sendmsg_res); + exit(EXIT_FAILURE); + } + } + + fds.fd = fd; + fds.events = 0; + + if (poll(&fds, 1, POLL_TIMEOUT_MS) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLERR) { + vsock_recv_completion(fd, &test_data->zerocopied); + } else if (test_data->so_zerocopy && !test_data->sendmsg_errno) { + /* If we don't have data in the error queue, but + * SO_ZEROCOPY was enabled and 'sendmsg()' was + * successful - this is an error. + */ + fprintf(stderr, "POLLERR expected\n"); + exit(EXIT_FAILURE); + } + + if (!test_data->sendmsg_errno) + control_writeulong(iovec_hash_djb2(iovec, test_data->vecs_cnt)); + else + control_writeulong(0); + + control_writeln("DONE"); + free_test_iovec(test_data->vecs, iovec, test_data->vecs_cnt); + close(fd); +} + +void test_stream_msgzcopy_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + test_client(opts, &test_data_array[i], false); +} + +void test_seqpacket_msgzcopy_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) { + if (test_data_array[i].stream_only) + continue; + + test_client(opts, &test_data_array[i], true); + } +} + +static void test_server(const struct test_opts *opts, + const struct vsock_test_data *test_data, + bool sock_seqpacket) +{ + unsigned long remote_hash; + unsigned long local_hash; + ssize_t total_bytes_rec; + unsigned char *data; + size_t data_len; + int fd; + + if (sock_seqpacket) + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + else + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + data_len = iovec_bytes(test_data->vecs, test_data->vecs_cnt); + + data = malloc(data_len); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + total_bytes_rec = 0; + + while (total_bytes_rec != data_len) { + ssize_t bytes_rec; + + bytes_rec = read(fd, data + total_bytes_rec, + data_len - total_bytes_rec); + if (bytes_rec <= 0) + break; + + total_bytes_rec += bytes_rec; + } + + if (test_data->sendmsg_errno == 0) + local_hash = hash_djb2(data, data_len); + else + local_hash = 0; + + free(data); + + /* Waiting for some result. */ + remote_hash = control_readulong(); + if (remote_hash != local_hash) { + fprintf(stderr, "hash mismatch\n"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); + close(fd); +} + +void test_stream_msgzcopy_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + test_server(opts, &test_data_array[i], false); +} + +void test_seqpacket_msgzcopy_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) { + if (test_data_array[i].stream_only) + continue; + + test_server(opts, &test_data_array[i], true); + } +} + +void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts) +{ + struct msghdr msg = { 0 }; + char cmsg_data[128]; + ssize_t res; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + msg.msg_control = cmsg_data; + msg.msg_controllen = sizeof(cmsg_data); + + res = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (res != -1) { + fprintf(stderr, "expected 'recvmsg(2)' failure, got %zi\n", + res); + exit(EXIT_FAILURE); + } + + control_writeln("DONE"); + close(fd); +} + +void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); + close(fd); +} diff --git a/tools/testing/vsock/vsock_test_zerocopy.h b/tools/testing/vsock/vsock_test_zerocopy.h new file mode 100644 index 000000000000..3ef2579e024d --- /dev/null +++ b/tools/testing/vsock/vsock_test_zerocopy.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef VSOCK_TEST_ZEROCOPY_H +#define VSOCK_TEST_ZEROCOPY_H +#include "util.h" + +void test_stream_msgzcopy_client(const struct test_opts *opts); +void test_stream_msgzcopy_server(const struct test_opts *opts); + +void test_seqpacket_msgzcopy_client(const struct test_opts *opts); +void test_seqpacket_msgzcopy_server(const struct test_opts *opts); + +void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts); +void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts); + +#endif /* VSOCK_TEST_ZEROCOPY_H */ diff --git a/tools/testing/vsock/vsock_uring_test.c b/tools/testing/vsock/vsock_uring_test.c new file mode 100644 index 000000000000..d976d35f0ba9 --- /dev/null +++ b/tools/testing/vsock/vsock_uring_test.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* io_uring tests for vsock + * + * Copyright (C) 2023 SberDevices. + * + * Author: Arseniy Krasnov <avkrasnov@salutedevices.com> + */ + +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <liburing.h> +#include <unistd.h> +#include <sys/mman.h> +#include <linux/kernel.h> +#include <error.h> + +#include "util.h" +#include "control.h" +#include "msg_zerocopy_common.h" + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#define RING_ENTRIES_NUM 4 + +#define VSOCK_TEST_DATA_MAX_IOV 3 + +struct vsock_io_uring_test { + /* Number of valid elements in 'vecs'. */ + int vecs_cnt; + struct iovec vecs[VSOCK_TEST_DATA_MAX_IOV]; +}; + +static struct vsock_io_uring_test test_data_array[] = { + /* All elements have page aligned base and size. */ + { + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { NULL, 2 * PAGE_SIZE }, + { NULL, 3 * PAGE_SIZE }, + } + }, + /* Middle element has both non-page aligned base and size. */ + { + .vecs_cnt = 3, + { + { NULL, PAGE_SIZE }, + { (void *)1, 200 }, + { NULL, 3 * PAGE_SIZE }, + } + } +}; + +static void vsock_io_uring_client(const struct test_opts *opts, + const struct vsock_io_uring_test *test_data, + bool msg_zerocopy) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct io_uring ring; + struct iovec *iovec; + struct msghdr msg; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (msg_zerocopy) + enable_so_zerocopy(fd); + + iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); + + if (io_uring_queue_init(RING_ENTRIES_NUM, &ring, 0)) + error(1, errno, "io_uring_queue_init"); + + if (io_uring_register_buffers(&ring, iovec, test_data->vecs_cnt)) + error(1, errno, "io_uring_register_buffers"); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iovec; + msg.msg_iovlen = test_data->vecs_cnt; + sqe = io_uring_get_sqe(&ring); + + if (msg_zerocopy) + io_uring_prep_sendmsg_zc(sqe, fd, &msg, 0); + else + io_uring_prep_sendmsg(sqe, fd, &msg, 0); + + if (io_uring_submit(&ring) != 1) + error(1, errno, "io_uring_submit"); + + if (io_uring_wait_cqe(&ring, &cqe)) + error(1, errno, "io_uring_wait_cqe"); + + io_uring_cqe_seen(&ring, cqe); + + control_writeulong(iovec_hash_djb2(iovec, test_data->vecs_cnt)); + + control_writeln("DONE"); + io_uring_queue_exit(&ring); + free_test_iovec(test_data->vecs, iovec, test_data->vecs_cnt); + close(fd); +} + +static void vsock_io_uring_server(const struct test_opts *opts, + const struct vsock_io_uring_test *test_data) +{ + unsigned long remote_hash; + unsigned long local_hash; + struct io_uring ring; + size_t data_len; + size_t recv_len; + void *data; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + data_len = iovec_bytes(test_data->vecs, test_data->vecs_cnt); + + data = malloc(data_len); + if (!data) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + if (io_uring_queue_init(RING_ENTRIES_NUM, &ring, 0)) + error(1, errno, "io_uring_queue_init"); + + recv_len = 0; + + while (recv_len < data_len) { + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + struct iovec iovec; + + sqe = io_uring_get_sqe(&ring); + iovec.iov_base = data + recv_len; + iovec.iov_len = data_len; + + io_uring_prep_readv(sqe, fd, &iovec, 1, 0); + + if (io_uring_submit(&ring) != 1) + error(1, errno, "io_uring_submit"); + + if (io_uring_wait_cqe(&ring, &cqe)) + error(1, errno, "io_uring_wait_cqe"); + + recv_len += cqe->res; + io_uring_cqe_seen(&ring, cqe); + } + + if (recv_len != data_len) { + fprintf(stderr, "expected %zu, got %zu\n", data_len, + recv_len); + exit(EXIT_FAILURE); + } + + local_hash = hash_djb2(data, data_len); + + remote_hash = control_readulong(); + if (remote_hash != local_hash) { + fprintf(stderr, "hash mismatch\n"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); + io_uring_queue_exit(&ring); + free(data); +} + +void test_stream_uring_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_server(opts, &test_data_array[i]); +} + +void test_stream_uring_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_client(opts, &test_data_array[i], false); +} + +void test_stream_uring_msg_zc_server(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_server(opts, &test_data_array[i]); +} + +void test_stream_uring_msg_zc_client(const struct test_opts *opts) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_data_array); i++) + vsock_io_uring_client(opts, &test_data_array[i], true); +} + +static struct test_case test_cases[] = { + { + .name = "SOCK_STREAM io_uring test", + .run_server = test_stream_uring_server, + .run_client = test_stream_uring_client, + }, + { + .name = "SOCK_STREAM io_uring MSG_ZEROCOPY test", + .run_server = test_stream_uring_msg_zc_server, + .run_client = test_stream_uring_msg_zc_client, + }, + {}, +}; + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "control-host", + .has_arg = required_argument, + .val = 'H', + }, + { + .name = "control-port", + .has_arg = required_argument, + .val = 'P', + }, + { + .name = "mode", + .has_arg = required_argument, + .val = 'm', + }, + { + .name = "peer-cid", + .has_arg = required_argument, + .val = 'p', + }, + { + .name = "help", + .has_arg = no_argument, + .val = '?', + }, + {}, +}; + +static void usage(void) +{ + fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n" + "\n" + " Server: vsock_uring_test --control-port=1234 --mode=server --peer-cid=3\n" + " Client: vsock_uring_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" + "\n" + "Run transmission tests using io_uring. Usage is the same as\n" + "in ./vsock_test\n" + "\n" + "Options:\n" + " --help This help message\n" + " --control-host <host> Server IP address to connect to\n" + " --control-port <port> Server port to listen on/connect to\n" + " --mode client|server Server or client mode\n" + " --peer-cid <cid> CID of the other side\n" + ); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + const char *control_host = NULL; + const char *control_port = NULL; + struct test_opts opts = { + .mode = TEST_MODE_UNSET, + .peer_cid = VMADDR_CID_ANY, + }; + + init_signals(); + + for (;;) { + int opt = getopt_long(argc, argv, optstring, longopts, NULL); + + if (opt == -1) + break; + + switch (opt) { + case 'H': + control_host = optarg; + break; + case 'm': + if (strcmp(optarg, "client") == 0) { + opts.mode = TEST_MODE_CLIENT; + } else if (strcmp(optarg, "server") == 0) { + opts.mode = TEST_MODE_SERVER; + } else { + fprintf(stderr, "--mode must be \"client\" or \"server\"\n"); + return EXIT_FAILURE; + } + break; + case 'p': + opts.peer_cid = parse_cid(optarg); + break; + case 'P': + control_port = optarg; + break; + case '?': + default: + usage(); + } + } + + if (!control_port) + usage(); + if (opts.mode == TEST_MODE_UNSET) + usage(); + if (opts.peer_cid == VMADDR_CID_ANY) + usage(); + + if (!control_host) { + if (opts.mode != TEST_MODE_SERVER) + usage(); + control_host = "0.0.0.0"; + } + + control_init(control_host, control_port, + opts.mode == TEST_MODE_SERVER); + + run_tests(test_cases, &opts); + + control_cleanup(); + + return 0; +} |
