summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2023-03-29 08:19:38 +0100
committerDavid S. Miller <davem@davemloft.net>2023-03-29 08:19:38 +0100
commit5a8c8b72f65f6b80b52b21a207a4ddc3011d6440 (patch)
tree754396a677b3ba62fa5756be11c1b3ca9cba2009 /tools
parent24265c2c91ad6aae9446e18472566cd83e92b602 (diff)
parentd61bd8c1fd02cfc8aed00a58c20bd58c72549e3a (diff)
Merge branch 'vsock-sockmap-support'
Bobby Eshleman says: ==================== Add support for sockmap to vsock. We're testing usage of vsock as a way to redirect guest-local UDS requests to the host and this patch series greatly improves the performance of such a setup. Compared to copying packets via userspace, this improves throughput by 121% in basic testing. Tested as follows. Setup: guest unix dgram sender -> guest vsock redirector -> host vsock server Threads: 1 Payload: 64k No sockmap: - 76.3 MB/s - The guest vsock redirector was "socat VSOCK-CONNECT:2:1234 UNIX-RECV:/path/to/sock" Using sockmap (this patch): - 168.8 MB/s (+121%) - The guest redirector was a simple sockmap echo server, redirecting unix ingress to vsock 2:1234 egress. - Same sender and server programs *Note: these numbers are from RFC v1 Only the virtio transport has been tested. The loopback transport was used in writing bpf/selftests, but not thoroughly tested otherwise. This series requires the skb patch. Changes in v4: - af_vsock: fix parameter alignment in vsock_dgram_recvmsg() - af_vsock: add TCP_ESTABLISHED comment in vsock_dgram_connect() - vsock/bpf: change ret type to bool Changes in v3: - vsock/bpf: Refactor wait logic in vsock_bpf_recvmsg() to avoid backwards goto - vsock/bpf: Check psock before acquiring slock - vsock/bpf: Return bool instead of int of 0 or 1 - vsock/bpf: Wrap macro args __sk/__psock in parens - vsock/bpf: Place comment trailer */ on separate line Changes in v2: - vsock/bpf: rename vsock_dgram_* -> vsock_* - vsock/bpf: change sk_psock_{get,put} and {lock,release}_sock() order to minimize slock hold time - vsock/bpf: use "new style" wait - vsock/bpf: fix bug in wait log - vsock/bpf: add check that recvmsg sk_type is one dgram, seqpacket, or stream. Return error if not one of the three. - virtio/vsock: comment __skb_recv_datagram() usage - virtio/vsock: do not init copied in read_skb() - vsock/bpf: add ifdef guard around struct proto in dgram_recvmsg() - selftests/bpf: add vsock loopback config for aarch64 - selftests/bpf: add vsock loopback config for s390x - selftests/bpf: remove vsock device from vmtest.sh qemu machine - selftests/bpf: remove CONFIG_VIRTIO_VSOCKETS=y from config.x86_64 - vsock/bpf: move transport-related (e.g., if (!vsk->transport)) checks out of fast path ==================== Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/selftests/bpf/config.aarch642
-rw-r--r--tools/testing/selftests/bpf/config.s390x3
-rw-r--r--tools/testing/selftests/bpf/config.x86_643
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c163
4 files changed, 171 insertions, 0 deletions
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index 1f0437644186..253821494884 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -176,6 +176,8 @@ CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_NET=y
CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_VLAN_8021Q=y
CONFIG_VSOCKETS=y
+CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index d49f6170e7bd..2ba92167be35 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -140,5 +140,8 @@ CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_BLK=y
CONFIG_VIRTIO_NET=y
CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS=y
+CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index dd97d61d325c..b650b2e617b8 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -234,7 +234,10 @@ CONFIG_VIRTIO_BLK=y
CONFIG_VIRTIO_CONSOLE=y
CONFIG_VIRTIO_NET=y
CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS=y
+CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_X86_CPUID=y
CONFIG_X86_MSR=y
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 567e07c19ecc..8f09e1ea3ba7 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -18,6 +18,7 @@
#include <string.h>
#include <sys/select.h>
#include <unistd.h>
+#include <linux/vm_sockets.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -251,6 +252,16 @@ static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
*len = sizeof(*addr6);
}
+static void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len)
+{
+ struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->svm_family = AF_VSOCK;
+ addr->svm_port = VMADDR_PORT_ANY;
+ addr->svm_cid = VMADDR_CID_LOCAL;
+ *len = sizeof(*addr);
+}
+
static void init_addr_loopback(int family, struct sockaddr_storage *ss,
socklen_t *len)
{
@@ -261,6 +272,9 @@ static void init_addr_loopback(int family, struct sockaddr_storage *ss,
case AF_INET6:
init_addr_loopback6(ss, len);
return;
+ case AF_VSOCK:
+ init_addr_loopback_vsock(ss, len);
+ return;
default:
FAIL("unsupported address family %d", family);
}
@@ -1478,6 +1492,8 @@ static const char *family_str(sa_family_t family)
return "IPv6";
case AF_UNIX:
return "Unix";
+ case AF_VSOCK:
+ return "VSOCK";
default:
return "unknown";
}
@@ -1689,6 +1705,151 @@ static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *ma
unix_skb_redir_to_connected(skel, map, sotype);
}
+/* Returns two connected loopback vsock sockets */
+static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int s, p, c;
+
+ s = socket_loopback(AF_VSOCK, sotype);
+ if (s < 0)
+ return -1;
+
+ c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0);
+ if (c == -1)
+ goto close_srv;
+
+ if (getsockname(s, sockaddr(&addr), &len) < 0)
+ goto close_cli;
+
+ if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) {
+ FAIL_ERRNO("connect");
+ goto close_cli;
+ }
+
+ len = sizeof(addr);
+ p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC);
+ if (p < 0)
+ goto close_cli;
+
+ *v0 = p;
+ *v1 = c;
+
+ return 0;
+
+close_cli:
+ close(c);
+close_srv:
+ close(s);
+
+ return -1;
+}
+
+static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
+ enum redir_mode mode, int sotype)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ char a = 'a', b = 'b';
+ int u0, u1, v0, v1;
+ int sfd[2];
+ unsigned int pass;
+ int err, n;
+ u32 key;
+
+ zero_verdict_count(verd_mapfd);
+
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
+ return;
+
+ u0 = sfd[0];
+ u1 = sfd[1];
+
+ err = vsock_socketpair_connectible(sotype, &v0, &v1);
+ if (err) {
+ FAIL("vsock_socketpair_connectible() failed");
+ goto close_uds;
+ }
+
+ err = add_to_sockmap(sock_mapfd, u0, v0);
+ if (err) {
+ FAIL("add_to_sockmap failed");
+ goto close_vsock;
+ }
+
+ n = write(v1, &a, sizeof(a));
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto out;
+
+ n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT);
+ if (n < 0)
+ FAIL("%s: recv() err, errno=%d", log_prefix, errno);
+ if (n == 0)
+ FAIL("%s: incomplete recv", log_prefix);
+ if (b != a)
+ FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto out;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+out:
+ key = 0;
+ bpf_map_delete_elem(sock_mapfd, &key);
+ key = 1;
+ bpf_map_delete_elem(sock_mapfd, &key);
+
+close_vsock:
+ close(v0);
+ close(v1);
+
+close_uds:
+ close(u0);
+ close(u1);
+}
+
+static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
+ skel->bss->test_ingress = true;
+ vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(AF_VSOCK);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+
+ vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
+ vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
+}
+
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -2060,12 +2221,14 @@ void serial_test_sockmap_listen(void)
run_tests(skel, skel->maps.sock_map, AF_INET6);
test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
+ test_vsock_redir(skel, skel->maps.sock_map);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
+ test_vsock_redir(skel, skel->maps.sock_hash);
test_sockmap_listen__destroy(skel);
}