summaryrefslogtreecommitdiff
path: root/net/tls/tls_device.c
diff options
context:
space:
mode:
authorBoris Pismenny <borisp@nvidia.com>2022-05-18 12:27:31 +0300
committerPaolo Abeni <pabeni@redhat.com>2022-05-19 12:14:11 +0200
commitc1318b39c7d36bd5139a9c71044ff2b2d3c6f9d8 (patch)
treeab03a38f64254b0a40bcf4a6d1faef188e5a6fa0 /net/tls/tls_device.c
parente43d940f480b69ab98dd5d14e9fefdef8851db1a (diff)
tls: Add opt-in zerocopy mode of sendfile()
TLS device offload copies sendfile data to a bounce buffer before transmitting. It allows to maintain the valid MAC on TLS records when the file contents change and a part of TLS record has to be retransmitted on TCP level. In many common use cases (like serving static files over HTTPS) the file contents are not changed on the fly. In many use cases breaking the connection is totally acceptable if the file is changed during transmission, because it would be received corrupted in any case. This commit allows to optimize performance for such use cases to providing a new optional mode of TLS sendfile(), in which the extra copy is skipped. Removing this copy improves performance significantly, as TLS and TCP sendfile perform the same operations, and the only overhead is TLS header/trailer insertion. The new mode can only be enabled with the new socket option named TLS_TX_ZEROCOPY_SENDFILE on per-socket basis. It preserves backwards compatibility with existing applications that rely on the copying behavior. The new mode is safe, meaning that unsolicited modifications of the file being sent can't break integrity of the kernel. The worst thing that can happen is sending a corrupted TLS record, which is in any case not forbidden when using regular TCP sockets. Sockets other than TLS device offload are not affected by the new socket option. The actual status of zerocopy sendfile can be queried with sock_diag. Performance numbers in a single-core test with 24 HTTPS streams on nginx, under 100% CPU load: * non-zerocopy: 33.6 Gbit/s * zerocopy: 79.92 Gbit/s CPU: Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz Signed-off-by: Boris Pismenny <borisp@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com> Reviewed-by: Jakub Kicinski <kuba@kernel.org> Link: https://lore.kernel.org/r/20220518092731.1243494-1-maximmi@nvidia.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'net/tls/tls_device.c')
-rw-r--r--net/tls/tls_device.c53
1 files changed, 40 insertions, 13 deletions
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index bca00521ebc1..ec6f4b699a2b 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -411,10 +411,16 @@ static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
return 0;
}
+union tls_iter_offset {
+ struct iov_iter *msg_iter;
+ int offset;
+};
+
static int tls_push_data(struct sock *sk,
- struct iov_iter *msg_iter,
+ union tls_iter_offset iter_offset,
size_t size, int flags,
- unsigned char record_type)
+ unsigned char record_type,
+ struct page *zc_page)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -480,12 +486,21 @@ handle_error:
}
record = ctx->open_record;
- copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
- copy = min_t(size_t, copy, (max_open_record_len - record->len));
- if (copy) {
+ copy = min_t(size_t, size, max_open_record_len - record->len);
+ if (copy && zc_page) {
+ struct page_frag zc_pfrag;
+
+ zc_pfrag.page = zc_page;
+ zc_pfrag.offset = iter_offset.offset;
+ zc_pfrag.size = copy;
+ tls_append_frag(record, &zc_pfrag, copy);
+ } else if (copy) {
+ copy = min_t(size_t, copy, pfrag->size - pfrag->offset);
+
rc = tls_device_copy_data(page_address(pfrag->page) +
- pfrag->offset, copy, msg_iter);
+ pfrag->offset, copy,
+ iter_offset.msg_iter);
if (rc)
goto handle_error;
tls_append_frag(record, pfrag, copy);
@@ -540,6 +555,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
unsigned char record_type = TLS_RECORD_TYPE_DATA;
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ union tls_iter_offset iter;
int rc;
mutex_lock(&tls_ctx->tx_lock);
@@ -551,8 +567,8 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
goto out;
}
- rc = tls_push_data(sk, &msg->msg_iter, size,
- msg->msg_flags, record_type);
+ iter.msg_iter = &msg->msg_iter;
+ rc = tls_push_data(sk, iter, size, msg->msg_flags, record_type, NULL);
out:
release_sock(sk);
@@ -564,7 +580,8 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct iov_iter msg_iter;
+ union tls_iter_offset iter_offset;
+ struct iov_iter msg_iter;
char *kaddr;
struct kvec iov;
int rc;
@@ -580,12 +597,20 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
goto out;
}
+ if (tls_ctx->zerocopy_sendfile) {
+ iter_offset.offset = offset;
+ rc = tls_push_data(sk, iter_offset, size,
+ flags, TLS_RECORD_TYPE_DATA, page);
+ goto out;
+ }
+
kaddr = kmap(page);
iov.iov_base = kaddr + offset;
iov.iov_len = size;
iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
- rc = tls_push_data(sk, &msg_iter, size,
- flags, TLS_RECORD_TYPE_DATA);
+ iter_offset.msg_iter = &msg_iter;
+ rc = tls_push_data(sk, iter_offset, size, flags, TLS_RECORD_TYPE_DATA,
+ NULL);
kunmap(page);
out:
@@ -656,10 +681,12 @@ EXPORT_SYMBOL(tls_get_record);
static int tls_device_push_pending_record(struct sock *sk, int flags)
{
- struct iov_iter msg_iter;
+ union tls_iter_offset iter;
+ struct iov_iter msg_iter;
iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
- return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
+ iter.msg_iter = &msg_iter;
+ return tls_push_data(sk, iter, 0, flags, TLS_RECORD_TYPE_DATA, NULL);
}
void tls_device_write_space(struct sock *sk, struct tls_context *ctx)