diff options
Diffstat (limited to 'net/tls')
-rw-r--r-- | net/tls/Kconfig | 1 | ||||
-rw-r--r-- | net/tls/tls.h | 5 | ||||
-rw-r--r-- | net/tls/tls_device.c | 22 | ||||
-rw-r--r-- | net/tls/tls_device_fallback.c | 32 | ||||
-rw-r--r-- | net/tls/tls_main.c | 104 | ||||
-rw-r--r-- | net/tls/tls_proc.c | 5 | ||||
-rw-r--r-- | net/tls/tls_strp.c | 10 | ||||
-rw-r--r-- | net/tls/tls_sw.c | 360 | ||||
-rw-r--r-- | net/tls/trace.h | 2 |
9 files changed, 343 insertions, 198 deletions
diff --git a/net/tls/Kconfig b/net/tls/Kconfig index 0cdc1f7b6b08..ce8d56a19187 100644 --- a/net/tls/Kconfig +++ b/net/tls/Kconfig @@ -20,6 +20,7 @@ config TLS config TLS_DEVICE bool "Transport Layer Security HW offload" depends on TLS + select SKB_DECRYPTED select SOCK_VALIDATE_XMIT select SOCK_RX_QUEUE_MAPPING default n diff --git a/net/tls/tls.h b/net/tls/tls.h index 762f424ff2d5..774859b63f0d 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -145,7 +145,8 @@ void tls_err_abort(struct sock *sk, int err); int init_prot_info(struct tls_prot_info *prot, const struct tls_crypto_info *crypto_info, const struct tls_cipher_desc *cipher_desc); -int tls_set_sw_offload(struct sock *sk, int tx); +int tls_set_sw_offload(struct sock *sk, int tx, + struct tls_crypto_info *new_crypto_info); void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); @@ -215,7 +216,7 @@ static inline struct sk_buff *tls_strp_msg(struct tls_sw_context_rx *ctx) static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx) { - return ctx->strp.msg_ready; + return READ_ONCE(ctx->strp.msg_ready); } static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index bf8ed36b1ad6..f672a62a9a52 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -37,6 +37,7 @@ #include <net/inet_connection_sock.h> #include <net/tcp.h> #include <net/tls.h> +#include <linux/skbuff_ref.h> #include "tls.h" #include "trace.h" @@ -156,7 +157,7 @@ static void delete_all_records(struct tls_offload_context_tx *offload_ctx) offload_ctx->retransmit_hint = NULL; } -static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) +static void tls_tcp_clean_acked(struct sock *sk, u32 acked_seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_record_info *info, *temp; @@ -203,7 +204,7 @@ void tls_device_sk_destruct(struct sock *sk) destroy_record(ctx->open_record); delete_all_records(ctx); crypto_free_aead(ctx->aead_send); - clean_acked_data_disable(inet_csk(sk)); + clean_acked_data_disable(tcp_sk(sk)); } tls_device_queue_ctx_destruction(tls_ctx); @@ -230,14 +231,10 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, u32 seq) { struct net_device *netdev; - struct sk_buff *skb; int err = 0; u8 *rcd_sn; - skb = tcp_write_queue_tail(sk); - if (skb) - TCP_SKB_CB(skb)->eor = 1; - + tcp_write_collapse_fence(sk); rcd_sn = tls_ctx->tx.rec_seq; trace_tls_device_tx_resync_send(sk, seq, rcd_sn); @@ -1066,7 +1063,6 @@ int tls_set_device_offload(struct sock *sk) struct tls_prot_info *prot; struct net_device *netdev; struct tls_context *ctx; - struct sk_buff *skb; char *iv, *rec_seq; int rc; @@ -1130,16 +1126,14 @@ int tls_set_device_offload(struct sock *sk) start_marker_record->num_frags = 0; list_add_tail(&start_marker_record->list, &offload_ctx->records_list); - clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked); + clean_acked_data_enable(tcp_sk(sk), &tls_tcp_clean_acked); ctx->push_pending_record = tls_device_push_pending_record; /* TLS offload is greatly simplified if we don't send * SKBs where only part of the payload needs to be encrypted. * So mark the last skb in the write queue as end of record. */ - skb = tcp_write_queue_tail(sk); - if (skb) - TCP_SKB_CB(skb)->eor = 1; + tcp_write_collapse_fence(sk); /* Avoid offloading if the device is down * We don't want to offload new flows after @@ -1178,7 +1172,7 @@ int tls_set_device_offload(struct sock *sk) release_lock: up_read(&device_offload_lock); - clean_acked_data_disable(inet_csk(sk)); + clean_acked_data_disable(tcp_sk(sk)); crypto_free_aead(offload_ctx->aead_send); free_offload_ctx: kfree(offload_ctx); @@ -1233,7 +1227,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) context->resync_nh_reset = 1; ctx->priv_ctx_rx = context; - rc = tls_set_sw_offload(sk, 0); + rc = tls_set_sw_offload(sk, 0, NULL); if (rc) goto release_ctx; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 4e7228f275fa..03d508a45aae 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -33,20 +33,10 @@ #include <crypto/aead.h> #include <crypto/scatterwalk.h> #include <net/ip6_checksum.h> +#include <linux/skbuff_ref.h> #include "tls.h" -static void chain_to_walk(struct scatterlist *sg, struct scatter_walk *walk) -{ - struct scatterlist *src = walk->sg; - int diff = walk->offset - src->offset; - - sg_set_page(sg, sg_page(src), - src->length - diff, walk->offset); - - scatterwalk_crypto_chain(sg, sg_next(src), 2); -} - static int tls_enc_record(struct aead_request *aead_req, struct crypto_aead *aead, char *aad, char *iv, __be64 rcd_sn, @@ -68,16 +58,13 @@ static int tls_enc_record(struct aead_request *aead_req, buf_size = TLS_HEADER_SIZE + cipher_desc->iv; len = min_t(int, *in_len, buf_size); - scatterwalk_copychunks(buf, in, len, 0); - scatterwalk_copychunks(buf, out, len, 1); + memcpy_from_scatterwalk(buf, in, len); + memcpy_to_scatterwalk(out, buf, len); *in_len -= len; if (!*in_len) return 0; - scatterwalk_pagedone(in, 0, 1); - scatterwalk_pagedone(out, 1, 1); - len = buf[4] | (buf[3] << 8); len -= cipher_desc->iv; @@ -89,8 +76,8 @@ static int tls_enc_record(struct aead_request *aead_req, sg_init_table(sg_out, ARRAY_SIZE(sg_out)); sg_set_buf(sg_in, aad, TLS_AAD_SPACE_SIZE); sg_set_buf(sg_out, aad, TLS_AAD_SPACE_SIZE); - chain_to_walk(sg_in + 1, in); - chain_to_walk(sg_out + 1, out); + scatterwalk_get_sglist(in, sg_in + 1); + scatterwalk_get_sglist(out, sg_out + 1); *in_len -= len; if (*in_len < 0) { @@ -109,10 +96,8 @@ static int tls_enc_record(struct aead_request *aead_req, } if (*in_len) { - scatterwalk_copychunks(NULL, in, len, 2); - scatterwalk_pagedone(in, 0, 1); - scatterwalk_copychunks(NULL, out, len, 2); - scatterwalk_pagedone(out, 1, 1); + scatterwalk_skip(in, len); + scatterwalk_skip(out, len); } len -= cipher_desc->tag; @@ -161,9 +146,6 @@ static int tls_enc_records(struct aead_request *aead_req, } while (rc == 0 && len); - scatterwalk_done(&in, 0, 0); - scatterwalk_done(&out, 1, 0); - return rc; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 1c2c6800949d..a3ccb3135e51 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -423,9 +423,10 @@ static __poll_t tls_sk_poll(struct file *file, struct socket *sock, ctx = tls_sw_ctx_rx(tls_ctx); psock = sk_psock_get(sk); - if (skb_queue_empty_lockless(&ctx->rx_list) && - !tls_strp_msg_ready(ctx) && - sk_psock_queue_empty(psock)) + if ((skb_queue_empty_lockless(&ctx->rx_list) && + !tls_strp_msg_ready(ctx) && + sk_psock_queue_empty(psock)) || + READ_ONCE(ctx->key_update_pending)) mask &= ~(EPOLLIN | EPOLLRDNORM); if (psock) @@ -612,10 +613,13 @@ static int validate_crypto_info(const struct tls_crypto_info *crypto_info, static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, unsigned int optlen, int tx) { - struct tls_crypto_info *crypto_info; - struct tls_crypto_info *alt_crypto_info; + struct tls_crypto_info *crypto_info, *alt_crypto_info; + struct tls_crypto_info *old_crypto_info = NULL; struct tls_context *ctx = tls_get_ctx(sk); const struct tls_cipher_desc *cipher_desc; + union tls_crypto_context *crypto_ctx; + union tls_crypto_context tmp = {}; + bool update = false; int rc = 0; int conf; @@ -623,16 +627,30 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, return -EINVAL; if (tx) { - crypto_info = &ctx->crypto_send.info; + crypto_ctx = &ctx->crypto_send; alt_crypto_info = &ctx->crypto_recv.info; } else { - crypto_info = &ctx->crypto_recv.info; + crypto_ctx = &ctx->crypto_recv; alt_crypto_info = &ctx->crypto_send.info; } - /* Currently we don't support set crypto info more than one time */ - if (TLS_CRYPTO_INFO_READY(crypto_info)) - return -EBUSY; + crypto_info = &crypto_ctx->info; + + if (TLS_CRYPTO_INFO_READY(crypto_info)) { + /* Currently we only support setting crypto info more + * than one time for TLS 1.3 + */ + if (crypto_info->version != TLS_1_3_VERSION) { + TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR + : LINUX_MIB_TLSRXREKEYERROR); + return -EBUSY; + } + + update = true; + old_crypto_info = crypto_info; + crypto_info = &tmp.info; + crypto_ctx = &tmp; + } rc = copy_from_sockptr(crypto_info, optval, sizeof(*crypto_info)); if (rc) { @@ -640,7 +658,14 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, goto err_crypto_info; } - rc = validate_crypto_info(crypto_info, alt_crypto_info); + if (update) { + /* Ensure that TLS version and ciphers are not modified */ + if (crypto_info->version != old_crypto_info->version || + crypto_info->cipher_type != old_crypto_info->cipher_type) + rc = -EINVAL; + } else { + rc = validate_crypto_info(crypto_info, alt_crypto_info); + } if (rc) goto err_crypto_info; @@ -670,11 +695,17 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE); } else { - rc = tls_set_sw_offload(sk, 1); + rc = tls_set_sw_offload(sk, 1, + update ? crypto_info : NULL); if (rc) goto err_crypto_info; - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); + + if (update) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXREKEYOK); + } else { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); + } conf = TLS_SW; } } else { @@ -684,14 +715,21 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE); } else { - rc = tls_set_sw_offload(sk, 0); + rc = tls_set_sw_offload(sk, 0, + update ? crypto_info : NULL); if (rc) goto err_crypto_info; - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + + if (update) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYOK); + } else { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + } conf = TLS_SW; } - tls_sw_strparser_arm(sk, ctx); + if (!update) + tls_sw_strparser_arm(sk, ctx); } if (tx) @@ -699,6 +737,10 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, else ctx->rx_conf = conf; update_sk_prot(sk, ctx); + + if (update) + return 0; + if (tx) { ctx->sk_write_space = sk->sk_write_space; sk->sk_write_space = tls_write_space; @@ -710,7 +752,11 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, return 0; err_crypto_info: - memzero_explicit(crypto_info, sizeof(union tls_crypto_context)); + if (update) { + TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR + : LINUX_MIB_TLSRXREKEYERROR); + } + memzero_explicit(crypto_ctx, sizeof(*crypto_ctx)); return rc; } @@ -806,6 +852,11 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, return do_tls_setsockopt(sk, optname, optval, optlen); } +static int tls_disconnect(struct sock *sk, int flags) +{ + return -EOPNOTSUPP; +} + struct tls_context *tls_ctx_create(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -816,9 +867,17 @@ struct tls_context *tls_ctx_create(struct sock *sk) return NULL; mutex_init(&ctx->tx_lock); - rcu_assign_pointer(icsk->icsk_ulp_data, ctx); ctx->sk_proto = READ_ONCE(sk->sk_prot); ctx->sk = sk; + /* Release semantic of rcu_assign_pointer() ensures that + * ctx->sk_proto is visible before changing sk->sk_prot in + * update_sk_prot(), and prevents reading uninitialized value in + * tls_{getsockopt, setsockopt}. Note that we do not need a + * read barrier in tls_{getsockopt,setsockopt} as there is an + * address dependency between sk->sk_proto->{getsockopt,setsockopt} + * and ctx->sk_proto. + */ + rcu_assign_pointer(icsk->icsk_ulp_data, ctx); return ctx; } @@ -893,6 +952,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_BASE] = *base; prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt; + prot[TLS_BASE][TLS_BASE].disconnect = tls_disconnect; prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close; prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; @@ -1003,7 +1063,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb, bool net_admin) { u16 version, cipher_type; struct tls_context *ctx; @@ -1061,7 +1121,7 @@ nla_failure: return err; } -static size_t tls_get_info_size(const struct sock *sk) +static size_t tls_get_info_size(const struct sock *sk, bool net_admin) { size_t size = 0; diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c index 68982728f620..367666aa07b8 100644 --- a/net/tls/tls_proc.c +++ b/net/tls/tls_proc.c @@ -22,6 +22,11 @@ static const struct snmp_mib tls_mib_list[] = { SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC), SNMP_MIB_ITEM("TlsDecryptRetry", LINUX_MIB_TLSDECRYPTRETRY), SNMP_MIB_ITEM("TlsRxNoPadViolation", LINUX_MIB_TLSRXNOPADVIOL), + SNMP_MIB_ITEM("TlsRxRekeyOk", LINUX_MIB_TLSRXREKEYOK), + SNMP_MIB_ITEM("TlsRxRekeyError", LINUX_MIB_TLSRXREKEYERROR), + SNMP_MIB_ITEM("TlsTxRekeyOk", LINUX_MIB_TLSTXREKEYOK), + SNMP_MIB_ITEM("TlsTxRekeyError", LINUX_MIB_TLSTXREKEYERROR), + SNMP_MIB_ITEM("TlsRxRekeyReceived", LINUX_MIB_TLSRXREKEYRECEIVED), SNMP_MIB_SENTINEL }; diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index ca1e0e198ceb..65b0da6fdf6a 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -2,6 +2,7 @@ /* Copyright (c) 2016 Tom Herbert <tom@herbertland.com> */ #include <linux/skbuff.h> +#include <linux/skbuff_ref.h> #include <linux/workqueue.h> #include <net/strparser.h> #include <net/tcp.h> @@ -360,7 +361,7 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, if (strp->stm.full_len && strp->stm.full_len == skb->len) { desc->count = 0; - strp->msg_ready = 1; + WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); } @@ -395,7 +396,6 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) return 0; shinfo = skb_shinfo(strp->anchor); - shinfo->frag_list = NULL; /* If we don't know the length go max plus page for cipher overhead */ need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE; @@ -411,6 +411,8 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) page, 0, 0); } + shinfo->frag_list = NULL; + strp->copy_mode = 1; strp->stm.offset = 0; @@ -528,7 +530,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp) if (!tls_strp_check_queue_ok(strp)) return tls_strp_read_copy(strp, false); - strp->msg_ready = 1; + WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); return 0; @@ -580,7 +582,7 @@ void tls_strp_msg_done(struct tls_strparser *strp) else tls_strp_flush_anchor_copy(strp); - strp->msg_ready = 0; + WRITE_ONCE(strp->msg_ready, 0); memset(&strp->stm, 0, sizeof(strp->stm)); tls_strp_check_rcv(strp); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 31e8a94dfc11..fc88e34b7f33 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -52,6 +52,7 @@ struct tls_decrypt_arg { struct_group(inargs, bool zc; bool async; + bool async_done; u8 tail; ); @@ -63,6 +64,7 @@ struct tls_decrypt_ctx { u8 iv[TLS_MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; + bool free_sgout; struct scatterlist sg[]; }; @@ -187,7 +189,6 @@ static void tls_decrypt_done(void *data, int err) struct aead_request *aead_req = data; struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; - struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; @@ -196,6 +197,17 @@ static void tls_decrypt_done(void *data, int err) struct sock *sk; int aead_size; + /* If requests get too backlogged crypto API returns -EBUSY and calls + * ->complete(-EINPROGRESS) immediately followed by ->complete(0) + * to make waiting for backlog to flush with crypto_wait_req() easier. + * First wait converts -EBUSY -> -EINPROGRESS, and the second one + * -EINPROGRESS -> 0. + * We have a single struct crypto_async_request per direction, this + * scheme doesn't help us, so just ignore the first ->complete(). + */ + if (err == -EINPROGRESS) + return; + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); aead_size = ALIGN(aead_size, __alignof__(*dctx)); dctx = (void *)((u8 *)aead_req + aead_size); @@ -213,7 +225,7 @@ static void tls_decrypt_done(void *data, int err) } /* Free the destination pages if skb was not decrypted inplace */ - if (sgout != sgin) { + if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) @@ -224,10 +236,17 @@ static void tls_decrypt_done(void *data, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - if (!atomic_dec_return(&ctx->decrypt_pending)) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); +} + +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + if (!atomic_dec_and_test(&ctx->decrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); + + return ctx->async_wait.err; } static int tls_do_decryption(struct sock *sk, @@ -253,20 +272,33 @@ static int tls_do_decryption(struct sock *sk, aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, aead_req); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { + DECLARE_CRYPTO_WAIT(wait); + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &ctx->async_wait); + crypto_req_done, &wait); + ret = crypto_aead_decrypt(aead_req); + if (ret == -EINPROGRESS || ret == -EBUSY) + ret = crypto_wait_req(ret, &wait); + return ret; } ret = crypto_aead_decrypt(aead_req); - if (ret == -EINPROGRESS) { - if (darg->async) - return 0; + if (ret == -EINPROGRESS) + return 0; - ret = crypto_wait_req(ret, &ctx->async_wait); + if (ret == -EBUSY) { + ret = tls_decrypt_async_wait(ctx); + darg->async_done = true; + /* all completions have run, we're not doing async anymore */ + darg->async = false; + return ret; } + + atomic_dec(&ctx->decrypt_pending); darg->async = false; return ret; @@ -426,7 +458,7 @@ int tls_tx_records(struct sock *sk, int flags) tx_err: if (rc < 0 && rc != -EAGAIN) - tls_err_abort(sk, -EBADMSG); + tls_err_abort(sk, rc); return rc; } @@ -439,9 +471,10 @@ static void tls_encrypt_done(void *data, int err) struct tls_rec *rec = data; struct scatterlist *sge; struct sk_msg *msg_en; - bool ready = false; struct sock *sk; - int pending; + + if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ + return; msg_en = &rec->msg_encrypted; @@ -476,23 +509,25 @@ static void tls_encrypt_done(void *data, int err) /* If received record is at head of tx_list, schedule tx */ first_rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); - if (rec == first_rec) - ready = true; + if (rec == first_rec) { + /* Schedule the transmission */ + if (!test_and_set_bit(BIT_TX_SCHEDULED, + &ctx->tx_bitmask)) + schedule_delayed_work(&ctx->tx_work.work, 1); + } } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); +} - if (!ready) - return; +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + if (!atomic_dec_and_test(&ctx->encrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->encrypt_pending); - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) - schedule_delayed_work(&ctx->tx_work.work, 1); + return ctx->async_wait.err; } static int tls_do_encryption(struct sock *sk, @@ -541,9 +576,14 @@ static int tls_do_encryption(struct sock *sk, /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); + if (rc == -EBUSY) { + rc = tls_encrypt_async_wait(ctx); + rc = rc ?: -EINPROGRESS; + } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; @@ -868,6 +908,13 @@ more_data: &msg_redir, send, flags); lock_sock(sk); if (err < 0) { + /* Regardless of whether the data represented by + * msg_redir is sent successfully, we have already + * uncharged it via sk_msg_return_zero(). The + * msg->sg.size represents the remaining unprocessed + * data, which needs to be uncharged here. + */ + sk_mem_uncharge(sk, msg->sg.size); *copied -= sk_msg_free_nocharge(sk, &msg_redir); msg->sg.size = 0; } @@ -984,7 +1031,6 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, int num_zc = 0; int orig_size; int ret = 0; - int pending; if (!eor && (msg->msg_flags & MSG_EOR)) return -EINVAL; @@ -1081,9 +1127,13 @@ alloc_encrypted: num_async++; else if (ret == -ENOMEM) goto wait_for_memory; - else if (ctx->open_rec && ret == -ENOSPC) + else if (ctx->open_rec && ret == -ENOSPC) { + if (msg_pl->cork_bytes) { + ret = 0; + goto send_end; + } goto rollback_iter; - else if (ret != -EAGAIN) + } else if (ret != -EAGAIN) goto send_end; } continue; @@ -1162,25 +1212,13 @@ trim_sgl: if (!num_async) { goto send_end; - } else if (num_zc) { - /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + } else if (num_zc || eor) { + int err; - if (ctx->async_wait.err) { - ret = ctx->async_wait.err; + /* Wait for pending encryptions to get completed */ + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1229,7 +1267,6 @@ void tls_sw_splice_eof(struct socket *sock) ssize_t copied = 0; bool retrying = false; int ret = 0; - int pending; if (!ctx->open_rec) return; @@ -1264,22 +1301,7 @@ retry: } /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no pending - * encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - - if (ctx->async_wait.err) + if (tls_encrypt_async_wait(ctx)) goto unlock; /* Transmit if any encryptions have completed */ @@ -1303,6 +1325,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, int ret = 0; long timeo; + /* a rekey is pending, let userspace deal with it */ + if (unlikely(ctx->key_update_pending)) + return -EKEYEXPIRED; + timeo = sock_rcvtimeo(sk, nonblock); while (!tls_strp_msg_ready(ctx)) { @@ -1581,12 +1607,16 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } + dctx->free_sgout = !!pages; /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, data_len + prot->tail_size, aead_req, darg); - if (err) + if (err) { + if (darg->async_done) + goto exit_free_skb; goto exit_free_pages; + } darg->skb = clear_skb ?: tls_strp_msg(ctx); clear_skb = NULL; @@ -1598,6 +1628,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, return err; } + if (unlikely(darg->async_done)) + return 0; + if (prot->tail_size) darg->tail = dctx->tail; @@ -1702,6 +1735,36 @@ tls_decrypt_device(struct sock *sk, struct msghdr *msg, return 1; } +static int tls_check_pending_rekey(struct sock *sk, struct tls_context *ctx, + struct sk_buff *skb) +{ + const struct strp_msg *rxm = strp_msg(skb); + const struct tls_msg *tlm = tls_msg(skb); + char hs_type; + int err; + + if (likely(tlm->control != TLS_RECORD_TYPE_HANDSHAKE)) + return 0; + + if (rxm->full_len < 1) + return 0; + + err = skb_copy_bits(skb, rxm->offset, &hs_type, 1); + if (err < 0) { + DEBUG_NET_WARN_ON_ONCE(1); + return err; + } + + if (hs_type == TLS_HANDSHAKE_KEYUPDATE) { + struct tls_sw_context_rx *rx_ctx = ctx->priv_ctx_rx; + + WRITE_ONCE(rx_ctx->key_update_pending, true); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYRECEIVED); + } + + return 0; +} + static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, struct tls_decrypt_arg *darg) { @@ -1721,7 +1784,7 @@ static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); - return 0; + return tls_check_pending_rekey(sk, tls_ctx, darg->skb); } int decrypt_skb(struct sock *sk, struct scatterlist *sgout) @@ -1769,7 +1832,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1782,7 +1846,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1800,12 +1864,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1841,6 +1905,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -1940,10 +2008,12 @@ int tls_sw_recvmsg(struct sock *sk, struct strp_msg *rxm; struct tls_msg *tlm; ssize_t copied = 0; + ssize_t peeked = 0; bool async = false; int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1951,10 +2021,10 @@ int tls_sw_recvmsg(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); - psock = sk_psock_get(sk); err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); if (err < 0) return err; + psock = sk_psock_get(sk); bpf_strp_enabled = sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */ @@ -1963,12 +2033,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2061,6 +2131,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } @@ -2084,8 +2156,10 @@ put_on_rx_list: if (err < 0) goto put_on_rx_list_err; - if (is_peek) + if (is_peek) { + peeked += chunk; goto put_on_rx_list; + } if (partially_consumed) { rxm->offset += chunk; @@ -2109,33 +2183,28 @@ put_on_rx_list: recv_end: if (async) { - int ret, pending; + int ret; /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - ret = 0; - if (pending) - ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + ret = tls_decrypt_async_wait(ctx); __skb_queue_purge(&ctx->async_hold); if (ret) { if (err >= 0 || err == -EINPROGRESS) err = ret; - decrypted = 0; goto end; } /* Drain records from the rx_list & copy if required */ - if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + if (is_peek) + err = process_rx_list(ctx, msg, &control, copied + peeked, + decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); - decrypted += max(err, 0); + async_copy_bytes, is_peek, NULL); + + /* we could have copied less than we wanted, and possibly nothing */ + decrypted += max(err, 0) - async_copy_bytes; } copied += decrypted; @@ -2435,16 +2504,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); @@ -2607,7 +2669,7 @@ static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct soc } crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); INIT_LIST_HEAD(&sw_ctx_tx->tx_list); INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); sw_ctx_tx->tx_work.sk = sk; @@ -2628,7 +2690,7 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) } crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); init_waitqueue_head(&sw_ctx_rx->wq); skb_queue_head_init(&sw_ctx_rx->rx_list); skb_queue_head_init(&sw_ctx_rx->async_hold); @@ -2667,12 +2729,22 @@ int init_prot_info(struct tls_prot_info *prot, return 0; } -int tls_set_sw_offload(struct sock *sk, int tx) +static void tls_finish_key_update(struct sock *sk, struct tls_context *tls_ctx) +{ + struct tls_sw_context_rx *ctx = tls_ctx->priv_ctx_rx; + + WRITE_ONCE(ctx->key_update_pending, false); + /* wake-up pre-existing poll() */ + ctx->saved_data_ready(sk); +} + +int tls_set_sw_offload(struct sock *sk, int tx, + struct tls_crypto_info *new_crypto_info) { + struct tls_crypto_info *crypto_info, *src_crypto_info; struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; const struct tls_cipher_desc *cipher_desc; - struct tls_crypto_info *crypto_info; char *iv, *rec_seq, *key, *salt; struct cipher_context *cctx; struct tls_prot_info *prot; @@ -2684,44 +2756,47 @@ int tls_set_sw_offload(struct sock *sk, int tx) ctx = tls_get_ctx(sk); prot = &ctx->prot_info; - if (tx) { - ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); - if (!ctx->priv_ctx_tx) - return -ENOMEM; + /* new_crypto_info != NULL means rekey */ + if (!new_crypto_info) { + if (tx) { + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; + } else { + ctx->priv_ctx_rx = init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + } + } + if (tx) { sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; } else { - ctx->priv_ctx_rx = init_ctx_rx(ctx); - if (!ctx->priv_ctx_rx) - return -ENOMEM; - sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; aead = &sw_ctx_rx->aead_recv; } - cipher_desc = get_cipher_desc(crypto_info->cipher_type); + src_crypto_info = new_crypto_info ?: crypto_info; + + cipher_desc = get_cipher_desc(src_crypto_info->cipher_type); if (!cipher_desc) { rc = -EINVAL; goto free_priv; } - rc = init_prot_info(prot, crypto_info, cipher_desc); + rc = init_prot_info(prot, src_crypto_info, cipher_desc); if (rc) goto free_priv; - iv = crypto_info_iv(crypto_info, cipher_desc); - key = crypto_info_key(crypto_info, cipher_desc); - salt = crypto_info_salt(crypto_info, cipher_desc); - rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); - - memcpy(cctx->iv, salt, cipher_desc->salt); - memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv); - memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq); + iv = crypto_info_iv(src_crypto_info, cipher_desc); + key = crypto_info_key(src_crypto_info, cipher_desc); + salt = crypto_info_salt(src_crypto_info, cipher_desc); + rec_seq = crypto_info_rec_seq(src_crypto_info, cipher_desc); if (!*aead) { *aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0); @@ -2734,20 +2809,30 @@ int tls_set_sw_offload(struct sock *sk, int tx) ctx->push_pending_record = tls_sw_push_pending_record; + /* setkey is the last operation that could fail during a + * rekey. if it succeeds, we can start modifying the + * context. + */ rc = crypto_aead_setkey(*aead, key, cipher_desc->key); - if (rc) - goto free_aead; + if (rc) { + if (new_crypto_info) + goto out; + else + goto free_aead; + } - rc = crypto_aead_setauthsize(*aead, prot->tag_size); - if (rc) - goto free_aead; + if (!new_crypto_info) { + rc = crypto_aead_setauthsize(*aead, prot->tag_size); + if (rc) + goto free_aead; + } - if (sw_ctx_rx) { + if (!tx && !new_crypto_info) { tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); tls_update_rx_zc_capable(ctx); sw_ctx_rx->async_capable = - crypto_info->version != TLS_1_3_VERSION && + src_crypto_info->version != TLS_1_3_VERSION && !!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC); rc = tls_strp_init(&sw_ctx_rx->strp, sk); @@ -2755,18 +2840,33 @@ int tls_set_sw_offload(struct sock *sk, int tx) goto free_aead; } + memcpy(cctx->iv, salt, cipher_desc->salt); + memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv); + memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq); + + if (new_crypto_info) { + unsafe_memcpy(crypto_info, new_crypto_info, + cipher_desc->crypto_info, + /* size was checked in do_tls_setsockopt_conf */); + memzero_explicit(new_crypto_info, cipher_desc->crypto_info); + if (!tx) + tls_finish_key_update(sk, ctx); + } + goto out; free_aead: crypto_free_aead(*aead); *aead = NULL; free_priv: - if (tx) { - kfree(ctx->priv_ctx_tx); - ctx->priv_ctx_tx = NULL; - } else { - kfree(ctx->priv_ctx_rx); - ctx->priv_ctx_rx = NULL; + if (!new_crypto_info) { + if (tx) { + kfree(ctx->priv_ctx_tx); + ctx->priv_ctx_tx = NULL; + } else { + kfree(ctx->priv_ctx_rx); + ctx->priv_ctx_rx = NULL; + } } out: return rc; diff --git a/net/tls/trace.h b/net/tls/trace.h index 9ba5f600ea43..2d8ce4ff3265 100644 --- a/net/tls/trace.h +++ b/net/tls/trace.h @@ -7,7 +7,7 @@ #if !defined(_TLS_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _TLS_TRACE_H_ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/tracepoint.h> struct sock; |