Diffstat (limited to 'net/tls')
-rw-r--r--  net/tls/Kconfig                   1
-rw-r--r--  net/tls/tls.h                    95
-rw-r--r--  net/tls/tls_device.c            376
-rw-r--r--  net/tls/tls_device_fallback.c   119
-rw-r--r--  net/tls/tls_main.c              590
-rw-r--r--  net/tls/tls_proc.c               15
-rw-r--r--  net/tls/tls_strp.c              228
-rw-r--r--  net/tls/tls_sw.c               1167
-rw-r--r--  net/tls/trace.h                   2
9 files changed, 1432 insertions, 1161 deletions
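The hunks below rework the kTLS TX path around MSG_SPLICE_PAGES, replace the per-cipher switch statements with a shared tls_cipher_desc table, and add two user-visible knobs: a TLS_TX_MAX_PAYLOAD_LEN socket option that caps the TLS record payload size, and TLS 1.3 rekeying by repeating the TLS_TX/TLS_RX setsockopt (counted by the new TlsTxRekeyOk and TlsRxRekeyOk MIB entries). As a rough userspace sketch, not part of the patch, assuming a kTLS socket already set up with the tls ULP and TLS_TX, and a patched <linux/tls.h> that defines TLS_TX_MAX_PAYLOAD_LEN, the new option would be used roughly like this:

```c
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>
#include <linux/tls.h>	/* SOL_TLS; TLS_TX_MAX_PAYLOAD_LEN is assumed to come from the patched uapi header */

/* Cap the kTLS TX record payload size.  Per the setsockopt handler added
 * below, the kernel takes a u16, rejects values outside roughly
 * [TLS_MIN_RECORD_SIZE_LIM, TLS_MAX_PAYLOAD_SIZE], and fails with EBUSY
 * while a partially filled record is still open.
 */
static int cap_tls_record_payload(int sk, uint16_t max_payload)
{
	if (setsockopt(sk, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN,
		       &max_payload, sizeof(max_payload)) < 0) {
		perror("setsockopt(TLS_TX_MAX_PAYLOAD_LEN)");
		return -errno;
	}
	return 0;
}
```

Reading the value back uses the same u16 convention, via the matching getsockopt path and the TLS_INFO_TX_MAX_PAYLOAD_LEN diag attribute added in tls_main.c below.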
diff --git a/net/tls/Kconfig b/net/tls/Kconfig index 0cdc1f7b6b08..ce8d56a19187 100644 --- a/net/tls/Kconfig +++ b/net/tls/Kconfig @@ -20,6 +20,7 @@ config TLS config TLS_DEVICE bool "Transport Layer Security HW offload" depends on TLS + select SKB_DECRYPTED select SOCK_VALIDATE_XMIT select SOCK_RX_QUEUE_MAPPING default n diff --git a/net/tls/tls.h b/net/tls/tls.h index 0e840a0c3437..2f86baeb71fc 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -39,6 +39,7 @@ #include <linux/types.h> #include <linux/skmsg.h> #include <net/tls.h> +#include <net/tls_prot.h> #define TLS_PAGE_ORDER (min_t(unsigned int, PAGE_ALLOC_COSTLY_ORDER, \ TLS_MAX_PAYLOAD_SIZE >> PAGE_SHIFT)) @@ -50,6 +51,59 @@ #define TLS_DEC_STATS(net, field) \ SNMP_DEC_STATS((net)->mib.tls_statistics, field) +struct tls_cipher_desc { + unsigned int nonce; + unsigned int iv; + unsigned int key; + unsigned int salt; + unsigned int tag; + unsigned int rec_seq; + unsigned int iv_offset; + unsigned int key_offset; + unsigned int salt_offset; + unsigned int rec_seq_offset; + char *cipher_name; + bool offloadable; + size_t crypto_info; +}; + +#define TLS_CIPHER_MIN TLS_CIPHER_AES_GCM_128 +#define TLS_CIPHER_MAX TLS_CIPHER_ARIA_GCM_256 +extern const struct tls_cipher_desc tls_cipher_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN]; + +static inline const struct tls_cipher_desc *get_cipher_desc(u16 cipher_type) +{ + if (cipher_type < TLS_CIPHER_MIN || cipher_type > TLS_CIPHER_MAX) + return NULL; + + return &tls_cipher_desc[cipher_type - TLS_CIPHER_MIN]; +} + +static inline char *crypto_info_iv(struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc) +{ + return (char *)crypto_info + cipher_desc->iv_offset; +} + +static inline char *crypto_info_key(struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc) +{ + return (char *)crypto_info + cipher_desc->key_offset; +} + +static inline char *crypto_info_salt(struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc) +{ + return (char *)crypto_info + cipher_desc->salt_offset; +} + +static inline char *crypto_info_rec_seq(struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc) +{ + return (char *)crypto_info + cipher_desc->rec_seq_offset; +} + + /* TLS records are maintained in 'struct tls_rec'. It stores the memory pages * allocated or mapped for each TLS record. After encryption, the records are * stores in a linked list. @@ -70,10 +124,13 @@ struct tls_rec { char content_type; struct scatterlist sg_content_type; + struct sock *sk; + char aad_space[TLS_AAD_SPACE_SIZE]; - u8 iv_data[MAX_IV_SIZE]; + u8 iv_data[TLS_MAX_IV_SIZE]; + + /* Must be last --ends in a flexible-array member. 
*/ struct aead_request aead_req; - u8 aead_req_ctx[]; }; int __net_init tls_proc_init(struct net *net); @@ -84,21 +141,19 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx); void update_sk_prot(struct sock *sk, struct tls_context *ctx); int wait_on_pending_writer(struct sock *sk, long *timeo); -int tls_sk_query(struct sock *sk, int optname, char __user *optval, - int __user *optlen); -int tls_sk_attach(struct sock *sk, int optname, char __user *optval, - unsigned int optlen); void tls_err_abort(struct sock *sk, int err); +void tls_strp_abort_strp(struct tls_strparser *strp, int err); -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); +int init_prot_info(struct tls_prot_info *prot, + const struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc); +int tls_set_sw_offload(struct sock *sk, int tx, + struct tls_crypto_info *new_crypto_info); void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); -int tls_sw_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags); -int tls_sw_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags); +void tls_sw_splice_eof(struct socket *sock); void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); void tls_sw_release_resources_tx(struct sock *sk); void tls_sw_free_ctx_tx(struct tls_context *tls_ctx); @@ -111,10 +166,11 @@ bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t read_actor); int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); -int tls_device_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags); +void tls_device_splice_eof(struct socket *sock); int tls_tx_records(struct sock *sk, int flags); void tls_sw_write_space(struct sock *sk, struct tls_context *ctx); @@ -142,7 +198,7 @@ void tls_strp_msg_done(struct tls_strparser *strp); int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb); void tls_rx_msg_ready(struct tls_strparser *strp); -void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); int tls_strp_msg_cow(struct tls_sw_context_rx *ctx); struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx); int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst); @@ -162,13 +218,18 @@ static inline struct sk_buff *tls_strp_msg(struct tls_sw_context_rx *ctx) static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx) { - return ctx->strp.msg_ready; + return READ_ONCE(ctx->strp.msg_ready); +} + +static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx) +{ + return ctx->strp.mixed_decrypted; } #ifdef CONFIG_TLS_DEVICE int tls_device_init(void); void tls_device_cleanup(void); -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); +int tls_set_device_offload(struct sock *sk); void tls_device_free_resources_tx(struct sock *sk); int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); void tls_device_offload_cleanup_rx(struct sock *sk); @@ -179,7 +240,7 @@ static inline int tls_device_init(void) { return 0; } 
static inline void tls_device_cleanup(void) {} static inline int -tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +tls_set_device_offload(struct sock *sk) { return -EOPNOTSUPP; } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 6c593788dc25..82ea407e520a 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -37,6 +37,7 @@ #include <net/inet_connection_sock.h> #include <net/tcp.h> #include <net/tls.h> +#include <linux/skbuff_ref.h> #include "tls.h" #include "trace.h" @@ -52,13 +53,12 @@ static LIST_HEAD(tls_device_list); static LIST_HEAD(tls_device_down_list); static DEFINE_SPINLOCK(tls_device_lock); +static struct page *dummy_page; + static void tls_device_free_ctx(struct tls_context *ctx) { - if (ctx->tx_conf == TLS_HW) { + if (ctx->tx_conf == TLS_HW) kfree(tls_offload_ctx_tx(ctx)); - kfree(ctx->tx.rec_seq); - kfree(ctx->tx.iv); - } if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); @@ -123,17 +123,19 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx) /* We assume that the socket is already connected */ static struct net_device *get_netdev_for_sock(struct sock *sk) { - struct dst_entry *dst = sk_dst_get(sk); - struct net_device *netdev = NULL; + struct net_device *dev, *lowest_dev = NULL; + struct dst_entry *dst; - if (likely(dst)) { - netdev = netdev_sk_get_lowest_dev(dst->dev, sk); - dev_hold(netdev); + rcu_read_lock(); + dst = __sk_dst_get(sk); + dev = dst ? dst_dev_rcu(dst) : NULL; + if (likely(dev)) { + lowest_dev = netdev_sk_get_lowest_dev(dev, sk); + dev_hold(lowest_dev); } + rcu_read_unlock(); - dst_release(dst); - - return netdev; + return lowest_dev; } static void destroy_record(struct tls_record_info *record) @@ -157,7 +159,7 @@ static void delete_all_records(struct tls_offload_context_tx *offload_ctx) offload_ctx->retransmit_hint = NULL; } -static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) +static void tls_tcp_clean_acked(struct sock *sk, u32 acked_seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_record_info *info, *temp; @@ -204,7 +206,7 @@ void tls_device_sk_destruct(struct sock *sk) destroy_record(ctx->open_record); delete_all_records(ctx); crypto_free_aead(ctx->aead_send); - clean_acked_data_disable(inet_csk(sk)); + clean_acked_data_disable(tcp_sk(sk)); } tls_device_queue_ctx_destruction(tls_ctx); @@ -231,14 +233,10 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, u32 seq) { struct net_device *netdev; - struct sk_buff *skb; int err = 0; u8 *rcd_sn; - skb = tcp_write_queue_tail(sk); - if (skb) - TCP_SKB_CB(skb)->eor = 1; - + tcp_write_collapse_fence(sk); rcd_sn = tls_ctx->tx.rec_seq; trace_tls_device_tx_resync_send(sk, seq, rcd_sn); @@ -268,9 +266,8 @@ static void tls_append_frag(struct tls_record_info *record, skb_frag_size_add(frag, size); } else { ++frag; - __skb_frag_set_page(frag, pfrag->page); - skb_frag_off_set(frag, pfrag->offset); - skb_frag_size_set(frag, size); + skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset, + size); ++record->num_frags; get_page(pfrag->page); } @@ -313,36 +310,33 @@ static int tls_push_record(struct sock *sk, return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags); } -static int tls_device_record_close(struct sock *sk, - struct tls_context *ctx, - struct tls_record_info *record, - struct page_frag *pfrag, - unsigned char record_type) +static void tls_device_record_close(struct sock *sk, + struct tls_context *ctx, + struct tls_record_info *record, + struct page_frag *pfrag, + unsigned char 
record_type) { struct tls_prot_info *prot = &ctx->prot_info; - int ret; + struct page_frag dummy_tag_frag; /* append tag * device will fill in the tag, we just need to append a placeholder * use socket memory to improve coalescing (re-using a single buffer * increases frag count) - * if we can't allocate memory now, steal some back from data + * if we can't allocate memory now use the dummy page */ - if (likely(skb_page_frag_refill(prot->tag_size, pfrag, - sk->sk_allocation))) { - ret = 0; - tls_append_frag(record, pfrag, prot->tag_size); - } else { - ret = prot->tag_size; - if (record->len <= prot->overhead_size) - return -ENOMEM; + if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) && + !skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) { + dummy_tag_frag.page = dummy_page; + dummy_tag_frag.offset = 0; + pfrag = &dummy_tag_frag; } + tls_append_frag(record, pfrag, prot->tag_size); /* fill prepend */ tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]), record->len - prot->overhead_size, record_type); - return ret; } static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, @@ -357,9 +351,8 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, return -ENOMEM; frag = &record->frags[0]; - __skb_frag_set_page(frag, pfrag->page); - skb_frag_off_set(frag, pfrag->offset); - skb_frag_size_set(frag, prepend_size); + skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset, + prepend_size); get_page(pfrag->page); pfrag->offset += prepend_size; @@ -380,7 +373,8 @@ static int tls_do_allocation(struct sock *sk, if (!offload_ctx->open_record) { if (unlikely(!skb_page_frag_refill(prepend_size, pfrag, sk->sk_allocation))) { - READ_ONCE(sk->sk_prot)->enter_memory_pressure(sk); + if (!sk->sk_bypass_prot_mem) + READ_ONCE(sk->sk_prot)->enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); return -ENOMEM; } @@ -424,16 +418,10 @@ static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i) return 0; } -union tls_iter_offset { - struct iov_iter *msg_iter; - int offset; -}; - static int tls_push_data(struct sock *sk, - union tls_iter_offset iter_offset, + struct iov_iter *iter, size_t size, int flags, - unsigned char record_type, - struct page *zc_page) + unsigned char record_type) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; @@ -449,14 +437,18 @@ static int tls_push_data(struct sock *sk, long timeo; if (flags & - ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST)) + ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SPLICE_PAGES | MSG_EOR)) return -EOPNOTSUPP; + if ((flags & (MSG_MORE | MSG_EOR)) == (MSG_MORE | MSG_EOR)) + return -EINVAL; + if (unlikely(sk->sk_err)) return -sk->sk_err; flags |= MSG_SENDPAGE_DECRYPTED; - tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; + tls_push_record_flags = flags | MSG_MORE; timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (tls_is_partially_sent_record(tls_ctx)) { @@ -470,7 +462,7 @@ static int tls_push_data(struct sock *sk, /* TLS_HEADER_SIZE is not counted as part of the TLS record, and * we need to leave room for an authentication tag. 
*/ - max_open_record_len = TLS_MAX_PAYLOAD_SIZE + + max_open_record_len = tls_ctx->tx_max_payload_len + prot->prepend_size; do { rc = tls_do_allocation(sk, ctx, pfrag, prot->prepend_size); @@ -501,11 +493,27 @@ handle_error: record = ctx->open_record; copy = min_t(size_t, size, max_open_record_len - record->len); - if (copy && zc_page) { + if (copy && (flags & MSG_SPLICE_PAGES)) { struct page_frag zc_pfrag; + struct page **pages = &zc_pfrag.page; + size_t off; + + rc = iov_iter_extract_pages(iter, &pages, + copy, 1, 0, &off); + if (rc <= 0) { + if (rc == 0) + rc = -EIO; + goto handle_error; + } + copy = rc; - zc_pfrag.page = zc_page; - zc_pfrag.offset = iter_offset.offset; + if (WARN_ON_ONCE(!sendpage_ok(zc_pfrag.page))) { + iov_iter_revert(iter, copy); + rc = -EIO; + goto handle_error; + } + + zc_pfrag.offset = off; zc_pfrag.size = copy; tls_append_frag(record, &zc_pfrag, copy); } else if (copy) { @@ -513,7 +521,7 @@ handle_error: rc = tls_device_copy_data(page_address(pfrag->page) + pfrag->offset, copy, - iter_offset.msg_iter); + iter); if (rc) goto handle_error; tls_append_frag(record, pfrag, copy); @@ -523,7 +531,7 @@ handle_error: if (!size) { last_record: tls_push_record_flags = flags; - if (flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE)) { + if (flags & MSG_MORE) { more = true; break; } @@ -533,18 +541,8 @@ last_record: if (done || record->len >= max_open_record_len || (record->num_frags >= MAX_SKB_FRAGS - 1)) { - rc = tls_device_record_close(sk, tls_ctx, record, - pfrag, record_type); - if (rc) { - if (rc > 0) { - size += rc; - } else { - size = orig_size; - destroy_record(record); - ctx->open_record = NULL; - break; - } - } + tls_device_record_close(sk, tls_ctx, record, + pfrag, record_type); rc = tls_push_record(sk, tls_ctx, @@ -568,9 +566,11 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { unsigned char record_type = TLS_RECORD_TYPE_DATA; struct tls_context *tls_ctx = tls_get_ctx(sk); - union tls_iter_offset iter; int rc; + if (!tls_ctx->zerocopy_sendfile) + msg->msg_flags &= ~MSG_SPLICE_PAGES; + mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); @@ -580,8 +580,8 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto out; } - iter.msg_iter = &msg->msg_iter; - rc = tls_push_data(sk, iter, size, msg->msg_flags, record_type, NULL); + rc = tls_push_data(sk, &msg->msg_iter, size, msg->msg_flags, + record_type); out: release_sock(sk); @@ -589,47 +589,25 @@ out: return rc; } -int tls_device_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) +void tls_device_splice_eof(struct socket *sock) { + struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); - union tls_iter_offset iter_offset; - struct iov_iter msg_iter; - char *kaddr; - struct kvec iov; - int rc; + struct iov_iter iter = {}; - if (flags & MSG_SENDPAGE_NOTLAST) - flags |= MSG_MORE; + if (!tls_is_partially_sent_record(tls_ctx)) + return; mutex_lock(&tls_ctx->tx_lock); lock_sock(sk); - if (flags & MSG_OOB) { - rc = -EOPNOTSUPP; - goto out; - } - - if (tls_ctx->zerocopy_sendfile) { - iter_offset.offset = offset; - rc = tls_push_data(sk, iter_offset, size, - flags, TLS_RECORD_TYPE_DATA, page); - goto out; + if (tls_is_partially_sent_record(tls_ctx)) { + iov_iter_bvec(&iter, ITER_SOURCE, NULL, 0, 0); + tls_push_data(sk, &iter, 0, 0, TLS_RECORD_TYPE_DATA); } - kaddr = kmap(page); - iov.iov_base = kaddr + offset; - iov.iov_len = size; - iov_iter_kvec(&msg_iter, ITER_SOURCE, &iov, 1, size); - iter_offset.msg_iter = &msg_iter; - rc = 
tls_push_data(sk, iter_offset, size, flags, TLS_RECORD_TYPE_DATA, - NULL); - kunmap(page); - -out: release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); - return rc; } struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, @@ -694,12 +672,10 @@ EXPORT_SYMBOL(tls_get_record); static int tls_device_push_pending_record(struct sock *sk, int flags) { - union tls_iter_offset iter; - struct iov_iter msg_iter; + struct iov_iter iter; - iov_iter_kvec(&msg_iter, ITER_SOURCE, NULL, 0, 0); - iter.msg_iter = &msg_iter; - return tls_push_data(sk, iter, 0, flags, TLS_RECORD_TYPE_DATA, NULL); + iov_iter_kvec(&iter, ITER_SOURCE, NULL, 0, 0); + return tls_push_data(sk, &iter, 0, flags, TLS_RECORD_TYPE_DATA); } void tls_device_write_space(struct sock *sk, struct tls_context *ctx) @@ -748,8 +724,10 @@ tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async, /* shouldn't get to wraparound: * too long in async stage, something bad happened */ - if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) + if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) { + tls_offload_rx_resync_async_request_cancel(resync_async); return false; + } /* asynchronous stage: log all headers seq such that * req_seq <= seq <= end_seq, and wait for real resync request @@ -905,24 +883,18 @@ static int tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx) { struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx); - const struct tls_cipher_size_desc *cipher_sz; + const struct tls_cipher_desc *cipher_desc; int err, offset, copy, data_len, pos; struct sk_buff *skb, *skb_iter; struct scatterlist sg[1]; struct strp_msg *rxm; char *orig_buf, *buf; - switch (tls_ctx->crypto_recv.info.cipher_type) { - case TLS_CIPHER_AES_GCM_128: - case TLS_CIPHER_AES_GCM_256: - break; - default: - return -EINVAL; - } - cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_recv.info.cipher_type]; + cipher_desc = get_cipher_desc(tls_ctx->crypto_recv.info.cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); rxm = strp_msg(tls_strp_msg(sw_ctx)); - orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv, + orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv, sk->sk_allocation); if (!orig_buf) return -ENOMEM; @@ -938,8 +910,8 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx) sg_init_table(sg, 1); sg_set_buf(&sg[0], buf, - rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv); - err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_sz->iv); + rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv); + err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_desc->iv); if (err) goto free_buf; @@ -950,7 +922,7 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx) else err = 0; - data_len = rxm->full_len - cipher_sz->tag; + data_len = rxm->full_len - cipher_desc->tag; if (skb_pagelen(skb) > offset) { copy = min_t(int, skb_pagelen(skb) - offset, data_len); @@ -1005,20 +977,14 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx) struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx); struct sk_buff *skb = tls_strp_msg(sw_ctx); struct strp_msg *rxm = strp_msg(skb); - int is_decrypted = skb->decrypted; - int is_encrypted = !is_decrypted; - struct sk_buff *skb_iter; - int left; - - left = rxm->full_len - skb->len; - /* Check if all the data is decrypted already */ - skb_iter = skb_shinfo(skb)->frag_list; - while (skb_iter && left > 0) { - is_decrypted &= skb_iter->decrypted; - is_encrypted &= 
!skb_iter->decrypted; - - left -= skb_iter->len; - skb_iter = skb_iter->next; + int is_decrypted, is_encrypted; + + if (!tls_strp_msg_mixed_decrypted(sw_ctx)) { + is_decrypted = skb->decrypted; + is_encrypted = !is_decrypted; + } else { + is_decrypted = 0; + is_encrypted = 0; } trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len, @@ -1069,22 +1035,44 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk, } } -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +static struct tls_offload_context_tx *alloc_offload_ctx_tx(struct tls_context *ctx) +{ + struct tls_offload_context_tx *offload_ctx; + __be64 rcd_sn; + + offload_ctx = kzalloc(sizeof(*offload_ctx), GFP_KERNEL); + if (!offload_ctx) + return NULL; + + INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task); + INIT_LIST_HEAD(&offload_ctx->records_list); + spin_lock_init(&offload_ctx->lock); + sg_init_table(offload_ctx->sg_tx_data, + ARRAY_SIZE(offload_ctx->sg_tx_data)); + + /* start at rec_seq - 1 to account for the start marker record */ + memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); + offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; + + offload_ctx->ctx = ctx; + + return offload_ctx; +} + +int tls_set_device_offload(struct sock *sk) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - const struct tls_cipher_size_desc *cipher_sz; struct tls_record_info *start_marker_record; struct tls_offload_context_tx *offload_ctx; + const struct tls_cipher_desc *cipher_desc; struct tls_crypto_info *crypto_info; + struct tls_prot_info *prot; struct net_device *netdev; + struct tls_context *ctx; char *iv, *rec_seq; - struct sk_buff *skb; - __be64 rcd_sn; int rc; - if (!ctx) - return -EINVAL; + ctx = tls_get_ctx(sk); + prot = &ctx->prot_info; if (ctx->priv_ctx_tx) return -EEXIST; @@ -1106,58 +1094,29 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) goto release_netdev; } - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv; - rec_seq = - ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq; - break; - case TLS_CIPHER_AES_GCM_256: - iv = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->iv; - rec_seq = - ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->rec_seq; - break; - default: + cipher_desc = get_cipher_desc(crypto_info->cipher_type); + if (!cipher_desc || !cipher_desc->offloadable) { rc = -EINVAL; goto release_netdev; } - cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type]; - /* Sanity-check the rec_seq_size for stack allocations */ - if (cipher_sz->rec_seq > TLS_MAX_REC_SEQ_SIZE) { - rc = -EINVAL; - goto release_netdev; - } - - prot->version = crypto_info->version; - prot->cipher_type = crypto_info->cipher_type; - prot->prepend_size = TLS_HEADER_SIZE + cipher_sz->iv; - prot->tag_size = cipher_sz->tag; - prot->overhead_size = prot->prepend_size + prot->tag_size; - prot->iv_size = cipher_sz->iv; - prot->salt_size = cipher_sz->salt; - ctx->tx.iv = kmalloc(cipher_sz->iv + cipher_sz->salt, GFP_KERNEL); - if (!ctx->tx.iv) { - rc = -ENOMEM; + rc = init_prot_info(prot, crypto_info, cipher_desc); + if (rc) goto release_netdev; - } - memcpy(ctx->tx.iv + cipher_sz->salt, iv, cipher_sz->iv); + iv = crypto_info_iv(crypto_info, cipher_desc); + rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); - prot->rec_seq_size = cipher_sz->rec_seq; - ctx->tx.rec_seq = kmemdup(rec_seq, cipher_sz->rec_seq, 
GFP_KERNEL); - if (!ctx->tx.rec_seq) { - rc = -ENOMEM; - goto free_iv; - } + memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv); + memcpy(ctx->tx.rec_seq, rec_seq, cipher_desc->rec_seq); start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); if (!start_marker_record) { rc = -ENOMEM; - goto free_rec_seq; + goto release_netdev; } - offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); + offload_ctx = alloc_offload_ctx_tx(ctx); if (!offload_ctx) { rc = -ENOMEM; goto free_marker_record; @@ -1167,33 +1126,19 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) if (rc) goto free_offload_ctx; - /* start at rec_seq - 1 to account for the start marker record */ - memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); - offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; - start_marker_record->end_seq = tcp_sk(sk)->write_seq; start_marker_record->len = 0; start_marker_record->num_frags = 0; - - INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task); - offload_ctx->ctx = ctx; - - INIT_LIST_HEAD(&offload_ctx->records_list); list_add_tail(&start_marker_record->list, &offload_ctx->records_list); - spin_lock_init(&offload_ctx->lock); - sg_init_table(offload_ctx->sg_tx_data, - ARRAY_SIZE(offload_ctx->sg_tx_data)); - clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked); + clean_acked_data_enable(tcp_sk(sk), &tls_tcp_clean_acked); ctx->push_pending_record = tls_device_push_pending_record; /* TLS offload is greatly simplified if we don't send * SKBs where only part of the payload needs to be encrypted. * So mark the last skb in the write queue as end of record. */ - skb = tcp_write_queue_tail(sk); - if (skb) - TCP_SKB_CB(skb)->eor = 1; + tcp_write_collapse_fence(sk); /* Avoid offloading if the device is down * We don't want to offload new flows after @@ -1221,7 +1166,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) tls_device_attach(ctx, sk, netdev); up_read(&device_offload_lock); - /* following this assignment tls_is_sk_tx_device_offloaded + /* following this assignment tls_is_skb_tx_device_offloaded * will return true and the context might be accessed * by the netdev's xmit function. */ @@ -1232,17 +1177,13 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) release_lock: up_read(&device_offload_lock); - clean_acked_data_disable(inet_csk(sk)); + clean_acked_data_disable(tcp_sk(sk)); crypto_free_aead(offload_ctx->aead_send); free_offload_ctx: kfree(offload_ctx); ctx->priv_ctx_tx = NULL; free_marker_record: kfree(start_marker_record); -free_rec_seq: - kfree(ctx->tx.rec_seq); -free_iv: - kfree(ctx->tx.iv); release_netdev: dev_put(netdev); return rc; @@ -1283,7 +1224,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto release_lock; } - context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) { rc = -ENOMEM; goto release_lock; @@ -1291,7 +1232,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) context->resync_nh_reset = 1; ctx->priv_ctx_rx = context; - rc = tls_set_sw_offload(sk, ctx, 0); + rc = tls_set_sw_offload(sk, 0, NULL); if (rc) goto release_ctx; @@ -1374,7 +1315,7 @@ static int tls_device_down(struct net_device *netdev) list_for_each_entry_safe(ctx, tmp, &list, list) { /* Stop offloaded TX and switch to the fallback. - * tls_is_sk_tx_device_offloaded will return false. + * tls_is_skb_tx_device_offloaded will return false. 
*/ WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw); @@ -1470,14 +1411,26 @@ int __init tls_device_init(void) { int err; - destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0); - if (!destruct_wq) + dummy_page = alloc_page(GFP_KERNEL); + if (!dummy_page) return -ENOMEM; + destruct_wq = alloc_workqueue("ktls_device_destruct", WQ_PERCPU, 0); + if (!destruct_wq) { + err = -ENOMEM; + goto err_free_dummy; + } + err = register_netdevice_notifier(&tls_dev_notifier); if (err) - destroy_workqueue(destruct_wq); + goto err_destroy_wq; + return 0; + +err_destroy_wq: + destroy_workqueue(destruct_wq); +err_free_dummy: + put_page(dummy_page); return err; } @@ -1486,4 +1439,5 @@ void __exit tls_device_cleanup(void) unregister_netdevice_notifier(&tls_dev_notifier); destroy_workqueue(destruct_wq); clean_acked_data_flush(); + put_page(dummy_page); } diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 7fbb1d0b69b3..03d508a45aae 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -33,20 +33,10 @@ #include <crypto/aead.h> #include <crypto/scatterwalk.h> #include <net/ip6_checksum.h> +#include <linux/skbuff_ref.h> #include "tls.h" -static void chain_to_walk(struct scatterlist *sg, struct scatter_walk *walk) -{ - struct scatterlist *src = walk->sg; - int diff = walk->offset - src->offset; - - sg_set_page(sg, sg_page(src), - src->length - diff, walk->offset); - - scatterwalk_crypto_chain(sg, sg_next(src), 2); -} - static int tls_enc_record(struct aead_request *aead_req, struct crypto_aead *aead, char *aad, char *iv, __be64 rcd_sn, @@ -54,53 +44,44 @@ static int tls_enc_record(struct aead_request *aead_req, struct scatter_walk *out, int *in_len, struct tls_prot_info *prot) { - unsigned char buf[TLS_HEADER_SIZE + MAX_IV_SIZE]; - const struct tls_cipher_size_desc *cipher_sz; + unsigned char buf[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE]; + const struct tls_cipher_desc *cipher_desc; struct scatterlist sg_in[3]; struct scatterlist sg_out[3]; unsigned int buf_size; u16 len; int rc; - switch (prot->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - case TLS_CIPHER_AES_GCM_256: - break; - default: - return -EINVAL; - } - cipher_sz = &tls_cipher_size_desc[prot->cipher_type]; + cipher_desc = get_cipher_desc(prot->cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); - buf_size = TLS_HEADER_SIZE + cipher_sz->iv; + buf_size = TLS_HEADER_SIZE + cipher_desc->iv; len = min_t(int, *in_len, buf_size); - scatterwalk_copychunks(buf, in, len, 0); - scatterwalk_copychunks(buf, out, len, 1); + memcpy_from_scatterwalk(buf, in, len); + memcpy_to_scatterwalk(out, buf, len); *in_len -= len; if (!*in_len) return 0; - scatterwalk_pagedone(in, 0, 1); - scatterwalk_pagedone(out, 1, 1); - len = buf[4] | (buf[3] << 8); - len -= cipher_sz->iv; + len -= cipher_desc->iv; - tls_make_aad(aad, len - cipher_sz->tag, (char *)&rcd_sn, buf[0], prot); + tls_make_aad(aad, len - cipher_desc->tag, (char *)&rcd_sn, buf[0], prot); - memcpy(iv + cipher_sz->salt, buf + TLS_HEADER_SIZE, cipher_sz->iv); + memcpy(iv + cipher_desc->salt, buf + TLS_HEADER_SIZE, cipher_desc->iv); sg_init_table(sg_in, ARRAY_SIZE(sg_in)); sg_init_table(sg_out, ARRAY_SIZE(sg_out)); sg_set_buf(sg_in, aad, TLS_AAD_SPACE_SIZE); sg_set_buf(sg_out, aad, TLS_AAD_SPACE_SIZE); - chain_to_walk(sg_in + 1, in); - chain_to_walk(sg_out + 1, out); + scatterwalk_get_sglist(in, sg_in + 1); + scatterwalk_get_sglist(out, sg_out + 1); *in_len -= len; if (*in_len < 0) { - *in_len += cipher_sz->tag; + 
*in_len += cipher_desc->tag; /* the input buffer doesn't contain the entire record. * trim len accordingly. The resulting authentication tag * will contain garbage, but we don't care, so we won't @@ -115,13 +96,11 @@ static int tls_enc_record(struct aead_request *aead_req, } if (*in_len) { - scatterwalk_copychunks(NULL, in, len, 2); - scatterwalk_pagedone(in, 0, 1); - scatterwalk_copychunks(NULL, out, len, 2); - scatterwalk_pagedone(out, 1, 1); + scatterwalk_skip(in, len); + scatterwalk_skip(out, len); } - len -= cipher_sz->tag; + len -= cipher_desc->tag; aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv); rc = crypto_aead_encrypt(aead_req); @@ -167,9 +146,6 @@ static int tls_enc_records(struct aead_request *aead_req, } while (rc == 0 && len); - scatterwalk_done(&in, 0, 0); - scatterwalk_done(&out, 1, 0); - return rc; } @@ -271,7 +247,7 @@ static int fill_sg_in(struct scatterlist *sg_in, * There is a corner case where the packet contains * both an acked and a non-acked record. * We currently don't handle that case and rely - * on TCP to retranmit a packet that doesn't contain + * on TCP to retransmit a packet that doesn't contain * already acked payload. */ if (!is_start_marker) @@ -309,14 +285,14 @@ static void fill_sg_out(struct scatterlist sg_out[3], void *buf, int sync_size, void *dummy_buf) { - const struct tls_cipher_size_desc *cipher_sz = - &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type]; + const struct tls_cipher_desc *cipher_desc = + get_cipher_desc(tls_ctx->crypto_send.info.cipher_type); sg_set_buf(&sg_out[0], dummy_buf, sync_size); sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len); /* Add room for authentication tag produced by crypto */ dummy_buf += sync_size; - sg_set_buf(&sg_out[2], dummy_buf, cipher_sz->tag); + sg_set_buf(&sg_out[2], dummy_buf, cipher_desc->tag); } static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, @@ -328,7 +304,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); int tcp_payload_offset = skb_tcp_all_headers(skb); int payload_len = skb->len - tcp_payload_offset; - const struct tls_cipher_size_desc *cipher_sz; + const struct tls_cipher_desc *cipher_desc; void *buf, *iv, *aad, *dummy_buf, *salt; struct aead_request *aead_req; struct sk_buff *nskb = NULL; @@ -338,26 +314,19 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, if (!aead_req) return NULL; - switch (tls_ctx->crypto_send.info.cipher_type) { - case TLS_CIPHER_AES_GCM_128: - salt = tls_ctx->crypto_send.aes_gcm_128.salt; - break; - case TLS_CIPHER_AES_GCM_256: - salt = tls_ctx->crypto_send.aes_gcm_256.salt; - break; - default: - goto free_req; - } - cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type]; - buf_len = cipher_sz->salt + cipher_sz->iv + TLS_AAD_SPACE_SIZE + - sync_size + cipher_sz->tag; + cipher_desc = get_cipher_desc(tls_ctx->crypto_send.info.cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); + + buf_len = cipher_desc->salt + cipher_desc->iv + TLS_AAD_SPACE_SIZE + + sync_size + cipher_desc->tag; buf = kmalloc(buf_len, GFP_ATOMIC); if (!buf) goto free_req; iv = buf; - memcpy(iv, salt, cipher_sz->salt); - aad = buf + cipher_sz->salt + cipher_sz->iv; + salt = crypto_info_salt(&tls_ctx->crypto_send.info, cipher_desc); + memcpy(iv, salt, cipher_desc->salt); + aad = buf + cipher_desc->salt + cipher_desc->iv; dummy_buf = aad + TLS_AAD_SPACE_SIZE; nskb = alloc_skb(skb_headroom(skb) + skb->len, 
GFP_ATOMIC); @@ -471,12 +440,15 @@ int tls_sw_fallback_init(struct sock *sk, struct tls_offload_context_tx *offload_ctx, struct tls_crypto_info *crypto_info) { - const struct tls_cipher_size_desc *cipher_sz; - const u8 *key; + const struct tls_cipher_desc *cipher_desc; int rc; + cipher_desc = get_cipher_desc(crypto_info->cipher_type); + if (!cipher_desc || !cipher_desc->offloadable) + return -EINVAL; + offload_ctx->aead_send = - crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + crypto_alloc_aead(cipher_desc->cipher_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(offload_ctx->aead_send)) { rc = PTR_ERR(offload_ctx->aead_send); pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc); @@ -484,24 +456,13 @@ int tls_sw_fallback_init(struct sock *sk, goto err_out; } - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key; - break; - case TLS_CIPHER_AES_GCM_256: - key = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->key; - break; - default: - rc = -EINVAL; - goto free_aead; - } - cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type]; - - rc = crypto_aead_setkey(offload_ctx->aead_send, key, cipher_sz->key); + rc = crypto_aead_setkey(offload_ctx->aead_send, + crypto_info_key(crypto_info, cipher_desc), + cipher_desc->key); if (rc) goto free_aead; - rc = crypto_aead_setauthsize(offload_ctx->aead_send, cipher_sz->tag); + rc = crypto_aead_setauthsize(offload_ctx->aead_send, cipher_desc->tag); if (rc) goto free_aead; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 3735cb00905d..56ce0bc8317b 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -58,23 +58,67 @@ enum { TLS_NUM_PROTS, }; -#define CIPHER_SIZE_DESC(cipher) [cipher] = { \ +#define CHECK_CIPHER_DESC(cipher,ci) \ + static_assert(cipher ## _IV_SIZE <= TLS_MAX_IV_SIZE); \ + static_assert(cipher ## _SALT_SIZE <= TLS_MAX_SALT_SIZE); \ + static_assert(cipher ## _REC_SEQ_SIZE <= TLS_MAX_REC_SEQ_SIZE); \ + static_assert(cipher ## _TAG_SIZE == TLS_TAG_SIZE); \ + static_assert(sizeof_field(struct ci, iv) == cipher ## _IV_SIZE); \ + static_assert(sizeof_field(struct ci, key) == cipher ## _KEY_SIZE); \ + static_assert(sizeof_field(struct ci, salt) == cipher ## _SALT_SIZE); \ + static_assert(sizeof_field(struct ci, rec_seq) == cipher ## _REC_SEQ_SIZE); + +#define __CIPHER_DESC(ci) \ + .iv_offset = offsetof(struct ci, iv), \ + .key_offset = offsetof(struct ci, key), \ + .salt_offset = offsetof(struct ci, salt), \ + .rec_seq_offset = offsetof(struct ci, rec_seq), \ + .crypto_info = sizeof(struct ci) + +#define CIPHER_DESC(cipher,ci,algname,_offloadable) [cipher - TLS_CIPHER_MIN] = { \ + .nonce = cipher ## _IV_SIZE, \ .iv = cipher ## _IV_SIZE, \ .key = cipher ## _KEY_SIZE, \ .salt = cipher ## _SALT_SIZE, \ .tag = cipher ## _TAG_SIZE, \ .rec_seq = cipher ## _REC_SEQ_SIZE, \ + .cipher_name = algname, \ + .offloadable = _offloadable, \ + __CIPHER_DESC(ci), \ } -const struct tls_cipher_size_desc tls_cipher_size_desc[] = { - CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_128), - CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_256), - CIPHER_SIZE_DESC(TLS_CIPHER_AES_CCM_128), - CIPHER_SIZE_DESC(TLS_CIPHER_CHACHA20_POLY1305), - CIPHER_SIZE_DESC(TLS_CIPHER_SM4_GCM), - CIPHER_SIZE_DESC(TLS_CIPHER_SM4_CCM), +#define CIPHER_DESC_NONCE0(cipher,ci,algname,_offloadable) [cipher - TLS_CIPHER_MIN] = { \ + .nonce = 0, \ + .iv = cipher ## _IV_SIZE, \ + .key = cipher ## _KEY_SIZE, \ + .salt = cipher ## _SALT_SIZE, \ + .tag = cipher ## _TAG_SIZE, \ + .rec_seq = cipher ## _REC_SEQ_SIZE, \ + 
.cipher_name = algname, \ + .offloadable = _offloadable, \ + __CIPHER_DESC(ci), \ +} + +const struct tls_cipher_desc tls_cipher_desc[TLS_CIPHER_MAX + 1 - TLS_CIPHER_MIN] = { + CIPHER_DESC(TLS_CIPHER_AES_GCM_128, tls12_crypto_info_aes_gcm_128, "gcm(aes)", true), + CIPHER_DESC(TLS_CIPHER_AES_GCM_256, tls12_crypto_info_aes_gcm_256, "gcm(aes)", true), + CIPHER_DESC(TLS_CIPHER_AES_CCM_128, tls12_crypto_info_aes_ccm_128, "ccm(aes)", false), + CIPHER_DESC_NONCE0(TLS_CIPHER_CHACHA20_POLY1305, tls12_crypto_info_chacha20_poly1305, "rfc7539(chacha20,poly1305)", false), + CIPHER_DESC(TLS_CIPHER_SM4_GCM, tls12_crypto_info_sm4_gcm, "gcm(sm4)", false), + CIPHER_DESC(TLS_CIPHER_SM4_CCM, tls12_crypto_info_sm4_ccm, "ccm(sm4)", false), + CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128, tls12_crypto_info_aria_gcm_128, "gcm(aria)", false), + CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256, "gcm(aria)", false), }; +CHECK_CIPHER_DESC(TLS_CIPHER_AES_GCM_128, tls12_crypto_info_aes_gcm_128); +CHECK_CIPHER_DESC(TLS_CIPHER_AES_GCM_256, tls12_crypto_info_aes_gcm_256); +CHECK_CIPHER_DESC(TLS_CIPHER_AES_CCM_128, tls12_crypto_info_aes_ccm_128); +CHECK_CIPHER_DESC(TLS_CIPHER_CHACHA20_POLY1305, tls12_crypto_info_chacha20_poly1305); +CHECK_CIPHER_DESC(TLS_CIPHER_SM4_GCM, tls12_crypto_info_sm4_gcm); +CHECK_CIPHER_DESC(TLS_CIPHER_SM4_CCM, tls12_crypto_info_sm4_ccm); +CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_128, tls12_crypto_info_aria_gcm_128); +CHECK_CIPHER_DESC(TLS_CIPHER_ARIA_GCM_256, tls12_crypto_info_aria_gcm_256); + static const struct proto *saved_tcpv6_prot; static DEFINE_MUTEX(tcpv6_prot_mutex); static const struct proto *saved_tcpv4_prot; @@ -96,8 +140,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx) int wait_on_pending_writer(struct sock *sk, long *timeo) { - int rc = 0; DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret, rc = 0; add_wait_queue(sk_sleep(sk), &wait); while (1) { @@ -111,8 +155,13 @@ int wait_on_pending_writer(struct sock *sk, long *timeo) break; } - if (sk_wait_event(sk, timeo, !sk->sk_write_pending, &wait)) + ret = sk_wait_event(sk, timeo, + !READ_ONCE(sk->sk_write_pending), &wait); + if (ret) { + if (ret < 0) + rc = ret; break; + } } remove_wait_queue(sk_sleep(sk), &wait); return rc; @@ -124,7 +173,10 @@ int tls_push_sg(struct sock *sk, u16 first_offset, int flags) { - int sendpage_flags = flags | MSG_SENDPAGE_NOTLAST; + struct bio_vec bvec; + struct msghdr msg = { + .msg_flags = MSG_SPLICE_PAGES | flags, + }; int ret = 0; struct page *p; size_t size; @@ -133,16 +185,16 @@ int tls_push_sg(struct sock *sk, size = sg->length - offset; offset += sg->offset; - ctx->in_tcp_sendpages = true; + ctx->splicing_pages = true; while (1) { - if (sg_is_last(sg)) - sendpage_flags = flags; - /* is sending application-limited? 
*/ tcp_rate_check_app_limited(sk); p = sg_page(sg); retry: - ret = do_tcp_sendpages(sk, p, offset, size, sendpage_flags); + bvec_set_page(&bvec, p, size, offset); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); + + ret = tcp_sendmsg_locked(sk, &msg, size); if (ret != size) { if (ret > 0) { @@ -154,7 +206,7 @@ retry: offset -= sg->offset; ctx->partially_sent_offset = offset; ctx->partially_sent_record = (void *)sg; - ctx->in_tcp_sendpages = false; + ctx->splicing_pages = false; return ret; } @@ -168,7 +220,7 @@ retry: size = sg->length; } - ctx->in_tcp_sendpages = false; + ctx->splicing_pages = false; return 0; } @@ -203,12 +255,9 @@ int tls_process_cmsg(struct sock *sk, struct msghdr *msg, if (msg->msg_flags & MSG_MORE) return -EINVAL; - rc = tls_handle_open_record(sk, msg->msg_flags); - if (rc) - return rc; - *record_type = *(unsigned char *)CMSG_DATA(cmsg); - rc = 0; + + rc = tls_handle_open_record(sk, msg->msg_flags); break; default: return -EINVAL; @@ -246,11 +295,11 @@ static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); - /* If in_tcp_sendpages call lower protocol write space handler + /* If splicing_pages call lower protocol write space handler * to ensure we wake up any waiting operations there. For example - * if do_tcp_sendpages where to call sk_wait_event. + * if splicing pages where to call sk_wait_event. */ - if (ctx->in_tcp_sendpages) { + if (ctx->splicing_pages) { ctx->sk_write_space(sk); return; } @@ -297,8 +346,6 @@ static void tls_sk_proto_cleanup(struct sock *sk, /* We need these for tls_sw_fallback handling of other packets */ if (ctx->tx_conf == TLS_SW) { - kfree(ctx->tx.rec_seq); - kfree(ctx->tx.iv); tls_sw_release_resources_tx(sk); TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); } else if (ctx->tx_conf == TLS_HW) { @@ -351,10 +398,45 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) tls_ctx_free(sk, ctx); } +static __poll_t tls_sk_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait) +{ + struct tls_sw_context_rx *ctx; + struct tls_context *tls_ctx; + struct sock *sk = sock->sk; + struct sk_psock *psock; + __poll_t mask = 0; + u8 shutdown; + int state; + + mask = tcp_poll(file, sock, wait); + + state = inet_sk_state_load(sk); + shutdown = READ_ONCE(sk->sk_shutdown); + if (unlikely(state != TCP_ESTABLISHED || shutdown & RCV_SHUTDOWN)) + return mask; + + tls_ctx = tls_get_ctx(sk); + ctx = tls_sw_ctx_rx(tls_ctx); + psock = sk_psock_get(sk); + + if ((skb_queue_empty_lockless(&ctx->rx_list) && + !tls_strp_msg_ready(ctx) && + sk_psock_queue_empty(psock)) || + READ_ONCE(ctx->key_update_pending)) + mask &= ~(EPOLLIN | EPOLLRDNORM); + + if (psock) + sk_psock_put(sk, psock); + + return mask; +} + static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, int __user *optlen, int tx) { int rc = 0; + const struct tls_cipher_desc *cipher_desc; struct tls_context *ctx = tls_get_ctx(sk); struct tls_crypto_info *crypto_info; struct cipher_context *cctx; @@ -393,188 +475,19 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, goto out; } - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: { - struct tls12_crypto_info_aes_gcm_128 * - crypto_info_aes_gcm_128 = - container_of(crypto_info, - struct tls12_crypto_info_aes_gcm_128, - info); - - if (len != sizeof(*crypto_info_aes_gcm_128)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(crypto_info_aes_gcm_128->iv, - cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, - 
TLS_CIPHER_AES_GCM_128_IV_SIZE); - memcpy(crypto_info_aes_gcm_128->rec_seq, cctx->rec_seq, - TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, - crypto_info_aes_gcm_128, - sizeof(*crypto_info_aes_gcm_128))) - rc = -EFAULT; - break; - } - case TLS_CIPHER_AES_GCM_256: { - struct tls12_crypto_info_aes_gcm_256 * - crypto_info_aes_gcm_256 = - container_of(crypto_info, - struct tls12_crypto_info_aes_gcm_256, - info); - - if (len != sizeof(*crypto_info_aes_gcm_256)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(crypto_info_aes_gcm_256->iv, - cctx->iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE, - TLS_CIPHER_AES_GCM_256_IV_SIZE); - memcpy(crypto_info_aes_gcm_256->rec_seq, cctx->rec_seq, - TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, - crypto_info_aes_gcm_256, - sizeof(*crypto_info_aes_gcm_256))) - rc = -EFAULT; - break; - } - case TLS_CIPHER_AES_CCM_128: { - struct tls12_crypto_info_aes_ccm_128 *aes_ccm_128 = - container_of(crypto_info, - struct tls12_crypto_info_aes_ccm_128, info); - - if (len != sizeof(*aes_ccm_128)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(aes_ccm_128->iv, - cctx->iv + TLS_CIPHER_AES_CCM_128_SALT_SIZE, - TLS_CIPHER_AES_CCM_128_IV_SIZE); - memcpy(aes_ccm_128->rec_seq, cctx->rec_seq, - TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, aes_ccm_128, sizeof(*aes_ccm_128))) - rc = -EFAULT; - break; - } - case TLS_CIPHER_CHACHA20_POLY1305: { - struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305 = - container_of(crypto_info, - struct tls12_crypto_info_chacha20_poly1305, - info); - - if (len != sizeof(*chacha20_poly1305)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(chacha20_poly1305->iv, - cctx->iv + TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE, - TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE); - memcpy(chacha20_poly1305->rec_seq, cctx->rec_seq, - TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, chacha20_poly1305, - sizeof(*chacha20_poly1305))) - rc = -EFAULT; - break; + cipher_desc = get_cipher_desc(crypto_info->cipher_type); + if (!cipher_desc || len != cipher_desc->crypto_info) { + rc = -EINVAL; + goto out; } - case TLS_CIPHER_SM4_GCM: { - struct tls12_crypto_info_sm4_gcm *sm4_gcm_info = - container_of(crypto_info, - struct tls12_crypto_info_sm4_gcm, info); - if (len != sizeof(*sm4_gcm_info)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(sm4_gcm_info->iv, - cctx->iv + TLS_CIPHER_SM4_GCM_SALT_SIZE, - TLS_CIPHER_SM4_GCM_IV_SIZE); - memcpy(sm4_gcm_info->rec_seq, cctx->rec_seq, - TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, sm4_gcm_info, sizeof(*sm4_gcm_info))) - rc = -EFAULT; - break; - } - case TLS_CIPHER_SM4_CCM: { - struct tls12_crypto_info_sm4_ccm *sm4_ccm_info = - container_of(crypto_info, - struct tls12_crypto_info_sm4_ccm, info); + memcpy(crypto_info_iv(crypto_info, cipher_desc), + cctx->iv + cipher_desc->salt, cipher_desc->iv); + memcpy(crypto_info_rec_seq(crypto_info, cipher_desc), + cctx->rec_seq, cipher_desc->rec_seq); - if (len != sizeof(*sm4_ccm_info)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(sm4_ccm_info->iv, - cctx->iv + TLS_CIPHER_SM4_CCM_SALT_SIZE, - TLS_CIPHER_SM4_CCM_IV_SIZE); - memcpy(sm4_ccm_info->rec_seq, cctx->rec_seq, - TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, sm4_ccm_info, sizeof(*sm4_ccm_info))) - rc = -EFAULT; - break; - } - case TLS_CIPHER_ARIA_GCM_128: { - struct 
tls12_crypto_info_aria_gcm_128 * - crypto_info_aria_gcm_128 = - container_of(crypto_info, - struct tls12_crypto_info_aria_gcm_128, - info); - - if (len != sizeof(*crypto_info_aria_gcm_128)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(crypto_info_aria_gcm_128->iv, - cctx->iv + TLS_CIPHER_ARIA_GCM_128_SALT_SIZE, - TLS_CIPHER_ARIA_GCM_128_IV_SIZE); - memcpy(crypto_info_aria_gcm_128->rec_seq, cctx->rec_seq, - TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, - crypto_info_aria_gcm_128, - sizeof(*crypto_info_aria_gcm_128))) - rc = -EFAULT; - break; - } - case TLS_CIPHER_ARIA_GCM_256: { - struct tls12_crypto_info_aria_gcm_256 * - crypto_info_aria_gcm_256 = - container_of(crypto_info, - struct tls12_crypto_info_aria_gcm_256, - info); - - if (len != sizeof(*crypto_info_aria_gcm_256)) { - rc = -EINVAL; - goto out; - } - lock_sock(sk); - memcpy(crypto_info_aria_gcm_256->iv, - cctx->iv + TLS_CIPHER_ARIA_GCM_256_SALT_SIZE, - TLS_CIPHER_ARIA_GCM_256_IV_SIZE); - memcpy(crypto_info_aria_gcm_256->rec_seq, cctx->rec_seq, - TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE); - release_sock(sk); - if (copy_to_user(optval, - crypto_info_aria_gcm_256, - sizeof(*crypto_info_aria_gcm_256))) - rc = -EFAULT; - break; - } - default: - rc = -EINVAL; - } + if (copy_to_user(optval, crypto_info, cipher_desc->crypto_info)) + rc = -EFAULT; out: return rc; @@ -614,11 +527,9 @@ static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval, if (len < sizeof(value)) return -EINVAL; - lock_sock(sk); value = -EINVAL; if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW) value = ctx->rx_no_pad; - release_sock(sk); if (value < 0) return value; @@ -630,11 +541,35 @@ static int do_tls_getsockopt_no_pad(struct sock *sk, char __user *optval, return 0; } +static int do_tls_getsockopt_tx_payload_len(struct sock *sk, char __user *optval, + int __user *optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + u16 payload_len = ctx->tx_max_payload_len; + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + if (len < sizeof(payload_len)) + return -EINVAL; + + if (put_user(sizeof(payload_len), optlen)) + return -EFAULT; + + if (copy_to_user(optval, &payload_len, sizeof(payload_len))) + return -EFAULT; + + return 0; +} + static int do_tls_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int rc = 0; + lock_sock(sk); + switch (optname) { case TLS_TX: case TLS_RX: @@ -647,10 +582,16 @@ static int do_tls_getsockopt(struct sock *sk, int optname, case TLS_RX_EXPECT_NO_PAD: rc = do_tls_getsockopt_no_pad(sk, optval, optlen); break; + case TLS_TX_MAX_PAYLOAD_LEN: + rc = do_tls_getsockopt_tx_payload_len(sk, optval, optlen); + break; default: rc = -ENOPROTOOPT; break; } + + release_sock(sk); + return rc; } @@ -666,13 +607,41 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, return do_tls_getsockopt(sk, optname, optval, optlen); } +static int validate_crypto_info(const struct tls_crypto_info *crypto_info, + const struct tls_crypto_info *alt_crypto_info) +{ + if (crypto_info->version != TLS_1_2_VERSION && + crypto_info->version != TLS_1_3_VERSION) + return -EINVAL; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_ARIA_GCM_128: + case TLS_CIPHER_ARIA_GCM_256: + if (crypto_info->version != TLS_1_2_VERSION) + return -EINVAL; + break; + } + + /* Ensure that TLS version and ciphers are same in both directions */ + if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) { + if (alt_crypto_info->version != crypto_info->version || + 
alt_crypto_info->cipher_type != crypto_info->cipher_type) + return -EINVAL; + } + + return 0; +} + static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, unsigned int optlen, int tx) { - struct tls_crypto_info *crypto_info; - struct tls_crypto_info *alt_crypto_info; + struct tls_crypto_info *crypto_info, *alt_crypto_info; + struct tls_crypto_info *old_crypto_info = NULL; struct tls_context *ctx = tls_get_ctx(sk); - size_t optsize; + const struct tls_cipher_desc *cipher_desc; + union tls_crypto_context *crypto_ctx; + union tls_crypto_context tmp = {}; + bool update = false; int rc = 0; int conf; @@ -680,16 +649,30 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, return -EINVAL; if (tx) { - crypto_info = &ctx->crypto_send.info; + crypto_ctx = &ctx->crypto_send; alt_crypto_info = &ctx->crypto_recv.info; } else { - crypto_info = &ctx->crypto_recv.info; + crypto_ctx = &ctx->crypto_recv; alt_crypto_info = &ctx->crypto_send.info; } - /* Currently we don't support set crypto info more than one time */ - if (TLS_CRYPTO_INFO_READY(crypto_info)) - return -EBUSY; + crypto_info = &crypto_ctx->info; + + if (TLS_CRYPTO_INFO_READY(crypto_info)) { + /* Currently we only support setting crypto info more + * than one time for TLS 1.3 + */ + if (crypto_info->version != TLS_1_3_VERSION) { + TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR + : LINUX_MIB_TLSRXREKEYERROR); + return -EBUSY; + } + + update = true; + old_crypto_info = crypto_info; + crypto_info = &tmp.info; + crypto_ctx = &tmp; + } rc = copy_from_sockptr(crypto_info, optval, sizeof(*crypto_info)); if (rc) { @@ -697,62 +680,24 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, goto err_crypto_info; } - /* check version */ - if (crypto_info->version != TLS_1_2_VERSION && - crypto_info->version != TLS_1_3_VERSION) { - rc = -EINVAL; - goto err_crypto_info; - } - - /* Ensure that TLS version and ciphers are same in both directions */ - if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) { - if (alt_crypto_info->version != crypto_info->version || - alt_crypto_info->cipher_type != crypto_info->cipher_type) { + if (update) { + /* Ensure that TLS version and ciphers are not modified */ + if (crypto_info->version != old_crypto_info->version || + crypto_info->cipher_type != old_crypto_info->cipher_type) rc = -EINVAL; - goto err_crypto_info; - } + } else { + rc = validate_crypto_info(crypto_info, alt_crypto_info); } + if (rc) + goto err_crypto_info; - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - optsize = sizeof(struct tls12_crypto_info_aes_gcm_128); - break; - case TLS_CIPHER_AES_GCM_256: { - optsize = sizeof(struct tls12_crypto_info_aes_gcm_256); - break; - } - case TLS_CIPHER_AES_CCM_128: - optsize = sizeof(struct tls12_crypto_info_aes_ccm_128); - break; - case TLS_CIPHER_CHACHA20_POLY1305: - optsize = sizeof(struct tls12_crypto_info_chacha20_poly1305); - break; - case TLS_CIPHER_SM4_GCM: - optsize = sizeof(struct tls12_crypto_info_sm4_gcm); - break; - case TLS_CIPHER_SM4_CCM: - optsize = sizeof(struct tls12_crypto_info_sm4_ccm); - break; - case TLS_CIPHER_ARIA_GCM_128: - if (crypto_info->version != TLS_1_2_VERSION) { - rc = -EINVAL; - goto err_crypto_info; - } - optsize = sizeof(struct tls12_crypto_info_aria_gcm_128); - break; - case TLS_CIPHER_ARIA_GCM_256: - if (crypto_info->version != TLS_1_2_VERSION) { - rc = -EINVAL; - goto err_crypto_info; - } - optsize = sizeof(struct tls12_crypto_info_aria_gcm_256); - break; - default: + cipher_desc = 
get_cipher_desc(crypto_info->cipher_type); + if (!cipher_desc) { rc = -EINVAL; goto err_crypto_info; } - if (optlen != optsize) { + if (optlen != cipher_desc->crypto_info) { rc = -EINVAL; goto err_crypto_info; } @@ -766,17 +711,23 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, } if (tx) { - rc = tls_set_device_offload(sk, ctx); + rc = tls_set_device_offload(sk); conf = TLS_HW; if (!rc) { TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE); } else { - rc = tls_set_sw_offload(sk, ctx, 1); + rc = tls_set_sw_offload(sk, 1, + update ? crypto_info : NULL); if (rc) goto err_crypto_info; - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); + + if (update) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXREKEYOK); + } else { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); + } conf = TLS_SW; } } else { @@ -786,14 +737,21 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE); } else { - rc = tls_set_sw_offload(sk, ctx, 0); + rc = tls_set_sw_offload(sk, 0, + update ? crypto_info : NULL); if (rc) goto err_crypto_info; - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + + if (update) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYOK); + } else { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + } conf = TLS_SW; } - tls_sw_strparser_arm(sk, ctx); + if (!update) + tls_sw_strparser_arm(sk, ctx); } if (tx) @@ -801,6 +759,10 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, else ctx->rx_conf = conf; update_sk_prot(sk, ctx); + + if (update) + return 0; + if (tx) { ctx->sk_write_space = sk->sk_write_space; sk->sk_write_space = tls_write_space; @@ -812,7 +774,11 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, return 0; err_crypto_info: - memzero_explicit(crypto_info, sizeof(union tls_crypto_context)); + if (update) { + TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR + : LINUX_MIB_TLSRXREKEYERROR); + } + memzero_explicit(crypto_ctx, sizeof(*crypto_ctx)); return rc; } @@ -868,6 +834,32 @@ static int do_tls_setsockopt_no_pad(struct sock *sk, sockptr_t optval, return rc; } +static int do_tls_setsockopt_tx_payload_len(struct sock *sk, sockptr_t optval, + unsigned int optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_sw_context_tx *sw_ctx = tls_sw_ctx_tx(ctx); + u16 value; + bool tls_13 = ctx->prot_info.version == TLS_1_3_VERSION; + + if (sw_ctx && sw_ctx->open_rec) + return -EBUSY; + + if (sockptr_is_null(optval) || optlen != sizeof(value)) + return -EINVAL; + + if (copy_from_sockptr(&value, optval, sizeof(value))) + return -EFAULT; + + if (value < TLS_MIN_RECORD_SIZE_LIM - (tls_13 ? 
1 : 0) || + value > TLS_MAX_PAYLOAD_SIZE) + return -EINVAL; + + ctx->tx_max_payload_len = value; + + return 0; +} + static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { @@ -889,6 +881,11 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, case TLS_RX_EXPECT_NO_PAD: rc = do_tls_setsockopt_no_pad(sk, optval, optlen); break; + case TLS_TX_MAX_PAYLOAD_LEN: + lock_sock(sk); + rc = do_tls_setsockopt_tx_payload_len(sk, optval, optlen); + release_sock(sk); + break; default: rc = -ENOPROTOOPT; break; @@ -908,6 +905,11 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, return do_tls_setsockopt(sk, optname, optval, optlen); } +static int tls_disconnect(struct sock *sk, int flags) +{ + return -EOPNOTSUPP; +} + struct tls_context *tls_ctx_create(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -918,9 +920,17 @@ struct tls_context *tls_ctx_create(struct sock *sk) return NULL; mutex_init(&ctx->tx_lock); - rcu_assign_pointer(icsk->icsk_ulp_data, ctx); ctx->sk_proto = READ_ONCE(sk->sk_prot); ctx->sk = sk; + /* Release semantic of rcu_assign_pointer() ensures that + * ctx->sk_proto is visible before changing sk->sk_prot in + * update_sk_prot(), and prevents reading uninitialized value in + * tls_{getsockopt, setsockopt}. Note that we do not need a + * read barrier in tls_{getsockopt,setsockopt} as there is an + * address dependency between sk->sk_proto->{getsockopt,setsockopt} + * and ctx->sk_proto. + */ + rcu_assign_pointer(icsk->icsk_ulp_data, ctx); return ctx; } @@ -930,27 +940,28 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] ops[TLS_BASE][TLS_BASE] = *base; ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; - ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked; + ops[TLS_SW ][TLS_BASE].splice_eof = tls_sw_splice_eof; ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read; + ops[TLS_BASE][TLS_SW ].poll = tls_sk_poll; + ops[TLS_BASE][TLS_SW ].read_sock = tls_sw_read_sock; ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE]; ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read; + ops[TLS_SW ][TLS_SW ].poll = tls_sk_poll; + ops[TLS_SW ][TLS_SW ].read_sock = tls_sw_read_sock; #ifdef CONFIG_TLS_DEVICE ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; - ops[TLS_HW ][TLS_BASE].sendpage_locked = NULL; ops[TLS_HW ][TLS_SW ] = ops[TLS_BASE][TLS_SW ]; - ops[TLS_HW ][TLS_SW ].sendpage_locked = NULL; ops[TLS_BASE][TLS_HW ] = ops[TLS_BASE][TLS_SW ]; ops[TLS_SW ][TLS_HW ] = ops[TLS_SW ][TLS_SW ]; ops[TLS_HW ][TLS_HW ] = ops[TLS_HW ][TLS_SW ]; - ops[TLS_HW ][TLS_HW ].sendpage_locked = NULL; #endif #ifdef CONFIG_TLS_TOE ops[TLS_HW_RECORD][TLS_HW_RECORD] = *base; @@ -994,11 +1005,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_BASE] = *base; prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt; + prot[TLS_BASE][TLS_BASE].disconnect = tls_disconnect; prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close; prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; - prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage; + prot[TLS_SW][TLS_BASE].splice_eof = tls_sw_splice_eof; prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg; @@ -1013,11 +1025,11 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], 
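For orientation, a minimal userspace sketch of driving the TLS_TX_MAX_PAYLOAD_LEN option added above; this is an editorial aside, not part of the patch. It assumes the companion uapi change exports TLS_TX_MAX_PAYLOAD_LEN from <linux/tls.h>, and the 8 KB cap used in the usage comment is an arbitrary example value.

/* Editorial sketch: shrink the largest plaintext record the kernel will
 * build on an already-established ktls TX socket. The TLS ULP and the TX key
 * (setsockopt(SOL_TLS, TLS_TX, ...)) must be installed first; the kernel
 * takes a u16, rejects out-of-range values, and fails with EBUSY while a
 * partially built record is still open.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/tls.h>

#ifndef SOL_TLS
#define SOL_TLS 282
#endif

static int cap_tls_tx_records(int fd, unsigned short max_payload)
{
	if (setsockopt(fd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN,
		       &max_payload, sizeof(max_payload)) < 0) {
		fprintf(stderr, "TLS_TX_MAX_PAYLOAD_LEN: %s\n",
			strerror(errno));
		return -1;
	}
	return 0;
}

/* Usage: cap_tls_tx_records(fd, 8 * 1024); subsequent sends then close
 * records at 8 KB of plaintext instead of TLS_MAX_PAYLOAD_SIZE.
 */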
#ifdef CONFIG_TLS_DEVICE prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg; - prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage; + prot[TLS_HW][TLS_BASE].splice_eof = tls_device_splice_eof; prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW]; prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg; - prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage; + prot[TLS_HW][TLS_SW].splice_eof = tls_device_splice_eof; prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW]; @@ -1063,6 +1075,7 @@ static int tls_init(struct sock *sk) ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; + ctx->tx_max_payload_len = TLS_MAX_PAYLOAD_SIZE; update_sk_prot(sk, ctx); out: write_unlock_bh(&sk->sk_callback_lock); @@ -1104,7 +1117,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb, bool net_admin) { u16 version, cipher_type; struct tls_context *ctx; @@ -1152,6 +1165,12 @@ static int tls_get_info(const struct sock *sk, struct sk_buff *skb) goto nla_failure; } + err = nla_put_u16(skb, TLS_INFO_TX_MAX_PAYLOAD_LEN, + ctx->tx_max_payload_len); + + if (err) + goto nla_failure; + rcu_read_unlock(); nla_nest_end(skb, start); return 0; @@ -1162,7 +1181,7 @@ nla_failure: return err; } -static size_t tls_get_info_size(const struct sock *sk) +static size_t tls_get_info_size(const struct sock *sk, bool net_admin) { size_t size = 0; @@ -1173,6 +1192,7 @@ static size_t tls_get_info_size(const struct sock *sk) nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */ nla_total_size(0) + /* TLS_INFO_ZC_RO_TX */ nla_total_size(0) + /* TLS_INFO_RX_NO_PAD */ + nla_total_size(sizeof(u16)) + /* TLS_INFO_TX_MAX_PAYLOAD_LEN */ 0; return size; diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c index 68982728f620..4012c4372d4c 100644 --- a/net/tls/tls_proc.c +++ b/net/tls/tls_proc.c @@ -22,17 +22,24 @@ static const struct snmp_mib tls_mib_list[] = { SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC), SNMP_MIB_ITEM("TlsDecryptRetry", LINUX_MIB_TLSDECRYPTRETRY), SNMP_MIB_ITEM("TlsRxNoPadViolation", LINUX_MIB_TLSRXNOPADVIOL), - SNMP_MIB_SENTINEL + SNMP_MIB_ITEM("TlsRxRekeyOk", LINUX_MIB_TLSRXREKEYOK), + SNMP_MIB_ITEM("TlsRxRekeyError", LINUX_MIB_TLSRXREKEYERROR), + SNMP_MIB_ITEM("TlsTxRekeyOk", LINUX_MIB_TLSTXREKEYOK), + SNMP_MIB_ITEM("TlsTxRekeyError", LINUX_MIB_TLSTXREKEYERROR), + SNMP_MIB_ITEM("TlsRxRekeyReceived", LINUX_MIB_TLSRXREKEYRECEIVED), }; static int tls_statistics_seq_show(struct seq_file *seq, void *v) { - unsigned long buf[LINUX_MIB_TLSMAX] = {}; + unsigned long buf[ARRAY_SIZE(tls_mib_list)]; + const int cnt = ARRAY_SIZE(tls_mib_list); struct net *net = seq->private; int i; - snmp_get_cpu_field_batch(buf, tls_mib_list, net->mib.tls_statistics); - for (i = 0; tls_mib_list[i].name; i++) + memset(buf, 0, sizeof(buf)); + snmp_get_cpu_field_batch_cnt(buf, tls_mib_list, cnt, + net->mib.tls_statistics); + for (i = 0; i < cnt; i++) seq_printf(seq, "%-32s\t%lu\n", tls_mib_list[i].name, buf[i]); return 0; diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 955ac3e0bf4d..98e12f0ff57e 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -2,6 +2,7 @@ /* Copyright (c) 2016 Tom Herbert <tom@herbertland.com> */ #include <linux/skbuff.h> +#include <linux/skbuff_ref.h> #include <linux/workqueue.h> #include <net/strparser.h> #include <net/tcp.h> @@ -12,7 +13,7 @@ static struct workqueue_struct *tls_strp_wq; -static void 
tls_strp_abort_strp(struct tls_strparser *strp, int err) +void tls_strp_abort_strp(struct tls_strparser *strp, int err) { if (strp->stopped) return; @@ -20,7 +21,9 @@ static void tls_strp_abort_strp(struct tls_strparser *strp, int err) strp->stopped = 1; /* Report an error on the lower socket */ - strp->sk->sk_err = -err; + WRITE_ONCE(strp->sk->sk_err, -err); + /* Paired with smp_rmb() in tcp_poll() */ + smp_wmb(); sk_error_report(strp->sk); } @@ -29,34 +32,50 @@ static void tls_strp_anchor_free(struct tls_strparser *strp) struct skb_shared_info *shinfo = skb_shinfo(strp->anchor); DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1); - shinfo->frag_list = NULL; + if (!strp->copy_mode) + shinfo->frag_list = NULL; consume_skb(strp->anchor); strp->anchor = NULL; } -/* Create a new skb with the contents of input copied to its page frags */ -static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp) +static struct sk_buff * +tls_strp_skb_copy(struct tls_strparser *strp, struct sk_buff *in_skb, + int offset, int len) { - struct strp_msg *rxm; struct sk_buff *skb; - int i, err, offset; + int i, err; - skb = alloc_skb_with_frags(0, strp->stm.full_len, TLS_PAGE_ORDER, + skb = alloc_skb_with_frags(0, len, TLS_PAGE_ORDER, &err, strp->sk->sk_allocation); if (!skb) return NULL; - offset = strp->stm.offset; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - WARN_ON_ONCE(skb_copy_bits(strp->anchor, offset, + WARN_ON_ONCE(skb_copy_bits(in_skb, offset, skb_frag_address(frag), skb_frag_size(frag))); offset += skb_frag_size(frag); } - skb_copy_header(skb, strp->anchor); + skb->len = len; + skb->data_len = len; + skb_copy_header(skb, in_skb); + return skb; +} + +/* Create a new skb with the contents of input copied to its page frags */ +static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp) +{ + struct strp_msg *rxm; + struct sk_buff *skb; + + skb = tls_strp_skb_copy(strp, strp->anchor, strp->stm.offset, + strp->stm.full_len); + if (!skb) + return NULL; + rxm = strp_msg(skb); rxm->offset = 0; return skb; @@ -180,23 +199,29 @@ static void tls_strp_flush_anchor_copy(struct tls_strparser *strp) for (i = 0; i < shinfo->nr_frags; i++) __skb_frag_unref(&shinfo->frags[i], false); shinfo->nr_frags = 0; + if (strp->copy_mode) { + kfree_skb_list(shinfo->frag_list); + shinfo->frag_list = NULL; + } strp->copy_mode = 0; + strp->mixed_decrypted = 0; } -static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, - unsigned int offset, size_t in_len) +static int tls_strp_copyin_frag(struct tls_strparser *strp, struct sk_buff *skb, + struct sk_buff *in_skb, unsigned int offset, + size_t in_len) { - struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data; - struct sk_buff *skb; - skb_frag_t *frag; + unsigned int nfrag = skb->len / PAGE_SIZE; size_t len, chunk; + skb_frag_t *frag; int sz; - if (strp->msg_ready) - return 0; + if (unlikely(nfrag >= skb_shinfo(skb)->nr_frags)) { + DEBUG_NET_WARN_ON_ONCE(1); + return -EMSGSIZE; + } - skb = strp->anchor; - frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE]; + frag = &skb_shinfo(skb)->frags[nfrag]; len = in_len; /* First make sure we got the header */ @@ -208,19 +233,26 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, skb_frag_size(frag), chunk)); - sz = tls_rx_msg_size(strp, strp->anchor); - if (sz < 0) { - desc->error = sz; - return 0; - } - - /* We may have over-read, sz == 0 is guaranteed under-read */ - if (sz > 0) - chunk = 
min_t(size_t, chunk, sz - skb->len); - skb->len += chunk; skb->data_len += chunk; skb_frag_size_add(frag, chunk); + + sz = tls_rx_msg_size(strp, skb); + if (sz < 0) + return sz; + + /* We may have over-read, sz == 0 is guaranteed under-read */ + if (unlikely(sz && sz < skb->len)) { + int over = skb->len - sz; + + WARN_ON_ONCE(over > chunk); + skb->len -= over; + skb->data_len -= over; + skb_frag_size_add(frag, -over); + + chunk -= over; + } + frag++; len -= chunk; offset += chunk; @@ -247,20 +279,103 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, offset += chunk; } - if (strp->stm.full_len == skb->len) { +read_done: + return in_len - len; +} + +static int tls_strp_copyin_skb(struct tls_strparser *strp, struct sk_buff *skb, + struct sk_buff *in_skb, unsigned int offset, + size_t in_len) +{ + struct sk_buff *nskb, *first, *last; + struct skb_shared_info *shinfo; + size_t chunk; + int sz; + + if (strp->stm.full_len) + chunk = strp->stm.full_len - skb->len; + else + chunk = TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE; + chunk = min(chunk, in_len); + + nskb = tls_strp_skb_copy(strp, in_skb, offset, chunk); + if (!nskb) + return -ENOMEM; + + shinfo = skb_shinfo(skb); + if (!shinfo->frag_list) { + shinfo->frag_list = nskb; + nskb->prev = nskb; + } else { + first = shinfo->frag_list; + last = first->prev; + last->next = nskb; + first->prev = nskb; + } + + skb->len += chunk; + skb->data_len += chunk; + + if (!strp->stm.full_len) { + sz = tls_rx_msg_size(strp, skb); + if (sz < 0) + return sz; + + /* We may have over-read, sz == 0 is guaranteed under-read */ + if (unlikely(sz && sz < skb->len)) { + int over = skb->len - sz; + + WARN_ON_ONCE(over > chunk); + skb->len -= over; + skb->data_len -= over; + __pskb_trim(nskb, nskb->len - over); + + chunk -= over; + } + + strp->stm.full_len = sz; + } + + return chunk; +} + +static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, + unsigned int offset, size_t in_len) +{ + struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data; + struct sk_buff *skb; + int ret; + + if (strp->msg_ready) + return 0; + + skb = strp->anchor; + if (!skb->len) + skb_copy_decrypted(skb, in_skb); + else + strp->mixed_decrypted |= !!skb_cmp_decrypted(skb, in_skb); + + if (IS_ENABLED(CONFIG_TLS_DEVICE) && strp->mixed_decrypted) + ret = tls_strp_copyin_skb(strp, skb, in_skb, offset, in_len); + else + ret = tls_strp_copyin_frag(strp, skb, in_skb, offset, in_len); + if (ret < 0) { + desc->error = ret; + ret = 0; + } + + if (strp->stm.full_len && strp->stm.full_len == skb->len) { desc->count = 0; - strp->msg_ready = 1; + WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); } -read_done: - return in_len - len; + return ret; } static int tls_strp_read_copyin(struct tls_strparser *strp) { - struct socket *sock = strp->sk->sk_socket; read_descriptor_t desc; desc.arg.data = strp; @@ -268,7 +383,7 @@ static int tls_strp_read_copyin(struct tls_strparser *strp) desc.count = 1; /* give more than one skb per call */ /* sk should be locked here, so okay to do read_sock */ - sock->ops->read_sock(strp->sk, &desc, tls_strp_copyin); + tcp_read_sock(strp->sk, &desc, tls_strp_copyin); return desc.error; } @@ -287,7 +402,6 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) return 0; shinfo = skb_shinfo(strp->anchor); - shinfo->frag_list = NULL; /* If we don't know the length go max plus page for cipher overhead */ need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE; @@ -303,6 +417,8 @@ static int 
tls_strp_read_copy(struct tls_strparser *strp, bool qshort) page, 0, 0); } + shinfo->frag_list = NULL; + strp->copy_mode = 1; strp->stm.offset = 0; @@ -315,15 +431,19 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) return 0; } -static bool tls_strp_check_no_dup(struct tls_strparser *strp) +static bool tls_strp_check_queue_ok(struct tls_strparser *strp) { unsigned int len = strp->stm.offset + strp->stm.full_len; - struct sk_buff *skb; + struct sk_buff *first, *skb; u32 seq; - skb = skb_shinfo(strp->anchor)->frag_list; - seq = TCP_SKB_CB(skb)->seq; + first = skb_shinfo(strp->anchor)->frag_list; + skb = first; + seq = TCP_SKB_CB(first)->seq; + /* Make sure there's no duplicate data in the queue, + * and the decrypted status matches. + */ while (skb->len < len) { seq += skb->len; len -= skb->len; @@ -331,6 +451,8 @@ static bool tls_strp_check_no_dup(struct tls_strparser *strp) if (TCP_SKB_CB(skb)->seq != seq) return false; + if (skb_cmp_decrypted(first, skb)) + return false; } return true; @@ -359,7 +481,7 @@ static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len) strp->stm.offset = offset; } -void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) { struct strp_msg *rxm; struct tls_msg *tlm; @@ -368,8 +490,11 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len); if (!strp->copy_mode && force_refresh) { - if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len)) - return; + if (unlikely(tcp_inq(strp->sk) < strp->stm.full_len)) { + WRITE_ONCE(strp->msg_ready, 0); + memset(&strp->stm, 0, sizeof(strp->stm)); + return false; + } tls_strp_load_anchor_with_queue(strp, strp->stm.full_len); } @@ -379,6 +504,8 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) rxm->offset = strp->stm.offset; tlm = tls_msg(strp->anchor); tlm->control = strp->mark; + + return true; } /* Called with lock held on lower socket */ @@ -396,14 +523,11 @@ static int tls_strp_read_sock(struct tls_strparser *strp) if (inq < strp->stm.full_len) return tls_strp_read_copy(strp, true); + tls_strp_load_anchor_with_queue(strp, inq); if (!strp->stm.full_len) { - tls_strp_load_anchor_with_queue(strp, inq); - sz = tls_rx_msg_size(strp, strp->anchor); - if (sz < 0) { - tls_strp_abort_strp(strp, sz); + if (sz < 0) return sz; - } strp->stm.full_len = sz; @@ -411,10 +535,10 @@ static int tls_strp_read_sock(struct tls_strparser *strp) return tls_strp_read_copy(strp, true); } - if (!tls_strp_check_no_dup(strp)) + if (!tls_strp_check_queue_ok(strp)) return tls_strp_read_copy(strp, false); - strp->msg_ready = 1; + WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); return 0; @@ -466,7 +590,7 @@ void tls_strp_msg_done(struct tls_strparser *strp) else tls_strp_flush_anchor_copy(strp); - strp->msg_ready = 0; + WRITE_ONCE(strp->msg_ready, 0); memset(&strp->stm, 0, sizeof(strp->stm)); tls_strp_check_rcv(strp); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9ed978634125..9937d4c810f2 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -38,11 +38,13 @@ #include <linux/bug.h> #include <linux/sched/signal.h> #include <linux/module.h> +#include <linux/kernel.h> #include <linux/splice.h> #include <crypto/aead.h> #include <net/strparser.h> #include <net/tls.h> +#include <trace/events/sock.h> #include "tls.h" @@ -50,6 +52,7 @@ struct tls_decrypt_arg { struct_group(inargs, bool zc; bool async; + bool async_done; u8 tail; ); @@ 
-57,9 +60,11 @@ struct tls_decrypt_arg { }; struct tls_decrypt_ctx { - u8 iv[MAX_IV_SIZE]; + struct sock *sk; + u8 iv[TLS_MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; + bool free_sgout; struct scatterlist sg[]; }; @@ -67,7 +72,9 @@ noinline void tls_err_abort(struct sock *sk, int err) { WARN_ON_ONCE(err >= 0); /* sk->sk_err should contain a positive error code. */ - sk->sk_err = -err; + WRITE_ONCE(sk->sk_err, -err); + /* Paired with smp_rmb() in tcp_poll() */ + smp_wmb(); sk_error_report(sk); } @@ -177,18 +184,35 @@ static int tls_padding_length(struct tls_prot_info *prot, struct sk_buff *skb, return sub; } -static void tls_decrypt_done(struct crypto_async_request *req, int err) +static void tls_decrypt_done(void *data, int err) { - struct aead_request *aead_req = (struct aead_request *)req; + struct aead_request *aead_req = data; + struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; - struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; + struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; struct scatterlist *sg; unsigned int pages; struct sock *sk; + int aead_size; + + /* If requests get too backlogged crypto API returns -EBUSY and calls + * ->complete(-EINPROGRESS) immediately followed by ->complete(0) + * to make waiting for backlog to flush with crypto_wait_req() easier. + * First wait converts -EBUSY -> -EINPROGRESS, and the second one + * -EINPROGRESS -> 0. + * We have a single struct crypto_async_request per direction, this + * scheme doesn't help us, so just ignore the first ->complete(). + */ + if (err == -EINPROGRESS) + return; - sk = (struct sock *)req->data; + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); + aead_size = ALIGN(aead_size, __alignof__(*dctx)); + dctx = (void *)((u8 *)aead_req + aead_size); + + sk = dctx->sk; tls_ctx = tls_get_ctx(sk); ctx = tls_sw_ctx_rx(tls_ctx); @@ -201,7 +225,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) } /* Free the destination pages if skb was not decrypted inplace */ - if (sgout != sgin) { + if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) @@ -212,10 +236,17 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - if (!atomic_dec_return(&ctx->decrypt_pending)) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); +} + +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + if (!atomic_dec_and_test(&ctx->decrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); + + return ctx->async_wait.err; } static int tls_do_decryption(struct sock *sk, @@ -240,21 +271,34 @@ static int tls_do_decryption(struct sock *sk, if (darg->async) { aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, - tls_decrypt_done, sk); + tls_decrypt_done, aead_req); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { + DECLARE_CRYPTO_WAIT(wait); + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &ctx->async_wait); + crypto_req_done, &wait); + ret = crypto_aead_decrypt(aead_req); + if (ret == -EINPROGRESS || ret == -EBUSY) + ret = crypto_wait_req(ret, &wait); + return ret; } ret = crypto_aead_decrypt(aead_req); - if (ret == -EINPROGRESS) { - if 
(darg->async) - return 0; + if (ret == -EINPROGRESS) + return 0; - ret = crypto_wait_req(ret, &ctx->async_wait); + if (ret == -EBUSY) { + ret = tls_decrypt_async_wait(ctx); + darg->async_done = true; + /* all completions have run, we're not doing async anymore */ + darg->async = false; + return ret; } + + atomic_dec(&ctx->decrypt_pending); darg->async = false; return ret; @@ -336,6 +380,8 @@ static struct tls_rec *tls_get_rec(struct sock *sk) sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, prot->aad_size); sg_unmark_end(&rec->sg_aead_out[1]); + rec->sk = sk; + return rec; } @@ -412,27 +458,31 @@ int tls_tx_records(struct sock *sk, int flags) tx_err: if (rc < 0 && rc != -EAGAIN) - tls_err_abort(sk, -EBADMSG); + tls_err_abort(sk, rc); return rc; } -static void tls_encrypt_done(struct crypto_async_request *req, int err) +static void tls_encrypt_done(void *data, int err) { - struct aead_request *aead_req = (struct aead_request *)req; - struct sock *sk = req->data; - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); + struct tls_sw_context_tx *ctx; + struct tls_context *tls_ctx; + struct tls_prot_info *prot; + struct tls_rec *rec = data; struct scatterlist *sge; struct sk_msg *msg_en; - struct tls_rec *rec; - bool ready = false; - int pending; + struct sock *sk; + + if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ + return; - rec = container_of(aead_req, struct tls_rec, aead_req); msg_en = &rec->msg_encrypted; + sk = rec->sk; + tls_ctx = tls_get_ctx(sk); + prot = &tls_ctx->prot_info; + ctx = tls_sw_ctx_tx(tls_ctx); + sge = sk_msg_elem(msg_en, msg_en->sg.curr); sge->offset -= prot->prepend_size; sge->length += prot->prepend_size; @@ -459,23 +509,25 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) /* If received record is at head of tx_list, schedule tx */ first_rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); - if (rec == first_rec) - ready = true; + if (rec == first_rec) { + /* Schedule the transmission */ + if (!test_and_set_bit(BIT_TX_SCHEDULED, + &ctx->tx_bitmask)) + schedule_delayed_work(&ctx->tx_work.work, 1); + } } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); +} - if (!ready) - return; +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + if (!atomic_dec_and_test(&ctx->encrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->encrypt_pending); - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) - schedule_delayed_work(&ctx->tx_work.work, 1); + return ctx->async_wait.err; } static int tls_do_encryption(struct sock *sk, @@ -520,13 +572,18 @@ static int tls_do_encryption(struct sock *sk, data_len, rec->iv_data); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, - tls_encrypt_done, sk); + tls_encrypt_done, rec); /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); + if (rc == -EBUSY) { + rc = tls_encrypt_async_wait(ctx); + rc = rc ?: -EINPROGRESS; + } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= 
prot->prepend_size; @@ -800,7 +857,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, psock = sk_psock_get(sk); if (!psock || !policy) { err = tls_push_record(sk, flags, record_type); - if (err && sk->sk_err == EBADMSG) { + if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); err = -sk->sk_err; @@ -815,6 +872,19 @@ more_data: delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); delta -= msg->sg.size; + + if ((s32)delta > 0) { + /* It indicates that we executed bpf_msg_pop_data(), + * causing the plaintext data size to decrease. + * Therefore the encrypted data size also needs to + * correspondingly decrease. We only need to subtract + * delta to calculate the new ciphertext length since + * ktls does not support block encryption. + */ + struct sk_msg *enc = &ctx->open_rec->msg_encrypted; + + sk_msg_trim(sk, enc, enc->sg.size - delta); + } } if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && !enospc && !full_record) { @@ -829,7 +899,7 @@ more_data: switch (psock->eval) { case __SK_PASS: err = tls_push_record(sk, flags, record_type); - if (err && sk->sk_err == EBADMSG) { + if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); err = -sk->sk_err; @@ -851,6 +921,13 @@ more_data: &msg_redir, send, flags); lock_sock(sk); if (err < 0) { + /* Regardless of whether the data represented by + * msg_redir is sent successfully, we have already + * uncharged it via sk_msg_return_zero(). The + * msg->sg.size represents the remaining unprocessed + * data, which needs to be uncharged here. + */ + sk_mem_uncharge(sk, msg->sg.size); *copied -= sk_msg_free_nocharge(sk, &msg_redir); msg->sg.size = 0; } @@ -914,7 +991,39 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags) &copied, flags); } -int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg, + struct sk_msg *msg_pl, size_t try_to_copy, + ssize_t *copied) +{ + struct page *page = NULL, **pages = &page; + + do { + ssize_t part; + size_t off; + + part = iov_iter_extract_pages(&msg->msg_iter, &pages, + try_to_copy, 1, 0, &off); + if (part <= 0) + return part ?: -EIO; + + if (WARN_ON_ONCE(!sendpage_ok(page))) { + iov_iter_revert(&msg->msg_iter, part); + return -EIO; + } + + sk_msg_page_add(msg_pl, page, part, off); + msg_pl->sg.copybreak = 0; + msg_pl->sg.curr = msg_pl->sg.end; + sk_mem_charge(sk, part); + *copied += part; + try_to_copy -= part; + } while (try_to_copy && !sk_msg_full(msg_pl)); + + return 0; +} + +static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, + size_t size) { long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -935,14 +1044,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int num_zc = 0; int orig_size; int ret = 0; - int pending; - if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_CMSG_COMPAT)) - return -EOPNOTSUPP; - - mutex_lock(&tls_ctx->tx_lock); - lock_sock(sk); + if (!eor && (msg->msg_flags & MSG_EOR)) + return -EINVAL; if (unlikely(msg->msg_controllen)) { ret = tls_process_cmsg(sk, msg, &record_type); @@ -950,7 +1054,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) if (ret == -EINPROGRESS) num_async++; else if (ret != -EAGAIN) - goto send_end; + goto end; } } @@ -975,7 +1079,7 @@ int tls_sw_sendmsg(struct sock *sk, 
struct msghdr *msg, size_t size) orig_size = msg_pl->sg.size; full_record = false; try_to_copy = msg_data_left(msg); - record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size; + record_room = tls_ctx->tx_max_payload_len - msg_pl->sg.size; if (try_to_copy >= record_room) { try_to_copy = record_room; full_record = true; @@ -1001,6 +1105,24 @@ alloc_encrypted: full_record = true; } + if (try_to_copy && (msg->msg_flags & MSG_SPLICE_PAGES)) { + ret = tls_sw_sendmsg_splice(sk, msg, msg_pl, + try_to_copy, &copied); + if (ret < 0) + goto send_end; + tls_ctx->pending_open_record_frags = true; + + if (sk_msg_full(msg_pl)) { + full_record = true; + sk_msg_trim(sk, msg_en, + msg_pl->sg.size + prot->overhead_size); + } + + if (full_record || eor) + goto copied; + continue; + } + if (!is_kvec && (full_record || eor) && !async_capable) { u32 first = msg_pl->sg.end; @@ -1021,11 +1143,22 @@ alloc_encrypted: num_async++; else if (ret == -ENOMEM) goto wait_for_memory; - else if (ctx->open_rec && ret == -ENOSPC) + else if (ctx->open_rec && ret == -ENOSPC) { + if (msg_pl->cork_bytes) { + ret = 0; + goto send_end; + } goto rollback_iter; - else if (ret != -EAGAIN) + } else if (ret != -EAGAIN) goto send_end; } + + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, msg->msg_flags); + } + continue; rollback_iter: copied -= try_to_copy; @@ -1065,6 +1198,7 @@ fallback_to_reg_send: */ tls_ctx->pending_open_record_frags = true; copied += try_to_copy; +copied: if (full_record || eor) { ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, record_type, &copied, @@ -1080,6 +1214,12 @@ fallback_to_reg_send: goto send_end; } } + + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, msg->msg_flags); + } } continue; @@ -1099,27 +1239,16 @@ trim_sgl: goto alloc_encrypted; } +send_end: if (!num_async) { - goto send_end; - } else if (num_zc) { - /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + goto end; + } else if (num_zc || eor) { + int err; - if (ctx->async_wait.err) { - ret = ctx->async_wait.err; + /* Wait for pending encryptions to get completed */ + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1130,157 +1259,90 @@ trim_sgl: tls_tx_records(sk, msg->msg_flags); } -send_end: +end: ret = sk_stream_error(sk, msg->msg_flags, ret); + return copied > 0 ? copied : ret; +} + +int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + int ret; + + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_CMSG_COMPAT | MSG_SPLICE_PAGES | MSG_EOR | + MSG_SENDPAGE_NOPOLICY)) + return -EOPNOTSUPP; + ret = mutex_lock_interruptible(&tls_ctx->tx_lock); + if (ret) + return ret; + lock_sock(sk); + ret = tls_sw_sendmsg_locked(sk, msg, size); release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); - return copied > 0 ? 
copied : ret; + return ret; } -static int tls_sw_do_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) +/* + * Handle unexpected EOF during splice without SPLICE_F_MORE set. + */ +void tls_sw_splice_eof(struct socket *sock) { - long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); - struct tls_prot_info *prot = &tls_ctx->prot_info; - unsigned char record_type = TLS_RECORD_TYPE_DATA; - struct sk_msg *msg_pl; struct tls_rec *rec; - int num_async = 0; + struct sk_msg *msg_pl; ssize_t copied = 0; - bool full_record; - int record_room; + bool retrying = false; int ret = 0; - bool eor; - - eor = !(flags & MSG_SENDPAGE_NOTLAST); - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - - /* Call the sk_stream functions to manage the sndbuf mem. */ - while (size > 0) { - size_t copy, required_size; - - if (sk->sk_err) { - ret = -sk->sk_err; - goto sendpage_end; - } - if (ctx->open_rec) - rec = ctx->open_rec; - else - rec = ctx->open_rec = tls_get_rec(sk); - if (!rec) { - ret = -ENOMEM; - goto sendpage_end; - } - - msg_pl = &rec->msg_plaintext; - - full_record = false; - record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size; - copy = size; - if (copy >= record_room) { - copy = record_room; - full_record = true; - } - - required_size = msg_pl->sg.size + copy + prot->overhead_size; - - if (!sk_stream_memory_free(sk)) - goto wait_for_sndbuf; -alloc_payload: - ret = tls_alloc_encrypted_msg(sk, required_size); - if (ret) { - if (ret != -ENOSPC) - goto wait_for_memory; - - /* Adjust copy according to the amount that was - * actually allocated. The difference is due - * to max sg elements limit - */ - copy -= required_size - msg_pl->sg.size; - full_record = true; - } - - sk_msg_page_add(msg_pl, page, copy, offset); - sk_mem_charge(sk, copy); - - offset += copy; - size -= copy; - copied += copy; + if (!ctx->open_rec) + return; - tls_ctx->pending_open_record_frags = true; - if (full_record || eor || sk_msg_full(msg_pl)) { - ret = bpf_exec_tx_verdict(msg_pl, sk, full_record, - record_type, &copied, flags); - if (ret) { - if (ret == -EINPROGRESS) - num_async++; - else if (ret == -ENOMEM) - goto wait_for_memory; - else if (ret != -EAGAIN) { - if (ret == -ENOSPC) - ret = 0; - goto sendpage_end; - } - } - } - continue; -wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -wait_for_memory: - ret = sk_stream_wait_memory(sk, &timeo); - if (ret) { - if (ctx->open_rec) - tls_trim_both_msgs(sk, msg_pl->sg.size); - goto sendpage_end; - } + mutex_lock(&tls_ctx->tx_lock); + lock_sock(sk); - if (ctx->open_rec) - goto alloc_payload; - } +retry: + /* same checks as in tls_sw_push_pending_record() */ + rec = ctx->open_rec; + if (!rec) + goto unlock; - if (num_async) { - /* Transmit if any encryptions have completed */ - if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { - cancel_delayed_work(&ctx->tx_work.work); - tls_tx_records(sk, flags); - } + msg_pl = &rec->msg_plaintext; + if (msg_pl->sg.size == 0) + goto unlock; + + /* Check the BPF advisor and perform transmission. */ + ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA, + &copied, 0); + switch (ret) { + case 0: + case -EAGAIN: + if (retrying) + goto unlock; + retrying = true; + goto retry; + case -EINPROGRESS: + break; + default: + goto unlock; } -sendpage_end: - ret = sk_stream_error(sk, flags, ret); - return copied > 0 ? 
copied : ret; -} -int tls_sw_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY | - MSG_NO_SHARED_FRAGS)) - return -EOPNOTSUPP; + /* Wait for pending encryptions to get completed */ + if (tls_encrypt_async_wait(ctx)) + goto unlock; - return tls_sw_do_sendpage(sk, page, offset, size, flags); -} - -int tls_sw_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - struct tls_context *tls_ctx = tls_get_ctx(sk); - int ret; - - if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) - return -EOPNOTSUPP; + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, 0); + } - mutex_lock(&tls_ctx->tx_lock); - lock_sock(sk); - ret = tls_sw_do_sendpage(sk, page, offset, size, flags); +unlock: release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); - return ret; } static int @@ -1290,8 +1352,13 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; long timeo; + /* a rekey is pending, let userspace deal with it */ + if (unlikely(ctx->key_update_pending)) + return -EKEYEXPIRED; + timeo = sock_rcvtimeo(sk, nonblock); while (!tls_strp_msg_ready(ctx)) { @@ -1301,6 +1368,9 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, if (sk->sk_err) return sock_error(sk); + if (ret < 0) + return ret; + if (!skb_queue_empty(&sk->sk_receive_queue)) { tls_strp_check_rcv(&ctx->strp); if (tls_strp_msg_ready(ctx)) @@ -1319,10 +1389,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, released = true; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, - tls_strp_msg_ready(ctx) || - !sk_psock_queue_empty(psock), - &wait); + ret = sk_wait_event(sk, &timeo, + tls_strp_msg_ready(ctx) || + !sk_psock_queue_empty(psock), + &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); @@ -1331,7 +1401,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_intr_errno(timeo); } - tls_strp_msg_load(&ctx->strp, released); + if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) + return tls_rx_rec_wait(sk, psock, nonblock, false); return 1; } @@ -1485,7 +1556,8 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, * Both structs are variable length. 
*/ aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); - mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout), + aead_size = ALIGN(aead_size, __alignof__(*dctx)); + mem = kmalloc(aead_size + struct_size(dctx, sg, size_add(n_sgin, n_sgout)), sk->sk_allocation); if (!mem) { err = -ENOMEM; @@ -1495,6 +1567,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, /* Segment the allocated memory */ aead_req = (struct aead_request *)mem; dctx = (struct tls_decrypt_ctx *)(mem + aead_size); + dctx->sk = sk; sgin = &dctx->sg[0]; sgout = &dctx->sg[n_sgin]; @@ -1565,23 +1638,32 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } + dctx->free_sgout = !!pages; /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, data_len + prot->tail_size, aead_req, darg); - if (err) + if (err) { + if (darg->async_done) + goto exit_free_skb; goto exit_free_pages; + } darg->skb = clear_skb ?: tls_strp_msg(ctx); clear_skb = NULL; if (unlikely(darg->async)) { err = tls_strp_msg_hold(&ctx->strp, &ctx->async_hold); - if (err) - __skb_queue_tail(&ctx->async_hold, darg->skb); + if (err) { + err = tls_decrypt_async_wait(ctx); + darg->async = false; + } return err; } + if (unlikely(darg->async_done)) + return 0; + if (prot->tail_size) darg->tail = dctx->tail; @@ -1686,6 +1768,36 @@ tls_decrypt_device(struct sock *sk, struct msghdr *msg, return 1; } +static int tls_check_pending_rekey(struct sock *sk, struct tls_context *ctx, + struct sk_buff *skb) +{ + const struct strp_msg *rxm = strp_msg(skb); + const struct tls_msg *tlm = tls_msg(skb); + char hs_type; + int err; + + if (likely(tlm->control != TLS_RECORD_TYPE_HANDSHAKE)) + return 0; + + if (rxm->full_len < 1) + return 0; + + err = skb_copy_bits(skb, rxm->offset, &hs_type, 1); + if (err < 0) { + DEBUG_NET_WARN_ON_ONCE(1); + return err; + } + + if (hs_type == TLS_HANDSHAKE_KEYUPDATE) { + struct tls_sw_context_rx *rx_ctx = ctx->priv_ctx_rx; + + WRITE_ONCE(rx_ctx->key_update_pending, true); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYRECEIVED); + } + + return 0; +} + static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, struct tls_decrypt_arg *darg) { @@ -1705,7 +1817,7 @@ static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); - return 0; + return tls_check_pending_rekey(sk, tls_ctx, darg->skb); } int decrypt_skb(struct sock *sk, struct scatterlist *sgout) @@ -1715,6 +1827,9 @@ int decrypt_skb(struct sock *sk, struct scatterlist *sgout) return tls_decrypt_sg(sk, NULL, sgout, &darg); } +/* All records returned from a recvmsg() call must have the same type. + * 0 is not a valid content type. Use it as "no type reported, yet". 
+ */ static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, u8 *control) { @@ -1753,7 +1868,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1766,7 +1882,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1784,12 +1900,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1825,6 +1941,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -1845,13 +1965,11 @@ tls_read_flush_backlog(struct sock *sk, struct tls_prot_info *prot, return sk_flush_backlog(sk); } -static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, - bool nonblock) +static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, + bool nonblock) { long timeo; - int err; - - lock_sock(sk); + int ret; timeo = sock_rcvtimeo(sk, nonblock); @@ -1861,30 +1979,36 @@ static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, ctx->reader_contended = 1; add_wait_queue(&ctx->wq, &wait); - sk_wait_event(sk, &timeo, - !READ_ONCE(ctx->reader_present), &wait); + ret = sk_wait_event(sk, &timeo, + !READ_ONCE(ctx->reader_present), &wait); remove_wait_queue(&ctx->wq, &wait); - if (timeo <= 0) { - err = -EAGAIN; - goto err_unlock; - } - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - goto err_unlock; - } + if (timeo <= 0) + return -EAGAIN; + if (signal_pending(current)) + return sock_intr_errno(timeo); + if (ret < 0) + return ret; } WRITE_ONCE(ctx->reader_present, 1); return 0; +} -err_unlock: - release_sock(sk); +static int tls_rx_reader_lock(struct sock *sk, struct tls_sw_context_rx *ctx, + bool nonblock) +{ + int err; + + lock_sock(sk); + err = tls_rx_reader_acquire(sk, ctx, nonblock); + if (err) + release_sock(sk); return err; } -static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx) +static void tls_rx_reader_release(struct sock *sk, struct tls_sw_context_rx *ctx) { if (unlikely(ctx->reader_contended)) { if (wq_has_sleeper(&ctx->wq)) @@ -1896,6 +2020,11 @@ static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx) } WRITE_ONCE(ctx->reader_present, 0); +} + +static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx) +{ + tls_rx_reader_release(sk, ctx); release_sock(sk); } @@ -1915,10 +2044,12 @@ int tls_sw_recvmsg(struct sock *sk, struct strp_msg *rxm; struct tls_msg *tlm; ssize_t copied = 0; + ssize_t peeked = 0; bool async = false; int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1926,10 +2057,10 @@ int tls_sw_recvmsg(struct sock *sk, if (unlikely(flags & MSG_ERRQUEUE)) return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR); - psock = sk_psock_get(sk); err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT); if (err < 0) return err; + psock = sk_psock_get(sk); bpf_strp_enabled = 
sk_psock_strp_enabled(psock); /* If crypto failed the connection is broken */ @@ -1938,12 +2069,14 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; + /* process_rx_list() will set @control if it processed any records */ copied = err; - if (len <= copied) + if (len <= copied || rx_more || + (control && control != TLS_RECORD_TYPE_DATA)) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2036,6 +2169,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } @@ -2059,8 +2194,10 @@ put_on_rx_list: if (err < 0) goto put_on_rx_list_err; - if (is_peek) + if (is_peek) { + peeked += chunk; goto put_on_rx_list; + } if (partially_consumed) { rxm->offset += chunk; @@ -2084,33 +2221,28 @@ put_on_rx_list: recv_end: if (async) { - int ret, pending; + int ret; /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - ret = 0; - if (pending) - ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + ret = tls_decrypt_async_wait(ctx); __skb_queue_purge(&ctx->async_hold); if (ret) { if (err >= 0 || err == -EINPROGRESS) err = ret; - decrypted = 0; goto end; } /* Drain records from the rx_list & copy if required */ - if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + if (is_peek) + err = process_rx_list(ctx, msg, &control, copied + peeked, + decrypted - peeked, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); - decrypted = max(err, 0); + async_copy_bytes, is_peek, NULL); + + /* we could have copied less than we wanted, and possibly nothing */ + decrypted += max(err, 0) - async_copy_bytes; } copied += decrypted; @@ -2193,6 +2325,102 @@ splice_requeue: goto splice_read_end; } +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t read_actor) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct tls_prot_info *prot = &tls_ctx->prot_info; + struct strp_msg *rxm = NULL; + struct sk_buff *skb = NULL; + struct sk_psock *psock; + size_t flushed_at = 0; + bool released = true; + struct tls_msg *tlm; + ssize_t copied = 0; + ssize_t decrypted; + int err, used; + + psock = sk_psock_get(sk); + if (psock) { + sk_psock_put(sk, psock); + return -EINVAL; + } + err = tls_rx_reader_acquire(sk, ctx, true); + if (err < 0) + return err; + + /* If crypto failed the connection is broken */ + err = ctx->async_wait.err; + if (err) + goto read_sock_end; + + decrypted = 0; + do { + if (!skb_queue_empty(&ctx->rx_list)) { + skb = __skb_dequeue(&ctx->rx_list); + rxm = strp_msg(skb); + tlm = tls_msg(skb); + } else { + struct tls_decrypt_arg darg; + + err = tls_rx_rec_wait(sk, NULL, true, released); + if (err <= 0) + goto read_sock_end; + + memset(&darg.inargs, 0, sizeof(darg.inargs)); + + err = tls_rx_one_record(sk, NULL, &darg); + if (err < 0) { + tls_err_abort(sk, -EBADMSG); + goto read_sock_end; + } + + released = tls_read_flush_backlog(sk, prot, INT_MAX, + 0, decrypted, + &flushed_at); + skb = darg.skb; + rxm = strp_msg(skb); 
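As an aside on the tls_sw_read_sock() path being added here: any in-kernel user that already consumes plain TCP through ->read_sock() can run the same pattern on a ktls socket and receive decrypted record payloads in its actor callback. A minimal sketch follows; the demo_* names are hypothetical, and the caller is assumed to hold the socket lock, as ->read_sock() callers do for TCP.

/* Editorial sketch, not part of the patch: drain decrypted ktls payload
 * through the socket's ->read_sock() hook. tls_sw_read_sock() only passes
 * TLS_RECORD_TYPE_DATA records to the actor and fails with -EINVAL on
 * control records.
 */
#include <linux/net.h>
#include <net/sock.h>

static int demo_read_actor(read_descriptor_t *desc, struct sk_buff *skb,
			   unsigned int offset, size_t len)
{
	/* offset/len describe already-decrypted plaintext inside @skb.
	 * Draining desc->count to zero stops the read loop; if the budget
	 * runs out mid-record, the remainder is put back on rx_list for the
	 * next call.
	 */
	desc->count -= min(desc->count, len);
	return len;		/* consume the whole chunk */
}

static ssize_t demo_drain_plaintext(struct sock *sk, size_t budget)
{
	read_descriptor_t desc = { .count = budget };
	int ret;

	lock_sock(sk);
	ret = sk->sk_socket->ops->read_sock(sk, &desc, demo_read_actor);
	release_sock(sk);

	return ret;		/* bytes handed to the actor, or -errno */
}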
+ tlm = tls_msg(skb); + decrypted += rxm->full_len; + + tls_rx_rec_done(ctx); + } + + /* read_sock does not support reading control messages */ + if (tlm->control != TLS_RECORD_TYPE_DATA) { + err = -EINVAL; + goto read_sock_requeue; + } + + used = read_actor(desc, skb, rxm->offset, rxm->full_len); + if (used <= 0) { + if (!copied) + err = used; + goto read_sock_requeue; + } + copied += used; + if (used < rxm->full_len) { + rxm->offset += used; + rxm->full_len -= used; + if (!desc->count) + goto read_sock_requeue; + } else { + consume_skb(skb); + if (!desc->count) + skb = NULL; + } + } while (skb); + +read_sock_end: + tls_rx_reader_release(sk, ctx); + return copied ? : err; + +read_sock_requeue: + __skb_queue_head(&ctx->rx_list, skb); + goto read_sock_end; +} + bool tls_sw_sock_is_readable(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2214,7 +2442,7 @@ int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_prot_info *prot = &tls_ctx->prot_info; - char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; + char header[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE]; size_t cipher_overhead; size_t data_len = 0; int ret; @@ -2265,8 +2493,7 @@ int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb) return data_len + TLS_HEADER_SIZE; read_failure: - tls_err_abort(strp->sk, ret); - + tls_strp_abort_strp(strp, ret); return ret; } @@ -2283,8 +2510,14 @@ static void tls_data_ready(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct sk_psock *psock; + gfp_t alloc_save; + + trace_sk_data_ready(sk); + alloc_save = sk->sk_allocation; + sk->sk_allocation = GFP_ATOMIC; tls_strp_data_ready(&ctx->strp); + sk->sk_allocation = alloc_save; psock = sk_psock_get(sk); if (psock) { @@ -2308,16 +2541,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); @@ -2356,9 +2582,6 @@ void tls_sw_release_resources_rx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - kfree(tls_ctx->rx.rec_seq); - kfree(tls_ctx->rx.iv); - if (ctx->aead_recv) { __skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); @@ -2416,18 +2639,26 @@ static void tx_work_handler(struct work_struct *work) if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) return; - mutex_lock(&tls_ctx->tx_lock); - lock_sock(sk); - tls_tx_records(sk, -1); - release_sock(sk); - mutex_unlock(&tls_ctx->tx_lock); + + if (mutex_trylock(&tls_ctx->tx_lock)) { + lock_sock(sk); + tls_tx_records(sk, -1); + release_sock(sk); + mutex_unlock(&tls_ctx->tx_lock); + } else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + /* Someone is holding the tx_lock, they will likely run Tx + * and cancel the work on their way out of the lock section. + * Schedule a long delay just in case. 
+ */ + schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10)); + } } static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx) { struct tls_rec *rec; - rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); + rec = list_first_entry_or_null(&ctx->tx_list, struct tls_rec, list); if (!rec) return false; @@ -2462,280 +2693,183 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx) tls_ctx->prot_info.version != TLS_1_3_VERSION; } -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) +static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - struct tls_crypto_info *crypto_info; + struct tls_sw_context_tx *sw_ctx_tx; + + if (!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) + return NULL; + } else { + sw_ctx_tx = ctx->priv_ctx_tx; + } + + crypto_init_wait(&sw_ctx_tx->async_wait); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); + INIT_LIST_HEAD(&sw_ctx_tx->tx_list); + INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); + sw_ctx_tx->tx_work.sk = sk; + + return sw_ctx_tx; +} + +static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) +{ + struct tls_sw_context_rx *sw_ctx_rx; + + if (!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) + return NULL; + } else { + sw_ctx_rx = ctx->priv_ctx_rx; + } + + crypto_init_wait(&sw_ctx_rx->async_wait); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); + init_waitqueue_head(&sw_ctx_rx->wq); + skb_queue_head_init(&sw_ctx_rx->rx_list); + skb_queue_head_init(&sw_ctx_rx->async_hold); + + return sw_ctx_rx; +} + +int init_prot_info(struct tls_prot_info *prot, + const struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc) +{ + u16 nonce_size = cipher_desc->nonce; + + if (crypto_info->version == TLS_1_3_VERSION) { + nonce_size = 0; + prot->aad_size = TLS_HEADER_SIZE; + prot->tail_size = 1; + } else { + prot->aad_size = TLS_AAD_SPACE_SIZE; + prot->tail_size = 0; + } + + /* Sanity-check the sizes for stack allocations. 
 */
+	if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE)
+		return -EINVAL;
+
+	prot->version = crypto_info->version;
+	prot->cipher_type = crypto_info->cipher_type;
+	prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
+	prot->tag_size = cipher_desc->tag;
+	prot->overhead_size = prot->prepend_size + prot->tag_size + prot->tail_size;
+	prot->iv_size = cipher_desc->iv;
+	prot->salt_size = cipher_desc->salt;
+	prot->rec_seq_size = cipher_desc->rec_seq;
+
+	return 0;
+}
+
+static void tls_finish_key_update(struct sock *sk, struct tls_context *tls_ctx)
+{
+	struct tls_sw_context_rx *ctx = tls_ctx->priv_ctx_rx;
+
+	WRITE_ONCE(ctx->key_update_pending, false);
+	/* wake-up pre-existing poll() */
+	ctx->saved_data_ready(sk);
+}
+
+int tls_set_sw_offload(struct sock *sk, int tx,
+		       struct tls_crypto_info *new_crypto_info)
+{
+	struct tls_crypto_info *crypto_info, *src_crypto_info;
 	struct tls_sw_context_tx *sw_ctx_tx = NULL;
 	struct tls_sw_context_rx *sw_ctx_rx = NULL;
+	const struct tls_cipher_desc *cipher_desc;
+	char *iv, *rec_seq, *key, *salt;
 	struct cipher_context *cctx;
+	struct tls_prot_info *prot;
 	struct crypto_aead **aead;
-	u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size;
+	struct tls_context *ctx;
 	struct crypto_tfm *tfm;
-	char *iv, *rec_seq, *key, *salt, *cipher_name;
-	size_t keysize;
 	int rc = 0;

-	if (!ctx) {
-		rc = -EINVAL;
-		goto out;
-	}
+	ctx = tls_get_ctx(sk);
+	prot = &ctx->prot_info;

-	if (tx) {
-		if (!ctx->priv_ctx_tx) {
-			sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
-			if (!sw_ctx_tx) {
-				rc = -ENOMEM;
-				goto out;
-			}
-			ctx->priv_ctx_tx = sw_ctx_tx;
+	/* new_crypto_info != NULL means rekey */
+	if (!new_crypto_info) {
+		if (tx) {
+			ctx->priv_ctx_tx = init_ctx_tx(ctx, sk);
+			if (!ctx->priv_ctx_tx)
+				return -ENOMEM;
 		} else {
-			sw_ctx_tx =
-				(struct tls_sw_context_tx *)ctx->priv_ctx_tx;
-		}
-	} else {
-		if (!ctx->priv_ctx_rx) {
-			sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
-			if (!sw_ctx_rx) {
-				rc = -ENOMEM;
-				goto out;
-			}
-			ctx->priv_ctx_rx = sw_ctx_rx;
-		} else {
-			sw_ctx_rx =
-				(struct tls_sw_context_rx *)ctx->priv_ctx_rx;
+			ctx->priv_ctx_rx = init_ctx_rx(ctx);
+			if (!ctx->priv_ctx_rx)
+				return -ENOMEM;
 		}
 	}

 	if (tx) {
-		crypto_init_wait(&sw_ctx_tx->async_wait);
-		spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
+		sw_ctx_tx = ctx->priv_ctx_tx;
 		crypto_info = &ctx->crypto_send.info;
 		cctx = &ctx->tx;
 		aead = &sw_ctx_tx->aead_send;
-		INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
-		INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
-		sw_ctx_tx->tx_work.sk = sk;
 	} else {
-		crypto_init_wait(&sw_ctx_rx->async_wait);
-		spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
-		init_waitqueue_head(&sw_ctx_rx->wq);
+		sw_ctx_rx = ctx->priv_ctx_rx;
 		crypto_info = &ctx->crypto_recv.info;
 		cctx = &ctx->rx;
-		skb_queue_head_init(&sw_ctx_rx->rx_list);
-		skb_queue_head_init(&sw_ctx_rx->async_hold);
 		aead = &sw_ctx_rx->aead_recv;
 	}

-	switch (crypto_info->cipher_type) {
-	case TLS_CIPHER_AES_GCM_128: {
-		struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
-
-		gcm_128_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
-		tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE;
-		iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
-		iv = gcm_128_info->iv;
-		rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE;
-		rec_seq = gcm_128_info->rec_seq;
-		keysize = TLS_CIPHER_AES_GCM_128_KEY_SIZE;
-		key = gcm_128_info->key;
-		salt = gcm_128_info->salt;
-		salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE;
-		cipher_name = "gcm(aes)";
-		break;
-	}
-	case TLS_CIPHER_AES_GCM_256: {
-		struct tls12_crypto_info_aes_gcm_256 *gcm_256_info;
-
-		gcm_256_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
-		tag_size = TLS_CIPHER_AES_GCM_256_TAG_SIZE;
-		iv_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
-		iv = gcm_256_info->iv;
-		rec_seq_size = TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE;
-		rec_seq = gcm_256_info->rec_seq;
-		keysize = TLS_CIPHER_AES_GCM_256_KEY_SIZE;
-		key = gcm_256_info->key;
-		salt = gcm_256_info->salt;
-		salt_size = TLS_CIPHER_AES_GCM_256_SALT_SIZE;
-		cipher_name = "gcm(aes)";
-		break;
-	}
-	case TLS_CIPHER_AES_CCM_128: {
-		struct tls12_crypto_info_aes_ccm_128 *ccm_128_info;
-
-		ccm_128_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_AES_CCM_128_IV_SIZE;
-		tag_size = TLS_CIPHER_AES_CCM_128_TAG_SIZE;
-		iv_size = TLS_CIPHER_AES_CCM_128_IV_SIZE;
-		iv = ccm_128_info->iv;
-		rec_seq_size = TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE;
-		rec_seq = ccm_128_info->rec_seq;
-		keysize = TLS_CIPHER_AES_CCM_128_KEY_SIZE;
-		key = ccm_128_info->key;
-		salt = ccm_128_info->salt;
-		salt_size = TLS_CIPHER_AES_CCM_128_SALT_SIZE;
-		cipher_name = "ccm(aes)";
-		break;
-	}
-	case TLS_CIPHER_CHACHA20_POLY1305: {
-		struct tls12_crypto_info_chacha20_poly1305 *chacha20_poly1305_info;
+	src_crypto_info = new_crypto_info ?: crypto_info;

-		chacha20_poly1305_info = (void *)crypto_info;
-		nonce_size = 0;
-		tag_size = TLS_CIPHER_CHACHA20_POLY1305_TAG_SIZE;
-		iv_size = TLS_CIPHER_CHACHA20_POLY1305_IV_SIZE;
-		iv = chacha20_poly1305_info->iv;
-		rec_seq_size = TLS_CIPHER_CHACHA20_POLY1305_REC_SEQ_SIZE;
-		rec_seq = chacha20_poly1305_info->rec_seq;
-		keysize = TLS_CIPHER_CHACHA20_POLY1305_KEY_SIZE;
-		key = chacha20_poly1305_info->key;
-		salt = chacha20_poly1305_info->salt;
-		salt_size = TLS_CIPHER_CHACHA20_POLY1305_SALT_SIZE;
-		cipher_name = "rfc7539(chacha20,poly1305)";
-		break;
-	}
-	case TLS_CIPHER_SM4_GCM: {
-		struct tls12_crypto_info_sm4_gcm *sm4_gcm_info;
-
-		sm4_gcm_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_SM4_GCM_IV_SIZE;
-		tag_size = TLS_CIPHER_SM4_GCM_TAG_SIZE;
-		iv_size = TLS_CIPHER_SM4_GCM_IV_SIZE;
-		iv = sm4_gcm_info->iv;
-		rec_seq_size = TLS_CIPHER_SM4_GCM_REC_SEQ_SIZE;
-		rec_seq = sm4_gcm_info->rec_seq;
-		keysize = TLS_CIPHER_SM4_GCM_KEY_SIZE;
-		key = sm4_gcm_info->key;
-		salt = sm4_gcm_info->salt;
-		salt_size = TLS_CIPHER_SM4_GCM_SALT_SIZE;
-		cipher_name = "gcm(sm4)";
-		break;
-	}
-	case TLS_CIPHER_SM4_CCM: {
-		struct tls12_crypto_info_sm4_ccm *sm4_ccm_info;
-
-		sm4_ccm_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_SM4_CCM_IV_SIZE;
-		tag_size = TLS_CIPHER_SM4_CCM_TAG_SIZE;
-		iv_size = TLS_CIPHER_SM4_CCM_IV_SIZE;
-		iv = sm4_ccm_info->iv;
-		rec_seq_size = TLS_CIPHER_SM4_CCM_REC_SEQ_SIZE;
-		rec_seq = sm4_ccm_info->rec_seq;
-		keysize = TLS_CIPHER_SM4_CCM_KEY_SIZE;
-		key = sm4_ccm_info->key;
-		salt = sm4_ccm_info->salt;
-		salt_size = TLS_CIPHER_SM4_CCM_SALT_SIZE;
-		cipher_name = "ccm(sm4)";
-		break;
-	}
-	case TLS_CIPHER_ARIA_GCM_128: {
-		struct tls12_crypto_info_aria_gcm_128 *aria_gcm_128_info;
-
-		aria_gcm_128_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE;
-		tag_size = TLS_CIPHER_ARIA_GCM_128_TAG_SIZE;
-		iv_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE;
-		iv = aria_gcm_128_info->iv;
-		rec_seq_size = TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE;
-		rec_seq = aria_gcm_128_info->rec_seq;
-		keysize = TLS_CIPHER_ARIA_GCM_128_KEY_SIZE;
-		key = aria_gcm_128_info->key;
-		salt = aria_gcm_128_info->salt;
-		salt_size = TLS_CIPHER_ARIA_GCM_128_SALT_SIZE;
-		cipher_name = "gcm(aria)";
-		break;
-	}
-	case TLS_CIPHER_ARIA_GCM_256: {
-		struct tls12_crypto_info_aria_gcm_256 *gcm_256_info;
-
-		gcm_256_info = (void *)crypto_info;
-		nonce_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE;
-		tag_size = TLS_CIPHER_ARIA_GCM_256_TAG_SIZE;
-		iv_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE;
-		iv = gcm_256_info->iv;
-		rec_seq_size = TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE;
-		rec_seq = gcm_256_info->rec_seq;
-		keysize = TLS_CIPHER_ARIA_GCM_256_KEY_SIZE;
-		key = gcm_256_info->key;
-		salt = gcm_256_info->salt;
-		salt_size = TLS_CIPHER_ARIA_GCM_256_SALT_SIZE;
-		cipher_name = "gcm(aria)";
-		break;
-	}
-	default:
+	cipher_desc = get_cipher_desc(src_crypto_info->cipher_type);
+	if (!cipher_desc) {
 		rc = -EINVAL;
 		goto free_priv;
 	}

-	if (crypto_info->version == TLS_1_3_VERSION) {
-		nonce_size = 0;
-		prot->aad_size = TLS_HEADER_SIZE;
-		prot->tail_size = 1;
-	} else {
-		prot->aad_size = TLS_AAD_SPACE_SIZE;
-		prot->tail_size = 0;
-	}
-
-	/* Sanity-check the sizes for stack allocations. */
-	if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE ||
-	    rec_seq_size > TLS_MAX_REC_SEQ_SIZE || tag_size != TLS_TAG_SIZE ||
-	    prot->aad_size > TLS_MAX_AAD_SIZE) {
-		rc = -EINVAL;
+	rc = init_prot_info(prot, src_crypto_info, cipher_desc);
+	if (rc)
 		goto free_priv;
-	}

-	prot->version = crypto_info->version;
-	prot->cipher_type = crypto_info->cipher_type;
-	prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
-	prot->tag_size = tag_size;
-	prot->overhead_size = prot->prepend_size +
-			      prot->tag_size + prot->tail_size;
-	prot->iv_size = iv_size;
-	prot->salt_size = salt_size;
-	cctx->iv = kmalloc(iv_size + salt_size, GFP_KERNEL);
-	if (!cctx->iv) {
-		rc = -ENOMEM;
-		goto free_priv;
-	}
-	/* Note: 128 & 256 bit salt are the same size */
-	prot->rec_seq_size = rec_seq_size;
-	memcpy(cctx->iv, salt, salt_size);
-	memcpy(cctx->iv + salt_size, iv, iv_size);
-	cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
-	if (!cctx->rec_seq) {
-		rc = -ENOMEM;
-		goto free_iv;
-	}
+	iv = crypto_info_iv(src_crypto_info, cipher_desc);
+	key = crypto_info_key(src_crypto_info, cipher_desc);
+	salt = crypto_info_salt(src_crypto_info, cipher_desc);
+	rec_seq = crypto_info_rec_seq(src_crypto_info, cipher_desc);

 	if (!*aead) {
-		*aead = crypto_alloc_aead(cipher_name, 0, 0);
+		*aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0);
 		if (IS_ERR(*aead)) {
 			rc = PTR_ERR(*aead);
 			*aead = NULL;
-			goto free_rec_seq;
+			goto free_priv;
 		}
 	}

 	ctx->push_pending_record = tls_sw_push_pending_record;

-	rc = crypto_aead_setkey(*aead, key, keysize);
-
-	if (rc)
-		goto free_aead;
+	/* setkey is the last operation that could fail during a
+	 * rekey. if it succeeds, we can start modifying the
+	 * context.
+	 */
+	rc = crypto_aead_setkey(*aead, key, cipher_desc->key);
+	if (rc) {
+		if (new_crypto_info)
+			goto out;
+		else
+			goto free_aead;
+	}

-	rc = crypto_aead_setauthsize(*aead, prot->tag_size);
-	if (rc)
-		goto free_aead;
+	if (!new_crypto_info) {
+		rc = crypto_aead_setauthsize(*aead, prot->tag_size);
+		if (rc)
+			goto free_aead;
+	}

-	if (sw_ctx_rx) {
+	if (!tx && !new_crypto_info) {
 		tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);

 		tls_update_rx_zc_capable(ctx);
 		sw_ctx_rx->async_capable =
-			crypto_info->version != TLS_1_3_VERSION &&
+			src_crypto_info->version != TLS_1_3_VERSION &&
 			!!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC);

 		rc = tls_strp_init(&sw_ctx_rx->strp, sk);
@@ -2743,24 +2877,33 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 			goto free_aead;
 	}

+	memcpy(cctx->iv, salt, cipher_desc->salt);
+	memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv);
+	memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq);
+
+	if (new_crypto_info) {
+		unsafe_memcpy(crypto_info, new_crypto_info,
+			      cipher_desc->crypto_info,
+			      /* size was checked in do_tls_setsockopt_conf */);
+		memzero_explicit(new_crypto_info, cipher_desc->crypto_info);
+		if (!tx)
+			tls_finish_key_update(sk, ctx);
+	}
+
 	goto out;

 free_aead:
 	crypto_free_aead(*aead);
 	*aead = NULL;
-free_rec_seq:
-	kfree(cctx->rec_seq);
-	cctx->rec_seq = NULL;
-free_iv:
-	kfree(cctx->iv);
-	cctx->iv = NULL;
 free_priv:
-	if (tx) {
-		kfree(ctx->priv_ctx_tx);
-		ctx->priv_ctx_tx = NULL;
-	} else {
-		kfree(ctx->priv_ctx_rx);
-		ctx->priv_ctx_rx = NULL;
+	if (!new_crypto_info) {
+		if (tx) {
+			kfree(ctx->priv_ctx_tx);
+			ctx->priv_ctx_tx = NULL;
+		} else {
+			kfree(ctx->priv_ctx_rx);
+			ctx->priv_ctx_rx = NULL;
+		}
 	}
 out:
 	return rc;
diff --git a/net/tls/trace.h b/net/tls/trace.h
index 9ba5f600ea43..2d8ce4ff3265 100644
--- a/net/tls/trace.h
+++ b/net/tls/trace.h
@@ -7,7 +7,7 @@
 #if !defined(_TLS_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
 #define _TLS_TRACE_H_

-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
 #include <linux/tracepoint.h>

 struct sock;
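For reference, the per-cipher switch removed above collapses into a single lookup in the tls_cipher_desc table. The entry below is a hand-reconstructed sketch of what the AES-GCM-128 descriptor is expected to contain, based only on the constants from the deleted switch arm; the real table lives in tls_main.c, is not shown in this diff, and is presumably generated by a helper macro rather than written out field by field.

	#include <linux/stddef.h>	/* offsetof */
	#include <linux/types.h>	/* bool */
	#include <linux/tls.h>		/* tls12_crypto_info_aes_gcm_128, TLS_CIPHER_* */
	#include "tls.h"		/* net/tls/tls.h: struct tls_cipher_desc */

	/* Sketch only: mirrors the values the removed TLS_CIPHER_AES_GCM_128
	 * switch arm assigned, expressed as one descriptor-table entry. */
	static const struct tls_cipher_desc aes_gcm_128_desc_sketch = {
		.nonce		= TLS_CIPHER_AES_GCM_128_IV_SIZE,
		.iv		= TLS_CIPHER_AES_GCM_128_IV_SIZE,
		.key		= TLS_CIPHER_AES_GCM_128_KEY_SIZE,
		.salt		= TLS_CIPHER_AES_GCM_128_SALT_SIZE,
		.tag		= TLS_CIPHER_AES_GCM_128_TAG_SIZE,
		.rec_seq	= TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE,
		.iv_offset	= offsetof(struct tls12_crypto_info_aes_gcm_128, iv),
		.key_offset	= offsetof(struct tls12_crypto_info_aes_gcm_128, key),
		.salt_offset	= offsetof(struct tls12_crypto_info_aes_gcm_128, salt),
		.rec_seq_offset	= offsetof(struct tls12_crypto_info_aes_gcm_128, rec_seq),
		.cipher_name	= "gcm(aes)",
		.offloadable	= true,
		.crypto_info	= sizeof(struct tls12_crypto_info_aes_gcm_128),
	};

With such a table, tls_set_sw_offload() only needs get_cipher_desc() plus the offset-based accessors to reach the key, salt, IV and record sequence inside whichever tls12_crypto_info_* structure userspace passed in.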