From d4bd88e67666c73cfa9d75c282e708890d4f10a7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:24 -0700 Subject: [PATCH 01/11] tls: rx: drop unnecessary arguments from tls_setup_from_iter() sk is unused, remove it to make it clear the function doesn't poke at the socket. size_used is always 0 on input and @length on success. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 3a0a120f9c56..86f77f8b825e 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1345,15 +1345,14 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, return skb; } -static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from, +static int tls_setup_from_iter(struct iov_iter *from, int length, int *pages_used, - unsigned int *size_used, struct scatterlist *to, int to_max_pages) { int rc = 0, i = 0, num_elem = *pages_used, maxpages; struct page *pages[MAX_SKB_FRAGS]; - unsigned int size = *size_used; + unsigned int size = 0; ssize_t copied, use; size_t offset; @@ -1396,8 +1395,7 @@ static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from, sg_mark_end(&to[num_elem - 1]); out: if (rc) - iov_iter_revert(from, size - *size_used); - *size_used = size; + iov_iter_revert(from, size); *pages_used = num_elem; return rc; @@ -1523,12 +1521,12 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, sg_init_table(sgout, n_sgout); sg_set_buf(&sgout[0], aad, prot->aad_size); - *chunk = 0; - err = tls_setup_from_iter(sk, out_iov, data_len, - &pages, chunk, &sgout[1], + err = tls_setup_from_iter(out_iov, data_len, + &pages, &sgout[1], (n_sgout - 1)); if (err < 0) goto fallback_to_reg_recv; + *chunk = data_len; } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } else { From 9bdf75ccffa690237cd0b472cd598cf6d22873dc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:25 -0700 Subject: [PATCH 02/11] tls: rx: don't report text length from the bowels of decrypt We plumb pointer to chunk all the way to the decryption method. It's set to the length of the text when decrypt_skb_update() returns. I think the code is written this way because original TLS implementation passed &chunk to zerocopy_from_iter() and this was carried forward as the code gotten more complex, without any refactoring. The fix for peek() introduced a new variable - to_decrypt which for all practical purposes is what chunk is going to get set to. Spare ourselves the pointer passing, use to_decrypt. Use this opportunity to clean things up a little further. Note that chunk / to_decrypt was mostly needed for the async path, since the sync path would access rxm->full_len (decryption transforms full_len from record size to text size). Use the right source of truth more explicitly. We have three cases: - async - it's TLS 1.2 only, so chunk == to_decrypt, but we need the min() because to_decrypt is a whole record and we don't want to underflow len. Note that we can't handle partial record by falling back to sync as it would introduce reordering against records in flight. - zc - again, TLS 1.2 only for now, so chunk == to_decrypt, we don't do zc if len < to_decrypt, no need to check again. - normal - it already handles chunk > len, we can factor out the assignment to rxm->full_len and share it with zc. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 86f77f8b825e..c321c5f85fbe 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1412,7 +1412,7 @@ static int tls_setup_from_iter(struct iov_iter *from, static int decrypt_internal(struct sock *sk, struct sk_buff *skb, struct iov_iter *out_iov, struct scatterlist *out_sg, - int *chunk, bool *zc, bool async) + bool *zc, bool async) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1526,7 +1526,6 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, (n_sgout - 1)); if (err < 0) goto fallback_to_reg_recv; - *chunk = data_len; } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } else { @@ -1536,7 +1535,6 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, fallback_to_reg_recv: sgout = sgin; pages = 0; - *chunk = data_len; *zc = false; } @@ -1555,8 +1553,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, } static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, - struct iov_iter *dest, int *chunk, bool *zc, - bool async) + struct iov_iter *dest, bool *zc, bool async) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; @@ -1580,7 +1577,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, } } - err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async); + err = decrypt_internal(sk, skb, dest, NULL, zc, async); if (err < 0) { if (err == -EINPROGRESS) tls_advance_record_sn(sk, prot, &tls_ctx->rx); @@ -1607,9 +1604,8 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout) { bool zc = true; - int chunk; - return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false); + return decrypt_internal(sk, skb, NULL, sgout, &zc, false); } static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, @@ -1799,9 +1795,8 @@ int tls_sw_recvmsg(struct sock *sk, num_async = 0; while (len && (decrypted + copied < target || ctx->recv_pkt)) { bool retain_skb = false; + int to_decrypt, chunk; bool zc = false; - int to_decrypt; - int chunk = 0; bool async_capable; bool async = false; @@ -1838,7 +1833,7 @@ int tls_sw_recvmsg(struct sock *sk, async_capable = false; err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &chunk, &zc, async_capable); + &zc, async_capable); if (err < 0 && err != -EINPROGRESS) { tls_err_abort(sk, -EBADMSG); goto recv_end; @@ -1876,8 +1871,13 @@ int tls_sw_recvmsg(struct sock *sk, } } - if (async) + if (async) { + /* TLS 1.2-only, to_decrypt must be text length */ + chunk = min_t(int, to_decrypt, len); goto pick_next_record; + } + /* TLS 1.3 may have updated the length by more than overhead */ + chunk = rxm->full_len; if (!zc) { if (bpf_strp_enabled) { @@ -1893,11 +1893,9 @@ int tls_sw_recvmsg(struct sock *sk, } } - if (rxm->full_len > len) { + if (chunk > len) { retain_skb = true; chunk = len; - } else { - chunk = rxm->full_len; } err = skb_copy_datagram_msg(skb, rxm->offset, @@ -1912,9 +1910,6 @@ int tls_sw_recvmsg(struct sock *sk, } pick_next_record: - if (chunk > len) - chunk = len; - decrypted += chunk; len -= chunk; @@ -2016,7 +2011,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (!skb) goto splice_read_end; - err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false); + err = decrypt_skb_update(sk, skb, NULL, &zc, false); if (err < 0) { tls_err_abort(sk, -EBADMSG); goto splice_read_end; From 4175eac37123a68ebee71f288826339fb89bfec7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:26 -0700 Subject: [PATCH 03/11] tls: rx: wrap decryption arguments in a structure We pass zc as a pointer to bool a few functions down as an in/out argument. This is error prone since C will happily evalue a pointer as a boolean (IOW forgetting *zc and writing zc leads to loss of developer time..). Wrap the arguments into a structure. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 49 ++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c321c5f85fbe..ecf9a3832798 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -44,6 +44,11 @@ #include #include +struct tls_decrypt_arg { + bool zc; + bool async; +}; + noinline void tls_err_abort(struct sock *sk, int err) { WARN_ON_ONCE(err >= 0); @@ -1412,7 +1417,7 @@ static int tls_setup_from_iter(struct iov_iter *from, static int decrypt_internal(struct sock *sk, struct sk_buff *skb, struct iov_iter *out_iov, struct scatterlist *out_sg, - bool *zc, bool async) + struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); @@ -1429,7 +1434,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, prot->tail_size; int iv_offset = 0; - if (*zc && (out_iov || out_sg)) { + if (darg->zc && (out_iov || out_sg)) { if (out_iov) n_sgout = 1 + iov_iter_npages_cap(out_iov, INT_MAX, data_len); @@ -1439,7 +1444,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, rxm->full_len - prot->prepend_size); } else { n_sgout = 0; - *zc = false; + darg->zc = false; n_sgin = skb_cow_data(skb, 0, &unused); } @@ -1535,12 +1540,12 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, fallback_to_reg_recv: sgout = sgin; pages = 0; - *zc = false; + darg->zc = false; } /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, skb, sgin, sgout, iv, - data_len, aead_req, async); + data_len, aead_req, darg->async); if (err == -EINPROGRESS) return err; @@ -1553,7 +1558,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, } static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, - struct iov_iter *dest, bool *zc, bool async) + struct iov_iter *dest, + struct tls_decrypt_arg *darg) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; @@ -1562,7 +1568,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, int pad, err; if (tlm->decrypted) { - *zc = false; + darg->zc = false; return 0; } @@ -1572,12 +1578,12 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, return err; if (err > 0) { tlm->decrypted = 1; - *zc = false; + darg->zc = false; goto decrypt_done; } } - err = decrypt_internal(sk, skb, dest, NULL, zc, async); + err = decrypt_internal(sk, skb, dest, NULL, darg); if (err < 0) { if (err == -EINPROGRESS) tls_advance_record_sn(sk, prot, &tls_ctx->rx); @@ -1603,9 +1609,9 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout) { - bool zc = true; + struct tls_decrypt_arg darg = { .zc = true, }; - return decrypt_internal(sk, skb, NULL, sgout, &zc, false); + return decrypt_internal(sk, skb, NULL, sgout, &darg); } static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, @@ -1794,11 +1800,10 @@ int tls_sw_recvmsg(struct sock *sk, decrypted = 0; num_async = 0; while (len && (decrypted + copied < target || ctx->recv_pkt)) { + struct tls_decrypt_arg darg = {}; bool retain_skb = false; int to_decrypt, chunk; - bool zc = false; - bool async_capable; - bool async = false; + bool async; skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err); if (!skb) { @@ -1824,16 +1829,15 @@ int tls_sw_recvmsg(struct sock *sk, tlm->control == TLS_RECORD_TYPE_DATA && prot->version != TLS_1_3_VERSION && !bpf_strp_enabled) - zc = true; + darg.zc = true; /* Do not use async mode if record is non-data */ if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) - async_capable = ctx->async_capable; + darg.async = ctx->async_capable; else - async_capable = false; + darg.async = false; - err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &zc, async_capable); + err = decrypt_skb_update(sk, skb, &msg->msg_iter, &darg); if (err < 0 && err != -EINPROGRESS) { tls_err_abort(sk, -EBADMSG); goto recv_end; @@ -1879,7 +1883,7 @@ int tls_sw_recvmsg(struct sock *sk, /* TLS 1.3 may have updated the length by more than overhead */ chunk = rxm->full_len; - if (!zc) { + if (!darg.zc) { if (bpf_strp_enabled) { err = sk_psock_tls_strp_read(psock, skb); if (err != __SK_PASS) { @@ -1996,7 +2000,6 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, int err = 0; long timeo; int chunk; - bool zc = false; lock_sock(sk); @@ -2006,12 +2009,14 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (from_queue) { skb = __skb_dequeue(&ctx->rx_list); } else { + struct tls_decrypt_arg darg = {}; + skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err); if (!skb) goto splice_read_end; - err = decrypt_skb_update(sk, skb, NULL, &zc, false); + err = decrypt_skb_update(sk, skb, NULL, &darg); if (err < 0) { tls_err_abort(sk, -EBADMSG); goto splice_read_end; From 37943f047bfb88ba4dfc7a522563f57c86d088a0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:27 -0700 Subject: [PATCH 04/11] tls: rx: simplify async wait Since we are protected from async completions by decrypt_compl_lock we can drop the async_notify and reinit the completion before we start waiting. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/tls.h | 1 - net/tls/tls_sw.c | 14 ++------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index a01c264e5f15..6fe78361c8c8 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -152,7 +152,6 @@ struct tls_sw_context_rx { atomic_t decrypt_pending; /* protect crypto_wait with decrypt_pending*/ spinlock_t decrypt_compl_lock; - bool async_notify; }; struct tls_record_info { diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index ecf9a3832798..003f7c178cde 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -173,7 +173,6 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) struct scatterlist *sg; struct sk_buff *skb; unsigned int pages; - int pending; skb = (struct sk_buff *)req->data; tls_ctx = tls_get_ctx(skb->sk); @@ -221,9 +220,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) kfree(aead_req); spin_lock_bh(&ctx->decrypt_compl_lock); - pending = atomic_dec_return(&ctx->decrypt_pending); - - if (!pending && ctx->async_notify) + if (!atomic_dec_return(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); spin_unlock_bh(&ctx->decrypt_compl_lock); } @@ -1940,7 +1937,7 @@ int tls_sw_recvmsg(struct sock *sk, if (num_async) { /* Wait for all previously submitted records to be decrypted */ spin_lock_bh(&ctx->decrypt_compl_lock); - ctx->async_notify = true; + reinit_completion(&ctx->async_wait.completion); pending = atomic_read(&ctx->decrypt_pending); spin_unlock_bh(&ctx->decrypt_compl_lock); if (pending) { @@ -1952,15 +1949,8 @@ int tls_sw_recvmsg(struct sock *sk, decrypted = 0; goto end; } - } else { - reinit_completion(&ctx->async_wait.completion); } - /* There can be no concurrent accesses, since we have no - * pending decrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, &cmsg, copied, From 06554f4ffc2595ae52ee80aec4a13bd77d22bed7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:28 -0700 Subject: [PATCH 05/11] tls: rx: factor out writing ContentType to cmsg cmsg can be filled in during rx_list processing or normal receive. Consolidate the code. We don't need to keep the boolean to track if the cmsg was created. 0 is an invalid content type. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 91 +++++++++++++++++++----------------------------- 1 file changed, 36 insertions(+), 55 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 003f7c178cde..103a1aaca934 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1635,6 +1635,29 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, return true; } +static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, + u8 *control) +{ + int err; + + if (!*control) { + *control = tlm->control; + if (!*control) + return -EBADMSG; + + err = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, + sizeof(*control), control); + if (*control != TLS_RECORD_TYPE_DATA) { + if (err || msg->msg_flags & MSG_CTRUNC) + return -EIO; + } + } else if (*control != tlm->control) { + return 0; + } + + return 1; +} + /* This function traverses the rx_list in tls receive context to copies the * decrypted records into the buffer provided by caller zero copy is not * true. Further, the records are removed from the rx_list if it is not a peek @@ -1643,31 +1666,23 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, static int process_rx_list(struct tls_sw_context_rx *ctx, struct msghdr *msg, u8 *control, - bool *cmsg, size_t skip, size_t len, bool zc, bool is_peek) { struct sk_buff *skb = skb_peek(&ctx->rx_list); - u8 ctrl = *control; - u8 msgc = *cmsg; struct tls_msg *tlm; ssize_t copied = 0; - - /* Set the record type in 'control' if caller didn't pass it */ - if (!ctrl && skb) { - tlm = tls_msg(skb); - ctrl = tlm->control; - } + int err; while (skip && skb) { struct strp_msg *rxm = strp_msg(skb); tlm = tls_msg(skb); - /* Cannot process a record of different type */ - if (ctrl != tlm->control) - return 0; + err = tls_record_content_type(msg, tlm, control); + if (err <= 0) + return err; if (skip < rxm->full_len) break; @@ -1683,27 +1698,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, tlm = tls_msg(skb); - /* Cannot process a record of different type */ - if (ctrl != tlm->control) - return 0; - - /* Set record type if not already done. For a non-data record, - * do not proceed if record type could not be copied. - */ - if (!msgc) { - int cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, - sizeof(ctrl), &ctrl); - msgc = true; - if (ctrl != TLS_RECORD_TYPE_DATA) { - if (cerr || msg->msg_flags & MSG_CTRUNC) - return -EIO; - - *cmsg = msgc; - } - } + err = tls_record_content_type(msg, tlm, control); + if (err <= 0) + return err; if (!zc || (rxm->full_len - skip) > len) { - int err = skb_copy_datagram_msg(skb, rxm->offset + skip, + err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) return err; @@ -1740,7 +1740,6 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, skb = next_skb; } - *control = ctrl; return copied; } @@ -1762,7 +1761,6 @@ int tls_sw_recvmsg(struct sock *sk, struct tls_msg *tlm; struct sk_buff *skb; ssize_t copied = 0; - bool cmsg = false; int target, err = 0; long timeo; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); @@ -1779,8 +1777,7 @@ int tls_sw_recvmsg(struct sock *sk, bpf_strp_enabled = sk_psock_strp_enabled(psock); /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false, - is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, false, is_peek); if (err < 0) { tls_err_abort(sk, err); goto end; @@ -1852,26 +1849,10 @@ int tls_sw_recvmsg(struct sock *sk, * is known just after record is dequeued from stream parser. * For tls1.3, we disable async. */ - - if (!control) - control = tlm->control; - else if (control != tlm->control) + err = tls_record_content_type(msg, tlm, &control); + if (err <= 0) goto recv_end; - if (!cmsg) { - int cerr; - - cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE, - sizeof(control), &control); - cmsg = true; - if (control != TLS_RECORD_TYPE_DATA) { - if (cerr || msg->msg_flags & MSG_CTRUNC) { - err = -EIO; - goto recv_end; - } - } - } - if (async) { /* TLS 1.2-only, to_decrypt must be text length */ chunk = min_t(int, to_decrypt, len); @@ -1953,10 +1934,10 @@ int tls_sw_recvmsg(struct sock *sk, /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) - err = process_rx_list(ctx, msg, &control, &cmsg, copied, + err = process_rx_list(ctx, msg, &control, copied, decrypted, false, is_peek); else - err = process_rx_list(ctx, msg, &control, &cmsg, 0, + err = process_rx_list(ctx, msg, &control, 0, decrypted, true, is_peek); if (err < 0) { tls_err_abort(sk, err); From fc8da80f990696a50ea76628daca6e63331b18b7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:29 -0700 Subject: [PATCH 06/11] tls: rx: don't handle async in tls_sw_advance_skb() tls_sw_advance_skb() caters to the async case when skb argument is NULL. In that case it simply unpauses the strparser. These are surprising semantics to a person reading the code, and result in higher LoC, so inline the __strp_unpause and only call tls_sw_advance_skb() when we actually move past an skb. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 103a1aaca934..6f17f599a6d4 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1616,17 +1616,14 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct strp_msg *rxm = strp_msg(skb); - if (skb) { - struct strp_msg *rxm = strp_msg(skb); - - if (len < rxm->full_len) { - rxm->offset += len; - rxm->full_len -= len; - return false; - } - consume_skb(skb); + if (len < rxm->full_len) { + rxm->offset += len; + rxm->full_len -= len; + return false; } + consume_skb(skb); /* Finished with message */ ctx->recv_pkt = NULL; @@ -1898,10 +1895,9 @@ int tls_sw_recvmsg(struct sock *sk, /* For async or peek case, queue the current skb */ if (async || is_peek || retain_skb) { skb_queue_tail(&ctx->rx_list, skb); - skb = NULL; - } - - if (tls_sw_advance_skb(sk, skb, chunk)) { + ctx->recv_pkt = NULL; + __strp_unpause(&ctx->strp); + } else if (tls_sw_advance_skb(sk, skb, chunk)) { /* Return full control message to * userspace before trying to parse * another message type From 7da18bcc5e4cfd14ea520367546c5697e64ae592 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:30 -0700 Subject: [PATCH 07/11] tls: rx: don't track the async count We track both if the last record was handled by async crypto and how many records were async. This is not necessary. We implicitly assume once crypto goes async it will stay that way, otherwise we'd reorder records. So just track if we're in async mode, the exact number of records is not necessary. This change also forces us into "async" mode more consistently in case crypto ever decided to interleave async and sync. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 6f17f599a6d4..183e5ec292a8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1751,13 +1751,13 @@ int tls_sw_recvmsg(struct sock *sk, struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); struct tls_prot_info *prot = &tls_ctx->prot_info; struct sk_psock *psock; - int num_async, pending; unsigned char control = 0; ssize_t decrypted = 0; struct strp_msg *rxm; struct tls_msg *tlm; struct sk_buff *skb; ssize_t copied = 0; + bool async = false; int target, err = 0; long timeo; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); @@ -1789,12 +1789,10 @@ int tls_sw_recvmsg(struct sock *sk, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); decrypted = 0; - num_async = 0; while (len && (decrypted + copied < target || ctx->recv_pkt)) { struct tls_decrypt_arg darg = {}; bool retain_skb = false; int to_decrypt, chunk; - bool async; skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err); if (!skb) { @@ -1834,10 +1832,8 @@ int tls_sw_recvmsg(struct sock *sk, goto recv_end; } - if (err == -EINPROGRESS) { + if (err == -EINPROGRESS) async = true; - num_async++; - } /* If the type of records being processed is not known yet, * set it to record type just dequeued. If it is already known, @@ -1911,7 +1907,9 @@ int tls_sw_recvmsg(struct sock *sk, } recv_end: - if (num_async) { + if (async) { + int pending; + /* Wait for all previously submitted records to be decrypted */ spin_lock_bh(&ctx->decrypt_compl_lock); reinit_completion(&ctx->async_wait.completion); From ba13609df18dabf1d892a247201bd3fe38012ff9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:31 -0700 Subject: [PATCH 08/11] tls: rx: pull most of zc check out of the loop Most of the conditions deciding if zero-copy can be used do not change throughout the iterations, so pre-calculate them. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 183e5ec292a8..5ad0b2505988 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1763,6 +1763,7 @@ int tls_sw_recvmsg(struct sock *sk, bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; bool bpf_strp_enabled; + bool zc_capable; flags |= nonblock; @@ -1788,6 +1789,8 @@ int tls_sw_recvmsg(struct sock *sk, len = len - copied; timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek && + prot->version != TLS_1_3_VERSION; decrypted = 0; while (len && (decrypted + copied < target || ctx->recv_pkt)) { struct tls_decrypt_arg darg = {}; @@ -1814,10 +1817,8 @@ int tls_sw_recvmsg(struct sock *sk, to_decrypt = rxm->full_len - prot->overhead_size; - if (to_decrypt <= len && !is_kvec && !is_peek && - tlm->control == TLS_RECORD_TYPE_DATA && - prot->version != TLS_1_3_VERSION && - !bpf_strp_enabled) + if (zc_capable && to_decrypt <= len && + tlm->control == TLS_RECORD_TYPE_DATA) darg.zc = true; /* Do not use async mode if record is non-data */ From 465ea73535675ed3eb39e54a3631998f0c64e8d7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:32 -0700 Subject: [PATCH 09/11] tls: rx: inline consuming the skb at the end of the loop tls_sw_advance_skb() always consumes the skb at the end of the loop. To fall here the following must be true: !async && !is_peek && !retain_skb retain_skb => !zc && rxm->full_len > len # but non-full record implies !zc, so above can be simplified as retain_skb => rxm->full_len > len !async && !is_peek && !(rxm->full_len > len) !async && !is_peek && rxm->full_len <= len tls_sw_advance_skb() returns false if len < rxm->full_len which can't be true given conditions above. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5ad0b2505988..3aa8fe1c6e77 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1611,27 +1611,6 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb, return decrypt_internal(sk, skb, NULL, sgout, &darg); } -static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, - unsigned int len) -{ - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - struct strp_msg *rxm = strp_msg(skb); - - if (len < rxm->full_len) { - rxm->offset += len; - rxm->full_len -= len; - return false; - } - consume_skb(skb); - - /* Finished with message */ - ctx->recv_pkt = NULL; - __strp_unpause(&ctx->strp); - - return true; -} - static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm, u8 *control) { @@ -1894,7 +1873,11 @@ int tls_sw_recvmsg(struct sock *sk, skb_queue_tail(&ctx->rx_list, skb); ctx->recv_pkt = NULL; __strp_unpause(&ctx->strp); - } else if (tls_sw_advance_skb(sk, skb, chunk)) { + } else { + consume_skb(skb); + ctx->recv_pkt = NULL; + __strp_unpause(&ctx->strp); + /* Return full control message to * userspace before trying to parse * another message type @@ -1902,8 +1885,6 @@ int tls_sw_recvmsg(struct sock *sk, msg->msg_flags |= MSG_EOR; if (control != TLS_RECORD_TYPE_DATA) goto recv_end; - } else { - break; } } From b1a2c1786330286f4b31c4bb9fd1d5ac8bb09807 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:33 -0700 Subject: [PATCH 10/11] tls: rx: clear ctx->recv_pkt earlier Whatever we do in the loop the skb should not remain on as ctx->recv_pkt afterwards. We can clear that pointer and restart strparser earlier. This adds overhead of extra linking and unlinking to rx_list but that's not large (upcoming change will switch to unlocked skb list operations). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 3aa8fe1c6e77..71d8082647c8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1826,6 +1826,10 @@ int tls_sw_recvmsg(struct sock *sk, if (err <= 0) goto recv_end; + ctx->recv_pkt = NULL; + __strp_unpause(&ctx->strp); + skb_queue_tail(&ctx->rx_list, skb); + if (async) { /* TLS 1.2-only, to_decrypt must be text length */ chunk = min_t(int, to_decrypt, len); @@ -1840,10 +1844,9 @@ int tls_sw_recvmsg(struct sock *sk, if (err != __SK_PASS) { rxm->offset = rxm->offset + rxm->full_len; rxm->full_len = 0; + skb_unlink(skb, &ctx->rx_list); if (err == __SK_DROP) consume_skb(skb); - ctx->recv_pkt = NULL; - __strp_unpause(&ctx->strp); continue; } } @@ -1869,14 +1872,9 @@ int tls_sw_recvmsg(struct sock *sk, len -= chunk; /* For async or peek case, queue the current skb */ - if (async || is_peek || retain_skb) { - skb_queue_tail(&ctx->rx_list, skb); - ctx->recv_pkt = NULL; - __strp_unpause(&ctx->strp); - } else { + if (!(async || is_peek || retain_skb)) { + skb_unlink(skb, &ctx->rx_list); consume_skb(skb); - ctx->recv_pkt = NULL; - __strp_unpause(&ctx->strp); /* Return full control message to * userspace before trying to parse From f940b6efb17257844341d04b4fc622752c23cb9f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Apr 2022 11:31:34 -0700 Subject: [PATCH 11/11] tls: rx: jump out for cases which need to leave skb on list The current invese logic is harder to follow (and adds extra tests to the fast path). We have to enumerate all cases which need to keep the skb before consuming it. It's simpler to jump out of the full record flow as we detect those cases. This makes it clear that partial consumption and peek can only reach end of the function thru the !zc case so move the code up there. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 71d8082647c8..2e8a896af81a 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1773,7 +1773,6 @@ int tls_sw_recvmsg(struct sock *sk, decrypted = 0; while (len && (decrypted + copied < target || ctx->recv_pkt)) { struct tls_decrypt_arg darg = {}; - bool retain_skb = false; int to_decrypt, chunk; skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err); @@ -1833,12 +1832,17 @@ int tls_sw_recvmsg(struct sock *sk, if (async) { /* TLS 1.2-only, to_decrypt must be text length */ chunk = min_t(int, to_decrypt, len); - goto pick_next_record; +leave_on_list: + decrypted += chunk; + len -= chunk; + continue; } /* TLS 1.3 may have updated the length by more than overhead */ chunk = rxm->full_len; if (!darg.zc) { + bool partially_consumed = chunk > len; + if (bpf_strp_enabled) { err = sk_psock_tls_strp_read(psock, skb); if (err != __SK_PASS) { @@ -1851,39 +1855,36 @@ int tls_sw_recvmsg(struct sock *sk, } } - if (chunk > len) { - retain_skb = true; + if (partially_consumed) chunk = len; - } err = skb_copy_datagram_msg(skb, rxm->offset, msg, chunk); if (err < 0) goto recv_end; - if (!is_peek) { - rxm->offset = rxm->offset + chunk; - rxm->full_len = rxm->full_len - chunk; + if (is_peek) + goto leave_on_list; + + if (partially_consumed) { + rxm->offset += chunk; + rxm->full_len -= chunk; + goto leave_on_list; } } -pick_next_record: decrypted += chunk; len -= chunk; - /* For async or peek case, queue the current skb */ - if (!(async || is_peek || retain_skb)) { - skb_unlink(skb, &ctx->rx_list); - consume_skb(skb); + skb_unlink(skb, &ctx->rx_list); + consume_skb(skb); - /* Return full control message to - * userspace before trying to parse - * another message type - */ - msg->msg_flags |= MSG_EOR; - if (control != TLS_RECORD_TYPE_DATA) - goto recv_end; - } + /* Return full control message to userspace before trying + * to parse another message type + */ + msg->msg_flags |= MSG_EOR; + if (control != TLS_RECORD_TYPE_DATA) + break; } recv_end: