path: root/net/tls/tls_main.c
author     Vakul Garg <vakul.garg@nxp.com>    2018-09-21 00:16:13 -0400
committer  David S. Miller <davem@davemloft.net>    2018-09-21 22:17:34 -0400
commit     a42055e8d2c30d4decfc13ce943d09c7b9dad221 (patch)
tree       6efe6b116f5959c4bad8fa25c04ad6fb84bd313a /net/tls/tls_main.c
parent     06983aa526c759ebdf43f202d8d0491d9494e2f4 (diff)
net/tls: Add support for async encryption of records for performance
In the current implementation, TLS records are encrypted and transmitted serially: the implementation waits until the previously submitted user data has been encrypted and only then starts transmitting the record. This encrypt-one-record-at-a-time approach is inefficient when asynchronous crypto accelerators are used. Each record incurs the overhead of interrupts, driver softirq scheduling, etc., and the crypto accelerator sits idle most of the time while an encrypted record's pages are handed over to the TCP stack for transmission.

This patch enables encryption of multiple records in parallel when an async-capable crypto accelerator is present in the system. This is achieved by allowing the user space application to send more data via sendmsg() even while previously issued data is still being processed by the crypto accelerator, which requires returning control to the application right after the encryption request has been submitted to the accelerator. It also means that the zero-copy mode of encryption cannot be used with an async accelerator, since we must be done with the user space buffer before returning from sendmsg().

There can be multiple records in flight to/from the accelerator. Each record is represented by a 'struct tls_rec', which stores the memory pages for that record. Once records are encrypted, they are added to a linked list called tx_ready_list, which holds encrypted TLS records sorted by TLS sequence number. Records from tx_ready_list are transmitted by a newly introduced function, tls_tx_records(). The tx_ready_list is polled for records ready to be transmitted in sendmsg() and sendpage(), after initiating encryption of new TLS records. This achieves parallel encryption and transmission of records when an async accelerator is present.

The crypto accelerator may complete encryption only after sendmsg()/sendpage() has already polled tx_ready_list, so a deferred work context is needed to transmit records from tx_ready_list in that case. The deferred work gets scheduled when applications are not sending much data through the socket. If applications issue sendmsg()/sendpage() in quick succession, the scheduling of tx_work_handler is cancelled, because tx_ready_list is then polled from the application's own context; this saves the scheduling overhead of the deferred work.

The patch also brings a side benefit: we are able to get rid of the concept of a CLOSED record. Once a record is closed, it is either encrypted and placed on tx_ready_list, or, if encryption fails, the socket error is set. This simplifies the kernel TLS send path. However, since tls_device.c still uses them, the macros and accessor functions for CLOSED records have been retained.

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
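To make the scheme above concrete, here is a much-simplified C sketch of the two pieces the message describes: keeping tx_ready_list sorted by TLS sequence number as async encryption completes, and kicking a deferred worker only when no sendmsg()/sendpage() caller is around to drain the list itself. The names below (sketch_rec, sketch_tx_ctx, sketch_record_ready(), sketch_maybe_schedule_tx(), SKETCH_BIT_TX_SCHEDULED) are illustrative stand-ins, not the actual struct tls_rec / struct tls_sw_context_tx definitions from net/tls/tls_sw.c.

#include <linux/types.h>
#include <linux/list.h>
#include <linux/bitops.h>
#include <linux/workqueue.h>

/* Stand-in for struct tls_rec: one record in flight to/from the accelerator. */
struct sketch_rec {
	struct list_head list;          /* linkage on tx_ready_list */
	u64 tx_seq;                     /* TLS record sequence number */
};

/* Stand-in for the TX-side software context. */
struct sketch_tx_ctx {
	struct list_head tx_ready_list; /* encrypted records, sorted by tx_seq */
	unsigned long tx_bitmask;       /* scheduling flag lives here */
	struct delayed_work tx_work;    /* drains tx_ready_list when app is idle */
};

#define SKETCH_BIT_TX_SCHEDULED 0

/* Async-completion side: insert the freshly encrypted record so that
 * tx_ready_list stays sorted by TLS sequence number.
 */
static void sketch_record_ready(struct sketch_tx_ctx *ctx, struct sketch_rec *rec)
{
	struct list_head *insert_at = &ctx->tx_ready_list;
	struct sketch_rec *pos;

	list_for_each_entry(pos, &ctx->tx_ready_list, list) {
		if (pos->tx_seq > rec->tx_seq) {
			insert_at = &pos->list;
			break;
		}
	}
	/* list_add_tail() adds just before insert_at, i.e. in sorted position
	 * (or at the tail when every queued record has a smaller sequence).
	 */
	list_add_tail(&rec->list, insert_at);
}

/* Write-space/idle side: if records are ready but no sendmsg()/sendpage()
 * caller will poll the list soon, schedule the deferred worker once.  A
 * subsequent sendmsg()/sendpage() can clear the bit and cancel the work,
 * transmitting from the application's context instead.
 */
static void sketch_maybe_schedule_tx(struct sketch_tx_ctx *ctx)
{
	if (!list_empty(&ctx->tx_ready_list) &&
	    !test_and_set_bit(SKETCH_BIT_TX_SCHEDULED, &ctx->tx_bitmask))
		schedule_delayed_work(&ctx->tx_work, 0);
}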
Diffstat (limited to 'net/tls/tls_main.c')
-rw-r--r--    net/tls/tls_main.c    54
1 file changed, 21 insertions(+), 33 deletions(-)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 523622dc74f8..06094de7a3d9 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -141,7 +141,6 @@ retry:
                 size = sg->length;
         }
 
-        clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
         ctx->in_tcp_sendpages = false;
         ctx->sk_write_space(sk);
 
@@ -193,15 +192,12 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
         return rc;
 }
 
-int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
-                                   int flags, long *timeo)
+int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
+                            int flags)
 {
         struct scatterlist *sg;
         u16 offset;
 
-        if (!tls_is_partially_sent_record(ctx))
-                return ctx->push_pending_record(sk, flags);
-
         sg = ctx->partially_sent_record;
         offset = ctx->partially_sent_offset;
 
@@ -209,9 +205,23 @@ int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
         return tls_push_sg(sk, ctx, sg, offset, flags);
 }
 
+int tls_push_pending_closed_record(struct sock *sk,
+                                   struct tls_context *tls_ctx,
+                                   int flags, long *timeo)
+{
+        struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+
+        if (tls_is_partially_sent_record(tls_ctx) ||
+            !list_empty(&ctx->tx_ready_list))
+                return tls_tx_records(sk, flags);
+        else
+                return tls_ctx->push_pending_record(sk, flags);
+}
+
 static void tls_write_space(struct sock *sk)
 {
         struct tls_context *ctx = tls_get_ctx(sk);
+        struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
 
         /* If in_tcp_sendpages call lower protocol write space handler
          * to ensure we wake up any waiting operations there. For example
@@ -222,20 +232,11 @@ static void tls_write_space(struct sock *sk)
                 return;
         }
 
-        if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) {
-                gfp_t sk_allocation = sk->sk_allocation;
-                int rc;
-                long timeo = 0;
-
-                sk->sk_allocation = GFP_ATOMIC;
-                rc = tls_push_pending_closed_record(sk, ctx,
-                                                    MSG_DONTWAIT |
-                                                    MSG_NOSIGNAL,
-                                                    &timeo);
-                sk->sk_allocation = sk_allocation;
-
-                if (rc < 0)
-                        return;
+        /* Schedule the transmission if tx list is ready */
+        if (is_tx_ready(ctx, tx_ctx) && !sk->sk_write_pending) {
+                /* Schedule the transmission */
+                if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
+                        schedule_delayed_work(&tx_ctx->tx_work.work, 0);
         }
 
         ctx->sk_write_space(sk);
@@ -270,19 +271,6 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
         if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
                 tls_handle_open_record(sk, 0);
 
-        if (ctx->partially_sent_record) {
-                struct scatterlist *sg = ctx->partially_sent_record;
-
-                while (1) {
-                        put_page(sg_page(sg));
-                        sk_mem_uncharge(sk, sg->length);
-
-                        if (sg_is_last(sg))
-                                break;
-                        sg++;
-                }
-        }
-
         /* We need these for tls_sw_fallback handling of other packets */
         if (ctx->tx_conf == TLS_SW) {
                 kfree(ctx->tx.rec_seq);