path: root/net/tls/tls_device_fallback.c
author    Ilya Lesokhin <ilyal@mellanox.com>    2018-04-30 03:16:16 -0400
committer David S. Miller <davem@davemloft.net>    2018-05-01 09:42:47 -0400
commit    e8f69799810c32dd40c6724d829eccc70baad07f (patch)
tree      ca8f75aa8d70d7ca5ea2e61e432b8d48f9829851 /net/tls/tls_device_fallback.c
parent    f66de3ee2c161fcc2d66974e9671f8a2a471ab20 (diff)
net/tls: Add generic NIC offload infrastructure
This patch adds a generic infrastructure to offload TLS crypto to a network device. It enables the kernel TLS socket to skip encryption and authentication operations on the transmit side of the data path, leaving those computationally expensive operations to the NIC.

The NIC offload infrastructure builds TLS records and pushes them to the TCP layer just like the SW KTLS implementation, using the same API. TCP segmentation is mostly unaffected. Currently the only exception is that we prevent mixed SKBs where only part of the payload requires offload. In the future we are likely to add a similar restriction following a change cipher spec record.

The notable differences between SW KTLS and NIC-offloaded TLS implementations are as follows:
1. The offloaded implementation builds "plaintext TLS records"; those records contain plaintext instead of ciphertext and placeholder bytes instead of authentication tags.
2. The offloaded implementation maintains a mapping from TCP sequence number to TLS records. Thus, given a TCP SKB sent from a NIC-offloaded TLS socket, we can use the TLS NIC offload infrastructure to obtain enough context to encrypt the payload of the SKB. A TLS record is released when the last byte of the record is ack'ed; this is done through the new icsk_clean_acked callback.

The infrastructure should be extendable to support various NIC offload implementations. However, it is currently written with the implementation below in mind:

The NIC assumes that packets from each offloaded stream are sent as plaintext and in order. It keeps track of the TLS records in the TCP stream. When a packet marked for offload is transmitted, the NIC encrypts the payload in-place and puts authentication tags in the relevant placeholders.

The responsibility for handling out-of-order packets (i.e. TCP retransmission, qdisc drops) falls on the netdev driver.

The netdev driver keeps track of the expected TCP SN from the NIC's perspective. If the next packet to transmit matches the expected TCP SN, the driver advances the expected TCP SN and transmits the packet with the TLS offload indication.

If the next packet to transmit does not match the expected TCP SN, the driver calls the TLS layer to obtain the TLS record that includes the TCP sequence number of the packet for transmission. Using this TLS record, the driver posts a work entry on the transmit queue to reconstruct the NIC TLS state required for the offload of the out-of-order packet. It updates the expected TCP SN accordingly and transmits the now in-order packet. The same queue is used for packet transmission and TLS context reconstruction to avoid the need for flushing the transmit queue before issuing the context reconstruction request.

Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
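To make the driver-side algorithm above concrete, here is a minimal, hedged sketch (not part of this patch) of the expected-TCP-SN bookkeeping a netdev driver might keep per transmit queue. It reuses tls_get_record() and struct tls_offload_context from this infrastructure; the mydrv_* structure and helpers are hypothetical placeholders for driver-specific descriptor posting.

/* Hypothetical driver-side sketch (not part of this patch): per-TX-queue
 * tracking of the TCP sequence number the NIC expects next, as described
 * in the commit message.  tls_get_record() and struct tls_offload_context
 * come from this infrastructure; everything prefixed mydrv_ is a
 * placeholder for driver-specific code.
 */
struct mydrv_tls_txq {
	u32 expected_tcp_sn;		/* next TCP SN the NIC will see */
};

static int mydrv_xmit_tls_skb(struct mydrv_tls_txq *txq,
			      struct tls_offload_context *tls_ctx,
			      struct sk_buff *skb)
{
	u32 seq = ntohl(tcp_hdr(skb)->seq);
	u32 datalen = skb->len - skb_transport_offset(skb) - tcp_hdrlen(skb);

	if (unlikely(seq != txq->expected_tcp_sn)) {
		struct tls_record_info *record;
		unsigned long flags;
		u64 rcd_sn;

		/* Out-of-order transmit (retransmission, qdisc drop):
		 * look up the TLS record covering this sequence number
		 * and post a context-reconstruction work entry on the
		 * same queue ahead of the packet itself.
		 */
		spin_lock_irqsave(&tls_ctx->lock, flags);
		record = tls_get_record(tls_ctx, seq, &rcd_sn);
		if (record)
			mydrv_post_resync_wqe(txq, record, rcd_sn, seq);
		spin_unlock_irqrestore(&tls_ctx->lock, flags);

		if (!record)
			return -EINVAL;	/* driver policy: drop or fall back */
	}

	/* In order (or just resynced): advance the expected SN and send
	 * the packet with the TLS offload indication set.
	 */
	txq->expected_tcp_sn = seq + datalen;
	return mydrv_post_tx_wqe(txq, skb);
}

In this scheme the in-order fast path only touches the per-queue counter; the record lookup and resync work entry are confined to the retransmission path, which is why the same send queue can serve both purposes without flushing.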
Diffstat (limited to 'net/tls/tls_device_fallback.c')
-rw-r--r--    net/tls/tls_device_fallback.c    450
1 file changed, 450 insertions, 0 deletions
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
new file mode 100644
index 000000000000..748914abdb60
--- /dev/null
+++ b/net/tls/tls_device_fallback.c
@@ -0,0 +1,450 @@
1/* Copyright (c) 2018, Mellanox Technologies All rights reserved.
2 *
3 * This software is available to you under a choice of one of two
4 * licenses. You may choose to be licensed under the terms of the GNU
5 * General Public License (GPL) Version 2, available from the file
6 * COPYING in the main directory of this source tree, or the
7 * OpenIB.org BSD license below:
8 *
9 * Redistribution and use in source and binary forms, with or
10 * without modification, are permitted provided that the following
11 * conditions are met:
12 *
13 * - Redistributions of source code must retain the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer.
16 *
17 * - Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials
20 * provided with the distribution.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
26 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
27 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
28 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 * SOFTWARE.
30 */
31
32#include <net/tls.h>
33#include <crypto/aead.h>
34#include <crypto/scatterwalk.h>
35#include <net/ip6_checksum.h>
36
37static void chain_to_walk(struct scatterlist *sg, struct scatter_walk *walk)
38{
39 struct scatterlist *src = walk->sg;
40 int diff = walk->offset - src->offset;
41
42 sg_set_page(sg, sg_page(src),
43 src->length - diff, walk->offset);
44
45 scatterwalk_crypto_chain(sg, sg_next(src), 0, 2);
46}
47
48static int tls_enc_record(struct aead_request *aead_req,
49 struct crypto_aead *aead, char *aad,
50 char *iv, __be64 rcd_sn,
51 struct scatter_walk *in,
52 struct scatter_walk *out, int *in_len)
53{
54 unsigned char buf[TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE];
55 struct scatterlist sg_in[3];
56 struct scatterlist sg_out[3];
57 u16 len;
58 int rc;
59
60 len = min_t(int, *in_len, ARRAY_SIZE(buf));
61
62 scatterwalk_copychunks(buf, in, len, 0);
63 scatterwalk_copychunks(buf, out, len, 1);
64
65 *in_len -= len;
66 if (!*in_len)
67 return 0;
68
69 scatterwalk_pagedone(in, 0, 1);
70 scatterwalk_pagedone(out, 1, 1);
71
72 len = buf[4] | (buf[3] << 8);
73 len -= TLS_CIPHER_AES_GCM_128_IV_SIZE;
74
75 tls_make_aad(aad, len - TLS_CIPHER_AES_GCM_128_TAG_SIZE,
76 (char *)&rcd_sn, sizeof(rcd_sn), buf[0]);
77
78 memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE,
79 TLS_CIPHER_AES_GCM_128_IV_SIZE);
80
81 sg_init_table(sg_in, ARRAY_SIZE(sg_in));
82 sg_init_table(sg_out, ARRAY_SIZE(sg_out));
83 sg_set_buf(sg_in, aad, TLS_AAD_SPACE_SIZE);
84 sg_set_buf(sg_out, aad, TLS_AAD_SPACE_SIZE);
85 chain_to_walk(sg_in + 1, in);
86 chain_to_walk(sg_out + 1, out);
87
88 *in_len -= len;
89 if (*in_len < 0) {
90 *in_len += TLS_CIPHER_AES_GCM_128_TAG_SIZE;
91 /* the input buffer doesn't contain the entire record.
92 * trim len accordingly. The resulting authentication tag
93 * will contain garbage, but we don't care, so we won't
94 * include any of it in the output skb
95 * Note that we assume the output buffer length
96 * is larger than the input buffer length + tag size
97 */
98 if (*in_len < 0)
99 len += *in_len;
100
101 *in_len = 0;
102 }
103
104 if (*in_len) {
105 scatterwalk_copychunks(NULL, in, len, 2);
106 scatterwalk_pagedone(in, 0, 1);
107 scatterwalk_copychunks(NULL, out, len, 2);
108 scatterwalk_pagedone(out, 1, 1);
109 }
110
111 len -= TLS_CIPHER_AES_GCM_128_TAG_SIZE;
112 aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv);
113
114 rc = crypto_aead_encrypt(aead_req);
115
116 return rc;
117}
118
119static void tls_init_aead_request(struct aead_request *aead_req,
120 struct crypto_aead *aead)
121{
122 aead_request_set_tfm(aead_req, aead);
123 aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
124}
125
126static struct aead_request *tls_alloc_aead_request(struct crypto_aead *aead,
127 gfp_t flags)
128{
129 unsigned int req_size = sizeof(struct aead_request) +
130 crypto_aead_reqsize(aead);
131 struct aead_request *aead_req;
132
133 aead_req = kzalloc(req_size, flags);
134 if (aead_req)
135 tls_init_aead_request(aead_req, aead);
136 return aead_req;
137}
138
139static int tls_enc_records(struct aead_request *aead_req,
140 struct crypto_aead *aead, struct scatterlist *sg_in,
141 struct scatterlist *sg_out, char *aad, char *iv,
142 u64 rcd_sn, int len)
143{
144 struct scatter_walk out, in;
145 int rc;
146
147 scatterwalk_start(&in, sg_in);
148 scatterwalk_start(&out, sg_out);
149
150 do {
151 rc = tls_enc_record(aead_req, aead, aad, iv,
152 cpu_to_be64(rcd_sn), &in, &out, &len);
153 rcd_sn++;
154
155 } while (rc == 0 && len);
156
157 scatterwalk_done(&in, 0, 0);
158 scatterwalk_done(&out, 1, 0);
159
160 return rc;
161}
162
163/* Can't use icsk->icsk_af_ops->send_check here because the ip addresses
164 * might have been changed by NAT.
165 */
166static void update_chksum(struct sk_buff *skb, int headln)
167{
168 struct tcphdr *th = tcp_hdr(skb);
169 int datalen = skb->len - headln;
170 const struct ipv6hdr *ipv6h;
171 const struct iphdr *iph;
172
173 /* We only changed the payload so if we are using partial we don't
174 * need to update anything.
175 */
176 if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
177 return;
178
179 skb->ip_summed = CHECKSUM_PARTIAL;
180 skb->csum_start = skb_transport_header(skb) - skb->head;
181 skb->csum_offset = offsetof(struct tcphdr, check);
182
183 if (skb->sk->sk_family == AF_INET6) {
184 ipv6h = ipv6_hdr(skb);
185 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
186 datalen, IPPROTO_TCP, 0);
187 } else {
188 iph = ip_hdr(skb);
189 th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
190 IPPROTO_TCP, 0);
191 }
192}
193
194static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
195{
196 skb_copy_header(nskb, skb);
197
198 skb_put(nskb, skb->len);
199 memcpy(nskb->data, skb->data, headln);
200 update_chksum(nskb, headln);
201
202 nskb->destructor = skb->destructor;
203 nskb->sk = skb->sk;
204 skb->destructor = NULL;
205 skb->sk = NULL;
206 refcount_add(nskb->truesize - skb->truesize,
207 &nskb->sk->sk_wmem_alloc);
208}
209
210/* This function may be called after the user socket is already
211 * closed so make sure we don't use anything freed during
212 * tls_sk_proto_close here
213 */
214
215static int fill_sg_in(struct scatterlist *sg_in,
216 struct sk_buff *skb,
217 struct tls_offload_context *ctx,
218 u64 *rcd_sn,
219 s32 *sync_size,
220 int *resync_sgs)
221{
222 int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
223 int payload_len = skb->len - tcp_payload_offset;
224 u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
225 struct tls_record_info *record;
226 unsigned long flags;
227 int remaining;
228 int i;
229
230 spin_lock_irqsave(&ctx->lock, flags);
231 record = tls_get_record(ctx, tcp_seq, rcd_sn);
232 if (!record) {
233 spin_unlock_irqrestore(&ctx->lock, flags);
234 WARN(1, "Record not found for seq %u\n", tcp_seq);
235 return -EINVAL;
236 }
237
238 *sync_size = tcp_seq - tls_record_start_seq(record);
239 if (*sync_size < 0) {
240 int is_start_marker = tls_record_is_start_marker(record);
241
242 spin_unlock_irqrestore(&ctx->lock, flags);
243 /* This should only occur if the relevant record was
244 * already acked. In that case it should be ok
245 * to drop the packet and avoid retransmission.
246 *
247 * There is a corner case where the packet contains
248 * both an acked and a non-acked record.
249 * We currently don't handle that case and rely
250 * on TCP to retransmit a packet that doesn't contain
251 * already acked payload.
252 */
253 if (!is_start_marker)
254 *sync_size = 0;
255 return -EINVAL;
256 }
257
258 remaining = *sync_size;
259 for (i = 0; remaining > 0; i++) {
260 skb_frag_t *frag = &record->frags[i];
261
262 __skb_frag_ref(frag);
263 sg_set_page(sg_in + i, skb_frag_page(frag),
264 skb_frag_size(frag), frag->page_offset);
265
266 remaining -= skb_frag_size(frag);
267
268 if (remaining < 0)
269 sg_in[i].length += remaining;
270 }
271 *resync_sgs = i;
272
273 spin_unlock_irqrestore(&ctx->lock, flags);
274 if (skb_to_sgvec(skb, &sg_in[i], tcp_payload_offset, payload_len) < 0)
275 return -EINVAL;
276
277 return 0;
278}
279
280static void fill_sg_out(struct scatterlist sg_out[3], void *buf,
281 struct tls_context *tls_ctx,
282 struct sk_buff *nskb,
283 int tcp_payload_offset,
284 int payload_len,
285 int sync_size,
286 void *dummy_buf)
287{
288 sg_set_buf(&sg_out[0], dummy_buf, sync_size);
289 sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len);
290 /* Add room for authentication tag produced by crypto */
291 dummy_buf += sync_size;
292 sg_set_buf(&sg_out[2], dummy_buf, TLS_CIPHER_AES_GCM_128_TAG_SIZE);
293}
294
295static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
296 struct scatterlist sg_out[3],
297 struct scatterlist *sg_in,
298 struct sk_buff *skb,
299 s32 sync_size, u64 rcd_sn)
300{
301 int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
302 struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
303 int payload_len = skb->len - tcp_payload_offset;
304 void *buf, *iv, *aad, *dummy_buf;
305 struct aead_request *aead_req;
306 struct sk_buff *nskb = NULL;
307 int buf_len;
308
309 aead_req = tls_alloc_aead_request(ctx->aead_send, GFP_ATOMIC);
310 if (!aead_req)
311 return NULL;
312
313 buf_len = TLS_CIPHER_AES_GCM_128_SALT_SIZE +
314 TLS_CIPHER_AES_GCM_128_IV_SIZE +
315 TLS_AAD_SPACE_SIZE +
316 sync_size +
317 TLS_CIPHER_AES_GCM_128_TAG_SIZE;
318 buf = kmalloc(buf_len, GFP_ATOMIC);
319 if (!buf)
320 goto free_req;
321
322 iv = buf;
323 memcpy(iv, tls_ctx->crypto_send_aes_gcm_128.salt,
324 TLS_CIPHER_AES_GCM_128_SALT_SIZE);
325 aad = buf + TLS_CIPHER_AES_GCM_128_SALT_SIZE +
326 TLS_CIPHER_AES_GCM_128_IV_SIZE;
327 dummy_buf = aad + TLS_AAD_SPACE_SIZE;
328
329 nskb = alloc_skb(skb_headroom(skb) + skb->len, GFP_ATOMIC);
330 if (!nskb)
331 goto free_buf;
332
333 skb_reserve(nskb, skb_headroom(skb));
334
335 fill_sg_out(sg_out, buf, tls_ctx, nskb, tcp_payload_offset,
336 payload_len, sync_size, dummy_buf);
337
338 if (tls_enc_records(aead_req, ctx->aead_send, sg_in, sg_out, aad, iv,
339 rcd_sn, sync_size + payload_len) < 0)
340 goto free_nskb;
341
342 complete_skb(nskb, skb, tcp_payload_offset);
343
344 /* validate_xmit_skb_list assumes that if the skb wasn't segmented
345 * nskb->prev will point to the skb itself
346 */
347 nskb->prev = nskb;
348
349free_buf:
350 kfree(buf);
351free_req:
352 kfree(aead_req);
353 return nskb;
354free_nskb:
355 kfree_skb(nskb);
356 nskb = NULL;
357 goto free_buf;
358}
359
360static struct sk_buff *tls_sw_fallback(struct sock *sk, struct sk_buff *skb)
361{
362 int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
363 struct tls_context *tls_ctx = tls_get_ctx(sk);
364 struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
365 int payload_len = skb->len - tcp_payload_offset;
366 struct scatterlist *sg_in, sg_out[3];
367 struct sk_buff *nskb = NULL;
368 int sg_in_max_elements;
369 int resync_sgs = 0;
370 s32 sync_size = 0;
371 u64 rcd_sn;
372
373 /* worst case is:
374 * MAX_SKB_FRAGS in tls_record_info
375 * MAX_SKB_FRAGS + 1 in SKB head and frags.
376 */
377 sg_in_max_elements = 2 * MAX_SKB_FRAGS + 1;
378
379 if (!payload_len)
380 return skb;
381
382 sg_in = kmalloc_array(sg_in_max_elements, sizeof(*sg_in), GFP_ATOMIC);
383 if (!sg_in)
384 goto free_orig;
385
386 sg_init_table(sg_in, sg_in_max_elements);
387 sg_init_table(sg_out, ARRAY_SIZE(sg_out));
388
389 if (fill_sg_in(sg_in, skb, ctx, &rcd_sn, &sync_size, &resync_sgs)) {
390 /* bypass packets before kernel TLS socket option was set */
391 if (sync_size < 0 && payload_len <= -sync_size)
392 nskb = skb_get(skb);
393 goto put_sg;
394 }
395
396 nskb = tls_enc_skb(tls_ctx, sg_out, sg_in, skb, sync_size, rcd_sn);
397
398put_sg:
399 while (resync_sgs)
400 put_page(sg_page(&sg_in[--resync_sgs]));
401 kfree(sg_in);
402free_orig:
403 kfree_skb(skb);
404 return nskb;
405}
406
407struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
408 struct net_device *dev,
409 struct sk_buff *skb)
410{
411 if (dev == tls_get_ctx(sk)->netdev)
412 return skb;
413
414 return tls_sw_fallback(sk, skb);
415}
416
417int tls_sw_fallback_init(struct sock *sk,
418 struct tls_offload_context *offload_ctx,
419 struct tls_crypto_info *crypto_info)
420{
421 const u8 *key;
422 int rc;
423
424 offload_ctx->aead_send =
425 crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
426 if (IS_ERR(offload_ctx->aead_send)) {
427 rc = PTR_ERR(offload_ctx->aead_send);
428 pr_err_ratelimited("crypto_alloc_aead failed rc=%d\n", rc);
429 offload_ctx->aead_send = NULL;
430 goto err_out;
431 }
432
433 key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key;
434
435 rc = crypto_aead_setkey(offload_ctx->aead_send, key,
436 TLS_CIPHER_AES_GCM_128_KEY_SIZE);
437 if (rc)
438 goto free_aead;
439
440 rc = crypto_aead_setauthsize(offload_ctx->aead_send,
441 TLS_CIPHER_AES_GCM_128_TAG_SIZE);
442 if (rc)
443 goto free_aead;
444
445 return 0;
446free_aead:
447 crypto_free_aead(offload_ctx->aead_send);
448err_out:
449 return rc;
450}
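For completeness (not part of this file): tls_validate_xmit_skb() above is not called by drivers directly. Elsewhere in this series the TLS device-offload setup points the socket's validate-xmit hook at it, so the core transmit path applies the software fallback whenever an skb from an offloaded socket egresses through a device other than the offloading one. A rough, illustrative sketch of that wiring, assuming the sk_validate_xmit_skb hook introduced earlier in the series:

/* Illustrative only: the real call site lives in net/tls/tls_device.c,
 * not in this file, and may differ in detail.
 */
static void example_install_tls_fallback(struct sock *sk)
{
	/* After the device offload context is set up, every skb sent on
	 * this socket is validated against the offloading netdev; skbs
	 * routed out of any other device are encrypted in software via
	 * tls_validate_xmit_skb() -> tls_sw_fallback().
	 */
	sk->sk_validate_xmit_skb = tls_validate_xmit_skb;
}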