diff options
author | Jakub Kicinski <jakub.kicinski@netronome.com> | 2019-09-07 01:30:00 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-09-07 12:10:34 -0400 |
commit | e681cc603a79b5907a7388e1d4587779b0fe8ff2 (patch) | |
tree | 055d750a7e240746bcbbc475736228674b3c0d07 | |
parent | e7b159a48ba6f7c243881c7ef3afa6e8785c0826 (diff) |
net/tls: align non temporal copy to cache lines
Unlike normal TCP code, TLS has to touch the cache lines
it copies into to fill header info. On memory-heavy workloads,
having non-temporal stores and normal accesses targeting
the same cache line leads to significant overhead.
Measured 3% overhead running 3600 round robin connections
with additional memory heavy workload.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/tls/tls_device.c | 33 |
1 file changed, 28 insertions, 5 deletions
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 916c3c0a99f0..f959487c5cd1 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c | |||
@@ -372,6 +372,31 @@ static int tls_do_allocation(struct sock *sk, | |||
372 | return 0; | 372 | return 0; |
373 | } | 373 | } |
374 | 374 | ||
375 | static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i) | ||
376 | { | ||
377 | size_t pre_copy, nocache; | ||
378 | |||
379 | pre_copy = ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1); | ||
380 | if (pre_copy) { | ||
381 | pre_copy = min(pre_copy, bytes); | ||
382 | if (copy_from_iter(addr, pre_copy, i) != pre_copy) | ||
383 | return -EFAULT; | ||
384 | bytes -= pre_copy; | ||
385 | addr += pre_copy; | ||
386 | } | ||
387 | |||
388 | nocache = round_down(bytes, SMP_CACHE_BYTES); | ||
389 | if (copy_from_iter_nocache(addr, nocache, i) != nocache) | ||
390 | return -EFAULT; | ||
391 | bytes -= nocache; | ||
392 | addr += nocache; | ||
393 | |||
394 | if (bytes && copy_from_iter(addr, bytes, i) != bytes) | ||
395 | return -EFAULT; | ||
396 | |||
397 | return 0; | ||
398 | } | ||
399 | |||
375 | static int tls_push_data(struct sock *sk, | 400 | static int tls_push_data(struct sock *sk, |
376 | struct iov_iter *msg_iter, | 401 | struct iov_iter *msg_iter, |
377 | size_t size, int flags, | 402 | size_t size, int flags, |
@@ -445,12 +470,10 @@ handle_error: | |||
445 | copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); | 470 | copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); |
446 | copy = min_t(size_t, copy, (max_open_record_len - record->len)); | 471 | copy = min_t(size_t, copy, (max_open_record_len - record->len)); |
447 | 472 | ||
448 | if (copy_from_iter_nocache(page_address(pfrag->page) + | 473 | rc = tls_device_copy_data(page_address(pfrag->page) + |
449 | pfrag->offset, | 474 | pfrag->offset, copy, msg_iter); |
450 | copy, msg_iter) != copy) { | 475 | if (rc) |
451 | rc = -EFAULT; | ||
452 | goto handle_error; | 476 | goto handle_error; |
453 | } | ||
454 | tls_append_frag(record, pfrag, copy); | 477 | tls_append_frag(record, pfrag, copy); |
455 | 478 | ||
456 | size -= copy; | 479 | size -= copy; |