 arch/arm64/crypto/ghash-ce-core.S |  9
 arch/arm64/crypto/ghash-ce-glue.c | 81
 2 files changed, 49 insertions(+), 41 deletions(-)
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index f7281e7a592f..913e49932ae6 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -332,8 +332,6 @@ ENDPROC(pmull_ghash_update_p8)
 	ld1		{XL.2d}, [x1]
 	ldr		x8, [x5, #8]			// load lower counter
 
-	load_round_keys	w7, x6
-
 	movi		MASK.16b, #0xe1
 	trn1		SHASH2.2d, SHASH.2d, HH.2d
 	trn2		T1.2d, SHASH.2d, HH.2d
@@ -346,6 +344,8 @@ CPU_LE( rev x8, x8 )
 	ld1		{KS0.16b-KS1.16b}, [x10]
 	.endif
 
+	cbnz		x6, 4f
+
 0:	ld1		{INP0.16b-INP1.16b}, [x3], #32
 
 	rev		x9, x8
@@ -471,6 +471,9 @@ CPU_LE( rev x8, x8 )
 	enc_round	KS0, v20
 	enc_round	KS1, v20
 	b		1b
+
+4:	load_round_keys	w7, x6
+	b		0b
 	.endm
 
 	/*
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index cd91b146c87d..42a0e84e276c 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -373,37 +373,39 @@ static int gcm_encrypt(struct aead_request *req)
 	memcpy(iv, req->iv, GCM_IV_SIZE);
 	put_unaligned_be32(1, iv + GCM_IV_SIZE);
 
-	if (likely(may_use_simd())) {
-		kernel_neon_begin();
+	err = skcipher_walk_aead_encrypt(&walk, req, false);
 
+	if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) {
+		u32 const *rk = NULL;
+
+		kernel_neon_begin();
 		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
 		pmull_gcm_encrypt_block(ks, iv, NULL, nrounds);
 		put_unaligned_be32(3, iv + GCM_IV_SIZE);
 		pmull_gcm_encrypt_block(ks + AES_BLOCK_SIZE, iv, NULL, nrounds);
 		put_unaligned_be32(4, iv + GCM_IV_SIZE);
-		kernel_neon_end();
-
-		err = skcipher_walk_aead_encrypt(&walk, req, false);
 
-		while (walk.nbytes >= 2 * AES_BLOCK_SIZE) {
+		do {
 			int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
 
-			kernel_neon_begin();
+			if (rk)
+				kernel_neon_begin();
+
 			pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
 					  walk.src.virt.addr, ctx->h2, iv,
-					  ctx->aes_key.key_enc, nrounds, ks);
+					  rk, nrounds, ks);
 			kernel_neon_end();
 
 			err = skcipher_walk_done(&walk,
 					walk.nbytes % (2 * AES_BLOCK_SIZE));
-		}
+
+			rk = ctx->aes_key.key_enc;
+		} while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
 	} else {
 		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds);
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
-		err = skcipher_walk_aead_encrypt(&walk, req, false);
-
 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
 			u8 *dst = walk.dst.virt.addr;
@@ -485,50 +487,53 @@ static int gcm_decrypt(struct aead_request *req)
 	memcpy(iv, req->iv, GCM_IV_SIZE);
 	put_unaligned_be32(1, iv + GCM_IV_SIZE);
 
-	if (likely(may_use_simd())) {
+	err = skcipher_walk_aead_decrypt(&walk, req, false);
+
+	if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) {
+		u32 const *rk = NULL;
+
 		kernel_neon_begin();
 		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
-		kernel_neon_end();
 
-		err = skcipher_walk_aead_decrypt(&walk, req, false);
-
-		while (walk.nbytes >= 2 * AES_BLOCK_SIZE) {
+		do {
 			int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
+			int rem = walk.total - blocks * AES_BLOCK_SIZE;
+
+			if (rk)
+				kernel_neon_begin();
 
-			kernel_neon_begin();
 			pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
 					  walk.src.virt.addr, ctx->h2, iv,
-					  ctx->aes_key.key_enc, nrounds);
-			kernel_neon_end();
+					  rk, nrounds);
 
-			err = skcipher_walk_done(&walk,
-					walk.nbytes % (2 * AES_BLOCK_SIZE));
-		}
+			/* check if this is the final iteration of the loop */
+			if (rem < (2 * AES_BLOCK_SIZE)) {
+				u8 *iv2 = iv + AES_BLOCK_SIZE;
 
-		if (walk.nbytes) {
-			u8 *iv2 = iv + AES_BLOCK_SIZE;
+				if (rem > AES_BLOCK_SIZE) {
+					memcpy(iv2, iv, AES_BLOCK_SIZE);
+					crypto_inc(iv2, AES_BLOCK_SIZE);
+				}
 
-			if (walk.nbytes > AES_BLOCK_SIZE) {
-				memcpy(iv2, iv, AES_BLOCK_SIZE);
-				crypto_inc(iv2, AES_BLOCK_SIZE);
-			}
+				pmull_gcm_encrypt_block(iv, iv, NULL, nrounds);
 
-			kernel_neon_begin();
-			pmull_gcm_encrypt_block(iv, iv, ctx->aes_key.key_enc,
-						nrounds);
+				if (rem > AES_BLOCK_SIZE)
+					pmull_gcm_encrypt_block(iv2, iv2, NULL,
+								nrounds);
+			}
 
-			if (walk.nbytes > AES_BLOCK_SIZE)
-				pmull_gcm_encrypt_block(iv2, iv2, NULL,
-							nrounds);
 			kernel_neon_end();
-		}
+
+			err = skcipher_walk_done(&walk,
+					walk.nbytes % (2 * AES_BLOCK_SIZE));
+
+			rk = ctx->aes_key.key_enc;
+		} while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
 	} else {
 		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds);
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
-		err = skcipher_walk_aead_decrypt(&walk, req, false);
-
 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
 			u8 *dst = walk.dst.virt.addr;