author     Linus Torvalds <torvalds@linux-foundation.org>  2015-04-15 13:42:15 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-04-15 13:42:15 -0400
commit     cb906953d2c3fd450655d9fa833f03690ad50c23 (patch)
tree       06c5665afb24baee3ac49f62db61ca97918079b4
parent     6c373ca89399c5a3f7ef210ad8f63dc3437da345 (diff)
parent     3abafaf2192b1712079edfd4232b19877d6f41a5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
 "Here is the crypto update for 4.1:

  New interfaces:
   - user-space interface for AEAD
   - user-space interface for RNG (i.e., pseudo RNG)

  New hashes:
   - ARMv8 SHA1/256
   - ARMv8 AES
   - ARMv8 GHASH
   - ARM assembler and NEON SHA256
   - MIPS OCTEON SHA1/256/512
   - MIPS img-hash SHA1/256 and MD5
   - Power 8 VMX AES/CBC/CTR/GHASH
   - PPC assembler AES, SHA1/256 and MD5
   - Broadcom IPROC RNG driver

  Cleanups/fixes:
   - prevent internal helper algos from being exposed to user-space
   - merge common code from assembly/C SHA implementations
   - misc fixes"

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (169 commits)
  crypto: arm - workaround for building with old binutils
  crypto: arm/sha256 - avoid sha256 code on ARMv7-M
  crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer
  crypto: x86/sha256_ssse3 - move SHA-224/256 SSSE3 implementation to base layer
  crypto: x86/sha1_ssse3 - move SHA-1 SSSE3 implementation to base layer
  crypto: arm64/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
  crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer
  crypto: arm/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
  crypto: arm/sha256 - move SHA-224/256 ASM/NEON implementation to base layer
  crypto: arm/sha1-ce - move SHA-1 ARMv8 implementation to base layer
  crypto: arm/sha1_neon - move SHA-1 NEON implementation to base layer
  crypto: arm/sha1 - move SHA-1 ARM asm implementation to base layer
  crypto: sha512-generic - move to generic glue implementation
  crypto: sha256-generic - move to generic glue implementation
  crypto: sha1-generic - move to generic glue implementation
  crypto: sha512 - implement base layer for SHA-512
  crypto: sha256 - implement base layer for SHA-256
  crypto: sha1 - implement base layer for SHA-1
  crypto: api - remove instance when test failed
  crypto: api - Move alg ref count init to crypto_check_alg
  ...
-rw-r--r--Documentation/DocBook/crypto-API.tmpl860
-rw-r--r--Documentation/crypto/crypto-API-userspace.txt205
-rw-r--r--Documentation/devicetree/bindings/crypto/img-hash.txt27
-rw-r--r--Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt12
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/arm/Kconfig3
-rw-r--r--arch/arm/crypto/Kconfig130
-rw-r--r--arch/arm/crypto/Makefile27
-rw-r--r--arch/arm/crypto/aes-ce-core.S518
-rw-r--r--arch/arm/crypto/aes-ce-glue.c524
-rw-r--r--arch/arm/crypto/aesbs-glue.c9
-rw-r--r--arch/arm/crypto/ghash-ce-core.S94
-rw-r--r--arch/arm/crypto/ghash-ce-glue.c320
-rw-r--r--arch/arm/crypto/sha1-ce-core.S125
-rw-r--r--arch/arm/crypto/sha1-ce-glue.c96
-rw-r--r--arch/arm/crypto/sha1.h (renamed from arch/arm/include/asm/crypto/sha1.h)3
-rw-r--r--arch/arm/crypto/sha1_glue.c112
-rw-r--r--arch/arm/crypto/sha1_neon_glue.c137
-rw-r--r--arch/arm/crypto/sha2-ce-core.S125
-rw-r--r--arch/arm/crypto/sha2-ce-glue.c114
-rw-r--r--arch/arm/crypto/sha256-armv4.pl716
-rw-r--r--arch/arm/crypto/sha256-core.S_shipped2808
-rw-r--r--arch/arm/crypto/sha256_glue.c128
-rw-r--r--arch/arm/crypto/sha256_glue.h14
-rw-r--r--arch/arm/crypto/sha256_neon_glue.c101
-rw-r--r--arch/arm64/crypto/aes-glue.c12
-rw-r--r--arch/arm64/crypto/sha1-ce-core.S33
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c151
-rw-r--r--arch/arm64/crypto/sha2-ce-core.S29
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c227
-rw-r--r--arch/mips/cavium-octeon/crypto/Makefile5
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-crypto.c4
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-crypto.h83
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-md5.c8
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha1.c241
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha256.c280
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha512.c277
-rw-r--r--arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h14
-rw-r--r--arch/powerpc/crypto/Makefile8
-rw-r--r--arch/powerpc/crypto/aes-spe-core.S351
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c512
-rw-r--r--arch/powerpc/crypto/aes-spe-keys.S283
-rw-r--r--arch/powerpc/crypto/aes-spe-modes.S630
-rw-r--r--arch/powerpc/crypto/aes-spe-regs.h42
-rw-r--r--arch/powerpc/crypto/aes-tab-4k.S331
-rw-r--r--arch/powerpc/crypto/md5-asm.S243
-rw-r--r--arch/powerpc/crypto/md5-glue.c165
-rw-r--r--arch/powerpc/crypto/sha1-spe-asm.S299
-rw-r--r--arch/powerpc/crypto/sha1-spe-glue.c210
-rw-r--r--arch/powerpc/crypto/sha256-spe-asm.S323
-rw-r--r--arch/powerpc/crypto/sha256-spe-glue.c275
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c187
-rw-r--r--arch/x86/crypto/camellia_aesni_avx2_glue.c15
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c15
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c9
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c15
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c7
-rw-r--r--arch/x86/crypto/glue_helper.c1
-rw-r--r--arch/x86/crypto/serpent_avx2_glue.c15
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c15
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c15
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb.c9
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c2
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c139
-rw-r--r--arch/x86/crypto/sha256-avx-asm.S10
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S10
-rw-r--r--arch/x86/crypto/sha256-ssse3-asm.S10
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c193
-rw-r--r--arch/x86/crypto/sha512-avx-asm.S6
-rw-r--r--arch/x86/crypto/sha512-avx2-asm.S6
-rw-r--r--arch/x86/crypto/sha512-ssse3-asm.S6
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c202
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c15
-rw-r--r--crypto/Kconfig142
-rw-r--r--crypto/Makefile1
-rw-r--r--crypto/ablk_helper.c3
-rw-r--r--crypto/algapi.c42
-rw-r--r--crypto/algif_aead.c666
-rw-r--r--crypto/algif_rng.c2
-rw-r--r--crypto/ansi_cprng.c6
-rw-r--r--crypto/api.c10
-rw-r--r--crypto/cryptd.c49
-rw-r--r--crypto/crypto_user.c39
-rw-r--r--crypto/drbg.c64
-rw-r--r--crypto/mcryptd.c25
-rw-r--r--crypto/proc.c3
-rw-r--r--crypto/sha1_generic.c102
-rw-r--r--crypto/sha256_generic.c133
-rw-r--r--crypto/sha512_generic.c123
-rw-r--r--crypto/tcrypt.c4
-rw-r--r--crypto/testmgr.c24
-rw-r--r--drivers/char/hw_random/Kconfig13
-rw-r--r--drivers/char/hw_random/Makefile1
-rw-r--r--drivers/char/hw_random/bcm63xx-rng.c120
-rw-r--r--drivers/char/hw_random/core.c45
-rw-r--r--drivers/char/hw_random/exynos-rng.c12
-rw-r--r--drivers/char/hw_random/iproc-rng200.c239
-rw-r--r--drivers/char/hw_random/msm-rng.c11
-rw-r--r--drivers/char/hw_random/octeon-rng.c4
-rw-r--r--drivers/char/hw_random/omap-rng.c23
-rw-r--r--drivers/char/hw_random/pseries-rng.c4
-rw-r--r--drivers/char/hw_random/xgene-rng.c10
-rw-r--r--drivers/crypto/Kconfig24
-rw-r--r--drivers/crypto/Makefile2
-rw-r--r--drivers/crypto/amcc/crypto4xx_core.c6
-rw-r--r--drivers/crypto/atmel-aes.c26
-rw-r--r--drivers/crypto/atmel-sha.c37
-rw-r--r--drivers/crypto/atmel-tdes.c3
-rw-r--r--drivers/crypto/caam/caamhash.c1
-rw-r--r--drivers/crypto/caam/caamrng.c6
-rw-r--r--drivers/crypto/ccp/Makefile9
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes-cmac.c12
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes-xts.c4
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes.c3
-rw-r--r--drivers/crypto/ccp/ccp-crypto-main.c5
-rw-r--r--drivers/crypto/ccp/ccp-crypto-sha.c12
-rw-r--r--drivers/crypto/ccp/ccp-crypto.h3
-rw-r--r--drivers/crypto/ccp/ccp-dev.c7
-rw-r--r--drivers/crypto/ccp/ccp-dev.h12
-rw-r--r--drivers/crypto/ccp/ccp-ops.c24
-rw-r--r--drivers/crypto/ccp/ccp-pci.c21
-rw-r--r--drivers/crypto/ccp/ccp-platform.c111
-rw-r--r--drivers/crypto/img-hash.c1029
-rw-r--r--drivers/crypto/mxs-dcp.c2
-rw-r--r--drivers/crypto/omap-aes.c14
-rw-r--r--drivers/crypto/omap-sham.c2
-rw-r--r--drivers/crypto/qat/qat_common/adf_accel_devices.h1
-rw-r--r--drivers/crypto/qat/qat_common/adf_accel_engine.c35
-rw-r--r--drivers/crypto/qat/qat_common/adf_aer.c21
-rw-r--r--drivers/crypto/qat/qat_common/adf_cfg.c5
-rw-r--r--drivers/crypto/qat/qat_common/adf_cfg_strings.h10
-rw-r--r--drivers/crypto/qat/qat_common/adf_common_drv.h2
-rw-r--r--drivers/crypto/qat/qat_common/adf_ctl_drv.c66
-rw-r--r--drivers/crypto/qat/qat_common/adf_dev_mgr.c3
-rw-r--r--drivers/crypto/qat/qat_common/adf_init.c88
-rw-r--r--drivers/crypto/qat/qat_common/adf_transport.c31
-rw-r--r--drivers/crypto/qat/qat_common/adf_transport_debug.c2
-rw-r--r--drivers/crypto/qat/qat_common/icp_qat_hw.h2
-rw-r--r--drivers/crypto/qat/qat_common/qat_crypto.c9
-rw-r--r--drivers/crypto/qat/qat_common/qat_hal.c6
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_admin.c3
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c3
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h6
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_drv.c4
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_isr.c15
-rw-r--r--drivers/crypto/sahara.c51
-rw-r--r--drivers/crypto/talitos.c17
-rw-r--r--drivers/crypto/ux500/hash/hash_core.c2
-rw-r--r--drivers/crypto/vmx/Kconfig8
-rw-r--r--drivers/crypto/vmx/Makefile19
-rw-r--r--drivers/crypto/vmx/aes.c139
-rw-r--r--drivers/crypto/vmx/aes_cbc.c184
-rw-r--r--drivers/crypto/vmx/aes_ctr.c167
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.h20
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.pl1930
-rw-r--r--drivers/crypto/vmx/ghash.c214
-rw-r--r--drivers/crypto/vmx/ghashp8-ppc.pl228
-rw-r--r--drivers/crypto/vmx/ppc-xlate.pl207
-rw-r--r--drivers/crypto/vmx/vmx.c88
-rw-r--r--include/crypto/algapi.h2
-rw-r--r--include/crypto/rng.h3
-rw-r--r--include/crypto/sha.h15
-rw-r--r--include/crypto/sha1_base.h106
-rw-r--r--include/crypto/sha256_base.h128
-rw-r--r--include/crypto/sha512_base.h131
-rw-r--r--include/linux/crypto.h6
-rw-r--r--include/linux/hw_random.h4
-rw-r--r--lib/string.c2
168 files changed, 18223 insertions, 2202 deletions
diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl
index 04a8c24ead47..efc8d90a9a3f 100644
--- a/Documentation/DocBook/crypto-API.tmpl
+++ b/Documentation/DocBook/crypto-API.tmpl
@@ -509,6 +509,270 @@
509 select it due to the used type and mask field.
510 </para>
511 </sect1>
512
513 <sect1><title>Internal Structure of Kernel Crypto API</title>
514
515 <para>
516 The kernel crypto API has an internal structure where a cipher
517 implementation may use many layers and indirections. This section
518 shall help to clarify how the kernel crypto API uses
519 various components to implement the complete cipher.
520 </para>
521
522 <para>
523 The following subsections explain the internal structure based
524 on existing cipher implementations. The first section addresses
525 the most complex scenario where all other scenarios form a logical
526 subset.
527 </para>
528
529 <sect2><title>Generic AEAD Cipher Structure</title>
530
531 <para>
532 The following ASCII art decomposes the kernel crypto API layers
533 when using the AEAD cipher with the automated IV generation. The
534 shown example is used by the IPSEC layer.
535 </para>
536
537 <para>
538 For other use cases of AEAD ciphers, the ASCII art applies as
539 well, but the caller may not use the GIVCIPHER interface. In
540 this case, the caller must generate the IV.
541 </para>
542
543 <para>
544 The depicted example decomposes the AEAD cipher of GCM(AES) based
545 on the generic C implementations (gcm.c, aes-generic.c, ctr.c,
546 ghash-generic.c, seqiv.c). The generic implementation serves as an
547 example showing the complete logic of the kernel crypto API.
548 </para>
549
550 <para>
 551 Some streamlined cipher implementations (like AES-NI) may provide
 552 merged implementations that, from the point of view of the kernel
 553 crypto API, cannot be decomposed into layers any more.
554 In case of the AES-NI implementation, the CTR mode, the GHASH
555 implementation and the AES cipher are all merged into one cipher
556 implementation registered with the kernel crypto API. In this case,
557 the concept described by the following ASCII art applies too. However,
558 the decomposition of GCM into the individual sub-components
559 by the kernel crypto API is not done any more.
560 </para>
561
562 <para>
563 Each block in the following ASCII art is an independent cipher
564 instance obtained from the kernel crypto API. Each block
565 is accessed by the caller or by other blocks using the API functions
566 defined by the kernel crypto API for the cipher implementation type.
567 </para>
568
569 <para>
570 The blocks below indicate the cipher type as well as the specific
571 logic implemented in the cipher.
572 </para>
573
574 <para>
575 The ASCII art picture also indicates the call structure, i.e. who
576 calls which component. The arrows point to the invoked block
577 where the caller uses the API applicable to the cipher type
578 specified for the block.
579 </para>
580
581 <programlisting>
582<![CDATA[
583kernel crypto API | IPSEC Layer
584 |
585+-----------+ |
586| | (1)
587| givcipher | <----------------------------------- esp_output
588| (seqiv) | ---+
589+-----------+ |
590 | (2)
591+-----------+ |
592| | <--+ (2)
593| aead | <----------------------------------- esp_input
594| (gcm) | ------------+
595+-----------+ |
596 | (3) | (5)
597 v v
598+-----------+ +-----------+
599| | | |
600| ablkcipher| | ahash |
601| (ctr) | ---+ | (ghash) |
602+-----------+ | +-----------+
603 |
604+-----------+ | (4)
605| | <--+
606| cipher |
607| (aes) |
608+-----------+
609]]>
610 </programlisting>
611
612 <para>
613 The following call sequence is applicable when the IPSEC layer
614 triggers an encryption operation with the esp_output function. During
615 configuration, the administrator set up the use of rfc4106(gcm(aes)) as
616 the cipher for ESP. The following call sequence is now depicted in the
617 ASCII art above:
618 </para>
619
620 <orderedlist>
621 <listitem>
622 <para>
623 esp_output() invokes crypto_aead_givencrypt() to trigger an encryption
624 operation of the GIVCIPHER implementation.
625 </para>
626
627 <para>
628 In case of GCM, the SEQIV implementation is registered as GIVCIPHER
629 in crypto_rfc4106_alloc().
630 </para>
631
632 <para>
633 The SEQIV performs its operation to generate an IV where the core
634 function is seqiv_geniv().
635 </para>
636 </listitem>
637
638 <listitem>
639 <para>
640 Now, SEQIV uses the AEAD API function calls to invoke the associated
641 AEAD cipher. In our case, during the instantiation of SEQIV, the
642 cipher handle for GCM is provided to SEQIV. This means that SEQIV
643 invokes AEAD cipher operations with the GCM cipher handle.
644 </para>
645
646 <para>
647 During instantiation of the GCM handle, the CTR(AES) and GHASH
648 ciphers are instantiated. The cipher handles for CTR(AES) and GHASH
649 are retained for later use.
650 </para>
651
652 <para>
 653 The GCM implementation is responsible for invoking the CTR mode AES and
 654 the GHASH cipher in the right manner to implement the GCM
 655 specification.
656 </para>
657 </listitem>
658
659 <listitem>
660 <para>
661 The GCM AEAD cipher type implementation now invokes the ABLKCIPHER API
662 with the instantiated CTR(AES) cipher handle.
663 </para>
664
665 <para>
666 During instantiation of the CTR(AES) cipher, the CIPHER type
667 implementation of AES is instantiated. The cipher handle for AES is
668 retained.
669 </para>
670
671 <para>
672 That means that the ABLKCIPHER implementation of CTR(AES) only
673 implements the CTR block chaining mode. After performing the block
674 chaining operation, the CIPHER implementation of AES is invoked.
675 </para>
676 </listitem>
677
678 <listitem>
679 <para>
680 The ABLKCIPHER of CTR(AES) now invokes the CIPHER API with the AES
681 cipher handle to encrypt one block.
682 </para>
683 </listitem>
684
685 <listitem>
686 <para>
687 The GCM AEAD implementation also invokes the GHASH cipher
688 implementation via the AHASH API.
689 </para>
690 </listitem>
691 </orderedlist>
692
693 <para>
694 When the IPSEC layer triggers the esp_input() function, the same call
695 sequence is followed with the only difference that the operation starts
696 with step (2).
697 </para>
698 </sect2>
699
700 <sect2><title>Generic Block Cipher Structure</title>
701 <para>
702 Generic block ciphers follow the same concept as depicted with the ASCII
703 art picture above.
704 </para>
705
706 <para>
 707 For example, CBC(AES) is implemented with cbc.c and aes-generic.c. The
708 ASCII art picture above applies as well with the difference that only
709 step (4) is used and the ABLKCIPHER block chaining mode is CBC.
710 </para>
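      <para>
        A minimal in-kernel sketch of this scenario, using the synchronous
        BLKCIPHER front end of the generic cbc(aes) implementation, may look
        as follows (function name hypothetical, error handling abbreviated):
      </para>

      <programlisting>
<![CDATA[
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int cbc_aes_encrypt_sketch(const u8 *key, unsigned int keylen,
				  const u8 *iv, struct scatterlist *dst,
				  struct scatterlist *src, unsigned int nbytes)
{
	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc;
	int ret;

	/* "cbc(aes)" resolves to cbc.c wrapping the CIPHER type of AES */
	tfm = crypto_alloc_blkcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc.tfm = tfm;
	desc.flags = 0;

	ret = crypto_blkcipher_setkey(tfm, key, keylen);
	if (!ret) {
		crypto_blkcipher_set_iv(tfm, iv, crypto_blkcipher_ivsize(tfm));
		ret = crypto_blkcipher_encrypt(&desc, dst, src, nbytes);
	}

	crypto_free_blkcipher(tfm);
	return ret;
}
]]>
      </programlisting>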
711 </sect2>
712
713 <sect2><title>Generic Keyed Message Digest Structure</title>
714 <para>
715 Keyed message digest implementations again follow the same concept as
716 depicted in the ASCII art picture above.
717 </para>
718
719 <para>
720 For example, HMAC(SHA256) is implemented with hmac.c and
721 sha256_generic.c. The following ASCII art illustrates the
722 implementation:
723 </para>
724
725 <programlisting>
726<![CDATA[
727kernel crypto API | Caller
728 |
729+-----------+ (1) |
730| | <------------------ some_function
731| ahash |
732| (hmac) | ---+
733+-----------+ |
734 | (2)
735+-----------+ |
736| | <--+
737| shash |
738| (sha256) |
739+-----------+
740]]>
741 </programlisting>
742
743 <para>
744 The following call sequence is applicable when a caller triggers
 745 an HMAC operation (an in-kernel sketch follows the list):
746 </para>
747
748 <orderedlist>
749 <listitem>
750 <para>
751 The AHASH API functions are invoked by the caller. The HMAC
752 implementation performs its operation as needed.
753 </para>
754
755 <para>
756 During initialization of the HMAC cipher, the SHASH cipher type of
757 SHA256 is instantiated. The cipher handle for the SHA256 instance is
758 retained.
759 </para>
760
761 <para>
 762 At some point, the HMAC implementation requires a SHA256 operation
 763 for which the retained SHA256 cipher handle is used.
764 </para>
765 </listitem>
766
767 <listitem>
768 <para>
769 The HMAC instance now invokes the SHASH API with the SHA256
770 cipher handle to calculate the message digest.
771 </para>
772 </listitem>
773 </orderedlist>
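      <para>
        As an in-kernel illustration of this call sequence, the following
        sketch requests the same hmac(sha256) construction through the
        synchronous SHASH front end; the function name is hypothetical and
        error handling is abbreviated:
      </para>

      <programlisting>
<![CDATA[
#include <crypto/hash.h>
#include <linux/err.h>

/* hypothetical helper: HMAC-SHA256 of data/len into out (32 bytes) */
static int hmac_sha256_sketch(const u8 *key, unsigned int keylen,
			      const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	int ret;

	/* "hmac(sha256)" resolves to hmac.c wrapping sha256_generic.c */
	tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_shash_setkey(tfm, key, keylen);
	if (!ret) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		ret = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return ret;
}
]]>
      </programlisting>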
774 </sect2>
775 </sect1>
 776 </chapter>
 777
 778 <chapter id="Development"><title>Developing Cipher Algorithms</title>
@@ -808,6 +1072,602 @@
 1072 </sect1>
 1073 </chapter>
 1074
1075 <chapter id="User"><title>User Space Interface</title>
1076 <sect1><title>Introduction</title>
1077 <para>
 1078 The concepts of the kernel crypto API visible to kernel space are fully
1079 applicable to the user space interface as well. Therefore, the kernel
1080 crypto API high level discussion for the in-kernel use cases applies
1081 here as well.
1082 </para>
1083
1084 <para>
1085 The major difference, however, is that user space can only act as a
1086 consumer and never as a provider of a transformation or cipher algorithm.
1087 </para>
1088
1089 <para>
1090 The following covers the user space interface exported by the kernel
1091 crypto API. A working example of this description is libkcapi that
1092 can be obtained from [1]. That library can be used by user space
1093 applications that require cryptographic services from the kernel.
1094 </para>
1095
1096 <para>
 1097 Some aspects of the in-kernel crypto API do not
1098 apply to user space, however. This includes the difference between
1099 synchronous and asynchronous invocations. The user space API call
1100 is fully synchronous.
1101 </para>
1102
1103 <para>
1104 [1] http://www.chronox.de/libkcapi.html
1105 </para>
1106
1107 </sect1>
1108
1109 <sect1><title>User Space API General Remarks</title>
1110 <para>
1111 The kernel crypto API is accessible from user space. Currently,
1112 the following ciphers are accessible:
1113 </para>
1114
1115 <itemizedlist>
1116 <listitem>
1117 <para>Message digest including keyed message digest (HMAC, CMAC)</para>
1118 </listitem>
1119
1120 <listitem>
1121 <para>Symmetric ciphers</para>
1122 </listitem>
1123
1124 <listitem>
1125 <para>AEAD ciphers</para>
1126 </listitem>
1127
1128 <listitem>
1129 <para>Random Number Generators</para>
1130 </listitem>
1131 </itemizedlist>
1132
1133 <para>
 1134 The interface is provided via sockets of the AF_ALG family.
1135 In addition, the setsockopt option type is SOL_ALG. In case the
1136 user space header files do not export these flags yet, use the
1137 following macros:
1138 </para>
1139
1140 <programlisting>
1141#ifndef AF_ALG
1142#define AF_ALG 38
1143#endif
1144#ifndef SOL_ALG
1145#define SOL_ALG 279
1146#endif
1147 </programlisting>
1148
1149 <para>
1150 A cipher is accessed with the same name as done for the in-kernel
1151 API calls. This includes the generic vs. unique naming schema for
1152 ciphers as well as the enforcement of priorities for generic names.
1153 </para>
1154
1155 <para>
1156 To interact with the kernel crypto API, a socket must be
1157 created by the user space application. User space invokes the cipher
1158 operation with the send()/write() system call family. The result of the
1159 cipher operation is obtained with the read()/recv() system call family.
1160 </para>
1161
1162 <para>
1163 The following API calls assume that the socket descriptor
 1164 is already opened by the user space application and discuss only
1165 the kernel crypto API specific invocations.
1166 </para>
1167
1168 <para>
1169 To initialize the socket interface, the following sequence has to
 1170 be performed by the consumer (a minimal sketch follows the list):
1171 </para>
1172
1173 <orderedlist>
1174 <listitem>
1175 <para>
1176 Create a socket of type AF_ALG with the struct sockaddr_alg
1177 parameter specified below for the different cipher types.
1178 </para>
1179 </listitem>
1180
1181 <listitem>
1182 <para>
1183 Invoke bind with the socket descriptor
1184 </para>
1185 </listitem>
1186
1187 <listitem>
1188 <para>
1189 Invoke accept with the socket descriptor. The accept system call
1190 returns a new file descriptor that is to be used to interact with
1191 the particular cipher instance. When invoking send/write or recv/read
1192 system calls to send data to the kernel or obtain data from the
1193 kernel, the file descriptor returned by accept must be used.
1194 </para>
1195 </listitem>
1196 </orderedlist>
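      <para>
        A minimal sketch of this sequence, assuming the "hash" cipher type
        with SHA-1 and omitting all error checking, may look as follows:
      </para>

      <programlisting>
<![CDATA[
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef AF_ALG
#define AF_ALG 38
#endif

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "sha1",
	};
	int tfmfd, opfd;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);       /* step 1 */
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)); /* step 2 */
	opfd = accept(tfmfd, NULL, 0);                   /* step 3 */

	/* opfd is now used with the send/write and recv/read calls */

	close(opfd);
	close(tfmfd);
	return 0;
}
]]>
      </programlisting>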
1197 </sect1>
1198
1199 <sect1><title>In-place Cipher operation</title>
1200 <para>
1201 Just like the in-kernel operation of the kernel crypto API, the user
1202 space interface allows the cipher operation in-place. That means that
1203 the input buffer used for the send/write system call and the output
1204 buffer used by the read/recv system call may be one and the same.
 1205 This is of particular interest for symmetric cipher operations where
 1206 copying of the output data to its final destination can be avoided.
1207 </para>
1208
1209 <para>
1210 If a consumer on the other hand wants to maintain the plaintext and
1211 the ciphertext in different memory locations, all a consumer needs
1212 to do is to provide different memory pointers for the encryption and
1213 decryption operation.
1214 </para>
1215 </sect1>
1216
1217 <sect1><title>Message Digest API</title>
1218 <para>
1219 The message digest type to be used for the cipher operation is
1220 selected when invoking the bind syscall. bind requires the caller
1221 to provide a filled struct sockaddr data structure. This data
1222 structure must be filled as follows:
1223 </para>
1224
1225 <programlisting>
1226struct sockaddr_alg sa = {
1227 .salg_family = AF_ALG,
1228 .salg_type = "hash", /* this selects the hash logic in the kernel */
1229 .salg_name = "sha1" /* this is the cipher name */
1230};
1231 </programlisting>
1232
1233 <para>
1234 The salg_type value "hash" applies to message digests and keyed
 1235 message digests. However, a keyed message digest is referenced by
1236 the appropriate salg_name. Please see below for the setsockopt
1237 interface that explains how the key can be set for a keyed message
1238 digest.
1239 </para>
1240
1241 <para>
1242 Using the send() system call, the application provides the data that
1243 should be processed with the message digest. The send system call
1244 allows the following flags to be specified:
1245 </para>
1246
1247 <itemizedlist>
1248 <listitem>
1249 <para>
1250 MSG_MORE: If this flag is set, the send system call acts like a
1251 message digest update function where the final hash is not
1252 yet calculated. If the flag is not set, the send system call
1253 calculates the final message digest immediately.
1254 </para>
1255 </listitem>
1256 </itemizedlist>
1257
1258 <para>
1259 With the recv() system call, the application can read the message
1260 digest from the kernel crypto API. If the buffer is too small for the
1261 message digest, the flag MSG_TRUNC is set by the kernel.
1262 </para>
1263
1264 <para>
1265 In order to set a message digest key, the calling application must use
 1266 the setsockopt() option of ALG_SET_KEY. If the key is not set, the HMAC
1267 operation is performed without the initial HMAC state change caused by
1268 the key.
1269 </para>
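      <para>
        Putting these calls together, a sketch of a one-shot SHA-1
        computation (helper name hypothetical, error checking omitted;
        opfd is the file descriptor returned by accept) could be:
      </para>

      <programlisting>
<![CDATA[
#include <unistd.h>
#include <sys/socket.h>

/* computes the 20-byte SHA-1 digest of in/inlen into out */
static void sha1_oneshot(int opfd, const void *in, size_t inlen,
			 unsigned char out[20])
{
	/* without MSG_MORE the final digest is calculated immediately */
	send(opfd, in, inlen, 0);
	read(opfd, out, 20);
}
]]>
      </programlisting>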
1270 </sect1>
1271
1272 <sect1><title>Symmetric Cipher API</title>
1273 <para>
1274 The operation is very similar to the message digest discussion.
1275 During initialization, the struct sockaddr data structure must be
1276 filled as follows:
1277 </para>
1278
1279 <programlisting>
1280struct sockaddr_alg sa = {
1281 .salg_family = AF_ALG,
1282 .salg_type = "skcipher", /* this selects the symmetric cipher */
1283 .salg_name = "cbc(aes)" /* this is the cipher name */
1284};
1285 </programlisting>
1286
1287 <para>
1288 Before data can be sent to the kernel using the write/send system
1289 call family, the consumer must set the key. The key setting is
1290 described with the setsockopt invocation below.
1291 </para>
1292
1293 <para>
1294 Using the sendmsg() system call, the application provides the data that should be processed for encryption or decryption. In addition, the IV is
1295 specified with the data structure provided by the sendmsg() system call.
1296 </para>
1297
1298 <para>
 1299 The struct cmsghdr data structure is embedded into the struct msghdr
 1300 parameter of the sendmsg system call. See recv(2) and cmsg(3) for more
 1301 information on how the cmsghdr data structure is used together with the
 1302 send/recv system call family. That cmsghdr data structure holds the
 1303 following information, each item specified with a separate header instance:
1304 </para>
1305
1306 <itemizedlist>
1307 <listitem>
1308 <para>
1309 specification of the cipher operation type with one of these flags:
1310 </para>
1311 <itemizedlist>
1312 <listitem>
1313 <para>ALG_OP_ENCRYPT - encryption of data</para>
1314 </listitem>
1315 <listitem>
1316 <para>ALG_OP_DECRYPT - decryption of data</para>
1317 </listitem>
1318 </itemizedlist>
1319 </listitem>
1320
1321 <listitem>
1322 <para>
1323 specification of the IV information marked with the flag ALG_SET_IV
1324 </para>
1325 </listitem>
1326 </itemizedlist>
1327
1328 <para>
1329 The send system call family allows the following flag to be specified:
1330 </para>
1331
1332 <itemizedlist>
1333 <listitem>
1334 <para>
1335 MSG_MORE: If this flag is set, the send system call acts like a
1336 cipher update function where more input data is expected
1337 with a subsequent invocation of the send system call.
1338 </para>
1339 </listitem>
1340 </itemizedlist>
1341
1342 <para>
1343 Note: The kernel reports -EINVAL for any unexpected data. The caller
1344 must make sure that all data matches the constraints given in
1345 /proc/crypto for the selected cipher.
1346 </para>
1347
1348 <para>
1349 With the recv() system call, the application can read the result of
1350 the cipher operation from the kernel crypto API. The output buffer
 1351 must be at least large enough to hold all blocks of the encrypted or
 1352 decrypted data. If the output buffer is smaller, only as many
 1353 blocks as fit into that output buffer are returned.
1354 </para>
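      <para>
        The following sketch combines these pieces for a single cbc(aes)
        block. The helper name is hypothetical, the key is assumed to have
        been set already via setsockopt(ALG_SET_KEY), and error checking
        is omitted:
      </para>

      <programlisting>
<![CDATA[
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/if_alg.h>

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

static void cbc_aes_encrypt_block(int opfd, const unsigned char iv[16],
				  const unsigned char in[16],
				  unsigned char out[16])
{
	char cbuf[CMSG_SPACE(sizeof(__u32)) +
		  CMSG_SPACE(sizeof(struct af_alg_iv) + 16)] = { 0 };
	struct iovec iov = { .iov_base = (void *)in, .iov_len = 16 };
	struct msghdr msg = {
		.msg_control    = cbuf,
		.msg_controllen = sizeof(cbuf),
		.msg_iov        = &iov,
		.msg_iovlen     = 1,
	};
	struct cmsghdr *cmsg;
	struct af_alg_iv *alg_iv;

	/* first header instance: select the operation type */
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type  = ALG_SET_OP;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(__u32));
	*(__u32 *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

	/* second header instance: provide the IV */
	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type  = ALG_SET_IV;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(struct af_alg_iv) + 16);
	alg_iv = (struct af_alg_iv *)CMSG_DATA(cmsg);
	alg_iv->ivlen = 16;
	memcpy(alg_iv->iv, iv, 16);

	sendmsg(opfd, &msg, 0);
	read(opfd, out, 16);
}
]]>
      </programlisting>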
1355 </sect1>
1356
1357 <sect1><title>AEAD Cipher API</title>
1358 <para>
1359 The operation is very similar to the symmetric cipher discussion.
1360 During initialization, the struct sockaddr data structure must be
1361 filled as follows:
1362 </para>
1363
1364 <programlisting>
1365struct sockaddr_alg sa = {
1366 .salg_family = AF_ALG,
1367 .salg_type = "aead", /* this selects the AEAD cipher */
1368 .salg_name = "gcm(aes)" /* this is the cipher name */
1369};
1370 </programlisting>
1371
1372 <para>
1373 Before data can be sent to the kernel using the write/send system
1374 call family, the consumer must set the key. The key setting is
1375 described with the setsockopt invocation below.
1376 </para>
1377
1378 <para>
1379 In addition, before data can be sent to the kernel using the
1380 write/send system call family, the consumer must set the authentication
1381 tag size. To set the authentication tag size, the caller must use the
1382 setsockopt invocation described below.
1383 </para>
1384
1385 <para>
1386 Using the sendmsg() system call, the application provides the data that should be processed for encryption or decryption. In addition, the IV is
1387 specified with the data structure provided by the sendmsg() system call.
1388 </para>
1389
1390 <para>
 1391 The struct cmsghdr data structure is embedded into the struct msghdr
 1392 parameter of the sendmsg system call. See recv(2) and cmsg(3) for more
 1393 information on how the cmsghdr data structure is used together with the
 1394 send/recv system call family. That cmsghdr data structure holds the
 1395 following information, each item specified with a separate header instance:
1396 </para>
1397
1398 <itemizedlist>
1399 <listitem>
1400 <para>
1401 specification of the cipher operation type with one of these flags:
1402 </para>
1403 <itemizedlist>
1404 <listitem>
1405 <para>ALG_OP_ENCRYPT - encryption of data</para>
1406 </listitem>
1407 <listitem>
1408 <para>ALG_OP_DECRYPT - decryption of data</para>
1409 </listitem>
1410 </itemizedlist>
1411 </listitem>
1412
1413 <listitem>
1414 <para>
1415 specification of the IV information marked with the flag ALG_SET_IV
1416 </para>
1417 </listitem>
1418
1419 <listitem>
1420 <para>
1421 specification of the associated authentication data (AAD) with the
1422 flag ALG_SET_AEAD_ASSOCLEN. The AAD is sent to the kernel together
1423 with the plaintext / ciphertext. See below for the memory structure.
1424 </para>
1425 </listitem>
1426 </itemizedlist>
1427
1428 <para>
1429 The send system call family allows the following flag to be specified:
1430 </para>
1431
1432 <itemizedlist>
1433 <listitem>
1434 <para>
1435 MSG_MORE: If this flag is set, the send system call acts like a
1436 cipher update function where more input data is expected
1437 with a subsequent invocation of the send system call.
1438 </para>
1439 </listitem>
1440 </itemizedlist>
1441
1442 <para>
1443 Note: The kernel reports -EINVAL for any unexpected data. The caller
1444 must make sure that all data matches the constraints given in
1445 /proc/crypto for the selected cipher.
1446 </para>
1447
1448 <para>
1449 With the recv() system call, the application can read the result of
1450 the cipher operation from the kernel crypto API. The output buffer
1451 must be at least as large as defined with the memory structure below.
1452 If the output data size is smaller, the cipher operation is not performed.
1453 </para>
1454
1455 <para>
1456 The authenticated decryption operation may indicate an integrity error.
 1457 Such a breach of integrity is reported with the -EBADMSG error code.
1458 </para>
1459
1460 <sect2><title>AEAD Memory Structure</title>
1461 <para>
1462 The AEAD cipher operates with the following information that
1463 is communicated between user and kernel space as one data stream:
1464 </para>
1465
1466 <itemizedlist>
1467 <listitem>
1468 <para>plaintext or ciphertext</para>
1469 </listitem>
1470
1471 <listitem>
1472 <para>associated authentication data (AAD)</para>
1473 </listitem>
1474
1475 <listitem>
1476 <para>authentication tag</para>
1477 </listitem>
1478 </itemizedlist>
1479
1480 <para>
1481 The sizes of the AAD and the authentication tag are provided with
1482 the sendmsg and setsockopt calls (see there). As the kernel knows
1483 the size of the entire data stream, the kernel is now able to
1484 calculate the right offsets of the data components in the data
1485 stream.
1486 </para>
1487
1488 <para>
1489 The user space caller must arrange the aforementioned information
1490 in the following order:
1491 </para>
1492
1493 <itemizedlist>
1494 <listitem>
1495 <para>
1496 AEAD encryption input: AAD || plaintext
1497 </para>
1498 </listitem>
1499
1500 <listitem>
1501 <para>
1502 AEAD decryption input: AAD || ciphertext || authentication tag
1503 </para>
1504 </listitem>
1505 </itemizedlist>
1506
1507 <para>
 1508 The output buffer the user space caller provides must be at least
 1509 large enough to hold the following data (sizes sketched after this list):
1510 </para>
1511
1512 <itemizedlist>
1513 <listitem>
1514 <para>
1515 AEAD encryption output: ciphertext || authentication tag
1516 </para>
1517 </listitem>
1518
1519 <listitem>
1520 <para>
1521 AEAD decryption output: plaintext
1522 </para>
1523 </listitem>
1524 </itemizedlist>
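      <para>
        As an illustration of the resulting buffer sizes, assume gcm(aes)
        with a 16 byte authentication tag and 20 bytes of AAD:
      </para>

      <programlisting>
<![CDATA[
/* illustrative sizes, assuming ALG_SET_AEAD_AUTHSIZE = 16 and
 * ALG_SET_AEAD_ASSOCLEN = 20; ptlen is the plaintext length */
size_t assoclen = 20, taglen = 16, ptlen = 4096;

size_t enc_inlen  = assoclen + ptlen;          /* send: AAD || plaintext */
size_t enc_outlen = ptlen + taglen;            /* recv: ciphertext || tag */

size_t dec_inlen  = assoclen + ptlen + taglen; /* send: AAD || ciphertext || tag */
size_t dec_outlen = ptlen;                     /* recv: plaintext */
]]>
      </programlisting>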
1525 </sect2>
1526 </sect1>
1527
1528 <sect1><title>Random Number Generator API</title>
1529 <para>
1530 Again, the operation is very similar to the other APIs.
1531 During initialization, the struct sockaddr data structure must be
1532 filled as follows:
1533 </para>
1534
1535 <programlisting>
1536struct sockaddr_alg sa = {
1537 .salg_family = AF_ALG,
1538 .salg_type = "rng", /* this selects the random number generator */
1539 .salg_name = "drbg_nopr_sha256" /* this is the cipher name */
1540};
1541 </programlisting>
1542
1543 <para>
1544 Depending on the RNG type, the RNG must be seeded. The seed is provided
1545 using the setsockopt interface to set the key. For example, the
1546 ansi_cprng requires a seed. The DRBGs do not require a seed, but
1547 may be seeded.
1548 </para>
1549
1550 <para>
1551 Using the read()/recvmsg() system calls, random numbers can be obtained.
1552 The kernel generates at most 128 bytes in one call. If user space
1553 requires more data, multiple calls to read()/recvmsg() must be made.
1554 </para>
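      <para>
        A small sketch for obtaining 64 random bytes (helper name
        hypothetical, error checking omitted) may look as follows:
      </para>

      <programlisting>
<![CDATA[
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef AF_ALG
#define AF_ALG 38
#endif

static void get_random_bytes_af_alg(unsigned char buf[64])
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "rng",
		.salg_name   = "drbg_nopr_sha256",
	};
	int tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	int opfd;

	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
	opfd = accept(tfmfd, NULL, 0);
	read(opfd, buf, 64);	/* at most 128 bytes per call */
	close(opfd);
	close(tfmfd);
}
]]>
      </programlisting>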
1555
1556 <para>
1557 WARNING: The user space caller may invoke the initially mentioned
1558 accept system call multiple times. In this case, the returned file
1559 descriptors have the same state.
1560 </para>
1561
1562 </sect1>
1563
1564 <sect1><title>Zero-Copy Interface</title>
1565 <para>
 1566 In addition to the send/write/read/recv system call family, the AF_ALG
1567 interface can be accessed with the zero-copy interface of splice/vmsplice.
1568 As the name indicates, the kernel tries to avoid a copy operation into
1569 kernel space.
1570 </para>
1571
1572 <para>
1573 The zero-copy operation requires data to be aligned at the page boundary.
1574 Non-aligned data can be used as well, but may require more operations of
1575 the kernel which would defeat the speed gains obtained from the zero-copy
1576 interface.
1577 </para>
1578
1579 <para>
 1580 The system-inherent limit for the size of one zero-copy operation is
1581 16 pages. If more data is to be sent to AF_ALG, user space must slice
1582 the input into segments with a maximum size of 16 pages.
1583 </para>
1584
1585 <para>
1586 Zero-copy can be used with the following code example (a complete working
1587 example is provided with libkcapi):
1588 </para>
1589
1590 <programlisting>
1591int pipes[2];
1592
1593pipe(pipes);
1594/* input data in iov */
1595vmsplice(pipes[1], iov, iovlen, SPLICE_F_GIFT);
1596/* opfd is the file descriptor returned from accept() system call */
1597splice(pipes[0], NULL, opfd, NULL, ret, 0);
1598read(opfd, out, outlen);
1599 </programlisting>
1600
1601 </sect1>
1602
1603 <sect1><title>Setsockopt Interface</title>
1604 <para>
1605 In addition to the read/recv and send/write system call handling
1606 to send and retrieve data subject to the cipher operation, a consumer
1607 also needs to set the additional information for the cipher operation.
1608 This additional information is set using the setsockopt system call
1609 that must be invoked with the file descriptor of the open cipher
1610 (i.e. the file descriptor returned by the accept system call).
1611 </para>
1612
1613 <para>
1614 Each setsockopt invocation must use the level SOL_ALG.
1615 </para>
1616
1617 <para>
1618 The setsockopt interface allows setting the following data using
1619 the mentioned optname:
1620 </para>
1621
1622 <itemizedlist>
1623 <listitem>
1624 <para>
1625 ALG_SET_KEY -- Setting the key. Key setting is applicable to:
1626 </para>
1627 <itemizedlist>
1628 <listitem>
1629 <para>the skcipher cipher type (symmetric ciphers)</para>
1630 </listitem>
1631 <listitem>
1632 <para>the hash cipher type (keyed message digests)</para>
1633 </listitem>
1634 <listitem>
1635 <para>the AEAD cipher type</para>
1636 </listitem>
1637 <listitem>
1638 <para>the RNG cipher type to provide the seed</para>
1639 </listitem>
1640 </itemizedlist>
1641 </listitem>
1642
1643 <listitem>
1644 <para>
1645 ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size
 1646 for AEAD ciphers. For an encryption operation, the authentication
1647 tag of the given size will be generated. For a decryption operation,
1648 the provided ciphertext is assumed to contain an authentication tag
1649 of the given size (see section about AEAD memory layout below).
1650 </para>
1651 </listitem>
1652 </itemizedlist>
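      <para>
        A short sketch of both options (helper names hypothetical, tfmfd is
        the socket descriptor bound with bind):
      </para>

      <programlisting>
<![CDATA[
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

/* applies to skcipher, hash (HMAC/CMAC), aead and rng (seed) sockets */
static int set_cipher_key(int tfmfd, const unsigned char *key, int keylen)
{
	return setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, keylen);
}

/* AEAD only: optval is unused, the tag size is passed as optlen */
static int set_aead_authsize(int tfmfd, int taglen)
{
	return setsockopt(tfmfd, SOL_ALG, ALG_SET_AEAD_AUTHSIZE, NULL, taglen);
}
]]>
      </programlisting>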
1653
1654 </sect1>
1655
1656 <sect1><title>User space API example</title>
1657 <para>
1658 Please see [1] for libkcapi which provides an easy-to-use wrapper
 1659 around the aforementioned AF_ALG kernel interface. [1] also contains
1660 a test application that invokes all libkcapi API calls.
1661 </para>
1662
1663 <para>
1664 [1] http://www.chronox.de/libkcapi.html
1665 </para>
1666
1667 </sect1>
1668
1669 </chapter>
1670
 1671 <chapter id="API"><title>Programming Interface</title>
 1672 <sect1><title>Block Cipher Context Data Structures</title>
 1673!Pinclude/linux/crypto.h Block Cipher Context Data Structures
diff --git a/Documentation/crypto/crypto-API-userspace.txt b/Documentation/crypto/crypto-API-userspace.txt
deleted file mode 100644
index ac619cd90300..000000000000
--- a/Documentation/crypto/crypto-API-userspace.txt
+++ /dev/null
@@ -1,205 +0,0 @@
1Introduction
2============
3
4The concepts of the kernel crypto API visible to kernel space is fully
5applicable to the user space interface as well. Therefore, the kernel crypto API
6high level discussion for the in-kernel use cases applies here as well.
7
8The major difference, however, is that user space can only act as a consumer
9and never as a provider of a transformation or cipher algorithm.
10
11The following covers the user space interface exported by the kernel crypto
12API. A working example of this description is libkcapi that can be obtained from
13[1]. That library can be used by user space applications that require
14cryptographic services from the kernel.
15
16Some details of the in-kernel kernel crypto API aspects do not
17apply to user space, however. This includes the difference between synchronous
18and asynchronous invocations. The user space API call is fully synchronous.
19In addition, only a subset of all cipher types are available as documented
20below.
21
22
23User space API general remarks
24==============================
25
26The kernel crypto API is accessible from user space. Currently, the following
27ciphers are accessible:
28
29 * Message digest including keyed message digest (HMAC, CMAC)
30
31 * Symmetric ciphers
32
33Note, AEAD ciphers are currently not supported via the symmetric cipher
34interface.
35
36The interface is provided via Netlink using the type AF_ALG. In addition, the
37setsockopt option type is SOL_ALG. In case the user space header files do not
38export these flags yet, use the following macros:
39
40#ifndef AF_ALG
41#define AF_ALG 38
42#endif
43#ifndef SOL_ALG
44#define SOL_ALG 279
45#endif
46
47A cipher is accessed with the same name as done for the in-kernel API calls.
48This includes the generic vs. unique naming schema for ciphers as well as the
49enforcement of priorities for generic names.
50
51To interact with the kernel crypto API, a Netlink socket must be created by
52the user space application. User space invokes the cipher operation with the
53send/write system call family. The result of the cipher operation is obtained
54with the read/recv system call family.
55
56The following API calls assume that the Netlink socket descriptor is already
57opened by the user space application and discusses only the kernel crypto API
58specific invocations.
59
60To initialize a Netlink interface, the following sequence has to be performed
61by the consumer:
62
63 1. Create a socket of type AF_ALG with the struct sockaddr_alg parameter
64 specified below for the different cipher types.
65
66 2. Invoke bind with the socket descriptor
67
68 3. Invoke accept with the socket descriptor. The accept system call
69 returns a new file descriptor that is to be used to interact with
70 the particular cipher instance. When invoking send/write or recv/read
71 system calls to send data to the kernel or obtain data from the
72 kernel, the file descriptor returned by accept must be used.
73
74In-place cipher operation
75=========================
76
77Just like the in-kernel operation of the kernel crypto API, the user space
78interface allows the cipher operation in-place. That means that the input buffer
79used for the send/write system call and the output buffer used by the read/recv
80system call may be one and the same. This is of particular interest for
81symmetric cipher operations where a copying of the output data to its final
82destination can be avoided.
83
84If a consumer on the other hand wants to maintain the plaintext and the
85ciphertext in different memory locations, all a consumer needs to do is to
86provide different memory pointers for the encryption and decryption operation.
87
88Message digest API
89==================
90
91The message digest type to be used for the cipher operation is selected when
92invoking the bind syscall. bind requires the caller to provide a filled
93struct sockaddr data structure. This data structure must be filled as follows:
94
95struct sockaddr_alg sa = {
96 .salg_family = AF_ALG,
97 .salg_type = "hash", /* this selects the hash logic in the kernel */
98 .salg_name = "sha1" /* this is the cipher name */
99};
100
101The salg_type value "hash" applies to message digests and keyed message digests.
102Though, a keyed message digest is referenced by the appropriate salg_name.
103Please see below for the setsockopt interface that explains how the key can be
104set for a keyed message digest.
105
106Using the send() system call, the application provides the data that should be
107processed with the message digest. The send system call allows the following
108flags to be specified:
109
110 * MSG_MORE: If this flag is set, the send system call acts like a
111 message digest update function where the final hash is not
112 yet calculated. If the flag is not set, the send system call
113 calculates the final message digest immediately.
114
115With the recv() system call, the application can read the message digest from
116the kernel crypto API. If the buffer is too small for the message digest, the
117flag MSG_TRUNC is set by the kernel.
118
119In order to set a message digest key, the calling application must use the
120setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC operation is
121performed without the initial HMAC state change caused by the key.
122
123
124Symmetric cipher API
125====================
126
127The operation is very similar to the message digest discussion. During
128initialization, the struct sockaddr data structure must be filled as follows:
129
130struct sockaddr_alg sa = {
131 .salg_family = AF_ALG,
132 .salg_type = "skcipher", /* this selects the symmetric cipher */
133 .salg_name = "cbc(aes)" /* this is the cipher name */
134};
135
136Before data can be sent to the kernel using the write/send system call family,
137the consumer must set the key. The key setting is described with the setsockopt
138invocation below.
139
140Using the sendmsg() system call, the application provides the data that should
141be processed for encryption or decryption. In addition, the IV is specified
142with the data structure provided by the sendmsg() system call.
143
144The sendmsg system call parameter of struct msghdr is embedded into the
145struct cmsghdr data structure. See recv(2) and cmsg(3) for more information
146on how the cmsghdr data structure is used together with the send/recv system
147call family. That cmsghdr data structure holds the following information
148specified with a separate header instances:
149
150 * specification of the cipher operation type with one of these flags:
151 ALG_OP_ENCRYPT - encryption of data
152 ALG_OP_DECRYPT - decryption of data
153
154 * specification of the IV information marked with the flag ALG_SET_IV
155
156The send system call family allows the following flag to be specified:
157
158 * MSG_MORE: If this flag is set, the send system call acts like a
159 cipher update function where more input data is expected
160 with a subsequent invocation of the send system call.
161
162Note: The kernel reports -EINVAL for any unexpected data. The caller must
163make sure that all data matches the constraints given in /proc/crypto for the
164selected cipher.
165
166With the recv() system call, the application can read the result of the
167cipher operation from the kernel crypto API. The output buffer must be at least
168as large as to hold all blocks of the encrypted or decrypted data. If the output
169data size is smaller, only as many blocks are returned that fit into that
170output buffer size.
171
172Setsockopt interface
173====================
174
175In addition to the read/recv and send/write system call handling to send and
176retrieve data subject to the cipher operation, a consumer also needs to set
177the additional information for the cipher operation. This additional information
178is set using the setsockopt system call that must be invoked with the file
179descriptor of the open cipher (i.e. the file descriptor returned by the
180accept system call).
181
182Each setsockopt invocation must use the level SOL_ALG.
183
184The setsockopt interface allows setting the following data using the mentioned
185optname:
186
187 * ALG_SET_KEY -- Setting the key. Key setting is applicable to:
188
189 - the skcipher cipher type (symmetric ciphers)
190
191 - the hash cipher type (keyed message digests)
192
193User space API example
194======================
195
196Please see [1] for libkcapi which provides an easy-to-use wrapper around the
197aforementioned Netlink kernel interface. [1] also contains a test application
198that invokes all libkcapi API calls.
199
200[1] http://www.chronox.de/libkcapi.html
201
202Author
203======
204
205Stephan Mueller <smueller@chronox.de>
diff --git a/Documentation/devicetree/bindings/crypto/img-hash.txt b/Documentation/devicetree/bindings/crypto/img-hash.txt
new file mode 100644
index 000000000000..91a3d757d641
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/img-hash.txt
@@ -0,0 +1,27 @@
1Imagination Technologies hardware hash accelerator
2
3The hash accelerator provides hardware hashing acceleration for
4SHA1, SHA224, SHA256 and MD5 hashes
5
6Required properties:
7
8- compatible : "img,hash-accelerator"
9- reg : Offset and length of the register set for the module, and the DMA port
10- interrupts : The designated IRQ line for the hashing module.
11- dmas : DMA specifier as per Documentation/devicetree/bindings/dma/dma.txt
12- dma-names : Should be "tx"
13- clocks : Clock specifiers
14- clock-names : "sys" Used to clock the hash block registers
15 "hash" Used to clock data through the accelerator
16
17Example:
18
19 hash: hash@18149600 {
20 compatible = "img,hash-accelerator";
21 reg = <0x18149600 0x100>, <0x18101100 0x4>;
22 interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
23 dmas = <&dma 8 0xffffffff 0>;
24 dma-names = "tx";
25 clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
26 clock-names = "sys", "hash";
27 };
diff --git a/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt b/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt
new file mode 100644
index 000000000000..e25a456664b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt
@@ -0,0 +1,12 @@
1HWRNG support for the iproc-rng200 driver
2
3Required properties:
4- compatible : "brcm,iproc-rng200"
5- reg : base address and size of control register block
6
7Example:
8
9rng {
10 compatible = "brcm,iproc-rng200";
11 reg = <0x18032000 0x28>;
12};
diff --git a/MAINTAINERS b/MAINTAINERS
index 6ee1e79ea16b..7a8f367b4ebc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2825,6 +2825,7 @@ L: linux-crypto@vger.kernel.org
2825T: git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git
2826S: Maintained
2827F: Documentation/crypto/
2828F: Documentation/DocBook/crypto-API.tmpl
2829F: arch/*/crypto/
2830F: crypto/
2831F: drivers/crypto/
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index da1266c53c13..7cbf4ef5c6fd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2175,6 +2175,9 @@ source "arch/arm/Kconfig.debug"
2175source "security/Kconfig"
2176
2177source "crypto/Kconfig"
2178if CRYPTO
2179source "arch/arm/crypto/Kconfig"
2180endif
2181
2182source "lib/Kconfig"
2183
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
new file mode 100644
index 000000000000..8da2207b0072
--- /dev/null
+++ b/arch/arm/crypto/Kconfig
@@ -0,0 +1,130 @@
1
2menuconfig ARM_CRYPTO
3 bool "ARM Accelerated Cryptographic Algorithms"
4 depends on ARM
5 help
6 Say Y here to choose from a selection of cryptographic algorithms
7 implemented using ARM specific CPU features or instructions.
8
9if ARM_CRYPTO
10
11config CRYPTO_SHA1_ARM
12 tristate "SHA1 digest algorithm (ARM-asm)"
13 select CRYPTO_SHA1
14 select CRYPTO_HASH
15 help
16 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
17 using optimized ARM assembler.
18
19config CRYPTO_SHA1_ARM_NEON
20 tristate "SHA1 digest algorithm (ARM NEON)"
21 depends on KERNEL_MODE_NEON
22 select CRYPTO_SHA1_ARM
23 select CRYPTO_SHA1
24 select CRYPTO_HASH
25 help
26 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
27 using optimized ARM NEON assembly, when NEON instructions are
28 available.
29
30config CRYPTO_SHA1_ARM_CE
31 tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
32 depends on KERNEL_MODE_NEON
33 select CRYPTO_SHA1_ARM
34 select CRYPTO_HASH
35 help
36 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
37 using special ARMv8 Crypto Extensions.
38
39config CRYPTO_SHA2_ARM_CE
40 tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
41 depends on KERNEL_MODE_NEON
42 select CRYPTO_SHA256_ARM
43 select CRYPTO_HASH
44 help
45 SHA-256 secure hash standard (DFIPS 180-2) implemented
46 using special ARMv8 Crypto Extensions.
47
48config CRYPTO_SHA256_ARM
49 tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
50 select CRYPTO_HASH
51 depends on !CPU_V7M
52 help
53 SHA-256 secure hash standard (DFIPS 180-2) implemented
54 using optimized ARM assembler and NEON, when available.
55
56config CRYPTO_SHA512_ARM_NEON
57 tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
58 depends on KERNEL_MODE_NEON
59 select CRYPTO_SHA512
60 select CRYPTO_HASH
61 help
62 SHA-512 secure hash standard (DFIPS 180-2) implemented
63 using ARM NEON instructions, when available.
64
65 This version of SHA implements a 512 bit hash with 256 bits of
66 security against collision attacks.
67
68 This code also includes SHA-384, a 384 bit hash with 192 bits
69 of security against collision attacks.
70
71config CRYPTO_AES_ARM
72 tristate "AES cipher algorithms (ARM-asm)"
73 depends on ARM
74 select CRYPTO_ALGAPI
75 select CRYPTO_AES
76 help
77 Use optimized AES assembler routines for ARM platforms.
78
79 AES cipher algorithms (FIPS-197). AES uses the Rijndael
80 algorithm.
81
82 Rijndael appears to be consistently a very good performer in
83 both hardware and software across a wide range of computing
84 environments regardless of its use in feedback or non-feedback
85 modes. Its key setup time is excellent, and its key agility is
86 good. Rijndael's very low memory requirements make it very well
87 suited for restricted-space environments, in which it also
88 demonstrates excellent performance. Rijndael's operations are
89 among the easiest to defend against power and timing attacks.
90
91 The AES specifies three key sizes: 128, 192 and 256 bits
92
93 See <http://csrc.nist.gov/encryption/aes/> for more information.
94
95config CRYPTO_AES_ARM_BS
96 tristate "Bit sliced AES using NEON instructions"
97 depends on KERNEL_MODE_NEON
98 select CRYPTO_ALGAPI
99 select CRYPTO_AES_ARM
100 select CRYPTO_ABLK_HELPER
101 help
102 Use a faster and more secure NEON based implementation of AES in CBC,
103 CTR and XTS modes
104
105 Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
106 and for XTS mode encryption, CBC and XTS mode decryption speedup is
107 around 25%. (CBC encryption speed is not affected by this driver.)
108 This implementation does not rely on any lookup tables so it is
109 believed to be invulnerable to cache timing attacks.
110
111config CRYPTO_AES_ARM_CE
112 tristate "Accelerated AES using ARMv8 Crypto Extensions"
113 depends on KERNEL_MODE_NEON
114 select CRYPTO_ALGAPI
115 select CRYPTO_ABLK_HELPER
116 help
117 Use an implementation of AES in CBC, CTR and XTS modes that uses
118 ARMv8 Crypto Extensions
119
120config CRYPTO_GHASH_ARM_CE
121 tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
122 depends on KERNEL_MODE_NEON
123 select CRYPTO_HASH
124 select CRYPTO_CRYPTD
125 help
126 Use an implementation of GHASH (used by the GCM AEAD chaining mode)
127 that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
128 that is part of the ARMv8 Crypto Extensions
129
130endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index b48fa341648d..6ea828241fcb 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -6,13 +6,35 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
6obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
7obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
8obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
9obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
10obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
11
12ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
13ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
14ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
15ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
16
17ifneq ($(ce-obj-y)$(ce-obj-m),)
18ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
19obj-y += $(ce-obj-y)
20obj-m += $(ce-obj-m)
21else
22$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
23$(warning $(ce-obj-y) $(ce-obj-m))
24endif
25endif
26
27aes-arm-y := aes-armv4.o aes_glue.o
28aes-arm-bs-y := aesbs-core.o aesbs-glue.o
29sha1-arm-y := sha1-armv4-large.o sha1_glue.o
30sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
31sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
32sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
33sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
34sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
35sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
36aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
37ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
16 38
17quiet_cmd_perl = PERL $@ 39quiet_cmd_perl = PERL $@
18 cmd_perl = $(PERL) $(<) > $(@) 40 cmd_perl = $(PERL) $(<) > $(@)
@@ -20,4 +42,7 @@ quiet_cmd_perl = PERL $@
20$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl 42$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
21 $(call cmd,perl) 43 $(call cmd,perl)
22 44
23.PRECIOUS: $(obj)/aesbs-core.S 45$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
46 $(call cmd,perl)
47
48.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
new file mode 100644
index 000000000000..8cfa468ee570
--- /dev/null
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -0,0 +1,518 @@
1/*
2 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13
14 .text
15 .fpu crypto-neon-fp-armv8
16 .align 3
17
18 .macro enc_round, state, key
19 aese.8 \state, \key
20 aesmc.8 \state, \state
21 .endm
22
23 .macro dec_round, state, key
24 aesd.8 \state, \key
25 aesimc.8 \state, \state
26 .endm
27
28 .macro enc_dround, key1, key2
29 enc_round q0, \key1
30 enc_round q0, \key2
31 .endm
32
33 .macro dec_dround, key1, key2
34 dec_round q0, \key1
35 dec_round q0, \key2
36 .endm
37
38 .macro enc_fround, key1, key2, key3
39 enc_round q0, \key1
40 aese.8 q0, \key2
41 veor q0, q0, \key3
42 .endm
43
44 .macro dec_fround, key1, key2, key3
45 dec_round q0, \key1
46 aesd.8 q0, \key2
47 veor q0, q0, \key3
48 .endm
49
50 .macro enc_dround_3x, key1, key2
51 enc_round q0, \key1
52 enc_round q1, \key1
53 enc_round q2, \key1
54 enc_round q0, \key2
55 enc_round q1, \key2
56 enc_round q2, \key2
57 .endm
58
59 .macro dec_dround_3x, key1, key2
60 dec_round q0, \key1
61 dec_round q1, \key1
62 dec_round q2, \key1
63 dec_round q0, \key2
64 dec_round q1, \key2
65 dec_round q2, \key2
66 .endm
67
68 .macro enc_fround_3x, key1, key2, key3
69 enc_round q0, \key1
70 enc_round q1, \key1
71 enc_round q2, \key1
72 aese.8 q0, \key2
73 aese.8 q1, \key2
74 aese.8 q2, \key2
75 veor q0, q0, \key3
76 veor q1, q1, \key3
77 veor q2, q2, \key3
78 .endm
79
80 .macro dec_fround_3x, key1, key2, key3
81 dec_round q0, \key1
82 dec_round q1, \key1
83 dec_round q2, \key1
84 aesd.8 q0, \key2
85 aesd.8 q1, \key2
86 aesd.8 q2, \key2
87 veor q0, q0, \key3
88 veor q1, q1, \key3
89 veor q2, q2, \key3
90 .endm
91
92 .macro do_block, dround, fround
93 cmp r3, #12 @ which key size?
94 vld1.8 {q10-q11}, [ip]!
95 \dround q8, q9
96 vld1.8 {q12-q13}, [ip]!
97 \dround q10, q11
98 vld1.8 {q10-q11}, [ip]!
99 \dround q12, q13
100 vld1.8 {q12-q13}, [ip]!
101 \dround q10, q11
102 blo 0f @ AES-128: 10 rounds
103 vld1.8 {q10-q11}, [ip]!
104 beq 1f @ AES-192: 12 rounds
105 \dround q12, q13
106 vld1.8 {q12-q13}, [ip]
107 \dround q10, q11
1080: \fround q12, q13, q14
109 bx lr
110
1111: \dround q12, q13
112 \fround q10, q11, q14
113 bx lr
114 .endm
115
116 /*
117 * Internal, non-AAPCS compliant functions that implement the core AES
118 * transforms. These should preserve all registers except q0 - q2 and ip
119 * Arguments:
120 * q0 : first in/output block
121 * q1 : second in/output block (_3x version only)
122 * q2 : third in/output block (_3x version only)
123 * q8 : first round key
 124 * q9 : second round key
125 * ip : address of 3rd round key
126 * q14 : final round key
127 * r3 : number of rounds
128 */
129 .align 6
130aes_encrypt:
131 add ip, r2, #32 @ 3rd round key
132.Laes_encrypt_tweak:
133 do_block enc_dround, enc_fround
134ENDPROC(aes_encrypt)
135
136 .align 6
137aes_decrypt:
138 add ip, r2, #32 @ 3rd round key
139 do_block dec_dround, dec_fround
140ENDPROC(aes_decrypt)
141
142 .align 6
143aes_encrypt_3x:
144 add ip, r2, #32 @ 3rd round key
145 do_block enc_dround_3x, enc_fround_3x
146ENDPROC(aes_encrypt_3x)
147
148 .align 6
149aes_decrypt_3x:
150 add ip, r2, #32 @ 3rd round key
151 do_block dec_dround_3x, dec_fround_3x
152ENDPROC(aes_decrypt_3x)
153
154 .macro prepare_key, rk, rounds
155 add ip, \rk, \rounds, lsl #4
156 vld1.8 {q8-q9}, [\rk] @ load first 2 round keys
157 vld1.8 {q14}, [ip] @ load last round key
158 .endm
159
160 /*
161 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
162 * int blocks)
163 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
164 * int blocks)
165 */
166ENTRY(ce_aes_ecb_encrypt)
167 push {r4, lr}
168 ldr r4, [sp, #8]
169 prepare_key r2, r3
170.Lecbencloop3x:
171 subs r4, r4, #3
172 bmi .Lecbenc1x
173 vld1.8 {q0-q1}, [r1, :64]!
174 vld1.8 {q2}, [r1, :64]!
175 bl aes_encrypt_3x
176 vst1.8 {q0-q1}, [r0, :64]!
177 vst1.8 {q2}, [r0, :64]!
178 b .Lecbencloop3x
179.Lecbenc1x:
180 adds r4, r4, #3
181 beq .Lecbencout
182.Lecbencloop:
183 vld1.8 {q0}, [r1, :64]!
184 bl aes_encrypt
185 vst1.8 {q0}, [r0, :64]!
186 subs r4, r4, #1
187 bne .Lecbencloop
188.Lecbencout:
189 pop {r4, pc}
190ENDPROC(ce_aes_ecb_encrypt)
191
192ENTRY(ce_aes_ecb_decrypt)
193 push {r4, lr}
194 ldr r4, [sp, #8]
195 prepare_key r2, r3
196.Lecbdecloop3x:
197 subs r4, r4, #3
198 bmi .Lecbdec1x
199 vld1.8 {q0-q1}, [r1, :64]!
200 vld1.8 {q2}, [r1, :64]!
201 bl aes_decrypt_3x
202 vst1.8 {q0-q1}, [r0, :64]!
203 vst1.8 {q2}, [r0, :64]!
204 b .Lecbdecloop3x
205.Lecbdec1x:
206 adds r4, r4, #3
207 beq .Lecbdecout
208.Lecbdecloop:
209 vld1.8 {q0}, [r1, :64]!
210 bl aes_decrypt
211 vst1.8 {q0}, [r0, :64]!
212 subs r4, r4, #1
213 bne .Lecbdecloop
214.Lecbdecout:
215 pop {r4, pc}
216ENDPROC(ce_aes_ecb_decrypt)
217
218 /*
219 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
220 * int blocks, u8 iv[])
221 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
222 * int blocks, u8 iv[])
223 */
224ENTRY(ce_aes_cbc_encrypt)
225 push {r4-r6, lr}
226 ldrd r4, r5, [sp, #16]
227 vld1.8 {q0}, [r5]
228 prepare_key r2, r3
229.Lcbcencloop:
230 vld1.8 {q1}, [r1, :64]! @ get next pt block
231 veor q0, q0, q1 @ ..and xor with iv
232 bl aes_encrypt
233 vst1.8 {q0}, [r0, :64]!
234 subs r4, r4, #1
235 bne .Lcbcencloop
236 vst1.8 {q0}, [r5]
237 pop {r4-r6, pc}
238ENDPROC(ce_aes_cbc_encrypt)
239
240ENTRY(ce_aes_cbc_decrypt)
241 push {r4-r6, lr}
242 ldrd r4, r5, [sp, #16]
243 vld1.8 {q6}, [r5] @ keep iv in q6
244 prepare_key r2, r3
245.Lcbcdecloop3x:
246 subs r4, r4, #3
247 bmi .Lcbcdec1x
248 vld1.8 {q0-q1}, [r1, :64]!
249 vld1.8 {q2}, [r1, :64]!
250 vmov q3, q0
251 vmov q4, q1
252 vmov q5, q2
253 bl aes_decrypt_3x
254 veor q0, q0, q6
255 veor q1, q1, q3
256 veor q2, q2, q4
257 vmov q6, q5
258 vst1.8 {q0-q1}, [r0, :64]!
259 vst1.8 {q2}, [r0, :64]!
260 b .Lcbcdecloop3x
261.Lcbcdec1x:
262 adds r4, r4, #3
263 beq .Lcbcdecout
264 vmov q15, q14 @ preserve last round key
265.Lcbcdecloop:
266 vld1.8 {q0}, [r1, :64]! @ get next ct block
267 veor q14, q15, q6 @ combine prev ct with last key
268 vmov q6, q0
269 bl aes_decrypt
270 vst1.8 {q0}, [r0, :64]!
271 subs r4, r4, #1
272 bne .Lcbcdecloop
273.Lcbcdecout:
274 vst1.8 {q6}, [r5] @ keep iv in q6
275 pop {r4-r6, pc}
276ENDPROC(ce_aes_cbc_decrypt)
277
278 /*
279 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
280 * int blocks, u8 ctr[])
281 */
282ENTRY(ce_aes_ctr_encrypt)
283 push {r4-r6, lr}
284 ldrd r4, r5, [sp, #16]
285 vld1.8 {q6}, [r5] @ load ctr
286 prepare_key r2, r3
287 vmov r6, s27 @ keep swabbed ctr in r6
288 rev r6, r6
289 cmn r6, r4 @ 32 bit overflow?
290 bcs .Lctrloop
291.Lctrloop3x:
292 subs r4, r4, #3
293 bmi .Lctr1x
294 add r6, r6, #1
295 vmov q0, q6
296 vmov q1, q6
297 rev ip, r6
298 add r6, r6, #1
299 vmov q2, q6
300 vmov s7, ip
301 rev ip, r6
302 add r6, r6, #1
303 vmov s11, ip
304 vld1.8 {q3-q4}, [r1, :64]!
305 vld1.8 {q5}, [r1, :64]!
306 bl aes_encrypt_3x
307 veor q0, q0, q3
308 veor q1, q1, q4
309 veor q2, q2, q5
310 rev ip, r6
311 vst1.8 {q0-q1}, [r0, :64]!
312 vst1.8 {q2}, [r0, :64]!
313 vmov s27, ip
314 b .Lctrloop3x
315.Lctr1x:
316 adds r4, r4, #3
317 beq .Lctrout
318.Lctrloop:
319 vmov q0, q6
320 bl aes_encrypt
321 subs r4, r4, #1
322 bmi .Lctrhalfblock @ blocks < 0 means 1/2 block
323 vld1.8 {q3}, [r1, :64]!
324 veor q3, q0, q3
325 vst1.8 {q3}, [r0, :64]!
326
327 adds r6, r6, #1 @ increment BE ctr
328 rev ip, r6
329 vmov s27, ip
330 bcs .Lctrcarry
331 teq r4, #0
332 bne .Lctrloop
333.Lctrout:
334 vst1.8 {q6}, [r5]
335 pop {r4-r6, pc}
336
337.Lctrhalfblock:
338 vld1.8 {d1}, [r1, :64]
339 veor d0, d0, d1
340 vst1.8 {d0}, [r0, :64]
341 pop {r4-r6, pc}
342
343.Lctrcarry:
344 .irp sreg, s26, s25, s24
345 vmov ip, \sreg @ load next word of ctr
346 rev ip, ip @ ... to handle the carry
347 adds ip, ip, #1
348 rev ip, ip
349 vmov \sreg, ip
350 bcc 0f
351 .endr
3520: teq r4, #0
353 beq .Lctrout
354 b .Lctrloop
355ENDPROC(ce_aes_ctr_encrypt)
356
357 /*
358 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
359 * int blocks, u8 iv[], u8 const rk2[], int first)
360 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
361 * int blocks, u8 iv[], u8 const rk2[], int first)
362 */
363
364 .macro next_tweak, out, in, const, tmp
365 vshr.s64 \tmp, \in, #63
366 vand \tmp, \tmp, \const
367 vadd.u64 \out, \in, \in
368 vext.8 \tmp, \tmp, \tmp, #8
369 veor \out, \out, \tmp
370 .endm
371
372 .align 3
373.Lxts_mul_x:
374 .quad 1, 0x87
375
376ce_aes_xts_init:
377 vldr d14, .Lxts_mul_x
378 vldr d15, .Lxts_mul_x + 8
379
380 ldrd r4, r5, [sp, #16] @ load args
381 ldr r6, [sp, #28]
382 vld1.8 {q0}, [r5] @ load iv
383 teq r6, #1 @ start of a block?
384 bxne lr
385
386 @ Encrypt the IV in q0 with the second AES key. This should only
387 @ be done at the start of a block.
388 ldr r6, [sp, #24] @ load AES key 2
389 prepare_key r6, r3
390 add ip, r6, #32 @ 3rd round key of key 2
391 b .Laes_encrypt_tweak @ tail call
392ENDPROC(ce_aes_xts_init)
393
394ENTRY(ce_aes_xts_encrypt)
395 push {r4-r6, lr}
396
397 bl ce_aes_xts_init @ run shared prologue
398 prepare_key r2, r3
399 vmov q3, q0
400
401 teq r6, #0 @ start of a block?
402 bne .Lxtsenc3x
403
404.Lxtsencloop3x:
405 next_tweak q3, q3, q7, q6
406.Lxtsenc3x:
407 subs r4, r4, #3
408 bmi .Lxtsenc1x
409 vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks
410 vld1.8 {q2}, [r1, :64]!
411 next_tweak q4, q3, q7, q6
412 veor q0, q0, q3
413 next_tweak q5, q4, q7, q6
414 veor q1, q1, q4
415 veor q2, q2, q5
416 bl aes_encrypt_3x
417 veor q0, q0, q3
418 veor q1, q1, q4
419 veor q2, q2, q5
420 vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks
421 vst1.8 {q2}, [r0, :64]!
422 vmov q3, q5
423 teq r4, #0
424 beq .Lxtsencout
425 b .Lxtsencloop3x
426.Lxtsenc1x:
427 adds r4, r4, #3
428 beq .Lxtsencout
429.Lxtsencloop:
430 vld1.8 {q0}, [r1, :64]!
431 veor q0, q0, q3
432 bl aes_encrypt
433 veor q0, q0, q3
434 vst1.8 {q0}, [r0, :64]!
435 subs r4, r4, #1
436 beq .Lxtsencout
437 next_tweak q3, q3, q7, q6
438 b .Lxtsencloop
439.Lxtsencout:
440 vst1.8 {q3}, [r5]
441 pop {r4-r6, pc}
442ENDPROC(ce_aes_xts_encrypt)
443
444
445ENTRY(ce_aes_xts_decrypt)
446 push {r4-r6, lr}
447
448 bl ce_aes_xts_init @ run shared prologue
449 prepare_key r2, r3
450 vmov q3, q0
451
452 teq r6, #0 @ start of a block?
453 bne .Lxtsdec3x
454
455.Lxtsdecloop3x:
456 next_tweak q3, q3, q7, q6
457.Lxtsdec3x:
458 subs r4, r4, #3
459 bmi .Lxtsdec1x
460 vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks
461 vld1.8 {q2}, [r1, :64]!
462 next_tweak q4, q3, q7, q6
463 veor q0, q0, q3
464 next_tweak q5, q4, q7, q6
465 veor q1, q1, q4
466 veor q2, q2, q5
467 bl aes_decrypt_3x
468 veor q0, q0, q3
469 veor q1, q1, q4
470 veor q2, q2, q5
471 vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks
472 vst1.8 {q2}, [r0, :64]!
473 vmov q3, q5
474 teq r4, #0
475 beq .Lxtsdecout
476 b .Lxtsdecloop3x
477.Lxtsdec1x:
478 adds r4, r4, #3
479 beq .Lxtsdecout
480.Lxtsdecloop:
481 vld1.8 {q0}, [r1, :64]!
482 veor q0, q0, q3
483 add ip, r2, #32 @ 3rd round key
484 bl aes_decrypt
485 veor q0, q0, q3
486 vst1.8 {q0}, [r0, :64]!
487 subs r4, r4, #1
488 beq .Lxtsdecout
489 next_tweak q3, q3, q7, q6
490 b .Lxtsdecloop
491.Lxtsdecout:
492 vst1.8 {q3}, [r5]
493 pop {r4-r6, pc}
494ENDPROC(ce_aes_xts_decrypt)
495
496 /*
497 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
498 * AES sbox substitution on each byte in
499 * 'input'
500 */
501ENTRY(ce_aes_sub)
502 vdup.32 q1, r0
503 veor q0, q0, q0
504 aese.8 q0, q1
505 vmov r0, s0
506 bx lr
507ENDPROC(ce_aes_sub)
508
509 /*
510 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
511 * operation on round key *src
512 */
513ENTRY(ce_aes_invert)
514 vld1.8 {q0}, [r1]
515 aesimc.8 q0, q0
516 vst1.8 {q0}, [r0]
517 bx lr
518ENDPROC(ce_aes_invert)
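
The next_tweak macro above is the standard XTS tweak doubling in GF(2^128): the 128-bit tweak is shifted left by one bit and, when the top bit carries out, folded back in with the reduction constant 0x87 (the .Lxts_mul_x pair {1, 0x87}). A rough stand-alone C equivalent, assuming little-endian 64-bit halves; a sketch for illustration only, not part of the patch:

#include <stdint.h>

/* Mirror of the vshr/vand/vadd/vext/veor sequence in next_tweak:
 * double the tweak and reduce with 0x87 when bit 127 carries out. */
static void xts_next_tweak(uint64_t out[2], const uint64_t in[2])
{
	/* arithmetic shift replicates the top bit of the high half */
	uint64_t carry = (uint64_t)((int64_t)in[1] >> 63);

	out[1] = (in[1] << 1) | (in[0] >> 63);
	out[0] = (in[0] << 1) ^ (carry & 0x87);
}
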
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
new file mode 100644
index 000000000000..b445a5d56f43
--- /dev/null
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -0,0 +1,524 @@
1/*
2 * aes-ce-glue.c - wrapper code for ARMv8 AES
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <asm/hwcap.h>
12#include <asm/neon.h>
13#include <asm/hwcap.h>
14#include <crypto/aes.h>
15#include <crypto/ablk_helper.h>
16#include <crypto/algapi.h>
17#include <linux/module.h>
18
19MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
20MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
21MODULE_LICENSE("GPL v2");
22
23/* defined in aes-ce-core.S */
24asmlinkage u32 ce_aes_sub(u32 input);
25asmlinkage void ce_aes_invert(void *dst, void *src);
26
27asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
28 int rounds, int blocks);
29asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
30 int rounds, int blocks);
31
32asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
33 int rounds, int blocks, u8 iv[]);
34asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
35 int rounds, int blocks, u8 iv[]);
36
37asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
38 int rounds, int blocks, u8 ctr[]);
39
40asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
41 int rounds, int blocks, u8 iv[],
42 u8 const rk2[], int first);
43asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
44 int rounds, int blocks, u8 iv[],
45 u8 const rk2[], int first);
46
47struct aes_block {
48 u8 b[AES_BLOCK_SIZE];
49};
50
51static int num_rounds(struct crypto_aes_ctx *ctx)
52{
53 /*
54 * # of rounds specified by AES:
55 * 128 bit key 10 rounds
56 * 192 bit key 12 rounds
57 * 256 bit key 14 rounds
58 * => n byte key => 6 + (n/4) rounds
59 */
60 return 6 + ctx->key_length / 4;
61}
62
63static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
64 unsigned int key_len)
65{
66 /*
67 * The AES key schedule round constants
68 */
69 static u8 const rcon[] = {
70 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
71 };
72
73 u32 kwords = key_len / sizeof(u32);
74 struct aes_block *key_enc, *key_dec;
75 int i, j;
76
77 if (key_len != AES_KEYSIZE_128 &&
78 key_len != AES_KEYSIZE_192 &&
79 key_len != AES_KEYSIZE_256)
80 return -EINVAL;
81
82 memcpy(ctx->key_enc, in_key, key_len);
83 ctx->key_length = key_len;
84
85 kernel_neon_begin();
86 for (i = 0; i < sizeof(rcon); i++) {
87 u32 *rki = ctx->key_enc + (i * kwords);
88 u32 *rko = rki + kwords;
89
90 rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
91 rko[0] = rko[0] ^ rki[0] ^ rcon[i];
92 rko[1] = rko[0] ^ rki[1];
93 rko[2] = rko[1] ^ rki[2];
94 rko[3] = rko[2] ^ rki[3];
95
96 if (key_len == AES_KEYSIZE_192) {
97 if (i >= 7)
98 break;
99 rko[4] = rko[3] ^ rki[4];
100 rko[5] = rko[4] ^ rki[5];
101 } else if (key_len == AES_KEYSIZE_256) {
102 if (i >= 6)
103 break;
104 rko[4] = ce_aes_sub(rko[3]) ^ rki[4];
105 rko[5] = rko[4] ^ rki[5];
106 rko[6] = rko[5] ^ rki[6];
107 rko[7] = rko[6] ^ rki[7];
108 }
109 }
110
111 /*
112 * Generate the decryption keys for the Equivalent Inverse Cipher.
113 * This involves reversing the order of the round keys, and applying
114 * the Inverse Mix Columns transformation on all but the first and
115 * the last one.
116 */
117 key_enc = (struct aes_block *)ctx->key_enc;
118 key_dec = (struct aes_block *)ctx->key_dec;
119 j = num_rounds(ctx);
120
121 key_dec[0] = key_enc[j];
122 for (i = 1, j--; j > 0; i++, j--)
123 ce_aes_invert(key_dec + i, key_enc + j);
124 key_dec[i] = key_enc[0];
125
126 kernel_neon_end();
127 return 0;
128}
129
130static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
131 unsigned int key_len)
132{
133 struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
134 int ret;
135
136 ret = ce_aes_expandkey(ctx, in_key, key_len);
137 if (!ret)
138 return 0;
139
140 tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
141 return -EINVAL;
142}
143
144struct crypto_aes_xts_ctx {
145 struct crypto_aes_ctx key1;
146 struct crypto_aes_ctx __aligned(8) key2;
147};
148
149static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
150 unsigned int key_len)
151{
152 struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
153 int ret;
154
155 ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2);
156 if (!ret)
157 ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2],
158 key_len / 2);
159 if (!ret)
160 return 0;
161
162 tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
163 return -EINVAL;
164}
165
166static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
167 struct scatterlist *src, unsigned int nbytes)
168{
169 struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
170 struct blkcipher_walk walk;
171 unsigned int blocks;
172 int err;
173
174 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
175 blkcipher_walk_init(&walk, dst, src, nbytes);
176 err = blkcipher_walk_virt(desc, &walk);
177
178 kernel_neon_begin();
179 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
180 ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
181 (u8 *)ctx->key_enc, num_rounds(ctx), blocks);
182 err = blkcipher_walk_done(desc, &walk,
183 walk.nbytes % AES_BLOCK_SIZE);
184 }
185 kernel_neon_end();
186 return err;
187}
188
189static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
190 struct scatterlist *src, unsigned int nbytes)
191{
192 struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
193 struct blkcipher_walk walk;
194 unsigned int blocks;
195 int err;
196
197 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
198 blkcipher_walk_init(&walk, dst, src, nbytes);
199 err = blkcipher_walk_virt(desc, &walk);
200
201 kernel_neon_begin();
202 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
203 ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
204 (u8 *)ctx->key_dec, num_rounds(ctx), blocks);
205 err = blkcipher_walk_done(desc, &walk,
206 walk.nbytes % AES_BLOCK_SIZE);
207 }
208 kernel_neon_end();
209 return err;
210}
211
212static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
213 struct scatterlist *src, unsigned int nbytes)
214{
215 struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
216 struct blkcipher_walk walk;
217 unsigned int blocks;
218 int err;
219
220 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
221 blkcipher_walk_init(&walk, dst, src, nbytes);
222 err = blkcipher_walk_virt(desc, &walk);
223
224 kernel_neon_begin();
225 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
226 ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
227 (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
228 walk.iv);
229 err = blkcipher_walk_done(desc, &walk,
230 walk.nbytes % AES_BLOCK_SIZE);
231 }
232 kernel_neon_end();
233 return err;
234}
235
236static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
237 struct scatterlist *src, unsigned int nbytes)
238{
239 struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
240 struct blkcipher_walk walk;
241 unsigned int blocks;
242 int err;
243
244 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
245 blkcipher_walk_init(&walk, dst, src, nbytes);
246 err = blkcipher_walk_virt(desc, &walk);
247
248 kernel_neon_begin();
249 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
250 ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
251 (u8 *)ctx->key_dec, num_rounds(ctx), blocks,
252 walk.iv);
253 err = blkcipher_walk_done(desc, &walk,
254 walk.nbytes % AES_BLOCK_SIZE);
255 }
256 kernel_neon_end();
257 return err;
258}
259
260static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
261 struct scatterlist *src, unsigned int nbytes)
262{
263 struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
264 struct blkcipher_walk walk;
265 int err, blocks;
266
267 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
268 blkcipher_walk_init(&walk, dst, src, nbytes);
269 err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
270
271 kernel_neon_begin();
272 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
273 ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
274 (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
275 walk.iv);
276 nbytes -= blocks * AES_BLOCK_SIZE;
277 if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
278 break;
279 err = blkcipher_walk_done(desc, &walk,
280 walk.nbytes % AES_BLOCK_SIZE);
281 }
282 if (nbytes) {
283 u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
284 u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
285 u8 __aligned(8) tail[AES_BLOCK_SIZE];
286
287 /*
288 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
 289 * to tell ce_aes_ctr_encrypt() to only read half a block.
290 */
291 blocks = (nbytes <= 8) ? -1 : 1;
292
293 ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
294 num_rounds(ctx), blocks, walk.iv);
295 memcpy(tdst, tail, nbytes);
296 err = blkcipher_walk_done(desc, &walk, 0);
297 }
298 kernel_neon_end();
299
300 return err;
301}
302
303static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
304 struct scatterlist *src, unsigned int nbytes)
305{
306 struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
307 int err, first, rounds = num_rounds(&ctx->key1);
308 struct blkcipher_walk walk;
309 unsigned int blocks;
310
311 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
312 blkcipher_walk_init(&walk, dst, src, nbytes);
313 err = blkcipher_walk_virt(desc, &walk);
314
315 kernel_neon_begin();
316 for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
317 ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
318 (u8 *)ctx->key1.key_enc, rounds, blocks,
319 walk.iv, (u8 *)ctx->key2.key_enc, first);
320 err = blkcipher_walk_done(desc, &walk,
321 walk.nbytes % AES_BLOCK_SIZE);
322 }
323 kernel_neon_end();
324
325 return err;
326}
327
328static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
329 struct scatterlist *src, unsigned int nbytes)
330{
331 struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
332 int err, first, rounds = num_rounds(&ctx->key1);
333 struct blkcipher_walk walk;
334 unsigned int blocks;
335
336 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
337 blkcipher_walk_init(&walk, dst, src, nbytes);
338 err = blkcipher_walk_virt(desc, &walk);
339
340 kernel_neon_begin();
341 for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
342 ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
343 (u8 *)ctx->key1.key_dec, rounds, blocks,
344 walk.iv, (u8 *)ctx->key2.key_enc, first);
345 err = blkcipher_walk_done(desc, &walk,
346 walk.nbytes % AES_BLOCK_SIZE);
347 }
348 kernel_neon_end();
349
350 return err;
351}
352
353static struct crypto_alg aes_algs[] = { {
354 .cra_name = "__ecb-aes-ce",
355 .cra_driver_name = "__driver-ecb-aes-ce",
356 .cra_priority = 0,
357 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
358 CRYPTO_ALG_INTERNAL,
359 .cra_blocksize = AES_BLOCK_SIZE,
360 .cra_ctxsize = sizeof(struct crypto_aes_ctx),
361 .cra_alignmask = 7,
362 .cra_type = &crypto_blkcipher_type,
363 .cra_module = THIS_MODULE,
364 .cra_blkcipher = {
365 .min_keysize = AES_MIN_KEY_SIZE,
366 .max_keysize = AES_MAX_KEY_SIZE,
367 .ivsize = AES_BLOCK_SIZE,
368 .setkey = ce_aes_setkey,
369 .encrypt = ecb_encrypt,
370 .decrypt = ecb_decrypt,
371 },
372}, {
373 .cra_name = "__cbc-aes-ce",
374 .cra_driver_name = "__driver-cbc-aes-ce",
375 .cra_priority = 0,
376 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
377 CRYPTO_ALG_INTERNAL,
378 .cra_blocksize = AES_BLOCK_SIZE,
379 .cra_ctxsize = sizeof(struct crypto_aes_ctx),
380 .cra_alignmask = 7,
381 .cra_type = &crypto_blkcipher_type,
382 .cra_module = THIS_MODULE,
383 .cra_blkcipher = {
384 .min_keysize = AES_MIN_KEY_SIZE,
385 .max_keysize = AES_MAX_KEY_SIZE,
386 .ivsize = AES_BLOCK_SIZE,
387 .setkey = ce_aes_setkey,
388 .encrypt = cbc_encrypt,
389 .decrypt = cbc_decrypt,
390 },
391}, {
392 .cra_name = "__ctr-aes-ce",
393 .cra_driver_name = "__driver-ctr-aes-ce",
394 .cra_priority = 0,
395 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
396 CRYPTO_ALG_INTERNAL,
397 .cra_blocksize = 1,
398 .cra_ctxsize = sizeof(struct crypto_aes_ctx),
399 .cra_alignmask = 7,
400 .cra_type = &crypto_blkcipher_type,
401 .cra_module = THIS_MODULE,
402 .cra_blkcipher = {
403 .min_keysize = AES_MIN_KEY_SIZE,
404 .max_keysize = AES_MAX_KEY_SIZE,
405 .ivsize = AES_BLOCK_SIZE,
406 .setkey = ce_aes_setkey,
407 .encrypt = ctr_encrypt,
408 .decrypt = ctr_encrypt,
409 },
410}, {
411 .cra_name = "__xts-aes-ce",
412 .cra_driver_name = "__driver-xts-aes-ce",
413 .cra_priority = 0,
414 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
415 CRYPTO_ALG_INTERNAL,
416 .cra_blocksize = AES_BLOCK_SIZE,
417 .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
418 .cra_alignmask = 7,
419 .cra_type = &crypto_blkcipher_type,
420 .cra_module = THIS_MODULE,
421 .cra_blkcipher = {
422 .min_keysize = 2 * AES_MIN_KEY_SIZE,
423 .max_keysize = 2 * AES_MAX_KEY_SIZE,
424 .ivsize = AES_BLOCK_SIZE,
425 .setkey = xts_set_key,
426 .encrypt = xts_encrypt,
427 .decrypt = xts_decrypt,
428 },
429}, {
430 .cra_name = "ecb(aes)",
431 .cra_driver_name = "ecb-aes-ce",
432 .cra_priority = 300,
433 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
434 .cra_blocksize = AES_BLOCK_SIZE,
435 .cra_ctxsize = sizeof(struct async_helper_ctx),
436 .cra_alignmask = 7,
437 .cra_type = &crypto_ablkcipher_type,
438 .cra_module = THIS_MODULE,
439 .cra_init = ablk_init,
440 .cra_exit = ablk_exit,
441 .cra_ablkcipher = {
442 .min_keysize = AES_MIN_KEY_SIZE,
443 .max_keysize = AES_MAX_KEY_SIZE,
444 .ivsize = AES_BLOCK_SIZE,
445 .setkey = ablk_set_key,
446 .encrypt = ablk_encrypt,
447 .decrypt = ablk_decrypt,
448 }
449}, {
450 .cra_name = "cbc(aes)",
451 .cra_driver_name = "cbc-aes-ce",
452 .cra_priority = 300,
453 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
454 .cra_blocksize = AES_BLOCK_SIZE,
455 .cra_ctxsize = sizeof(struct async_helper_ctx),
456 .cra_alignmask = 7,
457 .cra_type = &crypto_ablkcipher_type,
458 .cra_module = THIS_MODULE,
459 .cra_init = ablk_init,
460 .cra_exit = ablk_exit,
461 .cra_ablkcipher = {
462 .min_keysize = AES_MIN_KEY_SIZE,
463 .max_keysize = AES_MAX_KEY_SIZE,
464 .ivsize = AES_BLOCK_SIZE,
465 .setkey = ablk_set_key,
466 .encrypt = ablk_encrypt,
467 .decrypt = ablk_decrypt,
468 }
469}, {
470 .cra_name = "ctr(aes)",
471 .cra_driver_name = "ctr-aes-ce",
472 .cra_priority = 300,
473 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
474 .cra_blocksize = 1,
475 .cra_ctxsize = sizeof(struct async_helper_ctx),
476 .cra_alignmask = 7,
477 .cra_type = &crypto_ablkcipher_type,
478 .cra_module = THIS_MODULE,
479 .cra_init = ablk_init,
480 .cra_exit = ablk_exit,
481 .cra_ablkcipher = {
482 .min_keysize = AES_MIN_KEY_SIZE,
483 .max_keysize = AES_MAX_KEY_SIZE,
484 .ivsize = AES_BLOCK_SIZE,
485 .setkey = ablk_set_key,
486 .encrypt = ablk_encrypt,
487 .decrypt = ablk_decrypt,
488 }
489}, {
490 .cra_name = "xts(aes)",
491 .cra_driver_name = "xts-aes-ce",
492 .cra_priority = 300,
493 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
494 .cra_blocksize = AES_BLOCK_SIZE,
495 .cra_ctxsize = sizeof(struct async_helper_ctx),
496 .cra_alignmask = 7,
497 .cra_type = &crypto_ablkcipher_type,
498 .cra_module = THIS_MODULE,
499 .cra_init = ablk_init,
500 .cra_exit = ablk_exit,
501 .cra_ablkcipher = {
502 .min_keysize = 2 * AES_MIN_KEY_SIZE,
503 .max_keysize = 2 * AES_MAX_KEY_SIZE,
504 .ivsize = AES_BLOCK_SIZE,
505 .setkey = ablk_set_key,
506 .encrypt = ablk_encrypt,
507 .decrypt = ablk_decrypt,
508 }
509} };
510
511static int __init aes_init(void)
512{
513 if (!(elf_hwcap2 & HWCAP2_AES))
514 return -ENODEV;
515 return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
516}
517
518static void __exit aes_exit(void)
519{
520 crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
521}
522
523module_init(aes_init);
524module_exit(aes_exit);
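
The comment in num_rounds() above states the usual AES round-count rule: an n-byte key needs 6 + n/4 rounds. A trivial stand-alone check of that arithmetic, illustrative only and not part of the patch:

#include <assert.h>

static int aes_rounds(int key_bytes)
{
	return 6 + key_bytes / 4;	/* same formula as num_rounds() */
}

int main(void)
{
	assert(aes_rounds(16) == 10);	/* AES-128 */
	assert(aes_rounds(24) == 12);	/* AES-192 */
	assert(aes_rounds(32) == 14);	/* AES-256 */
	return 0;
}
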
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c
index 15468fbbdea3..6d685298690e 100644
--- a/arch/arm/crypto/aesbs-glue.c
+++ b/arch/arm/crypto/aesbs-glue.c
@@ -301,7 +301,8 @@ static struct crypto_alg aesbs_algs[] = { {
301 .cra_name = "__cbc-aes-neonbs", 301 .cra_name = "__cbc-aes-neonbs",
302 .cra_driver_name = "__driver-cbc-aes-neonbs", 302 .cra_driver_name = "__driver-cbc-aes-neonbs",
303 .cra_priority = 0, 303 .cra_priority = 0,
304 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 304 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
305 CRYPTO_ALG_INTERNAL,
305 .cra_blocksize = AES_BLOCK_SIZE, 306 .cra_blocksize = AES_BLOCK_SIZE,
306 .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), 307 .cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
307 .cra_alignmask = 7, 308 .cra_alignmask = 7,
@@ -319,7 +320,8 @@ static struct crypto_alg aesbs_algs[] = { {
319 .cra_name = "__ctr-aes-neonbs", 320 .cra_name = "__ctr-aes-neonbs",
320 .cra_driver_name = "__driver-ctr-aes-neonbs", 321 .cra_driver_name = "__driver-ctr-aes-neonbs",
321 .cra_priority = 0, 322 .cra_priority = 0,
322 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 323 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
324 CRYPTO_ALG_INTERNAL,
323 .cra_blocksize = 1, 325 .cra_blocksize = 1,
324 .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), 326 .cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
325 .cra_alignmask = 7, 327 .cra_alignmask = 7,
@@ -337,7 +339,8 @@ static struct crypto_alg aesbs_algs[] = { {
337 .cra_name = "__xts-aes-neonbs", 339 .cra_name = "__xts-aes-neonbs",
338 .cra_driver_name = "__driver-xts-aes-neonbs", 340 .cra_driver_name = "__driver-xts-aes-neonbs",
339 .cra_priority = 0, 341 .cra_priority = 0,
340 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 342 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
343 CRYPTO_ALG_INTERNAL,
341 .cra_blocksize = AES_BLOCK_SIZE, 344 .cra_blocksize = AES_BLOCK_SIZE,
342 .cra_ctxsize = sizeof(struct aesbs_xts_ctx), 345 .cra_ctxsize = sizeof(struct aesbs_xts_ctx),
343 .cra_alignmask = 7, 346 .cra_alignmask = 7,
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..f6ab8bcc9efe
--- /dev/null
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -0,0 +1,94 @@
1/*
2 * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
3 *
4 * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 */
10
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13
14 SHASH .req q0
15 SHASH2 .req q1
16 T1 .req q2
17 T2 .req q3
18 MASK .req q4
19 XL .req q5
20 XM .req q6
21 XH .req q7
22 IN1 .req q7
23
24 SHASH_L .req d0
25 SHASH_H .req d1
26 SHASH2_L .req d2
27 T1_L .req d4
28 MASK_L .req d8
29 XL_L .req d10
30 XL_H .req d11
31 XM_L .req d12
32 XM_H .req d13
33 XH_L .req d14
34
35 .text
36 .fpu crypto-neon-fp-armv8
37
38 /*
39 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
40 * struct ghash_key const *k, const char *head)
41 */
42ENTRY(pmull_ghash_update)
43 vld1.64 {SHASH}, [r3]
44 vld1.64 {XL}, [r1]
45 vmov.i8 MASK, #0xe1
46 vext.8 SHASH2, SHASH, SHASH, #8
47 vshl.u64 MASK, MASK, #57
48 veor SHASH2, SHASH2, SHASH
49
50 /* do the head block first, if supplied */
51 ldr ip, [sp]
52 teq ip, #0
53 beq 0f
54 vld1.64 {T1}, [ip]
55 teq r0, #0
56 b 1f
57
580: vld1.64 {T1}, [r2]!
59 subs r0, r0, #1
60
611: /* multiply XL by SHASH in GF(2^128) */
62#ifndef CONFIG_CPU_BIG_ENDIAN
63 vrev64.8 T1, T1
64#endif
65 vext.8 T2, XL, XL, #8
66 vext.8 IN1, T1, T1, #8
67 veor T1, T1, T2
68 veor XL, XL, IN1
69
70 vmull.p64 XH, SHASH_H, XL_H @ a1 * b1
71 veor T1, T1, XL
72 vmull.p64 XL, SHASH_L, XL_L @ a0 * b0
73 vmull.p64 XM, SHASH2_L, T1_L @ (a1 + a0)(b1 + b0)
74
75 vext.8 T1, XL, XH, #8
76 veor T2, XL, XH
77 veor XM, XM, T1
78 veor XM, XM, T2
79 vmull.p64 T2, XL_L, MASK_L
80
81 vmov XH_L, XM_H
82 vmov XM_H, XL_L
83
84 veor XL, XM, T2
85 vext.8 T2, XL, XL, #8
86 vmull.p64 XL, XL_L, MASK_L
87 veor T2, T2, XH
88 veor XL, XL, T2
89
90 bne 0b
91
92 vst1.64 {XL}, [r1]
93 bx lr
94ENDPROC(pmull_ghash_update)
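
Of the five vmull.p64 instructions above, three form the 128x128-bit carry-less product (the last two implement the reduction via MASK): rather than four 64x64 partial products, the code uses the Karatsuba shortcut, with SHASH2 holding SHASH_L ^ SHASH_H precomputed so the middle term costs one multiply plus XORs. A small stand-alone C check of the identity relied on, with a bit-by-bit stand-in for vmull.p64 and the polynomial reduction left out; sketch only, not part of the patch:

#include <assert.h>
#include <stdint.h>

/* 64x64 -> 128 bit carry-less multiply, bit by bit (what vmull.p64
 * computes in hardware). */
static void clmul64(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)
{
	uint64_t rl = 0, rh = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if ((b >> i) & 1) {
			rl ^= a << i;
			if (i)
				rh ^= a >> (64 - i);
		}
	}
	*lo = rl;
	*hi = rh;
}

int main(void)
{
	/* arbitrary test operands for the two 64-bit halves */
	uint64_t a0 = 0x0123456789abcdefULL, a1 = 0xfedcba9876543210ULL;
	uint64_t b0 = 0xdeadbeefcafebabeULL, b1 = 0x0f1e2d3c4b5a6978ULL;
	uint64_t xl_l, xl_h, xh_l, xh_h, m1_l, m1_h, m2_l, m2_h, xm_l, xm_h;

	clmul64(a0, b0, &xl_l, &xl_h);		/* XL = a0 * b0 */
	clmul64(a1, b1, &xh_l, &xh_h);		/* XH = a1 * b1 */

	/* schoolbook middle term: a1*b0 ^ a0*b1 */
	clmul64(a1, b0, &m1_l, &m1_h);
	clmul64(a0, b1, &m2_l, &m2_h);

	/* Karatsuba middle term: (a1^a0)*(b1^b0) ^ XL ^ XH */
	clmul64(a1 ^ a0, b1 ^ b0, &xm_l, &xm_h);
	xm_l ^= xl_l ^ xh_l;
	xm_h ^= xl_h ^ xh_h;

	assert(xm_l == (m1_l ^ m2_l) && xm_h == (m1_h ^ m2_h));
	return 0;
}
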
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000000..03a39fe29246
--- /dev/null
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -0,0 +1,320 @@
1/*
2 * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
3 *
4 * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 */
10
11#include <asm/hwcap.h>
12#include <asm/neon.h>
13#include <asm/simd.h>
14#include <asm/unaligned.h>
15#include <crypto/cryptd.h>
16#include <crypto/internal/hash.h>
17#include <crypto/gf128mul.h>
18#include <linux/crypto.h>
19#include <linux/module.h>
20
21MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
22MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
23MODULE_LICENSE("GPL v2");
24
25#define GHASH_BLOCK_SIZE 16
26#define GHASH_DIGEST_SIZE 16
27
28struct ghash_key {
29 u64 a;
30 u64 b;
31};
32
33struct ghash_desc_ctx {
34 u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
35 u8 buf[GHASH_BLOCK_SIZE];
36 u32 count;
37};
38
39struct ghash_async_ctx {
40 struct cryptd_ahash *cryptd_tfm;
41};
42
43asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
44 struct ghash_key const *k, const char *head);
45
46static int ghash_init(struct shash_desc *desc)
47{
48 struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
49
50 *ctx = (struct ghash_desc_ctx){};
51 return 0;
52}
53
54static int ghash_update(struct shash_desc *desc, const u8 *src,
55 unsigned int len)
56{
57 struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
58 unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
59
60 ctx->count += len;
61
62 if ((partial + len) >= GHASH_BLOCK_SIZE) {
63 struct ghash_key *key = crypto_shash_ctx(desc->tfm);
64 int blocks;
65
66 if (partial) {
67 int p = GHASH_BLOCK_SIZE - partial;
68
69 memcpy(ctx->buf + partial, src, p);
70 src += p;
71 len -= p;
72 }
73
74 blocks = len / GHASH_BLOCK_SIZE;
75 len %= GHASH_BLOCK_SIZE;
76
77 kernel_neon_begin();
78 pmull_ghash_update(blocks, ctx->digest, src, key,
79 partial ? ctx->buf : NULL);
80 kernel_neon_end();
81 src += blocks * GHASH_BLOCK_SIZE;
82 partial = 0;
83 }
84 if (len)
85 memcpy(ctx->buf + partial, src, len);
86 return 0;
87}
88
89static int ghash_final(struct shash_desc *desc, u8 *dst)
90{
91 struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
92 unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
93
94 if (partial) {
95 struct ghash_key *key = crypto_shash_ctx(desc->tfm);
96
97 memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
98 kernel_neon_begin();
99 pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
100 kernel_neon_end();
101 }
102 put_unaligned_be64(ctx->digest[1], dst);
103 put_unaligned_be64(ctx->digest[0], dst + 8);
104
105 *ctx = (struct ghash_desc_ctx){};
106 return 0;
107}
108
109static int ghash_setkey(struct crypto_shash *tfm,
110 const u8 *inkey, unsigned int keylen)
111{
112 struct ghash_key *key = crypto_shash_ctx(tfm);
113 u64 a, b;
114
115 if (keylen != GHASH_BLOCK_SIZE) {
116 crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
117 return -EINVAL;
118 }
119
120 /* perform multiplication by 'x' in GF(2^128) */
121 b = get_unaligned_be64(inkey);
122 a = get_unaligned_be64(inkey + 8);
123
124 key->a = (a << 1) | (b >> 63);
125 key->b = (b << 1) | (a >> 63);
126
127 if (b >> 63)
128 key->b ^= 0xc200000000000000UL;
129
130 return 0;
131}
132
133static struct shash_alg ghash_alg = {
134 .digestsize = GHASH_DIGEST_SIZE,
135 .init = ghash_init,
136 .update = ghash_update,
137 .final = ghash_final,
138 .setkey = ghash_setkey,
139 .descsize = sizeof(struct ghash_desc_ctx),
140 .base = {
141 .cra_name = "ghash",
142 .cra_driver_name = "__driver-ghash-ce",
143 .cra_priority = 0,
144 .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
145 .cra_blocksize = GHASH_BLOCK_SIZE,
146 .cra_ctxsize = sizeof(struct ghash_key),
147 .cra_module = THIS_MODULE,
148 },
149};
150
151static int ghash_async_init(struct ahash_request *req)
152{
153 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
154 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
155 struct ahash_request *cryptd_req = ahash_request_ctx(req);
156 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
157
158 if (!may_use_simd()) {
159 memcpy(cryptd_req, req, sizeof(*req));
160 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
161 return crypto_ahash_init(cryptd_req);
162 } else {
163 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
164 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
165
166 desc->tfm = child;
167 desc->flags = req->base.flags;
168 return crypto_shash_init(desc);
169 }
170}
171
172static int ghash_async_update(struct ahash_request *req)
173{
174 struct ahash_request *cryptd_req = ahash_request_ctx(req);
175
176 if (!may_use_simd()) {
177 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
178 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
179 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
180
181 memcpy(cryptd_req, req, sizeof(*req));
182 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
183 return crypto_ahash_update(cryptd_req);
184 } else {
185 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
186 return shash_ahash_update(req, desc);
187 }
188}
189
190static int ghash_async_final(struct ahash_request *req)
191{
192 struct ahash_request *cryptd_req = ahash_request_ctx(req);
193
194 if (!may_use_simd()) {
195 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
196 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
197 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
198
199 memcpy(cryptd_req, req, sizeof(*req));
200 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
201 return crypto_ahash_final(cryptd_req);
202 } else {
203 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
204 return crypto_shash_final(desc, req->result);
205 }
206}
207
208static int ghash_async_digest(struct ahash_request *req)
209{
210 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
211 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
212 struct ahash_request *cryptd_req = ahash_request_ctx(req);
213 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
214
215 if (!may_use_simd()) {
216 memcpy(cryptd_req, req, sizeof(*req));
217 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
218 return crypto_ahash_digest(cryptd_req);
219 } else {
220 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
221 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
222
223 desc->tfm = child;
224 desc->flags = req->base.flags;
225 return shash_ahash_digest(req, desc);
226 }
227}
228
229static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
230 unsigned int keylen)
231{
232 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
233 struct crypto_ahash *child = &ctx->cryptd_tfm->base;
234 int err;
235
236 crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
237 crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
238 & CRYPTO_TFM_REQ_MASK);
239 err = crypto_ahash_setkey(child, key, keylen);
240 crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
241 & CRYPTO_TFM_RES_MASK);
242
243 return err;
244}
245
246static int ghash_async_init_tfm(struct crypto_tfm *tfm)
247{
248 struct cryptd_ahash *cryptd_tfm;
249 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
250
251 cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce",
252 CRYPTO_ALG_INTERNAL,
253 CRYPTO_ALG_INTERNAL);
254 if (IS_ERR(cryptd_tfm))
255 return PTR_ERR(cryptd_tfm);
256 ctx->cryptd_tfm = cryptd_tfm;
257 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
258 sizeof(struct ahash_request) +
259 crypto_ahash_reqsize(&cryptd_tfm->base));
260
261 return 0;
262}
263
264static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
265{
266 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
267
268 cryptd_free_ahash(ctx->cryptd_tfm);
269}
270
271static struct ahash_alg ghash_async_alg = {
272 .init = ghash_async_init,
273 .update = ghash_async_update,
274 .final = ghash_async_final,
275 .setkey = ghash_async_setkey,
276 .digest = ghash_async_digest,
277 .halg.digestsize = GHASH_DIGEST_SIZE,
278 .halg.base = {
279 .cra_name = "ghash",
280 .cra_driver_name = "ghash-ce",
281 .cra_priority = 300,
282 .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
283 .cra_blocksize = GHASH_BLOCK_SIZE,
284 .cra_type = &crypto_ahash_type,
285 .cra_ctxsize = sizeof(struct ghash_async_ctx),
286 .cra_module = THIS_MODULE,
287 .cra_init = ghash_async_init_tfm,
288 .cra_exit = ghash_async_exit_tfm,
289 },
290};
291
292static int __init ghash_ce_mod_init(void)
293{
294 int err;
295
296 if (!(elf_hwcap2 & HWCAP2_PMULL))
297 return -ENODEV;
298
299 err = crypto_register_shash(&ghash_alg);
300 if (err)
301 return err;
302 err = crypto_register_ahash(&ghash_async_alg);
303 if (err)
304 goto err_shash;
305
306 return 0;
307
308err_shash:
309 crypto_unregister_shash(&ghash_alg);
310 return err;
311}
312
313static void __exit ghash_ce_mod_exit(void)
314{
315 crypto_unregister_ahash(&ghash_async_alg);
316 crypto_unregister_shash(&ghash_alg);
317}
318
319module_init(ghash_ce_mod_init);
320module_exit(ghash_ce_mod_exit);
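
Once this module is loaded, the "ghash" algorithm it registers at priority 300 is reachable through the regular crypto API, including from user space via AF_ALG. A minimal user-space sketch, assuming a kernel built with CONFIG_CRYPTO_USER_API_HASH; the key and message bytes are arbitrary placeholders, not test vectors:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef AF_ALG
#define AF_ALG 38
#endif
#ifndef SOL_ALG
#define SOL_ALG 279
#endif

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "ghash",
	};
	unsigned char key[16] = { 0x01 };	/* placeholder hash key H */
	unsigned char msg[16] = { 0x02 };	/* placeholder 16-byte block */
	unsigned char digest[16];
	int tfm, op;
	size_t i;

	tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (tfm < 0 || bind(tfm, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return 1;
	/* GHASH is keyed: set the key on the tfm socket before accept() */
	if (setsockopt(tfm, SOL_ALG, ALG_SET_KEY, key, sizeof(key)) < 0)
		return 1;

	op = accept(tfm, NULL, NULL);
	if (op < 0)
		return 1;

	if (write(op, msg, sizeof(msg)) != (ssize_t)sizeof(msg) ||
	    read(op, digest, sizeof(digest)) != (ssize_t)sizeof(digest))
		return 1;

	for (i = 0; i < sizeof(digest); i++)
		printf("%02x", digest[i]);
	printf("\n");

	close(op);
	close(tfm);
	return 0;
}
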
diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S
new file mode 100644
index 000000000000..b623f51ccbcf
--- /dev/null
+++ b/arch/arm/crypto/sha1-ce-core.S
@@ -0,0 +1,125 @@
1/*
2 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd.
5 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/linkage.h>
13#include <asm/assembler.h>
14
15 .text
16 .fpu crypto-neon-fp-armv8
17
18 k0 .req q0
19 k1 .req q1
20 k2 .req q2
21 k3 .req q3
22
23 ta0 .req q4
24 ta1 .req q5
25 tb0 .req q5
26 tb1 .req q4
27
28 dga .req q6
29 dgb .req q7
30 dgbs .req s28
31
32 dg0 .req q12
33 dg1a0 .req q13
34 dg1a1 .req q14
35 dg1b0 .req q14
36 dg1b1 .req q13
37
38 .macro add_only, op, ev, rc, s0, dg1
39 .ifnb \s0
40 vadd.u32 tb\ev, q\s0, \rc
41 .endif
42 sha1h.32 dg1b\ev, dg0
43 .ifb \dg1
44 sha1\op\().32 dg0, dg1a\ev, ta\ev
45 .else
46 sha1\op\().32 dg0, \dg1, ta\ev
47 .endif
48 .endm
49
50 .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
51 sha1su0.32 q\s0, q\s1, q\s2
52 add_only \op, \ev, \rc, \s1, \dg1
53 sha1su1.32 q\s0, q\s3
54 .endm
55
56 .align 6
57.Lsha1_rcon:
58 .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999
59 .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1
60 .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc
61 .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6
62
63 /*
64 * void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
65 * int blocks);
66 */
67ENTRY(sha1_ce_transform)
68 /* load round constants */
69 adr ip, .Lsha1_rcon
70 vld1.32 {k0-k1}, [ip, :128]!
71 vld1.32 {k2-k3}, [ip, :128]
72
73 /* load state */
74 vld1.32 {dga}, [r0]
75 vldr dgbs, [r0, #16]
76
77 /* load input */
780: vld1.32 {q8-q9}, [r1]!
79 vld1.32 {q10-q11}, [r1]!
80 subs r2, r2, #1
81
82#ifndef CONFIG_CPU_BIG_ENDIAN
83 vrev32.8 q8, q8
84 vrev32.8 q9, q9
85 vrev32.8 q10, q10
86 vrev32.8 q11, q11
87#endif
88
89 vadd.u32 ta0, q8, k0
90 vmov dg0, dga
91
92 add_update c, 0, k0, 8, 9, 10, 11, dgb
93 add_update c, 1, k0, 9, 10, 11, 8
94 add_update c, 0, k0, 10, 11, 8, 9
95 add_update c, 1, k0, 11, 8, 9, 10
96 add_update c, 0, k1, 8, 9, 10, 11
97
98 add_update p, 1, k1, 9, 10, 11, 8
99 add_update p, 0, k1, 10, 11, 8, 9
100 add_update p, 1, k1, 11, 8, 9, 10
101 add_update p, 0, k1, 8, 9, 10, 11
102 add_update p, 1, k2, 9, 10, 11, 8
103
104 add_update m, 0, k2, 10, 11, 8, 9
105 add_update m, 1, k2, 11, 8, 9, 10
106 add_update m, 0, k2, 8, 9, 10, 11
107 add_update m, 1, k2, 9, 10, 11, 8
108 add_update m, 0, k3, 10, 11, 8, 9
109
110 add_update p, 1, k3, 11, 8, 9, 10
111 add_only p, 0, k3, 9
112 add_only p, 1, k3, 10
113 add_only p, 0, k3, 11
114 add_only p, 1
115
116 /* update state */
117 vadd.u32 dga, dga, dg0
118 vadd.u32 dgb, dgb, dg1a0
119 bne 0b
120
121 /* store new state */
122 vst1.32 {dga}, [r0]
123 vstr dgbs, [r0, #16]
124 bx lr
125ENDPROC(sha1_ce_transform)
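
The c/p/m arguments passed to the macros above select the sha1c.32, sha1p.32 and sha1m.32 instructions, which correspond to the choose, parity and majority functions of the four 20-round SHA-1 stages, with the .Lsha1_rcon words supplying the per-stage constants. For comparison, a plain-C reference of the same per-block transform (big-endian message word loading as in FIPS 180); not part of the patch:

#include <stdint.h>

static uint32_t rol32(uint32_t x, int n)
{
	return (x << n) | (x >> (32 - n));
}

/* One 64-byte SHA-1 block, as sha1_ce_transform computes it per block. */
static void sha1_block(uint32_t state[5], const uint8_t block[64])
{
	static const uint32_t k[4] = {
		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6,
	};
	uint32_t w[80], a, b, c, d, e, f, t;
	int i;

	for (i = 0; i < 16; i++)
		w[i] = (uint32_t)block[4 * i] << 24 | block[4 * i + 1] << 16 |
		       block[4 * i + 2] << 8 | block[4 * i + 3];
	for (; i < 80; i++)
		w[i] = rol32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);

	a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4];

	for (i = 0; i < 80; i++) {
		if (i < 20)
			f = (b & c) | (~b & d);          /* sha1c: choose   */
		else if (i < 40 || i >= 60)
			f = b ^ c ^ d;                   /* sha1p: parity   */
		else
			f = (b & c) | (b & d) | (c & d); /* sha1m: majority */

		t = rol32(a, 5) + f + e + k[i / 20] + w[i];
		e = d; d = c; c = rol32(b, 30); b = a; a = t;
	}

	state[0] += a; state[1] += b; state[2] += c;
	state[3] += d; state[4] += e;
}
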
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
new file mode 100644
index 000000000000..80bc2fcd241a
--- /dev/null
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -0,0 +1,96 @@
1/*
2 * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <crypto/internal/hash.h>
12#include <crypto/sha.h>
13#include <crypto/sha1_base.h>
14#include <linux/crypto.h>
15#include <linux/module.h>
16
17#include <asm/hwcap.h>
18#include <asm/neon.h>
19#include <asm/simd.h>
20
21#include "sha1.h"
22
23MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
24MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
25MODULE_LICENSE("GPL v2");
26
27asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
28 int blocks);
29
30static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
31 unsigned int len)
32{
33 struct sha1_state *sctx = shash_desc_ctx(desc);
34
35 if (!may_use_simd() ||
36 (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
37 return sha1_update_arm(desc, data, len);
38
39 kernel_neon_begin();
40 sha1_base_do_update(desc, data, len, sha1_ce_transform);
41 kernel_neon_end();
42
43 return 0;
44}
45
46static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
47 unsigned int len, u8 *out)
48{
49 if (!may_use_simd())
50 return sha1_finup_arm(desc, data, len, out);
51
52 kernel_neon_begin();
53 if (len)
54 sha1_base_do_update(desc, data, len, sha1_ce_transform);
55 sha1_base_do_finalize(desc, sha1_ce_transform);
56 kernel_neon_end();
57
58 return sha1_base_finish(desc, out);
59}
60
61static int sha1_ce_final(struct shash_desc *desc, u8 *out)
62{
63 return sha1_ce_finup(desc, NULL, 0, out);
64}
65
66static struct shash_alg alg = {
67 .init = sha1_base_init,
68 .update = sha1_ce_update,
69 .final = sha1_ce_final,
70 .finup = sha1_ce_finup,
71 .descsize = sizeof(struct sha1_state),
72 .digestsize = SHA1_DIGEST_SIZE,
73 .base = {
74 .cra_name = "sha1",
75 .cra_driver_name = "sha1-ce",
76 .cra_priority = 200,
77 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
78 .cra_blocksize = SHA1_BLOCK_SIZE,
79 .cra_module = THIS_MODULE,
80 }
81};
82
83static int __init sha1_ce_mod_init(void)
84{
85 if (!(elf_hwcap2 & HWCAP2_SHA1))
86 return -ENODEV;
87 return crypto_register_shash(&alg);
88}
89
90static void __exit sha1_ce_mod_fini(void)
91{
92 crypto_unregister_shash(&alg);
93}
94
95module_init(sha1_ce_mod_init);
96module_exit(sha1_ce_mod_fini);
diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/crypto/sha1.h
index 75e6a417416b..ffd8bd08b1a7 100644
--- a/arch/arm/include/asm/crypto/sha1.h
+++ b/arch/arm/crypto/sha1.h
@@ -7,4 +7,7 @@
7extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, 7extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
8 unsigned int len); 8 unsigned int len);
9 9
10extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
11 unsigned int len, u8 *out);
12
10#endif 13#endif
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index e31b0440c613..6fc73bf8766d 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -22,127 +22,47 @@
22#include <linux/cryptohash.h> 22#include <linux/cryptohash.h>
23#include <linux/types.h> 23#include <linux/types.h>
24#include <crypto/sha.h> 24#include <crypto/sha.h>
25#include <crypto/sha1_base.h>
25#include <asm/byteorder.h> 26#include <asm/byteorder.h>
26#include <asm/crypto/sha1.h>
27 27
28#include "sha1.h"
28 29
29asmlinkage void sha1_block_data_order(u32 *digest, 30asmlinkage void sha1_block_data_order(u32 *digest,
30 const unsigned char *data, unsigned int rounds); 31 const unsigned char *data, unsigned int rounds);
31 32
32
33static int sha1_init(struct shash_desc *desc)
34{
35 struct sha1_state *sctx = shash_desc_ctx(desc);
36
37 *sctx = (struct sha1_state){
38 .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
39 };
40
41 return 0;
42}
43
44
45static int __sha1_update(struct sha1_state *sctx, const u8 *data,
46 unsigned int len, unsigned int partial)
47{
48 unsigned int done = 0;
49
50 sctx->count += len;
51
52 if (partial) {
53 done = SHA1_BLOCK_SIZE - partial;
54 memcpy(sctx->buffer + partial, data, done);
55 sha1_block_data_order(sctx->state, sctx->buffer, 1);
56 }
57
58 if (len - done >= SHA1_BLOCK_SIZE) {
59 const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
60 sha1_block_data_order(sctx->state, data + done, rounds);
61 done += rounds * SHA1_BLOCK_SIZE;
62 }
63
64 memcpy(sctx->buffer, data + done, len - done);
65 return 0;
66}
67
68
69int sha1_update_arm(struct shash_desc *desc, const u8 *data, 33int sha1_update_arm(struct shash_desc *desc, const u8 *data,
70 unsigned int len) 34 unsigned int len)
71{ 35{
72 struct sha1_state *sctx = shash_desc_ctx(desc); 36 /* make sure casting to sha1_block_fn() is safe */
73 unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; 37 BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
74 int res;
75 38
76 /* Handle the fast case right here */ 39 return sha1_base_do_update(desc, data, len,
77 if (partial + len < SHA1_BLOCK_SIZE) { 40 (sha1_block_fn *)sha1_block_data_order);
78 sctx->count += len;
79 memcpy(sctx->buffer + partial, data, len);
80 return 0;
81 }
82 res = __sha1_update(sctx, data, len, partial);
83 return res;
84} 41}
85EXPORT_SYMBOL_GPL(sha1_update_arm); 42EXPORT_SYMBOL_GPL(sha1_update_arm);
86 43
87
88/* Add padding and return the message digest. */
89static int sha1_final(struct shash_desc *desc, u8 *out) 44static int sha1_final(struct shash_desc *desc, u8 *out)
90{ 45{
91 struct sha1_state *sctx = shash_desc_ctx(desc); 46 sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order);
92 unsigned int i, index, padlen; 47 return sha1_base_finish(desc, out);
93 __be32 *dst = (__be32 *)out;
94 __be64 bits;
95 static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
96
97 bits = cpu_to_be64(sctx->count << 3);
98
99 /* Pad out to 56 mod 64 and append length */
100 index = sctx->count % SHA1_BLOCK_SIZE;
101 padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
102 /* We need to fill a whole block for __sha1_update() */
103 if (padlen <= 56) {
104 sctx->count += padlen;
105 memcpy(sctx->buffer + index, padding, padlen);
106 } else {
107 __sha1_update(sctx, padding, padlen, index);
108 }
109 __sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
110
111 /* Store state in digest */
112 for (i = 0; i < 5; i++)
113 dst[i] = cpu_to_be32(sctx->state[i]);
114
115 /* Wipe context */
116 memset(sctx, 0, sizeof(*sctx));
117 return 0;
118} 48}
119 49
120 50int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
121static int sha1_export(struct shash_desc *desc, void *out) 51 unsigned int len, u8 *out)
122{ 52{
123 struct sha1_state *sctx = shash_desc_ctx(desc); 53 sha1_base_do_update(desc, data, len,
124 memcpy(out, sctx, sizeof(*sctx)); 54 (sha1_block_fn *)sha1_block_data_order);
125 return 0; 55 return sha1_final(desc, out);
126} 56}
127 57EXPORT_SYMBOL_GPL(sha1_finup_arm);
128
129static int sha1_import(struct shash_desc *desc, const void *in)
130{
131 struct sha1_state *sctx = shash_desc_ctx(desc);
132 memcpy(sctx, in, sizeof(*sctx));
133 return 0;
134}
135
136 58
137static struct shash_alg alg = { 59static struct shash_alg alg = {
138 .digestsize = SHA1_DIGEST_SIZE, 60 .digestsize = SHA1_DIGEST_SIZE,
139 .init = sha1_init, 61 .init = sha1_base_init,
140 .update = sha1_update_arm, 62 .update = sha1_update_arm,
141 .final = sha1_final, 63 .final = sha1_final,
142 .export = sha1_export, 64 .finup = sha1_finup_arm,
143 .import = sha1_import,
144 .descsize = sizeof(struct sha1_state), 65 .descsize = sizeof(struct sha1_state),
145 .statesize = sizeof(struct sha1_state),
146 .base = { 66 .base = {
147 .cra_name = "sha1", 67 .cra_name = "sha1",
148 .cra_driver_name= "sha1-asm", 68 .cra_driver_name= "sha1-asm",
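
The BUILD_BUG_ON added to sha1_update_arm() exists because sha1_block_data_order() takes a bare u32 *digest while sha1_block_fn expects a struct sha1_state *; the cast is only sound while the state words sit at offset zero of the struct. A tiny stand-alone illustration of that invariant, using a simplified stand-in for the struct rather than the kernel definition; sketch only, not part of the patch:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Simplified stand-in for struct sha1_state: the digest words come
 * first, so a pointer to the struct aliases a pointer to the state
 * array.  That is the property the BUILD_BUG_ON pins down before
 * sha1_block_data_order() is cast to a sha1_block_fn. */
struct sha1_state_sketch {
	uint32_t state[5];
	uint64_t count;
	uint8_t buffer[64];
};

int main(void)
{
	struct sha1_state_sketch st;

	assert(offsetof(struct sha1_state_sketch, state) == 0);
	assert((void *)&st == (void *)st.state);
	return 0;
}
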
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index 0b0083757d47..4e22f122f966 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -25,147 +25,60 @@
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha1_base.h>
 #include <asm/neon.h>
 #include <asm/simd.h>
-#include <asm/crypto/sha1.h>
 
+#include "sha1.h"
 
 asmlinkage void sha1_transform_neon(void *state_h, const char *data,
 				    unsigned int rounds);
 
-
-static int sha1_neon_init(struct shash_desc *desc)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-
-	*sctx = (struct sha1_state){
-		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-	};
-
-	return 0;
-}
-
-static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
-			      unsigned int len, unsigned int partial)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int done = 0;
-
-	sctx->count += len;
-
-	if (partial) {
-		done = SHA1_BLOCK_SIZE - partial;
-		memcpy(sctx->buffer + partial, data, done);
-		sha1_transform_neon(sctx->state, sctx->buffer, 1);
-	}
-
-	if (len - done >= SHA1_BLOCK_SIZE) {
-		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-
-		sha1_transform_neon(sctx->state, data + done, rounds);
-		done += rounds * SHA1_BLOCK_SIZE;
-	}
-
-	memcpy(sctx->buffer, data + done, len - done);
-
-	return 0;
-}
-
 static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-	int res;
 
-	/* Handle the fast case right here */
-	if (partial + len < SHA1_BLOCK_SIZE) {
-		sctx->count += len;
-		memcpy(sctx->buffer + partial, data, len);
+	if (!may_use_simd() ||
+	    (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+		return sha1_update_arm(desc, data, len);
 
-		return 0;
-	}
-
-	if (!may_use_simd()) {
-		res = sha1_update_arm(desc, data, len);
-	} else {
-		kernel_neon_begin();
-		res = __sha1_neon_update(desc, data, len, partial);
-		kernel_neon_end();
-	}
-
-	return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha1_neon_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int i, index, padlen;
-	__be32 *dst = (__be32 *)out;
-	__be64 bits;
-	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-	bits = cpu_to_be64(sctx->count << 3);
-
-	/* Pad out to 56 mod 64 and append length */
-	index = sctx->count % SHA1_BLOCK_SIZE;
-	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
-	if (!may_use_simd()) {
-		sha1_update_arm(desc, padding, padlen);
-		sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
-	} else {
-		kernel_neon_begin();
-		/* We need to fill a whole block for __sha1_neon_update() */
-		if (padlen <= 56) {
-			sctx->count += padlen;
-			memcpy(sctx->buffer + index, padding, padlen);
-		} else {
-			__sha1_neon_update(desc, padding, padlen, index);
-		}
-		__sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
-		kernel_neon_end();
-	}
-
-	/* Store state in digest */
-	for (i = 0; i < 5; i++)
-		dst[i] = cpu_to_be32(sctx->state[i]);
-
-	/* Wipe context */
-	memset(sctx, 0, sizeof(*sctx));
+	kernel_neon_begin();
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_transform_neon);
+	kernel_neon_end();
 
 	return 0;
 }
 
-static int sha1_neon_export(struct shash_desc *desc, void *out)
+static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
+			   unsigned int len, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
+	if (!may_use_simd())
+		return sha1_finup_arm(desc, data, len, out);
 
-	memcpy(out, sctx, sizeof(*sctx));
+	kernel_neon_begin();
+	if (len)
+		sha1_base_do_update(desc, data, len,
+				    (sha1_block_fn *)sha1_transform_neon);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon);
+	kernel_neon_end();
 
-	return 0;
+	return sha1_base_finish(desc, out);
 }
 
-static int sha1_neon_import(struct shash_desc *desc, const void *in)
+static int sha1_neon_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
+	return sha1_neon_finup(desc, NULL, 0, out);
 }
 
 static struct shash_alg alg = {
 	.digestsize	= SHA1_DIGEST_SIZE,
-	.init		= sha1_neon_init,
+	.init		= sha1_base_init,
 	.update		= sha1_neon_update,
 	.final		= sha1_neon_final,
-	.export		= sha1_neon_export,
-	.import		= sha1_neon_import,
+	.finup		= sha1_neon_finup,
 	.descsize	= sizeof(struct sha1_state),
-	.statesize	= sizeof(struct sha1_state),
 	.base		= {
 		.cra_name		= "sha1",
 		.cra_driver_name	= "sha1-neon",
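For context, a minimal sketch (not part of this patch) of how kernel code would consume these SHA-1 shash drivers through the generic crypto API: requesting "sha1" lets the crypto core pick the highest-priority registered implementation (e.g. "sha1-neon" over "sha1-asm") behind the generic name. The helper name and error handling below are illustrative only.

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>
#include <linux/slab.h>

static int sha1_digest_example(const u8 *data, unsigned int len,
			       u8 out[SHA1_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	/* "sha1" resolves to the best available driver (CE, NEON, asm, generic) */
	tfm = crypto_alloc_shash("sha1", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* a shash_desc must be followed by descsize bytes of per-request state */
	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;

	err = crypto_shash_digest(desc, data, len, out);

	kzfree(desc);
	crypto_free_shash(tfm);
	return err;
}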
diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S
new file mode 100644
index 000000000000..87ec11a5f405
--- /dev/null
+++ b/arch/arm/crypto/sha2-ce-core.S
@@ -0,0 +1,125 @@
1/*
2 * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd.
5 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/linkage.h>
13#include <asm/assembler.h>
14
15 .text
16 .fpu crypto-neon-fp-armv8
17
18 k0 .req q7
19 k1 .req q8
20 rk .req r3
21
22 ta0 .req q9
23 ta1 .req q10
24 tb0 .req q10
25 tb1 .req q9
26
27 dga .req q11
28 dgb .req q12
29
30 dg0 .req q13
31 dg1 .req q14
32 dg2 .req q15
33
34 .macro add_only, ev, s0
35 vmov dg2, dg0
36 .ifnb \s0
37 vld1.32 {k\ev}, [rk, :128]!
38 .endif
39 sha256h.32 dg0, dg1, tb\ev
40 sha256h2.32 dg1, dg2, tb\ev
41 .ifnb \s0
42 vadd.u32 ta\ev, q\s0, k\ev
43 .endif
44 .endm
45
46 .macro add_update, ev, s0, s1, s2, s3
47 sha256su0.32 q\s0, q\s1
48 add_only \ev, \s1
49 sha256su1.32 q\s0, q\s2, q\s3
50 .endm
51
52 .align 6
53.Lsha256_rcon:
54 .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
55 .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
56 .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
57 .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
58 .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
59 .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
60 .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
61 .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
62 .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
63 .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
64 .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
65 .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
66 .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
67 .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
68 .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
69 .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
70
71 /*
72 * void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
73 int blocks);
74 */
75ENTRY(sha2_ce_transform)
76 /* load state */
77 vld1.32 {dga-dgb}, [r0]
78
79 /* load input */
800: vld1.32 {q0-q1}, [r1]!
81 vld1.32 {q2-q3}, [r1]!
82 subs r2, r2, #1
83
84#ifndef CONFIG_CPU_BIG_ENDIAN
85 vrev32.8 q0, q0
86 vrev32.8 q1, q1
87 vrev32.8 q2, q2
88 vrev32.8 q3, q3
89#endif
90
91 /* load first round constant */
92 adr rk, .Lsha256_rcon
93 vld1.32 {k0}, [rk, :128]!
94
95 vadd.u32 ta0, q0, k0
96 vmov dg0, dga
97 vmov dg1, dgb
98
99 add_update 1, 0, 1, 2, 3
100 add_update 0, 1, 2, 3, 0
101 add_update 1, 2, 3, 0, 1
102 add_update 0, 3, 0, 1, 2
103 add_update 1, 0, 1, 2, 3
104 add_update 0, 1, 2, 3, 0
105 add_update 1, 2, 3, 0, 1
106 add_update 0, 3, 0, 1, 2
107 add_update 1, 0, 1, 2, 3
108 add_update 0, 1, 2, 3, 0
109 add_update 1, 2, 3, 0, 1
110 add_update 0, 3, 0, 1, 2
111
112 add_only 1, 1
113 add_only 0, 2
114 add_only 1, 3
115 add_only 0
116
117 /* update state */
118 vadd.u32 dga, dga, dg0
119 vadd.u32 dgb, dgb, dg1
120 bne 0b
121
122 /* store new state */
123 vst1.32 {dga-dgb}, [r0]
124 bx lr
125ENDPROC(sha2_ce_transform)
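For readers not fluent in the ARMv8 Crypto Extensions: sha256su0/sha256su1 implement the SHA-256 message-schedule expansion and sha256h/sha256h2 perform the compression rounds, four at a time. As a reference only, here is a plain C sketch of what sha2_ce_transform computes per 64-byte block; it uses the same round constants as .Lsha256_rcon above, and standalone uint32_t types (rather than kernel types) purely for illustration.

#include <stdint.h>

#define ROR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))

static const uint32_t K256[64] = {
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
};

/* reference transform: 'state' is the 8-word SHA-256 state, 'blocks' counts 64-byte blocks */
static void sha256_blocks_ref(uint32_t state[8], const uint8_t *in, int blocks)
{
	while (blocks--) {
		uint32_t w[64], a, b, c, d, e, f, g, h;
		int i;

		/* load the block big-endian (the vrev32.8 instructions above do this for NEON) */
		for (i = 0; i < 16; i++)
			w[i] = (uint32_t)in[4 * i] << 24 | in[4 * i + 1] << 16 |
			       in[4 * i + 2] << 8  | in[4 * i + 3];

		/* message schedule: what sha256su0/sha256su1 compute, four words at a time */
		for (i = 16; i < 64; i++) {
			uint32_t s0 = ROR32(w[i - 15], 7) ^ ROR32(w[i - 15], 18) ^ (w[i - 15] >> 3);
			uint32_t s1 = ROR32(w[i - 2], 17) ^ ROR32(w[i - 2], 19) ^ (w[i - 2] >> 10);

			w[i] = w[i - 16] + s0 + w[i - 7] + s1;
		}

		a = state[0]; b = state[1]; c = state[2]; d = state[3];
		e = state[4]; f = state[5]; g = state[6]; h = state[7];

		/* 64 rounds: what sha256h/sha256h2 perform, four rounds per instruction pair */
		for (i = 0; i < 64; i++) {
			uint32_t S1  = ROR32(e, 6) ^ ROR32(e, 11) ^ ROR32(e, 25);
			uint32_t ch  = (e & f) ^ (~e & g);
			uint32_t t1  = h + S1 + ch + K256[i] + w[i];
			uint32_t S0  = ROR32(a, 2) ^ ROR32(a, 13) ^ ROR32(a, 22);
			uint32_t maj = (a & b) ^ (a & c) ^ (b & c);

			h = g; g = f; f = e; e = d + t1;
			d = c; c = b; b = a; a = t1 + S0 + maj;
		}

		state[0] += a; state[1] += b; state[2] += c; state[3] += d;
		state[4] += e; state[5] += f; state[6] += g; state[7] += h;
		in += 64;
	}
}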
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
new file mode 100644
index 000000000000..0755b2d657f3
--- /dev/null
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -0,0 +1,114 @@
1/*
2 * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <crypto/internal/hash.h>
12#include <crypto/sha.h>
13#include <crypto/sha256_base.h>
14#include <linux/crypto.h>
15#include <linux/module.h>
16
17#include <asm/hwcap.h>
18#include <asm/simd.h>
19#include <asm/neon.h>
20#include <asm/unaligned.h>
21
22#include "sha256_glue.h"
23
24MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
25MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
26MODULE_LICENSE("GPL v2");
27
28asmlinkage void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
29 int blocks);
30
31static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
32 unsigned int len)
33{
34 struct sha256_state *sctx = shash_desc_ctx(desc);
35
36 if (!may_use_simd() ||
37 (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
38 return crypto_sha256_arm_update(desc, data, len);
39
40 kernel_neon_begin();
41 sha256_base_do_update(desc, data, len,
42 (sha256_block_fn *)sha2_ce_transform);
43 kernel_neon_end();
44
45 return 0;
46}
47
48static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
49 unsigned int len, u8 *out)
50{
51 if (!may_use_simd())
52 return crypto_sha256_arm_finup(desc, data, len, out);
53
54 kernel_neon_begin();
55 if (len)
56 sha256_base_do_update(desc, data, len,
57 (sha256_block_fn *)sha2_ce_transform);
58 sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
59 kernel_neon_end();
60
61 return sha256_base_finish(desc, out);
62}
63
64static int sha2_ce_final(struct shash_desc *desc, u8 *out)
65{
66 return sha2_ce_finup(desc, NULL, 0, out);
67}
68
69static struct shash_alg algs[] = { {
70 .init = sha224_base_init,
71 .update = sha2_ce_update,
72 .final = sha2_ce_final,
73 .finup = sha2_ce_finup,
74 .descsize = sizeof(struct sha256_state),
75 .digestsize = SHA224_DIGEST_SIZE,
76 .base = {
77 .cra_name = "sha224",
78 .cra_driver_name = "sha224-ce",
79 .cra_priority = 300,
80 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
81 .cra_blocksize = SHA256_BLOCK_SIZE,
82 .cra_module = THIS_MODULE,
83 }
84}, {
85 .init = sha256_base_init,
86 .update = sha2_ce_update,
87 .final = sha2_ce_final,
88 .finup = sha2_ce_finup,
89 .descsize = sizeof(struct sha256_state),
90 .digestsize = SHA256_DIGEST_SIZE,
91 .base = {
92 .cra_name = "sha256",
93 .cra_driver_name = "sha256-ce",
94 .cra_priority = 300,
95 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
96 .cra_blocksize = SHA256_BLOCK_SIZE,
97 .cra_module = THIS_MODULE,
98 }
99} };
100
101static int __init sha2_ce_mod_init(void)
102{
103 if (!(elf_hwcap2 & HWCAP2_SHA2))
104 return -ENODEV;
105 return crypto_register_shashes(algs, ARRAY_SIZE(algs));
106}
107
108static void __exit sha2_ce_mod_fini(void)
109{
110 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
111}
112
113module_init(sha2_ce_mod_init);
114module_exit(sha2_ce_mod_fini);
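Once this module is loaded and has registered "sha224-ce"/"sha256-ce", the kernel's AF_ALG user-space hash interface can drive it without any new code in the kernel. A hedged user-space sketch, not taken from the patch: asking for "sha256" uses whichever registered driver has the highest priority, and error handling is omitted for brevity.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef AF_ALG
#define AF_ALG 38
#endif

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "sha256",	/* algorithm name as registered with the crypto API */
	};
	unsigned char digest[32];
	const char msg[] = "abc";
	int tfmfd, opfd, i;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
	opfd = accept(tfmfd, NULL, 0);

	write(opfd, msg, strlen(msg));		/* feed the message */
	read(opfd, digest, sizeof(digest));	/* read back the SHA-256 digest */

	for (i = 0; i < (int)sizeof(digest); i++)
		printf("%02x", digest[i]);
	printf("\n");

	close(opfd);
	close(tfmfd);
	return 0;
}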
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl
new file mode 100644
index 000000000000..fac0533ea633
--- /dev/null
+++ b/arch/arm/crypto/sha256-armv4.pl
@@ -0,0 +1,716 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8#
9# Permission to use under GPL terms is granted.
10# ====================================================================
11
12# SHA256 block procedure for ARMv4. May 2007.
13
14# Performance is ~2x better than gcc 3.4 generated code and in "abso-
15# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
16# byte [on single-issue Xscale PXA250 core].
17
18# July 2010.
19#
20# Rescheduling for dual-issue pipeline resulted in 22% improvement on
21# Cortex A8 core and ~20 cycles per processed byte.
22
23# February 2011.
24#
25# Profiler-assisted and platform-specific optimization resulted in 16%
26# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
27
28# September 2013.
29#
30# Add NEON implementation. On Cortex A8 it was measured to process one
31# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
32# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
33# code (meaning that latter performs sub-optimally, nothing was done
34# about it).
35
36# May 2014.
37#
38# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
39
40while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
41open STDOUT,">$output";
42
43$ctx="r0"; $t0="r0";
44$inp="r1"; $t4="r1";
45$len="r2"; $t1="r2";
46$T1="r3"; $t3="r3";
47$A="r4";
48$B="r5";
49$C="r6";
50$D="r7";
51$E="r8";
52$F="r9";
53$G="r10";
54$H="r11";
55@V=($A,$B,$C,$D,$E,$F,$G,$H);
56$t2="r12";
57$Ktbl="r14";
58
59@Sigma0=( 2,13,22);
60@Sigma1=( 6,11,25);
61@sigma0=( 7,18, 3);
62@sigma1=(17,19,10);
63
64sub BODY_00_15 {
65my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
66
67$code.=<<___ if ($i<16);
68#if __ARM_ARCH__>=7
69 @ ldr $t1,[$inp],#4 @ $i
70# if $i==15
71 str $inp,[sp,#17*4] @ make room for $t4
72# endif
73 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
74 add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
75 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
76# ifndef __ARMEB__
77 rev $t1,$t1
78# endif
79#else
80 @ ldrb $t1,[$inp,#3] @ $i
81 add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
82 ldrb $t2,[$inp,#2]
83 ldrb $t0,[$inp,#1]
84 orr $t1,$t1,$t2,lsl#8
85 ldrb $t2,[$inp],#4
86 orr $t1,$t1,$t0,lsl#16
87# if $i==15
88 str $inp,[sp,#17*4] @ make room for $t4
89# endif
90 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
91 orr $t1,$t1,$t2,lsl#24
92 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
93#endif
94___
95$code.=<<___;
96 ldr $t2,[$Ktbl],#4 @ *K256++
97 add $h,$h,$t1 @ h+=X[i]
98 str $t1,[sp,#`$i%16`*4]
99 eor $t1,$f,$g
100 add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
101 and $t1,$t1,$e
102 add $h,$h,$t2 @ h+=K256[i]
103 eor $t1,$t1,$g @ Ch(e,f,g)
104 eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
105 add $h,$h,$t1 @ h+=Ch(e,f,g)
106#if $i==31
107 and $t2,$t2,#0xff
108 cmp $t2,#0xf2 @ done?
109#endif
110#if $i<15
111# if __ARM_ARCH__>=7
112 ldr $t1,[$inp],#4 @ prefetch
113# else
114 ldrb $t1,[$inp,#3]
115# endif
116 eor $t2,$a,$b @ a^b, b^c in next round
117#else
118 ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
119 eor $t2,$a,$b @ a^b, b^c in next round
120 ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
121#endif
122 eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
123 and $t3,$t3,$t2 @ (b^c)&=(a^b)
124 add $d,$d,$h @ d+=h
125 eor $t3,$t3,$b @ Maj(a,b,c)
126 add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
127 @ add $h,$h,$t3 @ h+=Maj(a,b,c)
128___
129 ($t2,$t3)=($t3,$t2);
130}
131
132sub BODY_16_XX {
133my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
134
135$code.=<<___;
136 @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
137 @ ldr $t4,[sp,#`($i+14)%16`*4]
138 mov $t0,$t1,ror#$sigma0[0]
139 add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
140 mov $t2,$t4,ror#$sigma1[0]
141 eor $t0,$t0,$t1,ror#$sigma0[1]
142 eor $t2,$t2,$t4,ror#$sigma1[1]
143 eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
144 ldr $t1,[sp,#`($i+0)%16`*4]
145 eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
146 ldr $t4,[sp,#`($i+9)%16`*4]
147
148 add $t2,$t2,$t0
149 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
150 add $t1,$t1,$t2
151 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
152 add $t1,$t1,$t4 @ X[i]
153___
154 &BODY_00_15(@_);
155}
156
157$code=<<___;
158#ifndef __KERNEL__
159# include "arm_arch.h"
160#else
161# define __ARM_ARCH__ __LINUX_ARM_ARCH__
162# define __ARM_MAX_ARCH__ 7
163#endif
164
165.text
166#if __ARM_ARCH__<7
167.code 32
168#else
169.syntax unified
170# ifdef __thumb2__
171# define adrl adr
172.thumb
173# else
174.code 32
175# endif
176#endif
177
178.type K256,%object
179.align 5
180K256:
181.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
182.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
183.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
184.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
185.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
186.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
187.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
188.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
189.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
190.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
191.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
192.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
193.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
194.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
195.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
196.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
197.size K256,.-K256
198.word 0 @ terminator
199#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
200.LOPENSSL_armcap:
201.word OPENSSL_armcap_P-sha256_block_data_order
202#endif
203.align 5
204
205.global sha256_block_data_order
206.type sha256_block_data_order,%function
207sha256_block_data_order:
208#if __ARM_ARCH__<7
209 sub r3,pc,#8 @ sha256_block_data_order
210#else
211 adr r3,sha256_block_data_order
212#endif
213#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
214 ldr r12,.LOPENSSL_armcap
215 ldr r12,[r3,r12] @ OPENSSL_armcap_P
216 tst r12,#ARMV8_SHA256
217 bne .LARMv8
218 tst r12,#ARMV7_NEON
219 bne .LNEON
220#endif
221 add $len,$inp,$len,lsl#6 @ len to point at the end of inp
222 stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
223 ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
224 sub $Ktbl,r3,#256+32 @ K256
225 sub sp,sp,#16*4 @ alloca(X[16])
226.Loop:
227# if __ARM_ARCH__>=7
228 ldr $t1,[$inp],#4
229# else
230 ldrb $t1,[$inp,#3]
231# endif
232 eor $t3,$B,$C @ magic
233 eor $t2,$t2,$t2
234___
235for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
236$code.=".Lrounds_16_xx:\n";
237for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
238$code.=<<___;
239#if __ARM_ARCH__>=7
240 ite eq @ Thumb2 thing, sanity check in ARM
241#endif
242 ldreq $t3,[sp,#16*4] @ pull ctx
243 bne .Lrounds_16_xx
244
245 add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
246 ldr $t0,[$t3,#0]
247 ldr $t1,[$t3,#4]
248 ldr $t2,[$t3,#8]
249 add $A,$A,$t0
250 ldr $t0,[$t3,#12]
251 add $B,$B,$t1
252 ldr $t1,[$t3,#16]
253 add $C,$C,$t2
254 ldr $t2,[$t3,#20]
255 add $D,$D,$t0
256 ldr $t0,[$t3,#24]
257 add $E,$E,$t1
258 ldr $t1,[$t3,#28]
259 add $F,$F,$t2
260 ldr $inp,[sp,#17*4] @ pull inp
261 ldr $t2,[sp,#18*4] @ pull inp+len
262 add $G,$G,$t0
263 add $H,$H,$t1
264 stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
265 cmp $inp,$t2
266 sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
267 bne .Loop
268
269 add sp,sp,#`16+3`*4 @ destroy frame
270#if __ARM_ARCH__>=5
271 ldmia sp!,{r4-r11,pc}
272#else
273 ldmia sp!,{r4-r11,lr}
274 tst lr,#1
275 moveq pc,lr @ be binary compatible with V4, yet
276 bx lr @ interoperable with Thumb ISA:-)
277#endif
278.size sha256_block_data_order,.-sha256_block_data_order
279___
280######################################################################
281# NEON stuff
282#
283{{{
284my @X=map("q$_",(0..3));
285my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
286my $Xfer=$t4;
287my $j=0;
288
289sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
290sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
291
292sub AUTOLOAD() # thunk [simplified] x86-style perlasm
293{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
294 my $arg = pop;
295 $arg = "#$arg" if ($arg*1 eq $arg);
296 $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
297}
298
299sub Xupdate()
300{ use integer;
301 my $body = shift;
302 my @insns = (&$body,&$body,&$body,&$body);
303 my ($a,$b,$c,$d,$e,$f,$g,$h);
304
305 &vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
306 eval(shift(@insns));
307 eval(shift(@insns));
308 eval(shift(@insns));
309 &vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
310 eval(shift(@insns));
311 eval(shift(@insns));
312 eval(shift(@insns));
313 &vshr_u32 ($T2,$T0,$sigma0[0]);
314 eval(shift(@insns));
315 eval(shift(@insns));
316 &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
317 eval(shift(@insns));
318 eval(shift(@insns));
319 &vshr_u32 ($T1,$T0,$sigma0[2]);
320 eval(shift(@insns));
321 eval(shift(@insns));
322 &vsli_32 ($T2,$T0,32-$sigma0[0]);
323 eval(shift(@insns));
324 eval(shift(@insns));
325 &vshr_u32 ($T3,$T0,$sigma0[1]);
326 eval(shift(@insns));
327 eval(shift(@insns));
328 &veor ($T1,$T1,$T2);
329 eval(shift(@insns));
330 eval(shift(@insns));
331 &vsli_32 ($T3,$T0,32-$sigma0[1]);
332 eval(shift(@insns));
333 eval(shift(@insns));
334 &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
335 eval(shift(@insns));
336 eval(shift(@insns));
337 &veor ($T1,$T1,$T3); # sigma0(X[1..4])
338 eval(shift(@insns));
339 eval(shift(@insns));
340 &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
341 eval(shift(@insns));
342 eval(shift(@insns));
343 &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
344 eval(shift(@insns));
345 eval(shift(@insns));
346 &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
347 eval(shift(@insns));
348 eval(shift(@insns));
349 &veor ($T5,$T5,$T4);
350 eval(shift(@insns));
351 eval(shift(@insns));
352 &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
353 eval(shift(@insns));
354 eval(shift(@insns));
355 &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
356 eval(shift(@insns));
357 eval(shift(@insns));
358 &veor ($T5,$T5,$T4); # sigma1(X[14..15])
359 eval(shift(@insns));
360 eval(shift(@insns));
361 &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
362 eval(shift(@insns));
363 eval(shift(@insns));
364 &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
365 eval(shift(@insns));
366 eval(shift(@insns));
367 &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
368 eval(shift(@insns));
369 eval(shift(@insns));
370 &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
371 eval(shift(@insns));
372 eval(shift(@insns));
373 &veor ($T5,$T5,$T4);
374 eval(shift(@insns));
375 eval(shift(@insns));
376 &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
377 eval(shift(@insns));
378 eval(shift(@insns));
379 &vld1_32 ("{$T0}","[$Ktbl,:128]!");
380 eval(shift(@insns));
381 eval(shift(@insns));
382 &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
383 eval(shift(@insns));
384 eval(shift(@insns));
385 &veor ($T5,$T5,$T4); # sigma1(X[16..17])
386 eval(shift(@insns));
387 eval(shift(@insns));
388 &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
389 eval(shift(@insns));
390 eval(shift(@insns));
391 &vadd_i32 ($T0,$T0,@X[0]);
392 while($#insns>=2) { eval(shift(@insns)); }
393 &vst1_32 ("{$T0}","[$Xfer,:128]!");
394 eval(shift(@insns));
395 eval(shift(@insns));
396
397 push(@X,shift(@X)); # "rotate" X[]
398}
399
400sub Xpreload()
401{ use integer;
402 my $body = shift;
403 my @insns = (&$body,&$body,&$body,&$body);
404 my ($a,$b,$c,$d,$e,$f,$g,$h);
405
406 eval(shift(@insns));
407 eval(shift(@insns));
408 eval(shift(@insns));
409 eval(shift(@insns));
410 &vld1_32 ("{$T0}","[$Ktbl,:128]!");
411 eval(shift(@insns));
412 eval(shift(@insns));
413 eval(shift(@insns));
414 eval(shift(@insns));
415 &vrev32_8 (@X[0],@X[0]);
416 eval(shift(@insns));
417 eval(shift(@insns));
418 eval(shift(@insns));
419 eval(shift(@insns));
420 &vadd_i32 ($T0,$T0,@X[0]);
421 foreach (@insns) { eval; } # remaining instructions
422 &vst1_32 ("{$T0}","[$Xfer,:128]!");
423
424 push(@X,shift(@X)); # "rotate" X[]
425}
426
427sub body_00_15 () {
428 (
429 '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
430 '&add ($h,$h,$t1)', # h+=X[i]+K[i]
431 '&eor ($t1,$f,$g)',
432 '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
433 '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
434 '&and ($t1,$t1,$e)',
435 '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
436 '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
437 '&eor ($t1,$t1,$g)', # Ch(e,f,g)
438 '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
439 '&eor ($t2,$a,$b)', # a^b, b^c in next round
440 '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
441 '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
442 '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
443 '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
444 '&ldr ($t1,"[sp,#64]") if ($j==31)',
445 '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
446 '&add ($d,$d,$h)', # d+=h
447 '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
448 '&eor ($t3,$t3,$b)', # Maj(a,b,c)
449 '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
450 )
451}
452
453$code.=<<___;
454#if __ARM_MAX_ARCH__>=7
455.arch armv7-a
456.fpu neon
457
458.global sha256_block_data_order_neon
459.type sha256_block_data_order_neon,%function
460.align 4
461sha256_block_data_order_neon:
462.LNEON:
463 stmdb sp!,{r4-r12,lr}
464
465 sub $H,sp,#16*4+16
466 adrl $Ktbl,K256
467 bic $H,$H,#15 @ align for 128-bit stores
468 mov $t2,sp
469 mov sp,$H @ alloca
470 add $len,$inp,$len,lsl#6 @ len to point at the end of inp
471
472 vld1.8 {@X[0]},[$inp]!
473 vld1.8 {@X[1]},[$inp]!
474 vld1.8 {@X[2]},[$inp]!
475 vld1.8 {@X[3]},[$inp]!
476 vld1.32 {$T0},[$Ktbl,:128]!
477 vld1.32 {$T1},[$Ktbl,:128]!
478 vld1.32 {$T2},[$Ktbl,:128]!
479 vld1.32 {$T3},[$Ktbl,:128]!
480 vrev32.8 @X[0],@X[0] @ yes, even on
481 str $ctx,[sp,#64]
482 vrev32.8 @X[1],@X[1] @ big-endian
483 str $inp,[sp,#68]
484 mov $Xfer,sp
485 vrev32.8 @X[2],@X[2]
486 str $len,[sp,#72]
487 vrev32.8 @X[3],@X[3]
488 str $t2,[sp,#76] @ save original sp
489 vadd.i32 $T0,$T0,@X[0]
490 vadd.i32 $T1,$T1,@X[1]
491 vst1.32 {$T0},[$Xfer,:128]!
492 vadd.i32 $T2,$T2,@X[2]
493 vst1.32 {$T1},[$Xfer,:128]!
494 vadd.i32 $T3,$T3,@X[3]
495 vst1.32 {$T2},[$Xfer,:128]!
496 vst1.32 {$T3},[$Xfer,:128]!
497
498 ldmia $ctx,{$A-$H}
499 sub $Xfer,$Xfer,#64
500 ldr $t1,[sp,#0]
501 eor $t2,$t2,$t2
502 eor $t3,$B,$C
503 b .L_00_48
504
505.align 4
506.L_00_48:
507___
508 &Xupdate(\&body_00_15);
509 &Xupdate(\&body_00_15);
510 &Xupdate(\&body_00_15);
511 &Xupdate(\&body_00_15);
512$code.=<<___;
513 teq $t1,#0 @ check for K256 terminator
514 ldr $t1,[sp,#0]
515 sub $Xfer,$Xfer,#64
516 bne .L_00_48
517
518 ldr $inp,[sp,#68]
519 ldr $t0,[sp,#72]
520 sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
521 teq $inp,$t0
522 it eq
523 subeq $inp,$inp,#64 @ avoid SEGV
524 vld1.8 {@X[0]},[$inp]! @ load next input block
525 vld1.8 {@X[1]},[$inp]!
526 vld1.8 {@X[2]},[$inp]!
527 vld1.8 {@X[3]},[$inp]!
528 it ne
529 strne $inp,[sp,#68]
530 mov $Xfer,sp
531___
532 &Xpreload(\&body_00_15);
533 &Xpreload(\&body_00_15);
534 &Xpreload(\&body_00_15);
535 &Xpreload(\&body_00_15);
536$code.=<<___;
537 ldr $t0,[$t1,#0]
538 add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
539 ldr $t2,[$t1,#4]
540 ldr $t3,[$t1,#8]
541 ldr $t4,[$t1,#12]
542 add $A,$A,$t0 @ accumulate
543 ldr $t0,[$t1,#16]
544 add $B,$B,$t2
545 ldr $t2,[$t1,#20]
546 add $C,$C,$t3
547 ldr $t3,[$t1,#24]
548 add $D,$D,$t4
549 ldr $t4,[$t1,#28]
550 add $E,$E,$t0
551 str $A,[$t1],#4
552 add $F,$F,$t2
553 str $B,[$t1],#4
554 add $G,$G,$t3
555 str $C,[$t1],#4
556 add $H,$H,$t4
557 str $D,[$t1],#4
558 stmia $t1,{$E-$H}
559
560 ittte ne
561 movne $Xfer,sp
562 ldrne $t1,[sp,#0]
563 eorne $t2,$t2,$t2
564 ldreq sp,[sp,#76] @ restore original sp
565 itt ne
566 eorne $t3,$B,$C
567 bne .L_00_48
568
569 ldmia sp!,{r4-r12,pc}
570.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
571#endif
572___
573}}}
574######################################################################
575# ARMv8 stuff
576#
577{{{
578my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
579my @MSG=map("q$_",(8..11));
580my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
581my $Ktbl="r3";
582
583$code.=<<___;
584#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
585
586# ifdef __thumb2__
587# define INST(a,b,c,d) .byte c,d|0xc,a,b
588# else
589# define INST(a,b,c,d) .byte a,b,c,d
590# endif
591
592.type sha256_block_data_order_armv8,%function
593.align 5
594sha256_block_data_order_armv8:
595.LARMv8:
596 vld1.32 {$ABCD,$EFGH},[$ctx]
597# ifdef __thumb2__
598 adr $Ktbl,.LARMv8
599 sub $Ktbl,$Ktbl,#.LARMv8-K256
600# else
601 adrl $Ktbl,K256
602# endif
603 add $len,$inp,$len,lsl#6 @ len to point at the end of inp
604
605.Loop_v8:
606 vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
607 vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
608 vld1.32 {$W0},[$Ktbl]!
609 vrev32.8 @MSG[0],@MSG[0]
610 vrev32.8 @MSG[1],@MSG[1]
611 vrev32.8 @MSG[2],@MSG[2]
612 vrev32.8 @MSG[3],@MSG[3]
613 vmov $ABCD_SAVE,$ABCD @ offload
614 vmov $EFGH_SAVE,$EFGH
615 teq $inp,$len
616___
617for($i=0;$i<12;$i++) {
618$code.=<<___;
619 vld1.32 {$W1},[$Ktbl]!
620 vadd.i32 $W0,$W0,@MSG[0]
621 sha256su0 @MSG[0],@MSG[1]
622 vmov $abcd,$ABCD
623 sha256h $ABCD,$EFGH,$W0
624 sha256h2 $EFGH,$abcd,$W0
625 sha256su1 @MSG[0],@MSG[2],@MSG[3]
626___
627 ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
628}
629$code.=<<___;
630 vld1.32 {$W1},[$Ktbl]!
631 vadd.i32 $W0,$W0,@MSG[0]
632 vmov $abcd,$ABCD
633 sha256h $ABCD,$EFGH,$W0
634 sha256h2 $EFGH,$abcd,$W0
635
636 vld1.32 {$W0},[$Ktbl]!
637 vadd.i32 $W1,$W1,@MSG[1]
638 vmov $abcd,$ABCD
639 sha256h $ABCD,$EFGH,$W1
640 sha256h2 $EFGH,$abcd,$W1
641
642 vld1.32 {$W1},[$Ktbl]
643 vadd.i32 $W0,$W0,@MSG[2]
644 sub $Ktbl,$Ktbl,#256-16 @ rewind
645 vmov $abcd,$ABCD
646 sha256h $ABCD,$EFGH,$W0
647 sha256h2 $EFGH,$abcd,$W0
648
649 vadd.i32 $W1,$W1,@MSG[3]
650 vmov $abcd,$ABCD
651 sha256h $ABCD,$EFGH,$W1
652 sha256h2 $EFGH,$abcd,$W1
653
654 vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
655 vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
656 it ne
657 bne .Loop_v8
658
659 vst1.32 {$ABCD,$EFGH},[$ctx]
660
661 ret @ bx lr
662.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
663#endif
664___
665}}}
666$code.=<<___;
667.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
668.align 2
669#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
670.comm OPENSSL_armcap_P,4,4
671#endif
672___
673
674open SELF,$0;
675while(<SELF>) {
676 next if (/^#!/);
677 last if (!s/^#/@/ and !/^$/);
678 print;
679}
680close SELF;
681
682{ my %opcode = (
683 "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
684 "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
685
686 sub unsha256 {
687 my ($mnemonic,$arg)=@_;
688
689 if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
690 my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
691 |(($2&7)<<17)|(($2&8)<<4)
692 |(($3&7)<<1) |(($3&8)<<2);
693 # since ARMv7 instructions are always encoded little-endian.
694 # correct solution is to use .inst directive, but older
695 # assemblers don't implement it:-(
696 sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
697 $word&0xff,($word>>8)&0xff,
698 ($word>>16)&0xff,($word>>24)&0xff,
699 $mnemonic,$arg;
700 }
701 }
702}
703
704foreach (split($/,$code)) {
705
706 s/\`([^\`]*)\`/eval $1/geo;
707
708 s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
709
710 s/\bret\b/bx lr/go or
711 s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
712
713 print $_,"\n";
714}
715
716close STDOUT; # enforce flush
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
new file mode 100644
index 000000000000..555a1a8eec90
--- /dev/null
+++ b/arch/arm/crypto/sha256-core.S_shipped
@@ -0,0 +1,2808 @@
1
2@ ====================================================================
3@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
4@ project. The module is, however, dual licensed under OpenSSL and
5@ CRYPTOGAMS licenses depending on where you obtain it. For further
6@ details see http://www.openssl.org/~appro/cryptogams/.
7@
8@ Permission to use under GPL terms is granted.
9@ ====================================================================
10
11@ SHA256 block procedure for ARMv4. May 2007.
12
13@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
14@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
15@ byte [on single-issue Xscale PXA250 core].
16
17@ July 2010.
18@
19@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
20@ Cortex A8 core and ~20 cycles per processed byte.
21
22@ February 2011.
23@
24@ Profiler-assisted and platform-specific optimization resulted in 16%
25@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
26
27@ September 2013.
28@
29@ Add NEON implementation. On Cortex A8 it was measured to process one
30@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
31@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
32@ code (meaning that latter performs sub-optimally, nothing was done
33@ about it).
34
35@ May 2014.
36@
37@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
38
39#ifndef __KERNEL__
40# include "arm_arch.h"
41#else
42# define __ARM_ARCH__ __LINUX_ARM_ARCH__
43# define __ARM_MAX_ARCH__ 7
44#endif
45
46.text
47#if __ARM_ARCH__<7
48.code 32
49#else
50.syntax unified
51# ifdef __thumb2__
52# define adrl adr
53.thumb
54# else
55.code 32
56# endif
57#endif
58
59.type K256,%object
60.align 5
61K256:
62.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
63.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
64.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
65.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
66.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
67.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
68.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
69.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
70.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
71.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
72.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
73.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
74.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
75.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
76.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
77.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
78.size K256,.-K256
79.word 0 @ terminator
80#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
81.LOPENSSL_armcap:
82.word OPENSSL_armcap_P-sha256_block_data_order
83#endif
84.align 5
85
86.global sha256_block_data_order
87.type sha256_block_data_order,%function
88sha256_block_data_order:
89#if __ARM_ARCH__<7
90 sub r3,pc,#8 @ sha256_block_data_order
91#else
92 adr r3,sha256_block_data_order
93#endif
94#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
95 ldr r12,.LOPENSSL_armcap
96 ldr r12,[r3,r12] @ OPENSSL_armcap_P
97 tst r12,#ARMV8_SHA256
98 bne .LARMv8
99 tst r12,#ARMV7_NEON
100 bne .LNEON
101#endif
102 add r2,r1,r2,lsl#6 @ len to point at the end of inp
103 stmdb sp!,{r0,r1,r2,r4-r11,lr}
104 ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
105 sub r14,r3,#256+32 @ K256
106 sub sp,sp,#16*4 @ alloca(X[16])
107.Loop:
108# if __ARM_ARCH__>=7
109 ldr r2,[r1],#4
110# else
111 ldrb r2,[r1,#3]
112# endif
113 eor r3,r5,r6 @ magic
114 eor r12,r12,r12
115#if __ARM_ARCH__>=7
116 @ ldr r2,[r1],#4 @ 0
117# if 0==15
118 str r1,[sp,#17*4] @ make room for r1
119# endif
120 eor r0,r8,r8,ror#5
121 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
122 eor r0,r0,r8,ror#19 @ Sigma1(e)
123# ifndef __ARMEB__
124 rev r2,r2
125# endif
126#else
127 @ ldrb r2,[r1,#3] @ 0
128 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
129 ldrb r12,[r1,#2]
130 ldrb r0,[r1,#1]
131 orr r2,r2,r12,lsl#8
132 ldrb r12,[r1],#4
133 orr r2,r2,r0,lsl#16
134# if 0==15
135 str r1,[sp,#17*4] @ make room for r1
136# endif
137 eor r0,r8,r8,ror#5
138 orr r2,r2,r12,lsl#24
139 eor r0,r0,r8,ror#19 @ Sigma1(e)
140#endif
141 ldr r12,[r14],#4 @ *K256++
142 add r11,r11,r2 @ h+=X[i]
143 str r2,[sp,#0*4]
144 eor r2,r9,r10
145 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
146 and r2,r2,r8
147 add r11,r11,r12 @ h+=K256[i]
148 eor r2,r2,r10 @ Ch(e,f,g)
149 eor r0,r4,r4,ror#11
150 add r11,r11,r2 @ h+=Ch(e,f,g)
151#if 0==31
152 and r12,r12,#0xff
153 cmp r12,#0xf2 @ done?
154#endif
155#if 0<15
156# if __ARM_ARCH__>=7
157 ldr r2,[r1],#4 @ prefetch
158# else
159 ldrb r2,[r1,#3]
160# endif
161 eor r12,r4,r5 @ a^b, b^c in next round
162#else
163 ldr r2,[sp,#2*4] @ from future BODY_16_xx
164 eor r12,r4,r5 @ a^b, b^c in next round
165 ldr r1,[sp,#15*4] @ from future BODY_16_xx
166#endif
167 eor r0,r0,r4,ror#20 @ Sigma0(a)
168 and r3,r3,r12 @ (b^c)&=(a^b)
169 add r7,r7,r11 @ d+=h
170 eor r3,r3,r5 @ Maj(a,b,c)
171 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
172 @ add r11,r11,r3 @ h+=Maj(a,b,c)
173#if __ARM_ARCH__>=7
174 @ ldr r2,[r1],#4 @ 1
175# if 1==15
176 str r1,[sp,#17*4] @ make room for r1
177# endif
178 eor r0,r7,r7,ror#5
179 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
180 eor r0,r0,r7,ror#19 @ Sigma1(e)
181# ifndef __ARMEB__
182 rev r2,r2
183# endif
184#else
185 @ ldrb r2,[r1,#3] @ 1
186 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
187 ldrb r3,[r1,#2]
188 ldrb r0,[r1,#1]
189 orr r2,r2,r3,lsl#8
190 ldrb r3,[r1],#4
191 orr r2,r2,r0,lsl#16
192# if 1==15
193 str r1,[sp,#17*4] @ make room for r1
194# endif
195 eor r0,r7,r7,ror#5
196 orr r2,r2,r3,lsl#24
197 eor r0,r0,r7,ror#19 @ Sigma1(e)
198#endif
199 ldr r3,[r14],#4 @ *K256++
200 add r10,r10,r2 @ h+=X[i]
201 str r2,[sp,#1*4]
202 eor r2,r8,r9
203 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
204 and r2,r2,r7
205 add r10,r10,r3 @ h+=K256[i]
206 eor r2,r2,r9 @ Ch(e,f,g)
207 eor r0,r11,r11,ror#11
208 add r10,r10,r2 @ h+=Ch(e,f,g)
209#if 1==31
210 and r3,r3,#0xff
211 cmp r3,#0xf2 @ done?
212#endif
213#if 1<15
214# if __ARM_ARCH__>=7
215 ldr r2,[r1],#4 @ prefetch
216# else
217 ldrb r2,[r1,#3]
218# endif
219 eor r3,r11,r4 @ a^b, b^c in next round
220#else
221 ldr r2,[sp,#3*4] @ from future BODY_16_xx
222 eor r3,r11,r4 @ a^b, b^c in next round
223 ldr r1,[sp,#0*4] @ from future BODY_16_xx
224#endif
225 eor r0,r0,r11,ror#20 @ Sigma0(a)
226 and r12,r12,r3 @ (b^c)&=(a^b)
227 add r6,r6,r10 @ d+=h
228 eor r12,r12,r4 @ Maj(a,b,c)
229 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
230 @ add r10,r10,r12 @ h+=Maj(a,b,c)
231#if __ARM_ARCH__>=7
232 @ ldr r2,[r1],#4 @ 2
233# if 2==15
234 str r1,[sp,#17*4] @ make room for r1
235# endif
236 eor r0,r6,r6,ror#5
237 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
238 eor r0,r0,r6,ror#19 @ Sigma1(e)
239# ifndef __ARMEB__
240 rev r2,r2
241# endif
242#else
243 @ ldrb r2,[r1,#3] @ 2
244 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
245 ldrb r12,[r1,#2]
246 ldrb r0,[r1,#1]
247 orr r2,r2,r12,lsl#8
248 ldrb r12,[r1],#4
249 orr r2,r2,r0,lsl#16
250# if 2==15
251 str r1,[sp,#17*4] @ make room for r1
252# endif
253 eor r0,r6,r6,ror#5
254 orr r2,r2,r12,lsl#24
255 eor r0,r0,r6,ror#19 @ Sigma1(e)
256#endif
257 ldr r12,[r14],#4 @ *K256++
258 add r9,r9,r2 @ h+=X[i]
259 str r2,[sp,#2*4]
260 eor r2,r7,r8
261 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
262 and r2,r2,r6
263 add r9,r9,r12 @ h+=K256[i]
264 eor r2,r2,r8 @ Ch(e,f,g)
265 eor r0,r10,r10,ror#11
266 add r9,r9,r2 @ h+=Ch(e,f,g)
267#if 2==31
268 and r12,r12,#0xff
269 cmp r12,#0xf2 @ done?
270#endif
271#if 2<15
272# if __ARM_ARCH__>=7
273 ldr r2,[r1],#4 @ prefetch
274# else
275 ldrb r2,[r1,#3]
276# endif
277 eor r12,r10,r11 @ a^b, b^c in next round
278#else
279 ldr r2,[sp,#4*4] @ from future BODY_16_xx
280 eor r12,r10,r11 @ a^b, b^c in next round
281 ldr r1,[sp,#1*4] @ from future BODY_16_xx
282#endif
283 eor r0,r0,r10,ror#20 @ Sigma0(a)
284 and r3,r3,r12 @ (b^c)&=(a^b)
285 add r5,r5,r9 @ d+=h
286 eor r3,r3,r11 @ Maj(a,b,c)
287 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
288 @ add r9,r9,r3 @ h+=Maj(a,b,c)
289#if __ARM_ARCH__>=7
290 @ ldr r2,[r1],#4 @ 3
291# if 3==15
292 str r1,[sp,#17*4] @ make room for r1
293# endif
294 eor r0,r5,r5,ror#5
295 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
296 eor r0,r0,r5,ror#19 @ Sigma1(e)
297# ifndef __ARMEB__
298 rev r2,r2
299# endif
300#else
301 @ ldrb r2,[r1,#3] @ 3
302 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
303 ldrb r3,[r1,#2]
304 ldrb r0,[r1,#1]
305 orr r2,r2,r3,lsl#8
306 ldrb r3,[r1],#4
307 orr r2,r2,r0,lsl#16
308# if 3==15
309 str r1,[sp,#17*4] @ make room for r1
310# endif
311 eor r0,r5,r5,ror#5
312 orr r2,r2,r3,lsl#24
313 eor r0,r0,r5,ror#19 @ Sigma1(e)
314#endif
315 ldr r3,[r14],#4 @ *K256++
316 add r8,r8,r2 @ h+=X[i]
317 str r2,[sp,#3*4]
318 eor r2,r6,r7
319 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
320 and r2,r2,r5
321 add r8,r8,r3 @ h+=K256[i]
322 eor r2,r2,r7 @ Ch(e,f,g)
323 eor r0,r9,r9,ror#11
324 add r8,r8,r2 @ h+=Ch(e,f,g)
325#if 3==31
326 and r3,r3,#0xff
327 cmp r3,#0xf2 @ done?
328#endif
329#if 3<15
330# if __ARM_ARCH__>=7
331 ldr r2,[r1],#4 @ prefetch
332# else
333 ldrb r2,[r1,#3]
334# endif
335 eor r3,r9,r10 @ a^b, b^c in next round
336#else
337 ldr r2,[sp,#5*4] @ from future BODY_16_xx
338 eor r3,r9,r10 @ a^b, b^c in next round
339 ldr r1,[sp,#2*4] @ from future BODY_16_xx
340#endif
341 eor r0,r0,r9,ror#20 @ Sigma0(a)
342 and r12,r12,r3 @ (b^c)&=(a^b)
343 add r4,r4,r8 @ d+=h
344 eor r12,r12,r10 @ Maj(a,b,c)
345 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
346 @ add r8,r8,r12 @ h+=Maj(a,b,c)
347#if __ARM_ARCH__>=7
348 @ ldr r2,[r1],#4 @ 4
349# if 4==15
350 str r1,[sp,#17*4] @ make room for r1
351# endif
352 eor r0,r4,r4,ror#5
353 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
354 eor r0,r0,r4,ror#19 @ Sigma1(e)
355# ifndef __ARMEB__
356 rev r2,r2
357# endif
358#else
359 @ ldrb r2,[r1,#3] @ 4
360 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
361 ldrb r12,[r1,#2]
362 ldrb r0,[r1,#1]
363 orr r2,r2,r12,lsl#8
364 ldrb r12,[r1],#4
365 orr r2,r2,r0,lsl#16
366# if 4==15
367 str r1,[sp,#17*4] @ make room for r1
368# endif
369 eor r0,r4,r4,ror#5
370 orr r2,r2,r12,lsl#24
371 eor r0,r0,r4,ror#19 @ Sigma1(e)
372#endif
373 ldr r12,[r14],#4 @ *K256++
374 add r7,r7,r2 @ h+=X[i]
375 str r2,[sp,#4*4]
376 eor r2,r5,r6
377 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
378 and r2,r2,r4
379 add r7,r7,r12 @ h+=K256[i]
380 eor r2,r2,r6 @ Ch(e,f,g)
381 eor r0,r8,r8,ror#11
382 add r7,r7,r2 @ h+=Ch(e,f,g)
383#if 4==31
384 and r12,r12,#0xff
385 cmp r12,#0xf2 @ done?
386#endif
387#if 4<15
388# if __ARM_ARCH__>=7
389 ldr r2,[r1],#4 @ prefetch
390# else
391 ldrb r2,[r1,#3]
392# endif
393 eor r12,r8,r9 @ a^b, b^c in next round
394#else
395 ldr r2,[sp,#6*4] @ from future BODY_16_xx
396 eor r12,r8,r9 @ a^b, b^c in next round
397 ldr r1,[sp,#3*4] @ from future BODY_16_xx
398#endif
399 eor r0,r0,r8,ror#20 @ Sigma0(a)
400 and r3,r3,r12 @ (b^c)&=(a^b)
401 add r11,r11,r7 @ d+=h
402 eor r3,r3,r9 @ Maj(a,b,c)
403 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
404 @ add r7,r7,r3 @ h+=Maj(a,b,c)
405#if __ARM_ARCH__>=7
406 @ ldr r2,[r1],#4 @ 5
407# if 5==15
408 str r1,[sp,#17*4] @ make room for r1
409# endif
410 eor r0,r11,r11,ror#5
411 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
412 eor r0,r0,r11,ror#19 @ Sigma1(e)
413# ifndef __ARMEB__
414 rev r2,r2
415# endif
416#else
417 @ ldrb r2,[r1,#3] @ 5
418 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
419 ldrb r3,[r1,#2]
420 ldrb r0,[r1,#1]
421 orr r2,r2,r3,lsl#8
422 ldrb r3,[r1],#4
423 orr r2,r2,r0,lsl#16
424# if 5==15
425 str r1,[sp,#17*4] @ make room for r1
426# endif
427 eor r0,r11,r11,ror#5
428 orr r2,r2,r3,lsl#24
429 eor r0,r0,r11,ror#19 @ Sigma1(e)
430#endif
431 ldr r3,[r14],#4 @ *K256++
432 add r6,r6,r2 @ h+=X[i]
433 str r2,[sp,#5*4]
434 eor r2,r4,r5
435 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
436 and r2,r2,r11
437 add r6,r6,r3 @ h+=K256[i]
438 eor r2,r2,r5 @ Ch(e,f,g)
439 eor r0,r7,r7,ror#11
440 add r6,r6,r2 @ h+=Ch(e,f,g)
441#if 5==31
442 and r3,r3,#0xff
443 cmp r3,#0xf2 @ done?
444#endif
445#if 5<15
446# if __ARM_ARCH__>=7
447 ldr r2,[r1],#4 @ prefetch
448# else
449 ldrb r2,[r1,#3]
450# endif
451 eor r3,r7,r8 @ a^b, b^c in next round
452#else
453 ldr r2,[sp,#7*4] @ from future BODY_16_xx
454 eor r3,r7,r8 @ a^b, b^c in next round
455 ldr r1,[sp,#4*4] @ from future BODY_16_xx
456#endif
457 eor r0,r0,r7,ror#20 @ Sigma0(a)
458 and r12,r12,r3 @ (b^c)&=(a^b)
459 add r10,r10,r6 @ d+=h
460 eor r12,r12,r8 @ Maj(a,b,c)
461 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
462 @ add r6,r6,r12 @ h+=Maj(a,b,c)
463#if __ARM_ARCH__>=7
464 @ ldr r2,[r1],#4 @ 6
465# if 6==15
466 str r1,[sp,#17*4] @ make room for r1
467# endif
468 eor r0,r10,r10,ror#5
469 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
470 eor r0,r0,r10,ror#19 @ Sigma1(e)
471# ifndef __ARMEB__
472 rev r2,r2
473# endif
474#else
475 @ ldrb r2,[r1,#3] @ 6
476 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
477 ldrb r12,[r1,#2]
478 ldrb r0,[r1,#1]
479 orr r2,r2,r12,lsl#8
480 ldrb r12,[r1],#4
481 orr r2,r2,r0,lsl#16
482# if 6==15
483 str r1,[sp,#17*4] @ make room for r1
484# endif
485 eor r0,r10,r10,ror#5
486 orr r2,r2,r12,lsl#24
487 eor r0,r0,r10,ror#19 @ Sigma1(e)
488#endif
489 ldr r12,[r14],#4 @ *K256++
490 add r5,r5,r2 @ h+=X[i]
491 str r2,[sp,#6*4]
492 eor r2,r11,r4
493 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
494 and r2,r2,r10
495 add r5,r5,r12 @ h+=K256[i]
496 eor r2,r2,r4 @ Ch(e,f,g)
497 eor r0,r6,r6,ror#11
498 add r5,r5,r2 @ h+=Ch(e,f,g)
499#if 6==31
500 and r12,r12,#0xff
501 cmp r12,#0xf2 @ done?
502#endif
503#if 6<15
504# if __ARM_ARCH__>=7
505 ldr r2,[r1],#4 @ prefetch
506# else
507 ldrb r2,[r1,#3]
508# endif
509 eor r12,r6,r7 @ a^b, b^c in next round
510#else
511 ldr r2,[sp,#8*4] @ from future BODY_16_xx
512 eor r12,r6,r7 @ a^b, b^c in next round
513 ldr r1,[sp,#5*4] @ from future BODY_16_xx
514#endif
515 eor r0,r0,r6,ror#20 @ Sigma0(a)
516 and r3,r3,r12 @ (b^c)&=(a^b)
517 add r9,r9,r5 @ d+=h
518 eor r3,r3,r7 @ Maj(a,b,c)
519 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
520 @ add r5,r5,r3 @ h+=Maj(a,b,c)
521#if __ARM_ARCH__>=7
522 @ ldr r2,[r1],#4 @ 7
523# if 7==15
524 str r1,[sp,#17*4] @ make room for r1
525# endif
526 eor r0,r9,r9,ror#5
527 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
528 eor r0,r0,r9,ror#19 @ Sigma1(e)
529# ifndef __ARMEB__
530 rev r2,r2
531# endif
532#else
533 @ ldrb r2,[r1,#3] @ 7
534 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
535 ldrb r3,[r1,#2]
536 ldrb r0,[r1,#1]
537 orr r2,r2,r3,lsl#8
538 ldrb r3,[r1],#4
539 orr r2,r2,r0,lsl#16
540# if 7==15
541 str r1,[sp,#17*4] @ make room for r1
542# endif
543 eor r0,r9,r9,ror#5
544 orr r2,r2,r3,lsl#24
545 eor r0,r0,r9,ror#19 @ Sigma1(e)
546#endif
547 ldr r3,[r14],#4 @ *K256++
548 add r4,r4,r2 @ h+=X[i]
549 str r2,[sp,#7*4]
550 eor r2,r10,r11
551 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
552 and r2,r2,r9
553 add r4,r4,r3 @ h+=K256[i]
554 eor r2,r2,r11 @ Ch(e,f,g)
555 eor r0,r5,r5,ror#11
556 add r4,r4,r2 @ h+=Ch(e,f,g)
557#if 7==31
558 and r3,r3,#0xff
559 cmp r3,#0xf2 @ done?
560#endif
561#if 7<15
562# if __ARM_ARCH__>=7
563 ldr r2,[r1],#4 @ prefetch
564# else
565 ldrb r2,[r1,#3]
566# endif
567 eor r3,r5,r6 @ a^b, b^c in next round
568#else
569 ldr r2,[sp,#9*4] @ from future BODY_16_xx
570 eor r3,r5,r6 @ a^b, b^c in next round
571 ldr r1,[sp,#6*4] @ from future BODY_16_xx
572#endif
573 eor r0,r0,r5,ror#20 @ Sigma0(a)
574 and r12,r12,r3 @ (b^c)&=(a^b)
575 add r8,r8,r4 @ d+=h
576 eor r12,r12,r6 @ Maj(a,b,c)
577 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
578 @ add r4,r4,r12 @ h+=Maj(a,b,c)
579#if __ARM_ARCH__>=7
580 @ ldr r2,[r1],#4 @ 8
581# if 8==15
582 str r1,[sp,#17*4] @ make room for r1
583# endif
584 eor r0,r8,r8,ror#5
585 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
586 eor r0,r0,r8,ror#19 @ Sigma1(e)
587# ifndef __ARMEB__
588 rev r2,r2
589# endif
590#else
591 @ ldrb r2,[r1,#3] @ 8
592 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
593 ldrb r12,[r1,#2]
594 ldrb r0,[r1,#1]
595 orr r2,r2,r12,lsl#8
596 ldrb r12,[r1],#4
597 orr r2,r2,r0,lsl#16
598# if 8==15
599 str r1,[sp,#17*4] @ make room for r1
600# endif
601 eor r0,r8,r8,ror#5
602 orr r2,r2,r12,lsl#24
603 eor r0,r0,r8,ror#19 @ Sigma1(e)
604#endif
605 ldr r12,[r14],#4 @ *K256++
606 add r11,r11,r2 @ h+=X[i]
607 str r2,[sp,#8*4]
608 eor r2,r9,r10
609 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
610 and r2,r2,r8
611 add r11,r11,r12 @ h+=K256[i]
612 eor r2,r2,r10 @ Ch(e,f,g)
613 eor r0,r4,r4,ror#11
614 add r11,r11,r2 @ h+=Ch(e,f,g)
615#if 8==31
616 and r12,r12,#0xff
617 cmp r12,#0xf2 @ done?
618#endif
619#if 8<15
620# if __ARM_ARCH__>=7
621 ldr r2,[r1],#4 @ prefetch
622# else
623 ldrb r2,[r1,#3]
624# endif
625 eor r12,r4,r5 @ a^b, b^c in next round
626#else
627 ldr r2,[sp,#10*4] @ from future BODY_16_xx
628 eor r12,r4,r5 @ a^b, b^c in next round
629 ldr r1,[sp,#7*4] @ from future BODY_16_xx
630#endif
631 eor r0,r0,r4,ror#20 @ Sigma0(a)
632 and r3,r3,r12 @ (b^c)&=(a^b)
633 add r7,r7,r11 @ d+=h
634 eor r3,r3,r5 @ Maj(a,b,c)
635 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
636 @ add r11,r11,r3 @ h+=Maj(a,b,c)
637#if __ARM_ARCH__>=7
638 @ ldr r2,[r1],#4 @ 9
639# if 9==15
640 str r1,[sp,#17*4] @ make room for r1
641# endif
642 eor r0,r7,r7,ror#5
643 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
644 eor r0,r0,r7,ror#19 @ Sigma1(e)
645# ifndef __ARMEB__
646 rev r2,r2
647# endif
648#else
649 @ ldrb r2,[r1,#3] @ 9
650 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
651 ldrb r3,[r1,#2]
652 ldrb r0,[r1,#1]
653 orr r2,r2,r3,lsl#8
654 ldrb r3,[r1],#4
655 orr r2,r2,r0,lsl#16
656# if 9==15
657 str r1,[sp,#17*4] @ make room for r1
658# endif
659 eor r0,r7,r7,ror#5
660 orr r2,r2,r3,lsl#24
661 eor r0,r0,r7,ror#19 @ Sigma1(e)
662#endif
663 ldr r3,[r14],#4 @ *K256++
664 add r10,r10,r2 @ h+=X[i]
665 str r2,[sp,#9*4]
666 eor r2,r8,r9
667 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
668 and r2,r2,r7
669 add r10,r10,r3 @ h+=K256[i]
670 eor r2,r2,r9 @ Ch(e,f,g)
671 eor r0,r11,r11,ror#11
672 add r10,r10,r2 @ h+=Ch(e,f,g)
673#if 9==31
674 and r3,r3,#0xff
675 cmp r3,#0xf2 @ done?
676#endif
677#if 9<15
678# if __ARM_ARCH__>=7
679 ldr r2,[r1],#4 @ prefetch
680# else
681 ldrb r2,[r1,#3]
682# endif
683 eor r3,r11,r4 @ a^b, b^c in next round
684#else
685 ldr r2,[sp,#11*4] @ from future BODY_16_xx
686 eor r3,r11,r4 @ a^b, b^c in next round
687 ldr r1,[sp,#8*4] @ from future BODY_16_xx
688#endif
689 eor r0,r0,r11,ror#20 @ Sigma0(a)
690 and r12,r12,r3 @ (b^c)&=(a^b)
691 add r6,r6,r10 @ d+=h
692 eor r12,r12,r4 @ Maj(a,b,c)
693 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
694 @ add r10,r10,r12 @ h+=Maj(a,b,c)
695#if __ARM_ARCH__>=7
696 @ ldr r2,[r1],#4 @ 10
697# if 10==15
698 str r1,[sp,#17*4] @ make room for r1
699# endif
700 eor r0,r6,r6,ror#5
701 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
702 eor r0,r0,r6,ror#19 @ Sigma1(e)
703# ifndef __ARMEB__
704 rev r2,r2
705# endif
706#else
707 @ ldrb r2,[r1,#3] @ 10
708 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
709 ldrb r12,[r1,#2]
710 ldrb r0,[r1,#1]
711 orr r2,r2,r12,lsl#8
712 ldrb r12,[r1],#4
713 orr r2,r2,r0,lsl#16
714# if 10==15
715 str r1,[sp,#17*4] @ make room for r1
716# endif
717 eor r0,r6,r6,ror#5
718 orr r2,r2,r12,lsl#24
719 eor r0,r0,r6,ror#19 @ Sigma1(e)
720#endif
721 ldr r12,[r14],#4 @ *K256++
722 add r9,r9,r2 @ h+=X[i]
723 str r2,[sp,#10*4]
724 eor r2,r7,r8
725 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
726 and r2,r2,r6
727 add r9,r9,r12 @ h+=K256[i]
728 eor r2,r2,r8 @ Ch(e,f,g)
729 eor r0,r10,r10,ror#11
730 add r9,r9,r2 @ h+=Ch(e,f,g)
731#if 10==31
732 and r12,r12,#0xff
733 cmp r12,#0xf2 @ done?
734#endif
735#if 10<15
736# if __ARM_ARCH__>=7
737 ldr r2,[r1],#4 @ prefetch
738# else
739 ldrb r2,[r1,#3]
740# endif
741 eor r12,r10,r11 @ a^b, b^c in next round
742#else
743 ldr r2,[sp,#12*4] @ from future BODY_16_xx
744 eor r12,r10,r11 @ a^b, b^c in next round
745 ldr r1,[sp,#9*4] @ from future BODY_16_xx
746#endif
747 eor r0,r0,r10,ror#20 @ Sigma0(a)
748 and r3,r3,r12 @ (b^c)&=(a^b)
749 add r5,r5,r9 @ d+=h
750 eor r3,r3,r11 @ Maj(a,b,c)
751 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
752 @ add r9,r9,r3 @ h+=Maj(a,b,c)
753#if __ARM_ARCH__>=7
754 @ ldr r2,[r1],#4 @ 11
755# if 11==15
756 str r1,[sp,#17*4] @ make room for r1
757# endif
758 eor r0,r5,r5,ror#5
759 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
760 eor r0,r0,r5,ror#19 @ Sigma1(e)
761# ifndef __ARMEB__
762 rev r2,r2
763# endif
764#else
765 @ ldrb r2,[r1,#3] @ 11
766 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
767 ldrb r3,[r1,#2]
768 ldrb r0,[r1,#1]
769 orr r2,r2,r3,lsl#8
770 ldrb r3,[r1],#4
771 orr r2,r2,r0,lsl#16
772# if 11==15
773 str r1,[sp,#17*4] @ make room for r1
774# endif
775 eor r0,r5,r5,ror#5
776 orr r2,r2,r3,lsl#24
777 eor r0,r0,r5,ror#19 @ Sigma1(e)
778#endif
779 ldr r3,[r14],#4 @ *K256++
780 add r8,r8,r2 @ h+=X[i]
781 str r2,[sp,#11*4]
782 eor r2,r6,r7
783 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
784 and r2,r2,r5
785 add r8,r8,r3 @ h+=K256[i]
786 eor r2,r2,r7 @ Ch(e,f,g)
787 eor r0,r9,r9,ror#11
788 add r8,r8,r2 @ h+=Ch(e,f,g)
789#if 11==31
790 and r3,r3,#0xff
791 cmp r3,#0xf2 @ done?
792#endif
793#if 11<15
794# if __ARM_ARCH__>=7
795 ldr r2,[r1],#4 @ prefetch
796# else
797 ldrb r2,[r1,#3]
798# endif
799 eor r3,r9,r10 @ a^b, b^c in next round
800#else
801 ldr r2,[sp,#13*4] @ from future BODY_16_xx
802 eor r3,r9,r10 @ a^b, b^c in next round
803 ldr r1,[sp,#10*4] @ from future BODY_16_xx
804#endif
805 eor r0,r0,r9,ror#20 @ Sigma0(a)
806 and r12,r12,r3 @ (b^c)&=(a^b)
807 add r4,r4,r8 @ d+=h
808 eor r12,r12,r10 @ Maj(a,b,c)
809 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
810 @ add r8,r8,r12 @ h+=Maj(a,b,c)
811#if __ARM_ARCH__>=7
812 @ ldr r2,[r1],#4 @ 12
813# if 12==15
814 str r1,[sp,#17*4] @ make room for r1
815# endif
816 eor r0,r4,r4,ror#5
817 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
818 eor r0,r0,r4,ror#19 @ Sigma1(e)
819# ifndef __ARMEB__
820 rev r2,r2
821# endif
822#else
823 @ ldrb r2,[r1,#3] @ 12
824 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
825 ldrb r12,[r1,#2]
826 ldrb r0,[r1,#1]
827 orr r2,r2,r12,lsl#8
828 ldrb r12,[r1],#4
829 orr r2,r2,r0,lsl#16
830# if 12==15
831 str r1,[sp,#17*4] @ make room for r1
832# endif
833 eor r0,r4,r4,ror#5
834 orr r2,r2,r12,lsl#24
835 eor r0,r0,r4,ror#19 @ Sigma1(e)
836#endif
837 ldr r12,[r14],#4 @ *K256++
838 add r7,r7,r2 @ h+=X[i]
839 str r2,[sp,#12*4]
840 eor r2,r5,r6
841 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
842 and r2,r2,r4
843 add r7,r7,r12 @ h+=K256[i]
844 eor r2,r2,r6 @ Ch(e,f,g)
845 eor r0,r8,r8,ror#11
846 add r7,r7,r2 @ h+=Ch(e,f,g)
847#if 12==31
848 and r12,r12,#0xff
849 cmp r12,#0xf2 @ done?
850#endif
851#if 12<15
852# if __ARM_ARCH__>=7
853 ldr r2,[r1],#4 @ prefetch
854# else
855 ldrb r2,[r1,#3]
856# endif
857 eor r12,r8,r9 @ a^b, b^c in next round
858#else
859 ldr r2,[sp,#14*4] @ from future BODY_16_xx
860 eor r12,r8,r9 @ a^b, b^c in next round
861 ldr r1,[sp,#11*4] @ from future BODY_16_xx
862#endif
863 eor r0,r0,r8,ror#20 @ Sigma0(a)
864 and r3,r3,r12 @ (b^c)&=(a^b)
865 add r11,r11,r7 @ d+=h
866 eor r3,r3,r9 @ Maj(a,b,c)
867 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
868 @ add r7,r7,r3 @ h+=Maj(a,b,c)
869#if __ARM_ARCH__>=7
870 @ ldr r2,[r1],#4 @ 13
871# if 13==15
872 str r1,[sp,#17*4] @ make room for r1
873# endif
874 eor r0,r11,r11,ror#5
875 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
876 eor r0,r0,r11,ror#19 @ Sigma1(e)
877# ifndef __ARMEB__
878 rev r2,r2
879# endif
880#else
881 @ ldrb r2,[r1,#3] @ 13
882 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
883 ldrb r3,[r1,#2]
884 ldrb r0,[r1,#1]
885 orr r2,r2,r3,lsl#8
886 ldrb r3,[r1],#4
887 orr r2,r2,r0,lsl#16
888# if 13==15
889 str r1,[sp,#17*4] @ make room for r1
890# endif
891 eor r0,r11,r11,ror#5
892 orr r2,r2,r3,lsl#24
893 eor r0,r0,r11,ror#19 @ Sigma1(e)
894#endif
895 ldr r3,[r14],#4 @ *K256++
896 add r6,r6,r2 @ h+=X[i]
897 str r2,[sp,#13*4]
898 eor r2,r4,r5
899 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
900 and r2,r2,r11
901 add r6,r6,r3 @ h+=K256[i]
902 eor r2,r2,r5 @ Ch(e,f,g)
903 eor r0,r7,r7,ror#11
904 add r6,r6,r2 @ h+=Ch(e,f,g)
905#if 13==31
906 and r3,r3,#0xff
907 cmp r3,#0xf2 @ done?
908#endif
909#if 13<15
910# if __ARM_ARCH__>=7
911 ldr r2,[r1],#4 @ prefetch
912# else
913 ldrb r2,[r1,#3]
914# endif
915 eor r3,r7,r8 @ a^b, b^c in next round
916#else
917 ldr r2,[sp,#15*4] @ from future BODY_16_xx
918 eor r3,r7,r8 @ a^b, b^c in next round
919 ldr r1,[sp,#12*4] @ from future BODY_16_xx
920#endif
921 eor r0,r0,r7,ror#20 @ Sigma0(a)
922 and r12,r12,r3 @ (b^c)&=(a^b)
923 add r10,r10,r6 @ d+=h
924 eor r12,r12,r8 @ Maj(a,b,c)
925 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
926 @ add r6,r6,r12 @ h+=Maj(a,b,c)
927#if __ARM_ARCH__>=7
928 @ ldr r2,[r1],#4 @ 14
929# if 14==15
930 str r1,[sp,#17*4] @ make room for r1
931# endif
932 eor r0,r10,r10,ror#5
933 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
934 eor r0,r0,r10,ror#19 @ Sigma1(e)
935# ifndef __ARMEB__
936 rev r2,r2
937# endif
938#else
939 @ ldrb r2,[r1,#3] @ 14
940 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
941 ldrb r12,[r1,#2]
942 ldrb r0,[r1,#1]
943 orr r2,r2,r12,lsl#8
944 ldrb r12,[r1],#4
945 orr r2,r2,r0,lsl#16
946# if 14==15
947 str r1,[sp,#17*4] @ make room for r1
948# endif
949 eor r0,r10,r10,ror#5
950 orr r2,r2,r12,lsl#24
951 eor r0,r0,r10,ror#19 @ Sigma1(e)
952#endif
953 ldr r12,[r14],#4 @ *K256++
954 add r5,r5,r2 @ h+=X[i]
955 str r2,[sp,#14*4]
956 eor r2,r11,r4
957 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
958 and r2,r2,r10
959 add r5,r5,r12 @ h+=K256[i]
960 eor r2,r2,r4 @ Ch(e,f,g)
961 eor r0,r6,r6,ror#11
962 add r5,r5,r2 @ h+=Ch(e,f,g)
963#if 14==31
964 and r12,r12,#0xff
965 cmp r12,#0xf2 @ done?
966#endif
967#if 14<15
968# if __ARM_ARCH__>=7
969 ldr r2,[r1],#4 @ prefetch
970# else
971 ldrb r2,[r1,#3]
972# endif
973 eor r12,r6,r7 @ a^b, b^c in next round
974#else
975 ldr r2,[sp,#0*4] @ from future BODY_16_xx
976 eor r12,r6,r7 @ a^b, b^c in next round
977 ldr r1,[sp,#13*4] @ from future BODY_16_xx
978#endif
979 eor r0,r0,r6,ror#20 @ Sigma0(a)
980 and r3,r3,r12 @ (b^c)&=(a^b)
981 add r9,r9,r5 @ d+=h
982 eor r3,r3,r7 @ Maj(a,b,c)
983 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
984 @ add r5,r5,r3 @ h+=Maj(a,b,c)
985#if __ARM_ARCH__>=7
986 @ ldr r2,[r1],#4 @ 15
987# if 15==15
988 str r1,[sp,#17*4] @ make room for r1
989# endif
990 eor r0,r9,r9,ror#5
991 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
992 eor r0,r0,r9,ror#19 @ Sigma1(e)
993# ifndef __ARMEB__
994 rev r2,r2
995# endif
996#else
997 @ ldrb r2,[r1,#3] @ 15
998 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
999 ldrb r3,[r1,#2]
1000 ldrb r0,[r1,#1]
1001 orr r2,r2,r3,lsl#8
1002 ldrb r3,[r1],#4
1003 orr r2,r2,r0,lsl#16
1004# if 15==15
1005 str r1,[sp,#17*4] @ make room for r1
1006# endif
1007 eor r0,r9,r9,ror#5
1008 orr r2,r2,r3,lsl#24
1009 eor r0,r0,r9,ror#19 @ Sigma1(e)
1010#endif
1011 ldr r3,[r14],#4 @ *K256++
1012 add r4,r4,r2 @ h+=X[i]
1013 str r2,[sp,#15*4]
1014 eor r2,r10,r11
1015 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1016 and r2,r2,r9
1017 add r4,r4,r3 @ h+=K256[i]
1018 eor r2,r2,r11 @ Ch(e,f,g)
1019 eor r0,r5,r5,ror#11
1020 add r4,r4,r2 @ h+=Ch(e,f,g)
1021#if 15==31
1022 and r3,r3,#0xff
1023 cmp r3,#0xf2 @ done?
1024#endif
1025#if 15<15
1026# if __ARM_ARCH__>=7
1027 ldr r2,[r1],#4 @ prefetch
1028# else
1029 ldrb r2,[r1,#3]
1030# endif
1031 eor r3,r5,r6 @ a^b, b^c in next round
1032#else
1033 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1034 eor r3,r5,r6 @ a^b, b^c in next round
1035 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1036#endif
1037 eor r0,r0,r5,ror#20 @ Sigma0(a)
1038 and r12,r12,r3 @ (b^c)&=(a^b)
1039 add r8,r8,r4 @ d+=h
1040 eor r12,r12,r6 @ Maj(a,b,c)
1041 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1042 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1043.Lrounds_16_xx:
1044 @ ldr r2,[sp,#1*4] @ 16
1045 @ ldr r1,[sp,#14*4]
1046 mov r0,r2,ror#7
1047 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1048 mov r12,r1,ror#17
1049 eor r0,r0,r2,ror#18
1050 eor r12,r12,r1,ror#19
1051 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1052 ldr r2,[sp,#0*4]
1053 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1054 ldr r1,[sp,#9*4]
1055
1056 add r12,r12,r0
1057 eor r0,r8,r8,ror#5 @ from BODY_00_15
1058 add r2,r2,r12
1059 eor r0,r0,r8,ror#19 @ Sigma1(e)
1060 add r2,r2,r1 @ X[i]
1061 ldr r12,[r14],#4 @ *K256++
1062 add r11,r11,r2 @ h+=X[i]
1063 str r2,[sp,#0*4]
1064 eor r2,r9,r10
1065 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1066 and r2,r2,r8
1067 add r11,r11,r12 @ h+=K256[i]
1068 eor r2,r2,r10 @ Ch(e,f,g)
1069 eor r0,r4,r4,ror#11
1070 add r11,r11,r2 @ h+=Ch(e,f,g)
1071#if 16==31
1072 and r12,r12,#0xff
1073 cmp r12,#0xf2 @ done?
1074#endif
1075#if 16<15
1076# if __ARM_ARCH__>=7
1077 ldr r2,[r1],#4 @ prefetch
1078# else
1079 ldrb r2,[r1,#3]
1080# endif
1081 eor r12,r4,r5 @ a^b, b^c in next round
1082#else
1083 ldr r2,[sp,#2*4] @ from future BODY_16_xx
1084 eor r12,r4,r5 @ a^b, b^c in next round
1085 ldr r1,[sp,#15*4] @ from future BODY_16_xx
1086#endif
1087 eor r0,r0,r4,ror#20 @ Sigma0(a)
1088 and r3,r3,r12 @ (b^c)&=(a^b)
1089 add r7,r7,r11 @ d+=h
1090 eor r3,r3,r5 @ Maj(a,b,c)
1091 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1092 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1093 @ ldr r2,[sp,#2*4] @ 17
1094 @ ldr r1,[sp,#15*4]
1095 mov r0,r2,ror#7
1096 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1097 mov r3,r1,ror#17
1098 eor r0,r0,r2,ror#18
1099 eor r3,r3,r1,ror#19
1100 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1101 ldr r2,[sp,#1*4]
1102 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1103 ldr r1,[sp,#10*4]
1104
1105 add r3,r3,r0
1106 eor r0,r7,r7,ror#5 @ from BODY_00_15
1107 add r2,r2,r3
1108 eor r0,r0,r7,ror#19 @ Sigma1(e)
1109 add r2,r2,r1 @ X[i]
1110 ldr r3,[r14],#4 @ *K256++
1111 add r10,r10,r2 @ h+=X[i]
1112 str r2,[sp,#1*4]
1113 eor r2,r8,r9
1114 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1115 and r2,r2,r7
1116 add r10,r10,r3 @ h+=K256[i]
1117 eor r2,r2,r9 @ Ch(e,f,g)
1118 eor r0,r11,r11,ror#11
1119 add r10,r10,r2 @ h+=Ch(e,f,g)
1120#if 17==31
1121 and r3,r3,#0xff
1122 cmp r3,#0xf2 @ done?
1123#endif
1124#if 17<15
1125# if __ARM_ARCH__>=7
1126 ldr r2,[r1],#4 @ prefetch
1127# else
1128 ldrb r2,[r1,#3]
1129# endif
1130 eor r3,r11,r4 @ a^b, b^c in next round
1131#else
1132 ldr r2,[sp,#3*4] @ from future BODY_16_xx
1133 eor r3,r11,r4 @ a^b, b^c in next round
1134 ldr r1,[sp,#0*4] @ from future BODY_16_xx
1135#endif
1136 eor r0,r0,r11,ror#20 @ Sigma0(a)
1137 and r12,r12,r3 @ (b^c)&=(a^b)
1138 add r6,r6,r10 @ d+=h
1139 eor r12,r12,r4 @ Maj(a,b,c)
1140 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1141 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1142 @ ldr r2,[sp,#3*4] @ 18
1143 @ ldr r1,[sp,#0*4]
1144 mov r0,r2,ror#7
1145 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1146 mov r12,r1,ror#17
1147 eor r0,r0,r2,ror#18
1148 eor r12,r12,r1,ror#19
1149 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1150 ldr r2,[sp,#2*4]
1151 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1152 ldr r1,[sp,#11*4]
1153
1154 add r12,r12,r0
1155 eor r0,r6,r6,ror#5 @ from BODY_00_15
1156 add r2,r2,r12
1157 eor r0,r0,r6,ror#19 @ Sigma1(e)
1158 add r2,r2,r1 @ X[i]
1159 ldr r12,[r14],#4 @ *K256++
1160 add r9,r9,r2 @ h+=X[i]
1161 str r2,[sp,#2*4]
1162 eor r2,r7,r8
1163 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1164 and r2,r2,r6
1165 add r9,r9,r12 @ h+=K256[i]
1166 eor r2,r2,r8 @ Ch(e,f,g)
1167 eor r0,r10,r10,ror#11
1168 add r9,r9,r2 @ h+=Ch(e,f,g)
1169#if 18==31
1170 and r12,r12,#0xff
1171 cmp r12,#0xf2 @ done?
1172#endif
1173#if 18<15
1174# if __ARM_ARCH__>=7
1175 ldr r2,[r1],#4 @ prefetch
1176# else
1177 ldrb r2,[r1,#3]
1178# endif
1179 eor r12,r10,r11 @ a^b, b^c in next round
1180#else
1181 ldr r2,[sp,#4*4] @ from future BODY_16_xx
1182 eor r12,r10,r11 @ a^b, b^c in next round
1183 ldr r1,[sp,#1*4] @ from future BODY_16_xx
1184#endif
1185 eor r0,r0,r10,ror#20 @ Sigma0(a)
1186 and r3,r3,r12 @ (b^c)&=(a^b)
1187 add r5,r5,r9 @ d+=h
1188 eor r3,r3,r11 @ Maj(a,b,c)
1189 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1190 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1191 @ ldr r2,[sp,#4*4] @ 19
1192 @ ldr r1,[sp,#1*4]
1193 mov r0,r2,ror#7
1194 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1195 mov r3,r1,ror#17
1196 eor r0,r0,r2,ror#18
1197 eor r3,r3,r1,ror#19
1198 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1199 ldr r2,[sp,#3*4]
1200 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1201 ldr r1,[sp,#12*4]
1202
1203 add r3,r3,r0
1204 eor r0,r5,r5,ror#5 @ from BODY_00_15
1205 add r2,r2,r3
1206 eor r0,r0,r5,ror#19 @ Sigma1(e)
1207 add r2,r2,r1 @ X[i]
1208 ldr r3,[r14],#4 @ *K256++
1209 add r8,r8,r2 @ h+=X[i]
1210 str r2,[sp,#3*4]
1211 eor r2,r6,r7
1212 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1213 and r2,r2,r5
1214 add r8,r8,r3 @ h+=K256[i]
1215 eor r2,r2,r7 @ Ch(e,f,g)
1216 eor r0,r9,r9,ror#11
1217 add r8,r8,r2 @ h+=Ch(e,f,g)
1218#if 19==31
1219 and r3,r3,#0xff
1220 cmp r3,#0xf2 @ done?
1221#endif
1222#if 19<15
1223# if __ARM_ARCH__>=7
1224 ldr r2,[r1],#4 @ prefetch
1225# else
1226 ldrb r2,[r1,#3]
1227# endif
1228 eor r3,r9,r10 @ a^b, b^c in next round
1229#else
1230 ldr r2,[sp,#5*4] @ from future BODY_16_xx
1231 eor r3,r9,r10 @ a^b, b^c in next round
1232 ldr r1,[sp,#2*4] @ from future BODY_16_xx
1233#endif
1234 eor r0,r0,r9,ror#20 @ Sigma0(a)
1235 and r12,r12,r3 @ (b^c)&=(a^b)
1236 add r4,r4,r8 @ d+=h
1237 eor r12,r12,r10 @ Maj(a,b,c)
1238 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1239 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1240 @ ldr r2,[sp,#5*4] @ 20
1241 @ ldr r1,[sp,#2*4]
1242 mov r0,r2,ror#7
1243 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1244 mov r12,r1,ror#17
1245 eor r0,r0,r2,ror#18
1246 eor r12,r12,r1,ror#19
1247 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1248 ldr r2,[sp,#4*4]
1249 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1250 ldr r1,[sp,#13*4]
1251
1252 add r12,r12,r0
1253 eor r0,r4,r4,ror#5 @ from BODY_00_15
1254 add r2,r2,r12
1255 eor r0,r0,r4,ror#19 @ Sigma1(e)
1256 add r2,r2,r1 @ X[i]
1257 ldr r12,[r14],#4 @ *K256++
1258 add r7,r7,r2 @ h+=X[i]
1259 str r2,[sp,#4*4]
1260 eor r2,r5,r6
1261 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1262 and r2,r2,r4
1263 add r7,r7,r12 @ h+=K256[i]
1264 eor r2,r2,r6 @ Ch(e,f,g)
1265 eor r0,r8,r8,ror#11
1266 add r7,r7,r2 @ h+=Ch(e,f,g)
1267#if 20==31
1268 and r12,r12,#0xff
1269 cmp r12,#0xf2 @ done?
1270#endif
1271#if 20<15
1272# if __ARM_ARCH__>=7
1273 ldr r2,[r1],#4 @ prefetch
1274# else
1275 ldrb r2,[r1,#3]
1276# endif
1277 eor r12,r8,r9 @ a^b, b^c in next round
1278#else
1279 ldr r2,[sp,#6*4] @ from future BODY_16_xx
1280 eor r12,r8,r9 @ a^b, b^c in next round
1281 ldr r1,[sp,#3*4] @ from future BODY_16_xx
1282#endif
1283 eor r0,r0,r8,ror#20 @ Sigma0(a)
1284 and r3,r3,r12 @ (b^c)&=(a^b)
1285 add r11,r11,r7 @ d+=h
1286 eor r3,r3,r9 @ Maj(a,b,c)
1287 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1288 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1289 @ ldr r2,[sp,#6*4] @ 21
1290 @ ldr r1,[sp,#3*4]
1291 mov r0,r2,ror#7
1292 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1293 mov r3,r1,ror#17
1294 eor r0,r0,r2,ror#18
1295 eor r3,r3,r1,ror#19
1296 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1297 ldr r2,[sp,#5*4]
1298 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1299 ldr r1,[sp,#14*4]
1300
1301 add r3,r3,r0
1302 eor r0,r11,r11,ror#5 @ from BODY_00_15
1303 add r2,r2,r3
1304 eor r0,r0,r11,ror#19 @ Sigma1(e)
1305 add r2,r2,r1 @ X[i]
1306 ldr r3,[r14],#4 @ *K256++
1307 add r6,r6,r2 @ h+=X[i]
1308 str r2,[sp,#5*4]
1309 eor r2,r4,r5
1310 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1311 and r2,r2,r11
1312 add r6,r6,r3 @ h+=K256[i]
1313 eor r2,r2,r5 @ Ch(e,f,g)
1314 eor r0,r7,r7,ror#11
1315 add r6,r6,r2 @ h+=Ch(e,f,g)
1316#if 21==31
1317 and r3,r3,#0xff
1318 cmp r3,#0xf2 @ done?
1319#endif
1320#if 21<15
1321# if __ARM_ARCH__>=7
1322 ldr r2,[r1],#4 @ prefetch
1323# else
1324 ldrb r2,[r1,#3]
1325# endif
1326 eor r3,r7,r8 @ a^b, b^c in next round
1327#else
1328 ldr r2,[sp,#7*4] @ from future BODY_16_xx
1329 eor r3,r7,r8 @ a^b, b^c in next round
1330 ldr r1,[sp,#4*4] @ from future BODY_16_xx
1331#endif
1332 eor r0,r0,r7,ror#20 @ Sigma0(a)
1333 and r12,r12,r3 @ (b^c)&=(a^b)
1334 add r10,r10,r6 @ d+=h
1335 eor r12,r12,r8 @ Maj(a,b,c)
1336 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1337 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1338 @ ldr r2,[sp,#7*4] @ 22
1339 @ ldr r1,[sp,#4*4]
1340 mov r0,r2,ror#7
1341 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1342 mov r12,r1,ror#17
1343 eor r0,r0,r2,ror#18
1344 eor r12,r12,r1,ror#19
1345 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1346 ldr r2,[sp,#6*4]
1347 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1348 ldr r1,[sp,#15*4]
1349
1350 add r12,r12,r0
1351 eor r0,r10,r10,ror#5 @ from BODY_00_15
1352 add r2,r2,r12
1353 eor r0,r0,r10,ror#19 @ Sigma1(e)
1354 add r2,r2,r1 @ X[i]
1355 ldr r12,[r14],#4 @ *K256++
1356 add r5,r5,r2 @ h+=X[i]
1357 str r2,[sp,#6*4]
1358 eor r2,r11,r4
1359 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1360 and r2,r2,r10
1361 add r5,r5,r12 @ h+=K256[i]
1362 eor r2,r2,r4 @ Ch(e,f,g)
1363 eor r0,r6,r6,ror#11
1364 add r5,r5,r2 @ h+=Ch(e,f,g)
1365#if 22==31
1366 and r12,r12,#0xff
1367 cmp r12,#0xf2 @ done?
1368#endif
1369#if 22<15
1370# if __ARM_ARCH__>=7
1371 ldr r2,[r1],#4 @ prefetch
1372# else
1373 ldrb r2,[r1,#3]
1374# endif
1375 eor r12,r6,r7 @ a^b, b^c in next round
1376#else
1377 ldr r2,[sp,#8*4] @ from future BODY_16_xx
1378 eor r12,r6,r7 @ a^b, b^c in next round
1379 ldr r1,[sp,#5*4] @ from future BODY_16_xx
1380#endif
1381 eor r0,r0,r6,ror#20 @ Sigma0(a)
1382 and r3,r3,r12 @ (b^c)&=(a^b)
1383 add r9,r9,r5 @ d+=h
1384 eor r3,r3,r7 @ Maj(a,b,c)
1385 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1386 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1387 @ ldr r2,[sp,#8*4] @ 23
1388 @ ldr r1,[sp,#5*4]
1389 mov r0,r2,ror#7
1390 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1391 mov r3,r1,ror#17
1392 eor r0,r0,r2,ror#18
1393 eor r3,r3,r1,ror#19
1394 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1395 ldr r2,[sp,#7*4]
1396 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1397 ldr r1,[sp,#0*4]
1398
1399 add r3,r3,r0
1400 eor r0,r9,r9,ror#5 @ from BODY_00_15
1401 add r2,r2,r3
1402 eor r0,r0,r9,ror#19 @ Sigma1(e)
1403 add r2,r2,r1 @ X[i]
1404 ldr r3,[r14],#4 @ *K256++
1405 add r4,r4,r2 @ h+=X[i]
1406 str r2,[sp,#7*4]
1407 eor r2,r10,r11
1408 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1409 and r2,r2,r9
1410 add r4,r4,r3 @ h+=K256[i]
1411 eor r2,r2,r11 @ Ch(e,f,g)
1412 eor r0,r5,r5,ror#11
1413 add r4,r4,r2 @ h+=Ch(e,f,g)
1414#if 23==31
1415 and r3,r3,#0xff
1416 cmp r3,#0xf2 @ done?
1417#endif
1418#if 23<15
1419# if __ARM_ARCH__>=7
1420 ldr r2,[r1],#4 @ prefetch
1421# else
1422 ldrb r2,[r1,#3]
1423# endif
1424 eor r3,r5,r6 @ a^b, b^c in next round
1425#else
1426 ldr r2,[sp,#9*4] @ from future BODY_16_xx
1427 eor r3,r5,r6 @ a^b, b^c in next round
1428 ldr r1,[sp,#6*4] @ from future BODY_16_xx
1429#endif
1430 eor r0,r0,r5,ror#20 @ Sigma0(a)
1431 and r12,r12,r3 @ (b^c)&=(a^b)
1432 add r8,r8,r4 @ d+=h
1433 eor r12,r12,r6 @ Maj(a,b,c)
1434 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1435 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1436 @ ldr r2,[sp,#9*4] @ 24
1437 @ ldr r1,[sp,#6*4]
1438 mov r0,r2,ror#7
1439 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1440 mov r12,r1,ror#17
1441 eor r0,r0,r2,ror#18
1442 eor r12,r12,r1,ror#19
1443 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1444 ldr r2,[sp,#8*4]
1445 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1446 ldr r1,[sp,#1*4]
1447
1448 add r12,r12,r0
1449 eor r0,r8,r8,ror#5 @ from BODY_00_15
1450 add r2,r2,r12
1451 eor r0,r0,r8,ror#19 @ Sigma1(e)
1452 add r2,r2,r1 @ X[i]
1453 ldr r12,[r14],#4 @ *K256++
1454 add r11,r11,r2 @ h+=X[i]
1455 str r2,[sp,#8*4]
1456 eor r2,r9,r10
1457 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1458 and r2,r2,r8
1459 add r11,r11,r12 @ h+=K256[i]
1460 eor r2,r2,r10 @ Ch(e,f,g)
1461 eor r0,r4,r4,ror#11
1462 add r11,r11,r2 @ h+=Ch(e,f,g)
1463#if 24==31
1464 and r12,r12,#0xff
1465 cmp r12,#0xf2 @ done?
1466#endif
1467#if 24<15
1468# if __ARM_ARCH__>=7
1469 ldr r2,[r1],#4 @ prefetch
1470# else
1471 ldrb r2,[r1,#3]
1472# endif
1473 eor r12,r4,r5 @ a^b, b^c in next round
1474#else
1475 ldr r2,[sp,#10*4] @ from future BODY_16_xx
1476 eor r12,r4,r5 @ a^b, b^c in next round
1477 ldr r1,[sp,#7*4] @ from future BODY_16_xx
1478#endif
1479 eor r0,r0,r4,ror#20 @ Sigma0(a)
1480 and r3,r3,r12 @ (b^c)&=(a^b)
1481 add r7,r7,r11 @ d+=h
1482 eor r3,r3,r5 @ Maj(a,b,c)
1483 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1484 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1485 @ ldr r2,[sp,#10*4] @ 25
1486 @ ldr r1,[sp,#7*4]
1487 mov r0,r2,ror#7
1488 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1489 mov r3,r1,ror#17
1490 eor r0,r0,r2,ror#18
1491 eor r3,r3,r1,ror#19
1492 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1493 ldr r2,[sp,#9*4]
1494 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1495 ldr r1,[sp,#2*4]
1496
1497 add r3,r3,r0
1498 eor r0,r7,r7,ror#5 @ from BODY_00_15
1499 add r2,r2,r3
1500 eor r0,r0,r7,ror#19 @ Sigma1(e)
1501 add r2,r2,r1 @ X[i]
1502 ldr r3,[r14],#4 @ *K256++
1503 add r10,r10,r2 @ h+=X[i]
1504 str r2,[sp,#9*4]
1505 eor r2,r8,r9
1506 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1507 and r2,r2,r7
1508 add r10,r10,r3 @ h+=K256[i]
1509 eor r2,r2,r9 @ Ch(e,f,g)
1510 eor r0,r11,r11,ror#11
1511 add r10,r10,r2 @ h+=Ch(e,f,g)
1512#if 25==31
1513 and r3,r3,#0xff
1514 cmp r3,#0xf2 @ done?
1515#endif
1516#if 25<15
1517# if __ARM_ARCH__>=7
1518 ldr r2,[r1],#4 @ prefetch
1519# else
1520 ldrb r2,[r1,#3]
1521# endif
1522 eor r3,r11,r4 @ a^b, b^c in next round
1523#else
1524 ldr r2,[sp,#11*4] @ from future BODY_16_xx
1525 eor r3,r11,r4 @ a^b, b^c in next round
1526 ldr r1,[sp,#8*4] @ from future BODY_16_xx
1527#endif
1528 eor r0,r0,r11,ror#20 @ Sigma0(a)
1529 and r12,r12,r3 @ (b^c)&=(a^b)
1530 add r6,r6,r10 @ d+=h
1531 eor r12,r12,r4 @ Maj(a,b,c)
1532 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1533 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1534 @ ldr r2,[sp,#11*4] @ 26
1535 @ ldr r1,[sp,#8*4]
1536 mov r0,r2,ror#7
1537 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1538 mov r12,r1,ror#17
1539 eor r0,r0,r2,ror#18
1540 eor r12,r12,r1,ror#19
1541 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1542 ldr r2,[sp,#10*4]
1543 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1544 ldr r1,[sp,#3*4]
1545
1546 add r12,r12,r0
1547 eor r0,r6,r6,ror#5 @ from BODY_00_15
1548 add r2,r2,r12
1549 eor r0,r0,r6,ror#19 @ Sigma1(e)
1550 add r2,r2,r1 @ X[i]
1551 ldr r12,[r14],#4 @ *K256++
1552 add r9,r9,r2 @ h+=X[i]
1553 str r2,[sp,#10*4]
1554 eor r2,r7,r8
1555 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1556 and r2,r2,r6
1557 add r9,r9,r12 @ h+=K256[i]
1558 eor r2,r2,r8 @ Ch(e,f,g)
1559 eor r0,r10,r10,ror#11
1560 add r9,r9,r2 @ h+=Ch(e,f,g)
1561#if 26==31
1562 and r12,r12,#0xff
1563 cmp r12,#0xf2 @ done?
1564#endif
1565#if 26<15
1566# if __ARM_ARCH__>=7
1567 ldr r2,[r1],#4 @ prefetch
1568# else
1569 ldrb r2,[r1,#3]
1570# endif
1571 eor r12,r10,r11 @ a^b, b^c in next round
1572#else
1573 ldr r2,[sp,#12*4] @ from future BODY_16_xx
1574 eor r12,r10,r11 @ a^b, b^c in next round
1575 ldr r1,[sp,#9*4] @ from future BODY_16_xx
1576#endif
1577 eor r0,r0,r10,ror#20 @ Sigma0(a)
1578 and r3,r3,r12 @ (b^c)&=(a^b)
1579 add r5,r5,r9 @ d+=h
1580 eor r3,r3,r11 @ Maj(a,b,c)
1581 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1582 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1583 @ ldr r2,[sp,#12*4] @ 27
1584 @ ldr r1,[sp,#9*4]
1585 mov r0,r2,ror#7
1586 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1587 mov r3,r1,ror#17
1588 eor r0,r0,r2,ror#18
1589 eor r3,r3,r1,ror#19
1590 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1591 ldr r2,[sp,#11*4]
1592 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1593 ldr r1,[sp,#4*4]
1594
1595 add r3,r3,r0
1596 eor r0,r5,r5,ror#5 @ from BODY_00_15
1597 add r2,r2,r3
1598 eor r0,r0,r5,ror#19 @ Sigma1(e)
1599 add r2,r2,r1 @ X[i]
1600 ldr r3,[r14],#4 @ *K256++
1601 add r8,r8,r2 @ h+=X[i]
1602 str r2,[sp,#11*4]
1603 eor r2,r6,r7
1604 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1605 and r2,r2,r5
1606 add r8,r8,r3 @ h+=K256[i]
1607 eor r2,r2,r7 @ Ch(e,f,g)
1608 eor r0,r9,r9,ror#11
1609 add r8,r8,r2 @ h+=Ch(e,f,g)
1610#if 27==31
1611 and r3,r3,#0xff
1612 cmp r3,#0xf2 @ done?
1613#endif
1614#if 27<15
1615# if __ARM_ARCH__>=7
1616 ldr r2,[r1],#4 @ prefetch
1617# else
1618 ldrb r2,[r1,#3]
1619# endif
1620 eor r3,r9,r10 @ a^b, b^c in next round
1621#else
1622 ldr r2,[sp,#13*4] @ from future BODY_16_xx
1623 eor r3,r9,r10 @ a^b, b^c in next round
1624 ldr r1,[sp,#10*4] @ from future BODY_16_xx
1625#endif
1626 eor r0,r0,r9,ror#20 @ Sigma0(a)
1627 and r12,r12,r3 @ (b^c)&=(a^b)
1628 add r4,r4,r8 @ d+=h
1629 eor r12,r12,r10 @ Maj(a,b,c)
1630 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1631 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1632 @ ldr r2,[sp,#13*4] @ 28
1633 @ ldr r1,[sp,#10*4]
1634 mov r0,r2,ror#7
1635 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1636 mov r12,r1,ror#17
1637 eor r0,r0,r2,ror#18
1638 eor r12,r12,r1,ror#19
1639 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1640 ldr r2,[sp,#12*4]
1641 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1642 ldr r1,[sp,#5*4]
1643
1644 add r12,r12,r0
1645 eor r0,r4,r4,ror#5 @ from BODY_00_15
1646 add r2,r2,r12
1647 eor r0,r0,r4,ror#19 @ Sigma1(e)
1648 add r2,r2,r1 @ X[i]
1649 ldr r12,[r14],#4 @ *K256++
1650 add r7,r7,r2 @ h+=X[i]
1651 str r2,[sp,#12*4]
1652 eor r2,r5,r6
1653 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1654 and r2,r2,r4
1655 add r7,r7,r12 @ h+=K256[i]
1656 eor r2,r2,r6 @ Ch(e,f,g)
1657 eor r0,r8,r8,ror#11
1658 add r7,r7,r2 @ h+=Ch(e,f,g)
1659#if 28==31
1660 and r12,r12,#0xff
1661 cmp r12,#0xf2 @ done?
1662#endif
1663#if 28<15
1664# if __ARM_ARCH__>=7
1665 ldr r2,[r1],#4 @ prefetch
1666# else
1667 ldrb r2,[r1,#3]
1668# endif
1669 eor r12,r8,r9 @ a^b, b^c in next round
1670#else
1671 ldr r2,[sp,#14*4] @ from future BODY_16_xx
1672 eor r12,r8,r9 @ a^b, b^c in next round
1673 ldr r1,[sp,#11*4] @ from future BODY_16_xx
1674#endif
1675 eor r0,r0,r8,ror#20 @ Sigma0(a)
1676 and r3,r3,r12 @ (b^c)&=(a^b)
1677 add r11,r11,r7 @ d+=h
1678 eor r3,r3,r9 @ Maj(a,b,c)
1679 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1680 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1681 @ ldr r2,[sp,#14*4] @ 29
1682 @ ldr r1,[sp,#11*4]
1683 mov r0,r2,ror#7
1684 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1685 mov r3,r1,ror#17
1686 eor r0,r0,r2,ror#18
1687 eor r3,r3,r1,ror#19
1688 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1689 ldr r2,[sp,#13*4]
1690 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1691 ldr r1,[sp,#6*4]
1692
1693 add r3,r3,r0
1694 eor r0,r11,r11,ror#5 @ from BODY_00_15
1695 add r2,r2,r3
1696 eor r0,r0,r11,ror#19 @ Sigma1(e)
1697 add r2,r2,r1 @ X[i]
1698 ldr r3,[r14],#4 @ *K256++
1699 add r6,r6,r2 @ h+=X[i]
1700 str r2,[sp,#13*4]
1701 eor r2,r4,r5
1702 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1703 and r2,r2,r11
1704 add r6,r6,r3 @ h+=K256[i]
1705 eor r2,r2,r5 @ Ch(e,f,g)
1706 eor r0,r7,r7,ror#11
1707 add r6,r6,r2 @ h+=Ch(e,f,g)
1708#if 29==31
1709 and r3,r3,#0xff
1710 cmp r3,#0xf2 @ done?
1711#endif
1712#if 29<15
1713# if __ARM_ARCH__>=7
1714 ldr r2,[r1],#4 @ prefetch
1715# else
1716 ldrb r2,[r1,#3]
1717# endif
1718 eor r3,r7,r8 @ a^b, b^c in next round
1719#else
1720 ldr r2,[sp,#15*4] @ from future BODY_16_xx
1721 eor r3,r7,r8 @ a^b, b^c in next round
1722 ldr r1,[sp,#12*4] @ from future BODY_16_xx
1723#endif
1724 eor r0,r0,r7,ror#20 @ Sigma0(a)
1725 and r12,r12,r3 @ (b^c)&=(a^b)
1726 add r10,r10,r6 @ d+=h
1727 eor r12,r12,r8 @ Maj(a,b,c)
1728 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1729 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1730 @ ldr r2,[sp,#15*4] @ 30
1731 @ ldr r1,[sp,#12*4]
1732 mov r0,r2,ror#7
1733 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1734 mov r12,r1,ror#17
1735 eor r0,r0,r2,ror#18
1736 eor r12,r12,r1,ror#19
1737 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1738 ldr r2,[sp,#14*4]
1739 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1740 ldr r1,[sp,#7*4]
1741
1742 add r12,r12,r0
1743 eor r0,r10,r10,ror#5 @ from BODY_00_15
1744 add r2,r2,r12
1745 eor r0,r0,r10,ror#19 @ Sigma1(e)
1746 add r2,r2,r1 @ X[i]
1747 ldr r12,[r14],#4 @ *K256++
1748 add r5,r5,r2 @ h+=X[i]
1749 str r2,[sp,#14*4]
1750 eor r2,r11,r4
1751 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1752 and r2,r2,r10
1753 add r5,r5,r12 @ h+=K256[i]
1754 eor r2,r2,r4 @ Ch(e,f,g)
1755 eor r0,r6,r6,ror#11
1756 add r5,r5,r2 @ h+=Ch(e,f,g)
1757#if 30==31
1758 and r12,r12,#0xff
1759 cmp r12,#0xf2 @ done?
1760#endif
1761#if 30<15
1762# if __ARM_ARCH__>=7
1763 ldr r2,[r1],#4 @ prefetch
1764# else
1765 ldrb r2,[r1,#3]
1766# endif
1767 eor r12,r6,r7 @ a^b, b^c in next round
1768#else
1769 ldr r2,[sp,#0*4] @ from future BODY_16_xx
1770 eor r12,r6,r7 @ a^b, b^c in next round
1771 ldr r1,[sp,#13*4] @ from future BODY_16_xx
1772#endif
1773 eor r0,r0,r6,ror#20 @ Sigma0(a)
1774 and r3,r3,r12 @ (b^c)&=(a^b)
1775 add r9,r9,r5 @ d+=h
1776 eor r3,r3,r7 @ Maj(a,b,c)
1777 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1778 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1779 @ ldr r2,[sp,#0*4] @ 31
1780 @ ldr r1,[sp,#13*4]
1781 mov r0,r2,ror#7
1782 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1783 mov r3,r1,ror#17
1784 eor r0,r0,r2,ror#18
1785 eor r3,r3,r1,ror#19
1786 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1787 ldr r2,[sp,#15*4]
1788 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1789 ldr r1,[sp,#8*4]
1790
1791 add r3,r3,r0
1792 eor r0,r9,r9,ror#5 @ from BODY_00_15
1793 add r2,r2,r3
1794 eor r0,r0,r9,ror#19 @ Sigma1(e)
1795 add r2,r2,r1 @ X[i]
1796 ldr r3,[r14],#4 @ *K256++
1797 add r4,r4,r2 @ h+=X[i]
1798 str r2,[sp,#15*4]
1799 eor r2,r10,r11
1800 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1801 and r2,r2,r9
1802 add r4,r4,r3 @ h+=K256[i]
1803 eor r2,r2,r11 @ Ch(e,f,g)
1804 eor r0,r5,r5,ror#11
1805 add r4,r4,r2 @ h+=Ch(e,f,g)
1806#if 31==31
1807 and r3,r3,#0xff
1808 cmp r3,#0xf2 @ done?
1809#endif
1810#if 31<15
1811# if __ARM_ARCH__>=7
1812 ldr r2,[r1],#4 @ prefetch
1813# else
1814 ldrb r2,[r1,#3]
1815# endif
1816 eor r3,r5,r6 @ a^b, b^c in next round
1817#else
1818 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1819 eor r3,r5,r6 @ a^b, b^c in next round
1820 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1821#endif
1822 eor r0,r0,r5,ror#20 @ Sigma0(a)
1823 and r12,r12,r3 @ (b^c)&=(a^b)
1824 add r8,r8,r4 @ d+=h
1825 eor r12,r12,r6 @ Maj(a,b,c)
1826 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1827 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1828#if __ARM_ARCH__>=7
1829 ite eq @ Thumb2 thing, sanity check in ARM
1830#endif
1831 ldreq r3,[sp,#16*4] @ pull ctx
1832 bne .Lrounds_16_xx
1833
1834 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1835 ldr r0,[r3,#0]
1836 ldr r2,[r3,#4]
1837 ldr r12,[r3,#8]
1838 add r4,r4,r0
1839 ldr r0,[r3,#12]
1840 add r5,r5,r2
1841 ldr r2,[r3,#16]
1842 add r6,r6,r12
1843 ldr r12,[r3,#20]
1844 add r7,r7,r0
1845 ldr r0,[r3,#24]
1846 add r8,r8,r2
1847 ldr r2,[r3,#28]
1848 add r9,r9,r12
1849 ldr r1,[sp,#17*4] @ pull inp
1850 ldr r12,[sp,#18*4] @ pull inp+len
1851 add r10,r10,r0
1852 add r11,r11,r2
1853 stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1854 cmp r1,r12
1855 sub r14,r14,#256 @ rewind Ktbl
1856 bne .Loop
1857
1858 add sp,sp,#19*4 @ destroy frame
1859#if __ARM_ARCH__>=5
1860 ldmia sp!,{r4-r11,pc}
1861#else
1862 ldmia sp!,{r4-r11,lr}
1863 tst lr,#1
1864 moveq pc,lr @ be binary compatible with V4, yet
1865 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
1866#endif
1867.size sha256_block_data_order,.-sha256_block_data_order
1868#if __ARM_MAX_ARCH__>=7
1869.arch armv7-a
1870.fpu neon
1871
1872.global sha256_block_data_order_neon
1873.type sha256_block_data_order_neon,%function
1874.align 4
1875sha256_block_data_order_neon:
1876.LNEON:
1877 stmdb sp!,{r4-r12,lr}
1878
1879 sub r11,sp,#16*4+16
1880 adrl r14,K256
1881 bic r11,r11,#15 @ align for 128-bit stores
1882 mov r12,sp
1883 mov sp,r11 @ alloca
1884 add r2,r1,r2,lsl#6 @ len to point at the end of inp
1885
1886 vld1.8 {q0},[r1]!
1887 vld1.8 {q1},[r1]!
1888 vld1.8 {q2},[r1]!
1889 vld1.8 {q3},[r1]!
1890 vld1.32 {q8},[r14,:128]!
1891 vld1.32 {q9},[r14,:128]!
1892 vld1.32 {q10},[r14,:128]!
1893 vld1.32 {q11},[r14,:128]!
1894 vrev32.8 q0,q0 @ yes, even on
1895 str r0,[sp,#64]
1896 vrev32.8 q1,q1 @ big-endian
1897 str r1,[sp,#68]
1898 mov r1,sp
1899 vrev32.8 q2,q2
1900 str r2,[sp,#72]
1901 vrev32.8 q3,q3
1902 str r12,[sp,#76] @ save original sp
1903 vadd.i32 q8,q8,q0
1904 vadd.i32 q9,q9,q1
1905 vst1.32 {q8},[r1,:128]!
1906 vadd.i32 q10,q10,q2
1907 vst1.32 {q9},[r1,:128]!
1908 vadd.i32 q11,q11,q3
1909 vst1.32 {q10},[r1,:128]!
1910 vst1.32 {q11},[r1,:128]!
1911
1912 ldmia r0,{r4-r11}
1913 sub r1,r1,#64
1914 ldr r2,[sp,#0]
1915 eor r12,r12,r12
1916 eor r3,r5,r6
1917 b .L_00_48
1918
1919.align 4
1920.L_00_48:
1921 vext.8 q8,q0,q1,#4
1922 add r11,r11,r2
1923 eor r2,r9,r10
1924 eor r0,r8,r8,ror#5
1925 vext.8 q9,q2,q3,#4
1926 add r4,r4,r12
1927 and r2,r2,r8
1928 eor r12,r0,r8,ror#19
1929 vshr.u32 q10,q8,#7
1930 eor r0,r4,r4,ror#11
1931 eor r2,r2,r10
1932 vadd.i32 q0,q0,q9
1933 add r11,r11,r12,ror#6
1934 eor r12,r4,r5
1935 vshr.u32 q9,q8,#3
1936 eor r0,r0,r4,ror#20
1937 add r11,r11,r2
1938 vsli.32 q10,q8,#25
1939 ldr r2,[sp,#4]
1940 and r3,r3,r12
1941 vshr.u32 q11,q8,#18
1942 add r7,r7,r11
1943 add r11,r11,r0,ror#2
1944 eor r3,r3,r5
1945 veor q9,q9,q10
1946 add r10,r10,r2
1947 vsli.32 q11,q8,#14
1948 eor r2,r8,r9
1949 eor r0,r7,r7,ror#5
1950 vshr.u32 d24,d7,#17
1951 add r11,r11,r3
1952 and r2,r2,r7
1953 veor q9,q9,q11
1954 eor r3,r0,r7,ror#19
1955 eor r0,r11,r11,ror#11
1956 vsli.32 d24,d7,#15
1957 eor r2,r2,r9
1958 add r10,r10,r3,ror#6
1959 vshr.u32 d25,d7,#10
1960 eor r3,r11,r4
1961 eor r0,r0,r11,ror#20
1962 vadd.i32 q0,q0,q9
1963 add r10,r10,r2
1964 ldr r2,[sp,#8]
1965 veor d25,d25,d24
1966 and r12,r12,r3
1967 add r6,r6,r10
1968 vshr.u32 d24,d7,#19
1969 add r10,r10,r0,ror#2
1970 eor r12,r12,r4
1971 vsli.32 d24,d7,#13
1972 add r9,r9,r2
1973 eor r2,r7,r8
1974 veor d25,d25,d24
1975 eor r0,r6,r6,ror#5
1976 add r10,r10,r12
1977 vadd.i32 d0,d0,d25
1978 and r2,r2,r6
1979 eor r12,r0,r6,ror#19
1980 vshr.u32 d24,d0,#17
1981 eor r0,r10,r10,ror#11
1982 eor r2,r2,r8
1983 vsli.32 d24,d0,#15
1984 add r9,r9,r12,ror#6
1985 eor r12,r10,r11
1986 vshr.u32 d25,d0,#10
1987 eor r0,r0,r10,ror#20
1988 add r9,r9,r2
1989 veor d25,d25,d24
1990 ldr r2,[sp,#12]
1991 and r3,r3,r12
1992 vshr.u32 d24,d0,#19
1993 add r5,r5,r9
1994 add r9,r9,r0,ror#2
1995 eor r3,r3,r11
1996 vld1.32 {q8},[r14,:128]!
1997 add r8,r8,r2
1998 vsli.32 d24,d0,#13
1999 eor r2,r6,r7
2000 eor r0,r5,r5,ror#5
2001 veor d25,d25,d24
2002 add r9,r9,r3
2003 and r2,r2,r5
2004 vadd.i32 d1,d1,d25
2005 eor r3,r0,r5,ror#19
2006 eor r0,r9,r9,ror#11
2007 vadd.i32 q8,q8,q0
2008 eor r2,r2,r7
2009 add r8,r8,r3,ror#6
2010 eor r3,r9,r10
2011 eor r0,r0,r9,ror#20
2012 add r8,r8,r2
2013 ldr r2,[sp,#16]
2014 and r12,r12,r3
2015 add r4,r4,r8
2016 vst1.32 {q8},[r1,:128]!
2017 add r8,r8,r0,ror#2
2018 eor r12,r12,r10
2019 vext.8 q8,q1,q2,#4
2020 add r7,r7,r2
2021 eor r2,r5,r6
2022 eor r0,r4,r4,ror#5
2023 vext.8 q9,q3,q0,#4
2024 add r8,r8,r12
2025 and r2,r2,r4
2026 eor r12,r0,r4,ror#19
2027 vshr.u32 q10,q8,#7
2028 eor r0,r8,r8,ror#11
2029 eor r2,r2,r6
2030 vadd.i32 q1,q1,q9
2031 add r7,r7,r12,ror#6
2032 eor r12,r8,r9
2033 vshr.u32 q9,q8,#3
2034 eor r0,r0,r8,ror#20
2035 add r7,r7,r2
2036 vsli.32 q10,q8,#25
2037 ldr r2,[sp,#20]
2038 and r3,r3,r12
2039 vshr.u32 q11,q8,#18
2040 add r11,r11,r7
2041 add r7,r7,r0,ror#2
2042 eor r3,r3,r9
2043 veor q9,q9,q10
2044 add r6,r6,r2
2045 vsli.32 q11,q8,#14
2046 eor r2,r4,r5
2047 eor r0,r11,r11,ror#5
2048 vshr.u32 d24,d1,#17
2049 add r7,r7,r3
2050 and r2,r2,r11
2051 veor q9,q9,q11
2052 eor r3,r0,r11,ror#19
2053 eor r0,r7,r7,ror#11
2054 vsli.32 d24,d1,#15
2055 eor r2,r2,r5
2056 add r6,r6,r3,ror#6
2057 vshr.u32 d25,d1,#10
2058 eor r3,r7,r8
2059 eor r0,r0,r7,ror#20
2060 vadd.i32 q1,q1,q9
2061 add r6,r6,r2
2062 ldr r2,[sp,#24]
2063 veor d25,d25,d24
2064 and r12,r12,r3
2065 add r10,r10,r6
2066 vshr.u32 d24,d1,#19
2067 add r6,r6,r0,ror#2
2068 eor r12,r12,r8
2069 vsli.32 d24,d1,#13
2070 add r5,r5,r2
2071 eor r2,r11,r4
2072 veor d25,d25,d24
2073 eor r0,r10,r10,ror#5
2074 add r6,r6,r12
2075 vadd.i32 d2,d2,d25
2076 and r2,r2,r10
2077 eor r12,r0,r10,ror#19
2078 vshr.u32 d24,d2,#17
2079 eor r0,r6,r6,ror#11
2080 eor r2,r2,r4
2081 vsli.32 d24,d2,#15
2082 add r5,r5,r12,ror#6
2083 eor r12,r6,r7
2084 vshr.u32 d25,d2,#10
2085 eor r0,r0,r6,ror#20
2086 add r5,r5,r2
2087 veor d25,d25,d24
2088 ldr r2,[sp,#28]
2089 and r3,r3,r12
2090 vshr.u32 d24,d2,#19
2091 add r9,r9,r5
2092 add r5,r5,r0,ror#2
2093 eor r3,r3,r7
2094 vld1.32 {q8},[r14,:128]!
2095 add r4,r4,r2
2096 vsli.32 d24,d2,#13
2097 eor r2,r10,r11
2098 eor r0,r9,r9,ror#5
2099 veor d25,d25,d24
2100 add r5,r5,r3
2101 and r2,r2,r9
2102 vadd.i32 d3,d3,d25
2103 eor r3,r0,r9,ror#19
2104 eor r0,r5,r5,ror#11
2105 vadd.i32 q8,q8,q1
2106 eor r2,r2,r11
2107 add r4,r4,r3,ror#6
2108 eor r3,r5,r6
2109 eor r0,r0,r5,ror#20
2110 add r4,r4,r2
2111 ldr r2,[sp,#32]
2112 and r12,r12,r3
2113 add r8,r8,r4
2114 vst1.32 {q8},[r1,:128]!
2115 add r4,r4,r0,ror#2
2116 eor r12,r12,r6
2117 vext.8 q8,q2,q3,#4
2118 add r11,r11,r2
2119 eor r2,r9,r10
2120 eor r0,r8,r8,ror#5
2121 vext.8 q9,q0,q1,#4
2122 add r4,r4,r12
2123 and r2,r2,r8
2124 eor r12,r0,r8,ror#19
2125 vshr.u32 q10,q8,#7
2126 eor r0,r4,r4,ror#11
2127 eor r2,r2,r10
2128 vadd.i32 q2,q2,q9
2129 add r11,r11,r12,ror#6
2130 eor r12,r4,r5
2131 vshr.u32 q9,q8,#3
2132 eor r0,r0,r4,ror#20
2133 add r11,r11,r2
2134 vsli.32 q10,q8,#25
2135 ldr r2,[sp,#36]
2136 and r3,r3,r12
2137 vshr.u32 q11,q8,#18
2138 add r7,r7,r11
2139 add r11,r11,r0,ror#2
2140 eor r3,r3,r5
2141 veor q9,q9,q10
2142 add r10,r10,r2
2143 vsli.32 q11,q8,#14
2144 eor r2,r8,r9
2145 eor r0,r7,r7,ror#5
2146 vshr.u32 d24,d3,#17
2147 add r11,r11,r3
2148 and r2,r2,r7
2149 veor q9,q9,q11
2150 eor r3,r0,r7,ror#19
2151 eor r0,r11,r11,ror#11
2152 vsli.32 d24,d3,#15
2153 eor r2,r2,r9
2154 add r10,r10,r3,ror#6
2155 vshr.u32 d25,d3,#10
2156 eor r3,r11,r4
2157 eor r0,r0,r11,ror#20
2158 vadd.i32 q2,q2,q9
2159 add r10,r10,r2
2160 ldr r2,[sp,#40]
2161 veor d25,d25,d24
2162 and r12,r12,r3
2163 add r6,r6,r10
2164 vshr.u32 d24,d3,#19
2165 add r10,r10,r0,ror#2
2166 eor r12,r12,r4
2167 vsli.32 d24,d3,#13
2168 add r9,r9,r2
2169 eor r2,r7,r8
2170 veor d25,d25,d24
2171 eor r0,r6,r6,ror#5
2172 add r10,r10,r12
2173 vadd.i32 d4,d4,d25
2174 and r2,r2,r6
2175 eor r12,r0,r6,ror#19
2176 vshr.u32 d24,d4,#17
2177 eor r0,r10,r10,ror#11
2178 eor r2,r2,r8
2179 vsli.32 d24,d4,#15
2180 add r9,r9,r12,ror#6
2181 eor r12,r10,r11
2182 vshr.u32 d25,d4,#10
2183 eor r0,r0,r10,ror#20
2184 add r9,r9,r2
2185 veor d25,d25,d24
2186 ldr r2,[sp,#44]
2187 and r3,r3,r12
2188 vshr.u32 d24,d4,#19
2189 add r5,r5,r9
2190 add r9,r9,r0,ror#2
2191 eor r3,r3,r11
2192 vld1.32 {q8},[r14,:128]!
2193 add r8,r8,r2
2194 vsli.32 d24,d4,#13
2195 eor r2,r6,r7
2196 eor r0,r5,r5,ror#5
2197 veor d25,d25,d24
2198 add r9,r9,r3
2199 and r2,r2,r5
2200 vadd.i32 d5,d5,d25
2201 eor r3,r0,r5,ror#19
2202 eor r0,r9,r9,ror#11
2203 vadd.i32 q8,q8,q2
2204 eor r2,r2,r7
2205 add r8,r8,r3,ror#6
2206 eor r3,r9,r10
2207 eor r0,r0,r9,ror#20
2208 add r8,r8,r2
2209 ldr r2,[sp,#48]
2210 and r12,r12,r3
2211 add r4,r4,r8
2212 vst1.32 {q8},[r1,:128]!
2213 add r8,r8,r0,ror#2
2214 eor r12,r12,r10
2215 vext.8 q8,q3,q0,#4
2216 add r7,r7,r2
2217 eor r2,r5,r6
2218 eor r0,r4,r4,ror#5
2219 vext.8 q9,q1,q2,#4
2220 add r8,r8,r12
2221 and r2,r2,r4
2222 eor r12,r0,r4,ror#19
2223 vshr.u32 q10,q8,#7
2224 eor r0,r8,r8,ror#11
2225 eor r2,r2,r6
2226 vadd.i32 q3,q3,q9
2227 add r7,r7,r12,ror#6
2228 eor r12,r8,r9
2229 vshr.u32 q9,q8,#3
2230 eor r0,r0,r8,ror#20
2231 add r7,r7,r2
2232 vsli.32 q10,q8,#25
2233 ldr r2,[sp,#52]
2234 and r3,r3,r12
2235 vshr.u32 q11,q8,#18
2236 add r11,r11,r7
2237 add r7,r7,r0,ror#2
2238 eor r3,r3,r9
2239 veor q9,q9,q10
2240 add r6,r6,r2
2241 vsli.32 q11,q8,#14
2242 eor r2,r4,r5
2243 eor r0,r11,r11,ror#5
2244 vshr.u32 d24,d5,#17
2245 add r7,r7,r3
2246 and r2,r2,r11
2247 veor q9,q9,q11
2248 eor r3,r0,r11,ror#19
2249 eor r0,r7,r7,ror#11
2250 vsli.32 d24,d5,#15
2251 eor r2,r2,r5
2252 add r6,r6,r3,ror#6
2253 vshr.u32 d25,d5,#10
2254 eor r3,r7,r8
2255 eor r0,r0,r7,ror#20
2256 vadd.i32 q3,q3,q9
2257 add r6,r6,r2
2258 ldr r2,[sp,#56]
2259 veor d25,d25,d24
2260 and r12,r12,r3
2261 add r10,r10,r6
2262 vshr.u32 d24,d5,#19
2263 add r6,r6,r0,ror#2
2264 eor r12,r12,r8
2265 vsli.32 d24,d5,#13
2266 add r5,r5,r2
2267 eor r2,r11,r4
2268 veor d25,d25,d24
2269 eor r0,r10,r10,ror#5
2270 add r6,r6,r12
2271 vadd.i32 d6,d6,d25
2272 and r2,r2,r10
2273 eor r12,r0,r10,ror#19
2274 vshr.u32 d24,d6,#17
2275 eor r0,r6,r6,ror#11
2276 eor r2,r2,r4
2277 vsli.32 d24,d6,#15
2278 add r5,r5,r12,ror#6
2279 eor r12,r6,r7
2280 vshr.u32 d25,d6,#10
2281 eor r0,r0,r6,ror#20
2282 add r5,r5,r2
2283 veor d25,d25,d24
2284 ldr r2,[sp,#60]
2285 and r3,r3,r12
2286 vshr.u32 d24,d6,#19
2287 add r9,r9,r5
2288 add r5,r5,r0,ror#2
2289 eor r3,r3,r7
2290 vld1.32 {q8},[r14,:128]!
2291 add r4,r4,r2
2292 vsli.32 d24,d6,#13
2293 eor r2,r10,r11
2294 eor r0,r9,r9,ror#5
2295 veor d25,d25,d24
2296 add r5,r5,r3
2297 and r2,r2,r9
2298 vadd.i32 d7,d7,d25
2299 eor r3,r0,r9,ror#19
2300 eor r0,r5,r5,ror#11
2301 vadd.i32 q8,q8,q3
2302 eor r2,r2,r11
2303 add r4,r4,r3,ror#6
2304 eor r3,r5,r6
2305 eor r0,r0,r5,ror#20
2306 add r4,r4,r2
2307 ldr r2,[r14]
2308 and r12,r12,r3
2309 add r8,r8,r4
2310 vst1.32 {q8},[r1,:128]!
2311 add r4,r4,r0,ror#2
2312 eor r12,r12,r6
2313 teq r2,#0 @ check for K256 terminator
2314 ldr r2,[sp,#0]
2315 sub r1,r1,#64
2316 bne .L_00_48
2317
2318 ldr r1,[sp,#68]
2319 ldr r0,[sp,#72]
2320 sub r14,r14,#256 @ rewind r14
2321 teq r1,r0
2322 it eq
2323 subeq r1,r1,#64 @ avoid SEGV
2324 vld1.8 {q0},[r1]! @ load next input block
2325 vld1.8 {q1},[r1]!
2326 vld1.8 {q2},[r1]!
2327 vld1.8 {q3},[r1]!
2328 it ne
2329 strne r1,[sp,#68]
2330 mov r1,sp
2331 add r11,r11,r2
2332 eor r2,r9,r10
2333 eor r0,r8,r8,ror#5
2334 add r4,r4,r12
2335 vld1.32 {q8},[r14,:128]!
2336 and r2,r2,r8
2337 eor r12,r0,r8,ror#19
2338 eor r0,r4,r4,ror#11
2339 eor r2,r2,r10
2340 vrev32.8 q0,q0
2341 add r11,r11,r12,ror#6
2342 eor r12,r4,r5
2343 eor r0,r0,r4,ror#20
2344 add r11,r11,r2
2345 vadd.i32 q8,q8,q0
2346 ldr r2,[sp,#4]
2347 and r3,r3,r12
2348 add r7,r7,r11
2349 add r11,r11,r0,ror#2
2350 eor r3,r3,r5
2351 add r10,r10,r2
2352 eor r2,r8,r9
2353 eor r0,r7,r7,ror#5
2354 add r11,r11,r3
2355 and r2,r2,r7
2356 eor r3,r0,r7,ror#19
2357 eor r0,r11,r11,ror#11
2358 eor r2,r2,r9
2359 add r10,r10,r3,ror#6
2360 eor r3,r11,r4
2361 eor r0,r0,r11,ror#20
2362 add r10,r10,r2
2363 ldr r2,[sp,#8]
2364 and r12,r12,r3
2365 add r6,r6,r10
2366 add r10,r10,r0,ror#2
2367 eor r12,r12,r4
2368 add r9,r9,r2
2369 eor r2,r7,r8
2370 eor r0,r6,r6,ror#5
2371 add r10,r10,r12
2372 and r2,r2,r6
2373 eor r12,r0,r6,ror#19
2374 eor r0,r10,r10,ror#11
2375 eor r2,r2,r8
2376 add r9,r9,r12,ror#6
2377 eor r12,r10,r11
2378 eor r0,r0,r10,ror#20
2379 add r9,r9,r2
2380 ldr r2,[sp,#12]
2381 and r3,r3,r12
2382 add r5,r5,r9
2383 add r9,r9,r0,ror#2
2384 eor r3,r3,r11
2385 add r8,r8,r2
2386 eor r2,r6,r7
2387 eor r0,r5,r5,ror#5
2388 add r9,r9,r3
2389 and r2,r2,r5
2390 eor r3,r0,r5,ror#19
2391 eor r0,r9,r9,ror#11
2392 eor r2,r2,r7
2393 add r8,r8,r3,ror#6
2394 eor r3,r9,r10
2395 eor r0,r0,r9,ror#20
2396 add r8,r8,r2
2397 ldr r2,[sp,#16]
2398 and r12,r12,r3
2399 add r4,r4,r8
2400 add r8,r8,r0,ror#2
2401 eor r12,r12,r10
2402 vst1.32 {q8},[r1,:128]!
2403 add r7,r7,r2
2404 eor r2,r5,r6
2405 eor r0,r4,r4,ror#5
2406 add r8,r8,r12
2407 vld1.32 {q8},[r14,:128]!
2408 and r2,r2,r4
2409 eor r12,r0,r4,ror#19
2410 eor r0,r8,r8,ror#11
2411 eor r2,r2,r6
2412 vrev32.8 q1,q1
2413 add r7,r7,r12,ror#6
2414 eor r12,r8,r9
2415 eor r0,r0,r8,ror#20
2416 add r7,r7,r2
2417 vadd.i32 q8,q8,q1
2418 ldr r2,[sp,#20]
2419 and r3,r3,r12
2420 add r11,r11,r7
2421 add r7,r7,r0,ror#2
2422 eor r3,r3,r9
2423 add r6,r6,r2
2424 eor r2,r4,r5
2425 eor r0,r11,r11,ror#5
2426 add r7,r7,r3
2427 and r2,r2,r11
2428 eor r3,r0,r11,ror#19
2429 eor r0,r7,r7,ror#11
2430 eor r2,r2,r5
2431 add r6,r6,r3,ror#6
2432 eor r3,r7,r8
2433 eor r0,r0,r7,ror#20
2434 add r6,r6,r2
2435 ldr r2,[sp,#24]
2436 and r12,r12,r3
2437 add r10,r10,r6
2438 add r6,r6,r0,ror#2
2439 eor r12,r12,r8
2440 add r5,r5,r2
2441 eor r2,r11,r4
2442 eor r0,r10,r10,ror#5
2443 add r6,r6,r12
2444 and r2,r2,r10
2445 eor r12,r0,r10,ror#19
2446 eor r0,r6,r6,ror#11
2447 eor r2,r2,r4
2448 add r5,r5,r12,ror#6
2449 eor r12,r6,r7
2450 eor r0,r0,r6,ror#20
2451 add r5,r5,r2
2452 ldr r2,[sp,#28]
2453 and r3,r3,r12
2454 add r9,r9,r5
2455 add r5,r5,r0,ror#2
2456 eor r3,r3,r7
2457 add r4,r4,r2
2458 eor r2,r10,r11
2459 eor r0,r9,r9,ror#5
2460 add r5,r5,r3
2461 and r2,r2,r9
2462 eor r3,r0,r9,ror#19
2463 eor r0,r5,r5,ror#11
2464 eor r2,r2,r11
2465 add r4,r4,r3,ror#6
2466 eor r3,r5,r6
2467 eor r0,r0,r5,ror#20
2468 add r4,r4,r2
2469 ldr r2,[sp,#32]
2470 and r12,r12,r3
2471 add r8,r8,r4
2472 add r4,r4,r0,ror#2
2473 eor r12,r12,r6
2474 vst1.32 {q8},[r1,:128]!
2475 add r11,r11,r2
2476 eor r2,r9,r10
2477 eor r0,r8,r8,ror#5
2478 add r4,r4,r12
2479 vld1.32 {q8},[r14,:128]!
2480 and r2,r2,r8
2481 eor r12,r0,r8,ror#19
2482 eor r0,r4,r4,ror#11
2483 eor r2,r2,r10
2484 vrev32.8 q2,q2
2485 add r11,r11,r12,ror#6
2486 eor r12,r4,r5
2487 eor r0,r0,r4,ror#20
2488 add r11,r11,r2
2489 vadd.i32 q8,q8,q2
2490 ldr r2,[sp,#36]
2491 and r3,r3,r12
2492 add r7,r7,r11
2493 add r11,r11,r0,ror#2
2494 eor r3,r3,r5
2495 add r10,r10,r2
2496 eor r2,r8,r9
2497 eor r0,r7,r7,ror#5
2498 add r11,r11,r3
2499 and r2,r2,r7
2500 eor r3,r0,r7,ror#19
2501 eor r0,r11,r11,ror#11
2502 eor r2,r2,r9
2503 add r10,r10,r3,ror#6
2504 eor r3,r11,r4
2505 eor r0,r0,r11,ror#20
2506 add r10,r10,r2
2507 ldr r2,[sp,#40]
2508 and r12,r12,r3
2509 add r6,r6,r10
2510 add r10,r10,r0,ror#2
2511 eor r12,r12,r4
2512 add r9,r9,r2
2513 eor r2,r7,r8
2514 eor r0,r6,r6,ror#5
2515 add r10,r10,r12
2516 and r2,r2,r6
2517 eor r12,r0,r6,ror#19
2518 eor r0,r10,r10,ror#11
2519 eor r2,r2,r8
2520 add r9,r9,r12,ror#6
2521 eor r12,r10,r11
2522 eor r0,r0,r10,ror#20
2523 add r9,r9,r2
2524 ldr r2,[sp,#44]
2525 and r3,r3,r12
2526 add r5,r5,r9
2527 add r9,r9,r0,ror#2
2528 eor r3,r3,r11
2529 add r8,r8,r2
2530 eor r2,r6,r7
2531 eor r0,r5,r5,ror#5
2532 add r9,r9,r3
2533 and r2,r2,r5
2534 eor r3,r0,r5,ror#19
2535 eor r0,r9,r9,ror#11
2536 eor r2,r2,r7
2537 add r8,r8,r3,ror#6
2538 eor r3,r9,r10
2539 eor r0,r0,r9,ror#20
2540 add r8,r8,r2
2541 ldr r2,[sp,#48]
2542 and r12,r12,r3
2543 add r4,r4,r8
2544 add r8,r8,r0,ror#2
2545 eor r12,r12,r10
2546 vst1.32 {q8},[r1,:128]!
2547 add r7,r7,r2
2548 eor r2,r5,r6
2549 eor r0,r4,r4,ror#5
2550 add r8,r8,r12
2551 vld1.32 {q8},[r14,:128]!
2552 and r2,r2,r4
2553 eor r12,r0,r4,ror#19
2554 eor r0,r8,r8,ror#11
2555 eor r2,r2,r6
2556 vrev32.8 q3,q3
2557 add r7,r7,r12,ror#6
2558 eor r12,r8,r9
2559 eor r0,r0,r8,ror#20
2560 add r7,r7,r2
2561 vadd.i32 q8,q8,q3
2562 ldr r2,[sp,#52]
2563 and r3,r3,r12
2564 add r11,r11,r7
2565 add r7,r7,r0,ror#2
2566 eor r3,r3,r9
2567 add r6,r6,r2
2568 eor r2,r4,r5
2569 eor r0,r11,r11,ror#5
2570 add r7,r7,r3
2571 and r2,r2,r11
2572 eor r3,r0,r11,ror#19
2573 eor r0,r7,r7,ror#11
2574 eor r2,r2,r5
2575 add r6,r6,r3,ror#6
2576 eor r3,r7,r8
2577 eor r0,r0,r7,ror#20
2578 add r6,r6,r2
2579 ldr r2,[sp,#56]
2580 and r12,r12,r3
2581 add r10,r10,r6
2582 add r6,r6,r0,ror#2
2583 eor r12,r12,r8
2584 add r5,r5,r2
2585 eor r2,r11,r4
2586 eor r0,r10,r10,ror#5
2587 add r6,r6,r12
2588 and r2,r2,r10
2589 eor r12,r0,r10,ror#19
2590 eor r0,r6,r6,ror#11
2591 eor r2,r2,r4
2592 add r5,r5,r12,ror#6
2593 eor r12,r6,r7
2594 eor r0,r0,r6,ror#20
2595 add r5,r5,r2
2596 ldr r2,[sp,#60]
2597 and r3,r3,r12
2598 add r9,r9,r5
2599 add r5,r5,r0,ror#2
2600 eor r3,r3,r7
2601 add r4,r4,r2
2602 eor r2,r10,r11
2603 eor r0,r9,r9,ror#5
2604 add r5,r5,r3
2605 and r2,r2,r9
2606 eor r3,r0,r9,ror#19
2607 eor r0,r5,r5,ror#11
2608 eor r2,r2,r11
2609 add r4,r4,r3,ror#6
2610 eor r3,r5,r6
2611 eor r0,r0,r5,ror#20
2612 add r4,r4,r2
2613 ldr r2,[sp,#64]
2614 and r12,r12,r3
2615 add r8,r8,r4
2616 add r4,r4,r0,ror#2
2617 eor r12,r12,r6
2618 vst1.32 {q8},[r1,:128]!
2619 ldr r0,[r2,#0]
2620 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
2621 ldr r12,[r2,#4]
2622 ldr r3,[r2,#8]
2623 ldr r1,[r2,#12]
2624 add r4,r4,r0 @ accumulate
2625 ldr r0,[r2,#16]
2626 add r5,r5,r12
2627 ldr r12,[r2,#20]
2628 add r6,r6,r3
2629 ldr r3,[r2,#24]
2630 add r7,r7,r1
2631 ldr r1,[r2,#28]
2632 add r8,r8,r0
2633 str r4,[r2],#4
2634 add r9,r9,r12
2635 str r5,[r2],#4
2636 add r10,r10,r3
2637 str r6,[r2],#4
2638 add r11,r11,r1
2639 str r7,[r2],#4
2640 stmia r2,{r8-r11}
2641
2642 ittte ne
2643 movne r1,sp
2644 ldrne r2,[sp,#0]
2645 eorne r12,r12,r12
2646 ldreq sp,[sp,#76] @ restore original sp
2647 itt ne
2648 eorne r3,r5,r6
2649 bne .L_00_48
2650
2651 ldmia sp!,{r4-r12,pc}
2652.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
2653#endif
2654#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2655
2656# ifdef __thumb2__
2657# define INST(a,b,c,d) .byte c,d|0xc,a,b
2658# else
2659# define INST(a,b,c,d) .byte a,b,c,d
2660# endif
2661
2662.type sha256_block_data_order_armv8,%function
2663.align 5
2664sha256_block_data_order_armv8:
2665.LARMv8:
2666 vld1.32 {q0,q1},[r0]
2667# ifdef __thumb2__
2668 adr r3,.LARMv8
2669 sub r3,r3,#.LARMv8-K256
2670# else
2671 adrl r3,K256
2672# endif
2673 add r2,r1,r2,lsl#6 @ len to point at the end of inp
2674
2675.Loop_v8:
2676 vld1.8 {q8-q9},[r1]!
2677 vld1.8 {q10-q11},[r1]!
2678 vld1.32 {q12},[r3]!
2679 vrev32.8 q8,q8
2680 vrev32.8 q9,q9
2681 vrev32.8 q10,q10
2682 vrev32.8 q11,q11
2683 vmov q14,q0 @ offload
2684 vmov q15,q1
2685 teq r1,r2
2686 vld1.32 {q13},[r3]!
2687 vadd.i32 q12,q12,q8
2688 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2689 vmov q2,q0
2690 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2691 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2692 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2693 vld1.32 {q12},[r3]!
2694 vadd.i32 q13,q13,q9
2695 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2696 vmov q2,q0
2697 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2698 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2699 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2700 vld1.32 {q13},[r3]!
2701 vadd.i32 q12,q12,q10
2702 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2703 vmov q2,q0
2704 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2705 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2706 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2707 vld1.32 {q12},[r3]!
2708 vadd.i32 q13,q13,q11
2709 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2710 vmov q2,q0
2711 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2712 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2713 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2714 vld1.32 {q13},[r3]!
2715 vadd.i32 q12,q12,q8
2716 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2717 vmov q2,q0
2718 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2719 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2720 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2721 vld1.32 {q12},[r3]!
2722 vadd.i32 q13,q13,q9
2723 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2724 vmov q2,q0
2725 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2726 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2727 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2728 vld1.32 {q13},[r3]!
2729 vadd.i32 q12,q12,q10
2730 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2731 vmov q2,q0
2732 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2733 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2734 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2735 vld1.32 {q12},[r3]!
2736 vadd.i32 q13,q13,q11
2737 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2738 vmov q2,q0
2739 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2740 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2741 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2742 vld1.32 {q13},[r3]!
2743 vadd.i32 q12,q12,q8
2744 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2745 vmov q2,q0
2746 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2747 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2748 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2749 vld1.32 {q12},[r3]!
2750 vadd.i32 q13,q13,q9
2751 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2752 vmov q2,q0
2753 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2754 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2755 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2756 vld1.32 {q13},[r3]!
2757 vadd.i32 q12,q12,q10
2758 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2759 vmov q2,q0
2760 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2761 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2762 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2763 vld1.32 {q12},[r3]!
2764 vadd.i32 q13,q13,q11
2765 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2766 vmov q2,q0
2767 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2768 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2769 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2770 vld1.32 {q13},[r3]!
2771 vadd.i32 q12,q12,q8
2772 vmov q2,q0
2773 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2774 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2775
2776 vld1.32 {q12},[r3]!
2777 vadd.i32 q13,q13,q9
2778 vmov q2,q0
2779 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2780 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2781
2782 vld1.32 {q13},[r3]
2783 vadd.i32 q12,q12,q10
2784 sub r3,r3,#256-16 @ rewind
2785 vmov q2,q0
2786 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2787 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2788
2789 vadd.i32 q13,q13,q11
2790 vmov q2,q0
2791 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2792 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2793
2794 vadd.i32 q0,q0,q14
2795 vadd.i32 q1,q1,q15
2796 it ne
2797 bne .Loop_v8
2798
2799 vst1.32 {q0,q1},[r0]
2800
2801 bx lr @ bx lr
2802.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2803#endif
2804.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2805.align 2
2806#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2807.comm OPENSSL_armcap_P,4,4
2808#endif
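
The three code paths above (ARMv4 integer, NEON, and the ARMv8 Crypto Extensions block at the end) all compute the standard SHA-256 compression function from FIPS 180-4; the Sigma1(e), Ch(e,f,g), Maj(a,b,c), sigma0(X[i+1]) and sigma1(X[i+14]) annotations in the comments name its building blocks. As a reading aid only (this sketch is not part of the patch), here is one round and one message-schedule step in plain C:

#include <stdint.h>

/* rotate right by n bits, 0 < n < 32 */
static uint32_t rotr(uint32_t x, int n)
{
	return (x >> n) | (x << (32 - n));
}

/* One SHA-256 round: s[0..7] = a,b,c,d,e,f,g,h; k = K256[i], w = X[i] */
static void sha256_round(uint32_t s[8], uint32_t k, uint32_t w)
{
	uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

	uint32_t S1  = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25);	/* Sigma1(e) */
	uint32_t ch  = (e & f) ^ (~e & g);			/* Ch(e,f,g) */
	uint32_t t1  = h + S1 + ch + k + w;
	uint32_t S0  = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22);	/* Sigma0(a) */
	uint32_t maj = (a & b) ^ (a & c) ^ (b & c);		/* Maj(a,b,c) */

	s[7] = g; s[6] = f; s[5] = e;
	s[4] = d + t1;			/* the "d+=h" step */
	s[3] = c; s[2] = b; s[1] = a;
	s[0] = t1 + S0 + maj;		/* the "h+=Sigma0(a)" / "h+=Maj(a,b,c)" steps */
}

/* Message schedule for rounds 16..63 */
static uint32_t sha256_schedule(const uint32_t w[64], int i)
{
	uint32_t s0 = rotr(w[i - 15], 7) ^ rotr(w[i - 15], 18) ^ (w[i - 15] >> 3);	/* sigma0 */
	uint32_t s1 = rotr(w[i - 2], 17) ^ rotr(w[i - 2], 19) ^ (w[i - 2] >> 10);	/* sigma1 */

	return w[i - 16] + s0 + w[i - 7] + s1;
}

The hand-unrolled ARM code keeps a..h in r4-r11 and defers each round's final "h+=Maj(a,b,c)" into the start of the next round, which is what the "@ h+=Maj(a,b,c) from the past" comments refer to.
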
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
new file mode 100644
index 000000000000..a84e869ef900
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.c
@@ -0,0 +1,128 @@
1/*
2 * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
3 * using optimized ARM assembler and NEON instructions.
4 *
5 * Copyright © 2015 Google Inc.
6 *
7 * This file is based on sha256_ssse3_glue.c:
8 * Copyright (C) 2013 Intel Corporation
9 * Author: Tim Chen <tim.c.chen@linux.intel.com>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 */
17
18#include <crypto/internal/hash.h>
19#include <linux/crypto.h>
20#include <linux/init.h>
21#include <linux/module.h>
22#include <linux/mm.h>
23#include <linux/cryptohash.h>
24#include <linux/types.h>
25#include <linux/string.h>
26#include <crypto/sha.h>
27#include <crypto/sha256_base.h>
28#include <asm/simd.h>
29#include <asm/neon.h>
30
31#include "sha256_glue.h"
32
33asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
34 unsigned int num_blks);
35
36int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
37 unsigned int len)
38{
39 /* make sure casting to sha256_block_fn() is safe */
40 BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
41
42 return sha256_base_do_update(desc, data, len,
43 (sha256_block_fn *)sha256_block_data_order);
44}
45EXPORT_SYMBOL(crypto_sha256_arm_update);
46
47static int sha256_final(struct shash_desc *desc, u8 *out)
48{
49 sha256_base_do_finalize(desc,
50 (sha256_block_fn *)sha256_block_data_order);
51 return sha256_base_finish(desc, out);
52}
53
54int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
55 unsigned int len, u8 *out)
56{
57 sha256_base_do_update(desc, data, len,
58 (sha256_block_fn *)sha256_block_data_order);
59 return sha256_final(desc, out);
60}
61EXPORT_SYMBOL(crypto_sha256_arm_finup);
62
63static struct shash_alg algs[] = { {
64 .digestsize = SHA256_DIGEST_SIZE,
65 .init = sha256_base_init,
66 .update = crypto_sha256_arm_update,
67 .final = sha256_final,
68 .finup = crypto_sha256_arm_finup,
69 .descsize = sizeof(struct sha256_state),
70 .base = {
71 .cra_name = "sha256",
72 .cra_driver_name = "sha256-asm",
73 .cra_priority = 150,
74 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
75 .cra_blocksize = SHA256_BLOCK_SIZE,
76 .cra_module = THIS_MODULE,
77 }
78}, {
79 .digestsize = SHA224_DIGEST_SIZE,
80 .init = sha224_base_init,
81 .update = crypto_sha256_arm_update,
82 .final = sha256_final,
83 .finup = crypto_sha256_arm_finup,
84 .descsize = sizeof(struct sha256_state),
85 .base = {
86 .cra_name = "sha224",
87 .cra_driver_name = "sha224-asm",
88 .cra_priority = 150,
89 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
90 .cra_blocksize = SHA224_BLOCK_SIZE,
91 .cra_module = THIS_MODULE,
92 }
93} };
94
95static int __init sha256_mod_init(void)
96{
97 int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
98
99 if (res < 0)
100 return res;
101
102 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
103 res = crypto_register_shashes(sha256_neon_algs,
104 ARRAY_SIZE(sha256_neon_algs));
105
106 if (res < 0)
107 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
108 }
109
110 return res;
111}
112
113static void __exit sha256_mod_fini(void)
114{
115 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
116
117 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
118 crypto_unregister_shashes(sha256_neon_algs,
119 ARRAY_SIZE(sha256_neon_algs));
120}
121
122module_init(sha256_mod_init);
123module_exit(sha256_mod_fini);
124
125MODULE_LICENSE("GPL");
126MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
127
128MODULE_ALIAS_CRYPTO("sha256");
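
The "sha256"/"sha224" shashes registered above become selectable by name through the kernel crypto API, with cra_priority deciding which driver wins when several are available. A minimal sketch of a kernel caller (illustrative only, not part of the patch; demo_sha256 is a made-up helper and error handling is kept to the essentials):

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>

static int demo_sha256(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	/* "sha256" picks the highest-priority driver; "sha256-asm" would pin this one */
	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}

	desc->tfm = tfm;
	err = crypto_shash_digest(desc, data, len, out);	/* out: 32-byte digest */

	kfree(desc);
	crypto_free_shash(tfm);
	return err;
}
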
diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h
new file mode 100644
index 000000000000..7cf0bf786ada
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.h
@@ -0,0 +1,14 @@
1#ifndef _CRYPTO_SHA256_GLUE_H
2#define _CRYPTO_SHA256_GLUE_H
3
4#include <linux/crypto.h>
5
6extern struct shash_alg sha256_neon_algs[2];
7
8int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
9 unsigned int len);
10
11int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
12 unsigned int len, u8 *hash);
13
14#endif /* _CRYPTO_SHA256_GLUE_H */
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
new file mode 100644
index 000000000000..39ccd658817e
--- /dev/null
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -0,0 +1,101 @@
1/*
2 * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
3 * using NEON instructions.
4 *
5 * Copyright © 2015 Google Inc.
6 *
7 * This file is based on sha512_neon_glue.c:
8 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 *
15 */
16
17#include <crypto/internal/hash.h>
18#include <linux/cryptohash.h>
19#include <linux/types.h>
20#include <linux/string.h>
21#include <crypto/sha.h>
22#include <crypto/sha256_base.h>
23#include <asm/byteorder.h>
24#include <asm/simd.h>
25#include <asm/neon.h>
26
27#include "sha256_glue.h"
28
29asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
30 unsigned int num_blks);
31
32static int sha256_update(struct shash_desc *desc, const u8 *data,
33 unsigned int len)
34{
35 struct sha256_state *sctx = shash_desc_ctx(desc);
36
37 if (!may_use_simd() ||
38 (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
39 return crypto_sha256_arm_update(desc, data, len);
40
41 kernel_neon_begin();
42 sha256_base_do_update(desc, data, len,
43 (sha256_block_fn *)sha256_block_data_order_neon);
44 kernel_neon_end();
45
46 return 0;
47}
48
49static int sha256_finup(struct shash_desc *desc, const u8 *data,
50 unsigned int len, u8 *out)
51{
52 if (!may_use_simd())
53 return crypto_sha256_arm_finup(desc, data, len, out);
54
55 kernel_neon_begin();
56 if (len)
57 sha256_base_do_update(desc, data, len,
58 (sha256_block_fn *)sha256_block_data_order_neon);
59 sha256_base_do_finalize(desc,
60 (sha256_block_fn *)sha256_block_data_order_neon);
61 kernel_neon_end();
62
63 return sha256_base_finish(desc, out);
64}
65
66static int sha256_final(struct shash_desc *desc, u8 *out)
67{
68 return sha256_finup(desc, NULL, 0, out);
69}
70
71struct shash_alg sha256_neon_algs[] = { {
72 .digestsize = SHA256_DIGEST_SIZE,
73 .init = sha256_base_init,
74 .update = sha256_update,
75 .final = sha256_final,
76 .finup = sha256_finup,
77 .descsize = sizeof(struct sha256_state),
78 .base = {
79 .cra_name = "sha256",
80 .cra_driver_name = "sha256-neon",
81 .cra_priority = 250,
82 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
83 .cra_blocksize = SHA256_BLOCK_SIZE,
84 .cra_module = THIS_MODULE,
85 }
86}, {
87 .digestsize = SHA224_DIGEST_SIZE,
88 .init = sha224_base_init,
89 .update = sha256_update,
90 .final = sha256_final,
91 .finup = sha256_finup,
92 .descsize = sizeof(struct sha256_state),
93 .base = {
94 .cra_name = "sha224",
95 .cra_driver_name = "sha224-neon",
96 .cra_priority = 250,
97 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
98 .cra_blocksize = SHA224_BLOCK_SIZE,
99 .cra_module = THIS_MODULE,
100 }
101} };
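
With both the plain assembler and the NEON glue registered, a request for "sha256" resolves to whichever implementation wins on cra_priority on the running CPU. For completeness, a rough user-space sketch that exercises the same algorithm through the AF_ALG hash socket (illustrative only; assumes CONFIG_CRYPTO_USER_API_HASH and omits error checks):

#include <linux/if_alg.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "sha256",
	};
	unsigned char digest[32];
	const char msg[] = "abc";
	int tfm, op;

	tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
	op = accept(tfm, NULL, 0);

	write(op, msg, sizeof(msg) - 1);
	read(op, digest, sizeof(digest));	/* SHA-256 of "abc" */

	close(op);
	close(tfm);
	return 0;
}
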
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index b1b5b893eb20..05d9e16c0dfd 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -284,7 +284,8 @@ static struct crypto_alg aes_algs[] = { {
 		.cra_name		= "__ecb-aes-" MODE,
 		.cra_driver_name	= "__driver-ecb-aes-" MODE,
 		.cra_priority		= 0,
-		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+					  CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 		.cra_alignmask		= 7,
@@ -302,7 +303,8 @@ static struct crypto_alg aes_algs[] = { {
 		.cra_name		= "__cbc-aes-" MODE,
 		.cra_driver_name	= "__driver-cbc-aes-" MODE,
 		.cra_priority		= 0,
-		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+					  CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 		.cra_alignmask		= 7,
@@ -320,7 +322,8 @@ static struct crypto_alg aes_algs[] = { {
 		.cra_name		= "__ctr-aes-" MODE,
 		.cra_driver_name	= "__driver-ctr-aes-" MODE,
 		.cra_priority		= 0,
-		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+					  CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= 1,
 		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 		.cra_alignmask		= 7,
@@ -338,7 +341,8 @@ static struct crypto_alg aes_algs[] = { {
 		.cra_name		= "__xts-aes-" MODE,
 		.cra_driver_name	= "__driver-xts-aes-" MODE,
 		.cra_priority		= 0,
-		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+		.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+					  CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= AES_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
 		.cra_alignmask		= 7,
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 09d57d98609c..033aae6d732a 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -66,8 +66,8 @@
 	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
 
 	/*
-	 * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-	 *			  u8 *head, long bytes)
+	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+	 *			  int blocks)
 	 */
 ENTRY(sha1_ce_transform)
 	/* load round constants */
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
 	ld1r		{k3.4s}, [x6]
 
 	/* load state */
-	ldr		dga, [x2]
-	ldr		dgb, [x2, #16]
+	ldr		dga, [x0]
+	ldr		dgb, [x0, #16]
 
-	/* load partial state (if supplied) */
-	cbz		x3, 0f
-	ld1		{v8.4s-v11.4s}, [x3]
-	b		1f
+	/* load sha1_ce_state::finalize */
+	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
 
 	/* load input */
 0:	ld1		{v8.4s-v11.4s}, [x1], #64
-	sub		w0, w0, #1
+	sub		w2, w2, #1
 
-1:
 CPU_LE(	rev32		v8.16b, v8.16b	)
 CPU_LE(	rev32		v9.16b, v9.16b	)
 CPU_LE(	rev32		v10.16b, v10.16b	)
 CPU_LE(	rev32		v11.16b, v11.16b	)
 
-2:	add		t0.4s, v8.4s, k0.4s
+1:	add		t0.4s, v8.4s, k0.4s
 	mov		dg0v.16b, dgav.16b
 
 	add_update	c, ev, k0, 8, 9, 10, 11, dgb
@@ -127,15 +124,15 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	add		dgbv.2s, dgbv.2s, dg1v.2s
 	add		dgav.4s, dgav.4s, dg0v.4s
 
-	cbnz		w0, 0b
+	cbnz		w2, 0b
 
 	/*
 	 * Final block: add padding and total bit count.
-	 * Skip if we have no total byte count in x4. In that case, the input
-	 * size was not a round multiple of the block size, and the padding is
-	 * handled by the C code.
+	 * Skip if the input size was not a round multiple of the block size,
+	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
+	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
 	movi		v9.2d, #0
 	mov		x8, #0x80000000
 	movi		v10.2d, #0
@@ -144,10 +141,10 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	mov		x4, #0
 	mov		v11.d[0], xzr
 	mov		v11.d[1], x7
-	b		2b
+	b		1b
 
 	/* store new state */
-3:	str		dga, [x2]
-	str		dgb, [x2, #16]
+3:	str		dga, [x0]
+	str		dgb, [x0, #16]
 	ret
 ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 6fe83f37a750..114e7cc5de8c 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,144 +12,81 @@
12#include <asm/unaligned.h> 12#include <asm/unaligned.h>
13#include <crypto/internal/hash.h> 13#include <crypto/internal/hash.h>
14#include <crypto/sha.h> 14#include <crypto/sha.h>
15#include <crypto/sha1_base.h>
15#include <linux/cpufeature.h> 16#include <linux/cpufeature.h>
16#include <linux/crypto.h> 17#include <linux/crypto.h>
17#include <linux/module.h> 18#include <linux/module.h>
18 19
20#define ASM_EXPORT(sym, val) \
21 asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
22
19MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); 23MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
20MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 24MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
21MODULE_LICENSE("GPL v2"); 25MODULE_LICENSE("GPL v2");
22 26
23asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, 27struct sha1_ce_state {
24 u8 *head, long bytes); 28 struct sha1_state sst;
29 u32 finalize;
30};
25 31
26static int sha1_init(struct shash_desc *desc) 32asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
27{ 33 int blocks);
28 struct sha1_state *sctx = shash_desc_ctx(desc);
29 34
30 *sctx = (struct sha1_state){ 35static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
31 .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, 36 unsigned int len)
32 };
33 return 0;
34}
35
36static int sha1_update(struct shash_desc *desc, const u8 *data,
37 unsigned int len)
38{ 37{
39 struct sha1_state *sctx = shash_desc_ctx(desc); 38 struct sha1_ce_state *sctx = shash_desc_ctx(desc);
40 unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
41
42 sctx->count += len;
43
44 if ((partial + len) >= SHA1_BLOCK_SIZE) {
45 int blocks;
46
47 if (partial) {
48 int p = SHA1_BLOCK_SIZE - partial;
49 39
50 memcpy(sctx->buffer + partial, data, p); 40 sctx->finalize = 0;
51 data += p; 41 kernel_neon_begin_partial(16);
52 len -= p; 42 sha1_base_do_update(desc, data, len,
53 } 43 (sha1_block_fn *)sha1_ce_transform);
54 44 kernel_neon_end();
55 blocks = len / SHA1_BLOCK_SIZE;
56 len %= SHA1_BLOCK_SIZE;
57
58 kernel_neon_begin_partial(16);
59 sha1_ce_transform(blocks, data, sctx->state,
60 partial ? sctx->buffer : NULL, 0);
61 kernel_neon_end();
62 45
63 data += blocks * SHA1_BLOCK_SIZE;
64 partial = 0;
65 }
66 if (len)
67 memcpy(sctx->buffer + partial, data, len);
68 return 0; 46 return 0;
69} 47}
70 48
71static int sha1_final(struct shash_desc *desc, u8 *out) 49static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
50 unsigned int len, u8 *out)
72{ 51{
73 static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; 52 struct sha1_ce_state *sctx = shash_desc_ctx(desc);
53 bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
74 54
75 struct sha1_state *sctx = shash_desc_ctx(desc); 55 ASM_EXPORT(sha1_ce_offsetof_count,
76 __be64 bits = cpu_to_be64(sctx->count << 3); 56 offsetof(struct sha1_ce_state, sst.count));
77 __be32 *dst = (__be32 *)out; 57 ASM_EXPORT(sha1_ce_offsetof_finalize,
78 int i; 58 offsetof(struct sha1_ce_state, finalize));
79
80 u32 padlen = SHA1_BLOCK_SIZE
81 - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
82
83 sha1_update(desc, padding, padlen);
84 sha1_update(desc, (const u8 *)&bits, sizeof(bits));
85
86 for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
87 put_unaligned_be32(sctx->state[i], dst++);
88
89 *sctx = (struct sha1_state){};
90 return 0;
91}
92
93static int sha1_finup(struct shash_desc *desc, const u8 *data,
94 unsigned int len, u8 *out)
95{
96 struct sha1_state *sctx = shash_desc_ctx(desc);
97 __be32 *dst = (__be32 *)out;
98 int blocks;
99 int i;
100
101 if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
102 sha1_update(desc, data, len);
103 return sha1_final(desc, out);
104 }
105 59
106 /* 60 /*
107 * Use a fast path if the input is a multiple of 64 bytes. In 61 * Allow the asm code to perform the finalization if there is no
108 * this case, there is no need to copy data around, and we can 62 * partial data and the input is a round multiple of the block size.
109 * perform the entire digest calculation in a single invocation
110 * of sha1_ce_transform()
111 */ 63 */
112 blocks = len / SHA1_BLOCK_SIZE; 64 sctx->finalize = finalize;
113 65
114 kernel_neon_begin_partial(16); 66 kernel_neon_begin_partial(16);
115 sha1_ce_transform(blocks, data, sctx->state, NULL, len); 67 sha1_base_do_update(desc, data, len,
68 (sha1_block_fn *)sha1_ce_transform);
69 if (!finalize)
70 sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
116 kernel_neon_end(); 71 kernel_neon_end();
117 72 return sha1_base_finish(desc, out);
118 for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
119 put_unaligned_be32(sctx->state[i], dst++);
120
121 *sctx = (struct sha1_state){};
122 return 0;
123} 73}
124 74
125static int sha1_export(struct shash_desc *desc, void *out) 75static int sha1_ce_final(struct shash_desc *desc, u8 *out)
126{ 76{
127 struct sha1_state *sctx = shash_desc_ctx(desc); 77 kernel_neon_begin_partial(16);
128 struct sha1_state *dst = out; 78 sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
129 79 kernel_neon_end();
130 *dst = *sctx; 80 return sha1_base_finish(desc, out);
131 return 0;
132}
133
134static int sha1_import(struct shash_desc *desc, const void *in)
135{
136 struct sha1_state *sctx = shash_desc_ctx(desc);
137 struct sha1_state const *src = in;
138
139 *sctx = *src;
140 return 0;
141} 81}
142 82
143static struct shash_alg alg = { 83static struct shash_alg alg = {
144 .init = sha1_init, 84 .init = sha1_base_init,
145 .update = sha1_update, 85 .update = sha1_ce_update,
146 .final = sha1_final, 86 .final = sha1_ce_final,
147 .finup = sha1_finup, 87 .finup = sha1_ce_finup,
148 .export = sha1_export, 88 .descsize = sizeof(struct sha1_ce_state),
149 .import = sha1_import,
150 .descsize = sizeof(struct sha1_state),
151 .digestsize = SHA1_DIGEST_SIZE, 89 .digestsize = SHA1_DIGEST_SIZE,
152 .statesize = sizeof(struct sha1_state),
153 .base = { 90 .base = {
154 .cra_name = "sha1", 91 .cra_name = "sha1",
155 .cra_driver_name = "sha1-ce", 92 .cra_driver_name = "sha1-ce",
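
The ASM_EXPORT() macro in the glue file above is what lets sha1-ce-core.S load sst.count and finalize by symbolic name (the #:lo12:sha1_ce_offsetof_* references in the assembler hunk) rather than hard-coding structure offsets. A minimal stand-alone sketch of the same trick, using a stand-in structure (the names here are illustrative, not from the patch):

    #include <stddef.h>

    /* stand-in for struct sha1_ce_state; only the layout matters */
    struct demo_state {
        unsigned long long count;
        unsigned int finalize;
    };

    #define ASM_EXPORT(sym, val) \
        asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));

    void demo_export(void)
    {
        /* emits assembler symbols equal to the structure offsets, so separate
         * assembler code can refer to demo_offsetof_* by name */
        ASM_EXPORT(demo_offsetof_count, offsetof(struct demo_state, count));
        ASM_EXPORT(demo_offsetof_finalize, offsetof(struct demo_state, finalize));
    }

In the patch the macro is invoked from inside sha1_ce_finup(), as seen above, so the exported symbols track the real struct sha1_ce_state layout automatically.
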
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 7f29fc031ea8..5df9d9d470ad 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -73,8 +73,8 @@
73 .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 73 .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
74 74
75 /* 75 /*
76 * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, 76 * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
77 * u8 *head, long bytes) 77 * int blocks)
78 */ 78 */
79ENTRY(sha2_ce_transform) 79ENTRY(sha2_ce_transform)
80 /* load round constants */ 80 /* load round constants */
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform)
85 ld1 {v12.4s-v15.4s}, [x8] 85 ld1 {v12.4s-v15.4s}, [x8]
86 86
87 /* load state */ 87 /* load state */
88 ldp dga, dgb, [x2] 88 ldp dga, dgb, [x0]
89 89
90 /* load partial input (if supplied) */ 90 /* load sha256_ce_state::finalize */
91 cbz x3, 0f 91 ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
92 ld1 {v16.4s-v19.4s}, [x3]
93 b 1f
94 92
95 /* load input */ 93 /* load input */
960: ld1 {v16.4s-v19.4s}, [x1], #64 940: ld1 {v16.4s-v19.4s}, [x1], #64
97 sub w0, w0, #1 95 sub w2, w2, #1
98 96
991:
100CPU_LE( rev32 v16.16b, v16.16b ) 97CPU_LE( rev32 v16.16b, v16.16b )
101CPU_LE( rev32 v17.16b, v17.16b ) 98CPU_LE( rev32 v17.16b, v17.16b )
102CPU_LE( rev32 v18.16b, v18.16b ) 99CPU_LE( rev32 v18.16b, v18.16b )
103CPU_LE( rev32 v19.16b, v19.16b ) 100CPU_LE( rev32 v19.16b, v19.16b )
104 101
1052: add t0.4s, v16.4s, v0.4s 1021: add t0.4s, v16.4s, v0.4s
106 mov dg0v.16b, dgav.16b 103 mov dg0v.16b, dgav.16b
107 mov dg1v.16b, dgbv.16b 104 mov dg1v.16b, dgbv.16b
108 105
@@ -131,15 +128,15 @@ CPU_LE( rev32 v19.16b, v19.16b )
131 add dgbv.4s, dgbv.4s, dg1v.4s 128 add dgbv.4s, dgbv.4s, dg1v.4s
132 129
133 /* handled all input blocks? */ 130 /* handled all input blocks? */
134 cbnz w0, 0b 131 cbnz w2, 0b
135 132
136 /* 133 /*
137 * Final block: add padding and total bit count. 134 * Final block: add padding and total bit count.
138 * Skip if we have no total byte count in x4. In that case, the input 135 * Skip if the input size was not a round multiple of the block size,
 139	 * size was not a round multiple of the block size, and the padding is	 136	 * as the padding is handled by the C code in that case.
140 * handled by the C code.
141 */ 137 */
142 cbz x4, 3f 138 cbz x4, 3f
139 ldr x4, [x0, #:lo12:sha256_ce_offsetof_count]
143 movi v17.2d, #0 140 movi v17.2d, #0
144 mov x8, #0x80000000 141 mov x8, #0x80000000
145 movi v18.2d, #0 142 movi v18.2d, #0
@@ -148,9 +145,9 @@ CPU_LE( rev32 v19.16b, v19.16b )
148 mov x4, #0 145 mov x4, #0
149 mov v19.d[0], xzr 146 mov v19.d[0], xzr
150 mov v19.d[1], x7 147 mov v19.d[1], x7
151 b 2b 148 b 1b
152 149
153 /* store new state */ 150 /* store new state */
1543: stp dga, dgb, [x2] 1513: stp dga, dgb, [x0]
155 ret 152 ret
156ENDPROC(sha2_ce_transform) 153ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index ae67e88c28b9..1340e44c048b 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -12,206 +12,82 @@
12#include <asm/unaligned.h> 12#include <asm/unaligned.h>
13#include <crypto/internal/hash.h> 13#include <crypto/internal/hash.h>
14#include <crypto/sha.h> 14#include <crypto/sha.h>
15#include <crypto/sha256_base.h>
15#include <linux/cpufeature.h> 16#include <linux/cpufeature.h>
16#include <linux/crypto.h> 17#include <linux/crypto.h>
17#include <linux/module.h> 18#include <linux/module.h>
18 19
20#define ASM_EXPORT(sym, val) \
21 asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
22
19MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); 23MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
20MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 24MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
21MODULE_LICENSE("GPL v2"); 25MODULE_LICENSE("GPL v2");
22 26
23asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, 27struct sha256_ce_state {
24 u8 *head, long bytes); 28 struct sha256_state sst;
25 29 u32 finalize;
26static int sha224_init(struct shash_desc *desc) 30};
27{
28 struct sha256_state *sctx = shash_desc_ctx(desc);
29
30 *sctx = (struct sha256_state){
31 .state = {
32 SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
33 SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
34 }
35 };
36 return 0;
37}
38
39static int sha256_init(struct shash_desc *desc)
40{
41 struct sha256_state *sctx = shash_desc_ctx(desc);
42
43 *sctx = (struct sha256_state){
44 .state = {
45 SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
46 SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
47 }
48 };
49 return 0;
50}
51
52static int sha2_update(struct shash_desc *desc, const u8 *data,
53 unsigned int len)
54{
55 struct sha256_state *sctx = shash_desc_ctx(desc);
56 unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
57
58 sctx->count += len;
59
60 if ((partial + len) >= SHA256_BLOCK_SIZE) {
61 int blocks;
62
63 if (partial) {
64 int p = SHA256_BLOCK_SIZE - partial;
65
66 memcpy(sctx->buf + partial, data, p);
67 data += p;
68 len -= p;
69 }
70 31
71 blocks = len / SHA256_BLOCK_SIZE; 32asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
72 len %= SHA256_BLOCK_SIZE; 33 int blocks);
73 34
74 kernel_neon_begin_partial(28); 35static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
75 sha2_ce_transform(blocks, data, sctx->state, 36 unsigned int len)
76 partial ? sctx->buf : NULL, 0);
77 kernel_neon_end();
78
79 data += blocks * SHA256_BLOCK_SIZE;
80 partial = 0;
81 }
82 if (len)
83 memcpy(sctx->buf + partial, data, len);
84 return 0;
85}
86
87static void sha2_final(struct shash_desc *desc)
88{ 37{
89 static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; 38 struct sha256_ce_state *sctx = shash_desc_ctx(desc);
90
91 struct sha256_state *sctx = shash_desc_ctx(desc);
92 __be64 bits = cpu_to_be64(sctx->count << 3);
93 u32 padlen = SHA256_BLOCK_SIZE
94 - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
95
96 sha2_update(desc, padding, padlen);
97 sha2_update(desc, (const u8 *)&bits, sizeof(bits));
98}
99
100static int sha224_final(struct shash_desc *desc, u8 *out)
101{
102 struct sha256_state *sctx = shash_desc_ctx(desc);
103 __be32 *dst = (__be32 *)out;
104 int i;
105
106 sha2_final(desc);
107
108 for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
109 put_unaligned_be32(sctx->state[i], dst++);
110
111 *sctx = (struct sha256_state){};
112 return 0;
113}
114 39
115static int sha256_final(struct shash_desc *desc, u8 *out) 40 sctx->finalize = 0;
116{ 41 kernel_neon_begin_partial(28);
117 struct sha256_state *sctx = shash_desc_ctx(desc); 42 sha256_base_do_update(desc, data, len,
118 __be32 *dst = (__be32 *)out; 43 (sha256_block_fn *)sha2_ce_transform);
119 int i; 44 kernel_neon_end();
120
121 sha2_final(desc);
122
123 for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
124 put_unaligned_be32(sctx->state[i], dst++);
125 45
126 *sctx = (struct sha256_state){};
127 return 0; 46 return 0;
128} 47}
129 48
130static void sha2_finup(struct shash_desc *desc, const u8 *data, 49static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
131 unsigned int len) 50 unsigned int len, u8 *out)
132{ 51{
133 struct sha256_state *sctx = shash_desc_ctx(desc); 52 struct sha256_ce_state *sctx = shash_desc_ctx(desc);
134 int blocks; 53 bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
135 54
136 if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { 55 ASM_EXPORT(sha256_ce_offsetof_count,
137 sha2_update(desc, data, len); 56 offsetof(struct sha256_ce_state, sst.count));
138 sha2_final(desc); 57 ASM_EXPORT(sha256_ce_offsetof_finalize,
139 return; 58 offsetof(struct sha256_ce_state, finalize));
140 }
141 59
142 /* 60 /*
143 * Use a fast path if the input is a multiple of 64 bytes. In 61 * Allow the asm code to perform the finalization if there is no
144 * this case, there is no need to copy data around, and we can 62 * partial data and the input is a round multiple of the block size.
145 * perform the entire digest calculation in a single invocation
146 * of sha2_ce_transform()
147 */ 63 */
148 blocks = len / SHA256_BLOCK_SIZE; 64 sctx->finalize = finalize;
149 65
150 kernel_neon_begin_partial(28); 66 kernel_neon_begin_partial(28);
151 sha2_ce_transform(blocks, data, sctx->state, NULL, len); 67 sha256_base_do_update(desc, data, len,
68 (sha256_block_fn *)sha2_ce_transform);
69 if (!finalize)
70 sha256_base_do_finalize(desc,
71 (sha256_block_fn *)sha2_ce_transform);
152 kernel_neon_end(); 72 kernel_neon_end();
73 return sha256_base_finish(desc, out);
153} 74}
154 75
155static int sha224_finup(struct shash_desc *desc, const u8 *data, 76static int sha256_ce_final(struct shash_desc *desc, u8 *out)
156 unsigned int len, u8 *out)
157{ 77{
158 struct sha256_state *sctx = shash_desc_ctx(desc); 78 kernel_neon_begin_partial(28);
159 __be32 *dst = (__be32 *)out; 79 sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
160 int i; 80 kernel_neon_end();
161 81 return sha256_base_finish(desc, out);
162 sha2_finup(desc, data, len);
163
164 for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
165 put_unaligned_be32(sctx->state[i], dst++);
166
167 *sctx = (struct sha256_state){};
168 return 0;
169}
170
171static int sha256_finup(struct shash_desc *desc, const u8 *data,
172 unsigned int len, u8 *out)
173{
174 struct sha256_state *sctx = shash_desc_ctx(desc);
175 __be32 *dst = (__be32 *)out;
176 int i;
177
178 sha2_finup(desc, data, len);
179
180 for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
181 put_unaligned_be32(sctx->state[i], dst++);
182
183 *sctx = (struct sha256_state){};
184 return 0;
185}
186
187static int sha2_export(struct shash_desc *desc, void *out)
188{
189 struct sha256_state *sctx = shash_desc_ctx(desc);
190 struct sha256_state *dst = out;
191
192 *dst = *sctx;
193 return 0;
194}
195
196static int sha2_import(struct shash_desc *desc, const void *in)
197{
198 struct sha256_state *sctx = shash_desc_ctx(desc);
199 struct sha256_state const *src = in;
200
201 *sctx = *src;
202 return 0;
203} 82}
204 83
205static struct shash_alg algs[] = { { 84static struct shash_alg algs[] = { {
206 .init = sha224_init, 85 .init = sha224_base_init,
207 .update = sha2_update, 86 .update = sha256_ce_update,
208 .final = sha224_final, 87 .final = sha256_ce_final,
209 .finup = sha224_finup, 88 .finup = sha256_ce_finup,
210 .export = sha2_export, 89 .descsize = sizeof(struct sha256_ce_state),
211 .import = sha2_import,
212 .descsize = sizeof(struct sha256_state),
213 .digestsize = SHA224_DIGEST_SIZE, 90 .digestsize = SHA224_DIGEST_SIZE,
214 .statesize = sizeof(struct sha256_state),
215 .base = { 91 .base = {
216 .cra_name = "sha224", 92 .cra_name = "sha224",
217 .cra_driver_name = "sha224-ce", 93 .cra_driver_name = "sha224-ce",
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { {
221 .cra_module = THIS_MODULE, 97 .cra_module = THIS_MODULE,
222 } 98 }
223}, { 99}, {
224 .init = sha256_init, 100 .init = sha256_base_init,
225 .update = sha2_update, 101 .update = sha256_ce_update,
226 .final = sha256_final, 102 .final = sha256_ce_final,
227 .finup = sha256_finup, 103 .finup = sha256_ce_finup,
228 .export = sha2_export, 104 .descsize = sizeof(struct sha256_ce_state),
229 .import = sha2_import,
230 .descsize = sizeof(struct sha256_state),
231 .digestsize = SHA256_DIGEST_SIZE, 105 .digestsize = SHA256_DIGEST_SIZE,
232 .statesize = sizeof(struct sha256_state),
233 .base = { 106 .base = {
234 .cra_name = "sha256", 107 .cra_name = "sha256",
235 .cra_driver_name = "sha256-ce", 108 .cra_driver_name = "sha256-ce",
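
Both arm64 glue files above use the same finalize fast path: when finup() receives the whole message in one call (nothing hashed yet, so sst.count is still zero) and the length is an exact number of 64-byte blocks, the finalize flag lets the assembler append the padding block itself; otherwise sha1/sha256_base_do_finalize() handles it in C. A hedged stand-alone sketch of just that decision (names and sizes mirror the code above, nothing more is implied):

    #include <stdbool.h>
    #include <stdint.h>

    #define BLOCK_SIZE 64u  /* SHA1_BLOCK_SIZE and SHA256_BLOCK_SIZE are both 64 */

    /* mirrors: finalize = !sctx->sst.count && !(len % SHAx_BLOCK_SIZE) */
    static bool asm_may_finalize(uint64_t count_before_finup, unsigned int len)
    {
        return count_before_finup == 0 && (len % BLOCK_SIZE) == 0;
    }

So digesting, say, exactly 128 bytes through a single finup() call is padded entirely inside the Crypto Extensions code, while 100 bytes, or any finup() preceded by an update(), falls back to the generic C finalization.
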
diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile
index a74f76d85a2f..f7aa9d5d3b87 100644
--- a/arch/mips/cavium-octeon/crypto/Makefile
+++ b/arch/mips/cavium-octeon/crypto/Makefile
@@ -4,4 +4,7 @@
4 4
5obj-y += octeon-crypto.o 5obj-y += octeon-crypto.o
6 6
7obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o 7obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o
8obj-$(CONFIG_CRYPTO_SHA1_OCTEON) += octeon-sha1.o
9obj-$(CONFIG_CRYPTO_SHA256_OCTEON) += octeon-sha256.o
10obj-$(CONFIG_CRYPTO_SHA512_OCTEON) += octeon-sha512.o
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
index 7c82ff463b65..f66bd1adc7ff 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
@@ -17,7 +17,7 @@
17 * crypto operations in calls to octeon_crypto_enable/disable in order to make 17 * crypto operations in calls to octeon_crypto_enable/disable in order to make
18 * sure the state of COP2 isn't corrupted if userspace is also performing 18 * sure the state of COP2 isn't corrupted if userspace is also performing
19 * hardware crypto operations. Allocate the state parameter on the stack. 19 * hardware crypto operations. Allocate the state parameter on the stack.
20 * Preemption must be disabled to prevent context switches. 20 * Returns with preemption disabled.
21 * 21 *
22 * @state: Pointer to state structure to store current COP2 state in. 22 * @state: Pointer to state structure to store current COP2 state in.
23 * 23 *
@@ -28,6 +28,7 @@ unsigned long octeon_crypto_enable(struct octeon_cop2_state *state)
28 int status; 28 int status;
29 unsigned long flags; 29 unsigned long flags;
30 30
31 preempt_disable();
31 local_irq_save(flags); 32 local_irq_save(flags);
32 status = read_c0_status(); 33 status = read_c0_status();
33 write_c0_status(status | ST0_CU2); 34 write_c0_status(status | ST0_CU2);
@@ -62,5 +63,6 @@ void octeon_crypto_disable(struct octeon_cop2_state *state,
62 else 63 else
63 write_c0_status(read_c0_status() & ~ST0_CU2); 64 write_c0_status(read_c0_status() & ~ST0_CU2);
64 local_irq_restore(flags); 65 local_irq_restore(flags);
66 preempt_enable();
65} 67}
66EXPORT_SYMBOL_GPL(octeon_crypto_disable); 68EXPORT_SYMBOL_GPL(octeon_crypto_disable);
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
index e2a4aece9c24..355072535110 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
@@ -5,7 +5,8 @@
5 * 5 *
6 * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved. 6 * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved.
7 * 7 *
8 * MD5 instruction definitions added by Aaro Koskinen <aaro.koskinen@iki.fi>. 8 * MD5/SHA1/SHA256/SHA512 instruction definitions added by
9 * Aaro Koskinen <aaro.koskinen@iki.fi>.
9 * 10 *
10 */ 11 */
11#ifndef __LINUX_OCTEON_CRYPTO_H 12#ifndef __LINUX_OCTEON_CRYPTO_H
@@ -21,11 +22,11 @@ extern void octeon_crypto_disable(struct octeon_cop2_state *state,
21 unsigned long flags); 22 unsigned long flags);
22 23
23/* 24/*
24 * Macros needed to implement MD5: 25 * Macros needed to implement MD5/SHA1/SHA256:
25 */ 26 */
26 27
27/* 28/*
 28 * The index can be 0-1.	 29 * The index can be 0-1 (MD5), 0-2 (SHA1), or 0-3 (SHA256).
29 */ 30 */
30#define write_octeon_64bit_hash_dword(value, index) \ 31#define write_octeon_64bit_hash_dword(value, index) \
31do { \ 32do { \
@@ -36,7 +37,7 @@ do { \
36} while (0) 37} while (0)
37 38
38/* 39/*
 39 * The index can be 0-1.	 40 * The index can be 0-1 (MD5), 0-2 (SHA1), or 0-3 (SHA256).
40 */ 41 */
41#define read_octeon_64bit_hash_dword(index) \ 42#define read_octeon_64bit_hash_dword(index) \
42({ \ 43({ \
@@ -72,4 +73,78 @@ do { \
72 : [rt] "d" (value)); \ 73 : [rt] "d" (value)); \
73} while (0) 74} while (0)
74 75
76/*
77 * The value is the final block dword (64-bit).
78 */
79#define octeon_sha1_start(value) \
80do { \
81 __asm__ __volatile__ ( \
82 "dmtc2 %[rt],0x4057" \
83 : \
84 : [rt] "d" (value)); \
85} while (0)
86
87/*
88 * The value is the final block dword (64-bit).
89 */
90#define octeon_sha256_start(value) \
91do { \
92 __asm__ __volatile__ ( \
93 "dmtc2 %[rt],0x404f" \
94 : \
95 : [rt] "d" (value)); \
96} while (0)
97
98/*
99 * Macros needed to implement SHA512:
100 */
101
102/*
103 * The index can be 0-7.
104 */
105#define write_octeon_64bit_hash_sha512(value, index) \
106do { \
107 __asm__ __volatile__ ( \
108 "dmtc2 %[rt],0x0250+" STR(index) \
109 : \
110 : [rt] "d" (value)); \
111} while (0)
112
113/*
114 * The index can be 0-7.
115 */
116#define read_octeon_64bit_hash_sha512(index) \
117({ \
118 u64 __value; \
119 \
120 __asm__ __volatile__ ( \
121 "dmfc2 %[rt],0x0250+" STR(index) \
122 : [rt] "=d" (__value) \
123 : ); \
124 \
125 __value; \
126})
127
128/*
129 * The index can be 0-14.
130 */
131#define write_octeon_64bit_block_sha512(value, index) \
132do { \
133 __asm__ __volatile__ ( \
134 "dmtc2 %[rt],0x0240+" STR(index) \
135 : \
136 : [rt] "d" (value)); \
137} while (0)
138
139/*
140 * The value is the final block word (64-bit).
141 */
142#define octeon_sha512_start(value) \
143do { \
144 __asm__ __volatile__ ( \
145 "dmtc2 %[rt],0x424f" \
146 : \
147 : [rt] "d" (value)); \
148} while (0)
149
75#endif /* __LINUX_OCTEON_CRYPTO_H */ 150#endif /* __LINUX_OCTEON_CRYPTO_H */
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c
index b909881ba6c1..12dccdb38286 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-md5.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c
@@ -97,8 +97,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
97 memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data, 97 memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data,
98 avail); 98 avail);
99 99
100 local_bh_disable();
101 preempt_disable();
102 flags = octeon_crypto_enable(&state); 100 flags = octeon_crypto_enable(&state);
103 octeon_md5_store_hash(mctx); 101 octeon_md5_store_hash(mctx);
104 102
@@ -114,8 +112,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
114 112
115 octeon_md5_read_hash(mctx); 113 octeon_md5_read_hash(mctx);
116 octeon_crypto_disable(&state, flags); 114 octeon_crypto_disable(&state, flags);
117 preempt_enable();
118 local_bh_enable();
119 115
120 memcpy(mctx->block, data, len); 116 memcpy(mctx->block, data, len);
121 117
@@ -133,8 +129,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
133 129
134 *p++ = 0x80; 130 *p++ = 0x80;
135 131
136 local_bh_disable();
137 preempt_disable();
138 flags = octeon_crypto_enable(&state); 132 flags = octeon_crypto_enable(&state);
139 octeon_md5_store_hash(mctx); 133 octeon_md5_store_hash(mctx);
140 134
@@ -152,8 +146,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
152 146
153 octeon_md5_read_hash(mctx); 147 octeon_md5_read_hash(mctx);
154 octeon_crypto_disable(&state, flags); 148 octeon_crypto_disable(&state, flags);
155 preempt_enable();
156 local_bh_enable();
157 149
158 memcpy(out, mctx->hash, sizeof(mctx->hash)); 150 memcpy(out, mctx->hash, sizeof(mctx->hash));
159 memset(mctx, 0, sizeof(*mctx)); 151 memset(mctx, 0, sizeof(*mctx));
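
The octeon-crypto.c and octeon-md5.c hunks above belong together: octeon_crypto_enable() now disables preemption itself and octeon_crypto_disable() re-enables it, so callers no longer wrap the pair in local_bh_disable()/preempt_disable(). A sketch of the resulting calling pattern (the block-feeding step is a placeholder, not code from this patch):

    #include <asm/octeon/octeon.h>
    #include "octeon-crypto.h"

    /* shape of every OCTEON update/final path after this change (sketch only) */
    static void with_cop2(void)
    {
        struct octeon_cop2_state cop2_state;
        unsigned long flags;

        flags = octeon_crypto_enable(&cop2_state);  /* saves COP2, disables preemption */

        /* ... store hash state, feed 64-byte blocks, read the hash back ... */

        octeon_crypto_disable(&cop2_state, flags);  /* restores COP2, re-enables preemption */
    }
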
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
new file mode 100644
index 000000000000..2b74b5b67cae
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
@@ -0,0 +1,241 @@
1/*
2 * Cryptographic API.
3 *
4 * SHA1 Secure Hash Algorithm.
5 *
6 * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
7 *
8 * Based on crypto/sha1_generic.c, which is:
9 *
10 * Copyright (c) Alan Smithee.
11 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
12 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
13 *
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the Free
16 * Software Foundation; either version 2 of the License, or (at your option)
17 * any later version.
18 */
19
20#include <linux/mm.h>
21#include <crypto/sha.h>
22#include <linux/init.h>
23#include <linux/types.h>
24#include <linux/module.h>
25#include <asm/byteorder.h>
26#include <asm/octeon/octeon.h>
27#include <crypto/internal/hash.h>
28
29#include "octeon-crypto.h"
30
31/*
32 * We pass everything as 64-bit. OCTEON can handle misaligned data.
33 */
34
35static void octeon_sha1_store_hash(struct sha1_state *sctx)
36{
37 u64 *hash = (u64 *)sctx->state;
38 union {
39 u32 word[2];
40 u64 dword;
41 } hash_tail = { { sctx->state[4], } };
42
43 write_octeon_64bit_hash_dword(hash[0], 0);
44 write_octeon_64bit_hash_dword(hash[1], 1);
45 write_octeon_64bit_hash_dword(hash_tail.dword, 2);
46 memzero_explicit(&hash_tail.word[0], sizeof(hash_tail.word[0]));
47}
48
49static void octeon_sha1_read_hash(struct sha1_state *sctx)
50{
51 u64 *hash = (u64 *)sctx->state;
52 union {
53 u32 word[2];
54 u64 dword;
55 } hash_tail;
56
57 hash[0] = read_octeon_64bit_hash_dword(0);
58 hash[1] = read_octeon_64bit_hash_dword(1);
59 hash_tail.dword = read_octeon_64bit_hash_dword(2);
60 sctx->state[4] = hash_tail.word[0];
61 memzero_explicit(&hash_tail.dword, sizeof(hash_tail.dword));
62}
63
64static void octeon_sha1_transform(const void *_block)
65{
66 const u64 *block = _block;
67
68 write_octeon_64bit_block_dword(block[0], 0);
69 write_octeon_64bit_block_dword(block[1], 1);
70 write_octeon_64bit_block_dword(block[2], 2);
71 write_octeon_64bit_block_dword(block[3], 3);
72 write_octeon_64bit_block_dword(block[4], 4);
73 write_octeon_64bit_block_dword(block[5], 5);
74 write_octeon_64bit_block_dword(block[6], 6);
75 octeon_sha1_start(block[7]);
76}
77
78static int octeon_sha1_init(struct shash_desc *desc)
79{
80 struct sha1_state *sctx = shash_desc_ctx(desc);
81
82 sctx->state[0] = SHA1_H0;
83 sctx->state[1] = SHA1_H1;
84 sctx->state[2] = SHA1_H2;
85 sctx->state[3] = SHA1_H3;
86 sctx->state[4] = SHA1_H4;
87 sctx->count = 0;
88
89 return 0;
90}
91
92static void __octeon_sha1_update(struct sha1_state *sctx, const u8 *data,
93 unsigned int len)
94{
95 unsigned int partial;
96 unsigned int done;
97 const u8 *src;
98
99 partial = sctx->count % SHA1_BLOCK_SIZE;
100 sctx->count += len;
101 done = 0;
102 src = data;
103
104 if ((partial + len) >= SHA1_BLOCK_SIZE) {
105 if (partial) {
106 done = -partial;
107 memcpy(sctx->buffer + partial, data,
108 done + SHA1_BLOCK_SIZE);
109 src = sctx->buffer;
110 }
111
112 do {
113 octeon_sha1_transform(src);
114 done += SHA1_BLOCK_SIZE;
115 src = data + done;
116 } while (done + SHA1_BLOCK_SIZE <= len);
117
118 partial = 0;
119 }
120 memcpy(sctx->buffer + partial, src, len - done);
121}
122
123static int octeon_sha1_update(struct shash_desc *desc, const u8 *data,
124 unsigned int len)
125{
126 struct sha1_state *sctx = shash_desc_ctx(desc);
127 struct octeon_cop2_state state;
128 unsigned long flags;
129
130 /*
131 * Small updates never reach the crypto engine, so the generic sha1 is
132 * faster because of the heavyweight octeon_crypto_enable() /
133 * octeon_crypto_disable().
134 */
135 if ((sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
136 return crypto_sha1_update(desc, data, len);
137
138 flags = octeon_crypto_enable(&state);
139 octeon_sha1_store_hash(sctx);
140
141 __octeon_sha1_update(sctx, data, len);
142
143 octeon_sha1_read_hash(sctx);
144 octeon_crypto_disable(&state, flags);
145
146 return 0;
147}
148
149static int octeon_sha1_final(struct shash_desc *desc, u8 *out)
150{
151 struct sha1_state *sctx = shash_desc_ctx(desc);
152 static const u8 padding[64] = { 0x80, };
153 struct octeon_cop2_state state;
154 __be32 *dst = (__be32 *)out;
155 unsigned int pad_len;
156 unsigned long flags;
157 unsigned int index;
158 __be64 bits;
159 int i;
160
161 /* Save number of bits. */
162 bits = cpu_to_be64(sctx->count << 3);
163
164 /* Pad out to 56 mod 64. */
165 index = sctx->count & 0x3f;
166 pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
167
168 flags = octeon_crypto_enable(&state);
169 octeon_sha1_store_hash(sctx);
170
171 __octeon_sha1_update(sctx, padding, pad_len);
172
173 /* Append length (before padding). */
174 __octeon_sha1_update(sctx, (const u8 *)&bits, sizeof(bits));
175
176 octeon_sha1_read_hash(sctx);
177 octeon_crypto_disable(&state, flags);
178
179 /* Store state in digest */
180 for (i = 0; i < 5; i++)
181 dst[i] = cpu_to_be32(sctx->state[i]);
182
183 /* Zeroize sensitive information. */
184 memset(sctx, 0, sizeof(*sctx));
185
186 return 0;
187}
188
189static int octeon_sha1_export(struct shash_desc *desc, void *out)
190{
191 struct sha1_state *sctx = shash_desc_ctx(desc);
192
193 memcpy(out, sctx, sizeof(*sctx));
194 return 0;
195}
196
197static int octeon_sha1_import(struct shash_desc *desc, const void *in)
198{
199 struct sha1_state *sctx = shash_desc_ctx(desc);
200
201 memcpy(sctx, in, sizeof(*sctx));
202 return 0;
203}
204
205static struct shash_alg octeon_sha1_alg = {
206 .digestsize = SHA1_DIGEST_SIZE,
207 .init = octeon_sha1_init,
208 .update = octeon_sha1_update,
209 .final = octeon_sha1_final,
210 .export = octeon_sha1_export,
211 .import = octeon_sha1_import,
212 .descsize = sizeof(struct sha1_state),
213 .statesize = sizeof(struct sha1_state),
214 .base = {
215 .cra_name = "sha1",
216 .cra_driver_name= "octeon-sha1",
217 .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
218 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
219 .cra_blocksize = SHA1_BLOCK_SIZE,
220 .cra_module = THIS_MODULE,
221 }
222};
223
224static int __init octeon_sha1_mod_init(void)
225{
226 if (!octeon_has_crypto())
227 return -ENOTSUPP;
228 return crypto_register_shash(&octeon_sha1_alg);
229}
230
231static void __exit octeon_sha1_mod_fini(void)
232{
233 crypto_unregister_shash(&octeon_sha1_alg);
234}
235
236module_init(octeon_sha1_mod_init);
237module_exit(octeon_sha1_mod_fini);
238
239MODULE_LICENSE("GPL");
240MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (OCTEON)");
241MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
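
The negative `done` in __octeon_sha1_update() above is easy to misread: when a partially filled buffer is topped up, done starts at -partial (deliberately wrapping, since it is unsigned), so `done + SHA1_BLOCK_SIZE` inside the memcpy() is exactly the 64 - partial bytes needed to complete the buffered block, and after the first `done += SHA1_BLOCK_SIZE` it equals the number of bytes consumed from `data` so far. A stand-alone sketch of the same bookkeeping with generic names (an illustration under those assumptions, not the kernel code):

    #include <string.h>

    #define BLK 64u                              /* SHA1_BLOCK_SIZE */

    static void transform(const unsigned char *block)
    {
        (void)block;                             /* stand-in for the hardware transform */
    }

    /* buf holds an in-progress block; *partial is how much of it is filled */
    static void update(unsigned char *buf, unsigned int *partial,
                       const unsigned char *data, unsigned int len)
    {
        unsigned int done = 0;
        const unsigned char *src = data;

        if (*partial + len >= BLK) {
            if (*partial) {
                done = 0u - *partial;                    /* wraps on purpose */
                memcpy(buf + *partial, data, done + BLK); /* == BLK - *partial bytes */
                src = buf;
            }
            do {
                transform(src);
                done += BLK;                 /* bytes of 'data' consumed so far */
                src = data + done;
            } while (done + BLK <= len);
            *partial = 0;
        }
        memcpy(buf + *partial, src, len - done);         /* stash the tail */
        *partial += len - done;
    }

The SHA-256 variant in the next file uses the identical pattern with sctx->buf.
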
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
new file mode 100644
index 000000000000..97e96fead08a
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
@@ -0,0 +1,280 @@
1/*
2 * Cryptographic API.
3 *
4 * SHA-224 and SHA-256 Secure Hash Algorithm.
5 *
6 * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
7 *
8 * Based on crypto/sha256_generic.c, which is:
9 *
10 * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
11 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
12 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
13 * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
14 *
15 * This program is free software; you can redistribute it and/or modify it
16 * under the terms of the GNU General Public License as published by the Free
17 * Software Foundation; either version 2 of the License, or (at your option)
18 * any later version.
19 */
20
21#include <linux/mm.h>
22#include <crypto/sha.h>
23#include <linux/init.h>
24#include <linux/types.h>
25#include <linux/module.h>
26#include <asm/byteorder.h>
27#include <asm/octeon/octeon.h>
28#include <crypto/internal/hash.h>
29
30#include "octeon-crypto.h"
31
32/*
33 * We pass everything as 64-bit. OCTEON can handle misaligned data.
34 */
35
36static void octeon_sha256_store_hash(struct sha256_state *sctx)
37{
38 u64 *hash = (u64 *)sctx->state;
39
40 write_octeon_64bit_hash_dword(hash[0], 0);
41 write_octeon_64bit_hash_dword(hash[1], 1);
42 write_octeon_64bit_hash_dword(hash[2], 2);
43 write_octeon_64bit_hash_dword(hash[3], 3);
44}
45
46static void octeon_sha256_read_hash(struct sha256_state *sctx)
47{
48 u64 *hash = (u64 *)sctx->state;
49
50 hash[0] = read_octeon_64bit_hash_dword(0);
51 hash[1] = read_octeon_64bit_hash_dword(1);
52 hash[2] = read_octeon_64bit_hash_dword(2);
53 hash[3] = read_octeon_64bit_hash_dword(3);
54}
55
56static void octeon_sha256_transform(const void *_block)
57{
58 const u64 *block = _block;
59
60 write_octeon_64bit_block_dword(block[0], 0);
61 write_octeon_64bit_block_dword(block[1], 1);
62 write_octeon_64bit_block_dword(block[2], 2);
63 write_octeon_64bit_block_dword(block[3], 3);
64 write_octeon_64bit_block_dword(block[4], 4);
65 write_octeon_64bit_block_dword(block[5], 5);
66 write_octeon_64bit_block_dword(block[6], 6);
67 octeon_sha256_start(block[7]);
68}
69
70static int octeon_sha224_init(struct shash_desc *desc)
71{
72 struct sha256_state *sctx = shash_desc_ctx(desc);
73
74 sctx->state[0] = SHA224_H0;
75 sctx->state[1] = SHA224_H1;
76 sctx->state[2] = SHA224_H2;
77 sctx->state[3] = SHA224_H3;
78 sctx->state[4] = SHA224_H4;
79 sctx->state[5] = SHA224_H5;
80 sctx->state[6] = SHA224_H6;
81 sctx->state[7] = SHA224_H7;
82 sctx->count = 0;
83
84 return 0;
85}
86
87static int octeon_sha256_init(struct shash_desc *desc)
88{
89 struct sha256_state *sctx = shash_desc_ctx(desc);
90
91 sctx->state[0] = SHA256_H0;
92 sctx->state[1] = SHA256_H1;
93 sctx->state[2] = SHA256_H2;
94 sctx->state[3] = SHA256_H3;
95 sctx->state[4] = SHA256_H4;
96 sctx->state[5] = SHA256_H5;
97 sctx->state[6] = SHA256_H6;
98 sctx->state[7] = SHA256_H7;
99 sctx->count = 0;
100
101 return 0;
102}
103
104static void __octeon_sha256_update(struct sha256_state *sctx, const u8 *data,
105 unsigned int len)
106{
107 unsigned int partial;
108 unsigned int done;
109 const u8 *src;
110
111 partial = sctx->count % SHA256_BLOCK_SIZE;
112 sctx->count += len;
113 done = 0;
114 src = data;
115
116 if ((partial + len) >= SHA256_BLOCK_SIZE) {
117 if (partial) {
118 done = -partial;
119 memcpy(sctx->buf + partial, data,
120 done + SHA256_BLOCK_SIZE);
121 src = sctx->buf;
122 }
123
124 do {
125 octeon_sha256_transform(src);
126 done += SHA256_BLOCK_SIZE;
127 src = data + done;
128 } while (done + SHA256_BLOCK_SIZE <= len);
129
130 partial = 0;
131 }
132 memcpy(sctx->buf + partial, src, len - done);
133}
134
135static int octeon_sha256_update(struct shash_desc *desc, const u8 *data,
136 unsigned int len)
137{
138 struct sha256_state *sctx = shash_desc_ctx(desc);
139 struct octeon_cop2_state state;
140 unsigned long flags;
141
142 /*
143 * Small updates never reach the crypto engine, so the generic sha256 is
144 * faster because of the heavyweight octeon_crypto_enable() /
145 * octeon_crypto_disable().
146 */
147 if ((sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
148 return crypto_sha256_update(desc, data, len);
149
150 flags = octeon_crypto_enable(&state);
151 octeon_sha256_store_hash(sctx);
152
153 __octeon_sha256_update(sctx, data, len);
154
155 octeon_sha256_read_hash(sctx);
156 octeon_crypto_disable(&state, flags);
157
158 return 0;
159}
160
161static int octeon_sha256_final(struct shash_desc *desc, u8 *out)
162{
163 struct sha256_state *sctx = shash_desc_ctx(desc);
164 static const u8 padding[64] = { 0x80, };
165 struct octeon_cop2_state state;
166 __be32 *dst = (__be32 *)out;
167 unsigned int pad_len;
168 unsigned long flags;
169 unsigned int index;
170 __be64 bits;
171 int i;
172
173 /* Save number of bits. */
174 bits = cpu_to_be64(sctx->count << 3);
175
176 /* Pad out to 56 mod 64. */
177 index = sctx->count & 0x3f;
178 pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
179
180 flags = octeon_crypto_enable(&state);
181 octeon_sha256_store_hash(sctx);
182
183 __octeon_sha256_update(sctx, padding, pad_len);
184
185 /* Append length (before padding). */
186 __octeon_sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
187
188 octeon_sha256_read_hash(sctx);
189 octeon_crypto_disable(&state, flags);
190
191 /* Store state in digest */
192 for (i = 0; i < 8; i++)
193 dst[i] = cpu_to_be32(sctx->state[i]);
194
195 /* Zeroize sensitive information. */
196 memset(sctx, 0, sizeof(*sctx));
197
198 return 0;
199}
200
201static int octeon_sha224_final(struct shash_desc *desc, u8 *hash)
202{
203 u8 D[SHA256_DIGEST_SIZE];
204
205 octeon_sha256_final(desc, D);
206
207 memcpy(hash, D, SHA224_DIGEST_SIZE);
208 memzero_explicit(D, SHA256_DIGEST_SIZE);
209
210 return 0;
211}
212
213static int octeon_sha256_export(struct shash_desc *desc, void *out)
214{
215 struct sha256_state *sctx = shash_desc_ctx(desc);
216
217 memcpy(out, sctx, sizeof(*sctx));
218 return 0;
219}
220
221static int octeon_sha256_import(struct shash_desc *desc, const void *in)
222{
223 struct sha256_state *sctx = shash_desc_ctx(desc);
224
225 memcpy(sctx, in, sizeof(*sctx));
226 return 0;
227}
228
229static struct shash_alg octeon_sha256_algs[2] = { {
230 .digestsize = SHA256_DIGEST_SIZE,
231 .init = octeon_sha256_init,
232 .update = octeon_sha256_update,
233 .final = octeon_sha256_final,
234 .export = octeon_sha256_export,
235 .import = octeon_sha256_import,
236 .descsize = sizeof(struct sha256_state),
237 .statesize = sizeof(struct sha256_state),
238 .base = {
239 .cra_name = "sha256",
240 .cra_driver_name= "octeon-sha256",
241 .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
242 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
243 .cra_blocksize = SHA256_BLOCK_SIZE,
244 .cra_module = THIS_MODULE,
245 }
246}, {
247 .digestsize = SHA224_DIGEST_SIZE,
248 .init = octeon_sha224_init,
249 .update = octeon_sha256_update,
250 .final = octeon_sha224_final,
251 .descsize = sizeof(struct sha256_state),
252 .base = {
253 .cra_name = "sha224",
254 .cra_driver_name= "octeon-sha224",
255 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
256 .cra_blocksize = SHA224_BLOCK_SIZE,
257 .cra_module = THIS_MODULE,
258 }
259} };
260
261static int __init octeon_sha256_mod_init(void)
262{
263 if (!octeon_has_crypto())
264 return -ENOTSUPP;
265 return crypto_register_shashes(octeon_sha256_algs,
266 ARRAY_SIZE(octeon_sha256_algs));
267}
268
269static void __exit octeon_sha256_mod_fini(void)
270{
271 crypto_unregister_shashes(octeon_sha256_algs,
272 ARRAY_SIZE(octeon_sha256_algs));
273}
274
275module_init(octeon_sha256_mod_init);
276module_exit(octeon_sha256_mod_fini);
277
278MODULE_LICENSE("GPL");
279MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm (OCTEON)");
280MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
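
For reference, the "pad out to 56 mod 64" arithmetic used by octeon_sha1_final() and octeon_sha256_final() above can be checked in isolation: the message is padded so that, once the 8-byte bit count is appended, the total is a whole number of 64-byte blocks. A small stand-alone check with an example value (nothing here is taken from the patch beyond the formula):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long count = 60;       /* example: 60 bytes hashed so far */
        unsigned int index = count & 0x3f;   /* bytes already in the final block */
        unsigned int pad_len = (index < 56) ? (56 - index) : ((64 + 56) - index);

        /* 60 + 60 + 8 = 128: two full blocks once the length field is added */
        printf("index=%u pad_len=%u padded_total=%llu\n",
               index, pad_len, count + pad_len + 8);
        return 0;
    }
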
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
new file mode 100644
index 000000000000..d5fb3c6f22ae
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
@@ -0,0 +1,277 @@
1/*
2 * Cryptographic API.
3 *
4 * SHA-512 and SHA-384 Secure Hash Algorithm.
5 *
6 * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
7 *
8 * Based on crypto/sha512_generic.c, which is:
9 *
10 * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
11 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
12 * Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
13 *
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the
16 * Free Software Foundation; either version 2, or (at your option) any
17 * later version.
18 */
19
20#include <linux/mm.h>
21#include <crypto/sha.h>
22#include <linux/init.h>
23#include <linux/types.h>
24#include <linux/module.h>
25#include <asm/byteorder.h>
26#include <asm/octeon/octeon.h>
27#include <crypto/internal/hash.h>
28
29#include "octeon-crypto.h"
30
31/*
32 * We pass everything as 64-bit. OCTEON can handle misaligned data.
33 */
34
35static void octeon_sha512_store_hash(struct sha512_state *sctx)
36{
37 write_octeon_64bit_hash_sha512(sctx->state[0], 0);
38 write_octeon_64bit_hash_sha512(sctx->state[1], 1);
39 write_octeon_64bit_hash_sha512(sctx->state[2], 2);
40 write_octeon_64bit_hash_sha512(sctx->state[3], 3);
41 write_octeon_64bit_hash_sha512(sctx->state[4], 4);
42 write_octeon_64bit_hash_sha512(sctx->state[5], 5);
43 write_octeon_64bit_hash_sha512(sctx->state[6], 6);
44 write_octeon_64bit_hash_sha512(sctx->state[7], 7);
45}
46
47static void octeon_sha512_read_hash(struct sha512_state *sctx)
48{
49 sctx->state[0] = read_octeon_64bit_hash_sha512(0);
50 sctx->state[1] = read_octeon_64bit_hash_sha512(1);
51 sctx->state[2] = read_octeon_64bit_hash_sha512(2);
52 sctx->state[3] = read_octeon_64bit_hash_sha512(3);
53 sctx->state[4] = read_octeon_64bit_hash_sha512(4);
54 sctx->state[5] = read_octeon_64bit_hash_sha512(5);
55 sctx->state[6] = read_octeon_64bit_hash_sha512(6);
56 sctx->state[7] = read_octeon_64bit_hash_sha512(7);
57}
58
59static void octeon_sha512_transform(const void *_block)
60{
61 const u64 *block = _block;
62
63 write_octeon_64bit_block_sha512(block[0], 0);
64 write_octeon_64bit_block_sha512(block[1], 1);
65 write_octeon_64bit_block_sha512(block[2], 2);
66 write_octeon_64bit_block_sha512(block[3], 3);
67 write_octeon_64bit_block_sha512(block[4], 4);
68 write_octeon_64bit_block_sha512(block[5], 5);
69 write_octeon_64bit_block_sha512(block[6], 6);
70 write_octeon_64bit_block_sha512(block[7], 7);
71 write_octeon_64bit_block_sha512(block[8], 8);
72 write_octeon_64bit_block_sha512(block[9], 9);
73 write_octeon_64bit_block_sha512(block[10], 10);
74 write_octeon_64bit_block_sha512(block[11], 11);
75 write_octeon_64bit_block_sha512(block[12], 12);
76 write_octeon_64bit_block_sha512(block[13], 13);
77 write_octeon_64bit_block_sha512(block[14], 14);
78 octeon_sha512_start(block[15]);
79}
80
81static int octeon_sha512_init(struct shash_desc *desc)
82{
83 struct sha512_state *sctx = shash_desc_ctx(desc);
84
85 sctx->state[0] = SHA512_H0;
86 sctx->state[1] = SHA512_H1;
87 sctx->state[2] = SHA512_H2;
88 sctx->state[3] = SHA512_H3;
89 sctx->state[4] = SHA512_H4;
90 sctx->state[5] = SHA512_H5;
91 sctx->state[6] = SHA512_H6;
92 sctx->state[7] = SHA512_H7;
93 sctx->count[0] = sctx->count[1] = 0;
94
95 return 0;
96}
97
98static int octeon_sha384_init(struct shash_desc *desc)
99{
100 struct sha512_state *sctx = shash_desc_ctx(desc);
101
102 sctx->state[0] = SHA384_H0;
103 sctx->state[1] = SHA384_H1;
104 sctx->state[2] = SHA384_H2;
105 sctx->state[3] = SHA384_H3;
106 sctx->state[4] = SHA384_H4;
107 sctx->state[5] = SHA384_H5;
108 sctx->state[6] = SHA384_H6;
109 sctx->state[7] = SHA384_H7;
110 sctx->count[0] = sctx->count[1] = 0;
111
112 return 0;
113}
114
115static void __octeon_sha512_update(struct sha512_state *sctx, const u8 *data,
116 unsigned int len)
117{
118 unsigned int part_len;
119 unsigned int index;
120 unsigned int i;
121
122 /* Compute number of bytes mod 128. */
123 index = sctx->count[0] % SHA512_BLOCK_SIZE;
124
125 /* Update number of bytes. */
126 if ((sctx->count[0] += len) < len)
127 sctx->count[1]++;
128
129 part_len = SHA512_BLOCK_SIZE - index;
130
131 /* Transform as many times as possible. */
132 if (len >= part_len) {
133 memcpy(&sctx->buf[index], data, part_len);
134 octeon_sha512_transform(sctx->buf);
135
136 for (i = part_len; i + SHA512_BLOCK_SIZE <= len;
137 i += SHA512_BLOCK_SIZE)
138 octeon_sha512_transform(&data[i]);
139
140 index = 0;
141 } else {
142 i = 0;
143 }
144
145 /* Buffer remaining input. */
146 memcpy(&sctx->buf[index], &data[i], len - i);
147}
148
149static int octeon_sha512_update(struct shash_desc *desc, const u8 *data,
150 unsigned int len)
151{
152 struct sha512_state *sctx = shash_desc_ctx(desc);
153 struct octeon_cop2_state state;
154 unsigned long flags;
155
156 /*
157 * Small updates never reach the crypto engine, so the generic sha512 is
158 * faster because of the heavyweight octeon_crypto_enable() /
159 * octeon_crypto_disable().
160 */
161 if ((sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
162 return crypto_sha512_update(desc, data, len);
163
164 flags = octeon_crypto_enable(&state);
165 octeon_sha512_store_hash(sctx);
166
167 __octeon_sha512_update(sctx, data, len);
168
169 octeon_sha512_read_hash(sctx);
170 octeon_crypto_disable(&state, flags);
171
172 return 0;
173}
174
175static int octeon_sha512_final(struct shash_desc *desc, u8 *hash)
176{
177 struct sha512_state *sctx = shash_desc_ctx(desc);
178 static u8 padding[128] = { 0x80, };
179 struct octeon_cop2_state state;
180 __be64 *dst = (__be64 *)hash;
181 unsigned int pad_len;
182 unsigned long flags;
183 unsigned int index;
184 __be64 bits[2];
185 int i;
186
187 /* Save number of bits. */
188 bits[1] = cpu_to_be64(sctx->count[0] << 3);
189 bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
190
191 /* Pad out to 112 mod 128. */
192 index = sctx->count[0] & 0x7f;
193 pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
194
195 flags = octeon_crypto_enable(&state);
196 octeon_sha512_store_hash(sctx);
197
198 __octeon_sha512_update(sctx, padding, pad_len);
199
200 /* Append length (before padding). */
201 __octeon_sha512_update(sctx, (const u8 *)bits, sizeof(bits));
202
203 octeon_sha512_read_hash(sctx);
204 octeon_crypto_disable(&state, flags);
205
206 /* Store state in digest. */
207 for (i = 0; i < 8; i++)
208 dst[i] = cpu_to_be64(sctx->state[i]);
209
210 /* Zeroize sensitive information. */
211 memset(sctx, 0, sizeof(struct sha512_state));
212
213 return 0;
214}
215
216static int octeon_sha384_final(struct shash_desc *desc, u8 *hash)
217{
218 u8 D[64];
219
220 octeon_sha512_final(desc, D);
221
222 memcpy(hash, D, 48);
223 memzero_explicit(D, 64);
224
225 return 0;
226}
227
228static struct shash_alg octeon_sha512_algs[2] = { {
229 .digestsize = SHA512_DIGEST_SIZE,
230 .init = octeon_sha512_init,
231 .update = octeon_sha512_update,
232 .final = octeon_sha512_final,
233 .descsize = sizeof(struct sha512_state),
234 .base = {
235 .cra_name = "sha512",
236 .cra_driver_name= "octeon-sha512",
237 .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
238 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
239 .cra_blocksize = SHA512_BLOCK_SIZE,
240 .cra_module = THIS_MODULE,
241 }
242}, {
243 .digestsize = SHA384_DIGEST_SIZE,
244 .init = octeon_sha384_init,
245 .update = octeon_sha512_update,
246 .final = octeon_sha384_final,
247 .descsize = sizeof(struct sha512_state),
248 .base = {
249 .cra_name = "sha384",
250 .cra_driver_name= "octeon-sha384",
251 .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
252 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
253 .cra_blocksize = SHA384_BLOCK_SIZE,
254 .cra_module = THIS_MODULE,
255 }
256} };
257
258static int __init octeon_sha512_mod_init(void)
259{
260 if (!octeon_has_crypto())
261 return -ENOTSUPP;
262 return crypto_register_shashes(octeon_sha512_algs,
263 ARRAY_SIZE(octeon_sha512_algs));
264}
265
266static void __exit octeon_sha512_mod_fini(void)
267{
268 crypto_unregister_shashes(octeon_sha512_algs,
269 ARRAY_SIZE(octeon_sha512_algs));
270}
271
272module_init(octeon_sha512_mod_init);
273module_exit(octeon_sha512_mod_fini);
274
275MODULE_LICENSE("GPL");
276MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms (OCTEON)");
277MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
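
octeon_sha512_final() above appends a 128-bit bit count; count[1]:count[0] hold the byte count as a 128-bit value, so converting bytes to bits shifts the whole 128-bit quantity left by three, with the top three bits of count[0] carried into the high word (bits[0]). A stand-alone check with a made-up byte count (the value is purely illustrative):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* hypothetical byte count: 0x3000000000000000 bytes, high word zero */
        uint64_t count[2] = { 0x3000000000000000ULL, 0 };
        uint64_t bits_lo = count[0] << 3;
        uint64_t bits_hi = (count[1] << 3) | (count[0] >> 61);

        /* expect hi=0x1, lo=0x8000000000000000: 0x3000000000000000 * 8 bits */
        printf("hi=%#llx lo=%#llx\n",
               (unsigned long long)bits_hi, (unsigned long long)bits_lo);
        return 0;
    }
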
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
index 4794067cb5a7..5035f09c5427 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
@@ -1259,20 +1259,6 @@
1259#define M2M_DSTID_REG(x) ((x) * 0x40 + 0x18) 1259#define M2M_DSTID_REG(x) ((x) * 0x40 + 0x18)
1260 1260
1261/************************************************************************* 1261/*************************************************************************
1262 * _REG relative to RSET_RNG
1263 *************************************************************************/
1264
1265#define RNG_CTRL 0x00
1266#define RNG_EN (1 << 0)
1267
1268#define RNG_STAT 0x04
1269#define RNG_AVAIL_MASK (0xff000000)
1270
1271#define RNG_DATA 0x08
1272#define RNG_THRES 0x0c
1273#define RNG_MASK 0x10
1274
1275/*************************************************************************
1276 * _REG relative to RSET_SPI 1262 * _REG relative to RSET_SPI
1277 *************************************************************************/ 1263 *************************************************************************/
1278 1264
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 2926fb9c570a..9c221b69c181 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -4,6 +4,14 @@
4# Arch-specific CryptoAPI modules. 4# Arch-specific CryptoAPI modules.
5# 5#
6 6
7obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
8obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
7obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o 9obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
10obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
11obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
8 12
13aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
14md5-ppc-y := md5-asm.o md5-glue.o
9sha1-powerpc-y := sha1-powerpc-asm.o sha1.o 15sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
16sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
17sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S
new file mode 100644
index 000000000000..5dc6bce90a77
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-core.S
@@ -0,0 +1,351 @@
1/*
2 * Fast AES implementation for SPE instruction set (PPC)
3 *
4 * This code makes use of the SPE SIMD instruction set as defined in
5 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
6 * Implementation is based on optimization guide notes from
7 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
8 *
9 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 */
17
18#include <asm/ppc_asm.h>
19#include "aes-spe-regs.h"
20
21#define EAD(in, bpos) \
22 rlwimi rT0,in,28-((bpos+3)%4)*8,20,27;
23
24#define DAD(in, bpos) \
25 rlwimi rT1,in,24-((bpos+3)%4)*8,24,31;
26
27#define LWH(out, off) \
28 evlwwsplat out,off(rT0); /* load word high */
29
30#define LWL(out, off) \
31 lwz out,off(rT0); /* load word low */
32
33#define LBZ(out, tab, off) \
34 lbz out,off(tab); /* load byte */
35
36#define LAH(out, in, bpos, off) \
37 EAD(in, bpos) /* calc addr + load word high */ \
38 LWH(out, off)
39
40#define LAL(out, in, bpos, off) \
41 EAD(in, bpos) /* calc addr + load word low */ \
42 LWL(out, off)
43
44#define LAE(out, in, bpos) \
45 EAD(in, bpos) /* calc addr + load enc byte */ \
46 LBZ(out, rT0, 8)
47
48#define LBE(out) \
49 LBZ(out, rT0, 8) /* load enc byte */
50
51#define LAD(out, in, bpos) \
52 DAD(in, bpos) /* calc addr + load dec byte */ \
53 LBZ(out, rT1, 0)
54
55#define LBD(out) \
56 LBZ(out, rT1, 0)
57
58/*
 59 * ppc_encrypt_block: The central encryption function for a single 16-byte
 60 * block. It does no stack handling or register saving, to support fast calls
 61 * via bl/blr. It expects the caller to have pre-xored the input data with the
 62 * first 4 words of the encryption key into rD0-rD3. The pointer/counter
 63 * registers (rT0, rKP, CTR) must also have been set up beforehand. Output is
 64 * stored in rD0-rD3 and rW0-rW3; the caller must execute a final xor on the output registers.
65 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
66 *
67 */
68_GLOBAL(ppc_encrypt_block)
69 LAH(rW4, rD1, 2, 4)
70 LAH(rW6, rD0, 3, 0)
71 LAH(rW3, rD0, 1, 8)
72ppc_encrypt_block_loop:
73 LAH(rW0, rD3, 0, 12)
74 LAL(rW0, rD0, 0, 12)
75 LAH(rW1, rD1, 0, 12)
76 LAH(rW2, rD2, 1, 8)
77 LAL(rW2, rD3, 1, 8)
78 LAL(rW3, rD1, 1, 8)
79 LAL(rW4, rD2, 2, 4)
80 LAL(rW6, rD1, 3, 0)
81 LAH(rW5, rD3, 2, 4)
82 LAL(rW5, rD0, 2, 4)
83 LAH(rW7, rD2, 3, 0)
84 evldw rD1,16(rKP)
85 EAD(rD3, 3)
86 evxor rW2,rW2,rW4
87 LWL(rW7, 0)
88 evxor rW2,rW2,rW6
89 EAD(rD2, 0)
90 evxor rD1,rD1,rW2
91 LWL(rW1, 12)
92 evxor rD1,rD1,rW0
93 evldw rD3,24(rKP)
94 evmergehi rD0,rD0,rD1
95 EAD(rD1, 2)
96 evxor rW3,rW3,rW5
97 LWH(rW4, 4)
98 evxor rW3,rW3,rW7
99 EAD(rD0, 3)
100 evxor rD3,rD3,rW3
101 LWH(rW6, 0)
102 evxor rD3,rD3,rW1
103 EAD(rD0, 1)
104 evmergehi rD2,rD2,rD3
105 LWH(rW3, 8)
106 LAH(rW0, rD3, 0, 12)
107 LAL(rW0, rD0, 0, 12)
108 LAH(rW1, rD1, 0, 12)
109 LAH(rW2, rD2, 1, 8)
110 LAL(rW2, rD3, 1, 8)
111 LAL(rW3, rD1, 1, 8)
112 LAL(rW4, rD2, 2, 4)
113 LAL(rW6, rD1, 3, 0)
114 LAH(rW5, rD3, 2, 4)
115 LAL(rW5, rD0, 2, 4)
116 LAH(rW7, rD2, 3, 0)
117 evldw rD1,32(rKP)
118 EAD(rD3, 3)
119 evxor rW2,rW2,rW4
120 LWL(rW7, 0)
121 evxor rW2,rW2,rW6
122 EAD(rD2, 0)
123 evxor rD1,rD1,rW2
124 LWL(rW1, 12)
125 evxor rD1,rD1,rW0
126 evldw rD3,40(rKP)
127 evmergehi rD0,rD0,rD1
128 EAD(rD1, 2)
129 evxor rW3,rW3,rW5
130 LWH(rW4, 4)
131 evxor rW3,rW3,rW7
132 EAD(rD0, 3)
133 evxor rD3,rD3,rW3
134 LWH(rW6, 0)
135 evxor rD3,rD3,rW1
136 EAD(rD0, 1)
137 evmergehi rD2,rD2,rD3
138 LWH(rW3, 8)
139 addi rKP,rKP,32
140 bdnz ppc_encrypt_block_loop
141 LAH(rW0, rD3, 0, 12)
142 LAL(rW0, rD0, 0, 12)
143 LAH(rW1, rD1, 0, 12)
144 LAH(rW2, rD2, 1, 8)
145 LAL(rW2, rD3, 1, 8)
146 LAL(rW3, rD1, 1, 8)
147 LAL(rW4, rD2, 2, 4)
148 LAH(rW5, rD3, 2, 4)
149 LAL(rW6, rD1, 3, 0)
150 LAL(rW5, rD0, 2, 4)
151 LAH(rW7, rD2, 3, 0)
152 evldw rD1,16(rKP)
153 EAD(rD3, 3)
154 evxor rW2,rW2,rW4
155 LWL(rW7, 0)
156 evxor rW2,rW2,rW6
157 EAD(rD2, 0)
158 evxor rD1,rD1,rW2
159 LWL(rW1, 12)
160 evxor rD1,rD1,rW0
161 evldw rD3,24(rKP)
162 evmergehi rD0,rD0,rD1
163 EAD(rD1, 0)
164 evxor rW3,rW3,rW5
165 LBE(rW2)
166 evxor rW3,rW3,rW7
167 EAD(rD0, 1)
168 evxor rD3,rD3,rW3
169 LBE(rW6)
170 evxor rD3,rD3,rW1
171 EAD(rD0, 0)
172 evmergehi rD2,rD2,rD3
173 LBE(rW1)
174 LAE(rW0, rD3, 0)
175 LAE(rW1, rD0, 0)
176 LAE(rW4, rD2, 1)
177 LAE(rW5, rD3, 1)
178 LAE(rW3, rD2, 0)
179 LAE(rW7, rD1, 1)
180 rlwimi rW0,rW4,8,16,23
181 rlwimi rW1,rW5,8,16,23
182 LAE(rW4, rD1, 2)
183 LAE(rW5, rD2, 2)
184 rlwimi rW2,rW6,8,16,23
185 rlwimi rW3,rW7,8,16,23
186 LAE(rW6, rD3, 2)
187 LAE(rW7, rD0, 2)
188 rlwimi rW0,rW4,16,8,15
189 rlwimi rW1,rW5,16,8,15
190 LAE(rW4, rD0, 3)
191 LAE(rW5, rD1, 3)
192 rlwimi rW2,rW6,16,8,15
193 lwz rD0,32(rKP)
194 rlwimi rW3,rW7,16,8,15
195 lwz rD1,36(rKP)
196 LAE(rW6, rD2, 3)
197 LAE(rW7, rD3, 3)
198 rlwimi rW0,rW4,24,0,7
199 lwz rD2,40(rKP)
200 rlwimi rW1,rW5,24,0,7
201 lwz rD3,44(rKP)
202 rlwimi rW2,rW6,24,0,7
203 rlwimi rW3,rW7,24,0,7
204 blr
205
206/*
 207 * ppc_decrypt_block: The central decryption function for a single 16-byte
 208 * block. It does no stack handling or register saving, to support fast calls
 209 * via bl/blr. It expects the caller to have pre-xored the input data with the
 210 * first 4 words of the encryption key into rD0-rD3. The pointer/counter
 211 * registers (rT0, rKP, CTR) must also have been set up beforehand. Output is
 212 * stored in rD0-rD3 and rW0-rW3; the caller must execute a final xor on the output registers.
213 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
214 *
215 */
216_GLOBAL(ppc_decrypt_block)
217 LAH(rW0, rD1, 0, 12)
218 LAH(rW6, rD0, 3, 0)
219 LAH(rW3, rD0, 1, 8)
220ppc_decrypt_block_loop:
221 LAH(rW1, rD3, 0, 12)
222 LAL(rW0, rD2, 0, 12)
223 LAH(rW2, rD2, 1, 8)
224 LAL(rW2, rD3, 1, 8)
225 LAH(rW4, rD3, 2, 4)
226 LAL(rW4, rD0, 2, 4)
227 LAL(rW6, rD1, 3, 0)
228 LAH(rW5, rD1, 2, 4)
229 LAH(rW7, rD2, 3, 0)
230 LAL(rW7, rD3, 3, 0)
231 LAL(rW3, rD1, 1, 8)
232 evldw rD1,16(rKP)
233 EAD(rD0, 0)
234 evxor rW4,rW4,rW6
235 LWL(rW1, 12)
236 evxor rW0,rW0,rW4
237 EAD(rD2, 2)
238 evxor rW0,rW0,rW2
239 LWL(rW5, 4)
240 evxor rD1,rD1,rW0
241 evldw rD3,24(rKP)
242 evmergehi rD0,rD0,rD1
243 EAD(rD1, 0)
244 evxor rW3,rW3,rW7
245 LWH(rW0, 12)
246 evxor rW3,rW3,rW1
247 EAD(rD0, 3)
248 evxor rD3,rD3,rW3
249 LWH(rW6, 0)
250 evxor rD3,rD3,rW5
251 EAD(rD0, 1)
252 evmergehi rD2,rD2,rD3
253 LWH(rW3, 8)
254 LAH(rW1, rD3, 0, 12)
255 LAL(rW0, rD2, 0, 12)
256 LAH(rW2, rD2, 1, 8)
257 LAL(rW2, rD3, 1, 8)
258 LAH(rW4, rD3, 2, 4)
259 LAL(rW4, rD0, 2, 4)
260 LAL(rW6, rD1, 3, 0)
261 LAH(rW5, rD1, 2, 4)
262 LAH(rW7, rD2, 3, 0)
263 LAL(rW7, rD3, 3, 0)
264 LAL(rW3, rD1, 1, 8)
265 evldw rD1,32(rKP)
266 EAD(rD0, 0)
267 evxor rW4,rW4,rW6
268 LWL(rW1, 12)
269 evxor rW0,rW0,rW4
270 EAD(rD2, 2)
271 evxor rW0,rW0,rW2
272 LWL(rW5, 4)
273 evxor rD1,rD1,rW0
274 evldw rD3,40(rKP)
275 evmergehi rD0,rD0,rD1
276 EAD(rD1, 0)
277 evxor rW3,rW3,rW7
278 LWH(rW0, 12)
279 evxor rW3,rW3,rW1
280 EAD(rD0, 3)
281 evxor rD3,rD3,rW3
282 LWH(rW6, 0)
283 evxor rD3,rD3,rW5
284 EAD(rD0, 1)
285 evmergehi rD2,rD2,rD3
286 LWH(rW3, 8)
287 addi rKP,rKP,32
288 bdnz ppc_decrypt_block_loop
289 LAH(rW1, rD3, 0, 12)
290 LAL(rW0, rD2, 0, 12)
291 LAH(rW2, rD2, 1, 8)
292 LAL(rW2, rD3, 1, 8)
293 LAH(rW4, rD3, 2, 4)
294 LAL(rW4, rD0, 2, 4)
295 LAL(rW6, rD1, 3, 0)
296 LAH(rW5, rD1, 2, 4)
297 LAH(rW7, rD2, 3, 0)
298 LAL(rW7, rD3, 3, 0)
299 LAL(rW3, rD1, 1, 8)
300 evldw rD1,16(rKP)
301 EAD(rD0, 0)
302 evxor rW4,rW4,rW6
303 LWL(rW1, 12)
304 evxor rW0,rW0,rW4
305 EAD(rD2, 2)
306 evxor rW0,rW0,rW2
307 LWL(rW5, 4)
308 evxor rD1,rD1,rW0
309 evldw rD3,24(rKP)
310 evmergehi rD0,rD0,rD1
311 DAD(rD1, 0)
312 evxor rW3,rW3,rW7
313 LBD(rW0)
314 evxor rW3,rW3,rW1
315 DAD(rD0, 1)
316 evxor rD3,rD3,rW3
317 LBD(rW6)
318 evxor rD3,rD3,rW5
319 DAD(rD0, 0)
320 evmergehi rD2,rD2,rD3
321 LBD(rW3)
322 LAD(rW2, rD3, 0)
323 LAD(rW1, rD2, 0)
324 LAD(rW4, rD2, 1)
325 LAD(rW5, rD3, 1)
326 LAD(rW7, rD1, 1)
327 rlwimi rW0,rW4,8,16,23
328 rlwimi rW1,rW5,8,16,23
329 LAD(rW4, rD3, 2)
330 LAD(rW5, rD0, 2)
331 rlwimi rW2,rW6,8,16,23
332 rlwimi rW3,rW7,8,16,23
333 LAD(rW6, rD1, 2)
334 LAD(rW7, rD2, 2)
335 rlwimi rW0,rW4,16,8,15
336 rlwimi rW1,rW5,16,8,15
337 LAD(rW4, rD0, 3)
338 LAD(rW5, rD1, 3)
339 rlwimi rW2,rW6,16,8,15
340 lwz rD0,32(rKP)
341 rlwimi rW3,rW7,16,8,15
342 lwz rD1,36(rKP)
343 LAD(rW6, rD2, 3)
344 LAD(rW7, rD3, 3)
345 rlwimi rW0,rW4,24,0,7
346 lwz rD2,40(rKP)
347 rlwimi rW1,rW5,24,0,7
348 lwz rD3,44(rKP)
349 rlwimi rW2,rW6,24,0,7
350 rlwimi rW3,rW7,24,0,7
351 blr
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
new file mode 100644
index 000000000000..bd5e63f72ad4
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -0,0 +1,512 @@
1/*
2 * Glue code for AES implementation for SPE instructions (PPC)
3 *
4 * Based on the generic implementation. The assembler module takes care
5 * of the SPE registers so it can run from interrupt context.
6 *
7 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation; either version 2 of the License, or (at your option)
12 * any later version.
13 *
14 */
15
16#include <crypto/aes.h>
17#include <linux/module.h>
18#include <linux/init.h>
19#include <linux/types.h>
20#include <linux/errno.h>
21#include <linux/crypto.h>
22#include <asm/byteorder.h>
23#include <asm/switch_to.h>
24#include <crypto/algapi.h>
25
26/*
27 * MAX_BYTES defines the number of bytes that are allowed to be processed
28 * between preempt_disable() and preempt_enable(). e500 cores can issue two
29 * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
30 * bit unit (SU2). One of these can be a memory access that is executed via
31 * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
32 * 16 byte block, or 25 cycles per byte. Thus 768 bytes of input data
33 * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
34 * included. Even with the low end model clocked at 667 MHz this equals a
35 * critical time window of less than 30us. The value has been chosen to
36 * process a 512 byte disk block in one run or a large 1400 byte IPsec
37 * network packet in two runs.
38 *
39 */
40#define MAX_BYTES 768
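/*
 * Worked example for the budget above (illustrative figures only):
 * 768 bytes * ~25 cycles/byte = ~19,200 cycles, which at the quoted
 * 667 MHz core clock (667 cycles/us) is roughly 29 us of preempt-off time.
 */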
41
42struct ppc_aes_ctx {
43 u32 key_enc[AES_MAX_KEYLENGTH_U32];
44 u32 key_dec[AES_MAX_KEYLENGTH_U32];
45 u32 rounds;
46};
47
48struct ppc_xts_ctx {
49 u32 key_enc[AES_MAX_KEYLENGTH_U32];
50 u32 key_dec[AES_MAX_KEYLENGTH_U32];
51 u32 key_twk[AES_MAX_KEYLENGTH_U32];
52 u32 rounds;
53};
54
55extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
56extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
57extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
58 u32 bytes);
59extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
60 u32 bytes);
61extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
62 u32 bytes, u8 *iv);
63extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
64 u32 bytes, u8 *iv);
65extern void ppc_crypt_ctr (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
66 u32 bytes, u8 *iv);
67extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
68 u32 bytes, u8 *iv, u32 *key_twk);
69extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
70 u32 bytes, u8 *iv, u32 *key_twk);
71
72extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
73extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
74extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
75
76extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
77 unsigned int key_len);
78
79static void spe_begin(void)
80{
81	/* disable preemption and save user's SPE registers if required */
82 preempt_disable();
83 enable_kernel_spe();
84}
85
86static void spe_end(void)
87{
88 /* reenable preemption */
89 preempt_enable();
90}
91
92static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
93 unsigned int key_len)
94{
95 struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
96
97 if (key_len != AES_KEYSIZE_128 &&
98 key_len != AES_KEYSIZE_192 &&
99 key_len != AES_KEYSIZE_256) {
100 tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
101 return -EINVAL;
102 }
103
104 switch (key_len) {
105 case AES_KEYSIZE_128:
106 ctx->rounds = 4;
107 ppc_expand_key_128(ctx->key_enc, in_key);
108 break;
109 case AES_KEYSIZE_192:
110 ctx->rounds = 5;
111 ppc_expand_key_192(ctx->key_enc, in_key);
112 break;
113 case AES_KEYSIZE_256:
114 ctx->rounds = 6;
115 ppc_expand_key_256(ctx->key_enc, in_key);
116 break;
117 }
118
119 ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
120
121 return 0;
122}
123
124static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
125 unsigned int key_len)
126{
127 struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
128
129 key_len >>= 1;
130
131 if (key_len != AES_KEYSIZE_128 &&
132 key_len != AES_KEYSIZE_192 &&
133 key_len != AES_KEYSIZE_256) {
134 tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
135 return -EINVAL;
136 }
137
138 switch (key_len) {
139 case AES_KEYSIZE_128:
140 ctx->rounds = 4;
141 ppc_expand_key_128(ctx->key_enc, in_key);
142 ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
143 break;
144 case AES_KEYSIZE_192:
145 ctx->rounds = 5;
146 ppc_expand_key_192(ctx->key_enc, in_key);
147 ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
148 break;
149 case AES_KEYSIZE_256:
150 ctx->rounds = 6;
151 ppc_expand_key_256(ctx->key_enc, in_key);
152 ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
153 break;
154 }
155
156 ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
157
158 return 0;
159}
160
161static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
162{
163 struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
164
165 spe_begin();
166 ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
167 spe_end();
168}
169
170static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
171{
172 struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
173
174 spe_begin();
175 ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
176 spe_end();
177}
178
179static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
180 struct scatterlist *src, unsigned int nbytes)
181{
182 struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
183 struct blkcipher_walk walk;
184 unsigned int ubytes;
185 int err;
186
187 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
188 blkcipher_walk_init(&walk, dst, src, nbytes);
189 err = blkcipher_walk_virt(desc, &walk);
190
191 while ((nbytes = walk.nbytes)) {
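		/*
		 * Cap each preempt-off section at MAX_BYTES: "ubytes" is the
		 * amount deliberately left unprocessed for the next walk
		 * iteration, i.e. either the excess beyond MAX_BYTES or a
		 * trailing partial block.
		 */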
192 ubytes = nbytes > MAX_BYTES ?
193 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
194 nbytes -= ubytes;
195
196 spe_begin();
197 ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
198 ctx->key_enc, ctx->rounds, nbytes);
199 spe_end();
200
201 err = blkcipher_walk_done(desc, &walk, ubytes);
202 }
203
204 return err;
205}
206
207static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
208 struct scatterlist *src, unsigned int nbytes)
209{
210 struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
211 struct blkcipher_walk walk;
212 unsigned int ubytes;
213 int err;
214
215 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
216 blkcipher_walk_init(&walk, dst, src, nbytes);
217 err = blkcipher_walk_virt(desc, &walk);
218
219 while ((nbytes = walk.nbytes)) {
220 ubytes = nbytes > MAX_BYTES ?
221 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
222 nbytes -= ubytes;
223
224 spe_begin();
225 ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
226 ctx->key_dec, ctx->rounds, nbytes);
227 spe_end();
228
229 err = blkcipher_walk_done(desc, &walk, ubytes);
230 }
231
232 return err;
233}
234
235static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
236 struct scatterlist *src, unsigned int nbytes)
237{
238 struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
239 struct blkcipher_walk walk;
240 unsigned int ubytes;
241 int err;
242
243 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
244 blkcipher_walk_init(&walk, dst, src, nbytes);
245 err = blkcipher_walk_virt(desc, &walk);
246
247 while ((nbytes = walk.nbytes)) {
248 ubytes = nbytes > MAX_BYTES ?
249 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
250 nbytes -= ubytes;
251
252 spe_begin();
253 ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
254 ctx->key_enc, ctx->rounds, nbytes, walk.iv);
255 spe_end();
256
257 err = blkcipher_walk_done(desc, &walk, ubytes);
258 }
259
260 return err;
261}
262
263static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
264 struct scatterlist *src, unsigned int nbytes)
265{
266 struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
267 struct blkcipher_walk walk;
268 unsigned int ubytes;
269 int err;
270
271 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
272 blkcipher_walk_init(&walk, dst, src, nbytes);
273 err = blkcipher_walk_virt(desc, &walk);
274
275 while ((nbytes = walk.nbytes)) {
276 ubytes = nbytes > MAX_BYTES ?
277 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
278 nbytes -= ubytes;
279
280 spe_begin();
281 ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
282 ctx->key_dec, ctx->rounds, nbytes, walk.iv);
283 spe_end();
284
285 err = blkcipher_walk_done(desc, &walk, ubytes);
286 }
287
288 return err;
289}
290
291static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
292 struct scatterlist *src, unsigned int nbytes)
293{
294 struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
295 struct blkcipher_walk walk;
296 unsigned int pbytes, ubytes;
297 int err;
298
299 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
300 blkcipher_walk_init(&walk, dst, src, nbytes);
301 err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
302
303 while ((pbytes = walk.nbytes)) {
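		/*
		 * "pbytes" is what gets processed in this round: at most
		 * MAX_BYTES and, except for the final chunk, rounded down to
		 * whole AES blocks; the remaining "ubytes" are handed back
		 * to the walk.
		 */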
304 pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
305 pbytes = pbytes == nbytes ?
306 nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
307 ubytes = walk.nbytes - pbytes;
308
309 spe_begin();
310 ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
311 ctx->key_enc, ctx->rounds, pbytes , walk.iv);
312 spe_end();
313
314 nbytes -= pbytes;
315 err = blkcipher_walk_done(desc, &walk, ubytes);
316 }
317
318 return err;
319}
320
321static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
322 struct scatterlist *src, unsigned int nbytes)
323{
324 struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
325 struct blkcipher_walk walk;
326 unsigned int ubytes;
327 int err;
328 u32 *twk;
329
330 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
331 blkcipher_walk_init(&walk, dst, src, nbytes);
332 err = blkcipher_walk_virt(desc, &walk);
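	/*
	 * The tweak key is only needed to encrypt the initial IV, so it is
	 * passed on the first call and cleared afterwards; later chunks
	 * continue from the tweak that the assembler stores back into walk.iv.
	 */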
333 twk = ctx->key_twk;
334
335 while ((nbytes = walk.nbytes)) {
336 ubytes = nbytes > MAX_BYTES ?
337 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
338 nbytes -= ubytes;
339
340 spe_begin();
341 ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
342 ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
343 spe_end();
344
345 twk = NULL;
346 err = blkcipher_walk_done(desc, &walk, ubytes);
347 }
348
349 return err;
350}
351
352static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
353 struct scatterlist *src, unsigned int nbytes)
354{
355 struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
356 struct blkcipher_walk walk;
357 unsigned int ubytes;
358 int err;
359 u32 *twk;
360
361 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
362 blkcipher_walk_init(&walk, dst, src, nbytes);
363 err = blkcipher_walk_virt(desc, &walk);
364 twk = ctx->key_twk;
365
366 while ((nbytes = walk.nbytes)) {
367 ubytes = nbytes > MAX_BYTES ?
368 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
369 nbytes -= ubytes;
370
371 spe_begin();
372 ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
373 ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
374 spe_end();
375
376 twk = NULL;
377 err = blkcipher_walk_done(desc, &walk, ubytes);
378 }
379
380 return err;
381}
382
383/*
384 * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
385 * because the e500 platform can handle unaligned reads/writes very efficiently.
386 * This improves IPsec throughput by another few percent. Additionally we assume
387 * that AES context is always aligned to at least 8 bytes because it is created
388 * with kmalloc() in the crypto infrastructure.
389 *
390 */
391static struct crypto_alg aes_algs[] = { {
392 .cra_name = "aes",
393 .cra_driver_name = "aes-ppc-spe",
394 .cra_priority = 300,
395 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
396 .cra_blocksize = AES_BLOCK_SIZE,
397 .cra_ctxsize = sizeof(struct ppc_aes_ctx),
398 .cra_alignmask = 0,
399 .cra_module = THIS_MODULE,
400 .cra_u = {
401 .cipher = {
402 .cia_min_keysize = AES_MIN_KEY_SIZE,
403 .cia_max_keysize = AES_MAX_KEY_SIZE,
404 .cia_setkey = ppc_aes_setkey,
405 .cia_encrypt = ppc_aes_encrypt,
406 .cia_decrypt = ppc_aes_decrypt
407 }
408 }
409}, {
410 .cra_name = "ecb(aes)",
411 .cra_driver_name = "ecb-ppc-spe",
412 .cra_priority = 300,
413 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
414 .cra_blocksize = AES_BLOCK_SIZE,
415 .cra_ctxsize = sizeof(struct ppc_aes_ctx),
416 .cra_alignmask = 0,
417 .cra_type = &crypto_blkcipher_type,
418 .cra_module = THIS_MODULE,
419 .cra_u = {
420 .blkcipher = {
421 .min_keysize = AES_MIN_KEY_SIZE,
422 .max_keysize = AES_MAX_KEY_SIZE,
423 .ivsize = AES_BLOCK_SIZE,
424 .setkey = ppc_aes_setkey,
425 .encrypt = ppc_ecb_encrypt,
426 .decrypt = ppc_ecb_decrypt,
427 }
428 }
429}, {
430 .cra_name = "cbc(aes)",
431 .cra_driver_name = "cbc-ppc-spe",
432 .cra_priority = 300,
433 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
434 .cra_blocksize = AES_BLOCK_SIZE,
435 .cra_ctxsize = sizeof(struct ppc_aes_ctx),
436 .cra_alignmask = 0,
437 .cra_type = &crypto_blkcipher_type,
438 .cra_module = THIS_MODULE,
439 .cra_u = {
440 .blkcipher = {
441 .min_keysize = AES_MIN_KEY_SIZE,
442 .max_keysize = AES_MAX_KEY_SIZE,
443 .ivsize = AES_BLOCK_SIZE,
444 .setkey = ppc_aes_setkey,
445 .encrypt = ppc_cbc_encrypt,
446 .decrypt = ppc_cbc_decrypt,
447 }
448 }
449}, {
450 .cra_name = "ctr(aes)",
451 .cra_driver_name = "ctr-ppc-spe",
452 .cra_priority = 300,
453 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
454 .cra_blocksize = 1,
455 .cra_ctxsize = sizeof(struct ppc_aes_ctx),
456 .cra_alignmask = 0,
457 .cra_type = &crypto_blkcipher_type,
458 .cra_module = THIS_MODULE,
459 .cra_u = {
460 .blkcipher = {
461 .min_keysize = AES_MIN_KEY_SIZE,
462 .max_keysize = AES_MAX_KEY_SIZE,
463 .ivsize = AES_BLOCK_SIZE,
464 .setkey = ppc_aes_setkey,
465 .encrypt = ppc_ctr_crypt,
466 .decrypt = ppc_ctr_crypt,
467 }
468 }
469}, {
470 .cra_name = "xts(aes)",
471 .cra_driver_name = "xts-ppc-spe",
472 .cra_priority = 300,
473 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
474 .cra_blocksize = AES_BLOCK_SIZE,
475 .cra_ctxsize = sizeof(struct ppc_xts_ctx),
476 .cra_alignmask = 0,
477 .cra_type = &crypto_blkcipher_type,
478 .cra_module = THIS_MODULE,
479 .cra_u = {
480 .blkcipher = {
481 .min_keysize = AES_MIN_KEY_SIZE * 2,
482 .max_keysize = AES_MAX_KEY_SIZE * 2,
483 .ivsize = AES_BLOCK_SIZE,
484 .setkey = ppc_xts_setkey,
485 .encrypt = ppc_xts_encrypt,
486 .decrypt = ppc_xts_decrypt,
487 }
488 }
489} };
490
491static int __init ppc_aes_mod_init(void)
492{
493 return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
494}
495
496static void __exit ppc_aes_mod_fini(void)
497{
498 crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
499}
500
501module_init(ppc_aes_mod_init);
502module_exit(ppc_aes_mod_fini);
503
504MODULE_LICENSE("GPL");
505MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
506
507MODULE_ALIAS_CRYPTO("aes");
508MODULE_ALIAS_CRYPTO("ecb(aes)");
509MODULE_ALIAS_CRYPTO("cbc(aes)");
510MODULE_ALIAS_CRYPTO("ctr(aes)");
511MODULE_ALIAS_CRYPTO("xts(aes)");
512MODULE_ALIAS_CRYPTO("aes-ppc-spe");
diff --git a/arch/powerpc/crypto/aes-spe-keys.S b/arch/powerpc/crypto/aes-spe-keys.S
new file mode 100644
index 000000000000..be8090f3d700
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-keys.S
@@ -0,0 +1,283 @@
1/*
2 * Key handling functions for PPC AES implementation
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <asm/ppc_asm.h>
14
15#ifdef __BIG_ENDIAN__
16#define LOAD_KEY(d, s, off) \
17 lwz d,off(s);
18#else
19#define LOAD_KEY(d, s, off) \
20 li r0,off; \
21 lwbrx d,s,r0;
22#endif
23
24#define INITIALIZE_KEY \
25 stwu r1,-32(r1); /* create stack frame */ \
26 stw r14,8(r1); /* save registers */ \
27 stw r15,12(r1); \
28 stw r16,16(r1);
29
30#define FINALIZE_KEY \
31 lwz r14,8(r1); /* restore registers */ \
32 lwz r15,12(r1); \
33 lwz r16,16(r1); \
34 xor r5,r5,r5; /* clear sensitive data */ \
35 xor r6,r6,r6; \
36 xor r7,r7,r7; \
37 xor r8,r8,r8; \
38 xor r9,r9,r9; \
39 xor r10,r10,r10; \
40 xor r11,r11,r11; \
41 xor r12,r12,r12; \
42 addi r1,r1,32; /* cleanup stack */
43
44#define LS_BOX(r, t1, t2) \
45 lis t2,PPC_AES_4K_ENCTAB@h; \
46 ori t2,t2,PPC_AES_4K_ENCTAB@l; \
47 rlwimi t2,r,4,20,27; \
48 lbz t1,8(t2); \
49 rlwimi r,t1,0,24,31; \
50 rlwimi t2,r,28,20,27; \
51 lbz t1,8(t2); \
52 rlwimi r,t1,8,16,23; \
53 rlwimi t2,r,20,20,27; \
54 lbz t1,8(t2); \
55 rlwimi r,t1,16,8,15; \
56 rlwimi t2,r,12,20,27; \
57 lbz t1,8(t2); \
58 rlwimi r,t1,24,0,7;
59
60#define GF8_MUL(out, in, t1, t2) \
61 lis t1,0x8080; /* multiplication in GF8 */ \
62 ori t1,t1,0x8080; \
63 and t1,t1,in; \
64 srwi t1,t1,7; \
65 mulli t1,t1,0x1b; \
66 lis t2,0x7f7f; \
67 ori t2,t2,0x7f7f; \
68 and t2,t2,in; \
69 slwi t2,t2,1; \
70 xor out,t1,t2;
71
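/*
 * Rough C equivalent of the GF8_MUL macro above (four parallel "xtime"
 * doublings in GF(2^8) on packed bytes; illustrative sketch only):
 *
 *	u32 gf8_mul2(u32 in)
 *	{
 *		u32 hi = (in & 0x80808080) >> 7;	bytes with MSB set
 *		u32 lo = (in & 0x7f7f7f7f) << 1;	remaining bits shifted
 *		return lo ^ (hi * 0x1b);		AES polynomial reduction
 *	}
 */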
72/*
73 * ppc_expand_key_128(u32 *key_enc, const u8 *key)
74 *
75 * Expand 128 bit key into a 176 byte encryption key, i.e. the key itself
76 * plus 10 derived round keys of 16 bytes each
77 *
78 */
79_GLOBAL(ppc_expand_key_128)
80 INITIALIZE_KEY
81 LOAD_KEY(r5,r4,0)
82 LOAD_KEY(r6,r4,4)
83 LOAD_KEY(r7,r4,8)
84 LOAD_KEY(r8,r4,12)
85 stw r5,0(r3) /* key[0..3] = input data */
86 stw r6,4(r3)
87 stw r7,8(r3)
88 stw r8,12(r3)
89 li r16,10 /* 10 expansion rounds */
90 lis r0,0x0100 /* RCO(1) */
91ppc_expand_128_loop:
92 addi r3,r3,16
93 mr r14,r8 /* apply LS_BOX to 4th temp */
94 rotlwi r14,r14,8
95 LS_BOX(r14, r15, r4)
96 xor r14,r14,r0
97 xor r5,r5,r14 /* xor next 4 keys */
98 xor r6,r6,r5
99 xor r7,r7,r6
100 xor r8,r8,r7
101 stw r5,0(r3) /* store next 4 keys */
102 stw r6,4(r3)
103 stw r7,8(r3)
104 stw r8,12(r3)
105 GF8_MUL(r0, r0, r4, r14) /* multiply RCO by 2 in GF */
106 subi r16,r16,1
107 cmpwi r16,0
108 bt eq,ppc_expand_128_end
109 b ppc_expand_128_loop
110ppc_expand_128_end:
111 FINALIZE_KEY
112 blr
113
114/*
115 * ppc_expand_key_192(u32 *key_enc, const u8 *key)
116 *
117 * Expand 192 bit key into a 208 byte encryption key, i.e. 13 round keys
118 * of 16 bytes each
119 *
120 */
121_GLOBAL(ppc_expand_key_192)
122 INITIALIZE_KEY
123 LOAD_KEY(r5,r4,0)
124 LOAD_KEY(r6,r4,4)
125 LOAD_KEY(r7,r4,8)
126 LOAD_KEY(r8,r4,12)
127 LOAD_KEY(r9,r4,16)
128 LOAD_KEY(r10,r4,20)
129 stw r5,0(r3)
130 stw r6,4(r3)
131 stw r7,8(r3)
132 stw r8,12(r3)
133 stw r9,16(r3)
134 stw r10,20(r3)
135 li r16,8 /* 8 expansion rounds */
136 lis r0,0x0100 /* RCO(1) */
137ppc_expand_192_loop:
138 addi r3,r3,24
139 mr r14,r10 /* apply LS_BOX to 6th temp */
140 rotlwi r14,r14,8
141 LS_BOX(r14, r15, r4)
142 xor r14,r14,r0
143 xor r5,r5,r14 /* xor next 6 keys */
144 xor r6,r6,r5
145 xor r7,r7,r6
146 xor r8,r8,r7
147 xor r9,r9,r8
148 xor r10,r10,r9
149 stw r5,0(r3)
150 stw r6,4(r3)
151 stw r7,8(r3)
152 stw r8,12(r3)
153 subi r16,r16,1
154 cmpwi r16,0 /* last round early kick out */
155 bt eq,ppc_expand_192_end
156 stw r9,16(r3)
157 stw r10,20(r3)
158 GF8_MUL(r0, r0, r4, r14) /* multiply RCO GF8 */
159 b ppc_expand_192_loop
160ppc_expand_192_end:
161 FINALIZE_KEY
162 blr
163
164/*
165 * ppc_expand_key_256(u32 *key_enc, const u8 *key)
166 *
167 * Expand 256 bit key into a 240 byte encryption key, i.e. 15 round keys
168 * of 16 bytes each
169 *
170 */
171_GLOBAL(ppc_expand_key_256)
172 INITIALIZE_KEY
173 LOAD_KEY(r5,r4,0)
174 LOAD_KEY(r6,r4,4)
175 LOAD_KEY(r7,r4,8)
176 LOAD_KEY(r8,r4,12)
177 LOAD_KEY(r9,r4,16)
178 LOAD_KEY(r10,r4,20)
179 LOAD_KEY(r11,r4,24)
180 LOAD_KEY(r12,r4,28)
181 stw r5,0(r3)
182 stw r6,4(r3)
183 stw r7,8(r3)
184 stw r8,12(r3)
185 stw r9,16(r3)
186 stw r10,20(r3)
187 stw r11,24(r3)
188 stw r12,28(r3)
189 li r16,7 /* 7 expansion rounds */
190 lis r0,0x0100 /* RCO(1) */
191ppc_expand_256_loop:
192 addi r3,r3,32
193 mr r14,r12 /* apply LS_BOX to 8th temp */
194 rotlwi r14,r14,8
195 LS_BOX(r14, r15, r4)
196 xor r14,r14,r0
197 xor r5,r5,r14 /* xor 4 keys */
198 xor r6,r6,r5
199 xor r7,r7,r6
200 xor r8,r8,r7
201 mr r14,r8
202 LS_BOX(r14, r15, r4) /* apply LS_BOX to 4th temp */
203 xor r9,r9,r14 /* xor 4 keys */
204 xor r10,r10,r9
205 xor r11,r11,r10
206 xor r12,r12,r11
207 stw r5,0(r3)
208 stw r6,4(r3)
209 stw r7,8(r3)
210 stw r8,12(r3)
211 subi r16,r16,1
212 cmpwi r16,0 /* last round early kick out */
213 bt eq,ppc_expand_256_end
214 stw r9,16(r3)
215 stw r10,20(r3)
216 stw r11,24(r3)
217 stw r12,28(r3)
218 GF8_MUL(r0, r0, r4, r14)
219 b ppc_expand_256_loop
220ppc_expand_256_end:
221 FINALIZE_KEY
222 blr
223
224/*
225 * ppc_generate_decrypt_key: derive decryption key from encryption key.
226 * The number of bytes to handle is calculated from the key length (16/24/32).
227 *
228 */
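/*
 * Sketch of what the loop below computes for every inner round key word w
 * (mul() and ror32() are hypothetical helpers used only to make the maths
 * visible: byte-wise GF(2^8) multiplication and a 32 bit right rotate):
 *
 *	out = mul(0x0e, w) ^ ror32(mul(0x0b, w), 24)
 *	    ^ ror32(mul(0x0d, w), 16) ^ ror32(mul(0x09, w), 8);
 *
 * i.e. InvMixColumns applied to the expanded encryption key words.
 */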
229_GLOBAL(ppc_generate_decrypt_key)
230 addi r6,r5,24
231 slwi r6,r6,2
232 lwzx r7,r4,r6 /* first/last 4 words are same */
233 stw r7,0(r3)
234 lwz r7,0(r4)
235 stwx r7,r3,r6
236 addi r6,r6,4
237 lwzx r7,r4,r6
238 stw r7,4(r3)
239 lwz r7,4(r4)
240 stwx r7,r3,r6
241 addi r6,r6,4
242 lwzx r7,r4,r6
243 stw r7,8(r3)
244 lwz r7,8(r4)
245 stwx r7,r3,r6
246 addi r6,r6,4
247 lwzx r7,r4,r6
248 stw r7,12(r3)
249 lwz r7,12(r4)
250 stwx r7,r3,r6
251 addi r3,r3,16
252 add r4,r4,r6
253 subi r4,r4,28
254 addi r5,r5,20
255 srwi r5,r5,2
256ppc_generate_decrypt_block:
257 li r6,4
258 mtctr r6
259ppc_generate_decrypt_word:
260 lwz r6,0(r4)
261 GF8_MUL(r7, r6, r0, r7)
262 GF8_MUL(r8, r7, r0, r8)
263 GF8_MUL(r9, r8, r0, r9)
264 xor r10,r9,r6
265 xor r11,r7,r8
266 xor r11,r11,r9
267 xor r12,r7,r10
268 rotrwi r12,r12,24
269 xor r11,r11,r12
270 xor r12,r8,r10
271 rotrwi r12,r12,16
272 xor r11,r11,r12
273 rotrwi r12,r10,8
274 xor r11,r11,r12
275 stw r11,0(r3)
276 addi r3,r3,4
277 addi r4,r4,4
278 bdnz ppc_generate_decrypt_word
279 subi r4,r4,32
280 subi r5,r5,1
281 cmpwi r5,0
282 bt gt,ppc_generate_decrypt_block
283 blr
diff --git a/arch/powerpc/crypto/aes-spe-modes.S b/arch/powerpc/crypto/aes-spe-modes.S
new file mode 100644
index 000000000000..ad48032ca8e0
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-modes.S
@@ -0,0 +1,630 @@
1/*
2 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <asm/ppc_asm.h>
14#include "aes-spe-regs.h"
15
16#ifdef __BIG_ENDIAN__ /* Macros for big endian builds */
17
18#define LOAD_DATA(reg, off) \
19 lwz reg,off(rSP); /* load with offset */
20#define SAVE_DATA(reg, off) \
21 stw reg,off(rDP); /* save with offset */
22#define NEXT_BLOCK \
23	addi rSP,rSP,16;	/* increment pointers per block */ \
24 addi rDP,rDP,16;
25#define LOAD_IV(reg, off) \
26 lwz reg,off(rIP); /* IV loading with offset */
27#define SAVE_IV(reg, off) \
28 stw reg,off(rIP); /* IV saving with offset */
29#define START_IV /* nothing to reset */
30#define CBC_DEC 16 /* CBC decrement per block */
31#define CTR_DEC 1 /* CTR decrement one byte */
32
33#else /* Macros for little endian */
34
35#define LOAD_DATA(reg, off) \
36 lwbrx reg,0,rSP; /* load reversed */ \
37 addi rSP,rSP,4; /* and increment pointer */
38#define SAVE_DATA(reg, off) \
39 stwbrx reg,0,rDP; /* save reversed */ \
40 addi rDP,rDP,4; /* and increment pointer */
41#define NEXT_BLOCK /* nothing to do */
42#define LOAD_IV(reg, off) \
43 lwbrx reg,0,rIP; /* load reversed */ \
44 addi rIP,rIP,4; /* and increment pointer */
45#define SAVE_IV(reg, off) \
46	stwbrx reg,0,rIP;	/* save reversed */ \
47 addi rIP,rIP,4; /* and increment pointer */
48#define START_IV \
49 subi rIP,rIP,16; /* must reset pointer */
50#define CBC_DEC 32 /* 2 blocks because of incs */
51#define CTR_DEC 17 /* 1 block because of incs */
52
53#endif
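/*
 * Note on the constants above: the little endian load/store macros
 * post-increment their pointers, so the backward steps in CBC decryption
 * and in the CTR partial block path must undo those increments as well
 * (hence 32 instead of 16 and 17 instead of 1).
 */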
54
55#define SAVE_0_REGS
56#define LOAD_0_REGS
57
58#define SAVE_4_REGS \
59 stw rI0,96(r1); /* save 32 bit registers */ \
60 stw rI1,100(r1); \
61 stw rI2,104(r1); \
62 stw rI3,108(r1);
63
64#define LOAD_4_REGS \
65 lwz rI0,96(r1); /* restore 32 bit registers */ \
66 lwz rI1,100(r1); \
67 lwz rI2,104(r1); \
68 lwz rI3,108(r1);
69
70#define SAVE_8_REGS \
71 SAVE_4_REGS \
72 stw rG0,112(r1); /* save 32 bit registers */ \
73 stw rG1,116(r1); \
74 stw rG2,120(r1); \
75 stw rG3,124(r1);
76
77#define LOAD_8_REGS \
78 LOAD_4_REGS \
79 lwz rG0,112(r1); /* restore 32 bit registers */ \
80 lwz rG1,116(r1); \
81 lwz rG2,120(r1); \
82 lwz rG3,124(r1);
83
84#define INITIALIZE_CRYPT(tab,nr32bitregs) \
85 mflr r0; \
86 stwu r1,-160(r1); /* create stack frame */ \
87 lis rT0,tab@h; /* en-/decryption table pointer */ \
88 stw r0,8(r1); /* save link register */ \
89 ori rT0,rT0,tab@l; \
90 evstdw r14,16(r1); \
91 mr rKS,rKP; \
92 evstdw r15,24(r1); /* We must save non volatile */ \
93 evstdw r16,32(r1); /* registers. Take the chance */ \
94 evstdw r17,40(r1); /* and save the SPE part too */ \
95 evstdw r18,48(r1); \
96 evstdw r19,56(r1); \
97 evstdw r20,64(r1); \
98 evstdw r21,72(r1); \
99 evstdw r22,80(r1); \
100 evstdw r23,88(r1); \
101 SAVE_##nr32bitregs##_REGS
102
103#define FINALIZE_CRYPT(nr32bitregs) \
104 lwz r0,8(r1); \
105 evldw r14,16(r1); /* restore SPE registers */ \
106 evldw r15,24(r1); \
107 evldw r16,32(r1); \
108 evldw r17,40(r1); \
109 evldw r18,48(r1); \
110 evldw r19,56(r1); \
111 evldw r20,64(r1); \
112 evldw r21,72(r1); \
113 evldw r22,80(r1); \
114 evldw r23,88(r1); \
115 LOAD_##nr32bitregs##_REGS \
116 mtlr r0; /* restore link register */ \
117 xor r0,r0,r0; \
118 stw r0,16(r1); /* delete sensitive data */ \
119 stw r0,24(r1); /* that we might have pushed */ \
120 stw r0,32(r1); /* from other context that runs */ \
121 stw r0,40(r1); /* the same code */ \
122 stw r0,48(r1); \
123 stw r0,56(r1); \
124 stw r0,64(r1); \
125 stw r0,72(r1); \
126 stw r0,80(r1); \
127 stw r0,88(r1); \
128 addi r1,r1,160; /* cleanup stack frame */
129
130#define ENDIAN_SWAP(t0, t1, s0, s1) \
131 rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \
132 rotrwi t1,s1,8; \
133 rlwimi t0,s0,8,8,15; \
134 rlwimi t1,s1,8,8,15; \
135 rlwimi t0,s0,8,24,31; \
136 rlwimi t1,s1,8,24,31;
137
138#define GF128_MUL(d0, d1, d2, d3, t0) \
139 li t0,0x87; /* multiplication in GF128 */ \
140 cmpwi d3,-1; \
141 iselgt t0,0,t0; \
142 rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \
143 rotlwi d3,d3,1; \
144 rlwimi d2,d1,0,0,0; \
145 rotlwi d2,d2,1; \
146 rlwimi d1,d0,0,0,0; \
147 slwi d0,d0,1; /* shift left 128 bit */ \
148 rotlwi d1,d1,1; \
149 xor d0,d0,t0;
150
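/*
 * C sketch of the tweak doubling performed by GF128_MUL, written over a
 * plain 16 byte little endian tweak as in standard XTS (illustrative only;
 * the macro does the same multiplication by x on four endian-swapped GPRs):
 *
 *	void gf128_mul_x(u8 b[16])
 *	{
 *		u8 carry = 0, next;
 *		int i;
 *
 *		for (i = 0; i < 16; i++) {
 *			next = b[i] >> 7;
 *			b[i] = (b[i] << 1) | carry;
 *			carry = next;
 *		}
 *		if (carry)
 *			b[0] ^= 0x87;	reduce with x^128 + x^7 + x^2 + x + 1
 *	}
 */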
151#define START_KEY(d0, d1, d2, d3) \
152 lwz rW0,0(rKP); \
153 mtctr rRR; \
154 lwz rW1,4(rKP); \
155 lwz rW2,8(rKP); \
156 lwz rW3,12(rKP); \
157 xor rD0,d0,rW0; \
158 xor rD1,d1,rW1; \
159 xor rD2,d2,rW2; \
160 xor rD3,d3,rW3;
161
162/*
163 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
164 * u32 rounds)
165 *
166 * called from glue layer to encrypt a single 16 byte block
167 * round values are AES128 = 4, AES192 = 5, AES256 = 6
168 *
169 */
170_GLOBAL(ppc_encrypt_aes)
171 INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
172 LOAD_DATA(rD0, 0)
173 LOAD_DATA(rD1, 4)
174 LOAD_DATA(rD2, 8)
175 LOAD_DATA(rD3, 12)
176 START_KEY(rD0, rD1, rD2, rD3)
177 bl ppc_encrypt_block
178 xor rD0,rD0,rW0
179 SAVE_DATA(rD0, 0)
180 xor rD1,rD1,rW1
181 SAVE_DATA(rD1, 4)
182 xor rD2,rD2,rW2
183 SAVE_DATA(rD2, 8)
184 xor rD3,rD3,rW3
185 SAVE_DATA(rD3, 12)
186 FINALIZE_CRYPT(0)
187 blr
188
189/*
190 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
191 * u32 rounds)
192 *
193 * called from glue layer to decrypt a single 16 byte block
194 * round values are AES128 = 4, AES192 = 5, AES256 = 6
195 *
196 */
197_GLOBAL(ppc_decrypt_aes)
198 INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
199 LOAD_DATA(rD0, 0)
200 addi rT1,rT0,4096
201 LOAD_DATA(rD1, 4)
202 LOAD_DATA(rD2, 8)
203 LOAD_DATA(rD3, 12)
204 START_KEY(rD0, rD1, rD2, rD3)
205 bl ppc_decrypt_block
206 xor rD0,rD0,rW0
207 SAVE_DATA(rD0, 0)
208 xor rD1,rD1,rW1
209 SAVE_DATA(rD1, 4)
210 xor rD2,rD2,rW2
211 SAVE_DATA(rD2, 8)
212 xor rD3,rD3,rW3
213 SAVE_DATA(rD3, 12)
214 FINALIZE_CRYPT(0)
215 blr
216
217/*
218 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
219 * u32 rounds, u32 bytes);
220 *
221 * called from glue layer to encrypt multiple blocks via ECB
222 * Bytes must be greater than or equal to 16 and only whole blocks are
223 * processed. Round values are AES128 = 4, AES192 = 5 and
224 * AES256 = 6
225 *
226 */
227_GLOBAL(ppc_encrypt_ecb)
228 INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
229ppc_encrypt_ecb_loop:
230 LOAD_DATA(rD0, 0)
231 mr rKP,rKS
232 LOAD_DATA(rD1, 4)
233 subi rLN,rLN,16
234 LOAD_DATA(rD2, 8)
235 cmpwi rLN,15
236 LOAD_DATA(rD3, 12)
237 START_KEY(rD0, rD1, rD2, rD3)
238 bl ppc_encrypt_block
239 xor rD0,rD0,rW0
240 SAVE_DATA(rD0, 0)
241 xor rD1,rD1,rW1
242 SAVE_DATA(rD1, 4)
243 xor rD2,rD2,rW2
244 SAVE_DATA(rD2, 8)
245 xor rD3,rD3,rW3
246 SAVE_DATA(rD3, 12)
247 NEXT_BLOCK
248 bt gt,ppc_encrypt_ecb_loop
249 FINALIZE_CRYPT(0)
250 blr
251
252/*
253 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
254 * u32 rounds, u32 bytes);
255 *
256 * called from glue layer to decrypt multiple blocks via ECB
257 * Bytes must be greater than or equal to 16 and only whole blocks are
258 * processed. Round values are AES128 = 4, AES192 = 5 and
259 * AES256 = 6
260 *
261 */
262_GLOBAL(ppc_decrypt_ecb)
263 INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
264 addi rT1,rT0,4096
265ppc_decrypt_ecb_loop:
266 LOAD_DATA(rD0, 0)
267 mr rKP,rKS
268 LOAD_DATA(rD1, 4)
269 subi rLN,rLN,16
270 LOAD_DATA(rD2, 8)
271 cmpwi rLN,15
272 LOAD_DATA(rD3, 12)
273 START_KEY(rD0, rD1, rD2, rD3)
274 bl ppc_decrypt_block
275 xor rD0,rD0,rW0
276 SAVE_DATA(rD0, 0)
277 xor rD1,rD1,rW1
278 SAVE_DATA(rD1, 4)
279 xor rD2,rD2,rW2
280 SAVE_DATA(rD2, 8)
281 xor rD3,rD3,rW3
282 SAVE_DATA(rD3, 12)
283 NEXT_BLOCK
284 bt gt,ppc_decrypt_ecb_loop
285 FINALIZE_CRYPT(0)
286 blr
287
288/*
289 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
290 * u32 rounds, u32 bytes, u8 *iv);
291 *
292 * called from glue layer to encrypt multiple blocks via CBC
293 * Bytes must be greater than or equal to 16 and only whole blocks are
294 * processed. Round values are AES128 = 4, AES192 = 5 and
295 * AES256 = 6
296 *
297 */
298_GLOBAL(ppc_encrypt_cbc)
299 INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
300 LOAD_IV(rI0, 0)
301 LOAD_IV(rI1, 4)
302 LOAD_IV(rI2, 8)
303 LOAD_IV(rI3, 12)
304ppc_encrypt_cbc_loop:
305 LOAD_DATA(rD0, 0)
306 mr rKP,rKS
307 LOAD_DATA(rD1, 4)
308 subi rLN,rLN,16
309 LOAD_DATA(rD2, 8)
310 cmpwi rLN,15
311 LOAD_DATA(rD3, 12)
312 xor rD0,rD0,rI0
313 xor rD1,rD1,rI1
314 xor rD2,rD2,rI2
315 xor rD3,rD3,rI3
316 START_KEY(rD0, rD1, rD2, rD3)
317 bl ppc_encrypt_block
318 xor rI0,rD0,rW0
319 SAVE_DATA(rI0, 0)
320 xor rI1,rD1,rW1
321 SAVE_DATA(rI1, 4)
322 xor rI2,rD2,rW2
323 SAVE_DATA(rI2, 8)
324 xor rI3,rD3,rW3
325 SAVE_DATA(rI3, 12)
326 NEXT_BLOCK
327 bt gt,ppc_encrypt_cbc_loop
328 START_IV
329 SAVE_IV(rI0, 0)
330 SAVE_IV(rI1, 4)
331 SAVE_IV(rI2, 8)
332 SAVE_IV(rI3, 12)
333 FINALIZE_CRYPT(4)
334 blr
335
336/*
337 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
338 * u32 rounds, u32 bytes, u8 *iv);
339 *
340 * called from glue layer to decrypt multiple blocks via CBC
341 * round values are AES128 = 4, AES192 = 5, AES256 = 6
342 *
343 */
344_GLOBAL(ppc_decrypt_cbc)
345 INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
346 li rT1,15
347 LOAD_IV(rI0, 0)
348 andc rLN,rLN,rT1
349 LOAD_IV(rI1, 4)
350 subi rLN,rLN,16
351 LOAD_IV(rI2, 8)
352 add rSP,rSP,rLN /* reverse processing */
353 LOAD_IV(rI3, 12)
354 add rDP,rDP,rLN
355 LOAD_DATA(rD0, 0)
356 addi rT1,rT0,4096
357 LOAD_DATA(rD1, 4)
358 LOAD_DATA(rD2, 8)
359 LOAD_DATA(rD3, 12)
360 START_IV
361 SAVE_IV(rD0, 0)
362 SAVE_IV(rD1, 4)
363 SAVE_IV(rD2, 8)
364 cmpwi rLN,16
365 SAVE_IV(rD3, 12)
366 bt lt,ppc_decrypt_cbc_end
367ppc_decrypt_cbc_loop:
368 mr rKP,rKS
369 START_KEY(rD0, rD1, rD2, rD3)
370 bl ppc_decrypt_block
371 subi rLN,rLN,16
372 subi rSP,rSP,CBC_DEC
373 xor rW0,rD0,rW0
374 LOAD_DATA(rD0, 0)
375 xor rW1,rD1,rW1
376 LOAD_DATA(rD1, 4)
377 xor rW2,rD2,rW2
378 LOAD_DATA(rD2, 8)
379 xor rW3,rD3,rW3
380 LOAD_DATA(rD3, 12)
381 xor rW0,rW0,rD0
382 SAVE_DATA(rW0, 0)
383 xor rW1,rW1,rD1
384 SAVE_DATA(rW1, 4)
385 xor rW2,rW2,rD2
386 SAVE_DATA(rW2, 8)
387 xor rW3,rW3,rD3
388 SAVE_DATA(rW3, 12)
389 cmpwi rLN,15
390 subi rDP,rDP,CBC_DEC
391 bt gt,ppc_decrypt_cbc_loop
392ppc_decrypt_cbc_end:
393 mr rKP,rKS
394 START_KEY(rD0, rD1, rD2, rD3)
395 bl ppc_decrypt_block
396 xor rW0,rW0,rD0
397 xor rW1,rW1,rD1
398 xor rW2,rW2,rD2
399 xor rW3,rW3,rD3
400 xor rW0,rW0,rI0 /* decrypt with initial IV */
401 SAVE_DATA(rW0, 0)
402 xor rW1,rW1,rI1
403 SAVE_DATA(rW1, 4)
404 xor rW2,rW2,rI2
405 SAVE_DATA(rW2, 8)
406 xor rW3,rW3,rI3
407 SAVE_DATA(rW3, 12)
408 FINALIZE_CRYPT(4)
409 blr
410
411/*
412 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
413 * u32 rounds, u32 bytes, u8 *iv);
414 *
415 * called from glue layer to encrypt/decrypt multiple blocks
416 * via CTR. Number of bytes does not need to be a multiple of
417 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
418 *
419 */
420_GLOBAL(ppc_crypt_ctr)
421 INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
422 LOAD_IV(rI0, 0)
423 LOAD_IV(rI1, 4)
424 LOAD_IV(rI2, 8)
425 cmpwi rLN,16
426 LOAD_IV(rI3, 12)
427 START_IV
428 bt lt,ppc_crypt_ctr_partial
429ppc_crypt_ctr_loop:
430 mr rKP,rKS
431 START_KEY(rI0, rI1, rI2, rI3)
432 bl ppc_encrypt_block
433 xor rW0,rD0,rW0
434 xor rW1,rD1,rW1
435 xor rW2,rD2,rW2
436 xor rW3,rD3,rW3
437 LOAD_DATA(rD0, 0)
438 subi rLN,rLN,16
439 LOAD_DATA(rD1, 4)
440 LOAD_DATA(rD2, 8)
441 LOAD_DATA(rD3, 12)
442 xor rD0,rD0,rW0
443 SAVE_DATA(rD0, 0)
444 xor rD1,rD1,rW1
445 SAVE_DATA(rD1, 4)
446 xor rD2,rD2,rW2
447 SAVE_DATA(rD2, 8)
448 xor rD3,rD3,rW3
449 SAVE_DATA(rD3, 12)
450 addic rI3,rI3,1 /* increase counter */
451 addze rI2,rI2
452 addze rI1,rI1
453 addze rI0,rI0
454 NEXT_BLOCK
455 cmpwi rLN,15
456 bt gt,ppc_crypt_ctr_loop
457ppc_crypt_ctr_partial:
458 cmpwi rLN,0
459 bt eq,ppc_crypt_ctr_end
460 mr rKP,rKS
461 START_KEY(rI0, rI1, rI2, rI3)
462 bl ppc_encrypt_block
463 xor rW0,rD0,rW0
464 SAVE_IV(rW0, 0)
465 xor rW1,rD1,rW1
466 SAVE_IV(rW1, 4)
467 xor rW2,rD2,rW2
468 SAVE_IV(rW2, 8)
469 xor rW3,rD3,rW3
470 SAVE_IV(rW3, 12)
471 mtctr rLN
472 subi rIP,rIP,CTR_DEC
473 subi rSP,rSP,1
474 subi rDP,rDP,1
475ppc_crypt_ctr_xorbyte:
476 lbzu rW4,1(rIP) /* bytewise xor for partial block */
477 lbzu rW5,1(rSP)
478 xor rW4,rW4,rW5
479 stbu rW4,1(rDP)
480 bdnz ppc_crypt_ctr_xorbyte
481 subf rIP,rLN,rIP
482 addi rIP,rIP,1
483 addic rI3,rI3,1
484 addze rI2,rI2
485 addze rI1,rI1
486 addze rI0,rI0
487ppc_crypt_ctr_end:
488 SAVE_IV(rI0, 0)
489 SAVE_IV(rI1, 4)
490 SAVE_IV(rI2, 8)
491 SAVE_IV(rI3, 12)
492 FINALIZE_CRYPT(4)
493 blr
494
495/*
496 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
497 * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
498 *
499 * called from glue layer to encrypt multiple blocks via XTS
500 * If key_twk is given, the initial IV encryption will be
501 * processed too. Round values are AES128 = 4, AES192 = 5,
502 * AES256 = 6
503 *
504 */
505_GLOBAL(ppc_encrypt_xts)
506 INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
507 LOAD_IV(rI0, 0)
508 LOAD_IV(rI1, 4)
509 LOAD_IV(rI2, 8)
510 cmpwi rKT,0
511 LOAD_IV(rI3, 12)
512 bt eq,ppc_encrypt_xts_notweak
513 mr rKP,rKT
514 START_KEY(rI0, rI1, rI2, rI3)
515 bl ppc_encrypt_block
516 xor rI0,rD0,rW0
517 xor rI1,rD1,rW1
518 xor rI2,rD2,rW2
519 xor rI3,rD3,rW3
520ppc_encrypt_xts_notweak:
521 ENDIAN_SWAP(rG0, rG1, rI0, rI1)
522 ENDIAN_SWAP(rG2, rG3, rI2, rI3)
523ppc_encrypt_xts_loop:
524 LOAD_DATA(rD0, 0)
525 mr rKP,rKS
526 LOAD_DATA(rD1, 4)
527 subi rLN,rLN,16
528 LOAD_DATA(rD2, 8)
529 LOAD_DATA(rD3, 12)
530 xor rD0,rD0,rI0
531 xor rD1,rD1,rI1
532 xor rD2,rD2,rI2
533 xor rD3,rD3,rI3
534 START_KEY(rD0, rD1, rD2, rD3)
535 bl ppc_encrypt_block
536 xor rD0,rD0,rW0
537 xor rD1,rD1,rW1
538 xor rD2,rD2,rW2
539 xor rD3,rD3,rW3
540 xor rD0,rD0,rI0
541 SAVE_DATA(rD0, 0)
542 xor rD1,rD1,rI1
543 SAVE_DATA(rD1, 4)
544 xor rD2,rD2,rI2
545 SAVE_DATA(rD2, 8)
546 xor rD3,rD3,rI3
547 SAVE_DATA(rD3, 12)
548 GF128_MUL(rG0, rG1, rG2, rG3, rW0)
549 ENDIAN_SWAP(rI0, rI1, rG0, rG1)
550 ENDIAN_SWAP(rI2, rI3, rG2, rG3)
551 cmpwi rLN,0
552 NEXT_BLOCK
553 bt gt,ppc_encrypt_xts_loop
554 START_IV
555 SAVE_IV(rI0, 0)
556 SAVE_IV(rI1, 4)
557 SAVE_IV(rI2, 8)
558 SAVE_IV(rI3, 12)
559 FINALIZE_CRYPT(8)
560 blr
561
562/*
563 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
564 * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
565 *
566 * called from glue layer to decrypt multiple blocks via XTS
567 * If key_twk is given, the initial IV encryption will be
568 * processed too. Round values are AES128 = 4, AES192 = 5,
569 * AES256 = 6
570 *
571 */
572_GLOBAL(ppc_decrypt_xts)
573 INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
574 LOAD_IV(rI0, 0)
575 addi rT1,rT0,4096
576 LOAD_IV(rI1, 4)
577 LOAD_IV(rI2, 8)
578 cmpwi rKT,0
579 LOAD_IV(rI3, 12)
580 bt eq,ppc_decrypt_xts_notweak
581 subi rT0,rT0,4096
582 mr rKP,rKT
583 START_KEY(rI0, rI1, rI2, rI3)
584 bl ppc_encrypt_block
585 xor rI0,rD0,rW0
586 xor rI1,rD1,rW1
587 xor rI2,rD2,rW2
588 xor rI3,rD3,rW3
589 addi rT0,rT0,4096
590ppc_decrypt_xts_notweak:
591 ENDIAN_SWAP(rG0, rG1, rI0, rI1)
592 ENDIAN_SWAP(rG2, rG3, rI2, rI3)
593ppc_decrypt_xts_loop:
594 LOAD_DATA(rD0, 0)
595 mr rKP,rKS
596 LOAD_DATA(rD1, 4)
597 subi rLN,rLN,16
598 LOAD_DATA(rD2, 8)
599 LOAD_DATA(rD3, 12)
600 xor rD0,rD0,rI0
601 xor rD1,rD1,rI1
602 xor rD2,rD2,rI2
603 xor rD3,rD3,rI3
604 START_KEY(rD0, rD1, rD2, rD3)
605 bl ppc_decrypt_block
606 xor rD0,rD0,rW0
607 xor rD1,rD1,rW1
608 xor rD2,rD2,rW2
609 xor rD3,rD3,rW3
610 xor rD0,rD0,rI0
611 SAVE_DATA(rD0, 0)
612 xor rD1,rD1,rI1
613 SAVE_DATA(rD1, 4)
614 xor rD2,rD2,rI2
615 SAVE_DATA(rD2, 8)
616 xor rD3,rD3,rI3
617 SAVE_DATA(rD3, 12)
618 GF128_MUL(rG0, rG1, rG2, rG3, rW0)
619 ENDIAN_SWAP(rI0, rI1, rG0, rG1)
620 ENDIAN_SWAP(rI2, rI3, rG2, rG3)
621 cmpwi rLN,0
622 NEXT_BLOCK
623 bt gt,ppc_decrypt_xts_loop
624 START_IV
625 SAVE_IV(rI0, 0)
626 SAVE_IV(rI1, 4)
627 SAVE_IV(rI2, 8)
628 SAVE_IV(rI3, 12)
629 FINALIZE_CRYPT(8)
630 blr
diff --git a/arch/powerpc/crypto/aes-spe-regs.h b/arch/powerpc/crypto/aes-spe-regs.h
new file mode 100644
index 000000000000..30d217b399c3
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-regs.h
@@ -0,0 +1,42 @@
1/*
2 * Common registers for PPC AES implementation
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#define rKS r0 /* copy of en-/decryption key pointer */
14#define rDP r3 /* destination pointer */
15#define rSP r4 /* source pointer */
16#define rKP r5 /* pointer to en-/decryption key */
17#define rRR r6 /* en-/decryption rounds */
18#define rLN r7 /* length of data to be processed */
19#define rIP r8 /* pointer to IV (CBC/CTR/XTS modes) */
20#define rKT r9 /* pointer to tweak key (XTS mode) */
21#define rT0 r11 /* pointers to en-/decryption tables */
22#define rT1 r10
23#define rD0 r9 /* data */
24#define rD1 r14
25#define rD2 r12
26#define rD3 r15
27#define rW0 r16 /* working registers */
28#define rW1 r17
29#define rW2 r18
30#define rW3 r19
31#define rW4 r20
32#define rW5 r21
33#define rW6 r22
34#define rW7 r23
35#define rI0 r24 /* IV */
36#define rI1 r25
37#define rI2 r26
38#define rI3 r27
39#define rG0 r28 /* endian reversed tweak (XTS mode) */
40#define rG1 r29
41#define rG2 r30
42#define rG3 r31
diff --git a/arch/powerpc/crypto/aes-tab-4k.S b/arch/powerpc/crypto/aes-tab-4k.S
new file mode 100644
index 000000000000..701e60240dc3
--- /dev/null
+++ b/arch/powerpc/crypto/aes-tab-4k.S
@@ -0,0 +1,331 @@
1/*
2 * 4K AES tables for PPC AES implementation
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13/*
14 * These big endian AES encryption/decryption tables have been taken from
15 * crypto/aes_generic.c and are designed to be simply accessed by a combination
16 * of rlwimi/lwz instructions with a minimum of table registers (usually only
17 * one required). Thus they are aligned to 4K. The locality of rotated values
18 * is derived from the reduced offsets that are available in the SPE load
19 * instructions. E.g. evldw, evlwwsplat, ...
20 *
21 * For the safety-conscious it has to be noted that they might be vulnerable
22 * to cache timing attacks because of their size. Nevertheless in contrast to
23 * the generic tables they have been reduced from 16KB to 8KB + 256 bytes.
27 * This is quite a good tradeoff for low power devices (e.g. routers) without
25 * dedicated encryption hardware where we usually have no multiuser
26 * environment.
27 *
28 */
29
30#define R(a, b, c, d) \
31 0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a
32
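/*
 * For example, R(c6, 63, 63, a5) expands to the four byte rotations of one
 * table entry:
 *
 *	.long 0xc66363a5, 0xa5c66363, 0x63a5c663, 0x6363a5c6
 *
 * so the rotation needed for each column position can be picked with a
 * small load offset from a single table.
 */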
33.data
34.align 12
35.globl PPC_AES_4K_ENCTAB
36PPC_AES_4K_ENCTAB:
37/* encryption table, same as crypto_ft_tab in crypto/aes_generic.c */
38 .long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84)
39 .long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d)
40 .long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd)
41 .long R(de, 6f, 6f, b1), R(91, c5, c5, 54)
42 .long R(60, 30, 30, 50), R(02, 01, 01, 03)
43 .long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d)
44 .long R(e7, fe, fe, 19), R(b5, d7, d7, 62)
45 .long R(4d, ab, ab, e6), R(ec, 76, 76, 9a)
46 .long R(8f, ca, ca, 45), R(1f, 82, 82, 9d)
47 .long R(89, c9, c9, 40), R(fa, 7d, 7d, 87)
48 .long R(ef, fa, fa, 15), R(b2, 59, 59, eb)
49 .long R(8e, 47, 47, c9), R(fb, f0, f0, 0b)
50 .long R(41, ad, ad, ec), R(b3, d4, d4, 67)
51 .long R(5f, a2, a2, fd), R(45, af, af, ea)
52 .long R(23, 9c, 9c, bf), R(53, a4, a4, f7)
53 .long R(e4, 72, 72, 96), R(9b, c0, c0, 5b)
54 .long R(75, b7, b7, c2), R(e1, fd, fd, 1c)
55 .long R(3d, 93, 93, ae), R(4c, 26, 26, 6a)
56 .long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41)
57 .long R(f5, f7, f7, 02), R(83, cc, cc, 4f)
58 .long R(68, 34, 34, 5c), R(51, a5, a5, f4)
59 .long R(d1, e5, e5, 34), R(f9, f1, f1, 08)
60 .long R(e2, 71, 71, 93), R(ab, d8, d8, 73)
61 .long R(62, 31, 31, 53), R(2a, 15, 15, 3f)
62 .long R(08, 04, 04, 0c), R(95, c7, c7, 52)
63 .long R(46, 23, 23, 65), R(9d, c3, c3, 5e)
64 .long R(30, 18, 18, 28), R(37, 96, 96, a1)
65 .long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5)
66 .long R(0e, 07, 07, 09), R(24, 12, 12, 36)
67 .long R(1b, 80, 80, 9b), R(df, e2, e2, 3d)
68 .long R(cd, eb, eb, 26), R(4e, 27, 27, 69)
69 .long R(7f, b2, b2, cd), R(ea, 75, 75, 9f)
70 .long R(12, 09, 09, 1b), R(1d, 83, 83, 9e)
71 .long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e)
72 .long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2)
73 .long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb)
74 .long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d)
75 .long R(b7, d6, d6, 61), R(7d, b3, b3, ce)
76 .long R(52, 29, 29, 7b), R(dd, e3, e3, 3e)
77 .long R(5e, 2f, 2f, 71), R(13, 84, 84, 97)
78 .long R(a6, 53, 53, f5), R(b9, d1, d1, 68)
79 .long R(00, 00, 00, 00), R(c1, ed, ed, 2c)
80 .long R(40, 20, 20, 60), R(e3, fc, fc, 1f)
81 .long R(79, b1, b1, c8), R(b6, 5b, 5b, ed)
82 .long R(d4, 6a, 6a, be), R(8d, cb, cb, 46)
83 .long R(67, be, be, d9), R(72, 39, 39, 4b)
84 .long R(94, 4a, 4a, de), R(98, 4c, 4c, d4)
85 .long R(b0, 58, 58, e8), R(85, cf, cf, 4a)
86 .long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a)
87 .long R(4f, aa, aa, e5), R(ed, fb, fb, 16)
88 .long R(86, 43, 43, c5), R(9a, 4d, 4d, d7)
89 .long R(66, 33, 33, 55), R(11, 85, 85, 94)
90 .long R(8a, 45, 45, cf), R(e9, f9, f9, 10)
91 .long R(04, 02, 02, 06), R(fe, 7f, 7f, 81)
92 .long R(a0, 50, 50, f0), R(78, 3c, 3c, 44)
93 .long R(25, 9f, 9f, ba), R(4b, a8, a8, e3)
94 .long R(a2, 51, 51, f3), R(5d, a3, a3, fe)
95 .long R(80, 40, 40, c0), R(05, 8f, 8f, 8a)
96 .long R(3f, 92, 92, ad), R(21, 9d, 9d, bc)
97 .long R(70, 38, 38, 48), R(f1, f5, f5, 04)
98 .long R(63, bc, bc, df), R(77, b6, b6, c1)
99 .long R(af, da, da, 75), R(42, 21, 21, 63)
100 .long R(20, 10, 10, 30), R(e5, ff, ff, 1a)
101 .long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d)
102 .long R(81, cd, cd, 4c), R(18, 0c, 0c, 14)
103 .long R(26, 13, 13, 35), R(c3, ec, ec, 2f)
104 .long R(be, 5f, 5f, e1), R(35, 97, 97, a2)
105 .long R(88, 44, 44, cc), R(2e, 17, 17, 39)
106 .long R(93, c4, c4, 57), R(55, a7, a7, f2)
107 .long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47)
108 .long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7)
109 .long R(32, 19, 19, 2b), R(e6, 73, 73, 95)
110 .long R(c0, 60, 60, a0), R(19, 81, 81, 98)
111 .long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f)
112 .long R(44, 22, 22, 66), R(54, 2a, 2a, 7e)
113 .long R(3b, 90, 90, ab), R(0b, 88, 88, 83)
114 .long R(8c, 46, 46, ca), R(c7, ee, ee, 29)
115 .long R(6b, b8, b8, d3), R(28, 14, 14, 3c)
116 .long R(a7, de, de, 79), R(bc, 5e, 5e, e2)
117 .long R(16, 0b, 0b, 1d), R(ad, db, db, 76)
118 .long R(db, e0, e0, 3b), R(64, 32, 32, 56)
119 .long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e)
120 .long R(92, 49, 49, db), R(0c, 06, 06, 0a)
121 .long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4)
122 .long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e)
123 .long R(43, ac, ac, ef), R(c4, 62, 62, a6)
124 .long R(39, 91, 91, a8), R(31, 95, 95, a4)
125 .long R(d3, e4, e4, 37), R(f2, 79, 79, 8b)
126 .long R(d5, e7, e7, 32), R(8b, c8, c8, 43)
127 .long R(6e, 37, 37, 59), R(da, 6d, 6d, b7)
128 .long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64)
129 .long R(9c, 4e, 4e, d2), R(49, a9, a9, e0)
130 .long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa)
131 .long R(f3, f4, f4, 07), R(cf, ea, ea, 25)
132 .long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e)
133 .long R(47, ae, ae, e9), R(10, 08, 08, 18)
134 .long R(6f, ba, ba, d5), R(f0, 78, 78, 88)
135 .long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72)
136 .long R(38, 1c, 1c, 24), R(57, a6, a6, f1)
137 .long R(73, b4, b4, c7), R(97, c6, c6, 51)
138 .long R(cb, e8, e8, 23), R(a1, dd, dd, 7c)
139 .long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21)
140 .long R(96, 4b, 4b, dd), R(61, bd, bd, dc)
141 .long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85)
142 .long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42)
143 .long R(71, b5, b5, c4), R(cc, 66, 66, aa)
144 .long R(90, 48, 48, d8), R(06, 03, 03, 05)
145 .long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12)
146 .long R(c2, 61, 61, a3), R(6a, 35, 35, 5f)
147 .long R(ae, 57, 57, f9), R(69, b9, b9, d0)
148 .long R(17, 86, 86, 91), R(99, c1, c1, 58)
149 .long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9)
150 .long R(d9, e1, e1, 38), R(eb, f8, f8, 13)
151 .long R(2b, 98, 98, b3), R(22, 11, 11, 33)
152 .long R(d2, 69, 69, bb), R(a9, d9, d9, 70)
153 .long R(07, 8e, 8e, 89), R(33, 94, 94, a7)
154 .long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22)
155 .long R(15, 87, 87, 92), R(c9, e9, e9, 20)
156 .long R(87, ce, ce, 49), R(aa, 55, 55, ff)
157 .long R(50, 28, 28, 78), R(a5, df, df, 7a)
158 .long R(03, 8c, 8c, 8f), R(59, a1, a1, f8)
159 .long R(09, 89, 89, 80), R(1a, 0d, 0d, 17)
160 .long R(65, bf, bf, da), R(d7, e6, e6, 31)
161 .long R(84, 42, 42, c6), R(d0, 68, 68, b8)
162 .long R(82, 41, 41, c3), R(29, 99, 99, b0)
163 .long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11)
164 .long R(7b, b0, b0, cb), R(a8, 54, 54, fc)
165 .long R(6d, bb, bb, d6), R(2c, 16, 16, 3a)
166.globl PPC_AES_4K_DECTAB
167PPC_AES_4K_DECTAB:
168/* decryption table, same as crypto_it_tab in crypto/aes_generic.c */
169 .long R(51, f4, a7, 50), R(7e, 41, 65, 53)
170 .long R(1a, 17, a4, c3), R(3a, 27, 5e, 96)
171 .long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1)
172 .long R(ac, fa, 58, ab), R(4b, e3, 03, 93)
173 .long R(20, 30, fa, 55), R(ad, 76, 6d, f6)
174 .long R(88, cc, 76, 91), R(f5, 02, 4c, 25)
175 .long R(4f, e5, d7, fc), R(c5, 2a, cb, d7)
176 .long R(26, 35, 44, 80), R(b5, 62, a3, 8f)
177 .long R(de, b1, 5a, 49), R(25, ba, 1b, 67)
178 .long R(45, ea, 0e, 98), R(5d, fe, c0, e1)
179 .long R(c3, 2f, 75, 02), R(81, 4c, f0, 12)
180 .long R(8d, 46, 97, a3), R(6b, d3, f9, c6)
181 .long R(03, 8f, 5f, e7), R(15, 92, 9c, 95)
182 .long R(bf, 6d, 7a, eb), R(95, 52, 59, da)
183 .long R(d4, be, 83, 2d), R(58, 74, 21, d3)
184 .long R(49, e0, 69, 29), R(8e, c9, c8, 44)
185 .long R(75, c2, 89, 6a), R(f4, 8e, 79, 78)
186 .long R(99, 58, 3e, 6b), R(27, b9, 71, dd)
187 .long R(be, e1, 4f, b6), R(f0, 88, ad, 17)
188 .long R(c9, 20, ac, 66), R(7d, ce, 3a, b4)
189 .long R(63, df, 4a, 18), R(e5, 1a, 31, 82)
190 .long R(97, 51, 33, 60), R(62, 53, 7f, 45)
191 .long R(b1, 64, 77, e0), R(bb, 6b, ae, 84)
192 .long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94)
193 .long R(70, 48, 68, 58), R(8f, 45, fd, 19)
194 .long R(94, de, 6c, 87), R(52, 7b, f8, b7)
195 .long R(ab, 73, d3, 23), R(72, 4b, 02, e2)
196 .long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a)
197 .long R(b2, eb, 28, 07), R(2f, b5, c2, 03)
198 .long R(86, c5, 7b, 9a), R(d3, 37, 08, a5)
199 .long R(30, 28, 87, f2), R(23, bf, a5, b2)
200 .long R(02, 03, 6a, ba), R(ed, 16, 82, 5c)
201 .long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92)
202 .long R(f3, 07, f2, f0), R(4e, 69, e2, a1)
203 .long R(65, da, f4, cd), R(06, 05, be, d5)
204 .long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a)
205 .long R(34, 2e, 53, 9d), R(a2, f3, 55, a0)
206 .long R(05, 8a, e1, 32), R(a4, f6, eb, 75)
207 .long R(0b, 83, ec, 39), R(40, 60, ef, aa)
208 .long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51)
209 .long R(3e, 21, 8a, f9), R(96, dd, 06, 3d)
210 .long R(dd, 3e, 05, ae), R(4d, e6, bd, 46)
211 .long R(91, 54, 8d, b5), R(71, c4, 5d, 05)
212 .long R(04, 06, d4, 6f), R(60, 50, 15, ff)
213 .long R(19, 98, fb, 24), R(d6, bd, e9, 97)
214 .long R(89, 40, 43, cc), R(67, d9, 9e, 77)
215 .long R(b0, e8, 42, bd), R(07, 89, 8b, 88)
216 .long R(e7, 19, 5b, 38), R(79, c8, ee, db)
217 .long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9)
218 .long R(f8, 84, 1e, c9), R(00, 00, 00, 00)
219 .long R(09, 80, 86, 83), R(32, 2b, ed, 48)
220 .long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e)
221 .long R(fd, 0e, ff, fb), R(0f, 85, 38, 56)
222 .long R(3d, ae, d5, 1e), R(36, 2d, 39, 27)
223 .long R(0a, 0f, d9, 64), R(68, 5c, a6, 21)
224 .long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a)
225 .long R(0c, 0a, 67, b1), R(93, 57, e7, 0f)
226 .long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e)
227 .long R(80, c0, c5, 4f), R(61, dc, 20, a2)
228 .long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16)
229 .long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5)
230 .long R(3c, 22, e0, 43), R(12, 1b, 17, 1d)
231 .long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad)
232 .long R(2d, b6, a8, b9), R(14, 1e, a9, c8)
233 .long R(57, f1, 19, 85), R(af, 75, 07, 4c)
234 .long R(ee, 99, dd, bb), R(a3, 7f, 60, fd)
235 .long R(f7, 01, 26, 9f), R(5c, 72, f5, bc)
236 .long R(44, 66, 3b, c5), R(5b, fb, 7e, 34)
237 .long R(8b, 43, 29, 76), R(cb, 23, c6, dc)
238 .long R(b6, ed, fc, 68), R(b8, e4, f1, 63)
239 .long R(d7, 31, dc, ca), R(42, 63, 85, 10)
240 .long R(13, 97, 22, 40), R(84, c6, 11, 20)
241 .long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8)
242 .long R(ae, f9, 32, 11), R(c7, 29, a1, 6d)
243 .long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3)
244 .long R(0d, 86, 52, ec), R(77, c1, e3, d0)
245 .long R(2b, b3, 16, 6c), R(a9, 70, b9, 99)
246 .long R(11, 94, 48, fa), R(47, e9, 64, 22)
247 .long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a)
248 .long R(56, 7d, 2c, d8), R(22, 33, 90, ef)
249 .long R(87, 49, 4e, c7), R(d9, 38, d1, c1)
250 .long R(8c, ca, a2, fe), R(98, d4, 0b, 36)
251 .long R(a6, f5, 81, cf), R(a5, 7a, de, 28)
252 .long R(da, b7, 8e, 26), R(3f, ad, bf, a4)
253 .long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d)
254 .long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62)
255 .long R(f6, 8d, 13, c2), R(90, d8, b8, e8)
256 .long R(2e, 39, f7, 5e), R(82, c3, af, f5)
257 .long R(9f, 5d, 80, be), R(69, d0, 93, 7c)
258 .long R(6f, d5, 2d, a9), R(cf, 25, 12, b3)
259 .long R(c8, ac, 99, 3b), R(10, 18, 7d, a7)
260 .long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b)
261 .long R(cd, 26, 78, 09), R(6e, 59, 18, f4)
262 .long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8)
263 .long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e)
264 .long R(21, bc, cf, 08), R(ef, 15, e8, e6)
265 .long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce)
266 .long R(ea, 9f, 09, d4), R(29, b0, 7c, d6)
267 .long R(31, a4, b2, af), R(2a, 3f, 23, 31)
268 .long R(c6, a5, 94, 30), R(35, a2, 66, c0)
269 .long R(74, 4e, bc, 37), R(fc, 82, ca, a6)
270 .long R(e0, 90, d0, b0), R(33, a7, d8, 15)
271 .long R(f1, 04, 98, 4a), R(41, ec, da, f7)
272 .long R(7f, cd, 50, 0e), R(17, 91, f6, 2f)
273 .long R(76, 4d, d6, 8d), R(43, ef, b0, 4d)
274 .long R(cc, aa, 4d, 54), R(e4, 96, 04, df)
275 .long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b)
276 .long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f)
277 .long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d)
278 .long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e)
279 .long R(b3, 67, 1d, 5a), R(92, db, d2, 52)
280 .long R(e9, 10, 56, 33), R(6d, d6, 47, 13)
281 .long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a)
282 .long R(59, f8, 14, 8e), R(eb, 13, 3c, 89)
283 .long R(ce, a9, 27, ee), R(b7, 61, c9, 35)
284 .long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c)
285 .long R(9c, d2, df, 59), R(55, f2, 73, 3f)
286 .long R(18, 14, ce, 79), R(73, c7, 37, bf)
287 .long R(53, f7, cd, ea), R(5f, fd, aa, 5b)
288 .long R(df, 3d, 6f, 14), R(78, 44, db, 86)
289 .long R(ca, af, f3, 81), R(b9, 68, c4, 3e)
290 .long R(38, 24, 34, 2c), R(c2, a3, 40, 5f)
291 .long R(16, 1d, c3, 72), R(bc, e2, 25, 0c)
292 .long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41)
293 .long R(39, a8, 01, 71), R(08, 0c, b3, de)
294 .long R(d8, b4, e4, 9c), R(64, 56, c1, 90)
295 .long R(7b, cb, 84, 61), R(d5, 32, b6, 70)
296 .long R(48, 6c, 5c, 74), R(d0, b8, 57, 42)
297.globl PPC_AES_4K_DECTAB2
298PPC_AES_4K_DECTAB2:
299/* decryption table, same as crypto_il_tab in crypto/aes-generic.c */
300 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
301 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
302 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
303 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
304 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
305 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
306 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
307 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
308 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
309 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
310 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
311 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
312 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
313 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
314 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
315 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
316 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
317 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
318 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
319 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
320 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
321 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
322 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
323 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
324 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
325 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
326 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
327 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
328 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
329 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
330 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
331 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S
new file mode 100644
index 000000000000..10cdf5bceebb
--- /dev/null
+++ b/arch/powerpc/crypto/md5-asm.S
@@ -0,0 +1,243 @@
1/*
2 * Fast MD5 implementation for PPC
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12#include <asm/ppc_asm.h>
13#include <asm/asm-offsets.h>
14
15#define rHP r3
16#define rWP r4
17
18#define rH0 r0
19#define rH1 r6
20#define rH2 r7
21#define rH3 r5
22
23#define rW00 r8
24#define rW01 r9
25#define rW02 r10
26#define rW03 r11
27#define rW04 r12
28#define rW05 r14
29#define rW06 r15
30#define rW07 r16
31#define rW08 r17
32#define rW09 r18
33#define rW10 r19
34#define rW11 r20
35#define rW12 r21
36#define rW13 r22
37#define rW14 r23
38#define rW15 r24
39
40#define rT0 r25
41#define rT1 r26
42
43#define INITIALIZE \
44 PPC_STLU r1,-INT_FRAME_SIZE(r1); \
45 SAVE_8GPRS(14, r1); /* push registers onto stack */ \
46 SAVE_4GPRS(22, r1); \
47 SAVE_GPR(26, r1)
48
49#define FINALIZE \
50 REST_8GPRS(14, r1); /* pop registers from stack */ \
51 REST_4GPRS(22, r1); \
52 REST_GPR(26, r1); \
53 addi r1,r1,INT_FRAME_SIZE;
54
55#ifdef __BIG_ENDIAN__
56#define LOAD_DATA(reg, off) \
57 lwbrx reg,0,rWP; /* load data */
58#define INC_PTR \
59 addi rWP,rWP,4; /* increment per word */
60#define NEXT_BLOCK /* nothing to do */
61#else
62#define LOAD_DATA(reg, off) \
63 lwz reg,off(rWP); /* load data */
64#define INC_PTR /* nothing to do */
65#define NEXT_BLOCK \
66 addi rWP,rWP,64; /* increment per block */
67#endif
68
69#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
70 LOAD_DATA(w0, off) /* W */ \
71 and rT0,b,c; /* 1: f = b and c */ \
72 INC_PTR /* ptr++ */ \
73 andc rT1,d,b; /* 1: f' = ~b and d */ \
74 LOAD_DATA(w1, off+4) /* W */ \
75 or rT0,rT0,rT1; /* 1: f = f or f' */ \
76 addi w0,w0,k0l; /* 1: wk = w + k */ \
77 add a,a,rT0; /* 1: a = a + f */ \
78 addis w0,w0,k0h; /* 1: wk = w + k' */ \
79 addis w1,w1,k1h; /* 2: wk = w + k */ \
80 add a,a,w0; /* 1: a = a + wk */ \
81 addi w1,w1,k1l; /* 2: wk = w + k' */ \
82 rotrwi a,a,p; /* 1: a = a rotl x */ \
83 add d,d,w1; /* 2: a = a + wk */ \
84 add a,a,b; /* 1: a = a + b */ \
85 and rT0,a,b; /* 2: f = b and c */ \
86 andc rT1,c,a; /* 2: f' = ~b and d */ \
87 or rT0,rT0,rT1; /* 2: f = f or f' */ \
88 add d,d,rT0; /* 2: a = a + f */ \
89 INC_PTR /* ptr++ */ \
90 rotrwi d,d,q; /* 2: a = a rotl x */ \
91 add d,d,a; /* 2: a = a + b */
92
93#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
94 andc rT0,c,d; /* 1: f = c and ~d */ \
95 and rT1,b,d; /* 1: f' = b and d */ \
96 addi w0,w0,k0l; /* 1: wk = w + k */ \
97 or rT0,rT0,rT1; /* 1: f = f or f' */ \
98 addis w0,w0,k0h; /* 1: wk = w + k' */ \
99 add a,a,rT0; /* 1: a = a + f */ \
100 addi w1,w1,k1l; /* 2: wk = w + k */ \
101 add a,a,w0; /* 1: a = a + wk */ \
102 addis w1,w1,k1h; /* 2: wk = w + k' */ \
103 andc rT0,b,c; /* 2: f = c and ~d */ \
104 rotrwi a,a,p; /* 1: a = a rotl x */ \
105 add a,a,b; /* 1: a = a + b */ \
106 add d,d,w1; /* 2: a = a + wk */ \
107 and rT1,a,c; /* 2: f' = b and d */ \
108 or rT0,rT0,rT1; /* 2: f = f or f' */ \
109 add d,d,rT0; /* 2: a = a + f */ \
110 rotrwi d,d,q; /* 2: a = a rotl x */ \
111	add	d,d,a;		/* 2: a = a + b */
112
113#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
114 xor rT0,b,c; /* 1: f' = b xor c */ \
115 addi w0,w0,k0l; /* 1: wk = w + k */ \
116 xor rT1,rT0,d; /* 1: f = f xor f' */ \
117 addis w0,w0,k0h; /* 1: wk = w + k' */ \
118 add a,a,rT1; /* 1: a = a + f */ \
119 addi w1,w1,k1l; /* 2: wk = w + k */ \
120 add a,a,w0; /* 1: a = a + wk */ \
121 addis w1,w1,k1h; /* 2: wk = w + k' */ \
122 rotrwi a,a,p; /* 1: a = a rotl x */ \
123 add d,d,w1; /* 2: a = a + wk */ \
124 add a,a,b; /* 1: a = a + b */ \
125 xor rT1,rT0,a; /* 2: f = b xor f' */ \
126 add d,d,rT1; /* 2: a = a + f */ \
127 rotrwi d,d,q; /* 2: a = a rotl x */ \
128 add d,d,a; /* 2: a = a + b */
129
130#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
131 addi w0,w0,k0l; /* 1: w = w + k */ \
132 orc rT0,b,d; /* 1: f = b or ~d */ \
133 addis w0,w0,k0h; /* 1: w = w + k' */ \
134 xor rT0,rT0,c; /* 1: f = f xor c */ \
135 add a,a,w0; /* 1: a = a + wk */ \
136 addi w1,w1,k1l; /* 2: w = w + k */ \
137 add a,a,rT0; /* 1: a = a + f */ \
138 addis w1,w1,k1h; /* 2: w = w + k' */ \
139 rotrwi a,a,p; /* 1: a = a rotl x */ \
140 add a,a,b; /* 1: a = a + b */ \
141 orc rT0,a,c; /* 2: f = b or ~d */ \
142 add d,d,w1; /* 2: a = a + wk */ \
143 xor rT0,rT0,b; /* 2: f = f xor c */ \
144 add d,d,rT0; /* 2: a = a + f */ \
145 rotrwi d,d,q; /* 2: a = a rotl x */ \
146 add d,d,a; /* 2: a = a + b */
147
148_GLOBAL(ppc_md5_transform)
149 INITIALIZE
150
151 mtctr r5
152 lwz rH0,0(rHP)
153 lwz rH1,4(rHP)
154 lwz rH2,8(rHP)
155 lwz rH3,12(rHP)
156
157ppc_md5_main:
158 R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
159 0xd76b, -23432, 0xe8c8, -18602)
160 R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
161 0x2420, 0x70db, 0xc1be, -12562)
162 R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
163 0xf57c, 0x0faf, 0x4788, -14806)
164 R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
165 0xa830, 0x4613, 0xfd47, -27391)
166 R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
167 0x6981, -26408, 0x8b45, -2129)
168 R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
169 0xffff, 0x5bb1, 0x895d, -10306)
170 R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
171 0x6b90, 0x1122, 0xfd98, 0x7193)
172 R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
173 0xa679, 0x438e, 0x49b4, 0x0821)
174
175 R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
176 0x0d56, 0x6e0c, 0x1810, 0x6d2d)
177 R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
178 0x9d02, -32109, 0x124c, 0x2332)
179 R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
180 0x8ea7, 0x4a33, 0x0245, -18270)
181 R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
182 0x8eee, -8608, 0xf258, -5095)
183 R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
184 0x969d, -10697, 0x1cbe, -15288)
185 R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
186 0x3317, 0x3e99, 0xdbd9, 0x7c15)
187 R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
188 0xac4b, 0x7772, 0xd8cf, 0x331d)
189 R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
190 0x6a28, 0x6dd8, 0x219a, 0x3b68)
191
192 R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
193 0x29cb, 0x28e5, 0x4218, -7788)
194 R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
195 0x473f, 0x06d1, 0x3aae, 0x3036)
196 R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
197 0xaea1, -15134, 0x640b, -11295)
198 R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
199 0x8f4c, 0x4887, 0xbc7c, -22499)
200 R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
201 0x7eb8, -27199, 0x00ea, 0x6050)
202 R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
203 0xe01a, 0x22fe, 0x4447, 0x69c5)
204 R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
205 0xb7f3, 0x0253, 0x59b1, 0x4d5b)
206 R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
207 0x4701, -27017, 0xc7bd, -19859)
208
209 R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
210 0x0988, -1462, 0x4c70, -19401)
211 R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
212 0xadaf, -5221, 0xfc99, 0x66f7)
213 R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
214 0x7e80, -16418, 0xba1e, -25587)
215 R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
216 0x4130, 0x380d, 0xe0c5, 0x738d)
217 lwz rW00,0(rHP)
218 R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
219 0xe837, -30770, 0xde8a, 0x69e8)
220 lwz rW14,4(rHP)
221 R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
222 0x9e79, 0x260f, 0x256d, -27941)
223 lwz rW12,8(rHP)
224 R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
225 0xab75, -20775, 0x4f9e, -28397)
226 lwz rW10,12(rHP)
227 R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
228 0x662b, 0x7c56, 0x11b2, 0x0358)
229
230 add rH0,rH0,rW00
231 stw rH0,0(rHP)
232 add rH1,rH1,rW14
233 stw rH1,4(rHP)
234 add rH2,rH2,rW12
235 stw rH2,8(rHP)
236 add rH3,rH3,rW10
237 stw rH3,12(rHP)
238 NEXT_BLOCK
239
240 bdnz ppc_md5_main
241
242 FINALIZE
243 blr
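The 16-bit immediates fed to the round macros above are the standard MD5 constants split into an addis high half and a sign-extended addi low half; whenever the low half is negative, the high half is one larger so the pair still sums to the intended 32-bit value. A standalone, illustrative C sketch (not taken from the patch) showing how the first two immediate pairs from R_00_15 recombine into the usual table values:

#include <stdint.h>
#include <stdio.h>

/* Recombine an (addis, addi) immediate pair the way the hardware does:
 * shift the high half left by 16 and add the sign-extended low half. */
static uint32_t combine(uint16_t hi, int16_t lo)
{
	return ((uint32_t)hi << 16) + (uint32_t)(int32_t)lo;
}

int main(void)
{
	/* 0xd76b/-23432 and 0xe8c8/-18602 from the first R_00_15 call */
	printf("%08x\n", (unsigned)combine(0xd76b, -23432));	/* d76aa478 */
	printf("%08x\n", (unsigned)combine(0xe8c8, -18602));	/* e8c7b756 */
	return 0;
}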
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
new file mode 100644
index 000000000000..452fb4dc575f
--- /dev/null
+++ b/arch/powerpc/crypto/md5-glue.c
@@ -0,0 +1,165 @@
1/*
2 * Glue code for MD5 implementation for PPC assembler
3 *
4 * Based on generic implementation.
5 *
6 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
11 * any later version.
12 *
13 */
14
15#include <crypto/internal/hash.h>
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/mm.h>
19#include <linux/cryptohash.h>
20#include <linux/types.h>
21#include <crypto/md5.h>
22#include <asm/byteorder.h>
23
24extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
25
26static inline void ppc_md5_clear_context(struct md5_state *sctx)
27{
28 int count = sizeof(struct md5_state) >> 2;
29 u32 *ptr = (u32 *)sctx;
30
31 /* make sure we can clear the fast way */
32 BUILD_BUG_ON(sizeof(struct md5_state) % 4);
33 do { *ptr++ = 0; } while (--count);
34}
35
36static int ppc_md5_init(struct shash_desc *desc)
37{
38 struct md5_state *sctx = shash_desc_ctx(desc);
39
40 sctx->hash[0] = 0x67452301;
41 sctx->hash[1] = 0xefcdab89;
42 sctx->hash[2] = 0x98badcfe;
43 sctx->hash[3] = 0x10325476;
44 sctx->byte_count = 0;
45
46 return 0;
47}
48
49static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
50 unsigned int len)
51{
52 struct md5_state *sctx = shash_desc_ctx(desc);
53 const unsigned int offset = sctx->byte_count & 0x3f;
54 unsigned int avail = 64 - offset;
55 const u8 *src = data;
56
57 sctx->byte_count += len;
58
59 if (avail > len) {
60 memcpy((char *)sctx->block + offset, src, len);
61 return 0;
62 }
63
64 if (offset) {
65 memcpy((char *)sctx->block + offset, src, avail);
66 ppc_md5_transform(sctx->hash, (const u8 *)sctx->block, 1);
67 len -= avail;
68 src += avail;
69 }
70
71 if (len > 63) {
72 ppc_md5_transform(sctx->hash, src, len >> 6);
73 src += len & ~0x3f;
74 len &= 0x3f;
75 }
76
77 memcpy((char *)sctx->block, src, len);
78 return 0;
79}
80
81static int ppc_md5_final(struct shash_desc *desc, u8 *out)
82{
83 struct md5_state *sctx = shash_desc_ctx(desc);
84 const unsigned int offset = sctx->byte_count & 0x3f;
85 const u8 *src = (const u8 *)sctx->block;
86 u8 *p = (u8 *)src + offset;
87 int padlen = 55 - offset;
88 __le64 *pbits = (__le64 *)((char *)sctx->block + 56);
89 __le32 *dst = (__le32 *)out;
90
91 *p++ = 0x80;
92
93 if (padlen < 0) {
94 memset(p, 0x00, padlen + sizeof (u64));
95 ppc_md5_transform(sctx->hash, src, 1);
96		p = (u8 *)sctx->block;
97 padlen = 56;
98 }
99
100 memset(p, 0, padlen);
101 *pbits = cpu_to_le64(sctx->byte_count << 3);
102 ppc_md5_transform(sctx->hash, src, 1);
103
104 dst[0] = cpu_to_le32(sctx->hash[0]);
105 dst[1] = cpu_to_le32(sctx->hash[1]);
106 dst[2] = cpu_to_le32(sctx->hash[2]);
107 dst[3] = cpu_to_le32(sctx->hash[3]);
108
109 ppc_md5_clear_context(sctx);
110 return 0;
111}
112
113static int ppc_md5_export(struct shash_desc *desc, void *out)
114{
115 struct md5_state *sctx = shash_desc_ctx(desc);
116
117 memcpy(out, sctx, sizeof(*sctx));
118 return 0;
119}
120
121static int ppc_md5_import(struct shash_desc *desc, const void *in)
122{
123 struct md5_state *sctx = shash_desc_ctx(desc);
124
125 memcpy(sctx, in, sizeof(*sctx));
126 return 0;
127}
128
129static struct shash_alg alg = {
130 .digestsize = MD5_DIGEST_SIZE,
131 .init = ppc_md5_init,
132 .update = ppc_md5_update,
133 .final = ppc_md5_final,
134 .export = ppc_md5_export,
135 .import = ppc_md5_import,
136 .descsize = sizeof(struct md5_state),
137 .statesize = sizeof(struct md5_state),
138 .base = {
139 .cra_name = "md5",
140 .cra_driver_name= "md5-ppc",
141 .cra_priority = 200,
142 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
143 .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
144 .cra_module = THIS_MODULE,
145 }
146};
147
148static int __init ppc_md5_mod_init(void)
149{
150 return crypto_register_shash(&alg);
151}
152
153static void __exit ppc_md5_mod_fini(void)
154{
155 crypto_unregister_shash(&alg);
156}
157
158module_init(ppc_md5_mod_init);
159module_exit(ppc_md5_mod_fini);
160
161MODULE_LICENSE("GPL");
162MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler");
163
164MODULE_ALIAS_CRYPTO("md5");
165MODULE_ALIAS_CRYPTO("md5-ppc");
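ppc_md5_update() above follows the usual buffering pattern: top up a 64-byte partial block, flush it, hand all remaining whole blocks to the assembler routine in a single call, and stash the tail. A minimal userspace sketch of the same strategy; process_blocks() is only a placeholder standing in for the real ppc_md5_transform():

#include <stdint.h>
#include <string.h>

struct buf_state {
	uint64_t byte_count;
	uint8_t  block[64];
};

/* placeholder for the real block transform */
static void process_blocks(const uint8_t *src, unsigned int blocks)
{
	(void)src;
	(void)blocks;
}

static void buffered_update(struct buf_state *s, const uint8_t *data,
			    unsigned int len)
{
	unsigned int offset = s->byte_count & 0x3f;
	unsigned int avail = 64 - offset;

	s->byte_count += len;

	if (avail > len) {			/* not even one full block yet */
		memcpy(s->block + offset, data, len);
		return;
	}
	if (offset) {				/* complete the buffered block */
		memcpy(s->block + offset, data, avail);
		process_blocks(s->block, 1);
		data += avail;
		len -= avail;
	}
	if (len > 63) {				/* all remaining whole blocks */
		process_blocks(data, len >> 6);
		data += len & ~0x3fU;
		len &= 0x3f;
	}
	memcpy(s->block, data, len);		/* keep the tail for later */
}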
diff --git a/arch/powerpc/crypto/sha1-spe-asm.S b/arch/powerpc/crypto/sha1-spe-asm.S
new file mode 100644
index 000000000000..fcb6cf002889
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-spe-asm.S
@@ -0,0 +1,299 @@
1/*
2 * Fast SHA-1 implementation for SPE instruction set (PPC)
3 *
4 * This code makes use of the SPE SIMD instruction set as defined in
5 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
6 * Implementation is based on optimization guide notes from
7 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
8 *
9 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 */
17
18#include <asm/ppc_asm.h>
19#include <asm/asm-offsets.h>
20
21#define rHP r3 /* pointer to hash value */
22#define rWP r4 /* pointer to input */
23#define rKP r5 /* pointer to constants */
24
25#define rW0 r14 /* 64 bit round words */
26#define rW1 r15
27#define rW2 r16
28#define rW3 r17
29#define rW4 r18
30#define rW5 r19
31#define rW6 r20
32#define rW7 r21
33
34#define rH0 r6 /* 32 bit hash values */
35#define rH1 r7
36#define rH2 r8
37#define rH3 r9
38#define rH4 r10
39
40#define rT0 r22 /* 64 bit temporary */
41#define rT1 r0 /* 32 bit temporaries */
42#define rT2 r11
43#define rT3 r12
44
45#define rK r23 /* 64 bit constant in volatile register */
46
47#define LOAD_K01
48
49#define LOAD_K11 \
50 evlwwsplat rK,0(rKP);
51
52#define LOAD_K21 \
53 evlwwsplat rK,4(rKP);
54
55#define LOAD_K31 \
56 evlwwsplat rK,8(rKP);
57
58#define LOAD_K41 \
59 evlwwsplat rK,12(rKP);
60
61#define INITIALIZE \
62 stwu r1,-128(r1); /* create stack frame */ \
63 evstdw r14,8(r1); /* We must save non volatile */ \
64 evstdw r15,16(r1); /* registers. Take the chance */ \
65 evstdw r16,24(r1); /* and save the SPE part too */ \
66 evstdw r17,32(r1); \
67 evstdw r18,40(r1); \
68 evstdw r19,48(r1); \
69 evstdw r20,56(r1); \
70 evstdw r21,64(r1); \
71 evstdw r22,72(r1); \
72 evstdw r23,80(r1);
73
74
75#define FINALIZE \
76 evldw r14,8(r1); /* restore SPE registers */ \
77 evldw r15,16(r1); \
78 evldw r16,24(r1); \
79 evldw r17,32(r1); \
80 evldw r18,40(r1); \
81 evldw r19,48(r1); \
82 evldw r20,56(r1); \
83 evldw r21,64(r1); \
84 evldw r22,72(r1); \
85 evldw r23,80(r1); \
86 xor r0,r0,r0; \
87 stw r0,8(r1); /* Delete sensitive data */ \
88 stw r0,16(r1); /* that we might have pushed */ \
89 stw r0,24(r1); /* from other context that runs */ \
90 stw r0,32(r1); /* the same code. Assume that */ \
91 stw r0,40(r1); /* the lower part of the GPRs */ \
92	stw	r0,48(r1);	/* was already overwritten on */	\
93 stw r0,56(r1); /* the way down to here */ \
94 stw r0,64(r1); \
95 stw r0,72(r1); \
96 stw r0,80(r1); \
97 addi r1,r1,128; /* cleanup stack frame */
98
99#ifdef __BIG_ENDIAN__
100#define LOAD_DATA(reg, off) \
101 lwz reg,off(rWP); /* load data */
102#define NEXT_BLOCK \
103 addi rWP,rWP,64; /* increment per block */
104#else
105#define LOAD_DATA(reg, off) \
106 lwbrx reg,0,rWP; /* load data */ \
107 addi rWP,rWP,4; /* increment per word */
108#define NEXT_BLOCK /* nothing to do */
109#endif
110
111#define R_00_15(a, b, c, d, e, w0, w1, k, off) \
112 LOAD_DATA(w0, off) /* 1: W */ \
113 and rT2,b,c; /* 1: F' = B and C */ \
114 LOAD_K##k##1 \
115 andc rT1,d,b; /* 1: F" = ~B and D */ \
116 rotrwi rT0,a,27; /* 1: A' = A rotl 5 */ \
117 or rT2,rT2,rT1; /* 1: F = F' or F" */ \
118 add e,e,rT0; /* 1: E = E + A' */ \
119 rotrwi b,b,2; /* 1: B = B rotl 30 */ \
120 add e,e,w0; /* 1: E = E + W */ \
121 LOAD_DATA(w1, off+4) /* 2: W */ \
122 add e,e,rT2; /* 1: E = E + F */ \
123 and rT1,a,b; /* 2: F' = B and C */ \
124 add e,e,rK; /* 1: E = E + K */ \
125 andc rT2,c,a; /* 2: F" = ~B and D */ \
126 add d,d,rK; /* 2: E = E + K */ \
127 or rT2,rT2,rT1; /* 2: F = F' or F" */ \
128 rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
129 add d,d,w1; /* 2: E = E + W */ \
130 rotrwi a,a,2; /* 2: B = B rotl 30 */ \
131 add d,d,rT0; /* 2: E = E + A' */ \
132 evmergelo w1,w1,w0; /* mix W[0]/W[1] */ \
133 add d,d,rT2 /* 2: E = E + F */
134
135#define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
136 and rT2,b,c; /* 1: F' = B and C */ \
137 evmergelohi rT0,w7,w6; /* W[-3] */ \
138 andc rT1,d,b; /* 1: F" = ~B and D */ \
139 evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
140 or rT1,rT1,rT2; /* 1: F = F' or F" */ \
141 evxor w0,w0,w4; /* W = W xor W[-8] */ \
142 add e,e,rT1; /* 1: E = E + F */ \
143 evxor w0,w0,w1; /* W = W xor W[-14] */ \
144 rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
145 evrlwi w0,w0,1; /* W = W rotl 1 */ \
146 add e,e,rT2; /* 1: E = E + A' */ \
147 evaddw rT0,w0,rK; /* WK = W + K */ \
148 rotrwi b,b,2; /* 1: B = B rotl 30 */ \
149 LOAD_K##k##1 \
150 evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
151 add e,e,rT0; /* 1: E = E + WK */ \
152 add d,d,rT1; /* 2: E = E + WK */ \
153 and rT2,a,b; /* 2: F' = B and C */ \
154 andc rT1,c,a; /* 2: F" = ~B and D */ \
155 rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
156 or rT1,rT1,rT2; /* 2: F = F' or F" */ \
157 add d,d,rT0; /* 2: E = E + A' */ \
158 rotrwi a,a,2; /* 2: B = B rotl 30 */ \
159 add d,d,rT1 /* 2: E = E + F */
160
161#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
162 evmergelohi rT0,w7,w6; /* W[-3] */ \
163 xor rT2,b,c; /* 1: F' = B xor C */ \
164 evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
165 xor rT2,rT2,d; /* 1: F = F' xor D */ \
166 evxor w0,w0,w4; /* W = W xor W[-8] */ \
167 add e,e,rT2; /* 1: E = E + F */ \
168 evxor w0,w0,w1; /* W = W xor W[-14] */ \
169 rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
170 evrlwi w0,w0,1; /* W = W rotl 1 */ \
171 add e,e,rT2; /* 1: E = E + A' */ \
172 evaddw rT0,w0,rK; /* WK = W + K */ \
173 rotrwi b,b,2; /* 1: B = B rotl 30 */ \
174 LOAD_K##k##1 \
175 evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
176 add e,e,rT0; /* 1: E = E + WK */ \
177 xor rT2,a,b; /* 2: F' = B xor C */ \
178 add d,d,rT1; /* 2: E = E + WK */ \
179 xor rT2,rT2,c; /* 2: F = F' xor D */ \
180 rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
181 add d,d,rT2; /* 2: E = E + F */ \
182 rotrwi a,a,2; /* 2: B = B rotl 30 */ \
183 add d,d,rT0 /* 2: E = E + A' */
184
185#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
186 and rT2,b,c; /* 1: F' = B and C */ \
187 evmergelohi rT0,w7,w6; /* W[-3] */ \
188 or rT1,b,c; /* 1: F" = B or C */ \
189 evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
190 and rT1,d,rT1; /* 1: F" = F" and D */ \
191 evxor w0,w0,w4; /* W = W xor W[-8] */ \
192 or rT2,rT2,rT1; /* 1: F = F' or F" */ \
193 evxor w0,w0,w1; /* W = W xor W[-14] */ \
194 add e,e,rT2; /* 1: E = E + F */ \
195 evrlwi w0,w0,1; /* W = W rotl 1 */ \
196 rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
197 evaddw rT0,w0,rK; /* WK = W + K */ \
198 add e,e,rT2; /* 1: E = E + A' */ \
199 LOAD_K##k##1 \
200 evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
201 rotrwi b,b,2; /* 1: B = B rotl 30 */ \
202 add e,e,rT0; /* 1: E = E + WK */ \
203 and rT2,a,b; /* 2: F' = B and C */ \
204 or rT0,a,b; /* 2: F" = B or C */ \
205 add d,d,rT1; /* 2: E = E + WK */ \
206 and rT0,c,rT0; /* 2: F" = F" and D */ \
207 rotrwi a,a,2; /* 2: B = B rotl 30 */ \
208 or rT2,rT2,rT0; /* 2: F = F' or F" */ \
209 rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
210 add d,d,rT2; /* 2: E = E + F */ \
211 add d,d,rT0 /* 2: E = E + A' */
212
213#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
214 R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
215
216_GLOBAL(ppc_spe_sha1_transform)
217 INITIALIZE
218
219 lwz rH0,0(rHP)
220 lwz rH1,4(rHP)
221 mtctr r5
222 lwz rH2,8(rHP)
223 lis rKP,PPC_SPE_SHA1_K@h
224 lwz rH3,12(rHP)
225 ori rKP,rKP,PPC_SPE_SHA1_K@l
226 lwz rH4,16(rHP)
227
228ppc_spe_sha1_main:
229 R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
230 R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
231 R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
232 R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
233 R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
234 R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
235 R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
236 R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)
237
238 R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
239 R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)
240
241 R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
242 R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
243 R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
244 R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
245 R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
246 R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
247 R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
248 R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
249 R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
250 R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)
251
252 R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
253 R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
254 R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
255 R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
256 R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
257 R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
258 R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
259 R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
260 R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
261 R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)
262
263 R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
264 R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
265 R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
266 R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
267 R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
268 R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
269 R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
270 lwz rT3,0(rHP)
271 R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
272 lwz rW1,4(rHP)
273 R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
274 lwz rW2,8(rHP)
275 R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
276 lwz rW3,12(rHP)
277 NEXT_BLOCK
278 lwz rW4,16(rHP)
279
280 add rH0,rH0,rT3
281 stw rH0,0(rHP)
282 add rH1,rH1,rW1
283 stw rH1,4(rHP)
284 add rH2,rH2,rW2
285 stw rH2,8(rHP)
286 add rH3,rH3,rW3
287 stw rH3,12(rHP)
288 add rH4,rH4,rW4
289 stw rH4,16(rHP)
290
291 bdnz ppc_spe_sha1_main
292
293 FINALIZE
294 blr
295
296.data
297.align 4
298PPC_SPE_SHA1_K:
299 .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6
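The evmergelohi/evxor/evrlwi sequences in R_16_19 through R_60_79 compute the SHA-1 message schedule two words at a time; the W[-3]/W[-8]/W[-14]/W[-16] comments refer to the standard scalar recurrence, shown here for reference as a small C sketch:

#include <stdint.h>

static inline uint32_t rotl32(uint32_t x, unsigned int n)
{
	return (x << n) | (x >> (32 - n));
}

/* W[i] = rotl1(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) for 16 <= i < 80 */
static uint32_t sha1_schedule_word(const uint32_t w[80], unsigned int i)
{
	return rotl32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);
}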
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
new file mode 100644
index 000000000000..3e1d22212521
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -0,0 +1,210 @@
1/*
2 * Glue code for SHA-1 implementation for SPE instructions (PPC)
3 *
4 * Based on generic implementation.
5 *
6 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
11 * any later version.
12 *
13 */
14
15#include <crypto/internal/hash.h>
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/mm.h>
19#include <linux/cryptohash.h>
20#include <linux/types.h>
21#include <crypto/sha.h>
22#include <asm/byteorder.h>
23#include <asm/switch_to.h>
24#include <linux/hardirq.h>
25
26/*
27 * MAX_BYTES defines the number of bytes that are allowed to be processed
28 * between preempt_disable() and preempt_enable(). SHA1 takes ~1000
29 * operations per 64 bytes. e500 cores can issue two arithmetic instructions
30 * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
31 * Thus 2KB of input data will need an estimated maximum of 18,000 cycles.
32 * Headroom for cache misses included. Even with the low end model clocked
33 * at 667 MHz this amounts to a critical time window of less than 27us.
34 *
35 */
36#define MAX_BYTES 2048
37
38extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
39
40static void spe_begin(void)
41{
42 /* We just start SPE operations and will save SPE registers later. */
43 preempt_disable();
44 enable_kernel_spe();
45}
46
47static void spe_end(void)
48{
49 /* reenable preemption */
50 preempt_enable();
51}
52
53static inline void ppc_sha1_clear_context(struct sha1_state *sctx)
54{
55 int count = sizeof(struct sha1_state) >> 2;
56 u32 *ptr = (u32 *)sctx;
57
58 /* make sure we can clear the fast way */
59 BUILD_BUG_ON(sizeof(struct sha1_state) % 4);
60 do { *ptr++ = 0; } while (--count);
61}
62
63static int ppc_spe_sha1_init(struct shash_desc *desc)
64{
65 struct sha1_state *sctx = shash_desc_ctx(desc);
66
67 sctx->state[0] = SHA1_H0;
68 sctx->state[1] = SHA1_H1;
69 sctx->state[2] = SHA1_H2;
70 sctx->state[3] = SHA1_H3;
71 sctx->state[4] = SHA1_H4;
72 sctx->count = 0;
73
74 return 0;
75}
76
77static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
78 unsigned int len)
79{
80 struct sha1_state *sctx = shash_desc_ctx(desc);
81 const unsigned int offset = sctx->count & 0x3f;
82 const unsigned int avail = 64 - offset;
83 unsigned int bytes;
84 const u8 *src = data;
85
86 if (avail > len) {
87 sctx->count += len;
88 memcpy((char *)sctx->buffer + offset, src, len);
89 return 0;
90 }
91
92 sctx->count += len;
93
94 if (offset) {
95 memcpy((char *)sctx->buffer + offset, src, avail);
96
97 spe_begin();
98 ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1);
99 spe_end();
100
101 len -= avail;
102 src += avail;
103 }
104
105 while (len > 63) {
106 bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
107 bytes = bytes & ~0x3f;
108
109 spe_begin();
110 ppc_spe_sha1_transform(sctx->state, src, bytes >> 6);
111 spe_end();
112
113 src += bytes;
114 len -= bytes;
115	}
116
117 memcpy((char *)sctx->buffer, src, len);
118 return 0;
119}
120
121static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out)
122{
123 struct sha1_state *sctx = shash_desc_ctx(desc);
124 const unsigned int offset = sctx->count & 0x3f;
125 char *p = (char *)sctx->buffer + offset;
126 int padlen;
127 __be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56);
128 __be32 *dst = (__be32 *)out;
129
130 padlen = 55 - offset;
131 *p++ = 0x80;
132
133 spe_begin();
134
135 if (padlen < 0) {
136 memset(p, 0x00, padlen + sizeof (u64));
137 ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
138 p = (char *)sctx->buffer;
139 padlen = 56;
140 }
141
142 memset(p, 0, padlen);
143 *pbits = cpu_to_be64(sctx->count << 3);
144 ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
145
146 spe_end();
147
148 dst[0] = cpu_to_be32(sctx->state[0]);
149 dst[1] = cpu_to_be32(sctx->state[1]);
150 dst[2] = cpu_to_be32(sctx->state[2]);
151 dst[3] = cpu_to_be32(sctx->state[3]);
152 dst[4] = cpu_to_be32(sctx->state[4]);
153
154 ppc_sha1_clear_context(sctx);
155 return 0;
156}
157
158static int ppc_spe_sha1_export(struct shash_desc *desc, void *out)
159{
160 struct sha1_state *sctx = shash_desc_ctx(desc);
161
162 memcpy(out, sctx, sizeof(*sctx));
163 return 0;
164}
165
166static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in)
167{
168 struct sha1_state *sctx = shash_desc_ctx(desc);
169
170 memcpy(sctx, in, sizeof(*sctx));
171 return 0;
172}
173
174static struct shash_alg alg = {
175 .digestsize = SHA1_DIGEST_SIZE,
176 .init = ppc_spe_sha1_init,
177 .update = ppc_spe_sha1_update,
178 .final = ppc_spe_sha1_final,
179 .export = ppc_spe_sha1_export,
180 .import = ppc_spe_sha1_import,
181 .descsize = sizeof(struct sha1_state),
182 .statesize = sizeof(struct sha1_state),
183 .base = {
184 .cra_name = "sha1",
185 .cra_driver_name= "sha1-ppc-spe",
186 .cra_priority = 300,
187 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
188 .cra_blocksize = SHA1_BLOCK_SIZE,
189 .cra_module = THIS_MODULE,
190 }
191};
192
193static int __init ppc_spe_sha1_mod_init(void)
194{
195 return crypto_register_shash(&alg);
196}
197
198static void __exit ppc_spe_sha1_mod_fini(void)
199{
200 crypto_unregister_shash(&alg);
201}
202
203module_init(ppc_spe_sha1_mod_init);
204module_exit(ppc_spe_sha1_mod_fini);
205
206MODULE_LICENSE("GPL");
207MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized");
208
209MODULE_ALIAS_CRYPTO("sha1");
210MODULE_ALIAS_CRYPTO("sha1-ppc-spe");
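The MAX_BYTES comment near the top of this file justifies the 2048-byte cap on each preempt-disabled section with a rough cycle estimate. A small sketch of that arithmetic, using only the figures quoted in the comment (estimates, not measurements):

#include <stdio.h>

int main(void)
{
	const double insns_per_block = 1000.0;	/* ~1000 ops per 64 bytes */
	const double issue_per_cycle = 2.0;	/* SU1 + SU2 on e500 */
	const double blocks = 2048.0 / 64.0;	/* MAX_BYTES worth of input */
	const double cycles = blocks * insns_per_block / issue_per_cycle;
	const double budget = 18000.0;		/* estimate incl. headroom */

	printf("raw estimate: %.0f cycles\n", cycles);		/* 16000 */
	printf("window at 667 MHz: %.2f us\n", budget / 667e6 * 1e6);
	return 0;
}

At 667 MHz the 18,000-cycle budget comes out just under 27 microseconds, which is the window quoted both here and in the SHA-256 glue code.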
diff --git a/arch/powerpc/crypto/sha256-spe-asm.S b/arch/powerpc/crypto/sha256-spe-asm.S
new file mode 100644
index 000000000000..2d10e4c08f03
--- /dev/null
+++ b/arch/powerpc/crypto/sha256-spe-asm.S
@@ -0,0 +1,323 @@
1/*
2 * Fast SHA-256 implementation for SPE instruction set (PPC)
3 *
4 * This code makes use of the SPE SIMD instruction set as defined in
5 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
6 * Implementation is based on optimization guide notes from
7 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
8 *
9 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 */
17
18#include <asm/ppc_asm.h>
19#include <asm/asm-offsets.h>
20
21#define rHP r3 /* pointer to hash values in memory */
22#define rKP r24 /* pointer to round constants */
23#define rWP r4 /* pointer to input data */
24
25#define rH0 r5 /* 8 32 bit hash values in 8 registers */
26#define rH1 r6
27#define rH2 r7
28#define rH3 r8
29#define rH4 r9
30#define rH5 r10
31#define rH6 r11
32#define rH7 r12
33
34#define rW0 r14 /* 64 bit registers. 16 words in 8 registers */
35#define rW1 r15
36#define rW2 r16
37#define rW3 r17
38#define rW4 r18
39#define rW5 r19
40#define rW6 r20
41#define rW7 r21
42
43#define rT0 r22 /* 64 bit temporaries */
44#define rT1 r23
45#define rT2 r0 /* 32 bit temporaries */
46#define rT3 r25
47
48#define CMP_KN_LOOP
49#define CMP_KC_LOOP \
50 cmpwi rT1,0;
51
52#define INITIALIZE \
53 stwu r1,-128(r1); /* create stack frame */ \
54 evstdw r14,8(r1); /* We must save non volatile */ \
55 evstdw r15,16(r1); /* registers. Take the chance */ \
56 evstdw r16,24(r1); /* and save the SPE part too */ \
57 evstdw r17,32(r1); \
58 evstdw r18,40(r1); \
59 evstdw r19,48(r1); \
60 evstdw r20,56(r1); \
61 evstdw r21,64(r1); \
62 evstdw r22,72(r1); \
63 evstdw r23,80(r1); \
64 stw r24,88(r1); /* save normal registers */ \
65 stw r25,92(r1);
66
67
68#define FINALIZE \
69 evldw r14,8(r1); /* restore SPE registers */ \
70 evldw r15,16(r1); \
71 evldw r16,24(r1); \
72 evldw r17,32(r1); \
73 evldw r18,40(r1); \
74 evldw r19,48(r1); \
75 evldw r20,56(r1); \
76 evldw r21,64(r1); \
77 evldw r22,72(r1); \
78 evldw r23,80(r1); \
79 lwz r24,88(r1); /* restore normal registers */ \
80 lwz r25,92(r1); \
81 xor r0,r0,r0; \
82 stw r0,8(r1); /* Delete sensitive data */ \
83 stw r0,16(r1); /* that we might have pushed */ \
84 stw r0,24(r1); /* from other context that runs */ \
85 stw r0,32(r1); /* the same code. Assume that */ \
86 stw r0,40(r1); /* the lower part of the GPRs */ \
87 stw r0,48(r1); /* was already overwritten on */ \
88 stw r0,56(r1); /* the way down to here */ \
89 stw r0,64(r1); \
90 stw r0,72(r1); \
91 stw r0,80(r1); \
92 addi r1,r1,128; /* cleanup stack frame */
93
94#ifdef __BIG_ENDIAN__
95#define LOAD_DATA(reg, off) \
96 lwz reg,off(rWP); /* load data */
97#define NEXT_BLOCK \
98 addi rWP,rWP,64; /* increment per block */
99#else
100#define LOAD_DATA(reg, off) \
101 lwbrx reg,0,rWP; /* load data */ \
102 addi rWP,rWP,4; /* increment per word */
103#define NEXT_BLOCK /* nothing to do */
104#endif
105
106#define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
107 LOAD_DATA(w, off) /* 1: W */ \
108 rotrwi rT0,e,6; /* 1: S1 = e rotr 6 */ \
109 rotrwi rT1,e,11; /* 1: S1' = e rotr 11 */ \
110 rotrwi rT2,e,25; /* 1: S1" = e rotr 25 */ \
111 xor rT0,rT0,rT1; /* 1: S1 = S1 xor S1' */ \
112 and rT3,e,f; /* 1: ch = e and f */ \
113 xor rT0,rT0,rT2; /* 1: S1 = S1 xor S1" */ \
114 andc rT1,g,e; /* 1: ch' = ~e and g */ \
115 lwz rT2,off(rKP); /* 1: K */ \
116 xor rT3,rT3,rT1; /* 1: ch = ch xor ch' */ \
117 add h,h,rT0; /* 1: temp1 = h + S1 */ \
118 add rT3,rT3,w; /* 1: temp1' = ch + w */ \
119 rotrwi rT0,a,2; /* 1: S0 = a rotr 2 */ \
120 add h,h,rT3; /* 1: temp1 = temp1 + temp1' */ \
121 rotrwi rT1,a,13; /* 1: S0' = a rotr 13 */ \
122 add h,h,rT2; /* 1: temp1 = temp1 + K */ \
123 rotrwi rT3,a,22; /* 1: S0" = a rotr 22 */ \
124 xor rT0,rT0,rT1; /* 1: S0 = S0 xor S0' */ \
125 add d,d,h; /* 1: d = d + temp1 */ \
126 xor rT3,rT0,rT3; /* 1: S0 = S0 xor S0" */ \
127 evmergelo w,w,w; /* shift W */ \
128 or rT2,a,b; /* 1: maj = a or b */ \
129 and rT1,a,b; /* 1: maj' = a and b */ \
130 and rT2,rT2,c; /* 1: maj = maj and c */ \
131 LOAD_DATA(w, off+4) /* 2: W */ \
132 or rT2,rT1,rT2; /* 1: maj = maj or maj' */ \
133 rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \
134 add rT3,rT3,rT2; /* 1: temp2 = S0 + maj */ \
135 rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \
136 add h,h,rT3; /* 1: h = temp1 + temp2 */ \
137 rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \
138 xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \
139 and rT3,d,e; /* 2: ch = e and f */ \
140 xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \
141 andc rT1,f,d; /* 2: ch' = ~e and g */ \
142 lwz rT2,off+4(rKP); /* 2: K */ \
143 xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \
144 add g,g,rT0; /* 2: temp1 = h + S1 */ \
145 add rT3,rT3,w; /* 2: temp1' = ch + w */ \
146 rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \
147 add g,g,rT3; /* 2: temp1 = temp1 + temp1' */ \
148 rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \
149 add g,g,rT2; /* 2: temp1 = temp1 + K */ \
150 rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \
151 xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \
152 or rT2,h,a; /* 2: maj = a or b */ \
153 xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \
154 and rT1,h,a; /* 2: maj' = a and b */ \
155 and rT2,rT2,b; /* 2: maj = maj and c */ \
156 add c,c,g; /* 2: d = d + temp1 */ \
157 or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \
158 add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
159 add g,g,rT3 /* 2: h = temp1 + temp2 */
160
161#define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
162 rotrwi rT2,e,6; /* 1: S1 = e rotr 6 */ \
163 evmergelohi rT0,w0,w1; /* w[-15] */ \
164 rotrwi rT3,e,11; /* 1: S1' = e rotr 11 */ \
165 evsrwiu rT1,rT0,3; /* s0 = w[-15] >> 3 */ \
166 xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1' */ \
167 evrlwi rT0,rT0,25; /* s0' = w[-15] rotr 7 */ \
168	rotrwi		rT3,e,25;	/* 1: S1" = e rotr 25 */	\
169	evxor		rT1,rT1,rT0;	/* s0 = s0 xor s0' */		\
170	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1" */	\
171 evrlwi rT0,rT0,21; /* s0' = w[-15] rotr 18 */ \
172 add h,h,rT2; /* 1: temp1 = h + S1 */ \
173 evxor rT0,rT0,rT1; /* s0 = s0 xor s0' */ \
174 and rT2,e,f; /* 1: ch = e and f */ \
175 evaddw w0,w0,rT0; /* w = w[-16] + s0 */ \
176 andc rT3,g,e; /* 1: ch' = ~e and g */ \
177 evsrwiu rT0,w7,10; /* s1 = w[-2] >> 10 */ \
178 xor rT2,rT2,rT3; /* 1: ch = ch xor ch' */ \
179 evrlwi rT1,w7,15; /* s1' = w[-2] rotr 17 */ \
180 add h,h,rT2; /* 1: temp1 = temp1 + ch */ \
181 evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \
182 rotrwi rT2,a,2; /* 1: S0 = a rotr 2 */ \
183 evrlwi rT1,w7,13; /* s1' = w[-2] rotr 19 */ \
184 rotrwi rT3,a,13; /* 1: S0' = a rotr 13 */ \
185 evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \
186 xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \
187 evldw rT1,off(rKP); /* k */ \
188 rotrwi rT3,a,22; /* 1: S0' = a rotr 22 */ \
189 evaddw w0,w0,rT0; /* w = w + s1 */ \
190 xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \
191 evmergelohi rT0,w4,w5; /* w[-7] */ \
192 and rT3,a,b; /* 1: maj = a and b */ \
193 evaddw w0,w0,rT0; /* w = w + w[-7] */ \
194 CMP_K##k##_LOOP \
195 add rT2,rT2,rT3; /* 1: temp2 = S0 + maj */ \
196 evaddw rT1,rT1,w0; /* wk = w + k */ \
197 xor rT3,a,b; /* 1: maj = a xor b */ \
198 evmergehi rT0,rT1,rT1; /* wk1/wk2 */ \
199 and rT3,rT3,c; /* 1: maj = maj and c */ \
200 add h,h,rT0; /* 1: temp1 = temp1 + wk */ \
201 add rT2,rT2,rT3; /* 1: temp2 = temp2 + maj */ \
202 add g,g,rT1; /* 2: temp1 = temp1 + wk */ \
203 add d,d,h; /* 1: d = d + temp1 */ \
204 rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \
205 add h,h,rT2; /* 1: h = temp1 + temp2 */ \
206 rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \
207	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25 */	\
208 xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \
209 and rT3,d,e; /* 2: ch = e and f */ \
210 xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \
211 andc rT1,f,d; /* 2: ch' = ~e and g */ \
212 add g,g,rT0; /* 2: temp1 = h + S1 */ \
213 xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \
214 rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \
215 add g,g,rT3; /* 2: temp1 = temp1 + ch */ \
216 rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \
217 rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \
218 xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \
219 or rT2,h,a; /* 2: maj = a or b */ \
220 and rT1,h,a; /* 2: maj' = a and b */ \
221 and rT2,rT2,b; /* 2: maj = maj and c */ \
222 xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \
223 or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \
224 add c,c,g; /* 2: d = d + temp1 */ \
225 add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
226 add g,g,rT3 /* 2: h = temp1 + temp2 */
227
228_GLOBAL(ppc_spe_sha256_transform)
229 INITIALIZE
230
231 mtctr r5
232 lwz rH0,0(rHP)
233 lwz rH1,4(rHP)
234 lwz rH2,8(rHP)
235 lwz rH3,12(rHP)
236 lwz rH4,16(rHP)
237 lwz rH5,20(rHP)
238 lwz rH6,24(rHP)
239 lwz rH7,28(rHP)
240
241ppc_spe_sha256_main:
242 lis rKP,PPC_SPE_SHA256_K@ha
243 addi rKP,rKP,PPC_SPE_SHA256_K@l
244
245 R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
246 R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
247 R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
248 R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
249 R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
250 R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
251 R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
252 R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
253ppc_spe_sha256_16_rounds:
254 addi rKP,rKP,64
255 R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
256 rW0, rW1, rW4, rW5, rW7, N, 0)
257 R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
258 rW1, rW2, rW5, rW6, rW0, N, 8)
259 R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
260 rW2, rW3, rW6, rW7, rW1, N, 16)
261 R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
262 rW3, rW4, rW7, rW0, rW2, N, 24)
263 R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
264 rW4, rW5, rW0, rW1, rW3, N, 32)
265 R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
266 rW5, rW6, rW1, rW2, rW4, N, 40)
267 R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
268 rW6, rW7, rW2, rW3, rW5, N, 48)
269 R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
270 rW7, rW0, rW3, rW4, rW6, C, 56)
271 bt gt,ppc_spe_sha256_16_rounds
272
273 lwz rW0,0(rHP)
274 NEXT_BLOCK
275 lwz rW1,4(rHP)
276 lwz rW2,8(rHP)
277 lwz rW3,12(rHP)
278 lwz rW4,16(rHP)
279 lwz rW5,20(rHP)
280 lwz rW6,24(rHP)
281 lwz rW7,28(rHP)
282
283 add rH0,rH0,rW0
284 stw rH0,0(rHP)
285 add rH1,rH1,rW1
286 stw rH1,4(rHP)
287 add rH2,rH2,rW2
288 stw rH2,8(rHP)
289 add rH3,rH3,rW3
290 stw rH3,12(rHP)
291 add rH4,rH4,rW4
292 stw rH4,16(rHP)
293 add rH5,rH5,rW5
294 stw rH5,20(rHP)
295 add rH6,rH6,rW6
296 stw rH6,24(rHP)
297 add rH7,rH7,rW7
298 stw rH7,28(rHP)
299
300 bdnz ppc_spe_sha256_main
301
302 FINALIZE
303 blr
304
305.data
306.align 5
307PPC_SPE_SHA256_K:
308 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
309 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
310 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
311 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
312 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
313 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
314 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
315 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
316 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
317 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
318 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
319 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
320 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
321 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
322 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
323 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
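The rotrwi/xor chains in R_LOAD_W and the evsrwiu/evrlwi/evxor chains in R_CALC_W implement the standard SHA-256 compression and message-schedule functions; the scalar forms that the 1:/2: comments describe are, for reference:

#include <stdint.h>

static inline uint32_t rotr32(uint32_t x, unsigned int n)
{
	return (x >> n) | (x << (32 - n));
}

/* big sigma functions applied to the working variables a and e */
static uint32_t S0(uint32_t a) { return rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22); }
static uint32_t S1(uint32_t e) { return rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25); }

/* small sigma functions applied to w[-15] and w[-2] in the schedule */
static uint32_t s0(uint32_t w) { return rotr32(w, 7) ^ rotr32(w, 18) ^ (w >> 3); }
static uint32_t s1(uint32_t w) { return rotr32(w, 17) ^ rotr32(w, 19) ^ (w >> 10); }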
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
new file mode 100644
index 000000000000..f4a616fe1a82
--- /dev/null
+++ b/arch/powerpc/crypto/sha256-spe-glue.c
@@ -0,0 +1,275 @@
1/*
2 * Glue code for SHA-256 implementation for SPE instructions (PPC)
3 *
4 * Based on generic implementation. The assembler module takes care
5 * of the SPE registers so it can run from interrupt context.
6 *
7 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation; either version 2 of the License, or (at your option)
12 * any later version.
13 *
14 */
15
16#include <crypto/internal/hash.h>
17#include <linux/init.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/cryptohash.h>
21#include <linux/types.h>
22#include <crypto/sha.h>
23#include <asm/byteorder.h>
24#include <asm/switch_to.h>
25#include <linux/hardirq.h>
26
27/*
28 * MAX_BYTES defines the number of bytes that are allowed to be processed
29 * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000
30 * operations per 64 bytes. e500 cores can issue two arithmetic instructions
31 * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
32 * Thus 1KB of input data will need an estimated maximum of 18,000 cycles.
33 * Headroom for cache misses included. Even with the low end model clocked
34 * at 667 MHz this amounts to a critical time window of less than 27us.
35 *
36 */
37#define MAX_BYTES 1024
38
39extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
40
41static void spe_begin(void)
42{
43 /* We just start SPE operations and will save SPE registers later. */
44 preempt_disable();
45 enable_kernel_spe();
46}
47
48static void spe_end(void)
49{
50 /* reenable preemption */
51 preempt_enable();
52}
53
54static inline void ppc_sha256_clear_context(struct sha256_state *sctx)
55{
56 int count = sizeof(struct sha256_state) >> 2;
57 u32 *ptr = (u32 *)sctx;
58
59 /* make sure we can clear the fast way */
60 BUILD_BUG_ON(sizeof(struct sha256_state) % 4);
61 do { *ptr++ = 0; } while (--count);
62}
63
64static int ppc_spe_sha256_init(struct shash_desc *desc)
65{
66 struct sha256_state *sctx = shash_desc_ctx(desc);
67
68 sctx->state[0] = SHA256_H0;
69 sctx->state[1] = SHA256_H1;
70 sctx->state[2] = SHA256_H2;
71 sctx->state[3] = SHA256_H3;
72 sctx->state[4] = SHA256_H4;
73 sctx->state[5] = SHA256_H5;
74 sctx->state[6] = SHA256_H6;
75 sctx->state[7] = SHA256_H7;
76 sctx->count = 0;
77
78 return 0;
79}
80
81static int ppc_spe_sha224_init(struct shash_desc *desc)
82{
83 struct sha256_state *sctx = shash_desc_ctx(desc);
84
85 sctx->state[0] = SHA224_H0;
86 sctx->state[1] = SHA224_H1;
87 sctx->state[2] = SHA224_H2;
88 sctx->state[3] = SHA224_H3;
89 sctx->state[4] = SHA224_H4;
90 sctx->state[5] = SHA224_H5;
91 sctx->state[6] = SHA224_H6;
92 sctx->state[7] = SHA224_H7;
93 sctx->count = 0;
94
95 return 0;
96}
97
98static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
99 unsigned int len)
100{
101 struct sha256_state *sctx = shash_desc_ctx(desc);
102 const unsigned int offset = sctx->count & 0x3f;
103 const unsigned int avail = 64 - offset;
104 unsigned int bytes;
105 const u8 *src = data;
106
107 if (avail > len) {
108 sctx->count += len;
109 memcpy((char *)sctx->buf + offset, src, len);
110 return 0;
111 }
112
113 sctx->count += len;
114
115 if (offset) {
116 memcpy((char *)sctx->buf + offset, src, avail);
117
118 spe_begin();
119 ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1);
120 spe_end();
121
122 len -= avail;
123 src += avail;
124 }
125
126 while (len > 63) {
127 /* cut input data into smaller blocks */
128 bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
129 bytes = bytes & ~0x3f;
130
131 spe_begin();
132 ppc_spe_sha256_transform(sctx->state, src, bytes >> 6);
133 spe_end();
134
135 src += bytes;
136 len -= bytes;
137	}
138
139 memcpy((char *)sctx->buf, src, len);
140 return 0;
141}
142
143static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
144{
145 struct sha256_state *sctx = shash_desc_ctx(desc);
146 const unsigned int offset = sctx->count & 0x3f;
147 char *p = (char *)sctx->buf + offset;
148 int padlen;
149 __be64 *pbits = (__be64 *)(((char *)&sctx->buf) + 56);
150 __be32 *dst = (__be32 *)out;
151
152 padlen = 55 - offset;
153 *p++ = 0x80;
154
155 spe_begin();
156
157 if (padlen < 0) {
158 memset(p, 0x00, padlen + sizeof (u64));
159 ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
160 p = (char *)sctx->buf;
161 padlen = 56;
162 }
163
164 memset(p, 0, padlen);
165 *pbits = cpu_to_be64(sctx->count << 3);
166 ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
167
168 spe_end();
169
170 dst[0] = cpu_to_be32(sctx->state[0]);
171 dst[1] = cpu_to_be32(sctx->state[1]);
172 dst[2] = cpu_to_be32(sctx->state[2]);
173 dst[3] = cpu_to_be32(sctx->state[3]);
174 dst[4] = cpu_to_be32(sctx->state[4]);
175 dst[5] = cpu_to_be32(sctx->state[5]);
176 dst[6] = cpu_to_be32(sctx->state[6]);
177 dst[7] = cpu_to_be32(sctx->state[7]);
178
179 ppc_sha256_clear_context(sctx);
180 return 0;
181}
182
183static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
184{
185 u32 D[SHA256_DIGEST_SIZE >> 2];
186 __be32 *dst = (__be32 *)out;
187
188 ppc_spe_sha256_final(desc, (u8 *)D);
189
190 /* avoid bytewise memcpy */
191 dst[0] = D[0];
192 dst[1] = D[1];
193 dst[2] = D[2];
194 dst[3] = D[3];
195 dst[4] = D[4];
196 dst[5] = D[5];
197 dst[6] = D[6];
198
199 /* clear sensitive data */
200 memzero_explicit(D, SHA256_DIGEST_SIZE);
201 return 0;
202}
203
204static int ppc_spe_sha256_export(struct shash_desc *desc, void *out)
205{
206 struct sha256_state *sctx = shash_desc_ctx(desc);
207
208 memcpy(out, sctx, sizeof(*sctx));
209 return 0;
210}
211
212static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in)
213{
214 struct sha256_state *sctx = shash_desc_ctx(desc);
215
216 memcpy(sctx, in, sizeof(*sctx));
217 return 0;
218}
219
220static struct shash_alg algs[2] = { {
221 .digestsize = SHA256_DIGEST_SIZE,
222 .init = ppc_spe_sha256_init,
223 .update = ppc_spe_sha256_update,
224 .final = ppc_spe_sha256_final,
225 .export = ppc_spe_sha256_export,
226 .import = ppc_spe_sha256_import,
227 .descsize = sizeof(struct sha256_state),
228 .statesize = sizeof(struct sha256_state),
229 .base = {
230 .cra_name = "sha256",
231 .cra_driver_name= "sha256-ppc-spe",
232 .cra_priority = 300,
233 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
234 .cra_blocksize = SHA256_BLOCK_SIZE,
235 .cra_module = THIS_MODULE,
236 }
237}, {
238 .digestsize = SHA224_DIGEST_SIZE,
239 .init = ppc_spe_sha224_init,
240 .update = ppc_spe_sha256_update,
241 .final = ppc_spe_sha224_final,
242 .export = ppc_spe_sha256_export,
243 .import = ppc_spe_sha256_import,
244 .descsize = sizeof(struct sha256_state),
245 .statesize = sizeof(struct sha256_state),
246 .base = {
247 .cra_name = "sha224",
248 .cra_driver_name= "sha224-ppc-spe",
249 .cra_priority = 300,
250 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
251 .cra_blocksize = SHA224_BLOCK_SIZE,
252 .cra_module = THIS_MODULE,
253 }
254} };
255
256static int __init ppc_spe_sha256_mod_init(void)
257{
258 return crypto_register_shashes(algs, ARRAY_SIZE(algs));
259}
260
261static void __exit ppc_spe_sha256_mod_fini(void)
262{
263 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
264}
265
266module_init(ppc_spe_sha256_mod_init);
267module_exit(ppc_spe_sha256_mod_fini);
268
269MODULE_LICENSE("GPL");
270MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized");
271
272MODULE_ALIAS_CRYPTO("sha224");
273MODULE_ALIAS_CRYPTO("sha224-ppc-spe");
274MODULE_ALIAS_CRYPTO("sha256");
275MODULE_ALIAS_CRYPTO("sha256-ppc-spe");
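The SHA-224 entry points registered above reuse the whole SHA-256 path: ppc_spe_sha224_init() just loads the SHA-224 initial values and ppc_spe_sha224_final() computes the full eight-word state before copying out only the first seven words. A simplified userspace sketch of that approach; sha256_with_iv() is a placeholder for the real driver path:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* placeholder: the real code feeds the data through ppc_spe_sha256_transform() */
static void sha256_with_iv(const uint32_t iv[8], const uint8_t *data,
			   size_t len, uint32_t state[8])
{
	(void)data;
	(void)len;
	memcpy(state, iv, 8 * sizeof(uint32_t));
}

static void sha224_digest(const uint8_t *data, size_t len, uint8_t out[28])
{
	static const uint32_t sha224_iv[8] = {
		0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
		0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4,
	};
	uint32_t state[8];
	unsigned int i;

	sha256_with_iv(sha224_iv, data, len, state);
	for (i = 0; i < 7; i++) {		/* truncate to 224 bits */
		out[4 * i + 0] = (uint8_t)(state[i] >> 24);
		out[4 * i + 1] = (uint8_t)(state[i] >> 16);
		out[4 * i + 2] = (uint8_t)(state[i] >> 8);
		out[4 * i + 3] = (uint8_t)state[i];
	}
}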
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 54f60ab41c63..112cefacf2af 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -797,7 +797,9 @@ static int rfc4106_init(struct crypto_tfm *tfm)
 		PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
 	struct crypto_aead *cryptd_child;
 	struct aesni_rfc4106_gcm_ctx *child_ctx;
-	cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0);
+	cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni",
+				       CRYPTO_ALG_INTERNAL,
+				       CRYPTO_ALG_INTERNAL);
 	if (IS_ERR(cryptd_tfm))
 		return PTR_ERR(cryptd_tfm);
 
@@ -890,15 +892,12 @@ out_free_ablkcipher:
 	return ret;
 }
 
-static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
-			   unsigned int key_len)
+static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
+				  unsigned int key_len)
 {
 	int ret = 0;
-	struct crypto_tfm *tfm = crypto_aead_tfm(parent);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
-	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
-	struct aesni_rfc4106_gcm_ctx *child_ctx =
-				 aesni_rfc4106_gcm_ctx_get(cryptd_child);
+	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead);
 	u8 *new_key_align, *new_key_mem = NULL;
 
 	if (key_len < 4) {
@@ -943,20 +942,31 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 		goto exit;
 	}
 	ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
-	memcpy(child_ctx, ctx, sizeof(*ctx));
 exit:
 	kfree(new_key_mem);
 	return ret;
 }
 
-/* This is the Integrity Check Value (aka the authentication tag length and can
- * be 8, 12 or 16 bytes long. */
-static int rfc4106_set_authsize(struct crypto_aead *parent,
-				unsigned int authsize)
+static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
+			   unsigned int key_len)
 {
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
-	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
+	struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
+	struct aesni_rfc4106_gcm_ctx *c_ctx = aesni_rfc4106_gcm_ctx_get(child);
+	struct cryptd_aead *cryptd_tfm = ctx->cryptd_tfm;
+	int ret;
 
+	ret = crypto_aead_setkey(child, key, key_len);
+	if (!ret) {
+		memcpy(ctx, c_ctx, sizeof(*ctx));
+		ctx->cryptd_tfm = cryptd_tfm;
+	}
+	return ret;
+}
+
+static int common_rfc4106_set_authsize(struct crypto_aead *aead,
+				       unsigned int authsize)
+{
 	switch (authsize) {
 	case 8:
 	case 12:
@@ -965,51 +975,23 @@ static int rfc4106_set_authsize(struct crypto_aead *parent,
 	default:
 		return -EINVAL;
 	}
-	crypto_aead_crt(parent)->authsize = authsize;
-	crypto_aead_crt(cryptd_child)->authsize = authsize;
+	crypto_aead_crt(aead)->authsize = authsize;
 	return 0;
 }
 
-static int rfc4106_encrypt(struct aead_request *req)
-{
-	int ret;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct aead_request *cryptd_req =
-			(struct aead_request *) aead_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_aead_encrypt(cryptd_req);
-	} else {
-		struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
-		kernel_fpu_begin();
-		ret = cryptd_child->base.crt_aead.encrypt(req);
-		kernel_fpu_end();
-		return ret;
-	}
-}
-
-static int rfc4106_decrypt(struct aead_request *req)
+/* This is the Integrity Check Value (aka the authentication tag length and can
+ * be 8, 12 or 16 bytes long. */
+static int rfc4106_set_authsize(struct crypto_aead *parent,
+				unsigned int authsize)
 {
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
+	struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
 	int ret;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
 
-	if (!irq_fpu_usable()) {
-		struct aead_request *cryptd_req =
-			(struct aead_request *) aead_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_aead_decrypt(cryptd_req);
-	} else {
-		struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
-		kernel_fpu_begin();
-		ret = cryptd_child->base.crt_aead.decrypt(req);
-		kernel_fpu_end();
-		return ret;
-	}
+	ret = crypto_aead_setauthsize(child, authsize);
+	if (!ret)
+		crypto_aead_crt(parent)->authsize = authsize;
+	return ret;
 }
 
 static int __driver_rfc4106_encrypt(struct aead_request *req)
@@ -1185,6 +1167,78 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
 	}
 	return retval;
 }
+
+static int rfc4106_encrypt(struct aead_request *req)
+{
+	int ret;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct aead_request *cryptd_req =
+			(struct aead_request *) aead_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+		ret = crypto_aead_encrypt(cryptd_req);
+	} else {
+		kernel_fpu_begin();
+		ret = __driver_rfc4106_encrypt(req);
+		kernel_fpu_end();
+	}
+	return ret;
+}
+
+static int rfc4106_decrypt(struct aead_request *req)
+{
+	int ret;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct aead_request *cryptd_req =
+			(struct aead_request *) aead_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+		ret = crypto_aead_decrypt(cryptd_req);
+	} else {
+		kernel_fpu_begin();
+		ret = __driver_rfc4106_decrypt(req);
+		kernel_fpu_end();
+	}
+	return ret;
+}
+
+static int helper_rfc4106_encrypt(struct aead_request *req)
+{
+	int ret;
+
+	if (unlikely(!irq_fpu_usable())) {
+		WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
+		ret = -EINVAL;
+	} else {
+		kernel_fpu_begin();
+		ret = __driver_rfc4106_encrypt(req);
+		kernel_fpu_end();
+	}
+	return ret;
+}
+
+static int helper_rfc4106_decrypt(struct aead_request *req)
+{
+	int ret;
+
+	if (unlikely(!irq_fpu_usable())) {
+		WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
+		ret = -EINVAL;
+	} else {
+		kernel_fpu_begin();
+		ret = __driver_rfc4106_decrypt(req);
+		kernel_fpu_end();
+	}
+	return ret;
+}
 #endif
 
 static struct crypto_alg aesni_algs[] = { {
@@ -1210,7 +1264,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_name = "__aes-aesni",
 	.cra_driver_name = "__driver-aes-aesni",
 	.cra_priority = 0,
-	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+	.cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
 	.cra_blocksize = AES_BLOCK_SIZE,
 	.cra_ctxsize = sizeof(struct crypto_aes_ctx) +
 		       AESNI_ALIGN - 1,
@@ -1229,7 +1283,8 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_name = "__ecb-aes-aesni",
 	.cra_driver_name = "__driver-ecb-aes-aesni",
 	.cra_priority = 0,
-	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+		     CRYPTO_ALG_INTERNAL,
 	.cra_blocksize = AES_BLOCK_SIZE,
 	.cra_ctxsize = sizeof(struct crypto_aes_ctx) +
 		       AESNI_ALIGN - 1,
@@ -1249,7 +1304,8 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_name = "__cbc-aes-aesni",
 	.cra_driver_name = "__driver-cbc-aes-aesni",
1251 .cra_priority = 0, 1306 .cra_priority = 0,
1252 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 1307 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
1308 CRYPTO_ALG_INTERNAL,
1253 .cra_blocksize = AES_BLOCK_SIZE, 1309 .cra_blocksize = AES_BLOCK_SIZE,
1254 .cra_ctxsize = sizeof(struct crypto_aes_ctx) + 1310 .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
1255 AESNI_ALIGN - 1, 1311 AESNI_ALIGN - 1,
@@ -1313,7 +1369,8 @@ static struct crypto_alg aesni_algs[] = { {
1313 .cra_name = "__ctr-aes-aesni", 1369 .cra_name = "__ctr-aes-aesni",
1314 .cra_driver_name = "__driver-ctr-aes-aesni", 1370 .cra_driver_name = "__driver-ctr-aes-aesni",
1315 .cra_priority = 0, 1371 .cra_priority = 0,
1316 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 1372 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
1373 CRYPTO_ALG_INTERNAL,
1317 .cra_blocksize = 1, 1374 .cra_blocksize = 1,
1318 .cra_ctxsize = sizeof(struct crypto_aes_ctx) + 1375 .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
1319 AESNI_ALIGN - 1, 1376 AESNI_ALIGN - 1,
@@ -1357,7 +1414,7 @@ static struct crypto_alg aesni_algs[] = { {
1357 .cra_name = "__gcm-aes-aesni", 1414 .cra_name = "__gcm-aes-aesni",
1358 .cra_driver_name = "__driver-gcm-aes-aesni", 1415 .cra_driver_name = "__driver-gcm-aes-aesni",
1359 .cra_priority = 0, 1416 .cra_priority = 0,
1360 .cra_flags = CRYPTO_ALG_TYPE_AEAD, 1417 .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_INTERNAL,
1361 .cra_blocksize = 1, 1418 .cra_blocksize = 1,
1362 .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + 1419 .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) +
1363 AESNI_ALIGN, 1420 AESNI_ALIGN,
@@ -1366,8 +1423,12 @@ static struct crypto_alg aesni_algs[] = { {
1366 .cra_module = THIS_MODULE, 1423 .cra_module = THIS_MODULE,
1367 .cra_u = { 1424 .cra_u = {
1368 .aead = { 1425 .aead = {
1369 .encrypt = __driver_rfc4106_encrypt, 1426 .setkey = common_rfc4106_set_key,
1370 .decrypt = __driver_rfc4106_decrypt, 1427 .setauthsize = common_rfc4106_set_authsize,
1428 .encrypt = helper_rfc4106_encrypt,
1429 .decrypt = helper_rfc4106_decrypt,
1430 .ivsize = 8,
1431 .maxauthsize = 16,
1371 }, 1432 },
1372 }, 1433 },
1373}, { 1434}, {
@@ -1423,7 +1484,8 @@ static struct crypto_alg aesni_algs[] = { {
1423 .cra_name = "__lrw-aes-aesni", 1484 .cra_name = "__lrw-aes-aesni",
1424 .cra_driver_name = "__driver-lrw-aes-aesni", 1485 .cra_driver_name = "__driver-lrw-aes-aesni",
1425 .cra_priority = 0, 1486 .cra_priority = 0,
1426 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 1487 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
1488 CRYPTO_ALG_INTERNAL,
1427 .cra_blocksize = AES_BLOCK_SIZE, 1489 .cra_blocksize = AES_BLOCK_SIZE,
1428 .cra_ctxsize = sizeof(struct aesni_lrw_ctx), 1490 .cra_ctxsize = sizeof(struct aesni_lrw_ctx),
1429 .cra_alignmask = 0, 1491 .cra_alignmask = 0,
@@ -1444,7 +1506,8 @@ static struct crypto_alg aesni_algs[] = { {
1444 .cra_name = "__xts-aes-aesni", 1506 .cra_name = "__xts-aes-aesni",
1445 .cra_driver_name = "__driver-xts-aes-aesni", 1507 .cra_driver_name = "__driver-xts-aes-aesni",
1446 .cra_priority = 0, 1508 .cra_priority = 0,
1447 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 1509 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
1510 CRYPTO_ALG_INTERNAL,
1448 .cra_blocksize = AES_BLOCK_SIZE, 1511 .cra_blocksize = AES_BLOCK_SIZE,
1449 .cra_ctxsize = sizeof(struct aesni_xts_ctx), 1512 .cra_ctxsize = sizeof(struct aesni_xts_ctx),
1450 .cra_alignmask = 0, 1513 .cra_alignmask = 0,
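Note on the hunks above: rfc4106_encrypt()/rfc4106_decrypt() (and the new helper_rfc4106_* entry points) all share one dispatch shape: if the FPU cannot be used in the current context the request is bounced to cryptd, otherwise the SIMD path runs between kernel_fpu_begin()/kernel_fpu_end(). Below is a condensed, illustrative sketch of that shape only; struct my_ctx, my_fpu_gated_encrypt() and my_simd_encrypt() are hypothetical names, not the driver's actual code.

/*
 * Minimal sketch of the FPU-gated dispatch pattern used above.
 * my_simd_encrypt() stands in for the real AES-NI code path.
 */
#include <linux/crypto.h>
#include <linux/string.h>
#include <crypto/cryptd.h>
#include <asm/i387.h>

struct my_ctx {
	struct cryptd_aead *cryptd_tfm;	/* async fallback transform */
};

static int my_simd_encrypt(struct aead_request *req)
{
	/* placeholder for the SIMD implementation */
	return 0;
}

static int my_fpu_gated_encrypt(struct aead_request *req)
{
	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
	struct my_ctx *ctx = crypto_aead_ctx(tfm);
	int ret;

	if (!irq_fpu_usable()) {
		/* Cannot touch SIMD state here: hand the request to
		 * cryptd, which re-issues it from process context. */
		struct aead_request *subreq = aead_request_ctx(req);

		memcpy(subreq, req, sizeof(*req));
		aead_request_set_tfm(subreq, &ctx->cryptd_tfm->base);
		return crypto_aead_encrypt(subreq);
	}

	kernel_fpu_begin();
	ret = my_simd_encrypt(req);
	kernel_fpu_end();
	return ret;
}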
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 9a07fafe3831..baf0ac21ace5 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -343,7 +343,8 @@ static struct crypto_alg cmll_algs[10] = { {
343 .cra_name = "__ecb-camellia-aesni-avx2", 343 .cra_name = "__ecb-camellia-aesni-avx2",
344 .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", 344 .cra_driver_name = "__driver-ecb-camellia-aesni-avx2",
345 .cra_priority = 0, 345 .cra_priority = 0,
346 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 346 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
347 CRYPTO_ALG_INTERNAL,
347 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 348 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
348 .cra_ctxsize = sizeof(struct camellia_ctx), 349 .cra_ctxsize = sizeof(struct camellia_ctx),
349 .cra_alignmask = 0, 350 .cra_alignmask = 0,
@@ -362,7 +363,8 @@ static struct crypto_alg cmll_algs[10] = { {
362 .cra_name = "__cbc-camellia-aesni-avx2", 363 .cra_name = "__cbc-camellia-aesni-avx2",
363 .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", 364 .cra_driver_name = "__driver-cbc-camellia-aesni-avx2",
364 .cra_priority = 0, 365 .cra_priority = 0,
365 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 366 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
367 CRYPTO_ALG_INTERNAL,
366 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 368 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
367 .cra_ctxsize = sizeof(struct camellia_ctx), 369 .cra_ctxsize = sizeof(struct camellia_ctx),
368 .cra_alignmask = 0, 370 .cra_alignmask = 0,
@@ -381,7 +383,8 @@ static struct crypto_alg cmll_algs[10] = { {
381 .cra_name = "__ctr-camellia-aesni-avx2", 383 .cra_name = "__ctr-camellia-aesni-avx2",
382 .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", 384 .cra_driver_name = "__driver-ctr-camellia-aesni-avx2",
383 .cra_priority = 0, 385 .cra_priority = 0,
384 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 386 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
387 CRYPTO_ALG_INTERNAL,
385 .cra_blocksize = 1, 388 .cra_blocksize = 1,
386 .cra_ctxsize = sizeof(struct camellia_ctx), 389 .cra_ctxsize = sizeof(struct camellia_ctx),
387 .cra_alignmask = 0, 390 .cra_alignmask = 0,
@@ -401,7 +404,8 @@ static struct crypto_alg cmll_algs[10] = { {
401 .cra_name = "__lrw-camellia-aesni-avx2", 404 .cra_name = "__lrw-camellia-aesni-avx2",
402 .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", 405 .cra_driver_name = "__driver-lrw-camellia-aesni-avx2",
403 .cra_priority = 0, 406 .cra_priority = 0,
404 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 407 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
408 CRYPTO_ALG_INTERNAL,
405 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 409 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
406 .cra_ctxsize = sizeof(struct camellia_lrw_ctx), 410 .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
407 .cra_alignmask = 0, 411 .cra_alignmask = 0,
@@ -424,7 +428,8 @@ static struct crypto_alg cmll_algs[10] = { {
424 .cra_name = "__xts-camellia-aesni-avx2", 428 .cra_name = "__xts-camellia-aesni-avx2",
425 .cra_driver_name = "__driver-xts-camellia-aesni-avx2", 429 .cra_driver_name = "__driver-xts-camellia-aesni-avx2",
426 .cra_priority = 0, 430 .cra_priority = 0,
427 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 431 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
432 CRYPTO_ALG_INTERNAL,
428 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 433 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
429 .cra_ctxsize = sizeof(struct camellia_xts_ctx), 434 .cra_ctxsize = sizeof(struct camellia_xts_ctx),
430 .cra_alignmask = 0, 435 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index ed38d959add6..78818a1e73e3 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -335,7 +335,8 @@ static struct crypto_alg cmll_algs[10] = { {
335 .cra_name = "__ecb-camellia-aesni", 335 .cra_name = "__ecb-camellia-aesni",
336 .cra_driver_name = "__driver-ecb-camellia-aesni", 336 .cra_driver_name = "__driver-ecb-camellia-aesni",
337 .cra_priority = 0, 337 .cra_priority = 0,
338 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 338 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
339 CRYPTO_ALG_INTERNAL,
339 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 340 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
340 .cra_ctxsize = sizeof(struct camellia_ctx), 341 .cra_ctxsize = sizeof(struct camellia_ctx),
341 .cra_alignmask = 0, 342 .cra_alignmask = 0,
@@ -354,7 +355,8 @@ static struct crypto_alg cmll_algs[10] = { {
354 .cra_name = "__cbc-camellia-aesni", 355 .cra_name = "__cbc-camellia-aesni",
355 .cra_driver_name = "__driver-cbc-camellia-aesni", 356 .cra_driver_name = "__driver-cbc-camellia-aesni",
356 .cra_priority = 0, 357 .cra_priority = 0,
357 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 358 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
359 CRYPTO_ALG_INTERNAL,
358 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 360 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
359 .cra_ctxsize = sizeof(struct camellia_ctx), 361 .cra_ctxsize = sizeof(struct camellia_ctx),
360 .cra_alignmask = 0, 362 .cra_alignmask = 0,
@@ -373,7 +375,8 @@ static struct crypto_alg cmll_algs[10] = { {
373 .cra_name = "__ctr-camellia-aesni", 375 .cra_name = "__ctr-camellia-aesni",
374 .cra_driver_name = "__driver-ctr-camellia-aesni", 376 .cra_driver_name = "__driver-ctr-camellia-aesni",
375 .cra_priority = 0, 377 .cra_priority = 0,
376 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 378 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
379 CRYPTO_ALG_INTERNAL,
377 .cra_blocksize = 1, 380 .cra_blocksize = 1,
378 .cra_ctxsize = sizeof(struct camellia_ctx), 381 .cra_ctxsize = sizeof(struct camellia_ctx),
379 .cra_alignmask = 0, 382 .cra_alignmask = 0,
@@ -393,7 +396,8 @@ static struct crypto_alg cmll_algs[10] = { {
393 .cra_name = "__lrw-camellia-aesni", 396 .cra_name = "__lrw-camellia-aesni",
394 .cra_driver_name = "__driver-lrw-camellia-aesni", 397 .cra_driver_name = "__driver-lrw-camellia-aesni",
395 .cra_priority = 0, 398 .cra_priority = 0,
396 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 399 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
400 CRYPTO_ALG_INTERNAL,
397 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 401 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
398 .cra_ctxsize = sizeof(struct camellia_lrw_ctx), 402 .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
399 .cra_alignmask = 0, 403 .cra_alignmask = 0,
@@ -416,7 +420,8 @@ static struct crypto_alg cmll_algs[10] = { {
416 .cra_name = "__xts-camellia-aesni", 420 .cra_name = "__xts-camellia-aesni",
417 .cra_driver_name = "__driver-xts-camellia-aesni", 421 .cra_driver_name = "__driver-xts-camellia-aesni",
418 .cra_priority = 0, 422 .cra_priority = 0,
419 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 423 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
424 CRYPTO_ALG_INTERNAL,
420 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 425 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
421 .cra_ctxsize = sizeof(struct camellia_xts_ctx), 426 .cra_ctxsize = sizeof(struct camellia_xts_ctx),
422 .cra_alignmask = 0, 427 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 60ada677a928..236c80974457 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -341,7 +341,8 @@ static struct crypto_alg cast5_algs[6] = { {
341 .cra_name = "__ecb-cast5-avx", 341 .cra_name = "__ecb-cast5-avx",
342 .cra_driver_name = "__driver-ecb-cast5-avx", 342 .cra_driver_name = "__driver-ecb-cast5-avx",
343 .cra_priority = 0, 343 .cra_priority = 0,
344 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 344 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
345 CRYPTO_ALG_INTERNAL,
345 .cra_blocksize = CAST5_BLOCK_SIZE, 346 .cra_blocksize = CAST5_BLOCK_SIZE,
346 .cra_ctxsize = sizeof(struct cast5_ctx), 347 .cra_ctxsize = sizeof(struct cast5_ctx),
347 .cra_alignmask = 0, 348 .cra_alignmask = 0,
@@ -360,7 +361,8 @@ static struct crypto_alg cast5_algs[6] = { {
360 .cra_name = "__cbc-cast5-avx", 361 .cra_name = "__cbc-cast5-avx",
361 .cra_driver_name = "__driver-cbc-cast5-avx", 362 .cra_driver_name = "__driver-cbc-cast5-avx",
362 .cra_priority = 0, 363 .cra_priority = 0,
363 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 364 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
365 CRYPTO_ALG_INTERNAL,
364 .cra_blocksize = CAST5_BLOCK_SIZE, 366 .cra_blocksize = CAST5_BLOCK_SIZE,
365 .cra_ctxsize = sizeof(struct cast5_ctx), 367 .cra_ctxsize = sizeof(struct cast5_ctx),
366 .cra_alignmask = 0, 368 .cra_alignmask = 0,
@@ -379,7 +381,8 @@ static struct crypto_alg cast5_algs[6] = { {
379 .cra_name = "__ctr-cast5-avx", 381 .cra_name = "__ctr-cast5-avx",
380 .cra_driver_name = "__driver-ctr-cast5-avx", 382 .cra_driver_name = "__driver-ctr-cast5-avx",
381 .cra_priority = 0, 383 .cra_priority = 0,
382 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 384 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
385 CRYPTO_ALG_INTERNAL,
383 .cra_blocksize = 1, 386 .cra_blocksize = 1,
384 .cra_ctxsize = sizeof(struct cast5_ctx), 387 .cra_ctxsize = sizeof(struct cast5_ctx),
385 .cra_alignmask = 0, 388 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 0160f68a57ff..f448810ca4ac 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -372,7 +372,8 @@ static struct crypto_alg cast6_algs[10] = { {
372 .cra_name = "__ecb-cast6-avx", 372 .cra_name = "__ecb-cast6-avx",
373 .cra_driver_name = "__driver-ecb-cast6-avx", 373 .cra_driver_name = "__driver-ecb-cast6-avx",
374 .cra_priority = 0, 374 .cra_priority = 0,
375 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 375 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
376 CRYPTO_ALG_INTERNAL,
376 .cra_blocksize = CAST6_BLOCK_SIZE, 377 .cra_blocksize = CAST6_BLOCK_SIZE,
377 .cra_ctxsize = sizeof(struct cast6_ctx), 378 .cra_ctxsize = sizeof(struct cast6_ctx),
378 .cra_alignmask = 0, 379 .cra_alignmask = 0,
@@ -391,7 +392,8 @@ static struct crypto_alg cast6_algs[10] = { {
391 .cra_name = "__cbc-cast6-avx", 392 .cra_name = "__cbc-cast6-avx",
392 .cra_driver_name = "__driver-cbc-cast6-avx", 393 .cra_driver_name = "__driver-cbc-cast6-avx",
393 .cra_priority = 0, 394 .cra_priority = 0,
394 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 395 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
396 CRYPTO_ALG_INTERNAL,
395 .cra_blocksize = CAST6_BLOCK_SIZE, 397 .cra_blocksize = CAST6_BLOCK_SIZE,
396 .cra_ctxsize = sizeof(struct cast6_ctx), 398 .cra_ctxsize = sizeof(struct cast6_ctx),
397 .cra_alignmask = 0, 399 .cra_alignmask = 0,
@@ -410,7 +412,8 @@ static struct crypto_alg cast6_algs[10] = { {
410 .cra_name = "__ctr-cast6-avx", 412 .cra_name = "__ctr-cast6-avx",
411 .cra_driver_name = "__driver-ctr-cast6-avx", 413 .cra_driver_name = "__driver-ctr-cast6-avx",
412 .cra_priority = 0, 414 .cra_priority = 0,
413 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 415 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
416 CRYPTO_ALG_INTERNAL,
414 .cra_blocksize = 1, 417 .cra_blocksize = 1,
415 .cra_ctxsize = sizeof(struct cast6_ctx), 418 .cra_ctxsize = sizeof(struct cast6_ctx),
416 .cra_alignmask = 0, 419 .cra_alignmask = 0,
@@ -430,7 +433,8 @@ static struct crypto_alg cast6_algs[10] = { {
430 .cra_name = "__lrw-cast6-avx", 433 .cra_name = "__lrw-cast6-avx",
431 .cra_driver_name = "__driver-lrw-cast6-avx", 434 .cra_driver_name = "__driver-lrw-cast6-avx",
432 .cra_priority = 0, 435 .cra_priority = 0,
433 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 436 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
437 CRYPTO_ALG_INTERNAL,
434 .cra_blocksize = CAST6_BLOCK_SIZE, 438 .cra_blocksize = CAST6_BLOCK_SIZE,
435 .cra_ctxsize = sizeof(struct cast6_lrw_ctx), 439 .cra_ctxsize = sizeof(struct cast6_lrw_ctx),
436 .cra_alignmask = 0, 440 .cra_alignmask = 0,
@@ -453,7 +457,8 @@ static struct crypto_alg cast6_algs[10] = { {
453 .cra_name = "__xts-cast6-avx", 457 .cra_name = "__xts-cast6-avx",
454 .cra_driver_name = "__driver-xts-cast6-avx", 458 .cra_driver_name = "__driver-xts-cast6-avx",
455 .cra_priority = 0, 459 .cra_priority = 0,
456 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 460 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
461 CRYPTO_ALG_INTERNAL,
457 .cra_blocksize = CAST6_BLOCK_SIZE, 462 .cra_blocksize = CAST6_BLOCK_SIZE,
458 .cra_ctxsize = sizeof(struct cast6_xts_ctx), 463 .cra_ctxsize = sizeof(struct cast6_xts_ctx),
459 .cra_alignmask = 0, 464 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 8253d85aa165..2079baf06bdd 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -154,7 +154,8 @@ static struct shash_alg ghash_alg = {
154 .cra_name = "__ghash", 154 .cra_name = "__ghash",
155 .cra_driver_name = "__ghash-pclmulqdqni", 155 .cra_driver_name = "__ghash-pclmulqdqni",
156 .cra_priority = 0, 156 .cra_priority = 0,
157 .cra_flags = CRYPTO_ALG_TYPE_SHASH, 157 .cra_flags = CRYPTO_ALG_TYPE_SHASH |
158 CRYPTO_ALG_INTERNAL,
158 .cra_blocksize = GHASH_BLOCK_SIZE, 159 .cra_blocksize = GHASH_BLOCK_SIZE,
159 .cra_ctxsize = sizeof(struct ghash_ctx), 160 .cra_ctxsize = sizeof(struct ghash_ctx),
160 .cra_module = THIS_MODULE, 161 .cra_module = THIS_MODULE,
@@ -261,7 +262,9 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm)
261 struct cryptd_ahash *cryptd_tfm; 262 struct cryptd_ahash *cryptd_tfm;
262 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); 263 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
263 264
264 cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); 265 cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni",
266 CRYPTO_ALG_INTERNAL,
267 CRYPTO_ALG_INTERNAL);
265 if (IS_ERR(cryptd_tfm)) 268 if (IS_ERR(cryptd_tfm))
266 return PTR_ERR(cryptd_tfm); 269 return PTR_ERR(cryptd_tfm);
267 ctx->cryptd_tfm = cryptd_tfm; 270 ctx->cryptd_tfm = cryptd_tfm;
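Note on the CRYPTO_ALG_INTERNAL additions: the bare "__" SIMD implementations are being flagged as internal-only, and the wrappers that legitimately need them (like the cryptd_alloc_ahash() call above) now pass CRYPTO_ALG_INTERNAL in both the type and the mask. A plain lookup that leaves the bit clear is expected to no longer resolve to these helpers. The sketch below illustrates the two kinds of lookup under that assumption; it is not code from this patch.

/* Sketch of the two lookup styles, assuming the CRYPTO_ALG_INTERNAL
 * masking described above. */
#include <linux/err.h>
#include <crypto/hash.h>
#include <crypto/cryptd.h>

static int example_lookups(void)
{
	struct crypto_shash *pub;
	struct cryptd_ahash *inner;

	/* Normal users keep doing this and get a non-internal driver
	 * (e.g. the clmulni async wrapper or ghash-generic), never
	 * the bare "__ghash" helper. */
	pub = crypto_alloc_shash("ghash", 0, 0);
	if (IS_ERR(pub))
		return PTR_ERR(pub);

	/* The wrapper itself asks for the internal helper explicitly. */
	inner = cryptd_alloc_ahash("__ghash-pclmulqdqni",
				   CRYPTO_ALG_INTERNAL,
				   CRYPTO_ALG_INTERNAL);
	if (IS_ERR(inner)) {
		crypto_free_shash(pub);
		return PTR_ERR(inner);
	}

	cryptd_free_ahash(inner);
	crypto_free_shash(pub);
	return 0;
}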
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 432f1d76ceb8..6a85598931b5 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -232,7 +232,6 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
232 232
233 le128_to_be128((be128 *)walk->iv, &ctrblk); 233 le128_to_be128((be128 *)walk->iv, &ctrblk);
234} 234}
235EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
236 235
237static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, 236static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
238 struct blkcipher_desc *desc, 237 struct blkcipher_desc *desc,
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 437e47a4d302..2f63dc89e7a9 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -309,7 +309,8 @@ static struct crypto_alg srp_algs[10] = { {
309 .cra_name = "__ecb-serpent-avx2", 309 .cra_name = "__ecb-serpent-avx2",
310 .cra_driver_name = "__driver-ecb-serpent-avx2", 310 .cra_driver_name = "__driver-ecb-serpent-avx2",
311 .cra_priority = 0, 311 .cra_priority = 0,
312 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 312 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
313 CRYPTO_ALG_INTERNAL,
313 .cra_blocksize = SERPENT_BLOCK_SIZE, 314 .cra_blocksize = SERPENT_BLOCK_SIZE,
314 .cra_ctxsize = sizeof(struct serpent_ctx), 315 .cra_ctxsize = sizeof(struct serpent_ctx),
315 .cra_alignmask = 0, 316 .cra_alignmask = 0,
@@ -329,7 +330,8 @@ static struct crypto_alg srp_algs[10] = { {
329 .cra_name = "__cbc-serpent-avx2", 330 .cra_name = "__cbc-serpent-avx2",
330 .cra_driver_name = "__driver-cbc-serpent-avx2", 331 .cra_driver_name = "__driver-cbc-serpent-avx2",
331 .cra_priority = 0, 332 .cra_priority = 0,
332 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 333 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
334 CRYPTO_ALG_INTERNAL,
333 .cra_blocksize = SERPENT_BLOCK_SIZE, 335 .cra_blocksize = SERPENT_BLOCK_SIZE,
334 .cra_ctxsize = sizeof(struct serpent_ctx), 336 .cra_ctxsize = sizeof(struct serpent_ctx),
335 .cra_alignmask = 0, 337 .cra_alignmask = 0,
@@ -349,7 +351,8 @@ static struct crypto_alg srp_algs[10] = { {
349 .cra_name = "__ctr-serpent-avx2", 351 .cra_name = "__ctr-serpent-avx2",
350 .cra_driver_name = "__driver-ctr-serpent-avx2", 352 .cra_driver_name = "__driver-ctr-serpent-avx2",
351 .cra_priority = 0, 353 .cra_priority = 0,
352 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 354 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
355 CRYPTO_ALG_INTERNAL,
353 .cra_blocksize = 1, 356 .cra_blocksize = 1,
354 .cra_ctxsize = sizeof(struct serpent_ctx), 357 .cra_ctxsize = sizeof(struct serpent_ctx),
355 .cra_alignmask = 0, 358 .cra_alignmask = 0,
@@ -370,7 +373,8 @@ static struct crypto_alg srp_algs[10] = { {
370 .cra_name = "__lrw-serpent-avx2", 373 .cra_name = "__lrw-serpent-avx2",
371 .cra_driver_name = "__driver-lrw-serpent-avx2", 374 .cra_driver_name = "__driver-lrw-serpent-avx2",
372 .cra_priority = 0, 375 .cra_priority = 0,
373 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 376 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
377 CRYPTO_ALG_INTERNAL,
374 .cra_blocksize = SERPENT_BLOCK_SIZE, 378 .cra_blocksize = SERPENT_BLOCK_SIZE,
375 .cra_ctxsize = sizeof(struct serpent_lrw_ctx), 379 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
376 .cra_alignmask = 0, 380 .cra_alignmask = 0,
@@ -394,7 +398,8 @@ static struct crypto_alg srp_algs[10] = { {
394 .cra_name = "__xts-serpent-avx2", 398 .cra_name = "__xts-serpent-avx2",
395 .cra_driver_name = "__driver-xts-serpent-avx2", 399 .cra_driver_name = "__driver-xts-serpent-avx2",
396 .cra_priority = 0, 400 .cra_priority = 0,
397 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 401 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
402 CRYPTO_ALG_INTERNAL,
398 .cra_blocksize = SERPENT_BLOCK_SIZE, 403 .cra_blocksize = SERPENT_BLOCK_SIZE,
399 .cra_ctxsize = sizeof(struct serpent_xts_ctx), 404 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
400 .cra_alignmask = 0, 405 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 7e217398b4eb..c8d478af8456 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -378,7 +378,8 @@ static struct crypto_alg serpent_algs[10] = { {
378 .cra_name = "__ecb-serpent-avx", 378 .cra_name = "__ecb-serpent-avx",
379 .cra_driver_name = "__driver-ecb-serpent-avx", 379 .cra_driver_name = "__driver-ecb-serpent-avx",
380 .cra_priority = 0, 380 .cra_priority = 0,
381 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 381 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
382 CRYPTO_ALG_INTERNAL,
382 .cra_blocksize = SERPENT_BLOCK_SIZE, 383 .cra_blocksize = SERPENT_BLOCK_SIZE,
383 .cra_ctxsize = sizeof(struct serpent_ctx), 384 .cra_ctxsize = sizeof(struct serpent_ctx),
384 .cra_alignmask = 0, 385 .cra_alignmask = 0,
@@ -397,7 +398,8 @@ static struct crypto_alg serpent_algs[10] = { {
397 .cra_name = "__cbc-serpent-avx", 398 .cra_name = "__cbc-serpent-avx",
398 .cra_driver_name = "__driver-cbc-serpent-avx", 399 .cra_driver_name = "__driver-cbc-serpent-avx",
399 .cra_priority = 0, 400 .cra_priority = 0,
400 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 401 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
402 CRYPTO_ALG_INTERNAL,
401 .cra_blocksize = SERPENT_BLOCK_SIZE, 403 .cra_blocksize = SERPENT_BLOCK_SIZE,
402 .cra_ctxsize = sizeof(struct serpent_ctx), 404 .cra_ctxsize = sizeof(struct serpent_ctx),
403 .cra_alignmask = 0, 405 .cra_alignmask = 0,
@@ -416,7 +418,8 @@ static struct crypto_alg serpent_algs[10] = { {
416 .cra_name = "__ctr-serpent-avx", 418 .cra_name = "__ctr-serpent-avx",
417 .cra_driver_name = "__driver-ctr-serpent-avx", 419 .cra_driver_name = "__driver-ctr-serpent-avx",
418 .cra_priority = 0, 420 .cra_priority = 0,
419 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 421 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
422 CRYPTO_ALG_INTERNAL,
420 .cra_blocksize = 1, 423 .cra_blocksize = 1,
421 .cra_ctxsize = sizeof(struct serpent_ctx), 424 .cra_ctxsize = sizeof(struct serpent_ctx),
422 .cra_alignmask = 0, 425 .cra_alignmask = 0,
@@ -436,7 +439,8 @@ static struct crypto_alg serpent_algs[10] = { {
436 .cra_name = "__lrw-serpent-avx", 439 .cra_name = "__lrw-serpent-avx",
437 .cra_driver_name = "__driver-lrw-serpent-avx", 440 .cra_driver_name = "__driver-lrw-serpent-avx",
438 .cra_priority = 0, 441 .cra_priority = 0,
439 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 442 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
443 CRYPTO_ALG_INTERNAL,
440 .cra_blocksize = SERPENT_BLOCK_SIZE, 444 .cra_blocksize = SERPENT_BLOCK_SIZE,
441 .cra_ctxsize = sizeof(struct serpent_lrw_ctx), 445 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
442 .cra_alignmask = 0, 446 .cra_alignmask = 0,
@@ -459,7 +463,8 @@ static struct crypto_alg serpent_algs[10] = { {
459 .cra_name = "__xts-serpent-avx", 463 .cra_name = "__xts-serpent-avx",
460 .cra_driver_name = "__driver-xts-serpent-avx", 464 .cra_driver_name = "__driver-xts-serpent-avx",
461 .cra_priority = 0, 465 .cra_priority = 0,
462 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 466 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
467 CRYPTO_ALG_INTERNAL,
463 .cra_blocksize = SERPENT_BLOCK_SIZE, 468 .cra_blocksize = SERPENT_BLOCK_SIZE,
464 .cra_ctxsize = sizeof(struct serpent_xts_ctx), 469 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
465 .cra_alignmask = 0, 470 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index bf025adaea01..3643dd508f45 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -387,7 +387,8 @@ static struct crypto_alg serpent_algs[10] = { {
387 .cra_name = "__ecb-serpent-sse2", 387 .cra_name = "__ecb-serpent-sse2",
388 .cra_driver_name = "__driver-ecb-serpent-sse2", 388 .cra_driver_name = "__driver-ecb-serpent-sse2",
389 .cra_priority = 0, 389 .cra_priority = 0,
390 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 390 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
391 CRYPTO_ALG_INTERNAL,
391 .cra_blocksize = SERPENT_BLOCK_SIZE, 392 .cra_blocksize = SERPENT_BLOCK_SIZE,
392 .cra_ctxsize = sizeof(struct serpent_ctx), 393 .cra_ctxsize = sizeof(struct serpent_ctx),
393 .cra_alignmask = 0, 394 .cra_alignmask = 0,
@@ -406,7 +407,8 @@ static struct crypto_alg serpent_algs[10] = { {
406 .cra_name = "__cbc-serpent-sse2", 407 .cra_name = "__cbc-serpent-sse2",
407 .cra_driver_name = "__driver-cbc-serpent-sse2", 408 .cra_driver_name = "__driver-cbc-serpent-sse2",
408 .cra_priority = 0, 409 .cra_priority = 0,
409 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 410 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
411 CRYPTO_ALG_INTERNAL,
410 .cra_blocksize = SERPENT_BLOCK_SIZE, 412 .cra_blocksize = SERPENT_BLOCK_SIZE,
411 .cra_ctxsize = sizeof(struct serpent_ctx), 413 .cra_ctxsize = sizeof(struct serpent_ctx),
412 .cra_alignmask = 0, 414 .cra_alignmask = 0,
@@ -425,7 +427,8 @@ static struct crypto_alg serpent_algs[10] = { {
425 .cra_name = "__ctr-serpent-sse2", 427 .cra_name = "__ctr-serpent-sse2",
426 .cra_driver_name = "__driver-ctr-serpent-sse2", 428 .cra_driver_name = "__driver-ctr-serpent-sse2",
427 .cra_priority = 0, 429 .cra_priority = 0,
428 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 430 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
431 CRYPTO_ALG_INTERNAL,
429 .cra_blocksize = 1, 432 .cra_blocksize = 1,
430 .cra_ctxsize = sizeof(struct serpent_ctx), 433 .cra_ctxsize = sizeof(struct serpent_ctx),
431 .cra_alignmask = 0, 434 .cra_alignmask = 0,
@@ -445,7 +448,8 @@ static struct crypto_alg serpent_algs[10] = { {
445 .cra_name = "__lrw-serpent-sse2", 448 .cra_name = "__lrw-serpent-sse2",
446 .cra_driver_name = "__driver-lrw-serpent-sse2", 449 .cra_driver_name = "__driver-lrw-serpent-sse2",
447 .cra_priority = 0, 450 .cra_priority = 0,
448 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 451 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
452 CRYPTO_ALG_INTERNAL,
449 .cra_blocksize = SERPENT_BLOCK_SIZE, 453 .cra_blocksize = SERPENT_BLOCK_SIZE,
450 .cra_ctxsize = sizeof(struct serpent_lrw_ctx), 454 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
451 .cra_alignmask = 0, 455 .cra_alignmask = 0,
@@ -468,7 +472,8 @@ static struct crypto_alg serpent_algs[10] = { {
468 .cra_name = "__xts-serpent-sse2", 472 .cra_name = "__xts-serpent-sse2",
469 .cra_driver_name = "__driver-xts-serpent-sse2", 473 .cra_driver_name = "__driver-xts-serpent-sse2",
470 .cra_priority = 0, 474 .cra_priority = 0,
471 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 475 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
476 CRYPTO_ALG_INTERNAL,
472 .cra_blocksize = SERPENT_BLOCK_SIZE, 477 .cra_blocksize = SERPENT_BLOCK_SIZE,
473 .cra_ctxsize = sizeof(struct serpent_xts_ctx), 478 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
474 .cra_alignmask = 0, 479 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index fd9f6b035b16..e510b1c5d690 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -694,7 +694,8 @@ static struct shash_alg sha1_mb_shash_alg = {
694 * use ASYNC flag as some buffers in multi-buffer 694 * use ASYNC flag as some buffers in multi-buffer
695 * algo may not have completed before hashing thread sleep 695 * algo may not have completed before hashing thread sleep
696 */ 696 */
697 .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC, 697 .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC |
698 CRYPTO_ALG_INTERNAL,
698 .cra_blocksize = SHA1_BLOCK_SIZE, 699 .cra_blocksize = SHA1_BLOCK_SIZE,
699 .cra_module = THIS_MODULE, 700 .cra_module = THIS_MODULE,
700 .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), 701 .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list),
@@ -770,7 +771,9 @@ static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
770 struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); 771 struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
771 struct mcryptd_hash_ctx *mctx; 772 struct mcryptd_hash_ctx *mctx;
772 773
773 mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0); 774 mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
775 CRYPTO_ALG_INTERNAL,
776 CRYPTO_ALG_INTERNAL);
774 if (IS_ERR(mcryptd_tfm)) 777 if (IS_ERR(mcryptd_tfm))
775 return PTR_ERR(mcryptd_tfm); 778 return PTR_ERR(mcryptd_tfm);
776 mctx = crypto_ahash_ctx(&mcryptd_tfm->base); 779 mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
@@ -828,7 +831,7 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
828 while (!list_empty(&cstate->work_list)) { 831 while (!list_empty(&cstate->work_list)) {
829 rctx = list_entry(cstate->work_list.next, 832 rctx = list_entry(cstate->work_list.next,
830 struct mcryptd_hash_request_ctx, waiter); 833 struct mcryptd_hash_request_ctx, waiter);
831 if time_before(cur_time, rctx->tag.expire) 834 if (time_before(cur_time, rctx->tag.expire))
832 break; 835 break;
833 kernel_fpu_begin(); 836 kernel_fpu_begin();
834 sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); 837 sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr);
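Note on the sha1_mb_flusher() hunk above: the old "if time_before(...)" only compiled because time_before() is a macro whose expansion is itself a fully parenthesized expression, so the preprocessor output still had the parentheses the if-statement needs. The stand-alone user-space example below (not kernel code; before() is a simplified stand-in for the kernel macro) shows why such a mistake can slip past the compiler.

/* Stand-alone illustration of why the missing parentheses compiled. */
#include <stdio.h>

#define before(a, b) ((long)((a) - (b)) < 0)	/* expands to "( ... )" */

int main(void)
{
	unsigned long cur = 100, expire = 200;

	if before(cur, expire)		/* preprocesses to: if ((long)((cur) - (expire)) < 0) */
		printf("not expired yet\n");

	if (before(cur, expire))	/* the explicit style the hunk restores */
		printf("same result, but readable\n");

	return 0;
}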
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
index 4ca7e166a2aa..822acb5b464c 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
@@ -56,7 +56,7 @@
56void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) 56void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
57{ 57{
58 unsigned int j; 58 unsigned int j;
59 state->unused_lanes = 0xF76543210; 59 state->unused_lanes = 0xF76543210ULL;
60 for (j = 0; j < 8; j++) { 60 for (j = 0; j < 8; j++) {
61 state->lens[j] = 0xFFFFFFFF; 61 state->lens[j] = 0xFFFFFFFF;
62 state->ldata[j].job_in_lane = NULL; 62 state->ldata[j].job_in_lane = NULL;
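Note on the 0xF76543210ULL change above: the constant needs 36 bits, so an unsuffixed hex literal already gets promoted to a type wide enough to hold it (C picks the first of int, unsigned int, long, unsigned long, long long, unsigned long long that fits); the ULL suffix makes the intended 64-bit width explicit instead of leaving it to that promotion rule. A quick user-space check, under that assumption about unused_lanes being a 64-bit field:

/* Quick check of hex-constant typing; not kernel code. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t lanes_plain = 0xF76543210;	/* already wider than 32 bits */
	uint64_t lanes_ull   = 0xF76543210ULL;	/* width stated explicitly */

	printf("%llx %llx\n",
	       (unsigned long long)lanes_plain,
	       (unsigned long long)lanes_ull);	/* both print f76543210 */
	return 0;
}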
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 6c20fe04a738..33d1b9dc14cc 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -28,7 +28,7 @@
28#include <linux/cryptohash.h> 28#include <linux/cryptohash.h>
29#include <linux/types.h> 29#include <linux/types.h>
30#include <crypto/sha.h> 30#include <crypto/sha.h>
31#include <asm/byteorder.h> 31#include <crypto/sha1_base.h>
32#include <asm/i387.h> 32#include <asm/i387.h>
33#include <asm/xcr.h> 33#include <asm/xcr.h>
34#include <asm/xsave.h> 34#include <asm/xsave.h>
@@ -44,132 +44,51 @@ asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
44#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ 44#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */
45 45
46asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, 46asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
47 unsigned int rounds); 47 unsigned int rounds);
48#endif 48#endif
49 49
50static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); 50static void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
51
52
53static int sha1_ssse3_init(struct shash_desc *desc)
54{
55 struct sha1_state *sctx = shash_desc_ctx(desc);
56
57 *sctx = (struct sha1_state){
58 .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
59 };
60
61 return 0;
62}
63
64static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
65 unsigned int len, unsigned int partial)
66{
67 struct sha1_state *sctx = shash_desc_ctx(desc);
68 unsigned int done = 0;
69
70 sctx->count += len;
71
72 if (partial) {
73 done = SHA1_BLOCK_SIZE - partial;
74 memcpy(sctx->buffer + partial, data, done);
75 sha1_transform_asm(sctx->state, sctx->buffer, 1);
76 }
77
78 if (len - done >= SHA1_BLOCK_SIZE) {
79 const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
80
81 sha1_transform_asm(sctx->state, data + done, rounds);
82 done += rounds * SHA1_BLOCK_SIZE;
83 }
84
85 memcpy(sctx->buffer, data + done, len - done);
86
87 return 0;
88}
89 51
90static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, 52static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
91 unsigned int len) 53 unsigned int len)
92{ 54{
93 struct sha1_state *sctx = shash_desc_ctx(desc); 55 struct sha1_state *sctx = shash_desc_ctx(desc);
94 unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
95 int res;
96 56
97 /* Handle the fast case right here */ 57 if (!irq_fpu_usable() ||
98 if (partial + len < SHA1_BLOCK_SIZE) { 58 (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
99 sctx->count += len; 59 return crypto_sha1_update(desc, data, len);
100 memcpy(sctx->buffer + partial, data, len);
101 60
102 return 0; 61 /* make sure casting to sha1_block_fn() is safe */
103 } 62 BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
104 63
105 if (!irq_fpu_usable()) { 64 kernel_fpu_begin();
106 res = crypto_sha1_update(desc, data, len); 65 sha1_base_do_update(desc, data, len,
107 } else { 66 (sha1_block_fn *)sha1_transform_asm);
108 kernel_fpu_begin(); 67 kernel_fpu_end();
109 res = __sha1_ssse3_update(desc, data, len, partial);
110 kernel_fpu_end();
111 }
112
113 return res;
114}
115
116
117/* Add padding and return the message digest. */
118static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
119{
120 struct sha1_state *sctx = shash_desc_ctx(desc);
121 unsigned int i, index, padlen;
122 __be32 *dst = (__be32 *)out;
123 __be64 bits;
124 static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
125
126 bits = cpu_to_be64(sctx->count << 3);
127
128 /* Pad out to 56 mod 64 and append length */
129 index = sctx->count % SHA1_BLOCK_SIZE;
130 padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
131 if (!irq_fpu_usable()) {
132 crypto_sha1_update(desc, padding, padlen);
133 crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
134 } else {
135 kernel_fpu_begin();
136 /* We need to fill a whole block for __sha1_ssse3_update() */
137 if (padlen <= 56) {
138 sctx->count += padlen;
139 memcpy(sctx->buffer + index, padding, padlen);
140 } else {
141 __sha1_ssse3_update(desc, padding, padlen, index);
142 }
143 __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56);
144 kernel_fpu_end();
145 }
146
147 /* Store state in digest */
148 for (i = 0; i < 5; i++)
149 dst[i] = cpu_to_be32(sctx->state[i]);
150
151 /* Wipe context */
152 memset(sctx, 0, sizeof(*sctx));
153 68
154 return 0; 69 return 0;
155} 70}
156 71
157static int sha1_ssse3_export(struct shash_desc *desc, void *out) 72static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
73 unsigned int len, u8 *out)
158{ 74{
159 struct sha1_state *sctx = shash_desc_ctx(desc); 75 if (!irq_fpu_usable())
76 return crypto_sha1_finup(desc, data, len, out);
160 77
161 memcpy(out, sctx, sizeof(*sctx)); 78 kernel_fpu_begin();
79 if (len)
80 sha1_base_do_update(desc, data, len,
81 (sha1_block_fn *)sha1_transform_asm);
82 sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm);
83 kernel_fpu_end();
162 84
163 return 0; 85 return sha1_base_finish(desc, out);
164} 86}
165 87
166static int sha1_ssse3_import(struct shash_desc *desc, const void *in) 88/* Add padding and return the message digest. */
89static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
167{ 90{
168 struct sha1_state *sctx = shash_desc_ctx(desc); 91 return sha1_ssse3_finup(desc, NULL, 0, out);
169
170 memcpy(sctx, in, sizeof(*sctx));
171
172 return 0;
173} 92}
174 93
175#ifdef CONFIG_AS_AVX2 94#ifdef CONFIG_AS_AVX2
@@ -186,13 +105,11 @@ static void sha1_apply_transform_avx2(u32 *digest, const char *data,
186 105
187static struct shash_alg alg = { 106static struct shash_alg alg = {
188 .digestsize = SHA1_DIGEST_SIZE, 107 .digestsize = SHA1_DIGEST_SIZE,
189 .init = sha1_ssse3_init, 108 .init = sha1_base_init,
190 .update = sha1_ssse3_update, 109 .update = sha1_ssse3_update,
191 .final = sha1_ssse3_final, 110 .final = sha1_ssse3_final,
192 .export = sha1_ssse3_export, 111 .finup = sha1_ssse3_finup,
193 .import = sha1_ssse3_import,
194 .descsize = sizeof(struct sha1_state), 112 .descsize = sizeof(struct sha1_state),
195 .statesize = sizeof(struct sha1_state),
196 .base = { 113 .base = {
197 .cra_name = "sha1", 114 .cra_name = "sha1",
198 .cra_driver_name= "sha1-ssse3", 115 .cra_driver_name= "sha1-ssse3",
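Note on the sha1_ssse3_glue.c hunks above: the new update/finup paths cast the raw assembler transform (which takes a bare u32 digest pointer) to sha1_block_fn (which takes a struct sha1_state pointer); the BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0) asserts the layout property that makes the two pointers refer to the same address. The stand-alone sketch below illustrates that layout assumption with simplified stand-in types; calling through the cast pointer relies on the platform ABI, as the kernel code does, and is not strictly portable C.

/* Stand-alone sketch of the offset-zero layout assumption. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct fake_sha1_state {
	uint32_t state[5];	/* must remain the first member */
	uint64_t count;
	uint8_t buffer[64];
};

/* "asm-style" function: only knows about the bare digest words. */
static void raw_transform(uint32_t *digest, const char *data, unsigned int blocks)
{
	(void)data; (void)blocks;
	digest[0] ^= 0xdeadbeef;	/* stand-in for real block processing */
}

/* "base-layer-style" pointer: takes the whole state struct. */
typedef void (*block_fn)(struct fake_sha1_state *st, const char *data,
			 unsigned int blocks);

int main(void)
{
	struct fake_sha1_state st = { .state = {1, 2, 3, 4, 5} };
	block_fn fn;

	/* The cast is only sound because state sits at offset 0. */
	static_assert(offsetof(struct fake_sha1_state, state) == 0, "layout");

	fn = (block_fn)raw_transform;
	fn(&st, "data", 1);
	printf("%x\n", st.state[0]);	/* 1 ^ 0xdeadbeef */
	return 0;
}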
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index 642f15687a0a..92b3b5d75ba9 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -96,10 +96,10 @@ SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
96BYTE_FLIP_MASK = %xmm13 96BYTE_FLIP_MASK = %xmm13
97 97
98NUM_BLKS = %rdx # 3rd arg 98NUM_BLKS = %rdx # 3rd arg
99CTX = %rsi # 2nd arg 99INP = %rsi # 2nd arg
100INP = %rdi # 1st arg 100CTX = %rdi # 1st arg
101 101
102SRND = %rdi # clobbers INP 102SRND = %rsi # clobbers INP
103c = %ecx 103c = %ecx
104d = %r8d 104d = %r8d
105e = %edx 105e = %edx
@@ -342,8 +342,8 @@ a = TMP_
342 342
343######################################################################## 343########################################################################
344## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) 344## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks)
345## arg 1 : pointer to input data 345## arg 1 : pointer to digest
346## arg 2 : pointer to digest 346## arg 2 : pointer to input data
347## arg 3 : Num blocks 347## arg 3 : Num blocks
348######################################################################## 348########################################################################
349.text 349.text
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 9e86944c539d..570ec5ec62d7 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -91,12 +91,12 @@ BYTE_FLIP_MASK = %ymm13
91X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK 91X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK
92 92
93NUM_BLKS = %rdx # 3rd arg 93NUM_BLKS = %rdx # 3rd arg
94CTX = %rsi # 2nd arg 94INP = %rsi # 2nd arg
95INP = %rdi # 1st arg 95CTX = %rdi # 1st arg
96c = %ecx 96c = %ecx
97d = %r8d 97d = %r8d
98e = %edx # clobbers NUM_BLKS 98e = %edx # clobbers NUM_BLKS
99y3 = %edi # clobbers INP 99y3 = %esi # clobbers INP
100 100
101 101
102TBL = %rbp 102TBL = %rbp
@@ -523,8 +523,8 @@ STACK_SIZE = _RSP + _RSP_SIZE
523 523
524######################################################################## 524########################################################################
525## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) 525## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
526## arg 1 : pointer to input data 526## arg 1 : pointer to digest
527## arg 2 : pointer to digest 527## arg 2 : pointer to input data
528## arg 3 : Num blocks 528## arg 3 : Num blocks
529######################################################################## 529########################################################################
530.text 530.text
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index f833b74d902b..2cedc44e8121 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -88,10 +88,10 @@ SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00
88BYTE_FLIP_MASK = %xmm12 88BYTE_FLIP_MASK = %xmm12
89 89
90NUM_BLKS = %rdx # 3rd arg 90NUM_BLKS = %rdx # 3rd arg
91CTX = %rsi # 2nd arg 91INP = %rsi # 2nd arg
92INP = %rdi # 1st arg 92CTX = %rdi # 1st arg
93 93
94SRND = %rdi # clobbers INP 94SRND = %rsi # clobbers INP
95c = %ecx 95c = %ecx
96d = %r8d 96d = %r8d
97e = %edx 97e = %edx
@@ -348,8 +348,8 @@ a = TMP_
348 348
349######################################################################## 349########################################################################
350## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) 350## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks)
351## arg 1 : pointer to input data 351## arg 1 : pointer to digest
352## arg 2 : pointer to digest 352## arg 2 : pointer to input data
353## arg 3 : Num blocks 353## arg 3 : Num blocks
354######################################################################## 354########################################################################
355.text 355.text
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 8fad72f4dfd2..ccc338881ee8 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -36,195 +36,74 @@
36#include <linux/cryptohash.h> 36#include <linux/cryptohash.h>
37#include <linux/types.h> 37#include <linux/types.h>
38#include <crypto/sha.h> 38#include <crypto/sha.h>
39#include <asm/byteorder.h> 39#include <crypto/sha256_base.h>
40#include <asm/i387.h> 40#include <asm/i387.h>
41#include <asm/xcr.h> 41#include <asm/xcr.h>
42#include <asm/xsave.h> 42#include <asm/xsave.h>
43#include <linux/string.h> 43#include <linux/string.h>
44 44
45asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest, 45asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
46 u64 rounds); 46 u64 rounds);
47#ifdef CONFIG_AS_AVX 47#ifdef CONFIG_AS_AVX
48asmlinkage void sha256_transform_avx(const char *data, u32 *digest, 48asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
49 u64 rounds); 49 u64 rounds);
50#endif 50#endif
51#ifdef CONFIG_AS_AVX2 51#ifdef CONFIG_AS_AVX2
52asmlinkage void sha256_transform_rorx(const char *data, u32 *digest, 52asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
53 u64 rounds); 53 u64 rounds);
54#endif 54#endif
55 55
56static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64); 56static void (*sha256_transform_asm)(u32 *, const char *, u64);
57
58
59static int sha256_ssse3_init(struct shash_desc *desc)
60{
61 struct sha256_state *sctx = shash_desc_ctx(desc);
62
63 sctx->state[0] = SHA256_H0;
64 sctx->state[1] = SHA256_H1;
65 sctx->state[2] = SHA256_H2;
66 sctx->state[3] = SHA256_H3;
67 sctx->state[4] = SHA256_H4;
68 sctx->state[5] = SHA256_H5;
69 sctx->state[6] = SHA256_H6;
70 sctx->state[7] = SHA256_H7;
71 sctx->count = 0;
72
73 return 0;
74}
75
76static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
77 unsigned int len, unsigned int partial)
78{
79 struct sha256_state *sctx = shash_desc_ctx(desc);
80 unsigned int done = 0;
81
82 sctx->count += len;
83
84 if (partial) {
85 done = SHA256_BLOCK_SIZE - partial;
86 memcpy(sctx->buf + partial, data, done);
87 sha256_transform_asm(sctx->buf, sctx->state, 1);
88 }
89
90 if (len - done >= SHA256_BLOCK_SIZE) {
91 const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
92
93 sha256_transform_asm(data + done, sctx->state, (u64) rounds);
94
95 done += rounds * SHA256_BLOCK_SIZE;
96 }
97
98 memcpy(sctx->buf, data + done, len - done);
99
100 return 0;
101}
102 57
103static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, 58static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
104 unsigned int len) 59 unsigned int len)
105{ 60{
106 struct sha256_state *sctx = shash_desc_ctx(desc); 61 struct sha256_state *sctx = shash_desc_ctx(desc);
107 unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
108 int res;
109 62
110 /* Handle the fast case right here */ 63 if (!irq_fpu_usable() ||
111 if (partial + len < SHA256_BLOCK_SIZE) { 64 (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
112 sctx->count += len; 65 return crypto_sha256_update(desc, data, len);
113 memcpy(sctx->buf + partial, data, len);
114 66
115 return 0; 67 /* make sure casting to sha256_block_fn() is safe */
116 } 68 BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
117
118 if (!irq_fpu_usable()) {
119 res = crypto_sha256_update(desc, data, len);
120 } else {
121 kernel_fpu_begin();
122 res = __sha256_ssse3_update(desc, data, len, partial);
123 kernel_fpu_end();
124 }
125
126 return res;
127}
128 69
129 70 kernel_fpu_begin();
130/* Add padding and return the message digest. */ 71 sha256_base_do_update(desc, data, len,
131static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) 72 (sha256_block_fn *)sha256_transform_asm);
132{ 73 kernel_fpu_end();
133 struct sha256_state *sctx = shash_desc_ctx(desc);
134 unsigned int i, index, padlen;
135 __be32 *dst = (__be32 *)out;
136 __be64 bits;
137 static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
138
139 bits = cpu_to_be64(sctx->count << 3);
140
141 /* Pad out to 56 mod 64 and append length */
142 index = sctx->count % SHA256_BLOCK_SIZE;
143 padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
144
145 if (!irq_fpu_usable()) {
146 crypto_sha256_update(desc, padding, padlen);
147 crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
148 } else {
149 kernel_fpu_begin();
150 /* We need to fill a whole block for __sha256_ssse3_update() */
151 if (padlen <= 56) {
152 sctx->count += padlen;
153 memcpy(sctx->buf + index, padding, padlen);
154 } else {
155 __sha256_ssse3_update(desc, padding, padlen, index);
156 }
157 __sha256_ssse3_update(desc, (const u8 *)&bits,
158 sizeof(bits), 56);
159 kernel_fpu_end();
160 }
161
162 /* Store state in digest */
163 for (i = 0; i < 8; i++)
164 dst[i] = cpu_to_be32(sctx->state[i]);
165
166 /* Wipe context */
167 memset(sctx, 0, sizeof(*sctx));
168 74
169 return 0; 75 return 0;
170} 76}
171 77
172static int sha256_ssse3_export(struct shash_desc *desc, void *out) 78static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
79 unsigned int len, u8 *out)
173{ 80{
174 struct sha256_state *sctx = shash_desc_ctx(desc); 81 if (!irq_fpu_usable())
82 return crypto_sha256_finup(desc, data, len, out);
175 83
176 memcpy(out, sctx, sizeof(*sctx)); 84 kernel_fpu_begin();
85 if (len)
86 sha256_base_do_update(desc, data, len,
87 (sha256_block_fn *)sha256_transform_asm);
88 sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm);
89 kernel_fpu_end();
177 90
178 return 0; 91 return sha256_base_finish(desc, out);
179} 92}
180 93
181static int sha256_ssse3_import(struct shash_desc *desc, const void *in) 94/* Add padding and return the message digest. */
182{ 95static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
183 struct sha256_state *sctx = shash_desc_ctx(desc);
184
185 memcpy(sctx, in, sizeof(*sctx));
186
187 return 0;
188}
189
190static int sha224_ssse3_init(struct shash_desc *desc)
191{
192 struct sha256_state *sctx = shash_desc_ctx(desc);
193
194 sctx->state[0] = SHA224_H0;
195 sctx->state[1] = SHA224_H1;
196 sctx->state[2] = SHA224_H2;
197 sctx->state[3] = SHA224_H3;
198 sctx->state[4] = SHA224_H4;
199 sctx->state[5] = SHA224_H5;
200 sctx->state[6] = SHA224_H6;
201 sctx->state[7] = SHA224_H7;
202 sctx->count = 0;
203
204 return 0;
205}
206
207static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
208{ 96{
209 u8 D[SHA256_DIGEST_SIZE]; 97 return sha256_ssse3_finup(desc, NULL, 0, out);
210
211 sha256_ssse3_final(desc, D);
212
213 memcpy(hash, D, SHA224_DIGEST_SIZE);
214 memzero_explicit(D, SHA256_DIGEST_SIZE);
215
216 return 0;
217} 98}
218 99
219static struct shash_alg algs[] = { { 100static struct shash_alg algs[] = { {
220 .digestsize = SHA256_DIGEST_SIZE, 101 .digestsize = SHA256_DIGEST_SIZE,
221 .init = sha256_ssse3_init, 102 .init = sha256_base_init,
222 .update = sha256_ssse3_update, 103 .update = sha256_ssse3_update,
223 .final = sha256_ssse3_final, 104 .final = sha256_ssse3_final,
224 .export = sha256_ssse3_export, 105 .finup = sha256_ssse3_finup,
225 .import = sha256_ssse3_import,
226 .descsize = sizeof(struct sha256_state), 106 .descsize = sizeof(struct sha256_state),
227 .statesize = sizeof(struct sha256_state),
228 .base = { 107 .base = {
229 .cra_name = "sha256", 108 .cra_name = "sha256",
230 .cra_driver_name = "sha256-ssse3", 109 .cra_driver_name = "sha256-ssse3",
@@ -235,13 +114,11 @@ static struct shash_alg algs[] = { {
235 } 114 }
236}, { 115}, {
237 .digestsize = SHA224_DIGEST_SIZE, 116 .digestsize = SHA224_DIGEST_SIZE,
238 .init = sha224_ssse3_init, 117 .init = sha224_base_init,
239 .update = sha256_ssse3_update, 118 .update = sha256_ssse3_update,
240 .final = sha224_ssse3_final, 119 .final = sha256_ssse3_final,
241 .export = sha256_ssse3_export, 120 .finup = sha256_ssse3_finup,
242 .import = sha256_ssse3_import,
243 .descsize = sizeof(struct sha256_state), 121 .descsize = sizeof(struct sha256_state),
244 .statesize = sizeof(struct sha256_state),
245 .base = { 122 .base = {
246 .cra_name = "sha224", 123 .cra_name = "sha224",
247 .cra_driver_name = "sha224-ssse3", 124 .cra_driver_name = "sha224-ssse3",
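Note on the sha256/sha224 glue changes above: these files register ordinary "sha256"/"sha224" shash algorithms at a higher cra_priority than the generic C versions, so in-kernel callers simply ask for "sha256" and transparently get whichever driver won the priority contest. A condensed sketch of such a caller follows, written against the shash API of this kernel generation (error handling trimmed; the helper name is hypothetical).

/* Condensed sketch of an in-kernel "sha256" user. */
#include <linux/err.h>
#include <linux/types.h>
#include <crypto/hash.h>

static int sha256_digest_example(const u8 *buf, unsigned int len, u8 *digest)
{
	struct crypto_shash *tfm;
	int ret;

	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;	/* no special request flags */
		ret = crypto_shash_digest(desc, buf, len, digest);
	}

	crypto_free_shash(tfm);
	return ret;
}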
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 974dde9bc6cd..565274d6a641 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -54,9 +54,9 @@
54 54
55# Virtual Registers 55# Virtual Registers
56# ARG1 56# ARG1
57msg = %rdi 57digest = %rdi
58# ARG2 58# ARG2
59digest = %rsi 59msg = %rsi
60# ARG3 60# ARG3
61msglen = %rdx 61msglen = %rdx
62T1 = %rcx 62T1 = %rcx
@@ -271,7 +271,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
271.endm 271.endm
272 272
273######################################################################## 273########################################################################
274# void sha512_transform_avx(const void* M, void* D, u64 L) 274# void sha512_transform_avx(void* D, const void* M, u64 L)
275# Purpose: Updates the SHA512 digest stored at D with the message stored in M. 275# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
276# The size of the message pointed to by M must be an integer multiple of SHA512 276# The size of the message pointed to by M must be an integer multiple of SHA512
277# message blocks. 277# message blocks.
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 568b96105f5c..a4771dcd1fcf 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -70,9 +70,9 @@ XFER = YTMP0
70BYTE_FLIP_MASK = %ymm9 70BYTE_FLIP_MASK = %ymm9
71 71
72# 1st arg 72# 1st arg
73INP = %rdi 73CTX = %rdi
74# 2nd arg 74# 2nd arg
75CTX = %rsi 75INP = %rsi
76# 3rd arg 76# 3rd arg
77NUM_BLKS = %rdx 77NUM_BLKS = %rdx
78 78
@@ -562,7 +562,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
562.endm 562.endm
563 563
564######################################################################## 564########################################################################
565# void sha512_transform_rorx(const void* M, void* D, uint64_t L)# 565# void sha512_transform_rorx(void* D, const void* M, uint64_t L)#
566# Purpose: Updates the SHA512 digest stored at D with the message stored in M. 566# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
567# The size of the message pointed to by M must be an integer multiple of SHA512 567# The size of the message pointed to by M must be an integer multiple of SHA512
568# message blocks. 568# message blocks.
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index fb56855d51f5..e610e29cbc81 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -53,9 +53,9 @@
53 53
54# Virtual Registers 54# Virtual Registers
55# ARG1 55# ARG1
56msg = %rdi 56digest = %rdi
57# ARG2 57# ARG2
58digest = %rsi 58msg = %rsi
59# ARG3 59# ARG3
60msglen = %rdx 60msglen = %rdx
61T1 = %rcx 61T1 = %rcx
@@ -269,7 +269,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
269.endm 269.endm
270 270
271######################################################################## 271########################################################################
272# void sha512_transform_ssse3(const void* M, void* D, u64 L)# 272# void sha512_transform_ssse3(void* D, const void* M, u64 L)#
273# Purpose: Updates the SHA512 digest stored at D with the message stored in M. 273# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
274# The size of the message pointed to by M must be an integer multiple of SHA512 274# The size of the message pointed to by M must be an integer multiple of SHA512
275# message blocks. 275# message blocks.
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 0b6af26832bf..d9fa4c1e063f 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -34,205 +34,75 @@
34#include <linux/cryptohash.h> 34#include <linux/cryptohash.h>
35#include <linux/types.h> 35#include <linux/types.h>
36#include <crypto/sha.h> 36#include <crypto/sha.h>
37#include <asm/byteorder.h> 37#include <crypto/sha512_base.h>
38#include <asm/i387.h> 38#include <asm/i387.h>
39#include <asm/xcr.h> 39#include <asm/xcr.h>
40#include <asm/xsave.h> 40#include <asm/xsave.h>
41 41
42#include <linux/string.h> 42#include <linux/string.h>
43 43
44asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest, 44asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data,
45 u64 rounds); 45 u64 rounds);
46#ifdef CONFIG_AS_AVX 46#ifdef CONFIG_AS_AVX
47asmlinkage void sha512_transform_avx(const char *data, u64 *digest, 47asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
48 u64 rounds); 48 u64 rounds);
49#endif 49#endif
50#ifdef CONFIG_AS_AVX2 50#ifdef CONFIG_AS_AVX2
51asmlinkage void sha512_transform_rorx(const char *data, u64 *digest, 51asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
52 u64 rounds); 52 u64 rounds);
53#endif 53#endif
54 54
55static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64); 55static void (*sha512_transform_asm)(u64 *, const char *, u64);
56
57
58static int sha512_ssse3_init(struct shash_desc *desc)
59{
60 struct sha512_state *sctx = shash_desc_ctx(desc);
61
62 sctx->state[0] = SHA512_H0;
63 sctx->state[1] = SHA512_H1;
64 sctx->state[2] = SHA512_H2;
65 sctx->state[3] = SHA512_H3;
66 sctx->state[4] = SHA512_H4;
67 sctx->state[5] = SHA512_H5;
68 sctx->state[6] = SHA512_H6;
69 sctx->state[7] = SHA512_H7;
70 sctx->count[0] = sctx->count[1] = 0;
71
72 return 0;
73}
74 56
75static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data, 57static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
76 unsigned int len, unsigned int partial) 58 unsigned int len)
77{ 59{
78 struct sha512_state *sctx = shash_desc_ctx(desc); 60 struct sha512_state *sctx = shash_desc_ctx(desc);
79 unsigned int done = 0;
80
81 sctx->count[0] += len;
82 if (sctx->count[0] < len)
83 sctx->count[1]++;
84 61
85 if (partial) { 62 if (!irq_fpu_usable() ||
86 done = SHA512_BLOCK_SIZE - partial; 63 (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
87 memcpy(sctx->buf + partial, data, done); 64 return crypto_sha512_update(desc, data, len);
88 sha512_transform_asm(sctx->buf, sctx->state, 1);
89 }
90
91 if (len - done >= SHA512_BLOCK_SIZE) {
92 const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
93 65
94 sha512_transform_asm(data + done, sctx->state, (u64) rounds); 66 /* make sure casting to sha512_block_fn() is safe */
95 67 BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
96 done += rounds * SHA512_BLOCK_SIZE;
97 }
98 68
99 memcpy(sctx->buf, data + done, len - done); 69 kernel_fpu_begin();
70 sha512_base_do_update(desc, data, len,
71 (sha512_block_fn *)sha512_transform_asm);
72 kernel_fpu_end();
100 73
101 return 0; 74 return 0;
102} 75}
103 76
104static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, 77static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
105 unsigned int len) 78 unsigned int len, u8 *out)
106{ 79{
107 struct sha512_state *sctx = shash_desc_ctx(desc); 80 if (!irq_fpu_usable())
108 unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; 81 return crypto_sha512_finup(desc, data, len, out);
109 int res;
110
111 /* Handle the fast case right here */
112 if (partial + len < SHA512_BLOCK_SIZE) {
113 sctx->count[0] += len;
114 if (sctx->count[0] < len)
115 sctx->count[1]++;
116 memcpy(sctx->buf + partial, data, len);
117
118 return 0;
119 }
120 82
121 if (!irq_fpu_usable()) { 83 kernel_fpu_begin();
122 res = crypto_sha512_update(desc, data, len); 84 if (len)
123 } else { 85 sha512_base_do_update(desc, data, len,
124 kernel_fpu_begin(); 86 (sha512_block_fn *)sha512_transform_asm);
125 res = __sha512_ssse3_update(desc, data, len, partial); 87 sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm);
126 kernel_fpu_end(); 88 kernel_fpu_end();
127 }
128 89
129 return res; 90 return sha512_base_finish(desc, out);
130} 91}
131 92
132
133/* Add padding and return the message digest. */ 93/* Add padding and return the message digest. */
134static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) 94static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
135{ 95{
136 struct sha512_state *sctx = shash_desc_ctx(desc); 96 return sha512_ssse3_finup(desc, NULL, 0, out);
137 unsigned int i, index, padlen;
138 __be64 *dst = (__be64 *)out;
139 __be64 bits[2];
140 static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
141
142 /* save number of bits */
143 bits[1] = cpu_to_be64(sctx->count[0] << 3);
144 bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
145
146 /* Pad out to 112 mod 128 and append length */
147 index = sctx->count[0] & 0x7f;
148 padlen = (index < 112) ? (112 - index) : ((128+112) - index);
149
150 if (!irq_fpu_usable()) {
151 crypto_sha512_update(desc, padding, padlen);
152 crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
153 } else {
154 kernel_fpu_begin();
155 /* We need to fill a whole block for __sha512_ssse3_update() */
156 if (padlen <= 112) {
157 sctx->count[0] += padlen;
158 if (sctx->count[0] < padlen)
159 sctx->count[1]++;
160 memcpy(sctx->buf + index, padding, padlen);
161 } else {
162 __sha512_ssse3_update(desc, padding, padlen, index);
163 }
164 __sha512_ssse3_update(desc, (const u8 *)&bits,
165 sizeof(bits), 112);
166 kernel_fpu_end();
167 }
168
169 /* Store state in digest */
170 for (i = 0; i < 8; i++)
171 dst[i] = cpu_to_be64(sctx->state[i]);
172
173 /* Wipe context */
174 memset(sctx, 0, sizeof(*sctx));
175
176 return 0;
177}
178
179static int sha512_ssse3_export(struct shash_desc *desc, void *out)
180{
181 struct sha512_state *sctx = shash_desc_ctx(desc);
182
183 memcpy(out, sctx, sizeof(*sctx));
184
185 return 0;
186}
187
188static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
189{
190 struct sha512_state *sctx = shash_desc_ctx(desc);
191
192 memcpy(sctx, in, sizeof(*sctx));
193
194 return 0;
195}
196
197static int sha384_ssse3_init(struct shash_desc *desc)
198{
199 struct sha512_state *sctx = shash_desc_ctx(desc);
200
201 sctx->state[0] = SHA384_H0;
202 sctx->state[1] = SHA384_H1;
203 sctx->state[2] = SHA384_H2;
204 sctx->state[3] = SHA384_H3;
205 sctx->state[4] = SHA384_H4;
206 sctx->state[5] = SHA384_H5;
207 sctx->state[6] = SHA384_H6;
208 sctx->state[7] = SHA384_H7;
209
210 sctx->count[0] = sctx->count[1] = 0;
211
212 return 0;
213}
214
215static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
216{
217 u8 D[SHA512_DIGEST_SIZE];
218
219 sha512_ssse3_final(desc, D);
220
221 memcpy(hash, D, SHA384_DIGEST_SIZE);
222 memzero_explicit(D, SHA512_DIGEST_SIZE);
223
224 return 0;
225} 97}
226 98
227static struct shash_alg algs[] = { { 99static struct shash_alg algs[] = { {
228 .digestsize = SHA512_DIGEST_SIZE, 100 .digestsize = SHA512_DIGEST_SIZE,
229 .init = sha512_ssse3_init, 101 .init = sha512_base_init,
230 .update = sha512_ssse3_update, 102 .update = sha512_ssse3_update,
231 .final = sha512_ssse3_final, 103 .final = sha512_ssse3_final,
232 .export = sha512_ssse3_export, 104 .finup = sha512_ssse3_finup,
233 .import = sha512_ssse3_import,
234 .descsize = sizeof(struct sha512_state), 105 .descsize = sizeof(struct sha512_state),
235 .statesize = sizeof(struct sha512_state),
236 .base = { 106 .base = {
237 .cra_name = "sha512", 107 .cra_name = "sha512",
238 .cra_driver_name = "sha512-ssse3", 108 .cra_driver_name = "sha512-ssse3",
@@ -243,13 +113,11 @@ static struct shash_alg algs[] = { {
243 } 113 }
244}, { 114}, {
245 .digestsize = SHA384_DIGEST_SIZE, 115 .digestsize = SHA384_DIGEST_SIZE,
246 .init = sha384_ssse3_init, 116 .init = sha384_base_init,
247 .update = sha512_ssse3_update, 117 .update = sha512_ssse3_update,
248 .final = sha384_ssse3_final, 118 .final = sha512_ssse3_final,
249 .export = sha512_ssse3_export, 119 .finup = sha512_ssse3_finup,
250 .import = sha512_ssse3_import,
251 .descsize = sizeof(struct sha512_state), 120 .descsize = sizeof(struct sha512_state),
252 .statesize = sizeof(struct sha512_state),
253 .base = { 121 .base = {
254 .cra_name = "sha384", 122 .cra_name = "sha384",
255 .cra_driver_name = "sha384-ssse3", 123 .cra_driver_name = "sha384-ssse3",
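The assembly prototypes above now take the digest pointer first and the message second, which matches the sha512_block_fn calling convention of the new base layer, so the glue code can pass sctx->state straight to the assembly through a cast instead of carrying its own buffering and padding. The fragment below is a minimal sketch of that pattern for a hypothetical my_sha512_block() routine; it is illustrative only and not part of this patch.

#include <linux/linkage.h>
#include <linux/types.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>
#include <asm/i387.h>

/* assumed asm routine using the (digest, data, blocks) argument order */
asmlinkage void my_sha512_block(u64 *digest, const char *data, u64 rounds);

static int my_sha512_update(struct shash_desc *desc, const u8 *data,
			    unsigned int len)
{
	struct sha512_state *sctx = shash_desc_ctx(desc);

	/* fall back to the generic C code when the FPU is unusable or the
	 * buffered bytes plus 'len' still do not fill a whole block */
	if (!irq_fpu_usable() ||
	    (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
		return crypto_sha512_update(desc, data, len);

	kernel_fpu_begin();
	sha512_base_do_update(desc, data, len,
			      (sha512_block_fn *)my_sha512_block);
	kernel_fpu_end();

	return 0;
}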
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 1ac531ea9bcc..b5e2d5651851 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -340,7 +340,8 @@ static struct crypto_alg twofish_algs[10] = { {
340 .cra_name = "__ecb-twofish-avx", 340 .cra_name = "__ecb-twofish-avx",
341 .cra_driver_name = "__driver-ecb-twofish-avx", 341 .cra_driver_name = "__driver-ecb-twofish-avx",
342 .cra_priority = 0, 342 .cra_priority = 0,
343 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 343 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
344 CRYPTO_ALG_INTERNAL,
344 .cra_blocksize = TF_BLOCK_SIZE, 345 .cra_blocksize = TF_BLOCK_SIZE,
345 .cra_ctxsize = sizeof(struct twofish_ctx), 346 .cra_ctxsize = sizeof(struct twofish_ctx),
346 .cra_alignmask = 0, 347 .cra_alignmask = 0,
@@ -359,7 +360,8 @@ static struct crypto_alg twofish_algs[10] = { {
359 .cra_name = "__cbc-twofish-avx", 360 .cra_name = "__cbc-twofish-avx",
360 .cra_driver_name = "__driver-cbc-twofish-avx", 361 .cra_driver_name = "__driver-cbc-twofish-avx",
361 .cra_priority = 0, 362 .cra_priority = 0,
362 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 363 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
364 CRYPTO_ALG_INTERNAL,
363 .cra_blocksize = TF_BLOCK_SIZE, 365 .cra_blocksize = TF_BLOCK_SIZE,
364 .cra_ctxsize = sizeof(struct twofish_ctx), 366 .cra_ctxsize = sizeof(struct twofish_ctx),
365 .cra_alignmask = 0, 367 .cra_alignmask = 0,
@@ -378,7 +380,8 @@ static struct crypto_alg twofish_algs[10] = { {
378 .cra_name = "__ctr-twofish-avx", 380 .cra_name = "__ctr-twofish-avx",
379 .cra_driver_name = "__driver-ctr-twofish-avx", 381 .cra_driver_name = "__driver-ctr-twofish-avx",
380 .cra_priority = 0, 382 .cra_priority = 0,
381 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 383 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
384 CRYPTO_ALG_INTERNAL,
382 .cra_blocksize = 1, 385 .cra_blocksize = 1,
383 .cra_ctxsize = sizeof(struct twofish_ctx), 386 .cra_ctxsize = sizeof(struct twofish_ctx),
384 .cra_alignmask = 0, 387 .cra_alignmask = 0,
@@ -398,7 +401,8 @@ static struct crypto_alg twofish_algs[10] = { {
398 .cra_name = "__lrw-twofish-avx", 401 .cra_name = "__lrw-twofish-avx",
399 .cra_driver_name = "__driver-lrw-twofish-avx", 402 .cra_driver_name = "__driver-lrw-twofish-avx",
400 .cra_priority = 0, 403 .cra_priority = 0,
401 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 404 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
405 CRYPTO_ALG_INTERNAL,
402 .cra_blocksize = TF_BLOCK_SIZE, 406 .cra_blocksize = TF_BLOCK_SIZE,
403 .cra_ctxsize = sizeof(struct twofish_lrw_ctx), 407 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
404 .cra_alignmask = 0, 408 .cra_alignmask = 0,
@@ -421,7 +425,8 @@ static struct crypto_alg twofish_algs[10] = { {
421 .cra_name = "__xts-twofish-avx", 425 .cra_name = "__xts-twofish-avx",
422 .cra_driver_name = "__driver-xts-twofish-avx", 426 .cra_driver_name = "__driver-xts-twofish-avx",
423 .cra_priority = 0, 427 .cra_priority = 0,
424 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 428 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
429 CRYPTO_ALG_INTERNAL,
425 .cra_blocksize = TF_BLOCK_SIZE, 430 .cra_blocksize = TF_BLOCK_SIZE,
426 .cra_ctxsize = sizeof(struct twofish_xts_ctx), 431 .cra_ctxsize = sizeof(struct twofish_xts_ctx),
427 .cra_alignmask = 0, 432 .cra_alignmask = 0,
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 50f4da44a304..8aaf298a80e1 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -436,6 +436,14 @@ config CRYPTO_MD5_OCTEON
436 MD5 message digest algorithm (RFC1321) implemented 436 MD5 message digest algorithm (RFC1321) implemented
437 using OCTEON crypto instructions, when available. 437 using OCTEON crypto instructions, when available.
438 438
439config CRYPTO_MD5_PPC
440 tristate "MD5 digest algorithm (PPC)"
441 depends on PPC
442 select CRYPTO_HASH
443 help
444 MD5 message digest algorithm (RFC1321) implemented
445 in PPC assembler.
446
439config CRYPTO_MD5_SPARC64 447config CRYPTO_MD5_SPARC64
440 tristate "MD5 digest algorithm (SPARC64)" 448 tristate "MD5 digest algorithm (SPARC64)"
441 depends on SPARC64 449 depends on SPARC64
@@ -546,34 +554,23 @@ config CRYPTO_SHA512_SSSE3
546 Extensions version 1 (AVX1), or Advanced Vector Extensions 554 Extensions version 1 (AVX1), or Advanced Vector Extensions
547 version 2 (AVX2) instructions, when available. 555 version 2 (AVX2) instructions, when available.
548 556
549config CRYPTO_SHA1_SPARC64 557config CRYPTO_SHA1_OCTEON
550 tristate "SHA1 digest algorithm (SPARC64)" 558 tristate "SHA1 digest algorithm (OCTEON)"
551 depends on SPARC64 559 depends on CPU_CAVIUM_OCTEON
552 select CRYPTO_SHA1
553 select CRYPTO_HASH
554 help
555 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
556 using sparc64 crypto instructions, when available.
557
558config CRYPTO_SHA1_ARM
559 tristate "SHA1 digest algorithm (ARM-asm)"
560 depends on ARM
561 select CRYPTO_SHA1 560 select CRYPTO_SHA1
562 select CRYPTO_HASH 561 select CRYPTO_HASH
563 help 562 help
564 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented 563 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
565 using optimized ARM assembler. 564 using OCTEON crypto instructions, when available.
566 565
567config CRYPTO_SHA1_ARM_NEON 566config CRYPTO_SHA1_SPARC64
568 tristate "SHA1 digest algorithm (ARM NEON)" 567 tristate "SHA1 digest algorithm (SPARC64)"
569 depends on ARM && KERNEL_MODE_NEON 568 depends on SPARC64
570 select CRYPTO_SHA1_ARM
571 select CRYPTO_SHA1 569 select CRYPTO_SHA1
572 select CRYPTO_HASH 570 select CRYPTO_HASH
573 help 571 help
574 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented 572 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
575 using optimized ARM NEON assembly, when NEON instructions are 573 using sparc64 crypto instructions, when available.
576 available.
577 574
578config CRYPTO_SHA1_PPC 575config CRYPTO_SHA1_PPC
579 tristate "SHA1 digest algorithm (powerpc)" 576 tristate "SHA1 digest algorithm (powerpc)"
@@ -582,6 +579,13 @@ config CRYPTO_SHA1_PPC
582 This is the powerpc hardware accelerated implementation of the 579 This is the powerpc hardware accelerated implementation of the
583 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). 580 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
584 581
582config CRYPTO_SHA1_PPC_SPE
583 tristate "SHA1 digest algorithm (PPC SPE)"
584 depends on PPC && SPE
585 help
586 SHA-1 secure hash standard (DFIPS 180-4) implemented
587 using powerpc SPE SIMD instruction set.
588
585config CRYPTO_SHA1_MB 589config CRYPTO_SHA1_MB
586 tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)" 590 tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
587 depends on X86 && 64BIT 591 depends on X86 && 64BIT
@@ -610,6 +614,24 @@ config CRYPTO_SHA256
610 This code also includes SHA-224, a 224 bit hash with 112 bits 614 This code also includes SHA-224, a 224 bit hash with 112 bits
611 of security against collision attacks. 615 of security against collision attacks.
612 616
617config CRYPTO_SHA256_PPC_SPE
618 tristate "SHA224 and SHA256 digest algorithm (PPC SPE)"
619 depends on PPC && SPE
620 select CRYPTO_SHA256
621 select CRYPTO_HASH
622 help
623 SHA224 and SHA256 secure hash standard (DFIPS 180-2)
624 implemented using powerpc SPE SIMD instruction set.
625
626config CRYPTO_SHA256_OCTEON
627 tristate "SHA224 and SHA256 digest algorithm (OCTEON)"
628 depends on CPU_CAVIUM_OCTEON
629 select CRYPTO_SHA256
630 select CRYPTO_HASH
631 help
632 SHA-256 secure hash standard (DFIPS 180-2) implemented
633 using OCTEON crypto instructions, when available.
634
613config CRYPTO_SHA256_SPARC64 635config CRYPTO_SHA256_SPARC64
614 tristate "SHA224 and SHA256 digest algorithm (SPARC64)" 636 tristate "SHA224 and SHA256 digest algorithm (SPARC64)"
615 depends on SPARC64 637 depends on SPARC64
@@ -631,29 +653,23 @@ config CRYPTO_SHA512
631 This code also includes SHA-384, a 384 bit hash with 192 bits 653 This code also includes SHA-384, a 384 bit hash with 192 bits
632 of security against collision attacks. 654 of security against collision attacks.
633 655
634config CRYPTO_SHA512_SPARC64 656config CRYPTO_SHA512_OCTEON
635 tristate "SHA384 and SHA512 digest algorithm (SPARC64)" 657 tristate "SHA384 and SHA512 digest algorithms (OCTEON)"
636 depends on SPARC64 658 depends on CPU_CAVIUM_OCTEON
637 select CRYPTO_SHA512 659 select CRYPTO_SHA512
638 select CRYPTO_HASH 660 select CRYPTO_HASH
639 help 661 help
640 SHA-512 secure hash standard (DFIPS 180-2) implemented 662 SHA-512 secure hash standard (DFIPS 180-2) implemented
641 using sparc64 crypto instructions, when available. 663 using OCTEON crypto instructions, when available.
642 664
643config CRYPTO_SHA512_ARM_NEON 665config CRYPTO_SHA512_SPARC64
644 tristate "SHA384 and SHA512 digest algorithm (ARM NEON)" 666 tristate "SHA384 and SHA512 digest algorithm (SPARC64)"
645 depends on ARM && KERNEL_MODE_NEON 667 depends on SPARC64
646 select CRYPTO_SHA512 668 select CRYPTO_SHA512
647 select CRYPTO_HASH 669 select CRYPTO_HASH
648 help 670 help
649 SHA-512 secure hash standard (DFIPS 180-2) implemented 671 SHA-512 secure hash standard (DFIPS 180-2) implemented
650 using ARM NEON instructions, when available. 672 using sparc64 crypto instructions, when available.
651
652 This version of SHA implements a 512 bit hash with 256 bits of
653 security against collision attacks.
654
655 This code also includes SHA-384, a 384 bit hash with 192 bits
656 of security against collision attacks.
657 673
658config CRYPTO_TGR192 674config CRYPTO_TGR192
659 tristate "Tiger digest algorithms" 675 tristate "Tiger digest algorithms"
@@ -817,45 +833,18 @@ config CRYPTO_AES_SPARC64
817 for some popular block cipher mode is supported too, including 833 for some popular block cipher mode is supported too, including
818 ECB and CBC. 834 ECB and CBC.
819 835
820config CRYPTO_AES_ARM 836config CRYPTO_AES_PPC_SPE
821 tristate "AES cipher algorithms (ARM-asm)" 837 tristate "AES cipher algorithms (PPC SPE)"
822 depends on ARM 838 depends on PPC && SPE
823 select CRYPTO_ALGAPI
824 select CRYPTO_AES
825 help
826 Use optimized AES assembler routines for ARM platforms.
827
828 AES cipher algorithms (FIPS-197). AES uses the Rijndael
829 algorithm.
830
831 Rijndael appears to be consistently a very good performer in
832 both hardware and software across a wide range of computing
833 environments regardless of its use in feedback or non-feedback
834 modes. Its key setup time is excellent, and its key agility is
835 good. Rijndael's very low memory requirements make it very well
836 suited for restricted-space environments, in which it also
837 demonstrates excellent performance. Rijndael's operations are
838 among the easiest to defend against power and timing attacks.
839
840 The AES specifies three key sizes: 128, 192 and 256 bits
841
842 See <http://csrc.nist.gov/encryption/aes/> for more information.
843
844config CRYPTO_AES_ARM_BS
845 tristate "Bit sliced AES using NEON instructions"
846 depends on ARM && KERNEL_MODE_NEON
847 select CRYPTO_ALGAPI
848 select CRYPTO_AES_ARM
849 select CRYPTO_ABLK_HELPER
850 help 839 help
851 Use a faster and more secure NEON based implementation of AES in CBC, 840 AES cipher algorithms (FIPS-197). Additionally the acceleration
852 CTR and XTS modes 841 for popular block cipher modes ECB, CBC, CTR and XTS is supported.
853 842 This module should only be used for low power (router) devices
854 Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode 843 without hardware AES acceleration (e.g. caam crypto). It reduces the
855 and for XTS mode encryption, CBC and XTS mode decryption speedup is 844 size of the AES tables from 16KB to 8KB + 256 bytes and mitigates
856 around 25%. (CBC encryption speed is not affected by this driver.) 845 timing attacks. Nevertheless it might not be as secure as other
857 This implementation does not rely on any lookup tables so it is 846 architecture specific assembler implementations that work on 1KB
858 believed to be invulnerable to cache timing attacks. 847 tables or 256 bytes S-boxes.
859 848
860config CRYPTO_ANUBIS 849config CRYPTO_ANUBIS
861 tristate "Anubis cipher algorithm" 850 tristate "Anubis cipher algorithm"
@@ -1199,7 +1188,7 @@ config CRYPTO_SERPENT_SSE2_X86_64
1199 Keys are allowed to be from 0 to 256 bits in length, in steps 1188 Keys are allowed to be from 0 to 256 bits in length, in steps
1200 of 8 bits. 1189 of 8 bits.
1201 1190
1202 This module provides Serpent cipher algorithm that processes eigth 1191 This module provides Serpent cipher algorithm that processes eight
1203 blocks parallel using SSE2 instruction set. 1192 blocks parallel using SSE2 instruction set.
1204 1193
1205 See also: 1194 See also:
@@ -1523,6 +1512,15 @@ config CRYPTO_USER_API_RNG
1523 This option enables the user-space interface for random 1512 This option enables the user-space interface for random
1524 number generator algorithms. 1513 number generator algorithms.
1525 1514
1515config CRYPTO_USER_API_AEAD
1516 tristate "User-space interface for AEAD cipher algorithms"
1517 depends on NET
1518 select CRYPTO_AEAD
1519 select CRYPTO_USER_API
1520 help
1521 This option enables the user-spaces interface for AEAD
1522 cipher algorithms.
1523
1526config CRYPTO_HASH_INFO 1524config CRYPTO_HASH_INFO
1527 bool 1525 bool
1528 1526
diff --git a/crypto/Makefile b/crypto/Makefile
index ba19465f9ad3..97b7d3ac87e7 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
100obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o 100obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
101obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o 101obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
102obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o 102obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
103obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
103 104
104# 105#
105# generic algorithms and the async_tx api 106# generic algorithms and the async_tx api
diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c
index ffe7278d4bd8..e1fcf53bb931 100644
--- a/crypto/ablk_helper.c
+++ b/crypto/ablk_helper.c
@@ -124,7 +124,8 @@ int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
124 struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); 124 struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
125 struct cryptd_ablkcipher *cryptd_tfm; 125 struct cryptd_ablkcipher *cryptd_tfm;
126 126
127 cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); 127 cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, CRYPTO_ALG_INTERNAL,
128 CRYPTO_ALG_INTERNAL);
128 if (IS_ERR(cryptd_tfm)) 129 if (IS_ERR(cryptd_tfm))
129 return PTR_ERR(cryptd_tfm); 130 return PTR_ERR(cryptd_tfm);
130 131
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 83b04e0884b1..2d0a1c64ce39 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -64,6 +64,8 @@ static int crypto_check_alg(struct crypto_alg *alg)
64 if (alg->cra_priority < 0) 64 if (alg->cra_priority < 0)
65 return -EINVAL; 65 return -EINVAL;
66 66
67 atomic_set(&alg->cra_refcnt, 1);
68
67 return crypto_set_driver_name(alg); 69 return crypto_set_driver_name(alg);
68} 70}
69 71
@@ -99,10 +101,9 @@ static struct list_head *crypto_more_spawns(struct crypto_alg *alg,
99 return &n->list == stack ? top : &n->inst->alg.cra_users; 101 return &n->list == stack ? top : &n->inst->alg.cra_users;
100} 102}
101 103
102static void crypto_remove_spawn(struct crypto_spawn *spawn, 104static void crypto_remove_instance(struct crypto_instance *inst,
103 struct list_head *list) 105 struct list_head *list)
104{ 106{
105 struct crypto_instance *inst = spawn->inst;
106 struct crypto_template *tmpl = inst->tmpl; 107 struct crypto_template *tmpl = inst->tmpl;
107 108
108 if (crypto_is_dead(&inst->alg)) 109 if (crypto_is_dead(&inst->alg))
@@ -167,7 +168,7 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
167 if (spawn->alg) 168 if (spawn->alg)
168 list_move(&spawn->list, &spawn->alg->cra_users); 169 list_move(&spawn->list, &spawn->alg->cra_users);
169 else 170 else
170 crypto_remove_spawn(spawn, list); 171 crypto_remove_instance(spawn->inst, list);
171 } 172 }
172} 173}
173EXPORT_SYMBOL_GPL(crypto_remove_spawns); 174EXPORT_SYMBOL_GPL(crypto_remove_spawns);
@@ -188,7 +189,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
188 189
189 ret = -EEXIST; 190 ret = -EEXIST;
190 191
191 atomic_set(&alg->cra_refcnt, 1);
192 list_for_each_entry(q, &crypto_alg_list, cra_list) { 192 list_for_each_entry(q, &crypto_alg_list, cra_list) {
193 if (q == alg) 193 if (q == alg)
194 goto err; 194 goto err;
@@ -523,7 +523,10 @@ int crypto_register_instance(struct crypto_template *tmpl,
523 523
524 err = crypto_check_alg(&inst->alg); 524 err = crypto_check_alg(&inst->alg);
525 if (err) 525 if (err)
526 goto err; 526 return err;
527
528 if (unlikely(!crypto_mod_get(&inst->alg)))
529 return -EAGAIN;
527 530
528 inst->alg.cra_module = tmpl->module; 531 inst->alg.cra_module = tmpl->module;
529 inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE; 532 inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE;
@@ -545,37 +548,30 @@ unlock:
545 goto err; 548 goto err;
546 549
547 crypto_wait_for_test(larval); 550 crypto_wait_for_test(larval);
551
552 /* Remove instance if test failed */
553 if (!(inst->alg.cra_flags & CRYPTO_ALG_TESTED))
554 crypto_unregister_instance(inst);
548 err = 0; 555 err = 0;
549 556
550err: 557err:
558 crypto_mod_put(&inst->alg);
551 return err; 559 return err;
552} 560}
553EXPORT_SYMBOL_GPL(crypto_register_instance); 561EXPORT_SYMBOL_GPL(crypto_register_instance);
554 562
555int crypto_unregister_instance(struct crypto_alg *alg) 563int crypto_unregister_instance(struct crypto_instance *inst)
556{ 564{
557 int err; 565 LIST_HEAD(list);
558 struct crypto_instance *inst = (void *)alg;
559 struct crypto_template *tmpl = inst->tmpl;
560 LIST_HEAD(users);
561
562 if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE))
563 return -EINVAL;
564
565 BUG_ON(atomic_read(&alg->cra_refcnt) != 1);
566 566
567 down_write(&crypto_alg_sem); 567 down_write(&crypto_alg_sem);
568 568
569 hlist_del_init(&inst->list); 569 crypto_remove_spawns(&inst->alg, &list, NULL);
570 err = crypto_remove_alg(alg, &users); 570 crypto_remove_instance(inst, &list);
571 571
572 up_write(&crypto_alg_sem); 572 up_write(&crypto_alg_sem);
573 573
574 if (err) 574 crypto_remove_final(&list);
575 return err;
576
577 tmpl->free(inst);
578 crypto_remove_final(&users);
579 575
580 return 0; 576 return 0;
581} 577}
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
new file mode 100644
index 000000000000..527d27b023ab
--- /dev/null
+++ b/crypto/algif_aead.c
@@ -0,0 +1,666 @@
1/*
2 * algif_aead: User-space interface for AEAD algorithms
3 *
4 * Copyright (C) 2014, Stephan Mueller <smueller@chronox.de>
5 *
6 * This file provides the user-space API for AEAD ciphers.
7 *
8 * This file is derived from algif_skcipher.c.
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 */
15
16#include <crypto/scatterwalk.h>
17#include <crypto/if_alg.h>
18#include <linux/init.h>
19#include <linux/list.h>
20#include <linux/kernel.h>
21#include <linux/mm.h>
22#include <linux/module.h>
23#include <linux/net.h>
24#include <net/sock.h>
25
26struct aead_sg_list {
27 unsigned int cur;
28 struct scatterlist sg[ALG_MAX_PAGES];
29};
30
31struct aead_ctx {
32 struct aead_sg_list tsgl;
33 /*
34 * RSGL_MAX_ENTRIES is an artificial limit where user space at maximum
35 * can cause the kernel to allocate RSGL_MAX_ENTRIES * ALG_MAX_PAGES
36 * bytes
37 */
38#define RSGL_MAX_ENTRIES ALG_MAX_PAGES
39 struct af_alg_sgl rsgl[RSGL_MAX_ENTRIES];
40
41 void *iv;
42
43 struct af_alg_completion completion;
44
45 unsigned long used;
46
47 unsigned int len;
48 bool more;
49 bool merge;
50 bool enc;
51
52 size_t aead_assoclen;
53 struct aead_request aead_req;
54};
55
56static inline int aead_sndbuf(struct sock *sk)
57{
58 struct alg_sock *ask = alg_sk(sk);
59 struct aead_ctx *ctx = ask->private;
60
61 return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
62 ctx->used, 0);
63}
64
65static inline bool aead_writable(struct sock *sk)
66{
67 return PAGE_SIZE <= aead_sndbuf(sk);
68}
69
70static inline bool aead_sufficient_data(struct aead_ctx *ctx)
71{
72 unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
73
74 return (ctx->used >= (ctx->aead_assoclen + (ctx->enc ? 0 : as)));
75}
76
77static void aead_put_sgl(struct sock *sk)
78{
79 struct alg_sock *ask = alg_sk(sk);
80 struct aead_ctx *ctx = ask->private;
81 struct aead_sg_list *sgl = &ctx->tsgl;
82 struct scatterlist *sg = sgl->sg;
83 unsigned int i;
84
85 for (i = 0; i < sgl->cur; i++) {
86 if (!sg_page(sg + i))
87 continue;
88
89 put_page(sg_page(sg + i));
90 sg_assign_page(sg + i, NULL);
91 }
92 sgl->cur = 0;
93 ctx->used = 0;
94 ctx->more = 0;
95 ctx->merge = 0;
96}
97
98static void aead_wmem_wakeup(struct sock *sk)
99{
100 struct socket_wq *wq;
101
102 if (!aead_writable(sk))
103 return;
104
105 rcu_read_lock();
106 wq = rcu_dereference(sk->sk_wq);
107 if (wq_has_sleeper(wq))
108 wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
109 POLLRDNORM |
110 POLLRDBAND);
111 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
112 rcu_read_unlock();
113}
114
115static int aead_wait_for_data(struct sock *sk, unsigned flags)
116{
117 struct alg_sock *ask = alg_sk(sk);
118 struct aead_ctx *ctx = ask->private;
119 long timeout;
120 DEFINE_WAIT(wait);
121 int err = -ERESTARTSYS;
122
123 if (flags & MSG_DONTWAIT)
124 return -EAGAIN;
125
126 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
127
128 for (;;) {
129 if (signal_pending(current))
130 break;
131 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
132 timeout = MAX_SCHEDULE_TIMEOUT;
133 if (sk_wait_event(sk, &timeout, !ctx->more)) {
134 err = 0;
135 break;
136 }
137 }
138 finish_wait(sk_sleep(sk), &wait);
139
140 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
141
142 return err;
143}
144
145static void aead_data_wakeup(struct sock *sk)
146{
147 struct alg_sock *ask = alg_sk(sk);
148 struct aead_ctx *ctx = ask->private;
149 struct socket_wq *wq;
150
151 if (ctx->more)
152 return;
153 if (!ctx->used)
154 return;
155
156 rcu_read_lock();
157 wq = rcu_dereference(sk->sk_wq);
158 if (wq_has_sleeper(wq))
159 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
160 POLLRDNORM |
161 POLLRDBAND);
162 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
163 rcu_read_unlock();
164}
165
166static int aead_sendmsg(struct kiocb *unused, struct socket *sock,
167 struct msghdr *msg, size_t size)
168{
169 struct sock *sk = sock->sk;
170 struct alg_sock *ask = alg_sk(sk);
171 struct aead_ctx *ctx = ask->private;
172 unsigned ivsize =
173 crypto_aead_ivsize(crypto_aead_reqtfm(&ctx->aead_req));
174 struct aead_sg_list *sgl = &ctx->tsgl;
175 struct af_alg_control con = {};
176 long copied = 0;
177 bool enc = 0;
178 bool init = 0;
179 int err = -EINVAL;
180
181 if (msg->msg_controllen) {
182 err = af_alg_cmsg_send(msg, &con);
183 if (err)
184 return err;
185
186 init = 1;
187 switch (con.op) {
188 case ALG_OP_ENCRYPT:
189 enc = 1;
190 break;
191 case ALG_OP_DECRYPT:
192 enc = 0;
193 break;
194 default:
195 return -EINVAL;
196 }
197
198 if (con.iv && con.iv->ivlen != ivsize)
199 return -EINVAL;
200 }
201
202 lock_sock(sk);
203 if (!ctx->more && ctx->used)
204 goto unlock;
205
206 if (init) {
207 ctx->enc = enc;
208 if (con.iv)
209 memcpy(ctx->iv, con.iv->iv, ivsize);
210
211 ctx->aead_assoclen = con.aead_assoclen;
212 }
213
214 while (size) {
215 unsigned long len = size;
216 struct scatterlist *sg = NULL;
217
218 /* use the existing memory in an allocated page */
219 if (ctx->merge) {
220 sg = sgl->sg + sgl->cur - 1;
221 len = min_t(unsigned long, len,
222 PAGE_SIZE - sg->offset - sg->length);
223 err = memcpy_from_msg(page_address(sg_page(sg)) +
224 sg->offset + sg->length,
225 msg, len);
226 if (err)
227 goto unlock;
228
229 sg->length += len;
230 ctx->merge = (sg->offset + sg->length) &
231 (PAGE_SIZE - 1);
232
233 ctx->used += len;
234 copied += len;
235 size -= len;
236 continue;
237 }
238
239 if (!aead_writable(sk)) {
240 /* user space sent too much data */
241 aead_put_sgl(sk);
242 err = -EMSGSIZE;
243 goto unlock;
244 }
245
246 /* allocate a new page */
247 len = min_t(unsigned long, size, aead_sndbuf(sk));
248 while (len) {
249 int plen = 0;
250
251 if (sgl->cur >= ALG_MAX_PAGES) {
252 aead_put_sgl(sk);
253 err = -E2BIG;
254 goto unlock;
255 }
256
257 sg = sgl->sg + sgl->cur;
258 plen = min_t(int, len, PAGE_SIZE);
259
260 sg_assign_page(sg, alloc_page(GFP_KERNEL));
261 err = -ENOMEM;
262 if (!sg_page(sg))
263 goto unlock;
264
265 err = memcpy_from_msg(page_address(sg_page(sg)),
266 msg, plen);
267 if (err) {
268 __free_page(sg_page(sg));
269 sg_assign_page(sg, NULL);
270 goto unlock;
271 }
272
273 sg->offset = 0;
274 sg->length = plen;
275 len -= plen;
276 ctx->used += plen;
277 copied += plen;
278 sgl->cur++;
279 size -= plen;
280 ctx->merge = plen & (PAGE_SIZE - 1);
281 }
282 }
283
284 err = 0;
285
286 ctx->more = msg->msg_flags & MSG_MORE;
287 if (!ctx->more && !aead_sufficient_data(ctx)) {
288 aead_put_sgl(sk);
289 err = -EMSGSIZE;
290 }
291
292unlock:
293 aead_data_wakeup(sk);
294 release_sock(sk);
295
296 return err ?: copied;
297}
298
299static ssize_t aead_sendpage(struct socket *sock, struct page *page,
300 int offset, size_t size, int flags)
301{
302 struct sock *sk = sock->sk;
303 struct alg_sock *ask = alg_sk(sk);
304 struct aead_ctx *ctx = ask->private;
305 struct aead_sg_list *sgl = &ctx->tsgl;
306 int err = -EINVAL;
307
308 if (flags & MSG_SENDPAGE_NOTLAST)
309 flags |= MSG_MORE;
310
311 if (sgl->cur >= ALG_MAX_PAGES)
312 return -E2BIG;
313
314 lock_sock(sk);
315 if (!ctx->more && ctx->used)
316 goto unlock;
317
318 if (!size)
319 goto done;
320
321 if (!aead_writable(sk)) {
322 /* user space sent too much data */
323 aead_put_sgl(sk);
324 err = -EMSGSIZE;
325 goto unlock;
326 }
327
328 ctx->merge = 0;
329
330 get_page(page);
331 sg_set_page(sgl->sg + sgl->cur, page, size, offset);
332 sgl->cur++;
333 ctx->used += size;
334
335 err = 0;
336
337done:
338 ctx->more = flags & MSG_MORE;
339 if (!ctx->more && !aead_sufficient_data(ctx)) {
340 aead_put_sgl(sk);
341 err = -EMSGSIZE;
342 }
343
344unlock:
345 aead_data_wakeup(sk);
346 release_sock(sk);
347
348 return err ?: size;
349}
350
351static int aead_recvmsg(struct kiocb *unused, struct socket *sock,
352 struct msghdr *msg, size_t ignored, int flags)
353{
354 struct sock *sk = sock->sk;
355 struct alg_sock *ask = alg_sk(sk);
356 struct aead_ctx *ctx = ask->private;
357 unsigned bs = crypto_aead_blocksize(crypto_aead_reqtfm(&ctx->aead_req));
358 unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
359 struct aead_sg_list *sgl = &ctx->tsgl;
360 struct scatterlist *sg = NULL;
361 struct scatterlist assoc[ALG_MAX_PAGES];
362 size_t assoclen = 0;
363 unsigned int i = 0;
364 int err = -EINVAL;
365 unsigned long used = 0;
366 size_t outlen = 0;
367 size_t usedpages = 0;
368 unsigned int cnt = 0;
369
370 /* Limit number of IOV blocks to be accessed below */
371 if (msg->msg_iter.nr_segs > RSGL_MAX_ENTRIES)
372 return -ENOMSG;
373
374 lock_sock(sk);
375
376 /*
377 * AEAD memory structure: For encryption, the tag is appended to the
378 * ciphertext which implies that the memory allocated for the ciphertext
379 * must be increased by the tag length. For decryption, the tag
380 * is expected to be concatenated to the ciphertext. The plaintext
381 * therefore has a memory size of the ciphertext minus the tag length.
382 *
383 * The memory structure for cipher operation has the following
384 * structure:
385 * AEAD encryption input: assoc data || plaintext
386 * AEAD encryption output: ciphertext || auth tag
387 * AEAD decryption input: assoc data || ciphertext || auth tag
388 * AEAD decryption output: plaintext
389 */
390
391 if (ctx->more) {
392 err = aead_wait_for_data(sk, flags);
393 if (err)
394 goto unlock;
395 }
396
397 used = ctx->used;
398
399 /*
400 * Make sure sufficient data is present -- note, the same check is
401 * also present in sendmsg/sendpage. The checks in sendpage/sendmsg
402 * shall inform the data sender that something is
403 * wrong, but they are irrelevant to maintain the kernel integrity.
404 * We need this check here too in case user space decides to not honor
405 * the error message in sendmsg/sendpage and still call recvmsg. This
406 * check here protects the kernel integrity.
407 */
408 if (!aead_sufficient_data(ctx))
409 goto unlock;
410
411 /*
412 * The cipher operation input data is reduced by the associated data
413 * length as this data is processed separately later on.
414 */
415 used -= ctx->aead_assoclen;
416
417 if (ctx->enc) {
418 /* round up output buffer to multiple of block size */
419 outlen = ((used + bs - 1) / bs * bs);
420 /* add the size needed for the auth tag to be created */
421 outlen += as;
422 } else {
423 /* output data size is input without the authentication tag */
424 outlen = used - as;
425 /* round up output buffer to multiple of block size */
426 outlen = ((outlen + bs - 1) / bs * bs);
427 }
428
429 /* convert iovecs of output buffers into scatterlists */
430 while (iov_iter_count(&msg->msg_iter)) {
431 size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
432 (outlen - usedpages));
433
434 /* make one iovec available as scatterlist */
435 err = af_alg_make_sg(&ctx->rsgl[cnt], &msg->msg_iter,
436 seglen);
437 if (err < 0)
438 goto unlock;
439 usedpages += err;
440 /* chain the new scatterlist with initial list */
441 if (cnt)
442 scatterwalk_crypto_chain(ctx->rsgl[0].sg,
443 ctx->rsgl[cnt].sg, 1,
444 sg_nents(ctx->rsgl[cnt-1].sg));
445 /* we do not need more iovecs as we have sufficient memory */
446 if (outlen <= usedpages)
447 break;
448 iov_iter_advance(&msg->msg_iter, err);
449 cnt++;
450 }
451
452 err = -EINVAL;
453 /* ensure output buffer is sufficiently large */
454 if (usedpages < outlen)
455 goto unlock;
456
457 sg_init_table(assoc, ALG_MAX_PAGES);
458 assoclen = ctx->aead_assoclen;
459 /*
460 * Split scatterlist into two: first part becomes AD, second part
461 * is plaintext / ciphertext. The first part is assigned to assoc
462 * scatterlist. When this loop finishes, sg points to the start of the
463 * plaintext / ciphertext.
464 */
465 for (i = 0; i < ctx->tsgl.cur; i++) {
466 sg = sgl->sg + i;
467 if (sg->length <= assoclen) {
468 /* AD is larger than one page */
469 sg_set_page(assoc + i, sg_page(sg),
470 sg->length, sg->offset);
471 assoclen -= sg->length;
472 if (i >= ctx->tsgl.cur)
473 goto unlock;
474 } else if (!assoclen) {
475 /* current page is to start of plaintext / ciphertext */
476 if (i)
477 /* AD terminates at page boundary */
478 sg_mark_end(assoc + i - 1);
479 else
480 /* AD size is zero */
481 sg_mark_end(assoc);
482 break;
483 } else {
484 /* AD does not terminate at page boundary */
485 sg_set_page(assoc + i, sg_page(sg),
486 assoclen, sg->offset);
487 sg_mark_end(assoc + i);
488 /* plaintext / ciphertext starts after AD */
489 sg->length -= assoclen;
490 sg->offset += assoclen;
491 break;
492 }
493 }
494
495 aead_request_set_assoc(&ctx->aead_req, assoc, ctx->aead_assoclen);
496 aead_request_set_crypt(&ctx->aead_req, sg, ctx->rsgl[0].sg, used,
497 ctx->iv);
498
499 err = af_alg_wait_for_completion(ctx->enc ?
500 crypto_aead_encrypt(&ctx->aead_req) :
501 crypto_aead_decrypt(&ctx->aead_req),
502 &ctx->completion);
503
504 if (err) {
505 /* EBADMSG implies a valid cipher operation took place */
506 if (err == -EBADMSG)
507 aead_put_sgl(sk);
508 goto unlock;
509 }
510
511 aead_put_sgl(sk);
512
513 err = 0;
514
515unlock:
516 for (i = 0; i < cnt; i++)
517 af_alg_free_sg(&ctx->rsgl[i]);
518
519 aead_wmem_wakeup(sk);
520 release_sock(sk);
521
522 return err ? err : outlen;
523}
524
525static unsigned int aead_poll(struct file *file, struct socket *sock,
526 poll_table *wait)
527{
528 struct sock *sk = sock->sk;
529 struct alg_sock *ask = alg_sk(sk);
530 struct aead_ctx *ctx = ask->private;
531 unsigned int mask;
532
533 sock_poll_wait(file, sk_sleep(sk), wait);
534 mask = 0;
535
536 if (!ctx->more)
537 mask |= POLLIN | POLLRDNORM;
538
539 if (aead_writable(sk))
540 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
541
542 return mask;
543}
544
545static struct proto_ops algif_aead_ops = {
546 .family = PF_ALG,
547
548 .connect = sock_no_connect,
549 .socketpair = sock_no_socketpair,
550 .getname = sock_no_getname,
551 .ioctl = sock_no_ioctl,
552 .listen = sock_no_listen,
553 .shutdown = sock_no_shutdown,
554 .getsockopt = sock_no_getsockopt,
555 .mmap = sock_no_mmap,
556 .bind = sock_no_bind,
557 .accept = sock_no_accept,
558 .setsockopt = sock_no_setsockopt,
559
560 .release = af_alg_release,
561 .sendmsg = aead_sendmsg,
562 .sendpage = aead_sendpage,
563 .recvmsg = aead_recvmsg,
564 .poll = aead_poll,
565};
566
567static void *aead_bind(const char *name, u32 type, u32 mask)
568{
569 return crypto_alloc_aead(name, type, mask);
570}
571
572static void aead_release(void *private)
573{
574 crypto_free_aead(private);
575}
576
577static int aead_setauthsize(void *private, unsigned int authsize)
578{
579 return crypto_aead_setauthsize(private, authsize);
580}
581
582static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
583{
584 return crypto_aead_setkey(private, key, keylen);
585}
586
587static void aead_sock_destruct(struct sock *sk)
588{
589 struct alg_sock *ask = alg_sk(sk);
590 struct aead_ctx *ctx = ask->private;
591 unsigned int ivlen = crypto_aead_ivsize(
592 crypto_aead_reqtfm(&ctx->aead_req));
593
594 aead_put_sgl(sk);
595 sock_kzfree_s(sk, ctx->iv, ivlen);
596 sock_kfree_s(sk, ctx, ctx->len);
597 af_alg_release_parent(sk);
598}
599
600static int aead_accept_parent(void *private, struct sock *sk)
601{
602 struct aead_ctx *ctx;
603 struct alg_sock *ask = alg_sk(sk);
604 unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(private);
605 unsigned int ivlen = crypto_aead_ivsize(private);
606
607 ctx = sock_kmalloc(sk, len, GFP_KERNEL);
608 if (!ctx)
609 return -ENOMEM;
610 memset(ctx, 0, len);
611
612 ctx->iv = sock_kmalloc(sk, ivlen, GFP_KERNEL);
613 if (!ctx->iv) {
614 sock_kfree_s(sk, ctx, len);
615 return -ENOMEM;
616 }
617 memset(ctx->iv, 0, ivlen);
618
619 ctx->len = len;
620 ctx->used = 0;
621 ctx->more = 0;
622 ctx->merge = 0;
623 ctx->enc = 0;
624 ctx->tsgl.cur = 0;
625 ctx->aead_assoclen = 0;
626 af_alg_init_completion(&ctx->completion);
627 sg_init_table(ctx->tsgl.sg, ALG_MAX_PAGES);
628
629 ask->private = ctx;
630
631 aead_request_set_tfm(&ctx->aead_req, private);
632 aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
633 af_alg_complete, &ctx->completion);
634
635 sk->sk_destruct = aead_sock_destruct;
636
637 return 0;
638}
639
640static const struct af_alg_type algif_type_aead = {
641 .bind = aead_bind,
642 .release = aead_release,
643 .setkey = aead_setkey,
644 .setauthsize = aead_setauthsize,
645 .accept = aead_accept_parent,
646 .ops = &algif_aead_ops,
647 .name = "aead",
648 .owner = THIS_MODULE
649};
650
651static int __init algif_aead_init(void)
652{
653 return af_alg_register_type(&algif_type_aead);
654}
655
656static void __exit algif_aead_exit(void)
657{
658 int err = af_alg_unregister_type(&algif_type_aead);
659 BUG_ON(err);
660}
661
662module_init(algif_aead_init);
663module_exit(algif_aead_exit);
664MODULE_LICENSE("GPL");
665MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>");
666MODULE_DESCRIPTION("AEAD kernel crypto API user space interface");
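Given the memory layout documented in aead_recvmsg() above (assoc data || plaintext in, ciphertext || auth tag out for encryption), a short user-space sketch of the new interface may help. It assumes a kernel built with CRYPTO_USER_API_AEAD and an available "gcm(aes)" implementation, uses all-zero key/IV/data placeholders, and omits error handling, so it is illustrative rather than a reference client.

#include <stddef.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "aead",
		.salg_name   = "gcm(aes)",
	};
	unsigned char key[16] = { 0 }, iv[12] = { 0 };
	unsigned char assoc[8] = { 0 }, pt[32] = { 0 };
	unsigned char in[sizeof(assoc) + sizeof(pt)];
	unsigned char out[sizeof(pt) + 16];		/* ciphertext || tag */
	char cbuf[CMSG_SPACE(sizeof(__u32)) + CMSG_SPACE(sizeof(__u32)) +
		  CMSG_SPACE(offsetof(struct af_alg_iv, iv) + sizeof(iv))];
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;
	struct af_alg_iv *alg_iv;
	struct iovec iov;
	int tfmfd, opfd;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
	setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
	/* optval is unused here; optlen carries the tag size */
	setsockopt(tfmfd, SOL_ALG, ALG_SET_AEAD_AUTHSIZE, NULL, 16);
	opfd = accept(tfmfd, NULL, 0);

	/* encryption input layout: assoc data followed by the plaintext */
	memcpy(in, assoc, sizeof(assoc));
	memcpy(in + sizeof(assoc), pt, sizeof(pt));
	iov.iov_base = in;
	iov.iov_len  = sizeof(in);

	memset(cbuf, 0, sizeof(cbuf));
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type  = ALG_SET_OP;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(__u32));
	*(__u32 *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type  = ALG_SET_AEAD_ASSOCLEN;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(__u32));
	*(__u32 *)CMSG_DATA(cmsg) = sizeof(assoc);

	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type  = ALG_SET_IV;
	cmsg->cmsg_len   = CMSG_LEN(offsetof(struct af_alg_iv, iv) + sizeof(iv));
	alg_iv = (struct af_alg_iv *)CMSG_DATA(cmsg);
	alg_iv->ivlen = sizeof(iv);
	memcpy(alg_iv->iv, iv, sizeof(iv));

	sendmsg(opfd, &msg, 0);
	read(opfd, out, sizeof(out));	/* ciphertext followed by the tag */

	close(opfd);
	close(tfmfd);
	return 0;
}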
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 3acba0a7cd55..8109aaad2726 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -87,7 +87,7 @@ static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
87 return genlen; 87 return genlen;
88 88
89 err = memcpy_to_msg(msg, result, len); 89 err = memcpy_to_msg(msg, result, len);
90 memzero_explicit(result, genlen); 90 memzero_explicit(result, len);
91 91
92 return err ? err : len; 92 return err ? err : len;
93} 93}
diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c
index 6f5bebc9bf01..765fe7609348 100644
--- a/crypto/ansi_cprng.c
+++ b/crypto/ansi_cprng.c
@@ -210,7 +210,11 @@ static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx,
210 byte_count = DEFAULT_BLK_SZ; 210 byte_count = DEFAULT_BLK_SZ;
211 } 211 }
212 212
213 err = byte_count; 213 /*
214 * Return 0 in case of success as mandated by the kernel
215 * crypto API interface definition.
216 */
217 err = 0;
214 218
215 dbgprint(KERN_CRIT "getting %d random bytes for context %p\n", 219 dbgprint(KERN_CRIT "getting %d random bytes for context %p\n",
216 byte_count, ctx); 220 byte_count, ctx);
diff --git a/crypto/api.c b/crypto/api.c
index 2a81e98a0021..afe4610afc4b 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -257,6 +257,16 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
257 mask |= CRYPTO_ALG_TESTED; 257 mask |= CRYPTO_ALG_TESTED;
258 } 258 }
259 259
260 /*
261 * If the internal flag is set for a cipher, require a caller to
 262 * invoke the cipher with the internal flag to use that cipher.
263 * Also, if a caller wants to allocate a cipher that may or may
264 * not be an internal cipher, use type | CRYPTO_ALG_INTERNAL and
265 * !(mask & CRYPTO_ALG_INTERNAL).
266 */
267 if (!((type | mask) & CRYPTO_ALG_INTERNAL))
268 mask |= CRYPTO_ALG_INTERNAL;
269
260 larval = crypto_larval_lookup(name, type, mask); 270 larval = crypto_larval_lookup(name, type, mask);
261 if (IS_ERR(larval) || !crypto_is_larval(larval)) 271 if (IS_ERR(larval) || !crypto_is_larval(larval))
262 return larval; 272 return larval;
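The comment added to crypto_alg_mod_lookup() above is the caller-visible half of the CRYPTO_ALG_INTERNAL work: an ordinary lookup (type = 0, mask = 0) gets the internal bit folded into the mask, so the "__driver-..." helper implementations flagged earlier in this series can no longer be reached from normal allocations, while wrappers such as cryptd request them explicitly. The fragment below is only a hedged illustration of that rule, not code from the patch.

#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/hash.h>

static int demo_internal_flag(void)
{
	struct crypto_shash *tfm;

	/*
	 * type = 0, mask = 0: crypto_alg_mod_lookup() adds
	 * CRYPTO_ALG_INTERNAL to the mask, so only algorithms that do not
	 * carry the internal flag can satisfy this request; an internal
	 * "__driver-..." helper is never handed out by accident.
	 */
	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	crypto_free_shash(tfm);

	/*
	 * Code that deliberately builds on an internal implementation asks
	 * for it explicitly, as ablk_init_common() now does:
	 *
	 *	cryptd_alloc_ablkcipher(drv_name, CRYPTO_ALG_INTERNAL,
	 *				CRYPTO_ALG_INTERNAL);
	 */
	return 0;
}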
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 650afac10fd7..b0602ba03111 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -168,6 +168,20 @@ static inline struct cryptd_queue *cryptd_get_queue(struct crypto_tfm *tfm)
168 return ictx->queue; 168 return ictx->queue;
169} 169}
170 170
171static inline void cryptd_check_internal(struct rtattr **tb, u32 *type,
172 u32 *mask)
173{
174 struct crypto_attr_type *algt;
175
176 algt = crypto_get_attr_type(tb);
177 if (IS_ERR(algt))
178 return;
179 if ((algt->type & CRYPTO_ALG_INTERNAL))
180 *type |= CRYPTO_ALG_INTERNAL;
181 if ((algt->mask & CRYPTO_ALG_INTERNAL))
182 *mask |= CRYPTO_ALG_INTERNAL;
183}
184
171static int cryptd_blkcipher_setkey(struct crypto_ablkcipher *parent, 185static int cryptd_blkcipher_setkey(struct crypto_ablkcipher *parent,
172 const u8 *key, unsigned int keylen) 186 const u8 *key, unsigned int keylen)
173{ 187{
@@ -321,10 +335,13 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl,
321 struct cryptd_instance_ctx *ctx; 335 struct cryptd_instance_ctx *ctx;
322 struct crypto_instance *inst; 336 struct crypto_instance *inst;
323 struct crypto_alg *alg; 337 struct crypto_alg *alg;
338 u32 type = CRYPTO_ALG_TYPE_BLKCIPHER;
339 u32 mask = CRYPTO_ALG_TYPE_MASK;
324 int err; 340 int err;
325 341
326 alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER, 342 cryptd_check_internal(tb, &type, &mask);
327 CRYPTO_ALG_TYPE_MASK); 343
344 alg = crypto_get_attr_alg(tb, type, mask);
328 if (IS_ERR(alg)) 345 if (IS_ERR(alg))
329 return PTR_ERR(alg); 346 return PTR_ERR(alg);
330 347
@@ -341,7 +358,10 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl,
341 if (err) 358 if (err)
342 goto out_free_inst; 359 goto out_free_inst;
343 360
344 inst->alg.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC; 361 type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC;
362 if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
363 type |= CRYPTO_ALG_INTERNAL;
364 inst->alg.cra_flags = type;
345 inst->alg.cra_type = &crypto_ablkcipher_type; 365 inst->alg.cra_type = &crypto_ablkcipher_type;
346 366
347 inst->alg.cra_ablkcipher.ivsize = alg->cra_blkcipher.ivsize; 367 inst->alg.cra_ablkcipher.ivsize = alg->cra_blkcipher.ivsize;
@@ -577,9 +597,13 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
577 struct ahash_instance *inst; 597 struct ahash_instance *inst;
578 struct shash_alg *salg; 598 struct shash_alg *salg;
579 struct crypto_alg *alg; 599 struct crypto_alg *alg;
600 u32 type = 0;
601 u32 mask = 0;
580 int err; 602 int err;
581 603
582 salg = shash_attr_alg(tb[1], 0, 0); 604 cryptd_check_internal(tb, &type, &mask);
605
606 salg = shash_attr_alg(tb[1], type, mask);
583 if (IS_ERR(salg)) 607 if (IS_ERR(salg))
584 return PTR_ERR(salg); 608 return PTR_ERR(salg);
585 609
@@ -598,7 +622,10 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
598 if (err) 622 if (err)
599 goto out_free_inst; 623 goto out_free_inst;
600 624
601 inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC; 625 type = CRYPTO_ALG_ASYNC;
626 if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
627 type |= CRYPTO_ALG_INTERNAL;
628 inst->alg.halg.base.cra_flags = type;
602 629
603 inst->alg.halg.digestsize = salg->digestsize; 630 inst->alg.halg.digestsize = salg->digestsize;
604 inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx); 631 inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx);
@@ -719,10 +746,13 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
719 struct aead_instance_ctx *ctx; 746 struct aead_instance_ctx *ctx;
720 struct crypto_instance *inst; 747 struct crypto_instance *inst;
721 struct crypto_alg *alg; 748 struct crypto_alg *alg;
749 u32 type = CRYPTO_ALG_TYPE_AEAD;
750 u32 mask = CRYPTO_ALG_TYPE_MASK;
722 int err; 751 int err;
723 752
724 alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_AEAD, 753 cryptd_check_internal(tb, &type, &mask);
725 CRYPTO_ALG_TYPE_MASK); 754
755 alg = crypto_get_attr_alg(tb, type, mask);
726 if (IS_ERR(alg)) 756 if (IS_ERR(alg))
727 return PTR_ERR(alg); 757 return PTR_ERR(alg);
728 758
@@ -739,7 +769,10 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
739 if (err) 769 if (err)
740 goto out_free_inst; 770 goto out_free_inst;
741 771
742 inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC; 772 type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
773 if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
774 type |= CRYPTO_ALG_INTERNAL;
775 inst->alg.cra_flags = type;
743 inst->alg.cra_type = alg->cra_type; 776 inst->alg.cra_type = alg->cra_type;
744 inst->alg.cra_ctxsize = sizeof(struct cryptd_aead_ctx); 777 inst->alg.cra_ctxsize = sizeof(struct cryptd_aead_ctx);
745 inst->alg.cra_init = cryptd_aead_init_tfm; 778 inst->alg.cra_init = cryptd_aead_init_tfm;
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index c5148a35ae0a..41dfe762b7fb 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -62,10 +62,14 @@ static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
62 else if (!exact) 62 else if (!exact)
63 match = !strcmp(q->cra_name, p->cru_name); 63 match = !strcmp(q->cra_name, p->cru_name);
64 64
65 if (match) { 65 if (!match)
66 alg = q; 66 continue;
67 break; 67
68 } 68 if (unlikely(!crypto_mod_get(q)))
69 continue;
70
71 alg = q;
72 break;
69 } 73 }
70 74
71 up_read(&crypto_alg_sem); 75 up_read(&crypto_alg_sem);
@@ -205,9 +209,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
205 if (!alg) 209 if (!alg)
206 return -ENOENT; 210 return -ENOENT;
207 211
212 err = -ENOMEM;
208 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 213 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
209 if (!skb) 214 if (!skb)
210 return -ENOMEM; 215 goto drop_alg;
211 216
212 info.in_skb = in_skb; 217 info.in_skb = in_skb;
213 info.out_skb = skb; 218 info.out_skb = skb;
@@ -215,6 +220,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
215 info.nlmsg_flags = 0; 220 info.nlmsg_flags = 0;
216 221
217 err = crypto_report_alg(alg, &info); 222 err = crypto_report_alg(alg, &info);
223
224drop_alg:
225 crypto_mod_put(alg);
226
218 if (err) 227 if (err)
219 return err; 228 return err;
220 229
@@ -284,6 +293,7 @@ static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
284 293
285 up_write(&crypto_alg_sem); 294 up_write(&crypto_alg_sem);
286 295
296 crypto_mod_put(alg);
287 crypto_remove_final(&list); 297 crypto_remove_final(&list);
288 298
289 return 0; 299 return 0;
@@ -294,6 +304,7 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
294{ 304{
295 struct crypto_alg *alg; 305 struct crypto_alg *alg;
296 struct crypto_user_alg *p = nlmsg_data(nlh); 306 struct crypto_user_alg *p = nlmsg_data(nlh);
307 int err;
297 308
298 if (!netlink_capable(skb, CAP_NET_ADMIN)) 309 if (!netlink_capable(skb, CAP_NET_ADMIN))
299 return -EPERM; 310 return -EPERM;
@@ -310,13 +321,19 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
310 * if we try to unregister. Unregistering such an algorithm without 321 * if we try to unregister. Unregistering such an algorithm without
311 * removing the module is not possible, so we restrict to crypto 322 * removing the module is not possible, so we restrict to crypto
312 * instances that are build from templates. */ 323 * instances that are build from templates. */
324 err = -EINVAL;
313 if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE)) 325 if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE))
314 return -EINVAL; 326 goto drop_alg;
315 327
316 if (atomic_read(&alg->cra_refcnt) != 1) 328 err = -EBUSY;
317 return -EBUSY; 329 if (atomic_read(&alg->cra_refcnt) > 2)
330 goto drop_alg;
318 331
319 return crypto_unregister_instance(alg); 332 err = crypto_unregister_instance((struct crypto_instance *)alg);
333
334drop_alg:
335 crypto_mod_put(alg);
336 return err;
320} 337}
321 338
322static struct crypto_alg *crypto_user_skcipher_alg(const char *name, u32 type, 339static struct crypto_alg *crypto_user_skcipher_alg(const char *name, u32 type,
@@ -395,8 +412,10 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
395 return -EINVAL; 412 return -EINVAL;
396 413
397 alg = crypto_alg_match(p, exact); 414 alg = crypto_alg_match(p, exact);
398 if (alg) 415 if (alg) {
416 crypto_mod_put(alg);
399 return -EEXIST; 417 return -EEXIST;
418 }
400 419
401 if (strlen(p->cru_driver_name)) 420 if (strlen(p->cru_driver_name))
402 name = p->cru_driver_name; 421 name = p->cru_driver_name;
diff --git a/crypto/drbg.c b/crypto/drbg.c
index d8ff16e5c322..b69409cb7e6a 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -119,19 +119,19 @@ static const struct drbg_core drbg_cores[] = {
119 .statelen = 32, /* 256 bits as defined in 10.2.1 */ 119 .statelen = 32, /* 256 bits as defined in 10.2.1 */
120 .blocklen_bytes = 16, 120 .blocklen_bytes = 16,
121 .cra_name = "ctr_aes128", 121 .cra_name = "ctr_aes128",
122 .backend_cra_name = "ecb(aes)", 122 .backend_cra_name = "aes",
123 }, { 123 }, {
124 .flags = DRBG_CTR | DRBG_STRENGTH192, 124 .flags = DRBG_CTR | DRBG_STRENGTH192,
125 .statelen = 40, /* 320 bits as defined in 10.2.1 */ 125 .statelen = 40, /* 320 bits as defined in 10.2.1 */
126 .blocklen_bytes = 16, 126 .blocklen_bytes = 16,
127 .cra_name = "ctr_aes192", 127 .cra_name = "ctr_aes192",
128 .backend_cra_name = "ecb(aes)", 128 .backend_cra_name = "aes",
129 }, { 129 }, {
130 .flags = DRBG_CTR | DRBG_STRENGTH256, 130 .flags = DRBG_CTR | DRBG_STRENGTH256,
131 .statelen = 48, /* 384 bits as defined in 10.2.1 */ 131 .statelen = 48, /* 384 bits as defined in 10.2.1 */
132 .blocklen_bytes = 16, 132 .blocklen_bytes = 16,
133 .cra_name = "ctr_aes256", 133 .cra_name = "ctr_aes256",
134 .backend_cra_name = "ecb(aes)", 134 .backend_cra_name = "aes",
135 }, 135 },
136#endif /* CONFIG_CRYPTO_DRBG_CTR */ 136#endif /* CONFIG_CRYPTO_DRBG_CTR */
137#ifdef CONFIG_CRYPTO_DRBG_HASH 137#ifdef CONFIG_CRYPTO_DRBG_HASH
@@ -308,9 +308,6 @@ static int drbg_ctr_bcc(struct drbg_state *drbg,
308 308
309 drbg_string_fill(&data, out, drbg_blocklen(drbg)); 309 drbg_string_fill(&data, out, drbg_blocklen(drbg));
310 310
311 /* 10.4.3 step 1 */
312 memset(out, 0, drbg_blocklen(drbg));
313
314 /* 10.4.3 step 2 / 4 */ 311 /* 10.4.3 step 2 / 4 */
315 list_for_each_entry(curr, in, list) { 312 list_for_each_entry(curr, in, list) {
316 const unsigned char *pos = curr->buf; 313 const unsigned char *pos = curr->buf;
@@ -406,7 +403,6 @@ static int drbg_ctr_df(struct drbg_state *drbg,
406 403
407 memset(pad, 0, drbg_blocklen(drbg)); 404 memset(pad, 0, drbg_blocklen(drbg));
408 memset(iv, 0, drbg_blocklen(drbg)); 405 memset(iv, 0, drbg_blocklen(drbg));
409 memset(temp, 0, drbg_statelen(drbg));
410 406
411 /* 10.4.2 step 1 is implicit as we work byte-wise */ 407 /* 10.4.2 step 1 is implicit as we work byte-wise */
412 408
@@ -523,7 +519,6 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed,
523 unsigned int len = 0; 519 unsigned int len = 0;
524 struct drbg_string cipherin; 520 struct drbg_string cipherin;
525 521
526 memset(temp, 0, drbg_statelen(drbg) + drbg_blocklen(drbg));
527 if (3 > reseed) 522 if (3 > reseed)
528 memset(df_data, 0, drbg_statelen(drbg)); 523 memset(df_data, 0, drbg_statelen(drbg));
529 524
@@ -585,8 +580,6 @@ static int drbg_ctr_generate(struct drbg_state *drbg,
585 int ret = 0; 580 int ret = 0;
586 struct drbg_string data; 581 struct drbg_string data;
587 582
588 memset(drbg->scratchpad, 0, drbg_blocklen(drbg));
589
590 /* 10.2.1.5.2 step 2 */ 583 /* 10.2.1.5.2 step 2 */
591 if (addtl && !list_empty(addtl)) { 584 if (addtl && !list_empty(addtl)) {
592 ret = drbg_ctr_update(drbg, addtl, 2); 585 ret = drbg_ctr_update(drbg, addtl, 2);
@@ -761,7 +754,6 @@ static struct drbg_state_ops drbg_hmac_ops = {
761 .generate = drbg_hmac_generate, 754 .generate = drbg_hmac_generate,
762 .crypto_init = drbg_init_hash_kernel, 755 .crypto_init = drbg_init_hash_kernel,
763 .crypto_fini = drbg_fini_hash_kernel, 756 .crypto_fini = drbg_fini_hash_kernel,
764
765}; 757};
766#endif /* CONFIG_CRYPTO_DRBG_HMAC */ 758#endif /* CONFIG_CRYPTO_DRBG_HMAC */
767 759
@@ -838,8 +830,6 @@ static int drbg_hash_df(struct drbg_state *drbg,
838 unsigned char *tmp = drbg->scratchpad + drbg_statelen(drbg); 830 unsigned char *tmp = drbg->scratchpad + drbg_statelen(drbg);
839 struct drbg_string data; 831 struct drbg_string data;
840 832
841 memset(tmp, 0, drbg_blocklen(drbg));
842
843 /* 10.4.1 step 3 */ 833 /* 10.4.1 step 3 */
844 input[0] = 1; 834 input[0] = 1;
845 drbg_cpu_to_be32((outlen * 8), &input[1]); 835 drbg_cpu_to_be32((outlen * 8), &input[1]);
@@ -879,7 +869,6 @@ static int drbg_hash_update(struct drbg_state *drbg, struct list_head *seed,
879 unsigned char *V = drbg->scratchpad; 869 unsigned char *V = drbg->scratchpad;
880 unsigned char prefix = DRBG_PREFIX1; 870 unsigned char prefix = DRBG_PREFIX1;
881 871
882 memset(drbg->scratchpad, 0, drbg_statelen(drbg));
883 if (!seed) 872 if (!seed)
884 return -EINVAL; 873 return -EINVAL;
885 874
@@ -921,9 +910,6 @@ static int drbg_hash_process_addtl(struct drbg_state *drbg,
921 LIST_HEAD(datalist); 910 LIST_HEAD(datalist);
922 unsigned char prefix = DRBG_PREFIX2; 911 unsigned char prefix = DRBG_PREFIX2;
923 912
924 /* this is value w as per documentation */
925 memset(drbg->scratchpad, 0, drbg_blocklen(drbg));
926
927 /* 10.1.1.4 step 2 */ 913 /* 10.1.1.4 step 2 */
928 if (!addtl || list_empty(addtl)) 914 if (!addtl || list_empty(addtl))
929 return 0; 915 return 0;
@@ -959,9 +945,6 @@ static int drbg_hash_hashgen(struct drbg_state *drbg,
959 struct drbg_string data; 945 struct drbg_string data;
960 LIST_HEAD(datalist); 946 LIST_HEAD(datalist);
961 947
962 memset(src, 0, drbg_statelen(drbg));
963 memset(dst, 0, drbg_blocklen(drbg));
964
965 /* 10.1.1.4 step hashgen 2 */ 948 /* 10.1.1.4 step hashgen 2 */
966 memcpy(src, drbg->V, drbg_statelen(drbg)); 949 memcpy(src, drbg->V, drbg_statelen(drbg));
967 950
@@ -1018,7 +1001,6 @@ static int drbg_hash_generate(struct drbg_state *drbg,
1018 len = drbg_hash_hashgen(drbg, buf, buflen); 1001 len = drbg_hash_hashgen(drbg, buf, buflen);
1019 1002
1020 /* this is the value H as documented in 10.1.1.4 */ 1003 /* this is the value H as documented in 10.1.1.4 */
1021 memset(drbg->scratchpad, 0, drbg_blocklen(drbg));
1022 /* 10.1.1.4 step 4 */ 1004 /* 10.1.1.4 step 4 */
1023 drbg_string_fill(&data1, &prefix, 1); 1005 drbg_string_fill(&data1, &prefix, 1);
1024 list_add_tail(&data1.list, &datalist); 1006 list_add_tail(&data1.list, &datalist);
@@ -1298,7 +1280,7 @@ static void drbg_restore_shadow(struct drbg_state *drbg,
1298 * as defined in SP800-90A. The additional input is mixed into 1280 * as defined in SP800-90A. The additional input is mixed into
1299 * the state in addition to the pulled entropy. 1281 * the state in addition to the pulled entropy.
1300 * 1282 *
1301 * return: generated number of bytes 1283 * return: 0 when all bytes are generated; < 0 in case of an error
1302 */ 1284 */
1303static int drbg_generate(struct drbg_state *drbg, 1285static int drbg_generate(struct drbg_state *drbg,
1304 unsigned char *buf, unsigned int buflen, 1286 unsigned char *buf, unsigned int buflen,
@@ -1437,6 +1419,11 @@ static int drbg_generate(struct drbg_state *drbg,
1437 } 1419 }
1438#endif 1420#endif
1439 1421
1422 /*
1423 * All operations were successful, return 0 as mandated by
1424 * the kernel crypto API interface.
1425 */
1426 len = 0;
1440err: 1427err:
1441 shadow->d_ops->crypto_fini(shadow); 1428 shadow->d_ops->crypto_fini(shadow);
1442 drbg_restore_shadow(drbg, &shadow); 1429 drbg_restore_shadow(drbg, &shadow);
@@ -1644,24 +1631,24 @@ static int drbg_kcapi_hash(struct drbg_state *drbg, const unsigned char *key,
1644static int drbg_init_sym_kernel(struct drbg_state *drbg) 1631static int drbg_init_sym_kernel(struct drbg_state *drbg)
1645{ 1632{
1646 int ret = 0; 1633 int ret = 0;
1647 struct crypto_blkcipher *tfm; 1634 struct crypto_cipher *tfm;
1648 1635
1649 tfm = crypto_alloc_blkcipher(drbg->core->backend_cra_name, 0, 0); 1636 tfm = crypto_alloc_cipher(drbg->core->backend_cra_name, 0, 0);
1650 if (IS_ERR(tfm)) { 1637 if (IS_ERR(tfm)) {
1651 pr_info("DRBG: could not allocate cipher TFM handle\n"); 1638 pr_info("DRBG: could not allocate cipher TFM handle\n");
1652 return PTR_ERR(tfm); 1639 return PTR_ERR(tfm);
1653 } 1640 }
1654 BUG_ON(drbg_blocklen(drbg) != crypto_blkcipher_blocksize(tfm)); 1641 BUG_ON(drbg_blocklen(drbg) != crypto_cipher_blocksize(tfm));
1655 drbg->priv_data = tfm; 1642 drbg->priv_data = tfm;
1656 return ret; 1643 return ret;
1657} 1644}
1658 1645
1659static int drbg_fini_sym_kernel(struct drbg_state *drbg) 1646static int drbg_fini_sym_kernel(struct drbg_state *drbg)
1660{ 1647{
1661 struct crypto_blkcipher *tfm = 1648 struct crypto_cipher *tfm =
1662 (struct crypto_blkcipher *)drbg->priv_data; 1649 (struct crypto_cipher *)drbg->priv_data;
1663 if (tfm) 1650 if (tfm)
1664 crypto_free_blkcipher(tfm); 1651 crypto_free_cipher(tfm);
1665 drbg->priv_data = NULL; 1652 drbg->priv_data = NULL;
1666 return 0; 1653 return 0;
1667} 1654}
@@ -1669,21 +1656,14 @@ static int drbg_fini_sym_kernel(struct drbg_state *drbg)
1669static int drbg_kcapi_sym(struct drbg_state *drbg, const unsigned char *key, 1656static int drbg_kcapi_sym(struct drbg_state *drbg, const unsigned char *key,
1670 unsigned char *outval, const struct drbg_string *in) 1657 unsigned char *outval, const struct drbg_string *in)
1671{ 1658{
1672 int ret = 0; 1659 struct crypto_cipher *tfm =
1673 struct scatterlist sg_in, sg_out; 1660 (struct crypto_cipher *)drbg->priv_data;
1674 struct blkcipher_desc desc;
1675 struct crypto_blkcipher *tfm =
1676 (struct crypto_blkcipher *)drbg->priv_data;
1677
1678 desc.tfm = tfm;
1679 desc.flags = 0;
1680 crypto_blkcipher_setkey(tfm, key, (drbg_keylen(drbg)));
1681 /* there is only one component in *in */
1682 sg_init_one(&sg_in, in->buf, in->len);
1683 sg_init_one(&sg_out, outval, drbg_blocklen(drbg));
1684 ret = crypto_blkcipher_encrypt(&desc, &sg_out, &sg_in, in->len);
1685 1661
1686 return ret; 1662 crypto_cipher_setkey(tfm, key, (drbg_keylen(drbg)));
1663 /* there is only one component in *in */
1664 BUG_ON(in->len < drbg_blocklen(drbg));
1665 crypto_cipher_encrypt_one(tfm, outval, in->buf);
1666 return 0;
1687} 1667}
1688#endif /* CONFIG_CRYPTO_DRBG_CTR */ 1668#endif /* CONFIG_CRYPTO_DRBG_CTR */
1689 1669
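The drbg.c changes drop the scatterlist/blkcipher machinery in favour of the single-block cipher API, which is all the CTR DRBG needs since it only ever encrypts one block per call (hence the backend name change from "ecb(aes)" to plain "aes"). A hedged, self-contained sketch of that API; the helper name and parameters are illustrative only:

#include <linux/crypto.h>
#include <linux/err.h>

/* Encrypt exactly one AES block; mirrors what the new drbg_kcapi_sym() relies on. */
static int aes_encrypt_one_block(const u8 *key, unsigned int keylen,
				 const u8 *in, u8 *out)
{
	struct crypto_cipher *tfm;
	int err;

	tfm = crypto_alloc_cipher("aes", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_cipher_setkey(tfm, key, keylen);
	if (!err)
		crypto_cipher_encrypt_one(tfm, out, in);	/* one 16-byte block */

	crypto_free_cipher(tfm);
	return err;
}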
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index a8e870444ea9..fe5b495a434d 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -258,6 +258,20 @@ out_free_inst:
258 goto out; 258 goto out;
259} 259}
260 260
261static inline void mcryptd_check_internal(struct rtattr **tb, u32 *type,
262 u32 *mask)
263{
264 struct crypto_attr_type *algt;
265
266 algt = crypto_get_attr_type(tb);
267 if (IS_ERR(algt))
268 return;
269 if ((algt->type & CRYPTO_ALG_INTERNAL))
270 *type |= CRYPTO_ALG_INTERNAL;
271 if ((algt->mask & CRYPTO_ALG_INTERNAL))
272 *mask |= CRYPTO_ALG_INTERNAL;
273}
274
261static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) 275static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
262{ 276{
263 struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); 277 struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
@@ -480,9 +494,13 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
480 struct ahash_instance *inst; 494 struct ahash_instance *inst;
481 struct shash_alg *salg; 495 struct shash_alg *salg;
482 struct crypto_alg *alg; 496 struct crypto_alg *alg;
497 u32 type = 0;
498 u32 mask = 0;
483 int err; 499 int err;
484 500
485 salg = shash_attr_alg(tb[1], 0, 0); 501 mcryptd_check_internal(tb, &type, &mask);
502
503 salg = shash_attr_alg(tb[1], type, mask);
486 if (IS_ERR(salg)) 504 if (IS_ERR(salg))
487 return PTR_ERR(salg); 505 return PTR_ERR(salg);
488 506
@@ -502,7 +520,10 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
502 if (err) 520 if (err)
503 goto out_free_inst; 521 goto out_free_inst;
504 522
505 inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC; 523 type = CRYPTO_ALG_ASYNC;
524 if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
525 type |= CRYPTO_ALG_INTERNAL;
526 inst->alg.halg.base.cra_flags = type;
506 527
507 inst->alg.halg.digestsize = salg->digestsize; 528 inst->alg.halg.digestsize = salg->digestsize;
508 inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); 529 inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
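Taken together, the two mcryptd hunks let the template wrap algorithms flagged CRYPTO_ALG_INTERNAL (by forwarding the caller's internal type/mask bits to shash_attr_alg()) and then mark the resulting async instance as internal as well, so wrapping an internal-only helper does not re-expose it to ordinary users.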
diff --git a/crypto/proc.c b/crypto/proc.c
index 4a0a7aad2204..4ffe73b51612 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -89,6 +89,9 @@ static int c_show(struct seq_file *m, void *p)
89 seq_printf(m, "selftest : %s\n", 89 seq_printf(m, "selftest : %s\n",
90 (alg->cra_flags & CRYPTO_ALG_TESTED) ? 90 (alg->cra_flags & CRYPTO_ALG_TESTED) ?
91 "passed" : "unknown"); 91 "passed" : "unknown");
92 seq_printf(m, "internal : %s\n",
93 (alg->cra_flags & CRYPTO_ALG_INTERNAL) ?
94 "yes" : "no");
92 95
93 if (alg->cra_flags & CRYPTO_ALG_LARVAL) { 96 if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
94 seq_printf(m, "type : larval\n"); 97 seq_printf(m, "type : larval\n");
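With the extra seq_printf(), each /proc/crypto entry now states whether an algorithm is reserved for in-kernel composition. An illustrative excerpt (field values are examples, not taken from a specific system):

name         : sha1
driver       : sha1-generic
selftest     : passed
internal     : no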
diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c
index a3e50c37eb6f..39e3acc438d9 100644
--- a/crypto/sha1_generic.c
+++ b/crypto/sha1_generic.c
@@ -23,111 +23,49 @@
23#include <linux/cryptohash.h> 23#include <linux/cryptohash.h>
24#include <linux/types.h> 24#include <linux/types.h>
25#include <crypto/sha.h> 25#include <crypto/sha.h>
26#include <crypto/sha1_base.h>
26#include <asm/byteorder.h> 27#include <asm/byteorder.h>
27 28
28static int sha1_init(struct shash_desc *desc) 29static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src,
30 int blocks)
29{ 31{
30 struct sha1_state *sctx = shash_desc_ctx(desc); 32 u32 temp[SHA_WORKSPACE_WORDS];
31 33
32 *sctx = (struct sha1_state){ 34 while (blocks--) {
33 .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, 35 sha_transform(sst->state, src, temp);
34 }; 36 src += SHA1_BLOCK_SIZE;
35 37 }
36 return 0; 38 memzero_explicit(temp, sizeof(temp));
37} 39}
38 40
39int crypto_sha1_update(struct shash_desc *desc, const u8 *data, 41int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
40 unsigned int len) 42 unsigned int len)
41{ 43{
42 struct sha1_state *sctx = shash_desc_ctx(desc); 44 return sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
43 unsigned int partial, done;
44 const u8 *src;
45
46 partial = sctx->count % SHA1_BLOCK_SIZE;
47 sctx->count += len;
48 done = 0;
49 src = data;
50
51 if ((partial + len) >= SHA1_BLOCK_SIZE) {
52 u32 temp[SHA_WORKSPACE_WORDS];
53
54 if (partial) {
55 done = -partial;
56 memcpy(sctx->buffer + partial, data,
57 done + SHA1_BLOCK_SIZE);
58 src = sctx->buffer;
59 }
60
61 do {
62 sha_transform(sctx->state, src, temp);
63 done += SHA1_BLOCK_SIZE;
64 src = data + done;
65 } while (done + SHA1_BLOCK_SIZE <= len);
66
67 memzero_explicit(temp, sizeof(temp));
68 partial = 0;
69 }
70 memcpy(sctx->buffer + partial, src, len - done);
71
72 return 0;
73} 45}
74EXPORT_SYMBOL(crypto_sha1_update); 46EXPORT_SYMBOL(crypto_sha1_update);
75 47
76
77/* Add padding and return the message digest. */
78static int sha1_final(struct shash_desc *desc, u8 *out) 48static int sha1_final(struct shash_desc *desc, u8 *out)
79{ 49{
80 struct sha1_state *sctx = shash_desc_ctx(desc); 50 sha1_base_do_finalize(desc, sha1_generic_block_fn);
81 __be32 *dst = (__be32 *)out; 51 return sha1_base_finish(desc, out);
82 u32 i, index, padlen;
83 __be64 bits;
84 static const u8 padding[64] = { 0x80, };
85
86 bits = cpu_to_be64(sctx->count << 3);
87
88 /* Pad out to 56 mod 64 */
89 index = sctx->count & 0x3f;
90 padlen = (index < 56) ? (56 - index) : ((64+56) - index);
91 crypto_sha1_update(desc, padding, padlen);
92
93 /* Append length */
94 crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
95
96 /* Store state in digest */
97 for (i = 0; i < 5; i++)
98 dst[i] = cpu_to_be32(sctx->state[i]);
99
100 /* Wipe context */
101 memset(sctx, 0, sizeof *sctx);
102
103 return 0;
104} 52}
105 53
106static int sha1_export(struct shash_desc *desc, void *out) 54int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
55 unsigned int len, u8 *out)
107{ 56{
108 struct sha1_state *sctx = shash_desc_ctx(desc); 57 sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
109 58 return sha1_final(desc, out);
110 memcpy(out, sctx, sizeof(*sctx));
111 return 0;
112}
113
114static int sha1_import(struct shash_desc *desc, const void *in)
115{
116 struct sha1_state *sctx = shash_desc_ctx(desc);
117
118 memcpy(sctx, in, sizeof(*sctx));
119 return 0;
120} 59}
60EXPORT_SYMBOL(crypto_sha1_finup);
121 61
122static struct shash_alg alg = { 62static struct shash_alg alg = {
123 .digestsize = SHA1_DIGEST_SIZE, 63 .digestsize = SHA1_DIGEST_SIZE,
124 .init = sha1_init, 64 .init = sha1_base_init,
125 .update = crypto_sha1_update, 65 .update = crypto_sha1_update,
126 .final = sha1_final, 66 .final = sha1_final,
127 .export = sha1_export, 67 .finup = crypto_sha1_finup,
128 .import = sha1_import,
129 .descsize = sizeof(struct sha1_state), 68 .descsize = sizeof(struct sha1_state),
130 .statesize = sizeof(struct sha1_state),
131 .base = { 69 .base = {
132 .cra_name = "sha1", 70 .cra_name = "sha1",
133 .cra_driver_name= "sha1-generic", 71 .cra_driver_name= "sha1-generic",
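The sha1/sha256/sha512 _generic rewrites all follow the same shape: the driver keeps only a block-compression callback and delegates buffering, padding and length encoding to the sha*_base helpers. Condensed from the sha1 hunk above (the sha256 and sha512 conversions below are analogous):

#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <linux/cryptohash.h>
#include <linux/string.h>

static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src,
				  int blocks)
{
	u32 temp[SHA_WORKSPACE_WORDS];

	while (blocks--) {
		sha_transform(sst->state, src, temp);	/* lib/sha1.c compression */
		src += SHA1_BLOCK_SIZE;
	}
	memzero_explicit(temp, sizeof(temp));		/* scrub the workspace */
}

/* update: buffer partial blocks, run the block function over full ones */
static int sha1_update_sketch(struct shash_desc *desc, const u8 *data,
			      unsigned int len)
{
	return sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
}

/* final: pad, append the bit count, emit the big-endian digest */
static int sha1_final_sketch(struct shash_desc *desc, u8 *out)
{
	sha1_base_do_finalize(desc, sha1_generic_block_fn);
	return sha1_base_finish(desc, out);
}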
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
index b001ff5c2efc..78431163ed3c 100644
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
@@ -23,6 +23,7 @@
23#include <linux/mm.h> 23#include <linux/mm.h>
24#include <linux/types.h> 24#include <linux/types.h>
25#include <crypto/sha.h> 25#include <crypto/sha.h>
26#include <crypto/sha256_base.h>
26#include <asm/byteorder.h> 27#include <asm/byteorder.h>
27#include <asm/unaligned.h> 28#include <asm/unaligned.h>
28 29
@@ -214,138 +215,43 @@ static void sha256_transform(u32 *state, const u8 *input)
214 memzero_explicit(W, 64 * sizeof(u32)); 215 memzero_explicit(W, 64 * sizeof(u32));
215} 216}
216 217
217static int sha224_init(struct shash_desc *desc) 218static void sha256_generic_block_fn(struct sha256_state *sst, u8 const *src,
219 int blocks)
218{ 220{
219 struct sha256_state *sctx = shash_desc_ctx(desc); 221 while (blocks--) {
220 sctx->state[0] = SHA224_H0; 222 sha256_transform(sst->state, src);
221 sctx->state[1] = SHA224_H1; 223 src += SHA256_BLOCK_SIZE;
222 sctx->state[2] = SHA224_H2; 224 }
223 sctx->state[3] = SHA224_H3;
224 sctx->state[4] = SHA224_H4;
225 sctx->state[5] = SHA224_H5;
226 sctx->state[6] = SHA224_H6;
227 sctx->state[7] = SHA224_H7;
228 sctx->count = 0;
229
230 return 0;
231}
232
233static int sha256_init(struct shash_desc *desc)
234{
235 struct sha256_state *sctx = shash_desc_ctx(desc);
236 sctx->state[0] = SHA256_H0;
237 sctx->state[1] = SHA256_H1;
238 sctx->state[2] = SHA256_H2;
239 sctx->state[3] = SHA256_H3;
240 sctx->state[4] = SHA256_H4;
241 sctx->state[5] = SHA256_H5;
242 sctx->state[6] = SHA256_H6;
243 sctx->state[7] = SHA256_H7;
244 sctx->count = 0;
245
246 return 0;
247} 225}
248 226
249int crypto_sha256_update(struct shash_desc *desc, const u8 *data, 227int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
250 unsigned int len) 228 unsigned int len)
251{ 229{
252 struct sha256_state *sctx = shash_desc_ctx(desc); 230 return sha256_base_do_update(desc, data, len, sha256_generic_block_fn);
253 unsigned int partial, done;
254 const u8 *src;
255
256 partial = sctx->count & 0x3f;
257 sctx->count += len;
258 done = 0;
259 src = data;
260
261 if ((partial + len) > 63) {
262 if (partial) {
263 done = -partial;
264 memcpy(sctx->buf + partial, data, done + 64);
265 src = sctx->buf;
266 }
267
268 do {
269 sha256_transform(sctx->state, src);
270 done += 64;
271 src = data + done;
272 } while (done + 63 < len);
273
274 partial = 0;
275 }
276 memcpy(sctx->buf + partial, src, len - done);
277
278 return 0;
279} 231}
280EXPORT_SYMBOL(crypto_sha256_update); 232EXPORT_SYMBOL(crypto_sha256_update);
281 233
282static int sha256_final(struct shash_desc *desc, u8 *out) 234static int sha256_final(struct shash_desc *desc, u8 *out)
283{ 235{
284 struct sha256_state *sctx = shash_desc_ctx(desc); 236 sha256_base_do_finalize(desc, sha256_generic_block_fn);
285 __be32 *dst = (__be32 *)out; 237 return sha256_base_finish(desc, out);
286 __be64 bits;
287 unsigned int index, pad_len;
288 int i;
289 static const u8 padding[64] = { 0x80, };
290
291 /* Save number of bits */
292 bits = cpu_to_be64(sctx->count << 3);
293
294 /* Pad out to 56 mod 64. */
295 index = sctx->count & 0x3f;
296 pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
297 crypto_sha256_update(desc, padding, pad_len);
298
299 /* Append length (before padding) */
300 crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
301
302 /* Store state in digest */
303 for (i = 0; i < 8; i++)
304 dst[i] = cpu_to_be32(sctx->state[i]);
305
306 /* Zeroize sensitive information. */
307 memset(sctx, 0, sizeof(*sctx));
308
309 return 0;
310} 238}
311 239
312static int sha224_final(struct shash_desc *desc, u8 *hash) 240int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
241 unsigned int len, u8 *hash)
313{ 242{
314 u8 D[SHA256_DIGEST_SIZE]; 243 sha256_base_do_update(desc, data, len, sha256_generic_block_fn);
315 244 return sha256_final(desc, hash);
316 sha256_final(desc, D);
317
318 memcpy(hash, D, SHA224_DIGEST_SIZE);
319 memzero_explicit(D, SHA256_DIGEST_SIZE);
320
321 return 0;
322}
323
324static int sha256_export(struct shash_desc *desc, void *out)
325{
326 struct sha256_state *sctx = shash_desc_ctx(desc);
327
328 memcpy(out, sctx, sizeof(*sctx));
329 return 0;
330}
331
332static int sha256_import(struct shash_desc *desc, const void *in)
333{
334 struct sha256_state *sctx = shash_desc_ctx(desc);
335
336 memcpy(sctx, in, sizeof(*sctx));
337 return 0;
338} 245}
246EXPORT_SYMBOL(crypto_sha256_finup);
339 247
340static struct shash_alg sha256_algs[2] = { { 248static struct shash_alg sha256_algs[2] = { {
341 .digestsize = SHA256_DIGEST_SIZE, 249 .digestsize = SHA256_DIGEST_SIZE,
342 .init = sha256_init, 250 .init = sha256_base_init,
343 .update = crypto_sha256_update, 251 .update = crypto_sha256_update,
344 .final = sha256_final, 252 .final = sha256_final,
345 .export = sha256_export, 253 .finup = crypto_sha256_finup,
346 .import = sha256_import,
347 .descsize = sizeof(struct sha256_state), 254 .descsize = sizeof(struct sha256_state),
348 .statesize = sizeof(struct sha256_state),
349 .base = { 255 .base = {
350 .cra_name = "sha256", 256 .cra_name = "sha256",
351 .cra_driver_name= "sha256-generic", 257 .cra_driver_name= "sha256-generic",
@@ -355,9 +261,10 @@ static struct shash_alg sha256_algs[2] = { {
355 } 261 }
356}, { 262}, {
357 .digestsize = SHA224_DIGEST_SIZE, 263 .digestsize = SHA224_DIGEST_SIZE,
358 .init = sha224_init, 264 .init = sha224_base_init,
359 .update = crypto_sha256_update, 265 .update = crypto_sha256_update,
360 .final = sha224_final, 266 .final = sha256_final,
267 .finup = crypto_sha256_finup,
361 .descsize = sizeof(struct sha256_state), 268 .descsize = sizeof(struct sha256_state),
362 .base = { 269 .base = {
363 .cra_name = "sha224", 270 .cra_name = "sha224",
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 1c3c3767e079..eba965d18bfc 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -18,6 +18,7 @@
18#include <linux/crypto.h> 18#include <linux/crypto.h>
19#include <linux/types.h> 19#include <linux/types.h>
20#include <crypto/sha.h> 20#include <crypto/sha.h>
21#include <crypto/sha512_base.h>
21#include <linux/percpu.h> 22#include <linux/percpu.h>
22#include <asm/byteorder.h> 23#include <asm/byteorder.h>
23#include <asm/unaligned.h> 24#include <asm/unaligned.h>
@@ -130,125 +131,42 @@ sha512_transform(u64 *state, const u8 *input)
130 a = b = c = d = e = f = g = h = t1 = t2 = 0; 131 a = b = c = d = e = f = g = h = t1 = t2 = 0;
131} 132}
132 133
133static int 134static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,
134sha512_init(struct shash_desc *desc) 135 int blocks)
135{ 136{
136 struct sha512_state *sctx = shash_desc_ctx(desc); 137 while (blocks--) {
137 sctx->state[0] = SHA512_H0; 138 sha512_transform(sst->state, src);
138 sctx->state[1] = SHA512_H1; 139 src += SHA512_BLOCK_SIZE;
139 sctx->state[2] = SHA512_H2; 140 }
140 sctx->state[3] = SHA512_H3;
141 sctx->state[4] = SHA512_H4;
142 sctx->state[5] = SHA512_H5;
143 sctx->state[6] = SHA512_H6;
144 sctx->state[7] = SHA512_H7;
145 sctx->count[0] = sctx->count[1] = 0;
146
147 return 0;
148}
149
150static int
151sha384_init(struct shash_desc *desc)
152{
153 struct sha512_state *sctx = shash_desc_ctx(desc);
154 sctx->state[0] = SHA384_H0;
155 sctx->state[1] = SHA384_H1;
156 sctx->state[2] = SHA384_H2;
157 sctx->state[3] = SHA384_H3;
158 sctx->state[4] = SHA384_H4;
159 sctx->state[5] = SHA384_H5;
160 sctx->state[6] = SHA384_H6;
161 sctx->state[7] = SHA384_H7;
162 sctx->count[0] = sctx->count[1] = 0;
163
164 return 0;
165} 141}
166 142
167int crypto_sha512_update(struct shash_desc *desc, const u8 *data, 143int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
168 unsigned int len) 144 unsigned int len)
169{ 145{
170 struct sha512_state *sctx = shash_desc_ctx(desc); 146 return sha512_base_do_update(desc, data, len, sha512_generic_block_fn);
171
172 unsigned int i, index, part_len;
173
174 /* Compute number of bytes mod 128 */
175 index = sctx->count[0] & 0x7f;
176
177 /* Update number of bytes */
178 if ((sctx->count[0] += len) < len)
179 sctx->count[1]++;
180
181 part_len = 128 - index;
182
183 /* Transform as many times as possible. */
184 if (len >= part_len) {
185 memcpy(&sctx->buf[index], data, part_len);
186 sha512_transform(sctx->state, sctx->buf);
187
188 for (i = part_len; i + 127 < len; i+=128)
189 sha512_transform(sctx->state, &data[i]);
190
191 index = 0;
192 } else {
193 i = 0;
194 }
195
196 /* Buffer remaining input */
197 memcpy(&sctx->buf[index], &data[i], len - i);
198
199 return 0;
200} 147}
201EXPORT_SYMBOL(crypto_sha512_update); 148EXPORT_SYMBOL(crypto_sha512_update);
202 149
203static int 150static int sha512_final(struct shash_desc *desc, u8 *hash)
204sha512_final(struct shash_desc *desc, u8 *hash)
205{ 151{
206 struct sha512_state *sctx = shash_desc_ctx(desc); 152 sha512_base_do_finalize(desc, sha512_generic_block_fn);
207 static u8 padding[128] = { 0x80, }; 153 return sha512_base_finish(desc, hash);
208 __be64 *dst = (__be64 *)hash;
209 __be64 bits[2];
210 unsigned int index, pad_len;
211 int i;
212
213 /* Save number of bits */
214 bits[1] = cpu_to_be64(sctx->count[0] << 3);
215 bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
216
217 /* Pad out to 112 mod 128. */
218 index = sctx->count[0] & 0x7f;
219 pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
220 crypto_sha512_update(desc, padding, pad_len);
221
222 /* Append length (before padding) */
223 crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits));
224
225 /* Store state in digest */
226 for (i = 0; i < 8; i++)
227 dst[i] = cpu_to_be64(sctx->state[i]);
228
229 /* Zeroize sensitive information. */
230 memset(sctx, 0, sizeof(struct sha512_state));
231
232 return 0;
233} 154}
234 155
235static int sha384_final(struct shash_desc *desc, u8 *hash) 156int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
157 unsigned int len, u8 *hash)
236{ 158{
237 u8 D[64]; 159 sha512_base_do_update(desc, data, len, sha512_generic_block_fn);
238 160 return sha512_final(desc, hash);
239 sha512_final(desc, D);
240
241 memcpy(hash, D, 48);
242 memzero_explicit(D, 64);
243
244 return 0;
245} 161}
162EXPORT_SYMBOL(crypto_sha512_finup);
246 163
247static struct shash_alg sha512_algs[2] = { { 164static struct shash_alg sha512_algs[2] = { {
248 .digestsize = SHA512_DIGEST_SIZE, 165 .digestsize = SHA512_DIGEST_SIZE,
249 .init = sha512_init, 166 .init = sha512_base_init,
250 .update = crypto_sha512_update, 167 .update = crypto_sha512_update,
251 .final = sha512_final, 168 .final = sha512_final,
169 .finup = crypto_sha512_finup,
252 .descsize = sizeof(struct sha512_state), 170 .descsize = sizeof(struct sha512_state),
253 .base = { 171 .base = {
254 .cra_name = "sha512", 172 .cra_name = "sha512",
@@ -259,9 +177,10 @@ static struct shash_alg sha512_algs[2] = { {
259 } 177 }
260}, { 178}, {
261 .digestsize = SHA384_DIGEST_SIZE, 179 .digestsize = SHA384_DIGEST_SIZE,
262 .init = sha384_init, 180 .init = sha384_base_init,
263 .update = crypto_sha512_update, 181 .update = crypto_sha512_update,
264 .final = sha384_final, 182 .final = sha512_final,
183 .finup = crypto_sha512_finup,
265 .descsize = sizeof(struct sha512_state), 184 .descsize = sizeof(struct sha512_state),
266 .base = { 185 .base = {
267 .cra_name = "sha384", 186 .cra_name = "sha384",
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 4b9e23fa4204..1a2800107fc8 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1155,9 +1155,9 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs,
1155 goto out_free_req; 1155 goto out_free_req;
1156 } 1156 }
1157 1157
1158 sg_init_table(sg, TVMEMSIZE);
1159
1160 k = *keysize + *b_size; 1158 k = *keysize + *b_size;
1159 sg_init_table(sg, DIV_ROUND_UP(k, PAGE_SIZE));
1160
1161 if (k > PAGE_SIZE) { 1161 if (k > PAGE_SIZE) {
1162 sg_set_buf(sg, tvmem[0] + *keysize, 1162 sg_set_buf(sg, tvmem[0] + *keysize,
1163 PAGE_SIZE - *keysize); 1163 PAGE_SIZE - *keysize);
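The effect of the tcrypt change is that the scatterlist end marker lands on the last entry actually used: for example, with *keysize = 32 and *b_size = 8192 on 4 KiB pages, k = 8224 and DIV_ROUND_UP(k, PAGE_SIZE) = 3, so a three-entry table is initialized instead of one sized unconditionally to TVMEMSIZE.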
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index f4ed6d4205e7..f9bce3d7ee7f 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1474,11 +1474,11 @@ static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template,
1474 for (j = 0; j < template[i].loops; j++) { 1474 for (j = 0; j < template[i].loops; j++) {
1475 err = crypto_rng_get_bytes(tfm, result, 1475 err = crypto_rng_get_bytes(tfm, result,
1476 template[i].rlen); 1476 template[i].rlen);
1477 if (err != template[i].rlen) { 1477 if (err < 0) {
1478 printk(KERN_ERR "alg: cprng: Failed to obtain " 1478 printk(KERN_ERR "alg: cprng: Failed to obtain "
1479 "the correct amount of random data for " 1479 "the correct amount of random data for "
1480 "%s (requested %d, got %d)\n", algo, 1480 "%s (requested %d)\n", algo,
1481 template[i].rlen, err); 1481 template[i].rlen);
1482 goto out; 1482 goto out;
1483 } 1483 }
1484 } 1484 }
@@ -1505,7 +1505,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver,
1505 struct crypto_aead *tfm; 1505 struct crypto_aead *tfm;
1506 int err = 0; 1506 int err = 0;
1507 1507
1508 tfm = crypto_alloc_aead(driver, type, mask); 1508 tfm = crypto_alloc_aead(driver, type | CRYPTO_ALG_INTERNAL, mask);
1509 if (IS_ERR(tfm)) { 1509 if (IS_ERR(tfm)) {
1510 printk(KERN_ERR "alg: aead: Failed to load transform for %s: " 1510 printk(KERN_ERR "alg: aead: Failed to load transform for %s: "
1511 "%ld\n", driver, PTR_ERR(tfm)); 1511 "%ld\n", driver, PTR_ERR(tfm));
@@ -1534,7 +1534,7 @@ static int alg_test_cipher(const struct alg_test_desc *desc,
1534 struct crypto_cipher *tfm; 1534 struct crypto_cipher *tfm;
1535 int err = 0; 1535 int err = 0;
1536 1536
1537 tfm = crypto_alloc_cipher(driver, type, mask); 1537 tfm = crypto_alloc_cipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
1538 if (IS_ERR(tfm)) { 1538 if (IS_ERR(tfm)) {
1539 printk(KERN_ERR "alg: cipher: Failed to load transform for " 1539 printk(KERN_ERR "alg: cipher: Failed to load transform for "
1540 "%s: %ld\n", driver, PTR_ERR(tfm)); 1540 "%s: %ld\n", driver, PTR_ERR(tfm));
@@ -1563,7 +1563,7 @@ static int alg_test_skcipher(const struct alg_test_desc *desc,
1563 struct crypto_ablkcipher *tfm; 1563 struct crypto_ablkcipher *tfm;
1564 int err = 0; 1564 int err = 0;
1565 1565
1566 tfm = crypto_alloc_ablkcipher(driver, type, mask); 1566 tfm = crypto_alloc_ablkcipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
1567 if (IS_ERR(tfm)) { 1567 if (IS_ERR(tfm)) {
1568 printk(KERN_ERR "alg: skcipher: Failed to load transform for " 1568 printk(KERN_ERR "alg: skcipher: Failed to load transform for "
1569 "%s: %ld\n", driver, PTR_ERR(tfm)); 1569 "%s: %ld\n", driver, PTR_ERR(tfm));
@@ -1636,7 +1636,7 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver,
1636 struct crypto_ahash *tfm; 1636 struct crypto_ahash *tfm;
1637 int err; 1637 int err;
1638 1638
1639 tfm = crypto_alloc_ahash(driver, type, mask); 1639 tfm = crypto_alloc_ahash(driver, type | CRYPTO_ALG_INTERNAL, mask);
1640 if (IS_ERR(tfm)) { 1640 if (IS_ERR(tfm)) {
1641 printk(KERN_ERR "alg: hash: Failed to load transform for %s: " 1641 printk(KERN_ERR "alg: hash: Failed to load transform for %s: "
1642 "%ld\n", driver, PTR_ERR(tfm)); 1642 "%ld\n", driver, PTR_ERR(tfm));
@@ -1664,7 +1664,7 @@ static int alg_test_crc32c(const struct alg_test_desc *desc,
1664 if (err) 1664 if (err)
1665 goto out; 1665 goto out;
1666 1666
1667 tfm = crypto_alloc_shash(driver, type, mask); 1667 tfm = crypto_alloc_shash(driver, type | CRYPTO_ALG_INTERNAL, mask);
1668 if (IS_ERR(tfm)) { 1668 if (IS_ERR(tfm)) {
1669 printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: " 1669 printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: "
1670 "%ld\n", driver, PTR_ERR(tfm)); 1670 "%ld\n", driver, PTR_ERR(tfm));
@@ -1706,7 +1706,7 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver,
1706 struct crypto_rng *rng; 1706 struct crypto_rng *rng;
1707 int err; 1707 int err;
1708 1708
1709 rng = crypto_alloc_rng(driver, type, mask); 1709 rng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
1710 if (IS_ERR(rng)) { 1710 if (IS_ERR(rng)) {
1711 printk(KERN_ERR "alg: cprng: Failed to load transform for %s: " 1711 printk(KERN_ERR "alg: cprng: Failed to load transform for %s: "
1712 "%ld\n", driver, PTR_ERR(rng)); 1712 "%ld\n", driver, PTR_ERR(rng));
@@ -1733,7 +1733,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
1733 if (!buf) 1733 if (!buf)
1734 return -ENOMEM; 1734 return -ENOMEM;
1735 1735
1736 drng = crypto_alloc_rng(driver, type, mask); 1736 drng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
1737 if (IS_ERR(drng)) { 1737 if (IS_ERR(drng)) {
1738 printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for " 1738 printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for "
1739 "%s\n", driver); 1739 "%s\n", driver);
@@ -1759,7 +1759,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
1759 ret = crypto_drbg_get_bytes_addtl(drng, 1759 ret = crypto_drbg_get_bytes_addtl(drng,
1760 buf, test->expectedlen, &addtl); 1760 buf, test->expectedlen, &addtl);
1761 } 1761 }
1762 if (ret <= 0) { 1762 if (ret < 0) {
1763 printk(KERN_ERR "alg: drbg: could not obtain random data for " 1763 printk(KERN_ERR "alg: drbg: could not obtain random data for "
1764 "driver %s\n", driver); 1764 "driver %s\n", driver);
1765 goto outbuf; 1765 goto outbuf;
@@ -1774,7 +1774,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
1774 ret = crypto_drbg_get_bytes_addtl(drng, 1774 ret = crypto_drbg_get_bytes_addtl(drng,
1775 buf, test->expectedlen, &addtl); 1775 buf, test->expectedlen, &addtl);
1776 } 1776 }
1777 if (ret <= 0) { 1777 if (ret < 0) {
1778 printk(KERN_ERR "alg: drbg: could not obtain random data for " 1778 printk(KERN_ERR "alg: drbg: could not obtain random data for "
1779 "driver %s\n", driver); 1779 "driver %s\n", driver);
1780 goto outbuf; 1780 goto outbuf;
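The repeated "type | CRYPTO_ALG_INTERNAL" additions in testmgr.c are the flip side of hiding internal helpers: the algorithm lookup normally refuses CRYPTO_ALG_INTERNAL candidates unless the caller mentions the flag, so the self-test code sets it explicitly in the requested type to keep exercising those implementations.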
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index de57b38809c7..f48cf11c655e 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -101,6 +101,19 @@ config HW_RANDOM_BCM2835
101 101
102 If unsure, say Y. 102 If unsure, say Y.
103 103
104config HW_RANDOM_IPROC_RNG200
105 tristate "Broadcom iProc RNG200 support"
106 depends on ARCH_BCM_IPROC
107 default HW_RANDOM
108 ---help---
109 This driver provides kernel-side support for the RNG200
110 hardware found on the Broadcom iProc SoCs.
111
112 To compile this driver as a module, choose M here: the
113 module will be called iproc-rng200
114
115 If unsure, say Y.
116
104config HW_RANDOM_GEODE 117config HW_RANDOM_GEODE
105 tristate "AMD Geode HW Random Number Generator support" 118 tristate "AMD Geode HW Random Number Generator support"
106 depends on X86_32 && PCI 119 depends on X86_32 && PCI
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 0b4cd57f4e24..055bb01510ad 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -28,5 +28,6 @@ obj-$(CONFIG_HW_RANDOM_POWERNV) += powernv-rng.o
28obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o 28obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o
29obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o 29obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o
30obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o 30obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o
31obj-$(CONFIG_HW_RANDOM_IPROC_RNG200) += iproc-rng200.o
31obj-$(CONFIG_HW_RANDOM_MSM) += msm-rng.o 32obj-$(CONFIG_HW_RANDOM_MSM) += msm-rng.o
32obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o 33obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o
diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c
index ba6a65ac023b..d1494ecd9e11 100644
--- a/drivers/char/hw_random/bcm63xx-rng.c
+++ b/drivers/char/hw_random/bcm63xx-rng.c
@@ -13,24 +13,37 @@
13#include <linux/platform_device.h> 13#include <linux/platform_device.h>
14#include <linux/hw_random.h> 14#include <linux/hw_random.h>
15 15
16#include <bcm63xx_io.h> 16#define RNG_CTRL 0x00
17#include <bcm63xx_regs.h> 17#define RNG_EN (1 << 0)
18
19#define RNG_STAT 0x04
20#define RNG_AVAIL_MASK (0xff000000)
21
22#define RNG_DATA 0x08
23#define RNG_THRES 0x0c
24#define RNG_MASK 0x10
18 25
19struct bcm63xx_rng_priv { 26struct bcm63xx_rng_priv {
27 struct hwrng rng;
20 struct clk *clk; 28 struct clk *clk;
21 void __iomem *regs; 29 void __iomem *regs;
22}; 30};
23 31
24#define to_rng_priv(rng) ((struct bcm63xx_rng_priv *)rng->priv) 32#define to_rng_priv(rng) container_of(rng, struct bcm63xx_rng_priv, rng)
25 33
26static int bcm63xx_rng_init(struct hwrng *rng) 34static int bcm63xx_rng_init(struct hwrng *rng)
27{ 35{
28 struct bcm63xx_rng_priv *priv = to_rng_priv(rng); 36 struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
29 u32 val; 37 u32 val;
38 int error;
39
40 error = clk_prepare_enable(priv->clk);
41 if (error)
42 return error;
30 43
31 val = bcm_readl(priv->regs + RNG_CTRL); 44 val = __raw_readl(priv->regs + RNG_CTRL);
32 val |= RNG_EN; 45 val |= RNG_EN;
33 bcm_writel(val, priv->regs + RNG_CTRL); 46 __raw_writel(val, priv->regs + RNG_CTRL);
34 47
35 return 0; 48 return 0;
36} 49}
@@ -40,23 +53,25 @@ static void bcm63xx_rng_cleanup(struct hwrng *rng)
40 struct bcm63xx_rng_priv *priv = to_rng_priv(rng); 53 struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
41 u32 val; 54 u32 val;
42 55
43 val = bcm_readl(priv->regs + RNG_CTRL); 56 val = __raw_readl(priv->regs + RNG_CTRL);
44 val &= ~RNG_EN; 57 val &= ~RNG_EN;
45 bcm_writel(val, priv->regs + RNG_CTRL); 58 __raw_writel(val, priv->regs + RNG_CTRL);
59
60 clk_disable_unprepare(priv->clk);
46} 61}
47 62
48static int bcm63xx_rng_data_present(struct hwrng *rng, int wait) 63static int bcm63xx_rng_data_present(struct hwrng *rng, int wait)
49{ 64{
50 struct bcm63xx_rng_priv *priv = to_rng_priv(rng); 65 struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
51 66
52 return bcm_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK; 67 return __raw_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK;
53} 68}
54 69
55static int bcm63xx_rng_data_read(struct hwrng *rng, u32 *data) 70static int bcm63xx_rng_data_read(struct hwrng *rng, u32 *data)
56{ 71{
57 struct bcm63xx_rng_priv *priv = to_rng_priv(rng); 72 struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
58 73
59 *data = bcm_readl(priv->regs + RNG_DATA); 74 *data = __raw_readl(priv->regs + RNG_DATA);
60 75
61 return 4; 76 return 4;
62} 77}
@@ -72,94 +87,53 @@ static int bcm63xx_rng_probe(struct platform_device *pdev)
72 r = platform_get_resource(pdev, IORESOURCE_MEM, 0); 87 r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
73 if (!r) { 88 if (!r) {
74 dev_err(&pdev->dev, "no iomem resource\n"); 89 dev_err(&pdev->dev, "no iomem resource\n");
75 ret = -ENXIO; 90 return -ENXIO;
76 goto out;
77 } 91 }
78 92
79 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 93 priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
80 if (!priv) { 94 if (!priv)
81 dev_err(&pdev->dev, "no memory for private structure\n"); 95 return -ENOMEM;
82 ret = -ENOMEM; 96
83 goto out; 97 priv->rng.name = pdev->name;
98 priv->rng.init = bcm63xx_rng_init;
99 priv->rng.cleanup = bcm63xx_rng_cleanup;
100 priv->rng.data_present = bcm63xx_rng_data_present;
101 priv->rng.data_read = bcm63xx_rng_data_read;
102
103 priv->clk = devm_clk_get(&pdev->dev, "ipsec");
104 if (IS_ERR(priv->clk)) {
105 error = PTR_ERR(priv->clk);
106 dev_err(&pdev->dev, "no clock for device: %d\n", error);
107 return error;
84 } 108 }
85 109
86 rng = kzalloc(sizeof(*rng), GFP_KERNEL);
87 if (!rng) {
88 dev_err(&pdev->dev, "no memory for rng structure\n");
89 ret = -ENOMEM;
90 goto out_free_priv;
91 }
92
93 platform_set_drvdata(pdev, rng);
94 rng->priv = (unsigned long)priv;
95 rng->name = pdev->name;
96 rng->init = bcm63xx_rng_init;
97 rng->cleanup = bcm63xx_rng_cleanup;
98 rng->data_present = bcm63xx_rng_data_present;
99 rng->data_read = bcm63xx_rng_data_read;
100
101 clk = clk_get(&pdev->dev, "ipsec");
102 if (IS_ERR(clk)) {
103 dev_err(&pdev->dev, "no clock for device\n");
104 ret = PTR_ERR(clk);
105 goto out_free_rng;
106 }
107
108 priv->clk = clk;
109
110 if (!devm_request_mem_region(&pdev->dev, r->start, 110 if (!devm_request_mem_region(&pdev->dev, r->start,
111 resource_size(r), pdev->name)) { 111 resource_size(r), pdev->name)) {
112 dev_err(&pdev->dev, "request mem failed"); 112 dev_err(&pdev->dev, "request mem failed");
113 ret = -ENOMEM; 113 return -EBUSY;
114 goto out_free_rng;
115 } 114 }
116 115
117 priv->regs = devm_ioremap_nocache(&pdev->dev, r->start, 116 priv->regs = devm_ioremap_nocache(&pdev->dev, r->start,
118 resource_size(r)); 117 resource_size(r));
119 if (!priv->regs) { 118 if (!priv->regs) {
120 dev_err(&pdev->dev, "ioremap failed"); 119 dev_err(&pdev->dev, "ioremap failed");
121 ret = -ENOMEM; 120 return -ENOMEM;
122 goto out_free_rng;
123 } 121 }
124 122
125 clk_enable(clk); 123 error = devm_hwrng_register(&pdev->dev, &priv->rng);
126 124 if (error) {
127 ret = hwrng_register(rng); 125 dev_err(&pdev->dev, "failed to register rng device: %d\n",
128 if (ret) { 126 error);
129 dev_err(&pdev->dev, "failed to register rng device\n"); 127 return error;
130 goto out_clk_disable;
131 } 128 }
132 129
133 dev_info(&pdev->dev, "registered RNG driver\n"); 130 dev_info(&pdev->dev, "registered RNG driver\n");
134 131
135 return 0; 132 return 0;
136
137out_clk_disable:
138 clk_disable(clk);
139out_free_rng:
140 kfree(rng);
141out_free_priv:
142 kfree(priv);
143out:
144 return ret;
145}
146
147static int bcm63xx_rng_remove(struct platform_device *pdev)
148{
149 struct hwrng *rng = platform_get_drvdata(pdev);
150 struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
151
152 hwrng_unregister(rng);
153 clk_disable(priv->clk);
154 kfree(priv);
155 kfree(rng);
156
157 return 0;
158} 133}
159 134
160static struct platform_driver bcm63xx_rng_driver = { 135static struct platform_driver bcm63xx_rng_driver = {
161 .probe = bcm63xx_rng_probe, 136 .probe = bcm63xx_rng_probe,
162 .remove = bcm63xx_rng_remove,
163 .driver = { 137 .driver = {
164 .name = "bcm63xx-rng", 138 .name = "bcm63xx-rng",
165 }, 139 },
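Besides switching to devm_* helpers and __raw I/O accessors, the bcm63xx driver now embeds struct hwrng in its private data and recovers the wrapper with container_of() instead of stashing a pointer in rng->priv. A generic sketch of that pattern (the demo_* names and register offset are hypothetical):

#include <linux/hw_random.h>
#include <linux/io.h>
#include <linux/kernel.h>

struct demo_rng_priv {
	struct hwrng rng;	/* embedded, no cast through rng->priv needed */
	void __iomem *regs;
};

static int demo_rng_data_read(struct hwrng *rng, u32 *data)
{
	/* recover the containing structure from the embedded member */
	struct demo_rng_priv *priv = container_of(rng, struct demo_rng_priv, rng);

	*data = __raw_readl(priv->regs + 0x08);	/* hypothetical data register */
	return 4;				/* bytes of entropy returned */
}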
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 32a8a867f7f8..571ef61f8ea9 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -179,7 +179,8 @@ skip_init:
179 add_early_randomness(rng); 179 add_early_randomness(rng);
180 180
181 current_quality = rng->quality ? : default_quality; 181 current_quality = rng->quality ? : default_quality;
182 current_quality &= 1023; 182 if (current_quality > 1024)
183 current_quality = 1024;
183 184
184 if (current_quality == 0 && hwrng_fill) 185 if (current_quality == 0 && hwrng_fill)
185 kthread_stop(hwrng_fill); 186 kthread_stop(hwrng_fill);
@@ -536,6 +537,48 @@ void hwrng_unregister(struct hwrng *rng)
536} 537}
537EXPORT_SYMBOL_GPL(hwrng_unregister); 538EXPORT_SYMBOL_GPL(hwrng_unregister);
538 539
540static void devm_hwrng_release(struct device *dev, void *res)
541{
542 hwrng_unregister(*(struct hwrng **)res);
543}
544
545static int devm_hwrng_match(struct device *dev, void *res, void *data)
546{
547 struct hwrng **r = res;
548
549 if (WARN_ON(!r || !*r))
550 return 0;
551
552 return *r == data;
553}
554
555int devm_hwrng_register(struct device *dev, struct hwrng *rng)
556{
557 struct hwrng **ptr;
558 int error;
559
560 ptr = devres_alloc(devm_hwrng_release, sizeof(*ptr), GFP_KERNEL);
561 if (!ptr)
562 return -ENOMEM;
563
564 error = hwrng_register(rng);
565 if (error) {
566 devres_free(ptr);
567 return error;
568 }
569
570 *ptr = rng;
571 devres_add(dev, ptr);
572 return 0;
573}
574EXPORT_SYMBOL_GPL(devm_hwrng_register);
575
576void devm_hwrng_unregister(struct device *dev, struct hwrng *rng)
577{
578 devres_release(dev, devm_hwrng_release, devm_hwrng_match, rng);
579}
580EXPORT_SYMBOL_GPL(devm_hwrng_unregister);
581
539static int __init hwrng_modinit(void) 582static int __init hwrng_modinit(void)
540{ 583{
541 return register_miscdev(); 584 return register_miscdev();
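devm_hwrng_register() above ties unregistration to the device's devres list, which is why the exynos, msm and bcm63xx conversions in this series can delete their .remove callbacks. A hedged probe sketch, reusing the hypothetical demo_rng_priv/demo_rng_data_read from the earlier sketch:

#include <linux/hw_random.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static int demo_rng_probe(struct platform_device *pdev)
{
	struct demo_rng_priv *priv;

	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	priv->rng.name = pdev->name;
	priv->rng.data_read = demo_rng_data_read;

	/* devres drops the registration automatically on unbind */
	return devm_hwrng_register(&pdev->dev, &priv->rng);
}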
diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c
index fed0830bf724..dc4701fd814f 100644
--- a/drivers/char/hw_random/exynos-rng.c
+++ b/drivers/char/hw_random/exynos-rng.c
@@ -131,16 +131,7 @@ static int exynos_rng_probe(struct platform_device *pdev)
131 pm_runtime_use_autosuspend(&pdev->dev); 131 pm_runtime_use_autosuspend(&pdev->dev);
132 pm_runtime_enable(&pdev->dev); 132 pm_runtime_enable(&pdev->dev);
133 133
134 return hwrng_register(&exynos_rng->rng); 134 return devm_hwrng_register(&pdev->dev, &exynos_rng->rng);
135}
136
137static int exynos_rng_remove(struct platform_device *pdev)
138{
139 struct exynos_rng *exynos_rng = platform_get_drvdata(pdev);
140
141 hwrng_unregister(&exynos_rng->rng);
142
143 return 0;
144} 135}
145 136
146#ifdef CONFIG_PM 137#ifdef CONFIG_PM
@@ -172,7 +163,6 @@ static struct platform_driver exynos_rng_driver = {
172 .pm = &exynos_rng_pm_ops, 163 .pm = &exynos_rng_pm_ops,
173 }, 164 },
174 .probe = exynos_rng_probe, 165 .probe = exynos_rng_probe,
175 .remove = exynos_rng_remove,
176}; 166};
177 167
178module_platform_driver(exynos_rng_driver); 168module_platform_driver(exynos_rng_driver);
diff --git a/drivers/char/hw_random/iproc-rng200.c b/drivers/char/hw_random/iproc-rng200.c
new file mode 100644
index 000000000000..3eaf7cb96d36
--- /dev/null
+++ b/drivers/char/hw_random/iproc-rng200.c
@@ -0,0 +1,239 @@
1/*
2* Copyright (C) 2015 Broadcom Corporation
3*
4* This program is free software; you can redistribute it and/or
5* modify it under the terms of the GNU General Public License as
6* published by the Free Software Foundation version 2.
7*
8* This program is distributed "as is" WITHOUT ANY WARRANTY of any
9* kind, whether express or implied; without even the implied warranty
10* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11* GNU General Public License for more details.
12*/
13/*
14 * DESCRIPTION: The Broadcom iProc RNG200 Driver
15 */
16
17#include <linux/hw_random.h>
18#include <linux/init.h>
19#include <linux/io.h>
20#include <linux/kernel.h>
21#include <linux/module.h>
22#include <linux/of_address.h>
23#include <linux/of_platform.h>
24#include <linux/platform_device.h>
25#include <linux/delay.h>
26
27/* Registers */
28#define RNG_CTRL_OFFSET 0x00
29#define RNG_CTRL_RNG_RBGEN_MASK 0x00001FFF
30#define RNG_CTRL_RNG_RBGEN_ENABLE 0x00000001
31#define RNG_CTRL_RNG_RBGEN_DISABLE 0x00000000
32
33#define RNG_SOFT_RESET_OFFSET 0x04
34#define RNG_SOFT_RESET 0x00000001
35
36#define RBG_SOFT_RESET_OFFSET 0x08
37#define RBG_SOFT_RESET 0x00000001
38
39#define RNG_INT_STATUS_OFFSET 0x18
40#define RNG_INT_STATUS_MASTER_FAIL_LOCKOUT_IRQ_MASK 0x80000000
41#define RNG_INT_STATUS_STARTUP_TRANSITIONS_MET_IRQ_MASK 0x00020000
42#define RNG_INT_STATUS_NIST_FAIL_IRQ_MASK 0x00000020
43#define RNG_INT_STATUS_TOTAL_BITS_COUNT_IRQ_MASK 0x00000001
44
45#define RNG_FIFO_DATA_OFFSET 0x20
46
47#define RNG_FIFO_COUNT_OFFSET 0x24
48#define RNG_FIFO_COUNT_RNG_FIFO_COUNT_MASK 0x000000FF
49
50struct iproc_rng200_dev {
51 struct hwrng rng;
52 void __iomem *base;
53};
54
55#define to_rng_priv(rng) container_of(rng, struct iproc_rng200_dev, rng)
56
57static void iproc_rng200_restart(void __iomem *rng_base)
58{
59 uint32_t val;
60
61 /* Disable RBG */
62 val = ioread32(rng_base + RNG_CTRL_OFFSET);
63 val &= ~RNG_CTRL_RNG_RBGEN_MASK;
64 val |= RNG_CTRL_RNG_RBGEN_DISABLE;
65 iowrite32(val, rng_base + RNG_CTRL_OFFSET);
66
67 /* Clear all interrupt status */
68 iowrite32(0xFFFFFFFFUL, rng_base + RNG_INT_STATUS_OFFSET);
69
70 /* Reset RNG and RBG */
71 val = ioread32(rng_base + RBG_SOFT_RESET_OFFSET);
72 val |= RBG_SOFT_RESET;
73 iowrite32(val, rng_base + RBG_SOFT_RESET_OFFSET);
74
75 val = ioread32(rng_base + RNG_SOFT_RESET_OFFSET);
76 val |= RNG_SOFT_RESET;
77 iowrite32(val, rng_base + RNG_SOFT_RESET_OFFSET);
78
79 val = ioread32(rng_base + RNG_SOFT_RESET_OFFSET);
80 val &= ~RNG_SOFT_RESET;
81 iowrite32(val, rng_base + RNG_SOFT_RESET_OFFSET);
82
83 val = ioread32(rng_base + RBG_SOFT_RESET_OFFSET);
84 val &= ~RBG_SOFT_RESET;
85 iowrite32(val, rng_base + RBG_SOFT_RESET_OFFSET);
86
87 /* Enable RBG */
88 val = ioread32(rng_base + RNG_CTRL_OFFSET);
89 val &= ~RNG_CTRL_RNG_RBGEN_MASK;
90 val |= RNG_CTRL_RNG_RBGEN_ENABLE;
91 iowrite32(val, rng_base + RNG_CTRL_OFFSET);
92}
93
94static int iproc_rng200_read(struct hwrng *rng, void *buf, size_t max,
95 bool wait)
96{
97 struct iproc_rng200_dev *priv = to_rng_priv(rng);
98 uint32_t num_remaining = max;
99 uint32_t status;
100
101 #define MAX_RESETS_PER_READ 1
102 uint32_t num_resets = 0;
103
104 #define MAX_IDLE_TIME (1 * HZ)
105 unsigned long idle_endtime = jiffies + MAX_IDLE_TIME;
106
107 while ((num_remaining > 0) && time_before(jiffies, idle_endtime)) {
108
109 /* Is RNG sane? If not, reset it. */
110 status = ioread32(priv->base + RNG_INT_STATUS_OFFSET);
111 if ((status & (RNG_INT_STATUS_MASTER_FAIL_LOCKOUT_IRQ_MASK |
112 RNG_INT_STATUS_NIST_FAIL_IRQ_MASK)) != 0) {
113
114 if (num_resets >= MAX_RESETS_PER_READ)
115 return max - num_remaining;
116
117 iproc_rng200_restart(priv->base);
118 num_resets++;
119 }
120
121 /* Are there any random numbers available? */
122 if ((ioread32(priv->base + RNG_FIFO_COUNT_OFFSET) &
123 RNG_FIFO_COUNT_RNG_FIFO_COUNT_MASK) > 0) {
124
125 if (num_remaining >= sizeof(uint32_t)) {
126 /* Buffer has room to store entire word */
127 *(uint32_t *)buf = ioread32(priv->base +
128 RNG_FIFO_DATA_OFFSET);
129 buf += sizeof(uint32_t);
130 num_remaining -= sizeof(uint32_t);
131 } else {
132 /* Buffer can only store partial word */
133 uint32_t rnd_number = ioread32(priv->base +
134 RNG_FIFO_DATA_OFFSET);
135 memcpy(buf, &rnd_number, num_remaining);
136 buf += num_remaining;
137 num_remaining = 0;
138 }
139
140 /* Reset the IDLE timeout */
141 idle_endtime = jiffies + MAX_IDLE_TIME;
142 } else {
143 if (!wait)
144 /* Cannot wait, return immediately */
145 return max - num_remaining;
146
147 /* Can wait, give others chance to run */
148 usleep_range(min(num_remaining * 10, 500U), 500);
149 }
150 }
151
152 return max - num_remaining;
153}
154
155static int iproc_rng200_init(struct hwrng *rng)
156{
157 struct iproc_rng200_dev *priv = to_rng_priv(rng);
158 uint32_t val;
159
160 /* Setup RNG. */
161 val = ioread32(priv->base + RNG_CTRL_OFFSET);
162 val &= ~RNG_CTRL_RNG_RBGEN_MASK;
163 val |= RNG_CTRL_RNG_RBGEN_ENABLE;
164 iowrite32(val, priv->base + RNG_CTRL_OFFSET);
165
166 return 0;
167}
168
169static void iproc_rng200_cleanup(struct hwrng *rng)
170{
171 struct iproc_rng200_dev *priv = to_rng_priv(rng);
172 uint32_t val;
173
174 /* Disable RNG hardware */
175 val = ioread32(priv->base + RNG_CTRL_OFFSET);
176 val &= ~RNG_CTRL_RNG_RBGEN_MASK;
177 val |= RNG_CTRL_RNG_RBGEN_DISABLE;
178 iowrite32(val, priv->base + RNG_CTRL_OFFSET);
179}
180
181static int iproc_rng200_probe(struct platform_device *pdev)
182{
183 struct iproc_rng200_dev *priv;
184 struct resource *res;
185 struct device *dev = &pdev->dev;
186 int ret;
187
188 priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
189 if (!priv)
190 return -ENOMEM;
191
192 /* Map peripheral */
193 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
194 if (!res) {
195 dev_err(dev, "failed to get rng resources\n");
196 return -EINVAL;
197 }
198
199 priv->base = devm_ioremap_resource(dev, res);
200 if (IS_ERR(priv->base)) {
201 dev_err(dev, "failed to remap rng regs\n");
202 return PTR_ERR(priv->base);
203 }
204
205 priv->rng.name = "iproc-rng200",
206 priv->rng.read = iproc_rng200_read,
207 priv->rng.init = iproc_rng200_init,
208 priv->rng.cleanup = iproc_rng200_cleanup,
209
210 /* Register driver */
211 ret = devm_hwrng_register(dev, &priv->rng);
212 if (ret) {
213 dev_err(dev, "hwrng registration failed\n");
214 return ret;
215 }
216
217 dev_info(dev, "hwrng registered\n");
218
219 return 0;
220}
221
222static const struct of_device_id iproc_rng200_of_match[] = {
223 { .compatible = "brcm,iproc-rng200", },
224 {},
225};
226MODULE_DEVICE_TABLE(of, iproc_rng200_of_match);
227
228static struct platform_driver iproc_rng200_driver = {
229 .driver = {
230 .name = "iproc-rng200",
231 .of_match_table = iproc_rng200_of_match,
232 },
233 .probe = iproc_rng200_probe,
234};
235module_platform_driver(iproc_rng200_driver);
236
237MODULE_AUTHOR("Broadcom");
238MODULE_DESCRIPTION("iProc RNG200 Random Number Generator driver");
239MODULE_LICENSE("GPL v2");
diff --git a/drivers/char/hw_random/msm-rng.c b/drivers/char/hw_random/msm-rng.c
index cea1c703d62f..96fb986402eb 100644
--- a/drivers/char/hw_random/msm-rng.c
+++ b/drivers/char/hw_random/msm-rng.c
@@ -157,7 +157,7 @@ static int msm_rng_probe(struct platform_device *pdev)
157 rng->hwrng.cleanup = msm_rng_cleanup, 157 rng->hwrng.cleanup = msm_rng_cleanup,
158 rng->hwrng.read = msm_rng_read, 158 rng->hwrng.read = msm_rng_read,
159 159
160 ret = hwrng_register(&rng->hwrng); 160 ret = devm_hwrng_register(&pdev->dev, &rng->hwrng);
161 if (ret) { 161 if (ret) {
162 dev_err(&pdev->dev, "failed to register hwrng\n"); 162 dev_err(&pdev->dev, "failed to register hwrng\n");
163 return ret; 163 return ret;
@@ -166,14 +166,6 @@ static int msm_rng_probe(struct platform_device *pdev)
166 return 0; 166 return 0;
167} 167}
168 168
169static int msm_rng_remove(struct platform_device *pdev)
170{
171 struct msm_rng *rng = platform_get_drvdata(pdev);
172
173 hwrng_unregister(&rng->hwrng);
174 return 0;
175}
176
177static const struct of_device_id msm_rng_of_match[] = { 169static const struct of_device_id msm_rng_of_match[] = {
178 { .compatible = "qcom,prng", }, 170 { .compatible = "qcom,prng", },
179 {} 171 {}
@@ -182,7 +174,6 @@ MODULE_DEVICE_TABLE(of, msm_rng_of_match);
182 174
183static struct platform_driver msm_rng_driver = { 175static struct platform_driver msm_rng_driver = {
184 .probe = msm_rng_probe, 176 .probe = msm_rng_probe,
185 .remove = msm_rng_remove,
186 .driver = { 177 .driver = {
187 .name = KBUILD_MODNAME, 178 .name = KBUILD_MODNAME,
188 .of_match_table = of_match_ptr(msm_rng_of_match), 179 .of_match_table = of_match_ptr(msm_rng_of_match),
diff --git a/drivers/char/hw_random/octeon-rng.c b/drivers/char/hw_random/octeon-rng.c
index be1c3f607398..6234a4a19b56 100644
--- a/drivers/char/hw_random/octeon-rng.c
+++ b/drivers/char/hw_random/octeon-rng.c
@@ -105,7 +105,7 @@ static int octeon_rng_probe(struct platform_device *pdev)
105 return 0; 105 return 0;
106} 106}
107 107
108static int __exit octeon_rng_remove(struct platform_device *pdev) 108static int octeon_rng_remove(struct platform_device *pdev)
109{ 109{
110 struct hwrng *rng = platform_get_drvdata(pdev); 110 struct hwrng *rng = platform_get_drvdata(pdev);
111 111
@@ -119,7 +119,7 @@ static struct platform_driver octeon_rng_driver = {
119 .name = "octeon_rng", 119 .name = "octeon_rng",
120 }, 120 },
121 .probe = octeon_rng_probe, 121 .probe = octeon_rng_probe,
122 .remove = __exit_p(octeon_rng_remove), 122 .remove = octeon_rng_remove,
123}; 123};
124 124
125module_platform_driver(octeon_rng_driver); 125module_platform_driver(octeon_rng_driver);
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index d14dcf788f17..8a1432e8bb80 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -236,7 +236,7 @@ static int omap4_rng_init(struct omap_rng_dev *priv)
236 u32 val; 236 u32 val;
237 237
238 /* Return if RNG is already running. */ 238 /* Return if RNG is already running. */
239 if (omap_rng_read(priv, RNG_CONFIG_REG) & RNG_CONTROL_ENABLE_TRNG_MASK) 239 if (omap_rng_read(priv, RNG_CONTROL_REG) & RNG_CONTROL_ENABLE_TRNG_MASK)
240 return 0; 240 return 0;
241 241
242 val = RNG_CONFIG_MIN_REFIL_CYCLES << RNG_CONFIG_MIN_REFIL_CYCLES_SHIFT; 242 val = RNG_CONFIG_MIN_REFIL_CYCLES << RNG_CONFIG_MIN_REFIL_CYCLES_SHIFT;
@@ -262,7 +262,7 @@ static void omap4_rng_cleanup(struct omap_rng_dev *priv)
262 262
263 val = omap_rng_read(priv, RNG_CONTROL_REG); 263 val = omap_rng_read(priv, RNG_CONTROL_REG);
264 val &= ~RNG_CONTROL_ENABLE_TRNG_MASK; 264 val &= ~RNG_CONTROL_ENABLE_TRNG_MASK;
265 omap_rng_write(priv, RNG_CONFIG_REG, val); 265 omap_rng_write(priv, RNG_CONTROL_REG, val);
266} 266}
267 267
268static irqreturn_t omap4_rng_irq(int irq, void *dev_id) 268static irqreturn_t omap4_rng_irq(int irq, void *dev_id)
@@ -408,7 +408,7 @@ err_ioremap:
408 return ret; 408 return ret;
409} 409}
410 410
411static int __exit omap_rng_remove(struct platform_device *pdev) 411static int omap_rng_remove(struct platform_device *pdev)
412{ 412{
413 struct omap_rng_dev *priv = platform_get_drvdata(pdev); 413 struct omap_rng_dev *priv = platform_get_drvdata(pdev);
414 414
@@ -422,9 +422,7 @@ static int __exit omap_rng_remove(struct platform_device *pdev)
422 return 0; 422 return 0;
423} 423}
424 424
425#ifdef CONFIG_PM_SLEEP 425static int __maybe_unused omap_rng_suspend(struct device *dev)
426
427static int omap_rng_suspend(struct device *dev)
428{ 426{
429 struct omap_rng_dev *priv = dev_get_drvdata(dev); 427 struct omap_rng_dev *priv = dev_get_drvdata(dev);
430 428
@@ -434,7 +432,7 @@ static int omap_rng_suspend(struct device *dev)
434 return 0; 432 return 0;
435} 433}
436 434
437static int omap_rng_resume(struct device *dev) 435static int __maybe_unused omap_rng_resume(struct device *dev)
438{ 436{
439 struct omap_rng_dev *priv = dev_get_drvdata(dev); 437 struct omap_rng_dev *priv = dev_get_drvdata(dev);
440 438
@@ -445,22 +443,15 @@ static int omap_rng_resume(struct device *dev)
445} 443}
446 444
447static SIMPLE_DEV_PM_OPS(omap_rng_pm, omap_rng_suspend, omap_rng_resume); 445static SIMPLE_DEV_PM_OPS(omap_rng_pm, omap_rng_suspend, omap_rng_resume);
448#define OMAP_RNG_PM (&omap_rng_pm)
449
450#else
451
452#define OMAP_RNG_PM NULL
453
454#endif
455 446
456static struct platform_driver omap_rng_driver = { 447static struct platform_driver omap_rng_driver = {
457 .driver = { 448 .driver = {
458 .name = "omap_rng", 449 .name = "omap_rng",
459 .pm = OMAP_RNG_PM, 450 .pm = &omap_rng_pm,
460 .of_match_table = of_match_ptr(omap_rng_of_match), 451 .of_match_table = of_match_ptr(omap_rng_of_match),
461 }, 452 },
462 .probe = omap_rng_probe, 453 .probe = omap_rng_probe,
463 .remove = __exit_p(omap_rng_remove), 454 .remove = omap_rng_remove,
464}; 455};
465 456
466module_platform_driver(omap_rng_driver); 457module_platform_driver(omap_rng_driver);
diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c
index bcf86f91800a..63ce51d09af1 100644
--- a/drivers/char/hw_random/pseries-rng.c
+++ b/drivers/char/hw_random/pseries-rng.c
@@ -61,13 +61,13 @@ static struct hwrng pseries_rng = {
61 .read = pseries_rng_read, 61 .read = pseries_rng_read,
62}; 62};
63 63
64static int __init pseries_rng_probe(struct vio_dev *dev, 64static int pseries_rng_probe(struct vio_dev *dev,
65 const struct vio_device_id *id) 65 const struct vio_device_id *id)
66{ 66{
67 return hwrng_register(&pseries_rng); 67 return hwrng_register(&pseries_rng);
68} 68}
69 69
70static int __exit pseries_rng_remove(struct vio_dev *dev) 70static int pseries_rng_remove(struct vio_dev *dev)
71{ 71{
72 hwrng_unregister(&pseries_rng); 72 hwrng_unregister(&pseries_rng);
73 return 0; 73 return 0;
diff --git a/drivers/char/hw_random/xgene-rng.c b/drivers/char/hw_random/xgene-rng.c
index 23caa05380a8..c37cf754a985 100644
--- a/drivers/char/hw_random/xgene-rng.c
+++ b/drivers/char/hw_random/xgene-rng.c
@@ -21,6 +21,7 @@
21 * 21 *
22 */ 22 */
23 23
24#include <linux/acpi.h>
24#include <linux/clk.h> 25#include <linux/clk.h>
25#include <linux/delay.h> 26#include <linux/delay.h>
26#include <linux/hw_random.h> 27#include <linux/hw_random.h>
@@ -310,6 +311,14 @@ static int xgene_rng_init(struct hwrng *rng)
310 return 0; 311 return 0;
311} 312}
312 313
314#ifdef CONFIG_ACPI
315static const struct acpi_device_id xgene_rng_acpi_match[] = {
316 { "APMC0D18", },
317 { }
318};
319MODULE_DEVICE_TABLE(acpi, xgene_rng_acpi_match);
320#endif
321
313static struct hwrng xgene_rng_func = { 322static struct hwrng xgene_rng_func = {
314 .name = "xgene-rng", 323 .name = "xgene-rng",
315 .init = xgene_rng_init, 324 .init = xgene_rng_init,
@@ -415,6 +424,7 @@ static struct platform_driver xgene_rng_driver = {
415 .driver = { 424 .driver = {
416 .name = "xgene-rng", 425 .name = "xgene-rng",
417 .of_match_table = xgene_rng_of_match, 426 .of_match_table = xgene_rng_of_match,
427 .acpi_match_table = ACPI_PTR(xgene_rng_acpi_match),
418 }, 428 },
419}; 429};
420 430
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 2fb0fdfc87df..800bf41718e1 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -391,7 +391,7 @@ config CRYPTO_DEV_ATMEL_SHA
391 391
392config CRYPTO_DEV_CCP 392config CRYPTO_DEV_CCP
393 bool "Support for AMD Cryptographic Coprocessor" 393 bool "Support for AMD Cryptographic Coprocessor"
394 depends on (X86 && PCI) || ARM64 394 depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM
395 default n 395 default n
396 help 396 help
397 The AMD Cryptographic Coprocessor provides hardware support 397 The AMD Cryptographic Coprocessor provides hardware support
@@ -436,4 +436,26 @@ config CRYPTO_DEV_QCE
436 hardware. To compile this driver as a module, choose M here. The 436 hardware. To compile this driver as a module, choose M here. The
437 module will be called qcrypto. 437 module will be called qcrypto.
438 438
439config CRYPTO_DEV_VMX
440 bool "Support for VMX cryptographic acceleration instructions"
441 depends on PPC64
442 default n
443 help
444 Support for VMX cryptographic acceleration instructions.
445
446source "drivers/crypto/vmx/Kconfig"
447
448config CRYPTO_DEV_IMGTEC_HASH
449 depends on MIPS || COMPILE_TEST
450 tristate "Imagination Technologies hardware hash accelerator"
451 select CRYPTO_ALGAPI
452 select CRYPTO_MD5
453 select CRYPTO_SHA1
454 select CRYPTO_SHA256
455 select CRYPTO_HASH
456 help
457 This driver interfaces with the Imagination Technologies
458 hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256
459 hashing algorithms.
460
439endif # CRYPTO_HW 461endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 3924f93d5774..fb84be7e6be5 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
6obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ 6obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
7obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o 7obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
8obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o 8obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
9obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
9obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o 10obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
10obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o 11obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
11obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o 12obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
@@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
25obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ 26obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
26obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ 27obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
27obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ 28obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
29obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index d02b77150070..3b28e8c3de28 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -1155,7 +1155,7 @@ struct crypto4xx_alg_common crypto4xx_alg[] = {
1155/** 1155/**
1156 * Module Initialization Routine 1156 * Module Initialization Routine
1157 */ 1157 */
1158static int __init crypto4xx_probe(struct platform_device *ofdev) 1158static int crypto4xx_probe(struct platform_device *ofdev)
1159{ 1159{
1160 int rc; 1160 int rc;
1161 struct resource res; 1161 struct resource res;
@@ -1263,7 +1263,7 @@ err_alloc_dev:
1263 return rc; 1263 return rc;
1264} 1264}
1265 1265
1266static int __exit crypto4xx_remove(struct platform_device *ofdev) 1266static int crypto4xx_remove(struct platform_device *ofdev)
1267{ 1267{
1268 struct device *dev = &ofdev->dev; 1268 struct device *dev = &ofdev->dev;
1269 struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev); 1269 struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev);
@@ -1291,7 +1291,7 @@ static struct platform_driver crypto4xx_driver = {
1291 .of_match_table = crypto4xx_match, 1291 .of_match_table = crypto4xx_match,
1292 }, 1292 },
1293 .probe = crypto4xx_probe, 1293 .probe = crypto4xx_probe,
1294 .remove = __exit_p(crypto4xx_remove), 1294 .remove = crypto4xx_remove,
1295}; 1295};
1296 1296
1297module_platform_driver(crypto4xx_driver); 1297module_platform_driver(crypto4xx_driver);
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 6597aac9905d..0f9a9dc06a83 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -315,10 +315,10 @@ static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd,
315 315
316 dd->dma_size = length; 316 dd->dma_size = length;
317 317
318 if (!(dd->flags & AES_FLAGS_FAST)) { 318 dma_sync_single_for_device(dd->dev, dma_addr_in, length,
319 dma_sync_single_for_device(dd->dev, dma_addr_in, length, 319 DMA_TO_DEVICE);
320 DMA_TO_DEVICE); 320 dma_sync_single_for_device(dd->dev, dma_addr_out, length,
321 } 321 DMA_FROM_DEVICE);
322 322
323 if (dd->flags & AES_FLAGS_CFB8) { 323 if (dd->flags & AES_FLAGS_CFB8) {
324 dd->dma_lch_in.dma_conf.dst_addr_width = 324 dd->dma_lch_in.dma_conf.dst_addr_width =
@@ -391,6 +391,11 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd)
391{ 391{
392 dd->flags &= ~AES_FLAGS_DMA; 392 dd->flags &= ~AES_FLAGS_DMA;
393 393
394 dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in,
395 dd->dma_size, DMA_TO_DEVICE);
396 dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out,
397 dd->dma_size, DMA_FROM_DEVICE);
398
394 /* use cache buffers */ 399 /* use cache buffers */
395 dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg); 400 dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg);
396 if (!dd->nb_in_sg) 401 if (!dd->nb_in_sg)
@@ -459,6 +464,9 @@ static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd)
459 dd->flags |= AES_FLAGS_FAST; 464 dd->flags |= AES_FLAGS_FAST;
460 465
461 } else { 466 } else {
467 dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in,
468 dd->dma_size, DMA_TO_DEVICE);
469
462 /* use cache buffers */ 470 /* use cache buffers */
463 count = atmel_aes_sg_copy(&dd->in_sg, &dd->in_offset, 471 count = atmel_aes_sg_copy(&dd->in_sg, &dd->in_offset,
464 dd->buf_in, dd->buflen, dd->total, 0); 472 dd->buf_in, dd->buflen, dd->total, 0);
@@ -619,7 +627,7 @@ static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd)
619 dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); 627 dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
620 dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); 628 dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
621 } else { 629 } else {
622 dma_sync_single_for_device(dd->dev, dd->dma_addr_out, 630 dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out,
623 dd->dma_size, DMA_FROM_DEVICE); 631 dd->dma_size, DMA_FROM_DEVICE);
624 632
625 /* copy data */ 633 /* copy data */
@@ -1246,6 +1254,11 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
1246 1254
1247 /* keep only major version number */ 1255 /* keep only major version number */
1248 switch (dd->hw_version & 0xff0) { 1256 switch (dd->hw_version & 0xff0) {
1257 case 0x200:
1258 dd->caps.has_dualbuff = 1;
1259 dd->caps.has_cfb64 = 1;
1260 dd->caps.max_burst_size = 4;
1261 break;
1249 case 0x130: 1262 case 0x130:
1250 dd->caps.has_dualbuff = 1; 1263 dd->caps.has_dualbuff = 1;
1251 dd->caps.has_cfb64 = 1; 1264 dd->caps.has_cfb64 = 1;
@@ -1336,6 +1349,7 @@ static int atmel_aes_probe(struct platform_device *pdev)
1336 platform_set_drvdata(pdev, aes_dd); 1349 platform_set_drvdata(pdev, aes_dd);
1337 1350
1338 INIT_LIST_HEAD(&aes_dd->list); 1351 INIT_LIST_HEAD(&aes_dd->list);
1352 spin_lock_init(&aes_dd->lock);
1339 1353
1340 tasklet_init(&aes_dd->done_task, atmel_aes_done_task, 1354 tasklet_init(&aes_dd->done_task, atmel_aes_done_task,
1341 (unsigned long)aes_dd); 1355 (unsigned long)aes_dd);
@@ -1374,7 +1388,7 @@ static int atmel_aes_probe(struct platform_device *pdev)
1374 /* Initializing the clock */ 1388 /* Initializing the clock */
1375 aes_dd->iclk = clk_get(&pdev->dev, "aes_clk"); 1389 aes_dd->iclk = clk_get(&pdev->dev, "aes_clk");
1376 if (IS_ERR(aes_dd->iclk)) { 1390 if (IS_ERR(aes_dd->iclk)) {
1377 dev_err(dev, "clock intialization failed.\n"); 1391 dev_err(dev, "clock initialization failed.\n");
1378 err = PTR_ERR(aes_dd->iclk); 1392 err = PTR_ERR(aes_dd->iclk);
1379 goto clk_err; 1393 goto clk_err;
1380 } 1394 }
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 34db04addc18..5b35433c5399 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -163,8 +163,20 @@ static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
163 count = min(ctx->sg->length - ctx->offset, ctx->total); 163 count = min(ctx->sg->length - ctx->offset, ctx->total);
164 count = min(count, ctx->buflen - ctx->bufcnt); 164 count = min(count, ctx->buflen - ctx->bufcnt);
165 165
166 if (count <= 0) 166 if (count <= 0) {
167 break; 167 /*
168 * Check if count <= 0 because the buffer is full or
169 * because the sg length is 0. In the latest case,
170 * check if there is another sg in the list, a 0 length
171 * sg doesn't necessarily mean the end of the sg list.
172 */
173 if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) {
174 ctx->sg = sg_next(ctx->sg);
175 continue;
176 } else {
177 break;
178 }
179 }
168 180
169 scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg, 181 scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
170 ctx->offset, count, 0); 182 ctx->offset, count, 0);
@@ -420,14 +432,8 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
420 dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", 432 dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n",
421 ctx->digcnt[1], ctx->digcnt[0], length1, final); 433 ctx->digcnt[1], ctx->digcnt[0], length1, final);
422 434
423 if (ctx->flags & (SHA_FLAGS_SHA1 | SHA_FLAGS_SHA224 | 435 dd->dma_lch_in.dma_conf.src_maxburst = 16;
424 SHA_FLAGS_SHA256)) { 436 dd->dma_lch_in.dma_conf.dst_maxburst = 16;
425 dd->dma_lch_in.dma_conf.src_maxburst = 16;
426 dd->dma_lch_in.dma_conf.dst_maxburst = 16;
427 } else {
428 dd->dma_lch_in.dma_conf.src_maxburst = 32;
429 dd->dma_lch_in.dma_conf.dst_maxburst = 32;
430 }
431 437
432 dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf); 438 dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf);
433 439
@@ -529,7 +535,7 @@ static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
529 if (final) 535 if (final)
530 atmel_sha_fill_padding(ctx, 0); 536 atmel_sha_fill_padding(ctx, 0);
531 537
532 if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) { 538 if (final || (ctx->bufcnt == ctx->buflen)) {
533 count = ctx->bufcnt; 539 count = ctx->bufcnt;
534 ctx->bufcnt = 0; 540 ctx->bufcnt = 0;
535 return atmel_sha_xmit_dma_map(dd, ctx, count, final); 541 return atmel_sha_xmit_dma_map(dd, ctx, count, final);
@@ -1266,6 +1272,12 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd)
1266 1272
1267 /* keep only major version number */ 1273 /* keep only major version number */
1268 switch (dd->hw_version & 0xff0) { 1274 switch (dd->hw_version & 0xff0) {
1275 case 0x420:
1276 dd->caps.has_dma = 1;
1277 dd->caps.has_dualbuff = 1;
1278 dd->caps.has_sha224 = 1;
1279 dd->caps.has_sha_384_512 = 1;
1280 break;
1269 case 0x410: 1281 case 0x410:
1270 dd->caps.has_dma = 1; 1282 dd->caps.has_dma = 1;
1271 dd->caps.has_dualbuff = 1; 1283 dd->caps.has_dualbuff = 1;
@@ -1349,6 +1361,7 @@ static int atmel_sha_probe(struct platform_device *pdev)
1349 platform_set_drvdata(pdev, sha_dd); 1361 platform_set_drvdata(pdev, sha_dd);
1350 1362
1351 INIT_LIST_HEAD(&sha_dd->list); 1363 INIT_LIST_HEAD(&sha_dd->list);
1364 spin_lock_init(&sha_dd->lock);
1352 1365
1353 tasklet_init(&sha_dd->done_task, atmel_sha_done_task, 1366 tasklet_init(&sha_dd->done_task, atmel_sha_done_task,
1354 (unsigned long)sha_dd); 1367 (unsigned long)sha_dd);
@@ -1385,7 +1398,7 @@ static int atmel_sha_probe(struct platform_device *pdev)
1385 /* Initializing the clock */ 1398 /* Initializing the clock */
1386 sha_dd->iclk = clk_get(&pdev->dev, "sha_clk"); 1399 sha_dd->iclk = clk_get(&pdev->dev, "sha_clk");
1387 if (IS_ERR(sha_dd->iclk)) { 1400 if (IS_ERR(sha_dd->iclk)) {
1388 dev_err(dev, "clock intialization failed.\n"); 1401 dev_err(dev, "clock initialization failed.\n");
1389 err = PTR_ERR(sha_dd->iclk); 1402 err = PTR_ERR(sha_dd->iclk);
1390 goto clk_err; 1403 goto clk_err;
1391 } 1404 }
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 258772d9b22f..ca2999709eb4 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1370,6 +1370,7 @@ static int atmel_tdes_probe(struct platform_device *pdev)
1370 platform_set_drvdata(pdev, tdes_dd); 1370 platform_set_drvdata(pdev, tdes_dd);
1371 1371
1372 INIT_LIST_HEAD(&tdes_dd->list); 1372 INIT_LIST_HEAD(&tdes_dd->list);
1373 spin_lock_init(&tdes_dd->lock);
1373 1374
1374 tasklet_init(&tdes_dd->done_task, atmel_tdes_done_task, 1375 tasklet_init(&tdes_dd->done_task, atmel_tdes_done_task,
1375 (unsigned long)tdes_dd); 1376 (unsigned long)tdes_dd);
@@ -1408,7 +1409,7 @@ static int atmel_tdes_probe(struct platform_device *pdev)
1408 /* Initializing the clock */ 1409 /* Initializing the clock */
1409 tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk"); 1410 tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk");
1410 if (IS_ERR(tdes_dd->iclk)) { 1411 if (IS_ERR(tdes_dd->iclk)) {
1411 dev_err(dev, "clock intialization failed.\n"); 1412 dev_err(dev, "clock initialization failed.\n");
1412 err = PTR_ERR(tdes_dd->iclk); 1413 err = PTR_ERR(tdes_dd->iclk);
1413 goto clk_err; 1414 goto clk_err;
1414 } 1415 }
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index f347ab7eea95..ba0532efd3ae 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -1172,6 +1172,7 @@ static int ahash_final_no_ctx(struct ahash_request *req)
1172 return -ENOMEM; 1172 return -ENOMEM;
1173 } 1173 }
1174 1174
1175 edesc->sec4_sg_bytes = 0;
1175 sh_len = desc_len(sh_desc); 1176 sh_len = desc_len(sh_desc);
1176 desc = edesc->hw_desc; 1177 desc = edesc->hw_desc;
1177 init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE); 1178 init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index ae31e555793c..26a544b505f1 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -52,7 +52,7 @@
52 52
53/* length of descriptors */ 53/* length of descriptors */
54#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2) 54#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2)
55#define DESC_RNG_LEN (10 * CAAM_CMD_SZ) 55#define DESC_RNG_LEN (4 * CAAM_CMD_SZ)
56 56
57/* Buffer, its dma address and lock */ 57/* Buffer, its dma address and lock */
58struct buf_data { 58struct buf_data {
@@ -90,8 +90,8 @@ static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx)
90 struct device *jrdev = ctx->jrdev; 90 struct device *jrdev = ctx->jrdev;
91 91
92 if (ctx->sh_desc_dma) 92 if (ctx->sh_desc_dma)
93 dma_unmap_single(jrdev, ctx->sh_desc_dma, DESC_RNG_LEN, 93 dma_unmap_single(jrdev, ctx->sh_desc_dma,
94 DMA_TO_DEVICE); 94 desc_bytes(ctx->sh_desc), DMA_TO_DEVICE);
95 rng_unmap_buf(jrdev, &ctx->bufs[0]); 95 rng_unmap_buf(jrdev, &ctx->bufs[0]);
96 rng_unmap_buf(jrdev, &ctx->bufs[1]); 96 rng_unmap_buf(jrdev, &ctx->bufs[1]);
97} 97}
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 7f592d8d07bb..55a1f3951578 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -1,11 +1,6 @@
1obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o 1obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
2ccp-objs := ccp-dev.o ccp-ops.o 2ccp-objs := ccp-dev.o ccp-ops.o ccp-platform.o
3ifdef CONFIG_X86 3ccp-$(CONFIG_PCI) += ccp-pci.o
4ccp-objs += ccp-pci.o
5endif
6ifdef CONFIG_ARM64
7ccp-objs += ccp-platform.o
8endif
9 4
10obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o 5obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
11ccp-crypto-objs := ccp-crypto-main.o \ 6ccp-crypto-objs := ccp-crypto-main.o \
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index 8e162ad82085..ea7e8446956a 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -23,7 +23,6 @@
23 23
24#include "ccp-crypto.h" 24#include "ccp-crypto.h"
25 25
26
27static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, 26static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
28 int ret) 27 int ret)
29{ 28{
@@ -38,11 +37,13 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
38 if (rctx->hash_rem) { 37 if (rctx->hash_rem) {
39 /* Save remaining data to buffer */ 38 /* Save remaining data to buffer */
40 unsigned int offset = rctx->nbytes - rctx->hash_rem; 39 unsigned int offset = rctx->nbytes - rctx->hash_rem;
40
41 scatterwalk_map_and_copy(rctx->buf, rctx->src, 41 scatterwalk_map_and_copy(rctx->buf, rctx->src,
42 offset, rctx->hash_rem, 0); 42 offset, rctx->hash_rem, 0);
43 rctx->buf_count = rctx->hash_rem; 43 rctx->buf_count = rctx->hash_rem;
44 } else 44 } else {
45 rctx->buf_count = 0; 45 rctx->buf_count = 0;
46 }
46 47
47 /* Update result area if supplied */ 48 /* Update result area if supplied */
48 if (req->result) 49 if (req->result)
@@ -202,7 +203,7 @@ static int ccp_aes_cmac_digest(struct ahash_request *req)
202} 203}
203 204
204static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, 205static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
205 unsigned int key_len) 206 unsigned int key_len)
206{ 207{
207 struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); 208 struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
208 struct ccp_crypto_ahash_alg *alg = 209 struct ccp_crypto_ahash_alg *alg =
@@ -292,7 +293,8 @@ static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm)
292 crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); 293 crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx));
293 294
294 cipher_tfm = crypto_alloc_cipher("aes", 0, 295 cipher_tfm = crypto_alloc_cipher("aes", 0,
295 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); 296 CRYPTO_ALG_ASYNC |
297 CRYPTO_ALG_NEED_FALLBACK);
296 if (IS_ERR(cipher_tfm)) { 298 if (IS_ERR(cipher_tfm)) {
297 pr_warn("could not load aes cipher driver\n"); 299 pr_warn("could not load aes cipher driver\n");
298 return PTR_ERR(cipher_tfm); 300 return PTR_ERR(cipher_tfm);
@@ -354,7 +356,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head)
354 ret = crypto_register_ahash(alg); 356 ret = crypto_register_ahash(alg);
355 if (ret) { 357 if (ret) {
356 pr_err("%s ahash algorithm registration error (%d)\n", 358 pr_err("%s ahash algorithm registration error (%d)\n",
357 base->cra_name, ret); 359 base->cra_name, ret);
358 kfree(ccp_alg); 360 kfree(ccp_alg);
359 return ret; 361 return ret;
360 } 362 }
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 0cc5594b7de3..52c7395cb8d8 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -21,7 +21,6 @@
21 21
22#include "ccp-crypto.h" 22#include "ccp-crypto.h"
23 23
24
25struct ccp_aes_xts_def { 24struct ccp_aes_xts_def {
26 const char *name; 25 const char *name;
27 const char *drv_name; 26 const char *drv_name;
@@ -216,7 +215,6 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm)
216 ctx->u.aes.tfm_ablkcipher = NULL; 215 ctx->u.aes.tfm_ablkcipher = NULL;
217} 216}
218 217
219
220static int ccp_register_aes_xts_alg(struct list_head *head, 218static int ccp_register_aes_xts_alg(struct list_head *head,
221 const struct ccp_aes_xts_def *def) 219 const struct ccp_aes_xts_def *def)
222{ 220{
@@ -255,7 +253,7 @@ static int ccp_register_aes_xts_alg(struct list_head *head,
255 ret = crypto_register_alg(alg); 253 ret = crypto_register_alg(alg);
256 if (ret) { 254 if (ret) {
257 pr_err("%s ablkcipher algorithm registration error (%d)\n", 255 pr_err("%s ablkcipher algorithm registration error (%d)\n",
258 alg->cra_name, ret); 256 alg->cra_name, ret);
259 kfree(ccp_alg); 257 kfree(ccp_alg);
260 return ret; 258 return ret;
261 } 259 }
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
index e46490db0f63..7984f910884d 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -22,7 +22,6 @@
22 22
23#include "ccp-crypto.h" 23#include "ccp-crypto.h"
24 24
25
26static int ccp_aes_complete(struct crypto_async_request *async_req, int ret) 25static int ccp_aes_complete(struct crypto_async_request *async_req, int ret)
27{ 26{
28 struct ablkcipher_request *req = ablkcipher_request_cast(async_req); 27 struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
@@ -345,7 +344,7 @@ static int ccp_register_aes_alg(struct list_head *head,
345 ret = crypto_register_alg(alg); 344 ret = crypto_register_alg(alg);
346 if (ret) { 345 if (ret) {
347 pr_err("%s ablkcipher algorithm registration error (%d)\n", 346 pr_err("%s ablkcipher algorithm registration error (%d)\n",
348 alg->cra_name, ret); 347 alg->cra_name, ret);
349 kfree(ccp_alg); 348 kfree(ccp_alg);
350 return ret; 349 return ret;
351 } 350 }
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 4d4e016d755b..bdec01ec608f 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -33,7 +33,6 @@ static unsigned int sha_disable;
33module_param(sha_disable, uint, 0444); 33module_param(sha_disable, uint, 0444);
34MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value"); 34MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value");
35 35
36
37/* List heads for the supported algorithms */ 36/* List heads for the supported algorithms */
38static LIST_HEAD(hash_algs); 37static LIST_HEAD(hash_algs);
39static LIST_HEAD(cipher_algs); 38static LIST_HEAD(cipher_algs);
@@ -48,6 +47,7 @@ struct ccp_crypto_queue {
48 struct list_head *backlog; 47 struct list_head *backlog;
49 unsigned int cmd_count; 48 unsigned int cmd_count;
50}; 49};
50
51#define CCP_CRYPTO_MAX_QLEN 100 51#define CCP_CRYPTO_MAX_QLEN 100
52 52
53static struct ccp_crypto_queue req_queue; 53static struct ccp_crypto_queue req_queue;
@@ -77,7 +77,6 @@ struct ccp_crypto_cpu {
77 int err; 77 int err;
78}; 78};
79 79
80
81static inline bool ccp_crypto_success(int err) 80static inline bool ccp_crypto_success(int err)
82{ 81{
83 if (err && (err != -EINPROGRESS) && (err != -EBUSY)) 82 if (err && (err != -EINPROGRESS) && (err != -EBUSY))
@@ -143,7 +142,7 @@ static void ccp_crypto_complete(void *data, int err)
143 int ret; 142 int ret;
144 143
145 if (err == -EINPROGRESS) { 144 if (err == -EINPROGRESS) {
146 /* Only propogate the -EINPROGRESS if necessary */ 145 /* Only propagate the -EINPROGRESS if necessary */
147 if (crypto_cmd->ret == -EBUSY) { 146 if (crypto_cmd->ret == -EBUSY) {
148 crypto_cmd->ret = -EINPROGRESS; 147 crypto_cmd->ret = -EINPROGRESS;
149 req->complete(req, -EINPROGRESS); 148 req->complete(req, -EINPROGRESS);
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 96531571f7cf..507b34e0cc19 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -23,7 +23,6 @@
23 23
24#include "ccp-crypto.h" 24#include "ccp-crypto.h"
25 25
26
27static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) 26static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
28{ 27{
29 struct ahash_request *req = ahash_request_cast(async_req); 28 struct ahash_request *req = ahash_request_cast(async_req);
@@ -37,11 +36,13 @@ static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
37 if (rctx->hash_rem) { 36 if (rctx->hash_rem) {
38 /* Save remaining data to buffer */ 37 /* Save remaining data to buffer */
39 unsigned int offset = rctx->nbytes - rctx->hash_rem; 38 unsigned int offset = rctx->nbytes - rctx->hash_rem;
39
40 scatterwalk_map_and_copy(rctx->buf, rctx->src, 40 scatterwalk_map_and_copy(rctx->buf, rctx->src,
41 offset, rctx->hash_rem, 0); 41 offset, rctx->hash_rem, 0);
42 rctx->buf_count = rctx->hash_rem; 42 rctx->buf_count = rctx->hash_rem;
43 } else 43 } else {
44 rctx->buf_count = 0; 44 rctx->buf_count = 0;
45 }
45 46
46 /* Update result area if supplied */ 47 /* Update result area if supplied */
47 if (req->result) 48 if (req->result)
@@ -227,8 +228,9 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
227 } 228 }
228 229
229 key_len = digest_size; 230 key_len = digest_size;
230 } else 231 } else {
231 memcpy(ctx->u.sha.key, key, key_len); 232 memcpy(ctx->u.sha.key, key, key_len);
233 }
232 234
233 for (i = 0; i < block_size; i++) { 235 for (i = 0; i < block_size; i++) {
234 ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36; 236 ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36;
@@ -355,7 +357,7 @@ static int ccp_register_hmac_alg(struct list_head *head,
355 ret = crypto_register_ahash(alg); 357 ret = crypto_register_ahash(alg);
356 if (ret) { 358 if (ret) {
357 pr_err("%s ahash algorithm registration error (%d)\n", 359 pr_err("%s ahash algorithm registration error (%d)\n",
358 base->cra_name, ret); 360 base->cra_name, ret);
359 kfree(ccp_alg); 361 kfree(ccp_alg);
360 return ret; 362 return ret;
361 } 363 }
@@ -410,7 +412,7 @@ static int ccp_register_sha_alg(struct list_head *head,
410 ret = crypto_register_ahash(alg); 412 ret = crypto_register_ahash(alg);
411 if (ret) { 413 if (ret) {
412 pr_err("%s ahash algorithm registration error (%d)\n", 414 pr_err("%s ahash algorithm registration error (%d)\n",
413 base->cra_name, ret); 415 base->cra_name, ret);
414 kfree(ccp_alg); 416 kfree(ccp_alg);
415 return ret; 417 return ret;
416 } 418 }
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index 9aa4ae184f7f..76a96f0f44c6 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -13,7 +13,6 @@
13#ifndef __CCP_CRYPTO_H__ 13#ifndef __CCP_CRYPTO_H__
14#define __CCP_CRYPTO_H__ 14#define __CCP_CRYPTO_H__
15 15
16
17#include <linux/list.h> 16#include <linux/list.h>
18#include <linux/wait.h> 17#include <linux/wait.h>
19#include <linux/pci.h> 18#include <linux/pci.h>
@@ -25,7 +24,6 @@
25#include <crypto/hash.h> 24#include <crypto/hash.h>
26#include <crypto/sha.h> 25#include <crypto/sha.h>
27 26
28
29#define CCP_CRA_PRIORITY 300 27#define CCP_CRA_PRIORITY 300
30 28
31struct ccp_crypto_ablkcipher_alg { 29struct ccp_crypto_ablkcipher_alg {
@@ -68,7 +66,6 @@ static inline struct ccp_crypto_ahash_alg *
68 return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg); 66 return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg);
69} 67}
70 68
71
72/***** AES related defines *****/ 69/***** AES related defines *****/
73struct ccp_aes_ctx { 70struct ccp_aes_ctx {
74 /* Fallback cipher for XTS with unsupported unit sizes */ 71 /* Fallback cipher for XTS with unsupported unit sizes */
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index ca29c120b85f..861bacc1bb94 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -37,7 +37,6 @@ struct ccp_tasklet_data {
37 struct ccp_cmd *cmd; 37 struct ccp_cmd *cmd;
38}; 38};
39 39
40
41static struct ccp_device *ccp_dev; 40static struct ccp_device *ccp_dev;
42static inline struct ccp_device *ccp_get_device(void) 41static inline struct ccp_device *ccp_get_device(void)
43{ 42{
@@ -296,11 +295,9 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
296{ 295{
297 struct ccp_device *ccp; 296 struct ccp_device *ccp;
298 297
299 ccp = kzalloc(sizeof(*ccp), GFP_KERNEL); 298 ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL);
300 if (ccp == NULL) { 299 if (!ccp)
301 dev_err(dev, "unable to allocate device struct\n");
302 return NULL; 300 return NULL;
303 }
304 ccp->dev = dev; 301 ccp->dev = dev;
305 302
306 INIT_LIST_HEAD(&ccp->cmd); 303 INIT_LIST_HEAD(&ccp->cmd);
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 62ff35a6b9ec..6ff89031fb96 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -21,7 +21,7 @@
21#include <linux/wait.h> 21#include <linux/wait.h>
22#include <linux/dmapool.h> 22#include <linux/dmapool.h>
23#include <linux/hw_random.h> 23#include <linux/hw_random.h>
24 24#include <linux/bitops.h>
25 25
26#define MAX_DMAPOOL_NAME_LEN 32 26#define MAX_DMAPOOL_NAME_LEN 32
27 27
@@ -33,7 +33,6 @@
33#define CACHE_NONE 0x00 33#define CACHE_NONE 0x00
34#define CACHE_WB_NO_ALLOC 0xb7 34#define CACHE_WB_NO_ALLOC 0xb7
35 35
36
37/****** Register Mappings ******/ 36/****** Register Mappings ******/
38#define Q_MASK_REG 0x000 37#define Q_MASK_REG 0x000
39#define TRNG_OUT_REG 0x00c 38#define TRNG_OUT_REG 0x00c
@@ -54,8 +53,8 @@
54#define CMD_Q_CACHE_BASE 0x228 53#define CMD_Q_CACHE_BASE 0x228
55#define CMD_Q_CACHE_INC 0x20 54#define CMD_Q_CACHE_INC 0x20
56 55
57#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f); 56#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f)
58#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f); 57#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f)
59 58
60/****** REQ0 Related Values ******/ 59/****** REQ0 Related Values ******/
61#define REQ0_WAIT_FOR_WRITE 0x00000004 60#define REQ0_WAIT_FOR_WRITE 0x00000004
@@ -103,7 +102,6 @@
103/****** REQ6 Related Values ******/ 102/****** REQ6 Related Values ******/
104#define REQ6_MEMTYPE_SHIFT 16 103#define REQ6_MEMTYPE_SHIFT 16
105 104
106
107/****** Key Storage Block ******/ 105/****** Key Storage Block ******/
108#define KSB_START 77 106#define KSB_START 77
109#define KSB_END 127 107#define KSB_END 127
@@ -114,7 +112,7 @@
114#define CCP_JOBID_MASK 0x0000003f 112#define CCP_JOBID_MASK 0x0000003f
115 113
116#define CCP_DMAPOOL_MAX_SIZE 64 114#define CCP_DMAPOOL_MAX_SIZE 64
117#define CCP_DMAPOOL_ALIGN (1 << 5) 115#define CCP_DMAPOOL_ALIGN BIT(5)
118 116
119#define CCP_REVERSE_BUF_SIZE 64 117#define CCP_REVERSE_BUF_SIZE 64
120 118
@@ -142,7 +140,6 @@
142#define CCP_ECC_RESULT_OFFSET 60 140#define CCP_ECC_RESULT_OFFSET 60
143#define CCP_ECC_RESULT_SUCCESS 0x0001 141#define CCP_ECC_RESULT_SUCCESS 0x0001
144 142
145
146struct ccp_device; 143struct ccp_device;
147struct ccp_cmd; 144struct ccp_cmd;
148 145
@@ -261,7 +258,6 @@ struct ccp_device {
261 unsigned int axcache; 258 unsigned int axcache;
262}; 259};
263 260
264
265int ccp_pci_init(void); 261int ccp_pci_init(void);
266void ccp_pci_exit(void); 262void ccp_pci_exit(void);
267 263
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 8729364261d7..71f2e3c89424 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -27,7 +27,6 @@
27 27
28#include "ccp-dev.h" 28#include "ccp-dev.h"
29 29
30
31enum ccp_memtype { 30enum ccp_memtype {
32 CCP_MEMTYPE_SYSTEM = 0, 31 CCP_MEMTYPE_SYSTEM = 0,
33 CCP_MEMTYPE_KSB, 32 CCP_MEMTYPE_KSB,
@@ -515,7 +514,6 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
515 if (!wa->dma_count) 514 if (!wa->dma_count)
516 return -ENOMEM; 515 return -ENOMEM;
517 516
518
519 return 0; 517 return 0;
520} 518}
521 519
@@ -763,8 +761,9 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
763 sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used; 761 sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
764 sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len); 762 sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
765 op_len = min(sg_src_len, sg_dst_len); 763 op_len = min(sg_src_len, sg_dst_len);
766 } else 764 } else {
767 op_len = sg_src_len; 765 op_len = sg_src_len;
766 }
768 767
769 /* The data operation length will be at least block_size in length 768 /* The data operation length will be at least block_size in length
770 * or the smaller of available sg room remaining for the source or 769 * or the smaller of available sg room remaining for the source or
@@ -1131,9 +1130,9 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1131 if (ret) 1130 if (ret)
1132 goto e_ctx; 1131 goto e_ctx;
1133 1132
1134 if (in_place) 1133 if (in_place) {
1135 dst = src; 1134 dst = src;
1136 else { 1135 } else {
1137 ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len, 1136 ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1138 AES_BLOCK_SIZE, DMA_FROM_DEVICE); 1137 AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1139 if (ret) 1138 if (ret)
@@ -1304,9 +1303,9 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1304 if (ret) 1303 if (ret)
1305 goto e_ctx; 1304 goto e_ctx;
1306 1305
1307 if (in_place) 1306 if (in_place) {
1308 dst = src; 1307 dst = src;
1309 else { 1308 } else {
1310 ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len, 1309 ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1311 unit_size, DMA_FROM_DEVICE); 1310 unit_size, DMA_FROM_DEVICE);
1312 if (ret) 1311 if (ret)
@@ -1451,8 +1450,9 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1451 goto e_ctx; 1450 goto e_ctx;
1452 } 1451 }
1453 memcpy(ctx.address, init, CCP_SHA_CTXSIZE); 1452 memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
1454 } else 1453 } else {
1455 ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); 1454 ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1455 }
1456 1456
1457 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, 1457 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1458 CCP_PASSTHRU_BYTESWAP_256BIT); 1458 CCP_PASSTHRU_BYTESWAP_256BIT);
@@ -1732,9 +1732,9 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1732 if (ret) 1732 if (ret)
1733 goto e_mask; 1733 goto e_mask;
1734 1734
1735 if (in_place) 1735 if (in_place) {
1736 dst = src; 1736 dst = src;
1737 else { 1737 } else {
1738 ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len, 1738 ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1739 CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE); 1739 CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1740 if (ret) 1740 if (ret)
@@ -1974,7 +1974,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1974 src.address += CCP_ECC_OPERAND_SIZE; 1974 src.address += CCP_ECC_OPERAND_SIZE;
1975 1975
1976 /* Set the first point Z coordianate to 1 */ 1976 /* Set the first point Z coordianate to 1 */
1977 *(src.address) = 0x01; 1977 *src.address = 0x01;
1978 src.address += CCP_ECC_OPERAND_SIZE; 1978 src.address += CCP_ECC_OPERAND_SIZE;
1979 1979
1980 if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { 1980 if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
@@ -1989,7 +1989,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1989 src.address += CCP_ECC_OPERAND_SIZE; 1989 src.address += CCP_ECC_OPERAND_SIZE;
1990 1990
1991 /* Set the second point Z coordianate to 1 */ 1991 /* Set the second point Z coordianate to 1 */
1992 *(src.address) = 0x01; 1992 *src.address = 0x01;
1993 src.address += CCP_ECC_OPERAND_SIZE; 1993 src.address += CCP_ECC_OPERAND_SIZE;
1994 } else { 1994 } else {
1995 /* Copy the Domain "a" parameter */ 1995 /* Copy the Domain "a" parameter */
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
index 7f89c946adfe..af190d4795a8 100644
--- a/drivers/crypto/ccp/ccp-pci.c
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -174,11 +174,10 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
174 if (!ccp) 174 if (!ccp)
175 goto e_err; 175 goto e_err;
176 176
177 ccp_pci = kzalloc(sizeof(*ccp_pci), GFP_KERNEL); 177 ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL);
178 if (!ccp_pci) { 178 if (!ccp_pci)
179 ret = -ENOMEM; 179 goto e_err;
180 goto e_free1; 180
181 }
182 ccp->dev_specific = ccp_pci; 181 ccp->dev_specific = ccp_pci;
183 ccp->get_irq = ccp_get_irqs; 182 ccp->get_irq = ccp_get_irqs;
184 ccp->free_irq = ccp_free_irqs; 183 ccp->free_irq = ccp_free_irqs;
@@ -186,7 +185,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
186 ret = pci_request_regions(pdev, "ccp"); 185 ret = pci_request_regions(pdev, "ccp");
187 if (ret) { 186 if (ret) {
188 dev_err(dev, "pci_request_regions failed (%d)\n", ret); 187 dev_err(dev, "pci_request_regions failed (%d)\n", ret);
189 goto e_free2; 188 goto e_err;
190 } 189 }
191 190
192 ret = pci_enable_device(pdev); 191 ret = pci_enable_device(pdev);
@@ -204,7 +203,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
204 203
205 ret = -EIO; 204 ret = -EIO;
206 ccp->io_map = pci_iomap(pdev, bar, 0); 205 ccp->io_map = pci_iomap(pdev, bar, 0);
207 if (ccp->io_map == NULL) { 206 if (!ccp->io_map) {
208 dev_err(dev, "pci_iomap failed\n"); 207 dev_err(dev, "pci_iomap failed\n");
209 goto e_device; 208 goto e_device;
210 } 209 }
@@ -239,12 +238,6 @@ e_device:
239e_regions: 238e_regions:
240 pci_release_regions(pdev); 239 pci_release_regions(pdev);
241 240
242e_free2:
243 kfree(ccp_pci);
244
245e_free1:
246 kfree(ccp);
247
248e_err: 241e_err:
249 dev_notice(dev, "initialization failed\n"); 242 dev_notice(dev, "initialization failed\n");
250 return ret; 243 return ret;
@@ -266,8 +259,6 @@ static void ccp_pci_remove(struct pci_dev *pdev)
266 259
267 pci_release_regions(pdev); 260 pci_release_regions(pdev);
268 261
269 kfree(ccp);
270
271 dev_notice(dev, "disabled\n"); 262 dev_notice(dev, "disabled\n");
272} 263}
273 264
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c
index 8c50bad25f7e..b1c20b2b5647 100644
--- a/drivers/crypto/ccp/ccp-platform.c
+++ b/drivers/crypto/ccp/ccp-platform.c
@@ -23,9 +23,15 @@
23#include <linux/delay.h> 23#include <linux/delay.h>
24#include <linux/ccp.h> 24#include <linux/ccp.h>
25#include <linux/of.h> 25#include <linux/of.h>
26#include <linux/of_address.h>
27#include <linux/acpi.h>
26 28
27#include "ccp-dev.h" 29#include "ccp-dev.h"
28 30
31struct ccp_platform {
32 int use_acpi;
33 int coherent;
34};
29 35
30static int ccp_get_irq(struct ccp_device *ccp) 36static int ccp_get_irq(struct ccp_device *ccp)
31{ 37{
@@ -84,10 +90,64 @@ static struct resource *ccp_find_mmio_area(struct ccp_device *ccp)
84 return NULL; 90 return NULL;
85} 91}
86 92
93#ifdef CONFIG_ACPI
94static int ccp_acpi_support(struct ccp_device *ccp)
95{
96 struct ccp_platform *ccp_platform = ccp->dev_specific;
97 struct acpi_device *adev = ACPI_COMPANION(ccp->dev);
98 acpi_handle handle;
99 acpi_status status;
100 unsigned long long data;
101 int cca;
102
103 /* Retrieve the device cache coherency value */
104 handle = adev->handle;
105 do {
106 status = acpi_evaluate_integer(handle, "_CCA", NULL, &data);
107 if (!ACPI_FAILURE(status)) {
108 cca = data;
109 break;
110 }
111 } while (!ACPI_FAILURE(status));
112
113 if (ACPI_FAILURE(status)) {
114 dev_err(ccp->dev, "error obtaining acpi coherency value\n");
115 return -EINVAL;
116 }
117
118 ccp_platform->coherent = !!cca;
119
120 return 0;
121}
122#else /* CONFIG_ACPI */
123static int ccp_acpi_support(struct ccp_device *ccp)
124{
125 return -EINVAL;
126}
127#endif
128
129#ifdef CONFIG_OF
130static int ccp_of_support(struct ccp_device *ccp)
131{
132 struct ccp_platform *ccp_platform = ccp->dev_specific;
133
134 ccp_platform->coherent = of_dma_is_coherent(ccp->dev->of_node);
135
136 return 0;
137}
138#else
139static int ccp_of_support(struct ccp_device *ccp)
140{
141 return -EINVAL;
142}
143#endif
144
87static int ccp_platform_probe(struct platform_device *pdev) 145static int ccp_platform_probe(struct platform_device *pdev)
88{ 146{
89 struct ccp_device *ccp; 147 struct ccp_device *ccp;
148 struct ccp_platform *ccp_platform;
90 struct device *dev = &pdev->dev; 149 struct device *dev = &pdev->dev;
150 struct acpi_device *adev = ACPI_COMPANION(dev);
91 struct resource *ior; 151 struct resource *ior;
92 int ret; 152 int ret;
93 153
@@ -96,24 +156,40 @@ static int ccp_platform_probe(struct platform_device *pdev)
96 if (!ccp) 156 if (!ccp)
97 goto e_err; 157 goto e_err;
98 158
99 ccp->dev_specific = NULL; 159 ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL);
160 if (!ccp_platform)
161 goto e_err;
162
163 ccp->dev_specific = ccp_platform;
100 ccp->get_irq = ccp_get_irqs; 164 ccp->get_irq = ccp_get_irqs;
101 ccp->free_irq = ccp_free_irqs; 165 ccp->free_irq = ccp_free_irqs;
102 166
167 ccp_platform->use_acpi = (!adev || acpi_disabled) ? 0 : 1;
168
103 ior = ccp_find_mmio_area(ccp); 169 ior = ccp_find_mmio_area(ccp);
104 ccp->io_map = devm_ioremap_resource(dev, ior); 170 ccp->io_map = devm_ioremap_resource(dev, ior);
105 if (IS_ERR(ccp->io_map)) { 171 if (IS_ERR(ccp->io_map)) {
106 ret = PTR_ERR(ccp->io_map); 172 ret = PTR_ERR(ccp->io_map);
107 goto e_free; 173 goto e_err;
108 } 174 }
109 ccp->io_regs = ccp->io_map; 175 ccp->io_regs = ccp->io_map;
110 176
111 if (!dev->dma_mask) 177 if (!dev->dma_mask)
112 dev->dma_mask = &dev->coherent_dma_mask; 178 dev->dma_mask = &dev->coherent_dma_mask;
113 *(dev->dma_mask) = DMA_BIT_MASK(48); 179 ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
114 dev->coherent_dma_mask = DMA_BIT_MASK(48); 180 if (ret) {
181 dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
182 goto e_err;
183 }
184
185 if (ccp_platform->use_acpi)
186 ret = ccp_acpi_support(ccp);
187 else
188 ret = ccp_of_support(ccp);
189 if (ret)
190 goto e_err;
115 191
116 if (of_property_read_bool(dev->of_node, "dma-coherent")) 192 if (ccp_platform->coherent)
117 ccp->axcache = CACHE_WB_NO_ALLOC; 193 ccp->axcache = CACHE_WB_NO_ALLOC;
118 else 194 else
119 ccp->axcache = CACHE_NONE; 195 ccp->axcache = CACHE_NONE;
@@ -122,15 +198,12 @@ static int ccp_platform_probe(struct platform_device *pdev)
122 198
123 ret = ccp_init(ccp); 199 ret = ccp_init(ccp);
124 if (ret) 200 if (ret)
125 goto e_free; 201 goto e_err;
126 202
127 dev_notice(dev, "enabled\n"); 203 dev_notice(dev, "enabled\n");
128 204
129 return 0; 205 return 0;
130 206
131e_free:
132 kfree(ccp);
133
134e_err: 207e_err:
135 dev_notice(dev, "initialization failed\n"); 208 dev_notice(dev, "initialization failed\n");
136 return ret; 209 return ret;
@@ -143,8 +216,6 @@ static int ccp_platform_remove(struct platform_device *pdev)
143 216
144 ccp_destroy(ccp); 217 ccp_destroy(ccp);
145 218
146 kfree(ccp);
147
148 dev_notice(dev, "disabled\n"); 219 dev_notice(dev, "disabled\n");
149 220
150 return 0; 221 return 0;
@@ -200,15 +271,29 @@ static int ccp_platform_resume(struct platform_device *pdev)
200} 271}
201#endif 272#endif
202 273
203static const struct of_device_id ccp_platform_ids[] = { 274#ifdef CONFIG_ACPI
275static const struct acpi_device_id ccp_acpi_match[] = {
276 { "AMDI0C00", 0 },
277 { },
278};
279#endif
280
281#ifdef CONFIG_OF
282static const struct of_device_id ccp_of_match[] = {
204 { .compatible = "amd,ccp-seattle-v1a" }, 283 { .compatible = "amd,ccp-seattle-v1a" },
205 { }, 284 { },
206}; 285};
286#endif
207 287
208static struct platform_driver ccp_platform_driver = { 288static struct platform_driver ccp_platform_driver = {
209 .driver = { 289 .driver = {
210 .name = "AMD Cryptographic Coprocessor", 290 .name = "AMD Cryptographic Coprocessor",
211 .of_match_table = ccp_platform_ids, 291#ifdef CONFIG_ACPI
292 .acpi_match_table = ccp_acpi_match,
293#endif
294#ifdef CONFIG_OF
295 .of_match_table = ccp_of_match,
296#endif
212 }, 297 },
213 .probe = ccp_platform_probe, 298 .probe = ccp_platform_probe,
214 .remove = ccp_platform_remove, 299 .remove = ccp_platform_remove,
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
new file mode 100644
index 000000000000..ad47d0d61098
--- /dev/null
+++ b/drivers/crypto/img-hash.c
@@ -0,0 +1,1029 @@
1/*
2 * Copyright (c) 2014 Imagination Technologies
3 * Authors: Will Thomas, James Hartley
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation.
8 *
9 * Interface structure taken from omap-sham driver
10 */
11
12#include <linux/clk.h>
13#include <linux/dmaengine.h>
14#include <linux/interrupt.h>
15#include <linux/io.h>
16#include <linux/kernel.h>
17#include <linux/module.h>
18#include <linux/of_device.h>
19#include <linux/platform_device.h>
20#include <linux/scatterlist.h>
21
22#include <crypto/internal/hash.h>
23#include <crypto/md5.h>
24#include <crypto/sha.h>
25
26#define CR_RESET 0
27#define CR_RESET_SET 1
28#define CR_RESET_UNSET 0
29
30#define CR_MESSAGE_LENGTH_H 0x4
31#define CR_MESSAGE_LENGTH_L 0x8
32
33#define CR_CONTROL 0xc
34#define CR_CONTROL_BYTE_ORDER_3210 0
35#define CR_CONTROL_BYTE_ORDER_0123 1
36#define CR_CONTROL_BYTE_ORDER_2310 2
37#define CR_CONTROL_BYTE_ORDER_1032 3
38#define CR_CONTROL_BYTE_ORDER_SHIFT 8
39#define CR_CONTROL_ALGO_MD5 0
40#define CR_CONTROL_ALGO_SHA1 1
41#define CR_CONTROL_ALGO_SHA224 2
42#define CR_CONTROL_ALGO_SHA256 3
43
44#define CR_INTSTAT 0x10
45#define CR_INTENAB 0x14
46#define CR_INTCLEAR 0x18
47#define CR_INT_RESULTS_AVAILABLE BIT(0)
48#define CR_INT_NEW_RESULTS_SET BIT(1)
49#define CR_INT_RESULT_READ_ERR BIT(2)
50#define CR_INT_MESSAGE_WRITE_ERROR BIT(3)
51#define CR_INT_STATUS BIT(8)
52
53#define CR_RESULT_QUEUE 0x1c
54#define CR_RSD0 0x40
55#define CR_CORE_REV 0x50
56#define CR_CORE_DES1 0x60
57#define CR_CORE_DES2 0x70
58
59#define DRIVER_FLAGS_BUSY BIT(0)
60#define DRIVER_FLAGS_FINAL BIT(1)
61#define DRIVER_FLAGS_DMA_ACTIVE BIT(2)
62#define DRIVER_FLAGS_OUTPUT_READY BIT(3)
63#define DRIVER_FLAGS_INIT BIT(4)
64#define DRIVER_FLAGS_CPU BIT(5)
65#define DRIVER_FLAGS_DMA_READY BIT(6)
66#define DRIVER_FLAGS_ERROR BIT(7)
67#define DRIVER_FLAGS_SG BIT(8)
68#define DRIVER_FLAGS_SHA1 BIT(18)
69#define DRIVER_FLAGS_SHA224 BIT(19)
70#define DRIVER_FLAGS_SHA256 BIT(20)
71#define DRIVER_FLAGS_MD5 BIT(21)
72
73#define IMG_HASH_QUEUE_LENGTH 20
74#define IMG_HASH_DMA_THRESHOLD 64
75
76#ifdef __LITTLE_ENDIAN
77#define IMG_HASH_BYTE_ORDER CR_CONTROL_BYTE_ORDER_3210
78#else
79#define IMG_HASH_BYTE_ORDER CR_CONTROL_BYTE_ORDER_0123
80#endif
81
82struct img_hash_dev;
83
84struct img_hash_request_ctx {
85 struct img_hash_dev *hdev;
86 u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
87 unsigned long flags;
88 size_t digsize;
89
90 dma_addr_t dma_addr;
91 size_t dma_ct;
92
93 /* sg root */
94 struct scatterlist *sgfirst;
95 /* walk state */
96 struct scatterlist *sg;
97 size_t nents;
98 size_t offset;
99 unsigned int total;
100 size_t sent;
101
102 unsigned long op;
103
104 size_t bufcnt;
105 u8 buffer[0] __aligned(sizeof(u32));
106 struct ahash_request fallback_req;
107};
108
109struct img_hash_ctx {
110 struct img_hash_dev *hdev;
111 unsigned long flags;
112 struct crypto_ahash *fallback;
113};
114
115struct img_hash_dev {
116 struct list_head list;
117 struct device *dev;
118 struct clk *hash_clk;
119 struct clk *sys_clk;
120 void __iomem *io_base;
121
122 phys_addr_t bus_addr;
123 void __iomem *cpu_addr;
124
125 spinlock_t lock;
126 int err;
127 struct tasklet_struct done_task;
128 struct tasklet_struct dma_task;
129
130 unsigned long flags;
131 struct crypto_queue queue;
132 struct ahash_request *req;
133
134 struct dma_chan *dma_lch;
135};
136
137struct img_hash_drv {
138 struct list_head dev_list;
139 spinlock_t lock;
140};
141
142static struct img_hash_drv img_hash = {
143 .dev_list = LIST_HEAD_INIT(img_hash.dev_list),
144 .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
145};
146
147static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset)
148{
149 return readl_relaxed(hdev->io_base + offset);
150}
151
152static inline void img_hash_write(struct img_hash_dev *hdev,
153 u32 offset, u32 value)
154{
155 writel_relaxed(value, hdev->io_base + offset);
156}
157
158static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev)
159{
160 return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE));
161}
162
163static void img_hash_start(struct img_hash_dev *hdev, bool dma)
164{
165 struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
166 u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
167
168 if (ctx->flags & DRIVER_FLAGS_MD5)
169 cr |= CR_CONTROL_ALGO_MD5;
170 else if (ctx->flags & DRIVER_FLAGS_SHA1)
171 cr |= CR_CONTROL_ALGO_SHA1;
172 else if (ctx->flags & DRIVER_FLAGS_SHA224)
173 cr |= CR_CONTROL_ALGO_SHA224;
174 else if (ctx->flags & DRIVER_FLAGS_SHA256)
175 cr |= CR_CONTROL_ALGO_SHA256;
176 dev_dbg(hdev->dev, "Starting hash process\n");
177 img_hash_write(hdev, CR_CONTROL, cr);
178
179 /*
180 * The hardware block requires two cycles between writing the control
181 * register and writing the first word of data in non DMA mode, to
182 * ensure the first data write is not grouped in burst with the control
183 * register write a read is issued to 'flush' the bus.
184 */
185 if (!dma)
186 img_hash_read(hdev, CR_CONTROL);
187}
188
189static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
190 size_t length, int final)
191{
192 u32 count, len32;
193 const u32 *buffer = (const u32 *)buf;
194
195 dev_dbg(hdev->dev, "xmit_cpu: length: %zu bytes\n", length);
196
197 if (final)
198 hdev->flags |= DRIVER_FLAGS_FINAL;
199
200 len32 = DIV_ROUND_UP(length, sizeof(u32));
201
202 for (count = 0; count < len32; count++)
203 writel_relaxed(buffer[count], hdev->cpu_addr);
204
205 return -EINPROGRESS;
206}
207
208static void img_hash_dma_callback(void *data)
209{
210 struct img_hash_dev *hdev = (struct img_hash_dev *)data;
211 struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
212
213 if (ctx->bufcnt) {
214 img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
215 ctx->bufcnt = 0;
216 }
217 if (ctx->sg)
218 tasklet_schedule(&hdev->dma_task);
219}
220
221static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg)
222{
223 struct dma_async_tx_descriptor *desc;
224 struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
225
226 ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
227 if (ctx->dma_ct == 0) {
228 dev_err(hdev->dev, "Invalid DMA sg\n");
229 hdev->err = -EINVAL;
230 return -EINVAL;
231 }
232
233 desc = dmaengine_prep_slave_sg(hdev->dma_lch,
234 sg,
235 ctx->dma_ct,
236 DMA_MEM_TO_DEV,
237 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
238 if (!desc) {
239 dev_err(hdev->dev, "Null DMA descriptor\n");
240 hdev->err = -EINVAL;
241 dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
242 return -EINVAL;
243 }
244 desc->callback = img_hash_dma_callback;
245 desc->callback_param = hdev;
246 dmaengine_submit(desc);
247 dma_async_issue_pending(hdev->dma_lch);
248
249 return 0;
250}
251
252static int img_hash_write_via_cpu(struct img_hash_dev *hdev)
253{
254 struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
255
256 ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg),
257 ctx->buffer, hdev->req->nbytes);
258
259 ctx->total = hdev->req->nbytes;
260 ctx->bufcnt = 0;
261
262 hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
263
264 img_hash_start(hdev, false);
265
266 return img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1);
267}
268
269static int img_hash_finish(struct ahash_request *req)
270{
271 struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
272
273 if (!req->result)
274 return -EINVAL;
275
276 memcpy(req->result, ctx->digest, ctx->digsize);
277
278 return 0;
279}
280
281static void img_hash_copy_hash(struct ahash_request *req)
282{
283 struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
284 u32 *hash = (u32 *)ctx->digest;
285 int i;
286
287 for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
288 hash[i] = img_hash_read_result_queue(ctx->hdev);
289}
290
291static void img_hash_finish_req(struct ahash_request *req, int err)
292{
293 struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
294 struct img_hash_dev *hdev = ctx->hdev;
295
296 if (!err) {
297 img_hash_copy_hash(req);
298 if (DRIVER_FLAGS_FINAL & hdev->flags)
299 err = img_hash_finish(req);
300 } else {
301 dev_warn(hdev->dev, "Hash failed with error %d\n", err);
302 ctx->flags |= DRIVER_FLAGS_ERROR;
303 }
304
305 hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | DRIVER_FLAGS_OUTPUT_READY |
306 DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | DRIVER_FLAGS_FINAL);
307
308 if (req->base.complete)
309 req->base.complete(&req->base, err);
310}
311
312static int img_hash_write_via_dma(struct img_hash_dev *hdev)
313{
314 struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
315
316 img_hash_start(hdev, true);
317
318 dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
319
320 if (!ctx->total)
321 hdev->flags |= DRIVER_FLAGS_FINAL;
322
323 hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
324
325 tasklet_schedule(&hdev->dma_task);
326
327 return -EINPROGRESS;
328}
329
330static int img_hash_dma_init(struct img_hash_dev *hdev)
331{
332 struct dma_slave_config dma_conf;
333 int err = -EINVAL;
334
335 hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
336 if (!hdev->dma_lch) {
337 dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n");
338 return -EBUSY;
339 }
340 dma_conf.direction = DMA_MEM_TO_DEV;
341 dma_conf.dst_addr = hdev->bus_addr;
342 dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
343 dma_conf.dst_maxburst = 16;
344 dma_conf.device_fc = false;
345
346 err = dmaengine_slave_config(hdev->dma_lch, &dma_conf);
347 if (err) {
348 dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
349 dma_release_channel(hdev->dma_lch);
350 return err;
351 }
352
353 return 0;
354}
355
356static void img_hash_dma_task(unsigned long d)
357{
358 struct img_hash_dev *hdev = (struct img_hash_dev *)d;
359 struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
360 u8 *addr;
361 size_t nbytes, bleft, wsend, len, tbc;
362 struct scatterlist tsg;
363
364 if (!ctx->sg)
365 return;
366
367 addr = sg_virt(ctx->sg);
368 nbytes = ctx->sg->length - ctx->offset;
369
370 /*
371 * The hash accelerator does not support a data valid mask. This means
372 * that if each dma (i.e. per page) is not a multiple of 4 bytes, the
373 * padding bytes in the last word written by that dma would erroneously
374 * be included in the hash. To avoid this we round down the transfer,
375 * and add the excess to the start of the next dma. It does not matter
376 * that the final dma may not be a multiple of 4 bytes as the hashing
377 * block is programmed to accept the correct number of bytes.
378 */
379
380 bleft = nbytes % 4;
381 wsend = (nbytes / 4);
382
383 if (wsend) {
384 sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
385 if (img_hash_xmit_dma(hdev, &tsg)) {
386 dev_err(hdev->dev, "DMA failed, falling back to CPU");
387 ctx->flags |= DRIVER_FLAGS_CPU;
388 hdev->err = 0;
389 img_hash_xmit_cpu(hdev, addr + ctx->offset,
390 wsend * 4, 0);
391 ctx->sent += wsend * 4;
392 wsend = 0;
393 } else {
394 ctx->sent += wsend * 4;
395 }
396 }
397
398 if (bleft) {
399 ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
400 ctx->buffer, bleft, ctx->sent);
401 tbc = 0;
402 ctx->sg = sg_next(ctx->sg);
403 while (ctx->sg && (ctx->bufcnt < 4)) {
404 len = ctx->sg->length;
405 if (likely(len > (4 - ctx->bufcnt)))
406 len = 4 - ctx->bufcnt;
407 tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
408 ctx->buffer + ctx->bufcnt, len,
409 ctx->sent + ctx->bufcnt);
410 ctx->bufcnt += tbc;
411 if (tbc >= ctx->sg->length) {
412 ctx->sg = sg_next(ctx->sg);
413 tbc = 0;
414 }
415 }
416
417 ctx->sent += ctx->bufcnt;
418 ctx->offset = tbc;
419
420 if (!wsend)
421 img_hash_dma_callback(hdev);
422 } else {
423 ctx->offset = 0;
424 ctx->sg = sg_next(ctx->sg);
425 }
426}
427
428static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev)
429{
430 struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
431
432 if (ctx->flags & DRIVER_FLAGS_SG)
433 dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct, DMA_TO_DEVICE);
434
435 return 0;
436}
437
438static int img_hash_process_data(struct img_hash_dev *hdev)
439{
440 struct ahash_request *req = hdev->req;
441 struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
442 int err = 0;
443
444 ctx->bufcnt = 0;
445
446 if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
447		dev_dbg(hdev->dev, "process data request (%d bytes) using DMA\n",
448 req->nbytes);
449 err = img_hash_write_via_dma(hdev);
450 } else {
451		dev_dbg(hdev->dev, "process data request (%d bytes) using CPU\n",
452 req->nbytes);
453 err = img_hash_write_via_cpu(hdev);
454 }
455 return err;
456}
457
458static int img_hash_hw_init(struct img_hash_dev *hdev)
459{
460 unsigned long long nbits;
461 u32 u, l;
462
463 img_hash_write(hdev, CR_RESET, CR_RESET_SET);
464 img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
465 img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
466
467 nbits = (u64)hdev->req->nbytes << 3;
468 u = nbits >> 32;
469 l = nbits;
470 img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
471 img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
472
473 if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
474 hdev->flags |= DRIVER_FLAGS_INIT;
475 hdev->err = 0;
476 }
477 dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
478 return 0;
479}
480
481static int img_hash_init(struct ahash_request *req)
482{
483 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
484 struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
485 struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
486
487 ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
488 rctx->fallback_req.base.flags = req->base.flags
489 & CRYPTO_TFM_REQ_MAY_SLEEP;
490
491 return crypto_ahash_init(&rctx->fallback_req);
492}
493
494static int img_hash_handle_queue(struct img_hash_dev *hdev,
495 struct ahash_request *req)
496{
497 struct crypto_async_request *async_req, *backlog;
498 struct img_hash_request_ctx *ctx;
499 unsigned long flags;
500 int err = 0, res = 0;
501
502 spin_lock_irqsave(&hdev->lock, flags);
503
504 if (req)
505 res = ahash_enqueue_request(&hdev->queue, req);
506
507 if (DRIVER_FLAGS_BUSY & hdev->flags) {
508 spin_unlock_irqrestore(&hdev->lock, flags);
509 return res;
510 }
511
512 backlog = crypto_get_backlog(&hdev->queue);
513 async_req = crypto_dequeue_request(&hdev->queue);
514 if (async_req)
515 hdev->flags |= DRIVER_FLAGS_BUSY;
516
517 spin_unlock_irqrestore(&hdev->lock, flags);
518
519 if (!async_req)
520 return res;
521
522 if (backlog)
523 backlog->complete(backlog, -EINPROGRESS);
524
525 req = ahash_request_cast(async_req);
526 hdev->req = req;
527
528 ctx = ahash_request_ctx(req);
529
530 dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
531 ctx->op, req->nbytes);
532
533 err = img_hash_hw_init(hdev);
534
535 if (!err)
536 err = img_hash_process_data(hdev);
537
538 if (err != -EINPROGRESS) {
539 /* done_task will not finish so do it here */
540 img_hash_finish_req(req, err);
541 }
542 return res;
543}
544
545static int img_hash_update(struct ahash_request *req)
546{
547 struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
548 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
549 struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
550
551 ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
552 rctx->fallback_req.base.flags = req->base.flags
553 & CRYPTO_TFM_REQ_MAY_SLEEP;
554 rctx->fallback_req.nbytes = req->nbytes;
555 rctx->fallback_req.src = req->src;
556
557 return crypto_ahash_update(&rctx->fallback_req);
558}
559
560static int img_hash_final(struct ahash_request *req)
561{
562 struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
563 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
564 struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
565
566 ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
567 rctx->fallback_req.base.flags = req->base.flags
568 & CRYPTO_TFM_REQ_MAY_SLEEP;
569 rctx->fallback_req.result = req->result;
570
571 return crypto_ahash_final(&rctx->fallback_req);
572}
573
574static int img_hash_finup(struct ahash_request *req)
575{
576 struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
577 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
578 struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
579
580 ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
581 rctx->fallback_req.base.flags = req->base.flags
582 & CRYPTO_TFM_REQ_MAY_SLEEP;
583 rctx->fallback_req.nbytes = req->nbytes;
584 rctx->fallback_req.src = req->src;
585 rctx->fallback_req.result = req->result;
586
587 return crypto_ahash_finup(&rctx->fallback_req);
588}
589
590static int img_hash_digest(struct ahash_request *req)
591{
592 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
593 struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
594 struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
595 struct img_hash_dev *hdev = NULL;
596 struct img_hash_dev *tmp;
597 int err;
598
599 spin_lock(&img_hash.lock);
600 if (!tctx->hdev) {
601 list_for_each_entry(tmp, &img_hash.dev_list, list) {
602 hdev = tmp;
603 break;
604 }
605 tctx->hdev = hdev;
606
607 } else {
608 hdev = tctx->hdev;
609 }
610
611 spin_unlock(&img_hash.lock);
612 ctx->hdev = hdev;
613 ctx->flags = 0;
614 ctx->digsize = crypto_ahash_digestsize(tfm);
615
616 switch (ctx->digsize) {
617 case SHA1_DIGEST_SIZE:
618 ctx->flags |= DRIVER_FLAGS_SHA1;
619 break;
620 case SHA256_DIGEST_SIZE:
621 ctx->flags |= DRIVER_FLAGS_SHA256;
622 break;
623 case SHA224_DIGEST_SIZE:
624 ctx->flags |= DRIVER_FLAGS_SHA224;
625 break;
626 case MD5_DIGEST_SIZE:
627 ctx->flags |= DRIVER_FLAGS_MD5;
628 break;
629 default:
630 return -EINVAL;
631 }
632
633 ctx->bufcnt = 0;
634 ctx->offset = 0;
635 ctx->sent = 0;
636 ctx->total = req->nbytes;
637 ctx->sg = req->src;
638 ctx->sgfirst = req->src;
639 ctx->nents = sg_nents(ctx->sg);
640
641 err = img_hash_handle_queue(tctx->hdev, req);
642
643 return err;
644}
645
646static int img_hash_cra_init(struct crypto_tfm *tfm)
647{
648 struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
649 const char *alg_name = crypto_tfm_alg_name(tfm);
650 int err = -ENOMEM;
651
652 ctx->fallback = crypto_alloc_ahash(alg_name, 0,
653 CRYPTO_ALG_NEED_FALLBACK);
654 if (IS_ERR(ctx->fallback)) {
655 pr_err("img_hash: Could not load fallback driver.\n");
656 err = PTR_ERR(ctx->fallback);
657 goto err;
658 }
659 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
660 sizeof(struct img_hash_request_ctx) +
661 IMG_HASH_DMA_THRESHOLD);
662
663 return 0;
664
665err:
666 return err;
667}
668
669static void img_hash_cra_exit(struct crypto_tfm *tfm)
670{
671 struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
672
673 crypto_free_ahash(tctx->fallback);
674}
675
676static irqreturn_t img_irq_handler(int irq, void *dev_id)
677{
678 struct img_hash_dev *hdev = dev_id;
679 u32 reg;
680
681 reg = img_hash_read(hdev, CR_INTSTAT);
682 img_hash_write(hdev, CR_INTCLEAR, reg);
683
684 if (reg & CR_INT_NEW_RESULTS_SET) {
685 dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
686 if (DRIVER_FLAGS_BUSY & hdev->flags) {
687 hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
688 if (!(DRIVER_FLAGS_CPU & hdev->flags))
689 hdev->flags |= DRIVER_FLAGS_DMA_READY;
690 tasklet_schedule(&hdev->done_task);
691 } else {
692 dev_warn(hdev->dev,
693 "HASH interrupt when no active requests.\n");
694 }
695 } else if (reg & CR_INT_RESULTS_AVAILABLE) {
696 dev_warn(hdev->dev,
697 "IRQ triggered before the hash had completed\n");
698 } else if (reg & CR_INT_RESULT_READ_ERR) {
699 dev_warn(hdev->dev,
700 "Attempt to read from an empty result queue\n");
701 } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
702 dev_warn(hdev->dev,
703 "Data written before the hardware was configured\n");
704 }
705 return IRQ_HANDLED;
706}
707
708static struct ahash_alg img_algs[] = {
709 {
710 .init = img_hash_init,
711 .update = img_hash_update,
712 .final = img_hash_final,
713 .finup = img_hash_finup,
714 .digest = img_hash_digest,
715 .halg = {
716 .digestsize = MD5_DIGEST_SIZE,
717 .base = {
718 .cra_name = "md5",
719 .cra_driver_name = "img-md5",
720 .cra_priority = 300,
721 .cra_flags =
722 CRYPTO_ALG_ASYNC |
723 CRYPTO_ALG_NEED_FALLBACK,
724 .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
725 .cra_ctxsize = sizeof(struct img_hash_ctx),
726 .cra_init = img_hash_cra_init,
727 .cra_exit = img_hash_cra_exit,
728 .cra_module = THIS_MODULE,
729 }
730 }
731 },
732 {
733 .init = img_hash_init,
734 .update = img_hash_update,
735 .final = img_hash_final,
736 .finup = img_hash_finup,
737 .digest = img_hash_digest,
738 .halg = {
739 .digestsize = SHA1_DIGEST_SIZE,
740 .base = {
741 .cra_name = "sha1",
742 .cra_driver_name = "img-sha1",
743 .cra_priority = 300,
744 .cra_flags =
745 CRYPTO_ALG_ASYNC |
746 CRYPTO_ALG_NEED_FALLBACK,
747 .cra_blocksize = SHA1_BLOCK_SIZE,
748 .cra_ctxsize = sizeof(struct img_hash_ctx),
749 .cra_init = img_hash_cra_init,
750 .cra_exit = img_hash_cra_exit,
751 .cra_module = THIS_MODULE,
752 }
753 }
754 },
755 {
756 .init = img_hash_init,
757 .update = img_hash_update,
758 .final = img_hash_final,
759 .finup = img_hash_finup,
760 .digest = img_hash_digest,
761 .halg = {
762 .digestsize = SHA224_DIGEST_SIZE,
763 .base = {
764 .cra_name = "sha224",
765 .cra_driver_name = "img-sha224",
766 .cra_priority = 300,
767 .cra_flags =
768 CRYPTO_ALG_ASYNC |
769 CRYPTO_ALG_NEED_FALLBACK,
770 .cra_blocksize = SHA224_BLOCK_SIZE,
771 .cra_ctxsize = sizeof(struct img_hash_ctx),
772 .cra_init = img_hash_cra_init,
773 .cra_exit = img_hash_cra_exit,
774 .cra_module = THIS_MODULE,
775 }
776 }
777 },
778 {
779 .init = img_hash_init,
780 .update = img_hash_update,
781 .final = img_hash_final,
782 .finup = img_hash_finup,
783 .digest = img_hash_digest,
784 .halg = {
785 .digestsize = SHA256_DIGEST_SIZE,
786 .base = {
787 .cra_name = "sha256",
788 .cra_driver_name = "img-sha256",
789 .cra_priority = 300,
790 .cra_flags =
791 CRYPTO_ALG_ASYNC |
792 CRYPTO_ALG_NEED_FALLBACK,
793 .cra_blocksize = SHA256_BLOCK_SIZE,
794 .cra_ctxsize = sizeof(struct img_hash_ctx),
795 .cra_init = img_hash_cra_init,
796 .cra_exit = img_hash_cra_exit,
797 .cra_module = THIS_MODULE,
798 }
799 }
800 }
801};
802
803static int img_register_algs(struct img_hash_dev *hdev)
804{
805 int i, err;
806
807 for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
808 err = crypto_register_ahash(&img_algs[i]);
809 if (err)
810 goto err_reg;
811 }
812 return 0;
813
814err_reg:
815 for (; i--; )
816 crypto_unregister_ahash(&img_algs[i]);
817
818 return err;
819}
820
821static int img_unregister_algs(struct img_hash_dev *hdev)
822{
823 int i;
824
825 for (i = 0; i < ARRAY_SIZE(img_algs); i++)
826 crypto_unregister_ahash(&img_algs[i]);
827 return 0;
828}
829
830static void img_hash_done_task(unsigned long data)
831{
832 struct img_hash_dev *hdev = (struct img_hash_dev *)data;
833 int err = 0;
834
835 if (hdev->err == -EINVAL) {
836 err = hdev->err;
837 goto finish;
838 }
839
840 if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
841 img_hash_handle_queue(hdev, NULL);
842 return;
843 }
844
845 if (DRIVER_FLAGS_CPU & hdev->flags) {
846 if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
847 hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
848 goto finish;
849 }
850 } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
851 if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
852 hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
853 img_hash_write_via_dma_stop(hdev);
854 if (hdev->err) {
855 err = hdev->err;
856 goto finish;
857 }
858 }
859 if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
860 hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
861 DRIVER_FLAGS_OUTPUT_READY);
862 goto finish;
863 }
864 }
865 return;
866
867finish:
868 img_hash_finish_req(hdev->req, err);
869}
870
871static const struct of_device_id img_hash_match[] = {
872 { .compatible = "img,hash-accelerator" },
873 {}
874};
875MODULE_DEVICE_TABLE(of, img_hash_match);
876
877static int img_hash_probe(struct platform_device *pdev)
878{
879 struct img_hash_dev *hdev;
880 struct device *dev = &pdev->dev;
881 struct resource *hash_res;
882 int irq;
883 int err;
884
885 hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
886 if (hdev == NULL)
887 return -ENOMEM;
888
889 spin_lock_init(&hdev->lock);
890
891 hdev->dev = dev;
892
893 platform_set_drvdata(pdev, hdev);
894
895 INIT_LIST_HEAD(&hdev->list);
896
897 tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned long)hdev);
898 tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned long)hdev);
899
900 crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
901
902 /* Register bank */
903 hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
904
905 hdev->io_base = devm_ioremap_resource(dev, hash_res);
906 if (IS_ERR(hdev->io_base)) {
907 err = PTR_ERR(hdev->io_base);
908 dev_err(dev, "can't ioremap, returned %d\n", err);
909
910 goto res_err;
911 }
912
913 /* Write port (DMA or CPU) */
914 hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
915 hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
916 if (IS_ERR(hdev->cpu_addr)) {
917 dev_err(dev, "can't ioremap write port\n");
918 err = PTR_ERR(hdev->cpu_addr);
919 goto res_err;
920 }
921 hdev->bus_addr = hash_res->start;
922
923 irq = platform_get_irq(pdev, 0);
924 if (irq < 0) {
925 dev_err(dev, "no IRQ resource info\n");
926 err = irq;
927 goto res_err;
928 }
929
930 err = devm_request_irq(dev, irq, img_irq_handler, 0,
931 dev_name(dev), hdev);
932 if (err) {
933 dev_err(dev, "unable to request irq\n");
934 goto res_err;
935 }
936 dev_dbg(dev, "using IRQ channel %d\n", irq);
937
938 hdev->hash_clk = devm_clk_get(&pdev->dev, "hash");
939 if (IS_ERR(hdev->hash_clk)) {
940 dev_err(dev, "clock initialization failed.\n");
941 err = PTR_ERR(hdev->hash_clk);
942 goto res_err;
943 }
944
945 hdev->sys_clk = devm_clk_get(&pdev->dev, "sys");
946 if (IS_ERR(hdev->sys_clk)) {
947 dev_err(dev, "clock initialization failed.\n");
948 err = PTR_ERR(hdev->sys_clk);
949 goto res_err;
950 }
951
952 err = clk_prepare_enable(hdev->hash_clk);
953 if (err)
954 goto res_err;
955
956 err = clk_prepare_enable(hdev->sys_clk);
957 if (err)
958 goto clk_err;
959
960 err = img_hash_dma_init(hdev);
961 if (err)
962 goto dma_err;
963
964 dev_dbg(dev, "using %s for DMA transfers\n",
965 dma_chan_name(hdev->dma_lch));
966
967 spin_lock(&img_hash.lock);
968 list_add_tail(&hdev->list, &img_hash.dev_list);
969 spin_unlock(&img_hash.lock);
970
971 err = img_register_algs(hdev);
972 if (err)
973 goto err_algs;
974 dev_dbg(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n");
975
976 return 0;
977
978err_algs:
979 spin_lock(&img_hash.lock);
980 list_del(&hdev->list);
981 spin_unlock(&img_hash.lock);
982 dma_release_channel(hdev->dma_lch);
983dma_err:
984 clk_disable_unprepare(hdev->sys_clk);
985clk_err:
986 clk_disable_unprepare(hdev->hash_clk);
987res_err:
988 tasklet_kill(&hdev->done_task);
989 tasklet_kill(&hdev->dma_task);
990
991 return err;
992}
993
994static int img_hash_remove(struct platform_device *pdev)
995{
996	struct img_hash_dev *hdev;
997
998 hdev = platform_get_drvdata(pdev);
999 spin_lock(&img_hash.lock);
1000 list_del(&hdev->list);
1001 spin_unlock(&img_hash.lock);
1002
1003 img_unregister_algs(hdev);
1004
1005 tasklet_kill(&hdev->done_task);
1006 tasklet_kill(&hdev->dma_task);
1007
1008 dma_release_channel(hdev->dma_lch);
1009
1010 clk_disable_unprepare(hdev->hash_clk);
1011 clk_disable_unprepare(hdev->sys_clk);
1012
1013 return 0;
1014}
1015
1016static struct platform_driver img_hash_driver = {
1017 .probe = img_hash_probe,
1018 .remove = img_hash_remove,
1019 .driver = {
1020 .name = "img-hash-accelerator",
1021 .of_match_table = of_match_ptr(img_hash_match),
1022 }
1023};
1024module_platform_driver(img_hash_driver);
1025
1026MODULE_LICENSE("GPL v2");
1027MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator driver");
1028MODULE_AUTHOR("Will Thomas.");
1029MODULE_AUTHOR("James Hartley <james.hartley@imgtec.com>");
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index 829d6394fb33..59ed54e464a9 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -153,7 +153,7 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
153 struct dcp *sdcp = global_sdcp; 153 struct dcp *sdcp = global_sdcp;
154 const int chan = actx->chan; 154 const int chan = actx->chan;
155 uint32_t stat; 155 uint32_t stat;
156 int ret; 156 unsigned long ret;
157 struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; 157 struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
158 158
159 dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc), 159 dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc),
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 42f95a4326b0..9a28b7e07c71 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -554,15 +554,23 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
554 return err; 554 return err;
555} 555}
556 556
557static int omap_aes_check_aligned(struct scatterlist *sg) 557static int omap_aes_check_aligned(struct scatterlist *sg, int total)
558{ 558{
559 int len = 0;
560
559 while (sg) { 561 while (sg) {
560 if (!IS_ALIGNED(sg->offset, 4)) 562 if (!IS_ALIGNED(sg->offset, 4))
561 return -1; 563 return -1;
562 if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE)) 564 if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE))
563 return -1; 565 return -1;
566
567 len += sg->length;
564 sg = sg_next(sg); 568 sg = sg_next(sg);
565 } 569 }
570
571 if (len != total)
572 return -1;
573
566 return 0; 574 return 0;
567} 575}
568 576
@@ -633,8 +641,8 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd,
633 dd->in_sg = req->src; 641 dd->in_sg = req->src;
634 dd->out_sg = req->dst; 642 dd->out_sg = req->dst;
635 643
636 if (omap_aes_check_aligned(dd->in_sg) || 644 if (omap_aes_check_aligned(dd->in_sg, dd->total) ||
637 omap_aes_check_aligned(dd->out_sg)) { 645 omap_aes_check_aligned(dd->out_sg, dd->total)) {
638 if (omap_aes_copy_sgs(dd)) 646 if (omap_aes_copy_sgs(dd))
639 pr_err("Failed to copy SGs for unaligned cases\n"); 647 pr_err("Failed to copy SGs for unaligned cases\n");
640 dd->sgs_copied = 1; 648 dd->sgs_copied = 1;
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 3c76696ee578..4d63e0d4da9a 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -640,6 +640,7 @@ static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx)
640 640
641 while (ctx->sg) { 641 while (ctx->sg) {
642 vaddr = kmap_atomic(sg_page(ctx->sg)); 642 vaddr = kmap_atomic(sg_page(ctx->sg));
643 vaddr += ctx->sg->offset;
643 644
644 count = omap_sham_append_buffer(ctx, 645 count = omap_sham_append_buffer(ctx,
645 vaddr + ctx->offset, 646 vaddr + ctx->offset,
@@ -1945,6 +1946,7 @@ static int omap_sham_probe(struct platform_device *pdev)
1945 dd->flags |= dd->pdata->flags; 1946 dd->flags |= dd->pdata->flags;
1946 1947
1947 pm_runtime_enable(dev); 1948 pm_runtime_enable(dev);
1949 pm_runtime_irq_safe(dev);
1948 pm_runtime_get_sync(dev); 1950 pm_runtime_get_sync(dev);
1949 rev = omap_sham_read(dd, SHA_REG_REV(dd)); 1951 rev = omap_sham_read(dd, SHA_REG_REV(dd));
1950 pm_runtime_put_sync(&pdev->dev); 1952 pm_runtime_put_sync(&pdev->dev);
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index 19c0efa29ab3..f22ce7169fa5 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -52,7 +52,6 @@
52#include <linux/io.h> 52#include <linux/io.h>
53#include "adf_cfg_common.h" 53#include "adf_cfg_common.h"
54 54
55#define PCI_VENDOR_ID_INTEL 0x8086
56#define ADF_DH895XCC_DEVICE_NAME "dh895xcc" 55#define ADF_DH895XCC_DEVICE_NAME "dh895xcc"
57#define ADF_DH895XCC_PCI_DEVICE_ID 0x435 56#define ADF_DH895XCC_PCI_DEVICE_ID 0x435
58#define ADF_PCI_MAX_BARS 3 57#define ADF_PCI_MAX_BARS 3
diff --git a/drivers/crypto/qat/qat_common/adf_accel_engine.c b/drivers/crypto/qat/qat_common/adf_accel_engine.c
index c77453b900a3..7f8b66c915ed 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_engine.c
+++ b/drivers/crypto/qat/qat_common/adf_accel_engine.c
@@ -60,36 +60,40 @@ int adf_ae_fw_load(struct adf_accel_dev *accel_dev)
60 60
61 if (request_firmware(&loader_data->uof_fw, hw_device->fw_name, 61 if (request_firmware(&loader_data->uof_fw, hw_device->fw_name,
62 &accel_dev->accel_pci_dev.pci_dev->dev)) { 62 &accel_dev->accel_pci_dev.pci_dev->dev)) {
63 pr_err("QAT: Failed to load firmware %s\n", hw_device->fw_name); 63 dev_err(&GET_DEV(accel_dev), "Failed to load firmware %s\n",
64 hw_device->fw_name);
64 return -EFAULT; 65 return -EFAULT;
65 } 66 }
66 67
67 uof_size = loader_data->uof_fw->size; 68 uof_size = loader_data->uof_fw->size;
68 uof_addr = (void *)loader_data->uof_fw->data; 69 uof_addr = (void *)loader_data->uof_fw->data;
69 if (qat_uclo_map_uof_obj(loader_data->fw_loader, uof_addr, uof_size)) { 70 if (qat_uclo_map_uof_obj(loader_data->fw_loader, uof_addr, uof_size)) {
70 pr_err("QAT: Failed to map UOF\n"); 71 dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n");
71 goto out_err; 72 goto out_err;
72 } 73 }
73 if (qat_uclo_wr_all_uimage(loader_data->fw_loader)) { 74 if (qat_uclo_wr_all_uimage(loader_data->fw_loader)) {
74 pr_err("QAT: Failed to map UOF\n"); 75 dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n");
75 goto out_err; 76 goto out_err;
76 } 77 }
77 return 0; 78 return 0;
78 79
79out_err: 80out_err:
80 release_firmware(loader_data->uof_fw); 81 adf_ae_fw_release(accel_dev);
81 return -EFAULT; 82 return -EFAULT;
82} 83}
83 84
84int adf_ae_fw_release(struct adf_accel_dev *accel_dev) 85void adf_ae_fw_release(struct adf_accel_dev *accel_dev)
85{ 86{
86 struct adf_fw_loader_data *loader_data = accel_dev->fw_loader; 87 struct adf_fw_loader_data *loader_data = accel_dev->fw_loader;
87 88
88 release_firmware(loader_data->uof_fw);
89 qat_uclo_del_uof_obj(loader_data->fw_loader); 89 qat_uclo_del_uof_obj(loader_data->fw_loader);
90 qat_hal_deinit(loader_data->fw_loader); 90 qat_hal_deinit(loader_data->fw_loader);
91
92 if (loader_data->uof_fw)
93 release_firmware(loader_data->uof_fw);
94
95 loader_data->uof_fw = NULL;
91 loader_data->fw_loader = NULL; 96 loader_data->fw_loader = NULL;
92 return 0;
93} 97}
94 98
95int adf_ae_start(struct adf_accel_dev *accel_dev) 99int adf_ae_start(struct adf_accel_dev *accel_dev)
@@ -104,8 +108,9 @@ int adf_ae_start(struct adf_accel_dev *accel_dev)
104 ae_ctr++; 108 ae_ctr++;
105 } 109 }
106 } 110 }
107 pr_info("QAT: qat_dev%d started %d acceleration engines\n", 111 dev_info(&GET_DEV(accel_dev),
108 accel_dev->accel_id, ae_ctr); 112 "qat_dev%d started %d acceleration engines\n",
113 accel_dev->accel_id, ae_ctr);
109 return 0; 114 return 0;
110} 115}
111 116
@@ -121,8 +126,9 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev)
121 ae_ctr++; 126 ae_ctr++;
122 } 127 }
123 } 128 }
124 pr_info("QAT: qat_dev%d stopped %d acceleration engines\n", 129 dev_info(&GET_DEV(accel_dev),
125 accel_dev->accel_id, ae_ctr); 130 "qat_dev%d stopped %d acceleration engines\n",
131 accel_dev->accel_id, ae_ctr);
126 return 0; 132 return 0;
127} 133}
128 134
@@ -147,12 +153,12 @@ int adf_ae_init(struct adf_accel_dev *accel_dev)
147 153
148 accel_dev->fw_loader = loader_data; 154 accel_dev->fw_loader = loader_data;
149 if (qat_hal_init(accel_dev)) { 155 if (qat_hal_init(accel_dev)) {
150 pr_err("QAT: Failed to init the AEs\n"); 156 dev_err(&GET_DEV(accel_dev), "Failed to init the AEs\n");
151 kfree(loader_data); 157 kfree(loader_data);
152 return -EFAULT; 158 return -EFAULT;
153 } 159 }
154 if (adf_ae_reset(accel_dev, 0)) { 160 if (adf_ae_reset(accel_dev, 0)) {
155 pr_err("QAT: Failed to reset the AEs\n"); 161 dev_err(&GET_DEV(accel_dev), "Failed to reset the AEs\n");
156 qat_hal_deinit(loader_data->fw_loader); 162 qat_hal_deinit(loader_data->fw_loader);
157 kfree(loader_data); 163 kfree(loader_data);
158 return -EFAULT; 164 return -EFAULT;
@@ -162,6 +168,9 @@ int adf_ae_init(struct adf_accel_dev *accel_dev)
162 168
163int adf_ae_shutdown(struct adf_accel_dev *accel_dev) 169int adf_ae_shutdown(struct adf_accel_dev *accel_dev)
164{ 170{
171 struct adf_fw_loader_data *loader_data = accel_dev->fw_loader;
172
173 qat_hal_deinit(loader_data->fw_loader);
165 kfree(accel_dev->fw_loader); 174 kfree(accel_dev->fw_loader);
166 accel_dev->fw_loader = NULL; 175 accel_dev->fw_loader = NULL;
167 return 0; 176 return 0;
diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c
index fa1fef824de2..2dbc733b8ab2 100644
--- a/drivers/crypto/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/qat/qat_common/adf_aer.c
@@ -60,14 +60,14 @@ static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
60{ 60{
61 struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); 61 struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
62 62
63 pr_info("QAT: Acceleration driver hardware error detected.\n"); 63 dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n");
64 if (!accel_dev) { 64 if (!accel_dev) {
65 pr_err("QAT: Can't find acceleration device\n"); 65 dev_err(&pdev->dev, "Can't find acceleration device\n");
66 return PCI_ERS_RESULT_DISCONNECT; 66 return PCI_ERS_RESULT_DISCONNECT;
67 } 67 }
68 68
69 if (state == pci_channel_io_perm_failure) { 69 if (state == pci_channel_io_perm_failure) {
70 pr_err("QAT: Can't recover from device error\n"); 70 dev_err(&pdev->dev, "Can't recover from device error\n");
71 return PCI_ERS_RESULT_DISCONNECT; 71 return PCI_ERS_RESULT_DISCONNECT;
72 } 72 }
73 73
@@ -88,10 +88,12 @@ static void adf_dev_restore(struct adf_accel_dev *accel_dev)
88 struct pci_dev *parent = pdev->bus->self; 88 struct pci_dev *parent = pdev->bus->self;
89 uint16_t bridge_ctl = 0; 89 uint16_t bridge_ctl = 0;
90 90
91 pr_info("QAT: Resetting device qat_dev%d\n", accel_dev->accel_id); 91 dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n",
92 accel_dev->accel_id);
92 93
93 if (!pci_wait_for_pending_transaction(pdev)) 94 if (!pci_wait_for_pending_transaction(pdev))
94 pr_info("QAT: Transaction still in progress. Proceeding\n"); 95 dev_info(&GET_DEV(accel_dev),
96 "Transaction still in progress. Proceeding\n");
95 97
96 pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl); 98 pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl);
97 bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET; 99 bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET;
@@ -158,7 +160,8 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
158 unsigned long timeout = wait_for_completion_timeout( 160 unsigned long timeout = wait_for_completion_timeout(
159 &reset_data->compl, wait_jiffies); 161 &reset_data->compl, wait_jiffies);
160 if (!timeout) { 162 if (!timeout) {
161 pr_err("QAT: Reset device timeout expired\n"); 163 dev_err(&GET_DEV(accel_dev),
164 "Reset device timeout expired\n");
162 ret = -EFAULT; 165 ret = -EFAULT;
163 } 166 }
164 kfree(reset_data); 167 kfree(reset_data);
@@ -184,8 +187,8 @@ static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
184 187
185static void adf_resume(struct pci_dev *pdev) 188static void adf_resume(struct pci_dev *pdev)
186{ 189{
187 pr_info("QAT: Acceleration driver reset completed\n"); 190 dev_info(&pdev->dev, "Acceleration driver reset completed\n");
188	pr_info("QAT: Device is up and running\n"); 191	dev_info(&pdev->dev, "Device is up and running\n");
189} 192}
190 193
191static struct pci_error_handlers adf_err_handler = { 194static struct pci_error_handlers adf_err_handler = {
@@ -236,7 +239,7 @@ EXPORT_SYMBOL_GPL(adf_disable_aer);
236int adf_init_aer(void) 239int adf_init_aer(void)
237{ 240{
238 device_reset_wq = create_workqueue("qat_device_reset_wq"); 241 device_reset_wq = create_workqueue("qat_device_reset_wq");
239 return (device_reset_wq == NULL) ? -EFAULT : 0; 242 return !device_reset_wq ? -EFAULT : 0;
240} 243}
241 244
242void adf_exit_aer(void) 245void adf_exit_aer(void)
diff --git a/drivers/crypto/qat/qat_common/adf_cfg.c b/drivers/crypto/qat/qat_common/adf_cfg.c
index de16da9070a5..ab65bc274561 100644
--- a/drivers/crypto/qat/qat_common/adf_cfg.c
+++ b/drivers/crypto/qat/qat_common/adf_cfg.c
@@ -142,7 +142,8 @@ int adf_cfg_dev_add(struct adf_accel_dev *accel_dev)
142 dev_cfg_data, 142 dev_cfg_data,
143 &qat_dev_cfg_fops); 143 &qat_dev_cfg_fops);
144 if (!dev_cfg_data->debug) { 144 if (!dev_cfg_data->debug) {
145 pr_err("QAT: Failed to create qat cfg debugfs entry.\n"); 145 dev_err(&GET_DEV(accel_dev),
146 "Failed to create qat cfg debugfs entry.\n");
146 kfree(dev_cfg_data); 147 kfree(dev_cfg_data);
147 accel_dev->cfg = NULL; 148 accel_dev->cfg = NULL;
148 return -EFAULT; 149 return -EFAULT;
@@ -305,7 +306,7 @@ int adf_cfg_add_key_value_param(struct adf_accel_dev *accel_dev,
305 snprintf(key_val->val, ADF_CFG_MAX_VAL_LEN_IN_BYTES, 306 snprintf(key_val->val, ADF_CFG_MAX_VAL_LEN_IN_BYTES,
306 "0x%lx", (unsigned long)val); 307 "0x%lx", (unsigned long)val);
307 } else { 308 } else {
308 pr_err("QAT: Unknown type given.\n"); 309 dev_err(&GET_DEV(accel_dev), "Unknown type given.\n");
309 kfree(key_val); 310 kfree(key_val);
310 return -1; 311 return -1;
311 } 312 }
diff --git a/drivers/crypto/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/qat/qat_common/adf_cfg_strings.h
index c7ac758ebc90..13575111382c 100644
--- a/drivers/crypto/qat/qat_common/adf_cfg_strings.h
+++ b/drivers/crypto/qat/qat_common/adf_cfg_strings.h
@@ -59,7 +59,7 @@
59#define ADF_RING_SYM_TX "RingSymTx" 59#define ADF_RING_SYM_TX "RingSymTx"
60#define ADF_RING_RND_TX "RingNrbgTx" 60#define ADF_RING_RND_TX "RingNrbgTx"
61#define ADF_RING_ASYM_RX "RingAsymRx" 61#define ADF_RING_ASYM_RX "RingAsymRx"
62#define ADF_RING_SYM_RX "RinSymRx" 62#define ADF_RING_SYM_RX "RingSymRx"
63#define ADF_RING_RND_RX "RingNrbgRx" 63#define ADF_RING_RND_RX "RingNrbgRx"
64#define ADF_RING_DC_TX "RingTx" 64#define ADF_RING_DC_TX "RingTx"
65#define ADF_RING_DC_RX "RingRx" 65#define ADF_RING_DC_RX "RingRx"
@@ -69,15 +69,15 @@
69#define ADF_DC "Dc" 69#define ADF_DC "Dc"
70#define ADF_ETRMGR_COALESCING_ENABLED "InterruptCoalescingEnabled" 70#define ADF_ETRMGR_COALESCING_ENABLED "InterruptCoalescingEnabled"
71#define ADF_ETRMGR_COALESCING_ENABLED_FORMAT \ 71#define ADF_ETRMGR_COALESCING_ENABLED_FORMAT \
72 ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCING_ENABLED 72 ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCING_ENABLED
73#define ADF_ETRMGR_COALESCE_TIMER "InterruptCoalescingTimerNs" 73#define ADF_ETRMGR_COALESCE_TIMER "InterruptCoalescingTimerNs"
74#define ADF_ETRMGR_COALESCE_TIMER_FORMAT \ 74#define ADF_ETRMGR_COALESCE_TIMER_FORMAT \
75 ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCE_TIMER 75 ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCE_TIMER
76#define ADF_ETRMGR_COALESCING_MSG_ENABLED "InterruptCoalescingNumResponses" 76#define ADF_ETRMGR_COALESCING_MSG_ENABLED "InterruptCoalescingNumResponses"
77#define ADF_ETRMGR_COALESCING_MSG_ENABLED_FORMAT \ 77#define ADF_ETRMGR_COALESCING_MSG_ENABLED_FORMAT \
78 ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCING_MSG_ENABLED 78 ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCING_MSG_ENABLED
79#define ADF_ETRMGR_CORE_AFFINITY "CoreAffinity" 79#define ADF_ETRMGR_CORE_AFFINITY "CoreAffinity"
80#define ADF_ETRMGR_CORE_AFFINITY_FORMAT \ 80#define ADF_ETRMGR_CORE_AFFINITY_FORMAT \
81 ADF_ETRMGR_BANK"%d"ADF_ETRMGR_CORE_AFFINITY 81 ADF_ETRMGR_BANK "%d" ADF_ETRMGR_CORE_AFFINITY
82#define ADF_ACCEL_STR "Accelerator%d" 82#define ADF_ACCEL_STR "Accelerator%d"
83#endif 83#endif
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index a62e485c8786..0666ee6a3360 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -115,7 +115,7 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev);
115int adf_ae_init(struct adf_accel_dev *accel_dev); 115int adf_ae_init(struct adf_accel_dev *accel_dev);
116int adf_ae_shutdown(struct adf_accel_dev *accel_dev); 116int adf_ae_shutdown(struct adf_accel_dev *accel_dev);
117int adf_ae_fw_load(struct adf_accel_dev *accel_dev); 117int adf_ae_fw_load(struct adf_accel_dev *accel_dev);
118int adf_ae_fw_release(struct adf_accel_dev *accel_dev); 118void adf_ae_fw_release(struct adf_accel_dev *accel_dev);
119int adf_ae_start(struct adf_accel_dev *accel_dev); 119int adf_ae_start(struct adf_accel_dev *accel_dev);
120int adf_ae_stop(struct adf_accel_dev *accel_dev); 120int adf_ae_stop(struct adf_accel_dev *accel_dev);
121 121
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index 74207a6f0516..cb5f066e93a6 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -77,14 +77,14 @@ struct adf_ctl_drv_info {
77 struct class *drv_class; 77 struct class *drv_class;
78}; 78};
79 79
80static struct adf_ctl_drv_info adt_ctl_drv; 80static struct adf_ctl_drv_info adf_ctl_drv;
81 81
82static void adf_chr_drv_destroy(void) 82static void adf_chr_drv_destroy(void)
83{ 83{
84 device_destroy(adt_ctl_drv.drv_class, MKDEV(adt_ctl_drv.major, 0)); 84 device_destroy(adf_ctl_drv.drv_class, MKDEV(adf_ctl_drv.major, 0));
85 cdev_del(&adt_ctl_drv.drv_cdev); 85 cdev_del(&adf_ctl_drv.drv_cdev);
86 class_destroy(adt_ctl_drv.drv_class); 86 class_destroy(adf_ctl_drv.drv_class);
87 unregister_chrdev_region(MKDEV(adt_ctl_drv.major, 0), 1); 87 unregister_chrdev_region(MKDEV(adf_ctl_drv.major, 0), 1);
88} 88}
89 89
90static int adf_chr_drv_create(void) 90static int adf_chr_drv_create(void)
@@ -97,20 +97,20 @@ static int adf_chr_drv_create(void)
97 return -EFAULT; 97 return -EFAULT;
98 } 98 }
99 99
100 adt_ctl_drv.drv_class = class_create(THIS_MODULE, DEVICE_NAME); 100 adf_ctl_drv.drv_class = class_create(THIS_MODULE, DEVICE_NAME);
101 if (IS_ERR(adt_ctl_drv.drv_class)) { 101 if (IS_ERR(adf_ctl_drv.drv_class)) {
102 pr_err("QAT: class_create failed for adf_ctl\n"); 102 pr_err("QAT: class_create failed for adf_ctl\n");
103 goto err_chrdev_unreg; 103 goto err_chrdev_unreg;
104 } 104 }
105 adt_ctl_drv.major = MAJOR(dev_id); 105 adf_ctl_drv.major = MAJOR(dev_id);
106 cdev_init(&adt_ctl_drv.drv_cdev, &adf_ctl_ops); 106 cdev_init(&adf_ctl_drv.drv_cdev, &adf_ctl_ops);
107 if (cdev_add(&adt_ctl_drv.drv_cdev, dev_id, 1)) { 107 if (cdev_add(&adf_ctl_drv.drv_cdev, dev_id, 1)) {
108 pr_err("QAT: cdev add failed\n"); 108 pr_err("QAT: cdev add failed\n");
109 goto err_class_destr; 109 goto err_class_destr;
110 } 110 }
111 111
112 drv_device = device_create(adt_ctl_drv.drv_class, NULL, 112 drv_device = device_create(adf_ctl_drv.drv_class, NULL,
113 MKDEV(adt_ctl_drv.major, 0), 113 MKDEV(adf_ctl_drv.major, 0),
114 NULL, DEVICE_NAME); 114 NULL, DEVICE_NAME);
115 if (IS_ERR(drv_device)) { 115 if (IS_ERR(drv_device)) {
116 pr_err("QAT: failed to create device\n"); 116 pr_err("QAT: failed to create device\n");
@@ -118,9 +118,9 @@ static int adf_chr_drv_create(void)
118 } 118 }
119 return 0; 119 return 0;
120err_cdev_del: 120err_cdev_del:
121 cdev_del(&adt_ctl_drv.drv_cdev); 121 cdev_del(&adf_ctl_drv.drv_cdev);
122err_class_destr: 122err_class_destr:
123 class_destroy(adt_ctl_drv.drv_class); 123 class_destroy(adf_ctl_drv.drv_class);
124err_chrdev_unreg: 124err_chrdev_unreg:
125 unregister_chrdev_region(dev_id, 1); 125 unregister_chrdev_region(dev_id, 1);
126 return -EFAULT; 126 return -EFAULT;
@@ -159,14 +159,16 @@ static int adf_add_key_value_data(struct adf_accel_dev *accel_dev,
159 if (adf_cfg_add_key_value_param(accel_dev, section, 159 if (adf_cfg_add_key_value_param(accel_dev, section,
160 key_val->key, (void *)val, 160 key_val->key, (void *)val,
161 key_val->type)) { 161 key_val->type)) {
162 pr_err("QAT: failed to add keyvalue.\n"); 162 dev_err(&GET_DEV(accel_dev),
163 "failed to add hex keyvalue.\n");
163 return -EFAULT; 164 return -EFAULT;
164 } 165 }
165 } else { 166 } else {
166 if (adf_cfg_add_key_value_param(accel_dev, section, 167 if (adf_cfg_add_key_value_param(accel_dev, section,
167 key_val->key, key_val->val, 168 key_val->key, key_val->val,
168 key_val->type)) { 169 key_val->type)) {
169 pr_err("QAT: failed to add keyvalue.\n"); 170 dev_err(&GET_DEV(accel_dev),
171 "failed to add keyvalue.\n");
170 return -EFAULT; 172 return -EFAULT;
171 } 173 }
172 } 174 }
@@ -185,12 +187,14 @@ static int adf_copy_key_value_data(struct adf_accel_dev *accel_dev,
185 while (section_head) { 187 while (section_head) {
186 if (copy_from_user(&section, (void __user *)section_head, 188 if (copy_from_user(&section, (void __user *)section_head,
187 sizeof(*section_head))) { 189 sizeof(*section_head))) {
188 pr_err("QAT: failed to copy section info\n"); 190 dev_err(&GET_DEV(accel_dev),
191 "failed to copy section info\n");
189 goto out_err; 192 goto out_err;
190 } 193 }
191 194
192 if (adf_cfg_section_add(accel_dev, section.name)) { 195 if (adf_cfg_section_add(accel_dev, section.name)) {
193 pr_err("QAT: failed to add section.\n"); 196 dev_err(&GET_DEV(accel_dev),
197 "failed to add section.\n");
194 goto out_err; 198 goto out_err;
195 } 199 }
196 200
@@ -199,7 +203,8 @@ static int adf_copy_key_value_data(struct adf_accel_dev *accel_dev,
199 while (params_head) { 203 while (params_head) {
200 if (copy_from_user(&key_val, (void __user *)params_head, 204 if (copy_from_user(&key_val, (void __user *)params_head,
201 sizeof(key_val))) { 205 sizeof(key_val))) {
202 pr_err("QAT: Failed to copy keyvalue.\n"); 206 dev_err(&GET_DEV(accel_dev),
207 "Failed to copy keyvalue.\n");
203 goto out_err; 208 goto out_err;
204 } 209 }
205 if (adf_add_key_value_data(accel_dev, section.name, 210 if (adf_add_key_value_data(accel_dev, section.name,
@@ -258,8 +263,9 @@ static int adf_ctl_is_device_in_use(int id)
258 263
259 if (id == dev->accel_id || id == ADF_CFG_ALL_DEVICES) { 264 if (id == dev->accel_id || id == ADF_CFG_ALL_DEVICES) {
260 if (adf_devmgr_in_reset(dev) || adf_dev_in_use(dev)) { 265 if (adf_devmgr_in_reset(dev) || adf_dev_in_use(dev)) {
261 pr_info("QAT: device qat_dev%d is busy\n", 266 dev_info(&GET_DEV(dev),
262 dev->accel_id); 267 "device qat_dev%d is busy\n",
268 dev->accel_id);
263 return -EBUSY; 269 return -EBUSY;
264 } 270 }
265 } 271 }
@@ -280,7 +286,8 @@ static int adf_ctl_stop_devices(uint32_t id)
280 continue; 286 continue;
281 287
282 if (adf_dev_stop(accel_dev)) { 288 if (adf_dev_stop(accel_dev)) {
283 pr_err("QAT: Failed to stop qat_dev%d\n", id); 289 dev_err(&GET_DEV(accel_dev),
290 "Failed to stop qat_dev%d\n", id);
284 ret = -EFAULT; 291 ret = -EFAULT;
285 } else { 292 } else {
286 adf_dev_shutdown(accel_dev); 293 adf_dev_shutdown(accel_dev);
@@ -343,17 +350,20 @@ static int adf_ctl_ioctl_dev_start(struct file *fp, unsigned int cmd,
343 } 350 }
344 351
345 if (!adf_dev_started(accel_dev)) { 352 if (!adf_dev_started(accel_dev)) {
346 pr_info("QAT: Starting acceleration device qat_dev%d.\n", 353 dev_info(&GET_DEV(accel_dev),
347 ctl_data->device_id); 354 "Starting acceleration device qat_dev%d.\n",
355 ctl_data->device_id);
348 ret = adf_dev_init(accel_dev); 356 ret = adf_dev_init(accel_dev);
349 if (!ret) 357 if (!ret)
350 ret = adf_dev_start(accel_dev); 358 ret = adf_dev_start(accel_dev);
351 } else { 359 } else {
352 pr_info("QAT: Acceleration device qat_dev%d already started.\n", 360 dev_info(&GET_DEV(accel_dev),
353 ctl_data->device_id); 361 "Acceleration device qat_dev%d already started.\n",
362 ctl_data->device_id);
354 } 363 }
355 if (ret) { 364 if (ret) {
356 pr_err("QAT: Failed to start qat_dev%d\n", ctl_data->device_id); 365 dev_err(&GET_DEV(accel_dev), "Failed to start qat_dev%d\n",
366 ctl_data->device_id);
357 adf_dev_stop(accel_dev); 367 adf_dev_stop(accel_dev);
358 adf_dev_shutdown(accel_dev); 368 adf_dev_shutdown(accel_dev);
359 } 369 }
@@ -408,7 +418,7 @@ static int adf_ctl_ioctl_get_status(struct file *fp, unsigned int cmd,
408 418
409 if (copy_to_user((void __user *)arg, &dev_info, 419 if (copy_to_user((void __user *)arg, &dev_info,
410 sizeof(struct adf_dev_status_info))) { 420 sizeof(struct adf_dev_status_info))) {
411 pr_err("QAT: failed to copy status.\n"); 421 dev_err(&GET_DEV(accel_dev), "failed to copy status.\n");
412 return -EFAULT; 422 return -EFAULT;
413 } 423 }
414 return 0; 424 return 0;
diff --git a/drivers/crypto/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/qat/qat_common/adf_dev_mgr.c
index 4a0a829d4500..3f0ff9e7d840 100644
--- a/drivers/crypto/qat/qat_common/adf_dev_mgr.c
+++ b/drivers/crypto/qat/qat_common/adf_dev_mgr.c
@@ -67,7 +67,8 @@ int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev)
67 struct list_head *itr; 67 struct list_head *itr;
68 68
69 if (num_devices == ADF_MAX_DEVICES) { 69 if (num_devices == ADF_MAX_DEVICES) {
70 pr_err("QAT: Only support up to %d devices\n", ADF_MAX_DEVICES); 70 dev_err(&GET_DEV(accel_dev), "Only support up to %d devices\n",
71 ADF_MAX_DEVICES);
71 return -EFAULT; 72 return -EFAULT;
72 } 73 }
73 74
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index 8f0ca498ab87..245f43237a2d 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -124,12 +124,12 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
124 124
125 if (!hw_data) { 125 if (!hw_data) {
126 dev_err(&GET_DEV(accel_dev), 126 dev_err(&GET_DEV(accel_dev),
127 "QAT: Failed to init device - hw_data not set\n"); 127 "Failed to init device - hw_data not set\n");
128 return -EFAULT; 128 return -EFAULT;
129 } 129 }
130 130
131 if (!test_bit(ADF_STATUS_CONFIGURED, &accel_dev->status)) { 131 if (!test_bit(ADF_STATUS_CONFIGURED, &accel_dev->status)) {
132 pr_info("QAT: Device not configured\n"); 132 dev_err(&GET_DEV(accel_dev), "Device not configured\n");
133 return -EFAULT; 133 return -EFAULT;
134 } 134 }
135 135
@@ -151,20 +151,21 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
151 hw_data->enable_ints(accel_dev); 151 hw_data->enable_ints(accel_dev);
152 152
153 if (adf_ae_init(accel_dev)) { 153 if (adf_ae_init(accel_dev)) {
154 pr_err("QAT: Failed to initialise Acceleration Engine\n"); 154 dev_err(&GET_DEV(accel_dev),
155 "Failed to initialise Acceleration Engine\n");
155 return -EFAULT; 156 return -EFAULT;
156 } 157 }
157 set_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status); 158 set_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status);
158 159
159 if (adf_ae_fw_load(accel_dev)) { 160 if (adf_ae_fw_load(accel_dev)) {
160 pr_err("QAT: Failed to load acceleration FW\n"); 161 dev_err(&GET_DEV(accel_dev),
161 adf_ae_fw_release(accel_dev); 162 "Failed to load acceleration FW\n");
162 return -EFAULT; 163 return -EFAULT;
163 } 164 }
164 set_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status); 165 set_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status);
165 166
166 if (hw_data->alloc_irq(accel_dev)) { 167 if (hw_data->alloc_irq(accel_dev)) {
167 pr_err("QAT: Failed to allocate interrupts\n"); 168 dev_err(&GET_DEV(accel_dev), "Failed to allocate interrupts\n");
168 return -EFAULT; 169 return -EFAULT;
169 } 170 }
170 set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status); 171 set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status);
@@ -179,8 +180,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
179 if (!service->admin) 180 if (!service->admin)
180 continue; 181 continue;
181 if (service->event_hld(accel_dev, ADF_EVENT_INIT)) { 182 if (service->event_hld(accel_dev, ADF_EVENT_INIT)) {
182 pr_err("QAT: Failed to initialise service %s\n", 183 dev_err(&GET_DEV(accel_dev),
183 service->name); 184 "Failed to initialise service %s\n",
185 service->name);
184 return -EFAULT; 186 return -EFAULT;
185 } 187 }
186 set_bit(accel_dev->accel_id, &service->init_status); 188 set_bit(accel_dev->accel_id, &service->init_status);
@@ -190,8 +192,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev)
190 if (service->admin) 192 if (service->admin)
191 continue; 193 continue;
192 if (service->event_hld(accel_dev, ADF_EVENT_INIT)) { 194 if (service->event_hld(accel_dev, ADF_EVENT_INIT)) {
193 pr_err("QAT: Failed to initialise service %s\n", 195 dev_err(&GET_DEV(accel_dev),
194 service->name); 196 "Failed to initialise service %s\n",
197 service->name);
195 return -EFAULT; 198 return -EFAULT;
196 } 199 }
197 set_bit(accel_dev->accel_id, &service->init_status); 200 set_bit(accel_dev->accel_id, &service->init_status);
@@ -221,7 +224,7 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
221 set_bit(ADF_STATUS_STARTING, &accel_dev->status); 224 set_bit(ADF_STATUS_STARTING, &accel_dev->status);
222 225
223 if (adf_ae_start(accel_dev)) { 226 if (adf_ae_start(accel_dev)) {
224 pr_err("QAT: AE Start Failed\n"); 227 dev_err(&GET_DEV(accel_dev), "AE Start Failed\n");
225 return -EFAULT; 228 return -EFAULT;
226 } 229 }
227 set_bit(ADF_STATUS_AE_STARTED, &accel_dev->status); 230 set_bit(ADF_STATUS_AE_STARTED, &accel_dev->status);
@@ -231,8 +234,9 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
231 if (!service->admin) 234 if (!service->admin)
232 continue; 235 continue;
233 if (service->event_hld(accel_dev, ADF_EVENT_START)) { 236 if (service->event_hld(accel_dev, ADF_EVENT_START)) {
234 pr_err("QAT: Failed to start service %s\n", 237 dev_err(&GET_DEV(accel_dev),
235 service->name); 238 "Failed to start service %s\n",
239 service->name);
236 return -EFAULT; 240 return -EFAULT;
237 } 241 }
238 set_bit(accel_dev->accel_id, &service->start_status); 242 set_bit(accel_dev->accel_id, &service->start_status);
@@ -242,8 +246,9 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
242 if (service->admin) 246 if (service->admin)
243 continue; 247 continue;
244 if (service->event_hld(accel_dev, ADF_EVENT_START)) { 248 if (service->event_hld(accel_dev, ADF_EVENT_START)) {
245 pr_err("QAT: Failed to start service %s\n", 249 dev_err(&GET_DEV(accel_dev),
246 service->name); 250 "Failed to start service %s\n",
251 service->name);
247 return -EFAULT; 252 return -EFAULT;
248 } 253 }
249 set_bit(accel_dev->accel_id, &service->start_status); 254 set_bit(accel_dev->accel_id, &service->start_status);
@@ -253,7 +258,8 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
253 set_bit(ADF_STATUS_STARTED, &accel_dev->status); 258 set_bit(ADF_STATUS_STARTED, &accel_dev->status);
254 259
255 if (qat_algs_register()) { 260 if (qat_algs_register()) {
256 pr_err("QAT: Failed to register crypto algs\n"); 261 dev_err(&GET_DEV(accel_dev),
262 "Failed to register crypto algs\n");
257 set_bit(ADF_STATUS_STARTING, &accel_dev->status); 263 set_bit(ADF_STATUS_STARTING, &accel_dev->status);
258 clear_bit(ADF_STATUS_STARTED, &accel_dev->status); 264 clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
259 return -EFAULT; 265 return -EFAULT;
@@ -287,7 +293,8 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev)
287 clear_bit(ADF_STATUS_STARTED, &accel_dev->status); 293 clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
288 294
289 if (qat_algs_unregister()) 295 if (qat_algs_unregister())
290 pr_err("QAT: Failed to unregister crypto algs\n"); 296 dev_err(&GET_DEV(accel_dev),
297 "Failed to unregister crypto algs\n");
291 298
292 list_for_each(list_itr, &service_table) { 299 list_for_each(list_itr, &service_table) {
293 service = list_entry(list_itr, struct service_hndl, list); 300 service = list_entry(list_itr, struct service_hndl, list);
@@ -310,8 +317,9 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev)
310 if (!test_bit(accel_dev->accel_id, &service->start_status)) 317 if (!test_bit(accel_dev->accel_id, &service->start_status))
311 continue; 318 continue;
312 if (service->event_hld(accel_dev, ADF_EVENT_STOP)) 319 if (service->event_hld(accel_dev, ADF_EVENT_STOP))
313 pr_err("QAT: Failed to shutdown service %s\n", 320 dev_err(&GET_DEV(accel_dev),
314 service->name); 321 "Failed to shutdown service %s\n",
322 service->name);
315 else 323 else
316 clear_bit(accel_dev->accel_id, &service->start_status); 324 clear_bit(accel_dev->accel_id, &service->start_status);
317 } 325 }
@@ -321,7 +329,7 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev)
321 329
322 if (test_bit(ADF_STATUS_AE_STARTED, &accel_dev->status)) { 330 if (test_bit(ADF_STATUS_AE_STARTED, &accel_dev->status)) {
323 if (adf_ae_stop(accel_dev)) 331 if (adf_ae_stop(accel_dev))
324 pr_err("QAT: failed to stop AE\n"); 332 dev_err(&GET_DEV(accel_dev), "failed to stop AE\n");
325 else 333 else
326 clear_bit(ADF_STATUS_AE_STARTED, &accel_dev->status); 334 clear_bit(ADF_STATUS_AE_STARTED, &accel_dev->status);
327 } 335 }
@@ -350,16 +358,14 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev)
350 } 358 }
351 359
352 if (test_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status)) { 360 if (test_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status)) {
353 if (adf_ae_fw_release(accel_dev)) 361 adf_ae_fw_release(accel_dev);
354 pr_err("QAT: Failed to release the ucode\n"); 362 clear_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status);
355 else
356 clear_bit(ADF_STATUS_AE_UCODE_LOADED,
357 &accel_dev->status);
358 } 363 }
359 364
360 if (test_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status)) { 365 if (test_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status)) {
361 if (adf_ae_shutdown(accel_dev)) 366 if (adf_ae_shutdown(accel_dev))
362 pr_err("QAT: Failed to shutdown Accel Engine\n"); 367 dev_err(&GET_DEV(accel_dev),
368 "Failed to shutdown Accel Engine\n");
363 else 369 else
364 clear_bit(ADF_STATUS_AE_INITIALISED, 370 clear_bit(ADF_STATUS_AE_INITIALISED,
365 &accel_dev->status); 371 &accel_dev->status);
@@ -372,8 +378,9 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev)
372 if (!test_bit(accel_dev->accel_id, &service->init_status)) 378 if (!test_bit(accel_dev->accel_id, &service->init_status))
373 continue; 379 continue;
374 if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) 380 if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN))
375 pr_err("QAT: Failed to shutdown service %s\n", 381 dev_err(&GET_DEV(accel_dev),
376 service->name); 382 "Failed to shutdown service %s\n",
383 service->name);
377 else 384 else
378 clear_bit(accel_dev->accel_id, &service->init_status); 385 clear_bit(accel_dev->accel_id, &service->init_status);
379 } 386 }
@@ -384,8 +391,9 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev)
384 if (!test_bit(accel_dev->accel_id, &service->init_status)) 391 if (!test_bit(accel_dev->accel_id, &service->init_status))
385 continue; 392 continue;
386 if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) 393 if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN))
387 pr_err("QAT: Failed to shutdown service %s\n", 394 dev_err(&GET_DEV(accel_dev),
388 service->name); 395 "Failed to shutdown service %s\n",
396 service->name);
389 else 397 else
390 clear_bit(accel_dev->accel_id, &service->init_status); 398 clear_bit(accel_dev->accel_id, &service->init_status);
391 } 399 }
@@ -419,16 +427,18 @@ int adf_dev_restarting_notify(struct adf_accel_dev *accel_dev)
419 if (service->admin) 427 if (service->admin)
420 continue; 428 continue;
421 if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING)) 429 if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING))
422 pr_err("QAT: Failed to restart service %s.\n", 430 dev_err(&GET_DEV(accel_dev),
423 service->name); 431 "Failed to restart service %s.\n",
432 service->name);
424 } 433 }
425 list_for_each(list_itr, &service_table) { 434 list_for_each(list_itr, &service_table) {
426 service = list_entry(list_itr, struct service_hndl, list); 435 service = list_entry(list_itr, struct service_hndl, list);
427 if (!service->admin) 436 if (!service->admin)
428 continue; 437 continue;
429 if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING)) 438 if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING))
430 pr_err("QAT: Failed to restart service %s.\n", 439 dev_err(&GET_DEV(accel_dev),
431 service->name); 440 "Failed to restart service %s.\n",
441 service->name);
432 } 442 }
433 return 0; 443 return 0;
434} 444}
@@ -443,16 +453,18 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev)
443 if (service->admin) 453 if (service->admin)
444 continue; 454 continue;
445 if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED)) 455 if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED))
446 pr_err("QAT: Failed to restart service %s.\n", 456 dev_err(&GET_DEV(accel_dev),
447 service->name); 457 "Failed to restart service %s.\n",
458 service->name);
448 } 459 }
449 list_for_each(list_itr, &service_table) { 460 list_for_each(list_itr, &service_table) {
450 service = list_entry(list_itr, struct service_hndl, list); 461 service = list_entry(list_itr, struct service_hndl, list);
451 if (!service->admin) 462 if (!service->admin)
452 continue; 463 continue;
453 if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED)) 464 if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED))
454 pr_err("QAT: Failed to restart service %s.\n", 465 dev_err(&GET_DEV(accel_dev),
455 service->name); 466 "Failed to restart service %s.\n",
467 service->name);
456 } 468 }
457 return 0; 469 return 0;
458} 470}
diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c
index 7dd54aaee9fa..ccec327489da 100644
--- a/drivers/crypto/qat/qat_common/adf_transport.c
+++ b/drivers/crypto/qat/qat_common/adf_transport.c
@@ -195,7 +195,7 @@ static int adf_init_ring(struct adf_etr_ring_data *ring)
195 memset(ring->base_addr, 0x7F, ring_size_bytes); 195 memset(ring->base_addr, 0x7F, ring_size_bytes);
196 /* The base_addr has to be aligned to the size of the buffer */ 196 /* The base_addr has to be aligned to the size of the buffer */
197 if (adf_check_ring_alignment(ring->dma_addr, ring_size_bytes)) { 197 if (adf_check_ring_alignment(ring->dma_addr, ring_size_bytes)) {
198 pr_err("QAT: Ring address not aligned\n"); 198 dev_err(&GET_DEV(accel_dev), "Ring address not aligned\n");
199 dma_free_coherent(&GET_DEV(accel_dev), ring_size_bytes, 199 dma_free_coherent(&GET_DEV(accel_dev), ring_size_bytes,
200 ring->base_addr, ring->dma_addr); 200 ring->base_addr, ring->dma_addr);
201 return -EFAULT; 201 return -EFAULT;
@@ -242,32 +242,33 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
242 int ret; 242 int ret;
243 243
244 if (bank_num >= GET_MAX_BANKS(accel_dev)) { 244 if (bank_num >= GET_MAX_BANKS(accel_dev)) {
245 pr_err("QAT: Invalid bank number\n"); 245 dev_err(&GET_DEV(accel_dev), "Invalid bank number\n");
246 return -EFAULT; 246 return -EFAULT;
247 } 247 }
248 if (msg_size > ADF_MSG_SIZE_TO_BYTES(ADF_MAX_MSG_SIZE)) { 248 if (msg_size > ADF_MSG_SIZE_TO_BYTES(ADF_MAX_MSG_SIZE)) {
249 pr_err("QAT: Invalid msg size\n"); 249 dev_err(&GET_DEV(accel_dev), "Invalid msg size\n");
250 return -EFAULT; 250 return -EFAULT;
251 } 251 }
252 if (ADF_MAX_INFLIGHTS(adf_verify_ring_size(msg_size, num_msgs), 252 if (ADF_MAX_INFLIGHTS(adf_verify_ring_size(msg_size, num_msgs),
253 ADF_BYTES_TO_MSG_SIZE(msg_size)) < 2) { 253 ADF_BYTES_TO_MSG_SIZE(msg_size)) < 2) {
254 pr_err("QAT: Invalid ring size for given msg size\n"); 254 dev_err(&GET_DEV(accel_dev),
255 "Invalid ring size for given msg size\n");
255 return -EFAULT; 256 return -EFAULT;
256 } 257 }
257 if (adf_cfg_get_param_value(accel_dev, section, ring_name, val)) { 258 if (adf_cfg_get_param_value(accel_dev, section, ring_name, val)) {
258 pr_err("QAT: Section %s, no such entry : %s\n", 259 dev_err(&GET_DEV(accel_dev), "Section %s, no such entry : %s\n",
259 section, ring_name); 260 section, ring_name);
260 return -EFAULT; 261 return -EFAULT;
261 } 262 }
262 if (kstrtouint(val, 10, &ring_num)) { 263 if (kstrtouint(val, 10, &ring_num)) {
263 pr_err("QAT: Can't get ring number\n"); 264 dev_err(&GET_DEV(accel_dev), "Can't get ring number\n");
264 return -EFAULT; 265 return -EFAULT;
265 } 266 }
266 267
267 bank = &transport_data->banks[bank_num]; 268 bank = &transport_data->banks[bank_num];
268 if (adf_reserve_ring(bank, ring_num)) { 269 if (adf_reserve_ring(bank, ring_num)) {
269 pr_err("QAT: Ring %d, %s already exists.\n", 270 dev_err(&GET_DEV(accel_dev), "Ring %d, %s already exists.\n",
270 ring_num, ring_name); 271 ring_num, ring_name);
271 return -EFAULT; 272 return -EFAULT;
272 } 273 }
273 ring = &bank->rings[ring_num]; 274 ring = &bank->rings[ring_num];
@@ -287,7 +288,8 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section,
287 accel_dev->hw_device->hw_arb_ring_enable(ring); 288 accel_dev->hw_device->hw_arb_ring_enable(ring);
288 289
289 if (adf_ring_debugfs_add(ring, ring_name)) { 290 if (adf_ring_debugfs_add(ring, ring_name)) {
290 pr_err("QAT: Couldn't add ring debugfs entry\n"); 291 dev_err(&GET_DEV(accel_dev),
292 "Couldn't add ring debugfs entry\n");
291 ret = -EFAULT; 293 ret = -EFAULT;
292 goto err; 294 goto err;
293 } 295 }
@@ -428,7 +430,8 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev,
428 goto err; 430 goto err;
429 } else { 431 } else {
430 if (i < hw_data->tx_rx_gap) { 432 if (i < hw_data->tx_rx_gap) {
431 pr_err("QAT: Invalid tx rings mask config\n"); 433 dev_err(&GET_DEV(accel_dev),
434 "Invalid tx rings mask config\n");
432 goto err; 435 goto err;
433 } 436 }
434 tx_ring = &bank->rings[i - hw_data->tx_rx_gap]; 437 tx_ring = &bank->rings[i - hw_data->tx_rx_gap];
@@ -436,7 +439,8 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev,
436 } 439 }
437 } 440 }
438 if (adf_bank_debugfs_add(bank)) { 441 if (adf_bank_debugfs_add(bank)) {
439 pr_err("QAT: Failed to add bank debugfs entry\n"); 442 dev_err(&GET_DEV(accel_dev),
443 "Failed to add bank debugfs entry\n");
440 goto err; 444 goto err;
441 } 445 }
442 446
@@ -492,7 +496,8 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev)
492 etr_data->debug = debugfs_create_dir("transport", 496 etr_data->debug = debugfs_create_dir("transport",
493 accel_dev->debugfs_dir); 497 accel_dev->debugfs_dir);
494 if (!etr_data->debug) { 498 if (!etr_data->debug) {
495 pr_err("QAT: Unable to create transport debugfs entry\n"); 499 dev_err(&GET_DEV(accel_dev),
500 "Unable to create transport debugfs entry\n");
496 ret = -ENOENT; 501 ret = -ENOENT;
497 goto err_bank_debug; 502 goto err_bank_debug;
498 } 503 }
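
The adf_transport.c hunks above all apply the same conversion: bare pr_err("QAT: ...") calls become dev_err(&GET_DEV(accel_dev), ...) so each message is attributed to the specific accelerator device. A minimal sketch of the pattern using names from the driver; the wrapper function example_check_bank() is hypothetical:

/*
 * Sketch only: report errors against the PCI device rather than with a
 * bare "QAT:" prefix.  GET_DEV() is the driver's accessor for the
 * underlying struct device.
 */
static int example_check_bank(struct adf_accel_dev *accel_dev, u32 bank_num)
{
	if (bank_num >= GET_MAX_BANKS(accel_dev)) {
		/* before: pr_err("QAT: Invalid bank number\n"); */
		dev_err(&GET_DEV(accel_dev), "Invalid bank number\n");
		return -EFAULT;
	}
	return 0;
}
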
diff --git a/drivers/crypto/qat/qat_common/adf_transport_debug.c b/drivers/crypto/qat/qat_common/adf_transport_debug.c
index 6b6974553514..e41986967294 100644
--- a/drivers/crypto/qat/qat_common/adf_transport_debug.c
+++ b/drivers/crypto/qat/qat_common/adf_transport_debug.c
@@ -100,6 +100,8 @@ static int adf_ring_show(struct seq_file *sfile, void *v)
100 empty = READ_CSR_E_STAT(csr, bank->bank_number); 100 empty = READ_CSR_E_STAT(csr, bank->bank_number);
101 101
102 seq_puts(sfile, "------- Ring configuration -------\n"); 102 seq_puts(sfile, "------- Ring configuration -------\n");
103 seq_printf(sfile, "ring name: %s\n",
104 ring->ring_debug->ring_name);
103 seq_printf(sfile, "ring num %d, bank num %d\n", 105 seq_printf(sfile, "ring num %d, bank num %d\n",
104 ring->ring_number, ring->bank->bank_number); 106 ring->ring_number, ring->bank->bank_number);
105 seq_printf(sfile, "head %x, tail %x, empty: %d\n", 107 seq_printf(sfile, "head %x, tail %x, empty: %d\n",
diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw.h b/drivers/crypto/qat/qat_common/icp_qat_hw.h
index 68f191b653b0..121d5e6e46ca 100644
--- a/drivers/crypto/qat/qat_common/icp_qat_hw.h
+++ b/drivers/crypto/qat/qat_common/icp_qat_hw.h
@@ -145,7 +145,7 @@ struct icp_qat_hw_auth_setup {
145}; 145};
146 146
147#define QAT_HW_DEFAULT_ALIGNMENT 8 147#define QAT_HW_DEFAULT_ALIGNMENT 8
148#define QAT_HW_ROUND_UP(val, n) (((val) + ((n)-1)) & (~(n-1))) 148#define QAT_HW_ROUND_UP(val, n) (((val) + ((n) - 1)) & (~(n - 1)))
149#define ICP_QAT_HW_NULL_STATE1_SZ 32 149#define ICP_QAT_HW_NULL_STATE1_SZ 32
150#define ICP_QAT_HW_MD5_STATE1_SZ 16 150#define ICP_QAT_HW_MD5_STATE1_SZ 16
151#define ICP_QAT_HW_SHA1_STATE1_SZ 20 151#define ICP_QAT_HW_SHA1_STATE1_SZ 20
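
The spacing-only change to QAT_HW_ROUND_UP() above does not alter its behaviour: the macro rounds val up to the next multiple of n, with n expected to be a power of two. A worked example, illustration only and not part of the patch:

/*
 * QAT_HW_ROUND_UP(13, 8) == (13 + 7) & ~7 == 16
 * QAT_HW_ROUND_UP(16, 8) == (16 + 7) & ~7 == 16
 * QAT_HW_ROUND_UP(17, 8) == (17 + 7) & ~7 == 24
 */
static inline u32 qat_round_up_example(u32 val)
{
	/* e.g. val == 13 rounds up to 16 with the default 8-byte alignment */
	return QAT_HW_ROUND_UP(val, QAT_HW_DEFAULT_ALIGNMENT);
}
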
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index 828f2a686aab..3bd705ca5973 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -110,13 +110,13 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node)
110 list_for_each(itr, adf_devmgr_get_head()) { 110 list_for_each(itr, adf_devmgr_get_head()) {
111 accel_dev = list_entry(itr, struct adf_accel_dev, list); 111 accel_dev = list_entry(itr, struct adf_accel_dev, list);
112 if ((node == dev_to_node(&GET_DEV(accel_dev)) || 112 if ((node == dev_to_node(&GET_DEV(accel_dev)) ||
113 dev_to_node(&GET_DEV(accel_dev)) < 0) 113 dev_to_node(&GET_DEV(accel_dev)) < 0) &&
114 && adf_dev_started(accel_dev)) 114 adf_dev_started(accel_dev))
115 break; 115 break;
116 accel_dev = NULL; 116 accel_dev = NULL;
117 } 117 }
118 if (!accel_dev) { 118 if (!accel_dev) {
119 pr_err("QAT: Could not find device on node %d\n", node); 119 pr_err("QAT: Could not find a device on node %d\n", node);
120 accel_dev = adf_devmgr_get_first(); 120 accel_dev = adf_devmgr_get_first();
121 } 121 }
122 if (!accel_dev || !adf_dev_started(accel_dev)) 122 if (!accel_dev || !adf_dev_started(accel_dev))
@@ -137,7 +137,8 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node)
137 if (atomic_add_return(1, &inst_best->refctr) == 1) { 137 if (atomic_add_return(1, &inst_best->refctr) == 1) {
138 if (adf_dev_get(accel_dev)) { 138 if (adf_dev_get(accel_dev)) {
139 atomic_dec(&inst_best->refctr); 139 atomic_dec(&inst_best->refctr);
140 pr_err("QAT: Could increment dev refctr\n"); 140 dev_err(&GET_DEV(accel_dev),
141 "Could not increment dev refctr\n");
141 return NULL; 142 return NULL;
142 } 143 }
143 } 144 }
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index b818c19713bf..274ff7e9de6e 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -434,8 +434,8 @@ static void qat_hal_reset_timestamp(struct icp_qat_fw_loader_handle *handle)
434 SET_GLB_CSR(handle, MISC_CONTROL, misc_ctl | MC_TIMESTAMP_ENABLE); 434 SET_GLB_CSR(handle, MISC_CONTROL, misc_ctl | MC_TIMESTAMP_ENABLE);
435} 435}
436 436
437#define ESRAM_AUTO_TINIT (1<<2) 437#define ESRAM_AUTO_TINIT BIT(2)
438#define ESRAM_AUTO_TINIT_DONE (1<<3) 438#define ESRAM_AUTO_TINIT_DONE BIT(3)
439#define ESRAM_AUTO_INIT_USED_CYCLES (1640) 439#define ESRAM_AUTO_INIT_USED_CYCLES (1640)
440#define ESRAM_AUTO_INIT_CSR_OFFSET 0xC1C 440#define ESRAM_AUTO_INIT_CSR_OFFSET 0xC1C
441static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle) 441static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle)
@@ -718,7 +718,7 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
718 handle->hal_handle->ae_max_num = max_en_ae_id + 1; 718 handle->hal_handle->ae_max_num = max_en_ae_id + 1;
719 /* take all AEs out of reset */ 719 /* take all AEs out of reset */
720 if (qat_hal_clr_reset(handle)) { 720 if (qat_hal_clr_reset(handle)) {
721 pr_err("QAT: qat_hal_clr_reset error\n"); 721 dev_err(&GET_DEV(accel_dev), "qat_hal_clr_reset error\n");
722 goto out_err; 722 goto out_err;
723 } 723 }
724 if (qat_hal_clear_gpr(handle)) 724 if (qat_hal_clear_gpr(handle))
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_admin.c b/drivers/crypto/qat/qat_dh895xcc/adf_admin.c
index 53c491b59f07..e4666065c399 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_admin.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_admin.c
@@ -93,7 +93,8 @@ int adf_put_admin_msg_sync(struct adf_accel_dev *accel_dev,
93 memcpy(out, admin->virt_addr + offset + 93 memcpy(out, admin->virt_addr + offset +
94 ADF_ADMINMSG_LEN, ADF_ADMINMSG_LEN); 94 ADF_ADMINMSG_LEN, ADF_ADMINMSG_LEN);
95 else 95 else
96 pr_err("QAT: Failed to send admin msg to accelerator\n"); 96 dev_err(&GET_DEV(accel_dev),
97 "Failed to send admin msg to accelerator\n");
97 98
98 mutex_unlock(&admin->lock); 99 mutex_unlock(&admin->lock);
99 return received ? 0 : -EFAULT; 100 return received ? 0 : -EFAULT;
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index 6a735d5c0e37..b1386922d7a2 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -150,7 +150,8 @@ void adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev,
150 *arb_map_config = thrd_to_arb_map_sku6; 150 *arb_map_config = thrd_to_arb_map_sku6;
151 break; 151 break;
152 default: 152 default:
153 pr_err("QAT: The configuration doesn't match any SKU"); 153 dev_err(&GET_DEV(accel_dev),
154 "The configuration doesn't match any SKU");
154 *arb_map_config = NULL; 155 *arb_map_config = NULL;
155 } 156 }
156} 157}
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
index 01e0be21e93a..25269a9f24a2 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
@@ -73,11 +73,11 @@
73/* Error detection and correction */ 73/* Error detection and correction */
74#define ADF_DH895XCC_AE_CTX_ENABLES(i) (i * 0x1000 + 0x20818) 74#define ADF_DH895XCC_AE_CTX_ENABLES(i) (i * 0x1000 + 0x20818)
75#define ADF_DH895XCC_AE_MISC_CONTROL(i) (i * 0x1000 + 0x20960) 75#define ADF_DH895XCC_AE_MISC_CONTROL(i) (i * 0x1000 + 0x20960)
76#define ADF_DH895XCC_ENABLE_AE_ECC_ERR (1 << 28) 76#define ADF_DH895XCC_ENABLE_AE_ECC_ERR BIT(28)
77#define ADF_DH895XCC_ENABLE_AE_ECC_PARITY_CORR (1 << 24 | 1 << 12) 77#define ADF_DH895XCC_ENABLE_AE_ECC_PARITY_CORR (BIT(24) | BIT(12))
78#define ADF_DH895XCC_UERRSSMSH(i) (i * 0x4000 + 0x18) 78#define ADF_DH895XCC_UERRSSMSH(i) (i * 0x4000 + 0x18)
79#define ADF_DH895XCC_CERRSSMSH(i) (i * 0x4000 + 0x10) 79#define ADF_DH895XCC_CERRSSMSH(i) (i * 0x4000 + 0x10)
80#define ADF_DH895XCC_ERRSSMSH_EN (1 << 3) 80#define ADF_DH895XCC_ERRSSMSH_EN BIT(3)
81 81
82/* Admin Messages Registers */ 82/* Admin Messages Registers */
83#define ADF_DH895XCC_ADMINMSGUR_OFFSET (0x3A000 + 0x574) 83#define ADF_DH895XCC_ADMINMSGUR_OFFSET (0x3A000 + 0x574)
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
index 8ffdb95c9804..9decea2779c6 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
@@ -236,7 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
236 } 236 }
237 237
238 accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL, 238 accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL,
239 dev_to_node(&pdev->dev)); 239 dev_to_node(&pdev->dev));
240 if (!accel_dev) 240 if (!accel_dev)
241 return -ENOMEM; 241 return -ENOMEM;
242 242
@@ -379,7 +379,7 @@ out_err:
379 return ret; 379 return ret;
380} 380}
381 381
382static void __exit adf_remove(struct pci_dev *pdev) 382static void adf_remove(struct pci_dev *pdev)
383{ 383{
384 struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); 384 struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
385 385
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c
index fe8f89697ad8..0d03c109c2d3 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c
@@ -73,7 +73,7 @@ static int adf_enable_msix(struct adf_accel_dev *accel_dev)
73 if (pci_enable_msix_exact(pci_dev_info->pci_dev, 73 if (pci_enable_msix_exact(pci_dev_info->pci_dev,
74 pci_dev_info->msix_entries.entries, 74 pci_dev_info->msix_entries.entries,
75 msix_num_entries)) { 75 msix_num_entries)) {
76 pr_err("QAT: Failed to enable MSIX IRQ\n"); 76 dev_err(&GET_DEV(accel_dev), "Failed to enable MSIX IRQ\n");
77 return -EFAULT; 77 return -EFAULT;
78 } 78 }
79 return 0; 79 return 0;
@@ -97,7 +97,8 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
97{ 97{
98 struct adf_accel_dev *accel_dev = dev_ptr; 98 struct adf_accel_dev *accel_dev = dev_ptr;
99 99
100 pr_info("QAT: qat_dev%d spurious AE interrupt\n", accel_dev->accel_id); 100 dev_info(&GET_DEV(accel_dev), "qat_dev%d spurious AE interrupt\n",
101 accel_dev->accel_id);
101 return IRQ_HANDLED; 102 return IRQ_HANDLED;
102} 103}
103 104
@@ -121,8 +122,9 @@ static int adf_request_irqs(struct adf_accel_dev *accel_dev)
121 ret = request_irq(msixe[i].vector, 122 ret = request_irq(msixe[i].vector,
122 adf_msix_isr_bundle, 0, name, bank); 123 adf_msix_isr_bundle, 0, name, bank);
123 if (ret) { 124 if (ret) {
124 pr_err("QAT: failed to enable irq %d for %s\n", 125 dev_err(&GET_DEV(accel_dev),
125 msixe[i].vector, name); 126 "failed to enable irq %d for %s\n",
127 msixe[i].vector, name);
126 return ret; 128 return ret;
127 } 129 }
128 130
@@ -136,8 +138,9 @@ static int adf_request_irqs(struct adf_accel_dev *accel_dev)
136 "qat%d-ae-cluster", accel_dev->accel_id); 138 "qat%d-ae-cluster", accel_dev->accel_id);
137 ret = request_irq(msixe[i].vector, adf_msix_isr_ae, 0, name, accel_dev); 139 ret = request_irq(msixe[i].vector, adf_msix_isr_ae, 0, name, accel_dev);
138 if (ret) { 140 if (ret) {
139 pr_err("QAT: failed to enable irq %d, for %s\n", 141 dev_err(&GET_DEV(accel_dev),
140 msixe[i].vector, name); 142 "failed to enable irq %d, for %s\n",
143 msixe[i].vector, name);
141 return ret; 144 return ret;
142 } 145 }
143 return ret; 146 return ret;
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 290a7f0a681f..6be377f6b9e7 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -479,6 +479,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
479 struct scatterlist *sg; 479 struct scatterlist *sg;
480 int ret; 480 int ret;
481 int i, j; 481 int i, j;
482 int idx = 0;
482 483
483 /* Copy new key if necessary */ 484 /* Copy new key if necessary */
484 if (ctx->flags & FLAGS_NEW_KEY) { 485 if (ctx->flags & FLAGS_NEW_KEY) {
@@ -486,17 +487,20 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
486 ctx->flags &= ~FLAGS_NEW_KEY; 487 ctx->flags &= ~FLAGS_NEW_KEY;
487 488
488 if (dev->flags & FLAGS_CBC) { 489 if (dev->flags & FLAGS_CBC) {
489 dev->hw_desc[0]->len1 = AES_BLOCK_SIZE; 490 dev->hw_desc[idx]->len1 = AES_BLOCK_SIZE;
490 dev->hw_desc[0]->p1 = dev->iv_phys_base; 491 dev->hw_desc[idx]->p1 = dev->iv_phys_base;
491 } else { 492 } else {
492 dev->hw_desc[0]->len1 = 0; 493 dev->hw_desc[idx]->len1 = 0;
493 dev->hw_desc[0]->p1 = 0; 494 dev->hw_desc[idx]->p1 = 0;
494 } 495 }
495 dev->hw_desc[0]->len2 = ctx->keylen; 496 dev->hw_desc[idx]->len2 = ctx->keylen;
496 dev->hw_desc[0]->p2 = dev->key_phys_base; 497 dev->hw_desc[idx]->p2 = dev->key_phys_base;
497 dev->hw_desc[0]->next = dev->hw_phys_desc[1]; 498 dev->hw_desc[idx]->next = dev->hw_phys_desc[1];
499
500 dev->hw_desc[idx]->hdr = sahara_aes_key_hdr(dev);
501
502 idx++;
498 } 503 }
499 dev->hw_desc[0]->hdr = sahara_aes_key_hdr(dev);
500 504
501 dev->nb_in_sg = sahara_sg_length(dev->in_sg, dev->total); 505 dev->nb_in_sg = sahara_sg_length(dev->in_sg, dev->total);
502 dev->nb_out_sg = sahara_sg_length(dev->out_sg, dev->total); 506 dev->nb_out_sg = sahara_sg_length(dev->out_sg, dev->total);
@@ -520,7 +524,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
520 } 524 }
521 525
522 /* Create input links */ 526 /* Create input links */
523 dev->hw_desc[1]->p1 = dev->hw_phys_link[0]; 527 dev->hw_desc[idx]->p1 = dev->hw_phys_link[0];
524 sg = dev->in_sg; 528 sg = dev->in_sg;
525 for (i = 0; i < dev->nb_in_sg; i++) { 529 for (i = 0; i < dev->nb_in_sg; i++) {
526 dev->hw_link[i]->len = sg->length; 530 dev->hw_link[i]->len = sg->length;
@@ -534,7 +538,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
534 } 538 }
535 539
536 /* Create output links */ 540 /* Create output links */
537 dev->hw_desc[1]->p2 = dev->hw_phys_link[i]; 541 dev->hw_desc[idx]->p2 = dev->hw_phys_link[i];
538 sg = dev->out_sg; 542 sg = dev->out_sg;
539 for (j = i; j < dev->nb_out_sg + i; j++) { 543 for (j = i; j < dev->nb_out_sg + i; j++) {
540 dev->hw_link[j]->len = sg->length; 544 dev->hw_link[j]->len = sg->length;
@@ -548,10 +552,10 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev)
548 } 552 }
549 553
550 /* Fill remaining fields of hw_desc[1] */ 554 /* Fill remaining fields of hw_desc[1] */
551 dev->hw_desc[1]->hdr = sahara_aes_data_link_hdr(dev); 555 dev->hw_desc[idx]->hdr = sahara_aes_data_link_hdr(dev);
552 dev->hw_desc[1]->len1 = dev->total; 556 dev->hw_desc[idx]->len1 = dev->total;
553 dev->hw_desc[1]->len2 = dev->total; 557 dev->hw_desc[idx]->len2 = dev->total;
554 dev->hw_desc[1]->next = 0; 558 dev->hw_desc[idx]->next = 0;
555 559
556 sahara_dump_descriptors(dev); 560 sahara_dump_descriptors(dev);
557 sahara_dump_links(dev); 561 sahara_dump_links(dev);
@@ -576,6 +580,7 @@ static int sahara_aes_process(struct ablkcipher_request *req)
576 struct sahara_ctx *ctx; 580 struct sahara_ctx *ctx;
577 struct sahara_aes_reqctx *rctx; 581 struct sahara_aes_reqctx *rctx;
578 int ret; 582 int ret;
583 unsigned long timeout;
579 584
580 /* Request is ready to be dispatched by the device */ 585 /* Request is ready to be dispatched by the device */
581 dev_dbg(dev->device, 586 dev_dbg(dev->device,
@@ -601,10 +606,12 @@ static int sahara_aes_process(struct ablkcipher_request *req)
601 reinit_completion(&dev->dma_completion); 606 reinit_completion(&dev->dma_completion);
602 607
603 ret = sahara_hw_descriptor_create(dev); 608 ret = sahara_hw_descriptor_create(dev);
609 if (ret)
610 return -EINVAL;
604 611
605 ret = wait_for_completion_timeout(&dev->dma_completion, 612 timeout = wait_for_completion_timeout(&dev->dma_completion,
606 msecs_to_jiffies(SAHARA_TIMEOUT_MS)); 613 msecs_to_jiffies(SAHARA_TIMEOUT_MS));
607 if (!ret) { 614 if (!timeout) {
608 dev_err(dev->device, "AES timeout\n"); 615 dev_err(dev->device, "AES timeout\n");
609 return -ETIMEDOUT; 616 return -ETIMEDOUT;
610 } 617 }
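
The sahara change above separates the error code from the wait result because wait_for_completion_timeout() returns the remaining time in jiffies (0 on timeout), not a negative errno. A minimal sketch of the corrected pattern using names from the driver; the helper sahara_wait_dma() is hypothetical:

static int sahara_wait_dma(struct sahara_dev *dev)
{
	unsigned long timeout;

	/* returns remaining jiffies, or 0 if the DMA completion timed out */
	timeout = wait_for_completion_timeout(&dev->dma_completion,
					      msecs_to_jiffies(SAHARA_TIMEOUT_MS));
	if (!timeout)
		return -ETIMEDOUT;
	return 0;
}
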
@@ -1044,7 +1051,8 @@ static int sahara_sha_process(struct ahash_request *req)
1044{ 1051{
1045 struct sahara_dev *dev = dev_ptr; 1052 struct sahara_dev *dev = dev_ptr;
1046 struct sahara_sha_reqctx *rctx = ahash_request_ctx(req); 1053 struct sahara_sha_reqctx *rctx = ahash_request_ctx(req);
1047 int ret = -EINPROGRESS; 1054 int ret;
1055 unsigned long timeout;
1048 1056
1049 ret = sahara_sha_prepare_request(req); 1057 ret = sahara_sha_prepare_request(req);
1050 if (!ret) 1058 if (!ret)
@@ -1070,9 +1078,9 @@ static int sahara_sha_process(struct ahash_request *req)
1070 1078
1071 sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR); 1079 sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR);
1072 1080
1073 ret = wait_for_completion_timeout(&dev->dma_completion, 1081 timeout = wait_for_completion_timeout(&dev->dma_completion,
1074 msecs_to_jiffies(SAHARA_TIMEOUT_MS)); 1082 msecs_to_jiffies(SAHARA_TIMEOUT_MS));
1075 if (!ret) { 1083 if (!timeout) {
1076 dev_err(dev->device, "SHA timeout\n"); 1084 dev_err(dev->device, "SHA timeout\n");
1077 return -ETIMEDOUT; 1085 return -ETIMEDOUT;
1078 } 1086 }
@@ -1092,15 +1100,20 @@ static int sahara_queue_manage(void *data)
1092{ 1100{
1093 struct sahara_dev *dev = (struct sahara_dev *)data; 1101 struct sahara_dev *dev = (struct sahara_dev *)data;
1094 struct crypto_async_request *async_req; 1102 struct crypto_async_request *async_req;
1103 struct crypto_async_request *backlog;
1095 int ret = 0; 1104 int ret = 0;
1096 1105
1097 do { 1106 do {
1098 __set_current_state(TASK_INTERRUPTIBLE); 1107 __set_current_state(TASK_INTERRUPTIBLE);
1099 1108
1100 mutex_lock(&dev->queue_mutex); 1109 mutex_lock(&dev->queue_mutex);
1110 backlog = crypto_get_backlog(&dev->queue);
1101 async_req = crypto_dequeue_request(&dev->queue); 1111 async_req = crypto_dequeue_request(&dev->queue);
1102 mutex_unlock(&dev->queue_mutex); 1112 mutex_unlock(&dev->queue_mutex);
1103 1113
1114 if (backlog)
1115 backlog->complete(backlog, -EINPROGRESS);
1116
1104 if (async_req) { 1117 if (async_req) {
1105 if (crypto_tfm_alg_type(async_req->tfm) == 1118 if (crypto_tfm_alg_type(async_req->tfm) ==
1106 CRYPTO_ALG_TYPE_AHASH) { 1119 CRYPTO_ALG_TYPE_AHASH) {
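
The backlog handling added to sahara_queue_manage() above follows the usual crypto queue convention: fetch the backlog entry together with the next request under the queue lock, then signal it with -EINPROGRESS so its submitter knows the request has moved from the backlog into the active queue. A minimal sketch under those assumptions; process_one_request() is a hypothetical placeholder for the driver's AES/SHA dispatch:

static void example_dequeue_one(struct sahara_dev *dev)
{
	struct crypto_async_request *async_req, *backlog;

	mutex_lock(&dev->queue_mutex);
	backlog = crypto_get_backlog(&dev->queue);
	async_req = crypto_dequeue_request(&dev->queue);
	mutex_unlock(&dev->queue_mutex);

	if (backlog)
		backlog->complete(backlog, -EINPROGRESS);

	if (async_req)
		process_one_request(dev, async_req);	/* hypothetical helper */
}
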
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index ebbae8d3ce0d..857414afa29a 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -637,8 +637,6 @@ static void talitos_unregister_rng(struct device *dev)
637#define TALITOS_MAX_KEY_SIZE 96 637#define TALITOS_MAX_KEY_SIZE 96
638#define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ 638#define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
639 639
640#define MD5_BLOCK_SIZE 64
641
642struct talitos_ctx { 640struct talitos_ctx {
643 struct device *dev; 641 struct device *dev;
644 int ch; 642 int ch;
@@ -2195,7 +2193,7 @@ static struct talitos_alg_template driver_algs[] = {
2195 .halg.base = { 2193 .halg.base = {
2196 .cra_name = "md5", 2194 .cra_name = "md5",
2197 .cra_driver_name = "md5-talitos", 2195 .cra_driver_name = "md5-talitos",
2198 .cra_blocksize = MD5_BLOCK_SIZE, 2196 .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
2199 .cra_flags = CRYPTO_ALG_TYPE_AHASH | 2197 .cra_flags = CRYPTO_ALG_TYPE_AHASH |
2200 CRYPTO_ALG_ASYNC, 2198 CRYPTO_ALG_ASYNC,
2201 } 2199 }
@@ -2285,7 +2283,7 @@ static struct talitos_alg_template driver_algs[] = {
2285 .halg.base = { 2283 .halg.base = {
2286 .cra_name = "hmac(md5)", 2284 .cra_name = "hmac(md5)",
2287 .cra_driver_name = "hmac-md5-talitos", 2285 .cra_driver_name = "hmac-md5-talitos",
2288 .cra_blocksize = MD5_BLOCK_SIZE, 2286 .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
2289 .cra_flags = CRYPTO_ALG_TYPE_AHASH | 2287 .cra_flags = CRYPTO_ALG_TYPE_AHASH |
2290 CRYPTO_ALG_ASYNC, 2288 CRYPTO_ALG_ASYNC,
2291 } 2289 }
@@ -2706,20 +2704,16 @@ static int talitos_probe(struct platform_device *ofdev)
2706 goto err_out; 2704 goto err_out;
2707 } 2705 }
2708 2706
2707 priv->fifo_len = roundup_pow_of_two(priv->chfifo_len);
2708
2709 for (i = 0; i < priv->num_channels; i++) { 2709 for (i = 0; i < priv->num_channels; i++) {
2710 priv->chan[i].reg = priv->reg + TALITOS_CH_STRIDE * (i + 1); 2710 priv->chan[i].reg = priv->reg + TALITOS_CH_STRIDE * (i + 1);
2711 if (!priv->irq[1] || !(i & 1)) 2711 if (!priv->irq[1] || !(i & 1))
2712 priv->chan[i].reg += TALITOS_CH_BASE_OFFSET; 2712 priv->chan[i].reg += TALITOS_CH_BASE_OFFSET;
2713 }
2714 2713
2715 for (i = 0; i < priv->num_channels; i++) {
2716 spin_lock_init(&priv->chan[i].head_lock); 2714 spin_lock_init(&priv->chan[i].head_lock);
2717 spin_lock_init(&priv->chan[i].tail_lock); 2715 spin_lock_init(&priv->chan[i].tail_lock);
2718 }
2719 2716
2720 priv->fifo_len = roundup_pow_of_two(priv->chfifo_len);
2721
2722 for (i = 0; i < priv->num_channels; i++) {
2723 priv->chan[i].fifo = kzalloc(sizeof(struct talitos_request) * 2717 priv->chan[i].fifo = kzalloc(sizeof(struct talitos_request) *
2724 priv->fifo_len, GFP_KERNEL); 2718 priv->fifo_len, GFP_KERNEL);
2725 if (!priv->chan[i].fifo) { 2719 if (!priv->chan[i].fifo) {
@@ -2727,11 +2721,10 @@ static int talitos_probe(struct platform_device *ofdev)
2727 err = -ENOMEM; 2721 err = -ENOMEM;
2728 goto err_out; 2722 goto err_out;
2729 } 2723 }
2730 }
2731 2724
2732 for (i = 0; i < priv->num_channels; i++)
2733 atomic_set(&priv->chan[i].submit_count, 2725 atomic_set(&priv->chan[i].submit_count,
2734 -(priv->chfifo_len - 1)); 2726 -(priv->chfifo_len - 1));
2727 }
2735 2728
2736 dma_set_mask(dev, DMA_BIT_MASK(36)); 2729 dma_set_mask(dev, DMA_BIT_MASK(36));
2737 2730
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 187a8fd7eee7..5f5f360628fc 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -184,7 +184,7 @@ static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg,
184 direction, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); 184 direction, DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
185 if (!desc) { 185 if (!desc) {
186 dev_err(ctx->device->dev, 186 dev_err(ctx->device->dev,
187 "%s: device_prep_slave_sg() failed!\n", __func__); 187 "%s: dmaengine_prep_slave_sg() failed!\n", __func__);
188 return -EFAULT; 188 return -EFAULT;
189 } 189 }
190 190
diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig
new file mode 100644
index 000000000000..771babf16aa0
--- /dev/null
+++ b/drivers/crypto/vmx/Kconfig
@@ -0,0 +1,8 @@
1config CRYPTO_DEV_VMX_ENCRYPT
2 tristate "Encryption acceleration support on P8 CPU"
3 depends on PPC64 && CRYPTO_DEV_VMX
4 default y
5 help
6 Support for VMX cryptographic acceleration instructions on Power8 CPU.
7 This module supports acceleration for AES and GHASH in hardware. If you
8 choose 'M' here, this module will be called vmx-crypto.
diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
new file mode 100644
index 000000000000..c699c6e6c82e
--- /dev/null
+++ b/drivers/crypto/vmx/Makefile
@@ -0,0 +1,19 @@
1obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
2vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o ghash.o
3
4ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
5TARGET := linux-ppc64le
6else
7TARGET := linux-ppc64
8endif
9
10quiet_cmd_perl = PERL $@
11 cmd_perl = $(PERL) $(<) $(TARGET) > $(@)
12
13$(src)/aesp8-ppc.S: $(src)/aesp8-ppc.pl
14 $(call cmd,perl)
15
16$(src)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl
17 $(call cmd,perl)
18
19.PRECIOUS: $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S
diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c
new file mode 100644
index 000000000000..ab300ea19434
--- /dev/null
+++ b/drivers/crypto/vmx/aes.c
@@ -0,0 +1,139 @@
1/**
2 * AES routines supporting VMX instructions on the Power 8
3 *
4 * Copyright (C) 2015 International Business Machines Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 only.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 *
19 * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
20 */
21
22#include <linux/types.h>
23#include <linux/err.h>
24#include <linux/crypto.h>
25#include <linux/delay.h>
26#include <linux/hardirq.h>
27#include <asm/switch_to.h>
28#include <crypto/aes.h>
29
30#include "aesp8-ppc.h"
31
32struct p8_aes_ctx {
33 struct crypto_cipher *fallback;
34 struct aes_key enc_key;
35 struct aes_key dec_key;
36};
37
38static int p8_aes_init(struct crypto_tfm *tfm)
39{
40 const char *alg;
41 struct crypto_cipher *fallback;
42 struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
43
44 if (!(alg = crypto_tfm_alg_name(tfm))) {
45 printk(KERN_ERR "Failed to get algorithm name.\n");
46 return -ENOENT;
47 }
48
49 fallback = crypto_alloc_cipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
50 if (IS_ERR(fallback)) {
51 printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n",
52 alg, PTR_ERR(fallback));
53 return PTR_ERR(fallback);
54 }
55 printk(KERN_INFO "Using '%s' as fallback implementation.\n",
56 crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback));
57
58 crypto_cipher_set_flags(fallback,
59 crypto_cipher_get_flags((struct crypto_cipher *) tfm));
60 ctx->fallback = fallback;
61
62 return 0;
63}
64
65static void p8_aes_exit(struct crypto_tfm *tfm)
66{
67 struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
68
69 if (ctx->fallback) {
70 crypto_free_cipher(ctx->fallback);
71 ctx->fallback = NULL;
72 }
73}
74
75static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key,
76 unsigned int keylen)
77{
78 int ret;
79 struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
80
81 pagefault_disable();
82 enable_kernel_altivec();
83 ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
84 ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
85 pagefault_enable();
86
87 ret += crypto_cipher_setkey(ctx->fallback, key, keylen);
88 return ret;
89}
90
91static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
92{
93 struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
94
95 if (in_interrupt()) {
96 crypto_cipher_encrypt_one(ctx->fallback, dst, src);
97 } else {
98 pagefault_disable();
99 enable_kernel_altivec();
100 aes_p8_encrypt(src, dst, &ctx->enc_key);
101 pagefault_enable();
102 }
103}
104
105static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
106{
107 struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
108
109 if (in_interrupt()) {
110 crypto_cipher_decrypt_one(ctx->fallback, dst, src);
111 } else {
112 pagefault_disable();
113 enable_kernel_altivec();
114 aes_p8_decrypt(src, dst, &ctx->dec_key);
115 pagefault_enable();
116 }
117}
118
119struct crypto_alg p8_aes_alg = {
120 .cra_name = "aes",
121 .cra_driver_name = "p8_aes",
122 .cra_module = THIS_MODULE,
123 .cra_priority = 1000,
124 .cra_type = NULL,
125 .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK,
126 .cra_alignmask = 0,
127 .cra_blocksize = AES_BLOCK_SIZE,
128 .cra_ctxsize = sizeof(struct p8_aes_ctx),
129 .cra_init = p8_aes_init,
130 .cra_exit = p8_aes_exit,
131 .cra_cipher = {
132 .cia_min_keysize = AES_MIN_KEY_SIZE,
133 .cia_max_keysize = AES_MAX_KEY_SIZE,
134 .cia_setkey = p8_aes_setkey,
135 .cia_encrypt = p8_aes_encrypt,
136 .cia_decrypt = p8_aes_decrypt,
137 },
138};
139
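
p8_aes registers as a plain single-block "aes" cipher with priority 1000 and a generic fallback, so existing in-kernel users pick it up transparently through the crypto API. A hedged usage sketch, not part of the patch; example_one_block() is hypothetical:

#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/aes.h>

static int example_one_block(const u8 key[AES_KEYSIZE_128],
			     const u8 in[AES_BLOCK_SIZE],
			     u8 out[AES_BLOCK_SIZE])
{
	struct crypto_cipher *tfm;
	int ret;

	/* "aes" resolves to the highest-priority registered implementation */
	tfm = crypto_alloc_cipher("aes", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_cipher_setkey(tfm, key, AES_KEYSIZE_128);
	if (!ret)
		crypto_cipher_encrypt_one(tfm, out, in);

	crypto_free_cipher(tfm);
	return ret;
}
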
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
new file mode 100644
index 000000000000..1a559b7dddb5
--- /dev/null
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -0,0 +1,184 @@
1/**
2 * AES CBC routines supporting VMX instructions on the Power 8
3 *
4 * Copyright (C) 2015 International Business Machines Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 only.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 *
19 * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
20 */
21
22#include <linux/types.h>
23#include <linux/err.h>
24#include <linux/crypto.h>
25#include <linux/delay.h>
26#include <linux/hardirq.h>
27#include <asm/switch_to.h>
28#include <crypto/aes.h>
29#include <crypto/scatterwalk.h>
30
31#include "aesp8-ppc.h"
32
33struct p8_aes_cbc_ctx {
34 struct crypto_blkcipher *fallback;
35 struct aes_key enc_key;
36 struct aes_key dec_key;
37};
38
39static int p8_aes_cbc_init(struct crypto_tfm *tfm)
40{
41 const char *alg;
42 struct crypto_blkcipher *fallback;
43 struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
44
45 if (!(alg = crypto_tfm_alg_name(tfm))) {
46 printk(KERN_ERR "Failed to get algorithm name.\n");
47 return -ENOENT;
48 }
49
50 fallback = crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
51 if (IS_ERR(fallback)) {
52 printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n",
53 alg, PTR_ERR(fallback));
54 return PTR_ERR(fallback);
55 }
56 printk(KERN_INFO "Using '%s' as fallback implementation.\n",
57 crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback));
58
59 crypto_blkcipher_set_flags(fallback,
60 crypto_blkcipher_get_flags((struct crypto_blkcipher *) tfm));
61 ctx->fallback = fallback;
62
63 return 0;
64}
65
66static void p8_aes_cbc_exit(struct crypto_tfm *tfm)
67{
68 struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
69
70 if (ctx->fallback) {
71 crypto_free_blkcipher(ctx->fallback);
72 ctx->fallback = NULL;
73 }
74}
75
76static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key,
77 unsigned int keylen)
78{
79 int ret;
80 struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
81
82 pagefault_disable();
83 enable_kernel_altivec();
84 ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
85 ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
86 pagefault_enable();
87
88 ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen);
89 return ret;
90}
91
92static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc,
93 struct scatterlist *dst, struct scatterlist *src,
94 unsigned int nbytes)
95{
96 int ret;
97 struct blkcipher_walk walk;
98 struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(
99 crypto_blkcipher_tfm(desc->tfm));
100 struct blkcipher_desc fallback_desc = {
101 .tfm = ctx->fallback,
102 .info = desc->info,
103 .flags = desc->flags
104 };
105
106 if (in_interrupt()) {
107 ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes);
108 } else {
109 pagefault_disable();
110 enable_kernel_altivec();
111
112 blkcipher_walk_init(&walk, dst, src, nbytes);
113 ret = blkcipher_walk_virt(desc, &walk);
114 while ((nbytes = walk.nbytes)) {
115 aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
116 nbytes & AES_BLOCK_MASK, &ctx->enc_key, walk.iv, 1);
117 nbytes &= AES_BLOCK_SIZE - 1;
118 ret = blkcipher_walk_done(desc, &walk, nbytes);
119 }
120
121 pagefault_enable();
122 }
123
124 return ret;
125}
126
127static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc,
128 struct scatterlist *dst, struct scatterlist *src,
129 unsigned int nbytes)
130{
131 int ret;
132 struct blkcipher_walk walk;
133 struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(
134 crypto_blkcipher_tfm(desc->tfm));
135 struct blkcipher_desc fallback_desc = {
136 .tfm = ctx->fallback,
137 .info = desc->info,
138 .flags = desc->flags
139 };
140
141 if (in_interrupt()) {
142 ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes);
143 } else {
144 pagefault_disable();
145 enable_kernel_altivec();
146
147 blkcipher_walk_init(&walk, dst, src, nbytes);
148 ret = blkcipher_walk_virt(desc, &walk);
149 while ((nbytes = walk.nbytes)) {
150 aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
151 nbytes & AES_BLOCK_MASK, &ctx->dec_key, walk.iv, 0);
152 nbytes &= AES_BLOCK_SIZE - 1;
153 ret = blkcipher_walk_done(desc, &walk, nbytes);
154 }
155
156 pagefault_enable();
157 }
158
159 return ret;
160}
161
162
163struct crypto_alg p8_aes_cbc_alg = {
164 .cra_name = "cbc(aes)",
165 .cra_driver_name = "p8_aes_cbc",
166 .cra_module = THIS_MODULE,
167 .cra_priority = 1000,
168 .cra_type = &crypto_blkcipher_type,
169 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
170 .cra_alignmask = 0,
171 .cra_blocksize = AES_BLOCK_SIZE,
172 .cra_ctxsize = sizeof(struct p8_aes_cbc_ctx),
173 .cra_init = p8_aes_cbc_init,
174 .cra_exit = p8_aes_cbc_exit,
175 .cra_blkcipher = {
176 .ivsize = 0,
177 .min_keysize = AES_MIN_KEY_SIZE,
178 .max_keysize = AES_MAX_KEY_SIZE,
179 .setkey = p8_aes_cbc_setkey,
180 .encrypt = p8_aes_cbc_encrypt,
181 .decrypt = p8_aes_cbc_decrypt,
182 },
183};
184
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
new file mode 100644
index 000000000000..96dbee4bf4a6
--- /dev/null
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -0,0 +1,167 @@
1/**
2 * AES CTR routines supporting VMX instructions on the Power 8
3 *
4 * Copyright (C) 2015 International Business Machines Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 only.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 *
19 * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
20 */
21
22#include <linux/types.h>
23#include <linux/err.h>
24#include <linux/crypto.h>
25#include <linux/delay.h>
26#include <linux/hardirq.h>
27#include <asm/switch_to.h>
28#include <crypto/aes.h>
29#include <crypto/scatterwalk.h>
30#include "aesp8-ppc.h"
31
32struct p8_aes_ctr_ctx {
33 struct crypto_blkcipher *fallback;
34 struct aes_key enc_key;
35};
36
37static int p8_aes_ctr_init(struct crypto_tfm *tfm)
38{
39 const char *alg;
40 struct crypto_blkcipher *fallback;
41 struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
42
43 if (!(alg = crypto_tfm_alg_name(tfm))) {
44 printk(KERN_ERR "Failed to get algorithm name.\n");
45 return -ENOENT;
46 }
47
48 fallback = crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
49 if (IS_ERR(fallback)) {
50 printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n",
51 alg, PTR_ERR(fallback));
52 return PTR_ERR(fallback);
53 }
54 printk(KERN_INFO "Using '%s' as fallback implementation.\n",
55 crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback));
56
57 crypto_blkcipher_set_flags(fallback,
58 crypto_blkcipher_get_flags((struct crypto_blkcipher *) tfm));
59 ctx->fallback = fallback;
60
61 return 0;
62}
63
64static void p8_aes_ctr_exit(struct crypto_tfm *tfm)
65{
66 struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
67
68 if (ctx->fallback) {
69 crypto_free_blkcipher(ctx->fallback);
70 ctx->fallback = NULL;
71 }
72}
73
74static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key,
75 unsigned int keylen)
76{
77 int ret;
78 struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
79
80 pagefault_disable();
81 enable_kernel_altivec();
82 ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
83 pagefault_enable();
84
85 ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen);
86 return ret;
87}
88
89static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx,
90 struct blkcipher_walk *walk)
91{
92 u8 *ctrblk = walk->iv;
93 u8 keystream[AES_BLOCK_SIZE];
94 u8 *src = walk->src.virt.addr;
95 u8 *dst = walk->dst.virt.addr;
96 unsigned int nbytes = walk->nbytes;
97
98 pagefault_disable();
99 enable_kernel_altivec();
100 aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key);
101 pagefault_enable();
102
103 crypto_xor(keystream, src, nbytes);
104 memcpy(dst, keystream, nbytes);
105 crypto_inc(ctrblk, AES_BLOCK_SIZE);
106}
107
108static int p8_aes_ctr_crypt(struct blkcipher_desc *desc,
109 struct scatterlist *dst, struct scatterlist *src,
110 unsigned int nbytes)
111{
112 int ret;
113 struct blkcipher_walk walk;
114 struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(
115 crypto_blkcipher_tfm(desc->tfm));
116 struct blkcipher_desc fallback_desc = {
117 .tfm = ctx->fallback,
118 .info = desc->info,
119 .flags = desc->flags
120 };
121
122 if (in_interrupt()) {
123 ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes);
124 } else {
125 blkcipher_walk_init(&walk, dst, src, nbytes);
126 ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
127 while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
128 pagefault_disable();
129 enable_kernel_altivec();
130 aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, walk.dst.virt.addr,
131 (nbytes & AES_BLOCK_MASK)/AES_BLOCK_SIZE, &ctx->enc_key, walk.iv);
132 pagefault_enable();
133
134 crypto_inc(walk.iv, AES_BLOCK_SIZE);
135 nbytes &= AES_BLOCK_SIZE - 1;
136 ret = blkcipher_walk_done(desc, &walk, nbytes);
137 }
138 if (walk.nbytes) {
139 p8_aes_ctr_final(ctx, &walk);
140 ret = blkcipher_walk_done(desc, &walk, 0);
141 }
142 }
143
144 return ret;
145}
146
147struct crypto_alg p8_aes_ctr_alg = {
148 .cra_name = "ctr(aes)",
149 .cra_driver_name = "p8_aes_ctr",
150 .cra_module = THIS_MODULE,
151 .cra_priority = 1000,
152 .cra_type = &crypto_blkcipher_type,
153 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
154 .cra_alignmask = 0,
155 .cra_blocksize = 1,
156 .cra_ctxsize = sizeof(struct p8_aes_ctr_ctx),
157 .cra_init = p8_aes_ctr_init,
158 .cra_exit = p8_aes_ctr_exit,
159 .cra_blkcipher = {
160 .ivsize = 0,
161 .min_keysize = AES_MIN_KEY_SIZE,
162 .max_keysize = AES_MAX_KEY_SIZE,
163 .setkey = p8_aes_ctr_setkey,
164 .encrypt = p8_aes_ctr_crypt,
165 .decrypt = p8_aes_ctr_crypt,
166 },
167};
diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h
new file mode 100644
index 000000000000..e963945a83e1
--- /dev/null
+++ b/drivers/crypto/vmx/aesp8-ppc.h
@@ -0,0 +1,20 @@
1#include <linux/types.h>
2#include <crypto/aes.h>
3
4#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
5
6struct aes_key {
7 u8 key[AES_MAX_KEYLENGTH];
8 int rounds;
9};
10
11int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
12 struct aes_key *key);
13int aes_p8_set_decrypt_key(const u8 *userKey, const int bits,
14 struct aes_key *key);
15void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key);
16void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key);
17void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
18 const struct aes_key *key, u8 *iv, const int enc);
19void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out,
20 size_t len, const struct aes_key *key, const u8 *iv);
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl
new file mode 100644
index 000000000000..6c5c20c6108e
--- /dev/null
+++ b/drivers/crypto/vmx/aesp8-ppc.pl
@@ -0,0 +1,1930 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# This module implements support for AES instructions as per PowerISA
11# specification version 2.07, first implemented by POWER8 processor.
12# The module is endian-agnostic in the sense that it supports both big-
13# and little-endian cases. Data alignment in parallelizable modes is
14# handled with VSX loads and stores, which implies MSR.VSX flag being
15# set. It should also be noted that ISA specification doesn't prohibit
16# alignment exceptions for these instructions on page boundaries.
17# Initially alignment was handled in pure AltiVec/VMX way [when data
18# is aligned programmatically, which in turn guarantees exception-
19# free execution], but it turned out to hamper performance when vcipher
20# instructions are interleaved. It's reckoned that eventual
21# misalignment penalties at page boundaries are on average lower
22# than the additional overhead of the pure AltiVec approach.
23
24$flavour = shift;
25
26if ($flavour =~ /64/) {
27 $SIZE_T =8;
28 $LRSAVE =2*$SIZE_T;
29 $STU ="stdu";
30 $POP ="ld";
31 $PUSH ="std";
32 $UCMP ="cmpld";
33 $SHL ="sldi";
34} elsif ($flavour =~ /32/) {
35 $SIZE_T =4;
36 $LRSAVE =$SIZE_T;
37 $STU ="stwu";
38 $POP ="lwz";
39 $PUSH ="stw";
40 $UCMP ="cmplw";
41 $SHL ="slwi";
42} else { die "nonsense $flavour"; }
43
44$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
45
46$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
47( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
48( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
49die "can't locate ppc-xlate.pl";
50
51open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
52
53$FRAME=8*$SIZE_T;
54$prefix="aes_p8";
55
56$sp="r1";
57$vrsave="r12";
58
59#########################################################################
60{{{ # Key setup procedures #
61my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
62my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
63my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
64
65$code.=<<___;
66.machine "any"
67
68.text
69
70.align 7
71rcon:
72.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
73.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
74.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
75.long 0,0,0,0 ?asis
76Lconsts:
77 mflr r0
78 bcl 20,31,\$+4
79 mflr $ptr #vvvvv "distance between . and rcon
80 addi $ptr,$ptr,-0x48
81 mtlr r0
82 blr
83 .long 0
84 .byte 0,12,0x14,0,0,0,0,0
85.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
86
87.globl .${prefix}_set_encrypt_key
88Lset_encrypt_key:
89 mflr r11
90 $PUSH r11,$LRSAVE($sp)
91
92 li $ptr,-1
93 ${UCMP}i $inp,0
94 beq- Lenc_key_abort # if ($inp==0) return -1;
95 ${UCMP}i $out,0
96 beq- Lenc_key_abort # if ($out==0) return -1;
97 li $ptr,-2
98 cmpwi $bits,128
99 blt- Lenc_key_abort
100 cmpwi $bits,256
101 bgt- Lenc_key_abort
102 andi. r0,$bits,0x3f
103 bne- Lenc_key_abort
104
105 lis r0,0xfff0
106 mfspr $vrsave,256
107 mtspr 256,r0
108
109 bl Lconsts
110 mtlr r11
111
112 neg r9,$inp
113 lvx $in0,0,$inp
114 addi $inp,$inp,15 # 15 is not typo
115 lvsr $key,0,r9 # borrow $key
116 li r8,0x20
117 cmpwi $bits,192
118 lvx $in1,0,$inp
119 le?vspltisb $mask,0x0f # borrow $mask
120 lvx $rcon,0,$ptr
121 le?vxor $key,$key,$mask # adjust for byte swap
122 lvx $mask,r8,$ptr
123 addi $ptr,$ptr,0x10
124 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
125 li $cnt,8
126 vxor $zero,$zero,$zero
127 mtctr $cnt
128
129 ?lvsr $outperm,0,$out
130 vspltisb $outmask,-1
131 lvx $outhead,0,$out
132 ?vperm $outmask,$zero,$outmask,$outperm
133
134 blt Loop128
135 addi $inp,$inp,8
136 beq L192
137 addi $inp,$inp,8
138 b L256
139
140.align 4
141Loop128:
142 vperm $key,$in0,$in0,$mask # rotate-n-splat
143 vsldoi $tmp,$zero,$in0,12 # >>32
144 vperm $outtail,$in0,$in0,$outperm # rotate
145 vsel $stage,$outhead,$outtail,$outmask
146 vmr $outhead,$outtail
147 vcipherlast $key,$key,$rcon
148 stvx $stage,0,$out
149 addi $out,$out,16
150
151 vxor $in0,$in0,$tmp
152 vsldoi $tmp,$zero,$tmp,12 # >>32
153 vxor $in0,$in0,$tmp
154 vsldoi $tmp,$zero,$tmp,12 # >>32
155 vxor $in0,$in0,$tmp
156 vadduwm $rcon,$rcon,$rcon
157 vxor $in0,$in0,$key
158 bdnz Loop128
159
160 lvx $rcon,0,$ptr # last two round keys
161
162 vperm $key,$in0,$in0,$mask # rotate-n-splat
163 vsldoi $tmp,$zero,$in0,12 # >>32
164 vperm $outtail,$in0,$in0,$outperm # rotate
165 vsel $stage,$outhead,$outtail,$outmask
166 vmr $outhead,$outtail
167 vcipherlast $key,$key,$rcon
168 stvx $stage,0,$out
169 addi $out,$out,16
170
171 vxor $in0,$in0,$tmp
172 vsldoi $tmp,$zero,$tmp,12 # >>32
173 vxor $in0,$in0,$tmp
174 vsldoi $tmp,$zero,$tmp,12 # >>32
175 vxor $in0,$in0,$tmp
176 vadduwm $rcon,$rcon,$rcon
177 vxor $in0,$in0,$key
178
179 vperm $key,$in0,$in0,$mask # rotate-n-splat
180 vsldoi $tmp,$zero,$in0,12 # >>32
181 vperm $outtail,$in0,$in0,$outperm # rotate
182 vsel $stage,$outhead,$outtail,$outmask
183 vmr $outhead,$outtail
184 vcipherlast $key,$key,$rcon
185 stvx $stage,0,$out
186 addi $out,$out,16
187
188 vxor $in0,$in0,$tmp
189 vsldoi $tmp,$zero,$tmp,12 # >>32
190 vxor $in0,$in0,$tmp
191 vsldoi $tmp,$zero,$tmp,12 # >>32
192 vxor $in0,$in0,$tmp
193 vxor $in0,$in0,$key
194 vperm $outtail,$in0,$in0,$outperm # rotate
195 vsel $stage,$outhead,$outtail,$outmask
196 vmr $outhead,$outtail
197 stvx $stage,0,$out
198
199 addi $inp,$out,15 # 15 is not typo
200 addi $out,$out,0x50
201
202 li $rounds,10
203 b Ldone
204
205.align 4
206L192:
207 lvx $tmp,0,$inp
208 li $cnt,4
209 vperm $outtail,$in0,$in0,$outperm # rotate
210 vsel $stage,$outhead,$outtail,$outmask
211 vmr $outhead,$outtail
212 stvx $stage,0,$out
213 addi $out,$out,16
214 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
215 vspltisb $key,8 # borrow $key
216 mtctr $cnt
217 vsububm $mask,$mask,$key # adjust the mask
218
219Loop192:
220 vperm $key,$in1,$in1,$mask # rotate-n-splat
221 vsldoi $tmp,$zero,$in0,12 # >>32
222 vcipherlast $key,$key,$rcon
223
224 vxor $in0,$in0,$tmp
225 vsldoi $tmp,$zero,$tmp,12 # >>32
226 vxor $in0,$in0,$tmp
227 vsldoi $tmp,$zero,$tmp,12 # >>32
228 vxor $in0,$in0,$tmp
229
230 vsldoi $stage,$zero,$in1,8
231 vspltw $tmp,$in0,3
232 vxor $tmp,$tmp,$in1
233 vsldoi $in1,$zero,$in1,12 # >>32
234 vadduwm $rcon,$rcon,$rcon
235 vxor $in1,$in1,$tmp
236 vxor $in0,$in0,$key
237 vxor $in1,$in1,$key
238 vsldoi $stage,$stage,$in0,8
239
240 vperm $key,$in1,$in1,$mask # rotate-n-splat
241 vsldoi $tmp,$zero,$in0,12 # >>32
242 vperm $outtail,$stage,$stage,$outperm # rotate
243 vsel $stage,$outhead,$outtail,$outmask
244 vmr $outhead,$outtail
245 vcipherlast $key,$key,$rcon
246 stvx $stage,0,$out
247 addi $out,$out,16
248
249 vsldoi $stage,$in0,$in1,8
250 vxor $in0,$in0,$tmp
251 vsldoi $tmp,$zero,$tmp,12 # >>32
252 vperm $outtail,$stage,$stage,$outperm # rotate
253 vsel $stage,$outhead,$outtail,$outmask
254 vmr $outhead,$outtail
255 vxor $in0,$in0,$tmp
256 vsldoi $tmp,$zero,$tmp,12 # >>32
257 vxor $in0,$in0,$tmp
258 stvx $stage,0,$out
259 addi $out,$out,16
260
261 vspltw $tmp,$in0,3
262 vxor $tmp,$tmp,$in1
263 vsldoi $in1,$zero,$in1,12 # >>32
264 vadduwm $rcon,$rcon,$rcon
265 vxor $in1,$in1,$tmp
266 vxor $in0,$in0,$key
267 vxor $in1,$in1,$key
268 vperm $outtail,$in0,$in0,$outperm # rotate
269 vsel $stage,$outhead,$outtail,$outmask
270 vmr $outhead,$outtail
271 stvx $stage,0,$out
272 addi $inp,$out,15 # 15 is not typo
273 addi $out,$out,16
274 bdnz Loop192
275
276 li $rounds,12
277 addi $out,$out,0x20
278 b Ldone
279
280.align 4
281L256:
282 lvx $tmp,0,$inp
283 li $cnt,7
284 li $rounds,14
285 vperm $outtail,$in0,$in0,$outperm # rotate
286 vsel $stage,$outhead,$outtail,$outmask
287 vmr $outhead,$outtail
288 stvx $stage,0,$out
289 addi $out,$out,16
290 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
291 mtctr $cnt
292
293Loop256:
294 vperm $key,$in1,$in1,$mask # rotate-n-splat
295 vsldoi $tmp,$zero,$in0,12 # >>32
296 vperm $outtail,$in1,$in1,$outperm # rotate
297 vsel $stage,$outhead,$outtail,$outmask
298 vmr $outhead,$outtail
299 vcipherlast $key,$key,$rcon
300 stvx $stage,0,$out
301 addi $out,$out,16
302
303 vxor $in0,$in0,$tmp
304 vsldoi $tmp,$zero,$tmp,12 # >>32
305 vxor $in0,$in0,$tmp
306 vsldoi $tmp,$zero,$tmp,12 # >>32
307 vxor $in0,$in0,$tmp
308 vadduwm $rcon,$rcon,$rcon
309 vxor $in0,$in0,$key
310 vperm $outtail,$in0,$in0,$outperm # rotate
311 vsel $stage,$outhead,$outtail,$outmask
312 vmr $outhead,$outtail
313 stvx $stage,0,$out
314 addi $inp,$out,15 # 15 is not typo
315 addi $out,$out,16
316 bdz Ldone
317
318 vspltw $key,$in0,3 # just splat
319 vsldoi $tmp,$zero,$in1,12 # >>32
320 vsbox $key,$key
321
322 vxor $in1,$in1,$tmp
323 vsldoi $tmp,$zero,$tmp,12 # >>32
324 vxor $in1,$in1,$tmp
325 vsldoi $tmp,$zero,$tmp,12 # >>32
326 vxor $in1,$in1,$tmp
327
328 vxor $in1,$in1,$key
329 b Loop256
330
331.align 4
332Ldone:
333 lvx $in1,0,$inp # redundant in aligned case
334 vsel $in1,$outhead,$in1,$outmask
335 stvx $in1,0,$inp
336 li $ptr,0
337 mtspr 256,$vrsave
338 stw $rounds,0($out)
339
340Lenc_key_abort:
341 mr r3,$ptr
342 blr
343 .long 0
344 .byte 0,12,0x14,1,0,0,3,0
345 .long 0
346.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
347
348.globl .${prefix}_set_decrypt_key
349 $STU $sp,-$FRAME($sp)
350 mflr r10
351 $PUSH r10,$FRAME+$LRSAVE($sp)
352 bl Lset_encrypt_key
353 mtlr r10
354
355 cmpwi r3,0
356 bne- Ldec_key_abort
357
358 slwi $cnt,$rounds,4
359 subi $inp,$out,240 # first round key
360 srwi $rounds,$rounds,1
361 add $out,$inp,$cnt # last round key
362 mtctr $rounds
363
364Ldeckey:
365 lwz r0, 0($inp)
366 lwz r6, 4($inp)
367 lwz r7, 8($inp)
368 lwz r8, 12($inp)
369 addi $inp,$inp,16
370 lwz r9, 0($out)
371 lwz r10,4($out)
372 lwz r11,8($out)
373 lwz r12,12($out)
374 stw r0, 0($out)
375 stw r6, 4($out)
376 stw r7, 8($out)
377 stw r8, 12($out)
378 subi $out,$out,16
379 stw r9, -16($inp)
380 stw r10,-12($inp)
381 stw r11,-8($inp)
382 stw r12,-4($inp)
383 bdnz Ldeckey
384
385 xor r3,r3,r3 # return value
386Ldec_key_abort:
387 addi $sp,$sp,$FRAME
388 blr
389 .long 0
390 .byte 0,12,4,1,0x80,0,3,0
391 .long 0
392.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
393___
394}}}
395#########################################################################
396{{{ # Single block en- and decrypt procedures #
397sub gen_block () {
398my $dir = shift;
399my $n = $dir eq "de" ? "n" : "";
400my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
401
402$code.=<<___;
403.globl .${prefix}_${dir}crypt
404 lwz $rounds,240($key)
405 lis r0,0xfc00
406 mfspr $vrsave,256
407 li $idx,15 # 15 is not typo
408 mtspr 256,r0
409
410 lvx v0,0,$inp
411 neg r11,$out
412 lvx v1,$idx,$inp
413 lvsl v2,0,$inp # inpperm
414 le?vspltisb v4,0x0f
415 ?lvsl v3,0,r11 # outperm
416 le?vxor v2,v2,v4
417 li $idx,16
418 vperm v0,v0,v1,v2 # align [and byte swap in LE]
419 lvx v1,0,$key
420 ?lvsl v5,0,$key # keyperm
421 srwi $rounds,$rounds,1
422 lvx v2,$idx,$key
423 addi $idx,$idx,16
424 subi $rounds,$rounds,1
425 ?vperm v1,v1,v2,v5 # align round key
426
427 vxor v0,v0,v1
428 lvx v1,$idx,$key
429 addi $idx,$idx,16
430 mtctr $rounds
431
432Loop_${dir}c:
433 ?vperm v2,v2,v1,v5
434 v${n}cipher v0,v0,v2
435 lvx v2,$idx,$key
436 addi $idx,$idx,16
437 ?vperm v1,v1,v2,v5
438 v${n}cipher v0,v0,v1
439 lvx v1,$idx,$key
440 addi $idx,$idx,16
441 bdnz Loop_${dir}c
442
443 ?vperm v2,v2,v1,v5
444 v${n}cipher v0,v0,v2
445 lvx v2,$idx,$key
446 ?vperm v1,v1,v2,v5
447 v${n}cipherlast v0,v0,v1
448
449 vspltisb v2,-1
450 vxor v1,v1,v1
451 li $idx,15 # 15 is not typo
452 ?vperm v2,v1,v2,v3 # outmask
453 le?vxor v3,v3,v4
454 lvx v1,0,$out # outhead
455 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
456 vsel v1,v1,v0,v2
457 lvx v4,$idx,$out
458 stvx v1,0,$out
459 vsel v0,v0,v4,v2
460 stvx v0,$idx,$out
461
462 mtspr 256,$vrsave
463 blr
464 .long 0
465 .byte 0,12,0x14,0,0,0,3,0
466 .long 0
467.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
468___
469}
470&gen_block("en");
471&gen_block("de");
472}}}
473#########################################################################
474{{{ # CBC en- and decrypt procedures #
475my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
476my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
477my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
478 map("v$_",(4..10));
479$code.=<<___;
480.globl .${prefix}_cbc_encrypt
481 ${UCMP}i $len,16
482 bltlr-
483
484 cmpwi $enc,0 # test direction
485 lis r0,0xffe0
486 mfspr $vrsave,256
487 mtspr 256,r0
488
489 li $idx,15
490 vxor $rndkey0,$rndkey0,$rndkey0
491 le?vspltisb $tmp,0x0f
492
493 lvx $ivec,0,$ivp # load [unaligned] iv
494 lvsl $inpperm,0,$ivp
495 lvx $inptail,$idx,$ivp
496 le?vxor $inpperm,$inpperm,$tmp
497 vperm $ivec,$ivec,$inptail,$inpperm
498
499 neg r11,$inp
500 ?lvsl $keyperm,0,$key # prepare for unaligned key
501 lwz $rounds,240($key)
502
503 lvsr $inpperm,0,r11 # prepare for unaligned load
504 lvx $inptail,0,$inp
505 addi $inp,$inp,15 # 15 is not typo
506 le?vxor $inpperm,$inpperm,$tmp
507
508 ?lvsr $outperm,0,$out # prepare for unaligned store
509 vspltisb $outmask,-1
510 lvx $outhead,0,$out
511 ?vperm $outmask,$rndkey0,$outmask,$outperm
512 le?vxor $outperm,$outperm,$tmp
513
514 srwi $rounds,$rounds,1
515 li $idx,16
516 subi $rounds,$rounds,1
517 beq Lcbc_dec
518
519Lcbc_enc:
520 vmr $inout,$inptail
521 lvx $inptail,0,$inp
522 addi $inp,$inp,16
523 mtctr $rounds
524 subi $len,$len,16 # len-=16
525
526 lvx $rndkey0,0,$key
527 vperm $inout,$inout,$inptail,$inpperm
528 lvx $rndkey1,$idx,$key
529 addi $idx,$idx,16
530 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
531 vxor $inout,$inout,$rndkey0
532 lvx $rndkey0,$idx,$key
533 addi $idx,$idx,16
534 vxor $inout,$inout,$ivec
535
536Loop_cbc_enc:
537 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
538 vcipher $inout,$inout,$rndkey1
539 lvx $rndkey1,$idx,$key
540 addi $idx,$idx,16
541 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
542 vcipher $inout,$inout,$rndkey0
543 lvx $rndkey0,$idx,$key
544 addi $idx,$idx,16
545 bdnz Loop_cbc_enc
546
547 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
548 vcipher $inout,$inout,$rndkey1
549 lvx $rndkey1,$idx,$key
550 li $idx,16
551 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
552 vcipherlast $ivec,$inout,$rndkey0
553 ${UCMP}i $len,16
554
555 vperm $tmp,$ivec,$ivec,$outperm
556 vsel $inout,$outhead,$tmp,$outmask
557 vmr $outhead,$tmp
558 stvx $inout,0,$out
559 addi $out,$out,16
560 bge Lcbc_enc
561
562 b Lcbc_done
563
564.align 4
565Lcbc_dec:
566 ${UCMP}i $len,128
567 bge _aesp8_cbc_decrypt8x
568 vmr $tmp,$inptail
569 lvx $inptail,0,$inp
570 addi $inp,$inp,16
571 mtctr $rounds
572 subi $len,$len,16 # len-=16
573
574 lvx $rndkey0,0,$key
575 vperm $tmp,$tmp,$inptail,$inpperm
576 lvx $rndkey1,$idx,$key
577 addi $idx,$idx,16
578 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
579 vxor $inout,$tmp,$rndkey0
580 lvx $rndkey0,$idx,$key
581 addi $idx,$idx,16
582
583Loop_cbc_dec:
584 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
585 vncipher $inout,$inout,$rndkey1
586 lvx $rndkey1,$idx,$key
587 addi $idx,$idx,16
588 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
589 vncipher $inout,$inout,$rndkey0
590 lvx $rndkey0,$idx,$key
591 addi $idx,$idx,16
592 bdnz Loop_cbc_dec
593
594 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
595 vncipher $inout,$inout,$rndkey1
596 lvx $rndkey1,$idx,$key
597 li $idx,16
598 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
599 vncipherlast $inout,$inout,$rndkey0
600 ${UCMP}i $len,16
601
602 vxor $inout,$inout,$ivec
603 vmr $ivec,$tmp
604 vperm $tmp,$inout,$inout,$outperm
605 vsel $inout,$outhead,$tmp,$outmask
606 vmr $outhead,$tmp
607 stvx $inout,0,$out
608 addi $out,$out,16
609 bge Lcbc_dec
610
611Lcbc_done:
612 addi $out,$out,-1
613 lvx $inout,0,$out # redundant in aligned case
614 vsel $inout,$outhead,$inout,$outmask
615 stvx $inout,0,$out
616
617 neg $enc,$ivp # write [unaligned] iv
618 li $idx,15 # 15 is not typo
619 vxor $rndkey0,$rndkey0,$rndkey0
620 vspltisb $outmask,-1
621 le?vspltisb $tmp,0x0f
622 ?lvsl $outperm,0,$enc
623 ?vperm $outmask,$rndkey0,$outmask,$outperm
624 le?vxor $outperm,$outperm,$tmp
625 lvx $outhead,0,$ivp
626 vperm $ivec,$ivec,$ivec,$outperm
627 vsel $inout,$outhead,$ivec,$outmask
628 lvx $inptail,$idx,$ivp
629 stvx $inout,0,$ivp
630 vsel $inout,$ivec,$inptail,$outmask
631 stvx $inout,$idx,$ivp
632
633 mtspr 256,$vrsave
634 blr
635 .long 0
636 .byte 0,12,0x14,0,0,0,6,0
637 .long 0
638___
639#########################################################################
640{{ # Optimized CBC decrypt procedure #
641my $key_="r11";
642my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
643my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
644my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
645my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
646 # v26-v31 last 6 round keys
647my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
648
649$code.=<<___;
650.align 5
651_aesp8_cbc_decrypt8x:
652 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
653 li r10,`$FRAME+8*16+15`
654 li r11,`$FRAME+8*16+31`
655 stvx v20,r10,$sp # ABI says so
656 addi r10,r10,32
657 stvx v21,r11,$sp
658 addi r11,r11,32
659 stvx v22,r10,$sp
660 addi r10,r10,32
661 stvx v23,r11,$sp
662 addi r11,r11,32
663 stvx v24,r10,$sp
664 addi r10,r10,32
665 stvx v25,r11,$sp
666 addi r11,r11,32
667 stvx v26,r10,$sp
668 addi r10,r10,32
669 stvx v27,r11,$sp
670 addi r11,r11,32
671 stvx v28,r10,$sp
672 addi r10,r10,32
673 stvx v29,r11,$sp
674 addi r11,r11,32
675 stvx v30,r10,$sp
676 stvx v31,r11,$sp
677 li r0,-1
678 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
679 li $x10,0x10
680 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
681 li $x20,0x20
682 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
683 li $x30,0x30
684 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
685 li $x40,0x40
686 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
687 li $x50,0x50
688 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
689 li $x60,0x60
690 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
691 li $x70,0x70
692 mtspr 256,r0
693
694 subi $rounds,$rounds,3 # -4 in total
695 subi $len,$len,128 # bias
696
697 lvx $rndkey0,$x00,$key # load key schedule
698 lvx v30,$x10,$key
699 addi $key,$key,0x20
700 lvx v31,$x00,$key
701 ?vperm $rndkey0,$rndkey0,v30,$keyperm
702 addi $key_,$sp,$FRAME+15
703 mtctr $rounds
704
705Load_cbc_dec_key:
706 ?vperm v24,v30,v31,$keyperm
707 lvx v30,$x10,$key
708 addi $key,$key,0x20
709 stvx v24,$x00,$key_ # off-load round[1]
710 ?vperm v25,v31,v30,$keyperm
711 lvx v31,$x00,$key
712 stvx v25,$x10,$key_ # off-load round[2]
713 addi $key_,$key_,0x20
714 bdnz Load_cbc_dec_key
715
716 lvx v26,$x10,$key
717 ?vperm v24,v30,v31,$keyperm
718 lvx v27,$x20,$key
719 stvx v24,$x00,$key_ # off-load round[3]
720 ?vperm v25,v31,v26,$keyperm
721 lvx v28,$x30,$key
722 stvx v25,$x10,$key_ # off-load round[4]
723 addi $key_,$sp,$FRAME+15 # rewind $key_
724 ?vperm v26,v26,v27,$keyperm
725 lvx v29,$x40,$key
726 ?vperm v27,v27,v28,$keyperm
727 lvx v30,$x50,$key
728 ?vperm v28,v28,v29,$keyperm
729 lvx v31,$x60,$key
730 ?vperm v29,v29,v30,$keyperm
731 lvx $out0,$x70,$key # borrow $out0
732 ?vperm v30,v30,v31,$keyperm
733 lvx v24,$x00,$key_ # pre-load round[1]
734 ?vperm v31,v31,$out0,$keyperm
735 lvx v25,$x10,$key_ # pre-load round[2]
736
737 #lvx $inptail,0,$inp # "caller" already did this
738 #addi $inp,$inp,15 # 15 is not typo
739 subi $inp,$inp,15 # undo "caller"
740
741 le?li $idx,8
742 lvx_u $in0,$x00,$inp # load first 8 "words"
743 le?lvsl $inpperm,0,$idx
744 le?vspltisb $tmp,0x0f
745 lvx_u $in1,$x10,$inp
746 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
747 lvx_u $in2,$x20,$inp
748 le?vperm $in0,$in0,$in0,$inpperm
749 lvx_u $in3,$x30,$inp
750 le?vperm $in1,$in1,$in1,$inpperm
751 lvx_u $in4,$x40,$inp
752 le?vperm $in2,$in2,$in2,$inpperm
753 vxor $out0,$in0,$rndkey0
754 lvx_u $in5,$x50,$inp
755 le?vperm $in3,$in3,$in3,$inpperm
756 vxor $out1,$in1,$rndkey0
757 lvx_u $in6,$x60,$inp
758 le?vperm $in4,$in4,$in4,$inpperm
759 vxor $out2,$in2,$rndkey0
760 lvx_u $in7,$x70,$inp
761 addi $inp,$inp,0x80
762 le?vperm $in5,$in5,$in5,$inpperm
763 vxor $out3,$in3,$rndkey0
764 le?vperm $in6,$in6,$in6,$inpperm
765 vxor $out4,$in4,$rndkey0
766 le?vperm $in7,$in7,$in7,$inpperm
767 vxor $out5,$in5,$rndkey0
768 vxor $out6,$in6,$rndkey0
769 vxor $out7,$in7,$rndkey0
770
771 mtctr $rounds
772 b Loop_cbc_dec8x
773.align 5
774Loop_cbc_dec8x:
775 vncipher $out0,$out0,v24
776 vncipher $out1,$out1,v24
777 vncipher $out2,$out2,v24
778 vncipher $out3,$out3,v24
779 vncipher $out4,$out4,v24
780 vncipher $out5,$out5,v24
781 vncipher $out6,$out6,v24
782 vncipher $out7,$out7,v24
783 lvx v24,$x20,$key_ # round[3]
784 addi $key_,$key_,0x20
785
786 vncipher $out0,$out0,v25
787 vncipher $out1,$out1,v25
788 vncipher $out2,$out2,v25
789 vncipher $out3,$out3,v25
790 vncipher $out4,$out4,v25
791 vncipher $out5,$out5,v25
792 vncipher $out6,$out6,v25
793 vncipher $out7,$out7,v25
794 lvx v25,$x10,$key_ # round[4]
795 bdnz Loop_cbc_dec8x
796
797 subic $len,$len,128 # $len-=128
798 vncipher $out0,$out0,v24
799 vncipher $out1,$out1,v24
800 vncipher $out2,$out2,v24
801 vncipher $out3,$out3,v24
802 vncipher $out4,$out4,v24
803 vncipher $out5,$out5,v24
804 vncipher $out6,$out6,v24
805 vncipher $out7,$out7,v24
806
807 subfe. r0,r0,r0 # borrow?-1:0
808 vncipher $out0,$out0,v25
809 vncipher $out1,$out1,v25
810 vncipher $out2,$out2,v25
811 vncipher $out3,$out3,v25
812 vncipher $out4,$out4,v25
813 vncipher $out5,$out5,v25
814 vncipher $out6,$out6,v25
815 vncipher $out7,$out7,v25
816
817 and r0,r0,$len
818 vncipher $out0,$out0,v26
819 vncipher $out1,$out1,v26
820 vncipher $out2,$out2,v26
821 vncipher $out3,$out3,v26
822 vncipher $out4,$out4,v26
823 vncipher $out5,$out5,v26
824 vncipher $out6,$out6,v26
825 vncipher $out7,$out7,v26
826
827 add $inp,$inp,r0 # $inp is adjusted in such
828 # way that at exit from the
829 # loop inX-in7 are loaded
830 # with last "words"
831 vncipher $out0,$out0,v27
832 vncipher $out1,$out1,v27
833 vncipher $out2,$out2,v27
834 vncipher $out3,$out3,v27
835 vncipher $out4,$out4,v27
836 vncipher $out5,$out5,v27
837 vncipher $out6,$out6,v27
838 vncipher $out7,$out7,v27
839
840 addi $key_,$sp,$FRAME+15 # rewind $key_
841 vncipher $out0,$out0,v28
842 vncipher $out1,$out1,v28
843 vncipher $out2,$out2,v28
844 vncipher $out3,$out3,v28
845 vncipher $out4,$out4,v28
846 vncipher $out5,$out5,v28
847 vncipher $out6,$out6,v28
848 vncipher $out7,$out7,v28
849 lvx v24,$x00,$key_ # re-pre-load round[1]
850
851 vncipher $out0,$out0,v29
852 vncipher $out1,$out1,v29
853 vncipher $out2,$out2,v29
854 vncipher $out3,$out3,v29
855 vncipher $out4,$out4,v29
856 vncipher $out5,$out5,v29
857 vncipher $out6,$out6,v29
858 vncipher $out7,$out7,v29
859 lvx v25,$x10,$key_ # re-pre-load round[2]
860
861 vncipher $out0,$out0,v30
862 vxor $ivec,$ivec,v31 # xor with last round key
863 vncipher $out1,$out1,v30
864 vxor $in0,$in0,v31
865 vncipher $out2,$out2,v30
866 vxor $in1,$in1,v31
867 vncipher $out3,$out3,v30
868 vxor $in2,$in2,v31
869 vncipher $out4,$out4,v30
870 vxor $in3,$in3,v31
871 vncipher $out5,$out5,v30
872 vxor $in4,$in4,v31
873 vncipher $out6,$out6,v30
874 vxor $in5,$in5,v31
875 vncipher $out7,$out7,v30
876 vxor $in6,$in6,v31
877
878 vncipherlast $out0,$out0,$ivec
879 vncipherlast $out1,$out1,$in0
880 lvx_u $in0,$x00,$inp # load next input block
881 vncipherlast $out2,$out2,$in1
882 lvx_u $in1,$x10,$inp
883 vncipherlast $out3,$out3,$in2
884 le?vperm $in0,$in0,$in0,$inpperm
885 lvx_u $in2,$x20,$inp
886 vncipherlast $out4,$out4,$in3
887 le?vperm $in1,$in1,$in1,$inpperm
888 lvx_u $in3,$x30,$inp
889 vncipherlast $out5,$out5,$in4
890 le?vperm $in2,$in2,$in2,$inpperm
891 lvx_u $in4,$x40,$inp
892 vncipherlast $out6,$out6,$in5
893 le?vperm $in3,$in3,$in3,$inpperm
894 lvx_u $in5,$x50,$inp
895 vncipherlast $out7,$out7,$in6
896 le?vperm $in4,$in4,$in4,$inpperm
897 lvx_u $in6,$x60,$inp
898 vmr $ivec,$in7
899 le?vperm $in5,$in5,$in5,$inpperm
900 lvx_u $in7,$x70,$inp
901 addi $inp,$inp,0x80
902
903 le?vperm $out0,$out0,$out0,$inpperm
904 le?vperm $out1,$out1,$out1,$inpperm
905 stvx_u $out0,$x00,$out
906 le?vperm $in6,$in6,$in6,$inpperm
907 vxor $out0,$in0,$rndkey0
908 le?vperm $out2,$out2,$out2,$inpperm
909 stvx_u $out1,$x10,$out
910 le?vperm $in7,$in7,$in7,$inpperm
911 vxor $out1,$in1,$rndkey0
912 le?vperm $out3,$out3,$out3,$inpperm
913 stvx_u $out2,$x20,$out
914 vxor $out2,$in2,$rndkey0
915 le?vperm $out4,$out4,$out4,$inpperm
916 stvx_u $out3,$x30,$out
917 vxor $out3,$in3,$rndkey0
918 le?vperm $out5,$out5,$out5,$inpperm
919 stvx_u $out4,$x40,$out
920 vxor $out4,$in4,$rndkey0
921 le?vperm $out6,$out6,$out6,$inpperm
922 stvx_u $out5,$x50,$out
923 vxor $out5,$in5,$rndkey0
924 le?vperm $out7,$out7,$out7,$inpperm
925 stvx_u $out6,$x60,$out
926 vxor $out6,$in6,$rndkey0
927 stvx_u $out7,$x70,$out
928 addi $out,$out,0x80
929 vxor $out7,$in7,$rndkey0
930
931 mtctr $rounds
932 beq Loop_cbc_dec8x # did $len-=128 borrow?
933
934 addic. $len,$len,128
935 beq Lcbc_dec8x_done
936 nop
937 nop
938
939Loop_cbc_dec8x_tail: # up to 7 "words" tail...
940 vncipher $out1,$out1,v24
941 vncipher $out2,$out2,v24
942 vncipher $out3,$out3,v24
943 vncipher $out4,$out4,v24
944 vncipher $out5,$out5,v24
945 vncipher $out6,$out6,v24
946 vncipher $out7,$out7,v24
947 lvx v24,$x20,$key_ # round[3]
948 addi $key_,$key_,0x20
949
950 vncipher $out1,$out1,v25
951 vncipher $out2,$out2,v25
952 vncipher $out3,$out3,v25
953 vncipher $out4,$out4,v25
954 vncipher $out5,$out5,v25
955 vncipher $out6,$out6,v25
956 vncipher $out7,$out7,v25
957 lvx v25,$x10,$key_ # round[4]
958 bdnz Loop_cbc_dec8x_tail
959
960 vncipher $out1,$out1,v24
961 vncipher $out2,$out2,v24
962 vncipher $out3,$out3,v24
963 vncipher $out4,$out4,v24
964 vncipher $out5,$out5,v24
965 vncipher $out6,$out6,v24
966 vncipher $out7,$out7,v24
967
968 vncipher $out1,$out1,v25
969 vncipher $out2,$out2,v25
970 vncipher $out3,$out3,v25
971 vncipher $out4,$out4,v25
972 vncipher $out5,$out5,v25
973 vncipher $out6,$out6,v25
974 vncipher $out7,$out7,v25
975
976 vncipher $out1,$out1,v26
977 vncipher $out2,$out2,v26
978 vncipher $out3,$out3,v26
979 vncipher $out4,$out4,v26
980 vncipher $out5,$out5,v26
981 vncipher $out6,$out6,v26
982 vncipher $out7,$out7,v26
983
984 vncipher $out1,$out1,v27
985 vncipher $out2,$out2,v27
986 vncipher $out3,$out3,v27
987 vncipher $out4,$out4,v27
988 vncipher $out5,$out5,v27
989 vncipher $out6,$out6,v27
990 vncipher $out7,$out7,v27
991
992 vncipher $out1,$out1,v28
993 vncipher $out2,$out2,v28
994 vncipher $out3,$out3,v28
995 vncipher $out4,$out4,v28
996 vncipher $out5,$out5,v28
997 vncipher $out6,$out6,v28
998 vncipher $out7,$out7,v28
999
1000 vncipher $out1,$out1,v29
1001 vncipher $out2,$out2,v29
1002 vncipher $out3,$out3,v29
1003 vncipher $out4,$out4,v29
1004 vncipher $out5,$out5,v29
1005 vncipher $out6,$out6,v29
1006 vncipher $out7,$out7,v29
1007
1008 vncipher $out1,$out1,v30
1009 vxor $ivec,$ivec,v31 # last round key
1010 vncipher $out2,$out2,v30
1011 vxor $in1,$in1,v31
1012 vncipher $out3,$out3,v30
1013 vxor $in2,$in2,v31
1014 vncipher $out4,$out4,v30
1015 vxor $in3,$in3,v31
1016 vncipher $out5,$out5,v30
1017 vxor $in4,$in4,v31
1018 vncipher $out6,$out6,v30
1019 vxor $in5,$in5,v31
1020 vncipher $out7,$out7,v30
1021 vxor $in6,$in6,v31
1022
1023 cmplwi $len,32 # switch($len)
1024 blt Lcbc_dec8x_one
1025 nop
1026 beq Lcbc_dec8x_two
1027 cmplwi $len,64
1028 blt Lcbc_dec8x_three
1029 nop
1030 beq Lcbc_dec8x_four
1031 cmplwi $len,96
1032 blt Lcbc_dec8x_five
1033 nop
1034 beq Lcbc_dec8x_six
1035
1036Lcbc_dec8x_seven:
1037 vncipherlast $out1,$out1,$ivec
1038 vncipherlast $out2,$out2,$in1
1039 vncipherlast $out3,$out3,$in2
1040 vncipherlast $out4,$out4,$in3
1041 vncipherlast $out5,$out5,$in4
1042 vncipherlast $out6,$out6,$in5
1043 vncipherlast $out7,$out7,$in6
1044 vmr $ivec,$in7
1045
1046 le?vperm $out1,$out1,$out1,$inpperm
1047 le?vperm $out2,$out2,$out2,$inpperm
1048 stvx_u $out1,$x00,$out
1049 le?vperm $out3,$out3,$out3,$inpperm
1050 stvx_u $out2,$x10,$out
1051 le?vperm $out4,$out4,$out4,$inpperm
1052 stvx_u $out3,$x20,$out
1053 le?vperm $out5,$out5,$out5,$inpperm
1054 stvx_u $out4,$x30,$out
1055 le?vperm $out6,$out6,$out6,$inpperm
1056 stvx_u $out5,$x40,$out
1057 le?vperm $out7,$out7,$out7,$inpperm
1058 stvx_u $out6,$x50,$out
1059 stvx_u $out7,$x60,$out
1060 addi $out,$out,0x70
1061 b Lcbc_dec8x_done
1062
1063.align 5
1064Lcbc_dec8x_six:
1065 vncipherlast $out2,$out2,$ivec
1066 vncipherlast $out3,$out3,$in2
1067 vncipherlast $out4,$out4,$in3
1068 vncipherlast $out5,$out5,$in4
1069 vncipherlast $out6,$out6,$in5
1070 vncipherlast $out7,$out7,$in6
1071 vmr $ivec,$in7
1072
1073 le?vperm $out2,$out2,$out2,$inpperm
1074 le?vperm $out3,$out3,$out3,$inpperm
1075 stvx_u $out2,$x00,$out
1076 le?vperm $out4,$out4,$out4,$inpperm
1077 stvx_u $out3,$x10,$out
1078 le?vperm $out5,$out5,$out5,$inpperm
1079 stvx_u $out4,$x20,$out
1080 le?vperm $out6,$out6,$out6,$inpperm
1081 stvx_u $out5,$x30,$out
1082 le?vperm $out7,$out7,$out7,$inpperm
1083 stvx_u $out6,$x40,$out
1084 stvx_u $out7,$x50,$out
1085 addi $out,$out,0x60
1086 b Lcbc_dec8x_done
1087
1088.align 5
1089Lcbc_dec8x_five:
1090 vncipherlast $out3,$out3,$ivec
1091 vncipherlast $out4,$out4,$in3
1092 vncipherlast $out5,$out5,$in4
1093 vncipherlast $out6,$out6,$in5
1094 vncipherlast $out7,$out7,$in6
1095 vmr $ivec,$in7
1096
1097 le?vperm $out3,$out3,$out3,$inpperm
1098 le?vperm $out4,$out4,$out4,$inpperm
1099 stvx_u $out3,$x00,$out
1100 le?vperm $out5,$out5,$out5,$inpperm
1101 stvx_u $out4,$x10,$out
1102 le?vperm $out6,$out6,$out6,$inpperm
1103 stvx_u $out5,$x20,$out
1104 le?vperm $out7,$out7,$out7,$inpperm
1105 stvx_u $out6,$x30,$out
1106 stvx_u $out7,$x40,$out
1107 addi $out,$out,0x50
1108 b Lcbc_dec8x_done
1109
1110.align 5
1111Lcbc_dec8x_four:
1112 vncipherlast $out4,$out4,$ivec
1113 vncipherlast $out5,$out5,$in4
1114 vncipherlast $out6,$out6,$in5
1115 vncipherlast $out7,$out7,$in6
1116 vmr $ivec,$in7
1117
1118 le?vperm $out4,$out4,$out4,$inpperm
1119 le?vperm $out5,$out5,$out5,$inpperm
1120 stvx_u $out4,$x00,$out
1121 le?vperm $out6,$out6,$out6,$inpperm
1122 stvx_u $out5,$x10,$out
1123 le?vperm $out7,$out7,$out7,$inpperm
1124 stvx_u $out6,$x20,$out
1125 stvx_u $out7,$x30,$out
1126 addi $out,$out,0x40
1127 b Lcbc_dec8x_done
1128
1129.align 5
1130Lcbc_dec8x_three:
1131 vncipherlast $out5,$out5,$ivec
1132 vncipherlast $out6,$out6,$in5
1133 vncipherlast $out7,$out7,$in6
1134 vmr $ivec,$in7
1135
1136 le?vperm $out5,$out5,$out5,$inpperm
1137 le?vperm $out6,$out6,$out6,$inpperm
1138 stvx_u $out5,$x00,$out
1139 le?vperm $out7,$out7,$out7,$inpperm
1140 stvx_u $out6,$x10,$out
1141 stvx_u $out7,$x20,$out
1142 addi $out,$out,0x30
1143 b Lcbc_dec8x_done
1144
1145.align 5
1146Lcbc_dec8x_two:
1147 vncipherlast $out6,$out6,$ivec
1148 vncipherlast $out7,$out7,$in6
1149 vmr $ivec,$in7
1150
1151 le?vperm $out6,$out6,$out6,$inpperm
1152 le?vperm $out7,$out7,$out7,$inpperm
1153 stvx_u $out6,$x00,$out
1154 stvx_u $out7,$x10,$out
1155 addi $out,$out,0x20
1156 b Lcbc_dec8x_done
1157
1158.align 5
1159Lcbc_dec8x_one:
1160 vncipherlast $out7,$out7,$ivec
1161 vmr $ivec,$in7
1162
1163 le?vperm $out7,$out7,$out7,$inpperm
1164 stvx_u $out7,0,$out
1165 addi $out,$out,0x10
1166
1167Lcbc_dec8x_done:
1168 le?vperm $ivec,$ivec,$ivec,$inpperm
1169 stvx_u $ivec,0,$ivp # write [unaligned] iv
1170
1171 li r10,`$FRAME+15`
1172 li r11,`$FRAME+31`
1173 stvx $inpperm,r10,$sp # wipe copies of round keys
1174 addi r10,r10,32
1175 stvx $inpperm,r11,$sp
1176 addi r11,r11,32
1177 stvx $inpperm,r10,$sp
1178 addi r10,r10,32
1179 stvx $inpperm,r11,$sp
1180 addi r11,r11,32
1181 stvx $inpperm,r10,$sp
1182 addi r10,r10,32
1183 stvx $inpperm,r11,$sp
1184 addi r11,r11,32
1185 stvx $inpperm,r10,$sp
1186 addi r10,r10,32
1187 stvx $inpperm,r11,$sp
1188 addi r11,r11,32
1189
1190 mtspr 256,$vrsave
1191 lvx v20,r10,$sp # ABI says so
1192 addi r10,r10,32
1193 lvx v21,r11,$sp
1194 addi r11,r11,32
1195 lvx v22,r10,$sp
1196 addi r10,r10,32
1197 lvx v23,r11,$sp
1198 addi r11,r11,32
1199 lvx v24,r10,$sp
1200 addi r10,r10,32
1201 lvx v25,r11,$sp
1202 addi r11,r11,32
1203 lvx v26,r10,$sp
1204 addi r10,r10,32
1205 lvx v27,r11,$sp
1206 addi r11,r11,32
1207 lvx v28,r10,$sp
1208 addi r10,r10,32
1209 lvx v29,r11,$sp
1210 addi r11,r11,32
1211 lvx v30,r10,$sp
1212 lvx v31,r11,$sp
1213 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1214 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1215 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1216 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1217 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1218 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1219 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1220 blr
1221 .long 0
1222 .byte 0,12,0x14,0,0x80,6,6,0
1223 .long 0
1224.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1225___
1226}} }}}
1227
1228#########################################################################
1229{{{ # CTR procedure[s] #
1230my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1231my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1232my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1233 map("v$_",(4..11));
1234my $dat=$tmp;
1235
1236$code.=<<___;
1237.globl .${prefix}_ctr32_encrypt_blocks
1238 ${UCMP}i $len,1
1239 bltlr-
1240
1241 lis r0,0xfff0
1242 mfspr $vrsave,256
1243 mtspr 256,r0
1244
1245 li $idx,15
1246 vxor $rndkey0,$rndkey0,$rndkey0
1247 le?vspltisb $tmp,0x0f
1248
1249 lvx $ivec,0,$ivp # load [unaligned] iv
1250 lvsl $inpperm,0,$ivp
1251 lvx $inptail,$idx,$ivp
1252 vspltisb $one,1
1253 le?vxor $inpperm,$inpperm,$tmp
1254 vperm $ivec,$ivec,$inptail,$inpperm
1255 vsldoi $one,$rndkey0,$one,1
1256
1257 neg r11,$inp
1258 ?lvsl $keyperm,0,$key # prepare for unaligned key
1259 lwz $rounds,240($key)
1260
1261 lvsr $inpperm,0,r11 # prepare for unaligned load
1262 lvx $inptail,0,$inp
1263 addi $inp,$inp,15 # 15 is not typo
1264 le?vxor $inpperm,$inpperm,$tmp
1265
1266 srwi $rounds,$rounds,1
1267 li $idx,16
1268 subi $rounds,$rounds,1
1269
1270 ${UCMP}i $len,8
1271 bge _aesp8_ctr32_encrypt8x
1272
1273 ?lvsr $outperm,0,$out # prepare for unaligned store
1274 vspltisb $outmask,-1
1275 lvx $outhead,0,$out
1276 ?vperm $outmask,$rndkey0,$outmask,$outperm
1277 le?vxor $outperm,$outperm,$tmp
1278
1279 lvx $rndkey0,0,$key
1280 mtctr $rounds
1281 lvx $rndkey1,$idx,$key
1282 addi $idx,$idx,16
1283 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1284 vxor $inout,$ivec,$rndkey0
1285 lvx $rndkey0,$idx,$key
1286 addi $idx,$idx,16
1287 b Loop_ctr32_enc
1288
1289.align 5
1290Loop_ctr32_enc:
1291 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1292 vcipher $inout,$inout,$rndkey1
1293 lvx $rndkey1,$idx,$key
1294 addi $idx,$idx,16
1295 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1296 vcipher $inout,$inout,$rndkey0
1297 lvx $rndkey0,$idx,$key
1298 addi $idx,$idx,16
1299 bdnz Loop_ctr32_enc
1300
1301 vadduwm $ivec,$ivec,$one
1302 vmr $dat,$inptail
1303 lvx $inptail,0,$inp
1304 addi $inp,$inp,16
1305 subic. $len,$len,1 # blocks--
1306
1307 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1308 vcipher $inout,$inout,$rndkey1
1309 lvx $rndkey1,$idx,$key
1310 vperm $dat,$dat,$inptail,$inpperm
1311 li $idx,16
1312 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1313 lvx $rndkey0,0,$key
1314 vxor $dat,$dat,$rndkey1 # last round key
1315 vcipherlast $inout,$inout,$dat
1316
1317 lvx $rndkey1,$idx,$key
1318 addi $idx,$idx,16
1319 vperm $inout,$inout,$inout,$outperm
1320 vsel $dat,$outhead,$inout,$outmask
1321 mtctr $rounds
1322 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1323 vmr $outhead,$inout
1324 vxor $inout,$ivec,$rndkey0
1325 lvx $rndkey0,$idx,$key
1326 addi $idx,$idx,16
1327 stvx $dat,0,$out
1328 addi $out,$out,16
1329 bne Loop_ctr32_enc
1330
1331 addi $out,$out,-1
1332 lvx $inout,0,$out # redundant in aligned case
1333 vsel $inout,$outhead,$inout,$outmask
1334 stvx $inout,0,$out
1335
1336 mtspr 256,$vrsave
1337 blr
1338 .long 0
1339 .byte 0,12,0x14,0,0,0,6,0
1340 .long 0
1341___
1342#########################################################################
1343{{ # Optimized CTR procedure #
1344my $key_="r11";
1345my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1346my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1347my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1348my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1349 # v26-v31 last 6 round keys
1350my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1351my ($two,$three,$four)=($outhead,$outperm,$outmask);
1352
1353$code.=<<___;
1354.align 5
1355_aesp8_ctr32_encrypt8x:
1356 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1357 li r10,`$FRAME+8*16+15`
1358 li r11,`$FRAME+8*16+31`
1359 stvx v20,r10,$sp # ABI says so
1360 addi r10,r10,32
1361 stvx v21,r11,$sp
1362 addi r11,r11,32
1363 stvx v22,r10,$sp
1364 addi r10,r10,32
1365 stvx v23,r11,$sp
1366 addi r11,r11,32
1367 stvx v24,r10,$sp
1368 addi r10,r10,32
1369 stvx v25,r11,$sp
1370 addi r11,r11,32
1371 stvx v26,r10,$sp
1372 addi r10,r10,32
1373 stvx v27,r11,$sp
1374 addi r11,r11,32
1375 stvx v28,r10,$sp
1376 addi r10,r10,32
1377 stvx v29,r11,$sp
1378 addi r11,r11,32
1379 stvx v30,r10,$sp
1380 stvx v31,r11,$sp
1381 li r0,-1
1382 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1383 li $x10,0x10
1384 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1385 li $x20,0x20
1386 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1387 li $x30,0x30
1388 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1389 li $x40,0x40
1390 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1391 li $x50,0x50
1392 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1393 li $x60,0x60
1394 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1395 li $x70,0x70
1396 mtspr 256,r0
1397
1398 subi $rounds,$rounds,3 # -4 in total
1399
1400 lvx $rndkey0,$x00,$key # load key schedule
1401 lvx v30,$x10,$key
1402 addi $key,$key,0x20
1403 lvx v31,$x00,$key
1404 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1405 addi $key_,$sp,$FRAME+15
1406 mtctr $rounds
1407
1408Load_ctr32_enc_key:
1409 ?vperm v24,v30,v31,$keyperm
1410 lvx v30,$x10,$key
1411 addi $key,$key,0x20
1412 stvx v24,$x00,$key_ # off-load round[1]
1413 ?vperm v25,v31,v30,$keyperm
1414 lvx v31,$x00,$key
1415 stvx v25,$x10,$key_ # off-load round[2]
1416 addi $key_,$key_,0x20
1417 bdnz Load_ctr32_enc_key
1418
1419 lvx v26,$x10,$key
1420 ?vperm v24,v30,v31,$keyperm
1421 lvx v27,$x20,$key
1422 stvx v24,$x00,$key_ # off-load round[3]
1423 ?vperm v25,v31,v26,$keyperm
1424 lvx v28,$x30,$key
1425 stvx v25,$x10,$key_ # off-load round[4]
1426 addi $key_,$sp,$FRAME+15 # rewind $key_
1427 ?vperm v26,v26,v27,$keyperm
1428 lvx v29,$x40,$key
1429 ?vperm v27,v27,v28,$keyperm
1430 lvx v30,$x50,$key
1431 ?vperm v28,v28,v29,$keyperm
1432 lvx v31,$x60,$key
1433 ?vperm v29,v29,v30,$keyperm
1434 lvx $out0,$x70,$key # borrow $out0
1435 ?vperm v30,v30,v31,$keyperm
1436 lvx v24,$x00,$key_ # pre-load round[1]
1437 ?vperm v31,v31,$out0,$keyperm
1438 lvx v25,$x10,$key_ # pre-load round[2]
1439
1440 vadduwm $two,$one,$one
1441 subi $inp,$inp,15 # undo "caller"
1442 $SHL $len,$len,4
1443
1444 vadduwm $out1,$ivec,$one # counter values ...
1445 vadduwm $out2,$ivec,$two
1446 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1447 le?li $idx,8
1448 vadduwm $out3,$out1,$two
1449 vxor $out1,$out1,$rndkey0
1450 le?lvsl $inpperm,0,$idx
1451 vadduwm $out4,$out2,$two
1452 vxor $out2,$out2,$rndkey0
1453 le?vspltisb $tmp,0x0f
1454 vadduwm $out5,$out3,$two
1455 vxor $out3,$out3,$rndkey0
1456 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1457 vadduwm $out6,$out4,$two
1458 vxor $out4,$out4,$rndkey0
1459 vadduwm $out7,$out5,$two
1460 vxor $out5,$out5,$rndkey0
1461 vadduwm $ivec,$out6,$two # next counter value
1462 vxor $out6,$out6,$rndkey0
1463 vxor $out7,$out7,$rndkey0
1464
1465 mtctr $rounds
1466 b Loop_ctr32_enc8x
1467.align 5
1468Loop_ctr32_enc8x:
1469 vcipher $out0,$out0,v24
1470 vcipher $out1,$out1,v24
1471 vcipher $out2,$out2,v24
1472 vcipher $out3,$out3,v24
1473 vcipher $out4,$out4,v24
1474 vcipher $out5,$out5,v24
1475 vcipher $out6,$out6,v24
1476 vcipher $out7,$out7,v24
1477Loop_ctr32_enc8x_middle:
1478 lvx v24,$x20,$key_ # round[3]
1479 addi $key_,$key_,0x20
1480
1481 vcipher $out0,$out0,v25
1482 vcipher $out1,$out1,v25
1483 vcipher $out2,$out2,v25
1484 vcipher $out3,$out3,v25
1485 vcipher $out4,$out4,v25
1486 vcipher $out5,$out5,v25
1487 vcipher $out6,$out6,v25
1488 vcipher $out7,$out7,v25
1489 lvx v25,$x10,$key_ # round[4]
1490 bdnz Loop_ctr32_enc8x
1491
1492 subic r11,$len,256 # $len-256, borrow $key_
1493 vcipher $out0,$out0,v24
1494 vcipher $out1,$out1,v24
1495 vcipher $out2,$out2,v24
1496 vcipher $out3,$out3,v24
1497 vcipher $out4,$out4,v24
1498 vcipher $out5,$out5,v24
1499 vcipher $out6,$out6,v24
1500 vcipher $out7,$out7,v24
1501
1502 subfe r0,r0,r0 # borrow?-1:0
1503 vcipher $out0,$out0,v25
1504 vcipher $out1,$out1,v25
1505 vcipher $out2,$out2,v25
1506 vcipher $out3,$out3,v25
1507 vcipher $out4,$out4,v25
1508 vcipher $out5,$out5,v25
1509 vcipher $out6,$out6,v25
1510 vcipher $out7,$out7,v25
1511
1512 and r0,r0,r11
1513 addi $key_,$sp,$FRAME+15 # rewind $key_
1514 vcipher $out0,$out0,v26
1515 vcipher $out1,$out1,v26
1516 vcipher $out2,$out2,v26
1517 vcipher $out3,$out3,v26
1518 vcipher $out4,$out4,v26
1519 vcipher $out5,$out5,v26
1520 vcipher $out6,$out6,v26
1521 vcipher $out7,$out7,v26
1522 lvx v24,$x00,$key_ # re-pre-load round[1]
1523
1524 subic $len,$len,129 # $len-=129
1525 vcipher $out0,$out0,v27
1526 addi $len,$len,1 # $len-=128 really
1527 vcipher $out1,$out1,v27
1528 vcipher $out2,$out2,v27
1529 vcipher $out3,$out3,v27
1530 vcipher $out4,$out4,v27
1531 vcipher $out5,$out5,v27
1532 vcipher $out6,$out6,v27
1533 vcipher $out7,$out7,v27
1534 lvx v25,$x10,$key_ # re-pre-load round[2]
1535
1536 vcipher $out0,$out0,v28
1537 lvx_u $in0,$x00,$inp # load input
1538 vcipher $out1,$out1,v28
1539 lvx_u $in1,$x10,$inp
1540 vcipher $out2,$out2,v28
1541 lvx_u $in2,$x20,$inp
1542 vcipher $out3,$out3,v28
1543 lvx_u $in3,$x30,$inp
1544 vcipher $out4,$out4,v28
1545 lvx_u $in4,$x40,$inp
1546 vcipher $out5,$out5,v28
1547 lvx_u $in5,$x50,$inp
1548 vcipher $out6,$out6,v28
1549 lvx_u $in6,$x60,$inp
1550 vcipher $out7,$out7,v28
1551 lvx_u $in7,$x70,$inp
1552 addi $inp,$inp,0x80
1553
1554 vcipher $out0,$out0,v29
1555 le?vperm $in0,$in0,$in0,$inpperm
1556 vcipher $out1,$out1,v29
1557 le?vperm $in1,$in1,$in1,$inpperm
1558 vcipher $out2,$out2,v29
1559 le?vperm $in2,$in2,$in2,$inpperm
1560 vcipher $out3,$out3,v29
1561 le?vperm $in3,$in3,$in3,$inpperm
1562 vcipher $out4,$out4,v29
1563 le?vperm $in4,$in4,$in4,$inpperm
1564 vcipher $out5,$out5,v29
1565 le?vperm $in5,$in5,$in5,$inpperm
1566 vcipher $out6,$out6,v29
1567 le?vperm $in6,$in6,$in6,$inpperm
1568 vcipher $out7,$out7,v29
1569 le?vperm $in7,$in7,$in7,$inpperm
1570
1571 add $inp,$inp,r0 # $inp is adjusted in such
1572 # way that at exit from the
1573 # loop inX-in7 are loaded
1574 # with last "words"
1575 subfe. r0,r0,r0 # borrow?-1:0
1576 vcipher $out0,$out0,v30
1577 vxor $in0,$in0,v31 # xor with last round key
1578 vcipher $out1,$out1,v30
1579 vxor $in1,$in1,v31
1580 vcipher $out2,$out2,v30
1581 vxor $in2,$in2,v31
1582 vcipher $out3,$out3,v30
1583 vxor $in3,$in3,v31
1584 vcipher $out4,$out4,v30
1585 vxor $in4,$in4,v31
1586 vcipher $out5,$out5,v30
1587 vxor $in5,$in5,v31
1588 vcipher $out6,$out6,v30
1589 vxor $in6,$in6,v31
1590 vcipher $out7,$out7,v30
1591 vxor $in7,$in7,v31
1592
1593 bne Lctr32_enc8x_break # did $len-129 borrow?
1594
1595 vcipherlast $in0,$out0,$in0
1596 vcipherlast $in1,$out1,$in1
1597 vadduwm $out1,$ivec,$one # counter values ...
1598 vcipherlast $in2,$out2,$in2
1599 vadduwm $out2,$ivec,$two
1600 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1601 vcipherlast $in3,$out3,$in3
1602 vadduwm $out3,$out1,$two
1603 vxor $out1,$out1,$rndkey0
1604 vcipherlast $in4,$out4,$in4
1605 vadduwm $out4,$out2,$two
1606 vxor $out2,$out2,$rndkey0
1607 vcipherlast $in5,$out5,$in5
1608 vadduwm $out5,$out3,$two
1609 vxor $out3,$out3,$rndkey0
1610 vcipherlast $in6,$out6,$in6
1611 vadduwm $out6,$out4,$two
1612 vxor $out4,$out4,$rndkey0
1613 vcipherlast $in7,$out7,$in7
1614 vadduwm $out7,$out5,$two
1615 vxor $out5,$out5,$rndkey0
1616 le?vperm $in0,$in0,$in0,$inpperm
1617 vadduwm $ivec,$out6,$two # next counter value
1618 vxor $out6,$out6,$rndkey0
1619 le?vperm $in1,$in1,$in1,$inpperm
1620 vxor $out7,$out7,$rndkey0
1621 mtctr $rounds
1622
1623 vcipher $out0,$out0,v24
1624 stvx_u $in0,$x00,$out
1625 le?vperm $in2,$in2,$in2,$inpperm
1626 vcipher $out1,$out1,v24
1627 stvx_u $in1,$x10,$out
1628 le?vperm $in3,$in3,$in3,$inpperm
1629 vcipher $out2,$out2,v24
1630 stvx_u $in2,$x20,$out
1631 le?vperm $in4,$in4,$in4,$inpperm
1632 vcipher $out3,$out3,v24
1633 stvx_u $in3,$x30,$out
1634 le?vperm $in5,$in5,$in5,$inpperm
1635 vcipher $out4,$out4,v24
1636 stvx_u $in4,$x40,$out
1637 le?vperm $in6,$in6,$in6,$inpperm
1638 vcipher $out5,$out5,v24
1639 stvx_u $in5,$x50,$out
1640 le?vperm $in7,$in7,$in7,$inpperm
1641 vcipher $out6,$out6,v24
1642 stvx_u $in6,$x60,$out
1643 vcipher $out7,$out7,v24
1644 stvx_u $in7,$x70,$out
1645 addi $out,$out,0x80
1646
1647 b Loop_ctr32_enc8x_middle
1648
1649.align 5
1650Lctr32_enc8x_break:
1651 cmpwi $len,-0x60
1652 blt Lctr32_enc8x_one
1653 nop
1654 beq Lctr32_enc8x_two
1655 cmpwi $len,-0x40
1656 blt Lctr32_enc8x_three
1657 nop
1658 beq Lctr32_enc8x_four
1659 cmpwi $len,-0x20
1660 blt Lctr32_enc8x_five
1661 nop
1662 beq Lctr32_enc8x_six
1663 cmpwi $len,0x00
1664 blt Lctr32_enc8x_seven
1665
1666Lctr32_enc8x_eight:
1667 vcipherlast $out0,$out0,$in0
1668 vcipherlast $out1,$out1,$in1
1669 vcipherlast $out2,$out2,$in2
1670 vcipherlast $out3,$out3,$in3
1671 vcipherlast $out4,$out4,$in4
1672 vcipherlast $out5,$out5,$in5
1673 vcipherlast $out6,$out6,$in6
1674 vcipherlast $out7,$out7,$in7
1675
1676 le?vperm $out0,$out0,$out0,$inpperm
1677 le?vperm $out1,$out1,$out1,$inpperm
1678 stvx_u $out0,$x00,$out
1679 le?vperm $out2,$out2,$out2,$inpperm
1680 stvx_u $out1,$x10,$out
1681 le?vperm $out3,$out3,$out3,$inpperm
1682 stvx_u $out2,$x20,$out
1683 le?vperm $out4,$out4,$out4,$inpperm
1684 stvx_u $out3,$x30,$out
1685 le?vperm $out5,$out5,$out5,$inpperm
1686 stvx_u $out4,$x40,$out
1687 le?vperm $out6,$out6,$out6,$inpperm
1688 stvx_u $out5,$x50,$out
1689 le?vperm $out7,$out7,$out7,$inpperm
1690 stvx_u $out6,$x60,$out
1691 stvx_u $out7,$x70,$out
1692 addi $out,$out,0x80
1693 b Lctr32_enc8x_done
1694
1695.align 5
1696Lctr32_enc8x_seven:
1697 vcipherlast $out0,$out0,$in1
1698 vcipherlast $out1,$out1,$in2
1699 vcipherlast $out2,$out2,$in3
1700 vcipherlast $out3,$out3,$in4
1701 vcipherlast $out4,$out4,$in5
1702 vcipherlast $out5,$out5,$in6
1703 vcipherlast $out6,$out6,$in7
1704
1705 le?vperm $out0,$out0,$out0,$inpperm
1706 le?vperm $out1,$out1,$out1,$inpperm
1707 stvx_u $out0,$x00,$out
1708 le?vperm $out2,$out2,$out2,$inpperm
1709 stvx_u $out1,$x10,$out
1710 le?vperm $out3,$out3,$out3,$inpperm
1711 stvx_u $out2,$x20,$out
1712 le?vperm $out4,$out4,$out4,$inpperm
1713 stvx_u $out3,$x30,$out
1714 le?vperm $out5,$out5,$out5,$inpperm
1715 stvx_u $out4,$x40,$out
1716 le?vperm $out6,$out6,$out6,$inpperm
1717 stvx_u $out5,$x50,$out
1718 stvx_u $out6,$x60,$out
1719 addi $out,$out,0x70
1720 b Lctr32_enc8x_done
1721
1722.align 5
1723Lctr32_enc8x_six:
1724 vcipherlast $out0,$out0,$in2
1725 vcipherlast $out1,$out1,$in3
1726 vcipherlast $out2,$out2,$in4
1727 vcipherlast $out3,$out3,$in5
1728 vcipherlast $out4,$out4,$in6
1729 vcipherlast $out5,$out5,$in7
1730
1731 le?vperm $out0,$out0,$out0,$inpperm
1732 le?vperm $out1,$out1,$out1,$inpperm
1733 stvx_u $out0,$x00,$out
1734 le?vperm $out2,$out2,$out2,$inpperm
1735 stvx_u $out1,$x10,$out
1736 le?vperm $out3,$out3,$out3,$inpperm
1737 stvx_u $out2,$x20,$out
1738 le?vperm $out4,$out4,$out4,$inpperm
1739 stvx_u $out3,$x30,$out
1740 le?vperm $out5,$out5,$out5,$inpperm
1741 stvx_u $out4,$x40,$out
1742 stvx_u $out5,$x50,$out
1743 addi $out,$out,0x60
1744 b Lctr32_enc8x_done
1745
1746.align 5
1747Lctr32_enc8x_five:
1748 vcipherlast $out0,$out0,$in3
1749 vcipherlast $out1,$out1,$in4
1750 vcipherlast $out2,$out2,$in5
1751 vcipherlast $out3,$out3,$in6
1752 vcipherlast $out4,$out4,$in7
1753
1754 le?vperm $out0,$out0,$out0,$inpperm
1755 le?vperm $out1,$out1,$out1,$inpperm
1756 stvx_u $out0,$x00,$out
1757 le?vperm $out2,$out2,$out2,$inpperm
1758 stvx_u $out1,$x10,$out
1759 le?vperm $out3,$out3,$out3,$inpperm
1760 stvx_u $out2,$x20,$out
1761 le?vperm $out4,$out4,$out4,$inpperm
1762 stvx_u $out3,$x30,$out
1763 stvx_u $out4,$x40,$out
1764 addi $out,$out,0x50
1765 b Lctr32_enc8x_done
1766
1767.align 5
1768Lctr32_enc8x_four:
1769 vcipherlast $out0,$out0,$in4
1770 vcipherlast $out1,$out1,$in5
1771 vcipherlast $out2,$out2,$in6
1772 vcipherlast $out3,$out3,$in7
1773
1774 le?vperm $out0,$out0,$out0,$inpperm
1775 le?vperm $out1,$out1,$out1,$inpperm
1776 stvx_u $out0,$x00,$out
1777 le?vperm $out2,$out2,$out2,$inpperm
1778 stvx_u $out1,$x10,$out
1779 le?vperm $out3,$out3,$out3,$inpperm
1780 stvx_u $out2,$x20,$out
1781 stvx_u $out3,$x30,$out
1782 addi $out,$out,0x40
1783 b Lctr32_enc8x_done
1784
1785.align 5
1786Lctr32_enc8x_three:
1787 vcipherlast $out0,$out0,$in5
1788 vcipherlast $out1,$out1,$in6
1789 vcipherlast $out2,$out2,$in7
1790
1791 le?vperm $out0,$out0,$out0,$inpperm
1792 le?vperm $out1,$out1,$out1,$inpperm
1793 stvx_u $out0,$x00,$out
1794 le?vperm $out2,$out2,$out2,$inpperm
1795 stvx_u $out1,$x10,$out
1796 stvx_u $out2,$x20,$out
1797 addi $out,$out,0x30
1798 b Lcbc_dec8x_done
1799
1800.align 5
1801Lctr32_enc8x_two:
1802 vcipherlast $out0,$out0,$in6
1803 vcipherlast $out1,$out1,$in7
1804
1805 le?vperm $out0,$out0,$out0,$inpperm
1806 le?vperm $out1,$out1,$out1,$inpperm
1807 stvx_u $out0,$x00,$out
1808 stvx_u $out1,$x10,$out
1809 addi $out,$out,0x20
1810 b Lcbc_dec8x_done
1811
1812.align 5
1813Lctr32_enc8x_one:
1814 vcipherlast $out0,$out0,$in7
1815
1816 le?vperm $out0,$out0,$out0,$inpperm
1817 stvx_u $out0,0,$out
1818 addi $out,$out,0x10
1819
1820Lctr32_enc8x_done:
1821 li r10,`$FRAME+15`
1822 li r11,`$FRAME+31`
1823 stvx $inpperm,r10,$sp # wipe copies of round keys
1824 addi r10,r10,32
1825 stvx $inpperm,r11,$sp
1826 addi r11,r11,32
1827 stvx $inpperm,r10,$sp
1828 addi r10,r10,32
1829 stvx $inpperm,r11,$sp
1830 addi r11,r11,32
1831 stvx $inpperm,r10,$sp
1832 addi r10,r10,32
1833 stvx $inpperm,r11,$sp
1834 addi r11,r11,32
1835 stvx $inpperm,r10,$sp
1836 addi r10,r10,32
1837 stvx $inpperm,r11,$sp
1838 addi r11,r11,32
1839
1840 mtspr 256,$vrsave
1841 lvx v20,r10,$sp # ABI says so
1842 addi r10,r10,32
1843 lvx v21,r11,$sp
1844 addi r11,r11,32
1845 lvx v22,r10,$sp
1846 addi r10,r10,32
1847 lvx v23,r11,$sp
1848 addi r11,r11,32
1849 lvx v24,r10,$sp
1850 addi r10,r10,32
1851 lvx v25,r11,$sp
1852 addi r11,r11,32
1853 lvx v26,r10,$sp
1854 addi r10,r10,32
1855 lvx v27,r11,$sp
1856 addi r11,r11,32
1857 lvx v28,r10,$sp
1858 addi r10,r10,32
1859 lvx v29,r11,$sp
1860 addi r11,r11,32
1861 lvx v30,r10,$sp
1862 lvx v31,r11,$sp
1863 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1864 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1865 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1866 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1867 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1868 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1869 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1870 blr
1871 .long 0
1872 .byte 0,12,0x14,0,0x80,6,6,0
1873 .long 0
1874.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1875___
1876}} }}}
1877
1878my $consts=1;
1879foreach(split("\n",$code)) {
1880 s/\`([^\`]*)\`/eval($1)/geo;
1881
1882 # constants table endian-specific conversion
1883 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
1884 my $conv=$3;
1885 my @bytes=();
1886
1887 # convert to endian-agnostic format
1888 if ($1 eq "long") {
1889 foreach (split(/,\s*/,$2)) {
1890 my $l = /^0/?oct:int;
1891 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1892 }
1893 } else {
1894 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
1895 }
1896
1897 # little-endian conversion
1898 if ($flavour =~ /le$/o) {
1899 SWITCH: for($conv) {
1900 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
1901 /\?rev/ && do { @bytes=reverse(@bytes); last; };
1902 }
1903 }
1904
1905 #emit
1906 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1907 next;
1908 }
1909 $consts=0 if (m/Lconsts:/o); # end of table
1910
1911 # instructions prefixed with '?' are endian-specific and need
1912 # to be adjusted accordingly...
1913 if ($flavour =~ /le$/o) { # little-endian
1914 s/le\?//o or
1915 s/be\?/#be#/o or
1916 s/\?lvsr/lvsl/o or
1917 s/\?lvsl/lvsr/o or
1918 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1919 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1920 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1921 } else { # big-endian
1922 s/le\?/#le#/o or
1923 s/be\?//o or
1924 s/\?([a-z]+)/$1/o;
1925 }
1926
1927 print $_,"\n";
1928}
1929
1930close STDOUT;
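The Loop_cbc_dec8x and Loop_ctr32_enc8x bodies above lean on a branch-free pointer trick: subic records whether fewer than 128 bytes remain, subfe turns the resulting borrow into an all-ones/zero mask, and the masked (now negative) length is added to $inp so that the final pass re-reads the tail blocks instead of running past the end of the input. Below is a minimal C sketch of that idiom, a hypothetical model with made-up names rather than code from this patch:

#include <stdint.h>
#include <stddef.h>

/*
 * Rough model of the "subic/subfe/and/add" sequence from the 8x loops.
 * The real code interleaves these steps with vcipher/vncipher rounds;
 * here they are shown in isolation, and all names are illustrative.
 */
static const uint8_t *advance_8_blocks(const uint8_t *inp, size_t *len)
{
	/* subfe r0,r0,r0 after subic: all-ones if the subtraction borrowed */
	uintptr_t borrow = (*len < 128) ? ~(uintptr_t)0 : 0;

	*len -= 128;		/* subic $len,$len,128 (wraps on the last pass) */
	inp  += 128;		/* the addi $inp,$inp,0x80 issued after loading */

	/*
	 * and r0,r0,$len ; add $inp,$inp,r0 : on every full pass the mask is
	 * zero and $inp is untouched; on the last pass $inp is pulled back so
	 * the following eight 16-byte loads land on the final blocks of the
	 * buffer instead of past its end.
	 */
	return inp + (intptr_t)(borrow & *len);
}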
diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
new file mode 100644
index 000000000000..d0ffe277af5c
--- /dev/null
+++ b/drivers/crypto/vmx/ghash.c
@@ -0,0 +1,214 @@
1/**
2 * GHASH routines supporting VMX instructions on the Power 8
3 *
4 * Copyright (C) 2015 International Business Machines Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 only.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 *
19 * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
20 */
21
22#include <linux/types.h>
23#include <linux/err.h>
24#include <linux/crypto.h>
25#include <linux/delay.h>
26#include <linux/hardirq.h>
27#include <asm/switch_to.h>
28#include <crypto/aes.h>
29#include <crypto/scatterwalk.h>
30#include <crypto/internal/hash.h>
31#include <crypto/b128ops.h>
32
33#define IN_INTERRUPT in_interrupt()
34
35#define GHASH_BLOCK_SIZE (16)
36#define GHASH_DIGEST_SIZE (16)
37#define GHASH_KEY_LEN (16)
38
39void gcm_init_p8(u128 htable[16], const u64 Xi[2]);
40void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]);
41void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
42 const u8 *in,size_t len);
43
44struct p8_ghash_ctx {
45 u128 htable[16];
46 struct crypto_shash *fallback;
47};
48
49struct p8_ghash_desc_ctx {
50 u64 shash[2];
51 u8 buffer[GHASH_DIGEST_SIZE];
52 int bytes;
53 struct shash_desc fallback_desc;
54};
55
56static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
57{
58 const char *alg;
59 struct crypto_shash *fallback;
60 struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm);
61 struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
62
63 if (!(alg = crypto_tfm_alg_name(tfm))) {
64 printk(KERN_ERR "Failed to get algorithm name.\n");
65 return -ENOENT;
66 }
67
68	fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
69 if (IS_ERR(fallback)) {
70 printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n",
71 alg, PTR_ERR(fallback));
72 return PTR_ERR(fallback);
73 }
74 printk(KERN_INFO "Using '%s' as fallback implementation.\n",
75 crypto_tfm_alg_driver_name(crypto_shash_tfm(fallback)));
76
77 crypto_shash_set_flags(fallback,
78 crypto_shash_get_flags((struct crypto_shash *) tfm));
79 ctx->fallback = fallback;
80
81 shash_tfm->descsize = sizeof(struct p8_ghash_desc_ctx)
82 + crypto_shash_descsize(fallback);
83
84 return 0;
85}
86
87static void p8_ghash_exit_tfm(struct crypto_tfm *tfm)
88{
89 struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
90
91 if (ctx->fallback) {
92 crypto_free_shash(ctx->fallback);
93 ctx->fallback = NULL;
94 }
95}
96
97static int p8_ghash_init(struct shash_desc *desc)
98{
99 struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
100 struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
101
102 dctx->bytes = 0;
103 memset(dctx->shash, 0, GHASH_DIGEST_SIZE);
104 dctx->fallback_desc.tfm = ctx->fallback;
105 dctx->fallback_desc.flags = desc->flags;
106 return crypto_shash_init(&dctx->fallback_desc);
107}
108
109static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
110 unsigned int keylen)
111{
112 struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm));
113
114 if (keylen != GHASH_KEY_LEN)
115 return -EINVAL;
116
117 pagefault_disable();
118 enable_kernel_altivec();
119 enable_kernel_fp();
120 gcm_init_p8(ctx->htable, (const u64 *) key);
121 pagefault_enable();
122 return crypto_shash_setkey(ctx->fallback, key, keylen);
123}
124
125static int p8_ghash_update(struct shash_desc *desc,
126 const u8 *src, unsigned int srclen)
127{
128 unsigned int len;
129 struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
130 struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
131
132 if (IN_INTERRUPT) {
133 return crypto_shash_update(&dctx->fallback_desc, src, srclen);
134 } else {
135 if (dctx->bytes) {
136 if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) {
137 memcpy(dctx->buffer + dctx->bytes, src, srclen);
138 dctx->bytes += srclen;
139 return 0;
140 }
141 memcpy(dctx->buffer + dctx->bytes, src,
142 GHASH_DIGEST_SIZE - dctx->bytes);
143 pagefault_disable();
144 enable_kernel_altivec();
145 enable_kernel_fp();
146 gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer,
147 GHASH_DIGEST_SIZE);
148 pagefault_enable();
149 src += GHASH_DIGEST_SIZE - dctx->bytes;
150 srclen -= GHASH_DIGEST_SIZE - dctx->bytes;
151 dctx->bytes = 0;
152 }
153 len = srclen & ~(GHASH_DIGEST_SIZE - 1);
154 if (len) {
155 pagefault_disable();
156 enable_kernel_altivec();
157 enable_kernel_fp();
158 gcm_ghash_p8(dctx->shash, ctx->htable, src, len);
159 pagefault_enable();
160 src += len;
161 srclen -= len;
162 }
163 if (srclen) {
164 memcpy(dctx->buffer, src, srclen);
165 dctx->bytes = srclen;
166 }
167 return 0;
168 }
169}
170
171static int p8_ghash_final(struct shash_desc *desc, u8 *out)
172{
173 int i;
174 struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
175 struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
176
177 if (IN_INTERRUPT) {
178 return crypto_shash_final(&dctx->fallback_desc, out);
179 } else {
180 if (dctx->bytes) {
181 for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++)
182 dctx->buffer[i] = 0;
183 pagefault_disable();
184 enable_kernel_altivec();
185 enable_kernel_fp();
186 gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer,
187 GHASH_DIGEST_SIZE);
188 pagefault_enable();
189 dctx->bytes = 0;
190 }
191 memcpy(out, dctx->shash, GHASH_DIGEST_SIZE);
192 return 0;
193 }
194}
195
196struct shash_alg p8_ghash_alg = {
197 .digestsize = GHASH_DIGEST_SIZE,
198 .init = p8_ghash_init,
199 .update = p8_ghash_update,
200 .final = p8_ghash_final,
201 .setkey = p8_ghash_setkey,
202 .descsize = sizeof(struct p8_ghash_desc_ctx),
203 .base = {
204 .cra_name = "ghash",
205 .cra_driver_name = "p8_ghash",
206 .cra_priority = 1000,
207 .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_NEED_FALLBACK,
208 .cra_blocksize = GHASH_BLOCK_SIZE,
209 .cra_ctxsize = sizeof(struct p8_ghash_ctx),
210 .cra_module = THIS_MODULE,
211 .cra_init = p8_ghash_init_tfm,
212 .cra_exit = p8_ghash_exit_tfm,
213 },
214};
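For context, a consumer would reach p8_ghash through the generic shash interface rather than by calling these functions directly; the driver's cra_priority of 1000 lets it win over the generic "ghash" implementation when both are registered. The following is a hypothetical in-kernel caller sketch, not part of this patch, using only the standard shash API:

#include <linux/types.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <crypto/hash.h>

/* Hypothetical helper: one-shot GHASH over a flat buffer. */
static int ghash_digest_example(const u8 key[16], const u8 *data,
				unsigned int len, u8 out[16])
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	tfm = crypto_alloc_shash("ghash", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, 16);
	if (err)
		goto out_free_tfm;

	/* descsize covers p8_ghash_desc_ctx plus the fallback descriptor */
	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		err = -ENOMEM;
		goto out_free_tfm;
	}
	desc->tfm = tfm;

	err = crypto_shash_digest(desc, data, len, out);

	kfree(desc);
out_free_tfm:
	crypto_free_shash(tfm);
	return err;
}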
diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl
new file mode 100644
index 000000000000..0a6f899839dd
--- /dev/null
+++ b/drivers/crypto/vmx/ghashp8-ppc.pl
@@ -0,0 +1,228 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9#
10# GHASH for PowerISA v2.07.
11#
12# July 2014
13#
14# Accurate performance measurements are problematic, because the
15# setup is always virtualized, possibly with a throttled processor.
16# Relative comparison is therefore more informative. This initial
17# version is ~2.1x slower than hardware-assisted AES-128-CTR and ~12x
18# faster than "4-bit" integer-only compiler-generated 64-bit code.
19# "Initial version" means that there is room for further improvement.
20
21$flavour=shift;
22$output =shift;
23
24if ($flavour =~ /64/) {
25 $SIZE_T=8;
26 $LRSAVE=2*$SIZE_T;
27 $STU="stdu";
28 $POP="ld";
29 $PUSH="std";
30} elsif ($flavour =~ /32/) {
31 $SIZE_T=4;
32 $LRSAVE=$SIZE_T;
33 $STU="stwu";
34 $POP="lwz";
35 $PUSH="stw";
36} else { die "nonsense $flavour"; }
37
38$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
39( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
40( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
41die "can't locate ppc-xlate.pl";
42
43open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
44
45my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
46
47my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
48my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
49my $vrsave="r12";
50
51$code=<<___;
52.machine "any"
53
54.text
55
56.globl .gcm_init_p8
57 lis r0,0xfff0
58 li r8,0x10
59 mfspr $vrsave,256
60 li r9,0x20
61 mtspr 256,r0
62 li r10,0x30
63 lvx_u $H,0,r4 # load H
64
65 vspltisb $xC2,-16 # 0xf0
66 vspltisb $t0,1 # one
67 vaddubm $xC2,$xC2,$xC2 # 0xe0
68 vxor $zero,$zero,$zero
69 vor $xC2,$xC2,$t0 # 0xe1
70 vsldoi $xC2,$xC2,$zero,15 # 0xe1...
71 vsldoi $t1,$zero,$t0,1 # ...1
72 vaddubm $xC2,$xC2,$xC2 # 0xc2...
73 vspltisb $t2,7
74 vor $xC2,$xC2,$t1 # 0xc2....01
75 vspltb $t1,$H,0 # most significant byte
76 vsl $H,$H,$t0 # H<<=1
77 vsrab $t1,$t1,$t2 # broadcast carry bit
78 vand $t1,$t1,$xC2
79 vxor $H,$H,$t1 # twisted H
80
81 vsldoi $H,$H,$H,8 # twist even more ...
82 vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
83 vsldoi $Hl,$zero,$H,8 # ... and split
84 vsldoi $Hh,$H,$zero,8
85
86 stvx_u $xC2,0,r3 # save pre-computed table
87 stvx_u $Hl,r8,r3
88 stvx_u $H, r9,r3
89 stvx_u $Hh,r10,r3
90
91 mtspr 256,$vrsave
92 blr
93 .long 0
94 .byte 0,12,0x14,0,0,0,2,0
95 .long 0
96.size .gcm_init_p8,.-.gcm_init_p8
97
98.globl .gcm_gmult_p8
99 lis r0,0xfff8
100 li r8,0x10
101 mfspr $vrsave,256
102 li r9,0x20
103 mtspr 256,r0
104 li r10,0x30
105 lvx_u $IN,0,$Xip # load Xi
106
107 lvx_u $Hl,r8,$Htbl # load pre-computed table
108 le?lvsl $lemask,r0,r0
109 lvx_u $H, r9,$Htbl
110 le?vspltisb $t0,0x07
111 lvx_u $Hh,r10,$Htbl
112 le?vxor $lemask,$lemask,$t0
113 lvx_u $xC2,0,$Htbl
114 le?vperm $IN,$IN,$IN,$lemask
115 vxor $zero,$zero,$zero
116
117 vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
118 vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
119 vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
120
121 vpmsumd $t2,$Xl,$xC2 # 1st phase
122
123 vsldoi $t0,$Xm,$zero,8
124 vsldoi $t1,$zero,$Xm,8
125 vxor $Xl,$Xl,$t0
126 vxor $Xh,$Xh,$t1
127
128 vsldoi $Xl,$Xl,$Xl,8
129 vxor $Xl,$Xl,$t2
130
131 vsldoi $t1,$Xl,$Xl,8 # 2nd phase
132 vpmsumd $Xl,$Xl,$xC2
133 vxor $t1,$t1,$Xh
134 vxor $Xl,$Xl,$t1
135
136 le?vperm $Xl,$Xl,$Xl,$lemask
137 stvx_u $Xl,0,$Xip # write out Xi
138
139 mtspr 256,$vrsave
140 blr
141 .long 0
142 .byte 0,12,0x14,0,0,0,2,0
143 .long 0
144.size .gcm_gmult_p8,.-.gcm_gmult_p8
145
146.globl .gcm_ghash_p8
147 lis r0,0xfff8
148 li r8,0x10
149 mfspr $vrsave,256
150 li r9,0x20
151 mtspr 256,r0
152 li r10,0x30
153 lvx_u $Xl,0,$Xip # load Xi
154
155 lvx_u $Hl,r8,$Htbl # load pre-computed table
156 le?lvsl $lemask,r0,r0
157 lvx_u $H, r9,$Htbl
158 le?vspltisb $t0,0x07
159 lvx_u $Hh,r10,$Htbl
160 le?vxor $lemask,$lemask,$t0
161 lvx_u $xC2,0,$Htbl
162 le?vperm $Xl,$Xl,$Xl,$lemask
163 vxor $zero,$zero,$zero
164
165 lvx_u $IN,0,$inp
166 addi $inp,$inp,16
167 subi $len,$len,16
168 le?vperm $IN,$IN,$IN,$lemask
169 vxor $IN,$IN,$Xl
170 b Loop
171
172.align 5
173Loop:
174 subic $len,$len,16
175 vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
176 subfe. r0,r0,r0 # borrow?-1:0
177 vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
178 and r0,r0,$len
179 vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
180 add $inp,$inp,r0
181
182 vpmsumd $t2,$Xl,$xC2 # 1st phase
183
184 vsldoi $t0,$Xm,$zero,8
185 vsldoi $t1,$zero,$Xm,8
186 vxor $Xl,$Xl,$t0
187 vxor $Xh,$Xh,$t1
188
189 vsldoi $Xl,$Xl,$Xl,8
190 vxor $Xl,$Xl,$t2
191 lvx_u $IN,0,$inp
192 addi $inp,$inp,16
193
194 vsldoi $t1,$Xl,$Xl,8 # 2nd phase
195 vpmsumd $Xl,$Xl,$xC2
196 le?vperm $IN,$IN,$IN,$lemask
197 vxor $t1,$t1,$Xh
198 vxor $IN,$IN,$t1
199 vxor $IN,$IN,$Xl
200 beq Loop # did $len-=16 borrow?
201
202 vxor $Xl,$Xl,$t1
203 le?vperm $Xl,$Xl,$Xl,$lemask
204 stvx_u $Xl,0,$Xip # write out Xi
205
206 mtspr 256,$vrsave
207 blr
208 .long 0
209 .byte 0,12,0x14,0,0,0,4,0
210 .long 0
211.size .gcm_ghash_p8,.-.gcm_ghash_p8
212
213.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
214.align 2
215___
216
217foreach (split("\n",$code)) {
218 if ($flavour =~ /le$/o) { # little-endian
219 s/le\?//o or
220 s/be\?/#be#/o;
221 } else {
222 s/le\?/#le#/o or
223 s/be\?//o;
224 }
225 print $_,"\n";
226}
227
228close STDOUT; # enforce flush
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
new file mode 100644
index 000000000000..a59188494af8
--- /dev/null
+++ b/drivers/crypto/vmx/ppc-xlate.pl
@@ -0,0 +1,207 @@
1#!/usr/bin/env perl
2
3# PowerPC assembler distiller by <appro>.
4
5my $flavour = shift;
6my $output = shift;
7open STDOUT,">$output" || die "can't open $output: $!";
8
9my %GLOBALS;
10my $dotinlocallabels=($flavour=~/linux/)?1:0;
11
12################################################################
13# directives which need special treatment on different platforms
14################################################################
15my $globl = sub {
16 my $junk = shift;
17 my $name = shift;
18 my $global = \$GLOBALS{$name};
19 my $ret;
20
21 $name =~ s|^[\.\_]||;
22
23 SWITCH: for ($flavour) {
24 /aix/ && do { $name = ".$name";
25 last;
26 };
27 /osx/ && do { $name = "_$name";
28 last;
29 };
30 /linux/
31 && do { $ret = "_GLOBAL($name)";
32 last;
33 };
34 }
35
36 $ret = ".globl $name\nalign 5\n$name:" if (!$ret);
37 $$global = $name;
38 $ret;
39};
40my $text = sub {
41 my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
42 $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
43 $ret;
44};
45my $machine = sub {
46 my $junk = shift;
47 my $arch = shift;
48 if ($flavour =~ /osx/)
49 { $arch =~ s/\"//g;
50 $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
51 }
52 ".machine $arch";
53};
54my $size = sub {
55 if ($flavour =~ /linux/)
56 { shift;
57 my $name = shift; $name =~ s|^[\.\_]||;
58 my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name;
59 $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/);
60 $ret;
61 }
62 else
63 { ""; }
64};
65my $asciz = sub {
66 shift;
67 my $line = join(",",@_);
68 if ($line =~ /^"(.*)"$/)
69 { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
70 else
71 { ""; }
72};
73my $quad = sub {
74 shift;
75 my @ret;
76 my ($hi,$lo);
77 for (@_) {
78 if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
79 { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
80 elsif (/^([0-9]+)$/o)
81 { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
82 else
83 { $hi=undef; $lo=$_; }
84
85 if (defined($hi))
86 { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
87 else
88 { push(@ret,".quad $lo"); }
89 }
90 join("\n",@ret);
91};
92
93################################################################
94# simplified mnemonics not handled by at least one assembler
95################################################################
96my $cmplw = sub {
97 my $f = shift;
98 my $cr = 0; $cr = shift if ($#_>1);
99 # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
100 ($flavour =~ /linux.*32/) ?
101 " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
102 " cmplw ".join(',',$cr,@_);
103};
104my $bdnz = sub {
105 my $f = shift;
106 my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
107 " bc $bo,0,".shift;
108} if ($flavour!~/linux/);
109my $bltlr = sub {
110 my $f = shift;
111 my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
112 ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
113 " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
114 " bclr $bo,0";
115};
116my $bnelr = sub {
117 my $f = shift;
118 my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
119 ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
120 " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
121 " bclr $bo,2";
122};
123my $beqlr = sub {
124 my $f = shift;
125 my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
126 ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
127 " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
128 " bclr $bo,2";
129};
130# GNU assembler can't handle extrdi rA,rS,16,48, i.e. cases where the sum of
131# the last two arguments is 64; it fails with an "operand out of range" error.
132my $extrdi = sub {
133 my ($f,$ra,$rs,$n,$b) = @_;
134 $b = ($b+$n)&63; $n = 64-$n;
135 " rldicl $ra,$rs,$b,$n";
136};
137my $vmr = sub {
138 my ($f,$vx,$vy) = @_;
139 " vor $vx,$vy,$vy";
140};
141
142# PowerISA 2.06 stuff
143sub vsxmem_op {
144 my ($f, $vrt, $ra, $rb, $op) = @_;
145 " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
146}
147# made-up unaligned memory reference AltiVec/VMX instructions
148my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
149my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
150my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
151my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
152my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
153my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
154
155# PowerISA 2.07 stuff
156sub vcrypto_op {
157 my ($f, $vrt, $vra, $vrb, $op) = @_;
158 " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
159}
160my $vcipher = sub { vcrypto_op(@_, 1288); };
161my $vcipherlast = sub { vcrypto_op(@_, 1289); };
162my $vncipher = sub { vcrypto_op(@_, 1352); };
163my $vncipherlast= sub { vcrypto_op(@_, 1353); };
164my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
165my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
166my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
167my $vpmsumb = sub { vcrypto_op(@_, 1032); };
168my $vpmsumd = sub { vcrypto_op(@_, 1224); };
169my $vpmsubh = sub { vcrypto_op(@_, 1096); };
170my $vpmsumw = sub { vcrypto_op(@_, 1160); };
171my $vaddudm = sub { vcrypto_op(@_, 192); };
172
173my $mtsle = sub {
174 my ($f, $arg) = @_;
175 " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
176};
177
178print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
179
180while($line=<>) {
181
182 $line =~ s|[#!;].*$||; # get rid of asm-style comments...
183 $line =~ s|/\*.*\*/||; # ... and C-style comments...
184 $line =~ s|^\s+||; # ... and skip white spaces in beginning...
185 $line =~ s|\s+$||; # ... and at the end
186
187 {
188 $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
189 $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
190 }
191
192 {
193 $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
194 my $c = $1; $c = "\t" if ($c eq "");
195 my $mnemonic = $2;
196 my $f = $3;
197 my $opcode = eval("\$$mnemonic");
198 $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
199 if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
200 elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
201 }
202
203 print $line if ($line);
204 print "\n";
205}
206
207close STDOUT;
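The vcrypto_op() helper above lets the distiller emit PowerISA 2.07 crypto instructions as raw .long words, so the generated assembly still builds with toolchains that predate those mnemonics. As a quick sanity sketch (a hypothetical C check, not part of the patch), the word it would produce for vcipher v0,v1,v2 can be computed like this:

#include <stdint.h>
#include <stdio.h>

/* Mirror of the Perl vcrypto_op(): major opcode 4, VRT/VRA/VRB fields, XO. */
static uint32_t vcrypto_op(unsigned vrt, unsigned vra, unsigned vrb, unsigned xo)
{
	return (4u << 26) | (vrt << 21) | (vra << 16) | (vrb << 11) | xo;
}

int main(void)
{
	/* vcipher v0,v1,v2 -> XO 1288 (0x508), expected word 0x10011508 */
	printf("0x%08x\n", vcrypto_op(0, 1, 2, 1288));
	return 0;
}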
diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c
new file mode 100644
index 000000000000..44d8d5cfe40d
--- /dev/null
+++ b/drivers/crypto/vmx/vmx.c
@@ -0,0 +1,88 @@
1/**
2 * Routines supporting VMX instructions on the Power 8
3 *
4 * Copyright (C) 2015 International Business Machines Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 only.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 *
19 * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
20 */
21
22#include <linux/module.h>
23#include <linux/moduleparam.h>
24#include <linux/types.h>
25#include <linux/err.h>
26#include <linux/crypto.h>
27#include <asm/cputable.h>
28#include <crypto/internal/hash.h>
29
30extern struct shash_alg p8_ghash_alg;
31extern struct crypto_alg p8_aes_alg;
32extern struct crypto_alg p8_aes_cbc_alg;
33extern struct crypto_alg p8_aes_ctr_alg;
34static struct crypto_alg *algs[] = {
35 &p8_aes_alg,
36 &p8_aes_cbc_alg,
37 &p8_aes_ctr_alg,
38 NULL,
39};
40
41int __init p8_init(void)
42{
43 int ret = 0;
44 struct crypto_alg **alg_it;
45
46 if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO))
47 return -ENODEV;
48
49 for (alg_it = algs; *alg_it; alg_it++) {
50 ret = crypto_register_alg(*alg_it);
51 printk(KERN_INFO "crypto_register_alg '%s' = %d\n",
52 (*alg_it)->cra_name, ret);
53 if (ret) {
54 for (alg_it--; alg_it >= algs; alg_it--)
55 crypto_unregister_alg(*alg_it);
56 break;
57 }
58 }
59 if (ret)
60 return ret;
61
62 ret = crypto_register_shash(&p8_ghash_alg);
63 if (ret) {
64 for (alg_it = algs; *alg_it; alg_it++)
65 crypto_unregister_alg(*alg_it);
66 }
67 return ret;
68}
69
70void __exit p8_exit(void)
71{
72 struct crypto_alg **alg_it;
73
74 for (alg_it = algs; *alg_it; alg_it++) {
75 printk(KERN_INFO "Removing '%s'\n", (*alg_it)->cra_name);
76 crypto_unregister_alg(*alg_it);
77 }
78 crypto_unregister_shash(&p8_ghash_alg);
79}
80
81module_init(p8_init);
82module_exit(p8_exit);
83
84MODULE_AUTHOR("Marcelo Cerri <mhcerri@br.ibm.com>");
85MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions support on Power 8");
86MODULE_LICENSE("GPL");
87MODULE_VERSION("1.0.0");
88
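
A hedged usage sketch, not part of the patch: once p8_init() has registered the algorithms, consumers simply ask for the generic name and the crypto API picks an implementation (normally by cra_priority); on a POWER8 with PPC_FEATURE2_VEC_CRYPTO the VMX AES registered above is a candidate. Caller-supplied key/src/dst buffers are assumed:

	struct crypto_cipher *tfm = crypto_alloc_cipher("aes", 0, 0);

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	crypto_cipher_setkey(tfm, key, 16);        /* 128-bit key */
	crypto_cipher_encrypt_one(tfm, dst, src);  /* one 16-byte block */
	crypto_free_cipher(tfm);
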
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 623a59c1ff5a..0ecb7688af71 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -137,7 +137,7 @@ struct crypto_template *crypto_lookup_template(const char *name);
137 137
138int crypto_register_instance(struct crypto_template *tmpl, 138int crypto_register_instance(struct crypto_template *tmpl,
139 struct crypto_instance *inst); 139 struct crypto_instance *inst);
140int crypto_unregister_instance(struct crypto_alg *alg); 140int crypto_unregister_instance(struct crypto_instance *inst);
141 141
142int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, 142int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg,
143 struct crypto_instance *inst, u32 mask); 143 struct crypto_instance *inst, u32 mask);
diff --git a/include/crypto/rng.h b/include/crypto/rng.h
index a16fb10142bf..6e28ea5be9f1 100644
--- a/include/crypto/rng.h
+++ b/include/crypto/rng.h
@@ -103,8 +103,7 @@ static inline void crypto_free_rng(struct crypto_rng *tfm)
103 * This function fills the caller-allocated buffer with random numbers using the 103 * This function fills the caller-allocated buffer with random numbers using the
104 * random number generator referenced by the cipher handle. 104 * random number generator referenced by the cipher handle.
105 * 105 *
106 * Return: > 0 function was successful and returns the number of generated 106 * Return: 0 function was successful; < 0 if an error occurred
107 * bytes; < 0 if an error occurred
108 */ 107 */
109static inline int crypto_rng_get_bytes(struct crypto_rng *tfm, 108static inline int crypto_rng_get_bytes(struct crypto_rng *tfm,
110 u8 *rdata, unsigned int dlen) 109 u8 *rdata, unsigned int dlen)
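
With the corrected convention, crypto_rng_get_bytes() is checked for 0 rather than for a positive byte count. A hedged sketch, not part of the patch, assuming the default "stdrng" DRBG is available and already seeded:

	struct crypto_rng *rng = crypto_alloc_rng("stdrng", 0, 0);
	u8 buf[32];
	int err;

	if (IS_ERR(rng))
		return PTR_ERR(rng);

	err = crypto_rng_get_bytes(rng, buf, sizeof(buf)); /* 0 or -errno */
	crypto_free_rng(rng);
	return err;
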
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
index 190f8a0e0242..dd7905a3c22e 100644
--- a/include/crypto/sha.h
+++ b/include/crypto/sha.h
@@ -65,20 +65,20 @@
65#define SHA512_H7 0x5be0cd19137e2179ULL 65#define SHA512_H7 0x5be0cd19137e2179ULL
66 66
67struct sha1_state { 67struct sha1_state {
68 u64 count;
69 u32 state[SHA1_DIGEST_SIZE / 4]; 68 u32 state[SHA1_DIGEST_SIZE / 4];
69 u64 count;
70 u8 buffer[SHA1_BLOCK_SIZE]; 70 u8 buffer[SHA1_BLOCK_SIZE];
71}; 71};
72 72
73struct sha256_state { 73struct sha256_state {
74 u64 count;
75 u32 state[SHA256_DIGEST_SIZE / 4]; 74 u32 state[SHA256_DIGEST_SIZE / 4];
75 u64 count;
76 u8 buf[SHA256_BLOCK_SIZE]; 76 u8 buf[SHA256_BLOCK_SIZE];
77}; 77};
78 78
79struct sha512_state { 79struct sha512_state {
80 u64 count[2];
81 u64 state[SHA512_DIGEST_SIZE / 8]; 80 u64 state[SHA512_DIGEST_SIZE / 8];
81 u64 count[2];
82 u8 buf[SHA512_BLOCK_SIZE]; 82 u8 buf[SHA512_BLOCK_SIZE];
83}; 83};
84 84
@@ -87,9 +87,18 @@ struct shash_desc;
87extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, 87extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
88 unsigned int len); 88 unsigned int len);
89 89
90extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
91 unsigned int len, u8 *hash);
92
90extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data, 93extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
91 unsigned int len); 94 unsigned int len);
92 95
96extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
97 unsigned int len, u8 *hash);
98
93extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data, 99extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
94 unsigned int len); 100 unsigned int len);
101
102extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
103 unsigned int len, u8 *hash);
95#endif 104#endif
diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h
new file mode 100644
index 000000000000..d0df431f9a97
--- /dev/null
+++ b/include/crypto/sha1_base.h
@@ -0,0 +1,106 @@
1/*
2 * sha1_base.h - core logic for SHA-1 implementations
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <crypto/internal/hash.h>
12#include <crypto/sha.h>
13#include <linux/crypto.h>
14#include <linux/module.h>
15
16#include <asm/unaligned.h>
17
18typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks);
19
20static inline int sha1_base_init(struct shash_desc *desc)
21{
22 struct sha1_state *sctx = shash_desc_ctx(desc);
23
24 sctx->state[0] = SHA1_H0;
25 sctx->state[1] = SHA1_H1;
26 sctx->state[2] = SHA1_H2;
27 sctx->state[3] = SHA1_H3;
28 sctx->state[4] = SHA1_H4;
29 sctx->count = 0;
30
31 return 0;
32}
33
34static inline int sha1_base_do_update(struct shash_desc *desc,
35 const u8 *data,
36 unsigned int len,
37 sha1_block_fn *block_fn)
38{
39 struct sha1_state *sctx = shash_desc_ctx(desc);
40 unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
41
42 sctx->count += len;
43
44 if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) {
45 int blocks;
46
47 if (partial) {
48 int p = SHA1_BLOCK_SIZE - partial;
49
50 memcpy(sctx->buffer + partial, data, p);
51 data += p;
52 len -= p;
53
54 block_fn(sctx, sctx->buffer, 1);
55 }
56
57 blocks = len / SHA1_BLOCK_SIZE;
58 len %= SHA1_BLOCK_SIZE;
59
60 if (blocks) {
61 block_fn(sctx, data, blocks);
62 data += blocks * SHA1_BLOCK_SIZE;
63 }
64 partial = 0;
65 }
66 if (len)
67 memcpy(sctx->buffer + partial, data, len);
68
69 return 0;
70}
71
72static inline int sha1_base_do_finalize(struct shash_desc *desc,
73 sha1_block_fn *block_fn)
74{
75 const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
76 struct sha1_state *sctx = shash_desc_ctx(desc);
77 __be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
78 unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
79
80 sctx->buffer[partial++] = 0x80;
81 if (partial > bit_offset) {
82 memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial);
83 partial = 0;
84
85 block_fn(sctx, sctx->buffer, 1);
86 }
87
88 memset(sctx->buffer + partial, 0x0, bit_offset - partial);
89 *bits = cpu_to_be64(sctx->count << 3);
90 block_fn(sctx, sctx->buffer, 1);
91
92 return 0;
93}
94
95static inline int sha1_base_finish(struct shash_desc *desc, u8 *out)
96{
97 struct sha1_state *sctx = shash_desc_ctx(desc);
98 __be32 *digest = (__be32 *)out;
99 int i;
100
101 for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
102 put_unaligned_be32(sctx->state[i], digest++);
103
104 *sctx = (struct sha1_state){};
105 return 0;
106}
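
The base layer leaves exactly one thing to the architecture code: the block transform. A hedged sketch of the glue pattern these helpers are written for; my_sha1_transform() stands in for a hypothetical arch-specific compression function over 64-byte blocks (not part of the patch):

static void my_sha1_transform(struct sha1_state *sst, u8 const *src,
			      int blocks);	/* hypothetical arch routine */

static int my_sha1_update(struct shash_desc *desc, const u8 *data,
			  unsigned int len)
{
	return sha1_base_do_update(desc, data, len, my_sha1_transform);
}

static int my_sha1_finup(struct shash_desc *desc, const u8 *data,
			 unsigned int len, u8 *out)
{
	if (len)
		sha1_base_do_update(desc, data, len, my_sha1_transform);
	sha1_base_do_finalize(desc, my_sha1_transform);
	return sha1_base_finish(desc, out);
}
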
diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h
new file mode 100644
index 000000000000..d1f2195bb7de
--- /dev/null
+++ b/include/crypto/sha256_base.h
@@ -0,0 +1,128 @@
1/*
2 * sha256_base.h - core logic for SHA-256 implementations
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <crypto/internal/hash.h>
12#include <crypto/sha.h>
13#include <linux/crypto.h>
14#include <linux/module.h>
15
16#include <asm/unaligned.h>
17
18typedef void (sha256_block_fn)(struct sha256_state *sst, u8 const *src,
19 int blocks);
20
21static inline int sha224_base_init(struct shash_desc *desc)
22{
23 struct sha256_state *sctx = shash_desc_ctx(desc);
24
25 sctx->state[0] = SHA224_H0;
26 sctx->state[1] = SHA224_H1;
27 sctx->state[2] = SHA224_H2;
28 sctx->state[3] = SHA224_H3;
29 sctx->state[4] = SHA224_H4;
30 sctx->state[5] = SHA224_H5;
31 sctx->state[6] = SHA224_H6;
32 sctx->state[7] = SHA224_H7;
33 sctx->count = 0;
34
35 return 0;
36}
37
38static inline int sha256_base_init(struct shash_desc *desc)
39{
40 struct sha256_state *sctx = shash_desc_ctx(desc);
41
42 sctx->state[0] = SHA256_H0;
43 sctx->state[1] = SHA256_H1;
44 sctx->state[2] = SHA256_H2;
45 sctx->state[3] = SHA256_H3;
46 sctx->state[4] = SHA256_H4;
47 sctx->state[5] = SHA256_H5;
48 sctx->state[6] = SHA256_H6;
49 sctx->state[7] = SHA256_H7;
50 sctx->count = 0;
51
52 return 0;
53}
54
55static inline int sha256_base_do_update(struct shash_desc *desc,
56 const u8 *data,
57 unsigned int len,
58 sha256_block_fn *block_fn)
59{
60 struct sha256_state *sctx = shash_desc_ctx(desc);
61 unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
62
63 sctx->count += len;
64
65 if (unlikely((partial + len) >= SHA256_BLOCK_SIZE)) {
66 int blocks;
67
68 if (partial) {
69 int p = SHA256_BLOCK_SIZE - partial;
70
71 memcpy(sctx->buf + partial, data, p);
72 data += p;
73 len -= p;
74
75 block_fn(sctx, sctx->buf, 1);
76 }
77
78 blocks = len / SHA256_BLOCK_SIZE;
79 len %= SHA256_BLOCK_SIZE;
80
81 if (blocks) {
82 block_fn(sctx, data, blocks);
83 data += blocks * SHA256_BLOCK_SIZE;
84 }
85 partial = 0;
86 }
87 if (len)
88 memcpy(sctx->buf + partial, data, len);
89
90 return 0;
91}
92
93static inline int sha256_base_do_finalize(struct shash_desc *desc,
94 sha256_block_fn *block_fn)
95{
96 const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64);
97 struct sha256_state *sctx = shash_desc_ctx(desc);
98 __be64 *bits = (__be64 *)(sctx->buf + bit_offset);
99 unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
100
101 sctx->buf[partial++] = 0x80;
102 if (partial > bit_offset) {
103 memset(sctx->buf + partial, 0x0, SHA256_BLOCK_SIZE - partial);
104 partial = 0;
105
106 block_fn(sctx, sctx->buf, 1);
107 }
108
109 memset(sctx->buf + partial, 0x0, bit_offset - partial);
110 *bits = cpu_to_be64(sctx->count << 3);
111 block_fn(sctx, sctx->buf, 1);
112
113 return 0;
114}
115
116static inline int sha256_base_finish(struct shash_desc *desc, u8 *out)
117{
118 unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
119 struct sha256_state *sctx = shash_desc_ctx(desc);
120 __be32 *digest = (__be32 *)out;
121 int i;
122
123 for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be32))
124 put_unaligned_be32(sctx->state[i], digest++);
125
126 *sctx = (struct sha256_state){};
127 return 0;
128}
diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h
new file mode 100644
index 000000000000..6c5341e005ea
--- /dev/null
+++ b/include/crypto/sha512_base.h
@@ -0,0 +1,131 @@
1/*
2 * sha512_base.h - core logic for SHA-512 implementations
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <crypto/internal/hash.h>
12#include <crypto/sha.h>
13#include <linux/crypto.h>
14#include <linux/module.h>
15
16#include <asm/unaligned.h>
17
18typedef void (sha512_block_fn)(struct sha512_state *sst, u8 const *src,
19 int blocks);
20
21static inline int sha384_base_init(struct shash_desc *desc)
22{
23 struct sha512_state *sctx = shash_desc_ctx(desc);
24
25 sctx->state[0] = SHA384_H0;
26 sctx->state[1] = SHA384_H1;
27 sctx->state[2] = SHA384_H2;
28 sctx->state[3] = SHA384_H3;
29 sctx->state[4] = SHA384_H4;
30 sctx->state[5] = SHA384_H5;
31 sctx->state[6] = SHA384_H6;
32 sctx->state[7] = SHA384_H7;
33 sctx->count[0] = sctx->count[1] = 0;
34
35 return 0;
36}
37
38static inline int sha512_base_init(struct shash_desc *desc)
39{
40 struct sha512_state *sctx = shash_desc_ctx(desc);
41
42 sctx->state[0] = SHA512_H0;
43 sctx->state[1] = SHA512_H1;
44 sctx->state[2] = SHA512_H2;
45 sctx->state[3] = SHA512_H3;
46 sctx->state[4] = SHA512_H4;
47 sctx->state[5] = SHA512_H5;
48 sctx->state[6] = SHA512_H6;
49 sctx->state[7] = SHA512_H7;
50 sctx->count[0] = sctx->count[1] = 0;
51
52 return 0;
53}
54
55static inline int sha512_base_do_update(struct shash_desc *desc,
56 const u8 *data,
57 unsigned int len,
58 sha512_block_fn *block_fn)
59{
60 struct sha512_state *sctx = shash_desc_ctx(desc);
61 unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
62
63 sctx->count[0] += len;
64 if (sctx->count[0] < len)
65 sctx->count[1]++;
66
67 if (unlikely((partial + len) >= SHA512_BLOCK_SIZE)) {
68 int blocks;
69
70 if (partial) {
71 int p = SHA512_BLOCK_SIZE - partial;
72
73 memcpy(sctx->buf + partial, data, p);
74 data += p;
75 len -= p;
76
77 block_fn(sctx, sctx->buf, 1);
78 }
79
80 blocks = len / SHA512_BLOCK_SIZE;
81 len %= SHA512_BLOCK_SIZE;
82
83 if (blocks) {
84 block_fn(sctx, data, blocks);
85 data += blocks * SHA512_BLOCK_SIZE;
86 }
87 partial = 0;
88 }
89 if (len)
90 memcpy(sctx->buf + partial, data, len);
91
92 return 0;
93}
94
95static inline int sha512_base_do_finalize(struct shash_desc *desc,
96 sha512_block_fn *block_fn)
97{
98 const int bit_offset = SHA512_BLOCK_SIZE - sizeof(__be64[2]);
99 struct sha512_state *sctx = shash_desc_ctx(desc);
100 __be64 *bits = (__be64 *)(sctx->buf + bit_offset);
101 unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
102
103 sctx->buf[partial++] = 0x80;
104 if (partial > bit_offset) {
105 memset(sctx->buf + partial, 0x0, SHA512_BLOCK_SIZE - partial);
106 partial = 0;
107
108 block_fn(sctx, sctx->buf, 1);
109 }
110
111 memset(sctx->buf + partial, 0x0, bit_offset - partial);
112 bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
113 bits[1] = cpu_to_be64(sctx->count[0] << 3);
114 block_fn(sctx, sctx->buf, 1);
115
116 return 0;
117}
118
119static inline int sha512_base_finish(struct shash_desc *desc, u8 *out)
120{
121 unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
122 struct sha512_state *sctx = shash_desc_ctx(desc);
123 __be64 *digest = (__be64 *)out;
124 int i;
125
126 for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64))
127 put_unaligned_be64(sctx->state[i], digest++);
128
129 *sctx = (struct sha512_state){};
130 return 0;
131}
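
Two details distinguish the SHA-512 helpers: the message length is a 128-bit value (the byte count in count[1]:count[0], shifted left by 3 across both words to express it in bits), and sha512_base_finish() writes crypto_shash_digestsize() bytes, so SHA-384 and SHA-512 can share every helper and differ only in init and digest size. A hedged sketch, not part of the patch, with hypothetical my_sha512_update()/my_sha512_final() wrappers in the style of the SHA-1 sketch above:

static struct shash_alg my_algs[] = { {
	.digestsize = SHA512_DIGEST_SIZE,
	.init       = sha512_base_init,
	.update     = my_sha512_update,
	.final      = my_sha512_final,
	.descsize   = sizeof(struct sha512_state),
	.base       = {
		.cra_name      = "sha512",
		.cra_blocksize = SHA512_BLOCK_SIZE,
		.cra_module    = THIS_MODULE,
	},
}, {
	.digestsize = SHA384_DIGEST_SIZE,
	.init       = sha384_base_init,
	.update     = my_sha512_update,
	.final      = my_sha512_final,
	.descsize   = sizeof(struct sha512_state),
	.base       = {
		.cra_name      = "sha384",
		.cra_blocksize = SHA384_BLOCK_SIZE,
		.cra_module    = THIS_MODULE,
	},
} };
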
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index fb5ef16d6a12..10df5d2d093a 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -95,6 +95,12 @@
95#define CRYPTO_ALG_KERN_DRIVER_ONLY 0x00001000 95#define CRYPTO_ALG_KERN_DRIVER_ONLY 0x00001000
96 96
97/* 97/*
98 * Mark a cipher as a service implementation only usable by another
99 * cipher and never by a normal user of the kernel crypto API
100 */
101#define CRYPTO_ALG_INTERNAL 0x00002000
102
103/*
98 * Transform masks and values (for crt_flags). 104 * Transform masks and values (for crt_flags).
99 */ 105 */
100#define CRYPTO_TFM_REQ_MASK 0x000fff00 106#define CRYPTO_TFM_REQ_MASK 0x000fff00
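
CRYPTO_ALG_INTERNAL is honoured on both sides: the helper-only implementation sets it in cra_flags, and only a caller that passes the flag explicitly in the type/mask pair can instantiate it; a plain lookup by name never will. A hedged sketch, not part of the patch ("sha256-mb-helper" is a hypothetical driver name):

static struct shash_alg helper_alg = {
	.digestsize = SHA256_DIGEST_SIZE,
	.base = {
		.cra_name        = "sha256",
		.cra_driver_name = "sha256-mb-helper",
		.cra_flags       = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
	},
};

static struct crypto_shash *get_helper(void)
{
	/* crypto_alloc_shash("sha256", 0, 0) will never pick helper_alg */
	return crypto_alloc_shash("sha256-mb-helper",
				  CRYPTO_ALG_INTERNAL, CRYPTO_ALG_INTERNAL);
}
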
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index eb7b414d232b..4f7d8f4b1e9a 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -50,10 +50,14 @@ struct hwrng {
50 struct completion cleanup_done; 50 struct completion cleanup_done;
51}; 51};
52 52
53struct device;
54
53/** Register a new Hardware Random Number Generator driver. */ 55/** Register a new Hardware Random Number Generator driver. */
54extern int hwrng_register(struct hwrng *rng); 56extern int hwrng_register(struct hwrng *rng);
57extern int devm_hwrng_register(struct device *dev, struct hwrng *rng);
55/** Unregister a Hardware Random Number Generator driver. */ 58/** Unregister a Hardware Random Number Generator driver. */
56extern void hwrng_unregister(struct hwrng *rng); 59extern void hwrng_unregister(struct hwrng *rng);
60extern void devm_hwrng_unregister(struct device *dev, struct hwrng *rng);
57/** Feed random bits into the pool. */ 61/** Feed random bits into the pool. */
58extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); 62extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy);
59 63
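
devm_hwrng_register() ties the RNG's registration to the owning device, so a driver needs no explicit unregister path on its error and remove sides. A hedged probe sketch, not part of the patch; the names are hypothetical:

static int my_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
	/* hypothetical: copy up to 'max' bytes from the hardware FIFO */
	return 0;
}

static struct hwrng my_rng = {
	.name = "my-rng",
	.read = my_rng_read,
};

static int my_rng_probe(struct platform_device *pdev)
{
	/* torn down automatically when the device is unbound */
	return devm_hwrng_register(&pdev->dev, &my_rng);
}
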
diff --git a/lib/string.c b/lib/string.c
index ce81aaec3839..a5792019193c 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -607,7 +607,7 @@ EXPORT_SYMBOL(memset);
607void memzero_explicit(void *s, size_t count) 607void memzero_explicit(void *s, size_t count)
608{ 608{
609 memset(s, 0, count); 609 memset(s, 0, count);
610 OPTIMIZER_HIDE_VAR(s); 610 barrier();
611} 611}
612EXPORT_SYMBOL(memzero_explicit); 612EXPORT_SYMBOL(memzero_explicit);
613 613
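
memzero_explicit() exists for the one case a plain memset() is allowed to break: wiping a buffer that is dead immediately afterwards, where dead-store elimination may drop the zeroing entirely. A hedged usage sketch, not part of the patch; the helper name is hypothetical:

static int do_one_op(const u8 *key, const u8 *in, u8 *out)
{
	u8 round_keys[176];	/* hypothetical expanded AES-128 schedule */
	int err;

	err = my_expand_and_encrypt(key, round_keys, in, out); /* hypothetical */

	/* the barrier() added above keeps these stores from being elided */
	memzero_explicit(round_keys, sizeof(round_keys));
	return err;
}
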