author    Linus Torvalds <torvalds@linux-foundation.org>    2015-04-15 13:42:15 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2015-04-15 13:42:15 -0400
commit    cb906953d2c3fd450655d9fa833f03690ad50c23 (patch)
tree      06c5665afb24baee3ac49f62db61ca97918079b4
parent    6c373ca89399c5a3f7ef210ad8f63dc3437da345 (diff)
parent    3abafaf2192b1712079edfd4232b19877d6f41a5 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
"Here is the crypto update for 4.1:
New interfaces:
- user-space interface for AEAD
- user-space interface for RNG (i.e., pseudo RNG)
New hashes:
- ARMv8 SHA1/256
- ARMv8 AES
- ARMv8 GHASH
- ARM assembler and NEON SHA256
- MIPS OCTEON SHA1/256/512
- MIPS img-hash SHA1/256 and MD5
- Power 8 VMX AES/CBC/CTR/GHASH
- PPC assembler AES, SHA1/256 and MD5
- Broadcom IPROC RNG driver
Cleanups/fixes:
- prevent internal helper algos from being exposed to user-space
- merge common code from assembly/C SHA implementations
- misc fixes"
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (169 commits)
crypto: arm - workaround for building with old binutils
crypto: arm/sha256 - avoid sha256 code on ARMv7-M
crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer
crypto: x86/sha256_ssse3 - move SHA-224/256 SSSE3 implementation to base layer
crypto: x86/sha1_ssse3 - move SHA-1 SSSE3 implementation to base layer
crypto: arm64/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer
crypto: arm/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer
crypto: arm/sha256 - move SHA-224/256 ASM/NEON implementation to base layer
crypto: arm/sha1-ce - move SHA-1 ARMv8 implementation to base layer
crypto: arm/sha1_neon - move SHA-1 NEON implementation to base layer
crypto: arm/sha1 - move SHA-1 ARM asm implementation to base layer
crypto: sha512-generic - move to generic glue implementation
crypto: sha256-generic - move to generic glue implementation
crypto: sha1-generic - move to generic glue implementation
crypto: sha512 - implement base layer for SHA-512
crypto: sha256 - implement base layer for SHA-256
crypto: sha1 - implement base layer for SHA-1
crypto: api - remove instance when test failed
crypto: api - Move alg ref count init to crypto_check_alg
...
168 files changed, 18223 insertions, 2202 deletions
diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl
index 04a8c24ead47..efc8d90a9a3f 100644
--- a/Documentation/DocBook/crypto-API.tmpl
+++ b/Documentation/DocBook/crypto-API.tmpl
@@ -509,6 +509,270 @@
 select it due to the used type and mask field.
 </para>
 </sect1>
+
+<sect1><title>Internal Structure of Kernel Crypto API</title>
+
+<para>
+The kernel crypto API has an internal structure where a cipher
+implementation may use many layers and indirections. This section
+helps to clarify how the kernel crypto API uses
+various components to implement the complete cipher.
+</para>
+
+<para>
+The following subsections explain the internal structure based
+on existing cipher implementations. The first section addresses
+the most complex scenario, of which all other scenarios form a logical
+subset.
+</para>
+
+<sect2><title>Generic AEAD Cipher Structure</title>
+
+<para>
+The following ASCII art decomposes the kernel crypto API layers
+when using the AEAD cipher with automated IV generation. The
+example shown is used by the IPSEC layer.
+</para>
+
+<para>
+For other use cases of AEAD ciphers, the ASCII art applies as
+well, but the caller may not use the GIVCIPHER interface. In
+this case, the caller must generate the IV.
+</para>
+
+<para>
+The depicted example decomposes the AEAD cipher of GCM(AES) based
+on the generic C implementations (gcm.c, aes-generic.c, ctr.c,
+ghash-generic.c, seqiv.c). The generic implementation serves as an
+example showing the complete logic of the kernel crypto API.
+</para>
+
+<para>
+Some streamlined cipher implementations (like AES-NI) provide
+merged implementations which, in the view of the kernel crypto
+API, cannot be decomposed into layers any more.
+In case of the AES-NI implementation, the CTR mode, the GHASH
+implementation and the AES cipher are all merged into one cipher
+implementation registered with the kernel crypto API. In this case,
+the concept described by the following ASCII art applies too. However,
+the decomposition of GCM into the individual sub-components
+by the kernel crypto API is not done any more.
+</para>
+
+<para>
+Each block in the following ASCII art is an independent cipher
+instance obtained from the kernel crypto API. Each block
+is accessed by the caller or by other blocks using the API functions
+defined by the kernel crypto API for the cipher implementation type.
+</para>
+
+<para>
+The blocks below indicate the cipher type as well as the specific
+logic implemented in the cipher.
+</para>
+
+<para>
+The ASCII art picture also indicates the call structure, i.e. who
+calls which component. The arrows point to the invoked block
+where the caller uses the API applicable to the cipher type
+specified for the block.
+</para>
+
+<programlisting>
+<![CDATA[
+ kernel crypto API                                 |  IPSEC Layer
+                                                   |
+ +-----------+                                     |
+ |           |        (1)                          |
+ | givcipher | <-----------------------------------  esp_output
+ |  (seqiv)  | ---+
+ +-----------+    |
+                  | (2)
+ +-----------+    |
+ |           | <--+                (2)
+ |   aead    | <-----------------------------------  esp_input
+ |   (gcm)   | ------------+
+ +-----------+             |
+       | (3)               | (5)
+       v                   v
+ +-----------+       +-----------+
+ |           |       |           |
+ | ablkcipher|       |   ahash   |
+ |   (ctr)   | ---+  |  (ghash)  |
+ +-----------+    |  +-----------+
+                  |
+ +-----------+    | (4)
+ |           | <--+
+ |   cipher  |
+ |   (aes)   |
+ +-----------+
+]]>
+</programlisting>
+
+<para>
+The following call sequence is applicable when the IPSEC layer
+triggers an encryption operation with the esp_output function. During
+configuration, the administrator set up the use of rfc4106(gcm(aes)) as
+the cipher for ESP. The following call sequence is now depicted in the
+ASCII art above:
+</para>
+
+<orderedlist>
+<listitem>
+<para>
+esp_output() invokes crypto_aead_givencrypt() to trigger an encryption
+operation of the GIVCIPHER implementation.
+</para>
+
+<para>
+In case of GCM, the SEQIV implementation is registered as GIVCIPHER
+in crypto_rfc4106_alloc().
+</para>
+
+<para>
+SEQIV performs its operation to generate an IV; its core
+function is seqiv_geniv().
+</para>
+</listitem>
+
+<listitem>
+<para>
+Now, SEQIV uses the AEAD API function calls to invoke the associated
+AEAD cipher. In our case, during the instantiation of SEQIV, the
+cipher handle for GCM is provided to SEQIV. This means that SEQIV
+invokes AEAD cipher operations with the GCM cipher handle.
+</para>
+
+<para>
+During instantiation of the GCM handle, the CTR(AES) and GHASH
+ciphers are instantiated. The cipher handles for CTR(AES) and GHASH
+are retained for later use.
+</para>
+
+<para>
+The GCM implementation is responsible for invoking the CTR mode AES and
+the GHASH cipher in the right manner to implement the GCM
+specification.
+</para>
+</listitem>
+
+<listitem>
+<para>
+The GCM AEAD cipher type implementation now invokes the ABLKCIPHER API
+with the instantiated CTR(AES) cipher handle.
+</para>
+
+<para>
+During instantiation of the CTR(AES) cipher, the CIPHER type
+implementation of AES is instantiated. The cipher handle for AES is
+retained.
+</para>
+
+<para>
+That means that the ABLKCIPHER implementation of CTR(AES) only
+implements the CTR block chaining mode. After performing the block
+chaining operation, the CIPHER implementation of AES is invoked.
+</para>
+</listitem>
+
+<listitem>
+<para>
+The ABLKCIPHER of CTR(AES) now invokes the CIPHER API with the AES
+cipher handle to encrypt one block.
+</para>
+</listitem>
+
+<listitem>
+<para>
+The GCM AEAD implementation also invokes the GHASH cipher
+implementation via the AHASH API.
+</para>
+</listitem>
+</orderedlist>
+
+<para>
+When the IPSEC layer triggers the esp_input() function, the same call
+sequence is followed with the only difference that the operation starts
+with step (2).
+</para>
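
<para>
To make this decomposition concrete, the following minimal kernel-space
sketch (not taken from the patch; error handling reduced to the bare
minimum) instantiates the complete stack with a single allocation call:
</para>

<programlisting>
<![CDATA[
#include <linux/crypto.h>
#include <linux/err.h>

static int instantiate_rfc4106_example(void)
{
	struct crypto_aead *tfm;

	/*
	 * Resolving this name builds the givcipher/aead/ablkcipher/
	 * ahash/cipher stack depicted in the ASCII art above.
	 */
	tfm = crypto_alloc_aead("rfc4106(gcm(aes))", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	crypto_free_aead(tfm);
	return 0;
}
]]>
</programlisting>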
+</sect2>
+
+<sect2><title>Generic Block Cipher Structure</title>
+<para>
+Generic block ciphers follow the same concept as depicted with the ASCII
+art picture above.
+</para>
+
+<para>
+For example, CBC(AES) is implemented with cbc.c and aes-generic.c. The
+ASCII art picture above applies as well, with the difference that only
+step (4) is used and the ABLKCIPHER block chaining mode is CBC.
+</para>
+</sect2>
+
+<sect2><title>Generic Keyed Message Digest Structure</title>
+<para>
+Keyed message digest implementations again follow the same concept as
+depicted in the ASCII art picture above.
+</para>
+
+<para>
+For example, HMAC(SHA256) is implemented with hmac.c and
+sha256_generic.c. The following ASCII art illustrates the
+implementation:
+</para>
+
+<programlisting>
+<![CDATA[
+ kernel crypto API                     |   Caller
+                                       |
+ +-----------+      (1)                |
+ |           | <------------------------------  some_function
+ |   ahash   |
+ |  (hmac)   | ---+
+ +-----------+    |
+                  | (2)
+ +-----------+    |
+ |           | <--+
+ |   shash   |
+ |  (sha256) |
+ +-----------+
+]]>
+</programlisting>
+
+<para>
+The following call sequence is applicable when a caller triggers
+an HMAC operation:
+</para>
+
+<orderedlist>
+<listitem>
+<para>
+The AHASH API functions are invoked by the caller. The HMAC
+implementation performs its operation as needed.
+</para>
+
+<para>
+During initialization of the HMAC cipher, the SHASH cipher type of
+SHA256 is instantiated. The cipher handle for the SHA256 instance is
+retained.
+</para>
+
+<para>
+Whenever the HMAC implementation requires a SHA256 operation,
+it uses this retained SHA256 cipher handle.
+</para>
+</listitem>
+
+<listitem>
+<para>
+The HMAC instance now invokes the SHASH API with the SHA256
+cipher handle to calculate the message digest.
+</para>
+</listitem>
+</orderedlist>
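
<para>
A comparable minimal kernel-space sketch (illustrative only; the helper
name is made up and error handling is trimmed) for instantiating this
stack:
</para>

<programlisting>
<![CDATA[
#include <crypto/hash.h>
#include <linux/err.h>

static int instantiate_hmac_example(const u8 *key, unsigned int keylen)
{
	struct crypto_shash *tfm;
	int ret;

	/* "hmac(sha256)" instantiates hmac.c wrapping sha256_generic.c */
	tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* the key is digested into the initial HMAC state */
	ret = crypto_shash_setkey(tfm, key, keylen);

	crypto_free_shash(tfm);
	return ret;
}
]]>
</programlisting>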
+</sect2>
+</sect1>
 </chapter>

 <chapter id="Development"><title>Developing Cipher Algorithms</title>
@@ -808,6 +1072,602 @@
 </sect1>
 </chapter>

+<chapter id="User"><title>User Space Interface</title>
+<sect1><title>Introduction</title>
+<para>
+The concepts of the kernel crypto API visible to kernel space are fully
+applicable to the user space interface as well. Therefore, the kernel
+crypto API high level discussion for the in-kernel use cases applies
+here as well.
+</para>
+
+<para>
+The major difference, however, is that user space can only act as a
+consumer and never as a provider of a transformation or cipher algorithm.
+</para>
+
+<para>
+The following covers the user space interface exported by the kernel
+crypto API. A working example of this description is libkcapi that
+can be obtained from [1]. That library can be used by user space
+applications that require cryptographic services from the kernel.
+</para>
+
+<para>
+Some aspects of the in-kernel crypto API do not
+apply to user space, however. This includes the difference between
+synchronous and asynchronous invocations. The user space API calls
+are fully synchronous.
+</para>
+
+<para>
+[1] http://www.chronox.de/libkcapi.html
+</para>
+
+</sect1>
+
+<sect1><title>User Space API General Remarks</title>
+<para>
+The kernel crypto API is accessible from user space. Currently,
+the following ciphers are accessible:
+</para>
+
+<itemizedlist>
+<listitem>
+<para>Message digest including keyed message digest (HMAC, CMAC)</para>
+</listitem>
+
+<listitem>
+<para>Symmetric ciphers</para>
+</listitem>
+
+<listitem>
+<para>AEAD ciphers</para>
+</listitem>
+
+<listitem>
+<para>Random Number Generators</para>
+</listitem>
+</itemizedlist>
+
+<para>
+The interface is provided via a socket of type AF_ALG.
+In addition, the setsockopt option level is SOL_ALG. In case the
+user space header files do not export these flags yet, use the
+following macros:
+</para>
+
+<programlisting>
+#ifndef AF_ALG
+#define AF_ALG 38
+#endif
+#ifndef SOL_ALG
+#define SOL_ALG 279
+#endif
+</programlisting>
+
+<para>
+A cipher is accessed with the same name as used for the in-kernel
+API calls. This includes the generic vs. unique naming schema for
+ciphers as well as the enforcement of priorities for generic names.
+</para>
+
+<para>
+To interact with the kernel crypto API, a socket must be
+created by the user space application. User space invokes the cipher
+operation with the send()/write() system call family. The result of the
+cipher operation is obtained with the read()/recv() system call family.
+</para>
+
+<para>
+The following API calls assume that the socket descriptor
+has already been opened by the user space application; only
+the kernel crypto API specific invocations are discussed.
+</para>
+
+<para>
+To initialize the socket interface, the following sequence has to
+be performed by the consumer (a minimal C sketch follows the list):
+</para>
+
+<orderedlist>
+<listitem>
+<para>
+Create a socket of type AF_ALG with the struct sockaddr_alg
+parameter specified below for the different cipher types.
+</para>
+</listitem>
+
+<listitem>
+<para>
+Invoke bind with the socket descriptor.
+</para>
+</listitem>
+
+<listitem>
+<para>
+Invoke accept with the socket descriptor. The accept system call
+returns a new file descriptor that is to be used to interact with
+the particular cipher instance. When invoking send/write or recv/read
+system calls to send data to the kernel or obtain data from the
+kernel, the file descriptor returned by accept must be used.
+</para>
+</listitem>
+</orderedlist>
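
<para>
Putting the three steps together, a minimal sketch in C (error handling
omitted; the "hash"/"sha1" pair is just one possible choice):
</para>

<programlisting>
<![CDATA[
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "sha1"
	};
	int tfmfd, opfd;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);        /* step 1 */
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));  /* step 2 */
	opfd = accept(tfmfd, NULL, 0);                    /* step 3 */

	/* ... send()/recv() using opfd ... */

	close(opfd);
	close(tfmfd);
	return 0;
}
]]>
</programlisting>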
+</sect1>
+
+<sect1><title>In-place Cipher Operation</title>
+<para>
+Just like the in-kernel operation of the kernel crypto API, the user
+space interface allows the cipher operation in-place. That means that
+the input buffer used for the send/write system call and the output
+buffer used by the read/recv system call may be one and the same.
+This is of particular interest for symmetric cipher operations where
+copying of the output data to its final destination can be avoided.
+</para>
+
+<para>
+If a consumer on the other hand wants to maintain the plaintext and
+the ciphertext in different memory locations, all a consumer needs
+to do is to provide different memory pointers for the encryption and
+decryption operation.
+</para>
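
<para>
A small sketch of an in-place operation (assuming opfd refers to an
already keyed skcipher instance and the buffer length is a multiple of
the block size):
</para>

<programlisting>
<![CDATA[
unsigned char buf[4096];
size_t len = sizeof(buf);

/* the same buffer serves as input and output */
send(opfd, buf, len, 0);   /* plaintext in */
read(opfd, buf, len);      /* ciphertext overwrites the plaintext */
]]>
</programlisting>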
+</sect1>
+
+<sect1><title>Message Digest API</title>
+<para>
+The message digest type to be used for the cipher operation is
+selected when invoking the bind syscall. bind requires the caller
+to provide a filled struct sockaddr data structure. This data
+structure must be filled as follows:
+</para>
+
+<programlisting>
+struct sockaddr_alg sa = {
+	.salg_family = AF_ALG,
+	.salg_type = "hash", /* this selects the hash logic in the kernel */
+	.salg_name = "sha1" /* this is the cipher name */
+};
+</programlisting>
+
+<para>
+The salg_type value "hash" applies to message digests and keyed
+message digests; a keyed message digest is simply referenced by
+the appropriate salg_name. Please see below for the setsockopt
+interface that explains how the key can be set for a keyed message
+digest.
+</para>
+
+<para>
+Using the send() system call, the application provides the data that
+should be processed with the message digest. The send system call
+allows the following flags to be specified:
+</para>
+
+<itemizedlist>
+<listitem>
+<para>
+MSG_MORE: If this flag is set, the send system call acts like a
+message digest update function where the final hash is not
+yet calculated. If the flag is not set, the send system call
+calculates the final message digest immediately.
+</para>
+</listitem>
+</itemizedlist>
+
+<para>
+With the recv() system call, the application can read the message
+digest from the kernel crypto API. If the buffer is too small for the
+message digest, the flag MSG_TRUNC is set by the kernel.
+</para>
+
+<para>
+In order to set a message digest key, the calling application must use
+the setsockopt() option ALG_SET_KEY. If the key is not set, the HMAC
+operation is performed without the initial HMAC state change caused by
+the key.
+</para>
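
<para>
A short sketch tying the above together for a two-part SHA-1
computation (opfd as returned by accept; error handling omitted):
</para>

<programlisting>
<![CDATA[
unsigned char digest[20];   /* SHA-1 digest size */

send(opfd, "part1", 5, MSG_MORE);    /* update only, no final hash */
send(opfd, "part2", 5, 0);           /* final part, digest is computed */
read(opfd, digest, sizeof(digest));  /* retrieve the message digest */
]]>
</programlisting>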
+</sect1>
+
+<sect1><title>Symmetric Cipher API</title>
+<para>
+The operation is very similar to the message digest discussion.
+During initialization, the struct sockaddr data structure must be
+filled as follows:
+</para>
+
+<programlisting>
+struct sockaddr_alg sa = {
+	.salg_family = AF_ALG,
+	.salg_type = "skcipher", /* this selects the symmetric cipher */
+	.salg_name = "cbc(aes)" /* this is the cipher name */
+};
+</programlisting>
+
+<para>
+Before data can be sent to the kernel using the write/send system
+call family, the consumer must set the key. The key setting is
+described with the setsockopt invocation below.
+</para>
+
+<para>
+Using the sendmsg() system call, the application provides the data
+that should be processed for encryption or decryption. In addition,
+the IV is specified with the data structure provided by the sendmsg()
+system call.
+</para>
+
+<para>
+The additional information for the cipher operation is embedded as
+struct cmsghdr entries into the struct msghdr parameter of the
+sendmsg system call. See recv(2) and cmsg(3) for more
+information on how the cmsghdr data structure is used together with the
+send/recv system call family. The cmsghdr data structure holds the
+following information, each specified with a separate header instance:
+</para>
+
+<itemizedlist>
+<listitem>
+<para>
+specification of the cipher operation type with one of these flags:
+</para>
+<itemizedlist>
+<listitem>
+<para>ALG_OP_ENCRYPT - encryption of data</para>
+</listitem>
+<listitem>
+<para>ALG_OP_DECRYPT - decryption of data</para>
+</listitem>
+</itemizedlist>
+</listitem>
+
+<listitem>
+<para>
+specification of the IV information marked with the flag ALG_SET_IV
+</para>
+</listitem>
+</itemizedlist>
+
+<para>
+The send system call family allows the following flag to be specified:
+</para>
+
+<itemizedlist>
+<listitem>
+<para>
+MSG_MORE: If this flag is set, the send system call acts like a
+cipher update function where more input data is expected
+with a subsequent invocation of the send system call.
+</para>
+</listitem>
+</itemizedlist>
+
+<para>
+Note: The kernel reports -EINVAL for any unexpected data. The caller
+must make sure that all data matches the constraints given in
+/proc/crypto for the selected cipher.
+</para>
+
+<para>
+With the recv() system call, the application can read the result of
+the cipher operation from the kernel crypto API. The output buffer
+must be large enough to hold all blocks of the encrypted or
+decrypted data. If the output buffer is smaller, only as many
+blocks are returned as fit into that output buffer.
+</para>
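
<para>
The following sketch encrypts a single 16-byte block with cbc(aes)
(tfmfd and opfd as obtained in the initialization sequence above;
error handling omitted):
</para>

<programlisting>
<![CDATA[
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

static void encrypt_one_block(int tfmfd, int opfd,
			      const unsigned char key[16],
			      const unsigned char iv[16],
			      unsigned char block[16])
{
	char cbuf[CMSG_SPACE(sizeof(__u32)) +
		  CMSG_SPACE(sizeof(struct af_alg_iv) + 16)] = { 0 };
	struct iovec iov = { .iov_base = block, .iov_len = 16 };
	struct msghdr msg = {
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	struct cmsghdr *cmsg;
	struct af_alg_iv *alg_iv;

	/* the key is set on the bound tfm descriptor */
	setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, 16);

	/* first header instance: the operation type */
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_OP;
	cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
	*(__u32 *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

	/* second header instance: the IV */
	cmsg = CMSG_NXTHDR(&msg, cmsg);
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_IV;
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct af_alg_iv) + 16);
	alg_iv = (struct af_alg_iv *)CMSG_DATA(cmsg);
	alg_iv->ivlen = 16;
	memcpy(alg_iv->iv, iv, 16);

	sendmsg(opfd, &msg, 0);  /* plaintext plus metadata in */
	read(opfd, block, 16);   /* ciphertext out, in place */
}
]]>
</programlisting>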
+</sect1>
+
+<sect1><title>AEAD Cipher API</title>
+<para>
+The operation is very similar to the symmetric cipher discussion.
+During initialization, the struct sockaddr data structure must be
+filled as follows:
+</para>
+
+<programlisting>
+struct sockaddr_alg sa = {
+	.salg_family = AF_ALG,
+	.salg_type = "aead", /* this selects the AEAD cipher */
+	.salg_name = "gcm(aes)" /* this is the cipher name */
+};
+</programlisting>
+
+<para>
+Before data can be sent to the kernel using the write/send system
+call family, the consumer must set the key. The key setting is
+described with the setsockopt invocation below.
+</para>
+
+<para>
+In addition, before data can be sent to the kernel using the
+write/send system call family, the consumer must set the authentication
+tag size. To set the authentication tag size, the caller must use the
+setsockopt invocation described below.
+</para>
+
+<para>
+Using the sendmsg() system call, the application provides the data
+that should be processed for encryption or decryption. In addition,
+the IV is specified with the data structure provided by the sendmsg()
+system call.
+</para>
+
+<para>
+The additional information for the cipher operation is embedded as
+struct cmsghdr entries into the struct msghdr parameter of the
+sendmsg system call. See recv(2) and cmsg(3) for more
+information on how the cmsghdr data structure is used together with the
+send/recv system call family. The cmsghdr data structure holds the
+following information, each specified with a separate header instance:
+</para>
+
+<itemizedlist>
+<listitem>
+<para>
+specification of the cipher operation type with one of these flags:
+</para>
+<itemizedlist>
+<listitem>
+<para>ALG_OP_ENCRYPT - encryption of data</para>
+</listitem>
+<listitem>
+<para>ALG_OP_DECRYPT - decryption of data</para>
+</listitem>
+</itemizedlist>
+</listitem>
+
+<listitem>
+<para>
+specification of the IV information marked with the flag ALG_SET_IV
+</para>
+</listitem>
+
+<listitem>
+<para>
+specification of the length of the associated authentication data
+(AAD) with the flag ALG_SET_AEAD_ASSOCLEN. The AAD itself is sent to
+the kernel together with the plaintext / ciphertext. See below for the
+memory structure.
+</para>
+</listitem>
+</itemizedlist>
+
+<para>
+The send system call family allows the following flag to be specified:
+</para>
+
+<itemizedlist>
+<listitem>
+<para>
+MSG_MORE: If this flag is set, the send system call acts like a
+cipher update function where more input data is expected
+with a subsequent invocation of the send system call.
+</para>
+</listitem>
+</itemizedlist>
+
+<para>
+Note: The kernel reports -EINVAL for any unexpected data. The caller
+must make sure that all data matches the constraints given in
+/proc/crypto for the selected cipher.
+</para>
+
+<para>
+With the recv() system call, the application can read the result of
+the cipher operation from the kernel crypto API. The output buffer
+must be at least as large as defined with the memory structure below.
+If the output buffer is smaller, the cipher operation is not performed.
+</para>
+
+<para>
+The authenticated decryption operation may indicate an integrity error.
+Such an integrity breach is marked with the -EBADMSG error code.
+</para>
+
+<sect2><title>AEAD Memory Structure</title>
+<para>
+The AEAD cipher operates with the following information that
+is communicated between user and kernel space as one data stream:
+</para>
+
+<itemizedlist>
+<listitem>
+<para>plaintext or ciphertext</para>
+</listitem>
+
+<listitem>
+<para>associated authentication data (AAD)</para>
+</listitem>
+
+<listitem>
+<para>authentication tag</para>
+</listitem>
+</itemizedlist>
+
+<para>
+The sizes of the AAD and the authentication tag are provided with
+the sendmsg and setsockopt calls (see there). As the kernel knows
+the size of the entire data stream, the kernel is now able to
+calculate the right offsets of the data components in the data
+stream.
+</para>
+
+<para>
+The user space caller must arrange the aforementioned information
+in the following order:
+</para>
+
+<itemizedlist>
+<listitem>
+<para>
+AEAD encryption input: AAD || plaintext
+</para>
+</listitem>
+
+<listitem>
+<para>
+AEAD decryption input: AAD || ciphertext || authentication tag
+</para>
+</listitem>
+</itemizedlist>
+
+<para>
+The output buffer the user space caller provides must be large
+enough to hold the following data (a small sketch of the resulting
+size arithmetic follows the list):
+</para>
+
+<itemizedlist>
+<listitem>
+<para>
+AEAD encryption output: ciphertext || authentication tag
+</para>
+</listitem>
+
+<listitem>
+<para>
+AEAD decryption output: plaintext
+</para>
+</listitem>
+</itemizedlist>
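
<para>
A small sketch of the resulting size arithmetic (illustrative numbers;
gcm(aes) with 8 bytes of AAD and a 16-byte tag assumed):
</para>

<programlisting>
<![CDATA[
size_t aadlen = 8, ptlen = 64, taglen = 16;

size_t enc_inlen  = aadlen + ptlen;           /* AAD || PT */
size_t enc_outlen = ptlen + taglen;           /* CT || tag */
size_t dec_inlen  = aadlen + ptlen + taglen;  /* AAD || CT || tag */
size_t dec_outlen = ptlen;                    /* PT */
]]>
</programlisting>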
+</sect2>
+</sect1>
+
+<sect1><title>Random Number Generator API</title>
+<para>
+Again, the operation is very similar to the other APIs.
+During initialization, the struct sockaddr data structure must be
+filled as follows:
+</para>
+
+<programlisting>
+struct sockaddr_alg sa = {
+	.salg_family = AF_ALG,
+	.salg_type = "rng", /* this selects the random number generator */
+	.salg_name = "drbg_nopr_sha256" /* this is the cipher name */
+};
+</programlisting>
+
+<para>
+Depending on the RNG type, the RNG must be seeded. The seed is provided
+using the setsockopt interface to set the key. For example, the
+ansi_cprng requires a seed. The DRBGs do not require a seed, but
+may be seeded.
+</para>
+
+<para>
+Using the read()/recvmsg() system calls, random numbers can be obtained.
+The kernel generates at most 128 bytes in one call. If user space
+requires more data, multiple calls to read()/recvmsg() must be made.
+</para>
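
<para>
A loop sketch honoring the 128-byte-per-call limit (opfd as returned
by accept; error handling abbreviated):
</para>

<programlisting>
<![CDATA[
unsigned char rnd[256];
size_t have = 0;
ssize_t n;

while (have < sizeof(rnd)) {
	n = read(opfd, rnd + have, sizeof(rnd) - have);
	if (n <= 0)
		break;  /* error handling omitted */
	have += n;
}
]]>
</programlisting>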
+
+<para>
+WARNING: The user space caller may invoke the initially mentioned
+accept system call multiple times. In this case, the returned file
+descriptors have the same state.
+</para>
+
+</sect1>
+
+<sect1><title>Zero-Copy Interface</title>
+<para>
+In addition to the send/write/read/recv system call family, the AF_ALG
+interface can be accessed with the zero-copy interface of splice/vmsplice.
+As the name indicates, the kernel tries to avoid a copy operation into
+kernel space.
+</para>
+
+<para>
+The zero-copy operation requires data to be aligned at the page boundary.
+Non-aligned data can be used as well, but may require more operations of
+the kernel which would defeat the speed gains obtained from the zero-copy
+interface.
+</para>
+
+<para>
+The system-inherent limit for the size of one zero-copy operation is
+16 pages. If more data is to be sent to AF_ALG, user space must slice
+the input into segments with a maximum size of 16 pages.
+</para>
+
+<para>
+Zero-copy can be used with the following code example (a complete working
+example is provided with libkcapi):
+</para>
+
+<programlisting>
+int pipes[2];
+ssize_t ret;
+
+pipe(pipes);
+/* input data in iov; ret is the number of bytes gifted to the pipe */
+ret = vmsplice(pipes[1], iov, iovlen, SPLICE_F_GIFT);
+/* opfd is the file descriptor returned from accept() system call */
+splice(pipes[0], NULL, opfd, NULL, ret, 0);
+read(opfd, out, outlen);
+</programlisting>
+
+</sect1>
+
+<sect1><title>Setsockopt Interface</title>
+<para>
+In addition to the read/recv and send/write system call handling
+to send and retrieve data subject to the cipher operation, a consumer
+also needs to set the additional information for the cipher operation.
+This additional information is set using the setsockopt system call
+that must be invoked with the file descriptor of the open cipher
+(i.e. the file descriptor returned by the socket and bind system
+calls, not the operation descriptor returned by accept).
+</para>
+
+<para>
+Each setsockopt invocation must use the level SOL_ALG.
+</para>
+
+<para>
+The setsockopt interface allows setting the following data using
+the mentioned optname:
+</para>
+
+<itemizedlist>
+<listitem>
+<para>
+ALG_SET_KEY -- Setting the key. Key setting is applicable to:
+</para>
+<itemizedlist>
+<listitem>
+<para>the skcipher cipher type (symmetric ciphers)</para>
+</listitem>
+<listitem>
+<para>the hash cipher type (keyed message digests)</para>
+</listitem>
+<listitem>
+<para>the AEAD cipher type</para>
+</listitem>
+<listitem>
+<para>the RNG cipher type to provide the seed</para>
+</listitem>
+</itemizedlist>
+</listitem>
+
+<listitem>
+<para>
+ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size
+for AEAD ciphers. For an encryption operation, the authentication
+tag of the given size will be generated. For a decryption operation,
+the provided ciphertext is assumed to contain an authentication tag
+of the given size (see the section about the AEAD memory layout
+above).
+</para>
+</listitem>
+</itemizedlist>
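
<para>
A short sketch of both options (tfmfd denotes the bound socket
descriptor; the 16-byte key and tag size are assumptions for the
example):
</para>

<programlisting>
<![CDATA[
unsigned char key[16] = { 0 };  /* real key material goes here */

/* set the cipher key */
setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));

/*
 * set a 16-byte AEAD authentication tag size; the option length
 * carries the size, no option value is needed
 */
setsockopt(tfmfd, SOL_ALG, ALG_SET_AEAD_AUTHSIZE, NULL, 16);
]]>
</programlisting>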
+
+</sect1>
+
+<sect1><title>User Space API Example</title>
+<para>
+Please see [1] for libkcapi which provides an easy-to-use wrapper
+around the aforementioned AF_ALG kernel interface. [1] also contains
+a test application that invokes all libkcapi API calls.
+</para>
+
+<para>
+[1] http://www.chronox.de/libkcapi.html
+</para>
+
+</sect1>
+
+</chapter>
+
 <chapter id="API"><title>Programming Interface</title>
 <sect1><title>Block Cipher Context Data Structures</title>
 !Pinclude/linux/crypto.h Block Cipher Context Data Structures
diff --git a/Documentation/crypto/crypto-API-userspace.txt b/Documentation/crypto/crypto-API-userspace.txt
deleted file mode 100644
index ac619cd90300..000000000000
--- a/Documentation/crypto/crypto-API-userspace.txt
+++ /dev/null
@@ -1,205 +0,0 @@
-Introduction
-============
-
-The concepts of the kernel crypto API visible to kernel space is fully
-applicable to the user space interface as well. Therefore, the kernel crypto API
-high level discussion for the in-kernel use cases applies here as well.
-
-The major difference, however, is that user space can only act as a consumer
-and never as a provider of a transformation or cipher algorithm.
-
-The following covers the user space interface exported by the kernel crypto
-API. A working example of this description is libkcapi that can be obtained from
-[1]. That library can be used by user space applications that require
-cryptographic services from the kernel.
-
-Some details of the in-kernel kernel crypto API aspects do not
-apply to user space, however. This includes the difference between synchronous
-and asynchronous invocations. The user space API call is fully synchronous.
-In addition, only a subset of all cipher types are available as documented
-below.
-
-
-User space API general remarks
-==============================
-
-The kernel crypto API is accessible from user space. Currently, the following
-ciphers are accessible:
-
- * Message digest including keyed message digest (HMAC, CMAC)
-
- * Symmetric ciphers
-
-Note, AEAD ciphers are currently not supported via the symmetric cipher
-interface.
-
-The interface is provided via Netlink using the type AF_ALG. In addition, the
-setsockopt option type is SOL_ALG. In case the user space header files do not
-export these flags yet, use the following macros:
-
-#ifndef AF_ALG
-#define AF_ALG 38
-#endif
-#ifndef SOL_ALG
-#define SOL_ALG 279
-#endif
-
-A cipher is accessed with the same name as done for the in-kernel API calls.
-This includes the generic vs. unique naming schema for ciphers as well as the
-enforcement of priorities for generic names.
-
-To interact with the kernel crypto API, a Netlink socket must be created by
-the user space application. User space invokes the cipher operation with the
-send/write system call family. The result of the cipher operation is obtained
-with the read/recv system call family.
-
-The following API calls assume that the Netlink socket descriptor is already
-opened by the user space application and discusses only the kernel crypto API
-specific invocations.
-
-To initialize a Netlink interface, the following sequence has to be performed
-by the consumer:
-
- 1. Create a socket of type AF_ALG with the struct sockaddr_alg parameter
    specified below for the different cipher types.
-
- 2. Invoke bind with the socket descriptor
-
- 3. Invoke accept with the socket descriptor. The accept system call
    returns a new file descriptor that is to be used to interact with
    the particular cipher instance. When invoking send/write or recv/read
    system calls to send data to the kernel or obtain data from the
    kernel, the file descriptor returned by accept must be used.
-
-In-place cipher operation
-=========================
-
-Just like the in-kernel operation of the kernel crypto API, the user space
-interface allows the cipher operation in-place. That means that the input buffer
-used for the send/write system call and the output buffer used by the read/recv
-system call may be one and the same. This is of particular interest for
-symmetric cipher operations where a copying of the output data to its final
-destination can be avoided.
-
-If a consumer on the other hand wants to maintain the plaintext and the
-ciphertext in different memory locations, all a consumer needs to do is to
-provide different memory pointers for the encryption and decryption operation.
-
-Message digest API
-==================
-
-The message digest type to be used for the cipher operation is selected when
-invoking the bind syscall. bind requires the caller to provide a filled
-struct sockaddr data structure. This data structure must be filled as follows:
-
-struct sockaddr_alg sa = {
-	.salg_family = AF_ALG,
-	.salg_type = "hash", /* this selects the hash logic in the kernel */
-	.salg_name = "sha1" /* this is the cipher name */
-};
-
-The salg_type value "hash" applies to message digests and keyed message digests.
-Though, a keyed message digest is referenced by the appropriate salg_name.
-Please see below for the setsockopt interface that explains how the key can be
-set for a keyed message digest.
-
-Using the send() system call, the application provides the data that should be
-processed with the message digest. The send system call allows the following
-flags to be specified:
-
- * MSG_MORE: If this flag is set, the send system call acts like a
   message digest update function where the final hash is not
   yet calculated. If the flag is not set, the send system call
   calculates the final message digest immediately.
-
-With the recv() system call, the application can read the message digest from
-the kernel crypto API. If the buffer is too small for the message digest, the
-flag MSG_TRUNC is set by the kernel.
-
-In order to set a message digest key, the calling application must use the
-setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC operation is
-performed without the initial HMAC state change caused by the key.
-
-
-Symmetric cipher API
-====================
-
-The operation is very similar to the message digest discussion. During
-initialization, the struct sockaddr data structure must be filled as follows:
-
-struct sockaddr_alg sa = {
-	.salg_family = AF_ALG,
-	.salg_type = "skcipher", /* this selects the symmetric cipher */
-	.salg_name = "cbc(aes)" /* this is the cipher name */
-};
-
-Before data can be sent to the kernel using the write/send system call family,
-the consumer must set the key. The key setting is described with the setsockopt
-invocation below.
-
-Using the sendmsg() system call, the application provides the data that should
-be processed for encryption or decryption. In addition, the IV is specified
-with the data structure provided by the sendmsg() system call.
-
-The sendmsg system call parameter of struct msghdr is embedded into the
-struct cmsghdr data structure. See recv(2) and cmsg(3) for more information
-on how the cmsghdr data structure is used together with the send/recv system
-call family. That cmsghdr data structure holds the following information
-specified with a separate header instances:
-
- * specification of the cipher operation type with one of these flags:
	ALG_OP_ENCRYPT - encryption of data
	ALG_OP_DECRYPT - decryption of data
-
- * specification of the IV information marked with the flag ALG_SET_IV
-
-The send system call family allows the following flag to be specified:
-
- * MSG_MORE: If this flag is set, the send system call acts like a
   cipher update function where more input data is expected
   with a subsequent invocation of the send system call.
-
-Note: The kernel reports -EINVAL for any unexpected data. The caller must
-make sure that all data matches the constraints given in /proc/crypto for the
-selected cipher.
-
-With the recv() system call, the application can read the result of the
-cipher operation from the kernel crypto API. The output buffer must be at least
-as large as to hold all blocks of the encrypted or decrypted data. If the output
-data size is smaller, only as many blocks are returned that fit into that
-output buffer size.
-
-Setsockopt interface
-====================
-
-In addition to the read/recv and send/write system call handling to send and
-retrieve data subject to the cipher operation, a consumer also needs to set
-the additional information for the cipher operation. This additional information
-is set using the setsockopt system call that must be invoked with the file
-descriptor of the open cipher (i.e. the file descriptor returned by the
-accept system call).
-
-Each setsockopt invocation must use the level SOL_ALG.
-
-The setsockopt interface allows setting the following data using the mentioned
-optname:
-
- * ALG_SET_KEY -- Setting the key. Key setting is applicable to:
-
	- the skcipher cipher type (symmetric ciphers)
-
	- the hash cipher type (keyed message digests)
-
-User space API example
-======================
-
-Please see [1] for libkcapi which provides an easy-to-use wrapper around the
-aforementioned Netlink kernel interface. [1] also contains a test application
-that invokes all libkcapi API calls.
-
-[1] http://www.chronox.de/libkcapi.html
-
-Author
-======
-
-Stephan Mueller <smueller@chronox.de>
diff --git a/Documentation/devicetree/bindings/crypto/img-hash.txt b/Documentation/devicetree/bindings/crypto/img-hash.txt
new file mode 100644
index 000000000000..91a3d757d641
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/img-hash.txt
@@ -0,0 +1,27 @@
+Imagination Technologies hardware hash accelerator
+
+The hash accelerator provides hardware hashing acceleration for
+SHA1, SHA224, SHA256 and MD5 hashes
+
+Required properties:
+
+- compatible : "img,hash-accelerator"
+- reg : Offset and length of the register set for the module, and the DMA port
+- interrupts : The designated IRQ line for the hashing module.
+- dmas : DMA specifier as per Documentation/devicetree/bindings/dma/dma.txt
+- dma-names : Should be "tx"
+- clocks : Clock specifiers
+- clock-names : "sys" Used to clock the hash block registers
+                "hash" Used to clock data through the accelerator
+
+Example:
+
+	hash: hash@18149600 {
+		compatible = "img,hash-accelerator";
+		reg = <0x18149600 0x100>, <0x18101100 0x4>;
+		interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
+		dmas = <&dma 8 0xffffffff 0>;
+		dma-names = "tx";
+		clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
+		clock-names = "sys", "hash";
+	};
diff --git a/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt b/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt
new file mode 100644
index 000000000000..e25a456664b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt
@@ -0,0 +1,12 @@
+HWRNG support for the iproc-rng200 driver
+
+Required properties:
+- compatible : "brcm,iproc-rng200"
+- reg : base address and size of control register block
+
+Example:
+
+rng {
+	compatible = "brcm,iproc-rng200";
+	reg = <0x18032000 0x28>;
+};
diff --git a/MAINTAINERS b/MAINTAINERS
index 6ee1e79ea16b..7a8f367b4ebc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2825,6 +2825,7 @@ L:	linux-crypto@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git
 S:	Maintained
 F:	Documentation/crypto/
+F:	Documentation/DocBook/crypto-API.tmpl
 F:	arch/*/crypto/
 F:	crypto/
 F:	drivers/crypto/
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index da1266c53c13..7cbf4ef5c6fd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2175,6 +2175,9 @@ source "arch/arm/Kconfig.debug"
 source "security/Kconfig"

 source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm/crypto/Kconfig"
+endif

 source "lib/Kconfig"

diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
new file mode 100644
index 000000000000..8da2207b0072
--- /dev/null
+++ b/arch/arm/crypto/Kconfig
@@ -0,0 +1,130 @@
+
+menuconfig ARM_CRYPTO
+	bool "ARM Accelerated Cryptographic Algorithms"
+	depends on ARM
+	help
+	  Say Y here to choose from a selection of cryptographic algorithms
+	  implemented using ARM specific CPU features or instructions.
+
+if ARM_CRYPTO
+
+config CRYPTO_SHA1_ARM
+	tristate "SHA1 digest algorithm (ARM-asm)"
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+	  using optimized ARM assembler.
+
+config CRYPTO_SHA1_ARM_NEON
+	tristate "SHA1 digest algorithm (ARM NEON)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_SHA1_ARM
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+	  using optimized ARM NEON assembly, when NEON instructions are
+	  available.
+
+config CRYPTO_SHA1_ARM_CE
+	tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_SHA1_ARM
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+	  using special ARMv8 Crypto Extensions.
+
+config CRYPTO_SHA2_ARM_CE
+	tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_SHA256_ARM
+	select CRYPTO_HASH
+	help
+	  SHA-256 secure hash standard (DFIPS 180-2) implemented
+	  using special ARMv8 Crypto Extensions.
+
+config CRYPTO_SHA256_ARM
+	tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
+	select CRYPTO_HASH
+	depends on !CPU_V7M
+	help
+	  SHA-256 secure hash standard (DFIPS 180-2) implemented
+	  using optimized ARM assembler and NEON, when available.
+
+config CRYPTO_SHA512_ARM_NEON
+	tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_SHA512
+	select CRYPTO_HASH
+	help
+	  SHA-512 secure hash standard (DFIPS 180-2) implemented
+	  using ARM NEON instructions, when available.
+
+	  This version of SHA implements a 512 bit hash with 256 bits of
+	  security against collision attacks.
+
+	  This code also includes SHA-384, a 384 bit hash with 192 bits
+	  of security against collision attacks.
+
+config CRYPTO_AES_ARM
+	tristate "AES cipher algorithms (ARM-asm)"
+	depends on ARM
+	select CRYPTO_ALGAPI
+	select CRYPTO_AES
+	help
+	  Use optimized AES assembler routines for ARM platforms.
+
+	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
+	  algorithm.
+
+	  Rijndael appears to be consistently a very good performer in
+	  both hardware and software across a wide range of computing
+	  environments regardless of its use in feedback or non-feedback
+	  modes. Its key setup time is excellent, and its key agility is
+	  good. Rijndael's very low memory requirements make it very well
+	  suited for restricted-space environments, in which it also
+	  demonstrates excellent performance. Rijndael's operations are
+	  among the easiest to defend against power and timing attacks.
+
+	  The AES specifies three key sizes: 128, 192 and 256 bits
+
+	  See <http://csrc.nist.gov/encryption/aes/> for more information.
+
+config CRYPTO_AES_ARM_BS
+	tristate "Bit sliced AES using NEON instructions"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_AES_ARM
+	select CRYPTO_ABLK_HELPER
+	help
+	  Use a faster and more secure NEON based implementation of AES in CBC,
+	  CTR and XTS modes
+
+	  Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
+	  and for XTS mode encryption, CBC and XTS mode decryption speedup is
+	  around 25%. (CBC encryption speed is not affected by this driver.)
+	  This implementation does not rely on any lookup tables so it is
+	  believed to be invulnerable to cache timing attacks.
+
+config CRYPTO_AES_ARM_CE
+	tristate "Accelerated AES using ARMv8 Crypto Extensions"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_ABLK_HELPER
+	help
+	  Use an implementation of AES in CBC, CTR and XTS modes that uses
+	  ARMv8 Crypto Extensions
+
+config CRYPTO_GHASH_ARM_CE
+	tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_CRYPTD
+	help
+	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
+	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
+	  that is part of the ARMv8 Crypto Extensions
+
+endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index b48fa341648d..6ea828241fcb 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -6,13 +6,35 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o

+ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+
+ifneq ($(ce-obj-y)$(ce-obj-m),)
+ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
+obj-y += $(ce-obj-y)
+obj-m += $(ce-obj-m)
+else
+$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
+$(warning $(ce-obj-y) $(ce-obj-m))
+endif
+endif
+
 aes-arm-y := aes-armv4.o aes_glue.o
 aes-arm-bs-y := aesbs-core.o aesbs-glue.o
 sha1-arm-y := sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
+sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
 sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
+sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
+sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
+aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
+ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o

 quiet_cmd_perl = PERL $@
 cmd_perl = $(PERL) $(<) > $(@)
@@ -20,4 +42,7 @@ quiet_cmd_perl = PERL $@ | |||
20 | $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl | 42 | $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl |
21 | $(call cmd,perl) | 43 | $(call cmd,perl) |
22 | 44 | ||
23 | .PRECIOUS: $(obj)/aesbs-core.S | 45 | $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl |
46 | $(call cmd,perl) | ||
47 | |||
48 | .PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S | ||
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S new file mode 100644 index 000000000000..8cfa468ee570 --- /dev/null +++ b/arch/arm/crypto/aes-ce-core.S | |||
@@ -0,0 +1,518 @@ | |||
1 | /* | ||
2 | * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/linkage.h> | ||
12 | #include <asm/assembler.h> | ||
13 | |||
14 | .text | ||
15 | .fpu crypto-neon-fp-armv8 | ||
16 | .align 3 | ||
17 | |||
18 | .macro enc_round, state, key | ||
19 | aese.8 \state, \key | ||
20 | aesmc.8 \state, \state | ||
21 | .endm | ||
22 | |||
23 | .macro dec_round, state, key | ||
24 | aesd.8 \state, \key | ||
25 | aesimc.8 \state, \state | ||
26 | .endm | ||
27 | |||
28 | .macro enc_dround, key1, key2 | ||
29 | enc_round q0, \key1 | ||
30 | enc_round q0, \key2 | ||
31 | .endm | ||
32 | |||
33 | .macro dec_dround, key1, key2 | ||
34 | dec_round q0, \key1 | ||
35 | dec_round q0, \key2 | ||
36 | .endm | ||
37 | |||
38 | .macro enc_fround, key1, key2, key3 | ||
39 | enc_round q0, \key1 | ||
40 | aese.8 q0, \key2 | ||
41 | veor q0, q0, \key3 | ||
42 | .endm | ||
43 | |||
44 | .macro dec_fround, key1, key2, key3 | ||
45 | dec_round q0, \key1 | ||
46 | aesd.8 q0, \key2 | ||
47 | veor q0, q0, \key3 | ||
48 | .endm | ||
49 | |||
50 | .macro enc_dround_3x, key1, key2 | ||
51 | enc_round q0, \key1 | ||
52 | enc_round q1, \key1 | ||
53 | enc_round q2, \key1 | ||
54 | enc_round q0, \key2 | ||
55 | enc_round q1, \key2 | ||
56 | enc_round q2, \key2 | ||
57 | .endm | ||
58 | |||
59 | .macro dec_dround_3x, key1, key2 | ||
60 | dec_round q0, \key1 | ||
61 | dec_round q1, \key1 | ||
62 | dec_round q2, \key1 | ||
63 | dec_round q0, \key2 | ||
64 | dec_round q1, \key2 | ||
65 | dec_round q2, \key2 | ||
66 | .endm | ||
67 | |||
68 | .macro enc_fround_3x, key1, key2, key3 | ||
69 | enc_round q0, \key1 | ||
70 | enc_round q1, \key1 | ||
71 | enc_round q2, \key1 | ||
72 | aese.8 q0, \key2 | ||
73 | aese.8 q1, \key2 | ||
74 | aese.8 q2, \key2 | ||
75 | veor q0, q0, \key3 | ||
76 | veor q1, q1, \key3 | ||
77 | veor q2, q2, \key3 | ||
78 | .endm | ||
79 | |||
80 | .macro dec_fround_3x, key1, key2, key3 | ||
81 | dec_round q0, \key1 | ||
82 | dec_round q1, \key1 | ||
83 | dec_round q2, \key1 | ||
84 | aesd.8 q0, \key2 | ||
85 | aesd.8 q1, \key2 | ||
86 | aesd.8 q2, \key2 | ||
87 | veor q0, q0, \key3 | ||
88 | veor q1, q1, \key3 | ||
89 | veor q2, q2, \key3 | ||
90 | .endm | ||
91 | |||
92 | .macro do_block, dround, fround | ||
93 | cmp r3, #12 @ which key size? | ||
94 | vld1.8 {q10-q11}, [ip]! | ||
95 | \dround q8, q9 | ||
96 | vld1.8 {q12-q13}, [ip]! | ||
97 | \dround q10, q11 | ||
98 | vld1.8 {q10-q11}, [ip]! | ||
99 | \dround q12, q13 | ||
100 | vld1.8 {q12-q13}, [ip]! | ||
101 | \dround q10, q11 | ||
102 | blo 0f @ AES-128: 10 rounds | ||
103 | vld1.8 {q10-q11}, [ip]! | ||
104 | beq 1f @ AES-192: 12 rounds | ||
105 | \dround q12, q13 | ||
106 | vld1.8 {q12-q13}, [ip] | ||
107 | \dround q10, q11 | ||
108 | 0: \fround q12, q13, q14 | ||
109 | bx lr | ||
110 | |||
111 | 1: \dround q12, q13 | ||
112 | \fround q10, q11, q14 | ||
113 | bx lr | ||
114 | .endm | ||
115 | |||
116 | /* | ||
117 | * Internal, non-AAPCS compliant functions that implement the core AES | ||
118 | * transforms. These should preserve all registers except q0 - q2 and ip | ||
119 | * Arguments: | ||
120 | * q0 : first in/output block | ||
121 | * q1 : second in/output block (_3x version only) | ||
122 | * q2 : third in/output block (_3x version only) | ||
123 | * q8 : first round key | ||
124 | * q9 : second round key | ||
125 | * ip : address of 3rd round key | ||
126 | * q14 : final round key | ||
127 | * r3 : number of rounds | ||
128 | */ | ||
129 | .align 6 | ||
130 | aes_encrypt: | ||
131 | add ip, r2, #32 @ 3rd round key | ||
132 | .Laes_encrypt_tweak: | ||
133 | do_block enc_dround, enc_fround | ||
134 | ENDPROC(aes_encrypt) | ||
135 | |||
136 | .align 6 | ||
137 | aes_decrypt: | ||
138 | add ip, r2, #32 @ 3rd round key | ||
139 | do_block dec_dround, dec_fround | ||
140 | ENDPROC(aes_decrypt) | ||
141 | |||
142 | .align 6 | ||
143 | aes_encrypt_3x: | ||
144 | add ip, r2, #32 @ 3rd round key | ||
145 | do_block enc_dround_3x, enc_fround_3x | ||
146 | ENDPROC(aes_encrypt_3x) | ||
147 | |||
148 | .align 6 | ||
149 | aes_decrypt_3x: | ||
150 | add ip, r2, #32 @ 3rd round key | ||
151 | do_block dec_dround_3x, dec_fround_3x | ||
152 | ENDPROC(aes_decrypt_3x) | ||
153 | |||
154 | .macro prepare_key, rk, rounds | ||
155 | add ip, \rk, \rounds, lsl #4 | ||
156 | vld1.8 {q8-q9}, [\rk] @ load first 2 round keys | ||
157 | vld1.8 {q14}, [ip] @ load last round key | ||
158 | .endm | ||
159 | |||
160 | /* | ||
161 | * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | ||
162 | * int blocks) | ||
163 | * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | ||
164 | * int blocks) | ||
165 | */ | ||
166 | ENTRY(ce_aes_ecb_encrypt) | ||
167 | push {r4, lr} | ||
168 | ldr r4, [sp, #8] | ||
169 | prepare_key r2, r3 | ||
170 | .Lecbencloop3x: | ||
171 | subs r4, r4, #3 | ||
172 | bmi .Lecbenc1x | ||
173 | vld1.8 {q0-q1}, [r1, :64]! | ||
174 | vld1.8 {q2}, [r1, :64]! | ||
175 | bl aes_encrypt_3x | ||
176 | vst1.8 {q0-q1}, [r0, :64]! | ||
177 | vst1.8 {q2}, [r0, :64]! | ||
178 | b .Lecbencloop3x | ||
179 | .Lecbenc1x: | ||
180 | adds r4, r4, #3 | ||
181 | beq .Lecbencout | ||
182 | .Lecbencloop: | ||
183 | vld1.8 {q0}, [r1, :64]! | ||
184 | bl aes_encrypt | ||
185 | vst1.8 {q0}, [r0, :64]! | ||
186 | subs r4, r4, #1 | ||
187 | bne .Lecbencloop | ||
188 | .Lecbencout: | ||
189 | pop {r4, pc} | ||
190 | ENDPROC(ce_aes_ecb_encrypt) | ||
191 | |||
192 | ENTRY(ce_aes_ecb_decrypt) | ||
193 | push {r4, lr} | ||
194 | ldr r4, [sp, #8] | ||
195 | prepare_key r2, r3 | ||
196 | .Lecbdecloop3x: | ||
197 | subs r4, r4, #3 | ||
198 | bmi .Lecbdec1x | ||
199 | vld1.8 {q0-q1}, [r1, :64]! | ||
200 | vld1.8 {q2}, [r1, :64]! | ||
201 | bl aes_decrypt_3x | ||
202 | vst1.8 {q0-q1}, [r0, :64]! | ||
203 | vst1.8 {q2}, [r0, :64]! | ||
204 | b .Lecbdecloop3x | ||
205 | .Lecbdec1x: | ||
206 | adds r4, r4, #3 | ||
207 | beq .Lecbdecout | ||
208 | .Lecbdecloop: | ||
209 | vld1.8 {q0}, [r1, :64]! | ||
210 | bl aes_decrypt | ||
211 | vst1.8 {q0}, [r0, :64]! | ||
212 | subs r4, r4, #1 | ||
213 | bne .Lecbdecloop | ||
214 | .Lecbdecout: | ||
215 | pop {r4, pc} | ||
216 | ENDPROC(ce_aes_ecb_decrypt) | ||
217 | |||
218 | /* | ||
219 | * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | ||
220 | * int blocks, u8 iv[]) | ||
221 | * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | ||
222 | * int blocks, u8 iv[]) | ||
223 | */ | ||
224 | ENTRY(ce_aes_cbc_encrypt) | ||
225 | push {r4-r6, lr} | ||
226 | ldrd r4, r5, [sp, #16] | ||
227 | vld1.8 {q0}, [r5] | ||
228 | prepare_key r2, r3 | ||
229 | .Lcbcencloop: | ||
230 | vld1.8 {q1}, [r1, :64]! @ get next pt block | ||
231 | veor q0, q0, q1 @ ..and xor with iv | ||
232 | bl aes_encrypt | ||
233 | vst1.8 {q0}, [r0, :64]! | ||
234 | subs r4, r4, #1 | ||
235 | bne .Lcbcencloop | ||
236 | vst1.8 {q0}, [r5] | ||
237 | pop {r4-r6, pc} | ||
238 | ENDPROC(ce_aes_cbc_encrypt) | ||
239 | |||
240 | ENTRY(ce_aes_cbc_decrypt) | ||
241 | push {r4-r6, lr} | ||
242 | ldrd r4, r5, [sp, #16] | ||
243 | vld1.8 {q6}, [r5] @ keep iv in q6 | ||
244 | prepare_key r2, r3 | ||
245 | .Lcbcdecloop3x: | ||
246 | subs r4, r4, #3 | ||
247 | bmi .Lcbcdec1x | ||
248 | vld1.8 {q0-q1}, [r1, :64]! | ||
249 | vld1.8 {q2}, [r1, :64]! | ||
250 | vmov q3, q0 | ||
251 | vmov q4, q1 | ||
252 | vmov q5, q2 | ||
253 | bl aes_decrypt_3x | ||
254 | veor q0, q0, q6 | ||
255 | veor q1, q1, q3 | ||
256 | veor q2, q2, q4 | ||
257 | vmov q6, q5 | ||
258 | vst1.8 {q0-q1}, [r0, :64]! | ||
259 | vst1.8 {q2}, [r0, :64]! | ||
260 | b .Lcbcdecloop3x | ||
261 | .Lcbcdec1x: | ||
262 | adds r4, r4, #3 | ||
263 | beq .Lcbcdecout | ||
264 | vmov q15, q14 @ preserve last round key | ||
265 | .Lcbcdecloop: | ||
266 | vld1.8 {q0}, [r1, :64]! @ get next ct block | ||
267 | veor q14, q15, q6 @ combine prev ct with last key | ||
268 | vmov q6, q0 | ||
269 | bl aes_decrypt | ||
270 | vst1.8 {q0}, [r0, :64]! | ||
271 | subs r4, r4, #1 | ||
272 | bne .Lcbcdecloop | ||
273 | .Lcbcdecout: | ||
274 | vst1.8 {q6}, [r5] @ store updated iv | ||
275 | pop {r4-r6, pc} | ||
276 | ENDPROC(ce_aes_cbc_decrypt) | ||
277 | |||
278 | /* | ||
279 | * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | ||
280 | * int blocks, u8 ctr[]) | ||
281 | */ | ||
282 | ENTRY(ce_aes_ctr_encrypt) | ||
283 | push {r4-r6, lr} | ||
284 | ldrd r4, r5, [sp, #16] | ||
285 | vld1.8 {q6}, [r5] @ load ctr | ||
286 | prepare_key r2, r3 | ||
287 | vmov r6, s27 @ keep swabbed ctr in r6 | ||
288 | rev r6, r6 | ||
289 | cmn r6, r4 @ 32 bit overflow? | ||
290 | bcs .Lctrloop | ||
291 | .Lctrloop3x: | ||
292 | subs r4, r4, #3 | ||
293 | bmi .Lctr1x | ||
294 | add r6, r6, #1 | ||
295 | vmov q0, q6 | ||
296 | vmov q1, q6 | ||
297 | rev ip, r6 | ||
298 | add r6, r6, #1 | ||
299 | vmov q2, q6 | ||
300 | vmov s7, ip | ||
301 | rev ip, r6 | ||
302 | add r6, r6, #1 | ||
303 | vmov s11, ip | ||
304 | vld1.8 {q3-q4}, [r1, :64]! | ||
305 | vld1.8 {q5}, [r1, :64]! | ||
306 | bl aes_encrypt_3x | ||
307 | veor q0, q0, q3 | ||
308 | veor q1, q1, q4 | ||
309 | veor q2, q2, q5 | ||
310 | rev ip, r6 | ||
311 | vst1.8 {q0-q1}, [r0, :64]! | ||
312 | vst1.8 {q2}, [r0, :64]! | ||
313 | vmov s27, ip | ||
314 | b .Lctrloop3x | ||
315 | .Lctr1x: | ||
316 | adds r4, r4, #3 | ||
317 | beq .Lctrout | ||
318 | .Lctrloop: | ||
319 | vmov q0, q6 | ||
320 | bl aes_encrypt | ||
321 | subs r4, r4, #1 | ||
322 | bmi .Lctrhalfblock @ blocks < 0 means 1/2 block | ||
323 | vld1.8 {q3}, [r1, :64]! | ||
324 | veor q3, q0, q3 | ||
325 | vst1.8 {q3}, [r0, :64]! | ||
326 | |||
327 | adds r6, r6, #1 @ increment BE ctr | ||
328 | rev ip, r6 | ||
329 | vmov s27, ip | ||
330 | bcs .Lctrcarry | ||
331 | teq r4, #0 | ||
332 | bne .Lctrloop | ||
333 | .Lctrout: | ||
334 | vst1.8 {q6}, [r5] | ||
335 | pop {r4-r6, pc} | ||
336 | |||
337 | .Lctrhalfblock: | ||
338 | vld1.8 {d1}, [r1, :64] | ||
339 | veor d0, d0, d1 | ||
340 | vst1.8 {d0}, [r0, :64] | ||
341 | pop {r4-r6, pc} | ||
342 | |||
343 | .Lctrcarry: | ||
344 | .irp sreg, s26, s25, s24 | ||
345 | vmov ip, \sreg @ load next word of ctr | ||
346 | rev ip, ip @ ... to handle the carry | ||
347 | adds ip, ip, #1 | ||
348 | rev ip, ip | ||
349 | vmov \sreg, ip | ||
350 | bcc 0f | ||
351 | .endr | ||
352 | 0: teq r4, #0 | ||
353 | beq .Lctrout | ||
354 | b .Lctrloop | ||
355 | ENDPROC(ce_aes_ctr_encrypt) | ||
356 | |||
357 | /* | ||
358 | * ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, | ||
359 | * int blocks, u8 iv[], u8 const rk2[], int first) | ||
360 | * ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, | ||
361 | * int blocks, u8 iv[], u8 const rk2[], int first) | ||
362 | */ | ||
363 | |||
364 | .macro next_tweak, out, in, const, tmp | ||
365 | vshr.s64 \tmp, \in, #63 | ||
366 | vand \tmp, \tmp, \const | ||
367 | vadd.u64 \out, \in, \in | ||
368 | vext.8 \tmp, \tmp, \tmp, #8 | ||
369 | veor \out, \out, \tmp | ||
370 | .endm | ||
371 | |||
372 | .align 3 | ||
373 | .Lxts_mul_x: | ||
374 | .quad 1, 0x87 | ||
375 | |||
376 | ce_aes_xts_init: | ||
377 | vldr d14, .Lxts_mul_x | ||
378 | vldr d15, .Lxts_mul_x + 8 | ||
379 | |||
380 | ldrd r4, r5, [sp, #16] @ load args | ||
381 | ldr r6, [sp, #28] | ||
382 | vld1.8 {q0}, [r5] @ load iv | ||
383 | teq r6, #1 @ start of a block? | ||
384 | bxne lr | ||
385 | |||
386 | @ Encrypt the IV in q0 with the second AES key. This should only | ||
387 | @ be done at the start of a block. | ||
388 | ldr r6, [sp, #24] @ load AES key 2 | ||
389 | prepare_key r6, r3 | ||
390 | add ip, r6, #32 @ 3rd round key of key 2 | ||
391 | b .Laes_encrypt_tweak @ tail call | ||
392 | ENDPROC(ce_aes_xts_init) | ||
393 | |||
394 | ENTRY(ce_aes_xts_encrypt) | ||
395 | push {r4-r6, lr} | ||
396 | |||
397 | bl ce_aes_xts_init @ run shared prologue | ||
398 | prepare_key r2, r3 | ||
399 | vmov q3, q0 | ||
400 | |||
401 | teq r6, #0 @ start of a block? | ||
402 | bne .Lxtsenc3x | ||
403 | |||
404 | .Lxtsencloop3x: | ||
405 | next_tweak q3, q3, q7, q6 | ||
406 | .Lxtsenc3x: | ||
407 | subs r4, r4, #3 | ||
408 | bmi .Lxtsenc1x | ||
409 | vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks | ||
410 | vld1.8 {q2}, [r1, :64]! | ||
411 | next_tweak q4, q3, q7, q6 | ||
412 | veor q0, q0, q3 | ||
413 | next_tweak q5, q4, q7, q6 | ||
414 | veor q1, q1, q4 | ||
415 | veor q2, q2, q5 | ||
416 | bl aes_encrypt_3x | ||
417 | veor q0, q0, q3 | ||
418 | veor q1, q1, q4 | ||
419 | veor q2, q2, q5 | ||
420 | vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks | ||
421 | vst1.8 {q2}, [r0, :64]! | ||
422 | vmov q3, q5 | ||
423 | teq r4, #0 | ||
424 | beq .Lxtsencout | ||
425 | b .Lxtsencloop3x | ||
426 | .Lxtsenc1x: | ||
427 | adds r4, r4, #3 | ||
428 | beq .Lxtsencout | ||
429 | .Lxtsencloop: | ||
430 | vld1.8 {q0}, [r1, :64]! | ||
431 | veor q0, q0, q3 | ||
432 | bl aes_encrypt | ||
433 | veor q0, q0, q3 | ||
434 | vst1.8 {q0}, [r0, :64]! | ||
435 | subs r4, r4, #1 | ||
436 | beq .Lxtsencout | ||
437 | next_tweak q3, q3, q7, q6 | ||
438 | b .Lxtsencloop | ||
439 | .Lxtsencout: | ||
440 | vst1.8 {q3}, [r5] | ||
441 | pop {r4-r6, pc} | ||
442 | ENDPROC(ce_aes_xts_encrypt) | ||
443 | |||
444 | |||
445 | ENTRY(ce_aes_xts_decrypt) | ||
446 | push {r4-r6, lr} | ||
447 | |||
448 | bl ce_aes_xts_init @ run shared prologue | ||
449 | prepare_key r2, r3 | ||
450 | vmov q3, q0 | ||
451 | |||
452 | teq r6, #0 @ start of a block? | ||
453 | bne .Lxtsdec3x | ||
454 | |||
455 | .Lxtsdecloop3x: | ||
456 | next_tweak q3, q3, q7, q6 | ||
457 | .Lxtsdec3x: | ||
458 | subs r4, r4, #3 | ||
459 | bmi .Lxtsdec1x | ||
460 | vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks | ||
461 | vld1.8 {q2}, [r1, :64]! | ||
462 | next_tweak q4, q3, q7, q6 | ||
463 | veor q0, q0, q3 | ||
464 | next_tweak q5, q4, q7, q6 | ||
465 | veor q1, q1, q4 | ||
466 | veor q2, q2, q5 | ||
467 | bl aes_decrypt_3x | ||
468 | veor q0, q0, q3 | ||
469 | veor q1, q1, q4 | ||
470 | veor q2, q2, q5 | ||
471 | vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks | ||
472 | vst1.8 {q2}, [r0, :64]! | ||
473 | vmov q3, q5 | ||
474 | teq r4, #0 | ||
475 | beq .Lxtsdecout | ||
476 | b .Lxtsdecloop3x | ||
477 | .Lxtsdec1x: | ||
478 | adds r4, r4, #3 | ||
479 | beq .Lxtsdecout | ||
480 | .Lxtsdecloop: | ||
481 | vld1.8 {q0}, [r1, :64]! | ||
482 | veor q0, q0, q3 | ||
483 | add ip, r2, #32 @ 3rd round key | ||
484 | bl aes_decrypt | ||
485 | veor q0, q0, q3 | ||
486 | vst1.8 {q0}, [r0, :64]! | ||
487 | subs r4, r4, #1 | ||
488 | beq .Lxtsdecout | ||
489 | next_tweak q3, q3, q7, q6 | ||
490 | b .Lxtsdecloop | ||
491 | .Lxtsdecout: | ||
492 | vst1.8 {q3}, [r5] | ||
493 | pop {r4-r6, pc} | ||
494 | ENDPROC(ce_aes_xts_decrypt) | ||
495 | |||
496 | /* | ||
497 | * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the | ||
498 | * AES sbox substitution on each byte in | ||
499 | * 'input' | ||
500 | */ | ||
501 | ENTRY(ce_aes_sub) | ||
502 | vdup.32 q1, r0 | ||
503 | veor q0, q0, q0 | ||
504 | aese.8 q0, q1 | ||
505 | vmov r0, s0 | ||
506 | bx lr | ||
507 | ENDPROC(ce_aes_sub) | ||
508 | |||
509 | /* | ||
510 | * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns | ||
511 | * operation on round key *src | ||
512 | */ | ||
513 | ENTRY(ce_aes_invert) | ||
514 | vld1.8 {q0}, [r1] | ||
515 | aesimc.8 q0, q0 | ||
516 | vst1.8 {q0}, [r0] | ||
517 | bx lr | ||
518 | ENDPROC(ce_aes_invert) | ||
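The next_tweak macro above implements the XTS tweak update: it multiplies the running 128-bit tweak by x in GF(2^128), using the reduction constant 0x87 stored at .Lxts_mul_x. An informal C model of the same computation (illustrative only, not part of the patch; the tweak is held as two little-endian 64-bit halves):

    #include <stdint.h>

    struct xts_tweak { uint64_t lo, hi; };

    /* Multiply the tweak by x with the XTS reduction polynomial
     * x^128 + x^7 + x^2 + x + 1 (0x87); mirrors next_tweak. */
    static struct xts_tweak next_tweak(struct xts_tweak t)
    {
        struct xts_tweak r;

        r.hi = (t.hi << 1) | (t.lo >> 63);              /* carry lo -> hi */
        r.lo = (t.lo << 1) ^ ((t.hi >> 63) ? 0x87 : 0); /* fold the wrap */
        return r;
    }

The assembler achieves the same effect with a vectorized sign-extend (vshr.s64), a mask against {1, 0x87}, a doubling add, and a half swap (vext.8) before the final veor.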
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c new file mode 100644 index 000000000000..b445a5d56f43 --- /dev/null +++ b/arch/arm/crypto/aes-ce-glue.c | |||
@@ -0,0 +1,524 @@ | |||
1 | /* | ||
2 | * aes-ce-glue.c - wrapper code for ARMv8 AES | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <asm/hwcap.h> | ||
12 | #include <asm/neon.h> | ||
14 | #include <crypto/aes.h> | ||
15 | #include <crypto/ablk_helper.h> | ||
16 | #include <crypto/algapi.h> | ||
17 | #include <linux/module.h> | ||
18 | |||
19 | MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); | ||
20 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
21 | MODULE_LICENSE("GPL v2"); | ||
22 | |||
23 | /* defined in aes-ce-core.S */ | ||
24 | asmlinkage u32 ce_aes_sub(u32 input); | ||
25 | asmlinkage void ce_aes_invert(void *dst, void *src); | ||
26 | |||
27 | asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], | ||
28 | int rounds, int blocks); | ||
29 | asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], | ||
30 | int rounds, int blocks); | ||
31 | |||
32 | asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], | ||
33 | int rounds, int blocks, u8 iv[]); | ||
34 | asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], | ||
35 | int rounds, int blocks, u8 iv[]); | ||
36 | |||
37 | asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], | ||
38 | int rounds, int blocks, u8 ctr[]); | ||
39 | |||
40 | asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], | ||
41 | int rounds, int blocks, u8 iv[], | ||
42 | u8 const rk2[], int first); | ||
43 | asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], | ||
44 | int rounds, int blocks, u8 iv[], | ||
45 | u8 const rk2[], int first); | ||
46 | |||
47 | struct aes_block { | ||
48 | u8 b[AES_BLOCK_SIZE]; | ||
49 | }; | ||
50 | |||
51 | static int num_rounds(struct crypto_aes_ctx *ctx) | ||
52 | { | ||
53 | /* | ||
54 | * # of rounds specified by AES: | ||
55 | * 128 bit key 10 rounds | ||
56 | * 192 bit key 12 rounds | ||
57 | * 256 bit key 14 rounds | ||
58 | * => n byte key => 6 + (n/4) rounds | ||
59 | */ | ||
60 | return 6 + ctx->key_length / 4; | ||
61 | } | ||
62 | |||
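The formula in the comment reproduces the round counts mandated by FIPS-197. A compile-time sanity check of the 6 + n/4 mapping (C11, illustrative only, not part of the patch):

    _Static_assert(6 + 16 / 4 == 10, "AES-128: 10 rounds");
    _Static_assert(6 + 24 / 4 == 12, "AES-192: 12 rounds");
    _Static_assert(6 + 32 / 4 == 14, "AES-256: 14 rounds");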
63 | static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, | ||
64 | unsigned int key_len) | ||
65 | { | ||
66 | /* | ||
67 | * The AES key schedule round constants | ||
68 | */ | ||
69 | static u8 const rcon[] = { | ||
70 | 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, | ||
71 | }; | ||
72 | |||
73 | u32 kwords = key_len / sizeof(u32); | ||
74 | struct aes_block *key_enc, *key_dec; | ||
75 | int i, j; | ||
76 | |||
77 | if (key_len != AES_KEYSIZE_128 && | ||
78 | key_len != AES_KEYSIZE_192 && | ||
79 | key_len != AES_KEYSIZE_256) | ||
80 | return -EINVAL; | ||
81 | |||
82 | memcpy(ctx->key_enc, in_key, key_len); | ||
83 | ctx->key_length = key_len; | ||
84 | |||
85 | kernel_neon_begin(); | ||
86 | for (i = 0; i < sizeof(rcon); i++) { | ||
87 | u32 *rki = ctx->key_enc + (i * kwords); | ||
88 | u32 *rko = rki + kwords; | ||
89 | |||
90 | rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8); | ||
91 | rko[0] = rko[0] ^ rki[0] ^ rcon[i]; | ||
92 | rko[1] = rko[0] ^ rki[1]; | ||
93 | rko[2] = rko[1] ^ rki[2]; | ||
94 | rko[3] = rko[2] ^ rki[3]; | ||
95 | |||
96 | if (key_len == AES_KEYSIZE_192) { | ||
97 | if (i >= 7) | ||
98 | break; | ||
99 | rko[4] = rko[3] ^ rki[4]; | ||
100 | rko[5] = rko[4] ^ rki[5]; | ||
101 | } else if (key_len == AES_KEYSIZE_256) { | ||
102 | if (i >= 6) | ||
103 | break; | ||
104 | rko[4] = ce_aes_sub(rko[3]) ^ rki[4]; | ||
105 | rko[5] = rko[4] ^ rki[5]; | ||
106 | rko[6] = rko[5] ^ rki[6]; | ||
107 | rko[7] = rko[6] ^ rki[7]; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * Generate the decryption keys for the Equivalent Inverse Cipher. | ||
113 | * This involves reversing the order of the round keys, and applying | ||
114 | * the Inverse Mix Columns transformation on all but the first and | ||
115 | * the last one. | ||
116 | */ | ||
117 | key_enc = (struct aes_block *)ctx->key_enc; | ||
118 | key_dec = (struct aes_block *)ctx->key_dec; | ||
119 | j = num_rounds(ctx); | ||
120 | |||
121 | key_dec[0] = key_enc[j]; | ||
122 | for (i = 1, j--; j > 0; i++, j--) | ||
123 | ce_aes_invert(key_dec + i, key_enc + j); | ||
124 | key_dec[i] = key_enc[0]; | ||
125 | |||
126 | kernel_neon_end(); | ||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, | ||
131 | unsigned int key_len) | ||
132 | { | ||
133 | struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
134 | int ret; | ||
135 | |||
136 | ret = ce_aes_expandkey(ctx, in_key, key_len); | ||
137 | if (!ret) | ||
138 | return 0; | ||
139 | |||
140 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
141 | return -EINVAL; | ||
142 | } | ||
143 | |||
144 | struct crypto_aes_xts_ctx { | ||
145 | struct crypto_aes_ctx key1; | ||
146 | struct crypto_aes_ctx __aligned(8) key2; | ||
147 | }; | ||
148 | |||
149 | static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
150 | unsigned int key_len) | ||
151 | { | ||
152 | struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
153 | int ret; | ||
154 | |||
155 | ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2); | ||
156 | if (!ret) | ||
157 | ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2], | ||
158 | key_len / 2); | ||
159 | if (!ret) | ||
160 | return 0; | ||
161 | |||
162 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
163 | return -EINVAL; | ||
164 | } | ||
165 | |||
166 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
167 | struct scatterlist *src, unsigned int nbytes) | ||
168 | { | ||
169 | struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
170 | struct blkcipher_walk walk; | ||
171 | unsigned int blocks; | ||
172 | int err; | ||
173 | |||
174 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
175 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
176 | err = blkcipher_walk_virt(desc, &walk); | ||
177 | |||
178 | kernel_neon_begin(); | ||
179 | while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { | ||
180 | ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
181 | (u8 *)ctx->key_enc, num_rounds(ctx), blocks); | ||
182 | err = blkcipher_walk_done(desc, &walk, | ||
183 | walk.nbytes % AES_BLOCK_SIZE); | ||
184 | } | ||
185 | kernel_neon_end(); | ||
186 | return err; | ||
187 | } | ||
188 | |||
189 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
190 | struct scatterlist *src, unsigned int nbytes) | ||
191 | { | ||
192 | struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
193 | struct blkcipher_walk walk; | ||
194 | unsigned int blocks; | ||
195 | int err; | ||
196 | |||
197 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
198 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
199 | err = blkcipher_walk_virt(desc, &walk); | ||
200 | |||
201 | kernel_neon_begin(); | ||
202 | while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { | ||
203 | ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
204 | (u8 *)ctx->key_dec, num_rounds(ctx), blocks); | ||
205 | err = blkcipher_walk_done(desc, &walk, | ||
206 | walk.nbytes % AES_BLOCK_SIZE); | ||
207 | } | ||
208 | kernel_neon_end(); | ||
209 | return err; | ||
210 | } | ||
211 | |||
212 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
213 | struct scatterlist *src, unsigned int nbytes) | ||
214 | { | ||
215 | struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
216 | struct blkcipher_walk walk; | ||
217 | unsigned int blocks; | ||
218 | int err; | ||
219 | |||
220 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
221 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
222 | err = blkcipher_walk_virt(desc, &walk); | ||
223 | |||
224 | kernel_neon_begin(); | ||
225 | while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { | ||
226 | ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
227 | (u8 *)ctx->key_enc, num_rounds(ctx), blocks, | ||
228 | walk.iv); | ||
229 | err = blkcipher_walk_done(desc, &walk, | ||
230 | walk.nbytes % AES_BLOCK_SIZE); | ||
231 | } | ||
232 | kernel_neon_end(); | ||
233 | return err; | ||
234 | } | ||
235 | |||
236 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
237 | struct scatterlist *src, unsigned int nbytes) | ||
238 | { | ||
239 | struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
240 | struct blkcipher_walk walk; | ||
241 | unsigned int blocks; | ||
242 | int err; | ||
243 | |||
244 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
245 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
246 | err = blkcipher_walk_virt(desc, &walk); | ||
247 | |||
248 | kernel_neon_begin(); | ||
249 | while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { | ||
250 | ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
251 | (u8 *)ctx->key_dec, num_rounds(ctx), blocks, | ||
252 | walk.iv); | ||
253 | err = blkcipher_walk_done(desc, &walk, | ||
254 | walk.nbytes % AES_BLOCK_SIZE); | ||
255 | } | ||
256 | kernel_neon_end(); | ||
257 | return err; | ||
258 | } | ||
259 | |||
260 | static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
261 | struct scatterlist *src, unsigned int nbytes) | ||
262 | { | ||
263 | struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
264 | struct blkcipher_walk walk; | ||
265 | int err, blocks; | ||
266 | |||
267 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
268 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
269 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | ||
270 | |||
271 | kernel_neon_begin(); | ||
272 | while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { | ||
273 | ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
274 | (u8 *)ctx->key_enc, num_rounds(ctx), blocks, | ||
275 | walk.iv); | ||
276 | nbytes -= blocks * AES_BLOCK_SIZE; | ||
277 | if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) | ||
278 | break; | ||
279 | err = blkcipher_walk_done(desc, &walk, | ||
280 | walk.nbytes % AES_BLOCK_SIZE); | ||
281 | } | ||
282 | if (nbytes) { | ||
283 | u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; | ||
284 | u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; | ||
285 | u8 __aligned(8) tail[AES_BLOCK_SIZE]; | ||
286 | |||
287 | /* | ||
288 | * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need | ||
289 | * to tell ce_aes_ctr_encrypt() to only read half a block. | ||
290 | */ | ||
291 | blocks = (nbytes <= 8) ? -1 : 1; | ||
292 | |||
293 | ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, | ||
294 | num_rounds(ctx), blocks, walk.iv); | ||
295 | memcpy(tdst, tail, nbytes); | ||
296 | err = blkcipher_walk_done(desc, &walk, 0); | ||
297 | } | ||
298 | kernel_neon_end(); | ||
299 | |||
300 | return err; | ||
301 | } | ||
302 | |||
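The tail handling at the end of ctr_encrypt() is worth a note: the assembler routine only performs 8-byte-aligned accesses of 16 bytes (or 8 bytes when blocks == -1), so the final partial block is generated into an aligned stack buffer and then copied out with memcpy(). A standalone restatement of the block-count convention (hypothetical helper, illustrative only):

    /* How the final partial block is signalled to the core routine.
     * nbytes = 20: one full block in the main loop, 4 bytes remain;
     *   blocks = 1 produces a full keystream block into tail[], and
     *   memcpy() copies out only the 4 live bytes.
     * nbytes = 5: blocks = -1, which .Lctrhalfblock interprets as
     *   "touch only 8 bytes", keeping the aligned access in bounds. */
    static int ctr_tail_blocks(unsigned int nbytes)
    {
        return (nbytes <= 8) ? -1 : 1;
    }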
303 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
304 | struct scatterlist *src, unsigned int nbytes) | ||
305 | { | ||
306 | struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
307 | int err, first, rounds = num_rounds(&ctx->key1); | ||
308 | struct blkcipher_walk walk; | ||
309 | unsigned int blocks; | ||
310 | |||
311 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
312 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
313 | err = blkcipher_walk_virt(desc, &walk); | ||
314 | |||
315 | kernel_neon_begin(); | ||
316 | for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { | ||
317 | ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
318 | (u8 *)ctx->key1.key_enc, rounds, blocks, | ||
319 | walk.iv, (u8 *)ctx->key2.key_enc, first); | ||
320 | err = blkcipher_walk_done(desc, &walk, | ||
321 | walk.nbytes % AES_BLOCK_SIZE); | ||
322 | } | ||
323 | kernel_neon_end(); | ||
324 | |||
325 | return err; | ||
326 | } | ||
327 | |||
328 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
329 | struct scatterlist *src, unsigned int nbytes) | ||
330 | { | ||
331 | struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
332 | int err, first, rounds = num_rounds(&ctx->key1); | ||
333 | struct blkcipher_walk walk; | ||
334 | unsigned int blocks; | ||
335 | |||
336 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
337 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
338 | err = blkcipher_walk_virt(desc, &walk); | ||
339 | |||
340 | kernel_neon_begin(); | ||
341 | for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { | ||
342 | ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
343 | (u8 *)ctx->key1.key_dec, rounds, blocks, | ||
344 | walk.iv, (u8 *)ctx->key2.key_enc, first); | ||
345 | err = blkcipher_walk_done(desc, &walk, | ||
346 | walk.nbytes % AES_BLOCK_SIZE); | ||
347 | } | ||
348 | kernel_neon_end(); | ||
349 | |||
350 | return err; | ||
351 | } | ||
352 | |||
353 | static struct crypto_alg aes_algs[] = { { | ||
354 | .cra_name = "__ecb-aes-ce", | ||
355 | .cra_driver_name = "__driver-ecb-aes-ce", | ||
356 | .cra_priority = 0, | ||
357 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | | ||
358 | CRYPTO_ALG_INTERNAL, | ||
359 | .cra_blocksize = AES_BLOCK_SIZE, | ||
360 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | ||
361 | .cra_alignmask = 7, | ||
362 | .cra_type = &crypto_blkcipher_type, | ||
363 | .cra_module = THIS_MODULE, | ||
364 | .cra_blkcipher = { | ||
365 | .min_keysize = AES_MIN_KEY_SIZE, | ||
366 | .max_keysize = AES_MAX_KEY_SIZE, | ||
367 | .ivsize = AES_BLOCK_SIZE, | ||
368 | .setkey = ce_aes_setkey, | ||
369 | .encrypt = ecb_encrypt, | ||
370 | .decrypt = ecb_decrypt, | ||
371 | }, | ||
372 | }, { | ||
373 | .cra_name = "__cbc-aes-ce", | ||
374 | .cra_driver_name = "__driver-cbc-aes-ce", | ||
375 | .cra_priority = 0, | ||
376 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | | ||
377 | CRYPTO_ALG_INTERNAL, | ||
378 | .cra_blocksize = AES_BLOCK_SIZE, | ||
379 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | ||
380 | .cra_alignmask = 7, | ||
381 | .cra_type = &crypto_blkcipher_type, | ||
382 | .cra_module = THIS_MODULE, | ||
383 | .cra_blkcipher = { | ||
384 | .min_keysize = AES_MIN_KEY_SIZE, | ||
385 | .max_keysize = AES_MAX_KEY_SIZE, | ||
386 | .ivsize = AES_BLOCK_SIZE, | ||
387 | .setkey = ce_aes_setkey, | ||
388 | .encrypt = cbc_encrypt, | ||
389 | .decrypt = cbc_decrypt, | ||
390 | }, | ||
391 | }, { | ||
392 | .cra_name = "__ctr-aes-ce", | ||
393 | .cra_driver_name = "__driver-ctr-aes-ce", | ||
394 | .cra_priority = 0, | ||
395 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | | ||
396 | CRYPTO_ALG_INTERNAL, | ||
397 | .cra_blocksize = 1, | ||
398 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | ||
399 | .cra_alignmask = 7, | ||
400 | .cra_type = &crypto_blkcipher_type, | ||
401 | .cra_module = THIS_MODULE, | ||
402 | .cra_blkcipher = { | ||
403 | .min_keysize = AES_MIN_KEY_SIZE, | ||
404 | .max_keysize = AES_MAX_KEY_SIZE, | ||
405 | .ivsize = AES_BLOCK_SIZE, | ||
406 | .setkey = ce_aes_setkey, | ||
407 | .encrypt = ctr_encrypt, | ||
408 | .decrypt = ctr_encrypt, | ||
409 | }, | ||
410 | }, { | ||
411 | .cra_name = "__xts-aes-ce", | ||
412 | .cra_driver_name = "__driver-xts-aes-ce", | ||
413 | .cra_priority = 0, | ||
414 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | | ||
415 | CRYPTO_ALG_INTERNAL, | ||
416 | .cra_blocksize = AES_BLOCK_SIZE, | ||
417 | .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), | ||
418 | .cra_alignmask = 7, | ||
419 | .cra_type = &crypto_blkcipher_type, | ||
420 | .cra_module = THIS_MODULE, | ||
421 | .cra_blkcipher = { | ||
422 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
423 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
424 | .ivsize = AES_BLOCK_SIZE, | ||
425 | .setkey = xts_set_key, | ||
426 | .encrypt = xts_encrypt, | ||
427 | .decrypt = xts_decrypt, | ||
428 | }, | ||
429 | }, { | ||
430 | .cra_name = "ecb(aes)", | ||
431 | .cra_driver_name = "ecb-aes-ce", | ||
432 | .cra_priority = 300, | ||
433 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
434 | .cra_blocksize = AES_BLOCK_SIZE, | ||
435 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
436 | .cra_alignmask = 7, | ||
437 | .cra_type = &crypto_ablkcipher_type, | ||
438 | .cra_module = THIS_MODULE, | ||
439 | .cra_init = ablk_init, | ||
440 | .cra_exit = ablk_exit, | ||
441 | .cra_ablkcipher = { | ||
442 | .min_keysize = AES_MIN_KEY_SIZE, | ||
443 | .max_keysize = AES_MAX_KEY_SIZE, | ||
444 | .ivsize = AES_BLOCK_SIZE, | ||
445 | .setkey = ablk_set_key, | ||
446 | .encrypt = ablk_encrypt, | ||
447 | .decrypt = ablk_decrypt, | ||
448 | } | ||
449 | }, { | ||
450 | .cra_name = "cbc(aes)", | ||
451 | .cra_driver_name = "cbc-aes-ce", | ||
452 | .cra_priority = 300, | ||
453 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
454 | .cra_blocksize = AES_BLOCK_SIZE, | ||
455 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
456 | .cra_alignmask = 7, | ||
457 | .cra_type = &crypto_ablkcipher_type, | ||
458 | .cra_module = THIS_MODULE, | ||
459 | .cra_init = ablk_init, | ||
460 | .cra_exit = ablk_exit, | ||
461 | .cra_ablkcipher = { | ||
462 | .min_keysize = AES_MIN_KEY_SIZE, | ||
463 | .max_keysize = AES_MAX_KEY_SIZE, | ||
464 | .ivsize = AES_BLOCK_SIZE, | ||
465 | .setkey = ablk_set_key, | ||
466 | .encrypt = ablk_encrypt, | ||
467 | .decrypt = ablk_decrypt, | ||
468 | } | ||
469 | }, { | ||
470 | .cra_name = "ctr(aes)", | ||
471 | .cra_driver_name = "ctr-aes-ce", | ||
472 | .cra_priority = 300, | ||
473 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
474 | .cra_blocksize = 1, | ||
475 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
476 | .cra_alignmask = 7, | ||
477 | .cra_type = &crypto_ablkcipher_type, | ||
478 | .cra_module = THIS_MODULE, | ||
479 | .cra_init = ablk_init, | ||
480 | .cra_exit = ablk_exit, | ||
481 | .cra_ablkcipher = { | ||
482 | .min_keysize = AES_MIN_KEY_SIZE, | ||
483 | .max_keysize = AES_MAX_KEY_SIZE, | ||
484 | .ivsize = AES_BLOCK_SIZE, | ||
485 | .setkey = ablk_set_key, | ||
486 | .encrypt = ablk_encrypt, | ||
487 | .decrypt = ablk_decrypt, | ||
488 | } | ||
489 | }, { | ||
490 | .cra_name = "xts(aes)", | ||
491 | .cra_driver_name = "xts-aes-ce", | ||
492 | .cra_priority = 300, | ||
493 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
494 | .cra_blocksize = AES_BLOCK_SIZE, | ||
495 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
496 | .cra_alignmask = 7, | ||
497 | .cra_type = &crypto_ablkcipher_type, | ||
498 | .cra_module = THIS_MODULE, | ||
499 | .cra_init = ablk_init, | ||
500 | .cra_exit = ablk_exit, | ||
501 | .cra_ablkcipher = { | ||
502 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
503 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
504 | .ivsize = AES_BLOCK_SIZE, | ||
505 | .setkey = ablk_set_key, | ||
506 | .encrypt = ablk_encrypt, | ||
507 | .decrypt = ablk_decrypt, | ||
508 | } | ||
509 | } }; | ||
510 | |||
511 | static int __init aes_init(void) | ||
512 | { | ||
513 | if (!(elf_hwcap2 & HWCAP2_AES)) | ||
514 | return -ENODEV; | ||
515 | return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); | ||
516 | } | ||
517 | |||
518 | static void __exit aes_exit(void) | ||
519 | { | ||
520 | crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); | ||
521 | } | ||
522 | |||
523 | module_init(aes_init); | ||
524 | module_exit(aes_exit); | ||
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c index 15468fbbdea3..6d685298690e 100644 --- a/arch/arm/crypto/aesbs-glue.c +++ b/arch/arm/crypto/aesbs-glue.c | |||
@@ -301,7 +301,8 @@ static struct crypto_alg aesbs_algs[] = { { | |||
301 | .cra_name = "__cbc-aes-neonbs", | 301 | .cra_name = "__cbc-aes-neonbs", |
302 | .cra_driver_name = "__driver-cbc-aes-neonbs", | 302 | .cra_driver_name = "__driver-cbc-aes-neonbs", |
303 | .cra_priority = 0, | 303 | .cra_priority = 0, |
304 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 304 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
305 | CRYPTO_ALG_INTERNAL, | ||
305 | .cra_blocksize = AES_BLOCK_SIZE, | 306 | .cra_blocksize = AES_BLOCK_SIZE, |
306 | .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), | 307 | .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), |
307 | .cra_alignmask = 7, | 308 | .cra_alignmask = 7, |
@@ -319,7 +320,8 @@ static struct crypto_alg aesbs_algs[] = { { | |||
319 | .cra_name = "__ctr-aes-neonbs", | 320 | .cra_name = "__ctr-aes-neonbs", |
320 | .cra_driver_name = "__driver-ctr-aes-neonbs", | 321 | .cra_driver_name = "__driver-ctr-aes-neonbs", |
321 | .cra_priority = 0, | 322 | .cra_priority = 0, |
322 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 323 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
324 | CRYPTO_ALG_INTERNAL, | ||
323 | .cra_blocksize = 1, | 325 | .cra_blocksize = 1, |
324 | .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), | 326 | .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), |
325 | .cra_alignmask = 7, | 327 | .cra_alignmask = 7, |
@@ -337,7 +339,8 @@ static struct crypto_alg aesbs_algs[] = { { | |||
337 | .cra_name = "__xts-aes-neonbs", | 339 | .cra_name = "__xts-aes-neonbs", |
338 | .cra_driver_name = "__driver-xts-aes-neonbs", | 340 | .cra_driver_name = "__driver-xts-aes-neonbs", |
339 | .cra_priority = 0, | 341 | .cra_priority = 0, |
340 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 342 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
343 | CRYPTO_ALG_INTERNAL, | ||
341 | .cra_blocksize = AES_BLOCK_SIZE, | 344 | .cra_blocksize = AES_BLOCK_SIZE, |
342 | .cra_ctxsize = sizeof(struct aesbs_xts_ctx), | 345 | .cra_ctxsize = sizeof(struct aesbs_xts_ctx), |
343 | .cra_alignmask = 7, | 346 | .cra_alignmask = 7, |
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S new file mode 100644 index 000000000000..f6ab8bcc9efe --- /dev/null +++ b/arch/arm/crypto/ghash-ce-core.S | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions. | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License version 2 as published | ||
8 | * by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/linkage.h> | ||
12 | #include <asm/assembler.h> | ||
13 | |||
14 | SHASH .req q0 | ||
15 | SHASH2 .req q1 | ||
16 | T1 .req q2 | ||
17 | T2 .req q3 | ||
18 | MASK .req q4 | ||
19 | XL .req q5 | ||
20 | XM .req q6 | ||
21 | XH .req q7 | ||
22 | IN1 .req q7 | ||
23 | |||
24 | SHASH_L .req d0 | ||
25 | SHASH_H .req d1 | ||
26 | SHASH2_L .req d2 | ||
27 | T1_L .req d4 | ||
28 | MASK_L .req d8 | ||
29 | XL_L .req d10 | ||
30 | XL_H .req d11 | ||
31 | XM_L .req d12 | ||
32 | XM_H .req d13 | ||
33 | XH_L .req d14 | ||
34 | |||
35 | .text | ||
36 | .fpu crypto-neon-fp-armv8 | ||
37 | |||
38 | /* | ||
39 | * void pmull_ghash_update(int blocks, u64 dg[], const char *src, | ||
40 | * struct ghash_key const *k, const char *head) | ||
41 | */ | ||
42 | ENTRY(pmull_ghash_update) | ||
43 | vld1.64 {SHASH}, [r3] | ||
44 | vld1.64 {XL}, [r1] | ||
45 | vmov.i8 MASK, #0xe1 | ||
46 | vext.8 SHASH2, SHASH, SHASH, #8 | ||
47 | vshl.u64 MASK, MASK, #57 | ||
48 | veor SHASH2, SHASH2, SHASH | ||
49 | |||
50 | /* do the head block first, if supplied */ | ||
51 | ldr ip, [sp] | ||
52 | teq ip, #0 | ||
53 | beq 0f | ||
54 | vld1.64 {T1}, [ip] | ||
55 | teq r0, #0 | ||
56 | b 1f | ||
57 | |||
58 | 0: vld1.64 {T1}, [r2]! | ||
59 | subs r0, r0, #1 | ||
60 | |||
61 | 1: /* multiply XL by SHASH in GF(2^128) */ | ||
62 | #ifndef CONFIG_CPU_BIG_ENDIAN | ||
63 | vrev64.8 T1, T1 | ||
64 | #endif | ||
65 | vext.8 T2, XL, XL, #8 | ||
66 | vext.8 IN1, T1, T1, #8 | ||
67 | veor T1, T1, T2 | ||
68 | veor XL, XL, IN1 | ||
69 | |||
70 | vmull.p64 XH, SHASH_H, XL_H @ a1 * b1 | ||
71 | veor T1, T1, XL | ||
72 | vmull.p64 XL, SHASH_L, XL_L @ a0 * b0 | ||
73 | vmull.p64 XM, SHASH2_L, T1_L @ (a1 + a0)(b1 + b0) | ||
74 | |||
75 | vext.8 T1, XL, XH, #8 | ||
76 | veor T2, XL, XH | ||
77 | veor XM, XM, T1 | ||
78 | veor XM, XM, T2 | ||
79 | vmull.p64 T2, XL_L, MASK_L | ||
80 | |||
81 | vmov XH_L, XM_H | ||
82 | vmov XM_H, XL_L | ||
83 | |||
84 | veor XL, XM, T2 | ||
85 | vext.8 T2, XL, XL, #8 | ||
86 | vmull.p64 XL, XL_L, MASK_L | ||
87 | veor T2, T2, XH | ||
88 | veor XL, XL, T2 | ||
89 | |||
90 | bne 0b | ||
91 | |||
92 | vst1.64 {XL}, [r1] | ||
93 | bx lr | ||
94 | ENDPROC(pmull_ghash_update) | ||
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c new file mode 100644 index 000000000000..03a39fe29246 --- /dev/null +++ b/arch/arm/crypto/ghash-ce-glue.c | |||
@@ -0,0 +1,320 @@ | |||
1 | /* | ||
2 | * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions. | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License version 2 as published | ||
8 | * by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <asm/hwcap.h> | ||
12 | #include <asm/neon.h> | ||
13 | #include <asm/simd.h> | ||
14 | #include <asm/unaligned.h> | ||
15 | #include <crypto/cryptd.h> | ||
16 | #include <crypto/internal/hash.h> | ||
17 | #include <crypto/gf128mul.h> | ||
18 | #include <linux/crypto.h> | ||
19 | #include <linux/module.h> | ||
20 | |||
21 | MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions"); | ||
22 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
23 | MODULE_LICENSE("GPL v2"); | ||
24 | |||
25 | #define GHASH_BLOCK_SIZE 16 | ||
26 | #define GHASH_DIGEST_SIZE 16 | ||
27 | |||
28 | struct ghash_key { | ||
29 | u64 a; | ||
30 | u64 b; | ||
31 | }; | ||
32 | |||
33 | struct ghash_desc_ctx { | ||
34 | u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)]; | ||
35 | u8 buf[GHASH_BLOCK_SIZE]; | ||
36 | u32 count; | ||
37 | }; | ||
38 | |||
39 | struct ghash_async_ctx { | ||
40 | struct cryptd_ahash *cryptd_tfm; | ||
41 | }; | ||
42 | |||
43 | asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src, | ||
44 | struct ghash_key const *k, const char *head); | ||
45 | |||
46 | static int ghash_init(struct shash_desc *desc) | ||
47 | { | ||
48 | struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); | ||
49 | |||
50 | *ctx = (struct ghash_desc_ctx){}; | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | static int ghash_update(struct shash_desc *desc, const u8 *src, | ||
55 | unsigned int len) | ||
56 | { | ||
57 | struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); | ||
58 | unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; | ||
59 | |||
60 | ctx->count += len; | ||
61 | |||
62 | if ((partial + len) >= GHASH_BLOCK_SIZE) { | ||
63 | struct ghash_key *key = crypto_shash_ctx(desc->tfm); | ||
64 | int blocks; | ||
65 | |||
66 | if (partial) { | ||
67 | int p = GHASH_BLOCK_SIZE - partial; | ||
68 | |||
69 | memcpy(ctx->buf + partial, src, p); | ||
70 | src += p; | ||
71 | len -= p; | ||
72 | } | ||
73 | |||
74 | blocks = len / GHASH_BLOCK_SIZE; | ||
75 | len %= GHASH_BLOCK_SIZE; | ||
76 | |||
77 | kernel_neon_begin(); | ||
78 | pmull_ghash_update(blocks, ctx->digest, src, key, | ||
79 | partial ? ctx->buf : NULL); | ||
80 | kernel_neon_end(); | ||
81 | src += blocks * GHASH_BLOCK_SIZE; | ||
82 | partial = 0; | ||
83 | } | ||
84 | if (len) | ||
85 | memcpy(ctx->buf + partial, src, len); | ||
86 | return 0; | ||
87 | } | ||
88 | |||
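The buffering logic above only enters the NEON section once at least one full block is available, passing any previously buffered bytes to the core as the head block. A standalone model of how many blocks reach pmull_ghash_update() in total (hypothetical helper, illustrative only):

    /* Full 16-byte blocks fed to the NEON core for a given running
     * byte count and new chunk length, including a refilled head
     * block. E.g. count % 16 == 4 and len == 30 gives 2 blocks. */
    static unsigned int blocks_to_core(unsigned int count, unsigned int len)
    {
        return (count % 16 + len) / 16;
    }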
89 | static int ghash_final(struct shash_desc *desc, u8 *dst) | ||
90 | { | ||
91 | struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); | ||
92 | unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; | ||
93 | |||
94 | if (partial) { | ||
95 | struct ghash_key *key = crypto_shash_ctx(desc->tfm); | ||
96 | |||
97 | memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); | ||
98 | kernel_neon_begin(); | ||
99 | pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); | ||
100 | kernel_neon_end(); | ||
101 | } | ||
102 | put_unaligned_be64(ctx->digest[1], dst); | ||
103 | put_unaligned_be64(ctx->digest[0], dst + 8); | ||
104 | |||
105 | *ctx = (struct ghash_desc_ctx){}; | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | static int ghash_setkey(struct crypto_shash *tfm, | ||
110 | const u8 *inkey, unsigned int keylen) | ||
111 | { | ||
112 | struct ghash_key *key = crypto_shash_ctx(tfm); | ||
113 | u64 a, b; | ||
114 | |||
115 | if (keylen != GHASH_BLOCK_SIZE) { | ||
116 | crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
117 | return -EINVAL; | ||
118 | } | ||
119 | |||
120 | /* perform multiplication by 'x' in GF(2^128) */ | ||
121 | b = get_unaligned_be64(inkey); | ||
122 | a = get_unaligned_be64(inkey + 8); | ||
123 | |||
124 | key->a = (a << 1) | (b >> 63); | ||
125 | key->b = (b << 1) | (a >> 63); | ||
126 | |||
127 | if (b >> 63) | ||
128 | key->b ^= 0xc200000000000000UL; | ||
129 | |||
130 | return 0; | ||
131 | } | ||
132 | |||
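ghash_setkey() stores H premultiplied by x in GF(2^128); both the 0xc2... fold-in constant here and the 0xe1-based MASK built in the assembler derive from the GHASH reduction polynomial x^128 + x^7 + x^2 + x + 1. A standalone restatement of the doubling (illustrative only; a and b are laid out exactly as in struct ghash_key above, b from the first eight key bytes and a from the last eight):

    #include <stdint.h>

    static void ghash_key_mul_x(uint64_t *a, uint64_t *b)
    {
        uint64_t A = *a, B = *b;

        *a = (A << 1) | (B >> 63);
        *b = (B << 1) | (A >> 63);
        if (B >> 63)                        /* bit shifted out the top */
            *b ^= 0xc200000000000000ULL;    /* fold in the reduction  */
    }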
133 | static struct shash_alg ghash_alg = { | ||
134 | .digestsize = GHASH_DIGEST_SIZE, | ||
135 | .init = ghash_init, | ||
136 | .update = ghash_update, | ||
137 | .final = ghash_final, | ||
138 | .setkey = ghash_setkey, | ||
139 | .descsize = sizeof(struct ghash_desc_ctx), | ||
140 | .base = { | ||
141 | .cra_name = "ghash", | ||
142 | .cra_driver_name = "__driver-ghash-ce", | ||
143 | .cra_priority = 0, | ||
144 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL, | ||
145 | .cra_blocksize = GHASH_BLOCK_SIZE, | ||
146 | .cra_ctxsize = sizeof(struct ghash_key), | ||
147 | .cra_module = THIS_MODULE, | ||
148 | }, | ||
149 | }; | ||
150 | |||
151 | static int ghash_async_init(struct ahash_request *req) | ||
152 | { | ||
153 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
154 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
155 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
156 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
157 | |||
158 | if (!may_use_simd()) { | ||
159 | memcpy(cryptd_req, req, sizeof(*req)); | ||
160 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
161 | return crypto_ahash_init(cryptd_req); | ||
162 | } else { | ||
163 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
164 | struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); | ||
165 | |||
166 | desc->tfm = child; | ||
167 | desc->flags = req->base.flags; | ||
168 | return crypto_shash_init(desc); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | static int ghash_async_update(struct ahash_request *req) | ||
173 | { | ||
174 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
175 | |||
176 | if (!may_use_simd()) { | ||
177 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
178 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
179 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
180 | |||
181 | memcpy(cryptd_req, req, sizeof(*req)); | ||
182 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
183 | return crypto_ahash_update(cryptd_req); | ||
184 | } else { | ||
185 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
186 | return shash_ahash_update(req, desc); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | static int ghash_async_final(struct ahash_request *req) | ||
191 | { | ||
192 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
193 | |||
194 | if (!may_use_simd()) { | ||
195 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
196 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
197 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
198 | |||
199 | memcpy(cryptd_req, req, sizeof(*req)); | ||
200 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
201 | return crypto_ahash_final(cryptd_req); | ||
202 | } else { | ||
203 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
204 | return crypto_shash_final(desc, req->result); | ||
205 | } | ||
206 | } | ||
207 | |||
208 | static int ghash_async_digest(struct ahash_request *req) | ||
209 | { | ||
210 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
211 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
212 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
213 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
214 | |||
215 | if (!may_use_simd()) { | ||
216 | memcpy(cryptd_req, req, sizeof(*req)); | ||
217 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
218 | return crypto_ahash_digest(cryptd_req); | ||
219 | } else { | ||
220 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
221 | struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); | ||
222 | |||
223 | desc->tfm = child; | ||
224 | desc->flags = req->base.flags; | ||
225 | return shash_ahash_digest(req, desc); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, | ||
230 | unsigned int keylen) | ||
231 | { | ||
232 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
233 | struct crypto_ahash *child = &ctx->cryptd_tfm->base; | ||
234 | int err; | ||
235 | |||
236 | crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
237 | crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) | ||
238 | & CRYPTO_TFM_REQ_MASK); | ||
239 | err = crypto_ahash_setkey(child, key, keylen); | ||
240 | crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) | ||
241 | & CRYPTO_TFM_RES_MASK); | ||
242 | |||
243 | return err; | ||
244 | } | ||
245 | |||
246 | static int ghash_async_init_tfm(struct crypto_tfm *tfm) | ||
247 | { | ||
248 | struct cryptd_ahash *cryptd_tfm; | ||
249 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | ||
250 | |||
251 | cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce", | ||
252 | CRYPTO_ALG_INTERNAL, | ||
253 | CRYPTO_ALG_INTERNAL); | ||
254 | if (IS_ERR(cryptd_tfm)) | ||
255 | return PTR_ERR(cryptd_tfm); | ||
256 | ctx->cryptd_tfm = cryptd_tfm; | ||
257 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
258 | sizeof(struct ahash_request) + | ||
259 | crypto_ahash_reqsize(&cryptd_tfm->base)); | ||
260 | |||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | static void ghash_async_exit_tfm(struct crypto_tfm *tfm) | ||
265 | { | ||
266 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | ||
267 | |||
268 | cryptd_free_ahash(ctx->cryptd_tfm); | ||
269 | } | ||
270 | |||
271 | static struct ahash_alg ghash_async_alg = { | ||
272 | .init = ghash_async_init, | ||
273 | .update = ghash_async_update, | ||
274 | .final = ghash_async_final, | ||
275 | .setkey = ghash_async_setkey, | ||
276 | .digest = ghash_async_digest, | ||
277 | .halg.digestsize = GHASH_DIGEST_SIZE, | ||
278 | .halg.base = { | ||
279 | .cra_name = "ghash", | ||
280 | .cra_driver_name = "ghash-ce", | ||
281 | .cra_priority = 300, | ||
282 | .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, | ||
283 | .cra_blocksize = GHASH_BLOCK_SIZE, | ||
284 | .cra_type = &crypto_ahash_type, | ||
285 | .cra_ctxsize = sizeof(struct ghash_async_ctx), | ||
286 | .cra_module = THIS_MODULE, | ||
287 | .cra_init = ghash_async_init_tfm, | ||
288 | .cra_exit = ghash_async_exit_tfm, | ||
289 | }, | ||
290 | }; | ||
291 | |||
292 | static int __init ghash_ce_mod_init(void) | ||
293 | { | ||
294 | int err; | ||
295 | |||
296 | if (!(elf_hwcap2 & HWCAP2_PMULL)) | ||
297 | return -ENODEV; | ||
298 | |||
299 | err = crypto_register_shash(&ghash_alg); | ||
300 | if (err) | ||
301 | return err; | ||
302 | err = crypto_register_ahash(&ghash_async_alg); | ||
303 | if (err) | ||
304 | goto err_shash; | ||
305 | |||
306 | return 0; | ||
307 | |||
308 | err_shash: | ||
309 | crypto_unregister_shash(&ghash_alg); | ||
310 | return err; | ||
311 | } | ||
312 | |||
313 | static void __exit ghash_ce_mod_exit(void) | ||
314 | { | ||
315 | crypto_unregister_ahash(&ghash_async_alg); | ||
316 | crypto_unregister_shash(&ghash_alg); | ||
317 | } | ||
318 | |||
319 | module_init(ghash_ce_mod_init); | ||
320 | module_exit(ghash_ce_mod_exit); | ||
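Only the async "ghash-ce" algorithm registered above is visible outside the kernel; the CRYPTO_ALG_INTERNAL "__driver-ghash-ce" shash can be instantiated only by in-kernel callers that ask for internal algorithms explicitly, the way ghash_async_init_tfm() does through cryptd_alloc_ahash(). A minimal user-space sketch reaching the public name through AF_ALG (hypothetical example, error handling omitted):

    #include <stddef.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/if_alg.h>

    int ghash_digest(const unsigned char key[16],
                     const void *msg, size_t len, unsigned char out[16])
    {
        struct sockaddr_alg sa = {
            .salg_family = AF_ALG,
            .salg_type   = "hash",
            .salg_name   = "ghash",   /* "__driver-ghash-ce" is hidden */
        };
        int tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
        int op;

        bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
        setsockopt(tfm, SOL_ALG, ALG_SET_KEY, key, 16);
        op = accept(tfm, NULL, 0);
        send(op, msg, len, 0);        /* no MSG_MORE: finalize */
        read(op, out, 16);            /* fetch the 16-byte digest */
        close(op);
        close(tfm);
        return 0;
    }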
diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S new file mode 100644 index 000000000000..b623f51ccbcf --- /dev/null +++ b/arch/arm/crypto/sha1-ce-core.S | |||
@@ -0,0 +1,125 @@ | |||
1 | /* | ||
2 | * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd. | ||
5 | * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/linkage.h> | ||
13 | #include <asm/assembler.h> | ||
14 | |||
15 | .text | ||
16 | .fpu crypto-neon-fp-armv8 | ||
17 | |||
18 | k0 .req q0 | ||
19 | k1 .req q1 | ||
20 | k2 .req q2 | ||
21 | k3 .req q3 | ||
22 | |||
23 | ta0 .req q4 | ||
24 | ta1 .req q5 | ||
25 | tb0 .req q5 | ||
26 | tb1 .req q4 | ||
27 | |||
28 | dga .req q6 | ||
29 | dgb .req q7 | ||
30 | dgbs .req s28 | ||
31 | |||
32 | dg0 .req q12 | ||
33 | dg1a0 .req q13 | ||
34 | dg1a1 .req q14 | ||
35 | dg1b0 .req q14 | ||
36 | dg1b1 .req q13 | ||
37 | |||
38 | .macro add_only, op, ev, rc, s0, dg1 | ||
39 | .ifnb \s0 | ||
40 | vadd.u32 tb\ev, q\s0, \rc | ||
41 | .endif | ||
42 | sha1h.32 dg1b\ev, dg0 | ||
43 | .ifb \dg1 | ||
44 | sha1\op\().32 dg0, dg1a\ev, ta\ev | ||
45 | .else | ||
46 | sha1\op\().32 dg0, \dg1, ta\ev | ||
47 | .endif | ||
48 | .endm | ||
49 | |||
50 | .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 | ||
51 | sha1su0.32 q\s0, q\s1, q\s2 | ||
52 | add_only \op, \ev, \rc, \s1, \dg1 | ||
53 | sha1su1.32 q\s0, q\s3 | ||
54 | .endm | ||
55 | |||
56 | .align 6 | ||
57 | .Lsha1_rcon: | ||
58 | .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999 | ||
59 | .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1 | ||
60 | .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc | ||
61 | .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6 | ||
62 | |||
63 | /* | ||
64 | * void sha1_ce_transform(struct sha1_state *sst, u8 const *src, | ||
65 | * int blocks); | ||
66 | */ | ||
67 | ENTRY(sha1_ce_transform) | ||
68 | /* load round constants */ | ||
69 | adr ip, .Lsha1_rcon | ||
70 | vld1.32 {k0-k1}, [ip, :128]! | ||
71 | vld1.32 {k2-k3}, [ip, :128] | ||
72 | |||
73 | /* load state */ | ||
74 | vld1.32 {dga}, [r0] | ||
75 | vldr dgbs, [r0, #16] | ||
76 | |||
77 | /* load input */ | ||
78 | 0: vld1.32 {q8-q9}, [r1]! | ||
79 | vld1.32 {q10-q11}, [r1]! | ||
80 | subs r2, r2, #1 | ||
81 | |||
82 | #ifndef CONFIG_CPU_BIG_ENDIAN | ||
83 | vrev32.8 q8, q8 | ||
84 | vrev32.8 q9, q9 | ||
85 | vrev32.8 q10, q10 | ||
86 | vrev32.8 q11, q11 | ||
87 | #endif | ||
88 | |||
89 | vadd.u32 ta0, q8, k0 | ||
90 | vmov dg0, dga | ||
91 | |||
92 | add_update c, 0, k0, 8, 9, 10, 11, dgb | ||
93 | add_update c, 1, k0, 9, 10, 11, 8 | ||
94 | add_update c, 0, k0, 10, 11, 8, 9 | ||
95 | add_update c, 1, k0, 11, 8, 9, 10 | ||
96 | add_update c, 0, k1, 8, 9, 10, 11 | ||
97 | |||
98 | add_update p, 1, k1, 9, 10, 11, 8 | ||
99 | add_update p, 0, k1, 10, 11, 8, 9 | ||
100 | add_update p, 1, k1, 11, 8, 9, 10 | ||
101 | add_update p, 0, k1, 8, 9, 10, 11 | ||
102 | add_update p, 1, k2, 9, 10, 11, 8 | ||
103 | |||
104 | add_update m, 0, k2, 10, 11, 8, 9 | ||
105 | add_update m, 1, k2, 11, 8, 9, 10 | ||
106 | add_update m, 0, k2, 8, 9, 10, 11 | ||
107 | add_update m, 1, k2, 9, 10, 11, 8 | ||
108 | add_update m, 0, k3, 10, 11, 8, 9 | ||
109 | |||
110 | add_update p, 1, k3, 11, 8, 9, 10 | ||
111 | add_only p, 0, k3, 9 | ||
112 | add_only p, 1, k3, 10 | ||
113 | add_only p, 0, k3, 11 | ||
114 | add_only p, 1 | ||
115 | |||
116 | /* update state */ | ||
117 | vadd.u32 dga, dga, dg0 | ||
118 | vadd.u32 dgb, dgb, dg1a0 | ||
119 | bne 0b | ||
120 | |||
121 | /* store new state */ | ||
122 | vst1.32 {dga}, [r0] | ||
123 | vstr dgbs, [r0, #16] | ||
124 | bx lr | ||
125 | ENDPROC(sha1_ce_transform) | ||
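
The .Lsha1_rcon table holds the four FIPS 180-4 SHA-1 round constants, each replicated across all four lanes of a 128-bit vector so a single vadd.u32 adds K to four schedule words at once; the c/p/m suffixes passed to add_update/add_only select the sha1c, sha1p and sha1m instructions matching the Ch, Parity and Maj round functions of each 20-round group (five 4-round macros per group). A plain-C restatement of that mapping, for reference only:

    #include <stdint.h>

    /* The four SHA-1 round constants from .Lsha1_rcon (FIPS 180-4). */
    static const uint32_t K[4] = {
            0x5a827999,     /* rounds  0-19: Ch(b,c,d)     -> sha1c */
            0x6ed9eba1,     /* rounds 20-39: Parity(b,c,d) -> sha1p */
            0x8f1bbcdc,     /* rounds 40-59: Maj(b,c,d)    -> sha1m */
            0xca62c1d6,     /* rounds 60-79: Parity(b,c,d) -> sha1p */
    };

    /* Round function selected per 20-round group, as the c/p/m
     * macro arguments above do. */
    static uint32_t f(int t, uint32_t b, uint32_t c, uint32_t d)
    {
            switch (t / 20) {
            case 0:  return (b & c) | (~b & d);          /* Ch     */
            case 2:  return (b & c) | (b & d) | (c & d); /* Maj    */
            default: return b ^ c ^ d;                   /* Parity */
            }
    }
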
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c new file mode 100644 index 000000000000..80bc2fcd241a --- /dev/null +++ b/arch/arm/crypto/sha1-ce-glue.c | |||
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <crypto/internal/hash.h> | ||
12 | #include <crypto/sha.h> | ||
13 | #include <crypto/sha1_base.h> | ||
14 | #include <linux/crypto.h> | ||
15 | #include <linux/module.h> | ||
16 | |||
17 | #include <asm/hwcap.h> | ||
18 | #include <asm/neon.h> | ||
19 | #include <asm/simd.h> | ||
20 | |||
21 | #include "sha1.h" | ||
22 | |||
23 | MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); | ||
24 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
25 | MODULE_LICENSE("GPL v2"); | ||
26 | |||
27 | asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src, | ||
28 | int blocks); | ||
29 | |||
30 | static int sha1_ce_update(struct shash_desc *desc, const u8 *data, | ||
31 | unsigned int len) | ||
32 | { | ||
33 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
34 | |||
35 | if (!may_use_simd() || | ||
36 | (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) | ||
37 | return sha1_update_arm(desc, data, len); | ||
38 | |||
39 | kernel_neon_begin(); | ||
40 | sha1_base_do_update(desc, data, len, sha1_ce_transform); | ||
41 | kernel_neon_end(); | ||
42 | |||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, | ||
47 | unsigned int len, u8 *out) | ||
48 | { | ||
49 | if (!may_use_simd()) | ||
50 | return sha1_finup_arm(desc, data, len, out); | ||
51 | |||
52 | kernel_neon_begin(); | ||
53 | if (len) | ||
54 | sha1_base_do_update(desc, data, len, sha1_ce_transform); | ||
55 | sha1_base_do_finalize(desc, sha1_ce_transform); | ||
56 | kernel_neon_end(); | ||
57 | |||
58 | return sha1_base_finish(desc, out); | ||
59 | } | ||
60 | |||
61 | static int sha1_ce_final(struct shash_desc *desc, u8 *out) | ||
62 | { | ||
63 | return sha1_ce_finup(desc, NULL, 0, out); | ||
64 | } | ||
65 | |||
66 | static struct shash_alg alg = { | ||
67 | .init = sha1_base_init, | ||
68 | .update = sha1_ce_update, | ||
69 | .final = sha1_ce_final, | ||
70 | .finup = sha1_ce_finup, | ||
71 | .descsize = sizeof(struct sha1_state), | ||
72 | .digestsize = SHA1_DIGEST_SIZE, | ||
73 | .base = { | ||
74 | .cra_name = "sha1", | ||
75 | .cra_driver_name = "sha1-ce", | ||
76 | .cra_priority = 200, | ||
77 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
78 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
79 | .cra_module = THIS_MODULE, | ||
80 | } | ||
81 | }; | ||
82 | |||
83 | static int __init sha1_ce_mod_init(void) | ||
84 | { | ||
85 | if (!(elf_hwcap2 & HWCAP2_SHA1)) | ||
86 | return -ENODEV; | ||
87 | return crypto_register_shash(&alg); | ||
88 | } | ||
89 | |||
90 | static void __exit sha1_ce_mod_fini(void) | ||
91 | { | ||
92 | crypto_unregister_shash(&alg); | ||
93 | } | ||
94 | |||
95 | module_init(sha1_ce_mod_init); | ||
96 | module_exit(sha1_ce_mod_fini); | ||
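
Since every driver registers under the same .cra_name, API users keep requesting "sha1" and the crypto core resolves it to the loaded implementation with the highest cra_priority. A hedged usage sketch (sha1_digest_example is a hypothetical caller; the desc->flags field reflects the API of this era):

    #include <crypto/hash.h>
    #include <crypto/sha.h>
    #include <linux/err.h>
    #include <linux/types.h>

    static int sha1_digest_example(const u8 *data, unsigned int len,
                                   u8 out[SHA1_DIGEST_SIZE])
    {
            struct crypto_shash *tfm;
            int err;

            /* resolves to sha1-ce, sha1-neon, sha1-asm or sha1-generic,
             * whichever registered with the highest priority */
            tfm = crypto_alloc_shash("sha1", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            {
                    SHASH_DESC_ON_STACK(desc, tfm);

                    desc->tfm = tfm;
                    desc->flags = 0;
                    err = crypto_shash_digest(desc, data, len, out);
            }

            crypto_free_shash(tfm);
            return err;
    }
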
diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/crypto/sha1.h index 75e6a417416b..ffd8bd08b1a7 100644 --- a/arch/arm/include/asm/crypto/sha1.h +++ b/arch/arm/crypto/sha1.h | |||
@@ -7,4 +7,7 @@ | |||
7 | extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, | 7 | extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, |
8 | unsigned int len); | 8 | unsigned int len); |
9 | 9 | ||
10 | extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data, | ||
11 | unsigned int len, u8 *out); | ||
12 | |||
10 | #endif | 13 | #endif |
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index e31b0440c613..6fc73bf8766d 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c | |||
@@ -22,127 +22,47 @@ | |||
22 | #include <linux/cryptohash.h> | 22 | #include <linux/cryptohash.h> |
23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | #include <crypto/sha.h> | 24 | #include <crypto/sha.h> |
25 | #include <crypto/sha1_base.h> | ||
25 | #include <asm/byteorder.h> | 26 | #include <asm/byteorder.h> |
26 | #include <asm/crypto/sha1.h> | ||
27 | 27 | ||
28 | #include "sha1.h" | ||
28 | 29 | ||
29 | asmlinkage void sha1_block_data_order(u32 *digest, | 30 | asmlinkage void sha1_block_data_order(u32 *digest, |
30 | const unsigned char *data, unsigned int rounds); | 31 | const unsigned char *data, unsigned int rounds); |
31 | 32 | ||
32 | |||
33 | static int sha1_init(struct shash_desc *desc) | ||
34 | { | ||
35 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
36 | |||
37 | *sctx = (struct sha1_state){ | ||
38 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
39 | }; | ||
40 | |||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | |||
45 | static int __sha1_update(struct sha1_state *sctx, const u8 *data, | ||
46 | unsigned int len, unsigned int partial) | ||
47 | { | ||
48 | unsigned int done = 0; | ||
49 | |||
50 | sctx->count += len; | ||
51 | |||
52 | if (partial) { | ||
53 | done = SHA1_BLOCK_SIZE - partial; | ||
54 | memcpy(sctx->buffer + partial, data, done); | ||
55 | sha1_block_data_order(sctx->state, sctx->buffer, 1); | ||
56 | } | ||
57 | |||
58 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
59 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
60 | sha1_block_data_order(sctx->state, data + done, rounds); | ||
61 | done += rounds * SHA1_BLOCK_SIZE; | ||
62 | } | ||
63 | |||
64 | memcpy(sctx->buffer, data + done, len - done); | ||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | |||
69 | int sha1_update_arm(struct shash_desc *desc, const u8 *data, | 33 | int sha1_update_arm(struct shash_desc *desc, const u8 *data, |
70 | unsigned int len) | 34 | unsigned int len) |
71 | { | 35 | { |
72 | struct sha1_state *sctx = shash_desc_ctx(desc); | 36 | /* make sure casting to sha1_block_fn() is safe */ |
73 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | 37 | BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); |
74 | int res; | ||
75 | 38 | ||
76 | /* Handle the fast case right here */ | 39 | return sha1_base_do_update(desc, data, len, |
77 | if (partial + len < SHA1_BLOCK_SIZE) { | 40 | (sha1_block_fn *)sha1_block_data_order); |
78 | sctx->count += len; | ||
79 | memcpy(sctx->buffer + partial, data, len); | ||
80 | return 0; | ||
81 | } | ||
82 | res = __sha1_update(sctx, data, len, partial); | ||
83 | return res; | ||
84 | } | 41 | } |
85 | EXPORT_SYMBOL_GPL(sha1_update_arm); | 42 | EXPORT_SYMBOL_GPL(sha1_update_arm); |
86 | 43 | ||
87 | |||
88 | /* Add padding and return the message digest. */ | ||
89 | static int sha1_final(struct shash_desc *desc, u8 *out) | 44 | static int sha1_final(struct shash_desc *desc, u8 *out) |
90 | { | 45 | { |
91 | struct sha1_state *sctx = shash_desc_ctx(desc); | 46 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order); |
92 | unsigned int i, index, padlen; | 47 | return sha1_base_finish(desc, out); |
93 | __be32 *dst = (__be32 *)out; | ||
94 | __be64 bits; | ||
95 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
96 | |||
97 | bits = cpu_to_be64(sctx->count << 3); | ||
98 | |||
99 | /* Pad out to 56 mod 64 and append length */ | ||
100 | index = sctx->count % SHA1_BLOCK_SIZE; | ||
101 | padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); | ||
102 | /* We need to fill a whole block for __sha1_update() */ | ||
103 | if (padlen <= 56) { | ||
104 | sctx->count += padlen; | ||
105 | memcpy(sctx->buffer + index, padding, padlen); | ||
106 | } else { | ||
107 | __sha1_update(sctx, padding, padlen, index); | ||
108 | } | ||
109 | __sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56); | ||
110 | |||
111 | /* Store state in digest */ | ||
112 | for (i = 0; i < 5; i++) | ||
113 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
114 | |||
115 | /* Wipe context */ | ||
116 | memset(sctx, 0, sizeof(*sctx)); | ||
117 | return 0; | ||
118 | } | 48 | } |
119 | 49 | ||
120 | 50 | int sha1_finup_arm(struct shash_desc *desc, const u8 *data, | |
121 | static int sha1_export(struct shash_desc *desc, void *out) | 51 | unsigned int len, u8 *out) |
122 | { | 52 | { |
123 | struct sha1_state *sctx = shash_desc_ctx(desc); | 53 | sha1_base_do_update(desc, data, len, |
124 | memcpy(out, sctx, sizeof(*sctx)); | 54 | (sha1_block_fn *)sha1_block_data_order); |
125 | return 0; | 55 | return sha1_final(desc, out); |
126 | } | 56 | } |
127 | 57 | EXPORT_SYMBOL_GPL(sha1_finup_arm); | |
128 | |||
129 | static int sha1_import(struct shash_desc *desc, const void *in) | ||
130 | { | ||
131 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
132 | memcpy(sctx, in, sizeof(*sctx)); | ||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | 58 | ||
137 | static struct shash_alg alg = { | 59 | static struct shash_alg alg = { |
138 | .digestsize = SHA1_DIGEST_SIZE, | 60 | .digestsize = SHA1_DIGEST_SIZE, |
139 | .init = sha1_init, | 61 | .init = sha1_base_init, |
140 | .update = sha1_update_arm, | 62 | .update = sha1_update_arm, |
141 | .final = sha1_final, | 63 | .final = sha1_final, |
142 | .export = sha1_export, | 64 | .finup = sha1_finup_arm, |
143 | .import = sha1_import, | ||
144 | .descsize = sizeof(struct sha1_state), | 65 | .descsize = sizeof(struct sha1_state), |
145 | .statesize = sizeof(struct sha1_state), | ||
146 | .base = { | 66 | .base = { |
147 | .cra_name = "sha1", | 67 | .cra_name = "sha1", |
148 | .cra_driver_name= "sha1-asm", | 68 | .cra_driver_name= "sha1-asm", |
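
The rewritten glue casts sha1_block_data_order(), whose first argument is u32 *digest, to sha1_block_fn, whose first argument is struct sha1_state *. The BUILD_BUG_ON above is what makes that cast safe: with state as the first member, both pointers designate the same address. Abridged shapes for illustration (the exact definitions live in <crypto/sha.h> and <crypto/sha1_base.h>):

    #include <linux/types.h>
    #include <linux/linkage.h>

    struct sha1_state {
            u32 state[5];   /* first member: offsetof(..., state) == 0 */
            u64 count;
            u8  buffer[64];
    };

    /* callback type expected by sha1_base_do_update()/_do_finalize() */
    typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src,
                                 int blocks);

    /* the asm routine only ever touches the five state words, so a
     * struct sha1_state * passed where it expects u32 * is harmless as
     * long as state stays at offset 0 -- which BUILD_BUG_ON pins down */
    asmlinkage void sha1_block_data_order(u32 *digest,
                                          const unsigned char *data,
                                          unsigned int rounds);
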
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c index 0b0083757d47..4e22f122f966 100644 --- a/arch/arm/crypto/sha1_neon_glue.c +++ b/arch/arm/crypto/sha1_neon_glue.c | |||
@@ -25,147 +25,60 @@ | |||
25 | #include <linux/cryptohash.h> | 25 | #include <linux/cryptohash.h> |
26 | #include <linux/types.h> | 26 | #include <linux/types.h> |
27 | #include <crypto/sha.h> | 27 | #include <crypto/sha.h> |
28 | #include <asm/byteorder.h> | 28 | #include <crypto/sha1_base.h> |
29 | #include <asm/neon.h> | 29 | #include <asm/neon.h> |
30 | #include <asm/simd.h> | 30 | #include <asm/simd.h> |
31 | #include <asm/crypto/sha1.h> | ||
32 | 31 | ||
32 | #include "sha1.h" | ||
33 | 33 | ||
34 | asmlinkage void sha1_transform_neon(void *state_h, const char *data, | 34 | asmlinkage void sha1_transform_neon(void *state_h, const char *data, |
35 | unsigned int rounds); | 35 | unsigned int rounds); |
36 | 36 | ||
37 | |||
38 | static int sha1_neon_init(struct shash_desc *desc) | ||
39 | { | ||
40 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
41 | |||
42 | *sctx = (struct sha1_state){ | ||
43 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
44 | }; | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | static int __sha1_neon_update(struct shash_desc *desc, const u8 *data, | ||
50 | unsigned int len, unsigned int partial) | ||
51 | { | ||
52 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
53 | unsigned int done = 0; | ||
54 | |||
55 | sctx->count += len; | ||
56 | |||
57 | if (partial) { | ||
58 | done = SHA1_BLOCK_SIZE - partial; | ||
59 | memcpy(sctx->buffer + partial, data, done); | ||
60 | sha1_transform_neon(sctx->state, sctx->buffer, 1); | ||
61 | } | ||
62 | |||
63 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
64 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
65 | |||
66 | sha1_transform_neon(sctx->state, data + done, rounds); | ||
67 | done += rounds * SHA1_BLOCK_SIZE; | ||
68 | } | ||
69 | |||
70 | memcpy(sctx->buffer, data + done, len - done); | ||
71 | |||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | static int sha1_neon_update(struct shash_desc *desc, const u8 *data, | 37 | static int sha1_neon_update(struct shash_desc *desc, const u8 *data, |
76 | unsigned int len) | 38 | unsigned int len) |
77 | { | 39 | { |
78 | struct sha1_state *sctx = shash_desc_ctx(desc); | 40 | struct sha1_state *sctx = shash_desc_ctx(desc); |
79 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
80 | int res; | ||
81 | 41 | ||
82 | /* Handle the fast case right here */ | 42 | if (!may_use_simd() || |
83 | if (partial + len < SHA1_BLOCK_SIZE) { | 43 | (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) |
84 | sctx->count += len; | 44 | return sha1_update_arm(desc, data, len); |
85 | memcpy(sctx->buffer + partial, data, len); | ||
86 | 45 | ||
87 | return 0; | 46 | kernel_neon_begin(); |
88 | } | 47 | sha1_base_do_update(desc, data, len, |
89 | 48 | (sha1_block_fn *)sha1_transform_neon); | |
90 | if (!may_use_simd()) { | 49 | kernel_neon_end(); |
91 | res = sha1_update_arm(desc, data, len); | ||
92 | } else { | ||
93 | kernel_neon_begin(); | ||
94 | res = __sha1_neon_update(desc, data, len, partial); | ||
95 | kernel_neon_end(); | ||
96 | } | ||
97 | |||
98 | return res; | ||
99 | } | ||
100 | |||
101 | |||
102 | /* Add padding and return the message digest. */ | ||
103 | static int sha1_neon_final(struct shash_desc *desc, u8 *out) | ||
104 | { | ||
105 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
106 | unsigned int i, index, padlen; | ||
107 | __be32 *dst = (__be32 *)out; | ||
108 | __be64 bits; | ||
109 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
110 | |||
111 | bits = cpu_to_be64(sctx->count << 3); | ||
112 | |||
113 | /* Pad out to 56 mod 64 and append length */ | ||
114 | index = sctx->count % SHA1_BLOCK_SIZE; | ||
115 | padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); | ||
116 | if (!may_use_simd()) { | ||
117 | sha1_update_arm(desc, padding, padlen); | ||
118 | sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits)); | ||
119 | } else { | ||
120 | kernel_neon_begin(); | ||
121 | /* We need to fill a whole block for __sha1_neon_update() */ | ||
122 | if (padlen <= 56) { | ||
123 | sctx->count += padlen; | ||
124 | memcpy(sctx->buffer + index, padding, padlen); | ||
125 | } else { | ||
126 | __sha1_neon_update(desc, padding, padlen, index); | ||
127 | } | ||
128 | __sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56); | ||
129 | kernel_neon_end(); | ||
130 | } | ||
131 | |||
132 | /* Store state in digest */ | ||
133 | for (i = 0; i < 5; i++) | ||
134 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
135 | |||
136 | /* Wipe context */ | ||
137 | memset(sctx, 0, sizeof(*sctx)); | ||
138 | 50 | ||
139 | return 0; | 51 | return 0; |
140 | } | 52 | } |
141 | 53 | ||
142 | static int sha1_neon_export(struct shash_desc *desc, void *out) | 54 | static int sha1_neon_finup(struct shash_desc *desc, const u8 *data, |
55 | unsigned int len, u8 *out) | ||
143 | { | 56 | { |
144 | struct sha1_state *sctx = shash_desc_ctx(desc); | 57 | if (!may_use_simd()) |
58 | return sha1_finup_arm(desc, data, len, out); | ||
145 | 59 | ||
146 | memcpy(out, sctx, sizeof(*sctx)); | 60 | kernel_neon_begin(); |
61 | if (len) | ||
62 | sha1_base_do_update(desc, data, len, | ||
63 | (sha1_block_fn *)sha1_transform_neon); | ||
64 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon); | ||
65 | kernel_neon_end(); | ||
147 | 66 | ||
148 | return 0; | 67 | return sha1_base_finish(desc, out); |
149 | } | 68 | } |
150 | 69 | ||
151 | static int sha1_neon_import(struct shash_desc *desc, const void *in) | 70 | static int sha1_neon_final(struct shash_desc *desc, u8 *out) |
152 | { | 71 | { |
153 | struct sha1_state *sctx = shash_desc_ctx(desc); | 72 | return sha1_neon_finup(desc, NULL, 0, out); |
154 | |||
155 | memcpy(sctx, in, sizeof(*sctx)); | ||
156 | |||
157 | return 0; | ||
158 | } | 73 | } |
159 | 74 | ||
160 | static struct shash_alg alg = { | 75 | static struct shash_alg alg = { |
161 | .digestsize = SHA1_DIGEST_SIZE, | 76 | .digestsize = SHA1_DIGEST_SIZE, |
162 | .init = sha1_neon_init, | 77 | .init = sha1_base_init, |
163 | .update = sha1_neon_update, | 78 | .update = sha1_neon_update, |
164 | .final = sha1_neon_final, | 79 | .final = sha1_neon_final, |
165 | .export = sha1_neon_export, | 80 | .finup = sha1_neon_finup, |
166 | .import = sha1_neon_import, | ||
167 | .descsize = sizeof(struct sha1_state), | 81 | .descsize = sizeof(struct sha1_state), |
168 | .statesize = sizeof(struct sha1_state), | ||
169 | .base = { | 82 | .base = { |
170 | .cra_name = "sha1", | 83 | .cra_name = "sha1", |
171 | .cra_driver_name = "sha1-neon", | 84 | .cra_driver_name = "sha1-neon", |
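
Both the NEON and the CE glue gate SIMD use on the same two conditions: may_use_simd() must allow NEON in the current context, and the update must complete at least one full block, since a kernel_neon_begin()/kernel_neon_end() round trip is not worth it for data that only lands in the partial-block buffer. The predicate the drivers open-code, factored out as a sketch (sha1_want_simd is a hypothetical helper):

    #include <linux/types.h>
    #include <crypto/sha.h>
    #include <asm/simd.h>

    static bool sha1_want_simd(struct sha1_state *sctx, unsigned int len)
    {
            unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;

            return may_use_simd() && partial + len >= SHA1_BLOCK_SIZE;
    }
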
diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S new file mode 100644 index 000000000000..87ec11a5f405 --- /dev/null +++ b/arch/arm/crypto/sha2-ce-core.S | |||
@@ -0,0 +1,125 @@ | |||
1 | /* | ||
2 | * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd. | ||
5 | * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/linkage.h> | ||
13 | #include <asm/assembler.h> | ||
14 | |||
15 | .text | ||
16 | .fpu crypto-neon-fp-armv8 | ||
17 | |||
18 | k0 .req q7 | ||
19 | k1 .req q8 | ||
20 | rk .req r3 | ||
21 | |||
22 | ta0 .req q9 | ||
23 | ta1 .req q10 | ||
24 | tb0 .req q10 | ||
25 | tb1 .req q9 | ||
26 | |||
27 | dga .req q11 | ||
28 | dgb .req q12 | ||
29 | |||
30 | dg0 .req q13 | ||
31 | dg1 .req q14 | ||
32 | dg2 .req q15 | ||
33 | |||
34 | .macro add_only, ev, s0 | ||
35 | vmov dg2, dg0 | ||
36 | .ifnb \s0 | ||
37 | vld1.32 {k\ev}, [rk, :128]! | ||
38 | .endif | ||
39 | sha256h.32 dg0, dg1, tb\ev | ||
40 | sha256h2.32 dg1, dg2, tb\ev | ||
41 | .ifnb \s0 | ||
42 | vadd.u32 ta\ev, q\s0, k\ev | ||
43 | .endif | ||
44 | .endm | ||
45 | |||
46 | .macro add_update, ev, s0, s1, s2, s3 | ||
47 | sha256su0.32 q\s0, q\s1 | ||
48 | add_only \ev, \s1 | ||
49 | sha256su1.32 q\s0, q\s2, q\s3 | ||
50 | .endm | ||
51 | |||
52 | .align 6 | ||
53 | .Lsha256_rcon: | ||
54 | .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 | ||
55 | .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 | ||
56 | .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 | ||
57 | .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 | ||
58 | .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc | ||
59 | .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da | ||
60 | .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 | ||
61 | .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 | ||
62 | .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 | ||
63 | .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 | ||
64 | .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 | ||
65 | .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 | ||
66 | .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 | ||
67 | .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 | ||
68 | .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 | ||
69 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 | ||
70 | |||
71 | /* | ||
72 | * void sha2_ce_transform(struct sha256_state *sst, u8 const *src, | ||
73 | * int blocks); | ||
74 | */ | ||
75 | ENTRY(sha2_ce_transform) | ||
76 | /* load state */ | ||
77 | vld1.32 {dga-dgb}, [r0] | ||
78 | |||
79 | /* load input */ | ||
80 | 0: vld1.32 {q0-q1}, [r1]! | ||
81 | vld1.32 {q2-q3}, [r1]! | ||
82 | subs r2, r2, #1 | ||
83 | |||
84 | #ifndef CONFIG_CPU_BIG_ENDIAN | ||
85 | vrev32.8 q0, q0 | ||
86 | vrev32.8 q1, q1 | ||
87 | vrev32.8 q2, q2 | ||
88 | vrev32.8 q3, q3 | ||
89 | #endif | ||
90 | |||
91 | /* load first round constant */ | ||
92 | adr rk, .Lsha256_rcon | ||
93 | vld1.32 {k0}, [rk, :128]! | ||
94 | |||
95 | vadd.u32 ta0, q0, k0 | ||
96 | vmov dg0, dga | ||
97 | vmov dg1, dgb | ||
98 | |||
99 | add_update 1, 0, 1, 2, 3 | ||
100 | add_update 0, 1, 2, 3, 0 | ||
101 | add_update 1, 2, 3, 0, 1 | ||
102 | add_update 0, 3, 0, 1, 2 | ||
103 | add_update 1, 0, 1, 2, 3 | ||
104 | add_update 0, 1, 2, 3, 0 | ||
105 | add_update 1, 2, 3, 0, 1 | ||
106 | add_update 0, 3, 0, 1, 2 | ||
107 | add_update 1, 0, 1, 2, 3 | ||
108 | add_update 0, 1, 2, 3, 0 | ||
109 | add_update 1, 2, 3, 0, 1 | ||
110 | add_update 0, 3, 0, 1, 2 | ||
111 | |||
112 | add_only 1, 1 | ||
113 | add_only 0, 2 | ||
114 | add_only 1, 3 | ||
115 | add_only 0 | ||
116 | |||
117 | /* update state */ | ||
118 | vadd.u32 dga, dga, dg0 | ||
119 | vadd.u32 dgb, dgb, dg1 | ||
120 | bne 0b | ||
121 | |||
122 | /* store new state */ | ||
123 | vst1.32 {dga-dgb}, [r0] | ||
124 | bx lr | ||
125 | ENDPROC(sha2_ce_transform) | ||
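
sha2_ce_transform() can load and store the whole digest with single vld1.32/vst1.32 {dga-dgb} pairs because struct sha256_state keeps its eight 32-bit state words at offset 0. An abridged view of the layout and the contract the assembly implements (field details quoted from <crypto/sha.h> as an assumption):

    #include <linux/types.h>

    struct sha256_state {
            u32 state[8];   /* a..h; loaded as q11-q12 (dga-dgb) via r0 */
            u64 count;      /* bytes hashed so far (maintained by the glue) */
            u8  buf[64];    /* partial-block buffer (also handled in C) */
    };

    /* compress `blocks` 64-byte blocks from src into sst->state;
     * padding, buffering and length handling stay in the C glue */
    void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
                           int blocks);
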
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c new file mode 100644 index 000000000000..0755b2d657f3 --- /dev/null +++ b/arch/arm/crypto/sha2-ce-glue.c | |||
@@ -0,0 +1,114 @@ | |||
1 | /* | ||
2 | * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <crypto/internal/hash.h> | ||
12 | #include <crypto/sha.h> | ||
13 | #include <crypto/sha256_base.h> | ||
14 | #include <linux/crypto.h> | ||
15 | #include <linux/module.h> | ||
16 | |||
17 | #include <asm/hwcap.h> | ||
18 | #include <asm/simd.h> | ||
19 | #include <asm/neon.h> | ||
20 | #include <asm/unaligned.h> | ||
21 | |||
22 | #include "sha256_glue.h" | ||
23 | |||
24 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); | ||
25 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
26 | MODULE_LICENSE("GPL v2"); | ||
27 | |||
28 | asmlinkage void sha2_ce_transform(struct sha256_state *sst, u8 const *src, | ||
29 | int blocks); | ||
30 | |||
31 | static int sha2_ce_update(struct shash_desc *desc, const u8 *data, | ||
32 | unsigned int len) | ||
33 | { | ||
34 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
35 | |||
36 | if (!may_use_simd() || | ||
37 | (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) | ||
38 | return crypto_sha256_arm_update(desc, data, len); | ||
39 | |||
40 | kernel_neon_begin(); | ||
41 | sha256_base_do_update(desc, data, len, | ||
42 | (sha256_block_fn *)sha2_ce_transform); | ||
43 | kernel_neon_end(); | ||
44 | |||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | static int sha2_ce_finup(struct shash_desc *desc, const u8 *data, | ||
49 | unsigned int len, u8 *out) | ||
50 | { | ||
51 | if (!may_use_simd()) | ||
52 | return crypto_sha256_arm_finup(desc, data, len, out); | ||
53 | |||
54 | kernel_neon_begin(); | ||
55 | if (len) | ||
56 | sha256_base_do_update(desc, data, len, | ||
57 | (sha256_block_fn *)sha2_ce_transform); | ||
58 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); | ||
59 | kernel_neon_end(); | ||
60 | |||
61 | return sha256_base_finish(desc, out); | ||
62 | } | ||
63 | |||
64 | static int sha2_ce_final(struct shash_desc *desc, u8 *out) | ||
65 | { | ||
66 | return sha2_ce_finup(desc, NULL, 0, out); | ||
67 | } | ||
68 | |||
69 | static struct shash_alg algs[] = { { | ||
70 | .init = sha224_base_init, | ||
71 | .update = sha2_ce_update, | ||
72 | .final = sha2_ce_final, | ||
73 | .finup = sha2_ce_finup, | ||
74 | .descsize = sizeof(struct sha256_state), | ||
75 | .digestsize = SHA224_DIGEST_SIZE, | ||
76 | .base = { | ||
77 | .cra_name = "sha224", | ||
78 | .cra_driver_name = "sha224-ce", | ||
79 | .cra_priority = 300, | ||
80 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
81 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
82 | .cra_module = THIS_MODULE, | ||
83 | } | ||
84 | }, { | ||
85 | .init = sha256_base_init, | ||
86 | .update = sha2_ce_update, | ||
87 | .final = sha2_ce_final, | ||
88 | .finup = sha2_ce_finup, | ||
89 | .descsize = sizeof(struct sha256_state), | ||
90 | .digestsize = SHA256_DIGEST_SIZE, | ||
91 | .base = { | ||
92 | .cra_name = "sha256", | ||
93 | .cra_driver_name = "sha256-ce", | ||
94 | .cra_priority = 300, | ||
95 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
96 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
97 | .cra_module = THIS_MODULE, | ||
98 | } | ||
99 | } }; | ||
100 | |||
101 | static int __init sha2_ce_mod_init(void) | ||
102 | { | ||
103 | if (!(elf_hwcap2 & HWCAP2_SHA2)) | ||
104 | return -ENODEV; | ||
105 | return crypto_register_shashes(algs, ARRAY_SIZE(algs)); | ||
106 | } | ||
107 | |||
108 | static void __exit sha2_ce_mod_fini(void) | ||
109 | { | ||
110 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); | ||
111 | } | ||
112 | |||
113 | module_init(sha2_ce_mod_init); | ||
114 | module_exit(sha2_ce_mod_fini); | ||
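
SHA-224 and SHA-256 share the compression function and differ only in their initial hash values and digest length, which is why one update/final/finup triple serves both array entries and only .init and .digestsize differ. The two IVs that sha224_base_init()/sha256_base_init() install, quoted here from FIPS 180-4 rather than from this patch:

    #include <linux/types.h>

    static const u32 sha224_iv[8] = {
            0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
            0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4,
    };

    static const u32 sha256_iv[8] = {
            0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
            0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
    };
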
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl new file mode 100644 index 000000000000..fac0533ea633 --- /dev/null +++ b/arch/arm/crypto/sha256-armv4.pl | |||
@@ -0,0 +1,716 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | |||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # | ||
9 | # Permission to use under GPL terms is granted. | ||
10 | # ==================================================================== | ||
11 | |||
12 | # SHA256 block procedure for ARMv4. May 2007. | ||
13 | |||
14 | # Performance is ~2x better than gcc 3.4 generated code and in "abso- | ||
15 | # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per | ||
16 | # byte [on single-issue Xscale PXA250 core]. | ||
17 | |||
18 | # July 2010. | ||
19 | # | ||
20 | # Rescheduling for dual-issue pipeline resulted in 22% improvement on | ||
21 | # Cortex A8 core and ~20 cycles per processed byte. | ||
22 | |||
23 | # February 2011. | ||
24 | # | ||
25 | # Profiler-assisted and platform-specific optimization resulted in 16% | ||
26 | # improvement on Cortex A8 core and ~15.4 cycles per processed byte. | ||
27 | |||
28 | # September 2013. | ||
29 | # | ||
30 | # Add NEON implementation. On Cortex A8 it was measured to process one | ||
31 | # byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon | ||
32 | # S4 does it in 12.5 cycles too, but it's 50% faster than integer-only | ||
33 | # code (meaning that latter performs sub-optimally, nothing was done | ||
34 | # about it). | ||
35 | |||
36 | # May 2014. | ||
37 | # | ||
38 | # Add ARMv8 code path performing at 2.0 cpb on Apple A7. | ||
39 | |||
40 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} | ||
41 | open STDOUT,">$output"; | ||
42 | |||
43 | $ctx="r0"; $t0="r0"; | ||
44 | $inp="r1"; $t4="r1"; | ||
45 | $len="r2"; $t1="r2"; | ||
46 | $T1="r3"; $t3="r3"; | ||
47 | $A="r4"; | ||
48 | $B="r5"; | ||
49 | $C="r6"; | ||
50 | $D="r7"; | ||
51 | $E="r8"; | ||
52 | $F="r9"; | ||
53 | $G="r10"; | ||
54 | $H="r11"; | ||
55 | @V=($A,$B,$C,$D,$E,$F,$G,$H); | ||
56 | $t2="r12"; | ||
57 | $Ktbl="r14"; | ||
58 | |||
59 | @Sigma0=( 2,13,22); | ||
60 | @Sigma1=( 6,11,25); | ||
61 | @sigma0=( 7,18, 3); | ||
62 | @sigma1=(17,19,10); | ||
63 | |||
64 | sub BODY_00_15 { | ||
65 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; | ||
66 | |||
67 | $code.=<<___ if ($i<16); | ||
68 | #if __ARM_ARCH__>=7 | ||
69 | @ ldr $t1,[$inp],#4 @ $i | ||
70 | # if $i==15 | ||
71 | str $inp,[sp,#17*4] @ make room for $t4 | ||
72 | # endif | ||
73 | eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` | ||
74 | add $a,$a,$t2 @ h+=Maj(a,b,c) from the past | ||
75 | eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) | ||
76 | # ifndef __ARMEB__ | ||
77 | rev $t1,$t1 | ||
78 | # endif | ||
79 | #else | ||
80 | @ ldrb $t1,[$inp,#3] @ $i | ||
81 | add $a,$a,$t2 @ h+=Maj(a,b,c) from the past | ||
82 | ldrb $t2,[$inp,#2] | ||
83 | ldrb $t0,[$inp,#1] | ||
84 | orr $t1,$t1,$t2,lsl#8 | ||
85 | ldrb $t2,[$inp],#4 | ||
86 | orr $t1,$t1,$t0,lsl#16 | ||
87 | # if $i==15 | ||
88 | str $inp,[sp,#17*4] @ make room for $t4 | ||
89 | # endif | ||
90 | eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` | ||
91 | orr $t1,$t1,$t2,lsl#24 | ||
92 | eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) | ||
93 | #endif | ||
94 | ___ | ||
95 | $code.=<<___; | ||
96 | ldr $t2,[$Ktbl],#4 @ *K256++ | ||
97 | add $h,$h,$t1 @ h+=X[i] | ||
98 | str $t1,[sp,#`$i%16`*4] | ||
99 | eor $t1,$f,$g | ||
100 | add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) | ||
101 | and $t1,$t1,$e | ||
102 | add $h,$h,$t2 @ h+=K256[i] | ||
103 | eor $t1,$t1,$g @ Ch(e,f,g) | ||
104 | eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` | ||
105 | add $h,$h,$t1 @ h+=Ch(e,f,g) | ||
106 | #if $i==31 | ||
107 | and $t2,$t2,#0xff | ||
108 | cmp $t2,#0xf2 @ done? | ||
109 | #endif | ||
110 | #if $i<15 | ||
111 | # if __ARM_ARCH__>=7 | ||
112 | ldr $t1,[$inp],#4 @ prefetch | ||
113 | # else | ||
114 | ldrb $t1,[$inp,#3] | ||
115 | # endif | ||
116 | eor $t2,$a,$b @ a^b, b^c in next round | ||
117 | #else | ||
118 | ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx | ||
119 | eor $t2,$a,$b @ a^b, b^c in next round | ||
120 | ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx | ||
121 | #endif | ||
122 | eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) | ||
123 | and $t3,$t3,$t2 @ (b^c)&=(a^b) | ||
124 | add $d,$d,$h @ d+=h | ||
125 | eor $t3,$t3,$b @ Maj(a,b,c) | ||
126 | add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) | ||
127 | @ add $h,$h,$t3 @ h+=Maj(a,b,c) | ||
128 | ___ | ||
129 | ($t2,$t3)=($t3,$t2); | ||
130 | } | ||
131 | |||
132 | sub BODY_16_XX { | ||
133 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; | ||
134 | |||
135 | $code.=<<___; | ||
136 | @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i | ||
137 | @ ldr $t4,[sp,#`($i+14)%16`*4] | ||
138 | mov $t0,$t1,ror#$sigma0[0] | ||
139 | add $a,$a,$t2 @ h+=Maj(a,b,c) from the past | ||
140 | mov $t2,$t4,ror#$sigma1[0] | ||
141 | eor $t0,$t0,$t1,ror#$sigma0[1] | ||
142 | eor $t2,$t2,$t4,ror#$sigma1[1] | ||
143 | eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) | ||
144 | ldr $t1,[sp,#`($i+0)%16`*4] | ||
145 | eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) | ||
146 | ldr $t4,[sp,#`($i+9)%16`*4] | ||
147 | |||
148 | add $t2,$t2,$t0 | ||
149 | eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 | ||
150 | add $t1,$t1,$t2 | ||
151 | eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) | ||
152 | add $t1,$t1,$t4 @ X[i] | ||
153 | ___ | ||
154 | &BODY_00_15(@_); | ||
155 | } | ||
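
BODY_16_XX expands the SHA-256 message schedule in place over a 16-word ring: W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16], with the rotate/shift amounts coming from the @sigma0/@sigma1 arrays above. The same step in portable C, as a reference sketch:

    #include <stdint.h>

    static inline uint32_t rotr(uint32_t x, int n)
    {
            return (x >> n) | (x << (32 - n));
    }

    /* per @sigma0=(7,18,3) and @sigma1=(17,19,10) above */
    static inline uint32_t s0(uint32_t x)
    {
            return rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3);
    }
    static inline uint32_t s1(uint32_t x)
    {
            return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10);
    }

    /* one schedule step for round t (16 <= t < 64), in place over W[16] */
    static void sha256_schedule_step(uint32_t W[16], int t)
    {
            W[t & 15] += s1(W[(t - 2) & 15]) + W[(t - 7) & 15] +
                         s0(W[(t - 15) & 15]);
    }
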
156 | |||
157 | $code=<<___; | ||
158 | #ifndef __KERNEL__ | ||
159 | # include "arm_arch.h" | ||
160 | #else | ||
161 | # define __ARM_ARCH__ __LINUX_ARM_ARCH__ | ||
162 | # define __ARM_MAX_ARCH__ 7 | ||
163 | #endif | ||
164 | |||
165 | .text | ||
166 | #if __ARM_ARCH__<7 | ||
167 | .code 32 | ||
168 | #else | ||
169 | .syntax unified | ||
170 | # ifdef __thumb2__ | ||
171 | # define adrl adr | ||
172 | .thumb | ||
173 | # else | ||
174 | .code 32 | ||
175 | # endif | ||
176 | #endif | ||
177 | |||
178 | .type K256,%object | ||
179 | .align 5 | ||
180 | K256: | ||
181 | .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | ||
182 | .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | ||
183 | .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | ||
184 | .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | ||
185 | .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | ||
186 | .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | ||
187 | .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | ||
188 | .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | ||
189 | .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | ||
190 | .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | ||
191 | .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | ||
192 | .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | ||
193 | .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | ||
194 | .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | ||
195 | .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | ||
196 | .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | ||
197 | .size K256,.-K256 | ||
198 | .word 0 @ terminator | ||
199 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
200 | .LOPENSSL_armcap: | ||
201 | .word OPENSSL_armcap_P-sha256_block_data_order | ||
202 | #endif | ||
203 | .align 5 | ||
204 | |||
205 | .global sha256_block_data_order | ||
206 | .type sha256_block_data_order,%function | ||
207 | sha256_block_data_order: | ||
208 | #if __ARM_ARCH__<7 | ||
209 | sub r3,pc,#8 @ sha256_block_data_order | ||
210 | #else | ||
211 | adr r3,sha256_block_data_order | ||
212 | #endif | ||
213 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
214 | ldr r12,.LOPENSSL_armcap | ||
215 | ldr r12,[r3,r12] @ OPENSSL_armcap_P | ||
216 | tst r12,#ARMV8_SHA256 | ||
217 | bne .LARMv8 | ||
218 | tst r12,#ARMV7_NEON | ||
219 | bne .LNEON | ||
220 | #endif | ||
221 | add $len,$inp,$len,lsl#6 @ len to point at the end of inp | ||
222 | stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} | ||
223 | ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} | ||
224 | sub $Ktbl,r3,#256+32 @ K256 | ||
225 | sub sp,sp,#16*4 @ alloca(X[16]) | ||
226 | .Loop: | ||
227 | # if __ARM_ARCH__>=7 | ||
228 | ldr $t1,[$inp],#4 | ||
229 | # else | ||
230 | ldrb $t1,[$inp,#3] | ||
231 | # endif | ||
232 | eor $t3,$B,$C @ magic | ||
233 | eor $t2,$t2,$t2 | ||
234 | ___ | ||
235 | for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } | ||
236 | $code.=".Lrounds_16_xx:\n"; | ||
237 | for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } | ||
238 | $code.=<<___; | ||
239 | #if __ARM_ARCH__>=7 | ||
240 | ite eq @ Thumb2 thing, sanity check in ARM | ||
241 | #endif | ||
242 | ldreq $t3,[sp,#16*4] @ pull ctx | ||
243 | bne .Lrounds_16_xx | ||
244 | |||
245 | add $A,$A,$t2 @ h+=Maj(a,b,c) from the past | ||
246 | ldr $t0,[$t3,#0] | ||
247 | ldr $t1,[$t3,#4] | ||
248 | ldr $t2,[$t3,#8] | ||
249 | add $A,$A,$t0 | ||
250 | ldr $t0,[$t3,#12] | ||
251 | add $B,$B,$t1 | ||
252 | ldr $t1,[$t3,#16] | ||
253 | add $C,$C,$t2 | ||
254 | ldr $t2,[$t3,#20] | ||
255 | add $D,$D,$t0 | ||
256 | ldr $t0,[$t3,#24] | ||
257 | add $E,$E,$t1 | ||
258 | ldr $t1,[$t3,#28] | ||
259 | add $F,$F,$t2 | ||
260 | ldr $inp,[sp,#17*4] @ pull inp | ||
261 | ldr $t2,[sp,#18*4] @ pull inp+len | ||
262 | add $G,$G,$t0 | ||
263 | add $H,$H,$t1 | ||
264 | stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} | ||
265 | cmp $inp,$t2 | ||
266 | sub $Ktbl,$Ktbl,#256 @ rewind Ktbl | ||
267 | bne .Loop | ||
268 | |||
269 | add sp,sp,#`16+3`*4 @ destroy frame | ||
270 | #if __ARM_ARCH__>=5 | ||
271 | ldmia sp!,{r4-r11,pc} | ||
272 | #else | ||
273 | ldmia sp!,{r4-r11,lr} | ||
274 | tst lr,#1 | ||
275 | moveq pc,lr @ be binary compatible with V4, yet | ||
276 | bx lr @ interoperable with Thumb ISA:-) | ||
277 | #endif | ||
278 | .size sha256_block_data_order,.-sha256_block_data_order | ||
279 | ___ | ||
280 | ###################################################################### | ||
281 | # NEON stuff | ||
282 | # | ||
283 | {{{ | ||
284 | my @X=map("q$_",(0..3)); | ||
285 | my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); | ||
286 | my $Xfer=$t4; | ||
287 | my $j=0; | ||
288 | |||
289 | sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } | ||
290 | sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } | ||
291 | |||
292 | sub AUTOLOAD() # thunk [simplified] x86-style perlasm | ||
293 | { my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; | ||
294 | my $arg = pop; | ||
295 | $arg = "#$arg" if ($arg*1 eq $arg); | ||
296 | $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; | ||
297 | } | ||
298 | |||
299 | sub Xupdate() | ||
300 | { use integer; | ||
301 | my $body = shift; | ||
302 | my @insns = (&$body,&$body,&$body,&$body); | ||
303 | my ($a,$b,$c,$d,$e,$f,$g,$h); | ||
304 | |||
305 | &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] | ||
306 | eval(shift(@insns)); | ||
307 | eval(shift(@insns)); | ||
308 | eval(shift(@insns)); | ||
309 | &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] | ||
310 | eval(shift(@insns)); | ||
311 | eval(shift(@insns)); | ||
312 | eval(shift(@insns)); | ||
313 | &vshr_u32 ($T2,$T0,$sigma0[0]); | ||
314 | eval(shift(@insns)); | ||
315 | eval(shift(@insns)); | ||
316 | &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] | ||
317 | eval(shift(@insns)); | ||
318 | eval(shift(@insns)); | ||
319 | &vshr_u32 ($T1,$T0,$sigma0[2]); | ||
320 | eval(shift(@insns)); | ||
321 | eval(shift(@insns)); | ||
322 | &vsli_32 ($T2,$T0,32-$sigma0[0]); | ||
323 | eval(shift(@insns)); | ||
324 | eval(shift(@insns)); | ||
325 | &vshr_u32 ($T3,$T0,$sigma0[1]); | ||
326 | eval(shift(@insns)); | ||
327 | eval(shift(@insns)); | ||
328 | &veor ($T1,$T1,$T2); | ||
329 | eval(shift(@insns)); | ||
330 | eval(shift(@insns)); | ||
331 | &vsli_32 ($T3,$T0,32-$sigma0[1]); | ||
332 | eval(shift(@insns)); | ||
333 | eval(shift(@insns)); | ||
334 | &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); | ||
335 | eval(shift(@insns)); | ||
336 | eval(shift(@insns)); | ||
337 | &veor ($T1,$T1,$T3); # sigma0(X[1..4]) | ||
338 | eval(shift(@insns)); | ||
339 | eval(shift(@insns)); | ||
340 | &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); | ||
341 | eval(shift(@insns)); | ||
342 | eval(shift(@insns)); | ||
343 | &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); | ||
344 | eval(shift(@insns)); | ||
345 | eval(shift(@insns)); | ||
346 | &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) | ||
347 | eval(shift(@insns)); | ||
348 | eval(shift(@insns)); | ||
349 | &veor ($T5,$T5,$T4); | ||
350 | eval(shift(@insns)); | ||
351 | eval(shift(@insns)); | ||
352 | &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); | ||
353 | eval(shift(@insns)); | ||
354 | eval(shift(@insns)); | ||
355 | &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); | ||
356 | eval(shift(@insns)); | ||
357 | eval(shift(@insns)); | ||
358 | &veor ($T5,$T5,$T4); # sigma1(X[14..15]) | ||
359 | eval(shift(@insns)); | ||
360 | eval(shift(@insns)); | ||
361 | &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) | ||
362 | eval(shift(@insns)); | ||
363 | eval(shift(@insns)); | ||
364 | &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); | ||
365 | eval(shift(@insns)); | ||
366 | eval(shift(@insns)); | ||
367 | &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); | ||
368 | eval(shift(@insns)); | ||
369 | eval(shift(@insns)); | ||
370 | &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); | ||
371 | eval(shift(@insns)); | ||
372 | eval(shift(@insns)); | ||
373 | &veor ($T5,$T5,$T4); | ||
374 | eval(shift(@insns)); | ||
375 | eval(shift(@insns)); | ||
376 | &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); | ||
377 | eval(shift(@insns)); | ||
378 | eval(shift(@insns)); | ||
379 | &vld1_32 ("{$T0}","[$Ktbl,:128]!"); | ||
380 | eval(shift(@insns)); | ||
381 | eval(shift(@insns)); | ||
382 | &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); | ||
383 | eval(shift(@insns)); | ||
384 | eval(shift(@insns)); | ||
385 | &veor ($T5,$T5,$T4); # sigma1(X[16..17]) | ||
386 | eval(shift(@insns)); | ||
387 | eval(shift(@insns)); | ||
388 | &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) | ||
389 | eval(shift(@insns)); | ||
390 | eval(shift(@insns)); | ||
391 | &vadd_i32 ($T0,$T0,@X[0]); | ||
392 | while($#insns>=2) { eval(shift(@insns)); } | ||
393 | &vst1_32 ("{$T0}","[$Xfer,:128]!"); | ||
394 | eval(shift(@insns)); | ||
395 | eval(shift(@insns)); | ||
396 | |||
397 | push(@X,shift(@X)); # "rotate" X[] | ||
398 | } | ||
399 | |||
400 | sub Xpreload() | ||
401 | { use integer; | ||
402 | my $body = shift; | ||
403 | my @insns = (&$body,&$body,&$body,&$body); | ||
404 | my ($a,$b,$c,$d,$e,$f,$g,$h); | ||
405 | |||
406 | eval(shift(@insns)); | ||
407 | eval(shift(@insns)); | ||
408 | eval(shift(@insns)); | ||
409 | eval(shift(@insns)); | ||
410 | &vld1_32 ("{$T0}","[$Ktbl,:128]!"); | ||
411 | eval(shift(@insns)); | ||
412 | eval(shift(@insns)); | ||
413 | eval(shift(@insns)); | ||
414 | eval(shift(@insns)); | ||
415 | &vrev32_8 (@X[0],@X[0]); | ||
416 | eval(shift(@insns)); | ||
417 | eval(shift(@insns)); | ||
418 | eval(shift(@insns)); | ||
419 | eval(shift(@insns)); | ||
420 | &vadd_i32 ($T0,$T0,@X[0]); | ||
421 | foreach (@insns) { eval; } # remaining instructions | ||
422 | &vst1_32 ("{$T0}","[$Xfer,:128]!"); | ||
423 | |||
424 | push(@X,shift(@X)); # "rotate" X[] | ||
425 | } | ||
426 | |||
427 | sub body_00_15 () { | ||
428 | ( | ||
429 | '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. | ||
430 | '&add ($h,$h,$t1)', # h+=X[i]+K[i] | ||
431 | '&eor ($t1,$f,$g)', | ||
432 | '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', | ||
433 | '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past | ||
434 | '&and ($t1,$t1,$e)', | ||
435 | '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) | ||
436 | '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', | ||
437 | '&eor ($t1,$t1,$g)', # Ch(e,f,g) | ||
438 | '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) | ||
439 | '&eor ($t2,$a,$b)', # a^b, b^c in next round | ||
440 | '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) | ||
441 | '&add ($h,$h,$t1)', # h+=Ch(e,f,g) | ||
442 | '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. | ||
443 | '&ldr ($t1,"[$Ktbl]") if ($j==15);'. | ||
444 | '&ldr ($t1,"[sp,#64]") if ($j==31)', | ||
445 | '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) | ||
446 | '&add ($d,$d,$h)', # d+=h | ||
447 | '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) | ||
448 | '&eor ($t3,$t3,$b)', # Maj(a,b,c) | ||
449 | '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' | ||
450 | ) | ||
451 | } | ||
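
body_00_15 interleaves one SHA-256 round with the NEON schedule updates; behind the register juggling it computes the standard round functions, with Ch and Maj in their xor/and forms to save instructions. Their plain-C equivalents, with the Sigma rotation amounts from @Sigma0=(2,13,22) and @Sigma1=(6,11,25):

    #include <stdint.h>

    static inline uint32_t rotr32(uint32_t x, int n)
    {
            return (x >> n) | (x << (32 - n));
    }

    static inline uint32_t Sigma0(uint32_t a)
    {
            return rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22);
    }

    static inline uint32_t Sigma1(uint32_t e)
    {
            return rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25);
    }

    /* eor/and forms used above: Ch(e,f,g) = ((f ^ g) & e) ^ g and
     * Maj(a,b,c) = ((a ^ b) & (b ^ c)) ^ b */
    static inline uint32_t Ch(uint32_t e, uint32_t f, uint32_t g)
    {
            return ((f ^ g) & e) ^ g;
    }

    static inline uint32_t Maj(uint32_t a, uint32_t b, uint32_t c)
    {
            return ((a ^ b) & (b ^ c)) ^ b;
    }
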
452 | |||
453 | $code.=<<___; | ||
454 | #if __ARM_MAX_ARCH__>=7 | ||
455 | .arch armv7-a | ||
456 | .fpu neon | ||
457 | |||
458 | .global sha256_block_data_order_neon | ||
459 | .type sha256_block_data_order_neon,%function | ||
460 | .align 4 | ||
461 | sha256_block_data_order_neon: | ||
462 | .LNEON: | ||
463 | stmdb sp!,{r4-r12,lr} | ||
464 | |||
465 | sub $H,sp,#16*4+16 | ||
466 | adrl $Ktbl,K256 | ||
467 | bic $H,$H,#15 @ align for 128-bit stores | ||
468 | mov $t2,sp | ||
469 | mov sp,$H @ alloca | ||
470 | add $len,$inp,$len,lsl#6 @ len to point at the end of inp | ||
471 | |||
472 | vld1.8 {@X[0]},[$inp]! | ||
473 | vld1.8 {@X[1]},[$inp]! | ||
474 | vld1.8 {@X[2]},[$inp]! | ||
475 | vld1.8 {@X[3]},[$inp]! | ||
476 | vld1.32 {$T0},[$Ktbl,:128]! | ||
477 | vld1.32 {$T1},[$Ktbl,:128]! | ||
478 | vld1.32 {$T2},[$Ktbl,:128]! | ||
479 | vld1.32 {$T3},[$Ktbl,:128]! | ||
480 | vrev32.8 @X[0],@X[0] @ yes, even on | ||
481 | str $ctx,[sp,#64] | ||
482 | vrev32.8 @X[1],@X[1] @ big-endian | ||
483 | str $inp,[sp,#68] | ||
484 | mov $Xfer,sp | ||
485 | vrev32.8 @X[2],@X[2] | ||
486 | str $len,[sp,#72] | ||
487 | vrev32.8 @X[3],@X[3] | ||
488 | str $t2,[sp,#76] @ save original sp | ||
489 | vadd.i32 $T0,$T0,@X[0] | ||
490 | vadd.i32 $T1,$T1,@X[1] | ||
491 | vst1.32 {$T0},[$Xfer,:128]! | ||
492 | vadd.i32 $T2,$T2,@X[2] | ||
493 | vst1.32 {$T1},[$Xfer,:128]! | ||
494 | vadd.i32 $T3,$T3,@X[3] | ||
495 | vst1.32 {$T2},[$Xfer,:128]! | ||
496 | vst1.32 {$T3},[$Xfer,:128]! | ||
497 | |||
498 | ldmia $ctx,{$A-$H} | ||
499 | sub $Xfer,$Xfer,#64 | ||
500 | ldr $t1,[sp,#0] | ||
501 | eor $t2,$t2,$t2 | ||
502 | eor $t3,$B,$C | ||
503 | b .L_00_48 | ||
504 | |||
505 | .align 4 | ||
506 | .L_00_48: | ||
507 | ___ | ||
508 | &Xupdate(\&body_00_15); | ||
509 | &Xupdate(\&body_00_15); | ||
510 | &Xupdate(\&body_00_15); | ||
511 | &Xupdate(\&body_00_15); | ||
512 | $code.=<<___; | ||
513 | teq $t1,#0 @ check for K256 terminator | ||
514 | ldr $t1,[sp,#0] | ||
515 | sub $Xfer,$Xfer,#64 | ||
516 | bne .L_00_48 | ||
517 | |||
518 | ldr $inp,[sp,#68] | ||
519 | ldr $t0,[sp,#72] | ||
520 | sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl | ||
521 | teq $inp,$t0 | ||
522 | it eq | ||
523 | subeq $inp,$inp,#64 @ avoid SEGV | ||
524 | vld1.8 {@X[0]},[$inp]! @ load next input block | ||
525 | vld1.8 {@X[1]},[$inp]! | ||
526 | vld1.8 {@X[2]},[$inp]! | ||
527 | vld1.8 {@X[3]},[$inp]! | ||
528 | it ne | ||
529 | strne $inp,[sp,#68] | ||
530 | mov $Xfer,sp | ||
531 | ___ | ||
532 | &Xpreload(\&body_00_15); | ||
533 | &Xpreload(\&body_00_15); | ||
534 | &Xpreload(\&body_00_15); | ||
535 | &Xpreload(\&body_00_15); | ||
536 | $code.=<<___; | ||
537 | ldr $t0,[$t1,#0] | ||
538 | add $A,$A,$t2 @ h+=Maj(a,b,c) from the past | ||
539 | ldr $t2,[$t1,#4] | ||
540 | ldr $t3,[$t1,#8] | ||
541 | ldr $t4,[$t1,#12] | ||
542 | add $A,$A,$t0 @ accumulate | ||
543 | ldr $t0,[$t1,#16] | ||
544 | add $B,$B,$t2 | ||
545 | ldr $t2,[$t1,#20] | ||
546 | add $C,$C,$t3 | ||
547 | ldr $t3,[$t1,#24] | ||
548 | add $D,$D,$t4 | ||
549 | ldr $t4,[$t1,#28] | ||
550 | add $E,$E,$t0 | ||
551 | str $A,[$t1],#4 | ||
552 | add $F,$F,$t2 | ||
553 | str $B,[$t1],#4 | ||
554 | add $G,$G,$t3 | ||
555 | str $C,[$t1],#4 | ||
556 | add $H,$H,$t4 | ||
557 | str $D,[$t1],#4 | ||
558 | stmia $t1,{$E-$H} | ||
559 | |||
560 | ittte ne | ||
561 | movne $Xfer,sp | ||
562 | ldrne $t1,[sp,#0] | ||
563 | eorne $t2,$t2,$t2 | ||
564 | ldreq sp,[sp,#76] @ restore original sp | ||
565 | itt ne | ||
566 | eorne $t3,$B,$C | ||
567 | bne .L_00_48 | ||
568 | |||
569 | ldmia sp!,{r4-r12,pc} | ||
570 | .size sha256_block_data_order_neon,.-sha256_block_data_order_neon | ||
571 | #endif | ||
572 | ___ | ||
573 | }}} | ||
574 | ###################################################################### | ||
575 | # ARMv8 stuff | ||
576 | # | ||
577 | {{{ | ||
578 | my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); | ||
579 | my @MSG=map("q$_",(8..11)); | ||
580 | my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); | ||
581 | my $Ktbl="r3"; | ||
582 | |||
583 | $code.=<<___; | ||
584 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
585 | |||
586 | # ifdef __thumb2__ | ||
587 | # define INST(a,b,c,d) .byte c,d|0xc,a,b | ||
588 | # else | ||
589 | # define INST(a,b,c,d) .byte a,b,c,d | ||
590 | # endif | ||
591 | |||
592 | .type sha256_block_data_order_armv8,%function | ||
593 | .align 5 | ||
594 | sha256_block_data_order_armv8: | ||
595 | .LARMv8: | ||
596 | vld1.32 {$ABCD,$EFGH},[$ctx] | ||
597 | # ifdef __thumb2__ | ||
598 | adr $Ktbl,.LARMv8 | ||
599 | sub $Ktbl,$Ktbl,#.LARMv8-K256 | ||
600 | # else | ||
601 | adrl $Ktbl,K256 | ||
602 | # endif | ||
603 | add $len,$inp,$len,lsl#6 @ len to point at the end of inp | ||
604 | |||
605 | .Loop_v8: | ||
606 | vld1.8 {@MSG[0]-@MSG[1]},[$inp]! | ||
607 | vld1.8 {@MSG[2]-@MSG[3]},[$inp]! | ||
608 | vld1.32 {$W0},[$Ktbl]! | ||
609 | vrev32.8 @MSG[0],@MSG[0] | ||
610 | vrev32.8 @MSG[1],@MSG[1] | ||
611 | vrev32.8 @MSG[2],@MSG[2] | ||
612 | vrev32.8 @MSG[3],@MSG[3] | ||
613 | vmov $ABCD_SAVE,$ABCD @ offload | ||
614 | vmov $EFGH_SAVE,$EFGH | ||
615 | teq $inp,$len | ||
616 | ___ | ||
617 | for($i=0;$i<12;$i++) { | ||
618 | $code.=<<___; | ||
619 | vld1.32 {$W1},[$Ktbl]! | ||
620 | vadd.i32 $W0,$W0,@MSG[0] | ||
621 | sha256su0 @MSG[0],@MSG[1] | ||
622 | vmov $abcd,$ABCD | ||
623 | sha256h $ABCD,$EFGH,$W0 | ||
624 | sha256h2 $EFGH,$abcd,$W0 | ||
625 | sha256su1 @MSG[0],@MSG[2],@MSG[3] | ||
626 | ___ | ||
627 | ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); | ||
628 | } | ||
629 | $code.=<<___; | ||
630 | vld1.32 {$W1},[$Ktbl]! | ||
631 | vadd.i32 $W0,$W0,@MSG[0] | ||
632 | vmov $abcd,$ABCD | ||
633 | sha256h $ABCD,$EFGH,$W0 | ||
634 | sha256h2 $EFGH,$abcd,$W0 | ||
635 | |||
636 | vld1.32 {$W0},[$Ktbl]! | ||
637 | vadd.i32 $W1,$W1,@MSG[1] | ||
638 | vmov $abcd,$ABCD | ||
639 | sha256h $ABCD,$EFGH,$W1 | ||
640 | sha256h2 $EFGH,$abcd,$W1 | ||
641 | |||
642 | vld1.32 {$W1},[$Ktbl] | ||
643 | vadd.i32 $W0,$W0,@MSG[2] | ||
644 | sub $Ktbl,$Ktbl,#256-16 @ rewind | ||
645 | vmov $abcd,$ABCD | ||
646 | sha256h $ABCD,$EFGH,$W0 | ||
647 | sha256h2 $EFGH,$abcd,$W0 | ||
648 | |||
649 | vadd.i32 $W1,$W1,@MSG[3] | ||
650 | vmov $abcd,$ABCD | ||
651 | sha256h $ABCD,$EFGH,$W1 | ||
652 | sha256h2 $EFGH,$abcd,$W1 | ||
653 | |||
654 | vadd.i32 $ABCD,$ABCD,$ABCD_SAVE | ||
655 | vadd.i32 $EFGH,$EFGH,$EFGH_SAVE | ||
656 | it ne | ||
657 | bne .Loop_v8 | ||
658 | |||
659 | vst1.32 {$ABCD,$EFGH},[$ctx] | ||
660 | |||
661 | ret @ bx lr | ||
662 | .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 | ||
663 | #endif | ||
664 | ___ | ||
665 | }}} | ||
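
The sha256h/sha256h2/sha256su0/sha256su1 opcodes emitted above map one-to-one onto the ACLE crypto intrinsics, so one iteration of the .Loop_v8 quad-round can be rendered in C for reference. A hedged sketch (the helper name is hypothetical; assumes a compiler targeting the crypto extension, e.g. -mfpu=crypto-neon-fp-armv8):

    #include <arm_neon.h>           /* requires __ARM_FEATURE_CRYPTO */

    static void sha256_ce_quad_round(uint32x4_t *abcd, uint32x4_t *efgh,
                                     uint32x4_t wk, /* MSG[0] + K[4i..4i+3] */
                                     uint32x4_t *m0, uint32x4_t m1,
                                     uint32x4_t m2, uint32x4_t m3)
    {
            uint32x4_t abcd_save = *abcd;                 /* vmov $abcd,$ABCD */

            *m0   = vsha256su0q_u32(*m0, m1);             /* sha256su0 */
            *abcd = vsha256hq_u32(*abcd, *efgh, wk);      /* sha256h   */
            *efgh = vsha256h2q_u32(*efgh, abcd_save, wk); /* sha256h2  */
            *m0   = vsha256su1q_u32(*m0, m2, m3);         /* sha256su1 */
    }
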
666 | $code.=<<___; | ||
667 | .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" | ||
668 | .align 2 | ||
669 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
670 | .comm OPENSSL_armcap_P,4,4 | ||
671 | #endif | ||
672 | ___ | ||
673 | |||
674 | open SELF,$0; | ||
675 | while(<SELF>) { | ||
676 | next if (/^#!/); | ||
677 | last if (!s/^#/@/ and !/^$/); | ||
678 | print; | ||
679 | } | ||
680 | close SELF; | ||
681 | |||
682 | { my %opcode = ( | ||
683 | "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, | ||
684 | "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); | ||
685 | |||
686 | sub unsha256 { | ||
687 | my ($mnemonic,$arg)=@_; | ||
688 | |||
689 | if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { | ||
690 | my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) | ||
691 | |(($2&7)<<17)|(($2&8)<<4) | ||
692 | |(($3&7)<<1) |(($3&8)<<2); | ||
693 | # since ARMv7 instructions are always encoded little-endian. | ||
694 | # correct solution is to use .inst directive, but older | ||
695 | # assemblers don't implement it:-( | ||
696 | sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s", | ||
697 | $word&0xff,($word>>8)&0xff, | ||
698 | ($word>>16)&0xff,($word>>24)&0xff, | ||
699 | $mnemonic,$arg; | ||
700 | } | ||
701 | } | ||
702 | } | ||
703 | |||
704 | foreach (split($/,$code)) { | ||
705 | |||
706 | s/\`([^\`]*)\`/eval $1/geo; | ||
707 | |||
708 | s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; | ||
709 | |||
710 | s/\bret\b/bx lr/go or | ||
711 | s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 | ||
712 | |||
713 | print $_,"\n"; | ||
714 | } | ||
715 | |||
716 | close STDOUT; # enforce flush | ||
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped new file mode 100644 index 000000000000..555a1a8eec90 --- /dev/null +++ b/arch/arm/crypto/sha256-core.S_shipped | |||
@@ -0,0 +1,2808 @@ | |||
1 | |||
2 | @ ==================================================================== | ||
3 | @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
4 | @ project. The module is, however, dual licensed under OpenSSL and | ||
5 | @ CRYPTOGAMS licenses depending on where you obtain it. For further | ||
6 | @ details see http://www.openssl.org/~appro/cryptogams/. | ||
7 | @ | ||
8 | @ Permission to use under GPL terms is granted. | ||
9 | @ ==================================================================== | ||
10 | |||
11 | @ SHA256 block procedure for ARMv4. May 2007. | ||
12 | |||
13 | @ Performance is ~2x better than gcc 3.4 generated code and in "abso- | ||
14 | @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per | ||
15 | @ byte [on single-issue Xscale PXA250 core]. | ||
16 | |||
17 | @ July 2010. | ||
18 | @ | ||
19 | @ Rescheduling for dual-issue pipeline resulted in 22% improvement on | ||
20 | @ Cortex A8 core and ~20 cycles per processed byte. | ||
21 | |||
22 | @ February 2011. | ||
23 | @ | ||
24 | @ Profiler-assisted and platform-specific optimization resulted in 16% | ||
25 | @ improvement on Cortex A8 core and ~15.4 cycles per processed byte. | ||
26 | |||
27 | @ September 2013. | ||
28 | @ | ||
29 | @ Add NEON implementation. On Cortex A8 it was measured to process one | ||
30 | @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon | ||
31 | @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only | ||
32 | @ code (meaning that latter performs sub-optimally, nothing was done | ||
33 | @ about it). | ||
34 | |||
35 | @ May 2014. | ||
36 | @ | ||
37 | @ Add ARMv8 code path performing at 2.0 cpb on Apple A7. | ||
38 | |||
39 | #ifndef __KERNEL__ | ||
40 | # include "arm_arch.h" | ||
41 | #else | ||
42 | # define __ARM_ARCH__ __LINUX_ARM_ARCH__ | ||
43 | # define __ARM_MAX_ARCH__ 7 | ||
44 | #endif | ||
45 | |||
46 | .text | ||
47 | #if __ARM_ARCH__<7 | ||
48 | .code 32 | ||
49 | #else | ||
50 | .syntax unified | ||
51 | # ifdef __thumb2__ | ||
52 | # define adrl adr | ||
53 | .thumb | ||
54 | # else | ||
55 | .code 32 | ||
56 | # endif | ||
57 | #endif | ||
58 | |||
59 | .type K256,%object | ||
60 | .align 5 | ||
61 | K256: | ||
62 | .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | ||
63 | .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | ||
64 | .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | ||
65 | .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | ||
66 | .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | ||
67 | .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | ||
68 | .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | ||
69 | .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | ||
70 | .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | ||
71 | .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | ||
72 | .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | ||
73 | .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | ||
74 | .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | ||
75 | .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | ||
76 | .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | ||
77 | .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | ||
78 | .size K256,.-K256 | ||
79 | .word 0 @ terminator | ||
80 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
81 | .LOPENSSL_armcap: | ||
82 | .word OPENSSL_armcap_P-sha256_block_data_order | ||
83 | #endif | ||
84 | .align 5 | ||
85 | |||
86 | .global sha256_block_data_order | ||
87 | .type sha256_block_data_order,%function | ||
88 | sha256_block_data_order: | ||
89 | #if __ARM_ARCH__<7 | ||
90 | sub r3,pc,#8 @ sha256_block_data_order | ||
91 | #else | ||
92 | adr r3,sha256_block_data_order | ||
93 | #endif | ||
94 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
95 | ldr r12,.LOPENSSL_armcap | ||
96 | ldr r12,[r3,r12] @ OPENSSL_armcap_P | ||
97 | tst r12,#ARMV8_SHA256 | ||
98 | bne .LARMv8 | ||
99 | tst r12,#ARMV7_NEON | ||
100 | bne .LNEON | ||
101 | #endif | ||
102 | add r2,r1,r2,lsl#6 @ len to point at the end of inp | ||
103 | stmdb sp!,{r0,r1,r2,r4-r11,lr} | ||
104 | ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} | ||
105 | sub r14,r3,#256+32 @ K256 | ||
106 | sub sp,sp,#16*4 @ alloca(X[16]) | ||
107 | .Loop: | ||
108 | # if __ARM_ARCH__>=7 | ||
109 | ldr r2,[r1],#4 | ||
110 | # else | ||
111 | ldrb r2,[r1,#3] | ||
112 | # endif | ||
113 | eor r3,r5,r6 @ magic | ||
114 | eor r12,r12,r12 | ||
115 | #if __ARM_ARCH__>=7 | ||
116 | @ ldr r2,[r1],#4 @ 0 | ||
117 | # if 0==15 | ||
118 | str r1,[sp,#17*4] @ make room for r1 | ||
119 | # endif | ||
120 | eor r0,r8,r8,ror#5 | ||
121 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
122 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
123 | # ifndef __ARMEB__ | ||
124 | rev r2,r2 | ||
125 | # endif | ||
126 | #else | ||
127 | @ ldrb r2,[r1,#3] @ 0 | ||
128 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
129 | ldrb r12,[r1,#2] | ||
130 | ldrb r0,[r1,#1] | ||
131 | orr r2,r2,r12,lsl#8 | ||
132 | ldrb r12,[r1],#4 | ||
133 | orr r2,r2,r0,lsl#16 | ||
134 | # if 0==15 | ||
135 | str r1,[sp,#17*4] @ make room for r1 | ||
136 | # endif | ||
137 | eor r0,r8,r8,ror#5 | ||
138 | orr r2,r2,r12,lsl#24 | ||
139 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
140 | #endif | ||
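Both branches of the conditional above perform the same big-endian message-word load: ARMv7+ issues a word load and byte-reverses it with rev (skipped on big-endian builds), while older cores assemble the word from four ldrb accesses, which also keeps unaligned input safe. In C terms:

#include <stdint.h>

/* Equivalent of either load path: fetch one big-endian 32-bit word. */
static inline uint32_t load_be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] <<  8) |  (uint32_t)p[3];
}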
141 | ldr r12,[r14],#4 @ *K256++ | ||
142 | add r11,r11,r2 @ h+=X[i] | ||
143 | str r2,[sp,#0*4] | ||
144 | eor r2,r9,r10 | ||
145 | add r11,r11,r0,ror#6 @ h+=Sigma1(e) | ||
146 | and r2,r2,r8 | ||
147 | add r11,r11,r12 @ h+=K256[i] | ||
148 | eor r2,r2,r10 @ Ch(e,f,g) | ||
149 | eor r0,r4,r4,ror#11 | ||
150 | add r11,r11,r2 @ h+=Ch(e,f,g) | ||
151 | #if 0==31 | ||
152 | and r12,r12,#0xff | ||
153 | cmp r12,#0xf2 @ done? | ||
154 | #endif | ||
155 | #if 0<15 | ||
156 | # if __ARM_ARCH__>=7 | ||
157 | ldr r2,[r1],#4 @ prefetch | ||
158 | # else | ||
159 | ldrb r2,[r1,#3] | ||
160 | # endif | ||
161 | eor r12,r4,r5 @ a^b, b^c in next round | ||
162 | #else | ||
163 | ldr r2,[sp,#2*4] @ from future BODY_16_xx | ||
164 | eor r12,r4,r5 @ a^b, b^c in next round | ||
165 | ldr r1,[sp,#15*4] @ from future BODY_16_xx | ||
166 | #endif | ||
167 | eor r0,r0,r4,ror#20 @ Sigma0(a) | ||
168 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
169 | add r7,r7,r11 @ d+=h | ||
170 | eor r3,r3,r5 @ Maj(a,b,c) | ||
171 | add r11,r11,r0,ror#2 @ h+=Sigma0(a) | ||
172 | @ add r11,r11,r3 @ h+=Maj(a,b,c) | ||
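That completes round 0; rounds 1-15 below repeat it with rotated register roles, which is also why the listing is peppered with literal tests like "#if 1==15": the perlasm generator pastes one parametrized round per index and lets the preprocessor fold the conditions. The round itself, as a C sketch, with the two scheduling tricks the comments call out (Maj folded in one round late, "h+=Maj(a,b,c) from the past", and Ch/Maj built from eor/and rather than the textbook formula):

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned n)	/* as in the first sketch */
{
	return (x >> n) | (x << (32 - n));
}

/* Returns the new a; the caller rotates the other working variables. */
static inline uint32_t round_00_15(uint32_t a, uint32_t b, uint32_t c,
				   uint32_t *d, uint32_t e, uint32_t f,
				   uint32_t g, uint32_t h,
				   uint32_t w, uint32_t k)
{
	uint32_t sig1 = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
	uint32_t ch   = ((f ^ g) & e) ^ g;		/* eor/and/eor form */
	uint32_t sig0 = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
	uint32_t maj  = ((b ^ c) & (a ^ b)) ^ b;	/* (b^c)&=(a^b), ^b */

	h += w + k + sig1 + ch;		/* h+=X[i], h+=K256[i], ...      */
	*d += h;			/* d+=h                          */
	return h + sig0 + maj;		/* the asm adds maj a round late */
}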
173 | #if __ARM_ARCH__>=7 | ||
174 | @ ldr r2,[r1],#4 @ 1 | ||
175 | # if 1==15 | ||
176 | str r1,[sp,#17*4] @ make room for r1 | ||
177 | # endif | ||
178 | eor r0,r7,r7,ror#5 | ||
179 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
180 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
181 | # ifndef __ARMEB__ | ||
182 | rev r2,r2 | ||
183 | # endif | ||
184 | #else | ||
185 | @ ldrb r2,[r1,#3] @ 1 | ||
186 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
187 | ldrb r3,[r1,#2] | ||
188 | ldrb r0,[r1,#1] | ||
189 | orr r2,r2,r3,lsl#8 | ||
190 | ldrb r3,[r1],#4 | ||
191 | orr r2,r2,r0,lsl#16 | ||
192 | # if 1==15 | ||
193 | str r1,[sp,#17*4] @ make room for r1 | ||
194 | # endif | ||
195 | eor r0,r7,r7,ror#5 | ||
196 | orr r2,r2,r3,lsl#24 | ||
197 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
198 | #endif | ||
199 | ldr r3,[r14],#4 @ *K256++ | ||
200 | add r10,r10,r2 @ h+=X[i] | ||
201 | str r2,[sp,#1*4] | ||
202 | eor r2,r8,r9 | ||
203 | add r10,r10,r0,ror#6 @ h+=Sigma1(e) | ||
204 | and r2,r2,r7 | ||
205 | add r10,r10,r3 @ h+=K256[i] | ||
206 | eor r2,r2,r9 @ Ch(e,f,g) | ||
207 | eor r0,r11,r11,ror#11 | ||
208 | add r10,r10,r2 @ h+=Ch(e,f,g) | ||
209 | #if 1==31 | ||
210 | and r3,r3,#0xff | ||
211 | cmp r3,#0xf2 @ done? | ||
212 | #endif | ||
213 | #if 1<15 | ||
214 | # if __ARM_ARCH__>=7 | ||
215 | ldr r2,[r1],#4 @ prefetch | ||
216 | # else | ||
217 | ldrb r2,[r1,#3] | ||
218 | # endif | ||
219 | eor r3,r11,r4 @ a^b, b^c in next round | ||
220 | #else | ||
221 | ldr r2,[sp,#3*4] @ from future BODY_16_xx | ||
222 | eor r3,r11,r4 @ a^b, b^c in next round | ||
223 | ldr r1,[sp,#0*4] @ from future BODY_16_xx | ||
224 | #endif | ||
225 | eor r0,r0,r11,ror#20 @ Sigma0(a) | ||
226 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
227 | add r6,r6,r10 @ d+=h | ||
228 | eor r12,r12,r4 @ Maj(a,b,c) | ||
229 | add r10,r10,r0,ror#2 @ h+=Sigma0(a) | ||
230 | @ add r10,r10,r12 @ h+=Maj(a,b,c) | ||
231 | #if __ARM_ARCH__>=7 | ||
232 | @ ldr r2,[r1],#4 @ 2 | ||
233 | # if 2==15 | ||
234 | str r1,[sp,#17*4] @ make room for r1 | ||
235 | # endif | ||
236 | eor r0,r6,r6,ror#5 | ||
237 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
238 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
239 | # ifndef __ARMEB__ | ||
240 | rev r2,r2 | ||
241 | # endif | ||
242 | #else | ||
243 | @ ldrb r2,[r1,#3] @ 2 | ||
244 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
245 | ldrb r12,[r1,#2] | ||
246 | ldrb r0,[r1,#1] | ||
247 | orr r2,r2,r12,lsl#8 | ||
248 | ldrb r12,[r1],#4 | ||
249 | orr r2,r2,r0,lsl#16 | ||
250 | # if 2==15 | ||
251 | str r1,[sp,#17*4] @ make room for r1 | ||
252 | # endif | ||
253 | eor r0,r6,r6,ror#5 | ||
254 | orr r2,r2,r12,lsl#24 | ||
255 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
256 | #endif | ||
257 | ldr r12,[r14],#4 @ *K256++ | ||
258 | add r9,r9,r2 @ h+=X[i] | ||
259 | str r2,[sp,#2*4] | ||
260 | eor r2,r7,r8 | ||
261 | add r9,r9,r0,ror#6 @ h+=Sigma1(e) | ||
262 | and r2,r2,r6 | ||
263 | add r9,r9,r12 @ h+=K256[i] | ||
264 | eor r2,r2,r8 @ Ch(e,f,g) | ||
265 | eor r0,r10,r10,ror#11 | ||
266 | add r9,r9,r2 @ h+=Ch(e,f,g) | ||
267 | #if 2==31 | ||
268 | and r12,r12,#0xff | ||
269 | cmp r12,#0xf2 @ done? | ||
270 | #endif | ||
271 | #if 2<15 | ||
272 | # if __ARM_ARCH__>=7 | ||
273 | ldr r2,[r1],#4 @ prefetch | ||
274 | # else | ||
275 | ldrb r2,[r1,#3] | ||
276 | # endif | ||
277 | eor r12,r10,r11 @ a^b, b^c in next round | ||
278 | #else | ||
279 | ldr r2,[sp,#4*4] @ from future BODY_16_xx | ||
280 | eor r12,r10,r11 @ a^b, b^c in next round | ||
281 | ldr r1,[sp,#1*4] @ from future BODY_16_xx | ||
282 | #endif | ||
283 | eor r0,r0,r10,ror#20 @ Sigma0(a) | ||
284 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
285 | add r5,r5,r9 @ d+=h | ||
286 | eor r3,r3,r11 @ Maj(a,b,c) | ||
287 | add r9,r9,r0,ror#2 @ h+=Sigma0(a) | ||
288 | @ add r9,r9,r3 @ h+=Maj(a,b,c) | ||
289 | #if __ARM_ARCH__>=7 | ||
290 | @ ldr r2,[r1],#4 @ 3 | ||
291 | # if 3==15 | ||
292 | str r1,[sp,#17*4] @ make room for r1 | ||
293 | # endif | ||
294 | eor r0,r5,r5,ror#5 | ||
295 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
296 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
297 | # ifndef __ARMEB__ | ||
298 | rev r2,r2 | ||
299 | # endif | ||
300 | #else | ||
301 | @ ldrb r2,[r1,#3] @ 3 | ||
302 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
303 | ldrb r3,[r1,#2] | ||
304 | ldrb r0,[r1,#1] | ||
305 | orr r2,r2,r3,lsl#8 | ||
306 | ldrb r3,[r1],#4 | ||
307 | orr r2,r2,r0,lsl#16 | ||
308 | # if 3==15 | ||
309 | str r1,[sp,#17*4] @ make room for r1 | ||
310 | # endif | ||
311 | eor r0,r5,r5,ror#5 | ||
312 | orr r2,r2,r3,lsl#24 | ||
313 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
314 | #endif | ||
315 | ldr r3,[r14],#4 @ *K256++ | ||
316 | add r8,r8,r2 @ h+=X[i] | ||
317 | str r2,[sp,#3*4] | ||
318 | eor r2,r6,r7 | ||
319 | add r8,r8,r0,ror#6 @ h+=Sigma1(e) | ||
320 | and r2,r2,r5 | ||
321 | add r8,r8,r3 @ h+=K256[i] | ||
322 | eor r2,r2,r7 @ Ch(e,f,g) | ||
323 | eor r0,r9,r9,ror#11 | ||
324 | add r8,r8,r2 @ h+=Ch(e,f,g) | ||
325 | #if 3==31 | ||
326 | and r3,r3,#0xff | ||
327 | cmp r3,#0xf2 @ done? | ||
328 | #endif | ||
329 | #if 3<15 | ||
330 | # if __ARM_ARCH__>=7 | ||
331 | ldr r2,[r1],#4 @ prefetch | ||
332 | # else | ||
333 | ldrb r2,[r1,#3] | ||
334 | # endif | ||
335 | eor r3,r9,r10 @ a^b, b^c in next round | ||
336 | #else | ||
337 | ldr r2,[sp,#5*4] @ from future BODY_16_xx | ||
338 | eor r3,r9,r10 @ a^b, b^c in next round | ||
339 | ldr r1,[sp,#2*4] @ from future BODY_16_xx | ||
340 | #endif | ||
341 | eor r0,r0,r9,ror#20 @ Sigma0(a) | ||
342 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
343 | add r4,r4,r8 @ d+=h | ||
344 | eor r12,r12,r10 @ Maj(a,b,c) | ||
345 | add r8,r8,r0,ror#2 @ h+=Sigma0(a) | ||
346 | @ add r8,r8,r12 @ h+=Maj(a,b,c) | ||
347 | #if __ARM_ARCH__>=7 | ||
348 | @ ldr r2,[r1],#4 @ 4 | ||
349 | # if 4==15 | ||
350 | str r1,[sp,#17*4] @ make room for r1 | ||
351 | # endif | ||
352 | eor r0,r4,r4,ror#5 | ||
353 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
354 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
355 | # ifndef __ARMEB__ | ||
356 | rev r2,r2 | ||
357 | # endif | ||
358 | #else | ||
359 | @ ldrb r2,[r1,#3] @ 4 | ||
360 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
361 | ldrb r12,[r1,#2] | ||
362 | ldrb r0,[r1,#1] | ||
363 | orr r2,r2,r12,lsl#8 | ||
364 | ldrb r12,[r1],#4 | ||
365 | orr r2,r2,r0,lsl#16 | ||
366 | # if 4==15 | ||
367 | str r1,[sp,#17*4] @ make room for r1 | ||
368 | # endif | ||
369 | eor r0,r4,r4,ror#5 | ||
370 | orr r2,r2,r12,lsl#24 | ||
371 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
372 | #endif | ||
373 | ldr r12,[r14],#4 @ *K256++ | ||
374 | add r7,r7,r2 @ h+=X[i] | ||
375 | str r2,[sp,#4*4] | ||
376 | eor r2,r5,r6 | ||
377 | add r7,r7,r0,ror#6 @ h+=Sigma1(e) | ||
378 | and r2,r2,r4 | ||
379 | add r7,r7,r12 @ h+=K256[i] | ||
380 | eor r2,r2,r6 @ Ch(e,f,g) | ||
381 | eor r0,r8,r8,ror#11 | ||
382 | add r7,r7,r2 @ h+=Ch(e,f,g) | ||
383 | #if 4==31 | ||
384 | and r12,r12,#0xff | ||
385 | cmp r12,#0xf2 @ done? | ||
386 | #endif | ||
387 | #if 4<15 | ||
388 | # if __ARM_ARCH__>=7 | ||
389 | ldr r2,[r1],#4 @ prefetch | ||
390 | # else | ||
391 | ldrb r2,[r1,#3] | ||
392 | # endif | ||
393 | eor r12,r8,r9 @ a^b, b^c in next round | ||
394 | #else | ||
395 | ldr r2,[sp,#6*4] @ from future BODY_16_xx | ||
396 | eor r12,r8,r9 @ a^b, b^c in next round | ||
397 | ldr r1,[sp,#3*4] @ from future BODY_16_xx | ||
398 | #endif | ||
399 | eor r0,r0,r8,ror#20 @ Sigma0(a) | ||
400 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
401 | add r11,r11,r7 @ d+=h | ||
402 | eor r3,r3,r9 @ Maj(a,b,c) | ||
403 | add r7,r7,r0,ror#2 @ h+=Sigma0(a) | ||
404 | @ add r7,r7,r3 @ h+=Maj(a,b,c) | ||
405 | #if __ARM_ARCH__>=7 | ||
406 | @ ldr r2,[r1],#4 @ 5 | ||
407 | # if 5==15 | ||
408 | str r1,[sp,#17*4] @ make room for r1 | ||
409 | # endif | ||
410 | eor r0,r11,r11,ror#5 | ||
411 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
412 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
413 | # ifndef __ARMEB__ | ||
414 | rev r2,r2 | ||
415 | # endif | ||
416 | #else | ||
417 | @ ldrb r2,[r1,#3] @ 5 | ||
418 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
419 | ldrb r3,[r1,#2] | ||
420 | ldrb r0,[r1,#1] | ||
421 | orr r2,r2,r3,lsl#8 | ||
422 | ldrb r3,[r1],#4 | ||
423 | orr r2,r2,r0,lsl#16 | ||
424 | # if 5==15 | ||
425 | str r1,[sp,#17*4] @ make room for r1 | ||
426 | # endif | ||
427 | eor r0,r11,r11,ror#5 | ||
428 | orr r2,r2,r3,lsl#24 | ||
429 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
430 | #endif | ||
431 | ldr r3,[r14],#4 @ *K256++ | ||
432 | add r6,r6,r2 @ h+=X[i] | ||
433 | str r2,[sp,#5*4] | ||
434 | eor r2,r4,r5 | ||
435 | add r6,r6,r0,ror#6 @ h+=Sigma1(e) | ||
436 | and r2,r2,r11 | ||
437 | add r6,r6,r3 @ h+=K256[i] | ||
438 | eor r2,r2,r5 @ Ch(e,f,g) | ||
439 | eor r0,r7,r7,ror#11 | ||
440 | add r6,r6,r2 @ h+=Ch(e,f,g) | ||
441 | #if 5==31 | ||
442 | and r3,r3,#0xff | ||
443 | cmp r3,#0xf2 @ done? | ||
444 | #endif | ||
445 | #if 5<15 | ||
446 | # if __ARM_ARCH__>=7 | ||
447 | ldr r2,[r1],#4 @ prefetch | ||
448 | # else | ||
449 | ldrb r2,[r1,#3] | ||
450 | # endif | ||
451 | eor r3,r7,r8 @ a^b, b^c in next round | ||
452 | #else | ||
453 | ldr r2,[sp,#7*4] @ from future BODY_16_xx | ||
454 | eor r3,r7,r8 @ a^b, b^c in next round | ||
455 | ldr r1,[sp,#4*4] @ from future BODY_16_xx | ||
456 | #endif | ||
457 | eor r0,r0,r7,ror#20 @ Sigma0(a) | ||
458 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
459 | add r10,r10,r6 @ d+=h | ||
460 | eor r12,r12,r8 @ Maj(a,b,c) | ||
461 | add r6,r6,r0,ror#2 @ h+=Sigma0(a) | ||
462 | @ add r6,r6,r12 @ h+=Maj(a,b,c) | ||
463 | #if __ARM_ARCH__>=7 | ||
464 | @ ldr r2,[r1],#4 @ 6 | ||
465 | # if 6==15 | ||
466 | str r1,[sp,#17*4] @ make room for r1 | ||
467 | # endif | ||
468 | eor r0,r10,r10,ror#5 | ||
469 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
470 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
471 | # ifndef __ARMEB__ | ||
472 | rev r2,r2 | ||
473 | # endif | ||
474 | #else | ||
475 | @ ldrb r2,[r1,#3] @ 6 | ||
476 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
477 | ldrb r12,[r1,#2] | ||
478 | ldrb r0,[r1,#1] | ||
479 | orr r2,r2,r12,lsl#8 | ||
480 | ldrb r12,[r1],#4 | ||
481 | orr r2,r2,r0,lsl#16 | ||
482 | # if 6==15 | ||
483 | str r1,[sp,#17*4] @ make room for r1 | ||
484 | # endif | ||
485 | eor r0,r10,r10,ror#5 | ||
486 | orr r2,r2,r12,lsl#24 | ||
487 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
488 | #endif | ||
489 | ldr r12,[r14],#4 @ *K256++ | ||
490 | add r5,r5,r2 @ h+=X[i] | ||
491 | str r2,[sp,#6*4] | ||
492 | eor r2,r11,r4 | ||
493 | add r5,r5,r0,ror#6 @ h+=Sigma1(e) | ||
494 | and r2,r2,r10 | ||
495 | add r5,r5,r12 @ h+=K256[i] | ||
496 | eor r2,r2,r4 @ Ch(e,f,g) | ||
497 | eor r0,r6,r6,ror#11 | ||
498 | add r5,r5,r2 @ h+=Ch(e,f,g) | ||
499 | #if 6==31 | ||
500 | and r12,r12,#0xff | ||
501 | cmp r12,#0xf2 @ done? | ||
502 | #endif | ||
503 | #if 6<15 | ||
504 | # if __ARM_ARCH__>=7 | ||
505 | ldr r2,[r1],#4 @ prefetch | ||
506 | # else | ||
507 | ldrb r2,[r1,#3] | ||
508 | # endif | ||
509 | eor r12,r6,r7 @ a^b, b^c in next round | ||
510 | #else | ||
511 | ldr r2,[sp,#8*4] @ from future BODY_16_xx | ||
512 | eor r12,r6,r7 @ a^b, b^c in next round | ||
513 | ldr r1,[sp,#5*4] @ from future BODY_16_xx | ||
514 | #endif | ||
515 | eor r0,r0,r6,ror#20 @ Sigma0(a) | ||
516 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
517 | add r9,r9,r5 @ d+=h | ||
518 | eor r3,r3,r7 @ Maj(a,b,c) | ||
519 | add r5,r5,r0,ror#2 @ h+=Sigma0(a) | ||
520 | @ add r5,r5,r3 @ h+=Maj(a,b,c) | ||
521 | #if __ARM_ARCH__>=7 | ||
522 | @ ldr r2,[r1],#4 @ 7 | ||
523 | # if 7==15 | ||
524 | str r1,[sp,#17*4] @ make room for r1 | ||
525 | # endif | ||
526 | eor r0,r9,r9,ror#5 | ||
527 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
528 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
529 | # ifndef __ARMEB__ | ||
530 | rev r2,r2 | ||
531 | # endif | ||
532 | #else | ||
533 | @ ldrb r2,[r1,#3] @ 7 | ||
534 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
535 | ldrb r3,[r1,#2] | ||
536 | ldrb r0,[r1,#1] | ||
537 | orr r2,r2,r3,lsl#8 | ||
538 | ldrb r3,[r1],#4 | ||
539 | orr r2,r2,r0,lsl#16 | ||
540 | # if 7==15 | ||
541 | str r1,[sp,#17*4] @ make room for r1 | ||
542 | # endif | ||
543 | eor r0,r9,r9,ror#5 | ||
544 | orr r2,r2,r3,lsl#24 | ||
545 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
546 | #endif | ||
547 | ldr r3,[r14],#4 @ *K256++ | ||
548 | add r4,r4,r2 @ h+=X[i] | ||
549 | str r2,[sp,#7*4] | ||
550 | eor r2,r10,r11 | ||
551 | add r4,r4,r0,ror#6 @ h+=Sigma1(e) | ||
552 | and r2,r2,r9 | ||
553 | add r4,r4,r3 @ h+=K256[i] | ||
554 | eor r2,r2,r11 @ Ch(e,f,g) | ||
555 | eor r0,r5,r5,ror#11 | ||
556 | add r4,r4,r2 @ h+=Ch(e,f,g) | ||
557 | #if 7==31 | ||
558 | and r3,r3,#0xff | ||
559 | cmp r3,#0xf2 @ done? | ||
560 | #endif | ||
561 | #if 7<15 | ||
562 | # if __ARM_ARCH__>=7 | ||
563 | ldr r2,[r1],#4 @ prefetch | ||
564 | # else | ||
565 | ldrb r2,[r1,#3] | ||
566 | # endif | ||
567 | eor r3,r5,r6 @ a^b, b^c in next round | ||
568 | #else | ||
569 | ldr r2,[sp,#9*4] @ from future BODY_16_xx | ||
570 | eor r3,r5,r6 @ a^b, b^c in next round | ||
571 | ldr r1,[sp,#6*4] @ from future BODY_16_xx | ||
572 | #endif | ||
573 | eor r0,r0,r5,ror#20 @ Sigma0(a) | ||
574 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
575 | add r8,r8,r4 @ d+=h | ||
576 | eor r12,r12,r6 @ Maj(a,b,c) | ||
577 | add r4,r4,r0,ror#2 @ h+=Sigma0(a) | ||
578 | @ add r4,r4,r12 @ h+=Maj(a,b,c) | ||
579 | #if __ARM_ARCH__>=7 | ||
580 | @ ldr r2,[r1],#4 @ 8 | ||
581 | # if 8==15 | ||
582 | str r1,[sp,#17*4] @ make room for r1 | ||
583 | # endif | ||
584 | eor r0,r8,r8,ror#5 | ||
585 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
586 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
587 | # ifndef __ARMEB__ | ||
588 | rev r2,r2 | ||
589 | # endif | ||
590 | #else | ||
591 | @ ldrb r2,[r1,#3] @ 8 | ||
592 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
593 | ldrb r12,[r1,#2] | ||
594 | ldrb r0,[r1,#1] | ||
595 | orr r2,r2,r12,lsl#8 | ||
596 | ldrb r12,[r1],#4 | ||
597 | orr r2,r2,r0,lsl#16 | ||
598 | # if 8==15 | ||
599 | str r1,[sp,#17*4] @ make room for r1 | ||
600 | # endif | ||
601 | eor r0,r8,r8,ror#5 | ||
602 | orr r2,r2,r12,lsl#24 | ||
603 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
604 | #endif | ||
605 | ldr r12,[r14],#4 @ *K256++ | ||
606 | add r11,r11,r2 @ h+=X[i] | ||
607 | str r2,[sp,#8*4] | ||
608 | eor r2,r9,r10 | ||
609 | add r11,r11,r0,ror#6 @ h+=Sigma1(e) | ||
610 | and r2,r2,r8 | ||
611 | add r11,r11,r12 @ h+=K256[i] | ||
612 | eor r2,r2,r10 @ Ch(e,f,g) | ||
613 | eor r0,r4,r4,ror#11 | ||
614 | add r11,r11,r2 @ h+=Ch(e,f,g) | ||
615 | #if 8==31 | ||
616 | and r12,r12,#0xff | ||
617 | cmp r12,#0xf2 @ done? | ||
618 | #endif | ||
619 | #if 8<15 | ||
620 | # if __ARM_ARCH__>=7 | ||
621 | ldr r2,[r1],#4 @ prefetch | ||
622 | # else | ||
623 | ldrb r2,[r1,#3] | ||
624 | # endif | ||
625 | eor r12,r4,r5 @ a^b, b^c in next round | ||
626 | #else | ||
627 | ldr r2,[sp,#10*4] @ from future BODY_16_xx | ||
628 | eor r12,r4,r5 @ a^b, b^c in next round | ||
629 | ldr r1,[sp,#7*4] @ from future BODY_16_xx | ||
630 | #endif | ||
631 | eor r0,r0,r4,ror#20 @ Sigma0(a) | ||
632 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
633 | add r7,r7,r11 @ d+=h | ||
634 | eor r3,r3,r5 @ Maj(a,b,c) | ||
635 | add r11,r11,r0,ror#2 @ h+=Sigma0(a) | ||
636 | @ add r11,r11,r3 @ h+=Maj(a,b,c) | ||
637 | #if __ARM_ARCH__>=7 | ||
638 | @ ldr r2,[r1],#4 @ 9 | ||
639 | # if 9==15 | ||
640 | str r1,[sp,#17*4] @ make room for r1 | ||
641 | # endif | ||
642 | eor r0,r7,r7,ror#5 | ||
643 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
644 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
645 | # ifndef __ARMEB__ | ||
646 | rev r2,r2 | ||
647 | # endif | ||
648 | #else | ||
649 | @ ldrb r2,[r1,#3] @ 9 | ||
650 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
651 | ldrb r3,[r1,#2] | ||
652 | ldrb r0,[r1,#1] | ||
653 | orr r2,r2,r3,lsl#8 | ||
654 | ldrb r3,[r1],#4 | ||
655 | orr r2,r2,r0,lsl#16 | ||
656 | # if 9==15 | ||
657 | str r1,[sp,#17*4] @ make room for r1 | ||
658 | # endif | ||
659 | eor r0,r7,r7,ror#5 | ||
660 | orr r2,r2,r3,lsl#24 | ||
661 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
662 | #endif | ||
663 | ldr r3,[r14],#4 @ *K256++ | ||
664 | add r10,r10,r2 @ h+=X[i] | ||
665 | str r2,[sp,#9*4] | ||
666 | eor r2,r8,r9 | ||
667 | add r10,r10,r0,ror#6 @ h+=Sigma1(e) | ||
668 | and r2,r2,r7 | ||
669 | add r10,r10,r3 @ h+=K256[i] | ||
670 | eor r2,r2,r9 @ Ch(e,f,g) | ||
671 | eor r0,r11,r11,ror#11 | ||
672 | add r10,r10,r2 @ h+=Ch(e,f,g) | ||
673 | #if 9==31 | ||
674 | and r3,r3,#0xff | ||
675 | cmp r3,#0xf2 @ done? | ||
676 | #endif | ||
677 | #if 9<15 | ||
678 | # if __ARM_ARCH__>=7 | ||
679 | ldr r2,[r1],#4 @ prefetch | ||
680 | # else | ||
681 | ldrb r2,[r1,#3] | ||
682 | # endif | ||
683 | eor r3,r11,r4 @ a^b, b^c in next round | ||
684 | #else | ||
685 | ldr r2,[sp,#11*4] @ from future BODY_16_xx | ||
686 | eor r3,r11,r4 @ a^b, b^c in next round | ||
687 | ldr r1,[sp,#8*4] @ from future BODY_16_xx | ||
688 | #endif | ||
689 | eor r0,r0,r11,ror#20 @ Sigma0(a) | ||
690 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
691 | add r6,r6,r10 @ d+=h | ||
692 | eor r12,r12,r4 @ Maj(a,b,c) | ||
693 | add r10,r10,r0,ror#2 @ h+=Sigma0(a) | ||
694 | @ add r10,r10,r12 @ h+=Maj(a,b,c) | ||
695 | #if __ARM_ARCH__>=7 | ||
696 | @ ldr r2,[r1],#4 @ 10 | ||
697 | # if 10==15 | ||
698 | str r1,[sp,#17*4] @ make room for r1 | ||
699 | # endif | ||
700 | eor r0,r6,r6,ror#5 | ||
701 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
702 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
703 | # ifndef __ARMEB__ | ||
704 | rev r2,r2 | ||
705 | # endif | ||
706 | #else | ||
707 | @ ldrb r2,[r1,#3] @ 10 | ||
708 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
709 | ldrb r12,[r1,#2] | ||
710 | ldrb r0,[r1,#1] | ||
711 | orr r2,r2,r12,lsl#8 | ||
712 | ldrb r12,[r1],#4 | ||
713 | orr r2,r2,r0,lsl#16 | ||
714 | # if 10==15 | ||
715 | str r1,[sp,#17*4] @ make room for r1 | ||
716 | # endif | ||
717 | eor r0,r6,r6,ror#5 | ||
718 | orr r2,r2,r12,lsl#24 | ||
719 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
720 | #endif | ||
721 | ldr r12,[r14],#4 @ *K256++ | ||
722 | add r9,r9,r2 @ h+=X[i] | ||
723 | str r2,[sp,#10*4] | ||
724 | eor r2,r7,r8 | ||
725 | add r9,r9,r0,ror#6 @ h+=Sigma1(e) | ||
726 | and r2,r2,r6 | ||
727 | add r9,r9,r12 @ h+=K256[i] | ||
728 | eor r2,r2,r8 @ Ch(e,f,g) | ||
729 | eor r0,r10,r10,ror#11 | ||
730 | add r9,r9,r2 @ h+=Ch(e,f,g) | ||
731 | #if 10==31 | ||
732 | and r12,r12,#0xff | ||
733 | cmp r12,#0xf2 @ done? | ||
734 | #endif | ||
735 | #if 10<15 | ||
736 | # if __ARM_ARCH__>=7 | ||
737 | ldr r2,[r1],#4 @ prefetch | ||
738 | # else | ||
739 | ldrb r2,[r1,#3] | ||
740 | # endif | ||
741 | eor r12,r10,r11 @ a^b, b^c in next round | ||
742 | #else | ||
743 | ldr r2,[sp,#12*4] @ from future BODY_16_xx | ||
744 | eor r12,r10,r11 @ a^b, b^c in next round | ||
745 | ldr r1,[sp,#9*4] @ from future BODY_16_xx | ||
746 | #endif | ||
747 | eor r0,r0,r10,ror#20 @ Sigma0(a) | ||
748 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
749 | add r5,r5,r9 @ d+=h | ||
750 | eor r3,r3,r11 @ Maj(a,b,c) | ||
751 | add r9,r9,r0,ror#2 @ h+=Sigma0(a) | ||
752 | @ add r9,r9,r3 @ h+=Maj(a,b,c) | ||
753 | #if __ARM_ARCH__>=7 | ||
754 | @ ldr r2,[r1],#4 @ 11 | ||
755 | # if 11==15 | ||
756 | str r1,[sp,#17*4] @ make room for r1 | ||
757 | # endif | ||
758 | eor r0,r5,r5,ror#5 | ||
759 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
760 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
761 | # ifndef __ARMEB__ | ||
762 | rev r2,r2 | ||
763 | # endif | ||
764 | #else | ||
765 | @ ldrb r2,[r1,#3] @ 11 | ||
766 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
767 | ldrb r3,[r1,#2] | ||
768 | ldrb r0,[r1,#1] | ||
769 | orr r2,r2,r3,lsl#8 | ||
770 | ldrb r3,[r1],#4 | ||
771 | orr r2,r2,r0,lsl#16 | ||
772 | # if 11==15 | ||
773 | str r1,[sp,#17*4] @ make room for r1 | ||
774 | # endif | ||
775 | eor r0,r5,r5,ror#5 | ||
776 | orr r2,r2,r3,lsl#24 | ||
777 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
778 | #endif | ||
779 | ldr r3,[r14],#4 @ *K256++ | ||
780 | add r8,r8,r2 @ h+=X[i] | ||
781 | str r2,[sp,#11*4] | ||
782 | eor r2,r6,r7 | ||
783 | add r8,r8,r0,ror#6 @ h+=Sigma1(e) | ||
784 | and r2,r2,r5 | ||
785 | add r8,r8,r3 @ h+=K256[i] | ||
786 | eor r2,r2,r7 @ Ch(e,f,g) | ||
787 | eor r0,r9,r9,ror#11 | ||
788 | add r8,r8,r2 @ h+=Ch(e,f,g) | ||
789 | #if 11==31 | ||
790 | and r3,r3,#0xff | ||
791 | cmp r3,#0xf2 @ done? | ||
792 | #endif | ||
793 | #if 11<15 | ||
794 | # if __ARM_ARCH__>=7 | ||
795 | ldr r2,[r1],#4 @ prefetch | ||
796 | # else | ||
797 | ldrb r2,[r1,#3] | ||
798 | # endif | ||
799 | eor r3,r9,r10 @ a^b, b^c in next round | ||
800 | #else | ||
801 | ldr r2,[sp,#13*4] @ from future BODY_16_xx | ||
802 | eor r3,r9,r10 @ a^b, b^c in next round | ||
803 | ldr r1,[sp,#10*4] @ from future BODY_16_xx | ||
804 | #endif | ||
805 | eor r0,r0,r9,ror#20 @ Sigma0(a) | ||
806 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
807 | add r4,r4,r8 @ d+=h | ||
808 | eor r12,r12,r10 @ Maj(a,b,c) | ||
809 | add r8,r8,r0,ror#2 @ h+=Sigma0(a) | ||
810 | @ add r8,r8,r12 @ h+=Maj(a,b,c) | ||
811 | #if __ARM_ARCH__>=7 | ||
812 | @ ldr r2,[r1],#4 @ 12 | ||
813 | # if 12==15 | ||
814 | str r1,[sp,#17*4] @ make room for r1 | ||
815 | # endif | ||
816 | eor r0,r4,r4,ror#5 | ||
817 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
818 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
819 | # ifndef __ARMEB__ | ||
820 | rev r2,r2 | ||
821 | # endif | ||
822 | #else | ||
823 | @ ldrb r2,[r1,#3] @ 12 | ||
824 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
825 | ldrb r12,[r1,#2] | ||
826 | ldrb r0,[r1,#1] | ||
827 | orr r2,r2,r12,lsl#8 | ||
828 | ldrb r12,[r1],#4 | ||
829 | orr r2,r2,r0,lsl#16 | ||
830 | # if 12==15 | ||
831 | str r1,[sp,#17*4] @ make room for r1 | ||
832 | # endif | ||
833 | eor r0,r4,r4,ror#5 | ||
834 | orr r2,r2,r12,lsl#24 | ||
835 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
836 | #endif | ||
837 | ldr r12,[r14],#4 @ *K256++ | ||
838 | add r7,r7,r2 @ h+=X[i] | ||
839 | str r2,[sp,#12*4] | ||
840 | eor r2,r5,r6 | ||
841 | add r7,r7,r0,ror#6 @ h+=Sigma1(e) | ||
842 | and r2,r2,r4 | ||
843 | add r7,r7,r12 @ h+=K256[i] | ||
844 | eor r2,r2,r6 @ Ch(e,f,g) | ||
845 | eor r0,r8,r8,ror#11 | ||
846 | add r7,r7,r2 @ h+=Ch(e,f,g) | ||
847 | #if 12==31 | ||
848 | and r12,r12,#0xff | ||
849 | cmp r12,#0xf2 @ done? | ||
850 | #endif | ||
851 | #if 12<15 | ||
852 | # if __ARM_ARCH__>=7 | ||
853 | ldr r2,[r1],#4 @ prefetch | ||
854 | # else | ||
855 | ldrb r2,[r1,#3] | ||
856 | # endif | ||
857 | eor r12,r8,r9 @ a^b, b^c in next round | ||
858 | #else | ||
859 | ldr r2,[sp,#14*4] @ from future BODY_16_xx | ||
860 | eor r12,r8,r9 @ a^b, b^c in next round | ||
861 | ldr r1,[sp,#11*4] @ from future BODY_16_xx | ||
862 | #endif | ||
863 | eor r0,r0,r8,ror#20 @ Sigma0(a) | ||
864 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
865 | add r11,r11,r7 @ d+=h | ||
866 | eor r3,r3,r9 @ Maj(a,b,c) | ||
867 | add r7,r7,r0,ror#2 @ h+=Sigma0(a) | ||
868 | @ add r7,r7,r3 @ h+=Maj(a,b,c) | ||
869 | #if __ARM_ARCH__>=7 | ||
870 | @ ldr r2,[r1],#4 @ 13 | ||
871 | # if 13==15 | ||
872 | str r1,[sp,#17*4] @ make room for r1 | ||
873 | # endif | ||
874 | eor r0,r11,r11,ror#5 | ||
875 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
876 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
877 | # ifndef __ARMEB__ | ||
878 | rev r2,r2 | ||
879 | # endif | ||
880 | #else | ||
881 | @ ldrb r2,[r1,#3] @ 13 | ||
882 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
883 | ldrb r3,[r1,#2] | ||
884 | ldrb r0,[r1,#1] | ||
885 | orr r2,r2,r3,lsl#8 | ||
886 | ldrb r3,[r1],#4 | ||
887 | orr r2,r2,r0,lsl#16 | ||
888 | # if 13==15 | ||
889 | str r1,[sp,#17*4] @ make room for r1 | ||
890 | # endif | ||
891 | eor r0,r11,r11,ror#5 | ||
892 | orr r2,r2,r3,lsl#24 | ||
893 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
894 | #endif | ||
895 | ldr r3,[r14],#4 @ *K256++ | ||
896 | add r6,r6,r2 @ h+=X[i] | ||
897 | str r2,[sp,#13*4] | ||
898 | eor r2,r4,r5 | ||
899 | add r6,r6,r0,ror#6 @ h+=Sigma1(e) | ||
900 | and r2,r2,r11 | ||
901 | add r6,r6,r3 @ h+=K256[i] | ||
902 | eor r2,r2,r5 @ Ch(e,f,g) | ||
903 | eor r0,r7,r7,ror#11 | ||
904 | add r6,r6,r2 @ h+=Ch(e,f,g) | ||
905 | #if 13==31 | ||
906 | and r3,r3,#0xff | ||
907 | cmp r3,#0xf2 @ done? | ||
908 | #endif | ||
909 | #if 13<15 | ||
910 | # if __ARM_ARCH__>=7 | ||
911 | ldr r2,[r1],#4 @ prefetch | ||
912 | # else | ||
913 | ldrb r2,[r1,#3] | ||
914 | # endif | ||
915 | eor r3,r7,r8 @ a^b, b^c in next round | ||
916 | #else | ||
917 | ldr r2,[sp,#15*4] @ from future BODY_16_xx | ||
918 | eor r3,r7,r8 @ a^b, b^c in next round | ||
919 | ldr r1,[sp,#12*4] @ from future BODY_16_xx | ||
920 | #endif | ||
921 | eor r0,r0,r7,ror#20 @ Sigma0(a) | ||
922 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
923 | add r10,r10,r6 @ d+=h | ||
924 | eor r12,r12,r8 @ Maj(a,b,c) | ||
925 | add r6,r6,r0,ror#2 @ h+=Sigma0(a) | ||
926 | @ add r6,r6,r12 @ h+=Maj(a,b,c) | ||
927 | #if __ARM_ARCH__>=7 | ||
928 | @ ldr r2,[r1],#4 @ 14 | ||
929 | # if 14==15 | ||
930 | str r1,[sp,#17*4] @ make room for r1 | ||
931 | # endif | ||
932 | eor r0,r10,r10,ror#5 | ||
933 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
934 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
935 | # ifndef __ARMEB__ | ||
936 | rev r2,r2 | ||
937 | # endif | ||
938 | #else | ||
939 | @ ldrb r2,[r1,#3] @ 14 | ||
940 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
941 | ldrb r12,[r1,#2] | ||
942 | ldrb r0,[r1,#1] | ||
943 | orr r2,r2,r12,lsl#8 | ||
944 | ldrb r12,[r1],#4 | ||
945 | orr r2,r2,r0,lsl#16 | ||
946 | # if 14==15 | ||
947 | str r1,[sp,#17*4] @ make room for r1 | ||
948 | # endif | ||
949 | eor r0,r10,r10,ror#5 | ||
950 | orr r2,r2,r12,lsl#24 | ||
951 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
952 | #endif | ||
953 | ldr r12,[r14],#4 @ *K256++ | ||
954 | add r5,r5,r2 @ h+=X[i] | ||
955 | str r2,[sp,#14*4] | ||
956 | eor r2,r11,r4 | ||
957 | add r5,r5,r0,ror#6 @ h+=Sigma1(e) | ||
958 | and r2,r2,r10 | ||
959 | add r5,r5,r12 @ h+=K256[i] | ||
960 | eor r2,r2,r4 @ Ch(e,f,g) | ||
961 | eor r0,r6,r6,ror#11 | ||
962 | add r5,r5,r2 @ h+=Ch(e,f,g) | ||
963 | #if 14==31 | ||
964 | and r12,r12,#0xff | ||
965 | cmp r12,#0xf2 @ done? | ||
966 | #endif | ||
967 | #if 14<15 | ||
968 | # if __ARM_ARCH__>=7 | ||
969 | ldr r2,[r1],#4 @ prefetch | ||
970 | # else | ||
971 | ldrb r2,[r1,#3] | ||
972 | # endif | ||
973 | eor r12,r6,r7 @ a^b, b^c in next round | ||
974 | #else | ||
975 | ldr r2,[sp,#0*4] @ from future BODY_16_xx | ||
976 | eor r12,r6,r7 @ a^b, b^c in next round | ||
977 | ldr r1,[sp,#13*4] @ from future BODY_16_xx | ||
978 | #endif | ||
979 | eor r0,r0,r6,ror#20 @ Sigma0(a) | ||
980 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
981 | add r9,r9,r5 @ d+=h | ||
982 | eor r3,r3,r7 @ Maj(a,b,c) | ||
983 | add r5,r5,r0,ror#2 @ h+=Sigma0(a) | ||
984 | @ add r5,r5,r3 @ h+=Maj(a,b,c) | ||
985 | #if __ARM_ARCH__>=7 | ||
986 | @ ldr r2,[r1],#4 @ 15 | ||
987 | # if 15==15 | ||
988 | str r1,[sp,#17*4] @ make room for r1 | ||
989 | # endif | ||
990 | eor r0,r9,r9,ror#5 | ||
991 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
992 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
993 | # ifndef __ARMEB__ | ||
994 | rev r2,r2 | ||
995 | # endif | ||
996 | #else | ||
997 | @ ldrb r2,[r1,#3] @ 15 | ||
998 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
999 | ldrb r3,[r1,#2] | ||
1000 | ldrb r0,[r1,#1] | ||
1001 | orr r2,r2,r3,lsl#8 | ||
1002 | ldrb r3,[r1],#4 | ||
1003 | orr r2,r2,r0,lsl#16 | ||
1004 | # if 15==15 | ||
1005 | str r1,[sp,#17*4] @ make room for r1 | ||
1006 | # endif | ||
1007 | eor r0,r9,r9,ror#5 | ||
1008 | orr r2,r2,r3,lsl#24 | ||
1009 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
1010 | #endif | ||
1011 | ldr r3,[r14],#4 @ *K256++ | ||
1012 | add r4,r4,r2 @ h+=X[i] | ||
1013 | str r2,[sp,#15*4] | ||
1014 | eor r2,r10,r11 | ||
1015 | add r4,r4,r0,ror#6 @ h+=Sigma1(e) | ||
1016 | and r2,r2,r9 | ||
1017 | add r4,r4,r3 @ h+=K256[i] | ||
1018 | eor r2,r2,r11 @ Ch(e,f,g) | ||
1019 | eor r0,r5,r5,ror#11 | ||
1020 | add r4,r4,r2 @ h+=Ch(e,f,g) | ||
1021 | #if 15==31 | ||
1022 | and r3,r3,#0xff | ||
1023 | cmp r3,#0xf2 @ done? | ||
1024 | #endif | ||
1025 | #if 15<15 | ||
1026 | # if __ARM_ARCH__>=7 | ||
1027 | ldr r2,[r1],#4 @ prefetch | ||
1028 | # else | ||
1029 | ldrb r2,[r1,#3] | ||
1030 | # endif | ||
1031 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1032 | #else | ||
1033 | ldr r2,[sp,#1*4] @ from future BODY_16_xx | ||
1034 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1035 | ldr r1,[sp,#14*4] @ from future BODY_16_xx | ||
1036 | #endif | ||
1037 | eor r0,r0,r5,ror#20 @ Sigma0(a) | ||
1038 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1039 | add r8,r8,r4 @ d+=h | ||
1040 | eor r12,r12,r6 @ Maj(a,b,c) | ||
1041 | add r4,r4,r0,ror#2 @ h+=Sigma0(a) | ||
1042 | @ add r4,r4,r12 @ h+=Maj(a,b,c) | ||
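@ Round 15 above is the last to consume raw input: its
@ "str r1,[sp,#17*4]" spill parks the input pointer so that r1 and r2
@ can carry schedule words from here on, primed by the
@ "from future BODY_16_xx" preloads.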
1043 | .Lrounds_16_xx: | ||
1044 | @ ldr r2,[sp,#1*4] @ 16 | ||
1045 | @ ldr r1,[sp,#14*4] | ||
1046 | mov r0,r2,ror#7 | ||
1047 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
1048 | mov r12,r1,ror#17 | ||
1049 | eor r0,r0,r2,ror#18 | ||
1050 | eor r12,r12,r1,ror#19 | ||
1051 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1052 | ldr r2,[sp,#0*4] | ||
1053 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1054 | ldr r1,[sp,#9*4] | ||
1055 | |||
1056 | add r12,r12,r0 | ||
1057 | eor r0,r8,r8,ror#5 @ from BODY_00_15 | ||
1058 | add r2,r2,r12 | ||
1059 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
1060 | add r2,r2,r1 @ X[i] | ||
1061 | ldr r12,[r14],#4 @ *K256++ | ||
1062 | add r11,r11,r2 @ h+=X[i] | ||
1063 | str r2,[sp,#0*4] | ||
1064 | eor r2,r9,r10 | ||
1065 | add r11,r11,r0,ror#6 @ h+=Sigma1(e) | ||
1066 | and r2,r2,r8 | ||
1067 | add r11,r11,r12 @ h+=K256[i] | ||
1068 | eor r2,r2,r10 @ Ch(e,f,g) | ||
1069 | eor r0,r4,r4,ror#11 | ||
1070 | add r11,r11,r2 @ h+=Ch(e,f,g) | ||
1071 | #if 16==31 | ||
1072 | and r12,r12,#0xff | ||
1073 | cmp r12,#0xf2 @ done? | ||
1074 | #endif | ||
1075 | #if 16<15 | ||
1076 | # if __ARM_ARCH__>=7 | ||
1077 | ldr r2,[r1],#4 @ prefetch | ||
1078 | # else | ||
1079 | ldrb r2,[r1,#3] | ||
1080 | # endif | ||
1081 | eor r12,r4,r5 @ a^b, b^c in next round | ||
1082 | #else | ||
1083 | ldr r2,[sp,#2*4] @ from future BODY_16_xx | ||
1084 | eor r12,r4,r5 @ a^b, b^c in next round | ||
1085 | ldr r1,[sp,#15*4] @ from future BODY_16_xx | ||
1086 | #endif | ||
1087 | eor r0,r0,r4,ror#20 @ Sigma0(a) | ||
1088 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1089 | add r7,r7,r11 @ d+=h | ||
1090 | eor r3,r3,r5 @ Maj(a,b,c) | ||
1091 | add r11,r11,r0,ror#2 @ h+=Sigma0(a) | ||
1092 | @ add r11,r11,r3 @ h+=Maj(a,b,c) | ||
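Each of the 48 rounds in this block first extends the message window in place, exactly as the sigma0/sigma1 comments describe; X[i+1] and X[i+14] arrive in r2 and r1 thanks to the previous round's preloads. As a C helper (sketch only):

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned n)	/* as in the first sketch */
{
	return (x >> n) | (x << (32 - n));
}

/* In-place schedule update for round i >= 16 over the 16-word window
 * kept at sp..sp+63; returns the new X[i mod 16]. */
static uint32_t sched(uint32_t X[16], int i)
{
	uint32_t x1  = X[(i + 1) & 15];		/* held in r2 across rounds */
	uint32_t x14 = X[(i + 14) & 15];	/* held in r1 across rounds */
	uint32_t s0 = ror32(x1,   7) ^ ror32(x1,  18) ^ (x1  >>  3);
	uint32_t s1 = ror32(x14, 17) ^ ror32(x14, 19) ^ (x14 >> 10);

	return X[i & 15] += s0 + s1 + X[(i + 9) & 15];
}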
1093 | @ ldr r2,[sp,#2*4] @ 17 | ||
1094 | @ ldr r1,[sp,#15*4] | ||
1095 | mov r0,r2,ror#7 | ||
1096 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
1097 | mov r3,r1,ror#17 | ||
1098 | eor r0,r0,r2,ror#18 | ||
1099 | eor r3,r3,r1,ror#19 | ||
1100 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1101 | ldr r2,[sp,#1*4] | ||
1102 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1103 | ldr r1,[sp,#10*4] | ||
1104 | |||
1105 | add r3,r3,r0 | ||
1106 | eor r0,r7,r7,ror#5 @ from BODY_00_15 | ||
1107 | add r2,r2,r3 | ||
1108 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
1109 | add r2,r2,r1 @ X[i] | ||
1110 | ldr r3,[r14],#4 @ *K256++ | ||
1111 | add r10,r10,r2 @ h+=X[i] | ||
1112 | str r2,[sp,#1*4] | ||
1113 | eor r2,r8,r9 | ||
1114 | add r10,r10,r0,ror#6 @ h+=Sigma1(e) | ||
1115 | and r2,r2,r7 | ||
1116 | add r10,r10,r3 @ h+=K256[i] | ||
1117 | eor r2,r2,r9 @ Ch(e,f,g) | ||
1118 | eor r0,r11,r11,ror#11 | ||
1119 | add r10,r10,r2 @ h+=Ch(e,f,g) | ||
1120 | #if 17==31 | ||
1121 | and r3,r3,#0xff | ||
1122 | cmp r3,#0xf2 @ done? | ||
1123 | #endif | ||
1124 | #if 17<15 | ||
1125 | # if __ARM_ARCH__>=7 | ||
1126 | ldr r2,[r1],#4 @ prefetch | ||
1127 | # else | ||
1128 | ldrb r2,[r1,#3] | ||
1129 | # endif | ||
1130 | eor r3,r11,r4 @ a^b, b^c in next round | ||
1131 | #else | ||
1132 | ldr r2,[sp,#3*4] @ from future BODY_16_xx | ||
1133 | eor r3,r11,r4 @ a^b, b^c in next round | ||
1134 | ldr r1,[sp,#0*4] @ from future BODY_16_xx | ||
1135 | #endif | ||
1136 | eor r0,r0,r11,ror#20 @ Sigma0(a) | ||
1137 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1138 | add r6,r6,r10 @ d+=h | ||
1139 | eor r12,r12,r4 @ Maj(a,b,c) | ||
1140 | add r10,r10,r0,ror#2 @ h+=Sigma0(a) | ||
1141 | @ add r10,r10,r12 @ h+=Maj(a,b,c) | ||
1142 | @ ldr r2,[sp,#3*4] @ 18 | ||
1143 | @ ldr r1,[sp,#0*4] | ||
1144 | mov r0,r2,ror#7 | ||
1145 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
1146 | mov r12,r1,ror#17 | ||
1147 | eor r0,r0,r2,ror#18 | ||
1148 | eor r12,r12,r1,ror#19 | ||
1149 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1150 | ldr r2,[sp,#2*4] | ||
1151 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1152 | ldr r1,[sp,#11*4] | ||
1153 | |||
1154 | add r12,r12,r0 | ||
1155 | eor r0,r6,r6,ror#5 @ from BODY_00_15 | ||
1156 | add r2,r2,r12 | ||
1157 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
1158 | add r2,r2,r1 @ X[i] | ||
1159 | ldr r12,[r14],#4 @ *K256++ | ||
1160 | add r9,r9,r2 @ h+=X[i] | ||
1161 | str r2,[sp,#2*4] | ||
1162 | eor r2,r7,r8 | ||
1163 | add r9,r9,r0,ror#6 @ h+=Sigma1(e) | ||
1164 | and r2,r2,r6 | ||
1165 | add r9,r9,r12 @ h+=K256[i] | ||
1166 | eor r2,r2,r8 @ Ch(e,f,g) | ||
1167 | eor r0,r10,r10,ror#11 | ||
1168 | add r9,r9,r2 @ h+=Ch(e,f,g) | ||
1169 | #if 18==31 | ||
1170 | and r12,r12,#0xff | ||
1171 | cmp r12,#0xf2 @ done? | ||
1172 | #endif | ||
1173 | #if 18<15 | ||
1174 | # if __ARM_ARCH__>=7 | ||
1175 | ldr r2,[r1],#4 @ prefetch | ||
1176 | # else | ||
1177 | ldrb r2,[r1,#3] | ||
1178 | # endif | ||
1179 | eor r12,r10,r11 @ a^b, b^c in next round | ||
1180 | #else | ||
1181 | ldr r2,[sp,#4*4] @ from future BODY_16_xx | ||
1182 | eor r12,r10,r11 @ a^b, b^c in next round | ||
1183 | ldr r1,[sp,#1*4] @ from future BODY_16_xx | ||
1184 | #endif | ||
1185 | eor r0,r0,r10,ror#20 @ Sigma0(a) | ||
1186 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1187 | add r5,r5,r9 @ d+=h | ||
1188 | eor r3,r3,r11 @ Maj(a,b,c) | ||
1189 | add r9,r9,r0,ror#2 @ h+=Sigma0(a) | ||
1190 | @ add r9,r9,r3 @ h+=Maj(a,b,c) | ||
1191 | @ ldr r2,[sp,#4*4] @ 19 | ||
1192 | @ ldr r1,[sp,#1*4] | ||
1193 | mov r0,r2,ror#7 | ||
1194 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
1195 | mov r3,r1,ror#17 | ||
1196 | eor r0,r0,r2,ror#18 | ||
1197 | eor r3,r3,r1,ror#19 | ||
1198 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1199 | ldr r2,[sp,#3*4] | ||
1200 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1201 | ldr r1,[sp,#12*4] | ||
1202 | |||
1203 | add r3,r3,r0 | ||
1204 | eor r0,r5,r5,ror#5 @ from BODY_00_15 | ||
1205 | add r2,r2,r3 | ||
1206 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
1207 | add r2,r2,r1 @ X[i] | ||
1208 | ldr r3,[r14],#4 @ *K256++ | ||
1209 | add r8,r8,r2 @ h+=X[i] | ||
1210 | str r2,[sp,#3*4] | ||
1211 | eor r2,r6,r7 | ||
1212 | add r8,r8,r0,ror#6 @ h+=Sigma1(e) | ||
1213 | and r2,r2,r5 | ||
1214 | add r8,r8,r3 @ h+=K256[i] | ||
1215 | eor r2,r2,r7 @ Ch(e,f,g) | ||
1216 | eor r0,r9,r9,ror#11 | ||
1217 | add r8,r8,r2 @ h+=Ch(e,f,g) | ||
1218 | #if 19==31 | ||
1219 | and r3,r3,#0xff | ||
1220 | cmp r3,#0xf2 @ done? | ||
1221 | #endif | ||
1222 | #if 19<15 | ||
1223 | # if __ARM_ARCH__>=7 | ||
1224 | ldr r2,[r1],#4 @ prefetch | ||
1225 | # else | ||
1226 | ldrb r2,[r1,#3] | ||
1227 | # endif | ||
1228 | eor r3,r9,r10 @ a^b, b^c in next round | ||
1229 | #else | ||
1230 | ldr r2,[sp,#5*4] @ from future BODY_16_xx | ||
1231 | eor r3,r9,r10 @ a^b, b^c in next round | ||
1232 | ldr r1,[sp,#2*4] @ from future BODY_16_xx | ||
1233 | #endif | ||
1234 | eor r0,r0,r9,ror#20 @ Sigma0(a) | ||
1235 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1236 | add r4,r4,r8 @ d+=h | ||
1237 | eor r12,r12,r10 @ Maj(a,b,c) | ||
1238 | add r8,r8,r0,ror#2 @ h+=Sigma0(a) | ||
1239 | @ add r8,r8,r12 @ h+=Maj(a,b,c) | ||
1240 | @ ldr r2,[sp,#5*4] @ 20 | ||
1241 | @ ldr r1,[sp,#2*4] | ||
1242 | mov r0,r2,ror#7 | ||
1243 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
1244 | mov r12,r1,ror#17 | ||
1245 | eor r0,r0,r2,ror#18 | ||
1246 | eor r12,r12,r1,ror#19 | ||
1247 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1248 | ldr r2,[sp,#4*4] | ||
1249 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1250 | ldr r1,[sp,#13*4] | ||
1251 | |||
1252 | add r12,r12,r0 | ||
1253 | eor r0,r4,r4,ror#5 @ from BODY_00_15 | ||
1254 | add r2,r2,r12 | ||
1255 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
1256 | add r2,r2,r1 @ X[i] | ||
1257 | ldr r12,[r14],#4 @ *K256++ | ||
1258 | add r7,r7,r2 @ h+=X[i] | ||
1259 | str r2,[sp,#4*4] | ||
1260 | eor r2,r5,r6 | ||
1261 | add r7,r7,r0,ror#6 @ h+=Sigma1(e) | ||
1262 | and r2,r2,r4 | ||
1263 | add r7,r7,r12 @ h+=K256[i] | ||
1264 | eor r2,r2,r6 @ Ch(e,f,g) | ||
1265 | eor r0,r8,r8,ror#11 | ||
1266 | add r7,r7,r2 @ h+=Ch(e,f,g) | ||
1267 | #if 20==31 | ||
1268 | and r12,r12,#0xff | ||
1269 | cmp r12,#0xf2 @ done? | ||
1270 | #endif | ||
1271 | #if 20<15 | ||
1272 | # if __ARM_ARCH__>=7 | ||
1273 | ldr r2,[r1],#4 @ prefetch | ||
1274 | # else | ||
1275 | ldrb r2,[r1,#3] | ||
1276 | # endif | ||
1277 | eor r12,r8,r9 @ a^b, b^c in next round | ||
1278 | #else | ||
1279 | ldr r2,[sp,#6*4] @ from future BODY_16_xx | ||
1280 | eor r12,r8,r9 @ a^b, b^c in next round | ||
1281 | ldr r1,[sp,#3*4] @ from future BODY_16_xx | ||
1282 | #endif | ||
1283 | eor r0,r0,r8,ror#20 @ Sigma0(a) | ||
1284 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1285 | add r11,r11,r7 @ d+=h | ||
1286 | eor r3,r3,r9 @ Maj(a,b,c) | ||
1287 | add r7,r7,r0,ror#2 @ h+=Sigma0(a) | ||
1288 | @ add r7,r7,r3 @ h+=Maj(a,b,c) | ||
1289 | @ ldr r2,[sp,#6*4] @ 21 | ||
1290 | @ ldr r1,[sp,#3*4] | ||
1291 | mov r0,r2,ror#7 | ||
1292 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
1293 | mov r3,r1,ror#17 | ||
1294 | eor r0,r0,r2,ror#18 | ||
1295 | eor r3,r3,r1,ror#19 | ||
1296 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1297 | ldr r2,[sp,#5*4] | ||
1298 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1299 | ldr r1,[sp,#14*4] | ||
1300 | |||
1301 | add r3,r3,r0 | ||
1302 | eor r0,r11,r11,ror#5 @ from BODY_00_15 | ||
1303 | add r2,r2,r3 | ||
1304 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
1305 | add r2,r2,r1 @ X[i] | ||
1306 | ldr r3,[r14],#4 @ *K256++ | ||
1307 | add r6,r6,r2 @ h+=X[i] | ||
1308 | str r2,[sp,#5*4] | ||
1309 | eor r2,r4,r5 | ||
1310 | add r6,r6,r0,ror#6 @ h+=Sigma1(e) | ||
1311 | and r2,r2,r11 | ||
1312 | add r6,r6,r3 @ h+=K256[i] | ||
1313 | eor r2,r2,r5 @ Ch(e,f,g) | ||
1314 | eor r0,r7,r7,ror#11 | ||
1315 | add r6,r6,r2 @ h+=Ch(e,f,g) | ||
1316 | #if 21==31 | ||
1317 | and r3,r3,#0xff | ||
1318 | cmp r3,#0xf2 @ done? | ||
1319 | #endif | ||
1320 | #if 21<15 | ||
1321 | # if __ARM_ARCH__>=7 | ||
1322 | ldr r2,[r1],#4 @ prefetch | ||
1323 | # else | ||
1324 | ldrb r2,[r1,#3] | ||
1325 | # endif | ||
1326 | eor r3,r7,r8 @ a^b, b^c in next round | ||
1327 | #else | ||
1328 | ldr r2,[sp,#7*4] @ from future BODY_16_xx | ||
1329 | eor r3,r7,r8 @ a^b, b^c in next round | ||
1330 | ldr r1,[sp,#4*4] @ from future BODY_16_xx | ||
1331 | #endif | ||
1332 | eor r0,r0,r7,ror#20 @ Sigma0(a) | ||
1333 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1334 | add r10,r10,r6 @ d+=h | ||
1335 | eor r12,r12,r8 @ Maj(a,b,c) | ||
1336 | add r6,r6,r0,ror#2 @ h+=Sigma0(a) | ||
1337 | @ add r6,r6,r12 @ h+=Maj(a,b,c) | ||
1338 | @ ldr r2,[sp,#7*4] @ 22 | ||
1339 | @ ldr r1,[sp,#4*4] | ||
1340 | mov r0,r2,ror#7 | ||
1341 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
1342 | mov r12,r1,ror#17 | ||
1343 | eor r0,r0,r2,ror#18 | ||
1344 | eor r12,r12,r1,ror#19 | ||
1345 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1346 | ldr r2,[sp,#6*4] | ||
1347 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1348 | ldr r1,[sp,#15*4] | ||
1349 | |||
1350 | add r12,r12,r0 | ||
1351 | eor r0,r10,r10,ror#5 @ from BODY_00_15 | ||
1352 | add r2,r2,r12 | ||
1353 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
1354 | add r2,r2,r1 @ X[i] | ||
1355 | ldr r12,[r14],#4 @ *K256++ | ||
1356 | add r5,r5,r2 @ h+=X[i] | ||
1357 | str r2,[sp,#6*4] | ||
1358 | eor r2,r11,r4 | ||
1359 | add r5,r5,r0,ror#6 @ h+=Sigma1(e) | ||
1360 | and r2,r2,r10 | ||
1361 | add r5,r5,r12 @ h+=K256[i] | ||
1362 | eor r2,r2,r4 @ Ch(e,f,g) | ||
1363 | eor r0,r6,r6,ror#11 | ||
1364 | add r5,r5,r2 @ h+=Ch(e,f,g) | ||
1365 | #if 22==31 | ||
1366 | and r12,r12,#0xff | ||
1367 | cmp r12,#0xf2 @ done? | ||
1368 | #endif | ||
1369 | #if 22<15 | ||
1370 | # if __ARM_ARCH__>=7 | ||
1371 | ldr r2,[r1],#4 @ prefetch | ||
1372 | # else | ||
1373 | ldrb r2,[r1,#3] | ||
1374 | # endif | ||
1375 | eor r12,r6,r7 @ a^b, b^c in next round | ||
1376 | #else | ||
1377 | ldr r2,[sp,#8*4] @ from future BODY_16_xx | ||
1378 | eor r12,r6,r7 @ a^b, b^c in next round | ||
1379 | ldr r1,[sp,#5*4] @ from future BODY_16_xx | ||
1380 | #endif | ||
1381 | eor r0,r0,r6,ror#20 @ Sigma0(a) | ||
1382 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1383 | add r9,r9,r5 @ d+=h | ||
1384 | eor r3,r3,r7 @ Maj(a,b,c) | ||
1385 | add r5,r5,r0,ror#2 @ h+=Sigma0(a) | ||
1386 | @ add r5,r5,r3 @ h+=Maj(a,b,c) | ||
1387 | @ ldr r2,[sp,#8*4] @ 23 | ||
1388 | @ ldr r1,[sp,#5*4] | ||
1389 | mov r0,r2,ror#7 | ||
1390 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
1391 | mov r3,r1,ror#17 | ||
1392 | eor r0,r0,r2,ror#18 | ||
1393 | eor r3,r3,r1,ror#19 | ||
1394 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1395 | ldr r2,[sp,#7*4] | ||
1396 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1397 | ldr r1,[sp,#0*4] | ||
1398 | |||
1399 | add r3,r3,r0 | ||
1400 | eor r0,r9,r9,ror#5 @ from BODY_00_15 | ||
1401 | add r2,r2,r3 | ||
1402 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
1403 | add r2,r2,r1 @ X[i] | ||
1404 | ldr r3,[r14],#4 @ *K256++ | ||
1405 | add r4,r4,r2 @ h+=X[i] | ||
1406 | str r2,[sp,#7*4] | ||
1407 | eor r2,r10,r11 | ||
1408 | add r4,r4,r0,ror#6 @ h+=Sigma1(e) | ||
1409 | and r2,r2,r9 | ||
1410 | add r4,r4,r3 @ h+=K256[i] | ||
1411 | eor r2,r2,r11 @ Ch(e,f,g) | ||
1412 | eor r0,r5,r5,ror#11 | ||
1413 | add r4,r4,r2 @ h+=Ch(e,f,g) | ||
1414 | #if 23==31 | ||
1415 | and r3,r3,#0xff | ||
1416 | cmp r3,#0xf2 @ done? | ||
1417 | #endif | ||
1418 | #if 23<15 | ||
1419 | # if __ARM_ARCH__>=7 | ||
1420 | ldr r2,[r1],#4 @ prefetch | ||
1421 | # else | ||
1422 | ldrb r2,[r1,#3] | ||
1423 | # endif | ||
1424 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1425 | #else | ||
1426 | ldr r2,[sp,#9*4] @ from future BODY_16_xx | ||
1427 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1428 | ldr r1,[sp,#6*4] @ from future BODY_16_xx | ||
1429 | #endif | ||
1430 | eor r0,r0,r5,ror#20 @ Sigma0(a) | ||
1431 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1432 | add r8,r8,r4 @ d+=h | ||
1433 | eor r12,r12,r6 @ Maj(a,b,c) | ||
1434 | add r4,r4,r0,ror#2 @ h+=Sigma0(a) | ||
1435 | @ add r4,r4,r12 @ h+=Maj(a,b,c) | ||
1436 | @ ldr r2,[sp,#9*4] @ 24 | ||
1437 | @ ldr r1,[sp,#6*4] | ||
1438 | mov r0,r2,ror#7 | ||
1439 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
1440 | mov r12,r1,ror#17 | ||
1441 | eor r0,r0,r2,ror#18 | ||
1442 | eor r12,r12,r1,ror#19 | ||
1443 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1444 | ldr r2,[sp,#8*4] | ||
1445 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1446 | ldr r1,[sp,#1*4] | ||
1447 | |||
1448 | add r12,r12,r0 | ||
1449 | eor r0,r8,r8,ror#5 @ from BODY_00_15 | ||
1450 | add r2,r2,r12 | ||
1451 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
1452 | add r2,r2,r1 @ X[i] | ||
1453 | ldr r12,[r14],#4 @ *K256++ | ||
1454 | add r11,r11,r2 @ h+=X[i] | ||
1455 | str r2,[sp,#8*4] | ||
1456 | eor r2,r9,r10 | ||
1457 | add r11,r11,r0,ror#6 @ h+=Sigma1(e) | ||
1458 | and r2,r2,r8 | ||
1459 | add r11,r11,r12 @ h+=K256[i] | ||
1460 | eor r2,r2,r10 @ Ch(e,f,g) | ||
1461 | eor r0,r4,r4,ror#11 | ||
1462 | add r11,r11,r2 @ h+=Ch(e,f,g) | ||
1463 | #if 24==31 | ||
1464 | and r12,r12,#0xff | ||
1465 | cmp r12,#0xf2 @ done? | ||
1466 | #endif | ||
1467 | #if 24<15 | ||
1468 | # if __ARM_ARCH__>=7 | ||
1469 | ldr r2,[r1],#4 @ prefetch | ||
1470 | # else | ||
1471 | ldrb r2,[r1,#3] | ||
1472 | # endif | ||
1473 | eor r12,r4,r5 @ a^b, b^c in next round | ||
1474 | #else | ||
1475 | ldr r2,[sp,#10*4] @ from future BODY_16_xx | ||
1476 | eor r12,r4,r5 @ a^b, b^c in next round | ||
1477 | ldr r1,[sp,#7*4] @ from future BODY_16_xx | ||
1478 | #endif | ||
1479 | eor r0,r0,r4,ror#20 @ Sigma0(a) | ||
1480 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1481 | add r7,r7,r11 @ d+=h | ||
1482 | eor r3,r3,r5 @ Maj(a,b,c) | ||
1483 | add r11,r11,r0,ror#2 @ h+=Sigma0(a) | ||
1484 | @ add r11,r11,r3 @ h+=Maj(a,b,c) | ||
1485 | @ ldr r2,[sp,#10*4] @ 25 | ||
1486 | @ ldr r1,[sp,#7*4] | ||
1487 | mov r0,r2,ror#7 | ||
1488 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
1489 | mov r3,r1,ror#17 | ||
1490 | eor r0,r0,r2,ror#18 | ||
1491 | eor r3,r3,r1,ror#19 | ||
1492 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1493 | ldr r2,[sp,#9*4] | ||
1494 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1495 | ldr r1,[sp,#2*4] | ||
1496 | |||
1497 | add r3,r3,r0 | ||
1498 | eor r0,r7,r7,ror#5 @ from BODY_00_15 | ||
1499 | add r2,r2,r3 | ||
1500 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
1501 | add r2,r2,r1 @ X[i] | ||
1502 | ldr r3,[r14],#4 @ *K256++ | ||
1503 | add r10,r10,r2 @ h+=X[i] | ||
1504 | str r2,[sp,#9*4] | ||
1505 | eor r2,r8,r9 | ||
1506 | add r10,r10,r0,ror#6 @ h+=Sigma1(e) | ||
1507 | and r2,r2,r7 | ||
1508 | add r10,r10,r3 @ h+=K256[i] | ||
1509 | eor r2,r2,r9 @ Ch(e,f,g) | ||
1510 | eor r0,r11,r11,ror#11 | ||
1511 | add r10,r10,r2 @ h+=Ch(e,f,g) | ||
1512 | #if 25==31 | ||
1513 | and r3,r3,#0xff | ||
1514 | cmp r3,#0xf2 @ done? | ||
1515 | #endif | ||
1516 | #if 25<15 | ||
1517 | # if __ARM_ARCH__>=7 | ||
1518 | ldr r2,[r1],#4 @ prefetch | ||
1519 | # else | ||
1520 | ldrb r2,[r1,#3] | ||
1521 | # endif | ||
1522 | eor r3,r11,r4 @ a^b, b^c in next round | ||
1523 | #else | ||
1524 | ldr r2,[sp,#11*4] @ from future BODY_16_xx | ||
1525 | eor r3,r11,r4 @ a^b, b^c in next round | ||
1526 | ldr r1,[sp,#8*4] @ from future BODY_16_xx | ||
1527 | #endif | ||
1528 | eor r0,r0,r11,ror#20 @ Sigma0(a) | ||
1529 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1530 | add r6,r6,r10 @ d+=h | ||
1531 | eor r12,r12,r4 @ Maj(a,b,c) | ||
1532 | add r10,r10,r0,ror#2 @ h+=Sigma0(a) | ||
1533 | @ add r10,r10,r12 @ h+=Maj(a,b,c) | ||
1534 | @ ldr r2,[sp,#11*4] @ 26 | ||
1535 | @ ldr r1,[sp,#8*4] | ||
1536 | mov r0,r2,ror#7 | ||
1537 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
1538 | mov r12,r1,ror#17 | ||
1539 | eor r0,r0,r2,ror#18 | ||
1540 | eor r12,r12,r1,ror#19 | ||
1541 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1542 | ldr r2,[sp,#10*4] | ||
1543 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1544 | ldr r1,[sp,#3*4] | ||
1545 | |||
1546 | add r12,r12,r0 | ||
1547 | eor r0,r6,r6,ror#5 @ from BODY_00_15 | ||
1548 | add r2,r2,r12 | ||
1549 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
1550 | add r2,r2,r1 @ X[i] | ||
1551 | ldr r12,[r14],#4 @ *K256++ | ||
1552 | add r9,r9,r2 @ h+=X[i] | ||
1553 | str r2,[sp,#10*4] | ||
1554 | eor r2,r7,r8 | ||
1555 | add r9,r9,r0,ror#6 @ h+=Sigma1(e) | ||
1556 | and r2,r2,r6 | ||
1557 | add r9,r9,r12 @ h+=K256[i] | ||
1558 | eor r2,r2,r8 @ Ch(e,f,g) | ||
1559 | eor r0,r10,r10,ror#11 | ||
1560 | add r9,r9,r2 @ h+=Ch(e,f,g) | ||
1561 | #if 26==31 | ||
1562 | and r12,r12,#0xff | ||
1563 | cmp r12,#0xf2 @ done? | ||
1564 | #endif | ||
1565 | #if 26<15 | ||
1566 | # if __ARM_ARCH__>=7 | ||
1567 | ldr r2,[r1],#4 @ prefetch | ||
1568 | # else | ||
1569 | ldrb r2,[r1,#3] | ||
1570 | # endif | ||
1571 | eor r12,r10,r11 @ a^b, b^c in next round | ||
1572 | #else | ||
1573 | ldr r2,[sp,#12*4] @ from future BODY_16_xx | ||
1574 | eor r12,r10,r11 @ a^b, b^c in next round | ||
1575 | ldr r1,[sp,#9*4] @ from future BODY_16_xx | ||
1576 | #endif | ||
1577 | eor r0,r0,r10,ror#20 @ Sigma0(a) | ||
1578 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1579 | add r5,r5,r9 @ d+=h | ||
1580 | eor r3,r3,r11 @ Maj(a,b,c) | ||
1581 | add r9,r9,r0,ror#2 @ h+=Sigma0(a) | ||
1582 | @ add r9,r9,r3 @ h+=Maj(a,b,c) | ||
1583 | @ ldr r2,[sp,#12*4] @ 27 | ||
1584 | @ ldr r1,[sp,#9*4] | ||
1585 | mov r0,r2,ror#7 | ||
1586 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
1587 | mov r3,r1,ror#17 | ||
1588 | eor r0,r0,r2,ror#18 | ||
1589 | eor r3,r3,r1,ror#19 | ||
1590 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1591 | ldr r2,[sp,#11*4] | ||
1592 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1593 | ldr r1,[sp,#4*4] | ||
1594 | |||
1595 | add r3,r3,r0 | ||
1596 | eor r0,r5,r5,ror#5 @ from BODY_00_15 | ||
1597 | add r2,r2,r3 | ||
1598 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
1599 | add r2,r2,r1 @ X[i] | ||
1600 | ldr r3,[r14],#4 @ *K256++ | ||
1601 | add r8,r8,r2 @ h+=X[i] | ||
1602 | str r2,[sp,#11*4] | ||
1603 | eor r2,r6,r7 | ||
1604 | add r8,r8,r0,ror#6 @ h+=Sigma1(e) | ||
1605 | and r2,r2,r5 | ||
1606 | add r8,r8,r3 @ h+=K256[i] | ||
1607 | eor r2,r2,r7 @ Ch(e,f,g) | ||
1608 | eor r0,r9,r9,ror#11 | ||
1609 | add r8,r8,r2 @ h+=Ch(e,f,g) | ||
1610 | #if 27==31 | ||
1611 | and r3,r3,#0xff | ||
1612 | cmp r3,#0xf2 @ done? | ||
1613 | #endif | ||
1614 | #if 27<15 | ||
1615 | # if __ARM_ARCH__>=7 | ||
1616 | ldr r2,[r1],#4 @ prefetch | ||
1617 | # else | ||
1618 | ldrb r2,[r1,#3] | ||
1619 | # endif | ||
1620 | eor r3,r9,r10 @ a^b, b^c in next round | ||
1621 | #else | ||
1622 | ldr r2,[sp,#13*4] @ from future BODY_16_xx | ||
1623 | eor r3,r9,r10 @ a^b, b^c in next round | ||
1624 | ldr r1,[sp,#10*4] @ from future BODY_16_xx | ||
1625 | #endif | ||
1626 | eor r0,r0,r9,ror#20 @ Sigma0(a) | ||
1627 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1628 | add r4,r4,r8 @ d+=h | ||
1629 | eor r12,r12,r10 @ Maj(a,b,c) | ||
1630 | add r8,r8,r0,ror#2 @ h+=Sigma0(a) | ||
1631 | @ add r8,r8,r12 @ h+=Maj(a,b,c) | ||
1632 | @ ldr r2,[sp,#13*4] @ 28 | ||
1633 | @ ldr r1,[sp,#10*4] | ||
1634 | mov r0,r2,ror#7 | ||
1635 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
1636 | mov r12,r1,ror#17 | ||
1637 | eor r0,r0,r2,ror#18 | ||
1638 | eor r12,r12,r1,ror#19 | ||
1639 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1640 | ldr r2,[sp,#12*4] | ||
1641 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1642 | ldr r1,[sp,#5*4] | ||
1643 | |||
1644 | add r12,r12,r0 | ||
1645 | eor r0,r4,r4,ror#5 @ from BODY_00_15 | ||
1646 | add r2,r2,r12 | ||
1647 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
1648 | add r2,r2,r1 @ X[i] | ||
1649 | ldr r12,[r14],#4 @ *K256++ | ||
1650 | add r7,r7,r2 @ h+=X[i] | ||
1651 | str r2,[sp,#12*4] | ||
1652 | eor r2,r5,r6 | ||
1653 | add r7,r7,r0,ror#6 @ h+=Sigma1(e) | ||
1654 | and r2,r2,r4 | ||
1655 | add r7,r7,r12 @ h+=K256[i] | ||
1656 | eor r2,r2,r6 @ Ch(e,f,g) | ||
1657 | eor r0,r8,r8,ror#11 | ||
1658 | add r7,r7,r2 @ h+=Ch(e,f,g) | ||
1659 | #if 28==31 | ||
1660 | and r12,r12,#0xff | ||
1661 | cmp r12,#0xf2 @ done? | ||
1662 | #endif | ||
1663 | #if 28<15 | ||
1664 | # if __ARM_ARCH__>=7 | ||
1665 | ldr r2,[r1],#4 @ prefetch | ||
1666 | # else | ||
1667 | ldrb r2,[r1,#3] | ||
1668 | # endif | ||
1669 | eor r12,r8,r9 @ a^b, b^c in next round | ||
1670 | #else | ||
1671 | ldr r2,[sp,#14*4] @ from future BODY_16_xx | ||
1672 | eor r12,r8,r9 @ a^b, b^c in next round | ||
1673 | ldr r1,[sp,#11*4] @ from future BODY_16_xx | ||
1674 | #endif | ||
1675 | eor r0,r0,r8,ror#20 @ Sigma0(a) | ||
1676 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1677 | add r11,r11,r7 @ d+=h | ||
1678 | eor r3,r3,r9 @ Maj(a,b,c) | ||
1679 | add r7,r7,r0,ror#2 @ h+=Sigma0(a) | ||
1680 | @ add r7,r7,r3 @ h+=Maj(a,b,c) | ||
1681 | @ ldr r2,[sp,#14*4] @ 29 | ||
1682 | @ ldr r1,[sp,#11*4] | ||
1683 | mov r0,r2,ror#7 | ||
1684 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
1685 | mov r3,r1,ror#17 | ||
1686 | eor r0,r0,r2,ror#18 | ||
1687 | eor r3,r3,r1,ror#19 | ||
1688 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1689 | ldr r2,[sp,#13*4] | ||
1690 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1691 | ldr r1,[sp,#6*4] | ||
1692 | |||
1693 | add r3,r3,r0 | ||
1694 | eor r0,r11,r11,ror#5 @ from BODY_00_15 | ||
1695 | add r2,r2,r3 | ||
1696 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
1697 | add r2,r2,r1 @ X[i] | ||
1698 | ldr r3,[r14],#4 @ *K256++ | ||
1699 | add r6,r6,r2 @ h+=X[i] | ||
1700 | str r2,[sp,#13*4] | ||
1701 | eor r2,r4,r5 | ||
1702 | add r6,r6,r0,ror#6 @ h+=Sigma1(e) | ||
1703 | and r2,r2,r11 | ||
1704 | add r6,r6,r3 @ h+=K256[i] | ||
1705 | eor r2,r2,r5 @ Ch(e,f,g) | ||
1706 | eor r0,r7,r7,ror#11 | ||
1707 | add r6,r6,r2 @ h+=Ch(e,f,g) | ||
1708 | #if 29==31 | ||
1709 | and r3,r3,#0xff | ||
1710 | cmp r3,#0xf2 @ done? | ||
1711 | #endif | ||
1712 | #if 29<15 | ||
1713 | # if __ARM_ARCH__>=7 | ||
1714 | ldr r2,[r1],#4 @ prefetch | ||
1715 | # else | ||
1716 | ldrb r2,[r1,#3] | ||
1717 | # endif | ||
1718 | eor r3,r7,r8 @ a^b, b^c in next round | ||
1719 | #else | ||
1720 | ldr r2,[sp,#15*4] @ from future BODY_16_xx | ||
1721 | eor r3,r7,r8 @ a^b, b^c in next round | ||
1722 | ldr r1,[sp,#12*4] @ from future BODY_16_xx | ||
1723 | #endif | ||
1724 | eor r0,r0,r7,ror#20 @ Sigma0(a) | ||
1725 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1726 | add r10,r10,r6 @ d+=h | ||
1727 | eor r12,r12,r8 @ Maj(a,b,c) | ||
1728 | add r6,r6,r0,ror#2 @ h+=Sigma0(a) | ||
1729 | @ add r6,r6,r12 @ h+=Maj(a,b,c) | ||
1730 | @ ldr r2,[sp,#15*4] @ 30 | ||
1731 | @ ldr r1,[sp,#12*4] | ||
1732 | mov r0,r2,ror#7 | ||
1733 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
1734 | mov r12,r1,ror#17 | ||
1735 | eor r0,r0,r2,ror#18 | ||
1736 | eor r12,r12,r1,ror#19 | ||
1737 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1738 | ldr r2,[sp,#14*4] | ||
1739 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1740 | ldr r1,[sp,#7*4] | ||
1741 | |||
1742 | add r12,r12,r0 | ||
1743 | eor r0,r10,r10,ror#5 @ from BODY_00_15 | ||
1744 | add r2,r2,r12 | ||
1745 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
1746 | add r2,r2,r1 @ X[i] | ||
1747 | ldr r12,[r14],#4 @ *K256++ | ||
1748 | add r5,r5,r2 @ h+=X[i] | ||
1749 | str r2,[sp,#14*4] | ||
1750 | eor r2,r11,r4 | ||
1751 | add r5,r5,r0,ror#6 @ h+=Sigma1(e) | ||
1752 | and r2,r2,r10 | ||
1753 | add r5,r5,r12 @ h+=K256[i] | ||
1754 | eor r2,r2,r4 @ Ch(e,f,g) | ||
1755 | eor r0,r6,r6,ror#11 | ||
1756 | add r5,r5,r2 @ h+=Ch(e,f,g) | ||
1757 | #if 30==31 | ||
1758 | and r12,r12,#0xff | ||
1759 | cmp r12,#0xf2 @ done? | ||
1760 | #endif | ||
1761 | #if 30<15 | ||
1762 | # if __ARM_ARCH__>=7 | ||
1763 | ldr r2,[r1],#4 @ prefetch | ||
1764 | # else | ||
1765 | ldrb r2,[r1,#3] | ||
1766 | # endif | ||
1767 | eor r12,r6,r7 @ a^b, b^c in next round | ||
1768 | #else | ||
1769 | ldr r2,[sp,#0*4] @ from future BODY_16_xx | ||
1770 | eor r12,r6,r7 @ a^b, b^c in next round | ||
1771 | ldr r1,[sp,#13*4] @ from future BODY_16_xx | ||
1772 | #endif | ||
1773 | eor r0,r0,r6,ror#20 @ Sigma0(a) | ||
1774 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1775 | add r9,r9,r5 @ d+=h | ||
1776 | eor r3,r3,r7 @ Maj(a,b,c) | ||
1777 | add r5,r5,r0,ror#2 @ h+=Sigma0(a) | ||
1778 | @ add r5,r5,r3 @ h+=Maj(a,b,c) | ||
1779 | @ ldr r2,[sp,#0*4] @ 31 | ||
1780 | @ ldr r1,[sp,#13*4] | ||
1781 | mov r0,r2,ror#7 | ||
1782 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
1783 | mov r3,r1,ror#17 | ||
1784 | eor r0,r0,r2,ror#18 | ||
1785 | eor r3,r3,r1,ror#19 | ||
1786 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1787 | ldr r2,[sp,#15*4] | ||
1788 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1789 | ldr r1,[sp,#8*4] | ||
1790 | |||
1791 | add r3,r3,r0 | ||
1792 | eor r0,r9,r9,ror#5 @ from BODY_00_15 | ||
1793 | add r2,r2,r3 | ||
1794 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
1795 | add r2,r2,r1 @ X[i] | ||
1796 | ldr r3,[r14],#4 @ *K256++ | ||
1797 | add r4,r4,r2 @ h+=X[i] | ||
1798 | str r2,[sp,#15*4] | ||
1799 | eor r2,r10,r11 | ||
1800 | add r4,r4,r0,ror#6 @ h+=Sigma1(e) | ||
1801 | and r2,r2,r9 | ||
1802 | add r4,r4,r3 @ h+=K256[i] | ||
1803 | eor r2,r2,r11 @ Ch(e,f,g) | ||
1804 | eor r0,r5,r5,ror#11 | ||
1805 | add r4,r4,r2 @ h+=Ch(e,f,g) | ||
1806 | #if 31==31 | ||
1807 | and r3,r3,#0xff | ||
1808 | cmp r3,#0xf2 @ done? | ||
1809 | #endif | ||
1810 | #if 31<15 | ||
1811 | # if __ARM_ARCH__>=7 | ||
1812 | ldr r2,[r1],#4 @ prefetch | ||
1813 | # else | ||
1814 | ldrb r2,[r1,#3] | ||
1815 | # endif | ||
1816 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1817 | #else | ||
1818 | ldr r2,[sp,#1*4] @ from future BODY_16_xx | ||
1819 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1820 | ldr r1,[sp,#14*4] @ from future BODY_16_xx | ||
1821 | #endif | ||
1822 | eor r0,r0,r5,ror#20 @ Sigma0(a) | ||
1823 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1824 | add r8,r8,r4 @ d+=h | ||
1825 | eor r12,r12,r6 @ Maj(a,b,c) | ||
1826 | add r4,r4,r0,ror#2 @ h+=Sigma0(a) | ||
1827 | @ add r4,r4,r12 @ h+=Maj(a,b,c) | ||
1828 | #if __ARM_ARCH__>=7 | ||
1829 | ite eq @ Thumb2 thing, sanity check in ARM | ||
1830 | #endif | ||
1831 | ldreq r3,[sp,#16*4] @ pull ctx | ||
1832 | bne .Lrounds_16_xx | ||
1833 | |||
1834 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
1835 | ldr r0,[r3,#0] | ||
1836 | ldr r2,[r3,#4] | ||
1837 | ldr r12,[r3,#8] | ||
1838 | add r4,r4,r0 | ||
1839 | ldr r0,[r3,#12] | ||
1840 | add r5,r5,r2 | ||
1841 | ldr r2,[r3,#16] | ||
1842 | add r6,r6,r12 | ||
1843 | ldr r12,[r3,#20] | ||
1844 | add r7,r7,r0 | ||
1845 | ldr r0,[r3,#24] | ||
1846 | add r8,r8,r2 | ||
1847 | ldr r2,[r3,#28] | ||
1848 | add r9,r9,r12 | ||
1849 | ldr r1,[sp,#17*4] @ pull inp | ||
1850 | ldr r12,[sp,#18*4] @ pull inp+len | ||
1851 | add r10,r10,r0 | ||
1852 | add r11,r11,r2 | ||
1853 | stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} | ||
1854 | cmp r1,r12 | ||
1855 | sub r14,r14,#256 @ rewind Ktbl | ||
1856 | bne .Loop | ||
1857 | |||
1858 | add sp,sp,#19*4 @ destroy frame | ||
1859 | #if __ARM_ARCH__>=5 | ||
1860 | ldmia sp!,{r4-r11,pc} | ||
1861 | #else | ||
1862 | ldmia sp!,{r4-r11,lr} | ||
1863 | tst lr,#1 | ||
1864 | moveq pc,lr @ be binary compatible with V4, yet | ||
1865 | .word 0xe12fff1e @ interoperable with Thumb ISA:-) | ||
1866 | #endif | ||
1867 | .size sha256_block_data_order,.-sha256_block_data_order | ||
1868 | #if __ARM_MAX_ARCH__>=7 | ||
1869 | .arch armv7-a | ||
1870 | .fpu neon | ||
1871 | |||
1872 | .global sha256_block_data_order_neon | ||
1873 | .type sha256_block_data_order_neon,%function | ||
1874 | .align 4 | ||
1875 | sha256_block_data_order_neon: | ||
1876 | .LNEON: | ||
1877 | stmdb sp!,{r4-r12,lr} | ||
1878 | |||
1879 | sub r11,sp,#16*4+16 | ||
1880 | adrl r14,K256 | ||
1881 | bic r11,r11,#15 @ align for 128-bit stores | ||
1882 | mov r12,sp | ||
1883 | mov sp,r11 @ alloca | ||
1884 | add r2,r1,r2,lsl#6 @ len to point at the end of inp | ||
1885 | |||
1886 | vld1.8 {q0},[r1]! | ||
1887 | vld1.8 {q1},[r1]! | ||
1888 | vld1.8 {q2},[r1]! | ||
1889 | vld1.8 {q3},[r1]! | ||
1890 | vld1.32 {q8},[r14,:128]! | ||
1891 | vld1.32 {q9},[r14,:128]! | ||
1892 | vld1.32 {q10},[r14,:128]! | ||
1893 | vld1.32 {q11},[r14,:128]! | ||
1894 | vrev32.8 q0,q0 @ yes, even on | ||
1895 | str r0,[sp,#64] | ||
1896 | vrev32.8 q1,q1 @ big-endian | ||
1897 | str r1,[sp,#68] | ||
1898 | mov r1,sp | ||
1899 | vrev32.8 q2,q2 | ||
1900 | str r2,[sp,#72] | ||
1901 | vrev32.8 q3,q3 | ||
1902 | str r12,[sp,#76] @ save original sp | ||
1903 | vadd.i32 q8,q8,q0 | ||
1904 | vadd.i32 q9,q9,q1 | ||
1905 | vst1.32 {q8},[r1,:128]! | ||
1906 | vadd.i32 q10,q10,q2 | ||
1907 | vst1.32 {q9},[r1,:128]! | ||
1908 | vadd.i32 q11,q11,q3 | ||
1909 | vst1.32 {q10},[r1,:128]! | ||
1910 | vst1.32 {q11},[r1,:128]! | ||
1911 | |||
1912 | ldmia r0,{r4-r11} | ||
1913 | sub r1,r1,#64 | ||
1914 | ldr r2,[sp,#0] | ||
1915 | eor r12,r12,r12 | ||
1916 | eor r3,r5,r6 | ||
1917 | b .L_00_48 | ||
1918 | |||
1919 | .align 4 | ||
1920 | .L_00_48: | ||
1921 | vext.8 q8,q0,q1,#4 | ||
1922 | add r11,r11,r2 | ||
1923 | eor r2,r9,r10 | ||
1924 | eor r0,r8,r8,ror#5 | ||
1925 | vext.8 q9,q2,q3,#4 | ||
1926 | add r4,r4,r12 | ||
1927 | and r2,r2,r8 | ||
1928 | eor r12,r0,r8,ror#19 | ||
1929 | vshr.u32 q10,q8,#7 | ||
1930 | eor r0,r4,r4,ror#11 | ||
1931 | eor r2,r2,r10 | ||
1932 | vadd.i32 q0,q0,q9 | ||
1933 | add r11,r11,r12,ror#6 | ||
1934 | eor r12,r4,r5 | ||
1935 | vshr.u32 q9,q8,#3 | ||
1936 | eor r0,r0,r4,ror#20 | ||
1937 | add r11,r11,r2 | ||
1938 | vsli.32 q10,q8,#25 | ||
1939 | ldr r2,[sp,#4] | ||
1940 | and r3,r3,r12 | ||
1941 | vshr.u32 q11,q8,#18 | ||
1942 | add r7,r7,r11 | ||
1943 | add r11,r11,r0,ror#2 | ||
1944 | eor r3,r3,r5 | ||
1945 | veor q9,q9,q10 | ||
1946 | add r10,r10,r2 | ||
1947 | vsli.32 q11,q8,#14 | ||
1948 | eor r2,r8,r9 | ||
1949 | eor r0,r7,r7,ror#5 | ||
1950 | vshr.u32 d24,d7,#17 | ||
1951 | add r11,r11,r3 | ||
1952 | and r2,r2,r7 | ||
1953 | veor q9,q9,q11 | ||
1954 | eor r3,r0,r7,ror#19 | ||
1955 | eor r0,r11,r11,ror#11 | ||
1956 | vsli.32 d24,d7,#15 | ||
1957 | eor r2,r2,r9 | ||
1958 | add r10,r10,r3,ror#6 | ||
1959 | vshr.u32 d25,d7,#10 | ||
1960 | eor r3,r11,r4 | ||
1961 | eor r0,r0,r11,ror#20 | ||
1962 | vadd.i32 q0,q0,q9 | ||
1963 | add r10,r10,r2 | ||
1964 | ldr r2,[sp,#8] | ||
1965 | veor d25,d25,d24 | ||
1966 | and r12,r12,r3 | ||
1967 | add r6,r6,r10 | ||
1968 | vshr.u32 d24,d7,#19 | ||
1969 | add r10,r10,r0,ror#2 | ||
1970 | eor r12,r12,r4 | ||
1971 | vsli.32 d24,d7,#13 | ||
1972 | add r9,r9,r2 | ||
1973 | eor r2,r7,r8 | ||
1974 | veor d25,d25,d24 | ||
1975 | eor r0,r6,r6,ror#5 | ||
1976 | add r10,r10,r12 | ||
1977 | vadd.i32 d0,d0,d25 | ||
1978 | and r2,r2,r6 | ||
1979 | eor r12,r0,r6,ror#19 | ||
1980 | vshr.u32 d24,d0,#17 | ||
1981 | eor r0,r10,r10,ror#11 | ||
1982 | eor r2,r2,r8 | ||
1983 | vsli.32 d24,d0,#15 | ||
1984 | add r9,r9,r12,ror#6 | ||
1985 | eor r12,r10,r11 | ||
1986 | vshr.u32 d25,d0,#10 | ||
1987 | eor r0,r0,r10,ror#20 | ||
1988 | add r9,r9,r2 | ||
1989 | veor d25,d25,d24 | ||
1990 | ldr r2,[sp,#12] | ||
1991 | and r3,r3,r12 | ||
1992 | vshr.u32 d24,d0,#19 | ||
1993 | add r5,r5,r9 | ||
1994 | add r9,r9,r0,ror#2 | ||
1995 | eor r3,r3,r11 | ||
1996 | vld1.32 {q8},[r14,:128]! | ||
1997 | add r8,r8,r2 | ||
1998 | vsli.32 d24,d0,#13 | ||
1999 | eor r2,r6,r7 | ||
2000 | eor r0,r5,r5,ror#5 | ||
2001 | veor d25,d25,d24 | ||
2002 | add r9,r9,r3 | ||
2003 | and r2,r2,r5 | ||
2004 | vadd.i32 d1,d1,d25 | ||
2005 | eor r3,r0,r5,ror#19 | ||
2006 | eor r0,r9,r9,ror#11 | ||
2007 | vadd.i32 q8,q8,q0 | ||
2008 | eor r2,r2,r7 | ||
2009 | add r8,r8,r3,ror#6 | ||
2010 | eor r3,r9,r10 | ||
2011 | eor r0,r0,r9,ror#20 | ||
2012 | add r8,r8,r2 | ||
2013 | ldr r2,[sp,#16] | ||
2014 | and r12,r12,r3 | ||
2015 | add r4,r4,r8 | ||
2016 | vst1.32 {q8},[r1,:128]! | ||
2017 | add r8,r8,r0,ror#2 | ||
2018 | eor r12,r12,r10 | ||
2019 | vext.8 q8,q1,q2,#4 | ||
2020 | add r7,r7,r2 | ||
2021 | eor r2,r5,r6 | ||
2022 | eor r0,r4,r4,ror#5 | ||
2023 | vext.8 q9,q3,q0,#4 | ||
2024 | add r8,r8,r12 | ||
2025 | and r2,r2,r4 | ||
2026 | eor r12,r0,r4,ror#19 | ||
2027 | vshr.u32 q10,q8,#7 | ||
2028 | eor r0,r8,r8,ror#11 | ||
2029 | eor r2,r2,r6 | ||
2030 | vadd.i32 q1,q1,q9 | ||
2031 | add r7,r7,r12,ror#6 | ||
2032 | eor r12,r8,r9 | ||
2033 | vshr.u32 q9,q8,#3 | ||
2034 | eor r0,r0,r8,ror#20 | ||
2035 | add r7,r7,r2 | ||
2036 | vsli.32 q10,q8,#25 | ||
2037 | ldr r2,[sp,#20] | ||
2038 | and r3,r3,r12 | ||
2039 | vshr.u32 q11,q8,#18 | ||
2040 | add r11,r11,r7 | ||
2041 | add r7,r7,r0,ror#2 | ||
2042 | eor r3,r3,r9 | ||
2043 | veor q9,q9,q10 | ||
2044 | add r6,r6,r2 | ||
2045 | vsli.32 q11,q8,#14 | ||
2046 | eor r2,r4,r5 | ||
2047 | eor r0,r11,r11,ror#5 | ||
2048 | vshr.u32 d24,d1,#17 | ||
2049 | add r7,r7,r3 | ||
2050 | and r2,r2,r11 | ||
2051 | veor q9,q9,q11 | ||
2052 | eor r3,r0,r11,ror#19 | ||
2053 | eor r0,r7,r7,ror#11 | ||
2054 | vsli.32 d24,d1,#15 | ||
2055 | eor r2,r2,r5 | ||
2056 | add r6,r6,r3,ror#6 | ||
2057 | vshr.u32 d25,d1,#10 | ||
2058 | eor r3,r7,r8 | ||
2059 | eor r0,r0,r7,ror#20 | ||
2060 | vadd.i32 q1,q1,q9 | ||
2061 | add r6,r6,r2 | ||
2062 | ldr r2,[sp,#24] | ||
2063 | veor d25,d25,d24 | ||
2064 | and r12,r12,r3 | ||
2065 | add r10,r10,r6 | ||
2066 | vshr.u32 d24,d1,#19 | ||
2067 | add r6,r6,r0,ror#2 | ||
2068 | eor r12,r12,r8 | ||
2069 | vsli.32 d24,d1,#13 | ||
2070 | add r5,r5,r2 | ||
2071 | eor r2,r11,r4 | ||
2072 | veor d25,d25,d24 | ||
2073 | eor r0,r10,r10,ror#5 | ||
2074 | add r6,r6,r12 | ||
2075 | vadd.i32 d2,d2,d25 | ||
2076 | and r2,r2,r10 | ||
2077 | eor r12,r0,r10,ror#19 | ||
2078 | vshr.u32 d24,d2,#17 | ||
2079 | eor r0,r6,r6,ror#11 | ||
2080 | eor r2,r2,r4 | ||
2081 | vsli.32 d24,d2,#15 | ||
2082 | add r5,r5,r12,ror#6 | ||
2083 | eor r12,r6,r7 | ||
2084 | vshr.u32 d25,d2,#10 | ||
2085 | eor r0,r0,r6,ror#20 | ||
2086 | add r5,r5,r2 | ||
2087 | veor d25,d25,d24 | ||
2088 | ldr r2,[sp,#28] | ||
2089 | and r3,r3,r12 | ||
2090 | vshr.u32 d24,d2,#19 | ||
2091 | add r9,r9,r5 | ||
2092 | add r5,r5,r0,ror#2 | ||
2093 | eor r3,r3,r7 | ||
2094 | vld1.32 {q8},[r14,:128]! | ||
2095 | add r4,r4,r2 | ||
2096 | vsli.32 d24,d2,#13 | ||
2097 | eor r2,r10,r11 | ||
2098 | eor r0,r9,r9,ror#5 | ||
2099 | veor d25,d25,d24 | ||
2100 | add r5,r5,r3 | ||
2101 | and r2,r2,r9 | ||
2102 | vadd.i32 d3,d3,d25 | ||
2103 | eor r3,r0,r9,ror#19 | ||
2104 | eor r0,r5,r5,ror#11 | ||
2105 | vadd.i32 q8,q8,q1 | ||
2106 | eor r2,r2,r11 | ||
2107 | add r4,r4,r3,ror#6 | ||
2108 | eor r3,r5,r6 | ||
2109 | eor r0,r0,r5,ror#20 | ||
2110 | add r4,r4,r2 | ||
2111 | ldr r2,[sp,#32] | ||
2112 | and r12,r12,r3 | ||
2113 | add r8,r8,r4 | ||
2114 | vst1.32 {q8},[r1,:128]! | ||
2115 | add r4,r4,r0,ror#2 | ||
2116 | eor r12,r12,r6 | ||
2117 | vext.8 q8,q2,q3,#4 | ||
2118 | add r11,r11,r2 | ||
2119 | eor r2,r9,r10 | ||
2120 | eor r0,r8,r8,ror#5 | ||
2121 | vext.8 q9,q0,q1,#4 | ||
2122 | add r4,r4,r12 | ||
2123 | and r2,r2,r8 | ||
2124 | eor r12,r0,r8,ror#19 | ||
2125 | vshr.u32 q10,q8,#7 | ||
2126 | eor r0,r4,r4,ror#11 | ||
2127 | eor r2,r2,r10 | ||
2128 | vadd.i32 q2,q2,q9 | ||
2129 | add r11,r11,r12,ror#6 | ||
2130 | eor r12,r4,r5 | ||
2131 | vshr.u32 q9,q8,#3 | ||
2132 | eor r0,r0,r4,ror#20 | ||
2133 | add r11,r11,r2 | ||
2134 | vsli.32 q10,q8,#25 | ||
2135 | ldr r2,[sp,#36] | ||
2136 | and r3,r3,r12 | ||
2137 | vshr.u32 q11,q8,#18 | ||
2138 | add r7,r7,r11 | ||
2139 | add r11,r11,r0,ror#2 | ||
2140 | eor r3,r3,r5 | ||
2141 | veor q9,q9,q10 | ||
2142 | add r10,r10,r2 | ||
2143 | vsli.32 q11,q8,#14 | ||
2144 | eor r2,r8,r9 | ||
2145 | eor r0,r7,r7,ror#5 | ||
2146 | vshr.u32 d24,d3,#17 | ||
2147 | add r11,r11,r3 | ||
2148 | and r2,r2,r7 | ||
2149 | veor q9,q9,q11 | ||
2150 | eor r3,r0,r7,ror#19 | ||
2151 | eor r0,r11,r11,ror#11 | ||
2152 | vsli.32 d24,d3,#15 | ||
2153 | eor r2,r2,r9 | ||
2154 | add r10,r10,r3,ror#6 | ||
2155 | vshr.u32 d25,d3,#10 | ||
2156 | eor r3,r11,r4 | ||
2157 | eor r0,r0,r11,ror#20 | ||
2158 | vadd.i32 q2,q2,q9 | ||
2159 | add r10,r10,r2 | ||
2160 | ldr r2,[sp,#40] | ||
2161 | veor d25,d25,d24 | ||
2162 | and r12,r12,r3 | ||
2163 | add r6,r6,r10 | ||
2164 | vshr.u32 d24,d3,#19 | ||
2165 | add r10,r10,r0,ror#2 | ||
2166 | eor r12,r12,r4 | ||
2167 | vsli.32 d24,d3,#13 | ||
2168 | add r9,r9,r2 | ||
2169 | eor r2,r7,r8 | ||
2170 | veor d25,d25,d24 | ||
2171 | eor r0,r6,r6,ror#5 | ||
2172 | add r10,r10,r12 | ||
2173 | vadd.i32 d4,d4,d25 | ||
2174 | and r2,r2,r6 | ||
2175 | eor r12,r0,r6,ror#19 | ||
2176 | vshr.u32 d24,d4,#17 | ||
2177 | eor r0,r10,r10,ror#11 | ||
2178 | eor r2,r2,r8 | ||
2179 | vsli.32 d24,d4,#15 | ||
2180 | add r9,r9,r12,ror#6 | ||
2181 | eor r12,r10,r11 | ||
2182 | vshr.u32 d25,d4,#10 | ||
2183 | eor r0,r0,r10,ror#20 | ||
2184 | add r9,r9,r2 | ||
2185 | veor d25,d25,d24 | ||
2186 | ldr r2,[sp,#44] | ||
2187 | and r3,r3,r12 | ||
2188 | vshr.u32 d24,d4,#19 | ||
2189 | add r5,r5,r9 | ||
2190 | add r9,r9,r0,ror#2 | ||
2191 | eor r3,r3,r11 | ||
2192 | vld1.32 {q8},[r14,:128]! | ||
2193 | add r8,r8,r2 | ||
2194 | vsli.32 d24,d4,#13 | ||
2195 | eor r2,r6,r7 | ||
2196 | eor r0,r5,r5,ror#5 | ||
2197 | veor d25,d25,d24 | ||
2198 | add r9,r9,r3 | ||
2199 | and r2,r2,r5 | ||
2200 | vadd.i32 d5,d5,d25 | ||
2201 | eor r3,r0,r5,ror#19 | ||
2202 | eor r0,r9,r9,ror#11 | ||
2203 | vadd.i32 q8,q8,q2 | ||
2204 | eor r2,r2,r7 | ||
2205 | add r8,r8,r3,ror#6 | ||
2206 | eor r3,r9,r10 | ||
2207 | eor r0,r0,r9,ror#20 | ||
2208 | add r8,r8,r2 | ||
2209 | ldr r2,[sp,#48] | ||
2210 | and r12,r12,r3 | ||
2211 | add r4,r4,r8 | ||
2212 | vst1.32 {q8},[r1,:128]! | ||
2213 | add r8,r8,r0,ror#2 | ||
2214 | eor r12,r12,r10 | ||
2215 | vext.8 q8,q3,q0,#4 | ||
2216 | add r7,r7,r2 | ||
2217 | eor r2,r5,r6 | ||
2218 | eor r0,r4,r4,ror#5 | ||
2219 | vext.8 q9,q1,q2,#4 | ||
2220 | add r8,r8,r12 | ||
2221 | and r2,r2,r4 | ||
2222 | eor r12,r0,r4,ror#19 | ||
2223 | vshr.u32 q10,q8,#7 | ||
2224 | eor r0,r8,r8,ror#11 | ||
2225 | eor r2,r2,r6 | ||
2226 | vadd.i32 q3,q3,q9 | ||
2227 | add r7,r7,r12,ror#6 | ||
2228 | eor r12,r8,r9 | ||
2229 | vshr.u32 q9,q8,#3 | ||
2230 | eor r0,r0,r8,ror#20 | ||
2231 | add r7,r7,r2 | ||
2232 | vsli.32 q10,q8,#25 | ||
2233 | ldr r2,[sp,#52] | ||
2234 | and r3,r3,r12 | ||
2235 | vshr.u32 q11,q8,#18 | ||
2236 | add r11,r11,r7 | ||
2237 | add r7,r7,r0,ror#2 | ||
2238 | eor r3,r3,r9 | ||
2239 | veor q9,q9,q10 | ||
2240 | add r6,r6,r2 | ||
2241 | vsli.32 q11,q8,#14 | ||
2242 | eor r2,r4,r5 | ||
2243 | eor r0,r11,r11,ror#5 | ||
2244 | vshr.u32 d24,d5,#17 | ||
2245 | add r7,r7,r3 | ||
2246 | and r2,r2,r11 | ||
2247 | veor q9,q9,q11 | ||
2248 | eor r3,r0,r11,ror#19 | ||
2249 | eor r0,r7,r7,ror#11 | ||
2250 | vsli.32 d24,d5,#15 | ||
2251 | eor r2,r2,r5 | ||
2252 | add r6,r6,r3,ror#6 | ||
2253 | vshr.u32 d25,d5,#10 | ||
2254 | eor r3,r7,r8 | ||
2255 | eor r0,r0,r7,ror#20 | ||
2256 | vadd.i32 q3,q3,q9 | ||
2257 | add r6,r6,r2 | ||
2258 | ldr r2,[sp,#56] | ||
2259 | veor d25,d25,d24 | ||
2260 | and r12,r12,r3 | ||
2261 | add r10,r10,r6 | ||
2262 | vshr.u32 d24,d5,#19 | ||
2263 | add r6,r6,r0,ror#2 | ||
2264 | eor r12,r12,r8 | ||
2265 | vsli.32 d24,d5,#13 | ||
2266 | add r5,r5,r2 | ||
2267 | eor r2,r11,r4 | ||
2268 | veor d25,d25,d24 | ||
2269 | eor r0,r10,r10,ror#5 | ||
2270 | add r6,r6,r12 | ||
2271 | vadd.i32 d6,d6,d25 | ||
2272 | and r2,r2,r10 | ||
2273 | eor r12,r0,r10,ror#19 | ||
2274 | vshr.u32 d24,d6,#17 | ||
2275 | eor r0,r6,r6,ror#11 | ||
2276 | eor r2,r2,r4 | ||
2277 | vsli.32 d24,d6,#15 | ||
2278 | add r5,r5,r12,ror#6 | ||
2279 | eor r12,r6,r7 | ||
2280 | vshr.u32 d25,d6,#10 | ||
2281 | eor r0,r0,r6,ror#20 | ||
2282 | add r5,r5,r2 | ||
2283 | veor d25,d25,d24 | ||
2284 | ldr r2,[sp,#60] | ||
2285 | and r3,r3,r12 | ||
2286 | vshr.u32 d24,d6,#19 | ||
2287 | add r9,r9,r5 | ||
2288 | add r5,r5,r0,ror#2 | ||
2289 | eor r3,r3,r7 | ||
2290 | vld1.32 {q8},[r14,:128]! | ||
2291 | add r4,r4,r2 | ||
2292 | vsli.32 d24,d6,#13 | ||
2293 | eor r2,r10,r11 | ||
2294 | eor r0,r9,r9,ror#5 | ||
2295 | veor d25,d25,d24 | ||
2296 | add r5,r5,r3 | ||
2297 | and r2,r2,r9 | ||
2298 | vadd.i32 d7,d7,d25 | ||
2299 | eor r3,r0,r9,ror#19 | ||
2300 | eor r0,r5,r5,ror#11 | ||
2301 | vadd.i32 q8,q8,q3 | ||
2302 | eor r2,r2,r11 | ||
2303 | add r4,r4,r3,ror#6 | ||
2304 | eor r3,r5,r6 | ||
2305 | eor r0,r0,r5,ror#20 | ||
2306 | add r4,r4,r2 | ||
2307 | ldr r2,[r14] | ||
2308 | and r12,r12,r3 | ||
2309 | add r8,r8,r4 | ||
2310 | vst1.32 {q8},[r1,:128]! | ||
2311 | add r4,r4,r0,ror#2 | ||
2312 | eor r12,r12,r6 | ||
2313 | teq r2,#0 @ check for K256 terminator | ||
2314 | ldr r2,[sp,#0] | ||
2315 | sub r1,r1,#64 | ||
2316 | bne .L_00_48 | ||
2317 | |||
2318 | ldr r1,[sp,#68] | ||
2319 | ldr r0,[sp,#72] | ||
2320 | sub r14,r14,#256 @ rewind r14 | ||
2321 | teq r1,r0 | ||
2322 | it eq | ||
2323 | subeq r1,r1,#64 @ avoid SEGV | ||
2324 | vld1.8 {q0},[r1]! @ load next input block | ||
2325 | vld1.8 {q1},[r1]! | ||
2326 | vld1.8 {q2},[r1]! | ||
2327 | vld1.8 {q3},[r1]! | ||
2328 | it ne | ||
2329 | strne r1,[sp,#68] | ||
2330 | mov r1,sp | ||
2331 | add r11,r11,r2 | ||
2332 | eor r2,r9,r10 | ||
2333 | eor r0,r8,r8,ror#5 | ||
2334 | add r4,r4,r12 | ||
2335 | vld1.32 {q8},[r14,:128]! | ||
2336 | and r2,r2,r8 | ||
2337 | eor r12,r0,r8,ror#19 | ||
2338 | eor r0,r4,r4,ror#11 | ||
2339 | eor r2,r2,r10 | ||
2340 | vrev32.8 q0,q0 | ||
2341 | add r11,r11,r12,ror#6 | ||
2342 | eor r12,r4,r5 | ||
2343 | eor r0,r0,r4,ror#20 | ||
2344 | add r11,r11,r2 | ||
2345 | vadd.i32 q8,q8,q0 | ||
2346 | ldr r2,[sp,#4] | ||
2347 | and r3,r3,r12 | ||
2348 | add r7,r7,r11 | ||
2349 | add r11,r11,r0,ror#2 | ||
2350 | eor r3,r3,r5 | ||
2351 | add r10,r10,r2 | ||
2352 | eor r2,r8,r9 | ||
2353 | eor r0,r7,r7,ror#5 | ||
2354 | add r11,r11,r3 | ||
2355 | and r2,r2,r7 | ||
2356 | eor r3,r0,r7,ror#19 | ||
2357 | eor r0,r11,r11,ror#11 | ||
2358 | eor r2,r2,r9 | ||
2359 | add r10,r10,r3,ror#6 | ||
2360 | eor r3,r11,r4 | ||
2361 | eor r0,r0,r11,ror#20 | ||
2362 | add r10,r10,r2 | ||
2363 | ldr r2,[sp,#8] | ||
2364 | and r12,r12,r3 | ||
2365 | add r6,r6,r10 | ||
2366 | add r10,r10,r0,ror#2 | ||
2367 | eor r12,r12,r4 | ||
2368 | add r9,r9,r2 | ||
2369 | eor r2,r7,r8 | ||
2370 | eor r0,r6,r6,ror#5 | ||
2371 | add r10,r10,r12 | ||
2372 | and r2,r2,r6 | ||
2373 | eor r12,r0,r6,ror#19 | ||
2374 | eor r0,r10,r10,ror#11 | ||
2375 | eor r2,r2,r8 | ||
2376 | add r9,r9,r12,ror#6 | ||
2377 | eor r12,r10,r11 | ||
2378 | eor r0,r0,r10,ror#20 | ||
2379 | add r9,r9,r2 | ||
2380 | ldr r2,[sp,#12] | ||
2381 | and r3,r3,r12 | ||
2382 | add r5,r5,r9 | ||
2383 | add r9,r9,r0,ror#2 | ||
2384 | eor r3,r3,r11 | ||
2385 | add r8,r8,r2 | ||
2386 | eor r2,r6,r7 | ||
2387 | eor r0,r5,r5,ror#5 | ||
2388 | add r9,r9,r3 | ||
2389 | and r2,r2,r5 | ||
2390 | eor r3,r0,r5,ror#19 | ||
2391 | eor r0,r9,r9,ror#11 | ||
2392 | eor r2,r2,r7 | ||
2393 | add r8,r8,r3,ror#6 | ||
2394 | eor r3,r9,r10 | ||
2395 | eor r0,r0,r9,ror#20 | ||
2396 | add r8,r8,r2 | ||
2397 | ldr r2,[sp,#16] | ||
2398 | and r12,r12,r3 | ||
2399 | add r4,r4,r8 | ||
2400 | add r8,r8,r0,ror#2 | ||
2401 | eor r12,r12,r10 | ||
2402 | vst1.32 {q8},[r1,:128]! | ||
2403 | add r7,r7,r2 | ||
2404 | eor r2,r5,r6 | ||
2405 | eor r0,r4,r4,ror#5 | ||
2406 | add r8,r8,r12 | ||
2407 | vld1.32 {q8},[r14,:128]! | ||
2408 | and r2,r2,r4 | ||
2409 | eor r12,r0,r4,ror#19 | ||
2410 | eor r0,r8,r8,ror#11 | ||
2411 | eor r2,r2,r6 | ||
2412 | vrev32.8 q1,q1 | ||
2413 | add r7,r7,r12,ror#6 | ||
2414 | eor r12,r8,r9 | ||
2415 | eor r0,r0,r8,ror#20 | ||
2416 | add r7,r7,r2 | ||
2417 | vadd.i32 q8,q8,q1 | ||
2418 | ldr r2,[sp,#20] | ||
2419 | and r3,r3,r12 | ||
2420 | add r11,r11,r7 | ||
2421 | add r7,r7,r0,ror#2 | ||
2422 | eor r3,r3,r9 | ||
2423 | add r6,r6,r2 | ||
2424 | eor r2,r4,r5 | ||
2425 | eor r0,r11,r11,ror#5 | ||
2426 | add r7,r7,r3 | ||
2427 | and r2,r2,r11 | ||
2428 | eor r3,r0,r11,ror#19 | ||
2429 | eor r0,r7,r7,ror#11 | ||
2430 | eor r2,r2,r5 | ||
2431 | add r6,r6,r3,ror#6 | ||
2432 | eor r3,r7,r8 | ||
2433 | eor r0,r0,r7,ror#20 | ||
2434 | add r6,r6,r2 | ||
2435 | ldr r2,[sp,#24] | ||
2436 | and r12,r12,r3 | ||
2437 | add r10,r10,r6 | ||
2438 | add r6,r6,r0,ror#2 | ||
2439 | eor r12,r12,r8 | ||
2440 | add r5,r5,r2 | ||
2441 | eor r2,r11,r4 | ||
2442 | eor r0,r10,r10,ror#5 | ||
2443 | add r6,r6,r12 | ||
2444 | and r2,r2,r10 | ||
2445 | eor r12,r0,r10,ror#19 | ||
2446 | eor r0,r6,r6,ror#11 | ||
2447 | eor r2,r2,r4 | ||
2448 | add r5,r5,r12,ror#6 | ||
2449 | eor r12,r6,r7 | ||
2450 | eor r0,r0,r6,ror#20 | ||
2451 | add r5,r5,r2 | ||
2452 | ldr r2,[sp,#28] | ||
2453 | and r3,r3,r12 | ||
2454 | add r9,r9,r5 | ||
2455 | add r5,r5,r0,ror#2 | ||
2456 | eor r3,r3,r7 | ||
2457 | add r4,r4,r2 | ||
2458 | eor r2,r10,r11 | ||
2459 | eor r0,r9,r9,ror#5 | ||
2460 | add r5,r5,r3 | ||
2461 | and r2,r2,r9 | ||
2462 | eor r3,r0,r9,ror#19 | ||
2463 | eor r0,r5,r5,ror#11 | ||
2464 | eor r2,r2,r11 | ||
2465 | add r4,r4,r3,ror#6 | ||
2466 | eor r3,r5,r6 | ||
2467 | eor r0,r0,r5,ror#20 | ||
2468 | add r4,r4,r2 | ||
2469 | ldr r2,[sp,#32] | ||
2470 | and r12,r12,r3 | ||
2471 | add r8,r8,r4 | ||
2472 | add r4,r4,r0,ror#2 | ||
2473 | eor r12,r12,r6 | ||
2474 | vst1.32 {q8},[r1,:128]! | ||
2475 | add r11,r11,r2 | ||
2476 | eor r2,r9,r10 | ||
2477 | eor r0,r8,r8,ror#5 | ||
2478 | add r4,r4,r12 | ||
2479 | vld1.32 {q8},[r14,:128]! | ||
2480 | and r2,r2,r8 | ||
2481 | eor r12,r0,r8,ror#19 | ||
2482 | eor r0,r4,r4,ror#11 | ||
2483 | eor r2,r2,r10 | ||
2484 | vrev32.8 q2,q2 | ||
2485 | add r11,r11,r12,ror#6 | ||
2486 | eor r12,r4,r5 | ||
2487 | eor r0,r0,r4,ror#20 | ||
2488 | add r11,r11,r2 | ||
2489 | vadd.i32 q8,q8,q2 | ||
2490 | ldr r2,[sp,#36] | ||
2491 | and r3,r3,r12 | ||
2492 | add r7,r7,r11 | ||
2493 | add r11,r11,r0,ror#2 | ||
2494 | eor r3,r3,r5 | ||
2495 | add r10,r10,r2 | ||
2496 | eor r2,r8,r9 | ||
2497 | eor r0,r7,r7,ror#5 | ||
2498 | add r11,r11,r3 | ||
2499 | and r2,r2,r7 | ||
2500 | eor r3,r0,r7,ror#19 | ||
2501 | eor r0,r11,r11,ror#11 | ||
2502 | eor r2,r2,r9 | ||
2503 | add r10,r10,r3,ror#6 | ||
2504 | eor r3,r11,r4 | ||
2505 | eor r0,r0,r11,ror#20 | ||
2506 | add r10,r10,r2 | ||
2507 | ldr r2,[sp,#40] | ||
2508 | and r12,r12,r3 | ||
2509 | add r6,r6,r10 | ||
2510 | add r10,r10,r0,ror#2 | ||
2511 | eor r12,r12,r4 | ||
2512 | add r9,r9,r2 | ||
2513 | eor r2,r7,r8 | ||
2514 | eor r0,r6,r6,ror#5 | ||
2515 | add r10,r10,r12 | ||
2516 | and r2,r2,r6 | ||
2517 | eor r12,r0,r6,ror#19 | ||
2518 | eor r0,r10,r10,ror#11 | ||
2519 | eor r2,r2,r8 | ||
2520 | add r9,r9,r12,ror#6 | ||
2521 | eor r12,r10,r11 | ||
2522 | eor r0,r0,r10,ror#20 | ||
2523 | add r9,r9,r2 | ||
2524 | ldr r2,[sp,#44] | ||
2525 | and r3,r3,r12 | ||
2526 | add r5,r5,r9 | ||
2527 | add r9,r9,r0,ror#2 | ||
2528 | eor r3,r3,r11 | ||
2529 | add r8,r8,r2 | ||
2530 | eor r2,r6,r7 | ||
2531 | eor r0,r5,r5,ror#5 | ||
2532 | add r9,r9,r3 | ||
2533 | and r2,r2,r5 | ||
2534 | eor r3,r0,r5,ror#19 | ||
2535 | eor r0,r9,r9,ror#11 | ||
2536 | eor r2,r2,r7 | ||
2537 | add r8,r8,r3,ror#6 | ||
2538 | eor r3,r9,r10 | ||
2539 | eor r0,r0,r9,ror#20 | ||
2540 | add r8,r8,r2 | ||
2541 | ldr r2,[sp,#48] | ||
2542 | and r12,r12,r3 | ||
2543 | add r4,r4,r8 | ||
2544 | add r8,r8,r0,ror#2 | ||
2545 | eor r12,r12,r10 | ||
2546 | vst1.32 {q8},[r1,:128]! | ||
2547 | add r7,r7,r2 | ||
2548 | eor r2,r5,r6 | ||
2549 | eor r0,r4,r4,ror#5 | ||
2550 | add r8,r8,r12 | ||
2551 | vld1.32 {q8},[r14,:128]! | ||
2552 | and r2,r2,r4 | ||
2553 | eor r12,r0,r4,ror#19 | ||
2554 | eor r0,r8,r8,ror#11 | ||
2555 | eor r2,r2,r6 | ||
2556 | vrev32.8 q3,q3 | ||
2557 | add r7,r7,r12,ror#6 | ||
2558 | eor r12,r8,r9 | ||
2559 | eor r0,r0,r8,ror#20 | ||
2560 | add r7,r7,r2 | ||
2561 | vadd.i32 q8,q8,q3 | ||
2562 | ldr r2,[sp,#52] | ||
2563 | and r3,r3,r12 | ||
2564 | add r11,r11,r7 | ||
2565 | add r7,r7,r0,ror#2 | ||
2566 | eor r3,r3,r9 | ||
2567 | add r6,r6,r2 | ||
2568 | eor r2,r4,r5 | ||
2569 | eor r0,r11,r11,ror#5 | ||
2570 | add r7,r7,r3 | ||
2571 | and r2,r2,r11 | ||
2572 | eor r3,r0,r11,ror#19 | ||
2573 | eor r0,r7,r7,ror#11 | ||
2574 | eor r2,r2,r5 | ||
2575 | add r6,r6,r3,ror#6 | ||
2576 | eor r3,r7,r8 | ||
2577 | eor r0,r0,r7,ror#20 | ||
2578 | add r6,r6,r2 | ||
2579 | ldr r2,[sp,#56] | ||
2580 | and r12,r12,r3 | ||
2581 | add r10,r10,r6 | ||
2582 | add r6,r6,r0,ror#2 | ||
2583 | eor r12,r12,r8 | ||
2584 | add r5,r5,r2 | ||
2585 | eor r2,r11,r4 | ||
2586 | eor r0,r10,r10,ror#5 | ||
2587 | add r6,r6,r12 | ||
2588 | and r2,r2,r10 | ||
2589 | eor r12,r0,r10,ror#19 | ||
2590 | eor r0,r6,r6,ror#11 | ||
2591 | eor r2,r2,r4 | ||
2592 | add r5,r5,r12,ror#6 | ||
2593 | eor r12,r6,r7 | ||
2594 | eor r0,r0,r6,ror#20 | ||
2595 | add r5,r5,r2 | ||
2596 | ldr r2,[sp,#60] | ||
2597 | and r3,r3,r12 | ||
2598 | add r9,r9,r5 | ||
2599 | add r5,r5,r0,ror#2 | ||
2600 | eor r3,r3,r7 | ||
2601 | add r4,r4,r2 | ||
2602 | eor r2,r10,r11 | ||
2603 | eor r0,r9,r9,ror#5 | ||
2604 | add r5,r5,r3 | ||
2605 | and r2,r2,r9 | ||
2606 | eor r3,r0,r9,ror#19 | ||
2607 | eor r0,r5,r5,ror#11 | ||
2608 | eor r2,r2,r11 | ||
2609 | add r4,r4,r3,ror#6 | ||
2610 | eor r3,r5,r6 | ||
2611 | eor r0,r0,r5,ror#20 | ||
2612 | add r4,r4,r2 | ||
2613 | ldr r2,[sp,#64] | ||
2614 | and r12,r12,r3 | ||
2615 | add r8,r8,r4 | ||
2616 | add r4,r4,r0,ror#2 | ||
2617 | eor r12,r12,r6 | ||
2618 | vst1.32 {q8},[r1,:128]! | ||
2619 | ldr r0,[r2,#0] | ||
2620 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
2621 | ldr r12,[r2,#4] | ||
2622 | ldr r3,[r2,#8] | ||
2623 | ldr r1,[r2,#12] | ||
2624 | add r4,r4,r0 @ accumulate | ||
2625 | ldr r0,[r2,#16] | ||
2626 | add r5,r5,r12 | ||
2627 | ldr r12,[r2,#20] | ||
2628 | add r6,r6,r3 | ||
2629 | ldr r3,[r2,#24] | ||
2630 | add r7,r7,r1 | ||
2631 | ldr r1,[r2,#28] | ||
2632 | add r8,r8,r0 | ||
2633 | str r4,[r2],#4 | ||
2634 | add r9,r9,r12 | ||
2635 | str r5,[r2],#4 | ||
2636 | add r10,r10,r3 | ||
2637 | str r6,[r2],#4 | ||
2638 | add r11,r11,r1 | ||
2639 | str r7,[r2],#4 | ||
2640 | stmia r2,{r8-r11} | ||
2641 | |||
2642 | ittte ne | ||
2643 | movne r1,sp | ||
2644 | ldrne r2,[sp,#0] | ||
2645 | eorne r12,r12,r12 | ||
2646 | ldreq sp,[sp,#76] @ restore original sp | ||
2647 | itt ne | ||
2648 | eorne r3,r5,r6 | ||
2649 | bne .L_00_48 | ||
2650 | |||
2651 | ldmia sp!,{r4-r12,pc} | ||
2652 | .size sha256_block_data_order_neon,.-sha256_block_data_order_neon | ||
2653 | #endif | ||
2654 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
2655 | |||
2656 | # ifdef __thumb2__ | ||
2657 | # define INST(a,b,c,d) .byte c,d|0xc,a,b | ||
2658 | # else | ||
2659 | # define INST(a,b,c,d) .byte a,b,c,d | ||
2660 | # endif | ||
2661 | |||
2662 | .type sha256_block_data_order_armv8,%function | ||
2663 | .align 5 | ||
2664 | sha256_block_data_order_armv8: | ||
2665 | .LARMv8: | ||
2666 | vld1.32 {q0,q1},[r0] | ||
2667 | # ifdef __thumb2__ | ||
2668 | adr r3,.LARMv8 | ||
2669 | sub r3,r3,#.LARMv8-K256 | ||
2670 | # else | ||
2671 | adrl r3,K256 | ||
2672 | # endif | ||
2673 | add r2,r1,r2,lsl#6 @ len to point at the end of inp | ||
2674 | |||
2675 | .Loop_v8: | ||
2676 | vld1.8 {q8-q9},[r1]! | ||
2677 | vld1.8 {q10-q11},[r1]! | ||
2678 | vld1.32 {q12},[r3]! | ||
2679 | vrev32.8 q8,q8 | ||
2680 | vrev32.8 q9,q9 | ||
2681 | vrev32.8 q10,q10 | ||
2682 | vrev32.8 q11,q11 | ||
2683 | vmov q14,q0 @ offload | ||
2684 | vmov q15,q1 | ||
2685 | teq r1,r2 | ||
2686 | vld1.32 {q13},[r3]! | ||
2687 | vadd.i32 q12,q12,q8 | ||
2688 | INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 | ||
2689 | vmov q2,q0 | ||
2690 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2691 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2692 | INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 | ||
2693 | vld1.32 {q12},[r3]! | ||
2694 | vadd.i32 q13,q13,q9 | ||
2695 | INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 | ||
2696 | vmov q2,q0 | ||
2697 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2698 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2699 | INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 | ||
2700 | vld1.32 {q13},[r3]! | ||
2701 | vadd.i32 q12,q12,q10 | ||
2702 | INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 | ||
2703 | vmov q2,q0 | ||
2704 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2705 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2706 | INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 | ||
2707 | vld1.32 {q12},[r3]! | ||
2708 | vadd.i32 q13,q13,q11 | ||
2709 | INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 | ||
2710 | vmov q2,q0 | ||
2711 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2712 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2713 | INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 | ||
2714 | vld1.32 {q13},[r3]! | ||
2715 | vadd.i32 q12,q12,q8 | ||
2716 | INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 | ||
2717 | vmov q2,q0 | ||
2718 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2719 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2720 | INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 | ||
2721 | vld1.32 {q12},[r3]! | ||
2722 | vadd.i32 q13,q13,q9 | ||
2723 | INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 | ||
2724 | vmov q2,q0 | ||
2725 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2726 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2727 | INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 | ||
2728 | vld1.32 {q13},[r3]! | ||
2729 | vadd.i32 q12,q12,q10 | ||
2730 | INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 | ||
2731 | vmov q2,q0 | ||
2732 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2733 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2734 | INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 | ||
2735 | vld1.32 {q12},[r3]! | ||
2736 | vadd.i32 q13,q13,q11 | ||
2737 | INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 | ||
2738 | vmov q2,q0 | ||
2739 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2740 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2741 | INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 | ||
2742 | vld1.32 {q13},[r3]! | ||
2743 | vadd.i32 q12,q12,q8 | ||
2744 | INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 | ||
2745 | vmov q2,q0 | ||
2746 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2747 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2748 | INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 | ||
2749 | vld1.32 {q12},[r3]! | ||
2750 | vadd.i32 q13,q13,q9 | ||
2751 | INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 | ||
2752 | vmov q2,q0 | ||
2753 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2754 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2755 | INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 | ||
2756 | vld1.32 {q13},[r3]! | ||
2757 | vadd.i32 q12,q12,q10 | ||
2758 | INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 | ||
2759 | vmov q2,q0 | ||
2760 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2761 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2762 | INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 | ||
2763 | vld1.32 {q12},[r3]! | ||
2764 | vadd.i32 q13,q13,q11 | ||
2765 | INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 | ||
2766 | vmov q2,q0 | ||
2767 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2768 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2769 | INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 | ||
2770 | vld1.32 {q13},[r3]! | ||
2771 | vadd.i32 q12,q12,q8 | ||
2772 | vmov q2,q0 | ||
2773 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2774 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2775 | |||
2776 | vld1.32 {q12},[r3]! | ||
2777 | vadd.i32 q13,q13,q9 | ||
2778 | vmov q2,q0 | ||
2779 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2780 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2781 | |||
2782 | vld1.32 {q13},[r3] | ||
2783 | vadd.i32 q12,q12,q10 | ||
2784 | sub r3,r3,#256-16 @ rewind | ||
2785 | vmov q2,q0 | ||
2786 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2787 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2788 | |||
2789 | vadd.i32 q13,q13,q11 | ||
2790 | vmov q2,q0 | ||
2791 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2792 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2793 | |||
2794 | vadd.i32 q0,q0,q14 | ||
2795 | vadd.i32 q1,q1,q15 | ||
2796 | it ne | ||
2797 | bne .Loop_v8 | ||
2798 | |||
2799 | vst1.32 {q0,q1},[r0] | ||
2800 | |||
2801 | bx lr @ bx lr | ||
2802 | .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 | ||
2803 | #endif | ||
2804 | .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>" | ||
2805 | .align 2 | ||
2806 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
2807 | .comm OPENSSL_armcap_P,4,4 | ||
2808 | #endif | ||
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c new file mode 100644 index 000000000000..a84e869ef900 --- /dev/null +++ b/arch/arm/crypto/sha256_glue.c | |||
@@ -0,0 +1,128 @@ | |||
1 | /* | ||
2 | * Glue code for the SHA256 Secure Hash Algorithm assembly implementation | ||
3 | * using optimized ARM assembler and NEON instructions. | ||
4 | * | ||
5 | * Copyright © 2015 Google Inc. | ||
6 | * | ||
7 | * This file is based on sha256_ssse3_glue.c: | ||
8 | * Copyright (C) 2013 Intel Corporation | ||
9 | * Author: Tim Chen <tim.c.chen@linux.intel.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <crypto/internal/hash.h> | ||
19 | #include <linux/crypto.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/mm.h> | ||
23 | #include <linux/cryptohash.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <crypto/sha.h> | ||
27 | #include <crypto/sha256_base.h> | ||
28 | #include <asm/simd.h> | ||
29 | #include <asm/neon.h> | ||
30 | |||
31 | #include "sha256_glue.h" | ||
32 | |||
33 | asmlinkage void sha256_block_data_order(u32 *digest, const void *data, | ||
34 | unsigned int num_blks); | ||
35 | |||
36 | int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, | ||
37 | unsigned int len) | ||
38 | { | ||
39 | /* make sure casting to sha256_block_fn() is safe */ | ||
40 | BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); | ||
41 | |||
42 | return sha256_base_do_update(desc, data, len, | ||
43 | (sha256_block_fn *)sha256_block_data_order); | ||
44 | } | ||
45 | EXPORT_SYMBOL(crypto_sha256_arm_update); | ||
46 | |||
47 | static int sha256_final(struct shash_desc *desc, u8 *out) | ||
48 | { | ||
49 | sha256_base_do_finalize(desc, | ||
50 | (sha256_block_fn *)sha256_block_data_order); | ||
51 | return sha256_base_finish(desc, out); | ||
52 | } | ||
53 | |||
54 | int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, | ||
55 | unsigned int len, u8 *out) | ||
56 | { | ||
57 | sha256_base_do_update(desc, data, len, | ||
58 | (sha256_block_fn *)sha256_block_data_order); | ||
59 | return sha256_final(desc, out); | ||
60 | } | ||
61 | EXPORT_SYMBOL(crypto_sha256_arm_finup); | ||
62 | |||
63 | static struct shash_alg algs[] = { { | ||
64 | .digestsize = SHA256_DIGEST_SIZE, | ||
65 | .init = sha256_base_init, | ||
66 | .update = crypto_sha256_arm_update, | ||
67 | .final = sha256_final, | ||
68 | .finup = crypto_sha256_arm_finup, | ||
69 | .descsize = sizeof(struct sha256_state), | ||
70 | .base = { | ||
71 | .cra_name = "sha256", | ||
72 | .cra_driver_name = "sha256-asm", | ||
73 | .cra_priority = 150, | ||
74 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
75 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
76 | .cra_module = THIS_MODULE, | ||
77 | } | ||
78 | }, { | ||
79 | .digestsize = SHA224_DIGEST_SIZE, | ||
80 | .init = sha224_base_init, | ||
81 | .update = crypto_sha256_arm_update, | ||
82 | .final = sha256_final, | ||
83 | .finup = crypto_sha256_arm_finup, | ||
84 | .descsize = sizeof(struct sha256_state), | ||
85 | .base = { | ||
86 | .cra_name = "sha224", | ||
87 | .cra_driver_name = "sha224-asm", | ||
88 | .cra_priority = 150, | ||
89 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
90 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
91 | .cra_module = THIS_MODULE, | ||
92 | } | ||
93 | } }; | ||
94 | |||
95 | static int __init sha256_mod_init(void) | ||
96 | { | ||
97 | int res = crypto_register_shashes(algs, ARRAY_SIZE(algs)); | ||
98 | |||
99 | if (res < 0) | ||
100 | return res; | ||
101 | |||
102 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { | ||
103 | res = crypto_register_shashes(sha256_neon_algs, | ||
104 | ARRAY_SIZE(sha256_neon_algs)); | ||
105 | |||
106 | if (res < 0) | ||
107 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); | ||
108 | } | ||
109 | |||
110 | return res; | ||
111 | } | ||
112 | |||
113 | static void __exit sha256_mod_fini(void) | ||
114 | { | ||
115 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); | ||
116 | |||
117 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) | ||
118 | crypto_unregister_shashes(sha256_neon_algs, | ||
119 | ARRAY_SIZE(sha256_neon_algs)); | ||
120 | } | ||
121 | |||
122 | module_init(sha256_mod_init); | ||
123 | module_exit(sha256_mod_fini); | ||
124 | |||
125 | MODULE_LICENSE("GPL"); | ||
126 | MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON"); | ||
127 | |||
128 | MODULE_ALIAS_CRYPTO("sha256"); | ||
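For context on how the algorithms registered above get consumed: any kernel caller can request "sha256" through the shash API and transparently receives the highest-priority registered implementation. A hedged sketch follows (not part of the commit; example_sha256 is an invented name), using the standard crypto_alloc_shash()/crypto_shash_digest() calls as they exist in this kernel.

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>

/* Illustrative consumer: one-shot SHA-256 digest of a buffer. */
static int example_sha256(const u8 *data, unsigned int len,
			  u8 out[SHA256_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	int err;

	/* Resolves to the best "sha256": sha256-neon (priority 250) when
	 * registered, else sha256-asm (150), else the generic C code. */
	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		err = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return err;
}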
diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h new file mode 100644 index 000000000000..7cf0bf786ada --- /dev/null +++ b/arch/arm/crypto/sha256_glue.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef _CRYPTO_SHA256_GLUE_H | ||
2 | #define _CRYPTO_SHA256_GLUE_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | |||
6 | extern struct shash_alg sha256_neon_algs[2]; | ||
7 | |||
8 | int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, | ||
9 | unsigned int len); | ||
10 | |||
11 | int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, | ||
12 | unsigned int len, u8 *hash); | ||
13 | |||
14 | #endif /* _CRYPTO_SHA256_GLUE_H */ | ||
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c new file mode 100644 index 000000000000..39ccd658817e --- /dev/null +++ b/arch/arm/crypto/sha256_neon_glue.c | |||
@@ -0,0 +1,101 @@ | |||
1 | /* | ||
2 | * Glue code for the SHA256 Secure Hash Algorithm assembly implementation | ||
3 | * using NEON instructions. | ||
4 | * | ||
5 | * Copyright © 2015 Google Inc. | ||
6 | * | ||
7 | * This file is based on sha512_neon_glue.c: | ||
8 | * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms of the GNU General Public License as published by the Free | ||
12 | * Software Foundation; either version 2 of the License, or (at your option) | ||
13 | * any later version. | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | #include <crypto/internal/hash.h> | ||
18 | #include <linux/cryptohash.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/string.h> | ||
21 | #include <crypto/sha.h> | ||
22 | #include <crypto/sha256_base.h> | ||
23 | #include <asm/byteorder.h> | ||
24 | #include <asm/simd.h> | ||
25 | #include <asm/neon.h> | ||
26 | |||
27 | #include "sha256_glue.h" | ||
28 | |||
29 | asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data, | ||
30 | unsigned int num_blks); | ||
31 | |||
32 | static int sha256_update(struct shash_desc *desc, const u8 *data, | ||
33 | unsigned int len) | ||
34 | { | ||
35 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
36 | |||
37 | if (!may_use_simd() || | ||
38 | (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) | ||
39 | return crypto_sha256_arm_update(desc, data, len); | ||
40 | |||
41 | kernel_neon_begin(); | ||
42 | sha256_base_do_update(desc, data, len, | ||
43 | (sha256_block_fn *)sha256_block_data_order_neon); | ||
44 | kernel_neon_end(); | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | static int sha256_finup(struct shash_desc *desc, const u8 *data, | ||
50 | unsigned int len, u8 *out) | ||
51 | { | ||
52 | if (!may_use_simd()) | ||
53 | return crypto_sha256_arm_finup(desc, data, len, out); | ||
54 | |||
55 | kernel_neon_begin(); | ||
56 | if (len) | ||
57 | sha256_base_do_update(desc, data, len, | ||
58 | (sha256_block_fn *)sha256_block_data_order_neon); | ||
59 | sha256_base_do_finalize(desc, | ||
60 | (sha256_block_fn *)sha256_block_data_order_neon); | ||
61 | kernel_neon_end(); | ||
62 | |||
63 | return sha256_base_finish(desc, out); | ||
64 | } | ||
65 | |||
66 | static int sha256_final(struct shash_desc *desc, u8 *out) | ||
67 | { | ||
68 | return sha256_finup(desc, NULL, 0, out); | ||
69 | } | ||
70 | |||
71 | struct shash_alg sha256_neon_algs[] = { { | ||
72 | .digestsize = SHA256_DIGEST_SIZE, | ||
73 | .init = sha256_base_init, | ||
74 | .update = sha256_update, | ||
75 | .final = sha256_final, | ||
76 | .finup = sha256_finup, | ||
77 | .descsize = sizeof(struct sha256_state), | ||
78 | .base = { | ||
79 | .cra_name = "sha256", | ||
80 | .cra_driver_name = "sha256-neon", | ||
81 | .cra_priority = 250, | ||
82 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
83 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
84 | .cra_module = THIS_MODULE, | ||
85 | } | ||
86 | }, { | ||
87 | .digestsize = SHA224_DIGEST_SIZE, | ||
88 | .init = sha224_base_init, | ||
89 | .update = sha256_update, | ||
90 | .final = sha256_final, | ||
91 | .finup = sha256_finup, | ||
92 | .descsize = sizeof(struct sha256_state), | ||
93 | .base = { | ||
94 | .cra_name = "sha224", | ||
95 | .cra_driver_name = "sha224-neon", | ||
96 | .cra_priority = 250, | ||
97 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
98 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
99 | .cra_module = THIS_MODULE, | ||
100 | } | ||
101 | } }; | ||
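Two design points in this NEON glue are worth spelling out. First, sha256_update() falls back to the scalar crypto_sha256_arm_update() both when SIMD is unusable in the current context (may_use_simd() is false) and when the new data would not even complete one block, since a kernel_neon_begin()/kernel_neon_end() round-trip is not worth paying for bytes that only get buffered. Second, the higher cra_priority (250 versus 150) is what makes the API prefer these algorithms over the plain-asm ones when both are registered. A quick way to check which driver actually serves "sha256" is sketched below (illustrative only; sha256_report_driver is an invented name):

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/printk.h>

/* Illustrative probe: report the driver currently backing "sha256". */
static void sha256_report_driver(void)
{
	struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);

	if (IS_ERR(tfm))
		return;
	/* Expect "sha256-neon" on NEON-capable parts, else "sha256-asm". */
	pr_info("sha256 backed by %s\n",
		crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)));
	crypto_free_shash(tfm);
}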
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index b1b5b893eb20..05d9e16c0dfd 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c | |||
@@ -284,7 +284,8 @@ static struct crypto_alg aes_algs[] = { { | |||
284 | .cra_name = "__ecb-aes-" MODE, | 284 | .cra_name = "__ecb-aes-" MODE, |
285 | .cra_driver_name = "__driver-ecb-aes-" MODE, | 285 | .cra_driver_name = "__driver-ecb-aes-" MODE, |
286 | .cra_priority = 0, | 286 | .cra_priority = 0, |
287 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 287 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
288 | CRYPTO_ALG_INTERNAL, | ||
288 | .cra_blocksize = AES_BLOCK_SIZE, | 289 | .cra_blocksize = AES_BLOCK_SIZE, |
289 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | 290 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), |
290 | .cra_alignmask = 7, | 291 | .cra_alignmask = 7, |
@@ -302,7 +303,8 @@ static struct crypto_alg aes_algs[] = { { | |||
302 | .cra_name = "__cbc-aes-" MODE, | 303 | .cra_name = "__cbc-aes-" MODE, |
303 | .cra_driver_name = "__driver-cbc-aes-" MODE, | 304 | .cra_driver_name = "__driver-cbc-aes-" MODE, |
304 | .cra_priority = 0, | 305 | .cra_priority = 0, |
305 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 306 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
307 | CRYPTO_ALG_INTERNAL, | ||
306 | .cra_blocksize = AES_BLOCK_SIZE, | 308 | .cra_blocksize = AES_BLOCK_SIZE, |
307 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | 309 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), |
308 | .cra_alignmask = 7, | 310 | .cra_alignmask = 7, |
@@ -320,7 +322,8 @@ static struct crypto_alg aes_algs[] = { { | |||
320 | .cra_name = "__ctr-aes-" MODE, | 322 | .cra_name = "__ctr-aes-" MODE, |
321 | .cra_driver_name = "__driver-ctr-aes-" MODE, | 323 | .cra_driver_name = "__driver-ctr-aes-" MODE, |
322 | .cra_priority = 0, | 324 | .cra_priority = 0, |
323 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 325 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
326 | CRYPTO_ALG_INTERNAL, | ||
324 | .cra_blocksize = 1, | 327 | .cra_blocksize = 1, |
325 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | 328 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), |
326 | .cra_alignmask = 7, | 329 | .cra_alignmask = 7, |
@@ -338,7 +341,8 @@ static struct crypto_alg aes_algs[] = { { | |||
338 | .cra_name = "__xts-aes-" MODE, | 341 | .cra_name = "__xts-aes-" MODE, |
339 | .cra_driver_name = "__driver-xts-aes-" MODE, | 342 | .cra_driver_name = "__driver-xts-aes-" MODE, |
340 | .cra_priority = 0, | 343 | .cra_priority = 0, |
341 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 344 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
345 | CRYPTO_ALG_INTERNAL, | ||
342 | .cra_blocksize = AES_BLOCK_SIZE, | 346 | .cra_blocksize = AES_BLOCK_SIZE, |
343 | .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), | 347 | .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), |
344 | .cra_alignmask = 7, | 348 | .cra_alignmask = 7, |
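The CRYPTO_ALG_INTERNAL additions above implement the "prevent internal helper algos from being exposed to user-space" item from the pull summary: the __ecb/__cbc/__ctr/__xts helpers are only meant to be driven by a wrapper that sets up the NEON context, so a plain algorithm lookup must no longer resolve to them. Callers that legitimately need an internal algorithm have to request it explicitly, roughly as sketched here (illustrative; the exact driver name depends on the build's MODE, "ce" or "neon"):

#include <crypto/cryptd.h>
#include <linux/crypto.h>

static struct cryptd_ablkcipher *get_internal_cbc_aes(void)
{
	/* Only lookups that set CRYPTO_ALG_INTERNAL in both type and mask
	 * can still see the flagged helpers; ablk_helper/cryptd do this. */
	return cryptd_alloc_ablkcipher("__driver-cbc-aes-ce",
				       CRYPTO_ALG_INTERNAL,
				       CRYPTO_ALG_INTERNAL);
}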
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 09d57d98609c..033aae6d732a 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S | |||
@@ -66,8 +66,8 @@ | |||
66 | .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 | 66 | .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * void sha1_ce_transform(int blocks, u8 const *src, u32 *state, | 69 | * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, |
70 | * u8 *head, long bytes) | 70 | * int blocks) |
71 | */ | 71 | */ |
72 | ENTRY(sha1_ce_transform) | 72 | ENTRY(sha1_ce_transform) |
73 | /* load round constants */ | 73 | /* load round constants */ |
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform) | |||
78 | ld1r {k3.4s}, [x6] | 78 | ld1r {k3.4s}, [x6] |
79 | 79 | ||
80 | /* load state */ | 80 | /* load state */ |
81 | ldr dga, [x2] | 81 | ldr dga, [x0] |
82 | ldr dgb, [x2, #16] | 82 | ldr dgb, [x0, #16] |
83 | 83 | ||
84 | /* load partial state (if supplied) */ | 84 | /* load sha1_ce_state::finalize */ |
85 | cbz x3, 0f | 85 | ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize] |
86 | ld1 {v8.4s-v11.4s}, [x3] | ||
87 | b 1f | ||
88 | 86 | ||
89 | /* load input */ | 87 | /* load input */ |
90 | 0: ld1 {v8.4s-v11.4s}, [x1], #64 | 88 | 0: ld1 {v8.4s-v11.4s}, [x1], #64 |
91 | sub w0, w0, #1 | 89 | sub w2, w2, #1 |
92 | 90 | ||
93 | 1: | ||
94 | CPU_LE( rev32 v8.16b, v8.16b ) | 91 | CPU_LE( rev32 v8.16b, v8.16b ) |
95 | CPU_LE( rev32 v9.16b, v9.16b ) | 92 | CPU_LE( rev32 v9.16b, v9.16b ) |
96 | CPU_LE( rev32 v10.16b, v10.16b ) | 93 | CPU_LE( rev32 v10.16b, v10.16b ) |
97 | CPU_LE( rev32 v11.16b, v11.16b ) | 94 | CPU_LE( rev32 v11.16b, v11.16b ) |
98 | 95 | ||
99 | 2: add t0.4s, v8.4s, k0.4s | 96 | 1: add t0.4s, v8.4s, k0.4s |
100 | mov dg0v.16b, dgav.16b | 97 | mov dg0v.16b, dgav.16b |
101 | 98 | ||
102 | add_update c, ev, k0, 8, 9, 10, 11, dgb | 99 | add_update c, ev, k0, 8, 9, 10, 11, dgb |
@@ -127,15 +124,15 @@ CPU_LE( rev32 v11.16b, v11.16b ) | |||
127 | add dgbv.2s, dgbv.2s, dg1v.2s | 124 | add dgbv.2s, dgbv.2s, dg1v.2s |
128 | add dgav.4s, dgav.4s, dg0v.4s | 125 | add dgav.4s, dgav.4s, dg0v.4s |
129 | 126 | ||
130 | cbnz w0, 0b | 127 | cbnz w2, 0b |
131 | 128 | ||
132 | /* | 129 | /* |
133 | * Final block: add padding and total bit count. | 130 | * Final block: add padding and total bit count. |
134 | * Skip if we have no total byte count in x4. In that case, the input | 131 | * Skip if the input size was not a round multiple of the block size; |
135 | * size was not a round multiple of the block size, and the padding is | 132 | * the padding is handled by the C code in that case. |
136 | * handled by the C code. | ||
137 | */ | 133 | */ |
138 | cbz x4, 3f | 134 | cbz x4, 3f |
135 | ldr x4, [x0, #:lo12:sha1_ce_offsetof_count] | ||
139 | movi v9.2d, #0 | 136 | movi v9.2d, #0 |
140 | mov x8, #0x80000000 | 137 | mov x8, #0x80000000 |
141 | movi v10.2d, #0 | 138 | movi v10.2d, #0 |
@@ -144,10 +141,10 @@ CPU_LE( rev32 v11.16b, v11.16b ) | |||
144 | mov x4, #0 | 141 | mov x4, #0 |
145 | mov v11.d[0], xzr | 142 | mov v11.d[0], xzr |
146 | mov v11.d[1], x7 | 143 | mov v11.d[1], x7 |
147 | b 2b | 144 | b 1b |
148 | 145 | ||
149 | /* store new state */ | 146 | /* store new state */ |
150 | 3: str dga, [x2] | 147 | 3: str dga, [x0] |
151 | str dgb, [x2, #16] | 148 | str dgb, [x0, #16] |
152 | ret | 149 | ret |
153 | ENDPROC(sha1_ce_transform) | 150 | ENDPROC(sha1_ce_transform) |
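The "Final block: add padding and total bit count" fast path above only runs for inputs that ended exactly on a 64-byte boundary, so the padding always fits in one extra block: an 0x80 terminator, zero fill, and the big-endian 64-bit bit count in the last eight bytes. A standalone C illustration of the block it synthesizes (not kernel code; sha_pad_block is an invented name):

#include <stdint.h>
#include <string.h>

/* Build the single padding block appended after a block-aligned message
 * of byte_count bytes: 0x80 terminator, zeros, 64-bit big-endian length. */
static void sha_pad_block(uint8_t block[64], uint64_t byte_count)
{
	uint64_t bits = byte_count * 8;
	int i;

	memset(block, 0, 64);
	block[0] = 0x80;
	for (i = 0; i < 8; i++)
		block[56 + i] = (uint8_t)(bits >> (56 - 8 * i));
}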
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 6fe83f37a750..114e7cc5de8c 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c | |||
@@ -12,144 +12,81 @@ | |||
12 | #include <asm/unaligned.h> | 12 | #include <asm/unaligned.h> |
13 | #include <crypto/internal/hash.h> | 13 | #include <crypto/internal/hash.h> |
14 | #include <crypto/sha.h> | 14 | #include <crypto/sha.h> |
15 | #include <crypto/sha1_base.h> | ||
15 | #include <linux/cpufeature.h> | 16 | #include <linux/cpufeature.h> |
16 | #include <linux/crypto.h> | 17 | #include <linux/crypto.h> |
17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
18 | 19 | ||
20 | #define ASM_EXPORT(sym, val) \ | ||
21 | asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); | ||
22 | |||
19 | MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); | 23 | MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); |
20 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | 24 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
21 | MODULE_LICENSE("GPL v2"); | 25 | MODULE_LICENSE("GPL v2"); |
22 | 26 | ||
23 | asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, | 27 | struct sha1_ce_state { |
24 | u8 *head, long bytes); | 28 | struct sha1_state sst; |
29 | u32 finalize; | ||
30 | }; | ||
25 | 31 | ||
26 | static int sha1_init(struct shash_desc *desc) | 32 | asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, |
27 | { | 33 | int blocks); |
28 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
29 | 34 | ||
30 | *sctx = (struct sha1_state){ | 35 | static int sha1_ce_update(struct shash_desc *desc, const u8 *data, |
31 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | 36 | unsigned int len) |
32 | }; | ||
33 | return 0; | ||
34 | } | ||
35 | |||
36 | static int sha1_update(struct shash_desc *desc, const u8 *data, | ||
37 | unsigned int len) | ||
38 | { | 37 | { |
39 | struct sha1_state *sctx = shash_desc_ctx(desc); | 38 | struct sha1_ce_state *sctx = shash_desc_ctx(desc); |
40 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
41 | |||
42 | sctx->count += len; | ||
43 | |||
44 | if ((partial + len) >= SHA1_BLOCK_SIZE) { | ||
45 | int blocks; | ||
46 | |||
47 | if (partial) { | ||
48 | int p = SHA1_BLOCK_SIZE - partial; | ||
49 | 39 | ||
50 | memcpy(sctx->buffer + partial, data, p); | 40 | sctx->finalize = 0; |
51 | data += p; | 41 | kernel_neon_begin_partial(16); |
52 | len -= p; | 42 | sha1_base_do_update(desc, data, len, |
53 | } | 43 | (sha1_block_fn *)sha1_ce_transform); |
54 | 44 | kernel_neon_end(); | |
55 | blocks = len / SHA1_BLOCK_SIZE; | ||
56 | len %= SHA1_BLOCK_SIZE; | ||
57 | |||
58 | kernel_neon_begin_partial(16); | ||
59 | sha1_ce_transform(blocks, data, sctx->state, | ||
60 | partial ? sctx->buffer : NULL, 0); | ||
61 | kernel_neon_end(); | ||
62 | 45 | ||
63 | data += blocks * SHA1_BLOCK_SIZE; | ||
64 | partial = 0; | ||
65 | } | ||
66 | if (len) | ||
67 | memcpy(sctx->buffer + partial, data, len); | ||
68 | return 0; | 46 | return 0; |
69 | } | 47 | } |
70 | 48 | ||
71 | static int sha1_final(struct shash_desc *desc, u8 *out) | 49 | static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, |
50 | unsigned int len, u8 *out) | ||
72 | { | 51 | { |
73 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | 52 | struct sha1_ce_state *sctx = shash_desc_ctx(desc); |
53 | bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE); | ||
74 | 54 | ||
75 | struct sha1_state *sctx = shash_desc_ctx(desc); | 55 | ASM_EXPORT(sha1_ce_offsetof_count, |
76 | __be64 bits = cpu_to_be64(sctx->count << 3); | 56 | offsetof(struct sha1_ce_state, sst.count)); |
77 | __be32 *dst = (__be32 *)out; | 57 | ASM_EXPORT(sha1_ce_offsetof_finalize, |
78 | int i; | 58 | offsetof(struct sha1_ce_state, finalize)); |
79 | |||
80 | u32 padlen = SHA1_BLOCK_SIZE | ||
81 | - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE); | ||
82 | |||
83 | sha1_update(desc, padding, padlen); | ||
84 | sha1_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
85 | |||
86 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) | ||
87 | put_unaligned_be32(sctx->state[i], dst++); | ||
88 | |||
89 | *sctx = (struct sha1_state){}; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | static int sha1_finup(struct shash_desc *desc, const u8 *data, | ||
94 | unsigned int len, u8 *out) | ||
95 | { | ||
96 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
97 | __be32 *dst = (__be32 *)out; | ||
98 | int blocks; | ||
99 | int i; | ||
100 | |||
101 | if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) { | ||
102 | sha1_update(desc, data, len); | ||
103 | return sha1_final(desc, out); | ||
104 | } | ||
105 | 59 | ||
106 | /* | 60 | /* |
107 | * Use a fast path if the input is a multiple of 64 bytes. In | 61 | * Allow the asm code to perform the finalization if there is no |
108 | * this case, there is no need to copy data around, and we can | 62 | * partial data and the input is a round multiple of the block size. |
109 | * perform the entire digest calculation in a single invocation | ||
110 | * of sha1_ce_transform() | ||
111 | */ | 63 | */ |
112 | blocks = len / SHA1_BLOCK_SIZE; | 64 | sctx->finalize = finalize; |
113 | 65 | ||
114 | kernel_neon_begin_partial(16); | 66 | kernel_neon_begin_partial(16); |
115 | sha1_ce_transform(blocks, data, sctx->state, NULL, len); | 67 | sha1_base_do_update(desc, data, len, |
68 | (sha1_block_fn *)sha1_ce_transform); | ||
69 | if (!finalize) | ||
70 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); | ||
116 | kernel_neon_end(); | 71 | kernel_neon_end(); |
117 | 72 | return sha1_base_finish(desc, out); | |
118 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) | ||
119 | put_unaligned_be32(sctx->state[i], dst++); | ||
120 | |||
121 | *sctx = (struct sha1_state){}; | ||
122 | return 0; | ||
123 | } | 73 | } |
124 | 74 | ||
125 | static int sha1_export(struct shash_desc *desc, void *out) | 75 | static int sha1_ce_final(struct shash_desc *desc, u8 *out) |
126 | { | 76 | { |
127 | struct sha1_state *sctx = shash_desc_ctx(desc); | 77 | kernel_neon_begin_partial(16); |
128 | struct sha1_state *dst = out; | 78 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); |
129 | 79 | kernel_neon_end(); | |
130 | *dst = *sctx; | 80 | return sha1_base_finish(desc, out); |
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static int sha1_import(struct shash_desc *desc, const void *in) | ||
135 | { | ||
136 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
137 | struct sha1_state const *src = in; | ||
138 | |||
139 | *sctx = *src; | ||
140 | return 0; | ||
141 | } | 81 | } |
142 | 82 | ||
143 | static struct shash_alg alg = { | 83 | static struct shash_alg alg = { |
144 | .init = sha1_init, | 84 | .init = sha1_base_init, |
145 | .update = sha1_update, | 85 | .update = sha1_ce_update, |
146 | .final = sha1_final, | 86 | .final = sha1_ce_final, |
147 | .finup = sha1_finup, | 87 | .finup = sha1_ce_finup, |
148 | .export = sha1_export, | 88 | .descsize = sizeof(struct sha1_ce_state), |
149 | .import = sha1_import, | ||
150 | .descsize = sizeof(struct sha1_state), | ||
151 | .digestsize = SHA1_DIGEST_SIZE, | 89 | .digestsize = SHA1_DIGEST_SIZE, |
152 | .statesize = sizeof(struct sha1_state), | ||
153 | .base = { | 90 | .base = { |
154 | .cra_name = "sha1", | 91 | .cra_name = "sha1", |
155 | .cra_driver_name = "sha1-ce", | 92 | .cra_driver_name = "sha1-ce", |
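Note when the finalize fast path can actually fire: sha1_ce_finup() sets it only when no bytes were fed before this call (sst.count == 0) and len is a whole number of blocks, which is exactly the one case the assembly padding path handles. Restated as a standalone predicate (illustrative only; the len != 0 guard is a conservative addition for the degenerate empty-finup case, where the block function is never invoked at all):

#include <crypto/sha.h>
#include <linux/types.h>

/* True when finup() may delegate the padding block to the assembly. */
static bool sha1_ce_may_finalize(u64 prior_count, unsigned int len)
{
	return prior_count == 0 && len != 0 &&
	       (len % SHA1_BLOCK_SIZE) == 0;
}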
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 7f29fc031ea8..5df9d9d470ad 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S | |||
@@ -73,8 +73,8 @@ | |||
73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 | 73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, | 76 | * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, |
77 | * u8 *head, long bytes) | 77 | * int blocks) |
78 | */ | 78 | */ |
79 | ENTRY(sha2_ce_transform) | 79 | ENTRY(sha2_ce_transform) |
80 | /* load round constants */ | 80 | /* load round constants */ |
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform) | |||
85 | ld1 {v12.4s-v15.4s}, [x8] | 85 | ld1 {v12.4s-v15.4s}, [x8] |
86 | 86 | ||
87 | /* load state */ | 87 | /* load state */ |
88 | ldp dga, dgb, [x2] | 88 | ldp dga, dgb, [x0] |
89 | 89 | ||
90 | /* load partial input (if supplied) */ | 90 | /* load sha256_ce_state::finalize */ |
91 | cbz x3, 0f | 91 | ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] |
92 | ld1 {v16.4s-v19.4s}, [x3] | ||
93 | b 1f | ||
94 | 92 | ||
95 | /* load input */ | 93 | /* load input */ |
96 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 | 94 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 |
97 | sub w0, w0, #1 | 95 | sub w2, w2, #1 |
98 | 96 | ||
99 | 1: | ||
100 | CPU_LE( rev32 v16.16b, v16.16b ) | 97 | CPU_LE( rev32 v16.16b, v16.16b ) |
101 | CPU_LE( rev32 v17.16b, v17.16b ) | 98 | CPU_LE( rev32 v17.16b, v17.16b ) |
102 | CPU_LE( rev32 v18.16b, v18.16b ) | 99 | CPU_LE( rev32 v18.16b, v18.16b ) |
103 | CPU_LE( rev32 v19.16b, v19.16b ) | 100 | CPU_LE( rev32 v19.16b, v19.16b ) |
104 | 101 | ||
105 | 2: add t0.4s, v16.4s, v0.4s | 102 | 1: add t0.4s, v16.4s, v0.4s |
106 | mov dg0v.16b, dgav.16b | 103 | mov dg0v.16b, dgav.16b |
107 | mov dg1v.16b, dgbv.16b | 104 | mov dg1v.16b, dgbv.16b |
108 | 105 | ||
@@ -131,15 +128,15 @@ CPU_LE( rev32 v19.16b, v19.16b ) | |||
131 | add dgbv.4s, dgbv.4s, dg1v.4s | 128 | add dgbv.4s, dgbv.4s, dg1v.4s |
132 | 129 | ||
133 | /* handled all input blocks? */ | 130 | /* handled all input blocks? */ |
134 | cbnz w0, 0b | 131 | cbnz w2, 0b |
135 | 132 | ||
136 | /* | 133 | /* |
137 | * Final block: add padding and total bit count. | 134 | * Final block: add padding and total bit count. |
138 | * Skip if we have no total byte count in x4. In that case, the input | 135 | * Skip if the input size was not a round multiple of the block size; |
139 | * size was not a round multiple of the block size, and the padding is | 136 | * the padding is handled by the C code in that case. |
140 | * handled by the C code. | ||
141 | */ | 137 | */ |
142 | cbz x4, 3f | 138 | cbz x4, 3f |
139 | ldr x4, [x0, #:lo12:sha256_ce_offsetof_count] | ||
143 | movi v17.2d, #0 | 140 | movi v17.2d, #0 |
144 | mov x8, #0x80000000 | 141 | mov x8, #0x80000000 |
145 | movi v18.2d, #0 | 142 | movi v18.2d, #0 |
@@ -148,9 +145,9 @@ CPU_LE( rev32 v19.16b, v19.16b ) | |||
148 | mov x4, #0 | 145 | mov x4, #0 |
149 | mov v19.d[0], xzr | 146 | mov v19.d[0], xzr |
150 | mov v19.d[1], x7 | 147 | mov v19.d[1], x7 |
151 | b 2b | 148 | b 1b |
152 | 149 | ||
153 | /* store new state */ | 150 | /* store new state */ |
154 | 3: stp dga, dgb, [x2] | 151 | 3: stp dga, dgb, [x0] |
155 | ret | 152 | ret |
156 | ENDPROC(sha2_ce_transform) | 153 | ENDPROC(sha2_ce_transform) |
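
The interesting part of the rewritten asm is the new fast path: when the glue code sets sha256_ce_state::finalize (read above via the exported sha256_ce_offsetof_finalize symbol), the asm appends the padding block itself instead of returning to C. A rough standalone C sketch, not the kernel code, of the block it synthesizes when the input ended exactly on a 64-byte boundary:

    #include <stdint.h>
    #include <string.h>

    /*
     * Illustrative only: the final SHA-256 block fed through the same
     * compression loop when "finalize" is set. Since the message ended on
     * a block boundary, the whole block is padding: the 0x80 terminator,
     * zeros, then the 64-bit bit count in big-endian order.
     */
    static void sha256_synth_final_block(uint64_t total_bytes, uint8_t block[64])
    {
        uint64_t bits = total_bytes * 8;
        int i;

        memset(block, 0, 64);
        block[0] = 0x80;                        /* mandatory pad byte */
        for (i = 0; i < 8; i++)                 /* bytes 56..63, MSB first */
            block[56 + i] = (uint8_t)(bits >> (56 - 8 * i));
    }

The byte count itself is reloaded from sha256_ce_state::sst.count through the matching exported offset, as the ldr of x4 in the hunk above shows.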
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index ae67e88c28b9..1340e44c048b 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c | |||
@@ -12,206 +12,82 @@ | |||
12 | #include <asm/unaligned.h> | 12 | #include <asm/unaligned.h> |
13 | #include <crypto/internal/hash.h> | 13 | #include <crypto/internal/hash.h> |
14 | #include <crypto/sha.h> | 14 | #include <crypto/sha.h> |
15 | #include <crypto/sha256_base.h> | ||
15 | #include <linux/cpufeature.h> | 16 | #include <linux/cpufeature.h> |
16 | #include <linux/crypto.h> | 17 | #include <linux/crypto.h> |
17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
18 | 19 | ||
20 | #define ASM_EXPORT(sym, val) \ | ||
21 | asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); | ||
22 | |||
19 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); | 23 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); |
20 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | 24 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
21 | MODULE_LICENSE("GPL v2"); | 25 | MODULE_LICENSE("GPL v2"); |
22 | 26 | ||
23 | asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, | 27 | struct sha256_ce_state { |
24 | u8 *head, long bytes); | 28 | struct sha256_state sst; |
25 | 29 | u32 finalize; | |
26 | static int sha224_init(struct shash_desc *desc) | 30 | }; |
27 | { | ||
28 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
29 | |||
30 | *sctx = (struct sha256_state){ | ||
31 | .state = { | ||
32 | SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, | ||
33 | SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, | ||
34 | } | ||
35 | }; | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | static int sha256_init(struct shash_desc *desc) | ||
40 | { | ||
41 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
42 | |||
43 | *sctx = (struct sha256_state){ | ||
44 | .state = { | ||
45 | SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, | ||
46 | SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, | ||
47 | } | ||
48 | }; | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static int sha2_update(struct shash_desc *desc, const u8 *data, | ||
53 | unsigned int len) | ||
54 | { | ||
55 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
56 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
57 | |||
58 | sctx->count += len; | ||
59 | |||
60 | if ((partial + len) >= SHA256_BLOCK_SIZE) { | ||
61 | int blocks; | ||
62 | |||
63 | if (partial) { | ||
64 | int p = SHA256_BLOCK_SIZE - partial; | ||
65 | |||
66 | memcpy(sctx->buf + partial, data, p); | ||
67 | data += p; | ||
68 | len -= p; | ||
69 | } | ||
70 | 31 | ||
71 | blocks = len / SHA256_BLOCK_SIZE; | 32 | asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, |
72 | len %= SHA256_BLOCK_SIZE; | 33 | int blocks); |
73 | 34 | ||
74 | kernel_neon_begin_partial(28); | 35 | static int sha256_ce_update(struct shash_desc *desc, const u8 *data, |
75 | sha2_ce_transform(blocks, data, sctx->state, | 36 | unsigned int len) |
76 | partial ? sctx->buf : NULL, 0); | ||
77 | kernel_neon_end(); | ||
78 | |||
79 | data += blocks * SHA256_BLOCK_SIZE; | ||
80 | partial = 0; | ||
81 | } | ||
82 | if (len) | ||
83 | memcpy(sctx->buf + partial, data, len); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | static void sha2_final(struct shash_desc *desc) | ||
88 | { | 37 | { |
89 | static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; | 38 | struct sha256_ce_state *sctx = shash_desc_ctx(desc); |
90 | |||
91 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
92 | __be64 bits = cpu_to_be64(sctx->count << 3); | ||
93 | u32 padlen = SHA256_BLOCK_SIZE | ||
94 | - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE); | ||
95 | |||
96 | sha2_update(desc, padding, padlen); | ||
97 | sha2_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
98 | } | ||
99 | |||
100 | static int sha224_final(struct shash_desc *desc, u8 *out) | ||
101 | { | ||
102 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
103 | __be32 *dst = (__be32 *)out; | ||
104 | int i; | ||
105 | |||
106 | sha2_final(desc); | ||
107 | |||
108 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) | ||
109 | put_unaligned_be32(sctx->state[i], dst++); | ||
110 | |||
111 | *sctx = (struct sha256_state){}; | ||
112 | return 0; | ||
113 | } | ||
114 | 39 | ||
115 | static int sha256_final(struct shash_desc *desc, u8 *out) | 40 | sctx->finalize = 0; |
116 | { | 41 | kernel_neon_begin_partial(28); |
117 | struct sha256_state *sctx = shash_desc_ctx(desc); | 42 | sha256_base_do_update(desc, data, len, |
118 | __be32 *dst = (__be32 *)out; | 43 | (sha256_block_fn *)sha2_ce_transform); |
119 | int i; | 44 | kernel_neon_end(); |
120 | |||
121 | sha2_final(desc); | ||
122 | |||
123 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) | ||
124 | put_unaligned_be32(sctx->state[i], dst++); | ||
125 | 45 | ||
126 | *sctx = (struct sha256_state){}; | ||
127 | return 0; | 46 | return 0; |
128 | } | 47 | } |
129 | 48 | ||
130 | static void sha2_finup(struct shash_desc *desc, const u8 *data, | 49 | static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, |
131 | unsigned int len) | 50 | unsigned int len, u8 *out) |
132 | { | 51 | { |
133 | struct sha256_state *sctx = shash_desc_ctx(desc); | 52 | struct sha256_ce_state *sctx = shash_desc_ctx(desc); |
134 | int blocks; | 53 | bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); |
135 | 54 | ||
136 | if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { | 55 | ASM_EXPORT(sha256_ce_offsetof_count, |
137 | sha2_update(desc, data, len); | 56 | offsetof(struct sha256_ce_state, sst.count)); |
138 | sha2_final(desc); | 57 | ASM_EXPORT(sha256_ce_offsetof_finalize, |
139 | return; | 58 | offsetof(struct sha256_ce_state, finalize)); |
140 | } | ||
141 | 59 | ||
142 | /* | 60 | /* |
143 | * Use a fast path if the input is a multiple of 64 bytes. In | 61 | * Allow the asm code to perform the finalization if there is no |
144 | * this case, there is no need to copy data around, and we can | 62 | * partial data and the input is a round multiple of the block size. |
145 | * perform the entire digest calculation in a single invocation | ||
146 | * of sha2_ce_transform() | ||
147 | */ | 63 | */ |
148 | blocks = len / SHA256_BLOCK_SIZE; | 64 | sctx->finalize = finalize; |
149 | 65 | ||
150 | kernel_neon_begin_partial(28); | 66 | kernel_neon_begin_partial(28); |
151 | sha2_ce_transform(blocks, data, sctx->state, NULL, len); | 67 | sha256_base_do_update(desc, data, len, |
68 | (sha256_block_fn *)sha2_ce_transform); | ||
69 | if (!finalize) | ||
70 | sha256_base_do_finalize(desc, | ||
71 | (sha256_block_fn *)sha2_ce_transform); | ||
152 | kernel_neon_end(); | 72 | kernel_neon_end(); |
73 | return sha256_base_finish(desc, out); | ||
153 | } | 74 | } |
154 | 75 | ||
155 | static int sha224_finup(struct shash_desc *desc, const u8 *data, | 76 | static int sha256_ce_final(struct shash_desc *desc, u8 *out) |
156 | unsigned int len, u8 *out) | ||
157 | { | 77 | { |
158 | struct sha256_state *sctx = shash_desc_ctx(desc); | 78 | kernel_neon_begin_partial(28); |
159 | __be32 *dst = (__be32 *)out; | 79 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); |
160 | int i; | 80 | kernel_neon_end(); |
161 | 81 | return sha256_base_finish(desc, out); | |
162 | sha2_finup(desc, data, len); | ||
163 | |||
164 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) | ||
165 | put_unaligned_be32(sctx->state[i], dst++); | ||
166 | |||
167 | *sctx = (struct sha256_state){}; | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | static int sha256_finup(struct shash_desc *desc, const u8 *data, | ||
172 | unsigned int len, u8 *out) | ||
173 | { | ||
174 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
175 | __be32 *dst = (__be32 *)out; | ||
176 | int i; | ||
177 | |||
178 | sha2_finup(desc, data, len); | ||
179 | |||
180 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) | ||
181 | put_unaligned_be32(sctx->state[i], dst++); | ||
182 | |||
183 | *sctx = (struct sha256_state){}; | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static int sha2_export(struct shash_desc *desc, void *out) | ||
188 | { | ||
189 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
190 | struct sha256_state *dst = out; | ||
191 | |||
192 | *dst = *sctx; | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | static int sha2_import(struct shash_desc *desc, const void *in) | ||
197 | { | ||
198 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
199 | struct sha256_state const *src = in; | ||
200 | |||
201 | *sctx = *src; | ||
202 | return 0; | ||
203 | } | 82 | } |
204 | 83 | ||
205 | static struct shash_alg algs[] = { { | 84 | static struct shash_alg algs[] = { { |
206 | .init = sha224_init, | 85 | .init = sha224_base_init, |
207 | .update = sha2_update, | 86 | .update = sha256_ce_update, |
208 | .final = sha224_final, | 87 | .final = sha256_ce_final, |
209 | .finup = sha224_finup, | 88 | .finup = sha256_ce_finup, |
210 | .export = sha2_export, | 89 | .descsize = sizeof(struct sha256_ce_state), |
211 | .import = sha2_import, | ||
212 | .descsize = sizeof(struct sha256_state), | ||
213 | .digestsize = SHA224_DIGEST_SIZE, | 90 | .digestsize = SHA224_DIGEST_SIZE, |
214 | .statesize = sizeof(struct sha256_state), | ||
215 | .base = { | 91 | .base = { |
216 | .cra_name = "sha224", | 92 | .cra_name = "sha224", |
217 | .cra_driver_name = "sha224-ce", | 93 | .cra_driver_name = "sha224-ce", |
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { { | |||
221 | .cra_module = THIS_MODULE, | 97 | .cra_module = THIS_MODULE, |
222 | } | 98 | } |
223 | }, { | 99 | }, { |
224 | .init = sha256_init, | 100 | .init = sha256_base_init, |
225 | .update = sha2_update, | 101 | .update = sha256_ce_update, |
226 | .final = sha256_final, | 102 | .final = sha256_ce_final, |
227 | .finup = sha256_finup, | 103 | .finup = sha256_ce_finup, |
228 | .export = sha2_export, | 104 | .descsize = sizeof(struct sha256_ce_state), |
229 | .import = sha2_import, | ||
230 | .descsize = sizeof(struct sha256_state), | ||
231 | .digestsize = SHA256_DIGEST_SIZE, | 105 | .digestsize = SHA256_DIGEST_SIZE, |
232 | .statesize = sizeof(struct sha256_state), | ||
233 | .base = { | 106 | .base = { |
234 | .cra_name = "sha256", | 107 | .cra_name = "sha256", |
235 | .cra_driver_name = "sha256-ce", | 108 | .cra_driver_name = "sha256-ce", |
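
The ASM_EXPORT() trick above deserves a note: rather than sharing a header with the .S file, the glue code emits each struct offset as an absolute assembler symbol, which the asm then consumes as an immediate (the #:lo12:sha256_ce_offsetof_* references earlier). A standalone sketch of the same idiom, with hypothetical demo_* names; the "I" constraint depends on the target's immediate range (arm64 here):

    #include <stddef.h>

    /* Hypothetical stand-in for struct sha256_ce_state; only the layout matters. */
    struct demo_state {
        struct { unsigned long long count; unsigned int h[8]; } sst;
        unsigned int finalize;
    };

    /* Same macro as the glue code: define a global absolute symbol whose
     * value is a compile-time constant, visible to separately assembled
     * .S files in the same module. */
    #define ASM_EXPORT(sym, val) \
        asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));

    void demo_export_offsets(void)
    {
        ASM_EXPORT(demo_offsetof_count,
                   offsetof(struct demo_state, sst.count));
        ASM_EXPORT(demo_offsetof_finalize,
                   offsetof(struct demo_state, finalize));
    }

Keeping the exports next to the C struct definition means the offsets can never drift out of sync with the layout the asm assumes.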
diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile index a74f76d85a2f..f7aa9d5d3b87 100644 --- a/arch/mips/cavium-octeon/crypto/Makefile +++ b/arch/mips/cavium-octeon/crypto/Makefile | |||
@@ -4,4 +4,7 @@ | |||
4 | 4 | ||
5 | obj-y += octeon-crypto.o | 5 | obj-y += octeon-crypto.o |
6 | 6 | ||
7 | obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o | 7 | obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o |
8 | obj-$(CONFIG_CRYPTO_SHA1_OCTEON) += octeon-sha1.o | ||
9 | obj-$(CONFIG_CRYPTO_SHA256_OCTEON) += octeon-sha256.o | ||
10 | obj-$(CONFIG_CRYPTO_SHA512_OCTEON) += octeon-sha512.o | ||
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c index 7c82ff463b65..f66bd1adc7ff 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c | |||
@@ -17,7 +17,7 @@ | |||
17 | * crypto operations in calls to octeon_crypto_enable/disable in order to make | 17 | * crypto operations in calls to octeon_crypto_enable/disable in order to make |
18 | * sure the state of COP2 isn't corrupted if userspace is also performing | 18 | * sure the state of COP2 isn't corrupted if userspace is also performing |
19 | * hardware crypto operations. Allocate the state parameter on the stack. | 19 | * hardware crypto operations. Allocate the state parameter on the stack. |
20 | * Preemption must be disabled to prevent context switches. | 20 | * Returns with preemption disabled. |
21 | * | 21 | * |
22 | * @state: Pointer to state structure to store current COP2 state in. | 22 | * @state: Pointer to state structure to store current COP2 state in. |
23 | * | 23 | * |
@@ -28,6 +28,7 @@ unsigned long octeon_crypto_enable(struct octeon_cop2_state *state) | |||
28 | int status; | 28 | int status; |
29 | unsigned long flags; | 29 | unsigned long flags; |
30 | 30 | ||
31 | preempt_disable(); | ||
31 | local_irq_save(flags); | 32 | local_irq_save(flags); |
32 | status = read_c0_status(); | 33 | status = read_c0_status(); |
33 | write_c0_status(status | ST0_CU2); | 34 | write_c0_status(status | ST0_CU2); |
@@ -62,5 +63,6 @@ void octeon_crypto_disable(struct octeon_cop2_state *state, | |||
62 | else | 63 | else |
63 | write_c0_status(read_c0_status() & ~ST0_CU2); | 64 | write_c0_status(read_c0_status() & ~ST0_CU2); |
64 | local_irq_restore(flags); | 65 | local_irq_restore(flags); |
66 | preempt_enable(); | ||
65 | } | 67 | } |
66 | EXPORT_SYMBOL_GPL(octeon_crypto_disable); | 68 | EXPORT_SYMBOL_GPL(octeon_crypto_disable); |
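
With preempt_disable()/preempt_enable() folded into the enable/disable pair, every caller gets shorter; the octeon-md5.c hunks below simply delete their own bracketing. The intended calling pattern, as a kernel-context sketch rather than new API:

    #include "octeon-crypto.h"          /* kernel build context assumed */

    static void octeon_hash_block_sketch(void)
    {
        struct octeon_cop2_state state; /* COP2 save area on the stack, per the comment above */
        unsigned long flags;

        flags = octeon_crypto_enable(&state);  /* saves COP2, returns with preemption off */
        /* ... dmtc2/dmfc2 accesses to the hash unit go here ... */
        octeon_crypto_disable(&state, flags);  /* restores COP2, re-enables preemption */
    }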
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h index e2a4aece9c24..355072535110 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h | |||
@@ -5,7 +5,8 @@ | |||
5 | * | 5 | * |
6 | * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved. | 6 | * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved. |
7 | * | 7 | * |
8 | * MD5 instruction definitions added by Aaro Koskinen <aaro.koskinen@iki.fi>. | 8 | * MD5/SHA1/SHA256/SHA512 instruction definitions added by |
9 | * Aaro Koskinen <aaro.koskinen@iki.fi>. | ||
9 | * | 10 | * |
10 | */ | 11 | */ |
11 | #ifndef __LINUX_OCTEON_CRYPTO_H | 12 | #ifndef __LINUX_OCTEON_CRYPTO_H |
@@ -21,11 +22,11 @@ extern void octeon_crypto_disable(struct octeon_cop2_state *state, | |||
21 | unsigned long flags); | 22 | unsigned long flags); |
22 | 23 | ||
23 | /* | 24 | /* |
24 | * Macros needed to implement MD5: | 25 | * Macros needed to implement MD5/SHA1/SHA256: |
25 | */ | 26 | */ |
26 | 27 | ||
27 | /* | 28 | /* |
28 | * The index can be 0-1. | 29 | * The index can be 0-1 (MD5), 0-2 (SHA1), or 0-3 (SHA256). |
29 | */ | 30 | */ |
30 | #define write_octeon_64bit_hash_dword(value, index) \ | 31 | #define write_octeon_64bit_hash_dword(value, index) \ |
31 | do { \ | 32 | do { \ |
@@ -36,7 +37,7 @@ do { \ | |||
36 | } while (0) | 37 | } while (0) |
37 | 38 | ||
38 | /* | 39 | /* |
39 | * The index can be 0-1. | 40 | * The index can be 0-1 (MD5), 0-2 (SHA1), or 0-3 (SHA256). |
40 | */ | 41 | */ |
41 | #define read_octeon_64bit_hash_dword(index) \ | 42 | #define read_octeon_64bit_hash_dword(index) \ |
42 | ({ \ | 43 | ({ \ |
@@ -72,4 +73,78 @@ do { \ | |||
72 | : [rt] "d" (value)); \ | 73 | : [rt] "d" (value)); \ |
73 | } while (0) | 74 | } while (0) |
74 | 75 | ||
76 | /* | ||
77 | * The value is the final block dword (64-bit). | ||
78 | */ | ||
79 | #define octeon_sha1_start(value) \ | ||
80 | do { \ | ||
81 | __asm__ __volatile__ ( \ | ||
82 | "dmtc2 %[rt],0x4057" \ | ||
83 | : \ | ||
84 | : [rt] "d" (value)); \ | ||
85 | } while (0) | ||
86 | |||
87 | /* | ||
88 | * The value is the final block dword (64-bit). | ||
89 | */ | ||
90 | #define octeon_sha256_start(value) \ | ||
91 | do { \ | ||
92 | __asm__ __volatile__ ( \ | ||
93 | "dmtc2 %[rt],0x404f" \ | ||
94 | : \ | ||
95 | : [rt] "d" (value)); \ | ||
96 | } while (0) | ||
97 | |||
98 | /* | ||
99 | * Macros needed to implement SHA512: | ||
100 | */ | ||
101 | |||
102 | /* | ||
103 | * The index can be 0-7. | ||
104 | */ | ||
105 | #define write_octeon_64bit_hash_sha512(value, index) \ | ||
106 | do { \ | ||
107 | __asm__ __volatile__ ( \ | ||
108 | "dmtc2 %[rt],0x0250+" STR(index) \ | ||
109 | : \ | ||
110 | : [rt] "d" (value)); \ | ||
111 | } while (0) | ||
112 | |||
113 | /* | ||
114 | * The index can be 0-7. | ||
115 | */ | ||
116 | #define read_octeon_64bit_hash_sha512(index) \ | ||
117 | ({ \ | ||
118 | u64 __value; \ | ||
119 | \ | ||
120 | __asm__ __volatile__ ( \ | ||
121 | "dmfc2 %[rt],0x0250+" STR(index) \ | ||
122 | : [rt] "=d" (__value) \ | ||
123 | : ); \ | ||
124 | \ | ||
125 | __value; \ | ||
126 | }) | ||
127 | |||
128 | /* | ||
129 | * The index can be 0-14. | ||
130 | */ | ||
131 | #define write_octeon_64bit_block_sha512(value, index) \ | ||
132 | do { \ | ||
133 | __asm__ __volatile__ ( \ | ||
134 | "dmtc2 %[rt],0x0240+" STR(index) \ | ||
135 | : \ | ||
136 | : [rt] "d" (value)); \ | ||
137 | } while (0) | ||
138 | |||
139 | /* | ||
140 | * The value is the final block dword (64-bit). | ||
141 | */ | ||
142 | #define octeon_sha512_start(value) \ | ||
143 | do { \ | ||
144 | __asm__ __volatile__ ( \ | ||
145 | "dmtc2 %[rt],0x424f" \ | ||
146 | : \ | ||
147 | : [rt] "d" (value)); \ | ||
148 | } while (0) | ||
149 | |||
75 | #endif /* __LINUX_OCTEON_CRYPTO_H */ | 150 | #endif /* __LINUX_OCTEON_CRYPTO_H */ |
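
Note that STR(index) pastes the register index straight into the dmtc2/dmfc2 opcode string, so the index must be a compile-time literal: the CP2 register number is encoded in the instruction itself. That is presumably why the SHA drivers below unroll every state store/load by hand. A kernel-context sketch of the constraint:

    #include "octeon-crypto.h"   /* kernel build context assumed */

    /*
     * A runtime loop such as
     *     for (i = 0; i < 8; i++)
     *         write_octeon_64bit_hash_sha512(state[i], i);
     * cannot work: "0x0250+" STR(i) would hand the assembler a C variable
     * name rather than a register number. Hence the fully unrolled form,
     * mirroring octeon_sha512_store_hash() below:
     */
    static void sha512_store_state_sketch(const u64 state[8])
    {
        write_octeon_64bit_hash_sha512(state[0], 0);
        write_octeon_64bit_hash_sha512(state[1], 1);
        write_octeon_64bit_hash_sha512(state[2], 2);
        write_octeon_64bit_hash_sha512(state[3], 3);
        write_octeon_64bit_hash_sha512(state[4], 4);
        write_octeon_64bit_hash_sha512(state[5], 5);
        write_octeon_64bit_hash_sha512(state[6], 6);
        write_octeon_64bit_hash_sha512(state[7], 7);
    }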
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c index b909881ba6c1..12dccdb38286 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-md5.c +++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c | |||
@@ -97,8 +97,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data, | |||
97 | memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data, | 97 | memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data, |
98 | avail); | 98 | avail); |
99 | 99 | ||
100 | local_bh_disable(); | ||
101 | preempt_disable(); | ||
102 | flags = octeon_crypto_enable(&state); | 100 | flags = octeon_crypto_enable(&state); |
103 | octeon_md5_store_hash(mctx); | 101 | octeon_md5_store_hash(mctx); |
104 | 102 | ||
@@ -114,8 +112,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data, | |||
114 | 112 | ||
115 | octeon_md5_read_hash(mctx); | 113 | octeon_md5_read_hash(mctx); |
116 | octeon_crypto_disable(&state, flags); | 114 | octeon_crypto_disable(&state, flags); |
117 | preempt_enable(); | ||
118 | local_bh_enable(); | ||
119 | 115 | ||
120 | memcpy(mctx->block, data, len); | 116 | memcpy(mctx->block, data, len); |
121 | 117 | ||
@@ -133,8 +129,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out) | |||
133 | 129 | ||
134 | *p++ = 0x80; | 130 | *p++ = 0x80; |
135 | 131 | ||
136 | local_bh_disable(); | ||
137 | preempt_disable(); | ||
138 | flags = octeon_crypto_enable(&state); | 132 | flags = octeon_crypto_enable(&state); |
139 | octeon_md5_store_hash(mctx); | 133 | octeon_md5_store_hash(mctx); |
140 | 134 | ||
@@ -152,8 +146,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out) | |||
152 | 146 | ||
153 | octeon_md5_read_hash(mctx); | 147 | octeon_md5_read_hash(mctx); |
154 | octeon_crypto_disable(&state, flags); | 148 | octeon_crypto_disable(&state, flags); |
155 | preempt_enable(); | ||
156 | local_bh_enable(); | ||
157 | 149 | ||
158 | memcpy(out, mctx->hash, sizeof(mctx->hash)); | 150 | memcpy(out, mctx->hash, sizeof(mctx->hash)); |
159 | memset(mctx, 0, sizeof(*mctx)); | 151 | memset(mctx, 0, sizeof(*mctx)); |
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c new file mode 100644 index 000000000000..2b74b5b67cae --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c | |||
@@ -0,0 +1,241 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * SHA1 Secure Hash Algorithm. | ||
5 | * | ||
6 | * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. | ||
7 | * | ||
8 | * Based on crypto/sha1_generic.c, which is: | ||
9 | * | ||
10 | * Copyright (c) Alan Smithee. | ||
11 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
12 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License as published by the Free | ||
16 | * Software Foundation; either version 2 of the License, or (at your option) | ||
17 | * any later version. | ||
18 | */ | ||
19 | |||
20 | #include <linux/mm.h> | ||
21 | #include <crypto/sha.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <asm/byteorder.h> | ||
26 | #include <asm/octeon/octeon.h> | ||
27 | #include <crypto/internal/hash.h> | ||
28 | |||
29 | #include "octeon-crypto.h" | ||
30 | |||
31 | /* | ||
32 | * We pass everything as 64-bit. OCTEON can handle misaligned data. | ||
33 | */ | ||
34 | |||
35 | static void octeon_sha1_store_hash(struct sha1_state *sctx) | ||
36 | { | ||
37 | u64 *hash = (u64 *)sctx->state; | ||
38 | union { | ||
39 | u32 word[2]; | ||
40 | u64 dword; | ||
41 | } hash_tail = { { sctx->state[4], } }; | ||
42 | |||
43 | write_octeon_64bit_hash_dword(hash[0], 0); | ||
44 | write_octeon_64bit_hash_dword(hash[1], 1); | ||
45 | write_octeon_64bit_hash_dword(hash_tail.dword, 2); | ||
46 | memzero_explicit(&hash_tail.word[0], sizeof(hash_tail.word[0])); | ||
47 | } | ||
48 | |||
49 | static void octeon_sha1_read_hash(struct sha1_state *sctx) | ||
50 | { | ||
51 | u64 *hash = (u64 *)sctx->state; | ||
52 | union { | ||
53 | u32 word[2]; | ||
54 | u64 dword; | ||
55 | } hash_tail; | ||
56 | |||
57 | hash[0] = read_octeon_64bit_hash_dword(0); | ||
58 | hash[1] = read_octeon_64bit_hash_dword(1); | ||
59 | hash_tail.dword = read_octeon_64bit_hash_dword(2); | ||
60 | sctx->state[4] = hash_tail.word[0]; | ||
61 | memzero_explicit(&hash_tail.dword, sizeof(hash_tail.dword)); | ||
62 | } | ||
63 | |||
64 | static void octeon_sha1_transform(const void *_block) | ||
65 | { | ||
66 | const u64 *block = _block; | ||
67 | |||
68 | write_octeon_64bit_block_dword(block[0], 0); | ||
69 | write_octeon_64bit_block_dword(block[1], 1); | ||
70 | write_octeon_64bit_block_dword(block[2], 2); | ||
71 | write_octeon_64bit_block_dword(block[3], 3); | ||
72 | write_octeon_64bit_block_dword(block[4], 4); | ||
73 | write_octeon_64bit_block_dword(block[5], 5); | ||
74 | write_octeon_64bit_block_dword(block[6], 6); | ||
75 | octeon_sha1_start(block[7]); | ||
76 | } | ||
77 | |||
78 | static int octeon_sha1_init(struct shash_desc *desc) | ||
79 | { | ||
80 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
81 | |||
82 | sctx->state[0] = SHA1_H0; | ||
83 | sctx->state[1] = SHA1_H1; | ||
84 | sctx->state[2] = SHA1_H2; | ||
85 | sctx->state[3] = SHA1_H3; | ||
86 | sctx->state[4] = SHA1_H4; | ||
87 | sctx->count = 0; | ||
88 | |||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | static void __octeon_sha1_update(struct sha1_state *sctx, const u8 *data, | ||
93 | unsigned int len) | ||
94 | { | ||
95 | unsigned int partial; | ||
96 | unsigned int done; | ||
97 | const u8 *src; | ||
98 | |||
99 | partial = sctx->count % SHA1_BLOCK_SIZE; | ||
100 | sctx->count += len; | ||
101 | done = 0; | ||
102 | src = data; | ||
103 | |||
104 | if ((partial + len) >= SHA1_BLOCK_SIZE) { | ||
105 | if (partial) { | ||
106 | done = -partial; | ||
107 | memcpy(sctx->buffer + partial, data, | ||
108 | done + SHA1_BLOCK_SIZE); | ||
109 | src = sctx->buffer; | ||
110 | } | ||
111 | |||
112 | do { | ||
113 | octeon_sha1_transform(src); | ||
114 | done += SHA1_BLOCK_SIZE; | ||
115 | src = data + done; | ||
116 | } while (done + SHA1_BLOCK_SIZE <= len); | ||
117 | |||
118 | partial = 0; | ||
119 | } | ||
120 | memcpy(sctx->buffer + partial, src, len - done); | ||
121 | } | ||
122 | |||
123 | static int octeon_sha1_update(struct shash_desc *desc, const u8 *data, | ||
124 | unsigned int len) | ||
125 | { | ||
126 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
127 | struct octeon_cop2_state state; | ||
128 | unsigned long flags; | ||
129 | |||
130 | /* | ||
131 | * Small updates never reach the crypto engine, so the generic sha1 is | ||
132 | * faster because of the heavyweight octeon_crypto_enable() / | ||
133 | * octeon_crypto_disable(). | ||
134 | */ | ||
135 | if ((sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) | ||
136 | return crypto_sha1_update(desc, data, len); | ||
137 | |||
138 | flags = octeon_crypto_enable(&state); | ||
139 | octeon_sha1_store_hash(sctx); | ||
140 | |||
141 | __octeon_sha1_update(sctx, data, len); | ||
142 | |||
143 | octeon_sha1_read_hash(sctx); | ||
144 | octeon_crypto_disable(&state, flags); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | static int octeon_sha1_final(struct shash_desc *desc, u8 *out) | ||
150 | { | ||
151 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
152 | static const u8 padding[64] = { 0x80, }; | ||
153 | struct octeon_cop2_state state; | ||
154 | __be32 *dst = (__be32 *)out; | ||
155 | unsigned int pad_len; | ||
156 | unsigned long flags; | ||
157 | unsigned int index; | ||
158 | __be64 bits; | ||
159 | int i; | ||
160 | |||
161 | /* Save number of bits. */ | ||
162 | bits = cpu_to_be64(sctx->count << 3); | ||
163 | |||
164 | /* Pad out to 56 mod 64. */ | ||
165 | index = sctx->count & 0x3f; | ||
166 | pad_len = (index < 56) ? (56 - index) : ((64+56) - index); | ||
167 | |||
168 | flags = octeon_crypto_enable(&state); | ||
169 | octeon_sha1_store_hash(sctx); | ||
170 | |||
171 | __octeon_sha1_update(sctx, padding, pad_len); | ||
172 | |||
173 | /* Append length (before padding). */ | ||
174 | __octeon_sha1_update(sctx, (const u8 *)&bits, sizeof(bits)); | ||
175 | |||
176 | octeon_sha1_read_hash(sctx); | ||
177 | octeon_crypto_disable(&state, flags); | ||
178 | |||
179 | /* Store state in digest */ | ||
180 | for (i = 0; i < 5; i++) | ||
181 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
182 | |||
183 | /* Zeroize sensitive information. */ | ||
184 | memset(sctx, 0, sizeof(*sctx)); | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | static int octeon_sha1_export(struct shash_desc *desc, void *out) | ||
190 | { | ||
191 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
192 | |||
193 | memcpy(out, sctx, sizeof(*sctx)); | ||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | static int octeon_sha1_import(struct shash_desc *desc, const void *in) | ||
198 | { | ||
199 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
200 | |||
201 | memcpy(sctx, in, sizeof(*sctx)); | ||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | static struct shash_alg octeon_sha1_alg = { | ||
206 | .digestsize = SHA1_DIGEST_SIZE, | ||
207 | .init = octeon_sha1_init, | ||
208 | .update = octeon_sha1_update, | ||
209 | .final = octeon_sha1_final, | ||
210 | .export = octeon_sha1_export, | ||
211 | .import = octeon_sha1_import, | ||
212 | .descsize = sizeof(struct sha1_state), | ||
213 | .statesize = sizeof(struct sha1_state), | ||
214 | .base = { | ||
215 | .cra_name = "sha1", | ||
216 | .cra_driver_name = "octeon-sha1", | ||
217 | .cra_priority = OCTEON_CR_OPCODE_PRIORITY, | ||
218 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
219 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
220 | .cra_module = THIS_MODULE, | ||
221 | } | ||
222 | }; | ||
223 | |||
224 | static int __init octeon_sha1_mod_init(void) | ||
225 | { | ||
226 | if (!octeon_has_crypto()) | ||
227 | return -ENOTSUPP; | ||
228 | return crypto_register_shash(&octeon_sha1_alg); | ||
229 | } | ||
230 | |||
231 | static void __exit octeon_sha1_mod_fini(void) | ||
232 | { | ||
233 | crypto_unregister_shash(&octeon_sha1_alg); | ||
234 | } | ||
235 | |||
236 | module_init(octeon_sha1_mod_init); | ||
237 | module_exit(octeon_sha1_mod_fini); | ||
238 | |||
239 | MODULE_LICENSE("GPL"); | ||
240 | MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (OCTEON)"); | ||
241 | MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); | ||
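
With cra_priority set to OCTEON_CR_OPCODE_PRIORITY, any in-kernel "sha1" user is routed to this driver automatically once it registers. For orientation, a minimal shash consumer against the 4.1-era API looks roughly like this (kernel-context sketch, error handling trimmed to the essentials):

    #include <crypto/hash.h>
    #include <crypto/sha.h>
    #include <linux/err.h>
    #include <linux/slab.h>

    static int sha1_digest_sketch(const u8 *data, unsigned int len,
                                  u8 out[SHA1_DIGEST_SIZE])
    {
        struct crypto_shash *tfm;
        struct shash_desc *desc;
        int err;

        tfm = crypto_alloc_shash("sha1", 0, 0); /* resolves to the highest-priority driver */
        if (IS_ERR(tfm))
            return PTR_ERR(tfm);

        desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
        if (!desc) {
            crypto_free_shash(tfm);
            return -ENOMEM;
        }
        desc->tfm = tfm;
        desc->flags = 0;                        /* not allowed to sleep */

        err = crypto_shash_digest(desc, data, len, out);

        kfree(desc);
        crypto_free_shash(tfm);
        return err;
    }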
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c new file mode 100644 index 000000000000..97e96fead08a --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c | |||
@@ -0,0 +1,280 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * SHA-224 and SHA-256 Secure Hash Algorithm. | ||
5 | * | ||
6 | * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. | ||
7 | * | ||
8 | * Based on crypto/sha256_generic.c, which is: | ||
9 | * | ||
10 | * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> | ||
11 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
12 | * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> | ||
13 | * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com> | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify it | ||
16 | * under the terms of the GNU General Public License as published by the Free | ||
17 | * Software Foundation; either version 2 of the License, or (at your option) | ||
18 | * any later version. | ||
19 | */ | ||
20 | |||
21 | #include <linux/mm.h> | ||
22 | #include <crypto/sha.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <asm/byteorder.h> | ||
27 | #include <asm/octeon/octeon.h> | ||
28 | #include <crypto/internal/hash.h> | ||
29 | |||
30 | #include "octeon-crypto.h" | ||
31 | |||
32 | /* | ||
33 | * We pass everything as 64-bit. OCTEON can handle misaligned data. | ||
34 | */ | ||
35 | |||
36 | static void octeon_sha256_store_hash(struct sha256_state *sctx) | ||
37 | { | ||
38 | u64 *hash = (u64 *)sctx->state; | ||
39 | |||
40 | write_octeon_64bit_hash_dword(hash[0], 0); | ||
41 | write_octeon_64bit_hash_dword(hash[1], 1); | ||
42 | write_octeon_64bit_hash_dword(hash[2], 2); | ||
43 | write_octeon_64bit_hash_dword(hash[3], 3); | ||
44 | } | ||
45 | |||
46 | static void octeon_sha256_read_hash(struct sha256_state *sctx) | ||
47 | { | ||
48 | u64 *hash = (u64 *)sctx->state; | ||
49 | |||
50 | hash[0] = read_octeon_64bit_hash_dword(0); | ||
51 | hash[1] = read_octeon_64bit_hash_dword(1); | ||
52 | hash[2] = read_octeon_64bit_hash_dword(2); | ||
53 | hash[3] = read_octeon_64bit_hash_dword(3); | ||
54 | } | ||
55 | |||
56 | static void octeon_sha256_transform(const void *_block) | ||
57 | { | ||
58 | const u64 *block = _block; | ||
59 | |||
60 | write_octeon_64bit_block_dword(block[0], 0); | ||
61 | write_octeon_64bit_block_dword(block[1], 1); | ||
62 | write_octeon_64bit_block_dword(block[2], 2); | ||
63 | write_octeon_64bit_block_dword(block[3], 3); | ||
64 | write_octeon_64bit_block_dword(block[4], 4); | ||
65 | write_octeon_64bit_block_dword(block[5], 5); | ||
66 | write_octeon_64bit_block_dword(block[6], 6); | ||
67 | octeon_sha256_start(block[7]); | ||
68 | } | ||
69 | |||
70 | static int octeon_sha224_init(struct shash_desc *desc) | ||
71 | { | ||
72 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
73 | |||
74 | sctx->state[0] = SHA224_H0; | ||
75 | sctx->state[1] = SHA224_H1; | ||
76 | sctx->state[2] = SHA224_H2; | ||
77 | sctx->state[3] = SHA224_H3; | ||
78 | sctx->state[4] = SHA224_H4; | ||
79 | sctx->state[5] = SHA224_H5; | ||
80 | sctx->state[6] = SHA224_H6; | ||
81 | sctx->state[7] = SHA224_H7; | ||
82 | sctx->count = 0; | ||
83 | |||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | static int octeon_sha256_init(struct shash_desc *desc) | ||
88 | { | ||
89 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
90 | |||
91 | sctx->state[0] = SHA256_H0; | ||
92 | sctx->state[1] = SHA256_H1; | ||
93 | sctx->state[2] = SHA256_H2; | ||
94 | sctx->state[3] = SHA256_H3; | ||
95 | sctx->state[4] = SHA256_H4; | ||
96 | sctx->state[5] = SHA256_H5; | ||
97 | sctx->state[6] = SHA256_H6; | ||
98 | sctx->state[7] = SHA256_H7; | ||
99 | sctx->count = 0; | ||
100 | |||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static void __octeon_sha256_update(struct sha256_state *sctx, const u8 *data, | ||
105 | unsigned int len) | ||
106 | { | ||
107 | unsigned int partial; | ||
108 | unsigned int done; | ||
109 | const u8 *src; | ||
110 | |||
111 | partial = sctx->count % SHA256_BLOCK_SIZE; | ||
112 | sctx->count += len; | ||
113 | done = 0; | ||
114 | src = data; | ||
115 | |||
116 | if ((partial + len) >= SHA256_BLOCK_SIZE) { | ||
117 | if (partial) { | ||
118 | done = -partial; | ||
119 | memcpy(sctx->buf + partial, data, | ||
120 | done + SHA256_BLOCK_SIZE); | ||
121 | src = sctx->buf; | ||
122 | } | ||
123 | |||
124 | do { | ||
125 | octeon_sha256_transform(src); | ||
126 | done += SHA256_BLOCK_SIZE; | ||
127 | src = data + done; | ||
128 | } while (done + SHA256_BLOCK_SIZE <= len); | ||
129 | |||
130 | partial = 0; | ||
131 | } | ||
132 | memcpy(sctx->buf + partial, src, len - done); | ||
133 | } | ||
134 | |||
135 | static int octeon_sha256_update(struct shash_desc *desc, const u8 *data, | ||
136 | unsigned int len) | ||
137 | { | ||
138 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
139 | struct octeon_cop2_state state; | ||
140 | unsigned long flags; | ||
141 | |||
142 | /* | ||
143 | * Small updates never reach the crypto engine, so the generic sha256 is | ||
144 | * faster because of the heavyweight octeon_crypto_enable() / | ||
145 | * octeon_crypto_disable(). | ||
146 | */ | ||
147 | if ((sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) | ||
148 | return crypto_sha256_update(desc, data, len); | ||
149 | |||
150 | flags = octeon_crypto_enable(&state); | ||
151 | octeon_sha256_store_hash(sctx); | ||
152 | |||
153 | __octeon_sha256_update(sctx, data, len); | ||
154 | |||
155 | octeon_sha256_read_hash(sctx); | ||
156 | octeon_crypto_disable(&state, flags); | ||
157 | |||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | static int octeon_sha256_final(struct shash_desc *desc, u8 *out) | ||
162 | { | ||
163 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
164 | static const u8 padding[64] = { 0x80, }; | ||
165 | struct octeon_cop2_state state; | ||
166 | __be32 *dst = (__be32 *)out; | ||
167 | unsigned int pad_len; | ||
168 | unsigned long flags; | ||
169 | unsigned int index; | ||
170 | __be64 bits; | ||
171 | int i; | ||
172 | |||
173 | /* Save number of bits. */ | ||
174 | bits = cpu_to_be64(sctx->count << 3); | ||
175 | |||
176 | /* Pad out to 56 mod 64. */ | ||
177 | index = sctx->count & 0x3f; | ||
178 | pad_len = (index < 56) ? (56 - index) : ((64+56) - index); | ||
179 | |||
180 | flags = octeon_crypto_enable(&state); | ||
181 | octeon_sha256_store_hash(sctx); | ||
182 | |||
183 | __octeon_sha256_update(sctx, padding, pad_len); | ||
184 | |||
185 | /* Append length (before padding). */ | ||
186 | __octeon_sha256_update(sctx, (const u8 *)&bits, sizeof(bits)); | ||
187 | |||
188 | octeon_sha256_read_hash(sctx); | ||
189 | octeon_crypto_disable(&state, flags); | ||
190 | |||
191 | /* Store state in digest */ | ||
192 | for (i = 0; i < 8; i++) | ||
193 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
194 | |||
195 | /* Zeroize sensitive information. */ | ||
196 | memset(sctx, 0, sizeof(*sctx)); | ||
197 | |||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | static int octeon_sha224_final(struct shash_desc *desc, u8 *hash) | ||
202 | { | ||
203 | u8 D[SHA256_DIGEST_SIZE]; | ||
204 | |||
205 | octeon_sha256_final(desc, D); | ||
206 | |||
207 | memcpy(hash, D, SHA224_DIGEST_SIZE); | ||
208 | memzero_explicit(D, SHA256_DIGEST_SIZE); | ||
209 | |||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | static int octeon_sha256_export(struct shash_desc *desc, void *out) | ||
214 | { | ||
215 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
216 | |||
217 | memcpy(out, sctx, sizeof(*sctx)); | ||
218 | return 0; | ||
219 | } | ||
220 | |||
221 | static int octeon_sha256_import(struct shash_desc *desc, const void *in) | ||
222 | { | ||
223 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
224 | |||
225 | memcpy(sctx, in, sizeof(*sctx)); | ||
226 | return 0; | ||
227 | } | ||
228 | |||
229 | static struct shash_alg octeon_sha256_algs[2] = { { | ||
230 | .digestsize = SHA256_DIGEST_SIZE, | ||
231 | .init = octeon_sha256_init, | ||
232 | .update = octeon_sha256_update, | ||
233 | .final = octeon_sha256_final, | ||
234 | .export = octeon_sha256_export, | ||
235 | .import = octeon_sha256_import, | ||
236 | .descsize = sizeof(struct sha256_state), | ||
237 | .statesize = sizeof(struct sha256_state), | ||
238 | .base = { | ||
239 | .cra_name = "sha256", | ||
240 | .cra_driver_name = "octeon-sha256", | ||
241 | .cra_priority = OCTEON_CR_OPCODE_PRIORITY, | ||
242 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
243 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
244 | .cra_module = THIS_MODULE, | ||
245 | } | ||
246 | }, { | ||
247 | .digestsize = SHA224_DIGEST_SIZE, | ||
248 | .init = octeon_sha224_init, | ||
249 | .update = octeon_sha256_update, | ||
250 | .final = octeon_sha224_final, | ||
251 | .descsize = sizeof(struct sha256_state), | ||
252 | .base = { | ||
253 | .cra_name = "sha224", | ||
254 | .cra_driver_name = "octeon-sha224", | ||
255 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
256 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
257 | .cra_module = THIS_MODULE, | ||
258 | } | ||
259 | } }; | ||
260 | |||
261 | static int __init octeon_sha256_mod_init(void) | ||
262 | { | ||
263 | if (!octeon_has_crypto()) | ||
264 | return -ENOTSUPP; | ||
265 | return crypto_register_shashes(octeon_sha256_algs, | ||
266 | ARRAY_SIZE(octeon_sha256_algs)); | ||
267 | } | ||
268 | |||
269 | static void __exit octeon_sha256_mod_fini(void) | ||
270 | { | ||
271 | crypto_unregister_shashes(octeon_sha256_algs, | ||
272 | ARRAY_SIZE(octeon_sha256_algs)); | ||
273 | } | ||
274 | |||
275 | module_init(octeon_sha256_mod_init); | ||
276 | module_exit(octeon_sha256_mod_fini); | ||
277 | |||
278 | MODULE_LICENSE("GPL"); | ||
279 | MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm (OCTEON)"); | ||
280 | MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); | ||
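
The "pad out to 56 mod 64" computation in the final() handlers above is worth a worked example: padding must leave exactly 8 bytes at the end of a block for the bit count, spilling into a whole extra block when the length field no longer fits. The same arithmetic as a standalone sketch:

    #include <assert.h>

    static unsigned int sha256_pad_len(unsigned long long count)
    {
        unsigned int index = count & 0x3f;  /* bytes already sitting in the block */

        return (index < 56) ? (56 - index) : ((64 + 56) - index);
    }

    int main(void)
    {
        assert(sha256_pad_len(0)  == 56);   /* empty message: 0x80 plus 55 zero bytes */
        assert(sha256_pad_len(55) == 1);    /* just room for the 0x80 terminator */
        assert(sha256_pad_len(56) == 64);   /* length no longer fits: pad a full extra block */
        return 0;
    }

In every case index + pad_len + 8 lands on a multiple of 64, which is what the subsequent 8-byte length append relies on.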
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c new file mode 100644 index 000000000000..d5fb3c6f22ae --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c | |||
@@ -0,0 +1,277 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * SHA-512 and SHA-384 Secure Hash Algorithm. | ||
5 | * | ||
6 | * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. | ||
7 | * | ||
8 | * Based on crypto/sha512_generic.c, which is: | ||
9 | * | ||
10 | * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> | ||
11 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
12 | * Copyright (c) 2003 Kyle McMartin <kyle@debian.org> | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License as published by the | ||
16 | * Free Software Foundation; either version 2, or (at your option) any | ||
17 | * later version. | ||
18 | */ | ||
19 | |||
20 | #include <linux/mm.h> | ||
21 | #include <crypto/sha.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <asm/byteorder.h> | ||
26 | #include <asm/octeon/octeon.h> | ||
27 | #include <crypto/internal/hash.h> | ||
28 | |||
29 | #include "octeon-crypto.h" | ||
30 | |||
31 | /* | ||
32 | * We pass everything as 64-bit. OCTEON can handle misaligned data. | ||
33 | */ | ||
34 | |||
35 | static void octeon_sha512_store_hash(struct sha512_state *sctx) | ||
36 | { | ||
37 | write_octeon_64bit_hash_sha512(sctx->state[0], 0); | ||
38 | write_octeon_64bit_hash_sha512(sctx->state[1], 1); | ||
39 | write_octeon_64bit_hash_sha512(sctx->state[2], 2); | ||
40 | write_octeon_64bit_hash_sha512(sctx->state[3], 3); | ||
41 | write_octeon_64bit_hash_sha512(sctx->state[4], 4); | ||
42 | write_octeon_64bit_hash_sha512(sctx->state[5], 5); | ||
43 | write_octeon_64bit_hash_sha512(sctx->state[6], 6); | ||
44 | write_octeon_64bit_hash_sha512(sctx->state[7], 7); | ||
45 | } | ||
46 | |||
47 | static void octeon_sha512_read_hash(struct sha512_state *sctx) | ||
48 | { | ||
49 | sctx->state[0] = read_octeon_64bit_hash_sha512(0); | ||
50 | sctx->state[1] = read_octeon_64bit_hash_sha512(1); | ||
51 | sctx->state[2] = read_octeon_64bit_hash_sha512(2); | ||
52 | sctx->state[3] = read_octeon_64bit_hash_sha512(3); | ||
53 | sctx->state[4] = read_octeon_64bit_hash_sha512(4); | ||
54 | sctx->state[5] = read_octeon_64bit_hash_sha512(5); | ||
55 | sctx->state[6] = read_octeon_64bit_hash_sha512(6); | ||
56 | sctx->state[7] = read_octeon_64bit_hash_sha512(7); | ||
57 | } | ||
58 | |||
59 | static void octeon_sha512_transform(const void *_block) | ||
60 | { | ||
61 | const u64 *block = _block; | ||
62 | |||
63 | write_octeon_64bit_block_sha512(block[0], 0); | ||
64 | write_octeon_64bit_block_sha512(block[1], 1); | ||
65 | write_octeon_64bit_block_sha512(block[2], 2); | ||
66 | write_octeon_64bit_block_sha512(block[3], 3); | ||
67 | write_octeon_64bit_block_sha512(block[4], 4); | ||
68 | write_octeon_64bit_block_sha512(block[5], 5); | ||
69 | write_octeon_64bit_block_sha512(block[6], 6); | ||
70 | write_octeon_64bit_block_sha512(block[7], 7); | ||
71 | write_octeon_64bit_block_sha512(block[8], 8); | ||
72 | write_octeon_64bit_block_sha512(block[9], 9); | ||
73 | write_octeon_64bit_block_sha512(block[10], 10); | ||
74 | write_octeon_64bit_block_sha512(block[11], 11); | ||
75 | write_octeon_64bit_block_sha512(block[12], 12); | ||
76 | write_octeon_64bit_block_sha512(block[13], 13); | ||
77 | write_octeon_64bit_block_sha512(block[14], 14); | ||
78 | octeon_sha512_start(block[15]); | ||
79 | } | ||
80 | |||
81 | static int octeon_sha512_init(struct shash_desc *desc) | ||
82 | { | ||
83 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
84 | |||
85 | sctx->state[0] = SHA512_H0; | ||
86 | sctx->state[1] = SHA512_H1; | ||
87 | sctx->state[2] = SHA512_H2; | ||
88 | sctx->state[3] = SHA512_H3; | ||
89 | sctx->state[4] = SHA512_H4; | ||
90 | sctx->state[5] = SHA512_H5; | ||
91 | sctx->state[6] = SHA512_H6; | ||
92 | sctx->state[7] = SHA512_H7; | ||
93 | sctx->count[0] = sctx->count[1] = 0; | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static int octeon_sha384_init(struct shash_desc *desc) | ||
99 | { | ||
100 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
101 | |||
102 | sctx->state[0] = SHA384_H0; | ||
103 | sctx->state[1] = SHA384_H1; | ||
104 | sctx->state[2] = SHA384_H2; | ||
105 | sctx->state[3] = SHA384_H3; | ||
106 | sctx->state[4] = SHA384_H4; | ||
107 | sctx->state[5] = SHA384_H5; | ||
108 | sctx->state[6] = SHA384_H6; | ||
109 | sctx->state[7] = SHA384_H7; | ||
110 | sctx->count[0] = sctx->count[1] = 0; | ||
111 | |||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | static void __octeon_sha512_update(struct sha512_state *sctx, const u8 *data, | ||
116 | unsigned int len) | ||
117 | { | ||
118 | unsigned int part_len; | ||
119 | unsigned int index; | ||
120 | unsigned int i; | ||
121 | |||
122 | /* Compute number of bytes mod 128. */ | ||
123 | index = sctx->count[0] % SHA512_BLOCK_SIZE; | ||
124 | |||
125 | /* Update number of bytes. */ | ||
126 | if ((sctx->count[0] += len) < len) | ||
127 | sctx->count[1]++; | ||
128 | |||
129 | part_len = SHA512_BLOCK_SIZE - index; | ||
130 | |||
131 | /* Transform as many times as possible. */ | ||
132 | if (len >= part_len) { | ||
133 | memcpy(&sctx->buf[index], data, part_len); | ||
134 | octeon_sha512_transform(sctx->buf); | ||
135 | |||
136 | for (i = part_len; i + SHA512_BLOCK_SIZE <= len; | ||
137 | i += SHA512_BLOCK_SIZE) | ||
138 | octeon_sha512_transform(&data[i]); | ||
139 | |||
140 | index = 0; | ||
141 | } else { | ||
142 | i = 0; | ||
143 | } | ||
144 | |||
145 | /* Buffer remaining input. */ | ||
146 | memcpy(&sctx->buf[index], &data[i], len - i); | ||
147 | } | ||
148 | |||
149 | static int octeon_sha512_update(struct shash_desc *desc, const u8 *data, | ||
150 | unsigned int len) | ||
151 | { | ||
152 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
153 | struct octeon_cop2_state state; | ||
154 | unsigned long flags; | ||
155 | |||
156 | /* | ||
157 | * Small updates never reach the crypto engine, so the generic sha512 is | ||
158 | * faster because of the heavyweight octeon_crypto_enable() / | ||
159 | * octeon_crypto_disable(). | ||
160 | */ | ||
161 | if ((sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) | ||
162 | return crypto_sha512_update(desc, data, len); | ||
163 | |||
164 | flags = octeon_crypto_enable(&state); | ||
165 | octeon_sha512_store_hash(sctx); | ||
166 | |||
167 | __octeon_sha512_update(sctx, data, len); | ||
168 | |||
169 | octeon_sha512_read_hash(sctx); | ||
170 | octeon_crypto_disable(&state, flags); | ||
171 | |||
172 | return 0; | ||
173 | } | ||
174 | |||
175 | static int octeon_sha512_final(struct shash_desc *desc, u8 *hash) | ||
176 | { | ||
177 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
178 | static const u8 padding[128] = { 0x80, }; | ||
179 | struct octeon_cop2_state state; | ||
180 | __be64 *dst = (__be64 *)hash; | ||
181 | unsigned int pad_len; | ||
182 | unsigned long flags; | ||
183 | unsigned int index; | ||
184 | __be64 bits[2]; | ||
185 | int i; | ||
186 | |||
187 | /* Save number of bits. */ | ||
188 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
189 | bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); | ||
190 | |||
191 | /* Pad out to 112 mod 128. */ | ||
192 | index = sctx->count[0] & 0x7f; | ||
193 | pad_len = (index < 112) ? (112 - index) : ((128+112) - index); | ||
194 | |||
195 | flags = octeon_crypto_enable(&state); | ||
196 | octeon_sha512_store_hash(sctx); | ||
197 | |||
198 | __octeon_sha512_update(sctx, padding, pad_len); | ||
199 | |||
200 | /* Append length (before padding). */ | ||
201 | __octeon_sha512_update(sctx, (const u8 *)bits, sizeof(bits)); | ||
202 | |||
203 | octeon_sha512_read_hash(sctx); | ||
204 | octeon_crypto_disable(&state, flags); | ||
205 | |||
206 | /* Store state in digest. */ | ||
207 | for (i = 0; i < 8; i++) | ||
208 | dst[i] = cpu_to_be64(sctx->state[i]); | ||
209 | |||
210 | /* Zeroize sensitive information. */ | ||
211 | memset(sctx, 0, sizeof(struct sha512_state)); | ||
212 | |||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static int octeon_sha384_final(struct shash_desc *desc, u8 *hash) | ||
217 | { | ||
218 | u8 D[64]; | ||
219 | |||
220 | octeon_sha512_final(desc, D); | ||
221 | |||
222 | memcpy(hash, D, 48); | ||
223 | memzero_explicit(D, 64); | ||
224 | |||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | static struct shash_alg octeon_sha512_algs[2] = { { | ||
229 | .digestsize = SHA512_DIGEST_SIZE, | ||
230 | .init = octeon_sha512_init, | ||
231 | .update = octeon_sha512_update, | ||
232 | .final = octeon_sha512_final, | ||
233 | .descsize = sizeof(struct sha512_state), | ||
234 | .base = { | ||
235 | .cra_name = "sha512", | ||
236 | .cra_driver_name = "octeon-sha512", | ||
237 | .cra_priority = OCTEON_CR_OPCODE_PRIORITY, | ||
238 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
239 | .cra_blocksize = SHA512_BLOCK_SIZE, | ||
240 | .cra_module = THIS_MODULE, | ||
241 | } | ||
242 | }, { | ||
243 | .digestsize = SHA384_DIGEST_SIZE, | ||
244 | .init = octeon_sha384_init, | ||
245 | .update = octeon_sha512_update, | ||
246 | .final = octeon_sha384_final, | ||
247 | .descsize = sizeof(struct sha512_state), | ||
248 | .base = { | ||
249 | .cra_name = "sha384", | ||
250 | .cra_driver_name = "octeon-sha384", | ||
251 | .cra_priority = OCTEON_CR_OPCODE_PRIORITY, | ||
252 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
253 | .cra_blocksize = SHA384_BLOCK_SIZE, | ||
254 | .cra_module = THIS_MODULE, | ||
255 | } | ||
256 | } }; | ||
257 | |||
258 | static int __init octeon_sha512_mod_init(void) | ||
259 | { | ||
260 | if (!octeon_has_crypto()) | ||
261 | return -ENOTSUPP; | ||
262 | return crypto_register_shashes(octeon_sha512_algs, | ||
263 | ARRAY_SIZE(octeon_sha512_algs)); | ||
264 | } | ||
265 | |||
266 | static void __exit octeon_sha512_mod_fini(void) | ||
267 | { | ||
268 | crypto_unregister_shashes(octeon_sha512_algs, | ||
269 | ARRAY_SIZE(octeon_sha512_algs)); | ||
270 | } | ||
271 | |||
272 | module_init(octeon_sha512_mod_init); | ||
273 | module_exit(octeon_sha512_mod_fini); | ||
274 | |||
275 | MODULE_LICENSE("GPL"); | ||
276 | MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms (OCTEON)"); | ||
277 | MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); | ||
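
Unlike the 64-bit counts in the SHA-1/SHA-256 drivers above, SHA-512 carries a 128-bit message length, which is why sctx->count is a two-element array here. The bits[] setup in octeon_sha512_final() is a 128-bit multiply-by-8 spelled out in 64-bit halves; in plain C, byte-swapping omitted:

    #include <stdint.h>

    /* Convert a 128-bit byte count (lo = count[0], hi = count[1]) into a
     * 128-bit bit count: shift left by 3 and carry the top 3 bits of the
     * low half into the high half. bits[0] is the high half because it is
     * emitted first in the big-endian length field. */
    static void sha512_bit_count_sketch(const uint64_t count[2], uint64_t bits[2])
    {
        bits[1] = count[0] << 3;
        bits[0] = (count[1] << 3) | (count[0] >> 61);
    }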
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h index 4794067cb5a7..5035f09c5427 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h | |||
@@ -1259,20 +1259,6 @@ | |||
1259 | #define M2M_DSTID_REG(x) ((x) * 0x40 + 0x18) | 1259 | #define M2M_DSTID_REG(x) ((x) * 0x40 + 0x18) |
1260 | 1260 | ||
1261 | /************************************************************************* | 1261 | /************************************************************************* |
1262 | * _REG relative to RSET_RNG | ||
1263 | *************************************************************************/ | ||
1264 | |||
1265 | #define RNG_CTRL 0x00 | ||
1266 | #define RNG_EN (1 << 0) | ||
1267 | |||
1268 | #define RNG_STAT 0x04 | ||
1269 | #define RNG_AVAIL_MASK (0xff000000) | ||
1270 | |||
1271 | #define RNG_DATA 0x08 | ||
1272 | #define RNG_THRES 0x0c | ||
1273 | #define RNG_MASK 0x10 | ||
1274 | |||
1275 | /************************************************************************* | ||
1276 | * _REG relative to RSET_SPI | 1262 | * _REG relative to RSET_SPI |
1277 | *************************************************************************/ | 1263 | *************************************************************************/ |
1278 | 1264 | ||
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 2926fb9c570a..9c221b69c181 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile | |||
@@ -4,6 +4,14 @@ | |||
4 | # Arch-specific CryptoAPI modules. | 4 | # Arch-specific CryptoAPI modules. |
5 | # | 5 | # |
6 | 6 | ||
7 | obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o | ||
8 | obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o | ||
7 | obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o | 9 | obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o |
10 | obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o | ||
11 | obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o | ||
8 | 12 | ||
13 | aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o | ||
14 | md5-ppc-y := md5-asm.o md5-glue.o | ||
9 | sha1-powerpc-y := sha1-powerpc-asm.o sha1.o | 15 | sha1-powerpc-y := sha1-powerpc-asm.o sha1.o |
16 | sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o | ||
17 | sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o | ||
diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S new file mode 100644 index 000000000000..5dc6bce90a77 --- /dev/null +++ b/arch/powerpc/crypto/aes-spe-core.S | |||
@@ -0,0 +1,351 @@ | |||
1 | /* | ||
2 | * Fast AES implementation for SPE instruction set (PPC) | ||
3 | * | ||
4 | * This code makes use of the SPE SIMD instruction set as defined in | ||
5 | * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf | ||
6 | * Implementation is based on optimization guide notes from | ||
7 | * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf | ||
8 | * | ||
9 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <asm/ppc_asm.h> | ||
19 | #include "aes-spe-regs.h" | ||
20 | |||
21 | #define EAD(in, bpos) \ | ||
22 | rlwimi rT0,in,28-((bpos+3)%4)*8,20,27; | ||
23 | |||
24 | #define DAD(in, bpos) \ | ||
25 | rlwimi rT1,in,24-((bpos+3)%4)*8,24,31; | ||
26 | |||
27 | #define LWH(out, off) \ | ||
28 | evlwwsplat out,off(rT0); /* load word high */ | ||
29 | |||
30 | #define LWL(out, off) \ | ||
31 | lwz out,off(rT0); /* load word low */ | ||
32 | |||
33 | #define LBZ(out, tab, off) \ | ||
34 | lbz out,off(tab); /* load byte */ | ||
35 | |||
36 | #define LAH(out, in, bpos, off) \ | ||
37 | EAD(in, bpos) /* calc addr + load word high */ \ | ||
38 | LWH(out, off) | ||
39 | |||
40 | #define LAL(out, in, bpos, off) \ | ||
41 | EAD(in, bpos) /* calc addr + load word low */ \ | ||
42 | LWL(out, off) | ||
43 | |||
44 | #define LAE(out, in, bpos) \ | ||
45 | EAD(in, bpos) /* calc addr + load enc byte */ \ | ||
46 | LBZ(out, rT0, 8) | ||
47 | |||
48 | #define LBE(out) \ | ||
49 | LBZ(out, rT0, 8) /* load enc byte */ | ||
50 | |||
51 | #define LAD(out, in, bpos) \ | ||
52 | DAD(in, bpos) /* calc addr + load dec byte */ \ | ||
53 | LBZ(out, rT1, 0) | ||
54 | |||
55 | #define LBD(out) \ | ||
56 | LBZ(out, rT1, 0) | ||
57 | |||
58 | /* | ||
59 | * ppc_encrypt_block: The central encryption function for a single 16-byte | ||
60 | * block. It does no stack handling or register saving, to support fast calls | ||
61 | * via bl/blr. It expects that the caller has pre-xored the input data with | ||
62 | * the first 4 words of the encryption key into rD0-rD3, and that the pointer/ | ||
63 | * counter registers (rT0, rKP, CTR) have been set up. Output is stored in | ||
64 | * rD0-rD3 and rW0-rW3; the caller must execute a final xor on the output registers. | ||
65 | * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing. | ||
66 | * | ||
67 | */ | ||
68 | _GLOBAL(ppc_encrypt_block) | ||
69 | LAH(rW4, rD1, 2, 4) | ||
70 | LAH(rW6, rD0, 3, 0) | ||
71 | LAH(rW3, rD0, 1, 8) | ||
72 | ppc_encrypt_block_loop: | ||
73 | LAH(rW0, rD3, 0, 12) | ||
74 | LAL(rW0, rD0, 0, 12) | ||
75 | LAH(rW1, rD1, 0, 12) | ||
76 | LAH(rW2, rD2, 1, 8) | ||
77 | LAL(rW2, rD3, 1, 8) | ||
78 | LAL(rW3, rD1, 1, 8) | ||
79 | LAL(rW4, rD2, 2, 4) | ||
80 | LAL(rW6, rD1, 3, 0) | ||
81 | LAH(rW5, rD3, 2, 4) | ||
82 | LAL(rW5, rD0, 2, 4) | ||
83 | LAH(rW7, rD2, 3, 0) | ||
84 | evldw rD1,16(rKP) | ||
85 | EAD(rD3, 3) | ||
86 | evxor rW2,rW2,rW4 | ||
87 | LWL(rW7, 0) | ||
88 | evxor rW2,rW2,rW6 | ||
89 | EAD(rD2, 0) | ||
90 | evxor rD1,rD1,rW2 | ||
91 | LWL(rW1, 12) | ||
92 | evxor rD1,rD1,rW0 | ||
93 | evldw rD3,24(rKP) | ||
94 | evmergehi rD0,rD0,rD1 | ||
95 | EAD(rD1, 2) | ||
96 | evxor rW3,rW3,rW5 | ||
97 | LWH(rW4, 4) | ||
98 | evxor rW3,rW3,rW7 | ||
99 | EAD(rD0, 3) | ||
100 | evxor rD3,rD3,rW3 | ||
101 | LWH(rW6, 0) | ||
102 | evxor rD3,rD3,rW1 | ||
103 | EAD(rD0, 1) | ||
104 | evmergehi rD2,rD2,rD3 | ||
105 | LWH(rW3, 8) | ||
106 | LAH(rW0, rD3, 0, 12) | ||
107 | LAL(rW0, rD0, 0, 12) | ||
108 | LAH(rW1, rD1, 0, 12) | ||
109 | LAH(rW2, rD2, 1, 8) | ||
110 | LAL(rW2, rD3, 1, 8) | ||
111 | LAL(rW3, rD1, 1, 8) | ||
112 | LAL(rW4, rD2, 2, 4) | ||
113 | LAL(rW6, rD1, 3, 0) | ||
114 | LAH(rW5, rD3, 2, 4) | ||
115 | LAL(rW5, rD0, 2, 4) | ||
116 | LAH(rW7, rD2, 3, 0) | ||
117 | evldw rD1,32(rKP) | ||
118 | EAD(rD3, 3) | ||
119 | evxor rW2,rW2,rW4 | ||
120 | LWL(rW7, 0) | ||
121 | evxor rW2,rW2,rW6 | ||
122 | EAD(rD2, 0) | ||
123 | evxor rD1,rD1,rW2 | ||
124 | LWL(rW1, 12) | ||
125 | evxor rD1,rD1,rW0 | ||
126 | evldw rD3,40(rKP) | ||
127 | evmergehi rD0,rD0,rD1 | ||
128 | EAD(rD1, 2) | ||
129 | evxor rW3,rW3,rW5 | ||
130 | LWH(rW4, 4) | ||
131 | evxor rW3,rW3,rW7 | ||
132 | EAD(rD0, 3) | ||
133 | evxor rD3,rD3,rW3 | ||
134 | LWH(rW6, 0) | ||
135 | evxor rD3,rD3,rW1 | ||
136 | EAD(rD0, 1) | ||
137 | evmergehi rD2,rD2,rD3 | ||
138 | LWH(rW3, 8) | ||
139 | addi rKP,rKP,32 | ||
140 | bdnz ppc_encrypt_block_loop | ||
141 | LAH(rW0, rD3, 0, 12) | ||
142 | LAL(rW0, rD0, 0, 12) | ||
143 | LAH(rW1, rD1, 0, 12) | ||
144 | LAH(rW2, rD2, 1, 8) | ||
145 | LAL(rW2, rD3, 1, 8) | ||
146 | LAL(rW3, rD1, 1, 8) | ||
147 | LAL(rW4, rD2, 2, 4) | ||
148 | LAH(rW5, rD3, 2, 4) | ||
149 | LAL(rW6, rD1, 3, 0) | ||
150 | LAL(rW5, rD0, 2, 4) | ||
151 | LAH(rW7, rD2, 3, 0) | ||
152 | evldw rD1,16(rKP) | ||
153 | EAD(rD3, 3) | ||
154 | evxor rW2,rW2,rW4 | ||
155 | LWL(rW7, 0) | ||
156 | evxor rW2,rW2,rW6 | ||
157 | EAD(rD2, 0) | ||
158 | evxor rD1,rD1,rW2 | ||
159 | LWL(rW1, 12) | ||
160 | evxor rD1,rD1,rW0 | ||
161 | evldw rD3,24(rKP) | ||
162 | evmergehi rD0,rD0,rD1 | ||
163 | EAD(rD1, 0) | ||
164 | evxor rW3,rW3,rW5 | ||
165 | LBE(rW2) | ||
166 | evxor rW3,rW3,rW7 | ||
167 | EAD(rD0, 1) | ||
168 | evxor rD3,rD3,rW3 | ||
169 | LBE(rW6) | ||
170 | evxor rD3,rD3,rW1 | ||
171 | EAD(rD0, 0) | ||
172 | evmergehi rD2,rD2,rD3 | ||
173 | LBE(rW1) | ||
174 | LAE(rW0, rD3, 0) | ||
175 | LAE(rW1, rD0, 0) | ||
176 | LAE(rW4, rD2, 1) | ||
177 | LAE(rW5, rD3, 1) | ||
178 | LAE(rW3, rD2, 0) | ||
179 | LAE(rW7, rD1, 1) | ||
180 | rlwimi rW0,rW4,8,16,23 | ||
181 | rlwimi rW1,rW5,8,16,23 | ||
182 | LAE(rW4, rD1, 2) | ||
183 | LAE(rW5, rD2, 2) | ||
184 | rlwimi rW2,rW6,8,16,23 | ||
185 | rlwimi rW3,rW7,8,16,23 | ||
186 | LAE(rW6, rD3, 2) | ||
187 | LAE(rW7, rD0, 2) | ||
188 | rlwimi rW0,rW4,16,8,15 | ||
189 | rlwimi rW1,rW5,16,8,15 | ||
190 | LAE(rW4, rD0, 3) | ||
191 | LAE(rW5, rD1, 3) | ||
192 | rlwimi rW2,rW6,16,8,15 | ||
193 | lwz rD0,32(rKP) | ||
194 | rlwimi rW3,rW7,16,8,15 | ||
195 | lwz rD1,36(rKP) | ||
196 | LAE(rW6, rD2, 3) | ||
197 | LAE(rW7, rD3, 3) | ||
198 | rlwimi rW0,rW4,24,0,7 | ||
199 | lwz rD2,40(rKP) | ||
200 | rlwimi rW1,rW5,24,0,7 | ||
201 | lwz rD3,44(rKP) | ||
202 | rlwimi rW2,rW6,24,0,7 | ||
203 | rlwimi rW3,rW7,24,0,7 | ||
204 | blr | ||
205 | |||
206 | /* | ||
207 | * ppc_decrypt_block: The central decryption function for a single 16 byte | ||
208 | * block. It does no stack handling or register saving to support fast calls | ||
209 | * via bl/blr. It expects that the caller has pre-xored the input data with | ||
210 | * the first 4 words of the decryption key into rD0-rD3. Pointer/counter | ||
211 | * registers must also have been set up before (rT0, rKP, CTR). Output is | ||
212 | * stored in rD0-rD3 and rW0-rW3; the caller must execute a final xor on the | ||
213 | * output registers. All working registers rD0-rD3 & rW0-rW7 are overwritten. | ||
214 | * | ||
215 | */ | ||
216 | _GLOBAL(ppc_decrypt_block) | ||
217 | LAH(rW0, rD1, 0, 12) | ||
218 | LAH(rW6, rD0, 3, 0) | ||
219 | LAH(rW3, rD0, 1, 8) | ||
220 | ppc_decrypt_block_loop: | ||
221 | LAH(rW1, rD3, 0, 12) | ||
222 | LAL(rW0, rD2, 0, 12) | ||
223 | LAH(rW2, rD2, 1, 8) | ||
224 | LAL(rW2, rD3, 1, 8) | ||
225 | LAH(rW4, rD3, 2, 4) | ||
226 | LAL(rW4, rD0, 2, 4) | ||
227 | LAL(rW6, rD1, 3, 0) | ||
228 | LAH(rW5, rD1, 2, 4) | ||
229 | LAH(rW7, rD2, 3, 0) | ||
230 | LAL(rW7, rD3, 3, 0) | ||
231 | LAL(rW3, rD1, 1, 8) | ||
232 | evldw rD1,16(rKP) | ||
233 | EAD(rD0, 0) | ||
234 | evxor rW4,rW4,rW6 | ||
235 | LWL(rW1, 12) | ||
236 | evxor rW0,rW0,rW4 | ||
237 | EAD(rD2, 2) | ||
238 | evxor rW0,rW0,rW2 | ||
239 | LWL(rW5, 4) | ||
240 | evxor rD1,rD1,rW0 | ||
241 | evldw rD3,24(rKP) | ||
242 | evmergehi rD0,rD0,rD1 | ||
243 | EAD(rD1, 0) | ||
244 | evxor rW3,rW3,rW7 | ||
245 | LWH(rW0, 12) | ||
246 | evxor rW3,rW3,rW1 | ||
247 | EAD(rD0, 3) | ||
248 | evxor rD3,rD3,rW3 | ||
249 | LWH(rW6, 0) | ||
250 | evxor rD3,rD3,rW5 | ||
251 | EAD(rD0, 1) | ||
252 | evmergehi rD2,rD2,rD3 | ||
253 | LWH(rW3, 8) | ||
254 | LAH(rW1, rD3, 0, 12) | ||
255 | LAL(rW0, rD2, 0, 12) | ||
256 | LAH(rW2, rD2, 1, 8) | ||
257 | LAL(rW2, rD3, 1, 8) | ||
258 | LAH(rW4, rD3, 2, 4) | ||
259 | LAL(rW4, rD0, 2, 4) | ||
260 | LAL(rW6, rD1, 3, 0) | ||
261 | LAH(rW5, rD1, 2, 4) | ||
262 | LAH(rW7, rD2, 3, 0) | ||
263 | LAL(rW7, rD3, 3, 0) | ||
264 | LAL(rW3, rD1, 1, 8) | ||
265 | evldw rD1,32(rKP) | ||
266 | EAD(rD0, 0) | ||
267 | evxor rW4,rW4,rW6 | ||
268 | LWL(rW1, 12) | ||
269 | evxor rW0,rW0,rW4 | ||
270 | EAD(rD2, 2) | ||
271 | evxor rW0,rW0,rW2 | ||
272 | LWL(rW5, 4) | ||
273 | evxor rD1,rD1,rW0 | ||
274 | evldw rD3,40(rKP) | ||
275 | evmergehi rD0,rD0,rD1 | ||
276 | EAD(rD1, 0) | ||
277 | evxor rW3,rW3,rW7 | ||
278 | LWH(rW0, 12) | ||
279 | evxor rW3,rW3,rW1 | ||
280 | EAD(rD0, 3) | ||
281 | evxor rD3,rD3,rW3 | ||
282 | LWH(rW6, 0) | ||
283 | evxor rD3,rD3,rW5 | ||
284 | EAD(rD0, 1) | ||
285 | evmergehi rD2,rD2,rD3 | ||
286 | LWH(rW3, 8) | ||
287 | addi rKP,rKP,32 | ||
288 | bdnz ppc_decrypt_block_loop | ||
289 | LAH(rW1, rD3, 0, 12) | ||
290 | LAL(rW0, rD2, 0, 12) | ||
291 | LAH(rW2, rD2, 1, 8) | ||
292 | LAL(rW2, rD3, 1, 8) | ||
293 | LAH(rW4, rD3, 2, 4) | ||
294 | LAL(rW4, rD0, 2, 4) | ||
295 | LAL(rW6, rD1, 3, 0) | ||
296 | LAH(rW5, rD1, 2, 4) | ||
297 | LAH(rW7, rD2, 3, 0) | ||
298 | LAL(rW7, rD3, 3, 0) | ||
299 | LAL(rW3, rD1, 1, 8) | ||
300 | evldw rD1,16(rKP) | ||
301 | EAD(rD0, 0) | ||
302 | evxor rW4,rW4,rW6 | ||
303 | LWL(rW1, 12) | ||
304 | evxor rW0,rW0,rW4 | ||
305 | EAD(rD2, 2) | ||
306 | evxor rW0,rW0,rW2 | ||
307 | LWL(rW5, 4) | ||
308 | evxor rD1,rD1,rW0 | ||
309 | evldw rD3,24(rKP) | ||
310 | evmergehi rD0,rD0,rD1 | ||
311 | DAD(rD1, 0) | ||
312 | evxor rW3,rW3,rW7 | ||
313 | LBD(rW0) | ||
314 | evxor rW3,rW3,rW1 | ||
315 | DAD(rD0, 1) | ||
316 | evxor rD3,rD3,rW3 | ||
317 | LBD(rW6) | ||
318 | evxor rD3,rD3,rW5 | ||
319 | DAD(rD0, 0) | ||
320 | evmergehi rD2,rD2,rD3 | ||
321 | LBD(rW3) | ||
322 | LAD(rW2, rD3, 0) | ||
323 | LAD(rW1, rD2, 0) | ||
324 | LAD(rW4, rD2, 1) | ||
325 | LAD(rW5, rD3, 1) | ||
326 | LAD(rW7, rD1, 1) | ||
327 | rlwimi rW0,rW4,8,16,23 | ||
328 | rlwimi rW1,rW5,8,16,23 | ||
329 | LAD(rW4, rD3, 2) | ||
330 | LAD(rW5, rD0, 2) | ||
331 | rlwimi rW2,rW6,8,16,23 | ||
332 | rlwimi rW3,rW7,8,16,23 | ||
333 | LAD(rW6, rD1, 2) | ||
334 | LAD(rW7, rD2, 2) | ||
335 | rlwimi rW0,rW4,16,8,15 | ||
336 | rlwimi rW1,rW5,16,8,15 | ||
337 | LAD(rW4, rD0, 3) | ||
338 | LAD(rW5, rD1, 3) | ||
339 | rlwimi rW2,rW6,16,8,15 | ||
340 | lwz rD0,32(rKP) | ||
341 | rlwimi rW3,rW7,16,8,15 | ||
342 | lwz rD1,36(rKP) | ||
343 | LAD(rW6, rD2, 3) | ||
344 | LAD(rW7, rD3, 3) | ||
345 | rlwimi rW0,rW4,24,0,7 | ||
346 | lwz rD2,40(rKP) | ||
347 | rlwimi rW1,rW5,24,0,7 | ||
348 | lwz rD3,44(rKP) | ||
349 | rlwimi rW2,rW6,24,0,7 | ||
350 | rlwimi rW3,rW7,24,0,7 | ||
351 | blr | ||
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c new file mode 100644 index 000000000000..bd5e63f72ad4 --- /dev/null +++ b/arch/powerpc/crypto/aes-spe-glue.c | |||
@@ -0,0 +1,512 @@ | |||
1 | /* | ||
2 | * Glue code for AES implementation for SPE instructions (PPC) | ||
3 | * | ||
4 | * Based on generic implementation. The assembler module takes care | ||
5 | * of the SPE registers so it can run from interrupt context. | ||
6 | * | ||
7 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License as published by the Free | ||
11 | * Software Foundation; either version 2 of the License, or (at your option) | ||
12 | * any later version. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <crypto/aes.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/errno.h> | ||
21 | #include <linux/crypto.h> | ||
22 | #include <asm/byteorder.h> | ||
23 | #include <asm/switch_to.h> | ||
24 | #include <crypto/algapi.h> | ||
25 | |||
26 | /* | ||
27 | * MAX_BYTES defines the number of bytes that are allowed to be processed | ||
28 | * between preempt_disable() and preempt_enable(). e500 cores can issue two | ||
29 | * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32 | ||
30 | * bit unit (SU2). One of these can be a memory access that is executed via | ||
31 | * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per | ||
32 | * 16 byte block, or 25 cycles per byte. Thus 768 bytes of input data | ||
33 | * will need an estimated maximum of 20,000 cycles, headroom for cache misses | ||
34 | * included. Even with the low end model clocked at 667 MHz this amounts to a | ||
35 | * critical time window of less than 30us. The value has been chosen to | ||
36 | * process a 512 byte disk block in one run or a large 1400 byte IPsec | ||
37 | * network packet in two. | ||
38 | * | ||
39 | */ | ||
40 | #define MAX_BYTES 768 | ||
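The arithmetic behind this budget is easy to check. A stand-alone sketch (not part of the driver) that takes the ~25 cycles per byte worst case quoted above at face value:

```c
/*
 * Rough check of the numbers quoted above: 768 * 25 = 19200 cycles,
 * and at 667 cycles per microsecond that is roughly 29 us, i.e. the
 * preempt-off window indeed stays below 30 us.
 */
static unsigned int preempt_off_window_us(unsigned int bytes)
{
	const unsigned int cycles_per_byte = 25;  /* XTS-AES-256 worst case */
	const unsigned int mhz = 667;             /* low end e500 clock */

	return bytes * cycles_per_byte / mhz;     /* 768 -> ~28 us */
}
```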
41 | |||
42 | struct ppc_aes_ctx { | ||
43 | u32 key_enc[AES_MAX_KEYLENGTH_U32]; | ||
44 | u32 key_dec[AES_MAX_KEYLENGTH_U32]; | ||
45 | u32 rounds; | ||
46 | }; | ||
47 | |||
48 | struct ppc_xts_ctx { | ||
49 | u32 key_enc[AES_MAX_KEYLENGTH_U32]; | ||
50 | u32 key_dec[AES_MAX_KEYLENGTH_U32]; | ||
51 | u32 key_twk[AES_MAX_KEYLENGTH_U32]; | ||
52 | u32 rounds; | ||
53 | }; | ||
54 | |||
55 | extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds); | ||
56 | extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds); | ||
57 | extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds, | ||
58 | u32 bytes); | ||
59 | extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds, | ||
60 | u32 bytes); | ||
61 | extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds, | ||
62 | u32 bytes, u8 *iv); | ||
63 | extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds, | ||
64 | u32 bytes, u8 *iv); | ||
65 | extern void ppc_crypt_ctr (u8 *out, const u8 *in, u32 *key_enc, u32 rounds, | ||
66 | u32 bytes, u8 *iv); | ||
67 | extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds, | ||
68 | u32 bytes, u8 *iv, u32 *key_twk); | ||
69 | extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds, | ||
70 | u32 bytes, u8 *iv, u32 *key_twk); | ||
71 | |||
72 | extern void ppc_expand_key_128(u32 *key_enc, const u8 *key); | ||
73 | extern void ppc_expand_key_192(u32 *key_enc, const u8 *key); | ||
74 | extern void ppc_expand_key_256(u32 *key_enc, const u8 *key); | ||
75 | |||
76 | extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc, | ||
77 | unsigned int key_len); | ||
78 | |||
79 | static void spe_begin(void) | ||
80 | { | ||
81 | /* disable preemption and save the user's SPE registers if required */ | ||
82 | preempt_disable(); | ||
83 | enable_kernel_spe(); | ||
84 | } | ||
85 | |||
86 | static void spe_end(void) | ||
87 | { | ||
88 | /* reenable preemption */ | ||
89 | preempt_enable(); | ||
90 | } | ||
91 | |||
92 | static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, | ||
93 | unsigned int key_len) | ||
94 | { | ||
95 | struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
96 | |||
97 | if (key_len != AES_KEYSIZE_128 && | ||
98 | key_len != AES_KEYSIZE_192 && | ||
99 | key_len != AES_KEYSIZE_256) { | ||
100 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
101 | return -EINVAL; | ||
102 | } | ||
103 | |||
104 | switch (key_len) { | ||
105 | case AES_KEYSIZE_128: | ||
106 | ctx->rounds = 4; | ||
107 | ppc_expand_key_128(ctx->key_enc, in_key); | ||
108 | break; | ||
109 | case AES_KEYSIZE_192: | ||
110 | ctx->rounds = 5; | ||
111 | ppc_expand_key_192(ctx->key_enc, in_key); | ||
112 | break; | ||
113 | case AES_KEYSIZE_256: | ||
114 | ctx->rounds = 6; | ||
115 | ppc_expand_key_256(ctx->key_enc, in_key); | ||
116 | break; | ||
117 | } | ||
118 | |||
119 | ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len); | ||
120 | |||
121 | return 0; | ||
122 | } | ||
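The rounds values stored here (4, 5 and 6) are not the usual AES round counts (10, 12 and 14). Judging by the key offsets consumed per loop iteration in the assembler core, two rounds are processed per iteration and the first and last round are handled outside the loop, i.e. rounds = (Nr - 2) / 2. A sketch of that mapping (illustrative only):

```c
/*
 * Illustrative: relation between the standard AES round count and the
 * loop counter stored in ctx->rounds, assuming the assembler core
 * unrolls two rounds per iteration (key_len in bytes: 16, 24 or 32).
 */
static unsigned int spe_loop_rounds(unsigned int key_len)
{
	unsigned int nr = 6 + key_len / 4;	/* 10, 12 or 14 rounds */

	return (nr - 2) / 2;			/* 4, 5 or 6 iterations */
}
```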
123 | |||
124 | static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key, | ||
125 | unsigned int key_len) | ||
126 | { | ||
127 | struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
128 | |||
129 | key_len >>= 1; | ||
130 | |||
131 | if (key_len != AES_KEYSIZE_128 && | ||
132 | key_len != AES_KEYSIZE_192 && | ||
133 | key_len != AES_KEYSIZE_256) { | ||
134 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
135 | return -EINVAL; | ||
136 | } | ||
137 | |||
138 | switch (key_len) { | ||
139 | case AES_KEYSIZE_128: | ||
140 | ctx->rounds = 4; | ||
141 | ppc_expand_key_128(ctx->key_enc, in_key); | ||
142 | ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128); | ||
143 | break; | ||
144 | case AES_KEYSIZE_192: | ||
145 | ctx->rounds = 5; | ||
146 | ppc_expand_key_192(ctx->key_enc, in_key); | ||
147 | ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192); | ||
148 | break; | ||
149 | case AES_KEYSIZE_256: | ||
150 | ctx->rounds = 6; | ||
151 | ppc_expand_key_256(ctx->key_enc, in_key); | ||
152 | ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256); | ||
153 | break; | ||
154 | } | ||
155 | |||
156 | ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len); | ||
157 | |||
158 | return 0; | ||
159 | } | ||
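ppc_xts_setkey() halves key_len because, following the XTS convention, the supplied key is the concatenation of the data key and the tweak key. A small sketch of that layout (hypothetical helper, kernel types assumed):

```c
/*
 * Sketch of the XTS key layout assumed above: the first half of the
 * key material is expanded into key_enc/key_dec, the second half into
 * key_twk.  A 64 byte key therefore yields two AES-256 keys.
 */
struct xts_key_halves {
	const u8 *data_key;	/* expanded into key_enc/key_dec */
	const u8 *tweak_key;	/* expanded into key_twk */
};

static struct xts_key_halves xts_split(const u8 *in_key, unsigned int key_len)
{
	struct xts_key_halves h = {
		.data_key  = in_key,
		.tweak_key = in_key + key_len / 2,
	};

	return h;
}
```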
160 | |||
161 | static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | ||
162 | { | ||
163 | struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
164 | |||
165 | spe_begin(); | ||
166 | ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds); | ||
167 | spe_end(); | ||
168 | } | ||
169 | |||
170 | static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | ||
171 | { | ||
172 | struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
173 | |||
174 | spe_begin(); | ||
175 | ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds); | ||
176 | spe_end(); | ||
177 | } | ||
178 | |||
179 | static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
180 | struct scatterlist *src, unsigned int nbytes) | ||
181 | { | ||
182 | struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
183 | struct blkcipher_walk walk; | ||
184 | unsigned int ubytes; | ||
185 | int err; | ||
186 | |||
187 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
188 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
189 | err = blkcipher_walk_virt(desc, &walk); | ||
190 | |||
191 | while ((nbytes = walk.nbytes)) { | ||
192 | ubytes = nbytes > MAX_BYTES ? | ||
193 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
194 | nbytes -= ubytes; | ||
195 | |||
196 | spe_begin(); | ||
197 | ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, | ||
198 | ctx->key_enc, ctx->rounds, nbytes); | ||
199 | spe_end(); | ||
200 | |||
201 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
202 | } | ||
203 | |||
204 | return err; | ||
205 | } | ||
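The ubytes expression that recurs in these walk loops encodes the chunking policy: process at most MAX_BYTES per preempt-off section and never hand a partial block to the assembler. Restated as a stand-alone sketch (illustrative only):

```c
/*
 * Illustrative: how many bytes are left unprocessed ("ubytes") per
 * walk step.  Large chunks are capped at MAX_BYTES; for the final
 * chunk only a trailing partial block is left over.
 */
static unsigned int ecb_unprocessed(unsigned int nbytes)
{
	if (nbytes > MAX_BYTES)
		return nbytes - MAX_BYTES;		/* cap the chunk */

	return nbytes & (AES_BLOCK_SIZE - 1);		/* partial block */
}
```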
206 | |||
207 | static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
208 | struct scatterlist *src, unsigned int nbytes) | ||
209 | { | ||
210 | struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
211 | struct blkcipher_walk walk; | ||
212 | unsigned int ubytes; | ||
213 | int err; | ||
214 | |||
215 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
216 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
217 | err = blkcipher_walk_virt(desc, &walk); | ||
218 | |||
219 | while ((nbytes = walk.nbytes)) { | ||
220 | ubytes = nbytes > MAX_BYTES ? | ||
221 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
222 | nbytes -= ubytes; | ||
223 | |||
224 | spe_begin(); | ||
225 | ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, | ||
226 | ctx->key_dec, ctx->rounds, nbytes); | ||
227 | spe_end(); | ||
228 | |||
229 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
230 | } | ||
231 | |||
232 | return err; | ||
233 | } | ||
234 | |||
235 | static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
236 | struct scatterlist *src, unsigned int nbytes) | ||
237 | { | ||
238 | struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
239 | struct blkcipher_walk walk; | ||
240 | unsigned int ubytes; | ||
241 | int err; | ||
242 | |||
243 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
244 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
245 | err = blkcipher_walk_virt(desc, &walk); | ||
246 | |||
247 | while ((nbytes = walk.nbytes)) { | ||
248 | ubytes = nbytes > MAX_BYTES ? | ||
249 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
250 | nbytes -= ubytes; | ||
251 | |||
252 | spe_begin(); | ||
253 | ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, | ||
254 | ctx->key_enc, ctx->rounds, nbytes, walk.iv); | ||
255 | spe_end(); | ||
256 | |||
257 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
258 | } | ||
259 | |||
260 | return err; | ||
261 | } | ||
262 | |||
263 | static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
264 | struct scatterlist *src, unsigned int nbytes) | ||
265 | { | ||
266 | struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
267 | struct blkcipher_walk walk; | ||
268 | unsigned int ubytes; | ||
269 | int err; | ||
270 | |||
271 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
272 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
273 | err = blkcipher_walk_virt(desc, &walk); | ||
274 | |||
275 | while ((nbytes = walk.nbytes)) { | ||
276 | ubytes = nbytes > MAX_BYTES ? | ||
277 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
278 | nbytes -= ubytes; | ||
279 | |||
280 | spe_begin(); | ||
281 | ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, | ||
282 | ctx->key_dec, ctx->rounds, nbytes, walk.iv); | ||
283 | spe_end(); | ||
284 | |||
285 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
286 | } | ||
287 | |||
288 | return err; | ||
289 | } | ||
290 | |||
291 | static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
292 | struct scatterlist *src, unsigned int nbytes) | ||
293 | { | ||
294 | struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
295 | struct blkcipher_walk walk; | ||
296 | unsigned int pbytes, ubytes; | ||
297 | int err; | ||
298 | |||
299 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
300 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
301 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | ||
302 | |||
303 | while ((pbytes = walk.nbytes)) { | ||
304 | pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes; | ||
305 | pbytes = pbytes == nbytes ? | ||
306 | nbytes : pbytes & ~(AES_BLOCK_SIZE - 1); | ||
307 | ubytes = walk.nbytes - pbytes; | ||
308 | |||
309 | spe_begin(); | ||
310 | ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr, | ||
311 | ctx->key_enc, ctx->rounds, pbytes , walk.iv); | ||
312 | spe_end(); | ||
313 | |||
314 | nbytes -= pbytes; | ||
315 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
316 | } | ||
317 | |||
318 | return err; | ||
319 | } | ||
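CTR is the one mode here that may process a trailing partial block, but only in the very last chunk; everywhere else pbytes is rounded down to whole blocks. A sketch of the selection logic above (illustrative only):

```c
/*
 * Illustrative: chunk size selection for CTR.  walk_nbytes is what the
 * walk offers this step, total is what remains of the whole request.
 */
static unsigned int ctr_chunk(unsigned int walk_nbytes, unsigned int total)
{
	unsigned int pbytes = min(walk_nbytes, (unsigned int)MAX_BYTES);

	if (pbytes == total)
		return total;			/* last chunk, keep the tail */

	return pbytes & ~(AES_BLOCK_SIZE - 1);	/* otherwise whole blocks */
}
```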
320 | |||
321 | static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
322 | struct scatterlist *src, unsigned int nbytes) | ||
323 | { | ||
324 | struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
325 | struct blkcipher_walk walk; | ||
326 | unsigned int ubytes; | ||
327 | int err; | ||
328 | u32 *twk; | ||
329 | |||
330 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
331 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
332 | err = blkcipher_walk_virt(desc, &walk); | ||
333 | twk = ctx->key_twk; | ||
334 | |||
335 | while ((nbytes = walk.nbytes)) { | ||
336 | ubytes = nbytes > MAX_BYTES ? | ||
337 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
338 | nbytes -= ubytes; | ||
339 | |||
340 | spe_begin(); | ||
341 | ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, | ||
342 | ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk); | ||
343 | spe_end(); | ||
344 | |||
345 | twk = NULL; | ||
346 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
347 | } | ||
348 | |||
349 | return err; | ||
350 | } | ||
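Note how twk is cleared after the first pass: the tweak key is only needed once, to encrypt the IV into the initial tweak, and later chunks resume from the tweak that the assembler wrote back into walk.iv. As a sketch (hypothetical helper):

```c
/*
 * Illustrative: the tweak key is passed to the assembler only for the
 * first chunk of a request; afterwards the running tweak lives in
 * walk.iv and key_twk must not be applied again.
 */
static u32 *xts_tweak_key(struct ppc_xts_ctx *ctx, bool first_chunk)
{
	return first_chunk ? ctx->key_twk : NULL;
}
```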
351 | |||
352 | static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
353 | struct scatterlist *src, unsigned int nbytes) | ||
354 | { | ||
355 | struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
356 | struct blkcipher_walk walk; | ||
357 | unsigned int ubytes; | ||
358 | int err; | ||
359 | u32 *twk; | ||
360 | |||
361 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
362 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
363 | err = blkcipher_walk_virt(desc, &walk); | ||
364 | twk = ctx->key_twk; | ||
365 | |||
366 | while ((nbytes = walk.nbytes)) { | ||
367 | ubytes = nbytes > MAX_BYTES ? | ||
368 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
369 | nbytes -= ubytes; | ||
370 | |||
371 | spe_begin(); | ||
372 | ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, | ||
373 | ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk); | ||
374 | spe_end(); | ||
375 | |||
376 | twk = NULL; | ||
377 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
378 | } | ||
379 | |||
380 | return err; | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen | ||
385 | * because the e500 platform can handle unaligned reads/writes very efficiently. | ||
386 | * This improves IPsec throughput by another few percent. Additionally we assume | ||
387 | * that the AES context is always aligned to at least 8 bytes because it is | ||
388 | * created with kmalloc() in the crypto infrastructure. | ||
389 | * | ||
390 | */ | ||
391 | static struct crypto_alg aes_algs[] = { { | ||
392 | .cra_name = "aes", | ||
393 | .cra_driver_name = "aes-ppc-spe", | ||
394 | .cra_priority = 300, | ||
395 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
396 | .cra_blocksize = AES_BLOCK_SIZE, | ||
397 | .cra_ctxsize = sizeof(struct ppc_aes_ctx), | ||
398 | .cra_alignmask = 0, | ||
399 | .cra_module = THIS_MODULE, | ||
400 | .cra_u = { | ||
401 | .cipher = { | ||
402 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
403 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
404 | .cia_setkey = ppc_aes_setkey, | ||
405 | .cia_encrypt = ppc_aes_encrypt, | ||
406 | .cia_decrypt = ppc_aes_decrypt | ||
407 | } | ||
408 | } | ||
409 | }, { | ||
410 | .cra_name = "ecb(aes)", | ||
411 | .cra_driver_name = "ecb-ppc-spe", | ||
412 | .cra_priority = 300, | ||
413 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
414 | .cra_blocksize = AES_BLOCK_SIZE, | ||
415 | .cra_ctxsize = sizeof(struct ppc_aes_ctx), | ||
416 | .cra_alignmask = 0, | ||
417 | .cra_type = &crypto_blkcipher_type, | ||
418 | .cra_module = THIS_MODULE, | ||
419 | .cra_u = { | ||
420 | .blkcipher = { | ||
421 | .min_keysize = AES_MIN_KEY_SIZE, | ||
422 | .max_keysize = AES_MAX_KEY_SIZE, | ||
423 | .ivsize = AES_BLOCK_SIZE, | ||
424 | .setkey = ppc_aes_setkey, | ||
425 | .encrypt = ppc_ecb_encrypt, | ||
426 | .decrypt = ppc_ecb_decrypt, | ||
427 | } | ||
428 | } | ||
429 | }, { | ||
430 | .cra_name = "cbc(aes)", | ||
431 | .cra_driver_name = "cbc-ppc-spe", | ||
432 | .cra_priority = 300, | ||
433 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
434 | .cra_blocksize = AES_BLOCK_SIZE, | ||
435 | .cra_ctxsize = sizeof(struct ppc_aes_ctx), | ||
436 | .cra_alignmask = 0, | ||
437 | .cra_type = &crypto_blkcipher_type, | ||
438 | .cra_module = THIS_MODULE, | ||
439 | .cra_u = { | ||
440 | .blkcipher = { | ||
441 | .min_keysize = AES_MIN_KEY_SIZE, | ||
442 | .max_keysize = AES_MAX_KEY_SIZE, | ||
443 | .ivsize = AES_BLOCK_SIZE, | ||
444 | .setkey = ppc_aes_setkey, | ||
445 | .encrypt = ppc_cbc_encrypt, | ||
446 | .decrypt = ppc_cbc_decrypt, | ||
447 | } | ||
448 | } | ||
449 | }, { | ||
450 | .cra_name = "ctr(aes)", | ||
451 | .cra_driver_name = "ctr-ppc-spe", | ||
452 | .cra_priority = 300, | ||
453 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
454 | .cra_blocksize = 1, | ||
455 | .cra_ctxsize = sizeof(struct ppc_aes_ctx), | ||
456 | .cra_alignmask = 0, | ||
457 | .cra_type = &crypto_blkcipher_type, | ||
458 | .cra_module = THIS_MODULE, | ||
459 | .cra_u = { | ||
460 | .blkcipher = { | ||
461 | .min_keysize = AES_MIN_KEY_SIZE, | ||
462 | .max_keysize = AES_MAX_KEY_SIZE, | ||
463 | .ivsize = AES_BLOCK_SIZE, | ||
464 | .setkey = ppc_aes_setkey, | ||
465 | .encrypt = ppc_ctr_crypt, | ||
466 | .decrypt = ppc_ctr_crypt, | ||
467 | } | ||
468 | } | ||
469 | }, { | ||
470 | .cra_name = "xts(aes)", | ||
471 | .cra_driver_name = "xts-ppc-spe", | ||
472 | .cra_priority = 300, | ||
473 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
474 | .cra_blocksize = AES_BLOCK_SIZE, | ||
475 | .cra_ctxsize = sizeof(struct ppc_xts_ctx), | ||
476 | .cra_alignmask = 0, | ||
477 | .cra_type = &crypto_blkcipher_type, | ||
478 | .cra_module = THIS_MODULE, | ||
479 | .cra_u = { | ||
480 | .blkcipher = { | ||
481 | .min_keysize = AES_MIN_KEY_SIZE * 2, | ||
482 | .max_keysize = AES_MAX_KEY_SIZE * 2, | ||
483 | .ivsize = AES_BLOCK_SIZE, | ||
484 | .setkey = ppc_xts_setkey, | ||
485 | .encrypt = ppc_xts_encrypt, | ||
486 | .decrypt = ppc_xts_decrypt, | ||
487 | } | ||
488 | } | ||
489 | } }; | ||
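For context, a sketch of how a kernel-side user ends up on these algorithms: asking for the generic name lets the crypto API select by priority (the 300 used here outranks the generic C implementation), while the cra_driver_name pins this driver explicitly. Illustrative only, not part of this file:

```c
/*
 * Illustrative: allocate "aes" and let priority-based selection pick
 * the SPE implementation where it is registered; request "aes-ppc-spe"
 * instead to pin it explicitly.
 */
static int try_spe_aes(void)
{
	struct crypto_cipher *tfm = crypto_alloc_cipher("aes", 0, 0);

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	pr_info("aes provided by %s\n",
		crypto_tfm_alg_driver_name(crypto_cipher_tfm(tfm)));
	crypto_free_cipher(tfm);
	return 0;
}
```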
490 | |||
491 | static int __init ppc_aes_mod_init(void) | ||
492 | { | ||
493 | return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); | ||
494 | } | ||
495 | |||
496 | static void __exit ppc_aes_mod_fini(void) | ||
497 | { | ||
498 | crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); | ||
499 | } | ||
500 | |||
501 | module_init(ppc_aes_mod_init); | ||
502 | module_exit(ppc_aes_mod_fini); | ||
503 | |||
504 | MODULE_LICENSE("GPL"); | ||
505 | MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized"); | ||
506 | |||
507 | MODULE_ALIAS_CRYPTO("aes"); | ||
508 | MODULE_ALIAS_CRYPTO("ecb(aes)"); | ||
509 | MODULE_ALIAS_CRYPTO("cbc(aes)"); | ||
510 | MODULE_ALIAS_CRYPTO("ctr(aes)"); | ||
511 | MODULE_ALIAS_CRYPTO("xts(aes)"); | ||
512 | MODULE_ALIAS_CRYPTO("aes-ppc-spe"); | ||
diff --git a/arch/powerpc/crypto/aes-spe-keys.S b/arch/powerpc/crypto/aes-spe-keys.S new file mode 100644 index 000000000000..be8090f3d700 --- /dev/null +++ b/arch/powerpc/crypto/aes-spe-keys.S | |||
@@ -0,0 +1,283 @@ | |||
1 | /* | ||
2 | * Key handling functions for PPC AES implementation | ||
3 | * | ||
4 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <asm/ppc_asm.h> | ||
14 | |||
15 | #ifdef __BIG_ENDIAN__ | ||
16 | #define LOAD_KEY(d, s, off) \ | ||
17 | lwz d,off(s); | ||
18 | #else | ||
19 | #define LOAD_KEY(d, s, off) \ | ||
20 | li r0,off; \ | ||
21 | lwbrx d,s,r0; | ||
22 | #endif | ||
23 | |||
24 | #define INITIALIZE_KEY \ | ||
25 | stwu r1,-32(r1); /* create stack frame */ \ | ||
26 | stw r14,8(r1); /* save registers */ \ | ||
27 | stw r15,12(r1); \ | ||
28 | stw r16,16(r1); | ||
29 | |||
30 | #define FINALIZE_KEY \ | ||
31 | lwz r14,8(r1); /* restore registers */ \ | ||
32 | lwz r15,12(r1); \ | ||
33 | lwz r16,16(r1); \ | ||
34 | xor r5,r5,r5; /* clear sensitive data */ \ | ||
35 | xor r6,r6,r6; \ | ||
36 | xor r7,r7,r7; \ | ||
37 | xor r8,r8,r8; \ | ||
38 | xor r9,r9,r9; \ | ||
39 | xor r10,r10,r10; \ | ||
40 | xor r11,r11,r11; \ | ||
41 | xor r12,r12,r12; \ | ||
42 | addi r1,r1,32; /* cleanup stack */ | ||
43 | |||
44 | #define LS_BOX(r, t1, t2) \ | ||
45 | lis t2,PPC_AES_4K_ENCTAB@h; \ | ||
46 | ori t2,t2,PPC_AES_4K_ENCTAB@l; \ | ||
47 | rlwimi t2,r,4,20,27; \ | ||
48 | lbz t1,8(t2); \ | ||
49 | rlwimi r,t1,0,24,31; \ | ||
50 | rlwimi t2,r,28,20,27; \ | ||
51 | lbz t1,8(t2); \ | ||
52 | rlwimi r,t1,8,16,23; \ | ||
53 | rlwimi t2,r,20,20,27; \ | ||
54 | lbz t1,8(t2); \ | ||
55 | rlwimi r,t1,16,8,15; \ | ||
56 | rlwimi t2,r,12,20,27; \ | ||
57 | lbz t1,8(t2); \ | ||
58 | rlwimi r,t1,24,0,7; | ||
59 | |||
60 | #define GF8_MUL(out, in, t1, t2) \ | ||
61 | lis t1,0x8080; /* multiplication in GF8 */ \ | ||
62 | ori t1,t1,0x8080; \ | ||
63 | and t1,t1,in; \ | ||
64 | srwi t1,t1,7; \ | ||
65 | mulli t1,t1,0x1b; \ | ||
66 | lis t2,0x7f7f; \ | ||
67 | ori t2,t2,0x7f7f; \ | ||
68 | and t2,t2,in; \ | ||
69 | slwi t2,t2,1; \ | ||
70 | xor out,t1,t2; | ||
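GF8_MUL is the AES xtime operation applied to all four bytes packed into one word: every byte is multiplied by 2 in GF(2^8), with the conditional reduction done branch-free. The same computation in C (illustrative sketch):

```c
/*
 * C rendering of GF8_MUL: multiply each byte of a packed 32 bit word
 * by 2 in GF(2^8).  The top bit of every byte selects the reduction
 * by the AES polynomial (0x11b), applied as an xor with 0x1b.
 */
static u32 gf8_mul2(u32 in)
{
	u32 hi = (in & 0x80808080) >> 7;	/* top bit of each byte */
	u32 lo = (in & 0x7f7f7f7f) << 1;	/* shift the low bits */

	return lo ^ (hi * 0x1b);		/* conditional reduction */
}
```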
71 | |||
72 | /* | ||
73 | * ppc_expand_key_128(u32 *key_enc, const u8 *key) | ||
74 | * | ||
75 | * Expand a 128 bit key into a 176 byte encryption key. It consists of the | ||
76 | * key itself plus 10 round keys of 16 bytes each. | ||
77 | * | ||
78 | */ | ||
79 | _GLOBAL(ppc_expand_key_128) | ||
80 | INITIALIZE_KEY | ||
81 | LOAD_KEY(r5,r4,0) | ||
82 | LOAD_KEY(r6,r4,4) | ||
83 | LOAD_KEY(r7,r4,8) | ||
84 | LOAD_KEY(r8,r4,12) | ||
85 | stw r5,0(r3) /* key[0..3] = input data */ | ||
86 | stw r6,4(r3) | ||
87 | stw r7,8(r3) | ||
88 | stw r8,12(r3) | ||
89 | li r16,10 /* 10 expansion rounds */ | ||
90 | lis r0,0x0100 /* RCO(1) */ | ||
91 | ppc_expand_128_loop: | ||
92 | addi r3,r3,16 | ||
93 | mr r14,r8 /* apply LS_BOX to 4th temp */ | ||
94 | rotlwi r14,r14,8 | ||
95 | LS_BOX(r14, r15, r4) | ||
96 | xor r14,r14,r0 | ||
97 | xor r5,r5,r14 /* xor next 4 keys */ | ||
98 | xor r6,r6,r5 | ||
99 | xor r7,r7,r6 | ||
100 | xor r8,r8,r7 | ||
101 | stw r5,0(r3) /* store next 4 keys */ | ||
102 | stw r6,4(r3) | ||
103 | stw r7,8(r3) | ||
104 | stw r8,12(r3) | ||
105 | GF8_MUL(r0, r0, r4, r14) /* multiply RCO by 2 in GF */ | ||
106 | subi r16,r16,1 | ||
107 | cmpwi r16,0 | ||
108 | bt eq,ppc_expand_128_end | ||
109 | b ppc_expand_128_loop | ||
110 | ppc_expand_128_end: | ||
111 | FINALIZE_KEY | ||
112 | blr | ||
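One iteration of the loop above corresponds to the textbook AES-128 key schedule step. A C sketch under that reading, where sub_word() is a hypothetical helper applying the S-box to each byte (what LS_BOX does) and gf8_mul2() is the xtime sketch shown earlier:

```c
/*
 * Illustrative: one AES-128 expansion round over the four words held
 * in r5-r8 above.  rcon starts at 0x01000000 (lis r0,0x0100) and is
 * doubled in GF(2^8) after every round.
 */
static void expand128_round(u32 w[4], u32 *rcon)
{
	u32 t = w[3];

	t = (t << 8) | (t >> 24);	/* RotWord, i.e. rotlwi ...,8 */
	t = sub_word(t);		/* hypothetical: S-box per byte */
	t ^= *rcon;

	w[0] ^= t;			/* chain the xors through w[0..3] */
	w[1] ^= w[0];
	w[2] ^= w[1];
	w[3] ^= w[2];

	*rcon = gf8_mul2(*rcon);	/* next round constant */
}
```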
113 | |||
114 | /* | ||
115 | * ppc_expand_key_192(u32 *key_enc, const u8 *key) | ||
116 | * | ||
117 | * Expand a 192 bit key into a 208 byte encryption key. It consists of the | ||
118 | * key itself plus 12 round keys of 16 bytes each. | ||
119 | * | ||
120 | */ | ||
121 | _GLOBAL(ppc_expand_key_192) | ||
122 | INITIALIZE_KEY | ||
123 | LOAD_KEY(r5,r4,0) | ||
124 | LOAD_KEY(r6,r4,4) | ||
125 | LOAD_KEY(r7,r4,8) | ||
126 | LOAD_KEY(r8,r4,12) | ||
127 | LOAD_KEY(r9,r4,16) | ||
128 | LOAD_KEY(r10,r4,20) | ||
129 | stw r5,0(r3) | ||
130 | stw r6,4(r3) | ||
131 | stw r7,8(r3) | ||
132 | stw r8,12(r3) | ||
133 | stw r9,16(r3) | ||
134 | stw r10,20(r3) | ||
135 | li r16,8 /* 8 expansion rounds */ | ||
136 | lis r0,0x0100 /* RCO(1) */ | ||
137 | ppc_expand_192_loop: | ||
138 | addi r3,r3,24 | ||
139 | mr r14,r10 /* apply LS_BOX to 6th temp */ | ||
140 | rotlwi r14,r14,8 | ||
141 | LS_BOX(r14, r15, r4) | ||
142 | xor r14,r14,r0 | ||
143 | xor r5,r5,r14 /* xor next 6 keys */ | ||
144 | xor r6,r6,r5 | ||
145 | xor r7,r7,r6 | ||
146 | xor r8,r8,r7 | ||
147 | xor r9,r9,r8 | ||
148 | xor r10,r10,r9 | ||
149 | stw r5,0(r3) | ||
150 | stw r6,4(r3) | ||
151 | stw r7,8(r3) | ||
152 | stw r8,12(r3) | ||
153 | subi r16,r16,1 | ||
154 | cmpwi r16,0 /* last round early kick out */ | ||
155 | bt eq,ppc_expand_192_end | ||
156 | stw r9,16(r3) | ||
157 | stw r10,20(r3) | ||
158 | GF8_MUL(r0, r0, r4, r14) /* multiply RCO GF8 */ | ||
159 | b ppc_expand_192_loop | ||
160 | ppc_expand_192_end: | ||
161 | FINALIZE_KEY | ||
162 | blr | ||
163 | |||
164 | /* | ||
165 | * ppc_expand_key_256(u32 *key_enc, const u8 *key) | ||
166 | * | ||
167 | * Expand a 256 bit key into a 240 byte encryption key. It consists of the | ||
168 | * key itself plus 14 round keys of 16 bytes each. | ||
169 | * | ||
170 | */ | ||
171 | _GLOBAL(ppc_expand_key_256) | ||
172 | INITIALIZE_KEY | ||
173 | LOAD_KEY(r5,r4,0) | ||
174 | LOAD_KEY(r6,r4,4) | ||
175 | LOAD_KEY(r7,r4,8) | ||
176 | LOAD_KEY(r8,r4,12) | ||
177 | LOAD_KEY(r9,r4,16) | ||
178 | LOAD_KEY(r10,r4,20) | ||
179 | LOAD_KEY(r11,r4,24) | ||
180 | LOAD_KEY(r12,r4,28) | ||
181 | stw r5,0(r3) | ||
182 | stw r6,4(r3) | ||
183 | stw r7,8(r3) | ||
184 | stw r8,12(r3) | ||
185 | stw r9,16(r3) | ||
186 | stw r10,20(r3) | ||
187 | stw r11,24(r3) | ||
188 | stw r12,28(r3) | ||
189 | li r16,7 /* 7 expansion rounds */ | ||
190 | lis r0,0x0100 /* RCO(1) */ | ||
191 | ppc_expand_256_loop: | ||
192 | addi r3,r3,32 | ||
193 | mr r14,r12 /* apply LS_BOX to 8th temp */ | ||
194 | rotlwi r14,r14,8 | ||
195 | LS_BOX(r14, r15, r4) | ||
196 | xor r14,r14,r0 | ||
197 | xor r5,r5,r14 /* xor 4 keys */ | ||
198 | xor r6,r6,r5 | ||
199 | xor r7,r7,r6 | ||
200 | xor r8,r8,r7 | ||
201 | mr r14,r8 | ||
202 | LS_BOX(r14, r15, r4) /* apply LS_BOX to 4th temp */ | ||
203 | xor r9,r9,r14 /* xor 4 keys */ | ||
204 | xor r10,r10,r9 | ||
205 | xor r11,r11,r10 | ||
206 | xor r12,r12,r11 | ||
207 | stw r5,0(r3) | ||
208 | stw r6,4(r3) | ||
209 | stw r7,8(r3) | ||
210 | stw r8,12(r3) | ||
211 | subi r16,r16,1 | ||
212 | cmpwi r16,0 /* last round early kick out */ | ||
213 | bt eq,ppc_expand_256_end | ||
214 | stw r9,16(r3) | ||
215 | stw r10,20(r3) | ||
216 | stw r11,24(r3) | ||
217 | stw r12,28(r3) | ||
218 | GF8_MUL(r0, r0, r4, r14) | ||
219 | b ppc_expand_256_loop | ||
220 | ppc_expand_256_end: | ||
221 | FINALIZE_KEY | ||
222 | blr | ||
223 | |||
224 | /* | ||
225 | * ppc_generate_decrypt_key: derive the decryption key from the encryption key. | ||
226 | * The number of bytes to handle is calculated from the key length (16/24/32). | ||
227 | * | ||
228 | */ | ||
229 | _GLOBAL(ppc_generate_decrypt_key) | ||
230 | addi r6,r5,24 | ||
231 | slwi r6,r6,2 | ||
232 | lwzx r7,r4,r6 /* first/last 4 words are same */ | ||
233 | stw r7,0(r3) | ||
234 | lwz r7,0(r4) | ||
235 | stwx r7,r3,r6 | ||
236 | addi r6,r6,4 | ||
237 | lwzx r7,r4,r6 | ||
238 | stw r7,4(r3) | ||
239 | lwz r7,4(r4) | ||
240 | stwx r7,r3,r6 | ||
241 | addi r6,r6,4 | ||
242 | lwzx r7,r4,r6 | ||
243 | stw r7,8(r3) | ||
244 | lwz r7,8(r4) | ||
245 | stwx r7,r3,r6 | ||
246 | addi r6,r6,4 | ||
247 | lwzx r7,r4,r6 | ||
248 | stw r7,12(r3) | ||
249 | lwz r7,12(r4) | ||
250 | stwx r7,r3,r6 | ||
251 | addi r3,r3,16 | ||
252 | add r4,r4,r6 | ||
253 | subi r4,r4,28 | ||
254 | addi r5,r5,20 | ||
255 | srwi r5,r5,2 | ||
256 | ppc_generate_decrypt_block: | ||
257 | li r6,4 | ||
258 | mtctr r6 | ||
259 | ppc_generate_decrypt_word: | ||
260 | lwz r6,0(r4) | ||
261 | GF8_MUL(r7, r6, r0, r7) | ||
262 | GF8_MUL(r8, r7, r0, r8) | ||
263 | GF8_MUL(r9, r8, r0, r9) | ||
264 | xor r10,r9,r6 | ||
265 | xor r11,r7,r8 | ||
266 | xor r11,r11,r9 | ||
267 | xor r12,r7,r10 | ||
268 | rotrwi r12,r12,24 | ||
269 | xor r11,r11,r12 | ||
270 | xor r12,r8,r10 | ||
271 | rotrwi r12,r12,16 | ||
272 | xor r11,r11,r12 | ||
273 | rotrwi r12,r10,8 | ||
274 | xor r11,r11,r12 | ||
275 | stw r11,0(r3) | ||
276 | addi r3,r3,4 | ||
277 | addi r4,r4,4 | ||
278 | bdnz ppc_generate_decrypt_word | ||
279 | subi r4,r4,32 | ||
280 | subi r5,r5,1 | ||
281 | cmpwi r5,0 | ||
282 | bt gt,ppc_generate_decrypt_block | ||
283 | blr | ||
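The word loop above computes InvMixColumns over each round-key word, the standard derivation of the key schedule for the equivalent inverse cipher. The same arithmetic in C (ror32() as in <linux/bitops.h>, gf8_mul2() from the earlier sketch):

```c
/*
 * Illustrative: InvMixColumns on one round-key word.  The byte
 * coefficients {14, 11, 13, 9} are built from the doublings 2w, 4w
 * and 8w exactly as the GF8_MUL chain above does.
 */
static u32 inv_mix_word(u32 w)
{
	u32 w2 = gf8_mul2(w);			/* 2*w */
	u32 w4 = gf8_mul2(w2);			/* 4*w */
	u32 w8 = gf8_mul2(w4);			/* 8*w */
	u32 w9 = w8 ^ w;			/* 9*w */
	u32 out = w2 ^ w4 ^ w8;			/* 14*w */

	out ^= ror32(w2 ^ w9, 24);		/* 11*w, rotated 1 byte */
	out ^= ror32(w4 ^ w9, 16);		/* 13*w, rotated 2 bytes */
	out ^= ror32(w9, 8);			/*  9*w, rotated 3 bytes */

	return out;
}
```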
diff --git a/arch/powerpc/crypto/aes-spe-modes.S b/arch/powerpc/crypto/aes-spe-modes.S new file mode 100644 index 000000000000..ad48032ca8e0 --- /dev/null +++ b/arch/powerpc/crypto/aes-spe-modes.S | |||
@@ -0,0 +1,630 @@ | |||
1 | /* | ||
2 | * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation | ||
3 | * | ||
4 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <asm/ppc_asm.h> | ||
14 | #include "aes-spe-regs.h" | ||
15 | |||
16 | #ifdef __BIG_ENDIAN__ /* Macros for big endian builds */ | ||
17 | |||
18 | #define LOAD_DATA(reg, off) \ | ||
19 | lwz reg,off(rSP); /* load with offset */ | ||
20 | #define SAVE_DATA(reg, off) \ | ||
21 | stw reg,off(rDP); /* save with offset */ | ||
22 | #define NEXT_BLOCK \ | ||
23 | addi rSP,rSP,16; /* increment pointers per block */ \ | ||
24 | addi rDP,rDP,16; | ||
25 | #define LOAD_IV(reg, off) \ | ||
26 | lwz reg,off(rIP); /* IV loading with offset */ | ||
27 | #define SAVE_IV(reg, off) \ | ||
28 | stw reg,off(rIP); /* IV saving with offset */ | ||
29 | #define START_IV /* nothing to reset */ | ||
30 | #define CBC_DEC 16 /* CBC decrement per block */ | ||
31 | #define CTR_DEC 1 /* CTR decrement one byte */ | ||
32 | |||
33 | #else /* Macros for little endian */ | ||
34 | |||
35 | #define LOAD_DATA(reg, off) \ | ||
36 | lwbrx reg,0,rSP; /* load reversed */ \ | ||
37 | addi rSP,rSP,4; /* and increment pointer */ | ||
38 | #define SAVE_DATA(reg, off) \ | ||
39 | stwbrx reg,0,rDP; /* save reversed */ \ | ||
40 | addi rDP,rDP,4; /* and increment pointer */ | ||
41 | #define NEXT_BLOCK /* nothing to do */ | ||
42 | #define LOAD_IV(reg, off) \ | ||
43 | lwbrx reg,0,rIP; /* load reversed */ \ | ||
44 | addi rIP,rIP,4; /* and increment pointer */ | ||
45 | #define SAVE_IV(reg, off) \ | ||
46 | stwbrx reg,0,rIP; /* save reversed */ \ | ||
47 | addi rIP,rIP,4; /* and increment pointer */ | ||
48 | #define START_IV \ | ||
49 | subi rIP,rIP,16; /* must reset pointer */ | ||
50 | #define CBC_DEC 32 /* 2 blocks because of incs */ | ||
51 | #define CTR_DEC 17 /* 1 block because of incs */ | ||
52 | |||
53 | #endif | ||
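On little endian builds the lwbrx/stwbrx forms keep every in-register word in big endian byte order, which is what the tables and the rlwimi-based indexing expect; the pointer increments are folded into the macros, which is also why CBC_DEC and CTR_DEC differ between the two builds. A sketch of the LE load path (illustrative; swab32() as in <linux/swab.h>):

```c
/*
 * Illustrative: what LOAD_DATA does on little endian.  lwbrx is a
 * byte-reversed load, so the value ends up in big endian order in the
 * register; the post-increment mirrors "addi rSP,rSP,4".
 */
static u32 load_data_le(const u8 **src)
{
	u32 v = swab32(*(const u32 *)*src);	/* lwbrx reg,0,rSP */

	*src += 4;				/* addi rSP,rSP,4 */
	return v;
}
```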
54 | |||
55 | #define SAVE_0_REGS | ||
56 | #define LOAD_0_REGS | ||
57 | |||
58 | #define SAVE_4_REGS \ | ||
59 | stw rI0,96(r1); /* save 32 bit registers */ \ | ||
60 | stw rI1,100(r1); \ | ||
61 | stw rI2,104(r1); \ | ||
62 | stw rI3,108(r1); | ||
63 | |||
64 | #define LOAD_4_REGS \ | ||
65 | lwz rI0,96(r1); /* restore 32 bit registers */ \ | ||
66 | lwz rI1,100(r1); \ | ||
67 | lwz rI2,104(r1); \ | ||
68 | lwz rI3,108(r1); | ||
69 | |||
70 | #define SAVE_8_REGS \ | ||
71 | SAVE_4_REGS \ | ||
72 | stw rG0,112(r1); /* save 32 bit registers */ \ | ||
73 | stw rG1,116(r1); \ | ||
74 | stw rG2,120(r1); \ | ||
75 | stw rG3,124(r1); | ||
76 | |||
77 | #define LOAD_8_REGS \ | ||
78 | LOAD_4_REGS \ | ||
79 | lwz rG0,112(r1); /* restore 32 bit registers */ \ | ||
80 | lwz rG1,116(r1); \ | ||
81 | lwz rG2,120(r1); \ | ||
82 | lwz rG3,124(r1); | ||
83 | |||
84 | #define INITIALIZE_CRYPT(tab,nr32bitregs) \ | ||
85 | mflr r0; \ | ||
86 | stwu r1,-160(r1); /* create stack frame */ \ | ||
87 | lis rT0,tab@h; /* en-/decryption table pointer */ \ | ||
88 | stw r0,8(r1); /* save link register */ \ | ||
89 | ori rT0,rT0,tab@l; \ | ||
90 | evstdw r14,16(r1); \ | ||
91 | mr rKS,rKP; \ | ||
92 | evstdw r15,24(r1); /* We must save non volatile */ \ | ||
93 | evstdw r16,32(r1); /* registers. Take the chance */ \ | ||
94 | evstdw r17,40(r1); /* and save the SPE part too */ \ | ||
95 | evstdw r18,48(r1); \ | ||
96 | evstdw r19,56(r1); \ | ||
97 | evstdw r20,64(r1); \ | ||
98 | evstdw r21,72(r1); \ | ||
99 | evstdw r22,80(r1); \ | ||
100 | evstdw r23,88(r1); \ | ||
101 | SAVE_##nr32bitregs##_REGS | ||
102 | |||
103 | #define FINALIZE_CRYPT(nr32bitregs) \ | ||
104 | lwz r0,8(r1); \ | ||
105 | evldw r14,16(r1); /* restore SPE registers */ \ | ||
106 | evldw r15,24(r1); \ | ||
107 | evldw r16,32(r1); \ | ||
108 | evldw r17,40(r1); \ | ||
109 | evldw r18,48(r1); \ | ||
110 | evldw r19,56(r1); \ | ||
111 | evldw r20,64(r1); \ | ||
112 | evldw r21,72(r1); \ | ||
113 | evldw r22,80(r1); \ | ||
114 | evldw r23,88(r1); \ | ||
115 | LOAD_##nr32bitregs##_REGS \ | ||
116 | mtlr r0; /* restore link register */ \ | ||
117 | xor r0,r0,r0; \ | ||
118 | stw r0,16(r1); /* delete sensitive data */ \ | ||
119 | stw r0,24(r1); /* that we might have pushed */ \ | ||
120 | stw r0,32(r1); /* from other context that runs */ \ | ||
121 | stw r0,40(r1); /* the same code */ \ | ||
122 | stw r0,48(r1); \ | ||
123 | stw r0,56(r1); \ | ||
124 | stw r0,64(r1); \ | ||
125 | stw r0,72(r1); \ | ||
126 | stw r0,80(r1); \ | ||
127 | stw r0,88(r1); \ | ||
128 | addi r1,r1,160; /* cleanup stack frame */ | ||
129 | |||
130 | #define ENDIAN_SWAP(t0, t1, s0, s1) \ | ||
131 | rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \ | ||
132 | rotrwi t1,s1,8; \ | ||
133 | rlwimi t0,s0,8,8,15; \ | ||
134 | rlwimi t1,s1,8,8,15; \ | ||
135 | rlwimi t0,s0,8,24,31; \ | ||
136 | rlwimi t1,s1,8,24,31; | ||
137 | |||
138 | #define GF128_MUL(d0, d1, d2, d3, t0) \ | ||
139 | li t0,0x87; /* multiplication in GF128 */ \ | ||
140 | cmpwi d3,-1; \ | ||
141 | iselgt t0,0,t0; \ | ||
142 | rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \ | ||
143 | rotlwi d3,d3,1; \ | ||
144 | rlwimi d2,d1,0,0,0; \ | ||
145 | rotlwi d2,d2,1; \ | ||
146 | rlwimi d1,d0,0,0,0; \ | ||
147 | slwi d0,d0,1; /* shift left 128 bit */ \ | ||
148 | rotlwi d1,d1,1; \ | ||
149 | xor d0,d0,t0; | ||
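GF128_MUL is the XTS tweak update: multiplication by x in GF(2^128), with d0 holding the least significant and d3 the most significant word, and 0x87 the reduction constant of the XTS polynomial. In C (illustrative sketch):

```c
/*
 * Illustrative: multiply the 128 bit tweak by x in GF(2^128).
 * d[0] is the least significant word, d[3] the most significant;
 * the carry out of the shift folds back in as 0x87.
 */
static void gf128_mul_x(u32 d[4])
{
	u32 carry = (d[3] & 0x80000000) ? 0x87 : 0;

	d[3] = (d[3] << 1) | (d[2] >> 31);
	d[2] = (d[2] << 1) | (d[1] >> 31);
	d[1] = (d[1] << 1) | (d[0] >> 31);
	d[0] = (d[0] << 1) ^ carry;
}
```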
150 | |||
151 | #define START_KEY(d0, d1, d2, d3) \ | ||
152 | lwz rW0,0(rKP); \ | ||
153 | mtctr rRR; \ | ||
154 | lwz rW1,4(rKP); \ | ||
155 | lwz rW2,8(rKP); \ | ||
156 | lwz rW3,12(rKP); \ | ||
157 | xor rD0,d0,rW0; \ | ||
158 | xor rD1,d1,rW1; \ | ||
159 | xor rD2,d2,rW2; \ | ||
160 | xor rD3,d3,rW3; | ||
161 | |||
162 | /* | ||
163 | * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, | ||
164 | * u32 rounds) | ||
165 | * | ||
166 | * called from glue layer to encrypt a single 16 byte block. | ||
167 | * Round values are AES128 = 4, AES192 = 5, AES256 = 6 | ||
168 | * | ||
169 | */ | ||
170 | _GLOBAL(ppc_encrypt_aes) | ||
171 | INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0) | ||
172 | LOAD_DATA(rD0, 0) | ||
173 | LOAD_DATA(rD1, 4) | ||
174 | LOAD_DATA(rD2, 8) | ||
175 | LOAD_DATA(rD3, 12) | ||
176 | START_KEY(rD0, rD1, rD2, rD3) | ||
177 | bl ppc_encrypt_block | ||
178 | xor rD0,rD0,rW0 | ||
179 | SAVE_DATA(rD0, 0) | ||
180 | xor rD1,rD1,rW1 | ||
181 | SAVE_DATA(rD1, 4) | ||
182 | xor rD2,rD2,rW2 | ||
183 | SAVE_DATA(rD2, 8) | ||
184 | xor rD3,rD3,rW3 | ||
185 | SAVE_DATA(rD3, 12) | ||
186 | FINALIZE_CRYPT(0) | ||
187 | blr | ||
188 | |||
189 | /* | ||
190 | * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, | ||
191 | * u32 rounds) | ||
192 | * | ||
193 | * called from glue layer to decrypt a single 16 byte block. | ||
194 | * Round values are AES128 = 4, AES192 = 5, AES256 = 6 | ||
195 | * | ||
196 | */ | ||
197 | _GLOBAL(ppc_decrypt_aes) | ||
198 | INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0) | ||
199 | LOAD_DATA(rD0, 0) | ||
200 | addi rT1,rT0,4096 | ||
201 | LOAD_DATA(rD1, 4) | ||
202 | LOAD_DATA(rD2, 8) | ||
203 | LOAD_DATA(rD3, 12) | ||
204 | START_KEY(rD0, rD1, rD2, rD3) | ||
205 | bl ppc_decrypt_block | ||
206 | xor rD0,rD0,rW0 | ||
207 | SAVE_DATA(rD0, 0) | ||
208 | xor rD1,rD1,rW1 | ||
209 | SAVE_DATA(rD1, 4) | ||
210 | xor rD2,rD2,rW2 | ||
211 | SAVE_DATA(rD2, 8) | ||
212 | xor rD3,rD3,rW3 | ||
213 | SAVE_DATA(rD3, 12) | ||
214 | FINALIZE_CRYPT(0) | ||
215 | blr | ||
216 | |||
217 | /* | ||
218 | * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, | ||
219 | * u32 rounds, u32 bytes); | ||
220 | * | ||
221 | * called from glue layer to encrypt multiple blocks via ECB. | ||
222 | * Bytes must be greater than or equal to 16 and only whole blocks | ||
223 | * are processed. Round values are AES128 = 4, AES192 = 5 and | ||
224 | * AES256 = 6 | ||
225 | * | ||
226 | */ | ||
227 | _GLOBAL(ppc_encrypt_ecb) | ||
228 | INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0) | ||
229 | ppc_encrypt_ecb_loop: | ||
230 | LOAD_DATA(rD0, 0) | ||
231 | mr rKP,rKS | ||
232 | LOAD_DATA(rD1, 4) | ||
233 | subi rLN,rLN,16 | ||
234 | LOAD_DATA(rD2, 8) | ||
235 | cmpwi rLN,15 | ||
236 | LOAD_DATA(rD3, 12) | ||
237 | START_KEY(rD0, rD1, rD2, rD3) | ||
238 | bl ppc_encrypt_block | ||
239 | xor rD0,rD0,rW0 | ||
240 | SAVE_DATA(rD0, 0) | ||
241 | xor rD1,rD1,rW1 | ||
242 | SAVE_DATA(rD1, 4) | ||
243 | xor rD2,rD2,rW2 | ||
244 | SAVE_DATA(rD2, 8) | ||
245 | xor rD3,rD3,rW3 | ||
246 | SAVE_DATA(rD3, 12) | ||
247 | NEXT_BLOCK | ||
248 | bt gt,ppc_encrypt_ecb_loop | ||
249 | FINALIZE_CRYPT(0) | ||
250 | blr | ||
251 | |||
252 | /* | ||
253 | * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, | ||
254 | * u32 rounds, u32 bytes); | ||
255 | * | ||
256 | * called from glue layer to decrypt multiple blocks via ECB. | ||
257 | * Bytes must be greater than or equal to 16 and only whole blocks | ||
258 | * are processed. Round values are AES128 = 4, AES192 = 5 and | ||
259 | * AES256 = 6 | ||
260 | * | ||
261 | */ | ||
262 | _GLOBAL(ppc_decrypt_ecb) | ||
263 | INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0) | ||
264 | addi rT1,rT0,4096 | ||
265 | ppc_decrypt_ecb_loop: | ||
266 | LOAD_DATA(rD0, 0) | ||
267 | mr rKP,rKS | ||
268 | LOAD_DATA(rD1, 4) | ||
269 | subi rLN,rLN,16 | ||
270 | LOAD_DATA(rD2, 8) | ||
271 | cmpwi rLN,15 | ||
272 | LOAD_DATA(rD3, 12) | ||
273 | START_KEY(rD0, rD1, rD2, rD3) | ||
274 | bl ppc_decrypt_block | ||
275 | xor rD0,rD0,rW0 | ||
276 | SAVE_DATA(rD0, 0) | ||
277 | xor rD1,rD1,rW1 | ||
278 | SAVE_DATA(rD1, 4) | ||
279 | xor rD2,rD2,rW2 | ||
280 | SAVE_DATA(rD2, 8) | ||
281 | xor rD3,rD3,rW3 | ||
282 | SAVE_DATA(rD3, 12) | ||
283 | NEXT_BLOCK | ||
284 | bt gt,ppc_decrypt_ecb_loop | ||
285 | FINALIZE_CRYPT(0) | ||
286 | blr | ||
287 | |||
288 | /* | ||
289 | * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, | ||
290 | * u32 rounds, u32 bytes, u8 *iv); | ||
291 | * | ||
292 | * called from glue layer to encrypt multiple blocks via CBC. | ||
293 | * Bytes must be greater than or equal to 16 and only whole blocks | ||
294 | * are processed. Round values are AES128 = 4, AES192 = 5 and | ||
295 | * AES256 = 6 | ||
296 | * | ||
297 | */ | ||
298 | _GLOBAL(ppc_encrypt_cbc) | ||
299 | INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4) | ||
300 | LOAD_IV(rI0, 0) | ||
301 | LOAD_IV(rI1, 4) | ||
302 | LOAD_IV(rI2, 8) | ||
303 | LOAD_IV(rI3, 12) | ||
304 | ppc_encrypt_cbc_loop: | ||
305 | LOAD_DATA(rD0, 0) | ||
306 | mr rKP,rKS | ||
307 | LOAD_DATA(rD1, 4) | ||
308 | subi rLN,rLN,16 | ||
309 | LOAD_DATA(rD2, 8) | ||
310 | cmpwi rLN,15 | ||
311 | LOAD_DATA(rD3, 12) | ||
312 | xor rD0,rD0,rI0 | ||
313 | xor rD1,rD1,rI1 | ||
314 | xor rD2,rD2,rI2 | ||
315 | xor rD3,rD3,rI3 | ||
316 | START_KEY(rD0, rD1, rD2, rD3) | ||
317 | bl ppc_encrypt_block | ||
318 | xor rI0,rD0,rW0 | ||
319 | SAVE_DATA(rI0, 0) | ||
320 | xor rI1,rD1,rW1 | ||
321 | SAVE_DATA(rI1, 4) | ||
322 | xor rI2,rD2,rW2 | ||
323 | SAVE_DATA(rI2, 8) | ||
324 | xor rI3,rD3,rW3 | ||
325 | SAVE_DATA(rI3, 12) | ||
326 | NEXT_BLOCK | ||
327 | bt gt,ppc_encrypt_cbc_loop | ||
328 | START_IV | ||
329 | SAVE_IV(rI0, 0) | ||
330 | SAVE_IV(rI1, 4) | ||
331 | SAVE_IV(rI2, 8) | ||
332 | SAVE_IV(rI3, 12) | ||
333 | FINALIZE_CRYPT(4) | ||
334 | blr | ||
335 | |||
336 | /* | ||
337 | * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, | ||
338 | * u32 rounds, u32 bytes, u8 *iv); | ||
339 | * | ||
340 | * called from glue layer to decrypt multiple blocks via CBC. | ||
341 | * Round values are AES128 = 4, AES192 = 5, AES256 = 6 | ||
342 | * | ||
343 | */ | ||
344 | _GLOBAL(ppc_decrypt_cbc) | ||
345 | INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4) | ||
346 | li rT1,15 | ||
347 | LOAD_IV(rI0, 0) | ||
348 | andc rLN,rLN,rT1 | ||
349 | LOAD_IV(rI1, 4) | ||
350 | subi rLN,rLN,16 | ||
351 | LOAD_IV(rI2, 8) | ||
352 | add rSP,rSP,rLN /* reverse processing */ | ||
353 | LOAD_IV(rI3, 12) | ||
354 | add rDP,rDP,rLN | ||
355 | LOAD_DATA(rD0, 0) | ||
356 | addi rT1,rT0,4096 | ||
357 | LOAD_DATA(rD1, 4) | ||
358 | LOAD_DATA(rD2, 8) | ||
359 | LOAD_DATA(rD3, 12) | ||
360 | START_IV | ||
361 | SAVE_IV(rD0, 0) | ||
362 | SAVE_IV(rD1, 4) | ||
363 | SAVE_IV(rD2, 8) | ||
364 | cmpwi rLN,16 | ||
365 | SAVE_IV(rD3, 12) | ||
366 | bt lt,ppc_decrypt_cbc_end | ||
367 | ppc_decrypt_cbc_loop: | ||
368 | mr rKP,rKS | ||
369 | START_KEY(rD0, rD1, rD2, rD3) | ||
370 | bl ppc_decrypt_block | ||
371 | subi rLN,rLN,16 | ||
372 | subi rSP,rSP,CBC_DEC | ||
373 | xor rW0,rD0,rW0 | ||
374 | LOAD_DATA(rD0, 0) | ||
375 | xor rW1,rD1,rW1 | ||
376 | LOAD_DATA(rD1, 4) | ||
377 | xor rW2,rD2,rW2 | ||
378 | LOAD_DATA(rD2, 8) | ||
379 | xor rW3,rD3,rW3 | ||
380 | LOAD_DATA(rD3, 12) | ||
381 | xor rW0,rW0,rD0 | ||
382 | SAVE_DATA(rW0, 0) | ||
383 | xor rW1,rW1,rD1 | ||
384 | SAVE_DATA(rW1, 4) | ||
385 | xor rW2,rW2,rD2 | ||
386 | SAVE_DATA(rW2, 8) | ||
387 | xor rW3,rW3,rD3 | ||
388 | SAVE_DATA(rW3, 12) | ||
389 | cmpwi rLN,15 | ||
390 | subi rDP,rDP,CBC_DEC | ||
391 | bt gt,ppc_decrypt_cbc_loop | ||
392 | ppc_decrypt_cbc_end: | ||
393 | mr rKP,rKS | ||
394 | START_KEY(rD0, rD1, rD2, rD3) | ||
395 | bl ppc_decrypt_block | ||
396 | xor rW0,rW0,rD0 | ||
397 | xor rW1,rW1,rD1 | ||
398 | xor rW2,rW2,rD2 | ||
399 | xor rW3,rW3,rD3 | ||
400 | xor rW0,rW0,rI0 /* decrypt with initial IV */ | ||
401 | SAVE_DATA(rW0, 0) | ||
402 | xor rW1,rW1,rI1 | ||
403 | SAVE_DATA(rW1, 4) | ||
404 | xor rW2,rW2,rI2 | ||
405 | SAVE_DATA(rW2, 8) | ||
406 | xor rW3,rW3,rI3 | ||
407 | SAVE_DATA(rW3, 12) | ||
408 | FINALIZE_CRYPT(4) | ||
409 | blr | ||
410 | |||
411 | /* | ||
412 | * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc, | ||
413 | * u32 rounds, u32 bytes, u8 *iv); | ||
414 | * | ||
415 | * called from glue layer to encrypt/decrypt multiple blocks | ||
416 | * via CTR. The number of bytes does not need to be a multiple of | ||
417 | * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6 | ||
418 | * | ||
419 | */ | ||
420 | _GLOBAL(ppc_crypt_ctr) | ||
421 | INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4) | ||
422 | LOAD_IV(rI0, 0) | ||
423 | LOAD_IV(rI1, 4) | ||
424 | LOAD_IV(rI2, 8) | ||
425 | cmpwi rLN,16 | ||
426 | LOAD_IV(rI3, 12) | ||
427 | START_IV | ||
428 | bt lt,ppc_crypt_ctr_partial | ||
429 | ppc_crypt_ctr_loop: | ||
430 | mr rKP,rKS | ||
431 | START_KEY(rI0, rI1, rI2, rI3) | ||
432 | bl ppc_encrypt_block | ||
433 | xor rW0,rD0,rW0 | ||
434 | xor rW1,rD1,rW1 | ||
435 | xor rW2,rD2,rW2 | ||
436 | xor rW3,rD3,rW3 | ||
437 | LOAD_DATA(rD0, 0) | ||
438 | subi rLN,rLN,16 | ||
439 | LOAD_DATA(rD1, 4) | ||
440 | LOAD_DATA(rD2, 8) | ||
441 | LOAD_DATA(rD3, 12) | ||
442 | xor rD0,rD0,rW0 | ||
443 | SAVE_DATA(rD0, 0) | ||
444 | xor rD1,rD1,rW1 | ||
445 | SAVE_DATA(rD1, 4) | ||
446 | xor rD2,rD2,rW2 | ||
447 | SAVE_DATA(rD2, 8) | ||
448 | xor rD3,rD3,rW3 | ||
449 | SAVE_DATA(rD3, 12) | ||
450 | addic rI3,rI3,1 /* increase counter */ | ||
451 | addze rI2,rI2 | ||
452 | addze rI1,rI1 | ||
453 | addze rI0,rI0 | ||
454 | NEXT_BLOCK | ||
455 | cmpwi rLN,15 | ||
456 | bt gt,ppc_crypt_ctr_loop | ||
457 | ppc_crypt_ctr_partial: | ||
458 | cmpwi rLN,0 | ||
459 | bt eq,ppc_crypt_ctr_end | ||
460 | mr rKP,rKS | ||
461 | START_KEY(rI0, rI1, rI2, rI3) | ||
462 | bl ppc_encrypt_block | ||
463 | xor rW0,rD0,rW0 | ||
464 | SAVE_IV(rW0, 0) | ||
465 | xor rW1,rD1,rW1 | ||
466 | SAVE_IV(rW1, 4) | ||
467 | xor rW2,rD2,rW2 | ||
468 | SAVE_IV(rW2, 8) | ||
469 | xor rW3,rD3,rW3 | ||
470 | SAVE_IV(rW3, 12) | ||
471 | mtctr rLN | ||
472 | subi rIP,rIP,CTR_DEC | ||
473 | subi rSP,rSP,1 | ||
474 | subi rDP,rDP,1 | ||
475 | ppc_crypt_ctr_xorbyte: | ||
476 | lbzu rW4,1(rIP) /* bytewise xor for partial block */ | ||
477 | lbzu rW5,1(rSP) | ||
478 | xor rW4,rW4,rW5 | ||
479 | stbu rW4,1(rDP) | ||
480 | bdnz ppc_crypt_ctr_xorbyte | ||
481 | subf rIP,rLN,rIP | ||
482 | addi rIP,rIP,1 | ||
483 | addic rI3,rI3,1 | ||
484 | addze rI2,rI2 | ||
485 | addze rI1,rI1 | ||
486 | addze rI0,rI0 | ||
487 | ppc_crypt_ctr_end: | ||
488 | SAVE_IV(rI0, 0) | ||
489 | SAVE_IV(rI1, 4) | ||
490 | SAVE_IV(rI2, 8) | ||
491 | SAVE_IV(rI3, 12) | ||
492 | FINALIZE_CRYPT(4) | ||
493 | blr | ||
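Two details above are worth restating: the addic/addze chain treats the IV as one big endian 128 bit counter, and the bytewise loop xors the final, partial block against the encrypted counter saved into the IV buffer. A sketch of the counter increment (illustrative only):

```c
/*
 * Illustrative: the big endian 128 bit counter increment performed by
 * "addic rI3,rI3,1; addze rI2; addze rI1; addze rI0" (ctr[0] is the
 * most significant word).
 */
static void ctr_inc_be128(u32 ctr[4])
{
	int i;

	for (i = 3; i >= 0; i--)
		if (++ctr[i])		/* stop once there is no wrap */
			break;
}
```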
494 | |||
495 | /* | ||
496 | * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, | ||
497 | * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk); | ||
498 | * | ||
499 | * called from glue layer to encrypt multiple blocks via XTS | ||
500 | * If key_twk is given, the initial encryption of the IV will be | ||
501 | * performed too. Round values are AES128 = 4, AES192 = 5, | ||
502 | * AES256 = 6 | ||
503 | * | ||
504 | */ | ||
505 | _GLOBAL(ppc_encrypt_xts) | ||
506 | INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8) | ||
507 | LOAD_IV(rI0, 0) | ||
508 | LOAD_IV(rI1, 4) | ||
509 | LOAD_IV(rI2, 8) | ||
510 | cmpwi rKT,0 | ||
511 | LOAD_IV(rI3, 12) | ||
512 | bt eq,ppc_encrypt_xts_notweak | ||
513 | mr rKP,rKT | ||
514 | START_KEY(rI0, rI1, rI2, rI3) | ||
515 | bl ppc_encrypt_block | ||
516 | xor rI0,rD0,rW0 | ||
517 | xor rI1,rD1,rW1 | ||
518 | xor rI2,rD2,rW2 | ||
519 | xor rI3,rD3,rW3 | ||
520 | ppc_encrypt_xts_notweak: | ||
521 | ENDIAN_SWAP(rG0, rG1, rI0, rI1) | ||
522 | ENDIAN_SWAP(rG2, rG3, rI2, rI3) | ||
523 | ppc_encrypt_xts_loop: | ||
524 | LOAD_DATA(rD0, 0) | ||
525 | mr rKP,rKS | ||
526 | LOAD_DATA(rD1, 4) | ||
527 | subi rLN,rLN,16 | ||
528 | LOAD_DATA(rD2, 8) | ||
529 | LOAD_DATA(rD3, 12) | ||
530 | xor rD0,rD0,rI0 | ||
531 | xor rD1,rD1,rI1 | ||
532 | xor rD2,rD2,rI2 | ||
533 | xor rD3,rD3,rI3 | ||
534 | START_KEY(rD0, rD1, rD2, rD3) | ||
535 | bl ppc_encrypt_block | ||
536 | xor rD0,rD0,rW0 | ||
537 | xor rD1,rD1,rW1 | ||
538 | xor rD2,rD2,rW2 | ||
539 | xor rD3,rD3,rW3 | ||
540 | xor rD0,rD0,rI0 | ||
541 | SAVE_DATA(rD0, 0) | ||
542 | xor rD1,rD1,rI1 | ||
543 | SAVE_DATA(rD1, 4) | ||
544 | xor rD2,rD2,rI2 | ||
545 | SAVE_DATA(rD2, 8) | ||
546 | xor rD3,rD3,rI3 | ||
547 | SAVE_DATA(rD3, 12) | ||
548 | GF128_MUL(rG0, rG1, rG2, rG3, rW0) | ||
549 | ENDIAN_SWAP(rI0, rI1, rG0, rG1) | ||
550 | ENDIAN_SWAP(rI2, rI3, rG2, rG3) | ||
551 | cmpwi rLN,0 | ||
552 | NEXT_BLOCK | ||
553 | bt gt,ppc_encrypt_xts_loop | ||
554 | START_IV | ||
555 | SAVE_IV(rI0, 0) | ||
556 | SAVE_IV(rI1, 4) | ||
557 | SAVE_IV(rI2, 8) | ||
558 | SAVE_IV(rI3, 12) | ||
559 | FINALIZE_CRYPT(8) | ||
560 | blr | ||
561 | |||
562 | /* | ||
563 | * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, | ||
564 | * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk); | ||
565 | * | ||
566 | * called from glue layer to decrypt multiple blocks via XTS | ||
567 | * If key_twk is given, the initial encryption of the IV will be | ||
568 | * performed too. Round values are AES128 = 4, AES192 = 5, | ||
569 | * AES256 = 6 | ||
570 | * | ||
571 | */ | ||
572 | _GLOBAL(ppc_decrypt_xts) | ||
573 | INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8) | ||
574 | LOAD_IV(rI0, 0) | ||
575 | addi rT1,rT0,4096 | ||
576 | LOAD_IV(rI1, 4) | ||
577 | LOAD_IV(rI2, 8) | ||
578 | cmpwi rKT,0 | ||
579 | LOAD_IV(rI3, 12) | ||
580 | bt eq,ppc_decrypt_xts_notweak | ||
581 | subi rT0,rT0,4096 | ||
582 | mr rKP,rKT | ||
583 | START_KEY(rI0, rI1, rI2, rI3) | ||
584 | bl ppc_encrypt_block | ||
585 | xor rI0,rD0,rW0 | ||
586 | xor rI1,rD1,rW1 | ||
587 | xor rI2,rD2,rW2 | ||
588 | xor rI3,rD3,rW3 | ||
589 | addi rT0,rT0,4096 | ||
590 | ppc_decrypt_xts_notweak: | ||
591 | ENDIAN_SWAP(rG0, rG1, rI0, rI1) | ||
592 | ENDIAN_SWAP(rG2, rG3, rI2, rI3) | ||
593 | ppc_decrypt_xts_loop: | ||
594 | LOAD_DATA(rD0, 0) | ||
595 | mr rKP,rKS | ||
596 | LOAD_DATA(rD1, 4) | ||
597 | subi rLN,rLN,16 | ||
598 | LOAD_DATA(rD2, 8) | ||
599 | LOAD_DATA(rD3, 12) | ||
600 | xor rD0,rD0,rI0 | ||
601 | xor rD1,rD1,rI1 | ||
602 | xor rD2,rD2,rI2 | ||
603 | xor rD3,rD3,rI3 | ||
604 | START_KEY(rD0, rD1, rD2, rD3) | ||
605 | bl ppc_decrypt_block | ||
606 | xor rD0,rD0,rW0 | ||
607 | xor rD1,rD1,rW1 | ||
608 | xor rD2,rD2,rW2 | ||
609 | xor rD3,rD3,rW3 | ||
610 | xor rD0,rD0,rI0 | ||
611 | SAVE_DATA(rD0, 0) | ||
612 | xor rD1,rD1,rI1 | ||
613 | SAVE_DATA(rD1, 4) | ||
614 | xor rD2,rD2,rI2 | ||
615 | SAVE_DATA(rD2, 8) | ||
616 | xor rD3,rD3,rI3 | ||
617 | SAVE_DATA(rD3, 12) | ||
618 | GF128_MUL(rG0, rG1, rG2, rG3, rW0) | ||
619 | ENDIAN_SWAP(rI0, rI1, rG0, rG1) | ||
620 | ENDIAN_SWAP(rI2, rI3, rG2, rG3) | ||
621 | cmpwi rLN,0 | ||
622 | NEXT_BLOCK | ||
623 | bt gt,ppc_decrypt_xts_loop | ||
624 | START_IV | ||
625 | SAVE_IV(rI0, 0) | ||
626 | SAVE_IV(rI1, 4) | ||
627 | SAVE_IV(rI2, 8) | ||
628 | SAVE_IV(rI3, 12) | ||
629 | FINALIZE_CRYPT(8) | ||
630 | blr | ||
diff --git a/arch/powerpc/crypto/aes-spe-regs.h b/arch/powerpc/crypto/aes-spe-regs.h new file mode 100644 index 000000000000..30d217b399c3 --- /dev/null +++ b/arch/powerpc/crypto/aes-spe-regs.h | |||
@@ -0,0 +1,42 @@ | |||
1 | /* | ||
2 | * Common registers for PPC AES implementation | ||
3 | * | ||
4 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #define rKS r0 /* copy of en-/decryption key pointer */ | ||
14 | #define rDP r3 /* destination pointer */ | ||
15 | #define rSP r4 /* source pointer */ | ||
16 | #define rKP r5 /* pointer to the en-/decryption key */ | ||
17 | #define rRR r6 /* en-/decryption rounds */ | ||
18 | #define rLN r7 /* length of data to be processed */ | ||
19 | #define rIP r8 /* pointer to IV (CBC/CTR/XTS modes) */ | ||
20 | #define rKT r9 /* pointer to tweak key (XTS mode) */ | ||
21 | #define rT0 r11 /* pointers to en-/decryption tables */ | ||
22 | #define rT1 r10 | ||
23 | #define rD0 r9 /* data */ | ||
24 | #define rD1 r14 | ||
25 | #define rD2 r12 | ||
26 | #define rD3 r15 | ||
27 | #define rW0 r16 /* working registers */ | ||
28 | #define rW1 r17 | ||
29 | #define rW2 r18 | ||
30 | #define rW3 r19 | ||
31 | #define rW4 r20 | ||
32 | #define rW5 r21 | ||
33 | #define rW6 r22 | ||
34 | #define rW7 r23 | ||
35 | #define rI0 r24 /* IV */ | ||
36 | #define rI1 r25 | ||
37 | #define rI2 r26 | ||
38 | #define rI3 r27 | ||
39 | #define rG0 r28 /* endian reversed tweak (XTS mode) */ | ||
40 | #define rG1 r29 | ||
41 | #define rG2 r30 | ||
42 | #define rG3 r31 | ||
diff --git a/arch/powerpc/crypto/aes-tab-4k.S b/arch/powerpc/crypto/aes-tab-4k.S new file mode 100644 index 000000000000..701e60240dc3 --- /dev/null +++ b/arch/powerpc/crypto/aes-tab-4k.S | |||
@@ -0,0 +1,331 @@ | |||
1 | /* | ||
2 | * 4K AES tables for PPC AES implementation | ||
3 | * | ||
4 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * These big endian AES encryption/decryption tables have been taken from | ||
15 | * crypto/aes_generic.c and are designed to be simply accessed by a combination | ||
16 | * of rlwimi/lwz instructions with a minimum of table registers (usually only | ||
17 | * one required). Thus they are aligned to 4K. The locality of rotated values | ||
18 | * is derived from the reduced offsets that are available in the SPE load | ||
19 | * instructions. E.g. evldw, evlwwsplat, ... | ||
20 | * | ||
21 | * For the safety-conscious it should be noted that they might be vulnerable | ||
22 | * to cache timing attacks because of their size. Nevertheless, in contrast to | ||
23 | * the generic tables they have been reduced from 16KB to 8KB + 256 bytes. | ||
24 | * This is quite a good tradeoff for low power devices (e.g. routers) without | ||
25 | * dedicated encryption hardware where we usually have no multiuser | ||
26 | * environment. | ||
27 | * | ||
28 | */ | ||
29 | |||
30 | #define R(a, b, c, d) \ | ||
31 | 0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a | ||
32 | |||
33 | .data | ||
34 | .align 12 | ||
35 | .globl PPC_AES_4K_ENCTAB | ||
36 | PPC_AES_4K_ENCTAB: | ||
37 | /* encryption table, same as crypto_ft_tab in crypto/aes_generic.c */ | ||
38 | .long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84) | ||
39 | .long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d) | ||
40 | .long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd) | ||
41 | .long R(de, 6f, 6f, b1), R(91, c5, c5, 54) | ||
42 | .long R(60, 30, 30, 50), R(02, 01, 01, 03) | ||
43 | .long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d) | ||
44 | .long R(e7, fe, fe, 19), R(b5, d7, d7, 62) | ||
45 | .long R(4d, ab, ab, e6), R(ec, 76, 76, 9a) | ||
46 | .long R(8f, ca, ca, 45), R(1f, 82, 82, 9d) | ||
47 | .long R(89, c9, c9, 40), R(fa, 7d, 7d, 87) | ||
48 | .long R(ef, fa, fa, 15), R(b2, 59, 59, eb) | ||
49 | .long R(8e, 47, 47, c9), R(fb, f0, f0, 0b) | ||
50 | .long R(41, ad, ad, ec), R(b3, d4, d4, 67) | ||
51 | .long R(5f, a2, a2, fd), R(45, af, af, ea) | ||
52 | .long R(23, 9c, 9c, bf), R(53, a4, a4, f7) | ||
53 | .long R(e4, 72, 72, 96), R(9b, c0, c0, 5b) | ||
54 | .long R(75, b7, b7, c2), R(e1, fd, fd, 1c) | ||
55 | .long R(3d, 93, 93, ae), R(4c, 26, 26, 6a) | ||
56 | .long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41) | ||
57 | .long R(f5, f7, f7, 02), R(83, cc, cc, 4f) | ||
58 | .long R(68, 34, 34, 5c), R(51, a5, a5, f4) | ||
59 | .long R(d1, e5, e5, 34), R(f9, f1, f1, 08) | ||
60 | .long R(e2, 71, 71, 93), R(ab, d8, d8, 73) | ||
61 | .long R(62, 31, 31, 53), R(2a, 15, 15, 3f) | ||
62 | .long R(08, 04, 04, 0c), R(95, c7, c7, 52) | ||
63 | .long R(46, 23, 23, 65), R(9d, c3, c3, 5e) | ||
64 | .long R(30, 18, 18, 28), R(37, 96, 96, a1) | ||
65 | .long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5) | ||
66 | .long R(0e, 07, 07, 09), R(24, 12, 12, 36) | ||
67 | .long R(1b, 80, 80, 9b), R(df, e2, e2, 3d) | ||
68 | .long R(cd, eb, eb, 26), R(4e, 27, 27, 69) | ||
69 | .long R(7f, b2, b2, cd), R(ea, 75, 75, 9f) | ||
70 | .long R(12, 09, 09, 1b), R(1d, 83, 83, 9e) | ||
71 | .long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e) | ||
72 | .long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2) | ||
73 | .long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb) | ||
74 | .long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d) | ||
75 | .long R(b7, d6, d6, 61), R(7d, b3, b3, ce) | ||
76 | .long R(52, 29, 29, 7b), R(dd, e3, e3, 3e) | ||
77 | .long R(5e, 2f, 2f, 71), R(13, 84, 84, 97) | ||
78 | .long R(a6, 53, 53, f5), R(b9, d1, d1, 68) | ||
79 | .long R(00, 00, 00, 00), R(c1, ed, ed, 2c) | ||
80 | .long R(40, 20, 20, 60), R(e3, fc, fc, 1f) | ||
81 | .long R(79, b1, b1, c8), R(b6, 5b, 5b, ed) | ||
82 | .long R(d4, 6a, 6a, be), R(8d, cb, cb, 46) | ||
83 | .long R(67, be, be, d9), R(72, 39, 39, 4b) | ||
84 | .long R(94, 4a, 4a, de), R(98, 4c, 4c, d4) | ||
85 | .long R(b0, 58, 58, e8), R(85, cf, cf, 4a) | ||
86 | .long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a) | ||
87 | .long R(4f, aa, aa, e5), R(ed, fb, fb, 16) | ||
88 | .long R(86, 43, 43, c5), R(9a, 4d, 4d, d7) | ||
89 | .long R(66, 33, 33, 55), R(11, 85, 85, 94) | ||
90 | .long R(8a, 45, 45, cf), R(e9, f9, f9, 10) | ||
91 | .long R(04, 02, 02, 06), R(fe, 7f, 7f, 81) | ||
92 | .long R(a0, 50, 50, f0), R(78, 3c, 3c, 44) | ||
93 | .long R(25, 9f, 9f, ba), R(4b, a8, a8, e3) | ||
94 | .long R(a2, 51, 51, f3), R(5d, a3, a3, fe) | ||
95 | .long R(80, 40, 40, c0), R(05, 8f, 8f, 8a) | ||
96 | .long R(3f, 92, 92, ad), R(21, 9d, 9d, bc) | ||
97 | .long R(70, 38, 38, 48), R(f1, f5, f5, 04) | ||
98 | .long R(63, bc, bc, df), R(77, b6, b6, c1) | ||
99 | .long R(af, da, da, 75), R(42, 21, 21, 63) | ||
100 | .long R(20, 10, 10, 30), R(e5, ff, ff, 1a) | ||
101 | .long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d) | ||
102 | .long R(81, cd, cd, 4c), R(18, 0c, 0c, 14) | ||
103 | .long R(26, 13, 13, 35), R(c3, ec, ec, 2f) | ||
104 | .long R(be, 5f, 5f, e1), R(35, 97, 97, a2) | ||
105 | .long R(88, 44, 44, cc), R(2e, 17, 17, 39) | ||
106 | .long R(93, c4, c4, 57), R(55, a7, a7, f2) | ||
107 | .long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47) | ||
108 | .long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7) | ||
109 | .long R(32, 19, 19, 2b), R(e6, 73, 73, 95) | ||
110 | .long R(c0, 60, 60, a0), R(19, 81, 81, 98) | ||
111 | .long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f) | ||
112 | .long R(44, 22, 22, 66), R(54, 2a, 2a, 7e) | ||
113 | .long R(3b, 90, 90, ab), R(0b, 88, 88, 83) | ||
114 | .long R(8c, 46, 46, ca), R(c7, ee, ee, 29) | ||
115 | .long R(6b, b8, b8, d3), R(28, 14, 14, 3c) | ||
116 | .long R(a7, de, de, 79), R(bc, 5e, 5e, e2) | ||
117 | .long R(16, 0b, 0b, 1d), R(ad, db, db, 76) | ||
118 | .long R(db, e0, e0, 3b), R(64, 32, 32, 56) | ||
119 | .long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e) | ||
120 | .long R(92, 49, 49, db), R(0c, 06, 06, 0a) | ||
121 | .long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4) | ||
122 | .long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e) | ||
123 | .long R(43, ac, ac, ef), R(c4, 62, 62, a6) | ||
124 | .long R(39, 91, 91, a8), R(31, 95, 95, a4) | ||
125 | .long R(d3, e4, e4, 37), R(f2, 79, 79, 8b) | ||
126 | .long R(d5, e7, e7, 32), R(8b, c8, c8, 43) | ||
127 | .long R(6e, 37, 37, 59), R(da, 6d, 6d, b7) | ||
128 | .long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64) | ||
129 | .long R(9c, 4e, 4e, d2), R(49, a9, a9, e0) | ||
130 | .long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa) | ||
131 | .long R(f3, f4, f4, 07), R(cf, ea, ea, 25) | ||
132 | .long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e) | ||
133 | .long R(47, ae, ae, e9), R(10, 08, 08, 18) | ||
134 | .long R(6f, ba, ba, d5), R(f0, 78, 78, 88) | ||
135 | .long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72) | ||
136 | .long R(38, 1c, 1c, 24), R(57, a6, a6, f1) | ||
137 | .long R(73, b4, b4, c7), R(97, c6, c6, 51) | ||
138 | .long R(cb, e8, e8, 23), R(a1, dd, dd, 7c) | ||
139 | .long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21) | ||
140 | .long R(96, 4b, 4b, dd), R(61, bd, bd, dc) | ||
141 | .long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85) | ||
142 | .long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42) | ||
143 | .long R(71, b5, b5, c4), R(cc, 66, 66, aa) | ||
144 | .long R(90, 48, 48, d8), R(06, 03, 03, 05) | ||
145 | .long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12) | ||
146 | .long R(c2, 61, 61, a3), R(6a, 35, 35, 5f) | ||
147 | .long R(ae, 57, 57, f9), R(69, b9, b9, d0) | ||
148 | .long R(17, 86, 86, 91), R(99, c1, c1, 58) | ||
149 | .long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9) | ||
150 | .long R(d9, e1, e1, 38), R(eb, f8, f8, 13) | ||
151 | .long R(2b, 98, 98, b3), R(22, 11, 11, 33) | ||
152 | .long R(d2, 69, 69, bb), R(a9, d9, d9, 70) | ||
153 | .long R(07, 8e, 8e, 89), R(33, 94, 94, a7) | ||
154 | .long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22) | ||
155 | .long R(15, 87, 87, 92), R(c9, e9, e9, 20) | ||
156 | .long R(87, ce, ce, 49), R(aa, 55, 55, ff) | ||
157 | .long R(50, 28, 28, 78), R(a5, df, df, 7a) | ||
158 | .long R(03, 8c, 8c, 8f), R(59, a1, a1, f8) | ||
159 | .long R(09, 89, 89, 80), R(1a, 0d, 0d, 17) | ||
160 | .long R(65, bf, bf, da), R(d7, e6, e6, 31) | ||
161 | .long R(84, 42, 42, c6), R(d0, 68, 68, b8) | ||
162 | .long R(82, 41, 41, c3), R(29, 99, 99, b0) | ||
163 | .long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11) | ||
164 | .long R(7b, b0, b0, cb), R(a8, 54, 54, fc) | ||
165 | .long R(6d, bb, bb, d6), R(2c, 16, 16, 3a) | ||
166 | .globl PPC_AES_4K_DECTAB | ||
167 | PPC_AES_4K_DECTAB: | ||
168 | /* decryption table, same as crypto_it_tab in crypto/aes_generic.c */ | ||
169 | .long R(51, f4, a7, 50), R(7e, 41, 65, 53) | ||
170 | .long R(1a, 17, a4, c3), R(3a, 27, 5e, 96) | ||
171 | .long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1) | ||
172 | .long R(ac, fa, 58, ab), R(4b, e3, 03, 93) | ||
173 | .long R(20, 30, fa, 55), R(ad, 76, 6d, f6) | ||
174 | .long R(88, cc, 76, 91), R(f5, 02, 4c, 25) | ||
175 | .long R(4f, e5, d7, fc), R(c5, 2a, cb, d7) | ||
176 | .long R(26, 35, 44, 80), R(b5, 62, a3, 8f) | ||
177 | .long R(de, b1, 5a, 49), R(25, ba, 1b, 67) | ||
178 | .long R(45, ea, 0e, 98), R(5d, fe, c0, e1) | ||
179 | .long R(c3, 2f, 75, 02), R(81, 4c, f0, 12) | ||
180 | .long R(8d, 46, 97, a3), R(6b, d3, f9, c6) | ||
181 | .long R(03, 8f, 5f, e7), R(15, 92, 9c, 95) | ||
182 | .long R(bf, 6d, 7a, eb), R(95, 52, 59, da) | ||
183 | .long R(d4, be, 83, 2d), R(58, 74, 21, d3) | ||
184 | .long R(49, e0, 69, 29), R(8e, c9, c8, 44) | ||
185 | .long R(75, c2, 89, 6a), R(f4, 8e, 79, 78) | ||
186 | .long R(99, 58, 3e, 6b), R(27, b9, 71, dd) | ||
187 | .long R(be, e1, 4f, b6), R(f0, 88, ad, 17) | ||
188 | .long R(c9, 20, ac, 66), R(7d, ce, 3a, b4) | ||
189 | .long R(63, df, 4a, 18), R(e5, 1a, 31, 82) | ||
190 | .long R(97, 51, 33, 60), R(62, 53, 7f, 45) | ||
191 | .long R(b1, 64, 77, e0), R(bb, 6b, ae, 84) | ||
192 | .long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94) | ||
193 | .long R(70, 48, 68, 58), R(8f, 45, fd, 19) | ||
194 | .long R(94, de, 6c, 87), R(52, 7b, f8, b7) | ||
195 | .long R(ab, 73, d3, 23), R(72, 4b, 02, e2) | ||
196 | .long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a) | ||
197 | .long R(b2, eb, 28, 07), R(2f, b5, c2, 03) | ||
198 | .long R(86, c5, 7b, 9a), R(d3, 37, 08, a5) | ||
199 | .long R(30, 28, 87, f2), R(23, bf, a5, b2) | ||
200 | .long R(02, 03, 6a, ba), R(ed, 16, 82, 5c) | ||
201 | .long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92) | ||
202 | .long R(f3, 07, f2, f0), R(4e, 69, e2, a1) | ||
203 | .long R(65, da, f4, cd), R(06, 05, be, d5) | ||
204 | .long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a) | ||
205 | .long R(34, 2e, 53, 9d), R(a2, f3, 55, a0) | ||
206 | .long R(05, 8a, e1, 32), R(a4, f6, eb, 75) | ||
207 | .long R(0b, 83, ec, 39), R(40, 60, ef, aa) | ||
208 | .long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51) | ||
209 | .long R(3e, 21, 8a, f9), R(96, dd, 06, 3d) | ||
210 | .long R(dd, 3e, 05, ae), R(4d, e6, bd, 46) | ||
211 | .long R(91, 54, 8d, b5), R(71, c4, 5d, 05) | ||
212 | .long R(04, 06, d4, 6f), R(60, 50, 15, ff) | ||
213 | .long R(19, 98, fb, 24), R(d6, bd, e9, 97) | ||
214 | .long R(89, 40, 43, cc), R(67, d9, 9e, 77) | ||
215 | .long R(b0, e8, 42, bd), R(07, 89, 8b, 88) | ||
216 | .long R(e7, 19, 5b, 38), R(79, c8, ee, db) | ||
217 | .long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9) | ||
218 | .long R(f8, 84, 1e, c9), R(00, 00, 00, 00) | ||
219 | .long R(09, 80, 86, 83), R(32, 2b, ed, 48) | ||
220 | .long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e) | ||
221 | .long R(fd, 0e, ff, fb), R(0f, 85, 38, 56) | ||
222 | .long R(3d, ae, d5, 1e), R(36, 2d, 39, 27) | ||
223 | .long R(0a, 0f, d9, 64), R(68, 5c, a6, 21) | ||
224 | .long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a) | ||
225 | .long R(0c, 0a, 67, b1), R(93, 57, e7, 0f) | ||
226 | .long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e) | ||
227 | .long R(80, c0, c5, 4f), R(61, dc, 20, a2) | ||
228 | .long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16) | ||
229 | .long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5) | ||
230 | .long R(3c, 22, e0, 43), R(12, 1b, 17, 1d) | ||
231 | .long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad) | ||
232 | .long R(2d, b6, a8, b9), R(14, 1e, a9, c8) | ||
233 | .long R(57, f1, 19, 85), R(af, 75, 07, 4c) | ||
234 | .long R(ee, 99, dd, bb), R(a3, 7f, 60, fd) | ||
235 | .long R(f7, 01, 26, 9f), R(5c, 72, f5, bc) | ||
236 | .long R(44, 66, 3b, c5), R(5b, fb, 7e, 34) | ||
237 | .long R(8b, 43, 29, 76), R(cb, 23, c6, dc) | ||
238 | .long R(b6, ed, fc, 68), R(b8, e4, f1, 63) | ||
239 | .long R(d7, 31, dc, ca), R(42, 63, 85, 10) | ||
240 | .long R(13, 97, 22, 40), R(84, c6, 11, 20) | ||
241 | .long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8) | ||
242 | .long R(ae, f9, 32, 11), R(c7, 29, a1, 6d) | ||
243 | .long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3) | ||
244 | .long R(0d, 86, 52, ec), R(77, c1, e3, d0) | ||
245 | .long R(2b, b3, 16, 6c), R(a9, 70, b9, 99) | ||
246 | .long R(11, 94, 48, fa), R(47, e9, 64, 22) | ||
247 | .long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a) | ||
248 | .long R(56, 7d, 2c, d8), R(22, 33, 90, ef) | ||
249 | .long R(87, 49, 4e, c7), R(d9, 38, d1, c1) | ||
250 | .long R(8c, ca, a2, fe), R(98, d4, 0b, 36) | ||
251 | .long R(a6, f5, 81, cf), R(a5, 7a, de, 28) | ||
252 | .long R(da, b7, 8e, 26), R(3f, ad, bf, a4) | ||
253 | .long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d) | ||
254 | .long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62) | ||
255 | .long R(f6, 8d, 13, c2), R(90, d8, b8, e8) | ||
256 | .long R(2e, 39, f7, 5e), R(82, c3, af, f5) | ||
257 | .long R(9f, 5d, 80, be), R(69, d0, 93, 7c) | ||
258 | .long R(6f, d5, 2d, a9), R(cf, 25, 12, b3) | ||
259 | .long R(c8, ac, 99, 3b), R(10, 18, 7d, a7) | ||
260 | .long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b) | ||
261 | .long R(cd, 26, 78, 09), R(6e, 59, 18, f4) | ||
262 | .long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8) | ||
263 | .long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e) | ||
264 | .long R(21, bc, cf, 08), R(ef, 15, e8, e6) | ||
265 | .long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce) | ||
266 | .long R(ea, 9f, 09, d4), R(29, b0, 7c, d6) | ||
267 | .long R(31, a4, b2, af), R(2a, 3f, 23, 31) | ||
268 | .long R(c6, a5, 94, 30), R(35, a2, 66, c0) | ||
269 | .long R(74, 4e, bc, 37), R(fc, 82, ca, a6) | ||
270 | .long R(e0, 90, d0, b0), R(33, a7, d8, 15) | ||
271 | .long R(f1, 04, 98, 4a), R(41, ec, da, f7) | ||
272 | .long R(7f, cd, 50, 0e), R(17, 91, f6, 2f) | ||
273 | .long R(76, 4d, d6, 8d), R(43, ef, b0, 4d) | ||
274 | .long R(cc, aa, 4d, 54), R(e4, 96, 04, df) | ||
275 | .long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b) | ||
276 | .long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f) | ||
277 | .long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d) | ||
278 | .long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e) | ||
279 | .long R(b3, 67, 1d, 5a), R(92, db, d2, 52) | ||
280 | .long R(e9, 10, 56, 33), R(6d, d6, 47, 13) | ||
281 | .long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a) | ||
282 | .long R(59, f8, 14, 8e), R(eb, 13, 3c, 89) | ||
283 | .long R(ce, a9, 27, ee), R(b7, 61, c9, 35) | ||
284 | .long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c) | ||
285 | .long R(9c, d2, df, 59), R(55, f2, 73, 3f) | ||
286 | .long R(18, 14, ce, 79), R(73, c7, 37, bf) | ||
287 | .long R(53, f7, cd, ea), R(5f, fd, aa, 5b) | ||
288 | .long R(df, 3d, 6f, 14), R(78, 44, db, 86) | ||
289 | .long R(ca, af, f3, 81), R(b9, 68, c4, 3e) | ||
290 | .long R(38, 24, 34, 2c), R(c2, a3, 40, 5f) | ||
291 | .long R(16, 1d, c3, 72), R(bc, e2, 25, 0c) | ||
292 | .long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41) | ||
293 | .long R(39, a8, 01, 71), R(08, 0c, b3, de) | ||
294 | .long R(d8, b4, e4, 9c), R(64, 56, c1, 90) | ||
295 | .long R(7b, cb, 84, 61), R(d5, 32, b6, 70) | ||
296 | .long R(48, 6c, 5c, 74), R(d0, b8, 57, 42) | ||
297 | .globl PPC_AES_4K_DECTAB2 | ||
298 | PPC_AES_4K_DECTAB2: | ||
299 | /* decryption table, same as crypto_il_tab in crypto/aes_generic.c */ | ||
300 | .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 | ||
301 | .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb | ||
302 | .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 | ||
303 | .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb | ||
304 | .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d | ||
305 | .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e | ||
306 | .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 | ||
307 | .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 | ||
308 | .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 | ||
309 | .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 | ||
310 | .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda | ||
311 | .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 | ||
312 | .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a | ||
313 | .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 | ||
314 | .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 | ||
315 | .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b | ||
316 | .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea | ||
317 | .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 | ||
318 | .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 | ||
319 | .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e | ||
320 | .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 | ||
321 | .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b | ||
322 | .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 | ||
323 | .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 | ||
324 | .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 | ||
325 | .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f | ||
326 | .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d | ||
327 | .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef | ||
328 | .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 | ||
329 | .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 | ||
330 | .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 | ||
331 | .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d | ||
diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S new file mode 100644 index 000000000000..10cdf5bceebb --- /dev/null +++ b/arch/powerpc/crypto/md5-asm.S | |||
@@ -0,0 +1,243 @@ | |||
1 | /* | ||
2 | * Fast MD5 implementation for PPC | ||
3 | * | ||
4 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | #include <asm/ppc_asm.h> | ||
13 | #include <asm/asm-offsets.h> | ||
14 | |||
15 | #define rHP r3 | ||
16 | #define rWP r4 | ||
17 | |||
18 | #define rH0 r0 | ||
19 | #define rH1 r6 | ||
20 | #define rH2 r7 | ||
21 | #define rH3 r5 | ||
22 | |||
23 | #define rW00 r8 | ||
24 | #define rW01 r9 | ||
25 | #define rW02 r10 | ||
26 | #define rW03 r11 | ||
27 | #define rW04 r12 | ||
28 | #define rW05 r14 | ||
29 | #define rW06 r15 | ||
30 | #define rW07 r16 | ||
31 | #define rW08 r17 | ||
32 | #define rW09 r18 | ||
33 | #define rW10 r19 | ||
34 | #define rW11 r20 | ||
35 | #define rW12 r21 | ||
36 | #define rW13 r22 | ||
37 | #define rW14 r23 | ||
38 | #define rW15 r24 | ||
39 | |||
40 | #define rT0 r25 | ||
41 | #define rT1 r26 | ||
42 | |||
43 | #define INITIALIZE \ | ||
44 | PPC_STLU r1,-INT_FRAME_SIZE(r1); \ | ||
45 | SAVE_8GPRS(14, r1); /* push registers onto stack */ \ | ||
46 | SAVE_4GPRS(22, r1); \ | ||
47 | SAVE_GPR(26, r1) | ||
48 | |||
49 | #define FINALIZE \ | ||
50 | REST_8GPRS(14, r1); /* pop registers from stack */ \ | ||
51 | REST_4GPRS(22, r1); \ | ||
52 | REST_GPR(26, r1); \ | ||
53 | addi r1,r1,INT_FRAME_SIZE; | ||
54 | |||
55 | #ifdef __BIG_ENDIAN__ | ||
56 | #define LOAD_DATA(reg, off) \ | ||
57 | lwbrx reg,0,rWP; /* load data */ | ||
58 | #define INC_PTR \ | ||
59 | addi rWP,rWP,4; /* increment per word */ | ||
60 | #define NEXT_BLOCK /* nothing to do */ | ||
61 | #else | ||
62 | #define LOAD_DATA(reg, off) \ | ||
63 | lwz reg,off(rWP); /* load data */ | ||
64 | #define INC_PTR /* nothing to do */ | ||
65 | #define NEXT_BLOCK \ | ||
66 | addi rWP,rWP,64; /* increment per block */ | ||
67 | #endif | ||
68 | |||
69 | #define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \ | ||
70 | LOAD_DATA(w0, off) /* W */ \ | ||
71 | and rT0,b,c; /* 1: f = b and c */ \ | ||
72 | INC_PTR /* ptr++ */ \ | ||
73 | andc rT1,d,b; /* 1: f' = ~b and d */ \ | ||
74 | LOAD_DATA(w1, off+4) /* W */ \ | ||
75 | or rT0,rT0,rT1; /* 1: f = f or f' */ \ | ||
76 | addi w0,w0,k0l; /* 1: wk = w + k */ \ | ||
77 | add a,a,rT0; /* 1: a = a + f */ \ | ||
78 | addis w0,w0,k0h; /* 1: wk = w + k' */ \ | ||
79 | addis w1,w1,k1h; /* 2: wk = w + k */ \ | ||
80 | add a,a,w0; /* 1: a = a + wk */ \ | ||
81 | addi w1,w1,k1l; /* 2: wk = w + k' */ \ | ||
82 | rotrwi a,a,p; /* 1: a = a rotl x */ \ | ||
83 | add d,d,w1; /* 2: a = a + wk */ \ | ||
84 | add a,a,b; /* 1: a = a + b */ \ | ||
85 | and rT0,a,b; /* 2: f = b and c */ \ | ||
86 | andc rT1,c,a; /* 2: f' = ~b and d */ \ | ||
87 | or rT0,rT0,rT1; /* 2: f = f or f' */ \ | ||
88 | add d,d,rT0; /* 2: a = a + f */ \ | ||
89 | INC_PTR /* ptr++ */ \ | ||
90 | rotrwi d,d,q; /* 2: a = a rotl x */ \ | ||
91 | add d,d,a; /* 2: a = a + b */ | ||
92 | |||
93 | #define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ | ||
94 | andc rT0,c,d; /* 1: f = c and ~d */ \ | ||
95 | and rT1,b,d; /* 1: f' = b and d */ \ | ||
96 | addi w0,w0,k0l; /* 1: wk = w + k */ \ | ||
97 | or rT0,rT0,rT1; /* 1: f = f or f' */ \ | ||
98 | addis w0,w0,k0h; /* 1: wk = w + k' */ \ | ||
99 | add a,a,rT0; /* 1: a = a + f */ \ | ||
100 | addi w1,w1,k1l; /* 2: wk = w + k */ \ | ||
101 | add a,a,w0; /* 1: a = a + wk */ \ | ||
102 | addis w1,w1,k1h; /* 2: wk = w + k' */ \ | ||
103 | andc rT0,b,c; /* 2: f = c and ~d */ \ | ||
104 | rotrwi a,a,p; /* 1: a = a rotl x */ \ | ||
105 | add a,a,b; /* 1: a = a + b */ \ | ||
106 | add d,d,w1; /* 2: a = a + wk */ \ | ||
107 | and rT1,a,c; /* 2: f' = b and d */ \ | ||
108 | or rT0,rT0,rT1; /* 2: f = f or f' */ \ | ||
109 | add d,d,rT0; /* 2: a = a + f */ \ | ||
110 | rotrwi d,d,q; /* 2: a = a rotl x */ \ | ||
111 | add d,d,a /* 2: a = a + b */ | ||
112 | |||
113 | #define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ | ||
114 | xor rT0,b,c; /* 1: f' = b xor c */ \ | ||
115 | addi w0,w0,k0l; /* 1: wk = w + k */ \ | ||
116 | xor rT1,rT0,d; /* 1: f = f xor f' */ \ | ||
117 | addis w0,w0,k0h; /* 1: wk = w + k' */ \ | ||
118 | add a,a,rT1; /* 1: a = a + f */ \ | ||
119 | addi w1,w1,k1l; /* 2: wk = w + k */ \ | ||
120 | add a,a,w0; /* 1: a = a + wk */ \ | ||
121 | addis w1,w1,k1h; /* 2: wk = w + k' */ \ | ||
122 | rotrwi a,a,p; /* 1: a = a rotl x */ \ | ||
123 | add d,d,w1; /* 2: a = a + wk */ \ | ||
124 | add a,a,b; /* 1: a = a + b */ \ | ||
125 | xor rT1,rT0,a; /* 2: f = b xor f' */ \ | ||
126 | add d,d,rT1; /* 2: a = a + f */ \ | ||
127 | rotrwi d,d,q; /* 2: a = a rotl x */ \ | ||
128 | add d,d,a; /* 2: a = a + b */ | ||
129 | |||
130 | #define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ | ||
131 | addi w0,w0,k0l; /* 1: w = w + k */ \ | ||
132 | orc rT0,b,d; /* 1: f = b or ~d */ \ | ||
133 | addis w0,w0,k0h; /* 1: w = w + k' */ \ | ||
134 | xor rT0,rT0,c; /* 1: f = f xor c */ \ | ||
135 | add a,a,w0; /* 1: a = a + wk */ \ | ||
136 | addi w1,w1,k1l; /* 2: w = w + k */ \ | ||
137 | add a,a,rT0; /* 1: a = a + f */ \ | ||
138 | addis w1,w1,k1h; /* 2: w = w + k' */ \ | ||
139 | rotrwi a,a,p; /* 1: a = a rotl x */ \ | ||
140 | add a,a,b; /* 1: a = a + b */ \ | ||
141 | orc rT0,a,c; /* 2: f = b or ~d */ \ | ||
142 | add d,d,w1; /* 2: a = a + wk */ \ | ||
143 | xor rT0,rT0,b; /* 2: f = f xor c */ \ | ||
144 | add d,d,rT0; /* 2: a = a + f */ \ | ||
145 | rotrwi d,d,q; /* 2: a = a rotl x */ \ | ||
146 | add d,d,a; /* 2: a = a + b */ | ||
147 | |||
148 | _GLOBAL(ppc_md5_transform) | ||
149 | INITIALIZE | ||
150 | |||
151 | mtctr r5 | ||
152 | lwz rH0,0(rHP) | ||
153 | lwz rH1,4(rHP) | ||
154 | lwz rH2,8(rHP) | ||
155 | lwz rH3,12(rHP) | ||
156 | |||
157 | ppc_md5_main: | ||
158 | R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0, | ||
159 | 0xd76b, -23432, 0xe8c8, -18602) | ||
160 | R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8, | ||
161 | 0x2420, 0x70db, 0xc1be, -12562) | ||
162 | R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16, | ||
163 | 0xf57c, 0x0faf, 0x4788, -14806) | ||
164 | R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24, | ||
165 | 0xa830, 0x4613, 0xfd47, -27391) | ||
166 | R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32, | ||
167 | 0x6981, -26408, 0x8b45, -2129) | ||
168 | R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40, | ||
169 | 0xffff, 0x5bb1, 0x895d, -10306) | ||
170 | R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48, | ||
171 | 0x6b90, 0x1122, 0xfd98, 0x7193) | ||
172 | R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56, | ||
173 | 0xa679, 0x438e, 0x49b4, 0x0821) | ||
174 | |||
175 | R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23, | ||
176 | 0x0d56, 0x6e0c, 0x1810, 0x6d2d) | ||
177 | R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12, | ||
178 | 0x9d02, -32109, 0x124c, 0x2332) | ||
179 | R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23, | ||
180 | 0x8ea7, 0x4a33, 0x0245, -18270) | ||
181 | R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12, | ||
182 | 0x8eee, -8608, 0xf258, -5095) | ||
183 | R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23, | ||
184 | 0x969d, -10697, 0x1cbe, -15288) | ||
185 | R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12, | ||
186 | 0x3317, 0x3e99, 0xdbd9, 0x7c15) | ||
187 | R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23, | ||
188 | 0xac4b, 0x7772, 0xd8cf, 0x331d) | ||
189 | R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12, | ||
190 | 0x6a28, 0x6dd8, 0x219a, 0x3b68) | ||
191 | |||
192 | R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21, | ||
193 | 0x29cb, 0x28e5, 0x4218, -7788) | ||
194 | R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9, | ||
195 | 0x473f, 0x06d1, 0x3aae, 0x3036) | ||
196 | R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21, | ||
197 | 0xaea1, -15134, 0x640b, -11295) | ||
198 | R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9, | ||
199 | 0x8f4c, 0x4887, 0xbc7c, -22499) | ||
200 | R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21, | ||
201 | 0x7eb8, -27199, 0x00ea, 0x6050) | ||
202 | R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9, | ||
203 | 0xe01a, 0x22fe, 0x4447, 0x69c5) | ||
204 | R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21, | ||
205 | 0xb7f3, 0x0253, 0x59b1, 0x4d5b) | ||
206 | R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9, | ||
207 | 0x4701, -27017, 0xc7bd, -19859) | ||
208 | |||
209 | R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22, | ||
210 | 0x0988, -1462, 0x4c70, -19401) | ||
211 | R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11, | ||
212 | 0xadaf, -5221, 0xfc99, 0x66f7) | ||
213 | R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22, | ||
214 | 0x7e80, -16418, 0xba1e, -25587) | ||
215 | R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11, | ||
216 | 0x4130, 0x380d, 0xe0c5, 0x738d) | ||
217 | lwz rW00,0(rHP) | ||
218 | R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22, | ||
219 | 0xe837, -30770, 0xde8a, 0x69e8) | ||
220 | lwz rW14,4(rHP) | ||
221 | R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11, | ||
222 | 0x9e79, 0x260f, 0x256d, -27941) | ||
223 | lwz rW12,8(rHP) | ||
224 | R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22, | ||
225 | 0xab75, -20775, 0x4f9e, -28397) | ||
226 | lwz rW10,12(rHP) | ||
227 | R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11, | ||
228 | 0x662b, 0x7c56, 0x11b2, 0x0358) | ||
229 | |||
230 | add rH0,rH0,rW00 | ||
231 | stw rH0,0(rHP) | ||
232 | add rH1,rH1,rW14 | ||
233 | stw rH1,4(rHP) | ||
234 | add rH2,rH2,rW12 | ||
235 | stw rH2,8(rHP) | ||
236 | add rH3,rH3,rW10 | ||
237 | stw rH3,12(rHP) | ||
238 | NEXT_BLOCK | ||
239 | |||
240 | bdnz ppc_md5_main | ||
241 | |||
242 | FINALIZE | ||
243 | blr | ||
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c new file mode 100644 index 000000000000..452fb4dc575f --- /dev/null +++ b/arch/powerpc/crypto/md5-glue.c | |||
@@ -0,0 +1,165 @@ | |||
1 | /* | ||
2 | * Glue code for MD5 implementation for PPC assembler | ||
3 | * | ||
4 | * Based on generic implementation. | ||
5 | * | ||
6 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License as published by the Free | ||
10 | * Software Foundation; either version 2 of the License, or (at your option) | ||
11 | * any later version. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <crypto/internal/hash.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/cryptohash.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <crypto/md5.h> | ||
22 | #include <asm/byteorder.h> | ||
23 | |||
24 | extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks); | ||
25 | |||
26 | static inline void ppc_md5_clear_context(struct md5_state *sctx) | ||
27 | { | ||
28 | int count = sizeof(struct md5_state) >> 2; | ||
29 | u32 *ptr = (u32 *)sctx; | ||
30 | |||
31 | /* make sure we can clear the fast way */ | ||
32 | BUILD_BUG_ON(sizeof(struct md5_state) % 4); | ||
33 | do { *ptr++ = 0; } while (--count); | ||
34 | } | ||
35 | |||
36 | static int ppc_md5_init(struct shash_desc *desc) | ||
37 | { | ||
38 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
39 | |||
40 | sctx->hash[0] = 0x67452301; | ||
41 | sctx->hash[1] = 0xefcdab89; | ||
42 | sctx->hash[2] = 0x98badcfe; | ||
43 | sctx->hash[3] = 0x10325476; | ||
44 | sctx->byte_count = 0; | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | static int ppc_md5_update(struct shash_desc *desc, const u8 *data, | ||
50 | unsigned int len) | ||
51 | { | ||
52 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
53 | const unsigned int offset = sctx->byte_count & 0x3f; | ||
54 | unsigned int avail = 64 - offset; | ||
55 | const u8 *src = data; | ||
56 | |||
57 | sctx->byte_count += len; | ||
58 | |||
59 | if (avail > len) { | ||
60 | memcpy((char *)sctx->block + offset, src, len); | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | if (offset) { | ||
65 | memcpy((char *)sctx->block + offset, src, avail); | ||
66 | ppc_md5_transform(sctx->hash, (const u8 *)sctx->block, 1); | ||
67 | len -= avail; | ||
68 | src += avail; | ||
69 | } | ||
70 | |||
71 | if (len > 63) { | ||
72 | ppc_md5_transform(sctx->hash, src, len >> 6); | ||
73 | src += len & ~0x3f; | ||
74 | len &= 0x3f; | ||
75 | } | ||
76 | |||
77 | memcpy((char *)sctx->block, src, len); | ||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static int ppc_md5_final(struct shash_desc *desc, u8 *out) | ||
82 | { | ||
83 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
84 | const unsigned int offset = sctx->byte_count & 0x3f; | ||
85 | const u8 *src = (const u8 *)sctx->block; | ||
86 | u8 *p = (u8 *)src + offset; | ||
87 | int padlen = 55 - offset; | ||
88 | __le64 *pbits = (__le64 *)((char *)sctx->block + 56); | ||
89 | __le32 *dst = (__le32 *)out; | ||
90 | |||
91 | *p++ = 0x80; | ||
92 | |||
93 | if (padlen < 0) { | ||
94 | memset(p, 0x00, padlen + sizeof(u64)); | ||
95 | ppc_md5_transform(sctx->hash, src, 1); | ||
96 | p = (u8 *)sctx->block; | ||
97 | padlen = 56; | ||
98 | } | ||
99 | |||
100 | memset(p, 0, padlen); | ||
101 | *pbits = cpu_to_le64(sctx->byte_count << 3); | ||
102 | ppc_md5_transform(sctx->hash, src, 1); | ||
103 | |||
104 | dst[0] = cpu_to_le32(sctx->hash[0]); | ||
105 | dst[1] = cpu_to_le32(sctx->hash[1]); | ||
106 | dst[2] = cpu_to_le32(sctx->hash[2]); | ||
107 | dst[3] = cpu_to_le32(sctx->hash[3]); | ||
108 | |||
109 | ppc_md5_clear_context(sctx); | ||
110 | return 0; | ||
111 | } | ||
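For orientation, the padlen < 0 branch above covers offsets 56..63, where the 0x80 marker and the 8-byte length no longer fit in the same block: offset 60, for example, gives padlen = -5, so only the remaining padlen + 8 = 3 bytes of the block are zeroed, an extra transform flushes that block, and a fresh block is padded with 56 zero bytes before the bit count.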
112 | |||
113 | static int ppc_md5_export(struct shash_desc *desc, void *out) | ||
114 | { | ||
115 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
116 | |||
117 | memcpy(out, sctx, sizeof(*sctx)); | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static int ppc_md5_import(struct shash_desc *desc, const void *in) | ||
122 | { | ||
123 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
124 | |||
125 | memcpy(sctx, in, sizeof(*sctx)); | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | static struct shash_alg alg = { | ||
130 | .digestsize = MD5_DIGEST_SIZE, | ||
131 | .init = ppc_md5_init, | ||
132 | .update = ppc_md5_update, | ||
133 | .final = ppc_md5_final, | ||
134 | .export = ppc_md5_export, | ||
135 | .import = ppc_md5_import, | ||
136 | .descsize = sizeof(struct md5_state), | ||
137 | .statesize = sizeof(struct md5_state), | ||
138 | .base = { | ||
139 | .cra_name = "md5", | ||
140 | .cra_driver_name= "md5-ppc", | ||
141 | .cra_priority = 200, | ||
142 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
143 | .cra_blocksize = MD5_HMAC_BLOCK_SIZE, | ||
144 | .cra_module = THIS_MODULE, | ||
145 | } | ||
146 | }; | ||
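For context, nothing calls these handlers directly; consumers go through the shash API, which selects the highest-priority implementation registered under "md5". A hedged usage sketch (the helper name is ours; error handling trimmed):

    #include <crypto/hash.h>

    /* Hash one contiguous buffer with whichever "md5" wins selection
     * (this driver at priority 200 unless something higher is loaded). */
    static int md5_digest_buf(const u8 *data, unsigned int len, u8 *out)
    {
            struct crypto_shash *tfm;
            int ret;

            tfm = crypto_alloc_shash("md5", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            {
                    SHASH_DESC_ON_STACK(desc, tfm);

                    desc->tfm = tfm;
                    desc->flags = 0;
                    ret = crypto_shash_digest(desc, data, len, out);
            }

            crypto_free_shash(tfm);
            return ret;
    }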
147 | |||
148 | static int __init ppc_md5_mod_init(void) | ||
149 | { | ||
150 | return crypto_register_shash(&alg); | ||
151 | } | ||
152 | |||
153 | static void __exit ppc_md5_mod_fini(void) | ||
154 | { | ||
155 | crypto_unregister_shash(&alg); | ||
156 | } | ||
157 | |||
158 | module_init(ppc_md5_mod_init); | ||
159 | module_exit(ppc_md5_mod_fini); | ||
160 | |||
161 | MODULE_LICENSE("GPL"); | ||
162 | MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler"); | ||
163 | |||
164 | MODULE_ALIAS_CRYPTO("md5"); | ||
165 | MODULE_ALIAS_CRYPTO("md5-ppc"); | ||
diff --git a/arch/powerpc/crypto/sha1-spe-asm.S b/arch/powerpc/crypto/sha1-spe-asm.S new file mode 100644 index 000000000000..fcb6cf002889 --- /dev/null +++ b/arch/powerpc/crypto/sha1-spe-asm.S | |||
@@ -0,0 +1,299 @@ | |||
1 | /* | ||
2 | * Fast SHA-1 implementation for SPE instruction set (PPC) | ||
3 | * | ||
4 | * This code makes use of the SPE SIMD instruction set as defined in | ||
5 | * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf | ||
6 | * Implementation is based on optimization guide notes from | ||
7 | * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf | ||
8 | * | ||
9 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <asm/ppc_asm.h> | ||
19 | #include <asm/asm-offsets.h> | ||
20 | |||
21 | #define rHP r3 /* pointer to hash value */ | ||
22 | #define rWP r4 /* pointer to input */ | ||
23 | #define rKP r5 /* pointer to constants */ | ||
24 | |||
25 | #define rW0 r14 /* 64 bit round words */ | ||
26 | #define rW1 r15 | ||
27 | #define rW2 r16 | ||
28 | #define rW3 r17 | ||
29 | #define rW4 r18 | ||
30 | #define rW5 r19 | ||
31 | #define rW6 r20 | ||
32 | #define rW7 r21 | ||
33 | |||
34 | #define rH0 r6 /* 32 bit hash values */ | ||
35 | #define rH1 r7 | ||
36 | #define rH2 r8 | ||
37 | #define rH3 r9 | ||
38 | #define rH4 r10 | ||
39 | |||
40 | #define rT0 r22 /* 64 bit temporary */ | ||
41 | #define rT1 r0 /* 32 bit temporaries */ | ||
42 | #define rT2 r11 | ||
43 | #define rT3 r12 | ||
44 | |||
45 | #define rK r23 /* 64 bit constant in volatile register */ | ||
46 | |||
47 | #define LOAD_K01 | ||
48 | |||
49 | #define LOAD_K11 \ | ||
50 | evlwwsplat rK,0(rKP); | ||
51 | |||
52 | #define LOAD_K21 \ | ||
53 | evlwwsplat rK,4(rKP); | ||
54 | |||
55 | #define LOAD_K31 \ | ||
56 | evlwwsplat rK,8(rKP); | ||
57 | |||
58 | #define LOAD_K41 \ | ||
59 | evlwwsplat rK,12(rKP); | ||
60 | |||
61 | #define INITIALIZE \ | ||
62 | stwu r1,-128(r1); /* create stack frame */ \ | ||
63 | evstdw r14,8(r1); /* We must save non-volatile */ \ | ||
64 | evstdw r15,16(r1); /* registers. Take the chance */ \ | ||
65 | evstdw r16,24(r1); /* and save the SPE part too */ \ | ||
66 | evstdw r17,32(r1); \ | ||
67 | evstdw r18,40(r1); \ | ||
68 | evstdw r19,48(r1); \ | ||
69 | evstdw r20,56(r1); \ | ||
70 | evstdw r21,64(r1); \ | ||
71 | evstdw r22,72(r1); \ | ||
72 | evstdw r23,80(r1); | ||
73 | |||
74 | |||
75 | #define FINALIZE \ | ||
76 | evldw r14,8(r1); /* restore SPE registers */ \ | ||
77 | evldw r15,16(r1); \ | ||
78 | evldw r16,24(r1); \ | ||
79 | evldw r17,32(r1); \ | ||
80 | evldw r18,40(r1); \ | ||
81 | evldw r19,48(r1); \ | ||
82 | evldw r20,56(r1); \ | ||
83 | evldw r21,64(r1); \ | ||
84 | evldw r22,72(r1); \ | ||
85 | evldw r23,80(r1); \ | ||
86 | xor r0,r0,r0; \ | ||
87 | stw r0,8(r1); /* Delete sensitive data */ \ | ||
88 | stw r0,16(r1); /* that another context running */ \ | ||
89 | stw r0,24(r1); /* the same code might have */ \ | ||
90 | stw r0,32(r1); /* pushed. Assume that the */ \ | ||
91 | stw r0,40(r1); /* lower parts of the GPRs */ \ | ||
92 | stw r0,48(r1); /* were already overwritten on */ \ | ||
93 | stw r0,56(r1); /* the way down to here */ \ | ||
94 | stw r0,64(r1); \ | ||
95 | stw r0,72(r1); \ | ||
96 | stw r0,80(r1); \ | ||
97 | addi r1,r1,128; /* cleanup stack frame */ | ||
98 | |||
99 | #ifdef __BIG_ENDIAN__ | ||
100 | #define LOAD_DATA(reg, off) \ | ||
101 | lwz reg,off(rWP); /* load data */ | ||
102 | #define NEXT_BLOCK \ | ||
103 | addi rWP,rWP,64; /* increment per block */ | ||
104 | #else | ||
105 | #define LOAD_DATA(reg, off) \ | ||
106 | lwbrx reg,0,rWP; /* load data */ \ | ||
107 | addi rWP,rWP,4; /* increment per word */ | ||
108 | #define NEXT_BLOCK /* nothing to do */ | ||
109 | #endif | ||
110 | |||
111 | #define R_00_15(a, b, c, d, e, w0, w1, k, off) \ | ||
112 | LOAD_DATA(w0, off) /* 1: W */ \ | ||
113 | and rT2,b,c; /* 1: F' = B and C */ \ | ||
114 | LOAD_K##k##1 \ | ||
115 | andc rT1,d,b; /* 1: F" = ~B and D */ \ | ||
116 | rotrwi rT0,a,27; /* 1: A' = A rotl 5 */ \ | ||
117 | or rT2,rT2,rT1; /* 1: F = F' or F" */ \ | ||
118 | add e,e,rT0; /* 1: E = E + A' */ \ | ||
119 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | ||
120 | add e,e,w0; /* 1: E = E + W */ \ | ||
121 | LOAD_DATA(w1, off+4) /* 2: W */ \ | ||
122 | add e,e,rT2; /* 1: E = E + F */ \ | ||
123 | and rT1,a,b; /* 2: F' = B and C */ \ | ||
124 | add e,e,rK; /* 1: E = E + K */ \ | ||
125 | andc rT2,c,a; /* 2: F" = ~B and D */ \ | ||
126 | add d,d,rK; /* 2: E = E + K */ \ | ||
127 | or rT2,rT2,rT1; /* 2: F = F' or F" */ \ | ||
128 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | ||
129 | add d,d,w1; /* 2: E = E + W */ \ | ||
130 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | ||
131 | add d,d,rT0; /* 2: E = E + A' */ \ | ||
132 | evmergelo w1,w1,w0; /* mix W[0]/W[1] */ \ | ||
133 | add d,d,rT2 /* 2: E = E + F */ | ||
134 | |||
135 | #define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | ||
136 | and rT2,b,c; /* 1: F' = B and C */ \ | ||
137 | evmergelohi rT0,w7,w6; /* W[-3] */ \ | ||
138 | andc rT1,d,b; /* 1: F" = ~B and D */ \ | ||
139 | evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \ | ||
140 | or rT1,rT1,rT2; /* 1: F = F' or F" */ \ | ||
141 | evxor w0,w0,w4; /* W = W xor W[-8] */ \ | ||
142 | add e,e,rT1; /* 1: E = E + F */ \ | ||
143 | evxor w0,w0,w1; /* W = W xor W[-14] */ \ | ||
144 | rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \ | ||
145 | evrlwi w0,w0,1; /* W = W rotl 1 */ \ | ||
146 | add e,e,rT2; /* 1: E = E + A' */ \ | ||
147 | evaddw rT0,w0,rK; /* WK = W + K */ \ | ||
148 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | ||
149 | LOAD_K##k##1 \ | ||
150 | evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \ | ||
151 | add e,e,rT0; /* 1: E = E + WK */ \ | ||
152 | add d,d,rT1; /* 2: E = E + WK */ \ | ||
153 | and rT2,a,b; /* 2: F' = B and C */ \ | ||
154 | andc rT1,c,a; /* 2: F" = ~B and D */ \ | ||
155 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | ||
156 | or rT1,rT1,rT2; /* 2: F = F' or F" */ \ | ||
157 | add d,d,rT0; /* 2: E = E + A' */ \ | ||
158 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | ||
159 | add d,d,rT1 /* 2: E = E + F */ | ||
160 | |||
161 | #define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | ||
162 | evmergelohi rT0,w7,w6; /* W[-3] */ \ | ||
163 | xor rT2,b,c; /* 1: F' = B xor C */ \ | ||
164 | evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \ | ||
165 | xor rT2,rT2,d; /* 1: F = F' xor D */ \ | ||
166 | evxor w0,w0,w4; /* W = W xor W[-8] */ \ | ||
167 | add e,e,rT2; /* 1: E = E + F */ \ | ||
168 | evxor w0,w0,w1; /* W = W xor W[-14] */ \ | ||
169 | rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \ | ||
170 | evrlwi w0,w0,1; /* W = W rotl 1 */ \ | ||
171 | add e,e,rT2; /* 1: E = E + A' */ \ | ||
172 | evaddw rT0,w0,rK; /* WK = W + K */ \ | ||
173 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | ||
174 | LOAD_K##k##1 \ | ||
175 | evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \ | ||
176 | add e,e,rT0; /* 1: E = E + WK */ \ | ||
177 | xor rT2,a,b; /* 2: F' = B xor C */ \ | ||
178 | add d,d,rT1; /* 2: E = E + WK */ \ | ||
179 | xor rT2,rT2,c; /* 2: F = F' xor D */ \ | ||
180 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | ||
181 | add d,d,rT2; /* 2: E = E + F */ \ | ||
182 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | ||
183 | add d,d,rT0 /* 2: E = E + A' */ | ||
184 | |||
185 | #define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | ||
186 | and rT2,b,c; /* 1: F' = B and C */ \ | ||
187 | evmergelohi rT0,w7,w6; /* W[-3] */ \ | ||
188 | or rT1,b,c; /* 1: F" = B or C */ \ | ||
189 | evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \ | ||
190 | and rT1,d,rT1; /* 1: F" = F" and D */ \ | ||
191 | evxor w0,w0,w4; /* W = W xor W[-8] */ \ | ||
192 | or rT2,rT2,rT1; /* 1: F = F' or F" */ \ | ||
193 | evxor w0,w0,w1; /* W = W xor W[-14] */ \ | ||
194 | add e,e,rT2; /* 1: E = E + F */ \ | ||
195 | evrlwi w0,w0,1; /* W = W rotl 1 */ \ | ||
196 | rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \ | ||
197 | evaddw rT0,w0,rK; /* WK = W + K */ \ | ||
198 | add e,e,rT2; /* 1: E = E + A' */ \ | ||
199 | LOAD_K##k##1 \ | ||
200 | evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \ | ||
201 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | ||
202 | add e,e,rT0; /* 1: E = E + WK */ \ | ||
203 | and rT2,a,b; /* 2: F' = B and C */ \ | ||
204 | or rT0,a,b; /* 2: F" = B or C */ \ | ||
205 | add d,d,rT1; /* 2: E = E + WK */ \ | ||
206 | and rT0,c,rT0; /* 2: F" = F" and D */ \ | ||
207 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | ||
208 | or rT2,rT2,rT0; /* 2: F = F' or F" */ \ | ||
209 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | ||
210 | add d,d,rT2; /* 2: E = E + F */ \ | ||
211 | add d,d,rT0 /* 2: E = E + A' */ | ||
212 | |||
213 | #define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | ||
214 | R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) | ||
215 | |||
216 | _GLOBAL(ppc_spe_sha1_transform) | ||
217 | INITIALIZE | ||
218 | |||
219 | lwz rH0,0(rHP) | ||
220 | lwz rH1,4(rHP) | ||
221 | mtctr r5 | ||
222 | lwz rH2,8(rHP) | ||
223 | lis rKP,PPC_SPE_SHA1_K@h | ||
224 | lwz rH3,12(rHP) | ||
225 | ori rKP,rKP,PPC_SPE_SHA1_K@l | ||
226 | lwz rH4,16(rHP) | ||
227 | |||
228 | ppc_spe_sha1_main: | ||
229 | R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0) | ||
230 | R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8) | ||
231 | R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16) | ||
232 | R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24) | ||
233 | R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32) | ||
234 | R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40) | ||
235 | R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48) | ||
236 | R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56) | ||
237 | |||
238 | R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0) | ||
239 | R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2) | ||
240 | |||
241 | R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0) | ||
242 | R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0) | ||
243 | R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0) | ||
244 | R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0) | ||
245 | R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0) | ||
246 | R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0) | ||
247 | R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0) | ||
248 | R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0) | ||
249 | R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0) | ||
250 | R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3) | ||
251 | |||
252 | R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0) | ||
253 | R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0) | ||
254 | R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0) | ||
255 | R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0) | ||
256 | R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0) | ||
257 | R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0) | ||
258 | R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0) | ||
259 | R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0) | ||
260 | R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0) | ||
261 | R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4) | ||
262 | |||
263 | R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0) | ||
264 | R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0) | ||
265 | R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0) | ||
266 | R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0) | ||
267 | R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0) | ||
268 | R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0) | ||
269 | R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0) | ||
270 | lwz rT3,0(rHP) | ||
271 | R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0) | ||
272 | lwz rW1,4(rHP) | ||
273 | R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0) | ||
274 | lwz rW2,8(rHP) | ||
275 | R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0) | ||
276 | lwz rW3,12(rHP) | ||
277 | NEXT_BLOCK | ||
278 | lwz rW4,16(rHP) | ||
279 | |||
280 | add rH0,rH0,rT3 | ||
281 | stw rH0,0(rHP) | ||
282 | add rH1,rH1,rW1 | ||
283 | stw rH1,4(rHP) | ||
284 | add rH2,rH2,rW2 | ||
285 | stw rH2,8(rHP) | ||
286 | add rH3,rH3,rW3 | ||
287 | stw rH3,12(rHP) | ||
288 | add rH4,rH4,rW4 | ||
289 | stw rH4,16(rHP) | ||
290 | |||
291 | bdnz ppc_spe_sha1_main | ||
292 | |||
293 | FINALIZE | ||
294 | blr | ||
295 | |||
296 | .data | ||
297 | .align 4 | ||
298 | PPC_SPE_SHA1_K: | ||
299 | .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6 | ||
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c new file mode 100644 index 000000000000..3e1d22212521 --- /dev/null +++ b/arch/powerpc/crypto/sha1-spe-glue.c | |||
@@ -0,0 +1,210 @@ | |||
1 | /* | ||
2 | * Glue code for SHA-1 implementation for SPE instructions (PPC) | ||
3 | * | ||
4 | * Based on generic implementation. | ||
5 | * | ||
6 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License as published by the Free | ||
10 | * Software Foundation; either version 2 of the License, or (at your option) | ||
11 | * any later version. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <crypto/internal/hash.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/cryptohash.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <crypto/sha.h> | ||
22 | #include <asm/byteorder.h> | ||
23 | #include <asm/switch_to.h> | ||
24 | #include <linux/hardirq.h> | ||
25 | |||
26 | /* | ||
27 | * MAX_BYTES defines the number of bytes that are allowed to be processed | ||
28 | * between preempt_disable() and preempt_enable(). SHA1 takes ~1000 | ||
29 | * operations per 64 byte block. e500 cores can issue two arithmetic | ||
30 | * instructions per clock cycle using one 32/64 bit unit (SU1) and one | ||
31 | * 32 bit unit (SU2). Thus 2KB of input data needs an estimated maximum of | ||
32 | * 18,000 cycles, headroom for cache misses included. Even with the low end | ||
33 | * model clocked at 667 MHz this equates to a critical window of under 27us. | ||
34 | * | ||
35 | */ | ||
36 | #define MAX_BYTES 2048 | ||
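Working the comment's figures through: 2048 bytes / 64 bytes per block = 32 blocks; 32 x ~1000 operations = ~32,000 instructions; at two instructions per cycle that is ~16,000 cycles, or roughly 18,000 with cache-miss headroom; 18,000 cycles / 667 MHz comes to about 27 microseconds.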
37 | |||
38 | extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks); | ||
39 | |||
40 | static void spe_begin(void) | ||
41 | { | ||
42 | /* We just start SPE operations and will save SPE registers later. */ | ||
43 | preempt_disable(); | ||
44 | enable_kernel_spe(); | ||
45 | } | ||
46 | |||
47 | static void spe_end(void) | ||
48 | { | ||
49 | /* reenable preemption */ | ||
50 | preempt_enable(); | ||
51 | } | ||
52 | |||
53 | static inline void ppc_sha1_clear_context(struct sha1_state *sctx) | ||
54 | { | ||
55 | int count = sizeof(struct sha1_state) >> 2; | ||
56 | u32 *ptr = (u32 *)sctx; | ||
57 | |||
58 | /* make sure we can clear the fast way */ | ||
59 | BUILD_BUG_ON(sizeof(struct sha1_state) % 4); | ||
60 | do { *ptr++ = 0; } while (--count); | ||
61 | } | ||
62 | |||
63 | static int ppc_spe_sha1_init(struct shash_desc *desc) | ||
64 | { | ||
65 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
66 | |||
67 | sctx->state[0] = SHA1_H0; | ||
68 | sctx->state[1] = SHA1_H1; | ||
69 | sctx->state[2] = SHA1_H2; | ||
70 | sctx->state[3] = SHA1_H3; | ||
71 | sctx->state[4] = SHA1_H4; | ||
72 | sctx->count = 0; | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data, | ||
78 | unsigned int len) | ||
79 | { | ||
80 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
81 | const unsigned int offset = sctx->count & 0x3f; | ||
82 | const unsigned int avail = 64 - offset; | ||
83 | unsigned int bytes; | ||
84 | const u8 *src = data; | ||
85 | |||
86 | if (avail > len) { | ||
87 | sctx->count += len; | ||
88 | memcpy((char *)sctx->buffer + offset, src, len); | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | sctx->count += len; | ||
93 | |||
94 | if (offset) { | ||
95 | memcpy((char *)sctx->buffer + offset, src, avail); | ||
96 | |||
97 | spe_begin(); | ||
98 | ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1); | ||
99 | spe_end(); | ||
100 | |||
101 | len -= avail; | ||
102 | src += avail; | ||
103 | } | ||
104 | |||
105 | while (len > 63) { | ||
106 | bytes = (len > MAX_BYTES) ? MAX_BYTES : len; | ||
107 | bytes = bytes & ~0x3f; | ||
108 | |||
109 | spe_begin(); | ||
110 | ppc_spe_sha1_transform(sctx->state, src, bytes >> 6); | ||
111 | spe_end(); | ||
112 | |||
113 | src += bytes; | ||
114 | len -= bytes; | ||
115 | } | ||
116 | |||
117 | memcpy((char *)sctx->buffer, src, len); | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out) | ||
122 | { | ||
123 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
124 | const unsigned int offset = sctx->count & 0x3f; | ||
125 | char *p = (char *)sctx->buffer + offset; | ||
126 | int padlen; | ||
127 | __be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56); | ||
128 | __be32 *dst = (__be32 *)out; | ||
129 | |||
130 | padlen = 55 - offset; | ||
131 | *p++ = 0x80; | ||
132 | |||
133 | spe_begin(); | ||
134 | |||
135 | if (padlen < 0) { | ||
136 | memset(p, 0x00, padlen + sizeof(u64)); | ||
137 | ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1); | ||
138 | p = (char *)sctx->buffer; | ||
139 | padlen = 56; | ||
140 | } | ||
141 | |||
142 | memset(p, 0, padlen); | ||
143 | *pbits = cpu_to_be64(sctx->count << 3); | ||
144 | ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1); | ||
145 | |||
146 | spe_end(); | ||
147 | |||
148 | dst[0] = cpu_to_be32(sctx->state[0]); | ||
149 | dst[1] = cpu_to_be32(sctx->state[1]); | ||
150 | dst[2] = cpu_to_be32(sctx->state[2]); | ||
151 | dst[3] = cpu_to_be32(sctx->state[3]); | ||
152 | dst[4] = cpu_to_be32(sctx->state[4]); | ||
153 | |||
154 | ppc_sha1_clear_context(sctx); | ||
155 | return 0; | ||
156 | } | ||
157 | |||
158 | static int ppc_spe_sha1_export(struct shash_desc *desc, void *out) | ||
159 | { | ||
160 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
161 | |||
162 | memcpy(out, sctx, sizeof(*sctx)); | ||
163 | return 0; | ||
164 | } | ||
165 | |||
166 | static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in) | ||
167 | { | ||
168 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
169 | |||
170 | memcpy(sctx, in, sizeof(*sctx)); | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | static struct shash_alg alg = { | ||
175 | .digestsize = SHA1_DIGEST_SIZE, | ||
176 | .init = ppc_spe_sha1_init, | ||
177 | .update = ppc_spe_sha1_update, | ||
178 | .final = ppc_spe_sha1_final, | ||
179 | .export = ppc_spe_sha1_export, | ||
180 | .import = ppc_spe_sha1_import, | ||
181 | .descsize = sizeof(struct sha1_state), | ||
182 | .statesize = sizeof(struct sha1_state), | ||
183 | .base = { | ||
184 | .cra_name = "sha1", | ||
185 | .cra_driver_name= "sha1-ppc-spe", | ||
186 | .cra_priority = 300, | ||
187 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
188 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
189 | .cra_module = THIS_MODULE, | ||
190 | } | ||
191 | }; | ||
192 | |||
193 | static int __init ppc_spe_sha1_mod_init(void) | ||
194 | { | ||
195 | return crypto_register_shash(&alg); | ||
196 | } | ||
197 | |||
198 | static void __exit ppc_spe_sha1_mod_fini(void) | ||
199 | { | ||
200 | crypto_unregister_shash(&alg); | ||
201 | } | ||
202 | |||
203 | module_init(ppc_spe_sha1_mod_init); | ||
204 | module_exit(ppc_spe_sha1_mod_fini); | ||
205 | |||
206 | MODULE_LICENSE("GPL"); | ||
207 | MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized"); | ||
208 | |||
209 | MODULE_ALIAS_CRYPTO("sha1"); | ||
210 | MODULE_ALIAS_CRYPTO("sha1-ppc-spe"); | ||
diff --git a/arch/powerpc/crypto/sha256-spe-asm.S b/arch/powerpc/crypto/sha256-spe-asm.S new file mode 100644 index 000000000000..2d10e4c08f03 --- /dev/null +++ b/arch/powerpc/crypto/sha256-spe-asm.S | |||
@@ -0,0 +1,323 @@ | |||
1 | /* | ||
2 | * Fast SHA-256 implementation for SPE instruction set (PPC) | ||
3 | * | ||
4 | * This code makes use of the SPE SIMD instruction set as defined in | ||
5 | * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf | ||
6 | * Implementation is based on optimization guide notes from | ||
7 | * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf | ||
8 | * | ||
9 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <asm/ppc_asm.h> | ||
19 | #include <asm/asm-offsets.h> | ||
20 | |||
21 | #define rHP r3 /* pointer to hash values in memory */ | ||
22 | #define rKP r24 /* pointer to round constants */ | ||
23 | #define rWP r4 /* pointer to input data */ | ||
24 | |||
25 | #define rH0 r5 /* 8 32 bit hash values in 8 registers */ | ||
26 | #define rH1 r6 | ||
27 | #define rH2 r7 | ||
28 | #define rH3 r8 | ||
29 | #define rH4 r9 | ||
30 | #define rH5 r10 | ||
31 | #define rH6 r11 | ||
32 | #define rH7 r12 | ||
33 | |||
34 | #define rW0 r14 /* 64 bit registers. 16 words in 8 registers */ | ||
35 | #define rW1 r15 | ||
36 | #define rW2 r16 | ||
37 | #define rW3 r17 | ||
38 | #define rW4 r18 | ||
39 | #define rW5 r19 | ||
40 | #define rW6 r20 | ||
41 | #define rW7 r21 | ||
42 | |||
43 | #define rT0 r22 /* 64 bit temporaries */ | ||
44 | #define rT1 r23 | ||
45 | #define rT2 r0 /* 32 bit temporaries */ | ||
46 | #define rT3 r25 | ||
47 | |||
48 | #define CMP_KN_LOOP | ||
49 | #define CMP_KC_LOOP \ | ||
50 | cmpwi rT1,0; | ||
51 | |||
52 | #define INITIALIZE \ | ||
53 | stwu r1,-128(r1); /* create stack frame */ \ | ||
54 | evstdw r14,8(r1); /* We must save non-volatile */ \ | ||
55 | evstdw r15,16(r1); /* registers. Take the chance */ \ | ||
56 | evstdw r16,24(r1); /* and save the SPE part too */ \ | ||
57 | evstdw r17,32(r1); \ | ||
58 | evstdw r18,40(r1); \ | ||
59 | evstdw r19,48(r1); \ | ||
60 | evstdw r20,56(r1); \ | ||
61 | evstdw r21,64(r1); \ | ||
62 | evstdw r22,72(r1); \ | ||
63 | evstdw r23,80(r1); \ | ||
64 | stw r24,88(r1); /* save normal registers */ \ | ||
65 | stw r25,92(r1); | ||
66 | |||
67 | |||
68 | #define FINALIZE \ | ||
69 | evldw r14,8(r1); /* restore SPE registers */ \ | ||
70 | evldw r15,16(r1); \ | ||
71 | evldw r16,24(r1); \ | ||
72 | evldw r17,32(r1); \ | ||
73 | evldw r18,40(r1); \ | ||
74 | evldw r19,48(r1); \ | ||
75 | evldw r20,56(r1); \ | ||
76 | evldw r21,64(r1); \ | ||
77 | evldw r22,72(r1); \ | ||
78 | evldw r23,80(r1); \ | ||
79 | lwz r24,88(r1); /* restore normal registers */ \ | ||
80 | lwz r25,92(r1); \ | ||
81 | xor r0,r0,r0; \ | ||
82 | stw r0,8(r1); /* Delete sensitive data */ \ | ||
83 | stw r0,16(r1); /* that another context running */ \ | ||
84 | stw r0,24(r1); /* the same code might have */ \ | ||
85 | stw r0,32(r1); /* pushed. Assume that the */ \ | ||
86 | stw r0,40(r1); /* lower parts of the GPRs */ \ | ||
87 | stw r0,48(r1); /* were already overwritten on */ \ | ||
88 | stw r0,56(r1); /* the way down to here */ \ | ||
89 | stw r0,64(r1); \ | ||
90 | stw r0,72(r1); \ | ||
91 | stw r0,80(r1); \ | ||
92 | addi r1,r1,128; /* cleanup stack frame */ | ||
93 | |||
94 | #ifdef __BIG_ENDIAN__ | ||
95 | #define LOAD_DATA(reg, off) \ | ||
96 | lwz reg,off(rWP); /* load data */ | ||
97 | #define NEXT_BLOCK \ | ||
98 | addi rWP,rWP,64; /* increment per block */ | ||
99 | #else | ||
100 | #define LOAD_DATA(reg, off) \ | ||
101 | lwbrx reg,0,rWP; /* load data */ \ | ||
102 | addi rWP,rWP,4; /* increment per word */ | ||
103 | #define NEXT_BLOCK /* nothing to do */ | ||
104 | #endif | ||
105 | |||
106 | #define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \ | ||
107 | LOAD_DATA(w, off) /* 1: W */ \ | ||
108 | rotrwi rT0,e,6; /* 1: S1 = e rotr 6 */ \ | ||
109 | rotrwi rT1,e,11; /* 1: S1' = e rotr 11 */ \ | ||
110 | rotrwi rT2,e,25; /* 1: S1" = e rotr 25 */ \ | ||
111 | xor rT0,rT0,rT1; /* 1: S1 = S1 xor S1' */ \ | ||
112 | and rT3,e,f; /* 1: ch = e and f */ \ | ||
113 | xor rT0,rT0,rT2; /* 1: S1 = S1 xor S1" */ \ | ||
114 | andc rT1,g,e; /* 1: ch' = ~e and g */ \ | ||
115 | lwz rT2,off(rKP); /* 1: K */ \ | ||
116 | xor rT3,rT3,rT1; /* 1: ch = ch xor ch' */ \ | ||
117 | add h,h,rT0; /* 1: temp1 = h + S1 */ \ | ||
118 | add rT3,rT3,w; /* 1: temp1' = ch + w */ \ | ||
119 | rotrwi rT0,a,2; /* 1: S0 = a rotr 2 */ \ | ||
120 | add h,h,rT3; /* 1: temp1 = temp1 + temp1' */ \ | ||
121 | rotrwi rT1,a,13; /* 1: S0' = a rotr 13 */ \ | ||
122 | add h,h,rT2; /* 1: temp1 = temp1 + K */ \ | ||
123 | rotrwi rT3,a,22; /* 1: S0" = a rotr 22 */ \ | ||
124 | xor rT0,rT0,rT1; /* 1: S0 = S0 xor S0' */ \ | ||
125 | add d,d,h; /* 1: d = d + temp1 */ \ | ||
126 | xor rT3,rT0,rT3; /* 1: S0 = S0 xor S0" */ \ | ||
127 | evmergelo w,w,w; /* shift W */ \ | ||
128 | or rT2,a,b; /* 1: maj = a or b */ \ | ||
129 | and rT1,a,b; /* 1: maj' = a and b */ \ | ||
130 | and rT2,rT2,c; /* 1: maj = maj and c */ \ | ||
131 | LOAD_DATA(w, off+4) /* 2: W */ \ | ||
132 | or rT2,rT1,rT2; /* 1: maj = maj or maj' */ \ | ||
133 | rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \ | ||
134 | add rT3,rT3,rT2; /* 1: temp2 = S0 + maj */ \ | ||
135 | rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \ | ||
136 | add h,h,rT3; /* 1: h = temp1 + temp2 */ \ | ||
137 | rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \ | ||
138 | xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \ | ||
139 | and rT3,d,e; /* 2: ch = e and f */ \ | ||
140 | xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \ | ||
141 | andc rT1,f,d; /* 2: ch' = ~e and g */ \ | ||
142 | lwz rT2,off+4(rKP); /* 2: K */ \ | ||
143 | xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \ | ||
144 | add g,g,rT0; /* 2: temp1 = h + S1 */ \ | ||
145 | add rT3,rT3,w; /* 2: temp1' = ch + w */ \ | ||
146 | rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \ | ||
147 | add g,g,rT3; /* 2: temp1 = temp1 + temp1' */ \ | ||
148 | rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \ | ||
149 | add g,g,rT2; /* 2: temp1 = temp1 + K */ \ | ||
150 | rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \ | ||
151 | xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \ | ||
152 | or rT2,h,a; /* 2: maj = a or b */ \ | ||
153 | xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \ | ||
154 | and rT1,h,a; /* 2: maj' = a and b */ \ | ||
155 | and rT2,rT2,b; /* 2: maj = maj and c */ \ | ||
156 | add c,c,g; /* 2: d = d + temp1 */ \ | ||
157 | or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \ | ||
158 | add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \ | ||
159 | add g,g,rT3 /* 2: h = temp1 + temp2 */ | ||
160 | |||
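Stripped of the dual-issue scheduling and register rotation, each half of R_LOAD_W is the textbook SHA-256 compression step named in the comments (S0, S1, ch, maj, temp1, temp2). A scalar C sketch of one round for reference; sha256_round and rotr are illustrative names, not kernel code:

	#include <stdint.h>
	#include <string.h>

	static uint32_t rotr(uint32_t x, int n)
	{
		return (x >> n) | (x << (32 - n));
	}

	/* One round over the working state s[] = { a, b, c, d, e, f, g, h },
	 * fed one schedule word w and one round constant k -- the formula
	 * the "1:" / "2:" comments above spell out. */
	static void sha256_round(uint32_t s[8], uint32_t w, uint32_t k)
	{
		uint32_t S1 = rotr(s[4], 6) ^ rotr(s[4], 11) ^ rotr(s[4], 25);
		uint32_t ch = (s[4] & s[5]) ^ (~s[4] & s[6]);
		uint32_t temp1 = s[7] + S1 + ch + k + w;
		uint32_t S0 = rotr(s[0], 2) ^ rotr(s[0], 13) ^ rotr(s[0], 22);
		uint32_t maj = (s[0] & s[1]) ^ (s[0] & s[2]) ^ (s[1] & s[2]);
		uint32_t temp2 = S0 + maj;

		memmove(&s[1], &s[0], 7 * sizeof(s[0]));  /* rotate a..g down */
		s[4] += temp1;                  /* e = d + temp1 */
		s[0] = temp1 + temp2;           /* a = temp1 + temp2 */
	}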
161 | #define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \ | ||
162 | rotrwi rT2,e,6; /* 1: S1 = e rotr 6 */ \ | ||
163 | evmergelohi rT0,w0,w1; /* w[-15] */ \ | ||
164 | rotrwi rT3,e,11; /* 1: S1' = e rotr 11 */ \ | ||
165 | evsrwiu rT1,rT0,3; /* s0 = w[-15] >> 3 */ \ | ||
166 | xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1' */ \ | ||
167 | evrlwi rT0,rT0,25; /* s0' = w[-15] rotr 7 */ \ | ||
168 | rotrwi rT3,e,25; /* 1: S1" = e rotr 25 */ \ | ||
169 | evxor rT1,rT1,rT0; /* s0 = s0 xor s0' */ \ | ||
170 | xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1" */ \ | ||
171 | evrlwi rT0,rT0,21; /* s0' = w[-15] rotr 18 */ \ | ||
172 | add h,h,rT2; /* 1: temp1 = h + S1 */ \ | ||
173 | evxor rT0,rT0,rT1; /* s0 = s0 xor s0' */ \ | ||
174 | and rT2,e,f; /* 1: ch = e and f */ \ | ||
175 | evaddw w0,w0,rT0; /* w = w[-16] + s0 */ \ | ||
176 | andc rT3,g,e; /* 1: ch' = ~e and g */ \ | ||
177 | evsrwiu rT0,w7,10; /* s1 = w[-2] >> 10 */ \ | ||
178 | xor rT2,rT2,rT3; /* 1: ch = ch xor ch' */ \ | ||
179 | evrlwi rT1,w7,15; /* s1' = w[-2] rotr 17 */ \ | ||
180 | add h,h,rT2; /* 1: temp1 = temp1 + ch */ \ | ||
181 | evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \ | ||
182 | rotrwi rT2,a,2; /* 1: S0 = a rotr 2 */ \ | ||
183 | evrlwi rT1,w7,13; /* s1' = w[-2] rotr 19 */ \ | ||
184 | rotrwi rT3,a,13; /* 1: S0' = a rotr 13 */ \ | ||
185 | evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \ | ||
186 | xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \ | ||
187 | evldw rT1,off(rKP); /* k */ \ | ||
188 | rotrwi rT3,a,22; /* 1: S0" = a rotr 22 */ \ | ||
189 | evaddw w0,w0,rT0; /* w = w + s1 */ \ | ||
190 | xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0" */ \ | ||
191 | evmergelohi rT0,w4,w5; /* w[-7] */ \ | ||
192 | and rT3,a,b; /* 1: maj = a and b */ \ | ||
193 | evaddw w0,w0,rT0; /* w = w + w[-7] */ \ | ||
194 | CMP_K##k##_LOOP \ | ||
195 | add rT2,rT2,rT3; /* 1: temp2 = S0 + maj */ \ | ||
196 | evaddw rT1,rT1,w0; /* wk = w + k */ \ | ||
197 | xor rT3,a,b; /* 1: maj = a xor b */ \ | ||
198 | evmergehi rT0,rT1,rT1; /* wk1/wk2 */ \ | ||
199 | and rT3,rT3,c; /* 1: maj = maj and c */ \ | ||
200 | add h,h,rT0; /* 1: temp1 = temp1 + wk */ \ | ||
201 | add rT2,rT2,rT3; /* 1: temp2 = temp2 + maj */ \ | ||
202 | add g,g,rT1; /* 2: temp1 = temp1 + wk */ \ | ||
203 | add d,d,h; /* 1: d = d + temp1 */ \ | ||
204 | rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \ | ||
205 | add h,h,rT2; /* 1: h = temp1 + temp2 */ \ | ||
206 | rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \ | ||
207 | rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \ | ||
208 | xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \ | ||
209 | and rT3,d,e; /* 2: ch = e and f */ \ | ||
210 | xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \ | ||
211 | andc rT1,f,d; /* 2: ch' = ~e and g */ \ | ||
212 | add g,g,rT0; /* 2: temp1 = h + S1 */ \ | ||
213 | xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \ | ||
214 | rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \ | ||
215 | add g,g,rT3; /* 2: temp1 = temp1 + ch */ \ | ||
216 | rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \ | ||
217 | rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \ | ||
218 | xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \ | ||
219 | or rT2,h,a; /* 2: maj = a or b */ \ | ||
220 | and rT1,h,a; /* 2: maj' = a and b */ \ | ||
221 | and rT2,rT2,b; /* 2: maj = maj and c */ \ | ||
222 | xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \ | ||
223 | or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \ | ||
224 | add c,c,g; /* 2: d = d + temp1 */ \ | ||
225 | add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \ | ||
226 | add g,g,rT3 /* 2: h = temp1 + temp2 */ | ||
227 | |||
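R_CALC_W folds the message-schedule expansion into the same two rounds, computing a pair of w values with the SPE vector ops (evsrwiu, evrlwi, evaddw). The scalar recurrence it implements, as a reference sketch (schedule_word is an illustrative name):

	#include <stdint.h>

	static uint32_t rotr(uint32_t x, int n)
	{
		return (x >> n) | (x << (32 - n));
	}

	/* w[i] for i >= 16: the recurrence the vector ops compute pairwise. */
	static uint32_t schedule_word(const uint32_t w[], int i)
	{
		uint32_t s0 = rotr(w[i - 15], 7) ^ rotr(w[i - 15], 18) ^
			      (w[i - 15] >> 3);
		uint32_t s1 = rotr(w[i - 2], 17) ^ rotr(w[i - 2], 19) ^
			      (w[i - 2] >> 10);

		return w[i - 16] + s0 + w[i - 7] + s1;
	}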
228 | _GLOBAL(ppc_spe_sha256_transform) | ||
229 | INITIALIZE | ||
230 | |||
231 | mtctr r5 | ||
232 | lwz rH0,0(rHP) | ||
233 | lwz rH1,4(rHP) | ||
234 | lwz rH2,8(rHP) | ||
235 | lwz rH3,12(rHP) | ||
236 | lwz rH4,16(rHP) | ||
237 | lwz rH5,20(rHP) | ||
238 | lwz rH6,24(rHP) | ||
239 | lwz rH7,28(rHP) | ||
240 | |||
241 | ppc_spe_sha256_main: | ||
242 | lis rKP,PPC_SPE_SHA256_K@ha | ||
243 | addi rKP,rKP,PPC_SPE_SHA256_K@l | ||
244 | |||
245 | R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0) | ||
246 | R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8) | ||
247 | R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16) | ||
248 | R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24) | ||
249 | R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32) | ||
250 | R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40) | ||
251 | R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48) | ||
252 | R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56) | ||
253 | ppc_spe_sha256_16_rounds: | ||
254 | addi rKP,rKP,64 | ||
255 | R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, | ||
256 | rW0, rW1, rW4, rW5, rW7, N, 0) | ||
257 | R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, | ||
258 | rW1, rW2, rW5, rW6, rW0, N, 8) | ||
259 | R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, | ||
260 | rW2, rW3, rW6, rW7, rW1, N, 16) | ||
261 | R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, | ||
262 | rW3, rW4, rW7, rW0, rW2, N, 24) | ||
263 | R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, | ||
264 | rW4, rW5, rW0, rW1, rW3, N, 32) | ||
265 | R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, | ||
266 | rW5, rW6, rW1, rW2, rW4, N, 40) | ||
267 | R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, | ||
268 | rW6, rW7, rW2, rW3, rW5, N, 48) | ||
269 | R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, | ||
270 | rW7, rW0, rW3, rW4, rW6, C, 56) | ||
271 | bt gt,ppc_spe_sha256_16_rounds | ||
272 | |||
273 | lwz rW0,0(rHP) | ||
274 | NEXT_BLOCK | ||
275 | lwz rW1,4(rHP) | ||
276 | lwz rW2,8(rHP) | ||
277 | lwz rW3,12(rHP) | ||
278 | lwz rW4,16(rHP) | ||
279 | lwz rW5,20(rHP) | ||
280 | lwz rW6,24(rHP) | ||
281 | lwz rW7,28(rHP) | ||
282 | |||
283 | add rH0,rH0,rW0 | ||
284 | stw rH0,0(rHP) | ||
285 | add rH1,rH1,rW1 | ||
286 | stw rH1,4(rHP) | ||
287 | add rH2,rH2,rW2 | ||
288 | stw rH2,8(rHP) | ||
289 | add rH3,rH3,rW3 | ||
290 | stw rH3,12(rHP) | ||
291 | add rH4,rH4,rW4 | ||
292 | stw rH4,16(rHP) | ||
293 | add rH5,rH5,rW5 | ||
294 | stw rH5,20(rHP) | ||
295 | add rH6,rH6,rW6 | ||
296 | stw rH6,24(rHP) | ||
297 | add rH7,rH7,rW7 | ||
298 | stw rH7,28(rHP) | ||
299 | |||
300 | bdnz ppc_spe_sha256_main | ||
301 | |||
302 | FINALIZE | ||
303 | blr | ||
304 | |||
305 | .data | ||
306 | .align 5 | ||
307 | PPC_SPE_SHA256_K: | ||
308 | .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | ||
309 | .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | ||
310 | .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | ||
311 | .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | ||
312 | .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | ||
313 | .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | ||
314 | .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | ||
315 | .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | ||
316 | .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | ||
317 | .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | ||
318 | .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | ||
319 | .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | ||
320 | .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | ||
321 | .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | ||
322 | .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | ||
323 | .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | ||
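The table holds the standard FIPS 180-4 round constants: the first 32 bits of the fractional parts of the cube roots of the first 64 primes. A throwaway generator for cross-checking the values (double precision suffices for these 32 bits, but treat this as a verification sketch, not kernel code):

	#include <math.h>
	#include <stdio.h>

	/* Print the 64 SHA-256 round constants, four per line as above. */
	int main(void)
	{
		int found = 0;

		for (int n = 2; found < 64; n++) {
			int prime = 1;

			for (int d = 2; d * d <= n; d++)
				if (n % d == 0) {
					prime = 0;
					break;
				}
			if (!prime)
				continue;
			double c = cbrt((double)n);
			printf("0x%08x%s",
			       (unsigned)((c - floor(c)) * 4294967296.0),
			       ++found % 4 ? "," : "\n");
		}
		return 0;
	}

Compiled with -lm, the first line of output is 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5, matching the table.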
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c new file mode 100644 index 000000000000..f4a616fe1a82 --- /dev/null +++ b/arch/powerpc/crypto/sha256-spe-glue.c | |||
@@ -0,0 +1,275 @@ | |||
1 | /* | ||
2 | * Glue code for SHA-256 implementation for SPE instructions (PPC) | ||
3 | * | ||
4 | * Based on the generic implementation. The assembler module takes care | ||
5 | * of the SPE registers so it can run from interrupt context. | ||
6 | * | ||
7 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License as published by the Free | ||
11 | * Software Foundation; either version 2 of the License, or (at your option) | ||
12 | * any later version. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <crypto/internal/hash.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/cryptohash.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <crypto/sha.h> | ||
23 | #include <asm/byteorder.h> | ||
24 | #include <asm/switch_to.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | |||
27 | /* | ||
28 | * MAX_BYTES defines the number of bytes that are allowed to be processed | ||
29 | * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000 | ||
30 | * operations per 64 bytes. e500 cores can issue two arithmetic instructions | ||
31 | * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2). | ||
32 | * Thus 1KB of input data will need an estimated maximum of 18,000 cycles, | ||
33 | * headroom for cache misses included. Even with the low-end model clocked | ||
34 | * at 667 MHz this amounts to a critical time window of less than 27us. | ||
35 | * | ||
36 | */ | ||
37 | #define MAX_BYTES 1024 | ||
38 | |||
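The comment's bound is easy to re-derive: 1KB is 16 blocks, so roughly 16 * 2,000 = 32,000 operations, which dual-issue at two per cycle makes 16,000 cycles, or about 18,000 with headroom; 18,000 / 667 MHz is ~27us. A throwaway check of the arithmetic (the 2,000-ops and 667 MHz figures are taken from the comment above):

	#include <stdio.h>

	int main(void)
	{
		double ops = (1024.0 / 64.0) * 2000.0;  /* ~32,000 operations */
		double cycles = ops / 2.0 * 1.125;      /* dual issue, ~12% headroom */

		/* prints "18000 cycles, 27.0 us at 667 MHz" */
		printf("%.0f cycles, %.1f us at 667 MHz\n", cycles, cycles / 667.0);
		return 0;
	}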
39 | extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks); | ||
40 | |||
41 | static void spe_begin(void) | ||
42 | { | ||
43 | /* We just start SPE operations and will save SPE registers later. */ | ||
44 | preempt_disable(); | ||
45 | enable_kernel_spe(); | ||
46 | } | ||
47 | |||
48 | static void spe_end(void) | ||
49 | { | ||
50 | /* reenable preemption */ | ||
51 | preempt_enable(); | ||
52 | } | ||
53 | |||
54 | static inline void ppc_sha256_clear_context(struct sha256_state *sctx) | ||
55 | { | ||
56 | int count = sizeof(struct sha256_state) >> 2; | ||
57 | u32 *ptr = (u32 *)sctx; | ||
58 | |||
59 | /* make sure we can clear the fast way */ | ||
60 | BUILD_BUG_ON(sizeof(struct sha256_state) % 4); | ||
61 | do { *ptr++ = 0; } while (--count); | ||
62 | } | ||
63 | |||
64 | static int ppc_spe_sha256_init(struct shash_desc *desc) | ||
65 | { | ||
66 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
67 | |||
68 | sctx->state[0] = SHA256_H0; | ||
69 | sctx->state[1] = SHA256_H1; | ||
70 | sctx->state[2] = SHA256_H2; | ||
71 | sctx->state[3] = SHA256_H3; | ||
72 | sctx->state[4] = SHA256_H4; | ||
73 | sctx->state[5] = SHA256_H5; | ||
74 | sctx->state[6] = SHA256_H6; | ||
75 | sctx->state[7] = SHA256_H7; | ||
76 | sctx->count = 0; | ||
77 | |||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static int ppc_spe_sha224_init(struct shash_desc *desc) | ||
82 | { | ||
83 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
84 | |||
85 | sctx->state[0] = SHA224_H0; | ||
86 | sctx->state[1] = SHA224_H1; | ||
87 | sctx->state[2] = SHA224_H2; | ||
88 | sctx->state[3] = SHA224_H3; | ||
89 | sctx->state[4] = SHA224_H4; | ||
90 | sctx->state[5] = SHA224_H5; | ||
91 | sctx->state[6] = SHA224_H6; | ||
92 | sctx->state[7] = SHA224_H7; | ||
93 | sctx->count = 0; | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data, | ||
99 | unsigned int len) | ||
100 | { | ||
101 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
102 | const unsigned int offset = sctx->count & 0x3f; | ||
103 | const unsigned int avail = 64 - offset; | ||
104 | unsigned int bytes; | ||
105 | const u8 *src = data; | ||
106 | |||
107 | if (avail > len) { | ||
108 | sctx->count += len; | ||
109 | memcpy((char *)sctx->buf + offset, src, len); | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | sctx->count += len; | ||
114 | |||
115 | if (offset) { | ||
116 | memcpy((char *)sctx->buf + offset, src, avail); | ||
117 | |||
118 | spe_begin(); | ||
119 | ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1); | ||
120 | spe_end(); | ||
121 | |||
122 | len -= avail; | ||
123 | src += avail; | ||
124 | } | ||
125 | |||
126 | while (len > 63) { | ||
127 | /* cut input data into smaller blocks */ | ||
128 | bytes = (len > MAX_BYTES) ? MAX_BYTES : len; | ||
129 | bytes = bytes & ~0x3f; | ||
130 | |||
131 | spe_begin(); | ||
132 | ppc_spe_sha256_transform(sctx->state, src, bytes >> 6); | ||
133 | spe_end(); | ||
134 | |||
135 | src += bytes; | ||
136 | len -= bytes; | ||
137 | } | ||
138 | |||
139 | memcpy((char *)sctx->buf, src, len); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
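The while loop above caps each preempt-disabled stretch at MAX_BYTES and rounds down to whole 64-byte blocks, leaving any tail in the context buffer. A small user-space sketch of how an input length gets carved up (illustrative only):

	#include <stdio.h>

	#define MAX_BYTES 1024

	int main(void)
	{
		unsigned int len = 1500;        /* example: 1500 bytes to hash */

		while (len > 63) {
			unsigned int bytes = (len > MAX_BYTES) ? MAX_BYTES : len;

			bytes &= ~0x3fU;        /* whole 64-byte blocks only */
			printf("transform %u bytes (%u blocks)\n",
			       bytes, bytes >> 6);
			len -= bytes;
		}
		/* 1500 -> 1024 + 448 transformed, 28 bytes buffered */
		printf("%u bytes stay in the partial-block buffer\n", len);
		return 0;
	}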
143 | static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out) | ||
144 | { | ||
145 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
146 | const unsigned int offset = sctx->count & 0x3f; | ||
147 | char *p = (char *)sctx->buf + offset; | ||
148 | int padlen; | ||
149 | __be64 *pbits = (__be64 *)(((char *)&sctx->buf) + 56); | ||
150 | __be32 *dst = (__be32 *)out; | ||
151 | |||
152 | padlen = 55 - offset; | ||
153 | *p++ = 0x80; | ||
154 | |||
155 | spe_begin(); | ||
156 | |||
157 | if (padlen < 0) { | ||
158 | memset(p, 0x00, padlen + sizeof (u64)); | ||
159 | ppc_spe_sha256_transform(sctx->state, sctx->buf, 1); | ||
160 | p = (char *)sctx->buf; | ||
161 | padlen = 56; | ||
162 | } | ||
163 | |||
164 | memset(p, 0, padlen); | ||
165 | *pbits = cpu_to_be64(sctx->count << 3); | ||
166 | ppc_spe_sha256_transform(sctx->state, sctx->buf, 1); | ||
167 | |||
168 | spe_end(); | ||
169 | |||
170 | dst[0] = cpu_to_be32(sctx->state[0]); | ||
171 | dst[1] = cpu_to_be32(sctx->state[1]); | ||
172 | dst[2] = cpu_to_be32(sctx->state[2]); | ||
173 | dst[3] = cpu_to_be32(sctx->state[3]); | ||
174 | dst[4] = cpu_to_be32(sctx->state[4]); | ||
175 | dst[5] = cpu_to_be32(sctx->state[5]); | ||
176 | dst[6] = cpu_to_be32(sctx->state[6]); | ||
177 | dst[7] = cpu_to_be32(sctx->state[7]); | ||
178 | |||
179 | ppc_sha256_clear_context(sctx); | ||
180 | return 0; | ||
181 | } | ||
182 | |||
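The finalization appends 0x80, zero-fills up to byte 56 and stores the bit count in the last 8 bytes; when the partial block already holds more than 55 bytes, padlen goes negative and an extra block must be flushed first. A sketch of when that spill happens (not kernel code):

	#include <stdio.h>

	int main(void)
	{
		for (int offset = 48; offset < 64; offset++) {
			int padlen = 55 - offset;  /* as in ppc_spe_sha256_final */

			printf("offset %2d: %s\n", offset,
			       padlen < 0 ? "pad spills into an extra block"
					  : "length fits in the current block");
		}
		return 0;
	}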
183 | static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out) | ||
184 | { | ||
185 | u32 D[SHA256_DIGEST_SIZE >> 2]; | ||
186 | __be32 *dst = (__be32 *)out; | ||
187 | |||
188 | ppc_spe_sha256_final(desc, (u8 *)D); | ||
189 | |||
190 | /* avoid bytewise memcpy */ | ||
191 | dst[0] = D[0]; | ||
192 | dst[1] = D[1]; | ||
193 | dst[2] = D[2]; | ||
194 | dst[3] = D[3]; | ||
195 | dst[4] = D[4]; | ||
196 | dst[5] = D[5]; | ||
197 | dst[6] = D[6]; | ||
198 | |||
199 | /* clear sensitive data */ | ||
200 | memzero_explicit(D, SHA256_DIGEST_SIZE); | ||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | static int ppc_spe_sha256_export(struct shash_desc *desc, void *out) | ||
205 | { | ||
206 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
207 | |||
208 | memcpy(out, sctx, sizeof(*sctx)); | ||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in) | ||
213 | { | ||
214 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
215 | |||
216 | memcpy(sctx, in, sizeof(*sctx)); | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | static struct shash_alg algs[2] = { { | ||
221 | .digestsize = SHA256_DIGEST_SIZE, | ||
222 | .init = ppc_spe_sha256_init, | ||
223 | .update = ppc_spe_sha256_update, | ||
224 | .final = ppc_spe_sha256_final, | ||
225 | .export = ppc_spe_sha256_export, | ||
226 | .import = ppc_spe_sha256_import, | ||
227 | .descsize = sizeof(struct sha256_state), | ||
228 | .statesize = sizeof(struct sha256_state), | ||
229 | .base = { | ||
230 | .cra_name = "sha256", | ||
231 | .cra_driver_name= "sha256-ppc-spe", | ||
232 | .cra_priority = 300, | ||
233 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
234 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
235 | .cra_module = THIS_MODULE, | ||
236 | } | ||
237 | }, { | ||
238 | .digestsize = SHA224_DIGEST_SIZE, | ||
239 | .init = ppc_spe_sha224_init, | ||
240 | .update = ppc_spe_sha256_update, | ||
241 | .final = ppc_spe_sha224_final, | ||
242 | .export = ppc_spe_sha256_export, | ||
243 | .import = ppc_spe_sha256_import, | ||
244 | .descsize = sizeof(struct sha256_state), | ||
245 | .statesize = sizeof(struct sha256_state), | ||
246 | .base = { | ||
247 | .cra_name = "sha224", | ||
248 | .cra_driver_name= "sha224-ppc-spe", | ||
249 | .cra_priority = 300, | ||
250 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
251 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
252 | .cra_module = THIS_MODULE, | ||
253 | } | ||
254 | } }; | ||
255 | |||
256 | static int __init ppc_spe_sha256_mod_init(void) | ||
257 | { | ||
258 | return crypto_register_shashes(algs, ARRAY_SIZE(algs)); | ||
259 | } | ||
260 | |||
261 | static void __exit ppc_spe_sha256_mod_fini(void) | ||
262 | { | ||
263 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); | ||
264 | } | ||
265 | |||
266 | module_init(ppc_spe_sha256_mod_init); | ||
267 | module_exit(ppc_spe_sha256_mod_fini); | ||
268 | |||
269 | MODULE_LICENSE("GPL"); | ||
270 | MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized"); | ||
271 | |||
272 | MODULE_ALIAS_CRYPTO("sha224"); | ||
273 | MODULE_ALIAS_CRYPTO("sha224-ppc-spe"); | ||
274 | MODULE_ALIAS_CRYPTO("sha256"); | ||
275 | MODULE_ALIAS_CRYPTO("sha256-ppc-spe"); | ||
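Once the module is loaded, the driver can be exercised from user space through AF_ALG; with priority 300 the SPE implementation should win the "sha256" lookup over sha256-generic. A minimal test sketch, assuming CONFIG_CRYPTO_USER_API_HASH is enabled (error handling omitted for brevity):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/if_alg.h>

	int main(void)
	{
		struct sockaddr_alg sa = {
			.salg_family = AF_ALG,
			.salg_type   = "hash",
			.salg_name   = "sha256",
		};
		unsigned char digest[32];
		int tfm, op;

		tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
		bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
		op = accept(tfm, NULL, 0);

		write(op, "abc", 3);              /* hash the message "abc" */
		read(op, digest, sizeof(digest));

		for (int i = 0; i < 32; i++)
			printf("%02x", digest[i]);
		printf("\n");   /* expect ba7816bf...f20015ad for "abc" */
		return 0;
	}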
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 54f60ab41c63..112cefacf2af 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -797,7 +797,9 @@ static int rfc4106_init(struct crypto_tfm *tfm) | |||
797 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); | 797 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); |
798 | struct crypto_aead *cryptd_child; | 798 | struct crypto_aead *cryptd_child; |
799 | struct aesni_rfc4106_gcm_ctx *child_ctx; | 799 | struct aesni_rfc4106_gcm_ctx *child_ctx; |
800 | cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); | 800 | cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", |
801 | CRYPTO_ALG_INTERNAL, | ||
802 | CRYPTO_ALG_INTERNAL); | ||
801 | if (IS_ERR(cryptd_tfm)) | 803 | if (IS_ERR(cryptd_tfm)) |
802 | return PTR_ERR(cryptd_tfm); | 804 | return PTR_ERR(cryptd_tfm); |
803 | 805 | ||
@@ -890,15 +892,12 @@ out_free_ablkcipher: | |||
890 | return ret; | 892 | return ret; |
891 | } | 893 | } |
892 | 894 | ||
893 | static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | 895 | static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, |
894 | unsigned int key_len) | 896 | unsigned int key_len) |
895 | { | 897 | { |
896 | int ret = 0; | 898 | int ret = 0; |
897 | struct crypto_tfm *tfm = crypto_aead_tfm(parent); | 899 | struct crypto_tfm *tfm = crypto_aead_tfm(aead); |
898 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | 900 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead); |
899 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
900 | struct aesni_rfc4106_gcm_ctx *child_ctx = | ||
901 | aesni_rfc4106_gcm_ctx_get(cryptd_child); | ||
902 | u8 *new_key_align, *new_key_mem = NULL; | 901 | u8 *new_key_align, *new_key_mem = NULL; |
903 | 902 | ||
904 | if (key_len < 4) { | 903 | if (key_len < 4) { |
@@ -943,20 +942,31 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | |||
943 | goto exit; | 942 | goto exit; |
944 | } | 943 | } |
945 | ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); | 944 | ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); |
946 | memcpy(child_ctx, ctx, sizeof(*ctx)); | ||
947 | exit: | 945 | exit: |
948 | kfree(new_key_mem); | 946 | kfree(new_key_mem); |
949 | return ret; | 947 | return ret; |
950 | } | 948 | } |
951 | 949 | ||
952 | /* This is the Integrity Check Value (aka the authentication tag length) and can | 950 | static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, |
953 | * be 8, 12 or 16 bytes long. */ | 951 | unsigned int key_len) |
954 | static int rfc4106_set_authsize(struct crypto_aead *parent, | ||
955 | unsigned int authsize) | ||
956 | { | 952 | { |
957 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | 953 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); |
958 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | 954 | struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm); |
955 | struct aesni_rfc4106_gcm_ctx *c_ctx = aesni_rfc4106_gcm_ctx_get(child); | ||
956 | struct cryptd_aead *cryptd_tfm = ctx->cryptd_tfm; | ||
957 | int ret; | ||
959 | 958 | ||
959 | ret = crypto_aead_setkey(child, key, key_len); | ||
960 | if (!ret) { | ||
961 | memcpy(ctx, c_ctx, sizeof(*ctx)); | ||
962 | ctx->cryptd_tfm = cryptd_tfm; | ||
963 | } | ||
964 | return ret; | ||
965 | } | ||
966 | |||
967 | static int common_rfc4106_set_authsize(struct crypto_aead *aead, | ||
968 | unsigned int authsize) | ||
969 | { | ||
960 | switch (authsize) { | 970 | switch (authsize) { |
961 | case 8: | 971 | case 8: |
962 | case 12: | 972 | case 12: |
@@ -965,51 +975,23 @@ static int rfc4106_set_authsize(struct crypto_aead *parent, | |||
965 | default: | 975 | default: |
966 | return -EINVAL; | 976 | return -EINVAL; |
967 | } | 977 | } |
968 | crypto_aead_crt(parent)->authsize = authsize; | 978 | crypto_aead_crt(aead)->authsize = authsize; |
969 | crypto_aead_crt(cryptd_child)->authsize = authsize; | ||
970 | return 0; | 979 | return 0; |
971 | } | 980 | } |
972 | 981 | ||
973 | static int rfc4106_encrypt(struct aead_request *req) | 982 | /* This is the Integrity Check Value (aka the authentication tag length and can |
974 | { | 983 | * be 8, 12 or 16 bytes long. */ |
975 | int ret; | 984 | static int rfc4106_set_authsize(struct crypto_aead *parent, |
976 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | 985 | unsigned int authsize) |
977 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
978 | |||
979 | if (!irq_fpu_usable()) { | ||
980 | struct aead_request *cryptd_req = | ||
981 | (struct aead_request *) aead_request_ctx(req); | ||
982 | memcpy(cryptd_req, req, sizeof(*req)); | ||
983 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
984 | return crypto_aead_encrypt(cryptd_req); | ||
985 | } else { | ||
986 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
987 | kernel_fpu_begin(); | ||
988 | ret = cryptd_child->base.crt_aead.encrypt(req); | ||
989 | kernel_fpu_end(); | ||
990 | return ret; | ||
991 | } | ||
992 | } | ||
993 | |||
994 | static int rfc4106_decrypt(struct aead_request *req) | ||
995 | { | 986 | { |
987 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | ||
988 | struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm); | ||
996 | int ret; | 989 | int ret; |
997 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
998 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
999 | 990 | ||
1000 | if (!irq_fpu_usable()) { | 991 | ret = crypto_aead_setauthsize(child, authsize); |
1001 | struct aead_request *cryptd_req = | 992 | if (!ret) |
1002 | (struct aead_request *) aead_request_ctx(req); | 993 | crypto_aead_crt(parent)->authsize = authsize; |
1003 | memcpy(cryptd_req, req, sizeof(*req)); | 994 | return ret; |
1004 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1005 | return crypto_aead_decrypt(cryptd_req); | ||
1006 | } else { | ||
1007 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
1008 | kernel_fpu_begin(); | ||
1009 | ret = cryptd_child->base.crt_aead.decrypt(req); | ||
1010 | kernel_fpu_end(); | ||
1011 | return ret; | ||
1012 | } | ||
1013 | } | 995 | } |
1014 | 996 | ||
1015 | static int __driver_rfc4106_encrypt(struct aead_request *req) | 997 | static int __driver_rfc4106_encrypt(struct aead_request *req) |
@@ -1185,6 +1167,78 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
1185 | } | 1167 | } |
1186 | return retval; | 1168 | return retval; |
1187 | } | 1169 | } |
1170 | |||
1171 | static int rfc4106_encrypt(struct aead_request *req) | ||
1172 | { | ||
1173 | int ret; | ||
1174 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1175 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1176 | |||
1177 | if (!irq_fpu_usable()) { | ||
1178 | struct aead_request *cryptd_req = | ||
1179 | (struct aead_request *) aead_request_ctx(req); | ||
1180 | |||
1181 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1182 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1183 | ret = crypto_aead_encrypt(cryptd_req); | ||
1184 | } else { | ||
1185 | kernel_fpu_begin(); | ||
1186 | ret = __driver_rfc4106_encrypt(req); | ||
1187 | kernel_fpu_end(); | ||
1188 | } | ||
1189 | return ret; | ||
1190 | } | ||
1191 | |||
1192 | static int rfc4106_decrypt(struct aead_request *req) | ||
1193 | { | ||
1194 | int ret; | ||
1195 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1196 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1197 | |||
1198 | if (!irq_fpu_usable()) { | ||
1199 | struct aead_request *cryptd_req = | ||
1200 | (struct aead_request *) aead_request_ctx(req); | ||
1201 | |||
1202 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1203 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1204 | ret = crypto_aead_decrypt(cryptd_req); | ||
1205 | } else { | ||
1206 | kernel_fpu_begin(); | ||
1207 | ret = __driver_rfc4106_decrypt(req); | ||
1208 | kernel_fpu_end(); | ||
1209 | } | ||
1210 | return ret; | ||
1211 | } | ||
1212 | |||
1213 | static int helper_rfc4106_encrypt(struct aead_request *req) | ||
1214 | { | ||
1215 | int ret; | ||
1216 | |||
1217 | if (unlikely(!irq_fpu_usable())) { | ||
1218 | WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context"); | ||
1219 | ret = -EINVAL; | ||
1220 | } else { | ||
1221 | kernel_fpu_begin(); | ||
1222 | ret = __driver_rfc4106_encrypt(req); | ||
1223 | kernel_fpu_end(); | ||
1224 | } | ||
1225 | return ret; | ||
1226 | } | ||
1227 | |||
1228 | static int helper_rfc4106_decrypt(struct aead_request *req) | ||
1229 | { | ||
1230 | int ret; | ||
1231 | |||
1232 | if (unlikely(!irq_fpu_usable())) { | ||
1233 | WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context"); | ||
1234 | ret = -EINVAL; | ||
1235 | } else { | ||
1236 | kernel_fpu_begin(); | ||
1237 | ret = __driver_rfc4106_decrypt(req); | ||
1238 | kernel_fpu_end(); | ||
1239 | } | ||
1240 | return ret; | ||
1241 | } | ||
1188 | #endif | 1242 | #endif |
1189 | 1243 | ||
1190 | static struct crypto_alg aesni_algs[] = { { | 1244 | static struct crypto_alg aesni_algs[] = { { |
@@ -1210,7 +1264,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
1210 | .cra_name = "__aes-aesni", | 1264 | .cra_name = "__aes-aesni", |
1211 | .cra_driver_name = "__driver-aes-aesni", | 1265 | .cra_driver_name = "__driver-aes-aesni", |
1212 | .cra_priority = 0, | 1266 | .cra_priority = 0, |
1213 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | 1267 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL, |
1214 | .cra_blocksize = AES_BLOCK_SIZE, | 1268 | .cra_blocksize = AES_BLOCK_SIZE, |
1215 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1269 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1216 | AESNI_ALIGN - 1, | 1270 | AESNI_ALIGN - 1, |
@@ -1229,7 +1283,8 @@ static struct crypto_alg aesni_algs[] = { { | |||
1229 | .cra_name = "__ecb-aes-aesni", | 1283 | .cra_name = "__ecb-aes-aesni", |
1230 | .cra_driver_name = "__driver-ecb-aes-aesni", | 1284 | .cra_driver_name = "__driver-ecb-aes-aesni", |
1231 | .cra_priority = 0, | 1285 | .cra_priority = 0, |
1232 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1286 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1287 | CRYPTO_ALG_INTERNAL, | ||
1233 | .cra_blocksize = AES_BLOCK_SIZE, | 1288 | .cra_blocksize = AES_BLOCK_SIZE, |
1234 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1289 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1235 | AESNI_ALIGN - 1, | 1290 | AESNI_ALIGN - 1, |
@@ -1249,7 +1304,8 @@ static struct crypto_alg aesni_algs[] = { { | |||
1249 | .cra_name = "__cbc-aes-aesni", | 1304 | .cra_name = "__cbc-aes-aesni", |
1250 | .cra_driver_name = "__driver-cbc-aes-aesni", | 1305 | .cra_driver_name = "__driver-cbc-aes-aesni", |
1251 | .cra_priority = 0, | 1306 | .cra_priority = 0, |
1252 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1307 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1308 | CRYPTO_ALG_INTERNAL, | ||
1253 | .cra_blocksize = AES_BLOCK_SIZE, | 1309 | .cra_blocksize = AES_BLOCK_SIZE, |
1254 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1310 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1255 | AESNI_ALIGN - 1, | 1311 | AESNI_ALIGN - 1, |
@@ -1313,7 +1369,8 @@ static struct crypto_alg aesni_algs[] = { { | |||
1313 | .cra_name = "__ctr-aes-aesni", | 1369 | .cra_name = "__ctr-aes-aesni", |
1314 | .cra_driver_name = "__driver-ctr-aes-aesni", | 1370 | .cra_driver_name = "__driver-ctr-aes-aesni", |
1315 | .cra_priority = 0, | 1371 | .cra_priority = 0, |
1316 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1372 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1373 | CRYPTO_ALG_INTERNAL, | ||
1317 | .cra_blocksize = 1, | 1374 | .cra_blocksize = 1, |
1318 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1375 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1319 | AESNI_ALIGN - 1, | 1376 | AESNI_ALIGN - 1, |
@@ -1357,7 +1414,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
1357 | .cra_name = "__gcm-aes-aesni", | 1414 | .cra_name = "__gcm-aes-aesni", |
1358 | .cra_driver_name = "__driver-gcm-aes-aesni", | 1415 | .cra_driver_name = "__driver-gcm-aes-aesni", |
1359 | .cra_priority = 0, | 1416 | .cra_priority = 0, |
1360 | .cra_flags = CRYPTO_ALG_TYPE_AEAD, | 1417 | .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_INTERNAL, |
1361 | .cra_blocksize = 1, | 1418 | .cra_blocksize = 1, |
1362 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + | 1419 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + |
1363 | AESNI_ALIGN, | 1420 | AESNI_ALIGN, |
@@ -1366,8 +1423,12 @@ static struct crypto_alg aesni_algs[] = { { | |||
1366 | .cra_module = THIS_MODULE, | 1423 | .cra_module = THIS_MODULE, |
1367 | .cra_u = { | 1424 | .cra_u = { |
1368 | .aead = { | 1425 | .aead = { |
1369 | .encrypt = __driver_rfc4106_encrypt, | 1426 | .setkey = common_rfc4106_set_key, |
1370 | .decrypt = __driver_rfc4106_decrypt, | 1427 | .setauthsize = common_rfc4106_set_authsize, |
1428 | .encrypt = helper_rfc4106_encrypt, | ||
1429 | .decrypt = helper_rfc4106_decrypt, | ||
1430 | .ivsize = 8, | ||
1431 | .maxauthsize = 16, | ||
1371 | }, | 1432 | }, |
1372 | }, | 1433 | }, |
1373 | }, { | 1434 | }, { |
@@ -1423,7 +1484,8 @@ static struct crypto_alg aesni_algs[] = { { | |||
1423 | .cra_name = "__lrw-aes-aesni", | 1484 | .cra_name = "__lrw-aes-aesni", |
1424 | .cra_driver_name = "__driver-lrw-aes-aesni", | 1485 | .cra_driver_name = "__driver-lrw-aes-aesni", |
1425 | .cra_priority = 0, | 1486 | .cra_priority = 0, |
1426 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1487 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1488 | CRYPTO_ALG_INTERNAL, | ||
1427 | .cra_blocksize = AES_BLOCK_SIZE, | 1489 | .cra_blocksize = AES_BLOCK_SIZE, |
1428 | .cra_ctxsize = sizeof(struct aesni_lrw_ctx), | 1490 | .cra_ctxsize = sizeof(struct aesni_lrw_ctx), |
1429 | .cra_alignmask = 0, | 1491 | .cra_alignmask = 0, |
@@ -1444,7 +1506,8 @@ static struct crypto_alg aesni_algs[] = { { | |||
1444 | .cra_name = "__xts-aes-aesni", | 1506 | .cra_name = "__xts-aes-aesni", |
1445 | .cra_driver_name = "__driver-xts-aes-aesni", | 1507 | .cra_driver_name = "__driver-xts-aes-aesni", |
1446 | .cra_priority = 0, | 1508 | .cra_priority = 0, |
1447 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1509 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1510 | CRYPTO_ALG_INTERNAL, | ||
1448 | .cra_blocksize = AES_BLOCK_SIZE, | 1511 | .cra_blocksize = AES_BLOCK_SIZE, |
1449 | .cra_ctxsize = sizeof(struct aesni_xts_ctx), | 1512 | .cra_ctxsize = sizeof(struct aesni_xts_ctx), |
1450 | .cra_alignmask = 0, | 1513 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 9a07fafe3831..baf0ac21ace5 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c | |||
@@ -343,7 +343,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
343 | .cra_name = "__ecb-camellia-aesni-avx2", | 343 | .cra_name = "__ecb-camellia-aesni-avx2", |
344 | .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", | 344 | .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", |
345 | .cra_priority = 0, | 345 | .cra_priority = 0, |
346 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 346 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
347 | CRYPTO_ALG_INTERNAL, | ||
347 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 348 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
348 | .cra_ctxsize = sizeof(struct camellia_ctx), | 349 | .cra_ctxsize = sizeof(struct camellia_ctx), |
349 | .cra_alignmask = 0, | 350 | .cra_alignmask = 0, |
@@ -362,7 +363,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
362 | .cra_name = "__cbc-camellia-aesni-avx2", | 363 | .cra_name = "__cbc-camellia-aesni-avx2", |
363 | .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", | 364 | .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", |
364 | .cra_priority = 0, | 365 | .cra_priority = 0, |
365 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 366 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
367 | CRYPTO_ALG_INTERNAL, | ||
366 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 368 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
367 | .cra_ctxsize = sizeof(struct camellia_ctx), | 369 | .cra_ctxsize = sizeof(struct camellia_ctx), |
368 | .cra_alignmask = 0, | 370 | .cra_alignmask = 0, |
@@ -381,7 +383,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
381 | .cra_name = "__ctr-camellia-aesni-avx2", | 383 | .cra_name = "__ctr-camellia-aesni-avx2", |
382 | .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", | 384 | .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", |
383 | .cra_priority = 0, | 385 | .cra_priority = 0, |
384 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 386 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
387 | CRYPTO_ALG_INTERNAL, | ||
385 | .cra_blocksize = 1, | 388 | .cra_blocksize = 1, |
386 | .cra_ctxsize = sizeof(struct camellia_ctx), | 389 | .cra_ctxsize = sizeof(struct camellia_ctx), |
387 | .cra_alignmask = 0, | 390 | .cra_alignmask = 0, |
@@ -401,7 +404,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
401 | .cra_name = "__lrw-camellia-aesni-avx2", | 404 | .cra_name = "__lrw-camellia-aesni-avx2", |
402 | .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", | 405 | .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", |
403 | .cra_priority = 0, | 406 | .cra_priority = 0, |
404 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 407 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
408 | CRYPTO_ALG_INTERNAL, | ||
405 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 409 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
406 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), | 410 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), |
407 | .cra_alignmask = 0, | 411 | .cra_alignmask = 0, |
@@ -424,7 +428,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
424 | .cra_name = "__xts-camellia-aesni-avx2", | 428 | .cra_name = "__xts-camellia-aesni-avx2", |
425 | .cra_driver_name = "__driver-xts-camellia-aesni-avx2", | 429 | .cra_driver_name = "__driver-xts-camellia-aesni-avx2", |
426 | .cra_priority = 0, | 430 | .cra_priority = 0, |
427 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 431 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
432 | CRYPTO_ALG_INTERNAL, | ||
428 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 433 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
429 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), | 434 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), |
430 | .cra_alignmask = 0, | 435 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index ed38d959add6..78818a1e73e3 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c | |||
@@ -335,7 +335,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
335 | .cra_name = "__ecb-camellia-aesni", | 335 | .cra_name = "__ecb-camellia-aesni", |
336 | .cra_driver_name = "__driver-ecb-camellia-aesni", | 336 | .cra_driver_name = "__driver-ecb-camellia-aesni", |
337 | .cra_priority = 0, | 337 | .cra_priority = 0, |
338 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 338 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
339 | CRYPTO_ALG_INTERNAL, | ||
339 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 340 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
340 | .cra_ctxsize = sizeof(struct camellia_ctx), | 341 | .cra_ctxsize = sizeof(struct camellia_ctx), |
341 | .cra_alignmask = 0, | 342 | .cra_alignmask = 0, |
@@ -354,7 +355,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
354 | .cra_name = "__cbc-camellia-aesni", | 355 | .cra_name = "__cbc-camellia-aesni", |
355 | .cra_driver_name = "__driver-cbc-camellia-aesni", | 356 | .cra_driver_name = "__driver-cbc-camellia-aesni", |
356 | .cra_priority = 0, | 357 | .cra_priority = 0, |
357 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 358 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
359 | CRYPTO_ALG_INTERNAL, | ||
358 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 360 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
359 | .cra_ctxsize = sizeof(struct camellia_ctx), | 361 | .cra_ctxsize = sizeof(struct camellia_ctx), |
360 | .cra_alignmask = 0, | 362 | .cra_alignmask = 0, |
@@ -373,7 +375,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
373 | .cra_name = "__ctr-camellia-aesni", | 375 | .cra_name = "__ctr-camellia-aesni", |
374 | .cra_driver_name = "__driver-ctr-camellia-aesni", | 376 | .cra_driver_name = "__driver-ctr-camellia-aesni", |
375 | .cra_priority = 0, | 377 | .cra_priority = 0, |
376 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 378 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
379 | CRYPTO_ALG_INTERNAL, | ||
377 | .cra_blocksize = 1, | 380 | .cra_blocksize = 1, |
378 | .cra_ctxsize = sizeof(struct camellia_ctx), | 381 | .cra_ctxsize = sizeof(struct camellia_ctx), |
379 | .cra_alignmask = 0, | 382 | .cra_alignmask = 0, |
@@ -393,7 +396,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
393 | .cra_name = "__lrw-camellia-aesni", | 396 | .cra_name = "__lrw-camellia-aesni", |
394 | .cra_driver_name = "__driver-lrw-camellia-aesni", | 397 | .cra_driver_name = "__driver-lrw-camellia-aesni", |
395 | .cra_priority = 0, | 398 | .cra_priority = 0, |
396 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 399 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
400 | CRYPTO_ALG_INTERNAL, | ||
397 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 401 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
398 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), | 402 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), |
399 | .cra_alignmask = 0, | 403 | .cra_alignmask = 0, |
@@ -416,7 +420,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
416 | .cra_name = "__xts-camellia-aesni", | 420 | .cra_name = "__xts-camellia-aesni", |
417 | .cra_driver_name = "__driver-xts-camellia-aesni", | 421 | .cra_driver_name = "__driver-xts-camellia-aesni", |
418 | .cra_priority = 0, | 422 | .cra_priority = 0, |
419 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 423 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
424 | CRYPTO_ALG_INTERNAL, | ||
420 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 425 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
421 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), | 426 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), |
422 | .cra_alignmask = 0, | 427 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 60ada677a928..236c80974457 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c | |||
@@ -341,7 +341,8 @@ static struct crypto_alg cast5_algs[6] = { { | |||
341 | .cra_name = "__ecb-cast5-avx", | 341 | .cra_name = "__ecb-cast5-avx", |
342 | .cra_driver_name = "__driver-ecb-cast5-avx", | 342 | .cra_driver_name = "__driver-ecb-cast5-avx", |
343 | .cra_priority = 0, | 343 | .cra_priority = 0, |
344 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 344 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
345 | CRYPTO_ALG_INTERNAL, | ||
345 | .cra_blocksize = CAST5_BLOCK_SIZE, | 346 | .cra_blocksize = CAST5_BLOCK_SIZE, |
346 | .cra_ctxsize = sizeof(struct cast5_ctx), | 347 | .cra_ctxsize = sizeof(struct cast5_ctx), |
347 | .cra_alignmask = 0, | 348 | .cra_alignmask = 0, |
@@ -360,7 +361,8 @@ static struct crypto_alg cast5_algs[6] = { { | |||
360 | .cra_name = "__cbc-cast5-avx", | 361 | .cra_name = "__cbc-cast5-avx", |
361 | .cra_driver_name = "__driver-cbc-cast5-avx", | 362 | .cra_driver_name = "__driver-cbc-cast5-avx", |
362 | .cra_priority = 0, | 363 | .cra_priority = 0, |
363 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 364 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
365 | CRYPTO_ALG_INTERNAL, | ||
364 | .cra_blocksize = CAST5_BLOCK_SIZE, | 366 | .cra_blocksize = CAST5_BLOCK_SIZE, |
365 | .cra_ctxsize = sizeof(struct cast5_ctx), | 367 | .cra_ctxsize = sizeof(struct cast5_ctx), |
366 | .cra_alignmask = 0, | 368 | .cra_alignmask = 0, |
@@ -379,7 +381,8 @@ static struct crypto_alg cast5_algs[6] = { { | |||
379 | .cra_name = "__ctr-cast5-avx", | 381 | .cra_name = "__ctr-cast5-avx", |
380 | .cra_driver_name = "__driver-ctr-cast5-avx", | 382 | .cra_driver_name = "__driver-ctr-cast5-avx", |
381 | .cra_priority = 0, | 383 | .cra_priority = 0, |
382 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 384 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
385 | CRYPTO_ALG_INTERNAL, | ||
383 | .cra_blocksize = 1, | 386 | .cra_blocksize = 1, |
384 | .cra_ctxsize = sizeof(struct cast5_ctx), | 387 | .cra_ctxsize = sizeof(struct cast5_ctx), |
385 | .cra_alignmask = 0, | 388 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 0160f68a57ff..f448810ca4ac 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c | |||
@@ -372,7 +372,8 @@ static struct crypto_alg cast6_algs[10] = { { | |||
372 | .cra_name = "__ecb-cast6-avx", | 372 | .cra_name = "__ecb-cast6-avx", |
373 | .cra_driver_name = "__driver-ecb-cast6-avx", | 373 | .cra_driver_name = "__driver-ecb-cast6-avx", |
374 | .cra_priority = 0, | 374 | .cra_priority = 0, |
375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
376 | CRYPTO_ALG_INTERNAL, | ||
376 | .cra_blocksize = CAST6_BLOCK_SIZE, | 377 | .cra_blocksize = CAST6_BLOCK_SIZE, |
377 | .cra_ctxsize = sizeof(struct cast6_ctx), | 378 | .cra_ctxsize = sizeof(struct cast6_ctx), |
378 | .cra_alignmask = 0, | 379 | .cra_alignmask = 0, |
@@ -391,7 +392,8 @@ static struct crypto_alg cast6_algs[10] = { { | |||
391 | .cra_name = "__cbc-cast6-avx", | 392 | .cra_name = "__cbc-cast6-avx", |
392 | .cra_driver_name = "__driver-cbc-cast6-avx", | 393 | .cra_driver_name = "__driver-cbc-cast6-avx", |
393 | .cra_priority = 0, | 394 | .cra_priority = 0, |
394 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 395 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
396 | CRYPTO_ALG_INTERNAL, | ||
395 | .cra_blocksize = CAST6_BLOCK_SIZE, | 397 | .cra_blocksize = CAST6_BLOCK_SIZE, |
396 | .cra_ctxsize = sizeof(struct cast6_ctx), | 398 | .cra_ctxsize = sizeof(struct cast6_ctx), |
397 | .cra_alignmask = 0, | 399 | .cra_alignmask = 0, |
@@ -410,7 +412,8 @@ static struct crypto_alg cast6_algs[10] = { { | |||
410 | .cra_name = "__ctr-cast6-avx", | 412 | .cra_name = "__ctr-cast6-avx", |
411 | .cra_driver_name = "__driver-ctr-cast6-avx", | 413 | .cra_driver_name = "__driver-ctr-cast6-avx", |
412 | .cra_priority = 0, | 414 | .cra_priority = 0, |
413 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 415 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
416 | CRYPTO_ALG_INTERNAL, | ||
414 | .cra_blocksize = 1, | 417 | .cra_blocksize = 1, |
415 | .cra_ctxsize = sizeof(struct cast6_ctx), | 418 | .cra_ctxsize = sizeof(struct cast6_ctx), |
416 | .cra_alignmask = 0, | 419 | .cra_alignmask = 0, |
@@ -430,7 +433,8 @@ static struct crypto_alg cast6_algs[10] = { { | |||
430 | .cra_name = "__lrw-cast6-avx", | 433 | .cra_name = "__lrw-cast6-avx", |
431 | .cra_driver_name = "__driver-lrw-cast6-avx", | 434 | .cra_driver_name = "__driver-lrw-cast6-avx", |
432 | .cra_priority = 0, | 435 | .cra_priority = 0, |
433 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 436 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
437 | CRYPTO_ALG_INTERNAL, | ||
434 | .cra_blocksize = CAST6_BLOCK_SIZE, | 438 | .cra_blocksize = CAST6_BLOCK_SIZE, |
435 | .cra_ctxsize = sizeof(struct cast6_lrw_ctx), | 439 | .cra_ctxsize = sizeof(struct cast6_lrw_ctx), |
436 | .cra_alignmask = 0, | 440 | .cra_alignmask = 0, |
@@ -453,7 +457,8 @@ static struct crypto_alg cast6_algs[10] = { { | |||
453 | .cra_name = "__xts-cast6-avx", | 457 | .cra_name = "__xts-cast6-avx", |
454 | .cra_driver_name = "__driver-xts-cast6-avx", | 458 | .cra_driver_name = "__driver-xts-cast6-avx", |
455 | .cra_priority = 0, | 459 | .cra_priority = 0, |
456 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 460 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
461 | CRYPTO_ALG_INTERNAL, | ||
457 | .cra_blocksize = CAST6_BLOCK_SIZE, | 462 | .cra_blocksize = CAST6_BLOCK_SIZE, |
458 | .cra_ctxsize = sizeof(struct cast6_xts_ctx), | 463 | .cra_ctxsize = sizeof(struct cast6_xts_ctx), |
459 | .cra_alignmask = 0, | 464 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 8253d85aa165..2079baf06bdd 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c | |||
@@ -154,7 +154,8 @@ static struct shash_alg ghash_alg = { | |||
154 | .cra_name = "__ghash", | 154 | .cra_name = "__ghash", |
155 | .cra_driver_name = "__ghash-pclmulqdqni", | 155 | .cra_driver_name = "__ghash-pclmulqdqni", |
156 | .cra_priority = 0, | 156 | .cra_priority = 0, |
157 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | 157 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | |
158 | CRYPTO_ALG_INTERNAL, | ||
158 | .cra_blocksize = GHASH_BLOCK_SIZE, | 159 | .cra_blocksize = GHASH_BLOCK_SIZE, |
159 | .cra_ctxsize = sizeof(struct ghash_ctx), | 160 | .cra_ctxsize = sizeof(struct ghash_ctx), |
160 | .cra_module = THIS_MODULE, | 161 | .cra_module = THIS_MODULE, |
@@ -261,7 +262,9 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm) | |||
261 | struct cryptd_ahash *cryptd_tfm; | 262 | struct cryptd_ahash *cryptd_tfm; |
262 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | 263 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); |
263 | 264 | ||
264 | cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); | 265 | cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", |
266 | CRYPTO_ALG_INTERNAL, | ||
267 | CRYPTO_ALG_INTERNAL); | ||
265 | if (IS_ERR(cryptd_tfm)) | 268 | if (IS_ERR(cryptd_tfm)) |
266 | return PTR_ERR(cryptd_tfm); | 269 | return PTR_ERR(cryptd_tfm); |
267 | ctx->cryptd_tfm = cryptd_tfm; | 270 | ctx->cryptd_tfm = cryptd_tfm; |
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 432f1d76ceb8..6a85598931b5 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c | |||
@@ -232,7 +232,6 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, | |||
232 | 232 | ||
233 | le128_to_be128((be128 *)walk->iv, &ctrblk); | 233 | le128_to_be128((be128 *)walk->iv, &ctrblk); |
234 | } | 234 | } |
235 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit); | ||
236 | 235 | ||
237 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | 236 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, |
238 | struct blkcipher_desc *desc, | 237 | struct blkcipher_desc *desc, |
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 437e47a4d302..2f63dc89e7a9 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c | |||
@@ -309,7 +309,8 @@ static struct crypto_alg srp_algs[10] = { { | |||
309 | .cra_name = "__ecb-serpent-avx2", | 309 | .cra_name = "__ecb-serpent-avx2", |
310 | .cra_driver_name = "__driver-ecb-serpent-avx2", | 310 | .cra_driver_name = "__driver-ecb-serpent-avx2", |
311 | .cra_priority = 0, | 311 | .cra_priority = 0, |
312 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 312 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
313 | CRYPTO_ALG_INTERNAL, | ||
313 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 314 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
314 | .cra_ctxsize = sizeof(struct serpent_ctx), | 315 | .cra_ctxsize = sizeof(struct serpent_ctx), |
315 | .cra_alignmask = 0, | 316 | .cra_alignmask = 0, |
@@ -329,7 +330,8 @@ static struct crypto_alg srp_algs[10] = { { | |||
329 | .cra_name = "__cbc-serpent-avx2", | 330 | .cra_name = "__cbc-serpent-avx2", |
330 | .cra_driver_name = "__driver-cbc-serpent-avx2", | 331 | .cra_driver_name = "__driver-cbc-serpent-avx2", |
331 | .cra_priority = 0, | 332 | .cra_priority = 0, |
332 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 333 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
334 | CRYPTO_ALG_INTERNAL, | ||
333 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 335 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
334 | .cra_ctxsize = sizeof(struct serpent_ctx), | 336 | .cra_ctxsize = sizeof(struct serpent_ctx), |
335 | .cra_alignmask = 0, | 337 | .cra_alignmask = 0, |
@@ -349,7 +351,8 @@ static struct crypto_alg srp_algs[10] = { { | |||
349 | .cra_name = "__ctr-serpent-avx2", | 351 | .cra_name = "__ctr-serpent-avx2", |
350 | .cra_driver_name = "__driver-ctr-serpent-avx2", | 352 | .cra_driver_name = "__driver-ctr-serpent-avx2", |
351 | .cra_priority = 0, | 353 | .cra_priority = 0, |
352 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 354 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
355 | CRYPTO_ALG_INTERNAL, | ||
353 | .cra_blocksize = 1, | 356 | .cra_blocksize = 1, |
354 | .cra_ctxsize = sizeof(struct serpent_ctx), | 357 | .cra_ctxsize = sizeof(struct serpent_ctx), |
355 | .cra_alignmask = 0, | 358 | .cra_alignmask = 0, |
@@ -370,7 +373,8 @@ static struct crypto_alg srp_algs[10] = { { | |||
370 | .cra_name = "__lrw-serpent-avx2", | 373 | .cra_name = "__lrw-serpent-avx2", |
371 | .cra_driver_name = "__driver-lrw-serpent-avx2", | 374 | .cra_driver_name = "__driver-lrw-serpent-avx2", |
372 | .cra_priority = 0, | 375 | .cra_priority = 0, |
373 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 376 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
377 | CRYPTO_ALG_INTERNAL, | ||
374 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 378 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
375 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | 379 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), |
376 | .cra_alignmask = 0, | 380 | .cra_alignmask = 0, |
@@ -394,7 +398,8 @@ static struct crypto_alg srp_algs[10] = { { | |||
394 | .cra_name = "__xts-serpent-avx2", | 398 | .cra_name = "__xts-serpent-avx2", |
395 | .cra_driver_name = "__driver-xts-serpent-avx2", | 399 | .cra_driver_name = "__driver-xts-serpent-avx2", |
396 | .cra_priority = 0, | 400 | .cra_priority = 0, |
397 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 401 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
402 | CRYPTO_ALG_INTERNAL, | ||
398 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 403 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
399 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | 404 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), |
400 | .cra_alignmask = 0, | 405 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 7e217398b4eb..c8d478af8456 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c | |||
@@ -378,7 +378,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
378 | .cra_name = "__ecb-serpent-avx", | 378 | .cra_name = "__ecb-serpent-avx", |
379 | .cra_driver_name = "__driver-ecb-serpent-avx", | 379 | .cra_driver_name = "__driver-ecb-serpent-avx", |
380 | .cra_priority = 0, | 380 | .cra_priority = 0, |
381 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 381 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
382 | CRYPTO_ALG_INTERNAL, | ||
382 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 383 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
383 | .cra_ctxsize = sizeof(struct serpent_ctx), | 384 | .cra_ctxsize = sizeof(struct serpent_ctx), |
384 | .cra_alignmask = 0, | 385 | .cra_alignmask = 0, |
@@ -397,7 +398,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
397 | .cra_name = "__cbc-serpent-avx", | 398 | .cra_name = "__cbc-serpent-avx", |
398 | .cra_driver_name = "__driver-cbc-serpent-avx", | 399 | .cra_driver_name = "__driver-cbc-serpent-avx", |
399 | .cra_priority = 0, | 400 | .cra_priority = 0, |
400 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 401 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
402 | CRYPTO_ALG_INTERNAL, | ||
401 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 403 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
402 | .cra_ctxsize = sizeof(struct serpent_ctx), | 404 | .cra_ctxsize = sizeof(struct serpent_ctx), |
403 | .cra_alignmask = 0, | 405 | .cra_alignmask = 0, |
@@ -416,7 +418,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
416 | .cra_name = "__ctr-serpent-avx", | 418 | .cra_name = "__ctr-serpent-avx", |
417 | .cra_driver_name = "__driver-ctr-serpent-avx", | 419 | .cra_driver_name = "__driver-ctr-serpent-avx", |
418 | .cra_priority = 0, | 420 | .cra_priority = 0, |
419 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 421 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
422 | CRYPTO_ALG_INTERNAL, | ||
420 | .cra_blocksize = 1, | 423 | .cra_blocksize = 1, |
421 | .cra_ctxsize = sizeof(struct serpent_ctx), | 424 | .cra_ctxsize = sizeof(struct serpent_ctx), |
422 | .cra_alignmask = 0, | 425 | .cra_alignmask = 0, |
@@ -436,7 +439,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
436 | .cra_name = "__lrw-serpent-avx", | 439 | .cra_name = "__lrw-serpent-avx", |
437 | .cra_driver_name = "__driver-lrw-serpent-avx", | 440 | .cra_driver_name = "__driver-lrw-serpent-avx", |
438 | .cra_priority = 0, | 441 | .cra_priority = 0, |
439 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 442 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
443 | CRYPTO_ALG_INTERNAL, | ||
440 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 444 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
441 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | 445 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), |
442 | .cra_alignmask = 0, | 446 | .cra_alignmask = 0, |
@@ -459,7 +463,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
459 | .cra_name = "__xts-serpent-avx", | 463 | .cra_name = "__xts-serpent-avx", |
460 | .cra_driver_name = "__driver-xts-serpent-avx", | 464 | .cra_driver_name = "__driver-xts-serpent-avx", |
461 | .cra_priority = 0, | 465 | .cra_priority = 0, |
462 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 466 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
467 | CRYPTO_ALG_INTERNAL, | ||
463 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 468 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
464 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | 469 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), |
465 | .cra_alignmask = 0, | 470 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index bf025adaea01..3643dd508f45 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c | |||
@@ -387,7 +387,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
387 | .cra_name = "__ecb-serpent-sse2", | 387 | .cra_name = "__ecb-serpent-sse2", |
388 | .cra_driver_name = "__driver-ecb-serpent-sse2", | 388 | .cra_driver_name = "__driver-ecb-serpent-sse2", |
389 | .cra_priority = 0, | 389 | .cra_priority = 0, |
390 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 390 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
391 | CRYPTO_ALG_INTERNAL, | ||
391 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 392 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
392 | .cra_ctxsize = sizeof(struct serpent_ctx), | 393 | .cra_ctxsize = sizeof(struct serpent_ctx), |
393 | .cra_alignmask = 0, | 394 | .cra_alignmask = 0, |
@@ -406,7 +407,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
406 | .cra_name = "__cbc-serpent-sse2", | 407 | .cra_name = "__cbc-serpent-sse2", |
407 | .cra_driver_name = "__driver-cbc-serpent-sse2", | 408 | .cra_driver_name = "__driver-cbc-serpent-sse2", |
408 | .cra_priority = 0, | 409 | .cra_priority = 0, |
409 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 410 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
411 | CRYPTO_ALG_INTERNAL, | ||
410 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 412 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
411 | .cra_ctxsize = sizeof(struct serpent_ctx), | 413 | .cra_ctxsize = sizeof(struct serpent_ctx), |
412 | .cra_alignmask = 0, | 414 | .cra_alignmask = 0, |
@@ -425,7 +427,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
425 | .cra_name = "__ctr-serpent-sse2", | 427 | .cra_name = "__ctr-serpent-sse2", |
426 | .cra_driver_name = "__driver-ctr-serpent-sse2", | 428 | .cra_driver_name = "__driver-ctr-serpent-sse2", |
427 | .cra_priority = 0, | 429 | .cra_priority = 0, |
428 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 430 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
431 | CRYPTO_ALG_INTERNAL, | ||
429 | .cra_blocksize = 1, | 432 | .cra_blocksize = 1, |
430 | .cra_ctxsize = sizeof(struct serpent_ctx), | 433 | .cra_ctxsize = sizeof(struct serpent_ctx), |
431 | .cra_alignmask = 0, | 434 | .cra_alignmask = 0, |
@@ -445,7 +448,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
445 | .cra_name = "__lrw-serpent-sse2", | 448 | .cra_name = "__lrw-serpent-sse2", |
446 | .cra_driver_name = "__driver-lrw-serpent-sse2", | 449 | .cra_driver_name = "__driver-lrw-serpent-sse2", |
447 | .cra_priority = 0, | 450 | .cra_priority = 0, |
448 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 451 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
452 | CRYPTO_ALG_INTERNAL, | ||
449 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 453 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
450 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | 454 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), |
451 | .cra_alignmask = 0, | 455 | .cra_alignmask = 0, |
@@ -468,7 +472,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
468 | .cra_name = "__xts-serpent-sse2", | 472 | .cra_name = "__xts-serpent-sse2", |
469 | .cra_driver_name = "__driver-xts-serpent-sse2", | 473 | .cra_driver_name = "__driver-xts-serpent-sse2", |
470 | .cra_priority = 0, | 474 | .cra_priority = 0, |
471 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 475 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
476 | CRYPTO_ALG_INTERNAL, | ||
472 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 477 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
473 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | 478 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), |
474 | .cra_alignmask = 0, | 479 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index fd9f6b035b16..e510b1c5d690 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c | |||
@@ -694,7 +694,8 @@ static struct shash_alg sha1_mb_shash_alg = { | |||
694 | * use ASYNC flag as some buffers in multi-buffer | 694 | * use ASYNC flag as some buffers in multi-buffer |
695 | * algo may not have completed before hashing thread sleep | 695 | * algo may not have completed before hashing thread sleep |
696 | */ | 696 | */ |
697 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC, | 697 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC | |
698 | CRYPTO_ALG_INTERNAL, | ||
698 | .cra_blocksize = SHA1_BLOCK_SIZE, | 699 | .cra_blocksize = SHA1_BLOCK_SIZE, |
699 | .cra_module = THIS_MODULE, | 700 | .cra_module = THIS_MODULE, |
700 | .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), | 701 | .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), |
@@ -770,7 +771,9 @@ static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) | |||
770 | struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); | 771 | struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); |
771 | struct mcryptd_hash_ctx *mctx; | 772 | struct mcryptd_hash_ctx *mctx; |
772 | 773 | ||
773 | mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0); | 774 | mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", |
775 | CRYPTO_ALG_INTERNAL, | ||
776 | CRYPTO_ALG_INTERNAL); | ||
774 | if (IS_ERR(mcryptd_tfm)) | 777 | if (IS_ERR(mcryptd_tfm)) |
775 | return PTR_ERR(mcryptd_tfm); | 778 | return PTR_ERR(mcryptd_tfm); |
776 | mctx = crypto_ahash_ctx(&mcryptd_tfm->base); | 779 | mctx = crypto_ahash_ctx(&mcryptd_tfm->base); |
@@ -828,7 +831,7 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate) | |||
828 | while (!list_empty(&cstate->work_list)) { | 831 | while (!list_empty(&cstate->work_list)) { |
829 | rctx = list_entry(cstate->work_list.next, | 832 | rctx = list_entry(cstate->work_list.next, |
830 | struct mcryptd_hash_request_ctx, waiter); | 833 | struct mcryptd_hash_request_ctx, waiter); |
831 | if time_before(cur_time, rctx->tag.expire) | 834 | if (time_before(cur_time, rctx->tag.expire)) |
832 | break; | 835 | break; |
833 | kernel_fpu_begin(); | 836 | kernel_fpu_begin(); |
834 | sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); | 837 | sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); |
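Two things happen in this file. First, the outer sha1_mb wrapper must now request the internal "__intel_sha1-mb" implementation explicitly: passing CRYPTO_ALG_INTERNAL as both type and mask requires the flag to be set on the resolved algorithm. Second, the `if time_before(...)` statement gains the parentheses the C grammar expects; it previously compiled only because the macro's expansion happens to be fully parenthesized. A simplified sketch of the lookup predicate follows (the real test lives in the crypto core's lookup path):

    #include <linux/types.h>

    /* An algorithm is a candidate only if the bits selected by 'mask'
     * agree between its cra_flags and the requested 'type'. */
    static bool alg_matches(u32 cra_flags, u32 type, u32 mask)
    {
            return ((cra_flags ^ type) & mask) == 0;
    }

    /*
     * alg_matches(f, CRYPTO_ALG_INTERNAL, CRYPTO_ALG_INTERNAL)
     *     -> true only if f has the INTERNAL bit (what the wrapper asks for)
     * alg_matches(f, 0, CRYPTO_ALG_INTERNAL)
     *     -> true only if f lacks it (what ordinary lookups require)
     */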
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c index 4ca7e166a2aa..822acb5b464c 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c | |||
@@ -56,7 +56,7 @@ | |||
56 | void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) | 56 | void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) |
57 | { | 57 | { |
58 | unsigned int j; | 58 | unsigned int j; |
59 | state->unused_lanes = 0xF76543210; | 59 | state->unused_lanes = 0xF76543210ULL; |
60 | for (j = 0; j < 8; j++) { | 60 | for (j = 0; j < 8; j++) { |
61 | state->lens[j] = 0xFFFFFFFF; | 61 | state->lens[j] = 0xFFFFFFFF; |
62 | state->ldata[j].job_in_lane = NULL; | 62 | state->ldata[j].job_in_lane = NULL; |
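unused_lanes is a nibble-encoded free list: each 4-bit field names an idle lane and 0xF terminates the list, so the initializer needs 36 significant bits. On this x86-64-only code an unsuffixed 0xF76543210 already receives a 64-bit type under C's constant-typing rules, so the ULL suffix makes the intended width explicit (and robust if the code is ever built where long is 32 bits) rather than changing behavior. A sketch of the intended encoding; the pop sequence is illustrative:

    /* init: eight idle lanes (0..7), terminated by the 0xF sentinel nibble */
    unsigned long long unused_lanes = 0xF76543210ULL;

    static unsigned int pop_lane(unsigned long long *unused_lanes)
    {
            unsigned int lane = *unused_lanes & 0xF; /* first idle lane      */
            *unused_lanes >>= 4;                     /* drop it off the list */
            return lane;                             /* 0xF == list empty    */
    }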
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 6c20fe04a738..33d1b9dc14cc 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c | |||
@@ -28,7 +28,7 @@ | |||
28 | #include <linux/cryptohash.h> | 28 | #include <linux/cryptohash.h> |
29 | #include <linux/types.h> | 29 | #include <linux/types.h> |
30 | #include <crypto/sha.h> | 30 | #include <crypto/sha.h> |
31 | #include <asm/byteorder.h> | 31 | #include <crypto/sha1_base.h> |
32 | #include <asm/i387.h> | 32 | #include <asm/i387.h> |
33 | #include <asm/xcr.h> | 33 | #include <asm/xcr.h> |
34 | #include <asm/xsave.h> | 34 | #include <asm/xsave.h> |
@@ -44,132 +44,51 @@ asmlinkage void sha1_transform_avx(u32 *digest, const char *data, | |||
44 | #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ | 44 | #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ |
45 | 45 | ||
46 | asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, | 46 | asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, |
47 | unsigned int rounds); | 47 | unsigned int rounds); |
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); | 50 | static void (*sha1_transform_asm)(u32 *, const char *, unsigned int); |
51 | |||
52 | |||
53 | static int sha1_ssse3_init(struct shash_desc *desc) | ||
54 | { | ||
55 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
56 | |||
57 | *sctx = (struct sha1_state){ | ||
58 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
59 | }; | ||
60 | |||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
65 | unsigned int len, unsigned int partial) | ||
66 | { | ||
67 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
68 | unsigned int done = 0; | ||
69 | |||
70 | sctx->count += len; | ||
71 | |||
72 | if (partial) { | ||
73 | done = SHA1_BLOCK_SIZE - partial; | ||
74 | memcpy(sctx->buffer + partial, data, done); | ||
75 | sha1_transform_asm(sctx->state, sctx->buffer, 1); | ||
76 | } | ||
77 | |||
78 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
79 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
80 | |||
81 | sha1_transform_asm(sctx->state, data + done, rounds); | ||
82 | done += rounds * SHA1_BLOCK_SIZE; | ||
83 | } | ||
84 | |||
85 | memcpy(sctx->buffer, data + done, len - done); | ||
86 | |||
87 | return 0; | ||
88 | } | ||
89 | 51 | ||
90 | static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, | 52 | static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, |
91 | unsigned int len) | 53 | unsigned int len) |
92 | { | 54 | { |
93 | struct sha1_state *sctx = shash_desc_ctx(desc); | 55 | struct sha1_state *sctx = shash_desc_ctx(desc); |
94 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
95 | int res; | ||
96 | 56 | ||
97 | /* Handle the fast case right here */ | 57 | if (!irq_fpu_usable() || |
98 | if (partial + len < SHA1_BLOCK_SIZE) { | 58 | (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) |
99 | sctx->count += len; | 59 | return crypto_sha1_update(desc, data, len); |
100 | memcpy(sctx->buffer + partial, data, len); | ||
101 | 60 | ||
102 | return 0; | 61 | /* make sure casting to sha1_block_fn() is safe */ |
103 | } | 62 | BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); |
104 | 63 | ||
105 | if (!irq_fpu_usable()) { | 64 | kernel_fpu_begin(); |
106 | res = crypto_sha1_update(desc, data, len); | 65 | sha1_base_do_update(desc, data, len, |
107 | } else { | 66 | (sha1_block_fn *)sha1_transform_asm); |
108 | kernel_fpu_begin(); | 67 | kernel_fpu_end(); |
109 | res = __sha1_ssse3_update(desc, data, len, partial); | ||
110 | kernel_fpu_end(); | ||
111 | } | ||
112 | |||
113 | return res; | ||
114 | } | ||
115 | |||
116 | |||
117 | /* Add padding and return the message digest. */ | ||
118 | static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) | ||
119 | { | ||
120 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
121 | unsigned int i, index, padlen; | ||
122 | __be32 *dst = (__be32 *)out; | ||
123 | __be64 bits; | ||
124 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
125 | |||
126 | bits = cpu_to_be64(sctx->count << 3); | ||
127 | |||
128 | /* Pad out to 56 mod 64 and append length */ | ||
129 | index = sctx->count % SHA1_BLOCK_SIZE; | ||
130 | padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); | ||
131 | if (!irq_fpu_usable()) { | ||
132 | crypto_sha1_update(desc, padding, padlen); | ||
133 | crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
134 | } else { | ||
135 | kernel_fpu_begin(); | ||
136 | /* We need to fill a whole block for __sha1_ssse3_update() */ | ||
137 | if (padlen <= 56) { | ||
138 | sctx->count += padlen; | ||
139 | memcpy(sctx->buffer + index, padding, padlen); | ||
140 | } else { | ||
141 | __sha1_ssse3_update(desc, padding, padlen, index); | ||
142 | } | ||
143 | __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56); | ||
144 | kernel_fpu_end(); | ||
145 | } | ||
146 | |||
147 | /* Store state in digest */ | ||
148 | for (i = 0; i < 5; i++) | ||
149 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
150 | |||
151 | /* Wipe context */ | ||
152 | memset(sctx, 0, sizeof(*sctx)); | ||
153 | 68 | ||
154 | return 0; | 69 | return 0; |
155 | } | 70 | } |
156 | 71 | ||
157 | static int sha1_ssse3_export(struct shash_desc *desc, void *out) | 72 | static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, |
73 | unsigned int len, u8 *out) | ||
158 | { | 74 | { |
159 | struct sha1_state *sctx = shash_desc_ctx(desc); | 75 | if (!irq_fpu_usable()) |
76 | return crypto_sha1_finup(desc, data, len, out); | ||
160 | 77 | ||
161 | memcpy(out, sctx, sizeof(*sctx)); | 78 | kernel_fpu_begin(); |
79 | if (len) | ||
80 | sha1_base_do_update(desc, data, len, | ||
81 | (sha1_block_fn *)sha1_transform_asm); | ||
82 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm); | ||
83 | kernel_fpu_end(); | ||
162 | 84 | ||
163 | return 0; | 85 | return sha1_base_finish(desc, out); |
164 | } | 86 | } |
165 | 87 | ||
166 | static int sha1_ssse3_import(struct shash_desc *desc, const void *in) | 88 | /* Add padding and return the message digest. */ |
89 | static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) | ||
167 | { | 90 | { |
168 | struct sha1_state *sctx = shash_desc_ctx(desc); | 91 | return sha1_ssse3_finup(desc, NULL, 0, out); |
169 | |||
170 | memcpy(sctx, in, sizeof(*sctx)); | ||
171 | |||
172 | return 0; | ||
173 | } | 92 | } |
174 | 93 | ||
175 | #ifdef CONFIG_AS_AVX2 | 94 | #ifdef CONFIG_AS_AVX2 |
@@ -186,13 +105,11 @@ static void sha1_apply_transform_avx2(u32 *digest, const char *data, | |||
186 | 105 | ||
187 | static struct shash_alg alg = { | 106 | static struct shash_alg alg = { |
188 | .digestsize = SHA1_DIGEST_SIZE, | 107 | .digestsize = SHA1_DIGEST_SIZE, |
189 | .init = sha1_ssse3_init, | 108 | .init = sha1_base_init, |
190 | .update = sha1_ssse3_update, | 109 | .update = sha1_ssse3_update, |
191 | .final = sha1_ssse3_final, | 110 | .final = sha1_ssse3_final, |
192 | .export = sha1_ssse3_export, | 111 | .finup = sha1_ssse3_finup, |
193 | .import = sha1_ssse3_import, | ||
194 | .descsize = sizeof(struct sha1_state), | 112 | .descsize = sizeof(struct sha1_state), |
195 | .statesize = sizeof(struct sha1_state), | ||
196 | .base = { | 113 | .base = { |
197 | .cra_name = "sha1", | 114 | .cra_name = "sha1", |
198 | .cra_driver_name= "sha1-ssse3", | 115 | .cra_driver_name= "sha1-ssse3", |
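The rewrite deletes the open-coded init/update/padding logic and keeps only the policy decision: fall back to the generic crypto_sha1_*() C paths when the FPU cannot be used (or the input does not complete a block), otherwise run the assembler block function between kernel_fpu_begin()/kernel_fpu_end() via the shared sha1_base_*() helpers. Dropping .export/.import and .statesize is safe because the shash layer falls back to memcpy-based defaults for the descriptor state. Callers are unaffected either way; a minimal in-kernel usage sketch, which resolves "sha1" to the highest-priority implementation available:

    #include <crypto/hash.h>

    static int demo_sha1(const u8 *data, unsigned int len, u8 *digest)
    {
            struct crypto_shash *tfm;
            int err;

            tfm = crypto_alloc_shash("sha1", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            {
                    SHASH_DESC_ON_STACK(desc, tfm);

                    desc->tfm = tfm;
                    desc->flags = 0;
                    err = crypto_shash_digest(desc, data, len, digest);
            }

            crypto_free_shash(tfm);
            return err;
    }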
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 642f15687a0a..92b3b5d75ba9 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S | |||
@@ -96,10 +96,10 @@ SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00 | |||
96 | BYTE_FLIP_MASK = %xmm13 | 96 | BYTE_FLIP_MASK = %xmm13 |
97 | 97 | ||
98 | NUM_BLKS = %rdx # 3rd arg | 98 | NUM_BLKS = %rdx # 3rd arg |
99 | CTX = %rsi # 2nd arg | 99 | INP = %rsi # 2nd arg |
100 | INP = %rdi # 1st arg | 100 | CTX = %rdi # 1st arg |
101 | 101 | ||
102 | SRND = %rdi # clobbers INP | 102 | SRND = %rsi # clobbers INP |
103 | c = %ecx | 103 | c = %ecx |
104 | d = %r8d | 104 | d = %r8d |
105 | e = %edx | 105 | e = %edx |
@@ -342,8 +342,8 @@ a = TMP_ | |||
342 | 342 | ||
343 | ######################################################################## | 343 | ######################################################################## |
344 | ## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) | 344 | ## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) |
345 | ## arg 1 : pointer to input data | 345 | ## arg 1 : pointer to digest |
346 | ## arg 2 : pointer to digest | 346 | ## arg 2 : pointer to input data |
347 | ## arg 3 : Num blocks | 347 | ## arg 3 : Num blocks |
348 | ######################################################################## | 348 | ######################################################################## |
349 | .text | 349 | .text |
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 9e86944c539d..570ec5ec62d7 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S | |||
@@ -91,12 +91,12 @@ BYTE_FLIP_MASK = %ymm13 | |||
91 | X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK | 91 | X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK |
92 | 92 | ||
93 | NUM_BLKS = %rdx # 3rd arg | 93 | NUM_BLKS = %rdx # 3rd arg |
94 | CTX = %rsi # 2nd arg | 94 | INP = %rsi # 2nd arg |
95 | INP = %rdi # 1st arg | 95 | CTX = %rdi # 1st arg |
96 | c = %ecx | 96 | c = %ecx |
97 | d = %r8d | 97 | d = %r8d |
98 | e = %edx # clobbers NUM_BLKS | 98 | e = %edx # clobbers NUM_BLKS |
99 | y3 = %edi # clobbers INP | 99 | y3 = %esi # clobbers INP |
100 | 100 | ||
101 | 101 | ||
102 | TBL = %rbp | 102 | TBL = %rbp |
@@ -523,8 +523,8 @@ STACK_SIZE = _RSP + _RSP_SIZE | |||
523 | 523 | ||
524 | ######################################################################## | 524 | ######################################################################## |
525 | ## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) | 525 | ## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) |
526 | ## arg 1 : pointer to input data | 526 | ## arg 1 : pointer to digest |
527 | ## arg 2 : pointer to digest | 527 | ## arg 2 : pointer to input data |
528 | ## arg 3 : Num blocks | 528 | ## arg 3 : Num blocks |
529 | ######################################################################## | 529 | ######################################################################## |
530 | .text | 530 | .text |
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index f833b74d902b..2cedc44e8121 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S | |||
@@ -88,10 +88,10 @@ SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00 | |||
88 | BYTE_FLIP_MASK = %xmm12 | 88 | BYTE_FLIP_MASK = %xmm12 |
89 | 89 | ||
90 | NUM_BLKS = %rdx # 3rd arg | 90 | NUM_BLKS = %rdx # 3rd arg |
91 | CTX = %rsi # 2nd arg | 91 | INP = %rsi # 2nd arg |
92 | INP = %rdi # 1st arg | 92 | CTX = %rdi # 1st arg |
93 | 93 | ||
94 | SRND = %rdi # clobbers INP | 94 | SRND = %rsi # clobbers INP |
95 | c = %ecx | 95 | c = %ecx |
96 | d = %r8d | 96 | d = %r8d |
97 | e = %edx | 97 | e = %edx |
@@ -348,8 +348,8 @@ a = TMP_ | |||
348 | 348 | ||
349 | ######################################################################## | 349 | ######################################################################## |
350 | ## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) | 350 | ## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) |
351 | ## arg 1 : pointer to input data | 351 | ## arg 1 : pointer to digest |
352 | ## arg 2 : pointer to digest | 352 | ## arg 2 : pointer to input data |
353 | ## arg 3 : Num blocks | 353 | ## arg 3 : Num blocks |
354 | ######################################################################## | 354 | ######################################################################## |
355 | .text | 355 | .text |
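The three assembler files change in lockstep: under the System V AMD64 ABI the first three integer arguments arrive in %rdi, %rsi and %rdx, so swapping the first two parameters (digest first, data second) gives every SHA-256 transform one shared prototype and lets a single glue file drive all of them. The resulting declaration, as used in the glue code below:

    asmlinkage void sha256_transform_ssse3(u32 *digest,      /* arg 1: %rdi */
                                           const char *data, /* arg 2: %rsi */
                                           u64 rounds);      /* arg 3: %rdx */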
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 8fad72f4dfd2..ccc338881ee8 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c | |||
@@ -36,195 +36,74 @@ | |||
36 | #include <linux/cryptohash.h> | 36 | #include <linux/cryptohash.h> |
37 | #include <linux/types.h> | 37 | #include <linux/types.h> |
38 | #include <crypto/sha.h> | 38 | #include <crypto/sha.h> |
39 | #include <asm/byteorder.h> | 39 | #include <crypto/sha256_base.h> |
40 | #include <asm/i387.h> | 40 | #include <asm/i387.h> |
41 | #include <asm/xcr.h> | 41 | #include <asm/xcr.h> |
42 | #include <asm/xsave.h> | 42 | #include <asm/xsave.h> |
43 | #include <linux/string.h> | 43 | #include <linux/string.h> |
44 | 44 | ||
45 | asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest, | 45 | asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, |
46 | u64 rounds); | 46 | u64 rounds); |
47 | #ifdef CONFIG_AS_AVX | 47 | #ifdef CONFIG_AS_AVX |
48 | asmlinkage void sha256_transform_avx(const char *data, u32 *digest, | 48 | asmlinkage void sha256_transform_avx(u32 *digest, const char *data, |
49 | u64 rounds); | 49 | u64 rounds); |
50 | #endif | 50 | #endif |
51 | #ifdef CONFIG_AS_AVX2 | 51 | #ifdef CONFIG_AS_AVX2 |
52 | asmlinkage void sha256_transform_rorx(const char *data, u32 *digest, | 52 | asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, |
53 | u64 rounds); | 53 | u64 rounds); |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64); | 56 | static void (*sha256_transform_asm)(u32 *, const char *, u64); |
57 | |||
58 | |||
59 | static int sha256_ssse3_init(struct shash_desc *desc) | ||
60 | { | ||
61 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
62 | |||
63 | sctx->state[0] = SHA256_H0; | ||
64 | sctx->state[1] = SHA256_H1; | ||
65 | sctx->state[2] = SHA256_H2; | ||
66 | sctx->state[3] = SHA256_H3; | ||
67 | sctx->state[4] = SHA256_H4; | ||
68 | sctx->state[5] = SHA256_H5; | ||
69 | sctx->state[6] = SHA256_H6; | ||
70 | sctx->state[7] = SHA256_H7; | ||
71 | sctx->count = 0; | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
77 | unsigned int len, unsigned int partial) | ||
78 | { | ||
79 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
80 | unsigned int done = 0; | ||
81 | |||
82 | sctx->count += len; | ||
83 | |||
84 | if (partial) { | ||
85 | done = SHA256_BLOCK_SIZE - partial; | ||
86 | memcpy(sctx->buf + partial, data, done); | ||
87 | sha256_transform_asm(sctx->buf, sctx->state, 1); | ||
88 | } | ||
89 | |||
90 | if (len - done >= SHA256_BLOCK_SIZE) { | ||
91 | const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; | ||
92 | |||
93 | sha256_transform_asm(data + done, sctx->state, (u64) rounds); | ||
94 | |||
95 | done += rounds * SHA256_BLOCK_SIZE; | ||
96 | } | ||
97 | |||
98 | memcpy(sctx->buf, data + done, len - done); | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | 57 | ||
103 | static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | 58 | static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, |
104 | unsigned int len) | 59 | unsigned int len) |
105 | { | 60 | { |
106 | struct sha256_state *sctx = shash_desc_ctx(desc); | 61 | struct sha256_state *sctx = shash_desc_ctx(desc); |
107 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
108 | int res; | ||
109 | 62 | ||
110 | /* Handle the fast case right here */ | 63 | if (!irq_fpu_usable() || |
111 | if (partial + len < SHA256_BLOCK_SIZE) { | 64 | (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) |
112 | sctx->count += len; | 65 | return crypto_sha256_update(desc, data, len); |
113 | memcpy(sctx->buf + partial, data, len); | ||
114 | 66 | ||
115 | return 0; | 67 | /* make sure casting to sha256_block_fn() is safe */ |
116 | } | 68 | BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); |
117 | |||
118 | if (!irq_fpu_usable()) { | ||
119 | res = crypto_sha256_update(desc, data, len); | ||
120 | } else { | ||
121 | kernel_fpu_begin(); | ||
122 | res = __sha256_ssse3_update(desc, data, len, partial); | ||
123 | kernel_fpu_end(); | ||
124 | } | ||
125 | |||
126 | return res; | ||
127 | } | ||
128 | 69 | ||
129 | 70 | kernel_fpu_begin(); | |
130 | /* Add padding and return the message digest. */ | 71 | sha256_base_do_update(desc, data, len, |
131 | static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) | 72 | (sha256_block_fn *)sha256_transform_asm); |
132 | { | 73 | kernel_fpu_end(); |
133 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
134 | unsigned int i, index, padlen; | ||
135 | __be32 *dst = (__be32 *)out; | ||
136 | __be64 bits; | ||
137 | static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; | ||
138 | |||
139 | bits = cpu_to_be64(sctx->count << 3); | ||
140 | |||
141 | /* Pad out to 56 mod 64 and append length */ | ||
142 | index = sctx->count % SHA256_BLOCK_SIZE; | ||
143 | padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); | ||
144 | |||
145 | if (!irq_fpu_usable()) { | ||
146 | crypto_sha256_update(desc, padding, padlen); | ||
147 | crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
148 | } else { | ||
149 | kernel_fpu_begin(); | ||
150 | /* We need to fill a whole block for __sha256_ssse3_update() */ | ||
151 | if (padlen <= 56) { | ||
152 | sctx->count += padlen; | ||
153 | memcpy(sctx->buf + index, padding, padlen); | ||
154 | } else { | ||
155 | __sha256_ssse3_update(desc, padding, padlen, index); | ||
156 | } | ||
157 | __sha256_ssse3_update(desc, (const u8 *)&bits, | ||
158 | sizeof(bits), 56); | ||
159 | kernel_fpu_end(); | ||
160 | } | ||
161 | |||
162 | /* Store state in digest */ | ||
163 | for (i = 0; i < 8; i++) | ||
164 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
165 | |||
166 | /* Wipe context */ | ||
167 | memset(sctx, 0, sizeof(*sctx)); | ||
168 | 74 | ||
169 | return 0; | 75 | return 0; |
170 | } | 76 | } |
171 | 77 | ||
172 | static int sha256_ssse3_export(struct shash_desc *desc, void *out) | 78 | static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, |
79 | unsigned int len, u8 *out) | ||
173 | { | 80 | { |
174 | struct sha256_state *sctx = shash_desc_ctx(desc); | 81 | if (!irq_fpu_usable()) |
82 | return crypto_sha256_finup(desc, data, len, out); | ||
175 | 83 | ||
176 | memcpy(out, sctx, sizeof(*sctx)); | 84 | kernel_fpu_begin(); |
85 | if (len) | ||
86 | sha256_base_do_update(desc, data, len, | ||
87 | (sha256_block_fn *)sha256_transform_asm); | ||
88 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm); | ||
89 | kernel_fpu_end(); | ||
177 | 90 | ||
178 | return 0; | 91 | return sha256_base_finish(desc, out); |
179 | } | 92 | } |
180 | 93 | ||
181 | static int sha256_ssse3_import(struct shash_desc *desc, const void *in) | 94 | /* Add padding and return the message digest. */ |
182 | { | 95 | static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) |
183 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
184 | |||
185 | memcpy(sctx, in, sizeof(*sctx)); | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static int sha224_ssse3_init(struct shash_desc *desc) | ||
191 | { | ||
192 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
193 | |||
194 | sctx->state[0] = SHA224_H0; | ||
195 | sctx->state[1] = SHA224_H1; | ||
196 | sctx->state[2] = SHA224_H2; | ||
197 | sctx->state[3] = SHA224_H3; | ||
198 | sctx->state[4] = SHA224_H4; | ||
199 | sctx->state[5] = SHA224_H5; | ||
200 | sctx->state[6] = SHA224_H6; | ||
201 | sctx->state[7] = SHA224_H7; | ||
202 | sctx->count = 0; | ||
203 | |||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash) | ||
208 | { | 96 | { |
209 | u8 D[SHA256_DIGEST_SIZE]; | 97 | return sha256_ssse3_finup(desc, NULL, 0, out); |
210 | |||
211 | sha256_ssse3_final(desc, D); | ||
212 | |||
213 | memcpy(hash, D, SHA224_DIGEST_SIZE); | ||
214 | memzero_explicit(D, SHA256_DIGEST_SIZE); | ||
215 | |||
216 | return 0; | ||
217 | } | 98 | } |
218 | 99 | ||
219 | static struct shash_alg algs[] = { { | 100 | static struct shash_alg algs[] = { { |
220 | .digestsize = SHA256_DIGEST_SIZE, | 101 | .digestsize = SHA256_DIGEST_SIZE, |
221 | .init = sha256_ssse3_init, | 102 | .init = sha256_base_init, |
222 | .update = sha256_ssse3_update, | 103 | .update = sha256_ssse3_update, |
223 | .final = sha256_ssse3_final, | 104 | .final = sha256_ssse3_final, |
224 | .export = sha256_ssse3_export, | 105 | .finup = sha256_ssse3_finup, |
225 | .import = sha256_ssse3_import, | ||
226 | .descsize = sizeof(struct sha256_state), | 106 | .descsize = sizeof(struct sha256_state), |
227 | .statesize = sizeof(struct sha256_state), | ||
228 | .base = { | 107 | .base = { |
229 | .cra_name = "sha256", | 108 | .cra_name = "sha256", |
230 | .cra_driver_name = "sha256-ssse3", | 109 | .cra_driver_name = "sha256-ssse3", |
@@ -235,13 +114,11 @@ static struct shash_alg algs[] = { { | |||
235 | } | 114 | } |
236 | }, { | 115 | }, { |
237 | .digestsize = SHA224_DIGEST_SIZE, | 116 | .digestsize = SHA224_DIGEST_SIZE, |
238 | .init = sha224_ssse3_init, | 117 | .init = sha224_base_init, |
239 | .update = sha256_ssse3_update, | 118 | .update = sha256_ssse3_update, |
240 | .final = sha224_ssse3_final, | 119 | .final = sha256_ssse3_final, |
241 | .export = sha256_ssse3_export, | 120 | .finup = sha256_ssse3_finup, |
242 | .import = sha256_ssse3_import, | ||
243 | .descsize = sizeof(struct sha256_state), | 121 | .descsize = sizeof(struct sha256_state), |
244 | .statesize = sizeof(struct sha256_state), | ||
245 | .base = { | 122 | .base = { |
246 | .cra_name = "sha224", | 123 | .cra_name = "sha224", |
247 | .cra_driver_name = "sha224-ssse3", | 124 | .cra_driver_name = "sha224-ssse3", |
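The only subtle line in the converted glue code is the function-pointer cast: the base layer expects a sha256_block_fn that receives the whole state structure, while the assembler routines take a bare u32 digest pointer. The two are interchangeable only while the digest words are the very first bytes of struct sha256_state, which is exactly what the BUILD_BUG_ON pins down at compile time; any future reordering of the struct becomes a build failure instead of silent state corruption. Roughly (the typedef is assumed to match the sha256_base helpers):

    typedef void (sha256_block_fn)(struct sha256_state *sst, const u8 *src,
                                   int blocks);

    static int do_update(struct shash_desc *desc, const u8 *data,
                         unsigned int len)
    {
            /* sound only because offsetof(struct sha256_state, state) == 0,
             * so u32 *digest and struct sha256_state * alias the same bytes */
            return sha256_base_do_update(desc, data, len,
                            (sha256_block_fn *)sha256_transform_ssse3);
    }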
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 974dde9bc6cd..565274d6a641 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S | |||
@@ -54,9 +54,9 @@ | |||
54 | 54 | ||
55 | # Virtual Registers | 55 | # Virtual Registers |
56 | # ARG1 | 56 | # ARG1 |
57 | msg = %rdi | 57 | digest = %rdi |
58 | # ARG2 | 58 | # ARG2 |
59 | digest = %rsi | 59 | msg = %rsi |
60 | # ARG3 | 60 | # ARG3 |
61 | msglen = %rdx | 61 | msglen = %rdx |
62 | T1 = %rcx | 62 | T1 = %rcx |
@@ -271,7 +271,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE | |||
271 | .endm | 271 | .endm |
272 | 272 | ||
273 | ######################################################################## | 273 | ######################################################################## |
274 | # void sha512_transform_avx(const void* M, void* D, u64 L) | 274 | # void sha512_transform_avx(void* D, const void* M, u64 L) |
275 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | 275 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
276 | # The size of the message pointed to by M must be an integer multiple of SHA512 | 276 | # The size of the message pointed to by M must be an integer multiple of SHA512 |
277 | # message blocks. | 277 | # message blocks. |
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 568b96105f5c..a4771dcd1fcf 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S | |||
@@ -70,9 +70,9 @@ XFER = YTMP0 | |||
70 | BYTE_FLIP_MASK = %ymm9 | 70 | BYTE_FLIP_MASK = %ymm9 |
71 | 71 | ||
72 | # 1st arg | 72 | # 1st arg |
73 | INP = %rdi | 73 | CTX = %rdi |
74 | # 2nd arg | 74 | # 2nd arg |
75 | CTX = %rsi | 75 | INP = %rsi |
76 | # 3rd arg | 76 | # 3rd arg |
77 | NUM_BLKS = %rdx | 77 | NUM_BLKS = %rdx |
78 | 78 | ||
@@ -562,7 +562,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE | |||
562 | .endm | 562 | .endm |
563 | 563 | ||
564 | ######################################################################## | 564 | ######################################################################## |
565 | # void sha512_transform_rorx(const void* M, void* D, uint64_t L)# | 565 | # void sha512_transform_rorx(void* D, const void* M, uint64_t L)# |
566 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | 566 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
567 | # The size of the message pointed to by M must be an integer multiple of SHA512 | 567 | # The size of the message pointed to by M must be an integer multiple of SHA512 |
568 | # message blocks. | 568 | # message blocks. |
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index fb56855d51f5..e610e29cbc81 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S | |||
@@ -53,9 +53,9 @@ | |||
53 | 53 | ||
54 | # Virtual Registers | 54 | # Virtual Registers |
55 | # ARG1 | 55 | # ARG1 |
56 | msg = %rdi | 56 | digest = %rdi |
57 | # ARG2 | 57 | # ARG2 |
58 | digest = %rsi | 58 | msg = %rsi |
59 | # ARG3 | 59 | # ARG3 |
60 | msglen = %rdx | 60 | msglen = %rdx |
61 | T1 = %rcx | 61 | T1 = %rcx |
@@ -269,7 +269,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE | |||
269 | .endm | 269 | .endm |
270 | 270 | ||
271 | ######################################################################## | 271 | ######################################################################## |
272 | # void sha512_transform_ssse3(const void* M, void* D, u64 L)# | 272 | # void sha512_transform_ssse3(void* D, const void* M, u64 L)# |
273 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | 273 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
274 | # The size of the message pointed to by M must be an integer multiple of SHA512 | 274 | # The size of the message pointed to by M must be an integer multiple of SHA512 |
275 | # message blocks. | 275 | # message blocks. |
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 0b6af26832bf..d9fa4c1e063f 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c | |||
@@ -34,205 +34,75 @@ | |||
34 | #include <linux/cryptohash.h> | 34 | #include <linux/cryptohash.h> |
35 | #include <linux/types.h> | 35 | #include <linux/types.h> |
36 | #include <crypto/sha.h> | 36 | #include <crypto/sha.h> |
37 | #include <asm/byteorder.h> | 37 | #include <crypto/sha512_base.h> |
38 | #include <asm/i387.h> | 38 | #include <asm/i387.h> |
39 | #include <asm/xcr.h> | 39 | #include <asm/xcr.h> |
40 | #include <asm/xsave.h> | 40 | #include <asm/xsave.h> |
41 | 41 | ||
42 | #include <linux/string.h> | 42 | #include <linux/string.h> |
43 | 43 | ||
44 | asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest, | 44 | asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, |
45 | u64 rounds); | 45 | u64 rounds); |
46 | #ifdef CONFIG_AS_AVX | 46 | #ifdef CONFIG_AS_AVX |
47 | asmlinkage void sha512_transform_avx(const char *data, u64 *digest, | 47 | asmlinkage void sha512_transform_avx(u64 *digest, const char *data, |
48 | u64 rounds); | 48 | u64 rounds); |
49 | #endif | 49 | #endif |
50 | #ifdef CONFIG_AS_AVX2 | 50 | #ifdef CONFIG_AS_AVX2 |
51 | asmlinkage void sha512_transform_rorx(const char *data, u64 *digest, | 51 | asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, |
52 | u64 rounds); | 52 | u64 rounds); |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64); | 55 | static void (*sha512_transform_asm)(u64 *, const char *, u64); |
56 | |||
57 | |||
58 | static int sha512_ssse3_init(struct shash_desc *desc) | ||
59 | { | ||
60 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
61 | |||
62 | sctx->state[0] = SHA512_H0; | ||
63 | sctx->state[1] = SHA512_H1; | ||
64 | sctx->state[2] = SHA512_H2; | ||
65 | sctx->state[3] = SHA512_H3; | ||
66 | sctx->state[4] = SHA512_H4; | ||
67 | sctx->state[5] = SHA512_H5; | ||
68 | sctx->state[6] = SHA512_H6; | ||
69 | sctx->state[7] = SHA512_H7; | ||
70 | sctx->count[0] = sctx->count[1] = 0; | ||
71 | |||
72 | return 0; | ||
73 | } | ||
74 | 56 | ||
75 | static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | 57 | static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, |
76 | unsigned int len, unsigned int partial) | 58 | unsigned int len) |
77 | { | 59 | { |
78 | struct sha512_state *sctx = shash_desc_ctx(desc); | 60 | struct sha512_state *sctx = shash_desc_ctx(desc); |
79 | unsigned int done = 0; | ||
80 | |||
81 | sctx->count[0] += len; | ||
82 | if (sctx->count[0] < len) | ||
83 | sctx->count[1]++; | ||
84 | 61 | ||
85 | if (partial) { | 62 | if (!irq_fpu_usable() || |
86 | done = SHA512_BLOCK_SIZE - partial; | 63 | (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) |
87 | memcpy(sctx->buf + partial, data, done); | 64 | return crypto_sha512_update(desc, data, len); |
88 | sha512_transform_asm(sctx->buf, sctx->state, 1); | ||
89 | } | ||
90 | |||
91 | if (len - done >= SHA512_BLOCK_SIZE) { | ||
92 | const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; | ||
93 | 65 | ||
94 | sha512_transform_asm(data + done, sctx->state, (u64) rounds); | 66 | /* make sure casting to sha512_block_fn() is safe */ |
95 | 67 | BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0); | |
96 | done += rounds * SHA512_BLOCK_SIZE; | ||
97 | } | ||
98 | 68 | ||
99 | memcpy(sctx->buf, data + done, len - done); | 69 | kernel_fpu_begin(); |
70 | sha512_base_do_update(desc, data, len, | ||
71 | (sha512_block_fn *)sha512_transform_asm); | ||
72 | kernel_fpu_end(); | ||
100 | 73 | ||
101 | return 0; | 74 | return 0; |
102 | } | 75 | } |
103 | 76 | ||
104 | static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | 77 | static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, |
105 | unsigned int len) | 78 | unsigned int len, u8 *out) |
106 | { | 79 | { |
107 | struct sha512_state *sctx = shash_desc_ctx(desc); | 80 | if (!irq_fpu_usable()) |
108 | unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; | 81 | return crypto_sha512_finup(desc, data, len, out); |
109 | int res; | ||
110 | |||
111 | /* Handle the fast case right here */ | ||
112 | if (partial + len < SHA512_BLOCK_SIZE) { | ||
113 | sctx->count[0] += len; | ||
114 | if (sctx->count[0] < len) | ||
115 | sctx->count[1]++; | ||
116 | memcpy(sctx->buf + partial, data, len); | ||
117 | |||
118 | return 0; | ||
119 | } | ||
120 | 82 | ||
121 | if (!irq_fpu_usable()) { | 83 | kernel_fpu_begin(); |
122 | res = crypto_sha512_update(desc, data, len); | 84 | if (len) |
123 | } else { | 85 | sha512_base_do_update(desc, data, len, |
124 | kernel_fpu_begin(); | 86 | (sha512_block_fn *)sha512_transform_asm); |
125 | res = __sha512_ssse3_update(desc, data, len, partial); | 87 | sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm); |
126 | kernel_fpu_end(); | 88 | kernel_fpu_end(); |
127 | } | ||
128 | 89 | ||
129 | return res; | 90 | return sha512_base_finish(desc, out); |
130 | } | 91 | } |
131 | 92 | ||
132 | |||
133 | /* Add padding and return the message digest. */ | 93 | /* Add padding and return the message digest. */ |
134 | static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) | 94 | static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) |
135 | { | 95 | { |
136 | struct sha512_state *sctx = shash_desc_ctx(desc); | 96 | return sha512_ssse3_finup(desc, NULL, 0, out); |
137 | unsigned int i, index, padlen; | ||
138 | __be64 *dst = (__be64 *)out; | ||
139 | __be64 bits[2]; | ||
140 | static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; | ||
141 | |||
142 | /* save number of bits */ | ||
143 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
144 | bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); | ||
145 | |||
146 | /* Pad out to 112 mod 128 and append length */ | ||
147 | index = sctx->count[0] & 0x7f; | ||
148 | padlen = (index < 112) ? (112 - index) : ((128+112) - index); | ||
149 | |||
150 | if (!irq_fpu_usable()) { | ||
151 | crypto_sha512_update(desc, padding, padlen); | ||
152 | crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
153 | } else { | ||
154 | kernel_fpu_begin(); | ||
155 | /* We need to fill a whole block for __sha512_ssse3_update() */ | ||
156 | if (padlen <= 112) { | ||
157 | sctx->count[0] += padlen; | ||
158 | if (sctx->count[0] < padlen) | ||
159 | sctx->count[1]++; | ||
160 | memcpy(sctx->buf + index, padding, padlen); | ||
161 | } else { | ||
162 | __sha512_ssse3_update(desc, padding, padlen, index); | ||
163 | } | ||
164 | __sha512_ssse3_update(desc, (const u8 *)&bits, | ||
165 | sizeof(bits), 112); | ||
166 | kernel_fpu_end(); | ||
167 | } | ||
168 | |||
169 | /* Store state in digest */ | ||
170 | for (i = 0; i < 8; i++) | ||
171 | dst[i] = cpu_to_be64(sctx->state[i]); | ||
172 | |||
173 | /* Wipe context */ | ||
174 | memset(sctx, 0, sizeof(*sctx)); | ||
175 | |||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | static int sha512_ssse3_export(struct shash_desc *desc, void *out) | ||
180 | { | ||
181 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
182 | |||
183 | memcpy(out, sctx, sizeof(*sctx)); | ||
184 | |||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | static int sha512_ssse3_import(struct shash_desc *desc, const void *in) | ||
189 | { | ||
190 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
191 | |||
192 | memcpy(sctx, in, sizeof(*sctx)); | ||
193 | |||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | static int sha384_ssse3_init(struct shash_desc *desc) | ||
198 | { | ||
199 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
200 | |||
201 | sctx->state[0] = SHA384_H0; | ||
202 | sctx->state[1] = SHA384_H1; | ||
203 | sctx->state[2] = SHA384_H2; | ||
204 | sctx->state[3] = SHA384_H3; | ||
205 | sctx->state[4] = SHA384_H4; | ||
206 | sctx->state[5] = SHA384_H5; | ||
207 | sctx->state[6] = SHA384_H6; | ||
208 | sctx->state[7] = SHA384_H7; | ||
209 | |||
210 | sctx->count[0] = sctx->count[1] = 0; | ||
211 | |||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash) | ||
216 | { | ||
217 | u8 D[SHA512_DIGEST_SIZE]; | ||
218 | |||
219 | sha512_ssse3_final(desc, D); | ||
220 | |||
221 | memcpy(hash, D, SHA384_DIGEST_SIZE); | ||
222 | memzero_explicit(D, SHA512_DIGEST_SIZE); | ||
223 | |||
224 | return 0; | ||
225 | } | 97 | } |
226 | 98 | ||
227 | static struct shash_alg algs[] = { { | 99 | static struct shash_alg algs[] = { { |
228 | .digestsize = SHA512_DIGEST_SIZE, | 100 | .digestsize = SHA512_DIGEST_SIZE, |
229 | .init = sha512_ssse3_init, | 101 | .init = sha512_base_init, |
230 | .update = sha512_ssse3_update, | 102 | .update = sha512_ssse3_update, |
231 | .final = sha512_ssse3_final, | 103 | .final = sha512_ssse3_final, |
232 | .export = sha512_ssse3_export, | 104 | .finup = sha512_ssse3_finup, |
233 | .import = sha512_ssse3_import, | ||
234 | .descsize = sizeof(struct sha512_state), | 105 | .descsize = sizeof(struct sha512_state), |
235 | .statesize = sizeof(struct sha512_state), | ||
236 | .base = { | 106 | .base = { |
237 | .cra_name = "sha512", | 107 | .cra_name = "sha512", |
238 | .cra_driver_name = "sha512-ssse3", | 108 | .cra_driver_name = "sha512-ssse3", |
@@ -243,13 +113,11 @@ static struct shash_alg algs[] = { { | |||
243 | } | 113 | } |
244 | }, { | 114 | }, { |
245 | .digestsize = SHA384_DIGEST_SIZE, | 115 | .digestsize = SHA384_DIGEST_SIZE, |
246 | .init = sha384_ssse3_init, | 116 | .init = sha384_base_init, |
247 | .update = sha512_ssse3_update, | 117 | .update = sha512_ssse3_update, |
248 | .final = sha384_ssse3_final, | 118 | .final = sha512_ssse3_final, |
249 | .export = sha512_ssse3_export, | 119 | .finup = sha512_ssse3_finup, |
250 | .import = sha512_ssse3_import, | ||
251 | .descsize = sizeof(struct sha512_state), | 120 | .descsize = sizeof(struct sha512_state), |
252 | .statesize = sizeof(struct sha512_state), | ||
253 | .base = { | 121 | .base = { |
254 | .cra_name = "sha384", | 122 | .cra_name = "sha384", |
255 | .cra_driver_name = "sha384-ssse3", | 123 | .cra_driver_name = "sha384-ssse3", |
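One SHA-512-specific detail: the byte count is 128 bits wide, kept as two u64 words, yet the fast-path test consults only count[0]. That is sufficient because the block size (128) is a power of two dividing 2^64, so the low word alone determines the remainder:

    static unsigned int sha512_partial(const struct sha512_state *sctx)
    {
            /* (hi * 2^64 + lo) % 128 == lo % 128, since 128 divides 2^64 */
            return sctx->count[0] % SHA512_BLOCK_SIZE;
    }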
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 1ac531ea9bcc..b5e2d5651851 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -340,7 +340,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
340 | .cra_name = "__ecb-twofish-avx", | 340 | .cra_name = "__ecb-twofish-avx", |
341 | .cra_driver_name = "__driver-ecb-twofish-avx", | 341 | .cra_driver_name = "__driver-ecb-twofish-avx", |
342 | .cra_priority = 0, | 342 | .cra_priority = 0, |
343 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 343 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
344 | CRYPTO_ALG_INTERNAL, | ||
344 | .cra_blocksize = TF_BLOCK_SIZE, | 345 | .cra_blocksize = TF_BLOCK_SIZE, |
345 | .cra_ctxsize = sizeof(struct twofish_ctx), | 346 | .cra_ctxsize = sizeof(struct twofish_ctx), |
346 | .cra_alignmask = 0, | 347 | .cra_alignmask = 0, |
@@ -359,7 +360,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
359 | .cra_name = "__cbc-twofish-avx", | 360 | .cra_name = "__cbc-twofish-avx", |
360 | .cra_driver_name = "__driver-cbc-twofish-avx", | 361 | .cra_driver_name = "__driver-cbc-twofish-avx", |
361 | .cra_priority = 0, | 362 | .cra_priority = 0, |
362 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 363 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
364 | CRYPTO_ALG_INTERNAL, | ||
363 | .cra_blocksize = TF_BLOCK_SIZE, | 365 | .cra_blocksize = TF_BLOCK_SIZE, |
364 | .cra_ctxsize = sizeof(struct twofish_ctx), | 366 | .cra_ctxsize = sizeof(struct twofish_ctx), |
365 | .cra_alignmask = 0, | 367 | .cra_alignmask = 0, |
@@ -378,7 +380,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
378 | .cra_name = "__ctr-twofish-avx", | 380 | .cra_name = "__ctr-twofish-avx", |
379 | .cra_driver_name = "__driver-ctr-twofish-avx", | 381 | .cra_driver_name = "__driver-ctr-twofish-avx", |
380 | .cra_priority = 0, | 382 | .cra_priority = 0, |
381 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 383 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
384 | CRYPTO_ALG_INTERNAL, | ||
382 | .cra_blocksize = 1, | 385 | .cra_blocksize = 1, |
383 | .cra_ctxsize = sizeof(struct twofish_ctx), | 386 | .cra_ctxsize = sizeof(struct twofish_ctx), |
384 | .cra_alignmask = 0, | 387 | .cra_alignmask = 0, |
@@ -398,7 +401,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
398 | .cra_name = "__lrw-twofish-avx", | 401 | .cra_name = "__lrw-twofish-avx", |
399 | .cra_driver_name = "__driver-lrw-twofish-avx", | 402 | .cra_driver_name = "__driver-lrw-twofish-avx", |
400 | .cra_priority = 0, | 403 | .cra_priority = 0, |
401 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 404 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
405 | CRYPTO_ALG_INTERNAL, | ||
402 | .cra_blocksize = TF_BLOCK_SIZE, | 406 | .cra_blocksize = TF_BLOCK_SIZE, |
403 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | 407 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), |
404 | .cra_alignmask = 0, | 408 | .cra_alignmask = 0, |
@@ -421,7 +425,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
421 | .cra_name = "__xts-twofish-avx", | 425 | .cra_name = "__xts-twofish-avx", |
422 | .cra_driver_name = "__driver-xts-twofish-avx", | 426 | .cra_driver_name = "__driver-xts-twofish-avx", |
423 | .cra_priority = 0, | 427 | .cra_priority = 0, |
424 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 428 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
429 | CRYPTO_ALG_INTERNAL, | ||
425 | .cra_blocksize = TF_BLOCK_SIZE, | 430 | .cra_blocksize = TF_BLOCK_SIZE, |
426 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | 431 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), |
427 | .cra_alignmask = 0, | 432 | .cra_alignmask = 0, |
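These internal "__driver-..." algorithms are not dead weight: the user-visible, non-zero-priority "ecb(twofish)" and friends are ablk_helper wrappers that instantiate them by driver name through cryptd. With the INTERNAL flag added, that lookup must request the flag explicitly, which is what the ablk_helper hunk below adjusts. The assumed shape of the wrapper-side allocation:

    static int foo_ablk_init(struct async_helper_ctx *ctx)
    {
            struct cryptd_ablkcipher *cryptd_tfm;

            cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-twofish-avx",
                                                 CRYPTO_ALG_INTERNAL,
                                                 CRYPTO_ALG_INTERNAL);
            if (IS_ERR(cryptd_tfm))
                    return PTR_ERR(cryptd_tfm);
            ctx->cryptd_tfm = cryptd_tfm;
            return 0;
    }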
diff --git a/crypto/Kconfig b/crypto/Kconfig index 50f4da44a304..8aaf298a80e1 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig | |||
@@ -436,6 +436,14 @@ config CRYPTO_MD5_OCTEON | |||
436 | MD5 message digest algorithm (RFC1321) implemented | 436 | MD5 message digest algorithm (RFC1321) implemented |
437 | using OCTEON crypto instructions, when available. | 437 | using OCTEON crypto instructions, when available. |
438 | 438 | ||
439 | config CRYPTO_MD5_PPC | ||
440 | tristate "MD5 digest algorithm (PPC)" | ||
441 | depends on PPC | ||
442 | select CRYPTO_HASH | ||
443 | help | ||
444 | MD5 message digest algorithm (RFC1321) implemented | ||
445 | in PPC assembler. | ||
446 | |||
439 | config CRYPTO_MD5_SPARC64 | 447 | config CRYPTO_MD5_SPARC64 |
440 | tristate "MD5 digest algorithm (SPARC64)" | 448 | tristate "MD5 digest algorithm (SPARC64)" |
441 | depends on SPARC64 | 449 | depends on SPARC64 |
@@ -546,34 +554,23 @@ config CRYPTO_SHA512_SSSE3 | |||
546 | Extensions version 1 (AVX1), or Advanced Vector Extensions | 554 | Extensions version 1 (AVX1), or Advanced Vector Extensions |
547 | version 2 (AVX2) instructions, when available. | 555 | version 2 (AVX2) instructions, when available. |
548 | 556 | ||
549 | config CRYPTO_SHA1_SPARC64 | 557 | config CRYPTO_SHA1_OCTEON |
550 | tristate "SHA1 digest algorithm (SPARC64)" | 558 | tristate "SHA1 digest algorithm (OCTEON)" |
551 | depends on SPARC64 | 559 | depends on CPU_CAVIUM_OCTEON |
552 | select CRYPTO_SHA1 | ||
553 | select CRYPTO_HASH | ||
554 | help | ||
555 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | ||
556 | using sparc64 crypto instructions, when available. | ||
557 | |||
558 | config CRYPTO_SHA1_ARM | ||
559 | tristate "SHA1 digest algorithm (ARM-asm)" | ||
560 | depends on ARM | ||
561 | select CRYPTO_SHA1 | 560 | select CRYPTO_SHA1 |
562 | select CRYPTO_HASH | 561 | select CRYPTO_HASH |
563 | help | 562 | help |
564 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | 563 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented |
565 | using optimized ARM assembler. | 564 | using OCTEON crypto instructions, when available. |
566 | 565 | ||
567 | config CRYPTO_SHA1_ARM_NEON | 566 | config CRYPTO_SHA1_SPARC64 |
568 | tristate "SHA1 digest algorithm (ARM NEON)" | 567 | tristate "SHA1 digest algorithm (SPARC64)" |
569 | depends on ARM && KERNEL_MODE_NEON | 568 | depends on SPARC64 |
570 | select CRYPTO_SHA1_ARM | ||
571 | select CRYPTO_SHA1 | 569 | select CRYPTO_SHA1 |
572 | select CRYPTO_HASH | 570 | select CRYPTO_HASH |
573 | help | 571 | help |
574 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | 572 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented |
575 | using optimized ARM NEON assembly, when NEON instructions are | 573 | using sparc64 crypto instructions, when available. |
576 | available. | ||
577 | 574 | ||
578 | config CRYPTO_SHA1_PPC | 575 | config CRYPTO_SHA1_PPC |
579 | tristate "SHA1 digest algorithm (powerpc)" | 576 | tristate "SHA1 digest algorithm (powerpc)" |
@@ -582,6 +579,13 @@ config CRYPTO_SHA1_PPC | |||
582 | This is the powerpc hardware accelerated implementation of the | 579 | This is the powerpc hardware accelerated implementation of the |
583 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). | 580 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). |
584 | 581 | ||
582 | config CRYPTO_SHA1_PPC_SPE | ||
583 | tristate "SHA1 digest algorithm (PPC SPE)" | ||
584 | depends on PPC && SPE | ||
585 | help | ||
586 | SHA-1 secure hash standard (DFIPS 180-4) implemented | ||
587 | using powerpc SPE SIMD instruction set. | ||
588 | |||
585 | config CRYPTO_SHA1_MB | 589 | config CRYPTO_SHA1_MB |
586 | tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)" | 590 | tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)" |
587 | depends on X86 && 64BIT | 591 | depends on X86 && 64BIT |
@@ -610,6 +614,24 @@ config CRYPTO_SHA256 | |||
610 | This code also includes SHA-224, a 224 bit hash with 112 bits | 614 | This code also includes SHA-224, a 224 bit hash with 112 bits |
611 | of security against collision attacks. | 615 | of security against collision attacks. |
612 | 616 | ||
617 | config CRYPTO_SHA256_PPC_SPE | ||
618 | tristate "SHA224 and SHA256 digest algorithm (PPC SPE)" | ||
619 | depends on PPC && SPE | ||
620 | select CRYPTO_SHA256 | ||
621 | select CRYPTO_HASH | ||
622 | help | ||
623 | SHA224 and SHA256 secure hash standard (DFIPS 180-2) | ||
624 | implemented using powerpc SPE SIMD instruction set. | ||
625 | |||
626 | config CRYPTO_SHA256_OCTEON | ||
627 | tristate "SHA224 and SHA256 digest algorithm (OCTEON)" | ||
628 | depends on CPU_CAVIUM_OCTEON | ||
629 | select CRYPTO_SHA256 | ||
630 | select CRYPTO_HASH | ||
631 | help | ||
632 | SHA-256 secure hash standard (DFIPS 180-2) implemented | ||
633 | using OCTEON crypto instructions, when available. | ||
634 | |||
613 | config CRYPTO_SHA256_SPARC64 | 635 | config CRYPTO_SHA256_SPARC64 |
614 | tristate "SHA224 and SHA256 digest algorithm (SPARC64)" | 636 | tristate "SHA224 and SHA256 digest algorithm (SPARC64)" |
615 | depends on SPARC64 | 637 | depends on SPARC64 |
@@ -631,29 +653,23 @@ config CRYPTO_SHA512 | |||
631 | This code also includes SHA-384, a 384 bit hash with 192 bits | 653 | This code also includes SHA-384, a 384 bit hash with 192 bits |
632 | of security against collision attacks. | 654 | of security against collision attacks. |
633 | 655 | ||
634 | config CRYPTO_SHA512_SPARC64 | 656 | config CRYPTO_SHA512_OCTEON |
635 | tristate "SHA384 and SHA512 digest algorithm (SPARC64)" | 657 | tristate "SHA384 and SHA512 digest algorithms (OCTEON)" |
636 | depends on SPARC64 | 658 | depends on CPU_CAVIUM_OCTEON |
637 | select CRYPTO_SHA512 | 659 | select CRYPTO_SHA512 |
638 | select CRYPTO_HASH | 660 | select CRYPTO_HASH |
639 | help | 661 | help |
640 | SHA-512 secure hash standard (DFIPS 180-2) implemented | 662 | SHA-512 secure hash standard (DFIPS 180-2) implemented |
641 | using sparc64 crypto instructions, when available. | 663 | using OCTEON crypto instructions, when available. |
642 | 664 | ||
643 | config CRYPTO_SHA512_ARM_NEON | 665 | config CRYPTO_SHA512_SPARC64 |
644 | tristate "SHA384 and SHA512 digest algorithm (ARM NEON)" | 666 | tristate "SHA384 and SHA512 digest algorithm (SPARC64)" |
645 | depends on ARM && KERNEL_MODE_NEON | 667 | depends on SPARC64 |
646 | select CRYPTO_SHA512 | 668 | select CRYPTO_SHA512 |
647 | select CRYPTO_HASH | 669 | select CRYPTO_HASH |
648 | help | 670 | help |
649 | SHA-512 secure hash standard (DFIPS 180-2) implemented | 671 | SHA-512 secure hash standard (DFIPS 180-2) implemented |
650 | using ARM NEON instructions, when available. | 672 | using sparc64 crypto instructions, when available. |
651 | |||
652 | This version of SHA implements a 512 bit hash with 256 bits of | ||
653 | security against collision attacks. | ||
654 | |||
655 | This code also includes SHA-384, a 384 bit hash with 192 bits | ||
656 | of security against collision attacks. | ||
657 | 673 | ||
658 | config CRYPTO_TGR192 | 674 | config CRYPTO_TGR192 |
659 | tristate "Tiger digest algorithms" | 675 | tristate "Tiger digest algorithms" |
@@ -817,45 +833,18 @@ config CRYPTO_AES_SPARC64 | |||
817 | for some popular block cipher mode is supported too, including | 833 | for some popular block cipher mode is supported too, including |
818 | ECB and CBC. | 834 | ECB and CBC. |
819 | 835 | ||
820 | config CRYPTO_AES_ARM | 836 | config CRYPTO_AES_PPC_SPE |
821 | tristate "AES cipher algorithms (ARM-asm)" | 837 | tristate "AES cipher algorithms (PPC SPE)" |
822 | depends on ARM | 838 | depends on PPC && SPE |
823 | select CRYPTO_ALGAPI | ||
824 | select CRYPTO_AES | ||
825 | help | ||
826 | Use optimized AES assembler routines for ARM platforms. | ||
827 | |||
828 | AES cipher algorithms (FIPS-197). AES uses the Rijndael | ||
829 | algorithm. | ||
830 | |||
831 | Rijndael appears to be consistently a very good performer in | ||
832 | both hardware and software across a wide range of computing | ||
833 | environments regardless of its use in feedback or non-feedback | ||
834 | modes. Its key setup time is excellent, and its key agility is | ||
835 | good. Rijndael's very low memory requirements make it very well | ||
836 | suited for restricted-space environments, in which it also | ||
837 | demonstrates excellent performance. Rijndael's operations are | ||
838 | among the easiest to defend against power and timing attacks. | ||
839 | |||
840 | The AES specifies three key sizes: 128, 192 and 256 bits | ||
841 | |||
842 | See <http://csrc.nist.gov/encryption/aes/> for more information. | ||
843 | |||
844 | config CRYPTO_AES_ARM_BS | ||
845 | tristate "Bit sliced AES using NEON instructions" | ||
846 | depends on ARM && KERNEL_MODE_NEON | ||
847 | select CRYPTO_ALGAPI | ||
848 | select CRYPTO_AES_ARM | ||
849 | select CRYPTO_ABLK_HELPER | ||
850 | help | 839 | help |
851 | Use a faster and more secure NEON based implementation of AES in CBC, | 840 | AES cipher algorithms (FIPS-197). Additionally the acceleration |
852 | CTR and XTS modes | 841 | for popular block cipher modes ECB, CBC, CTR and XTS is supported. |
853 | 842 | This module should only be used for low power (router) devices | |
854 | Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode | 843 | without hardware AES acceleration (e.g. caam crypto). It reduces the |
855 | and for XTS mode encryption, CBC and XTS mode decryption speedup is | 844 | size of the AES tables from 16KB to 8KB + 256 bytes and mitigates |
856 | around 25%. (CBC encryption speed is not affected by this driver.) | 845 | timing attacks. Nevertheless it might not be as secure as other |
857 | This implementation does not rely on any lookup tables so it is | 846 | architecture specific assembler implementations that work on 1KB |
858 | believed to be invulnerable to cache timing attacks. | 847 | tables or 256-byte S-boxes. |
859 | 848 | ||
860 | config CRYPTO_ANUBIS | 849 | config CRYPTO_ANUBIS |
861 | tristate "Anubis cipher algorithm" | 850 | tristate "Anubis cipher algorithm" |
@@ -1199,7 +1188,7 @@ config CRYPTO_SERPENT_SSE2_X86_64 | |||
1199 | Keys are allowed to be from 0 to 256 bits in length, in steps | 1188 | Keys are allowed to be from 0 to 256 bits in length, in steps |
1200 | of 8 bits. | 1189 | of 8 bits. |
1201 | 1190 | ||
1202 | This module provides Serpent cipher algorithm that processes eigth | 1191 | This module provides Serpent cipher algorithm that processes eight |
1203 | blocks parallel using SSE2 instruction set. | 1192 | blocks parallel using SSE2 instruction set. |
1204 | 1193 | ||
1205 | See also: | 1194 | See also: |
@@ -1523,6 +1512,15 @@ config CRYPTO_USER_API_RNG | |||
1523 | This option enables the user-space interface for random | 1512 | This option enables the user-space interface for random |
1524 | number generator algorithms. | 1513 | number generator algorithms. |
1525 | 1514 | ||
1515 | config CRYPTO_USER_API_AEAD | ||
1516 | tristate "User-space interface for AEAD cipher algorithms" | ||
1517 | depends on NET | ||
1518 | select CRYPTO_AEAD | ||
1519 | select CRYPTO_USER_API | ||
1520 | help | ||
1521 | This option enables the user-space interface for AEAD | ||
1522 | cipher algorithms. | ||
1523 | |||
1526 | config CRYPTO_HASH_INFO | 1524 | config CRYPTO_HASH_INFO |
1527 | bool | 1525 | bool |
1528 | 1526 | ||
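
The CRYPTO_USER_API_AEAD option added above exposes AEAD ciphers to user space through the AF_ALG socket family. A minimal sketch of setting up such a socket, assuming a kernel built with this option and a gcm(aes) implementation available; the all-zero key is purely illustrative and error handling is omitted:

    #include <sys/socket.h>
    #include <linux/if_alg.h>

    int alg_aead_socket(void)
    {
            struct sockaddr_alg sa = {
                    .salg_family = AF_ALG,
                    .salg_type   = "aead",
                    .salg_name   = "gcm(aes)",
            };
            int tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);

            bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
            /* illustrative all-zero 128-bit key */
            setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, (char [16]){ 0 }, 16);
            return accept(tfmfd, NULL, 0);  /* op socket for requests */
    }

Per-request parameters (operation, IV, length of the associated data) travel as control messages on the returned op socket; see the sketch after the new algif_aead.c below.
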
diff --git a/crypto/Makefile b/crypto/Makefile index ba19465f9ad3..97b7d3ac87e7 100644 --- a/crypto/Makefile +++ b/crypto/Makefile | |||
@@ -100,6 +100,7 @@ obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o | |||
100 | obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o | 100 | obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o |
101 | obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o | 101 | obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o |
102 | obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o | 102 | obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o |
103 | obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o | ||
103 | 104 | ||
104 | # | 105 | # |
105 | # generic algorithms and the async_tx api | 106 | # generic algorithms and the async_tx api |
diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c index ffe7278d4bd8..e1fcf53bb931 100644 --- a/crypto/ablk_helper.c +++ b/crypto/ablk_helper.c | |||
@@ -124,7 +124,8 @@ int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name) | |||
124 | struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); | 124 | struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); |
125 | struct cryptd_ablkcipher *cryptd_tfm; | 125 | struct cryptd_ablkcipher *cryptd_tfm; |
126 | 126 | ||
127 | cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); | 127 | cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, CRYPTO_ALG_INTERNAL, |
128 | CRYPTO_ALG_INTERNAL); | ||
128 | if (IS_ERR(cryptd_tfm)) | 129 | if (IS_ERR(cryptd_tfm)) |
129 | return PTR_ERR(cryptd_tfm); | 130 | return PTR_ERR(cryptd_tfm); |
130 | 131 | ||
diff --git a/crypto/algapi.c b/crypto/algapi.c index 83b04e0884b1..2d0a1c64ce39 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c | |||
@@ -64,6 +64,8 @@ static int crypto_check_alg(struct crypto_alg *alg) | |||
64 | if (alg->cra_priority < 0) | 64 | if (alg->cra_priority < 0) |
65 | return -EINVAL; | 65 | return -EINVAL; |
66 | 66 | ||
67 | atomic_set(&alg->cra_refcnt, 1); | ||
68 | |||
67 | return crypto_set_driver_name(alg); | 69 | return crypto_set_driver_name(alg); |
68 | } | 70 | } |
69 | 71 | ||
@@ -99,10 +101,9 @@ static struct list_head *crypto_more_spawns(struct crypto_alg *alg, | |||
99 | return &n->list == stack ? top : &n->inst->alg.cra_users; | 101 | return &n->list == stack ? top : &n->inst->alg.cra_users; |
100 | } | 102 | } |
101 | 103 | ||
102 | static void crypto_remove_spawn(struct crypto_spawn *spawn, | 104 | static void crypto_remove_instance(struct crypto_instance *inst, |
103 | struct list_head *list) | 105 | struct list_head *list) |
104 | { | 106 | { |
105 | struct crypto_instance *inst = spawn->inst; | ||
106 | struct crypto_template *tmpl = inst->tmpl; | 107 | struct crypto_template *tmpl = inst->tmpl; |
107 | 108 | ||
108 | if (crypto_is_dead(&inst->alg)) | 109 | if (crypto_is_dead(&inst->alg)) |
@@ -167,7 +168,7 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, | |||
167 | if (spawn->alg) | 168 | if (spawn->alg) |
168 | list_move(&spawn->list, &spawn->alg->cra_users); | 169 | list_move(&spawn->list, &spawn->alg->cra_users); |
169 | else | 170 | else |
170 | crypto_remove_spawn(spawn, list); | 171 | crypto_remove_instance(spawn->inst, list); |
171 | } | 172 | } |
172 | } | 173 | } |
173 | EXPORT_SYMBOL_GPL(crypto_remove_spawns); | 174 | EXPORT_SYMBOL_GPL(crypto_remove_spawns); |
@@ -188,7 +189,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) | |||
188 | 189 | ||
189 | ret = -EEXIST; | 190 | ret = -EEXIST; |
190 | 191 | ||
191 | atomic_set(&alg->cra_refcnt, 1); | ||
192 | list_for_each_entry(q, &crypto_alg_list, cra_list) { | 192 | list_for_each_entry(q, &crypto_alg_list, cra_list) { |
193 | if (q == alg) | 193 | if (q == alg) |
194 | goto err; | 194 | goto err; |
@@ -523,7 +523,10 @@ int crypto_register_instance(struct crypto_template *tmpl, | |||
523 | 523 | ||
524 | err = crypto_check_alg(&inst->alg); | 524 | err = crypto_check_alg(&inst->alg); |
525 | if (err) | 525 | if (err) |
526 | goto err; | 526 | return err; |
527 | |||
528 | if (unlikely(!crypto_mod_get(&inst->alg))) | ||
529 | return -EAGAIN; | ||
527 | 530 | ||
528 | inst->alg.cra_module = tmpl->module; | 531 | inst->alg.cra_module = tmpl->module; |
529 | inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE; | 532 | inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE; |
@@ -545,37 +548,30 @@ unlock: | |||
545 | goto err; | 548 | goto err; |
546 | 549 | ||
547 | crypto_wait_for_test(larval); | 550 | crypto_wait_for_test(larval); |
551 | |||
552 | /* Remove instance if test failed */ | ||
553 | if (!(inst->alg.cra_flags & CRYPTO_ALG_TESTED)) | ||
554 | crypto_unregister_instance(inst); | ||
548 | err = 0; | 555 | err = 0; |
549 | 556 | ||
550 | err: | 557 | err: |
558 | crypto_mod_put(&inst->alg); | ||
551 | return err; | 559 | return err; |
552 | } | 560 | } |
553 | EXPORT_SYMBOL_GPL(crypto_register_instance); | 561 | EXPORT_SYMBOL_GPL(crypto_register_instance); |
554 | 562 | ||
555 | int crypto_unregister_instance(struct crypto_alg *alg) | 563 | int crypto_unregister_instance(struct crypto_instance *inst) |
556 | { | 564 | { |
557 | int err; | 565 | LIST_HEAD(list); |
558 | struct crypto_instance *inst = (void *)alg; | ||
559 | struct crypto_template *tmpl = inst->tmpl; | ||
560 | LIST_HEAD(users); | ||
561 | |||
562 | if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE)) | ||
563 | return -EINVAL; | ||
564 | |||
565 | BUG_ON(atomic_read(&alg->cra_refcnt) != 1); | ||
566 | 566 | ||
567 | down_write(&crypto_alg_sem); | 567 | down_write(&crypto_alg_sem); |
568 | 568 | ||
569 | hlist_del_init(&inst->list); | 569 | crypto_remove_spawns(&inst->alg, &list, NULL); |
570 | err = crypto_remove_alg(alg, &users); | 570 | crypto_remove_instance(inst, &list); |
571 | 571 | ||
572 | up_write(&crypto_alg_sem); | 572 | up_write(&crypto_alg_sem); |
573 | 573 | ||
574 | if (err) | 574 | crypto_remove_final(&list); |
575 | return err; | ||
576 | |||
577 | tmpl->free(inst); | ||
578 | crypto_remove_final(&users); | ||
579 | 575 | ||
580 | return 0; | 576 | return 0; |
581 | } | 577 | } |
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c new file mode 100644 index 000000000000..527d27b023ab --- /dev/null +++ b/crypto/algif_aead.c | |||
@@ -0,0 +1,666 @@ | |||
1 | /* | ||
2 | * algif_aead: User-space interface for AEAD algorithms | ||
3 | * | ||
4 | * Copyright (C) 2014, Stephan Mueller <smueller@chronox.de> | ||
5 | * | ||
6 | * This file provides the user-space API for AEAD ciphers. | ||
7 | * | ||
8 | * This file is derived from algif_skcipher.c. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms of the GNU General Public License as published by the Free | ||
12 | * Software Foundation; either version 2 of the License, or (at your option) | ||
13 | * any later version. | ||
14 | */ | ||
15 | |||
16 | #include <crypto/scatterwalk.h> | ||
17 | #include <crypto/if_alg.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/list.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/mm.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/net.h> | ||
24 | #include <net/sock.h> | ||
25 | |||
26 | struct aead_sg_list { | ||
27 | unsigned int cur; | ||
28 | struct scatterlist sg[ALG_MAX_PAGES]; | ||
29 | }; | ||
30 | |||
31 | struct aead_ctx { | ||
32 | struct aead_sg_list tsgl; | ||
33 | /* | ||
34 | * RSGL_MAX_ENTRIES is an artificial limit: user space can make the | ||
35 | * kernel allocate at most RSGL_MAX_ENTRIES * ALG_MAX_PAGES | ||
36 | * bytes | ||
37 | */ | ||
38 | #define RSGL_MAX_ENTRIES ALG_MAX_PAGES | ||
39 | struct af_alg_sgl rsgl[RSGL_MAX_ENTRIES]; | ||
40 | |||
41 | void *iv; | ||
42 | |||
43 | struct af_alg_completion completion; | ||
44 | |||
45 | unsigned long used; | ||
46 | |||
47 | unsigned int len; | ||
48 | bool more; | ||
49 | bool merge; | ||
50 | bool enc; | ||
51 | |||
52 | size_t aead_assoclen; | ||
53 | struct aead_request aead_req; | ||
54 | }; | ||
55 | |||
56 | static inline int aead_sndbuf(struct sock *sk) | ||
57 | { | ||
58 | struct alg_sock *ask = alg_sk(sk); | ||
59 | struct aead_ctx *ctx = ask->private; | ||
60 | |||
61 | return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) - | ||
62 | ctx->used, 0); | ||
63 | } | ||
64 | |||
65 | static inline bool aead_writable(struct sock *sk) | ||
66 | { | ||
67 | return PAGE_SIZE <= aead_sndbuf(sk); | ||
68 | } | ||
69 | |||
70 | static inline bool aead_sufficient_data(struct aead_ctx *ctx) | ||
71 | { | ||
72 | unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req)); | ||
73 | |||
74 | return (ctx->used >= (ctx->aead_assoclen + (ctx->enc ? 0 : as))); | ||
75 | } | ||
76 | |||
77 | static void aead_put_sgl(struct sock *sk) | ||
78 | { | ||
79 | struct alg_sock *ask = alg_sk(sk); | ||
80 | struct aead_ctx *ctx = ask->private; | ||
81 | struct aead_sg_list *sgl = &ctx->tsgl; | ||
82 | struct scatterlist *sg = sgl->sg; | ||
83 | unsigned int i; | ||
84 | |||
85 | for (i = 0; i < sgl->cur; i++) { | ||
86 | if (!sg_page(sg + i)) | ||
87 | continue; | ||
88 | |||
89 | put_page(sg_page(sg + i)); | ||
90 | sg_assign_page(sg + i, NULL); | ||
91 | } | ||
92 | sgl->cur = 0; | ||
93 | ctx->used = 0; | ||
94 | ctx->more = 0; | ||
95 | ctx->merge = 0; | ||
96 | } | ||
97 | |||
98 | static void aead_wmem_wakeup(struct sock *sk) | ||
99 | { | ||
100 | struct socket_wq *wq; | ||
101 | |||
102 | if (!aead_writable(sk)) | ||
103 | return; | ||
104 | |||
105 | rcu_read_lock(); | ||
106 | wq = rcu_dereference(sk->sk_wq); | ||
107 | if (wq_has_sleeper(wq)) | ||
108 | wake_up_interruptible_sync_poll(&wq->wait, POLLIN | | ||
109 | POLLRDNORM | | ||
110 | POLLRDBAND); | ||
111 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); | ||
112 | rcu_read_unlock(); | ||
113 | } | ||
114 | |||
115 | static int aead_wait_for_data(struct sock *sk, unsigned flags) | ||
116 | { | ||
117 | struct alg_sock *ask = alg_sk(sk); | ||
118 | struct aead_ctx *ctx = ask->private; | ||
119 | long timeout; | ||
120 | DEFINE_WAIT(wait); | ||
121 | int err = -ERESTARTSYS; | ||
122 | |||
123 | if (flags & MSG_DONTWAIT) | ||
124 | return -EAGAIN; | ||
125 | |||
126 | set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | ||
127 | |||
128 | for (;;) { | ||
129 | if (signal_pending(current)) | ||
130 | break; | ||
131 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); | ||
132 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
133 | if (sk_wait_event(sk, &timeout, !ctx->more)) { | ||
134 | err = 0; | ||
135 | break; | ||
136 | } | ||
137 | } | ||
138 | finish_wait(sk_sleep(sk), &wait); | ||
139 | |||
140 | clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | ||
141 | |||
142 | return err; | ||
143 | } | ||
144 | |||
145 | static void aead_data_wakeup(struct sock *sk) | ||
146 | { | ||
147 | struct alg_sock *ask = alg_sk(sk); | ||
148 | struct aead_ctx *ctx = ask->private; | ||
149 | struct socket_wq *wq; | ||
150 | |||
151 | if (ctx->more) | ||
152 | return; | ||
153 | if (!ctx->used) | ||
154 | return; | ||
155 | |||
156 | rcu_read_lock(); | ||
157 | wq = rcu_dereference(sk->sk_wq); | ||
158 | if (wq_has_sleeper(wq)) | ||
159 | wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | | ||
160 | POLLRDNORM | | ||
161 | POLLRDBAND); | ||
162 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); | ||
163 | rcu_read_unlock(); | ||
164 | } | ||
165 | |||
166 | static int aead_sendmsg(struct kiocb *unused, struct socket *sock, | ||
167 | struct msghdr *msg, size_t size) | ||
168 | { | ||
169 | struct sock *sk = sock->sk; | ||
170 | struct alg_sock *ask = alg_sk(sk); | ||
171 | struct aead_ctx *ctx = ask->private; | ||
172 | unsigned ivsize = | ||
173 | crypto_aead_ivsize(crypto_aead_reqtfm(&ctx->aead_req)); | ||
174 | struct aead_sg_list *sgl = &ctx->tsgl; | ||
175 | struct af_alg_control con = {}; | ||
176 | long copied = 0; | ||
177 | bool enc = 0; | ||
178 | bool init = 0; | ||
179 | int err = -EINVAL; | ||
180 | |||
181 | if (msg->msg_controllen) { | ||
182 | err = af_alg_cmsg_send(msg, &con); | ||
183 | if (err) | ||
184 | return err; | ||
185 | |||
186 | init = 1; | ||
187 | switch (con.op) { | ||
188 | case ALG_OP_ENCRYPT: | ||
189 | enc = 1; | ||
190 | break; | ||
191 | case ALG_OP_DECRYPT: | ||
192 | enc = 0; | ||
193 | break; | ||
194 | default: | ||
195 | return -EINVAL; | ||
196 | } | ||
197 | |||
198 | if (con.iv && con.iv->ivlen != ivsize) | ||
199 | return -EINVAL; | ||
200 | } | ||
201 | |||
202 | lock_sock(sk); | ||
203 | if (!ctx->more && ctx->used) | ||
204 | goto unlock; | ||
205 | |||
206 | if (init) { | ||
207 | ctx->enc = enc; | ||
208 | if (con.iv) | ||
209 | memcpy(ctx->iv, con.iv->iv, ivsize); | ||
210 | |||
211 | ctx->aead_assoclen = con.aead_assoclen; | ||
212 | } | ||
213 | |||
214 | while (size) { | ||
215 | unsigned long len = size; | ||
216 | struct scatterlist *sg = NULL; | ||
217 | |||
218 | /* use the existing memory in an allocated page */ | ||
219 | if (ctx->merge) { | ||
220 | sg = sgl->sg + sgl->cur - 1; | ||
221 | len = min_t(unsigned long, len, | ||
222 | PAGE_SIZE - sg->offset - sg->length); | ||
223 | err = memcpy_from_msg(page_address(sg_page(sg)) + | ||
224 | sg->offset + sg->length, | ||
225 | msg, len); | ||
226 | if (err) | ||
227 | goto unlock; | ||
228 | |||
229 | sg->length += len; | ||
230 | ctx->merge = (sg->offset + sg->length) & | ||
231 | (PAGE_SIZE - 1); | ||
232 | |||
233 | ctx->used += len; | ||
234 | copied += len; | ||
235 | size -= len; | ||
236 | continue; | ||
237 | } | ||
238 | |||
239 | if (!aead_writable(sk)) { | ||
240 | /* user space sent too much data */ | ||
241 | aead_put_sgl(sk); | ||
242 | err = -EMSGSIZE; | ||
243 | goto unlock; | ||
244 | } | ||
245 | |||
246 | /* allocate a new page */ | ||
247 | len = min_t(unsigned long, size, aead_sndbuf(sk)); | ||
248 | while (len) { | ||
249 | int plen = 0; | ||
250 | |||
251 | if (sgl->cur >= ALG_MAX_PAGES) { | ||
252 | aead_put_sgl(sk); | ||
253 | err = -E2BIG; | ||
254 | goto unlock; | ||
255 | } | ||
256 | |||
257 | sg = sgl->sg + sgl->cur; | ||
258 | plen = min_t(int, len, PAGE_SIZE); | ||
259 | |||
260 | sg_assign_page(sg, alloc_page(GFP_KERNEL)); | ||
261 | err = -ENOMEM; | ||
262 | if (!sg_page(sg)) | ||
263 | goto unlock; | ||
264 | |||
265 | err = memcpy_from_msg(page_address(sg_page(sg)), | ||
266 | msg, plen); | ||
267 | if (err) { | ||
268 | __free_page(sg_page(sg)); | ||
269 | sg_assign_page(sg, NULL); | ||
270 | goto unlock; | ||
271 | } | ||
272 | |||
273 | sg->offset = 0; | ||
274 | sg->length = plen; | ||
275 | len -= plen; | ||
276 | ctx->used += plen; | ||
277 | copied += plen; | ||
278 | sgl->cur++; | ||
279 | size -= plen; | ||
280 | ctx->merge = plen & (PAGE_SIZE - 1); | ||
281 | } | ||
282 | } | ||
283 | |||
284 | err = 0; | ||
285 | |||
286 | ctx->more = msg->msg_flags & MSG_MORE; | ||
287 | if (!ctx->more && !aead_sufficient_data(ctx)) { | ||
288 | aead_put_sgl(sk); | ||
289 | err = -EMSGSIZE; | ||
290 | } | ||
291 | |||
292 | unlock: | ||
293 | aead_data_wakeup(sk); | ||
294 | release_sock(sk); | ||
295 | |||
296 | return err ?: copied; | ||
297 | } | ||
298 | |||
299 | static ssize_t aead_sendpage(struct socket *sock, struct page *page, | ||
300 | int offset, size_t size, int flags) | ||
301 | { | ||
302 | struct sock *sk = sock->sk; | ||
303 | struct alg_sock *ask = alg_sk(sk); | ||
304 | struct aead_ctx *ctx = ask->private; | ||
305 | struct aead_sg_list *sgl = &ctx->tsgl; | ||
306 | int err = -EINVAL; | ||
307 | |||
308 | if (flags & MSG_SENDPAGE_NOTLAST) | ||
309 | flags |= MSG_MORE; | ||
310 | |||
311 | if (sgl->cur >= ALG_MAX_PAGES) | ||
312 | return -E2BIG; | ||
313 | |||
314 | lock_sock(sk); | ||
315 | if (!ctx->more && ctx->used) | ||
316 | goto unlock; | ||
317 | |||
318 | if (!size) | ||
319 | goto done; | ||
320 | |||
321 | if (!aead_writable(sk)) { | ||
322 | /* user space sent too much data */ | ||
323 | aead_put_sgl(sk); | ||
324 | err = -EMSGSIZE; | ||
325 | goto unlock; | ||
326 | } | ||
327 | |||
328 | ctx->merge = 0; | ||
329 | |||
330 | get_page(page); | ||
331 | sg_set_page(sgl->sg + sgl->cur, page, size, offset); | ||
332 | sgl->cur++; | ||
333 | ctx->used += size; | ||
334 | |||
335 | err = 0; | ||
336 | |||
337 | done: | ||
338 | ctx->more = flags & MSG_MORE; | ||
339 | if (!ctx->more && !aead_sufficient_data(ctx)) { | ||
340 | aead_put_sgl(sk); | ||
341 | err = -EMSGSIZE; | ||
342 | } | ||
343 | |||
344 | unlock: | ||
345 | aead_data_wakeup(sk); | ||
346 | release_sock(sk); | ||
347 | |||
348 | return err ?: size; | ||
349 | } | ||
350 | |||
351 | static int aead_recvmsg(struct kiocb *unused, struct socket *sock, | ||
352 | struct msghdr *msg, size_t ignored, int flags) | ||
353 | { | ||
354 | struct sock *sk = sock->sk; | ||
355 | struct alg_sock *ask = alg_sk(sk); | ||
356 | struct aead_ctx *ctx = ask->private; | ||
357 | unsigned bs = crypto_aead_blocksize(crypto_aead_reqtfm(&ctx->aead_req)); | ||
358 | unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req)); | ||
359 | struct aead_sg_list *sgl = &ctx->tsgl; | ||
360 | struct scatterlist *sg = NULL; | ||
361 | struct scatterlist assoc[ALG_MAX_PAGES]; | ||
362 | size_t assoclen = 0; | ||
363 | unsigned int i = 0; | ||
364 | int err = -EINVAL; | ||
365 | unsigned long used = 0; | ||
366 | size_t outlen = 0; | ||
367 | size_t usedpages = 0; | ||
368 | unsigned int cnt = 0; | ||
369 | |||
370 | /* Limit number of IOV blocks to be accessed below */ | ||
371 | if (msg->msg_iter.nr_segs > RSGL_MAX_ENTRIES) | ||
372 | return -ENOMSG; | ||
373 | |||
374 | lock_sock(sk); | ||
375 | |||
376 | /* | ||
377 | * AEAD memory structure: For encryption, the tag is appended to the | ||
378 | * ciphertext which implies that the memory allocated for the ciphertext | ||
379 | * must be increased by the tag length. For decryption, the tag | ||
380 | * is expected to be concatenated to the ciphertext. The plaintext | ||
381 | * therefore has a memory size of the ciphertext minus the tag length. | ||
382 | * | ||
383 | * The memory structure for cipher operation has the following | ||
384 | * structure: | ||
385 | * AEAD encryption input: assoc data || plaintext | ||
386 | * AEAD encryption output: ciphertext || auth tag | ||
387 | * AEAD decryption input: assoc data || ciphertext || auth tag | ||
388 | * AEAD decryption output: plaintext | ||
389 | */ | ||
390 | |||
391 | if (ctx->more) { | ||
392 | err = aead_wait_for_data(sk, flags); | ||
393 | if (err) | ||
394 | goto unlock; | ||
395 | } | ||
396 | |||
397 | used = ctx->used; | ||
398 | |||
399 | /* | ||
400 | * Make sure sufficient data is present -- note, the same check is | ||
401 | * also present in sendmsg/sendpage. The checks in sendpage/sendmsg | ||
402 | * shall inform the data sender that something is | ||
403 | * wrong, but they are irrelevant for maintaining kernel integrity. | ||
404 | * We need this check here too in case user space decides to not honor | ||
405 | * the error message in sendmsg/sendpage and still call recvmsg. This | ||
406 | * check here protects the kernel integrity. | ||
407 | */ | ||
408 | if (!aead_sufficient_data(ctx)) | ||
409 | goto unlock; | ||
410 | |||
411 | /* | ||
412 | * The cipher operation input data is reduced by the associated data | ||
413 | * length as this data is processed separately later on. | ||
414 | */ | ||
415 | used -= ctx->aead_assoclen; | ||
416 | |||
417 | if (ctx->enc) { | ||
418 | /* round up output buffer to multiple of block size */ | ||
419 | outlen = ((used + bs - 1) / bs * bs); | ||
420 | /* add the size needed for the auth tag to be created */ | ||
421 | outlen += as; | ||
422 | } else { | ||
423 | /* output data size is input without the authentication tag */ | ||
424 | outlen = used - as; | ||
425 | /* round up output buffer to multiple of block size */ | ||
426 | outlen = ((outlen + bs - 1) / bs * bs); | ||
427 | } | ||
428 | |||
429 | /* convert iovecs of output buffers into scatterlists */ | ||
430 | while (iov_iter_count(&msg->msg_iter)) { | ||
431 | size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter), | ||
432 | (outlen - usedpages)); | ||
433 | |||
434 | /* make one iovec available as scatterlist */ | ||
435 | err = af_alg_make_sg(&ctx->rsgl[cnt], &msg->msg_iter, | ||
436 | seglen); | ||
437 | if (err < 0) | ||
438 | goto unlock; | ||
439 | usedpages += err; | ||
440 | /* chain the new scatterlist with initial list */ | ||
441 | if (cnt) | ||
442 | scatterwalk_crypto_chain(ctx->rsgl[0].sg, | ||
443 | ctx->rsgl[cnt].sg, 1, | ||
444 | sg_nents(ctx->rsgl[cnt-1].sg)); | ||
445 | /* we do not need more iovecs as we have sufficient memory */ | ||
446 | if (outlen <= usedpages) | ||
447 | break; | ||
448 | iov_iter_advance(&msg->msg_iter, err); | ||
449 | cnt++; | ||
450 | } | ||
451 | |||
452 | err = -EINVAL; | ||
453 | /* ensure output buffer is sufficiently large */ | ||
454 | if (usedpages < outlen) | ||
455 | goto unlock; | ||
456 | |||
457 | sg_init_table(assoc, ALG_MAX_PAGES); | ||
458 | assoclen = ctx->aead_assoclen; | ||
459 | /* | ||
460 | * Split scatterlist into two: first part becomes AD, second part | ||
461 | * is plaintext / ciphertext. The first part is assigned to assoc | ||
462 | * scatterlist. When this loop finishes, sg points to the start of the | ||
463 | * plaintext / ciphertext. | ||
464 | */ | ||
465 | for (i = 0; i < ctx->tsgl.cur; i++) { | ||
466 | sg = sgl->sg + i; | ||
467 | if (sg->length <= assoclen) { | ||
468 | /* AD is larger than one page */ | ||
469 | sg_set_page(assoc + i, sg_page(sg), | ||
470 | sg->length, sg->offset); | ||
471 | assoclen -= sg->length; | ||
472 | if (i >= ctx->tsgl.cur) | ||
473 | goto unlock; | ||
474 | } else if (!assoclen) { | ||
475 | /* current page is the start of plaintext / ciphertext */ | ||
476 | if (i) | ||
477 | /* AD terminates at page boundary */ | ||
478 | sg_mark_end(assoc + i - 1); | ||
479 | else | ||
480 | /* AD size is zero */ | ||
481 | sg_mark_end(assoc); | ||
482 | break; | ||
483 | } else { | ||
484 | /* AD does not terminate at page boundary */ | ||
485 | sg_set_page(assoc + i, sg_page(sg), | ||
486 | assoclen, sg->offset); | ||
487 | sg_mark_end(assoc + i); | ||
488 | /* plaintext / ciphertext starts after AD */ | ||
489 | sg->length -= assoclen; | ||
490 | sg->offset += assoclen; | ||
491 | break; | ||
492 | } | ||
493 | } | ||
494 | |||
495 | aead_request_set_assoc(&ctx->aead_req, assoc, ctx->aead_assoclen); | ||
496 | aead_request_set_crypt(&ctx->aead_req, sg, ctx->rsgl[0].sg, used, | ||
497 | ctx->iv); | ||
498 | |||
499 | err = af_alg_wait_for_completion(ctx->enc ? | ||
500 | crypto_aead_encrypt(&ctx->aead_req) : | ||
501 | crypto_aead_decrypt(&ctx->aead_req), | ||
502 | &ctx->completion); | ||
503 | |||
504 | if (err) { | ||
505 | /* EBADMSG implies a valid cipher operation took place */ | ||
506 | if (err == -EBADMSG) | ||
507 | aead_put_sgl(sk); | ||
508 | goto unlock; | ||
509 | } | ||
510 | |||
511 | aead_put_sgl(sk); | ||
512 | |||
513 | err = 0; | ||
514 | |||
515 | unlock: | ||
516 | for (i = 0; i < cnt; i++) | ||
517 | af_alg_free_sg(&ctx->rsgl[i]); | ||
518 | |||
519 | aead_wmem_wakeup(sk); | ||
520 | release_sock(sk); | ||
521 | |||
522 | return err ? err : outlen; | ||
523 | } | ||
524 | |||
525 | static unsigned int aead_poll(struct file *file, struct socket *sock, | ||
526 | poll_table *wait) | ||
527 | { | ||
528 | struct sock *sk = sock->sk; | ||
529 | struct alg_sock *ask = alg_sk(sk); | ||
530 | struct aead_ctx *ctx = ask->private; | ||
531 | unsigned int mask; | ||
532 | |||
533 | sock_poll_wait(file, sk_sleep(sk), wait); | ||
534 | mask = 0; | ||
535 | |||
536 | if (!ctx->more) | ||
537 | mask |= POLLIN | POLLRDNORM; | ||
538 | |||
539 | if (aead_writable(sk)) | ||
540 | mask |= POLLOUT | POLLWRNORM | POLLWRBAND; | ||
541 | |||
542 | return mask; | ||
543 | } | ||
544 | |||
545 | static struct proto_ops algif_aead_ops = { | ||
546 | .family = PF_ALG, | ||
547 | |||
548 | .connect = sock_no_connect, | ||
549 | .socketpair = sock_no_socketpair, | ||
550 | .getname = sock_no_getname, | ||
551 | .ioctl = sock_no_ioctl, | ||
552 | .listen = sock_no_listen, | ||
553 | .shutdown = sock_no_shutdown, | ||
554 | .getsockopt = sock_no_getsockopt, | ||
555 | .mmap = sock_no_mmap, | ||
556 | .bind = sock_no_bind, | ||
557 | .accept = sock_no_accept, | ||
558 | .setsockopt = sock_no_setsockopt, | ||
559 | |||
560 | .release = af_alg_release, | ||
561 | .sendmsg = aead_sendmsg, | ||
562 | .sendpage = aead_sendpage, | ||
563 | .recvmsg = aead_recvmsg, | ||
564 | .poll = aead_poll, | ||
565 | }; | ||
566 | |||
567 | static void *aead_bind(const char *name, u32 type, u32 mask) | ||
568 | { | ||
569 | return crypto_alloc_aead(name, type, mask); | ||
570 | } | ||
571 | |||
572 | static void aead_release(void *private) | ||
573 | { | ||
574 | crypto_free_aead(private); | ||
575 | } | ||
576 | |||
577 | static int aead_setauthsize(void *private, unsigned int authsize) | ||
578 | { | ||
579 | return crypto_aead_setauthsize(private, authsize); | ||
580 | } | ||
581 | |||
582 | static int aead_setkey(void *private, const u8 *key, unsigned int keylen) | ||
583 | { | ||
584 | return crypto_aead_setkey(private, key, keylen); | ||
585 | } | ||
586 | |||
587 | static void aead_sock_destruct(struct sock *sk) | ||
588 | { | ||
589 | struct alg_sock *ask = alg_sk(sk); | ||
590 | struct aead_ctx *ctx = ask->private; | ||
591 | unsigned int ivlen = crypto_aead_ivsize( | ||
592 | crypto_aead_reqtfm(&ctx->aead_req)); | ||
593 | |||
594 | aead_put_sgl(sk); | ||
595 | sock_kzfree_s(sk, ctx->iv, ivlen); | ||
596 | sock_kfree_s(sk, ctx, ctx->len); | ||
597 | af_alg_release_parent(sk); | ||
598 | } | ||
599 | |||
600 | static int aead_accept_parent(void *private, struct sock *sk) | ||
601 | { | ||
602 | struct aead_ctx *ctx; | ||
603 | struct alg_sock *ask = alg_sk(sk); | ||
604 | unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(private); | ||
605 | unsigned int ivlen = crypto_aead_ivsize(private); | ||
606 | |||
607 | ctx = sock_kmalloc(sk, len, GFP_KERNEL); | ||
608 | if (!ctx) | ||
609 | return -ENOMEM; | ||
610 | memset(ctx, 0, len); | ||
611 | |||
612 | ctx->iv = sock_kmalloc(sk, ivlen, GFP_KERNEL); | ||
613 | if (!ctx->iv) { | ||
614 | sock_kfree_s(sk, ctx, len); | ||
615 | return -ENOMEM; | ||
616 | } | ||
617 | memset(ctx->iv, 0, ivlen); | ||
618 | |||
619 | ctx->len = len; | ||
620 | ctx->used = 0; | ||
621 | ctx->more = 0; | ||
622 | ctx->merge = 0; | ||
623 | ctx->enc = 0; | ||
624 | ctx->tsgl.cur = 0; | ||
625 | ctx->aead_assoclen = 0; | ||
626 | af_alg_init_completion(&ctx->completion); | ||
627 | sg_init_table(ctx->tsgl.sg, ALG_MAX_PAGES); | ||
628 | |||
629 | ask->private = ctx; | ||
630 | |||
631 | aead_request_set_tfm(&ctx->aead_req, private); | ||
632 | aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, | ||
633 | af_alg_complete, &ctx->completion); | ||
634 | |||
635 | sk->sk_destruct = aead_sock_destruct; | ||
636 | |||
637 | return 0; | ||
638 | } | ||
639 | |||
640 | static const struct af_alg_type algif_type_aead = { | ||
641 | .bind = aead_bind, | ||
642 | .release = aead_release, | ||
643 | .setkey = aead_setkey, | ||
644 | .setauthsize = aead_setauthsize, | ||
645 | .accept = aead_accept_parent, | ||
646 | .ops = &algif_aead_ops, | ||
647 | .name = "aead", | ||
648 | .owner = THIS_MODULE | ||
649 | }; | ||
650 | |||
651 | static int __init algif_aead_init(void) | ||
652 | { | ||
653 | return af_alg_register_type(&algif_type_aead); | ||
654 | } | ||
655 | |||
656 | static void __exit algif_aead_exit(void) | ||
657 | { | ||
658 | int err = af_alg_unregister_type(&algif_type_aead); | ||
659 | BUG_ON(err); | ||
660 | } | ||
661 | |||
662 | module_init(algif_aead_init); | ||
663 | module_exit(algif_aead_exit); | ||
664 | MODULE_LICENSE("GPL"); | ||
665 | MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>"); | ||
666 | MODULE_DESCRIPTION("AEAD kernel crypto API user space interface"); | ||
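
From user space, the sendmsg()/recvmsg() handlers above map onto ordinary socket calls: sendmsg() carries the operation type, the IV and the associated-data length as SOL_ALG control messages together with the input (AD || plaintext), and a subsequent read triggers the cipher operation and returns ciphertext || tag. A minimal encryption sketch, assuming the op socket from the Kconfig example earlier, a 96-bit IV, and the ALG_SET_* constants from linux/if_alg.h; error handling is omitted:

    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/if_alg.h>

    static void aead_encrypt_once(int opfd, const uint8_t iv[12],
                                  const uint8_t *in, uint32_t assoclen,
                                  size_t ptlen, uint8_t *out, size_t outlen)
    {
            char cbuf[CMSG_SPACE(4) +
                      CMSG_SPACE(sizeof(struct af_alg_iv) + 12) +
                      CMSG_SPACE(4)] = { 0 };
            struct iovec iov = {
                    .iov_base = (void *)in,
                    .iov_len  = assoclen + ptlen,   /* AD || plaintext */
            };
            struct msghdr msg = {
                    .msg_control    = cbuf,
                    .msg_controllen = sizeof(cbuf),
                    .msg_iov        = &iov,
                    .msg_iovlen     = 1,
            };
            struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
            struct af_alg_iv *alg_iv;

            cmsg->cmsg_level = SOL_ALG;             /* operation type */
            cmsg->cmsg_type  = ALG_SET_OP;
            cmsg->cmsg_len   = CMSG_LEN(4);
            *(uint32_t *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

            cmsg = CMSG_NXTHDR(&msg, cmsg);         /* IV as af_alg_iv */
            cmsg->cmsg_level = SOL_ALG;
            cmsg->cmsg_type  = ALG_SET_IV;
            cmsg->cmsg_len   = CMSG_LEN(sizeof(struct af_alg_iv) + 12);
            alg_iv = (struct af_alg_iv *)CMSG_DATA(cmsg);
            alg_iv->ivlen = 12;
            memcpy(alg_iv->iv, iv, 12);

            cmsg = CMSG_NXTHDR(&msg, cmsg);         /* AD prefix length */
            cmsg->cmsg_level = SOL_ALG;
            cmsg->cmsg_type  = ALG_SET_AEAD_ASSOCLEN;
            cmsg->cmsg_len   = CMSG_LEN(4);
            *(uint32_t *)CMSG_DATA(cmsg) = assoclen;

            sendmsg(opfd, &msg, 0);                 /* queue AD || plaintext */
            read(opfd, out, outlen);                /* ciphertext || tag */
    }

The in/out layout mirrors the memory-structure comment in aead_recvmsg() above; for decryption, the tag is appended to the input instead and ALG_OP_DECRYPT is used.
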
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c index 3acba0a7cd55..8109aaad2726 100644 --- a/crypto/algif_rng.c +++ b/crypto/algif_rng.c | |||
@@ -87,7 +87,7 @@ static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, | |||
87 | return genlen; | 87 | return genlen; |
88 | 88 | ||
89 | err = memcpy_to_msg(msg, result, len); | 89 | err = memcpy_to_msg(msg, result, len); |
90 | memzero_explicit(result, genlen); | 90 | memzero_explicit(result, len); |
91 | 91 | ||
92 | return err ? err : len; | 92 | return err ? err : len; |
93 | } | 93 | } |
diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index 6f5bebc9bf01..765fe7609348 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c | |||
@@ -210,7 +210,11 @@ static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx, | |||
210 | byte_count = DEFAULT_BLK_SZ; | 210 | byte_count = DEFAULT_BLK_SZ; |
211 | } | 211 | } |
212 | 212 | ||
213 | err = byte_count; | 213 | /* |
214 | * Return 0 in case of success as mandated by the kernel | ||
215 | * crypto API interface definition. | ||
216 | */ | ||
217 | err = 0; | ||
214 | 218 | ||
215 | dbgprint(KERN_CRIT "getting %d random bytes for context %p\n", | 219 | dbgprint(KERN_CRIT "getting %d random bytes for context %p\n", |
216 | byte_count, ctx); | 220 | byte_count, ctx); |
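
With this change, ansi_cprng follows the kernel crypto API convention that the generate callback returns 0 on success. Callers of crypto_rng_get_bytes() therefore test for zero rather than a byte count; a minimal sketch, assuming rng_tfm was allocated elsewhere:

    u8 buf[16];
    int err = crypto_rng_get_bytes(rng_tfm, buf, sizeof(buf));

    if (err)        /* 0 means buf was fully populated */
            pr_err("prng generate failed: %d\n", err);
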
diff --git a/crypto/api.c b/crypto/api.c index 2a81e98a0021..afe4610afc4b 100644 --- a/crypto/api.c +++ b/crypto/api.c | |||
@@ -257,6 +257,16 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask) | |||
257 | mask |= CRYPTO_ALG_TESTED; | 257 | mask |= CRYPTO_ALG_TESTED; |
258 | } | 258 | } |
259 | 259 | ||
260 | /* | ||
261 | * If the internal flag is set for a cipher, require a caller to | ||
262 | * invoke the cipher with the internal flag to use that cipher. | ||
263 | * Also, if a caller wants to allocate a cipher that may or may | ||
264 | * not be an internal cipher, use type | CRYPTO_ALG_INTERNAL and | ||
265 | * !(mask & CRYPTO_ALG_INTERNAL). | ||
266 | */ | ||
267 | if (!((type | mask) & CRYPTO_ALG_INTERNAL)) | ||
268 | mask |= CRYPTO_ALG_INTERNAL; | ||
269 | |||
260 | larval = crypto_larval_lookup(name, type, mask); | 270 | larval = crypto_larval_lookup(name, type, mask); |
261 | if (IS_ERR(larval) || !crypto_is_larval(larval)) | 271 | if (IS_ERR(larval) || !crypto_is_larval(larval)) |
262 | return larval; | 272 | return larval; |
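
The rule added above makes CRYPTO_ALG_INTERNAL an explicit opt-in at allocation time. Schematically, for any allocator that reaches crypto_alg_mod_lookup() (ahash is used here only as an illustration; return values are not checked in this sketch):

    /* internal implementations can never be matched */
    tfm = crypto_alloc_ahash("sha256", 0, 0);

    /* match internal implementations only */
    tfm = crypto_alloc_ahash("sha256", CRYPTO_ALG_INTERNAL,
                             CRYPTO_ALG_INTERNAL);

    /* either internal or non-internal implementations may match */
    tfm = crypto_alloc_ahash("sha256", CRYPTO_ALG_INTERNAL, 0);
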
diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 650afac10fd7..b0602ba03111 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c | |||
@@ -168,6 +168,20 @@ static inline struct cryptd_queue *cryptd_get_queue(struct crypto_tfm *tfm) | |||
168 | return ictx->queue; | 168 | return ictx->queue; |
169 | } | 169 | } |
170 | 170 | ||
171 | static inline void cryptd_check_internal(struct rtattr **tb, u32 *type, | ||
172 | u32 *mask) | ||
173 | { | ||
174 | struct crypto_attr_type *algt; | ||
175 | |||
176 | algt = crypto_get_attr_type(tb); | ||
177 | if (IS_ERR(algt)) | ||
178 | return; | ||
179 | if ((algt->type & CRYPTO_ALG_INTERNAL)) | ||
180 | *type |= CRYPTO_ALG_INTERNAL; | ||
181 | if ((algt->mask & CRYPTO_ALG_INTERNAL)) | ||
182 | *mask |= CRYPTO_ALG_INTERNAL; | ||
183 | } | ||
184 | |||
171 | static int cryptd_blkcipher_setkey(struct crypto_ablkcipher *parent, | 185 | static int cryptd_blkcipher_setkey(struct crypto_ablkcipher *parent, |
172 | const u8 *key, unsigned int keylen) | 186 | const u8 *key, unsigned int keylen) |
173 | { | 187 | { |
@@ -321,10 +335,13 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl, | |||
321 | struct cryptd_instance_ctx *ctx; | 335 | struct cryptd_instance_ctx *ctx; |
322 | struct crypto_instance *inst; | 336 | struct crypto_instance *inst; |
323 | struct crypto_alg *alg; | 337 | struct crypto_alg *alg; |
338 | u32 type = CRYPTO_ALG_TYPE_BLKCIPHER; | ||
339 | u32 mask = CRYPTO_ALG_TYPE_MASK; | ||
324 | int err; | 340 | int err; |
325 | 341 | ||
326 | alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER, | 342 | cryptd_check_internal(tb, &type, &mask); |
327 | CRYPTO_ALG_TYPE_MASK); | 343 | |
344 | alg = crypto_get_attr_alg(tb, type, mask); | ||
328 | if (IS_ERR(alg)) | 345 | if (IS_ERR(alg)) |
329 | return PTR_ERR(alg); | 346 | return PTR_ERR(alg); |
330 | 347 | ||
@@ -341,7 +358,10 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl, | |||
341 | if (err) | 358 | if (err) |
342 | goto out_free_inst; | 359 | goto out_free_inst; |
343 | 360 | ||
344 | inst->alg.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC; | 361 | type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC; |
362 | if (alg->cra_flags & CRYPTO_ALG_INTERNAL) | ||
363 | type |= CRYPTO_ALG_INTERNAL; | ||
364 | inst->alg.cra_flags = type; | ||
345 | inst->alg.cra_type = &crypto_ablkcipher_type; | 365 | inst->alg.cra_type = &crypto_ablkcipher_type; |
346 | 366 | ||
347 | inst->alg.cra_ablkcipher.ivsize = alg->cra_blkcipher.ivsize; | 367 | inst->alg.cra_ablkcipher.ivsize = alg->cra_blkcipher.ivsize; |
@@ -577,9 +597,13 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, | |||
577 | struct ahash_instance *inst; | 597 | struct ahash_instance *inst; |
578 | struct shash_alg *salg; | 598 | struct shash_alg *salg; |
579 | struct crypto_alg *alg; | 599 | struct crypto_alg *alg; |
600 | u32 type = 0; | ||
601 | u32 mask = 0; | ||
580 | int err; | 602 | int err; |
581 | 603 | ||
582 | salg = shash_attr_alg(tb[1], 0, 0); | 604 | cryptd_check_internal(tb, &type, &mask); |
605 | |||
606 | salg = shash_attr_alg(tb[1], type, mask); | ||
583 | if (IS_ERR(salg)) | 607 | if (IS_ERR(salg)) |
584 | return PTR_ERR(salg); | 608 | return PTR_ERR(salg); |
585 | 609 | ||
@@ -598,7 +622,10 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, | |||
598 | if (err) | 622 | if (err) |
599 | goto out_free_inst; | 623 | goto out_free_inst; |
600 | 624 | ||
601 | inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC; | 625 | type = CRYPTO_ALG_ASYNC; |
626 | if (alg->cra_flags & CRYPTO_ALG_INTERNAL) | ||
627 | type |= CRYPTO_ALG_INTERNAL; | ||
628 | inst->alg.halg.base.cra_flags = type; | ||
602 | 629 | ||
603 | inst->alg.halg.digestsize = salg->digestsize; | 630 | inst->alg.halg.digestsize = salg->digestsize; |
604 | inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx); | 631 | inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx); |
@@ -719,10 +746,13 @@ static int cryptd_create_aead(struct crypto_template *tmpl, | |||
719 | struct aead_instance_ctx *ctx; | 746 | struct aead_instance_ctx *ctx; |
720 | struct crypto_instance *inst; | 747 | struct crypto_instance *inst; |
721 | struct crypto_alg *alg; | 748 | struct crypto_alg *alg; |
749 | u32 type = CRYPTO_ALG_TYPE_AEAD; | ||
750 | u32 mask = CRYPTO_ALG_TYPE_MASK; | ||
722 | int err; | 751 | int err; |
723 | 752 | ||
724 | alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_AEAD, | 753 | cryptd_check_internal(tb, &type, &mask); |
725 | CRYPTO_ALG_TYPE_MASK); | 754 | |
755 | alg = crypto_get_attr_alg(tb, type, mask); | ||
726 | if (IS_ERR(alg)) | 756 | if (IS_ERR(alg)) |
727 | return PTR_ERR(alg); | 757 | return PTR_ERR(alg); |
728 | 758 | ||
@@ -739,7 +769,10 @@ static int cryptd_create_aead(struct crypto_template *tmpl, | |||
739 | if (err) | 769 | if (err) |
740 | goto out_free_inst; | 770 | goto out_free_inst; |
741 | 771 | ||
742 | inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC; | 772 | type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC; |
773 | if (alg->cra_flags & CRYPTO_ALG_INTERNAL) | ||
774 | type |= CRYPTO_ALG_INTERNAL; | ||
775 | inst->alg.cra_flags = type; | ||
743 | inst->alg.cra_type = alg->cra_type; | 776 | inst->alg.cra_type = alg->cra_type; |
744 | inst->alg.cra_ctxsize = sizeof(struct cryptd_aead_ctx); | 777 | inst->alg.cra_ctxsize = sizeof(struct cryptd_aead_ctx); |
745 | inst->alg.cra_init = cryptd_aead_init_tfm; | 778 | inst->alg.cra_init = cryptd_aead_init_tfm; |
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index c5148a35ae0a..41dfe762b7fb 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c | |||
@@ -62,10 +62,14 @@ static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact) | |||
62 | else if (!exact) | 62 | else if (!exact) |
63 | match = !strcmp(q->cra_name, p->cru_name); | 63 | match = !strcmp(q->cra_name, p->cru_name); |
64 | 64 | ||
65 | if (match) { | 65 | if (!match) |
66 | alg = q; | 66 | continue; |
67 | break; | 67 | |
68 | } | 68 | if (unlikely(!crypto_mod_get(q))) |
69 | continue; | ||
70 | |||
71 | alg = q; | ||
72 | break; | ||
69 | } | 73 | } |
70 | 74 | ||
71 | up_read(&crypto_alg_sem); | 75 | up_read(&crypto_alg_sem); |
@@ -205,9 +209,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, | |||
205 | if (!alg) | 209 | if (!alg) |
206 | return -ENOENT; | 210 | return -ENOENT; |
207 | 211 | ||
212 | err = -ENOMEM; | ||
208 | skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); | 213 | skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); |
209 | if (!skb) | 214 | if (!skb) |
210 | return -ENOMEM; | 215 | goto drop_alg; |
211 | 216 | ||
212 | info.in_skb = in_skb; | 217 | info.in_skb = in_skb; |
213 | info.out_skb = skb; | 218 | info.out_skb = skb; |
@@ -215,6 +220,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, | |||
215 | info.nlmsg_flags = 0; | 220 | info.nlmsg_flags = 0; |
216 | 221 | ||
217 | err = crypto_report_alg(alg, &info); | 222 | err = crypto_report_alg(alg, &info); |
223 | |||
224 | drop_alg: | ||
225 | crypto_mod_put(alg); | ||
226 | |||
218 | if (err) | 227 | if (err) |
219 | return err; | 228 | return err; |
220 | 229 | ||
@@ -284,6 +293,7 @@ static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh, | |||
284 | 293 | ||
285 | up_write(&crypto_alg_sem); | 294 | up_write(&crypto_alg_sem); |
286 | 295 | ||
296 | crypto_mod_put(alg); | ||
287 | crypto_remove_final(&list); | 297 | crypto_remove_final(&list); |
288 | 298 | ||
289 | return 0; | 299 | return 0; |
@@ -294,6 +304,7 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh, | |||
294 | { | 304 | { |
295 | struct crypto_alg *alg; | 305 | struct crypto_alg *alg; |
296 | struct crypto_user_alg *p = nlmsg_data(nlh); | 306 | struct crypto_user_alg *p = nlmsg_data(nlh); |
307 | int err; | ||
297 | 308 | ||
298 | if (!netlink_capable(skb, CAP_NET_ADMIN)) | 309 | if (!netlink_capable(skb, CAP_NET_ADMIN)) |
299 | return -EPERM; | 310 | return -EPERM; |
@@ -310,13 +321,19 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh, | |||
310 | * if we try to unregister. Unregistering such an algorithm without | 321 | * if we try to unregister. Unregistering such an algorithm without |
311 | * removing the module is not possible, so we restrict to crypto | 322 | * removing the module is not possible, so we restrict to crypto |
312 | * instances that are build from templates. */ | 323 | * instances that are build from templates. */ |
324 | err = -EINVAL; | ||
313 | if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE)) | 325 | if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE)) |
314 | return -EINVAL; | 326 | goto drop_alg; |
315 | 327 | ||
316 | if (atomic_read(&alg->cra_refcnt) != 1) | 328 | err = -EBUSY; |
317 | return -EBUSY; | 329 | if (atomic_read(&alg->cra_refcnt) > 2) |
330 | goto drop_alg; | ||
318 | 331 | ||
319 | return crypto_unregister_instance(alg); | 332 | err = crypto_unregister_instance((struct crypto_instance *)alg); |
333 | |||
334 | drop_alg: | ||
335 | crypto_mod_put(alg); | ||
336 | return err; | ||
320 | } | 337 | } |
321 | 338 | ||
322 | static struct crypto_alg *crypto_user_skcipher_alg(const char *name, u32 type, | 339 | static struct crypto_alg *crypto_user_skcipher_alg(const char *name, u32 type, |
@@ -395,8 +412,10 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh, | |||
395 | return -EINVAL; | 412 | return -EINVAL; |
396 | 413 | ||
397 | alg = crypto_alg_match(p, exact); | 414 | alg = crypto_alg_match(p, exact); |
398 | if (alg) | 415 | if (alg) { |
416 | crypto_mod_put(alg); | ||
399 | return -EEXIST; | 417 | return -EEXIST; |
418 | } | ||
400 | 419 | ||
401 | if (strlen(p->cru_driver_name)) | 420 | if (strlen(p->cru_driver_name)) |
402 | name = p->cru_driver_name; | 421 | name = p->cru_driver_name; |
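
Because crypto_alg_match() now takes a module reference on the algorithm it returns, each of its callers in this file gains a matching crypto_mod_put(). The resulting pattern, condensed from the hunks above (do_work() is a hypothetical placeholder for the report/update/delete step):

    alg = crypto_alg_match(p, exact);
    if (!alg)
            return -ENOENT;

    err = do_work(alg);

    crypto_mod_put(alg);    /* drop the reference on every exit path */
    return err;
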
diff --git a/crypto/drbg.c b/crypto/drbg.c index d8ff16e5c322..b69409cb7e6a 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c | |||
@@ -119,19 +119,19 @@ static const struct drbg_core drbg_cores[] = { | |||
119 | .statelen = 32, /* 256 bits as defined in 10.2.1 */ | 119 | .statelen = 32, /* 256 bits as defined in 10.2.1 */ |
120 | .blocklen_bytes = 16, | 120 | .blocklen_bytes = 16, |
121 | .cra_name = "ctr_aes128", | 121 | .cra_name = "ctr_aes128", |
122 | .backend_cra_name = "ecb(aes)", | 122 | .backend_cra_name = "aes", |
123 | }, { | 123 | }, { |
124 | .flags = DRBG_CTR | DRBG_STRENGTH192, | 124 | .flags = DRBG_CTR | DRBG_STRENGTH192, |
125 | .statelen = 40, /* 320 bits as defined in 10.2.1 */ | 125 | .statelen = 40, /* 320 bits as defined in 10.2.1 */ |
126 | .blocklen_bytes = 16, | 126 | .blocklen_bytes = 16, |
127 | .cra_name = "ctr_aes192", | 127 | .cra_name = "ctr_aes192", |
128 | .backend_cra_name = "ecb(aes)", | 128 | .backend_cra_name = "aes", |
129 | }, { | 129 | }, { |
130 | .flags = DRBG_CTR | DRBG_STRENGTH256, | 130 | .flags = DRBG_CTR | DRBG_STRENGTH256, |
131 | .statelen = 48, /* 384 bits as defined in 10.2.1 */ | 131 | .statelen = 48, /* 384 bits as defined in 10.2.1 */ |
132 | .blocklen_bytes = 16, | 132 | .blocklen_bytes = 16, |
133 | .cra_name = "ctr_aes256", | 133 | .cra_name = "ctr_aes256", |
134 | .backend_cra_name = "ecb(aes)", | 134 | .backend_cra_name = "aes", |
135 | }, | 135 | }, |
136 | #endif /* CONFIG_CRYPTO_DRBG_CTR */ | 136 | #endif /* CONFIG_CRYPTO_DRBG_CTR */ |
137 | #ifdef CONFIG_CRYPTO_DRBG_HASH | 137 | #ifdef CONFIG_CRYPTO_DRBG_HASH |
@@ -308,9 +308,6 @@ static int drbg_ctr_bcc(struct drbg_state *drbg, | |||
308 | 308 | ||
309 | drbg_string_fill(&data, out, drbg_blocklen(drbg)); | 309 | drbg_string_fill(&data, out, drbg_blocklen(drbg)); |
310 | 310 | ||
311 | /* 10.4.3 step 1 */ | ||
312 | memset(out, 0, drbg_blocklen(drbg)); | ||
313 | |||
314 | /* 10.4.3 step 2 / 4 */ | 311 | /* 10.4.3 step 2 / 4 */ |
315 | list_for_each_entry(curr, in, list) { | 312 | list_for_each_entry(curr, in, list) { |
316 | const unsigned char *pos = curr->buf; | 313 | const unsigned char *pos = curr->buf; |
@@ -406,7 +403,6 @@ static int drbg_ctr_df(struct drbg_state *drbg, | |||
406 | 403 | ||
407 | memset(pad, 0, drbg_blocklen(drbg)); | 404 | memset(pad, 0, drbg_blocklen(drbg)); |
408 | memset(iv, 0, drbg_blocklen(drbg)); | 405 | memset(iv, 0, drbg_blocklen(drbg)); |
409 | memset(temp, 0, drbg_statelen(drbg)); | ||
410 | 406 | ||
411 | /* 10.4.2 step 1 is implicit as we work byte-wise */ | 407 | /* 10.4.2 step 1 is implicit as we work byte-wise */ |
412 | 408 | ||
@@ -523,7 +519,6 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed, | |||
523 | unsigned int len = 0; | 519 | unsigned int len = 0; |
524 | struct drbg_string cipherin; | 520 | struct drbg_string cipherin; |
525 | 521 | ||
526 | memset(temp, 0, drbg_statelen(drbg) + drbg_blocklen(drbg)); | ||
527 | if (3 > reseed) | 522 | if (3 > reseed) |
528 | memset(df_data, 0, drbg_statelen(drbg)); | 523 | memset(df_data, 0, drbg_statelen(drbg)); |
529 | 524 | ||
@@ -585,8 +580,6 @@ static int drbg_ctr_generate(struct drbg_state *drbg, | |||
585 | int ret = 0; | 580 | int ret = 0; |
586 | struct drbg_string data; | 581 | struct drbg_string data; |
587 | 582 | ||
588 | memset(drbg->scratchpad, 0, drbg_blocklen(drbg)); | ||
589 | |||
590 | /* 10.2.1.5.2 step 2 */ | 583 | /* 10.2.1.5.2 step 2 */ |
591 | if (addtl && !list_empty(addtl)) { | 584 | if (addtl && !list_empty(addtl)) { |
592 | ret = drbg_ctr_update(drbg, addtl, 2); | 585 | ret = drbg_ctr_update(drbg, addtl, 2); |
@@ -761,7 +754,6 @@ static struct drbg_state_ops drbg_hmac_ops = { | |||
761 | .generate = drbg_hmac_generate, | 754 | .generate = drbg_hmac_generate, |
762 | .crypto_init = drbg_init_hash_kernel, | 755 | .crypto_init = drbg_init_hash_kernel, |
763 | .crypto_fini = drbg_fini_hash_kernel, | 756 | .crypto_fini = drbg_fini_hash_kernel, |
764 | |||
765 | }; | 757 | }; |
766 | #endif /* CONFIG_CRYPTO_DRBG_HMAC */ | 758 | #endif /* CONFIG_CRYPTO_DRBG_HMAC */ |
767 | 759 | ||
@@ -838,8 +830,6 @@ static int drbg_hash_df(struct drbg_state *drbg, | |||
838 | unsigned char *tmp = drbg->scratchpad + drbg_statelen(drbg); | 830 | unsigned char *tmp = drbg->scratchpad + drbg_statelen(drbg); |
839 | struct drbg_string data; | 831 | struct drbg_string data; |
840 | 832 | ||
841 | memset(tmp, 0, drbg_blocklen(drbg)); | ||
842 | |||
843 | /* 10.4.1 step 3 */ | 833 | /* 10.4.1 step 3 */ |
844 | input[0] = 1; | 834 | input[0] = 1; |
845 | drbg_cpu_to_be32((outlen * 8), &input[1]); | 835 | drbg_cpu_to_be32((outlen * 8), &input[1]); |
@@ -879,7 +869,6 @@ static int drbg_hash_update(struct drbg_state *drbg, struct list_head *seed, | |||
879 | unsigned char *V = drbg->scratchpad; | 869 | unsigned char *V = drbg->scratchpad; |
880 | unsigned char prefix = DRBG_PREFIX1; | 870 | unsigned char prefix = DRBG_PREFIX1; |
881 | 871 | ||
882 | memset(drbg->scratchpad, 0, drbg_statelen(drbg)); | ||
883 | if (!seed) | 872 | if (!seed) |
884 | return -EINVAL; | 873 | return -EINVAL; |
885 | 874 | ||
@@ -921,9 +910,6 @@ static int drbg_hash_process_addtl(struct drbg_state *drbg, | |||
921 | LIST_HEAD(datalist); | 910 | LIST_HEAD(datalist); |
922 | unsigned char prefix = DRBG_PREFIX2; | 911 | unsigned char prefix = DRBG_PREFIX2; |
923 | 912 | ||
924 | /* this is value w as per documentation */ | ||
925 | memset(drbg->scratchpad, 0, drbg_blocklen(drbg)); | ||
926 | |||
927 | /* 10.1.1.4 step 2 */ | 913 | /* 10.1.1.4 step 2 */ |
928 | if (!addtl || list_empty(addtl)) | 914 | if (!addtl || list_empty(addtl)) |
929 | return 0; | 915 | return 0; |
@@ -959,9 +945,6 @@ static int drbg_hash_hashgen(struct drbg_state *drbg, | |||
959 | struct drbg_string data; | 945 | struct drbg_string data; |
960 | LIST_HEAD(datalist); | 946 | LIST_HEAD(datalist); |
961 | 947 | ||
962 | memset(src, 0, drbg_statelen(drbg)); | ||
963 | memset(dst, 0, drbg_blocklen(drbg)); | ||
964 | |||
965 | /* 10.1.1.4 step hashgen 2 */ | 948 | /* 10.1.1.4 step hashgen 2 */ |
966 | memcpy(src, drbg->V, drbg_statelen(drbg)); | 949 | memcpy(src, drbg->V, drbg_statelen(drbg)); |
967 | 950 | ||
@@ -1018,7 +1001,6 @@ static int drbg_hash_generate(struct drbg_state *drbg, | |||
1018 | len = drbg_hash_hashgen(drbg, buf, buflen); | 1001 | len = drbg_hash_hashgen(drbg, buf, buflen); |
1019 | 1002 | ||
1020 | /* this is the value H as documented in 10.1.1.4 */ | 1003 | /* this is the value H as documented in 10.1.1.4 */ |
1021 | memset(drbg->scratchpad, 0, drbg_blocklen(drbg)); | ||
1022 | /* 10.1.1.4 step 4 */ | 1004 | /* 10.1.1.4 step 4 */ |
1023 | drbg_string_fill(&data1, &prefix, 1); | 1005 | drbg_string_fill(&data1, &prefix, 1); |
1024 | list_add_tail(&data1.list, &datalist); | 1006 | list_add_tail(&data1.list, &datalist); |
@@ -1298,7 +1280,7 @@ static void drbg_restore_shadow(struct drbg_state *drbg, | |||
1298 | * as defined in SP800-90A. The additional input is mixed into | 1280 | * as defined in SP800-90A. The additional input is mixed into |
1299 | * the state in addition to the pulled entropy. | 1281 | * the state in addition to the pulled entropy. |
1300 | * | 1282 | * |
1301 | * return: generated number of bytes | 1283 | * return: 0 when all bytes are generated; < 0 in case of an error |
1302 | */ | 1284 | */ |
1303 | static int drbg_generate(struct drbg_state *drbg, | 1285 | static int drbg_generate(struct drbg_state *drbg, |
1304 | unsigned char *buf, unsigned int buflen, | 1286 | unsigned char *buf, unsigned int buflen, |
@@ -1437,6 +1419,11 @@ static int drbg_generate(struct drbg_state *drbg, | |||
1437 | } | 1419 | } |
1438 | #endif | 1420 | #endif |
1439 | 1421 | ||
1422 | /* | ||
1423 | * All operations were successful, return 0 as mandated by | ||
1424 | * the kernel crypto API interface. | ||
1425 | */ | ||
1426 | len = 0; | ||
1440 | err: | 1427 | err: |
1441 | shadow->d_ops->crypto_fini(shadow); | 1428 | shadow->d_ops->crypto_fini(shadow); |
1442 | drbg_restore_shadow(drbg, &shadow); | 1429 | drbg_restore_shadow(drbg, &shadow); |
@@ -1644,24 +1631,24 @@ static int drbg_kcapi_hash(struct drbg_state *drbg, const unsigned char *key, | |||
1644 | static int drbg_init_sym_kernel(struct drbg_state *drbg) | 1631 | static int drbg_init_sym_kernel(struct drbg_state *drbg) |
1645 | { | 1632 | { |
1646 | int ret = 0; | 1633 | int ret = 0; |
1647 | struct crypto_blkcipher *tfm; | 1634 | struct crypto_cipher *tfm; |
1648 | 1635 | ||
1649 | tfm = crypto_alloc_blkcipher(drbg->core->backend_cra_name, 0, 0); | 1636 | tfm = crypto_alloc_cipher(drbg->core->backend_cra_name, 0, 0); |
1650 | if (IS_ERR(tfm)) { | 1637 | if (IS_ERR(tfm)) { |
1651 | pr_info("DRBG: could not allocate cipher TFM handle\n"); | 1638 | pr_info("DRBG: could not allocate cipher TFM handle\n"); |
1652 | return PTR_ERR(tfm); | 1639 | return PTR_ERR(tfm); |
1653 | } | 1640 | } |
1654 | BUG_ON(drbg_blocklen(drbg) != crypto_blkcipher_blocksize(tfm)); | 1641 | BUG_ON(drbg_blocklen(drbg) != crypto_cipher_blocksize(tfm)); |
1655 | drbg->priv_data = tfm; | 1642 | drbg->priv_data = tfm; |
1656 | return ret; | 1643 | return ret; |
1657 | } | 1644 | } |
1658 | 1645 | ||
1659 | static int drbg_fini_sym_kernel(struct drbg_state *drbg) | 1646 | static int drbg_fini_sym_kernel(struct drbg_state *drbg) |
1660 | { | 1647 | { |
1661 | struct crypto_blkcipher *tfm = | 1648 | struct crypto_cipher *tfm = |
1662 | (struct crypto_blkcipher *)drbg->priv_data; | 1649 | (struct crypto_cipher *)drbg->priv_data; |
1663 | if (tfm) | 1650 | if (tfm) |
1664 | crypto_free_blkcipher(tfm); | 1651 | crypto_free_cipher(tfm); |
1665 | drbg->priv_data = NULL; | 1652 | drbg->priv_data = NULL; |
1666 | return 0; | 1653 | return 0; |
1667 | } | 1654 | } |
@@ -1669,21 +1656,14 @@ static int drbg_fini_sym_kernel(struct drbg_state *drbg) | |||
1669 | static int drbg_kcapi_sym(struct drbg_state *drbg, const unsigned char *key, | 1656 | static int drbg_kcapi_sym(struct drbg_state *drbg, const unsigned char *key, |
1670 | unsigned char *outval, const struct drbg_string *in) | 1657 | unsigned char *outval, const struct drbg_string *in) |
1671 | { | 1658 | { |
1672 | int ret = 0; | 1659 | struct crypto_cipher *tfm = |
1673 | struct scatterlist sg_in, sg_out; | 1660 | (struct crypto_cipher *)drbg->priv_data; |
1674 | struct blkcipher_desc desc; | ||
1675 | struct crypto_blkcipher *tfm = | ||
1676 | (struct crypto_blkcipher *)drbg->priv_data; | ||
1677 | |||
1678 | desc.tfm = tfm; | ||
1679 | desc.flags = 0; | ||
1680 | crypto_blkcipher_setkey(tfm, key, (drbg_keylen(drbg))); | ||
1681 | /* there is only component in *in */ | ||
1682 | sg_init_one(&sg_in, in->buf, in->len); | ||
1683 | sg_init_one(&sg_out, outval, drbg_blocklen(drbg)); | ||
1684 | ret = crypto_blkcipher_encrypt(&desc, &sg_out, &sg_in, in->len); | ||
1685 | 1661 | ||
1686 | return ret; | 1662 | crypto_cipher_setkey(tfm, key, (drbg_keylen(drbg))); |
1663 | /* there is only one component in *in */ |
1664 | BUG_ON(in->len < drbg_blocklen(drbg)); | ||
1665 | crypto_cipher_encrypt_one(tfm, outval, in->buf); | ||
1666 | return 0; | ||
1687 | } | 1667 | } |
1688 | #endif /* CONFIG_CRYPTO_DRBG_CTR */ | 1668 | #endif /* CONFIG_CRYPTO_DRBG_CTR */ |
1689 | 1669 | ||
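
With the backend name changed from "ecb(aes)" to plain "aes", the DRBG now drives AES one block at a time through the synchronous single-block cipher API, without blkcipher descriptors or scatterlists. A minimal sketch of that API as used above, assuming a 128-bit all-zero key for illustration:

    struct crypto_cipher *tfm = crypto_alloc_cipher("aes", 0, 0);
    u8 key[16] = { 0 }, in[16] = { 0 }, out[16];

    if (!IS_ERR(tfm)) {
            crypto_cipher_setkey(tfm, key, sizeof(key));
            crypto_cipher_encrypt_one(tfm, out, in);  /* exactly one block */
            crypto_free_cipher(tfm);
    }
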
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index a8e870444ea9..fe5b495a434d 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c | |||
@@ -258,6 +258,20 @@ out_free_inst: | |||
258 | goto out; | 258 | goto out; |
259 | } | 259 | } |
260 | 260 | ||
261 | static inline void mcryptd_check_internal(struct rtattr **tb, u32 *type, | ||
262 | u32 *mask) | ||
263 | { | ||
264 | struct crypto_attr_type *algt; | ||
265 | |||
266 | algt = crypto_get_attr_type(tb); | ||
267 | if (IS_ERR(algt)) | ||
268 | return; | ||
269 | if ((algt->type & CRYPTO_ALG_INTERNAL)) | ||
270 | *type |= CRYPTO_ALG_INTERNAL; | ||
271 | if ((algt->mask & CRYPTO_ALG_INTERNAL)) | ||
272 | *mask |= CRYPTO_ALG_INTERNAL; | ||
273 | } | ||
274 | |||
261 | static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) | 275 | static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) |
262 | { | 276 | { |
263 | struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); | 277 | struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); |
@@ -480,9 +494,13 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, | |||
480 | struct ahash_instance *inst; | 494 | struct ahash_instance *inst; |
481 | struct shash_alg *salg; | 495 | struct shash_alg *salg; |
482 | struct crypto_alg *alg; | 496 | struct crypto_alg *alg; |
497 | u32 type = 0; | ||
498 | u32 mask = 0; | ||
483 | int err; | 499 | int err; |
484 | 500 | ||
485 | salg = shash_attr_alg(tb[1], 0, 0); | 501 | mcryptd_check_internal(tb, &type, &mask); |
502 | |||
503 | salg = shash_attr_alg(tb[1], type, mask); | ||
486 | if (IS_ERR(salg)) | 504 | if (IS_ERR(salg)) |
487 | return PTR_ERR(salg); | 505 | return PTR_ERR(salg); |
488 | 506 | ||
@@ -502,7 +520,10 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, | |||
502 | if (err) | 520 | if (err) |
503 | goto out_free_inst; | 521 | goto out_free_inst; |
504 | 522 | ||
505 | inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC; | 523 | type = CRYPTO_ALG_ASYNC; |
524 | if (alg->cra_flags & CRYPTO_ALG_INTERNAL) | ||
525 | type |= CRYPTO_ALG_INTERNAL; | ||
526 | inst->alg.halg.base.cra_flags = type; | ||
506 | 527 | ||
507 | inst->alg.halg.digestsize = salg->digestsize; | 528 | inst->alg.halg.digestsize = salg->digestsize; |
508 | inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); | 529 | inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); |
diff --git a/crypto/proc.c b/crypto/proc.c index 4a0a7aad2204..4ffe73b51612 100644 --- a/crypto/proc.c +++ b/crypto/proc.c | |||
@@ -89,6 +89,9 @@ static int c_show(struct seq_file *m, void *p) | |||
89 | seq_printf(m, "selftest : %s\n", | 89 | seq_printf(m, "selftest : %s\n", |
90 | (alg->cra_flags & CRYPTO_ALG_TESTED) ? | 90 | (alg->cra_flags & CRYPTO_ALG_TESTED) ? |
91 | "passed" : "unknown"); | 91 | "passed" : "unknown"); |
92 | seq_printf(m, "internal : %s\n", | ||
93 | (alg->cra_flags & CRYPTO_ALG_INTERNAL) ? | ||
94 | "yes" : "no"); | ||
92 | 95 | ||
93 | if (alg->cra_flags & CRYPTO_ALG_LARVAL) { | 96 | if (alg->cra_flags & CRYPTO_ALG_LARVAL) { |
94 | seq_printf(m, "type : larval\n"); | 97 | seq_printf(m, "type : larval\n"); |
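For illustration (not taken from a running kernel): with the hunk above, each /proc/crypto entry gains an "internal" line; an entry would read roughly as follows, with exact spacing and neighbouring fields depending on the algorithm:

	name         : sha1
	driver       : sha1-generic
	module       : kernel
	priority     : 100
	refcnt       : 1
	selftest     : passed
	internal     : no
	type         : shash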
diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c index a3e50c37eb6f..39e3acc438d9 100644 --- a/crypto/sha1_generic.c +++ b/crypto/sha1_generic.c | |||
@@ -23,111 +23,49 @@ | |||
23 | #include <linux/cryptohash.h> | 23 | #include <linux/cryptohash.h> |
24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
25 | #include <crypto/sha.h> | 25 | #include <crypto/sha.h> |
26 | #include <crypto/sha1_base.h> | ||
26 | #include <asm/byteorder.h> | 27 | #include <asm/byteorder.h> |
27 | 28 | ||
28 | static int sha1_init(struct shash_desc *desc) | 29 | static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src, |
30 | int blocks) | ||
29 | { | 31 | { |
30 | struct sha1_state *sctx = shash_desc_ctx(desc); | 32 | u32 temp[SHA_WORKSPACE_WORDS]; |
31 | 33 | ||
32 | *sctx = (struct sha1_state){ | 34 | while (blocks--) { |
33 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | 35 | sha_transform(sst->state, src, temp); |
34 | }; | 36 | src += SHA1_BLOCK_SIZE; |
35 | 37 | } | |
36 | return 0; | 38 | memzero_explicit(temp, sizeof(temp)); |
37 | } | 39 | } |
38 | 40 | ||
39 | int crypto_sha1_update(struct shash_desc *desc, const u8 *data, | 41 | int crypto_sha1_update(struct shash_desc *desc, const u8 *data, |
40 | unsigned int len) | 42 | unsigned int len) |
41 | { | 43 | { |
42 | struct sha1_state *sctx = shash_desc_ctx(desc); | 44 | return sha1_base_do_update(desc, data, len, sha1_generic_block_fn); |
43 | unsigned int partial, done; | ||
44 | const u8 *src; | ||
45 | |||
46 | partial = sctx->count % SHA1_BLOCK_SIZE; | ||
47 | sctx->count += len; | ||
48 | done = 0; | ||
49 | src = data; | ||
50 | |||
51 | if ((partial + len) >= SHA1_BLOCK_SIZE) { | ||
52 | u32 temp[SHA_WORKSPACE_WORDS]; | ||
53 | |||
54 | if (partial) { | ||
55 | done = -partial; | ||
56 | memcpy(sctx->buffer + partial, data, | ||
57 | done + SHA1_BLOCK_SIZE); | ||
58 | src = sctx->buffer; | ||
59 | } | ||
60 | |||
61 | do { | ||
62 | sha_transform(sctx->state, src, temp); | ||
63 | done += SHA1_BLOCK_SIZE; | ||
64 | src = data + done; | ||
65 | } while (done + SHA1_BLOCK_SIZE <= len); | ||
66 | |||
67 | memzero_explicit(temp, sizeof(temp)); | ||
68 | partial = 0; | ||
69 | } | ||
70 | memcpy(sctx->buffer + partial, src, len - done); | ||
71 | |||
72 | return 0; | ||
73 | } | 45 | } |
74 | EXPORT_SYMBOL(crypto_sha1_update); | 46 | EXPORT_SYMBOL(crypto_sha1_update); |
75 | 47 | ||
76 | |||
77 | /* Add padding and return the message digest. */ | ||
78 | static int sha1_final(struct shash_desc *desc, u8 *out) | 48 | static int sha1_final(struct shash_desc *desc, u8 *out) |
79 | { | 49 | { |
80 | struct sha1_state *sctx = shash_desc_ctx(desc); | 50 | sha1_base_do_finalize(desc, sha1_generic_block_fn); |
81 | __be32 *dst = (__be32 *)out; | 51 | return sha1_base_finish(desc, out); |
82 | u32 i, index, padlen; | ||
83 | __be64 bits; | ||
84 | static const u8 padding[64] = { 0x80, }; | ||
85 | |||
86 | bits = cpu_to_be64(sctx->count << 3); | ||
87 | |||
88 | /* Pad out to 56 mod 64 */ | ||
89 | index = sctx->count & 0x3f; | ||
90 | padlen = (index < 56) ? (56 - index) : ((64+56) - index); | ||
91 | crypto_sha1_update(desc, padding, padlen); | ||
92 | |||
93 | /* Append length */ | ||
94 | crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
95 | |||
96 | /* Store state in digest */ | ||
97 | for (i = 0; i < 5; i++) | ||
98 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
99 | |||
100 | /* Wipe context */ | ||
101 | memset(sctx, 0, sizeof *sctx); | ||
102 | |||
103 | return 0; | ||
104 | } | 52 | } |
105 | 53 | ||
106 | static int sha1_export(struct shash_desc *desc, void *out) | 54 | int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, |
55 | unsigned int len, u8 *out) | ||
107 | { | 56 | { |
108 | struct sha1_state *sctx = shash_desc_ctx(desc); | 57 | sha1_base_do_update(desc, data, len, sha1_generic_block_fn); |
109 | 58 | return sha1_final(desc, out); | |
110 | memcpy(out, sctx, sizeof(*sctx)); | ||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | static int sha1_import(struct shash_desc *desc, const void *in) | ||
115 | { | ||
116 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
117 | |||
118 | memcpy(sctx, in, sizeof(*sctx)); | ||
119 | return 0; | ||
120 | } | 59 | } |
60 | EXPORT_SYMBOL(crypto_sha1_finup); | ||
121 | 61 | ||
122 | static struct shash_alg alg = { | 62 | static struct shash_alg alg = { |
123 | .digestsize = SHA1_DIGEST_SIZE, | 63 | .digestsize = SHA1_DIGEST_SIZE, |
124 | .init = sha1_init, | 64 | .init = sha1_base_init, |
125 | .update = crypto_sha1_update, | 65 | .update = crypto_sha1_update, |
126 | .final = sha1_final, | 66 | .final = sha1_final, |
127 | .export = sha1_export, | 67 | .finup = crypto_sha1_finup, |
128 | .import = sha1_import, | ||
129 | .descsize = sizeof(struct sha1_state), | 68 | .descsize = sizeof(struct sha1_state), |
130 | .statesize = sizeof(struct sha1_state), | ||
131 | .base = { | 69 | .base = { |
132 | .cra_name = "sha1", | 70 | .cra_name = "sha1", |
133 | .cra_driver_name= "sha1-generic", | 71 | .cra_driver_name= "sha1-generic", |
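For illustration (not part of this patch): users of the shash API are unaffected by the move to the base layer; a one-shot digest still drives the init/update/finup callbacks wired up above. A minimal sketch with a hypothetical helper name:

	#include <crypto/hash.h>
	#include <linux/err.h>

	/* Hypothetical helper: "out" must hold SHA1_DIGEST_SIZE (20) bytes. */
	static int example_sha1(const u8 *data, unsigned int len, u8 *out)
	{
		struct crypto_shash *tfm;
		int err;

		tfm = crypto_alloc_shash("sha1", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;
			err = crypto_shash_digest(desc, data, len, out);
		}

		crypto_free_shash(tfm);
		return err;
	}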
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index b001ff5c2efc..78431163ed3c 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/mm.h> | 23 | #include <linux/mm.h> |
24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
25 | #include <crypto/sha.h> | 25 | #include <crypto/sha.h> |
26 | #include <crypto/sha256_base.h> | ||
26 | #include <asm/byteorder.h> | 27 | #include <asm/byteorder.h> |
27 | #include <asm/unaligned.h> | 28 | #include <asm/unaligned.h> |
28 | 29 | ||
@@ -214,138 +215,43 @@ static void sha256_transform(u32 *state, const u8 *input) | |||
214 | memzero_explicit(W, 64 * sizeof(u32)); | 215 | memzero_explicit(W, 64 * sizeof(u32)); |
215 | } | 216 | } |
216 | 217 | ||
217 | static int sha224_init(struct shash_desc *desc) | 218 | static void sha256_generic_block_fn(struct sha256_state *sst, u8 const *src, |
219 | int blocks) | ||
218 | { | 220 | { |
219 | struct sha256_state *sctx = shash_desc_ctx(desc); | 221 | while (blocks--) { |
220 | sctx->state[0] = SHA224_H0; | 222 | sha256_transform(sst->state, src); |
221 | sctx->state[1] = SHA224_H1; | 223 | src += SHA256_BLOCK_SIZE; |
222 | sctx->state[2] = SHA224_H2; | 224 | } |
223 | sctx->state[3] = SHA224_H3; | ||
224 | sctx->state[4] = SHA224_H4; | ||
225 | sctx->state[5] = SHA224_H5; | ||
226 | sctx->state[6] = SHA224_H6; | ||
227 | sctx->state[7] = SHA224_H7; | ||
228 | sctx->count = 0; | ||
229 | |||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | static int sha256_init(struct shash_desc *desc) | ||
234 | { | ||
235 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
236 | sctx->state[0] = SHA256_H0; | ||
237 | sctx->state[1] = SHA256_H1; | ||
238 | sctx->state[2] = SHA256_H2; | ||
239 | sctx->state[3] = SHA256_H3; | ||
240 | sctx->state[4] = SHA256_H4; | ||
241 | sctx->state[5] = SHA256_H5; | ||
242 | sctx->state[6] = SHA256_H6; | ||
243 | sctx->state[7] = SHA256_H7; | ||
244 | sctx->count = 0; | ||
245 | |||
246 | return 0; | ||
247 | } | 225 | } |
248 | 226 | ||
249 | int crypto_sha256_update(struct shash_desc *desc, const u8 *data, | 227 | int crypto_sha256_update(struct shash_desc *desc, const u8 *data, |
250 | unsigned int len) | 228 | unsigned int len) |
251 | { | 229 | { |
252 | struct sha256_state *sctx = shash_desc_ctx(desc); | 230 | return sha256_base_do_update(desc, data, len, sha256_generic_block_fn); |
253 | unsigned int partial, done; | ||
254 | const u8 *src; | ||
255 | |||
256 | partial = sctx->count & 0x3f; | ||
257 | sctx->count += len; | ||
258 | done = 0; | ||
259 | src = data; | ||
260 | |||
261 | if ((partial + len) > 63) { | ||
262 | if (partial) { | ||
263 | done = -partial; | ||
264 | memcpy(sctx->buf + partial, data, done + 64); | ||
265 | src = sctx->buf; | ||
266 | } | ||
267 | |||
268 | do { | ||
269 | sha256_transform(sctx->state, src); | ||
270 | done += 64; | ||
271 | src = data + done; | ||
272 | } while (done + 63 < len); | ||
273 | |||
274 | partial = 0; | ||
275 | } | ||
276 | memcpy(sctx->buf + partial, src, len - done); | ||
277 | |||
278 | return 0; | ||
279 | } | 231 | } |
280 | EXPORT_SYMBOL(crypto_sha256_update); | 232 | EXPORT_SYMBOL(crypto_sha256_update); |
281 | 233 | ||
282 | static int sha256_final(struct shash_desc *desc, u8 *out) | 234 | static int sha256_final(struct shash_desc *desc, u8 *out) |
283 | { | 235 | { |
284 | struct sha256_state *sctx = shash_desc_ctx(desc); | 236 | sha256_base_do_finalize(desc, sha256_generic_block_fn); |
285 | __be32 *dst = (__be32 *)out; | 237 | return sha256_base_finish(desc, out); |
286 | __be64 bits; | ||
287 | unsigned int index, pad_len; | ||
288 | int i; | ||
289 | static const u8 padding[64] = { 0x80, }; | ||
290 | |||
291 | /* Save number of bits */ | ||
292 | bits = cpu_to_be64(sctx->count << 3); | ||
293 | |||
294 | /* Pad out to 56 mod 64. */ | ||
295 | index = sctx->count & 0x3f; | ||
296 | pad_len = (index < 56) ? (56 - index) : ((64+56) - index); | ||
297 | crypto_sha256_update(desc, padding, pad_len); | ||
298 | |||
299 | /* Append length (before padding) */ | ||
300 | crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
301 | |||
302 | /* Store state in digest */ | ||
303 | for (i = 0; i < 8; i++) | ||
304 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
305 | |||
306 | /* Zeroize sensitive information. */ | ||
307 | memset(sctx, 0, sizeof(*sctx)); | ||
308 | |||
309 | return 0; | ||
310 | } | 238 | } |
311 | 239 | ||
312 | static int sha224_final(struct shash_desc *desc, u8 *hash) | 240 | int crypto_sha256_finup(struct shash_desc *desc, const u8 *data, |
241 | unsigned int len, u8 *hash) | ||
313 | { | 242 | { |
314 | u8 D[SHA256_DIGEST_SIZE]; | 243 | sha256_base_do_update(desc, data, len, sha256_generic_block_fn); |
315 | 244 | return sha256_final(desc, hash); | |
316 | sha256_final(desc, D); | ||
317 | |||
318 | memcpy(hash, D, SHA224_DIGEST_SIZE); | ||
319 | memzero_explicit(D, SHA256_DIGEST_SIZE); | ||
320 | |||
321 | return 0; | ||
322 | } | ||
323 | |||
324 | static int sha256_export(struct shash_desc *desc, void *out) | ||
325 | { | ||
326 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
327 | |||
328 | memcpy(out, sctx, sizeof(*sctx)); | ||
329 | return 0; | ||
330 | } | ||
331 | |||
332 | static int sha256_import(struct shash_desc *desc, const void *in) | ||
333 | { | ||
334 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
335 | |||
336 | memcpy(sctx, in, sizeof(*sctx)); | ||
337 | return 0; | ||
338 | } | 245 | } |
246 | EXPORT_SYMBOL(crypto_sha256_finup); | ||
339 | 247 | ||
340 | static struct shash_alg sha256_algs[2] = { { | 248 | static struct shash_alg sha256_algs[2] = { { |
341 | .digestsize = SHA256_DIGEST_SIZE, | 249 | .digestsize = SHA256_DIGEST_SIZE, |
342 | .init = sha256_init, | 250 | .init = sha256_base_init, |
343 | .update = crypto_sha256_update, | 251 | .update = crypto_sha256_update, |
344 | .final = sha256_final, | 252 | .final = sha256_final, |
345 | .export = sha256_export, | 253 | .finup = crypto_sha256_finup, |
346 | .import = sha256_import, | ||
347 | .descsize = sizeof(struct sha256_state), | 254 | .descsize = sizeof(struct sha256_state), |
348 | .statesize = sizeof(struct sha256_state), | ||
349 | .base = { | 255 | .base = { |
350 | .cra_name = "sha256", | 256 | .cra_name = "sha256", |
351 | .cra_driver_name= "sha256-generic", | 257 | .cra_driver_name= "sha256-generic", |
@@ -355,9 +261,10 @@ static struct shash_alg sha256_algs[2] = { { | |||
355 | } | 261 | } |
356 | }, { | 262 | }, { |
357 | .digestsize = SHA224_DIGEST_SIZE, | 263 | .digestsize = SHA224_DIGEST_SIZE, |
358 | .init = sha224_init, | 264 | .init = sha224_base_init, |
359 | .update = crypto_sha256_update, | 265 | .update = crypto_sha256_update, |
360 | .final = sha224_final, | 266 | .final = sha256_final, |
267 | .finup = crypto_sha256_finup, | ||
361 | .descsize = sizeof(struct sha256_state), | 268 | .descsize = sizeof(struct sha256_state), |
362 | .base = { | 269 | .base = { |
363 | .cra_name = "sha224", | 270 | .cra_name = "sha224", |
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 1c3c3767e079..eba965d18bfc 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/crypto.h> | 18 | #include <linux/crypto.h> |
19 | #include <linux/types.h> | 19 | #include <linux/types.h> |
20 | #include <crypto/sha.h> | 20 | #include <crypto/sha.h> |
21 | #include <crypto/sha512_base.h> | ||
21 | #include <linux/percpu.h> | 22 | #include <linux/percpu.h> |
22 | #include <asm/byteorder.h> | 23 | #include <asm/byteorder.h> |
23 | #include <asm/unaligned.h> | 24 | #include <asm/unaligned.h> |
@@ -130,125 +131,42 @@ sha512_transform(u64 *state, const u8 *input) | |||
130 | a = b = c = d = e = f = g = h = t1 = t2 = 0; | 131 | a = b = c = d = e = f = g = h = t1 = t2 = 0; |
131 | } | 132 | } |
132 | 133 | ||
133 | static int | 134 | static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src, |
134 | sha512_init(struct shash_desc *desc) | 135 | int blocks) |
135 | { | 136 | { |
136 | struct sha512_state *sctx = shash_desc_ctx(desc); | 137 | while (blocks--) { |
137 | sctx->state[0] = SHA512_H0; | 138 | sha512_transform(sst->state, src); |
138 | sctx->state[1] = SHA512_H1; | 139 | src += SHA512_BLOCK_SIZE; |
139 | sctx->state[2] = SHA512_H2; | 140 | } |
140 | sctx->state[3] = SHA512_H3; | ||
141 | sctx->state[4] = SHA512_H4; | ||
142 | sctx->state[5] = SHA512_H5; | ||
143 | sctx->state[6] = SHA512_H6; | ||
144 | sctx->state[7] = SHA512_H7; | ||
145 | sctx->count[0] = sctx->count[1] = 0; | ||
146 | |||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | static int | ||
151 | sha384_init(struct shash_desc *desc) | ||
152 | { | ||
153 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
154 | sctx->state[0] = SHA384_H0; | ||
155 | sctx->state[1] = SHA384_H1; | ||
156 | sctx->state[2] = SHA384_H2; | ||
157 | sctx->state[3] = SHA384_H3; | ||
158 | sctx->state[4] = SHA384_H4; | ||
159 | sctx->state[5] = SHA384_H5; | ||
160 | sctx->state[6] = SHA384_H6; | ||
161 | sctx->state[7] = SHA384_H7; | ||
162 | sctx->count[0] = sctx->count[1] = 0; | ||
163 | |||
164 | return 0; | ||
165 | } | 141 | } |
166 | 142 | ||
167 | int crypto_sha512_update(struct shash_desc *desc, const u8 *data, | 143 | int crypto_sha512_update(struct shash_desc *desc, const u8 *data, |
168 | unsigned int len) | 144 | unsigned int len) |
169 | { | 145 | { |
170 | struct sha512_state *sctx = shash_desc_ctx(desc); | 146 | return sha512_base_do_update(desc, data, len, sha512_generic_block_fn); |
171 | |||
172 | unsigned int i, index, part_len; | ||
173 | |||
174 | /* Compute number of bytes mod 128 */ | ||
175 | index = sctx->count[0] & 0x7f; | ||
176 | |||
177 | /* Update number of bytes */ | ||
178 | if ((sctx->count[0] += len) < len) | ||
179 | sctx->count[1]++; | ||
180 | |||
181 | part_len = 128 - index; | ||
182 | |||
183 | /* Transform as many times as possible. */ | ||
184 | if (len >= part_len) { | ||
185 | memcpy(&sctx->buf[index], data, part_len); | ||
186 | sha512_transform(sctx->state, sctx->buf); | ||
187 | |||
188 | for (i = part_len; i + 127 < len; i+=128) | ||
189 | sha512_transform(sctx->state, &data[i]); | ||
190 | |||
191 | index = 0; | ||
192 | } else { | ||
193 | i = 0; | ||
194 | } | ||
195 | |||
196 | /* Buffer remaining input */ | ||
197 | memcpy(&sctx->buf[index], &data[i], len - i); | ||
198 | |||
199 | return 0; | ||
200 | } | 147 | } |
201 | EXPORT_SYMBOL(crypto_sha512_update); | 148 | EXPORT_SYMBOL(crypto_sha512_update); |
202 | 149 | ||
203 | static int | 150 | static int sha512_final(struct shash_desc *desc, u8 *hash) |
204 | sha512_final(struct shash_desc *desc, u8 *hash) | ||
205 | { | 151 | { |
206 | struct sha512_state *sctx = shash_desc_ctx(desc); | 152 | sha512_base_do_finalize(desc, sha512_generic_block_fn); |
207 | static u8 padding[128] = { 0x80, }; | 153 | return sha512_base_finish(desc, hash); |
208 | __be64 *dst = (__be64 *)hash; | ||
209 | __be64 bits[2]; | ||
210 | unsigned int index, pad_len; | ||
211 | int i; | ||
212 | |||
213 | /* Save number of bits */ | ||
214 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
215 | bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); | ||
216 | |||
217 | /* Pad out to 112 mod 128. */ | ||
218 | index = sctx->count[0] & 0x7f; | ||
219 | pad_len = (index < 112) ? (112 - index) : ((128+112) - index); | ||
220 | crypto_sha512_update(desc, padding, pad_len); | ||
221 | |||
222 | /* Append length (before padding) */ | ||
223 | crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits)); | ||
224 | |||
225 | /* Store state in digest */ | ||
226 | for (i = 0; i < 8; i++) | ||
227 | dst[i] = cpu_to_be64(sctx->state[i]); | ||
228 | |||
229 | /* Zeroize sensitive information. */ | ||
230 | memset(sctx, 0, sizeof(struct sha512_state)); | ||
231 | |||
232 | return 0; | ||
233 | } | 154 | } |
234 | 155 | ||
235 | static int sha384_final(struct shash_desc *desc, u8 *hash) | 156 | int crypto_sha512_finup(struct shash_desc *desc, const u8 *data, |
157 | unsigned int len, u8 *hash) | ||
236 | { | 158 | { |
237 | u8 D[64]; | 159 | sha512_base_do_update(desc, data, len, sha512_generic_block_fn); |
238 | 160 | return sha512_final(desc, hash); | |
239 | sha512_final(desc, D); | ||
240 | |||
241 | memcpy(hash, D, 48); | ||
242 | memzero_explicit(D, 64); | ||
243 | |||
244 | return 0; | ||
245 | } | 161 | } |
162 | EXPORT_SYMBOL(crypto_sha512_finup); | ||
246 | 163 | ||
247 | static struct shash_alg sha512_algs[2] = { { | 164 | static struct shash_alg sha512_algs[2] = { { |
248 | .digestsize = SHA512_DIGEST_SIZE, | 165 | .digestsize = SHA512_DIGEST_SIZE, |
249 | .init = sha512_init, | 166 | .init = sha512_base_init, |
250 | .update = crypto_sha512_update, | 167 | .update = crypto_sha512_update, |
251 | .final = sha512_final, | 168 | .final = sha512_final, |
169 | .finup = crypto_sha512_finup, | ||
252 | .descsize = sizeof(struct sha512_state), | 170 | .descsize = sizeof(struct sha512_state), |
253 | .base = { | 171 | .base = { |
254 | .cra_name = "sha512", | 172 | .cra_name = "sha512", |
@@ -259,9 +177,10 @@ static struct shash_alg sha512_algs[2] = { { | |||
259 | } | 177 | } |
260 | }, { | 178 | }, { |
261 | .digestsize = SHA384_DIGEST_SIZE, | 179 | .digestsize = SHA384_DIGEST_SIZE, |
262 | .init = sha384_init, | 180 | .init = sha384_base_init, |
263 | .update = crypto_sha512_update, | 181 | .update = crypto_sha512_update, |
264 | .final = sha384_final, | 182 | .final = sha512_final, |
183 | .finup = crypto_sha512_finup, | ||
265 | .descsize = sizeof(struct sha512_state), | 184 | .descsize = sizeof(struct sha512_state), |
266 | .base = { | 185 | .base = { |
267 | .cra_name = "sha384", | 186 | .cra_name = "sha384", |
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 4b9e23fa4204..1a2800107fc8 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c | |||
@@ -1155,9 +1155,9 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs, | |||
1155 | goto out_free_req; | 1155 | goto out_free_req; |
1156 | } | 1156 | } |
1157 | 1157 | ||
1158 | sg_init_table(sg, TVMEMSIZE); | ||
1159 | |||
1160 | k = *keysize + *b_size; | 1158 | k = *keysize + *b_size; |
1159 | sg_init_table(sg, DIV_ROUND_UP(k, PAGE_SIZE)); | ||
1160 | |||
1161 | if (k > PAGE_SIZE) { | 1161 | if (k > PAGE_SIZE) { |
1162 | sg_set_buf(sg, tvmem[0] + *keysize, | 1162 | sg_set_buf(sg, tvmem[0] + *keysize, |
1163 | PAGE_SIZE - *keysize); | 1163 | PAGE_SIZE - *keysize); |
diff --git a/crypto/testmgr.c b/crypto/testmgr.c index f4ed6d4205e7..f9bce3d7ee7f 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c | |||
@@ -1474,11 +1474,11 @@ static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template, | |||
1474 | for (j = 0; j < template[i].loops; j++) { | 1474 | for (j = 0; j < template[i].loops; j++) { |
1475 | err = crypto_rng_get_bytes(tfm, result, | 1475 | err = crypto_rng_get_bytes(tfm, result, |
1476 | template[i].rlen); | 1476 | template[i].rlen); |
1477 | if (err != template[i].rlen) { | 1477 | if (err < 0) { |
1478 | printk(KERN_ERR "alg: cprng: Failed to obtain " | 1478 | printk(KERN_ERR "alg: cprng: Failed to obtain " |
1479 | "the correct amount of random data for " | 1479 | "the correct amount of random data for " |
1480 | "%s (requested %d, got %d)\n", algo, | 1480 | "%s (requested %d)\n", algo, |
1481 | template[i].rlen, err); | 1481 | template[i].rlen); |
1482 | goto out; | 1482 | goto out; |
1483 | } | 1483 | } |
1484 | } | 1484 | } |
@@ -1505,7 +1505,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver, | |||
1505 | struct crypto_aead *tfm; | 1505 | struct crypto_aead *tfm; |
1506 | int err = 0; | 1506 | int err = 0; |
1507 | 1507 | ||
1508 | tfm = crypto_alloc_aead(driver, type, mask); | 1508 | tfm = crypto_alloc_aead(driver, type | CRYPTO_ALG_INTERNAL, mask); |
1509 | if (IS_ERR(tfm)) { | 1509 | if (IS_ERR(tfm)) { |
1510 | printk(KERN_ERR "alg: aead: Failed to load transform for %s: " | 1510 | printk(KERN_ERR "alg: aead: Failed to load transform for %s: " |
1511 | "%ld\n", driver, PTR_ERR(tfm)); | 1511 | "%ld\n", driver, PTR_ERR(tfm)); |
@@ -1534,7 +1534,7 @@ static int alg_test_cipher(const struct alg_test_desc *desc, | |||
1534 | struct crypto_cipher *tfm; | 1534 | struct crypto_cipher *tfm; |
1535 | int err = 0; | 1535 | int err = 0; |
1536 | 1536 | ||
1537 | tfm = crypto_alloc_cipher(driver, type, mask); | 1537 | tfm = crypto_alloc_cipher(driver, type | CRYPTO_ALG_INTERNAL, mask); |
1538 | if (IS_ERR(tfm)) { | 1538 | if (IS_ERR(tfm)) { |
1539 | printk(KERN_ERR "alg: cipher: Failed to load transform for " | 1539 | printk(KERN_ERR "alg: cipher: Failed to load transform for " |
1540 | "%s: %ld\n", driver, PTR_ERR(tfm)); | 1540 | "%s: %ld\n", driver, PTR_ERR(tfm)); |
@@ -1563,7 +1563,7 @@ static int alg_test_skcipher(const struct alg_test_desc *desc, | |||
1563 | struct crypto_ablkcipher *tfm; | 1563 | struct crypto_ablkcipher *tfm; |
1564 | int err = 0; | 1564 | int err = 0; |
1565 | 1565 | ||
1566 | tfm = crypto_alloc_ablkcipher(driver, type, mask); | 1566 | tfm = crypto_alloc_ablkcipher(driver, type | CRYPTO_ALG_INTERNAL, mask); |
1567 | if (IS_ERR(tfm)) { | 1567 | if (IS_ERR(tfm)) { |
1568 | printk(KERN_ERR "alg: skcipher: Failed to load transform for " | 1568 | printk(KERN_ERR "alg: skcipher: Failed to load transform for " |
1569 | "%s: %ld\n", driver, PTR_ERR(tfm)); | 1569 | "%s: %ld\n", driver, PTR_ERR(tfm)); |
@@ -1636,7 +1636,7 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver, | |||
1636 | struct crypto_ahash *tfm; | 1636 | struct crypto_ahash *tfm; |
1637 | int err; | 1637 | int err; |
1638 | 1638 | ||
1639 | tfm = crypto_alloc_ahash(driver, type, mask); | 1639 | tfm = crypto_alloc_ahash(driver, type | CRYPTO_ALG_INTERNAL, mask); |
1640 | if (IS_ERR(tfm)) { | 1640 | if (IS_ERR(tfm)) { |
1641 | printk(KERN_ERR "alg: hash: Failed to load transform for %s: " | 1641 | printk(KERN_ERR "alg: hash: Failed to load transform for %s: " |
1642 | "%ld\n", driver, PTR_ERR(tfm)); | 1642 | "%ld\n", driver, PTR_ERR(tfm)); |
@@ -1664,7 +1664,7 @@ static int alg_test_crc32c(const struct alg_test_desc *desc, | |||
1664 | if (err) | 1664 | if (err) |
1665 | goto out; | 1665 | goto out; |
1666 | 1666 | ||
1667 | tfm = crypto_alloc_shash(driver, type, mask); | 1667 | tfm = crypto_alloc_shash(driver, type | CRYPTO_ALG_INTERNAL, mask); |
1668 | if (IS_ERR(tfm)) { | 1668 | if (IS_ERR(tfm)) { |
1669 | printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: " | 1669 | printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: " |
1670 | "%ld\n", driver, PTR_ERR(tfm)); | 1670 | "%ld\n", driver, PTR_ERR(tfm)); |
@@ -1706,7 +1706,7 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver, | |||
1706 | struct crypto_rng *rng; | 1706 | struct crypto_rng *rng; |
1707 | int err; | 1707 | int err; |
1708 | 1708 | ||
1709 | rng = crypto_alloc_rng(driver, type, mask); | 1709 | rng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask); |
1710 | if (IS_ERR(rng)) { | 1710 | if (IS_ERR(rng)) { |
1711 | printk(KERN_ERR "alg: cprng: Failed to load transform for %s: " | 1711 | printk(KERN_ERR "alg: cprng: Failed to load transform for %s: " |
1712 | "%ld\n", driver, PTR_ERR(rng)); | 1712 | "%ld\n", driver, PTR_ERR(rng)); |
@@ -1733,7 +1733,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr, | |||
1733 | if (!buf) | 1733 | if (!buf) |
1734 | return -ENOMEM; | 1734 | return -ENOMEM; |
1735 | 1735 | ||
1736 | drng = crypto_alloc_rng(driver, type, mask); | 1736 | drng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask); |
1737 | if (IS_ERR(drng)) { | 1737 | if (IS_ERR(drng)) { |
1738 | printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for " | 1738 | printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for " |
1739 | "%s\n", driver); | 1739 | "%s\n", driver); |
@@ -1759,7 +1759,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr, | |||
1759 | ret = crypto_drbg_get_bytes_addtl(drng, | 1759 | ret = crypto_drbg_get_bytes_addtl(drng, |
1760 | buf, test->expectedlen, &addtl); | 1760 | buf, test->expectedlen, &addtl); |
1761 | } | 1761 | } |
1762 | if (ret <= 0) { | 1762 | if (ret < 0) { |
1763 | printk(KERN_ERR "alg: drbg: could not obtain random data for " | 1763 | printk(KERN_ERR "alg: drbg: could not obtain random data for " |
1764 | "driver %s\n", driver); | 1764 | "driver %s\n", driver); |
1765 | goto outbuf; | 1765 | goto outbuf; |
@@ -1774,7 +1774,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr, | |||
1774 | ret = crypto_drbg_get_bytes_addtl(drng, | 1774 | ret = crypto_drbg_get_bytes_addtl(drng, |
1775 | buf, test->expectedlen, &addtl); | 1775 | buf, test->expectedlen, &addtl); |
1776 | } | 1776 | } |
1777 | if (ret <= 0) { | 1777 | if (ret < 0) { |
1778 | printk(KERN_ERR "alg: drbg: could not obtain random data for " | 1778 | printk(KERN_ERR "alg: drbg: could not obtain random data for " |
1779 | "driver %s\n", driver); | 1779 | "driver %s\n", driver); |
1780 | goto outbuf; | 1780 | goto outbuf; |
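For context, a hedged sketch of why the tests above must now pass type | CRYPTO_ALG_INTERNAL: the allocation core hides internal-only helper algorithms from callers that do not opt in, roughly as below (illustrative function, not the literal crypto/api.c code):

	#include <linux/crypto.h>

	/* Illustrative only: unless the caller sets CRYPTO_ALG_INTERNAL in
	 * type or mask, extend the mask so the bit must be clear; internal
	 * helpers are then never handed out accidentally, while testmgr can
	 * still reach them by setting the bit explicitly. */
	static u32 example_effective_mask(u32 type, u32 mask)
	{
		if (!((type | mask) & CRYPTO_ALG_INTERNAL))
			mask |= CRYPTO_ALG_INTERNAL;
		return mask;
	}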
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index de57b38809c7..f48cf11c655e 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig | |||
@@ -101,6 +101,19 @@ config HW_RANDOM_BCM2835 | |||
101 | 101 | ||
102 | If unsure, say Y. | 102 | If unsure, say Y. |
103 | 103 | ||
104 | config HW_RANDOM_IPROC_RNG200 | ||
105 | tristate "Broadcom iProc RNG200 support" | ||
106 | depends on ARCH_BCM_IPROC | ||
107 | default HW_RANDOM | ||
108 | ---help--- | ||
109 | This driver provides kernel-side support for the RNG200 | ||
110 | hardware found on the Broadcom iProc SoCs. | ||
111 | |||
112 | To compile this driver as a module, choose M here: the | ||
113 | module will be called iproc-rng200. | ||
114 | |||
115 | If unsure, say Y. | ||
116 | |||
104 | config HW_RANDOM_GEODE | 117 | config HW_RANDOM_GEODE |
105 | tristate "AMD Geode HW Random Number Generator support" | 118 | tristate "AMD Geode HW Random Number Generator support" |
106 | depends on X86_32 && PCI | 119 | depends on X86_32 && PCI |
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 0b4cd57f4e24..055bb01510ad 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile | |||
@@ -28,5 +28,6 @@ obj-$(CONFIG_HW_RANDOM_POWERNV) += powernv-rng.o | |||
28 | obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o | 28 | obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o |
29 | obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o | 29 | obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o |
30 | obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o | 30 | obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o |
31 | obj-$(CONFIG_HW_RANDOM_IPROC_RNG200) += iproc-rng200.o | ||
31 | obj-$(CONFIG_HW_RANDOM_MSM) += msm-rng.o | 32 | obj-$(CONFIG_HW_RANDOM_MSM) += msm-rng.o |
32 | obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o | 33 | obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o |
diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c index ba6a65ac023b..d1494ecd9e11 100644 --- a/drivers/char/hw_random/bcm63xx-rng.c +++ b/drivers/char/hw_random/bcm63xx-rng.c | |||
@@ -13,24 +13,37 @@ | |||
13 | #include <linux/platform_device.h> | 13 | #include <linux/platform_device.h> |
14 | #include <linux/hw_random.h> | 14 | #include <linux/hw_random.h> |
15 | 15 | ||
16 | #include <bcm63xx_io.h> | 16 | #define RNG_CTRL 0x00 |
17 | #include <bcm63xx_regs.h> | 17 | #define RNG_EN (1 << 0) |
18 | |||
19 | #define RNG_STAT 0x04 | ||
20 | #define RNG_AVAIL_MASK (0xff000000) | ||
21 | |||
22 | #define RNG_DATA 0x08 | ||
23 | #define RNG_THRES 0x0c | ||
24 | #define RNG_MASK 0x10 | ||
18 | 25 | ||
19 | struct bcm63xx_rng_priv { | 26 | struct bcm63xx_rng_priv { |
27 | struct hwrng rng; | ||
20 | struct clk *clk; | 28 | struct clk *clk; |
21 | void __iomem *regs; | 29 | void __iomem *regs; |
22 | }; | 30 | }; |
23 | 31 | ||
24 | #define to_rng_priv(rng) ((struct bcm63xx_rng_priv *)rng->priv) | 32 | #define to_rng_priv(rng) container_of(rng, struct bcm63xx_rng_priv, rng) |
25 | 33 | ||
26 | static int bcm63xx_rng_init(struct hwrng *rng) | 34 | static int bcm63xx_rng_init(struct hwrng *rng) |
27 | { | 35 | { |
28 | struct bcm63xx_rng_priv *priv = to_rng_priv(rng); | 36 | struct bcm63xx_rng_priv *priv = to_rng_priv(rng); |
29 | u32 val; | 37 | u32 val; |
38 | int error; | ||
39 | |||
40 | error = clk_prepare_enable(priv->clk); | ||
41 | if (error) | ||
42 | return error; | ||
30 | 43 | ||
31 | val = bcm_readl(priv->regs + RNG_CTRL); | 44 | val = __raw_readl(priv->regs + RNG_CTRL); |
32 | val |= RNG_EN; | 45 | val |= RNG_EN; |
33 | bcm_writel(val, priv->regs + RNG_CTRL); | 46 | __raw_writel(val, priv->regs + RNG_CTRL); |
34 | 47 | ||
35 | return 0; | 48 | return 0; |
36 | } | 49 | } |
@@ -40,23 +53,25 @@ static void bcm63xx_rng_cleanup(struct hwrng *rng) | |||
40 | struct bcm63xx_rng_priv *priv = to_rng_priv(rng); | 53 | struct bcm63xx_rng_priv *priv = to_rng_priv(rng); |
41 | u32 val; | 54 | u32 val; |
42 | 55 | ||
43 | val = bcm_readl(priv->regs + RNG_CTRL); | 56 | val = __raw_readl(priv->regs + RNG_CTRL); |
44 | val &= ~RNG_EN; | 57 | val &= ~RNG_EN; |
45 | bcm_writel(val, priv->regs + RNG_CTRL); | 58 | __raw_writel(val, priv->regs + RNG_CTRL); |
59 | |||
60 | clk_disable_unprepare(priv->clk); | ||
46 | } | 61 | } |
47 | 62 | ||
48 | static int bcm63xx_rng_data_present(struct hwrng *rng, int wait) | 63 | static int bcm63xx_rng_data_present(struct hwrng *rng, int wait) |
49 | { | 64 | { |
50 | struct bcm63xx_rng_priv *priv = to_rng_priv(rng); | 65 | struct bcm63xx_rng_priv *priv = to_rng_priv(rng); |
51 | 66 | ||
52 | return bcm_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK; | 67 | return __raw_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK; |
53 | } | 68 | } |
54 | 69 | ||
55 | static int bcm63xx_rng_data_read(struct hwrng *rng, u32 *data) | 70 | static int bcm63xx_rng_data_read(struct hwrng *rng, u32 *data) |
56 | { | 71 | { |
57 | struct bcm63xx_rng_priv *priv = to_rng_priv(rng); | 72 | struct bcm63xx_rng_priv *priv = to_rng_priv(rng); |
58 | 73 | ||
59 | *data = bcm_readl(priv->regs + RNG_DATA); | 74 | *data = __raw_readl(priv->regs + RNG_DATA); |
60 | 75 | ||
61 | return 4; | 76 | return 4; |
62 | } | 77 | } |
@@ -72,94 +87,53 @@ static int bcm63xx_rng_probe(struct platform_device *pdev) | |||
72 | r = platform_get_resource(pdev, IORESOURCE_MEM, 0); | 87 | r = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
73 | if (!r) { | 88 | if (!r) { |
74 | dev_err(&pdev->dev, "no iomem resource\n"); | 89 | dev_err(&pdev->dev, "no iomem resource\n"); |
75 | ret = -ENXIO; | 90 | return -ENXIO; |
76 | goto out; | ||
77 | } | 91 | } |
78 | 92 | ||
79 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 93 | priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); |
80 | if (!priv) { | 94 | if (!priv) |
81 | dev_err(&pdev->dev, "no memory for private structure\n"); | 95 | return -ENOMEM; |
82 | ret = -ENOMEM; | 96 | |
83 | goto out; | 97 | priv->rng.name = pdev->name; |
98 | priv->rng.init = bcm63xx_rng_init; | ||
99 | priv->rng.cleanup = bcm63xx_rng_cleanup; | ||
100 | priv->rng.data_present = bcm63xx_rng_data_present; | ||
101 | priv->rng.data_read = bcm63xx_rng_data_read; | ||
102 | |||
103 | priv->clk = devm_clk_get(&pdev->dev, "ipsec"); | ||
104 | if (IS_ERR(priv->clk)) { | ||
105 | error = PTR_ERR(priv->clk); | ||
106 | dev_err(&pdev->dev, "no clock for device: %d\n", error); | ||
107 | return error; | ||
84 | } | 108 | } |
85 | 109 | ||
86 | rng = kzalloc(sizeof(*rng), GFP_KERNEL); | ||
87 | if (!rng) { | ||
88 | dev_err(&pdev->dev, "no memory for rng structure\n"); | ||
89 | ret = -ENOMEM; | ||
90 | goto out_free_priv; | ||
91 | } | ||
92 | |||
93 | platform_set_drvdata(pdev, rng); | ||
94 | rng->priv = (unsigned long)priv; | ||
95 | rng->name = pdev->name; | ||
96 | rng->init = bcm63xx_rng_init; | ||
97 | rng->cleanup = bcm63xx_rng_cleanup; | ||
98 | rng->data_present = bcm63xx_rng_data_present; | ||
99 | rng->data_read = bcm63xx_rng_data_read; | ||
100 | |||
101 | clk = clk_get(&pdev->dev, "ipsec"); | ||
102 | if (IS_ERR(clk)) { | ||
103 | dev_err(&pdev->dev, "no clock for device\n"); | ||
104 | ret = PTR_ERR(clk); | ||
105 | goto out_free_rng; | ||
106 | } | ||
107 | |||
108 | priv->clk = clk; | ||
109 | |||
110 | if (!devm_request_mem_region(&pdev->dev, r->start, | 110 | if (!devm_request_mem_region(&pdev->dev, r->start, |
111 | resource_size(r), pdev->name)) { | 111 | resource_size(r), pdev->name)) { |
112 | dev_err(&pdev->dev, "request mem failed"); | 112 | dev_err(&pdev->dev, "request mem failed"); |
113 | ret = -ENOMEM; | 113 | return -EBUSY; |
114 | goto out_free_rng; | ||
115 | } | 114 | } |
116 | 115 | ||
117 | priv->regs = devm_ioremap_nocache(&pdev->dev, r->start, | 116 | priv->regs = devm_ioremap_nocache(&pdev->dev, r->start, |
118 | resource_size(r)); | 117 | resource_size(r)); |
119 | if (!priv->regs) { | 118 | if (!priv->regs) { |
120 | dev_err(&pdev->dev, "ioremap failed"); | 119 | dev_err(&pdev->dev, "ioremap failed"); |
121 | ret = -ENOMEM; | 120 | return -ENOMEM; |
122 | goto out_free_rng; | ||
123 | } | 121 | } |
124 | 122 | ||
125 | clk_enable(clk); | 123 | error = devm_hwrng_register(&pdev->dev, &priv->rng); |
126 | 124 | if (error) { | |
127 | ret = hwrng_register(rng); | 125 | dev_err(&pdev->dev, "failed to register rng device: %d\n", |
128 | if (ret) { | 126 | error); |
129 | dev_err(&pdev->dev, "failed to register rng device\n"); | 127 | return error; |
130 | goto out_clk_disable; | ||
131 | } | 128 | } |
132 | 129 | ||
133 | dev_info(&pdev->dev, "registered RNG driver\n"); | 130 | dev_info(&pdev->dev, "registered RNG driver\n"); |
134 | 131 | ||
135 | return 0; | 132 | return 0; |
136 | |||
137 | out_clk_disable: | ||
138 | clk_disable(clk); | ||
139 | out_free_rng: | ||
140 | kfree(rng); | ||
141 | out_free_priv: | ||
142 | kfree(priv); | ||
143 | out: | ||
144 | return ret; | ||
145 | } | ||
146 | |||
147 | static int bcm63xx_rng_remove(struct platform_device *pdev) | ||
148 | { | ||
149 | struct hwrng *rng = platform_get_drvdata(pdev); | ||
150 | struct bcm63xx_rng_priv *priv = to_rng_priv(rng); | ||
151 | |||
152 | hwrng_unregister(rng); | ||
153 | clk_disable(priv->clk); | ||
154 | kfree(priv); | ||
155 | kfree(rng); | ||
156 | |||
157 | return 0; | ||
158 | } | 133 | } |
159 | 134 | ||
160 | static struct platform_driver bcm63xx_rng_driver = { | 135 | static struct platform_driver bcm63xx_rng_driver = { |
161 | .probe = bcm63xx_rng_probe, | 136 | .probe = bcm63xx_rng_probe, |
162 | .remove = bcm63xx_rng_remove, | ||
163 | .driver = { | 137 | .driver = { |
164 | .name = "bcm63xx-rng", | 138 | .name = "bcm63xx-rng", |
165 | }, | 139 | }, |
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 32a8a867f7f8..571ef61f8ea9 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c | |||
@@ -179,7 +179,8 @@ skip_init: | |||
179 | add_early_randomness(rng); | 179 | add_early_randomness(rng); |
180 | 180 | ||
181 | current_quality = rng->quality ? : default_quality; | 181 | current_quality = rng->quality ? : default_quality; |
182 | current_quality &= 1023; | 182 | if (current_quality > 1024) |
183 | current_quality = 1024; | ||
183 | 184 | ||
184 | if (current_quality == 0 && hwrng_fill) | 185 | if (current_quality == 0 && hwrng_fill) |
185 | kthread_stop(hwrng_fill); | 186 | kthread_stop(hwrng_fill); |
@@ -536,6 +537,48 @@ void hwrng_unregister(struct hwrng *rng) | |||
536 | } | 537 | } |
537 | EXPORT_SYMBOL_GPL(hwrng_unregister); | 538 | EXPORT_SYMBOL_GPL(hwrng_unregister); |
538 | 539 | ||
540 | static void devm_hwrng_release(struct device *dev, void *res) | ||
541 | { | ||
542 | hwrng_unregister(*(struct hwrng **)res); | ||
543 | } | ||
544 | |||
545 | static int devm_hwrng_match(struct device *dev, void *res, void *data) | ||
546 | { | ||
547 | struct hwrng **r = res; | ||
548 | |||
549 | if (WARN_ON(!r || !*r)) | ||
550 | return 0; | ||
551 | |||
552 | return *r == data; | ||
553 | } | ||
554 | |||
555 | int devm_hwrng_register(struct device *dev, struct hwrng *rng) | ||
556 | { | ||
557 | struct hwrng **ptr; | ||
558 | int error; | ||
559 | |||
560 | ptr = devres_alloc(devm_hwrng_release, sizeof(*ptr), GFP_KERNEL); | ||
561 | if (!ptr) | ||
562 | return -ENOMEM; | ||
563 | |||
564 | error = hwrng_register(rng); | ||
565 | if (error) { | ||
566 | devres_free(ptr); | ||
567 | return error; | ||
568 | } | ||
569 | |||
570 | *ptr = rng; | ||
571 | devres_add(dev, ptr); | ||
572 | return 0; | ||
573 | } | ||
574 | EXPORT_SYMBOL_GPL(devm_hwrng_register); | ||
575 | |||
576 | void devm_hwrng_unregister(struct device *dev, struct hwrng *rng) | ||
577 | { | ||
578 | devres_release(dev, devm_hwrng_release, devm_hwrng_match, rng); | ||
579 | } | ||
580 | EXPORT_SYMBOL_GPL(devm_hwrng_unregister); | ||
581 | |||
539 | static int __init hwrng_modinit(void) | 582 | static int __init hwrng_modinit(void) |
540 | { | 583 | { |
541 | return register_miscdev(); | 584 | return register_miscdev(); |
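For illustration (not part of this patch): with devm_hwrng_register() a platform driver can drop its remove callback entirely, as the conversions below show. A minimal hypothetical probe, with foo_rng_probe/foo_rng_read as assumed names:

	#include <linux/hw_random.h>
	#include <linux/platform_device.h>

	static int foo_rng_read(struct hwrng *rng, void *buf, size_t max,
				bool wait);

	static int foo_rng_probe(struct platform_device *pdev)
	{
		struct hwrng *rng;

		rng = devm_kzalloc(&pdev->dev, sizeof(*rng), GFP_KERNEL);
		if (!rng)
			return -ENOMEM;

		rng->name = pdev->name;
		rng->read = foo_rng_read;

		/* no .remove needed: devres unregisters the hwrng when the
		 * device goes away */
		return devm_hwrng_register(&pdev->dev, rng);
	}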
diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c index fed0830bf724..dc4701fd814f 100644 --- a/drivers/char/hw_random/exynos-rng.c +++ b/drivers/char/hw_random/exynos-rng.c | |||
@@ -131,16 +131,7 @@ static int exynos_rng_probe(struct platform_device *pdev) | |||
131 | pm_runtime_use_autosuspend(&pdev->dev); | 131 | pm_runtime_use_autosuspend(&pdev->dev); |
132 | pm_runtime_enable(&pdev->dev); | 132 | pm_runtime_enable(&pdev->dev); |
133 | 133 | ||
134 | return hwrng_register(&exynos_rng->rng); | 134 | return devm_hwrng_register(&pdev->dev, &exynos_rng->rng); |
135 | } | ||
136 | |||
137 | static int exynos_rng_remove(struct platform_device *pdev) | ||
138 | { | ||
139 | struct exynos_rng *exynos_rng = platform_get_drvdata(pdev); | ||
140 | |||
141 | hwrng_unregister(&exynos_rng->rng); | ||
142 | |||
143 | return 0; | ||
144 | } | 135 | } |
145 | 136 | ||
146 | #ifdef CONFIG_PM | 137 | #ifdef CONFIG_PM |
@@ -172,7 +163,6 @@ static struct platform_driver exynos_rng_driver = { | |||
172 | .pm = &exynos_rng_pm_ops, | 163 | .pm = &exynos_rng_pm_ops, |
173 | }, | 164 | }, |
174 | .probe = exynos_rng_probe, | 165 | .probe = exynos_rng_probe, |
175 | .remove = exynos_rng_remove, | ||
176 | }; | 166 | }; |
177 | 167 | ||
178 | module_platform_driver(exynos_rng_driver); | 168 | module_platform_driver(exynos_rng_driver); |
diff --git a/drivers/char/hw_random/iproc-rng200.c b/drivers/char/hw_random/iproc-rng200.c new file mode 100644 index 000000000000..3eaf7cb96d36 --- /dev/null +++ b/drivers/char/hw_random/iproc-rng200.c | |||
@@ -0,0 +1,239 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 Broadcom Corporation | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License as | ||
6 | * published by the Free Software Foundation version 2. | ||
7 | * | ||
8 | * This program is distributed "as is" WITHOUT ANY WARRANTY of any | ||
9 | * kind, whether express or implied; without even the implied warranty | ||
10 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | */ | ||
13 | /* | ||
14 | * DESCRIPTION: The Broadcom iProc RNG200 Driver | ||
15 | */ | ||
16 | |||
17 | #include <linux/hw_random.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/io.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/of_address.h> | ||
23 | #include <linux/of_platform.h> | ||
24 | #include <linux/platform_device.h> | ||
25 | #include <linux/delay.h> | ||
26 | |||
27 | /* Registers */ | ||
28 | #define RNG_CTRL_OFFSET 0x00 | ||
29 | #define RNG_CTRL_RNG_RBGEN_MASK 0x00001FFF | ||
30 | #define RNG_CTRL_RNG_RBGEN_ENABLE 0x00000001 | ||
31 | #define RNG_CTRL_RNG_RBGEN_DISABLE 0x00000000 | ||
32 | |||
33 | #define RNG_SOFT_RESET_OFFSET 0x04 | ||
34 | #define RNG_SOFT_RESET 0x00000001 | ||
35 | |||
36 | #define RBG_SOFT_RESET_OFFSET 0x08 | ||
37 | #define RBG_SOFT_RESET 0x00000001 | ||
38 | |||
39 | #define RNG_INT_STATUS_OFFSET 0x18 | ||
40 | #define RNG_INT_STATUS_MASTER_FAIL_LOCKOUT_IRQ_MASK 0x80000000 | ||
41 | #define RNG_INT_STATUS_STARTUP_TRANSITIONS_MET_IRQ_MASK 0x00020000 | ||
42 | #define RNG_INT_STATUS_NIST_FAIL_IRQ_MASK 0x00000020 | ||
43 | #define RNG_INT_STATUS_TOTAL_BITS_COUNT_IRQ_MASK 0x00000001 | ||
44 | |||
45 | #define RNG_FIFO_DATA_OFFSET 0x20 | ||
46 | |||
47 | #define RNG_FIFO_COUNT_OFFSET 0x24 | ||
48 | #define RNG_FIFO_COUNT_RNG_FIFO_COUNT_MASK 0x000000FF | ||
49 | |||
50 | struct iproc_rng200_dev { | ||
51 | struct hwrng rng; | ||
52 | void __iomem *base; | ||
53 | }; | ||
54 | |||
55 | #define to_rng_priv(rng) container_of(rng, struct iproc_rng200_dev, rng) | ||
56 | |||
57 | static void iproc_rng200_restart(void __iomem *rng_base) | ||
58 | { | ||
59 | uint32_t val; | ||
60 | |||
61 | /* Disable RBG */ | ||
62 | val = ioread32(rng_base + RNG_CTRL_OFFSET); | ||
63 | val &= ~RNG_CTRL_RNG_RBGEN_MASK; | ||
64 | val |= RNG_CTRL_RNG_RBGEN_DISABLE; | ||
65 | iowrite32(val, rng_base + RNG_CTRL_OFFSET); | ||
66 | |||
67 | /* Clear all interrupt status */ | ||
68 | iowrite32(0xFFFFFFFFUL, rng_base + RNG_INT_STATUS_OFFSET); | ||
69 | |||
70 | /* Reset RNG and RBG */ | ||
71 | val = ioread32(rng_base + RBG_SOFT_RESET_OFFSET); | ||
72 | val |= RBG_SOFT_RESET; | ||
73 | iowrite32(val, rng_base + RBG_SOFT_RESET_OFFSET); | ||
74 | |||
75 | val = ioread32(rng_base + RNG_SOFT_RESET_OFFSET); | ||
76 | val |= RNG_SOFT_RESET; | ||
77 | iowrite32(val, rng_base + RNG_SOFT_RESET_OFFSET); | ||
78 | |||
79 | val = ioread32(rng_base + RNG_SOFT_RESET_OFFSET); | ||
80 | val &= ~RNG_SOFT_RESET; | ||
81 | iowrite32(val, rng_base + RNG_SOFT_RESET_OFFSET); | ||
82 | |||
83 | val = ioread32(rng_base + RBG_SOFT_RESET_OFFSET); | ||
84 | val &= ~RBG_SOFT_RESET; | ||
85 | iowrite32(val, rng_base + RBG_SOFT_RESET_OFFSET); | ||
86 | |||
87 | /* Enable RBG */ | ||
88 | val = ioread32(rng_base + RNG_CTRL_OFFSET); | ||
89 | val &= ~RNG_CTRL_RNG_RBGEN_MASK; | ||
90 | val |= RNG_CTRL_RNG_RBGEN_ENABLE; | ||
91 | iowrite32(val, rng_base + RNG_CTRL_OFFSET); | ||
92 | } | ||
93 | |||
94 | static int iproc_rng200_read(struct hwrng *rng, void *buf, size_t max, | ||
95 | bool wait) | ||
96 | { | ||
97 | struct iproc_rng200_dev *priv = to_rng_priv(rng); | ||
98 | uint32_t num_remaining = max; | ||
99 | uint32_t status; | ||
100 | |||
101 | #define MAX_RESETS_PER_READ 1 | ||
102 | uint32_t num_resets = 0; | ||
103 | |||
104 | #define MAX_IDLE_TIME (1 * HZ) | ||
105 | unsigned long idle_endtime = jiffies + MAX_IDLE_TIME; | ||
106 | |||
107 | while ((num_remaining > 0) && time_before(jiffies, idle_endtime)) { | ||
108 | |||
109 | /* Is RNG sane? If not, reset it. */ | ||
110 | status = ioread32(priv->base + RNG_INT_STATUS_OFFSET); | ||
111 | if ((status & (RNG_INT_STATUS_MASTER_FAIL_LOCKOUT_IRQ_MASK | | ||
112 | RNG_INT_STATUS_NIST_FAIL_IRQ_MASK)) != 0) { | ||
113 | |||
114 | if (num_resets >= MAX_RESETS_PER_READ) | ||
115 | return max - num_remaining; | ||
116 | |||
117 | iproc_rng200_restart(priv->base); | ||
118 | num_resets++; | ||
119 | } | ||
120 | |||
121 | /* Are there any random numbers available? */ | ||
122 | if ((ioread32(priv->base + RNG_FIFO_COUNT_OFFSET) & | ||
123 | RNG_FIFO_COUNT_RNG_FIFO_COUNT_MASK) > 0) { | ||
124 | |||
125 | if (num_remaining >= sizeof(uint32_t)) { | ||
126 | /* Buffer has room to store entire word */ | ||
127 | *(uint32_t *)buf = ioread32(priv->base + | ||
128 | RNG_FIFO_DATA_OFFSET); | ||
129 | buf += sizeof(uint32_t); | ||
130 | num_remaining -= sizeof(uint32_t); | ||
131 | } else { | ||
132 | /* Buffer can only store partial word */ | ||
133 | uint32_t rnd_number = ioread32(priv->base + | ||
134 | RNG_FIFO_DATA_OFFSET); | ||
135 | memcpy(buf, &rnd_number, num_remaining); | ||
136 | buf += num_remaining; | ||
137 | num_remaining = 0; | ||
138 | } | ||
139 | |||
140 | /* Reset the IDLE timeout */ | ||
141 | idle_endtime = jiffies + MAX_IDLE_TIME; | ||
142 | } else { | ||
143 | if (!wait) | ||
144 | /* Cannot wait, return immediately */ | ||
145 | return max - num_remaining; | ||
146 | |||
147 | /* Can wait, give others chance to run */ | ||
148 | usleep_range(min(num_remaining * 10, 500U), 500); | ||
149 | } | ||
150 | } | ||
151 | |||
152 | return max - num_remaining; | ||
153 | } | ||
154 | |||
155 | static int iproc_rng200_init(struct hwrng *rng) | ||
156 | { | ||
157 | struct iproc_rng200_dev *priv = to_rng_priv(rng); | ||
158 | uint32_t val; | ||
159 | |||
160 | /* Setup RNG. */ | ||
161 | val = ioread32(priv->base + RNG_CTRL_OFFSET); | ||
162 | val &= ~RNG_CTRL_RNG_RBGEN_MASK; | ||
163 | val |= RNG_CTRL_RNG_RBGEN_ENABLE; | ||
164 | iowrite32(val, priv->base + RNG_CTRL_OFFSET); | ||
165 | |||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | static void iproc_rng200_cleanup(struct hwrng *rng) | ||
170 | { | ||
171 | struct iproc_rng200_dev *priv = to_rng_priv(rng); | ||
172 | uint32_t val; | ||
173 | |||
174 | /* Disable RNG hardware */ | ||
175 | val = ioread32(priv->base + RNG_CTRL_OFFSET); | ||
176 | val &= ~RNG_CTRL_RNG_RBGEN_MASK; | ||
177 | val |= RNG_CTRL_RNG_RBGEN_DISABLE; | ||
178 | iowrite32(val, priv->base + RNG_CTRL_OFFSET); | ||
179 | } | ||
180 | |||
181 | static int iproc_rng200_probe(struct platform_device *pdev) | ||
182 | { | ||
183 | struct iproc_rng200_dev *priv; | ||
184 | struct resource *res; | ||
185 | struct device *dev = &pdev->dev; | ||
186 | int ret; | ||
187 | |||
188 | priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); | ||
189 | if (!priv) | ||
190 | return -ENOMEM; | ||
191 | |||
192 | /* Map peripheral */ | ||
193 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
194 | if (!res) { | ||
195 | dev_err(dev, "failed to get rng resources\n"); | ||
196 | return -EINVAL; | ||
197 | } | ||
198 | |||
199 | priv->base = devm_ioremap_resource(dev, res); | ||
200 | if (IS_ERR(priv->base)) { | ||
201 | dev_err(dev, "failed to remap rng regs\n"); | ||
202 | return PTR_ERR(priv->base); | ||
203 | } | ||
204 | |||
205 | priv->rng.name = "iproc-rng200", | ||
206 | priv->rng.read = iproc_rng200_read, | ||
207 | priv->rng.init = iproc_rng200_init, | ||
208 | priv->rng.cleanup = iproc_rng200_cleanup, | ||
209 | |||
210 | /* Register driver */ | ||
211 | ret = devm_hwrng_register(dev, &priv->rng); | ||
212 | if (ret) { | ||
213 | dev_err(dev, "hwrng registration failed\n"); | ||
214 | return ret; | ||
215 | } | ||
216 | |||
217 | dev_info(dev, "hwrng registered\n"); | ||
218 | |||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | static const struct of_device_id iproc_rng200_of_match[] = { | ||
223 | { .compatible = "brcm,iproc-rng200", }, | ||
224 | {}, | ||
225 | }; | ||
226 | MODULE_DEVICE_TABLE(of, iproc_rng200_of_match); | ||
227 | |||
228 | static struct platform_driver iproc_rng200_driver = { | ||
229 | .driver = { | ||
230 | .name = "iproc-rng200", | ||
231 | .of_match_table = iproc_rng200_of_match, | ||
232 | }, | ||
233 | .probe = iproc_rng200_probe, | ||
234 | }; | ||
235 | module_platform_driver(iproc_rng200_driver); | ||
236 | |||
237 | MODULE_AUTHOR("Broadcom"); | ||
238 | MODULE_DESCRIPTION("iProc RNG200 Random Number Generator driver"); | ||
239 | MODULE_LICENSE("GPL v2"); | ||
diff --git a/drivers/char/hw_random/msm-rng.c b/drivers/char/hw_random/msm-rng.c index cea1c703d62f..96fb986402eb 100644 --- a/drivers/char/hw_random/msm-rng.c +++ b/drivers/char/hw_random/msm-rng.c | |||
@@ -157,7 +157,7 @@ static int msm_rng_probe(struct platform_device *pdev) | |||
157 | rng->hwrng.cleanup = msm_rng_cleanup, | 157 | rng->hwrng.cleanup = msm_rng_cleanup, |
158 | rng->hwrng.read = msm_rng_read, | 158 | rng->hwrng.read = msm_rng_read, |
159 | 159 | ||
160 | ret = hwrng_register(&rng->hwrng); | 160 | ret = devm_hwrng_register(&pdev->dev, &rng->hwrng); |
161 | if (ret) { | 161 | if (ret) { |
162 | dev_err(&pdev->dev, "failed to register hwrng\n"); | 162 | dev_err(&pdev->dev, "failed to register hwrng\n"); |
163 | return ret; | 163 | return ret; |
@@ -166,14 +166,6 @@ static int msm_rng_probe(struct platform_device *pdev) | |||
166 | return 0; | 166 | return 0; |
167 | } | 167 | } |
168 | 168 | ||
169 | static int msm_rng_remove(struct platform_device *pdev) | ||
170 | { | ||
171 | struct msm_rng *rng = platform_get_drvdata(pdev); | ||
172 | |||
173 | hwrng_unregister(&rng->hwrng); | ||
174 | return 0; | ||
175 | } | ||
176 | |||
177 | static const struct of_device_id msm_rng_of_match[] = { | 169 | static const struct of_device_id msm_rng_of_match[] = { |
178 | { .compatible = "qcom,prng", }, | 170 | { .compatible = "qcom,prng", }, |
179 | {} | 171 | {} |
@@ -182,7 +174,6 @@ MODULE_DEVICE_TABLE(of, msm_rng_of_match); | |||
182 | 174 | ||
183 | static struct platform_driver msm_rng_driver = { | 175 | static struct platform_driver msm_rng_driver = { |
184 | .probe = msm_rng_probe, | 176 | .probe = msm_rng_probe, |
185 | .remove = msm_rng_remove, | ||
186 | .driver = { | 177 | .driver = { |
187 | .name = KBUILD_MODNAME, | 178 | .name = KBUILD_MODNAME, |
188 | .of_match_table = of_match_ptr(msm_rng_of_match), | 179 | .of_match_table = of_match_ptr(msm_rng_of_match), |
diff --git a/drivers/char/hw_random/octeon-rng.c b/drivers/char/hw_random/octeon-rng.c index be1c3f607398..6234a4a19b56 100644 --- a/drivers/char/hw_random/octeon-rng.c +++ b/drivers/char/hw_random/octeon-rng.c | |||
@@ -105,7 +105,7 @@ static int octeon_rng_probe(struct platform_device *pdev) | |||
105 | return 0; | 105 | return 0; |
106 | } | 106 | } |
107 | 107 | ||
108 | static int __exit octeon_rng_remove(struct platform_device *pdev) | 108 | static int octeon_rng_remove(struct platform_device *pdev) |
109 | { | 109 | { |
110 | struct hwrng *rng = platform_get_drvdata(pdev); | 110 | struct hwrng *rng = platform_get_drvdata(pdev); |
111 | 111 | ||
@@ -119,7 +119,7 @@ static struct platform_driver octeon_rng_driver = { | |||
119 | .name = "octeon_rng", | 119 | .name = "octeon_rng", |
120 | }, | 120 | }, |
121 | .probe = octeon_rng_probe, | 121 | .probe = octeon_rng_probe, |
122 | .remove = __exit_p(octeon_rng_remove), | 122 | .remove = octeon_rng_remove, |
123 | }; | 123 | }; |
124 | 124 | ||
125 | module_platform_driver(octeon_rng_driver); | 125 | module_platform_driver(octeon_rng_driver); |
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index d14dcf788f17..8a1432e8bb80 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c | |||
@@ -236,7 +236,7 @@ static int omap4_rng_init(struct omap_rng_dev *priv) | |||
236 | u32 val; | 236 | u32 val; |
237 | 237 | ||
238 | /* Return if RNG is already running. */ | 238 | /* Return if RNG is already running. */ |
239 | if (omap_rng_read(priv, RNG_CONFIG_REG) & RNG_CONTROL_ENABLE_TRNG_MASK) | 239 | if (omap_rng_read(priv, RNG_CONTROL_REG) & RNG_CONTROL_ENABLE_TRNG_MASK) |
240 | return 0; | 240 | return 0; |
241 | 241 | ||
242 | val = RNG_CONFIG_MIN_REFIL_CYCLES << RNG_CONFIG_MIN_REFIL_CYCLES_SHIFT; | 242 | val = RNG_CONFIG_MIN_REFIL_CYCLES << RNG_CONFIG_MIN_REFIL_CYCLES_SHIFT; |
@@ -262,7 +262,7 @@ static void omap4_rng_cleanup(struct omap_rng_dev *priv) | |||
262 | 262 | ||
263 | val = omap_rng_read(priv, RNG_CONTROL_REG); | 263 | val = omap_rng_read(priv, RNG_CONTROL_REG); |
264 | val &= ~RNG_CONTROL_ENABLE_TRNG_MASK; | 264 | val &= ~RNG_CONTROL_ENABLE_TRNG_MASK; |
265 | omap_rng_write(priv, RNG_CONFIG_REG, val); | 265 | omap_rng_write(priv, RNG_CONTROL_REG, val); |
266 | } | 266 | } |
267 | 267 | ||
268 | static irqreturn_t omap4_rng_irq(int irq, void *dev_id) | 268 | static irqreturn_t omap4_rng_irq(int irq, void *dev_id) |
@@ -408,7 +408,7 @@ err_ioremap: | |||
408 | return ret; | 408 | return ret; |
409 | } | 409 | } |
410 | 410 | ||
411 | static int __exit omap_rng_remove(struct platform_device *pdev) | 411 | static int omap_rng_remove(struct platform_device *pdev) |
412 | { | 412 | { |
413 | struct omap_rng_dev *priv = platform_get_drvdata(pdev); | 413 | struct omap_rng_dev *priv = platform_get_drvdata(pdev); |
414 | 414 | ||
@@ -422,9 +422,7 @@ static int __exit omap_rng_remove(struct platform_device *pdev) | |||
422 | return 0; | 422 | return 0; |
423 | } | 423 | } |
424 | 424 | ||
425 | #ifdef CONFIG_PM_SLEEP | 425 | static int __maybe_unused omap_rng_suspend(struct device *dev) |
426 | |||
427 | static int omap_rng_suspend(struct device *dev) | ||
428 | { | 426 | { |
429 | struct omap_rng_dev *priv = dev_get_drvdata(dev); | 427 | struct omap_rng_dev *priv = dev_get_drvdata(dev); |
430 | 428 | ||
@@ -434,7 +432,7 @@ static int omap_rng_suspend(struct device *dev) | |||
434 | return 0; | 432 | return 0; |
435 | } | 433 | } |
436 | 434 | ||
437 | static int omap_rng_resume(struct device *dev) | 435 | static int __maybe_unused omap_rng_resume(struct device *dev) |
438 | { | 436 | { |
439 | struct omap_rng_dev *priv = dev_get_drvdata(dev); | 437 | struct omap_rng_dev *priv = dev_get_drvdata(dev); |
440 | 438 | ||
@@ -445,22 +443,15 @@ static int omap_rng_resume(struct device *dev) | |||
445 | } | 443 | } |
446 | 444 | ||
447 | static SIMPLE_DEV_PM_OPS(omap_rng_pm, omap_rng_suspend, omap_rng_resume); | 445 | static SIMPLE_DEV_PM_OPS(omap_rng_pm, omap_rng_suspend, omap_rng_resume); |
448 | #define OMAP_RNG_PM (&omap_rng_pm) | ||
449 | |||
450 | #else | ||
451 | |||
452 | #define OMAP_RNG_PM NULL | ||
453 | |||
454 | #endif | ||
455 | 446 | ||
456 | static struct platform_driver omap_rng_driver = { | 447 | static struct platform_driver omap_rng_driver = { |
457 | .driver = { | 448 | .driver = { |
458 | .name = "omap_rng", | 449 | .name = "omap_rng", |
459 | .pm = OMAP_RNG_PM, | 450 | .pm = &omap_rng_pm, |
460 | .of_match_table = of_match_ptr(omap_rng_of_match), | 451 | .of_match_table = of_match_ptr(omap_rng_of_match), |
461 | }, | 452 | }, |
462 | .probe = omap_rng_probe, | 453 | .probe = omap_rng_probe, |
463 | .remove = __exit_p(omap_rng_remove), | 454 | .remove = omap_rng_remove, |
464 | }; | 455 | }; |
465 | 456 | ||
466 | module_platform_driver(omap_rng_driver); | 457 | module_platform_driver(omap_rng_driver); |
diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c index bcf86f91800a..63ce51d09af1 100644 --- a/drivers/char/hw_random/pseries-rng.c +++ b/drivers/char/hw_random/pseries-rng.c | |||
@@ -61,13 +61,13 @@ static struct hwrng pseries_rng = { | |||
61 | .read = pseries_rng_read, | 61 | .read = pseries_rng_read, |
62 | }; | 62 | }; |
63 | 63 | ||
64 | static int __init pseries_rng_probe(struct vio_dev *dev, | 64 | static int pseries_rng_probe(struct vio_dev *dev, |
65 | const struct vio_device_id *id) | 65 | const struct vio_device_id *id) |
66 | { | 66 | { |
67 | return hwrng_register(&pseries_rng); | 67 | return hwrng_register(&pseries_rng); |
68 | } | 68 | } |
69 | 69 | ||
70 | static int __exit pseries_rng_remove(struct vio_dev *dev) | 70 | static int pseries_rng_remove(struct vio_dev *dev) |
71 | { | 71 | { |
72 | hwrng_unregister(&pseries_rng); | 72 | hwrng_unregister(&pseries_rng); |
73 | return 0; | 73 | return 0; |
diff --git a/drivers/char/hw_random/xgene-rng.c b/drivers/char/hw_random/xgene-rng.c index 23caa05380a8..c37cf754a985 100644 --- a/drivers/char/hw_random/xgene-rng.c +++ b/drivers/char/hw_random/xgene-rng.c | |||
@@ -21,6 +21,7 @@ | |||
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/acpi.h> | ||
24 | #include <linux/clk.h> | 25 | #include <linux/clk.h> |
25 | #include <linux/delay.h> | 26 | #include <linux/delay.h> |
26 | #include <linux/hw_random.h> | 27 | #include <linux/hw_random.h> |
@@ -310,6 +311,14 @@ static int xgene_rng_init(struct hwrng *rng) | |||
310 | return 0; | 311 | return 0; |
311 | } | 312 | } |
312 | 313 | ||
314 | #ifdef CONFIG_ACPI | ||
315 | static const struct acpi_device_id xgene_rng_acpi_match[] = { | ||
316 | { "APMC0D18", }, | ||
317 | { } | ||
318 | }; | ||
319 | MODULE_DEVICE_TABLE(acpi, xgene_rng_acpi_match); | ||
320 | #endif | ||
321 | |||
313 | static struct hwrng xgene_rng_func = { | 322 | static struct hwrng xgene_rng_func = { |
314 | .name = "xgene-rng", | 323 | .name = "xgene-rng", |
315 | .init = xgene_rng_init, | 324 | .init = xgene_rng_init, |
@@ -415,6 +424,7 @@ static struct platform_driver xgene_rng_driver = { | |||
415 | .driver = { | 424 | .driver = { |
416 | .name = "xgene-rng", | 425 | .name = "xgene-rng", |
417 | .of_match_table = xgene_rng_of_match, | 426 | .of_match_table = xgene_rng_of_match, |
427 | .acpi_match_table = ACPI_PTR(xgene_rng_acpi_match), | ||
418 | }, | 428 | }, |
419 | }; | 429 | }; |
420 | 430 | ||
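The xgene-rng hunks show the usual recipe for teaching an existing DT driver to also bind via ACPI: add an acpi_device_id table under #ifdef CONFIG_ACPI and hook it up with ACPI_PTR(), which compiles to NULL when ACPI is disabled. A minimal sketch, with a made-up HID and compatible string:

#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>

#ifdef CONFIG_ACPI
/* "ABCD0001" is a made-up HID for illustration only. */
static const struct acpi_device_id foo_acpi_match[] = {
        { "ABCD0001", },
        { }
};
MODULE_DEVICE_TABLE(acpi, foo_acpi_match);
#endif

static const struct of_device_id foo_of_match[] = {
        { .compatible = "vendor,foo" },
        { }
};
MODULE_DEVICE_TABLE(of, foo_of_match);

static struct platform_driver foo_driver = {
        .driver = {
                .name = "foo",
                .of_match_table = foo_of_match,
                /* NULL when CONFIG_ACPI is off, so this line is safe
                 * without an #ifdef around it. */
                .acpi_match_table = ACPI_PTR(foo_acpi_match),
        },
};
module_platform_driver(foo_driver);

MODULE_LICENSE("GPL");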
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 2fb0fdfc87df..800bf41718e1 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig | |||
@@ -391,7 +391,7 @@ config CRYPTO_DEV_ATMEL_SHA | |||
391 | 391 | ||
392 | config CRYPTO_DEV_CCP | 392 | config CRYPTO_DEV_CCP |
393 | bool "Support for AMD Cryptographic Coprocessor" | 393 | bool "Support for AMD Cryptographic Coprocessor" |
394 | depends on (X86 && PCI) || ARM64 | 394 | depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM |
395 | default n | 395 | default n |
396 | help | 396 | help |
397 | The AMD Cryptographic Coprocessor provides hardware support | 397 | The AMD Cryptographic Coprocessor provides hardware support |
@@ -436,4 +436,26 @@ config CRYPTO_DEV_QCE | |||
436 | hardware. To compile this driver as a module, choose M here. The | 436 | hardware. To compile this driver as a module, choose M here. The |
437 | module will be called qcrypto. | 437 | module will be called qcrypto. |
438 | 438 | ||
439 | config CRYPTO_DEV_VMX | ||
440 | bool "Support for VMX cryptographic acceleration instructions" | ||
441 | depends on PPC64 | ||
442 | default n | ||
443 | help | ||
444 | Support for VMX cryptographic acceleration instructions. | ||
445 | |||
446 | source "drivers/crypto/vmx/Kconfig" | ||
447 | |||
448 | config CRYPTO_DEV_IMGTEC_HASH | ||
449 | depends on MIPS || COMPILE_TEST | ||
450 | tristate "Imagination Technologies hardware hash accelerator" | ||
451 | select CRYPTO_ALGAPI | ||
452 | select CRYPTO_MD5 | ||
453 | select CRYPTO_SHA1 | ||
454 | select CRYPTO_SHA256 | ||
455 | select CRYPTO_HASH | ||
456 | help | ||
457 | This driver interfaces with the Imagination Technologies | ||
458 | hardware hash accelerator, supporting MD5/SHA1/SHA224/SHA256 | ||
459 | hashing algorithms. | ||
460 | |||
439 | endif # CRYPTO_HW | 461 | endif # CRYPTO_HW |
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 3924f93d5774..fb84be7e6be5 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile | |||
@@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/ | |||
6 | obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ | 6 | obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ |
7 | obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o | 7 | obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o |
8 | obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o | 8 | obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o |
9 | obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o | ||
9 | obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o | 10 | obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o |
10 | obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o | 11 | obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o |
11 | obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o | 12 | obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o |
@@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o | |||
25 | obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ | 26 | obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ |
26 | obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ | 27 | obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ |
27 | obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ | 28 | obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ |
29 | obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ | ||
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index d02b77150070..3b28e8c3de28 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c | |||
@@ -1155,7 +1155,7 @@ struct crypto4xx_alg_common crypto4xx_alg[] = { | |||
1155 | /** | 1155 | /** |
1156 | * Module Initialization Routine | 1156 | * Module Initialization Routine |
1157 | */ | 1157 | */ |
1158 | static int __init crypto4xx_probe(struct platform_device *ofdev) | 1158 | static int crypto4xx_probe(struct platform_device *ofdev) |
1159 | { | 1159 | { |
1160 | int rc; | 1160 | int rc; |
1161 | struct resource res; | 1161 | struct resource res; |
@@ -1263,7 +1263,7 @@ err_alloc_dev: | |||
1263 | return rc; | 1263 | return rc; |
1264 | } | 1264 | } |
1265 | 1265 | ||
1266 | static int __exit crypto4xx_remove(struct platform_device *ofdev) | 1266 | static int crypto4xx_remove(struct platform_device *ofdev) |
1267 | { | 1267 | { |
1268 | struct device *dev = &ofdev->dev; | 1268 | struct device *dev = &ofdev->dev; |
1269 | struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev); | 1269 | struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev); |
@@ -1291,7 +1291,7 @@ static struct platform_driver crypto4xx_driver = { | |||
1291 | .of_match_table = crypto4xx_match, | 1291 | .of_match_table = crypto4xx_match, |
1292 | }, | 1292 | }, |
1293 | .probe = crypto4xx_probe, | 1293 | .probe = crypto4xx_probe, |
1294 | .remove = __exit_p(crypto4xx_remove), | 1294 | .remove = crypto4xx_remove, |
1295 | }; | 1295 | }; |
1296 | 1296 | ||
1297 | module_platform_driver(crypto4xx_driver); | 1297 | module_platform_driver(crypto4xx_driver); |
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 6597aac9905d..0f9a9dc06a83 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c | |||
@@ -315,10 +315,10 @@ static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd, | |||
315 | 315 | ||
316 | dd->dma_size = length; | 316 | dd->dma_size = length; |
317 | 317 | ||
318 | if (!(dd->flags & AES_FLAGS_FAST)) { | 318 | dma_sync_single_for_device(dd->dev, dma_addr_in, length, |
319 | dma_sync_single_for_device(dd->dev, dma_addr_in, length, | 319 | DMA_TO_DEVICE); |
320 | DMA_TO_DEVICE); | 320 | dma_sync_single_for_device(dd->dev, dma_addr_out, length, |
321 | } | 321 | DMA_FROM_DEVICE); |
322 | 322 | ||
323 | if (dd->flags & AES_FLAGS_CFB8) { | 323 | if (dd->flags & AES_FLAGS_CFB8) { |
324 | dd->dma_lch_in.dma_conf.dst_addr_width = | 324 | dd->dma_lch_in.dma_conf.dst_addr_width = |
@@ -391,6 +391,11 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd) | |||
391 | { | 391 | { |
392 | dd->flags &= ~AES_FLAGS_DMA; | 392 | dd->flags &= ~AES_FLAGS_DMA; |
393 | 393 | ||
394 | dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in, | ||
395 | dd->dma_size, DMA_TO_DEVICE); | ||
396 | dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out, | ||
397 | dd->dma_size, DMA_FROM_DEVICE); | ||
398 | |||
394 | /* use cache buffers */ | 399 | /* use cache buffers */ |
395 | dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg); | 400 | dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg); |
396 | if (!dd->nb_in_sg) | 401 | if (!dd->nb_in_sg) |
@@ -459,6 +464,9 @@ static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd) | |||
459 | dd->flags |= AES_FLAGS_FAST; | 464 | dd->flags |= AES_FLAGS_FAST; |
460 | 465 | ||
461 | } else { | 466 | } else { |
467 | dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in, | ||
468 | dd->dma_size, DMA_TO_DEVICE); | ||
469 | |||
462 | /* use cache buffers */ | 470 | /* use cache buffers */ |
463 | count = atmel_aes_sg_copy(&dd->in_sg, &dd->in_offset, | 471 | count = atmel_aes_sg_copy(&dd->in_sg, &dd->in_offset, |
464 | dd->buf_in, dd->buflen, dd->total, 0); | 472 | dd->buf_in, dd->buflen, dd->total, 0); |
@@ -619,7 +627,7 @@ static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd) | |||
619 | dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); | 627 | dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); |
620 | dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); | 628 | dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); |
621 | } else { | 629 | } else { |
622 | dma_sync_single_for_device(dd->dev, dd->dma_addr_out, | 630 | dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out, |
623 | dd->dma_size, DMA_FROM_DEVICE); | 631 | dd->dma_size, DMA_FROM_DEVICE); |
624 | 632 | ||
625 | /* copy data */ | 633 | /* copy data */ |
@@ -1246,6 +1254,11 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) | |||
1246 | 1254 | ||
1247 | /* keep only major version number */ | 1255 | /* keep only major version number */ |
1248 | switch (dd->hw_version & 0xff0) { | 1256 | switch (dd->hw_version & 0xff0) { |
1257 | case 0x200: | ||
1258 | dd->caps.has_dualbuff = 1; | ||
1259 | dd->caps.has_cfb64 = 1; | ||
1260 | dd->caps.max_burst_size = 4; | ||
1261 | break; | ||
1249 | case 0x130: | 1262 | case 0x130: |
1250 | dd->caps.has_dualbuff = 1; | 1263 | dd->caps.has_dualbuff = 1; |
1251 | dd->caps.has_cfb64 = 1; | 1264 | dd->caps.has_cfb64 = 1; |
@@ -1336,6 +1349,7 @@ static int atmel_aes_probe(struct platform_device *pdev) | |||
1336 | platform_set_drvdata(pdev, aes_dd); | 1349 | platform_set_drvdata(pdev, aes_dd); |
1337 | 1350 | ||
1338 | INIT_LIST_HEAD(&aes_dd->list); | 1351 | INIT_LIST_HEAD(&aes_dd->list); |
1352 | spin_lock_init(&aes_dd->lock); | ||
1339 | 1353 | ||
1340 | tasklet_init(&aes_dd->done_task, atmel_aes_done_task, | 1354 | tasklet_init(&aes_dd->done_task, atmel_aes_done_task, |
1341 | (unsigned long)aes_dd); | 1355 | (unsigned long)aes_dd); |
@@ -1374,7 +1388,7 @@ static int atmel_aes_probe(struct platform_device *pdev) | |||
1374 | /* Initializing the clock */ | 1388 | /* Initializing the clock */ |
1375 | aes_dd->iclk = clk_get(&pdev->dev, "aes_clk"); | 1389 | aes_dd->iclk = clk_get(&pdev->dev, "aes_clk"); |
1376 | if (IS_ERR(aes_dd->iclk)) { | 1390 | if (IS_ERR(aes_dd->iclk)) { |
1377 | dev_err(dev, "clock intialization failed.\n"); | 1391 | dev_err(dev, "clock initialization failed.\n"); |
1378 | err = PTR_ERR(aes_dd->iclk); | 1392 | err = PTR_ERR(aes_dd->iclk); |
1379 | goto clk_err; | 1393 | goto clk_err; |
1380 | } | 1394 | } |
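The atmel-aes changes are about buffer ownership for streaming DMA mappings that live across requests: the CPU must hand a buffer to the device with dma_sync_single_for_device() before the transfer and reclaim it with dma_sync_single_for_cpu() before touching the data again (the old code skipped the sync in the CPU and slow paths, and used the _for_device variant on the output buffer after DMA had completed). A condensed sketch of the ownership dance, names hypothetical:

#include <linux/dma-mapping.h>

/* One persistent streaming mapping reused for many requests; the sync
 * calls transfer ownership between CPU and device around each run. */
static void foo_run_request(struct device *dev, dma_addr_t dma_in,
                            dma_addr_t dma_out, size_t len)
{
        /* CPU has filled the input buffer; hand both buffers over. */
        dma_sync_single_for_device(dev, dma_in, len, DMA_TO_DEVICE);
        dma_sync_single_for_device(dev, dma_out, len, DMA_FROM_DEVICE);

        /* ... start the transfer and wait for completion ... */

        /* Reclaim the output before the CPU reads the result; note
         * _for_cpu here, not a second _for_device as in the old code. */
        dma_sync_single_for_cpu(dev, dma_out, len, DMA_FROM_DEVICE);
}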
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 34db04addc18..5b35433c5399 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c | |||
@@ -163,8 +163,20 @@ static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx) | |||
163 | count = min(ctx->sg->length - ctx->offset, ctx->total); | 163 | count = min(ctx->sg->length - ctx->offset, ctx->total); |
164 | count = min(count, ctx->buflen - ctx->bufcnt); | 164 | count = min(count, ctx->buflen - ctx->bufcnt); |
165 | 165 | ||
166 | if (count <= 0) | 166 | if (count <= 0) { |
167 | break; | 167 | /* |
168 | * Check if count <= 0 because the buffer is full or | ||
169 | * because the sg length is 0. In the latter case, | ||
170 | * check if there is another sg in the list; a 0 length | ||
171 | * sg doesn't necessarily mean the end of the sg list. | ||
172 | */ | ||
173 | if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) { | ||
174 | ctx->sg = sg_next(ctx->sg); | ||
175 | continue; | ||
176 | } else { | ||
177 | break; | ||
178 | } | ||
179 | } | ||
168 | 180 | ||
169 | scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg, | 181 | scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg, |
170 | ctx->offset, count, 0); | 182 | ctx->offset, count, 0); |
@@ -420,14 +432,8 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, | |||
420 | dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", | 432 | dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", |
421 | ctx->digcnt[1], ctx->digcnt[0], length1, final); | 433 | ctx->digcnt[1], ctx->digcnt[0], length1, final); |
422 | 434 | ||
423 | if (ctx->flags & (SHA_FLAGS_SHA1 | SHA_FLAGS_SHA224 | | 435 | dd->dma_lch_in.dma_conf.src_maxburst = 16; |
424 | SHA_FLAGS_SHA256)) { | 436 | dd->dma_lch_in.dma_conf.dst_maxburst = 16; |
425 | dd->dma_lch_in.dma_conf.src_maxburst = 16; | ||
426 | dd->dma_lch_in.dma_conf.dst_maxburst = 16; | ||
427 | } else { | ||
428 | dd->dma_lch_in.dma_conf.src_maxburst = 32; | ||
429 | dd->dma_lch_in.dma_conf.dst_maxburst = 32; | ||
430 | } | ||
431 | 437 | ||
432 | dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf); | 438 | dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf); |
433 | 439 | ||
@@ -529,7 +535,7 @@ static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd) | |||
529 | if (final) | 535 | if (final) |
530 | atmel_sha_fill_padding(ctx, 0); | 536 | atmel_sha_fill_padding(ctx, 0); |
531 | 537 | ||
532 | if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) { | 538 | if (final || (ctx->bufcnt == ctx->buflen)) { |
533 | count = ctx->bufcnt; | 539 | count = ctx->bufcnt; |
534 | ctx->bufcnt = 0; | 540 | ctx->bufcnt = 0; |
535 | return atmel_sha_xmit_dma_map(dd, ctx, count, final); | 541 | return atmel_sha_xmit_dma_map(dd, ctx, count, final); |
@@ -1266,6 +1272,12 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) | |||
1266 | 1272 | ||
1267 | /* keep only major version number */ | 1273 | /* keep only major version number */ |
1268 | switch (dd->hw_version & 0xff0) { | 1274 | switch (dd->hw_version & 0xff0) { |
1275 | case 0x420: | ||
1276 | dd->caps.has_dma = 1; | ||
1277 | dd->caps.has_dualbuff = 1; | ||
1278 | dd->caps.has_sha224 = 1; | ||
1279 | dd->caps.has_sha_384_512 = 1; | ||
1280 | break; | ||
1269 | case 0x410: | 1281 | case 0x410: |
1270 | dd->caps.has_dma = 1; | 1282 | dd->caps.has_dma = 1; |
1271 | dd->caps.has_dualbuff = 1; | 1283 | dd->caps.has_dualbuff = 1; |
@@ -1349,6 +1361,7 @@ static int atmel_sha_probe(struct platform_device *pdev) | |||
1349 | platform_set_drvdata(pdev, sha_dd); | 1361 | platform_set_drvdata(pdev, sha_dd); |
1350 | 1362 | ||
1351 | INIT_LIST_HEAD(&sha_dd->list); | 1363 | INIT_LIST_HEAD(&sha_dd->list); |
1364 | spin_lock_init(&sha_dd->lock); | ||
1352 | 1365 | ||
1353 | tasklet_init(&sha_dd->done_task, atmel_sha_done_task, | 1366 | tasklet_init(&sha_dd->done_task, atmel_sha_done_task, |
1354 | (unsigned long)sha_dd); | 1367 | (unsigned long)sha_dd); |
@@ -1385,7 +1398,7 @@ static int atmel_sha_probe(struct platform_device *pdev) | |||
1385 | /* Initializing the clock */ | 1398 | /* Initializing the clock */ |
1386 | sha_dd->iclk = clk_get(&pdev->dev, "sha_clk"); | 1399 | sha_dd->iclk = clk_get(&pdev->dev, "sha_clk"); |
1387 | if (IS_ERR(sha_dd->iclk)) { | 1400 | if (IS_ERR(sha_dd->iclk)) { |
1388 | dev_err(dev, "clock intialization failed.\n"); | 1401 | dev_err(dev, "clock initialization failed.\n"); |
1389 | err = PTR_ERR(sha_dd->iclk); | 1402 | err = PTR_ERR(sha_dd->iclk); |
1390 | goto clk_err; | 1403 | goto clk_err; |
1391 | } | 1404 | } |
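The atmel_sha_append_sg() fix encodes a scatterlist subtlety worth calling out: a zero-length entry does not terminate a list, only sg_is_last() does, so a walk has to skip empty entries with sg_next() rather than treating them as end-of-data. A self-contained sketch of such a walk (foo_gather is hypothetical):

#include <linux/kernel.h>
#include <linux/scatterlist.h>

/* Copy up to buflen bytes out of an sg list, tolerating zero-length
 * entries in the middle of the list. */
static size_t foo_gather(struct scatterlist *sg, u8 *buf, size_t buflen)
{
        size_t copied = 0;

        while (sg && copied < buflen) {
                size_t n = min_t(size_t, sg->length, buflen - copied);

                if (!n) {
                        /* Empty entry: end of data only if it is the
                         * last one, otherwise keep walking. */
                        if (sg_is_last(sg))
                                break;
                        sg = sg_next(sg);
                        continue;
                }
                sg_copy_to_buffer(sg, 1, buf + copied, n);
                copied += n;
                sg = sg_next(sg);
        }

        return copied;
}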
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 258772d9b22f..ca2999709eb4 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c | |||
@@ -1370,6 +1370,7 @@ static int atmel_tdes_probe(struct platform_device *pdev) | |||
1370 | platform_set_drvdata(pdev, tdes_dd); | 1370 | platform_set_drvdata(pdev, tdes_dd); |
1371 | 1371 | ||
1372 | INIT_LIST_HEAD(&tdes_dd->list); | 1372 | INIT_LIST_HEAD(&tdes_dd->list); |
1373 | spin_lock_init(&tdes_dd->lock); | ||
1373 | 1374 | ||
1374 | tasklet_init(&tdes_dd->done_task, atmel_tdes_done_task, | 1375 | tasklet_init(&tdes_dd->done_task, atmel_tdes_done_task, |
1375 | (unsigned long)tdes_dd); | 1376 | (unsigned long)tdes_dd); |
@@ -1408,7 +1409,7 @@ static int atmel_tdes_probe(struct platform_device *pdev) | |||
1408 | /* Initializing the clock */ | 1409 | /* Initializing the clock */ |
1409 | tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk"); | 1410 | tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk"); |
1410 | if (IS_ERR(tdes_dd->iclk)) { | 1411 | if (IS_ERR(tdes_dd->iclk)) { |
1411 | dev_err(dev, "clock intialization failed.\n"); | 1412 | dev_err(dev, "clock initialization failed.\n"); |
1412 | err = PTR_ERR(tdes_dd->iclk); | 1413 | err = PTR_ERR(tdes_dd->iclk); |
1413 | goto clk_err; | 1414 | goto clk_err; |
1414 | } | 1415 | } |
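All three Atmel probe routines gain a spin_lock_init() here; the lock members were previously used straight out of kzalloc()ed memory. A zeroed spinlock_t happens to work in some configurations but is not guaranteed to, and lockdep needs the explicit init to register a lock class. Sketch:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo_dev {
        spinlock_t lock;
        struct list_head list;
};

static struct foo_dev *foo_alloc(void)
{
        struct foo_dev *fd = kzalloc(sizeof(*fd), GFP_KERNEL);

        if (!fd)
                return NULL;
        /* Dynamically allocated locks must be initialized explicitly;
         * zeroed memory is not a valid unlocked spinlock on all
         * configurations and lockdep needs the lock class. */
        spin_lock_init(&fd->lock);
        INIT_LIST_HEAD(&fd->list);
        return fd;
}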
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index f347ab7eea95..ba0532efd3ae 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c | |||
@@ -1172,6 +1172,7 @@ static int ahash_final_no_ctx(struct ahash_request *req) | |||
1172 | return -ENOMEM; | 1172 | return -ENOMEM; |
1173 | } | 1173 | } |
1174 | 1174 | ||
1175 | edesc->sec4_sg_bytes = 0; | ||
1175 | sh_len = desc_len(sh_desc); | 1176 | sh_len = desc_len(sh_desc); |
1176 | desc = edesc->hw_desc; | 1177 | desc = edesc->hw_desc; |
1177 | init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE); | 1178 | init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE); |
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index ae31e555793c..26a544b505f1 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c | |||
@@ -52,7 +52,7 @@ | |||
52 | 52 | ||
53 | /* length of descriptors */ | 53 | /* length of descriptors */ |
54 | #define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2) | 54 | #define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2) |
55 | #define DESC_RNG_LEN (10 * CAAM_CMD_SZ) | 55 | #define DESC_RNG_LEN (4 * CAAM_CMD_SZ) |
56 | 56 | ||
57 | /* Buffer, its dma address and lock */ | 57 | /* Buffer, its dma address and lock */ |
58 | struct buf_data { | 58 | struct buf_data { |
@@ -90,8 +90,8 @@ static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx) | |||
90 | struct device *jrdev = ctx->jrdev; | 90 | struct device *jrdev = ctx->jrdev; |
91 | 91 | ||
92 | if (ctx->sh_desc_dma) | 92 | if (ctx->sh_desc_dma) |
93 | dma_unmap_single(jrdev, ctx->sh_desc_dma, DESC_RNG_LEN, | 93 | dma_unmap_single(jrdev, ctx->sh_desc_dma, |
94 | DMA_TO_DEVICE); | 94 | desc_bytes(ctx->sh_desc), DMA_TO_DEVICE); |
95 | rng_unmap_buf(jrdev, &ctx->bufs[0]); | 95 | rng_unmap_buf(jrdev, &ctx->bufs[0]); |
96 | rng_unmap_buf(jrdev, &ctx->bufs[1]); | 96 | rng_unmap_buf(jrdev, &ctx->bufs[1]); |
97 | } | 97 | } |
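The caamrng fix restores DMA-API symmetry: a buffer must be unmapped with the same size it was mapped with, so the fixed DESC_RNG_LEN (itself corrected from 10 to 4 command words) gives way to desc_bytes(ctx->sh_desc). CONFIG_DMA_API_DEBUG flags exactly this kind of mismatch. A trivial sketch of the rule:

#include <linux/dma-mapping.h>

/* Unmap with exactly the address, size and direction that were used
 * for the mapping; CONFIG_DMA_API_DEBUG flags any mismatch. */
static dma_addr_t foo_map_desc(struct device *dev, void *desc, size_t len)
{
        return dma_map_single(dev, desc, len, DMA_TO_DEVICE);
}

static void foo_unmap_desc(struct device *dev, dma_addr_t addr, size_t len)
{
        /* len must equal the length given to dma_map_single() above. */
        dma_unmap_single(dev, addr, len, DMA_TO_DEVICE);
}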
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile index 7f592d8d07bb..55a1f3951578 100644 --- a/drivers/crypto/ccp/Makefile +++ b/drivers/crypto/ccp/Makefile | |||
@@ -1,11 +1,6 @@ | |||
1 | obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o | 1 | obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o |
2 | ccp-objs := ccp-dev.o ccp-ops.o | 2 | ccp-objs := ccp-dev.o ccp-ops.o ccp-platform.o |
3 | ifdef CONFIG_X86 | 3 | ccp-$(CONFIG_PCI) += ccp-pci.o |
4 | ccp-objs += ccp-pci.o | ||
5 | endif | ||
6 | ifdef CONFIG_ARM64 | ||
7 | ccp-objs += ccp-platform.o | ||
8 | endif | ||
9 | 4 | ||
10 | obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o | 5 | obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o |
11 | ccp-crypto-objs := ccp-crypto-main.o \ | 6 | ccp-crypto-objs := ccp-crypto-main.o \ |
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index 8e162ad82085..ea7e8446956a 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c | |||
@@ -23,7 +23,6 @@ | |||
23 | 23 | ||
24 | #include "ccp-crypto.h" | 24 | #include "ccp-crypto.h" |
25 | 25 | ||
26 | |||
27 | static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, | 26 | static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, |
28 | int ret) | 27 | int ret) |
29 | { | 28 | { |
@@ -38,11 +37,13 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, | |||
38 | if (rctx->hash_rem) { | 37 | if (rctx->hash_rem) { |
39 | /* Save remaining data to buffer */ | 38 | /* Save remaining data to buffer */ |
40 | unsigned int offset = rctx->nbytes - rctx->hash_rem; | 39 | unsigned int offset = rctx->nbytes - rctx->hash_rem; |
40 | |||
41 | scatterwalk_map_and_copy(rctx->buf, rctx->src, | 41 | scatterwalk_map_and_copy(rctx->buf, rctx->src, |
42 | offset, rctx->hash_rem, 0); | 42 | offset, rctx->hash_rem, 0); |
43 | rctx->buf_count = rctx->hash_rem; | 43 | rctx->buf_count = rctx->hash_rem; |
44 | } else | 44 | } else { |
45 | rctx->buf_count = 0; | 45 | rctx->buf_count = 0; |
46 | } | ||
46 | 47 | ||
47 | /* Update result area if supplied */ | 48 | /* Update result area if supplied */ |
48 | if (req->result) | 49 | if (req->result) |
@@ -202,7 +203,7 @@ static int ccp_aes_cmac_digest(struct ahash_request *req) | |||
202 | } | 203 | } |
203 | 204 | ||
204 | static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, | 205 | static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, |
205 | unsigned int key_len) | 206 | unsigned int key_len) |
206 | { | 207 | { |
207 | struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); | 208 | struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); |
208 | struct ccp_crypto_ahash_alg *alg = | 209 | struct ccp_crypto_ahash_alg *alg = |
@@ -292,7 +293,8 @@ static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm) | |||
292 | crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); | 293 | crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); |
293 | 294 | ||
294 | cipher_tfm = crypto_alloc_cipher("aes", 0, | 295 | cipher_tfm = crypto_alloc_cipher("aes", 0, |
295 | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); | 296 | CRYPTO_ALG_ASYNC | |
297 | CRYPTO_ALG_NEED_FALLBACK); | ||
296 | if (IS_ERR(cipher_tfm)) { | 298 | if (IS_ERR(cipher_tfm)) { |
297 | pr_warn("could not load aes cipher driver\n"); | 299 | pr_warn("could not load aes cipher driver\n"); |
298 | return PTR_ERR(cipher_tfm); | 300 | return PTR_ERR(cipher_tfm); |
@@ -354,7 +356,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head) | |||
354 | ret = crypto_register_ahash(alg); | 356 | ret = crypto_register_ahash(alg); |
355 | if (ret) { | 357 | if (ret) { |
356 | pr_err("%s ahash algorithm registration error (%d)\n", | 358 | pr_err("%s ahash algorithm registration error (%d)\n", |
357 | base->cra_name, ret); | 359 | base->cra_name, ret); |
358 | kfree(ccp_alg); | 360 | kfree(ccp_alg); |
359 | return ret; | 361 | return ret; |
360 | } | 362 | } |
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 0cc5594b7de3..52c7395cb8d8 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c | |||
@@ -21,7 +21,6 @@ | |||
21 | 21 | ||
22 | #include "ccp-crypto.h" | 22 | #include "ccp-crypto.h" |
23 | 23 | ||
24 | |||
25 | struct ccp_aes_xts_def { | 24 | struct ccp_aes_xts_def { |
26 | const char *name; | 25 | const char *name; |
27 | const char *drv_name; | 26 | const char *drv_name; |
@@ -216,7 +215,6 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) | |||
216 | ctx->u.aes.tfm_ablkcipher = NULL; | 215 | ctx->u.aes.tfm_ablkcipher = NULL; |
217 | } | 216 | } |
218 | 217 | ||
219 | |||
220 | static int ccp_register_aes_xts_alg(struct list_head *head, | 218 | static int ccp_register_aes_xts_alg(struct list_head *head, |
221 | const struct ccp_aes_xts_def *def) | 219 | const struct ccp_aes_xts_def *def) |
222 | { | 220 | { |
@@ -255,7 +253,7 @@ static int ccp_register_aes_xts_alg(struct list_head *head, | |||
255 | ret = crypto_register_alg(alg); | 253 | ret = crypto_register_alg(alg); |
256 | if (ret) { | 254 | if (ret) { |
257 | pr_err("%s ablkcipher algorithm registration error (%d)\n", | 255 | pr_err("%s ablkcipher algorithm registration error (%d)\n", |
258 | alg->cra_name, ret); | 256 | alg->cra_name, ret); |
259 | kfree(ccp_alg); | 257 | kfree(ccp_alg); |
260 | return ret; | 258 | return ret; |
261 | } | 259 | } |
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c index e46490db0f63..7984f910884d 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes.c +++ b/drivers/crypto/ccp/ccp-crypto-aes.c | |||
@@ -22,7 +22,6 @@ | |||
22 | 22 | ||
23 | #include "ccp-crypto.h" | 23 | #include "ccp-crypto.h" |
24 | 24 | ||
25 | |||
26 | static int ccp_aes_complete(struct crypto_async_request *async_req, int ret) | 25 | static int ccp_aes_complete(struct crypto_async_request *async_req, int ret) |
27 | { | 26 | { |
28 | struct ablkcipher_request *req = ablkcipher_request_cast(async_req); | 27 | struct ablkcipher_request *req = ablkcipher_request_cast(async_req); |
@@ -345,7 +344,7 @@ static int ccp_register_aes_alg(struct list_head *head, | |||
345 | ret = crypto_register_alg(alg); | 344 | ret = crypto_register_alg(alg); |
346 | if (ret) { | 345 | if (ret) { |
347 | pr_err("%s ablkcipher algorithm registration error (%d)\n", | 346 | pr_err("%s ablkcipher algorithm registration error (%d)\n", |
348 | alg->cra_name, ret); | 347 | alg->cra_name, ret); |
349 | kfree(ccp_alg); | 348 | kfree(ccp_alg); |
350 | return ret; | 349 | return ret; |
351 | } | 350 | } |
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 4d4e016d755b..bdec01ec608f 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c | |||
@@ -33,7 +33,6 @@ static unsigned int sha_disable; | |||
33 | module_param(sha_disable, uint, 0444); | 33 | module_param(sha_disable, uint, 0444); |
34 | MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value"); | 34 | MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value"); |
35 | 35 | ||
36 | |||
37 | /* List heads for the supported algorithms */ | 36 | /* List heads for the supported algorithms */ |
38 | static LIST_HEAD(hash_algs); | 37 | static LIST_HEAD(hash_algs); |
39 | static LIST_HEAD(cipher_algs); | 38 | static LIST_HEAD(cipher_algs); |
@@ -48,6 +47,7 @@ struct ccp_crypto_queue { | |||
48 | struct list_head *backlog; | 47 | struct list_head *backlog; |
49 | unsigned int cmd_count; | 48 | unsigned int cmd_count; |
50 | }; | 49 | }; |
50 | |||
51 | #define CCP_CRYPTO_MAX_QLEN 100 | 51 | #define CCP_CRYPTO_MAX_QLEN 100 |
52 | 52 | ||
53 | static struct ccp_crypto_queue req_queue; | 53 | static struct ccp_crypto_queue req_queue; |
@@ -77,7 +77,6 @@ struct ccp_crypto_cpu { | |||
77 | int err; | 77 | int err; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | |||
81 | static inline bool ccp_crypto_success(int err) | 80 | static inline bool ccp_crypto_success(int err) |
82 | { | 81 | { |
83 | if (err && (err != -EINPROGRESS) && (err != -EBUSY)) | 82 | if (err && (err != -EINPROGRESS) && (err != -EBUSY)) |
@@ -143,7 +142,7 @@ static void ccp_crypto_complete(void *data, int err) | |||
143 | int ret; | 142 | int ret; |
144 | 143 | ||
145 | if (err == -EINPROGRESS) { | 144 | if (err == -EINPROGRESS) { |
146 | /* Only propogate the -EINPROGRESS if necessary */ | 145 | /* Only propagate the -EINPROGRESS if necessary */ |
147 | if (crypto_cmd->ret == -EBUSY) { | 146 | if (crypto_cmd->ret == -EBUSY) { |
148 | crypto_cmd->ret = -EINPROGRESS; | 147 | crypto_cmd->ret = -EINPROGRESS; |
149 | req->complete(req, -EINPROGRESS); | 148 | req->complete(req, -EINPROGRESS); |
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 96531571f7cf..507b34e0cc19 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c | |||
@@ -23,7 +23,6 @@ | |||
23 | 23 | ||
24 | #include "ccp-crypto.h" | 24 | #include "ccp-crypto.h" |
25 | 25 | ||
26 | |||
27 | static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) | 26 | static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) |
28 | { | 27 | { |
29 | struct ahash_request *req = ahash_request_cast(async_req); | 28 | struct ahash_request *req = ahash_request_cast(async_req); |
@@ -37,11 +36,13 @@ static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) | |||
37 | if (rctx->hash_rem) { | 36 | if (rctx->hash_rem) { |
38 | /* Save remaining data to buffer */ | 37 | /* Save remaining data to buffer */ |
39 | unsigned int offset = rctx->nbytes - rctx->hash_rem; | 38 | unsigned int offset = rctx->nbytes - rctx->hash_rem; |
39 | |||
40 | scatterwalk_map_and_copy(rctx->buf, rctx->src, | 40 | scatterwalk_map_and_copy(rctx->buf, rctx->src, |
41 | offset, rctx->hash_rem, 0); | 41 | offset, rctx->hash_rem, 0); |
42 | rctx->buf_count = rctx->hash_rem; | 42 | rctx->buf_count = rctx->hash_rem; |
43 | } else | 43 | } else { |
44 | rctx->buf_count = 0; | 44 | rctx->buf_count = 0; |
45 | } | ||
45 | 46 | ||
46 | /* Update result area if supplied */ | 47 | /* Update result area if supplied */ |
47 | if (req->result) | 48 | if (req->result) |
@@ -227,8 +228,9 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, | |||
227 | } | 228 | } |
228 | 229 | ||
229 | key_len = digest_size; | 230 | key_len = digest_size; |
230 | } else | 231 | } else { |
231 | memcpy(ctx->u.sha.key, key, key_len); | 232 | memcpy(ctx->u.sha.key, key, key_len); |
233 | } | ||
232 | 234 | ||
233 | for (i = 0; i < block_size; i++) { | 235 | for (i = 0; i < block_size; i++) { |
234 | ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36; | 236 | ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36; |
@@ -355,7 +357,7 @@ static int ccp_register_hmac_alg(struct list_head *head, | |||
355 | ret = crypto_register_ahash(alg); | 357 | ret = crypto_register_ahash(alg); |
356 | if (ret) { | 358 | if (ret) { |
357 | pr_err("%s ahash algorithm registration error (%d)\n", | 359 | pr_err("%s ahash algorithm registration error (%d)\n", |
358 | base->cra_name, ret); | 360 | base->cra_name, ret); |
359 | kfree(ccp_alg); | 361 | kfree(ccp_alg); |
360 | return ret; | 362 | return ret; |
361 | } | 363 | } |
@@ -410,7 +412,7 @@ static int ccp_register_sha_alg(struct list_head *head, | |||
410 | ret = crypto_register_ahash(alg); | 412 | ret = crypto_register_ahash(alg); |
411 | if (ret) { | 413 | if (ret) { |
412 | pr_err("%s ahash algorithm registration error (%d)\n", | 414 | pr_err("%s ahash algorithm registration error (%d)\n", |
413 | base->cra_name, ret); | 415 | base->cra_name, ret); |
414 | kfree(ccp_alg); | 416 | kfree(ccp_alg); |
415 | return ret; | 417 | return ret; |
416 | } | 418 | } |
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index 9aa4ae184f7f..76a96f0f44c6 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h | |||
@@ -13,7 +13,6 @@ | |||
13 | #ifndef __CCP_CRYPTO_H__ | 13 | #ifndef __CCP_CRYPTO_H__ |
14 | #define __CCP_CRYPTO_H__ | 14 | #define __CCP_CRYPTO_H__ |
15 | 15 | ||
16 | |||
17 | #include <linux/list.h> | 16 | #include <linux/list.h> |
18 | #include <linux/wait.h> | 17 | #include <linux/wait.h> |
19 | #include <linux/pci.h> | 18 | #include <linux/pci.h> |
@@ -25,7 +24,6 @@ | |||
25 | #include <crypto/hash.h> | 24 | #include <crypto/hash.h> |
26 | #include <crypto/sha.h> | 25 | #include <crypto/sha.h> |
27 | 26 | ||
28 | |||
29 | #define CCP_CRA_PRIORITY 300 | 27 | #define CCP_CRA_PRIORITY 300 |
30 | 28 | ||
31 | struct ccp_crypto_ablkcipher_alg { | 29 | struct ccp_crypto_ablkcipher_alg { |
@@ -68,7 +66,6 @@ static inline struct ccp_crypto_ahash_alg * | |||
68 | return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg); | 66 | return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg); |
69 | } | 67 | } |
70 | 68 | ||
71 | |||
72 | /***** AES related defines *****/ | 69 | /***** AES related defines *****/ |
73 | struct ccp_aes_ctx { | 70 | struct ccp_aes_ctx { |
74 | /* Fallback cipher for XTS with unsupported unit sizes */ | 71 | /* Fallback cipher for XTS with unsupported unit sizes */ |
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index ca29c120b85f..861bacc1bb94 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c | |||
@@ -37,7 +37,6 @@ struct ccp_tasklet_data { | |||
37 | struct ccp_cmd *cmd; | 37 | struct ccp_cmd *cmd; |
38 | }; | 38 | }; |
39 | 39 | ||
40 | |||
41 | static struct ccp_device *ccp_dev; | 40 | static struct ccp_device *ccp_dev; |
42 | static inline struct ccp_device *ccp_get_device(void) | 41 | static inline struct ccp_device *ccp_get_device(void) |
43 | { | 42 | { |
@@ -296,11 +295,9 @@ struct ccp_device *ccp_alloc_struct(struct device *dev) | |||
296 | { | 295 | { |
297 | struct ccp_device *ccp; | 296 | struct ccp_device *ccp; |
298 | 297 | ||
299 | ccp = kzalloc(sizeof(*ccp), GFP_KERNEL); | 298 | ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL); |
300 | if (ccp == NULL) { | 299 | if (!ccp) |
301 | dev_err(dev, "unable to allocate device struct\n"); | ||
302 | return NULL; | 300 | return NULL; |
303 | } | ||
304 | ccp->dev = dev; | 301 | ccp->dev = dev; |
305 | 302 | ||
306 | INIT_LIST_HEAD(&ccp->cmd); | 303 | INIT_LIST_HEAD(&ccp->cmd); |
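ccp_alloc_struct() moving to devm_kzalloc() is what lets the later ccp-pci.c and ccp-platform.c hunks delete every kfree() from the error paths and from remove(): device-managed memory is released automatically on probe failure or unbind. Dropping the dev_err() also follows current practice, since the allocator already logs allocation failures. A minimal sketch:

#include <linux/platform_device.h>
#include <linux/slab.h>

struct foo_dev {
        void __iomem *regs;
};

static int foo_probe(struct platform_device *pdev)
{
        struct foo_dev *fd;

        /* Freed automatically on probe failure or device removal, so
         * no goto/kfree() ladder and no kfree() in remove(). No
         * "out of memory" print either: the allocator already logs
         * failures. */
        fd = devm_kzalloc(&pdev->dev, sizeof(*fd), GFP_KERNEL);
        if (!fd)
                return -ENOMEM;

        platform_set_drvdata(pdev, fd);
        return 0;
}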
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 62ff35a6b9ec..6ff89031fb96 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/wait.h> | 21 | #include <linux/wait.h> |
22 | #include <linux/dmapool.h> | 22 | #include <linux/dmapool.h> |
23 | #include <linux/hw_random.h> | 23 | #include <linux/hw_random.h> |
24 | 24 | #include <linux/bitops.h> | |
25 | 25 | ||
26 | #define MAX_DMAPOOL_NAME_LEN 32 | 26 | #define MAX_DMAPOOL_NAME_LEN 32 |
27 | 27 | ||
@@ -33,7 +33,6 @@ | |||
33 | #define CACHE_NONE 0x00 | 33 | #define CACHE_NONE 0x00 |
34 | #define CACHE_WB_NO_ALLOC 0xb7 | 34 | #define CACHE_WB_NO_ALLOC 0xb7 |
35 | 35 | ||
36 | |||
37 | /****** Register Mappings ******/ | 36 | /****** Register Mappings ******/ |
38 | #define Q_MASK_REG 0x000 | 37 | #define Q_MASK_REG 0x000 |
39 | #define TRNG_OUT_REG 0x00c | 38 | #define TRNG_OUT_REG 0x00c |
@@ -54,8 +53,8 @@ | |||
54 | #define CMD_Q_CACHE_BASE 0x228 | 53 | #define CMD_Q_CACHE_BASE 0x228 |
55 | #define CMD_Q_CACHE_INC 0x20 | 54 | #define CMD_Q_CACHE_INC 0x20 |
56 | 55 | ||
57 | #define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f); | 56 | #define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f) |
58 | #define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f); | 57 | #define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f) |
59 | 58 | ||
60 | /****** REQ0 Related Values ******/ | 59 | /****** REQ0 Related Values ******/ |
61 | #define REQ0_WAIT_FOR_WRITE 0x00000004 | 60 | #define REQ0_WAIT_FOR_WRITE 0x00000004 |
@@ -103,7 +102,6 @@ | |||
103 | /****** REQ6 Related Values ******/ | 102 | /****** REQ6 Related Values ******/ |
104 | #define REQ6_MEMTYPE_SHIFT 16 | 103 | #define REQ6_MEMTYPE_SHIFT 16 |
105 | 104 | ||
106 | |||
107 | /****** Key Storage Block ******/ | 105 | /****** Key Storage Block ******/ |
108 | #define KSB_START 77 | 106 | #define KSB_START 77 |
109 | #define KSB_END 127 | 107 | #define KSB_END 127 |
@@ -114,7 +112,7 @@ | |||
114 | #define CCP_JOBID_MASK 0x0000003f | 112 | #define CCP_JOBID_MASK 0x0000003f |
115 | 113 | ||
116 | #define CCP_DMAPOOL_MAX_SIZE 64 | 114 | #define CCP_DMAPOOL_MAX_SIZE 64 |
117 | #define CCP_DMAPOOL_ALIGN (1 << 5) | 115 | #define CCP_DMAPOOL_ALIGN BIT(5) |
118 | 116 | ||
119 | #define CCP_REVERSE_BUF_SIZE 64 | 117 | #define CCP_REVERSE_BUF_SIZE 64 |
120 | 118 | ||
@@ -142,7 +140,6 @@ | |||
142 | #define CCP_ECC_RESULT_OFFSET 60 | 140 | #define CCP_ECC_RESULT_OFFSET 60 |
143 | #define CCP_ECC_RESULT_SUCCESS 0x0001 | 141 | #define CCP_ECC_RESULT_SUCCESS 0x0001 |
144 | 142 | ||
145 | |||
146 | struct ccp_device; | 143 | struct ccp_device; |
147 | struct ccp_cmd; | 144 | struct ccp_cmd; |
148 | 145 | ||
@@ -261,7 +258,6 @@ struct ccp_device { | |||
261 | unsigned int axcache; | 258 | unsigned int axcache; |
262 | }; | 259 | }; |
263 | 260 | ||
264 | |||
265 | int ccp_pci_init(void); | 261 | int ccp_pci_init(void); |
266 | void ccp_pci_exit(void); | 262 | void ccp_pci_exit(void); |
267 | 263 | ||
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 8729364261d7..71f2e3c89424 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c | |||
@@ -27,7 +27,6 @@ | |||
27 | 27 | ||
28 | #include "ccp-dev.h" | 28 | #include "ccp-dev.h" |
29 | 29 | ||
30 | |||
31 | enum ccp_memtype { | 30 | enum ccp_memtype { |
32 | CCP_MEMTYPE_SYSTEM = 0, | 31 | CCP_MEMTYPE_SYSTEM = 0, |
33 | CCP_MEMTYPE_KSB, | 32 | CCP_MEMTYPE_KSB, |
@@ -515,7 +514,6 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev, | |||
515 | if (!wa->dma_count) | 514 | if (!wa->dma_count) |
516 | return -ENOMEM; | 515 | return -ENOMEM; |
517 | 516 | ||
518 | |||
519 | return 0; | 517 | return 0; |
520 | } | 518 | } |
521 | 519 | ||
@@ -763,8 +761,9 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst, | |||
763 | sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used; | 761 | sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used; |
764 | sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len); | 762 | sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len); |
765 | op_len = min(sg_src_len, sg_dst_len); | 763 | op_len = min(sg_src_len, sg_dst_len); |
766 | } else | 764 | } else { |
767 | op_len = sg_src_len; | 765 | op_len = sg_src_len; |
766 | } | ||
768 | 767 | ||
769 | /* The data operation length will be at least block_size in length | 768 | /* The data operation length will be at least block_size in length |
770 | * or the smaller of available sg room remaining for the source or | 769 | * or the smaller of available sg room remaining for the source or |
@@ -1131,9 +1130,9 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | |||
1131 | if (ret) | 1130 | if (ret) |
1132 | goto e_ctx; | 1131 | goto e_ctx; |
1133 | 1132 | ||
1134 | if (in_place) | 1133 | if (in_place) { |
1135 | dst = src; | 1134 | dst = src; |
1136 | else { | 1135 | } else { |
1137 | ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len, | 1136 | ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len, |
1138 | AES_BLOCK_SIZE, DMA_FROM_DEVICE); | 1137 | AES_BLOCK_SIZE, DMA_FROM_DEVICE); |
1139 | if (ret) | 1138 | if (ret) |
@@ -1304,9 +1303,9 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, | |||
1304 | if (ret) | 1303 | if (ret) |
1305 | goto e_ctx; | 1304 | goto e_ctx; |
1306 | 1305 | ||
1307 | if (in_place) | 1306 | if (in_place) { |
1308 | dst = src; | 1307 | dst = src; |
1309 | else { | 1308 | } else { |
1310 | ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len, | 1309 | ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len, |
1311 | unit_size, DMA_FROM_DEVICE); | 1310 | unit_size, DMA_FROM_DEVICE); |
1312 | if (ret) | 1311 | if (ret) |
@@ -1451,8 +1450,9 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | |||
1451 | goto e_ctx; | 1450 | goto e_ctx; |
1452 | } | 1451 | } |
1453 | memcpy(ctx.address, init, CCP_SHA_CTXSIZE); | 1452 | memcpy(ctx.address, init, CCP_SHA_CTXSIZE); |
1454 | } else | 1453 | } else { |
1455 | ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); | 1454 | ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); |
1455 | } | ||
1456 | 1456 | ||
1457 | ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, | 1457 | ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, |
1458 | CCP_PASSTHRU_BYTESWAP_256BIT); | 1458 | CCP_PASSTHRU_BYTESWAP_256BIT); |
@@ -1732,9 +1732,9 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, | |||
1732 | if (ret) | 1732 | if (ret) |
1733 | goto e_mask; | 1733 | goto e_mask; |
1734 | 1734 | ||
1735 | if (in_place) | 1735 | if (in_place) { |
1736 | dst = src; | 1736 | dst = src; |
1737 | else { | 1737 | } else { |
1738 | ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len, | 1738 | ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len, |
1739 | CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE); | 1739 | CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE); |
1740 | if (ret) | 1740 | if (ret) |
@@ -1974,7 +1974,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | |||
1974 | src.address += CCP_ECC_OPERAND_SIZE; | 1974 | src.address += CCP_ECC_OPERAND_SIZE; |
1975 | 1975 | ||
1976 | /* Set the first point Z coordinate to 1 */ | 1976 | /* Set the first point Z coordinate to 1 */
1977 | *(src.address) = 0x01; | 1977 | *src.address = 0x01; |
1978 | src.address += CCP_ECC_OPERAND_SIZE; | 1978 | src.address += CCP_ECC_OPERAND_SIZE; |
1979 | 1979 | ||
1980 | if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { | 1980 | if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { |
@@ -1989,7 +1989,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | |||
1989 | src.address += CCP_ECC_OPERAND_SIZE; | 1989 | src.address += CCP_ECC_OPERAND_SIZE; |
1990 | 1990 | ||
1991 | /* Set the second point Z coordinate to 1 */ | 1991 | /* Set the second point Z coordinate to 1 */
1992 | *(src.address) = 0x01; | 1992 | *src.address = 0x01; |
1993 | src.address += CCP_ECC_OPERAND_SIZE; | 1993 | src.address += CCP_ECC_OPERAND_SIZE; |
1994 | } else { | 1994 | } else { |
1995 | /* Copy the Domain "a" parameter */ | 1995 | /* Copy the Domain "a" parameter */ |
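Most of the ccp-ops.c churn is one CodingStyle rule applied consistently: when one branch of an if/else needs braces, both branches get them. A tiny sketch of the shape these hunks converge on (foo_init_dst is hypothetical):

#include <linux/types.h>

/* Hypothetical condensation of the pattern in ccp_run_aes_cmd() and
 * friends: since the else branch needs braces, the single-statement
 * if branch gets them too. */
static int foo_init_dst(bool in_place, int *dst, int src)
{
        int ret = 0;

        if (in_place) {
                *dst = src;
        } else {
                ret = -EINVAL;  /* stand-in for the real ccp_init_data() */
        }

        return ret;
}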
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c index 7f89c946adfe..af190d4795a8 100644 --- a/drivers/crypto/ccp/ccp-pci.c +++ b/drivers/crypto/ccp/ccp-pci.c | |||
@@ -174,11 +174,10 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
174 | if (!ccp) | 174 | if (!ccp) |
175 | goto e_err; | 175 | goto e_err; |
176 | 176 | ||
177 | ccp_pci = kzalloc(sizeof(*ccp_pci), GFP_KERNEL); | 177 | ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL); |
178 | if (!ccp_pci) { | 178 | if (!ccp_pci) |
179 | ret = -ENOMEM; | 179 | goto e_err; |
180 | goto e_free1; | 180 | |
181 | } | ||
182 | ccp->dev_specific = ccp_pci; | 181 | ccp->dev_specific = ccp_pci; |
183 | ccp->get_irq = ccp_get_irqs; | 182 | ccp->get_irq = ccp_get_irqs; |
184 | ccp->free_irq = ccp_free_irqs; | 183 | ccp->free_irq = ccp_free_irqs; |
@@ -186,7 +185,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
186 | ret = pci_request_regions(pdev, "ccp"); | 185 | ret = pci_request_regions(pdev, "ccp"); |
187 | if (ret) { | 186 | if (ret) { |
188 | dev_err(dev, "pci_request_regions failed (%d)\n", ret); | 187 | dev_err(dev, "pci_request_regions failed (%d)\n", ret); |
189 | goto e_free2; | 188 | goto e_err; |
190 | } | 189 | } |
191 | 190 | ||
192 | ret = pci_enable_device(pdev); | 191 | ret = pci_enable_device(pdev); |
@@ -204,7 +203,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
204 | 203 | ||
205 | ret = -EIO; | 204 | ret = -EIO; |
206 | ccp->io_map = pci_iomap(pdev, bar, 0); | 205 | ccp->io_map = pci_iomap(pdev, bar, 0); |
207 | if (ccp->io_map == NULL) { | 206 | if (!ccp->io_map) { |
208 | dev_err(dev, "pci_iomap failed\n"); | 207 | dev_err(dev, "pci_iomap failed\n"); |
209 | goto e_device; | 208 | goto e_device; |
210 | } | 209 | } |
@@ -239,12 +238,6 @@ e_device: | |||
239 | e_regions: | 238 | e_regions: |
240 | pci_release_regions(pdev); | 239 | pci_release_regions(pdev); |
241 | 240 | ||
242 | e_free2: | ||
243 | kfree(ccp_pci); | ||
244 | |||
245 | e_free1: | ||
246 | kfree(ccp); | ||
247 | |||
248 | e_err: | 241 | e_err: |
249 | dev_notice(dev, "initialization failed\n"); | 242 | dev_notice(dev, "initialization failed\n"); |
250 | return ret; | 243 | return ret; |
@@ -266,8 +259,6 @@ static void ccp_pci_remove(struct pci_dev *pdev) | |||
266 | 259 | ||
267 | pci_release_regions(pdev); | 260 | pci_release_regions(pdev); |
268 | 261 | ||
269 | kfree(ccp); | ||
270 | |||
271 | dev_notice(dev, "disabled\n"); | 262 | dev_notice(dev, "disabled\n"); |
272 | } | 263 | } |
273 | 264 | ||
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c index 8c50bad25f7e..b1c20b2b5647 100644 --- a/drivers/crypto/ccp/ccp-platform.c +++ b/drivers/crypto/ccp/ccp-platform.c | |||
@@ -23,9 +23,15 @@ | |||
23 | #include <linux/delay.h> | 23 | #include <linux/delay.h> |
24 | #include <linux/ccp.h> | 24 | #include <linux/ccp.h> |
25 | #include <linux/of.h> | 25 | #include <linux/of.h> |
26 | #include <linux/of_address.h> | ||
27 | #include <linux/acpi.h> | ||
26 | 28 | ||
27 | #include "ccp-dev.h" | 29 | #include "ccp-dev.h" |
28 | 30 | ||
31 | struct ccp_platform { | ||
32 | int use_acpi; | ||
33 | int coherent; | ||
34 | }; | ||
29 | 35 | ||
30 | static int ccp_get_irq(struct ccp_device *ccp) | 36 | static int ccp_get_irq(struct ccp_device *ccp) |
31 | { | 37 | { |
@@ -84,10 +90,64 @@ static struct resource *ccp_find_mmio_area(struct ccp_device *ccp) | |||
84 | return NULL; | 90 | return NULL; |
85 | } | 91 | } |
86 | 92 | ||
93 | #ifdef CONFIG_ACPI | ||
94 | static int ccp_acpi_support(struct ccp_device *ccp) | ||
95 | { | ||
96 | struct ccp_platform *ccp_platform = ccp->dev_specific; | ||
97 | struct acpi_device *adev = ACPI_COMPANION(ccp->dev); | ||
98 | acpi_handle handle; | ||
99 | acpi_status status; | ||
100 | unsigned long long data; | ||
101 | int cca; | ||
102 | |||
103 | /* Retrieve the device cache coherency value */ | ||
104 | handle = adev->handle; | ||
105 | do { | ||
106 | status = acpi_evaluate_integer(handle, "_CCA", NULL, &data); | ||
107 | if (!ACPI_FAILURE(status)) { | ||
108 | cca = data; | ||
109 | break; | ||
110 | } | ||
111 | } while (!ACPI_FAILURE(status)); | ||
112 | |||
113 | if (ACPI_FAILURE(status)) { | ||
114 | dev_err(ccp->dev, "error obtaining acpi coherency value\n"); | ||
115 | return -EINVAL; | ||
116 | } | ||
117 | |||
118 | ccp_platform->coherent = !!cca; | ||
119 | |||
120 | return 0; | ||
121 | } | ||
122 | #else /* CONFIG_ACPI */ | ||
123 | static int ccp_acpi_support(struct ccp_device *ccp) | ||
124 | { | ||
125 | return -EINVAL; | ||
126 | } | ||
127 | #endif | ||
128 | |||
129 | #ifdef CONFIG_OF | ||
130 | static int ccp_of_support(struct ccp_device *ccp) | ||
131 | { | ||
132 | struct ccp_platform *ccp_platform = ccp->dev_specific; | ||
133 | |||
134 | ccp_platform->coherent = of_dma_is_coherent(ccp->dev->of_node); | ||
135 | |||
136 | return 0; | ||
137 | } | ||
138 | #else | ||
139 | static int ccp_of_support(struct ccp_device *ccp) | ||
140 | { | ||
141 | return -EINVAL; | ||
142 | } | ||
143 | #endif | ||
144 | |||
87 | static int ccp_platform_probe(struct platform_device *pdev) | 145 | static int ccp_platform_probe(struct platform_device *pdev) |
88 | { | 146 | { |
89 | struct ccp_device *ccp; | 147 | struct ccp_device *ccp; |
148 | struct ccp_platform *ccp_platform; | ||
90 | struct device *dev = &pdev->dev; | 149 | struct device *dev = &pdev->dev; |
150 | struct acpi_device *adev = ACPI_COMPANION(dev); | ||
91 | struct resource *ior; | 151 | struct resource *ior; |
92 | int ret; | 152 | int ret; |
93 | 153 | ||
@@ -96,24 +156,40 @@ static int ccp_platform_probe(struct platform_device *pdev) | |||
96 | if (!ccp) | 156 | if (!ccp) |
97 | goto e_err; | 157 | goto e_err; |
98 | 158 | ||
99 | ccp->dev_specific = NULL; | 159 | ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL); |
160 | if (!ccp_platform) | ||
161 | goto e_err; | ||
162 | |||
163 | ccp->dev_specific = ccp_platform; | ||
100 | ccp->get_irq = ccp_get_irqs; | 164 | ccp->get_irq = ccp_get_irqs; |
101 | ccp->free_irq = ccp_free_irqs; | 165 | ccp->free_irq = ccp_free_irqs; |
102 | 166 | ||
167 | ccp_platform->use_acpi = (!adev || acpi_disabled) ? 0 : 1; | ||
168 | |||
103 | ior = ccp_find_mmio_area(ccp); | 169 | ior = ccp_find_mmio_area(ccp); |
104 | ccp->io_map = devm_ioremap_resource(dev, ior); | 170 | ccp->io_map = devm_ioremap_resource(dev, ior); |
105 | if (IS_ERR(ccp->io_map)) { | 171 | if (IS_ERR(ccp->io_map)) { |
106 | ret = PTR_ERR(ccp->io_map); | 172 | ret = PTR_ERR(ccp->io_map); |
107 | goto e_free; | 173 | goto e_err; |
108 | } | 174 | } |
109 | ccp->io_regs = ccp->io_map; | 175 | ccp->io_regs = ccp->io_map; |
110 | 176 | ||
111 | if (!dev->dma_mask) | 177 | if (!dev->dma_mask) |
112 | dev->dma_mask = &dev->coherent_dma_mask; | 178 | dev->dma_mask = &dev->coherent_dma_mask; |
113 | *(dev->dma_mask) = DMA_BIT_MASK(48); | 179 | ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); |
114 | dev->coherent_dma_mask = DMA_BIT_MASK(48); | 180 | if (ret) { |
181 | dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); | ||
182 | goto e_err; | ||
183 | } | ||
184 | |||
185 | if (ccp_platform->use_acpi) | ||
186 | ret = ccp_acpi_support(ccp); | ||
187 | else | ||
188 | ret = ccp_of_support(ccp); | ||
189 | if (ret) | ||
190 | goto e_err; | ||
115 | 191 | ||
116 | if (of_property_read_bool(dev->of_node, "dma-coherent")) | 192 | if (ccp_platform->coherent) |
117 | ccp->axcache = CACHE_WB_NO_ALLOC; | 193 | ccp->axcache = CACHE_WB_NO_ALLOC; |
118 | else | 194 | else |
119 | ccp->axcache = CACHE_NONE; | 195 | ccp->axcache = CACHE_NONE; |
@@ -122,15 +198,12 @@ static int ccp_platform_probe(struct platform_device *pdev) | |||
122 | 198 | ||
123 | ret = ccp_init(ccp); | 199 | ret = ccp_init(ccp); |
124 | if (ret) | 200 | if (ret) |
125 | goto e_free; | 201 | goto e_err; |
126 | 202 | ||
127 | dev_notice(dev, "enabled\n"); | 203 | dev_notice(dev, "enabled\n"); |
128 | 204 | ||
129 | return 0; | 205 | return 0; |
130 | 206 | ||
131 | e_free: | ||
132 | kfree(ccp); | ||
133 | |||
134 | e_err: | 207 | e_err: |
135 | dev_notice(dev, "initialization failed\n"); | 208 | dev_notice(dev, "initialization failed\n"); |
136 | return ret; | 209 | return ret; |
@@ -143,8 +216,6 @@ static int ccp_platform_remove(struct platform_device *pdev) | |||
143 | 216 | ||
144 | ccp_destroy(ccp); | 217 | ccp_destroy(ccp); |
145 | 218 | ||
146 | kfree(ccp); | ||
147 | |||
148 | dev_notice(dev, "disabled\n"); | 219 | dev_notice(dev, "disabled\n"); |
149 | 220 | ||
150 | return 0; | 221 | return 0; |
@@ -200,15 +271,29 @@ static int ccp_platform_resume(struct platform_device *pdev) | |||
200 | } | 271 | } |
201 | #endif | 272 | #endif |
202 | 273 | ||
203 | static const struct of_device_id ccp_platform_ids[] = { | 274 | #ifdef CONFIG_ACPI |
275 | static const struct acpi_device_id ccp_acpi_match[] = { | ||
276 | { "AMDI0C00", 0 }, | ||
277 | { }, | ||
278 | }; | ||
279 | #endif | ||
280 | |||
281 | #ifdef CONFIG_OF | ||
282 | static const struct of_device_id ccp_of_match[] = { | ||
204 | { .compatible = "amd,ccp-seattle-v1a" }, | 283 | { .compatible = "amd,ccp-seattle-v1a" }, |
205 | { }, | 284 | { }, |
206 | }; | 285 | }; |
286 | #endif | ||
207 | 287 | ||
208 | static struct platform_driver ccp_platform_driver = { | 288 | static struct platform_driver ccp_platform_driver = { |
209 | .driver = { | 289 | .driver = { |
210 | .name = "AMD Cryptographic Coprocessor", | 290 | .name = "AMD Cryptographic Coprocessor", |
211 | .of_match_table = ccp_platform_ids, | 291 | #ifdef CONFIG_ACPI |
292 | .acpi_match_table = ccp_acpi_match, | ||
293 | #endif | ||
294 | #ifdef CONFIG_OF | ||
295 | .of_match_table = ccp_of_match, | ||
296 | #endif | ||
212 | }, | 297 | }, |
213 | .probe = ccp_platform_probe, | 298 | .probe = ccp_platform_probe, |
214 | .remove = ccp_platform_remove, | 299 | .remove = ccp_platform_remove, |
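Two points in the ccp-platform rework deserve a note. First, the DMA mask is now set through dma_set_mask_and_coherent(), which validates the requested mask against the platform instead of blindly writing dev->dma_mask. Second, coherency is discovered from whichever firmware described the device: the DT dma-coherent property via of_dma_is_coherent(), or the ACPI _CCA object as in the hunk above. A probe-time sketch under those assumptions (foo_setup_dma is hypothetical):

#include <linux/dma-mapping.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>

static int foo_setup_dma(struct platform_device *pdev)
{
        struct device *dev = &pdev->dev;
        int ret;

        /* Fails cleanly if the platform cannot address 48 bits,
         * instead of installing an unusable mask by hand. */
        ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
        if (ret) {
                dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
                return ret;
        }

        /* DT case; the ACPI side would evaluate _CCA instead, as the
         * ccp_acpi_support() hunk above does. */
        if (dev->of_node && of_dma_is_coherent(dev->of_node))
                dev_info(dev, "device is DMA coherent\n");

        return 0;
}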
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c new file mode 100644 index 000000000000..ad47d0d61098 --- /dev/null +++ b/drivers/crypto/img-hash.c | |||
@@ -0,0 +1,1029 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Imagination Technologies | ||
3 | * Authors: Will Thomas, James Hartley | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as published | ||
7 | * by the Free Software Foundation. | ||
8 | * | ||
9 | * Interface structure taken from omap-sham driver | ||
10 | */ | ||
11 | |||
12 | #include <linux/clk.h> | ||
13 | #include <linux/dmaengine.h> | ||
14 | #include <linux/interrupt.h> | ||
15 | #include <linux/io.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/of_device.h> | ||
19 | #include <linux/platform_device.h> | ||
20 | #include <linux/scatterlist.h> | ||
21 | |||
22 | #include <crypto/internal/hash.h> | ||
23 | #include <crypto/md5.h> | ||
24 | #include <crypto/sha.h> | ||
25 | |||
26 | #define CR_RESET 0 | ||
27 | #define CR_RESET_SET 1 | ||
28 | #define CR_RESET_UNSET 0 | ||
29 | |||
30 | #define CR_MESSAGE_LENGTH_H 0x4 | ||
31 | #define CR_MESSAGE_LENGTH_L 0x8 | ||
32 | |||
33 | #define CR_CONTROL 0xc | ||
34 | #define CR_CONTROL_BYTE_ORDER_3210 0 | ||
35 | #define CR_CONTROL_BYTE_ORDER_0123 1 | ||
36 | #define CR_CONTROL_BYTE_ORDER_2310 2 | ||
37 | #define CR_CONTROL_BYTE_ORDER_1032 3 | ||
38 | #define CR_CONTROL_BYTE_ORDER_SHIFT 8 | ||
39 | #define CR_CONTROL_ALGO_MD5 0 | ||
40 | #define CR_CONTROL_ALGO_SHA1 1 | ||
41 | #define CR_CONTROL_ALGO_SHA224 2 | ||
42 | #define CR_CONTROL_ALGO_SHA256 3 | ||
43 | |||
44 | #define CR_INTSTAT 0x10 | ||
45 | #define CR_INTENAB 0x14 | ||
46 | #define CR_INTCLEAR 0x18 | ||
47 | #define CR_INT_RESULTS_AVAILABLE BIT(0) | ||
48 | #define CR_INT_NEW_RESULTS_SET BIT(1) | ||
49 | #define CR_INT_RESULT_READ_ERR BIT(2) | ||
50 | #define CR_INT_MESSAGE_WRITE_ERROR BIT(3) | ||
51 | #define CR_INT_STATUS BIT(8) | ||
52 | |||
53 | #define CR_RESULT_QUEUE 0x1c | ||
54 | #define CR_RSD0 0x40 | ||
55 | #define CR_CORE_REV 0x50 | ||
56 | #define CR_CORE_DES1 0x60 | ||
57 | #define CR_CORE_DES2 0x70 | ||
58 | |||
59 | #define DRIVER_FLAGS_BUSY BIT(0) | ||
60 | #define DRIVER_FLAGS_FINAL BIT(1) | ||
61 | #define DRIVER_FLAGS_DMA_ACTIVE BIT(2) | ||
62 | #define DRIVER_FLAGS_OUTPUT_READY BIT(3) | ||
63 | #define DRIVER_FLAGS_INIT BIT(4) | ||
64 | #define DRIVER_FLAGS_CPU BIT(5) | ||
65 | #define DRIVER_FLAGS_DMA_READY BIT(6) | ||
66 | #define DRIVER_FLAGS_ERROR BIT(7) | ||
67 | #define DRIVER_FLAGS_SG BIT(8) | ||
68 | #define DRIVER_FLAGS_SHA1 BIT(18) | ||
69 | #define DRIVER_FLAGS_SHA224 BIT(19) | ||
70 | #define DRIVER_FLAGS_SHA256 BIT(20) | ||
71 | #define DRIVER_FLAGS_MD5 BIT(21) | ||
72 | |||
73 | #define IMG_HASH_QUEUE_LENGTH 20 | ||
74 | #define IMG_HASH_DMA_THRESHOLD 64 | ||
75 | |||
76 | #ifdef __LITTLE_ENDIAN | ||
77 | #define IMG_HASH_BYTE_ORDER CR_CONTROL_BYTE_ORDER_3210 | ||
78 | #else | ||
79 | #define IMG_HASH_BYTE_ORDER CR_CONTROL_BYTE_ORDER_0123 | ||
80 | #endif | ||
81 | |||
82 | struct img_hash_dev; | ||
83 | |||
84 | struct img_hash_request_ctx { | ||
85 | struct img_hash_dev *hdev; | ||
86 | u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32)); | ||
87 | unsigned long flags; | ||
88 | size_t digsize; | ||
89 | |||
90 | dma_addr_t dma_addr; | ||
91 | size_t dma_ct; | ||
92 | |||
93 | /* sg root */ | ||
94 | struct scatterlist *sgfirst; | ||
95 | /* walk state */ | ||
96 | struct scatterlist *sg; | ||
97 | size_t nents; | ||
98 | size_t offset; | ||
99 | unsigned int total; | ||
100 | size_t sent; | ||
101 | |||
102 | unsigned long op; | ||
103 | |||
104 | size_t bufcnt; | ||
105 | u8 buffer[0] __aligned(sizeof(u32)); | ||
106 | struct ahash_request fallback_req; | ||
107 | }; | ||
108 | |||
109 | struct img_hash_ctx { | ||
110 | struct img_hash_dev *hdev; | ||
111 | unsigned long flags; | ||
112 | struct crypto_ahash *fallback; | ||
113 | }; | ||
114 | |||
115 | struct img_hash_dev { | ||
116 | struct list_head list; | ||
117 | struct device *dev; | ||
118 | struct clk *hash_clk; | ||
119 | struct clk *sys_clk; | ||
120 | void __iomem *io_base; | ||
121 | |||
122 | phys_addr_t bus_addr; | ||
123 | void __iomem *cpu_addr; | ||
124 | |||
125 | spinlock_t lock; | ||
126 | int err; | ||
127 | struct tasklet_struct done_task; | ||
128 | struct tasklet_struct dma_task; | ||
129 | |||
130 | unsigned long flags; | ||
131 | struct crypto_queue queue; | ||
132 | struct ahash_request *req; | ||
133 | |||
134 | struct dma_chan *dma_lch; | ||
135 | }; | ||
136 | |||
137 | struct img_hash_drv { | ||
138 | struct list_head dev_list; | ||
139 | spinlock_t lock; | ||
140 | }; | ||
141 | |||
142 | static struct img_hash_drv img_hash = { | ||
143 | .dev_list = LIST_HEAD_INIT(img_hash.dev_list), | ||
144 | .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock), | ||
145 | }; | ||
146 | |||
147 | static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset) | ||
148 | { | ||
149 | return readl_relaxed(hdev->io_base + offset); | ||
150 | } | ||
151 | |||
152 | static inline void img_hash_write(struct img_hash_dev *hdev, | ||
153 | u32 offset, u32 value) | ||
154 | { | ||
155 | writel_relaxed(value, hdev->io_base + offset); | ||
156 | } | ||
157 | |||
158 | static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev) | ||
159 | { | ||
160 | return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE)); | ||
161 | } | ||
162 | |||
163 | static void img_hash_start(struct img_hash_dev *hdev, bool dma) | ||
164 | { | ||
165 | struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); | ||
166 | u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT; | ||
167 | |||
168 | if (ctx->flags & DRIVER_FLAGS_MD5) | ||
169 | cr |= CR_CONTROL_ALGO_MD5; | ||
170 | else if (ctx->flags & DRIVER_FLAGS_SHA1) | ||
171 | cr |= CR_CONTROL_ALGO_SHA1; | ||
172 | else if (ctx->flags & DRIVER_FLAGS_SHA224) | ||
173 | cr |= CR_CONTROL_ALGO_SHA224; | ||
174 | else if (ctx->flags & DRIVER_FLAGS_SHA256) | ||
175 | cr |= CR_CONTROL_ALGO_SHA256; | ||
176 | dev_dbg(hdev->dev, "Starting hash process\n"); | ||
177 | img_hash_write(hdev, CR_CONTROL, cr); | ||
178 | |||
179 | /* | ||
180 | * The hardware block requires two cycles between writing the control | ||
181 | * register and writing the first word of data in non-DMA mode. To | ||
182 | * ensure the first data write is not grouped in a burst with the | ||
183 | * control register write, a read is issued to 'flush' the bus. | ||
184 | */ | ||
185 | if (!dma) | ||
186 | img_hash_read(hdev, CR_CONTROL); | ||
187 | } | ||
188 | |||
189 | static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf, | ||
190 | size_t length, int final) | ||
191 | { | ||
192 | u32 count, len32; | ||
193 | const u32 *buffer = (const u32 *)buf; | ||
194 | |||
195 | dev_dbg(hdev->dev, "xmit_cpu: length: %zu bytes\n", length); | ||
196 | |||
197 | if (final) | ||
198 | hdev->flags |= DRIVER_FLAGS_FINAL; | ||
199 | |||
200 | len32 = DIV_ROUND_UP(length, sizeof(u32)); | ||
201 | |||
202 | for (count = 0; count < len32; count++) | ||
203 | writel_relaxed(buffer[count], hdev->cpu_addr); | ||
204 | |||
205 | return -EINPROGRESS; | ||
206 | } | ||
207 | |||
208 | static void img_hash_dma_callback(void *data) | ||
209 | { | ||
210 | struct img_hash_dev *hdev = (struct img_hash_dev *)data; | ||
211 | struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); | ||
212 | |||
213 | if (ctx->bufcnt) { | ||
214 | img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0); | ||
215 | ctx->bufcnt = 0; | ||
216 | } | ||
217 | if (ctx->sg) | ||
218 | tasklet_schedule(&hdev->dma_task); | ||
219 | } | ||
220 | |||
221 | static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg) | ||
222 | { | ||
223 | struct dma_async_tx_descriptor *desc; | ||
224 | struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); | ||
225 | |||
226 | ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); | ||
227 | if (ctx->dma_ct == 0) { | ||
228 | dev_err(hdev->dev, "Invalid DMA sg\n"); | ||
229 | hdev->err = -EINVAL; | ||
230 | return -EINVAL; | ||
231 | } | ||
232 | |||
233 | desc = dmaengine_prep_slave_sg(hdev->dma_lch, | ||
234 | sg, | ||
235 | ctx->dma_ct, | ||
236 | DMA_MEM_TO_DEV, | ||
237 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | ||
238 | if (!desc) { | ||
239 | dev_err(hdev->dev, "Null DMA descriptor\n"); | ||
240 | hdev->err = -EINVAL; | ||
241 | dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); | ||
242 | return -EINVAL; | ||
243 | } | ||
244 | desc->callback = img_hash_dma_callback; | ||
245 | desc->callback_param = hdev; | ||
246 | dmaengine_submit(desc); | ||
247 | dma_async_issue_pending(hdev->dma_lch); | ||
248 | |||
249 | return 0; | ||
250 | } | ||
251 | |||
252 | static int img_hash_write_via_cpu(struct img_hash_dev *hdev) | ||
253 | { | ||
254 | struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); | ||
255 | |||
256 | ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg), | ||
257 | ctx->buffer, hdev->req->nbytes); | ||
258 | |||
259 | ctx->total = hdev->req->nbytes; | ||
260 | ctx->bufcnt = 0; | ||
261 | |||
262 | hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL); | ||
263 | |||
264 | img_hash_start(hdev, false); | ||
265 | |||
266 | return img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1); | ||
267 | } | ||
268 | |||
269 | static int img_hash_finish(struct ahash_request *req) | ||
270 | { | ||
271 | struct img_hash_request_ctx *ctx = ahash_request_ctx(req); | ||
272 | |||
273 | if (!req->result) | ||
274 | return -EINVAL; | ||
275 | |||
276 | memcpy(req->result, ctx->digest, ctx->digsize); | ||
277 | |||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | static void img_hash_copy_hash(struct ahash_request *req) | ||
282 | { | ||
283 | struct img_hash_request_ctx *ctx = ahash_request_ctx(req); | ||
284 | u32 *hash = (u32 *)ctx->digest; | ||
285 | int i; | ||
286 | |||
287 | for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--) | ||
288 | hash[i] = img_hash_read_result_queue(ctx->hdev); | ||
289 | } | ||
290 | |||
291 | static void img_hash_finish_req(struct ahash_request *req, int err) | ||
292 | { | ||
293 | struct img_hash_request_ctx *ctx = ahash_request_ctx(req); | ||
294 | struct img_hash_dev *hdev = ctx->hdev; | ||
295 | |||
296 | if (!err) { | ||
297 | img_hash_copy_hash(req); | ||
298 | if (DRIVER_FLAGS_FINAL & hdev->flags) | ||
299 | err = img_hash_finish(req); | ||
300 | } else { | ||
301 | dev_warn(hdev->dev, "Hash failed with error %d\n", err); | ||
302 | ctx->flags |= DRIVER_FLAGS_ERROR; | ||
303 | } | ||
304 | |||
305 | hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | DRIVER_FLAGS_OUTPUT_READY | | ||
306 | DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | DRIVER_FLAGS_FINAL); | ||
307 | |||
308 | if (req->base.complete) | ||
309 | req->base.complete(&req->base, err); | ||
310 | } | ||
311 | |||
312 | static int img_hash_write_via_dma(struct img_hash_dev *hdev) | ||
313 | { | ||
314 | struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); | ||
315 | |||
316 | img_hash_start(hdev, true); | ||
317 | |||
318 | dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total); | ||
319 | |||
320 | if (!ctx->total) | ||
321 | hdev->flags |= DRIVER_FLAGS_FINAL; | ||
322 | |||
323 | hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL; | ||
324 | |||
325 | tasklet_schedule(&hdev->dma_task); | ||
326 | |||
327 | return -EINPROGRESS; | ||
328 | } | ||
329 | |||
330 | static int img_hash_dma_init(struct img_hash_dev *hdev) | ||
331 | { | ||
332 | struct dma_slave_config dma_conf; | ||
333 | int err = -EINVAL; | ||
334 | |||
335 | hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx"); | ||
336 | if (!hdev->dma_lch) { | ||
337 | dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n"); | ||
338 | return -EBUSY; | ||
339 | } | ||
340 | dma_conf.direction = DMA_MEM_TO_DEV; | ||
341 | dma_conf.dst_addr = hdev->bus_addr; | ||
342 | dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
343 | dma_conf.dst_maxburst = 16; | ||
344 | dma_conf.device_fc = false; | ||
345 | |||
346 | err = dmaengine_slave_config(hdev->dma_lch, &dma_conf); | ||
347 | if (err) { | ||
348 | dev_err(hdev->dev, "Couldn't configure DMA slave.\n"); | ||
349 | dma_release_channel(hdev->dma_lch); | ||
350 | return err; | ||
351 | } | ||
352 | |||
353 | return 0; | ||
354 | } | ||
355 | |||
356 | static void img_hash_dma_task(unsigned long d) | ||
357 | { | ||
358 | struct img_hash_dev *hdev = (struct img_hash_dev *)d; | ||
359 | struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); | ||
360 | u8 *addr; | ||
361 | size_t nbytes, bleft, wsend, len, tbc; | ||
362 | struct scatterlist tsg; | ||
363 | |||
364 | if (!ctx->sg) | ||
365 | return; | ||
366 | |||
367 | addr = sg_virt(ctx->sg); | ||
368 | nbytes = ctx->sg->length - ctx->offset; | ||
369 | |||
370 | /* | ||
371 | * The hash accelerator does not support a data valid mask, so if a | ||
372 | * DMA transfer (i.e. per page) is not a multiple of 4 bytes, the | ||
373 | * padding bytes in its last word would erroneously be included in | ||
374 | * the hash. To avoid this we round the transfer down and add the | ||
375 | * excess to the start of the next one. It does not matter that the | ||
376 | * final transfer may not be a multiple of 4 bytes, as the hashing | ||
377 | * block is programmed to accept the correct number of bytes. | ||
378 | */ | ||
379 | |||
380 | bleft = nbytes % 4; | ||
381 | wsend = (nbytes / 4); | ||
382 | |||
383 | if (wsend) { | ||
384 | sg_init_one(&tsg, addr + ctx->offset, wsend * 4); | ||
385 | if (img_hash_xmit_dma(hdev, &tsg)) { | ||
386 | dev_err(hdev->dev, "DMA failed, falling back to CPU"); | ||
387 | ctx->flags |= DRIVER_FLAGS_CPU; | ||
388 | hdev->err = 0; | ||
389 | img_hash_xmit_cpu(hdev, addr + ctx->offset, | ||
390 | wsend * 4, 0); | ||
391 | ctx->sent += wsend * 4; | ||
392 | wsend = 0; | ||
393 | } else { | ||
394 | ctx->sent += wsend * 4; | ||
395 | } | ||
396 | } | ||
397 | |||
398 | if (bleft) { | ||
399 | ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents, | ||
400 | ctx->buffer, bleft, ctx->sent); | ||
401 | tbc = 0; | ||
402 | ctx->sg = sg_next(ctx->sg); | ||
403 | while (ctx->sg && (ctx->bufcnt < 4)) { | ||
404 | len = ctx->sg->length; | ||
405 | if (likely(len > (4 - ctx->bufcnt))) | ||
406 | len = 4 - ctx->bufcnt; | ||
407 | tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents, | ||
408 | ctx->buffer + ctx->bufcnt, len, | ||
409 | ctx->sent + ctx->bufcnt); | ||
410 | ctx->bufcnt += tbc; | ||
411 | if (tbc >= ctx->sg->length) { | ||
412 | ctx->sg = sg_next(ctx->sg); | ||
413 | tbc = 0; | ||
414 | } | ||
415 | } | ||
416 | |||
417 | ctx->sent += ctx->bufcnt; | ||
418 | ctx->offset = tbc; | ||
419 | |||
420 | if (!wsend) | ||
421 | img_hash_dma_callback(hdev); | ||
422 | } else { | ||
423 | ctx->offset = 0; | ||
424 | ctx->sg = sg_next(ctx->sg); | ||
425 | } | ||
426 | } | ||
427 | |||
428 | static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev) | ||
429 | { | ||
430 | struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); | ||
431 | |||
432 | if (ctx->flags & DRIVER_FLAGS_SG) | ||
433 | dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct, DMA_TO_DEVICE); | ||
434 | |||
435 | return 0; | ||
436 | } | ||
437 | |||
438 | static int img_hash_process_data(struct img_hash_dev *hdev) | ||
439 | { | ||
440 | struct ahash_request *req = hdev->req; | ||
441 | struct img_hash_request_ctx *ctx = ahash_request_ctx(req); | ||
442 | int err = 0; | ||
443 | |||
444 | ctx->bufcnt = 0; | ||
445 | |||
446 | if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) { | ||
447 | dev_dbg(hdev->dev, "process data request(%d bytes) using DMA\n", | ||
448 | req->nbytes); | ||
449 | err = img_hash_write_via_dma(hdev); | ||
450 | } else { | ||
451 | dev_dbg(hdev->dev, "process data request(%d bytes) using CPU\n", | ||
452 | req->nbytes); | ||
453 | err = img_hash_write_via_cpu(hdev); | ||
454 | } | ||
455 | return err; | ||
456 | } | ||
457 | |||
458 | static int img_hash_hw_init(struct img_hash_dev *hdev) | ||
459 | { | ||
460 | unsigned long long nbits; | ||
461 | u32 u, l; | ||
462 | |||
463 | img_hash_write(hdev, CR_RESET, CR_RESET_SET); | ||
464 | img_hash_write(hdev, CR_RESET, CR_RESET_UNSET); | ||
465 | img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET); | ||
466 | |||
467 | nbits = (u64)hdev->req->nbytes << 3; | ||
468 | u = nbits >> 32; | ||
469 | l = nbits; | ||
470 | img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u); | ||
471 | img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l); | ||
472 | |||
473 | if (!(DRIVER_FLAGS_INIT & hdev->flags)) { | ||
474 | hdev->flags |= DRIVER_FLAGS_INIT; | ||
475 | hdev->err = 0; | ||
476 | } | ||
477 | dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits); | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | static int img_hash_init(struct ahash_request *req) | ||
482 | { | ||
483 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
484 | struct img_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
485 | struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm); | ||
486 | |||
487 | ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback); | ||
488 | rctx->fallback_req.base.flags = req->base.flags | ||
489 | & CRYPTO_TFM_REQ_MAY_SLEEP; | ||
490 | |||
491 | return crypto_ahash_init(&rctx->fallback_req); | ||
492 | } | ||
493 | |||
494 | static int img_hash_handle_queue(struct img_hash_dev *hdev, | ||
495 | struct ahash_request *req) | ||
496 | { | ||
497 | struct crypto_async_request *async_req, *backlog; | ||
498 | struct img_hash_request_ctx *ctx; | ||
499 | unsigned long flags; | ||
500 | int err = 0, res = 0; | ||
501 | |||
502 | spin_lock_irqsave(&hdev->lock, flags); | ||
503 | |||
504 | if (req) | ||
505 | res = ahash_enqueue_request(&hdev->queue, req); | ||
506 | |||
507 | if (DRIVER_FLAGS_BUSY & hdev->flags) { | ||
508 | spin_unlock_irqrestore(&hdev->lock, flags); | ||
509 | return res; | ||
510 | } | ||
511 | |||
512 | backlog = crypto_get_backlog(&hdev->queue); | ||
513 | async_req = crypto_dequeue_request(&hdev->queue); | ||
514 | if (async_req) | ||
515 | hdev->flags |= DRIVER_FLAGS_BUSY; | ||
516 | |||
517 | spin_unlock_irqrestore(&hdev->lock, flags); | ||
518 | |||
519 | if (!async_req) | ||
520 | return res; | ||
521 | |||
522 | if (backlog) | ||
523 | backlog->complete(backlog, -EINPROGRESS); | ||
524 | |||
525 | req = ahash_request_cast(async_req); | ||
526 | hdev->req = req; | ||
527 | |||
528 | ctx = ahash_request_ctx(req); | ||
529 | |||
530 | dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n", | ||
531 | ctx->op, req->nbytes); | ||
532 | |||
533 | err = img_hash_hw_init(hdev); | ||
534 | |||
535 | if (!err) | ||
536 | err = img_hash_process_data(hdev); | ||
537 | |||
538 | if (err != -EINPROGRESS) { | ||
539 | /* done_task will not be scheduled, so finish the request here */ | ||
540 | img_hash_finish_req(req, err); | ||
541 | } | ||
542 | return res; | ||
543 | } | ||
544 | |||
545 | static int img_hash_update(struct ahash_request *req) | ||
546 | { | ||
547 | struct img_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
548 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
549 | struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm); | ||
550 | |||
551 | ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback); | ||
552 | rctx->fallback_req.base.flags = req->base.flags | ||
553 | & CRYPTO_TFM_REQ_MAY_SLEEP; | ||
554 | rctx->fallback_req.nbytes = req->nbytes; | ||
555 | rctx->fallback_req.src = req->src; | ||
556 | |||
557 | return crypto_ahash_update(&rctx->fallback_req); | ||
558 | } | ||
559 | |||
560 | static int img_hash_final(struct ahash_request *req) | ||
561 | { | ||
562 | struct img_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
563 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
564 | struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm); | ||
565 | |||
566 | ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback); | ||
567 | rctx->fallback_req.base.flags = req->base.flags | ||
568 | & CRYPTO_TFM_REQ_MAY_SLEEP; | ||
569 | rctx->fallback_req.result = req->result; | ||
570 | |||
571 | return crypto_ahash_final(&rctx->fallback_req); | ||
572 | } | ||
573 | |||
574 | static int img_hash_finup(struct ahash_request *req) | ||
575 | { | ||
576 | struct img_hash_request_ctx *rctx = ahash_request_ctx(req); | ||
577 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
578 | struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm); | ||
579 | |||
580 | ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback); | ||
581 | rctx->fallback_req.base.flags = req->base.flags | ||
582 | & CRYPTO_TFM_REQ_MAY_SLEEP; | ||
583 | rctx->fallback_req.nbytes = req->nbytes; | ||
584 | rctx->fallback_req.src = req->src; | ||
585 | rctx->fallback_req.result = req->result; | ||
586 | |||
587 | return crypto_ahash_finup(&rctx->fallback_req); | ||
588 | } | ||
589 | |||
590 | static int img_hash_digest(struct ahash_request *req) | ||
591 | { | ||
592 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
593 | struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm); | ||
594 | struct img_hash_request_ctx *ctx = ahash_request_ctx(req); | ||
595 | struct img_hash_dev *hdev = NULL; | ||
596 | struct img_hash_dev *tmp; | ||
597 | int err; | ||
598 | |||
599 | spin_lock(&img_hash.lock); | ||
600 | if (!tctx->hdev) { | ||
601 | list_for_each_entry(tmp, &img_hash.dev_list, list) { | ||
602 | hdev = tmp; | ||
603 | break; | ||
604 | } | ||
605 | tctx->hdev = hdev; | ||
606 | |||
607 | } else { | ||
608 | hdev = tctx->hdev; | ||
609 | } | ||
610 | |||
611 | spin_unlock(&img_hash.lock); | ||
612 | ctx->hdev = hdev; | ||
613 | ctx->flags = 0; | ||
614 | ctx->digsize = crypto_ahash_digestsize(tfm); | ||
615 | |||
616 | switch (ctx->digsize) { | ||
617 | case SHA1_DIGEST_SIZE: | ||
618 | ctx->flags |= DRIVER_FLAGS_SHA1; | ||
619 | break; | ||
620 | case SHA256_DIGEST_SIZE: | ||
621 | ctx->flags |= DRIVER_FLAGS_SHA256; | ||
622 | break; | ||
623 | case SHA224_DIGEST_SIZE: | ||
624 | ctx->flags |= DRIVER_FLAGS_SHA224; | ||
625 | break; | ||
626 | case MD5_DIGEST_SIZE: | ||
627 | ctx->flags |= DRIVER_FLAGS_MD5; | ||
628 | break; | ||
629 | default: | ||
630 | return -EINVAL; | ||
631 | } | ||
632 | |||
633 | ctx->bufcnt = 0; | ||
634 | ctx->offset = 0; | ||
635 | ctx->sent = 0; | ||
636 | ctx->total = req->nbytes; | ||
637 | ctx->sg = req->src; | ||
638 | ctx->sgfirst = req->src; | ||
639 | ctx->nents = sg_nents(ctx->sg); | ||
640 | |||
641 | err = img_hash_handle_queue(tctx->hdev, req); | ||
642 | |||
643 | return err; | ||
644 | } | ||
645 | |||
646 | static int img_hash_cra_init(struct crypto_tfm *tfm) | ||
647 | { | ||
648 | struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm); | ||
649 | const char *alg_name = crypto_tfm_alg_name(tfm); | ||
650 | int err = -ENOMEM; | ||
651 | |||
652 | ctx->fallback = crypto_alloc_ahash(alg_name, 0, | ||
653 | CRYPTO_ALG_NEED_FALLBACK); | ||
654 | if (IS_ERR(ctx->fallback)) { | ||
655 | pr_err("img_hash: Could not load fallback driver.\n"); | ||
656 | err = PTR_ERR(ctx->fallback); | ||
657 | goto err; | ||
658 | } | ||
659 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
660 | sizeof(struct img_hash_request_ctx) + | ||
661 | IMG_HASH_DMA_THRESHOLD); | ||
662 | |||
663 | return 0; | ||
664 | |||
665 | err: | ||
666 | return err; | ||
667 | } | ||
668 | |||
669 | static void img_hash_cra_exit(struct crypto_tfm *tfm) | ||
670 | { | ||
671 | struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm); | ||
672 | |||
673 | crypto_free_ahash(tctx->fallback); | ||
674 | } | ||
675 | |||
676 | static irqreturn_t img_irq_handler(int irq, void *dev_id) | ||
677 | { | ||
678 | struct img_hash_dev *hdev = dev_id; | ||
679 | u32 reg; | ||
680 | |||
681 | reg = img_hash_read(hdev, CR_INTSTAT); | ||
682 | img_hash_write(hdev, CR_INTCLEAR, reg); | ||
683 | |||
684 | if (reg & CR_INT_NEW_RESULTS_SET) { | ||
685 | dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n"); | ||
686 | if (DRIVER_FLAGS_BUSY & hdev->flags) { | ||
687 | hdev->flags |= DRIVER_FLAGS_OUTPUT_READY; | ||
688 | if (!(DRIVER_FLAGS_CPU & hdev->flags)) | ||
689 | hdev->flags |= DRIVER_FLAGS_DMA_READY; | ||
690 | tasklet_schedule(&hdev->done_task); | ||
691 | } else { | ||
692 | dev_warn(hdev->dev, | ||
693 | "HASH interrupt when no active requests.\n"); | ||
694 | } | ||
695 | } else if (reg & CR_INT_RESULTS_AVAILABLE) { | ||
696 | dev_warn(hdev->dev, | ||
697 | "IRQ triggered before the hash had completed\n"); | ||
698 | } else if (reg & CR_INT_RESULT_READ_ERR) { | ||
699 | dev_warn(hdev->dev, | ||
700 | "Attempt to read from an empty result queue\n"); | ||
701 | } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) { | ||
702 | dev_warn(hdev->dev, | ||
703 | "Data written before the hardware was configured\n"); | ||
704 | } | ||
705 | return IRQ_HANDLED; | ||
706 | } | ||
707 | |||
708 | static struct ahash_alg img_algs[] = { | ||
709 | { | ||
710 | .init = img_hash_init, | ||
711 | .update = img_hash_update, | ||
712 | .final = img_hash_final, | ||
713 | .finup = img_hash_finup, | ||
714 | .digest = img_hash_digest, | ||
715 | .halg = { | ||
716 | .digestsize = MD5_DIGEST_SIZE, | ||
717 | .base = { | ||
718 | .cra_name = "md5", | ||
719 | .cra_driver_name = "img-md5", | ||
720 | .cra_priority = 300, | ||
721 | .cra_flags = | ||
722 | CRYPTO_ALG_ASYNC | | ||
723 | CRYPTO_ALG_NEED_FALLBACK, | ||
724 | .cra_blocksize = MD5_HMAC_BLOCK_SIZE, | ||
725 | .cra_ctxsize = sizeof(struct img_hash_ctx), | ||
726 | .cra_init = img_hash_cra_init, | ||
727 | .cra_exit = img_hash_cra_exit, | ||
728 | .cra_module = THIS_MODULE, | ||
729 | } | ||
730 | } | ||
731 | }, | ||
732 | { | ||
733 | .init = img_hash_init, | ||
734 | .update = img_hash_update, | ||
735 | .final = img_hash_final, | ||
736 | .finup = img_hash_finup, | ||
737 | .digest = img_hash_digest, | ||
738 | .halg = { | ||
739 | .digestsize = SHA1_DIGEST_SIZE, | ||
740 | .base = { | ||
741 | .cra_name = "sha1", | ||
742 | .cra_driver_name = "img-sha1", | ||
743 | .cra_priority = 300, | ||
744 | .cra_flags = | ||
745 | CRYPTO_ALG_ASYNC | | ||
746 | CRYPTO_ALG_NEED_FALLBACK, | ||
747 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
748 | .cra_ctxsize = sizeof(struct img_hash_ctx), | ||
749 | .cra_init = img_hash_cra_init, | ||
750 | .cra_exit = img_hash_cra_exit, | ||
751 | .cra_module = THIS_MODULE, | ||
752 | } | ||
753 | } | ||
754 | }, | ||
755 | { | ||
756 | .init = img_hash_init, | ||
757 | .update = img_hash_update, | ||
758 | .final = img_hash_final, | ||
759 | .finup = img_hash_finup, | ||
760 | .digest = img_hash_digest, | ||
761 | .halg = { | ||
762 | .digestsize = SHA224_DIGEST_SIZE, | ||
763 | .base = { | ||
764 | .cra_name = "sha224", | ||
765 | .cra_driver_name = "img-sha224", | ||
766 | .cra_priority = 300, | ||
767 | .cra_flags = | ||
768 | CRYPTO_ALG_ASYNC | | ||
769 | CRYPTO_ALG_NEED_FALLBACK, | ||
770 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
771 | .cra_ctxsize = sizeof(struct img_hash_ctx), | ||
772 | .cra_init = img_hash_cra_init, | ||
773 | .cra_exit = img_hash_cra_exit, | ||
774 | .cra_module = THIS_MODULE, | ||
775 | } | ||
776 | } | ||
777 | }, | ||
778 | { | ||
779 | .init = img_hash_init, | ||
780 | .update = img_hash_update, | ||
781 | .final = img_hash_final, | ||
782 | .finup = img_hash_finup, | ||
783 | .digest = img_hash_digest, | ||
784 | .halg = { | ||
785 | .digestsize = SHA256_DIGEST_SIZE, | ||
786 | .base = { | ||
787 | .cra_name = "sha256", | ||
788 | .cra_driver_name = "img-sha256", | ||
789 | .cra_priority = 300, | ||
790 | .cra_flags = | ||
791 | CRYPTO_ALG_ASYNC | | ||
792 | CRYPTO_ALG_NEED_FALLBACK, | ||
793 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
794 | .cra_ctxsize = sizeof(struct img_hash_ctx), | ||
795 | .cra_init = img_hash_cra_init, | ||
796 | .cra_exit = img_hash_cra_exit, | ||
797 | .cra_module = THIS_MODULE, | ||
798 | } | ||
799 | } | ||
800 | } | ||
801 | }; | ||
802 | |||
803 | static int img_register_algs(struct img_hash_dev *hdev) | ||
804 | { | ||
805 | int i, err; | ||
806 | |||
807 | for (i = 0; i < ARRAY_SIZE(img_algs); i++) { | ||
808 | err = crypto_register_ahash(&img_algs[i]); | ||
809 | if (err) | ||
810 | goto err_reg; | ||
811 | } | ||
812 | return 0; | ||
813 | |||
814 | err_reg: | ||
815 | for (; i--; ) | ||
816 | crypto_unregister_ahash(&img_algs[i]); | ||
817 | |||
818 | return err; | ||
819 | } | ||
820 | |||
821 | static int img_unregister_algs(struct img_hash_dev *hdev) | ||
822 | { | ||
823 | int i; | ||
824 | |||
825 | for (i = 0; i < ARRAY_SIZE(img_algs); i++) | ||
826 | crypto_unregister_ahash(&img_algs[i]); | ||
827 | return 0; | ||
828 | } | ||
829 | |||
830 | static void img_hash_done_task(unsigned long data) | ||
831 | { | ||
832 | struct img_hash_dev *hdev = (struct img_hash_dev *)data; | ||
833 | int err = 0; | ||
834 | |||
835 | if (hdev->err == -EINVAL) { | ||
836 | err = hdev->err; | ||
837 | goto finish; | ||
838 | } | ||
839 | |||
840 | if (!(DRIVER_FLAGS_BUSY & hdev->flags)) { | ||
841 | img_hash_handle_queue(hdev, NULL); | ||
842 | return; | ||
843 | } | ||
844 | |||
845 | if (DRIVER_FLAGS_CPU & hdev->flags) { | ||
846 | if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) { | ||
847 | hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY; | ||
848 | goto finish; | ||
849 | } | ||
850 | } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) { | ||
851 | if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) { | ||
852 | hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE; | ||
853 | img_hash_write_via_dma_stop(hdev); | ||
854 | if (hdev->err) { | ||
855 | err = hdev->err; | ||
856 | goto finish; | ||
857 | } | ||
858 | } | ||
859 | if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) { | ||
860 | hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | | ||
861 | DRIVER_FLAGS_OUTPUT_READY); | ||
862 | goto finish; | ||
863 | } | ||
864 | } | ||
865 | return; | ||
866 | |||
867 | finish: | ||
868 | img_hash_finish_req(hdev->req, err); | ||
869 | } | ||
870 | |||
871 | static const struct of_device_id img_hash_match[] = { | ||
872 | { .compatible = "img,hash-accelerator" }, | ||
873 | {} | ||
874 | }; | ||
875 | MODULE_DEVICE_TABLE(of, img_hash_match); | ||
876 | |||
877 | static int img_hash_probe(struct platform_device *pdev) | ||
878 | { | ||
879 | struct img_hash_dev *hdev; | ||
880 | struct device *dev = &pdev->dev; | ||
881 | struct resource *hash_res; | ||
882 | int irq; | ||
883 | int err; | ||
884 | |||
885 | hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL); | ||
886 | if (hdev == NULL) | ||
887 | return -ENOMEM; | ||
888 | |||
889 | spin_lock_init(&hdev->lock); | ||
890 | |||
891 | hdev->dev = dev; | ||
892 | |||
893 | platform_set_drvdata(pdev, hdev); | ||
894 | |||
895 | INIT_LIST_HEAD(&hdev->list); | ||
896 | |||
897 | tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned long)hdev); | ||
898 | tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned long)hdev); | ||
899 | |||
900 | crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH); | ||
901 | |||
902 | /* Register bank */ | ||
903 | hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
904 | |||
905 | hdev->io_base = devm_ioremap_resource(dev, hash_res); | ||
906 | if (IS_ERR(hdev->io_base)) { | ||
907 | err = PTR_ERR(hdev->io_base); | ||
908 | dev_err(dev, "can't ioremap, returned %d\n", err); | ||
909 | |||
910 | goto res_err; | ||
911 | } | ||
912 | |||
913 | /* Write port (DMA or CPU) */ | ||
914 | hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1); | ||
915 | hdev->cpu_addr = devm_ioremap_resource(dev, hash_res); | ||
916 | if (IS_ERR(hdev->cpu_addr)) { | ||
917 | dev_err(dev, "can't ioremap write port\n"); | ||
918 | err = PTR_ERR(hdev->cpu_addr); | ||
919 | goto res_err; | ||
920 | } | ||
921 | hdev->bus_addr = hash_res->start; | ||
922 | |||
923 | irq = platform_get_irq(pdev, 0); | ||
924 | if (irq < 0) { | ||
925 | dev_err(dev, "no IRQ resource info\n"); | ||
926 | err = irq; | ||
927 | goto res_err; | ||
928 | } | ||
929 | |||
930 | err = devm_request_irq(dev, irq, img_irq_handler, 0, | ||
931 | dev_name(dev), hdev); | ||
932 | if (err) { | ||
933 | dev_err(dev, "unable to request irq\n"); | ||
934 | goto res_err; | ||
935 | } | ||
936 | dev_dbg(dev, "using IRQ channel %d\n", irq); | ||
937 | |||
938 | hdev->hash_clk = devm_clk_get(&pdev->dev, "hash"); | ||
939 | if (IS_ERR(hdev->hash_clk)) { | ||
940 | dev_err(dev, "clock initialization failed.\n"); | ||
941 | err = PTR_ERR(hdev->hash_clk); | ||
942 | goto res_err; | ||
943 | } | ||
944 | |||
945 | hdev->sys_clk = devm_clk_get(&pdev->dev, "sys"); | ||
946 | if (IS_ERR(hdev->sys_clk)) { | ||
947 | dev_err(dev, "clock initialization failed.\n"); | ||
948 | err = PTR_ERR(hdev->sys_clk); | ||
949 | goto res_err; | ||
950 | } | ||
951 | |||
952 | err = clk_prepare_enable(hdev->hash_clk); | ||
953 | if (err) | ||
954 | goto res_err; | ||
955 | |||
956 | err = clk_prepare_enable(hdev->sys_clk); | ||
957 | if (err) | ||
958 | goto clk_err; | ||
959 | |||
960 | err = img_hash_dma_init(hdev); | ||
961 | if (err) | ||
962 | goto dma_err; | ||
963 | |||
964 | dev_dbg(dev, "using %s for DMA transfers\n", | ||
965 | dma_chan_name(hdev->dma_lch)); | ||
966 | |||
967 | spin_lock(&img_hash.lock); | ||
968 | list_add_tail(&hdev->list, &img_hash.dev_list); | ||
969 | spin_unlock(&img_hash.lock); | ||
970 | |||
971 | err = img_register_algs(hdev); | ||
972 | if (err) | ||
973 | goto err_algs; | ||
974 | dev_dbg(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n"); | ||
975 | |||
976 | return 0; | ||
977 | |||
978 | err_algs: | ||
979 | spin_lock(&img_hash.lock); | ||
980 | list_del(&hdev->list); | ||
981 | spin_unlock(&img_hash.lock); | ||
982 | dma_release_channel(hdev->dma_lch); | ||
983 | dma_err: | ||
984 | clk_disable_unprepare(hdev->sys_clk); | ||
985 | clk_err: | ||
986 | clk_disable_unprepare(hdev->hash_clk); | ||
987 | res_err: | ||
988 | tasklet_kill(&hdev->done_task); | ||
989 | tasklet_kill(&hdev->dma_task); | ||
990 | |||
991 | return err; | ||
992 | } | ||
993 | |||
994 | static int img_hash_remove(struct platform_device *pdev) | ||
995 | { | ||
996 | struct img_hash_dev *hdev; | ||
997 | |||
998 | hdev = platform_get_drvdata(pdev); | ||
999 | spin_lock(&img_hash.lock); | ||
1000 | list_del(&hdev->list); | ||
1001 | spin_unlock(&img_hash.lock); | ||
1002 | |||
1003 | img_unregister_algs(hdev); | ||
1004 | |||
1005 | tasklet_kill(&hdev->done_task); | ||
1006 | tasklet_kill(&hdev->dma_task); | ||
1007 | |||
1008 | dma_release_channel(hdev->dma_lch); | ||
1009 | |||
1010 | clk_disable_unprepare(hdev->hash_clk); | ||
1011 | clk_disable_unprepare(hdev->sys_clk); | ||
1012 | |||
1013 | return 0; | ||
1014 | } | ||
1015 | |||
1016 | static struct platform_driver img_hash_driver = { | ||
1017 | .probe = img_hash_probe, | ||
1018 | .remove = img_hash_remove, | ||
1019 | .driver = { | ||
1020 | .name = "img-hash-accelerator", | ||
1021 | .of_match_table = of_match_ptr(img_hash_match), | ||
1022 | } | ||
1023 | }; | ||
1024 | module_platform_driver(img_hash_driver); | ||
1025 | |||
1026 | MODULE_LICENSE("GPL v2"); | ||
1027 | MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator driver"); | ||
1028 | MODULE_AUTHOR("Will Thomas."); | ||
1029 | MODULE_AUTHOR("James Hartley <james.hartley@imgtec.com>"); | ||
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 829d6394fb33..59ed54e464a9 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c | |||
@@ -153,7 +153,7 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) | |||
153 | struct dcp *sdcp = global_sdcp; | 153 | struct dcp *sdcp = global_sdcp; |
154 | const int chan = actx->chan; | 154 | const int chan = actx->chan; |
155 | uint32_t stat; | 155 | uint32_t stat; |
156 | int ret; | 156 | unsigned long ret; |
157 | struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; | 157 | struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; |
158 | 158 | ||
159 | dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc), | 159 | dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc), |
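
The one-line mxs-dcp change above is a type fix: wait_for_completion_timeout() returns the remaining jiffies as an unsigned long (0 on timeout), never a negative errno, and ret receives the result of such a call later in this function. A minimal sketch of the intended pattern, with an assumed helper name and one-second timeout:

	#include <linux/completion.h>
	#include <linux/errno.h>
	#include <linux/jiffies.h>

	static int wait_for_dcp_done(struct completion *done)
	{
		/* jiffies remaining on success, 0 on timeout -- never negative */
		unsigned long remaining;

		remaining = wait_for_completion_timeout(done,
							msecs_to_jiffies(1000));
		if (!remaining)
			return -ETIMEDOUT;

		return 0;
	}
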
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 42f95a4326b0..9a28b7e07c71 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c | |||
@@ -554,15 +554,23 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) | |||
554 | return err; | 554 | return err; |
555 | } | 555 | } |
556 | 556 | ||
557 | static int omap_aes_check_aligned(struct scatterlist *sg) | 557 | static int omap_aes_check_aligned(struct scatterlist *sg, int total) |
558 | { | 558 | { |
559 | int len = 0; | ||
560 | |||
559 | while (sg) { | 561 | while (sg) { |
560 | if (!IS_ALIGNED(sg->offset, 4)) | 562 | if (!IS_ALIGNED(sg->offset, 4)) |
561 | return -1; | 563 | return -1; |
562 | if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE)) | 564 | if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE)) |
563 | return -1; | 565 | return -1; |
566 | |||
567 | len += sg->length; | ||
564 | sg = sg_next(sg); | 568 | sg = sg_next(sg); |
565 | } | 569 | } |
570 | |||
571 | if (len != total) | ||
572 | return -1; | ||
573 | |||
566 | return 0; | 574 | return 0; |
567 | } | 575 | } |
568 | 576 | ||
@@ -633,8 +641,8 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd, | |||
633 | dd->in_sg = req->src; | 641 | dd->in_sg = req->src; |
634 | dd->out_sg = req->dst; | 642 | dd->out_sg = req->dst; |
635 | 643 | ||
636 | if (omap_aes_check_aligned(dd->in_sg) || | 644 | if (omap_aes_check_aligned(dd->in_sg, dd->total) || |
637 | omap_aes_check_aligned(dd->out_sg)) { | 645 | omap_aes_check_aligned(dd->out_sg, dd->total)) { |
638 | if (omap_aes_copy_sgs(dd)) | 646 | if (omap_aes_copy_sgs(dd)) |
639 | pr_err("Failed to copy SGs for unaligned cases\n"); | 647 | pr_err("Failed to copy SGs for unaligned cases\n"); |
640 | dd->sgs_copied = 1; | 648 | dd->sgs_copied = 1; |
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 3c76696ee578..4d63e0d4da9a 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c | |||
@@ -640,6 +640,7 @@ static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx) | |||
640 | 640 | ||
641 | while (ctx->sg) { | 641 | while (ctx->sg) { |
642 | vaddr = kmap_atomic(sg_page(ctx->sg)); | 642 | vaddr = kmap_atomic(sg_page(ctx->sg)); |
643 | vaddr += ctx->sg->offset; | ||
643 | 644 | ||
644 | count = omap_sham_append_buffer(ctx, | 645 | count = omap_sham_append_buffer(ctx, |
645 | vaddr + ctx->offset, | 646 | vaddr + ctx->offset, |
@@ -1945,6 +1946,7 @@ static int omap_sham_probe(struct platform_device *pdev) | |||
1945 | dd->flags |= dd->pdata->flags; | 1946 | dd->flags |= dd->pdata->flags; |
1946 | 1947 | ||
1947 | pm_runtime_enable(dev); | 1948 | pm_runtime_enable(dev); |
1949 | pm_runtime_irq_safe(dev); | ||
1948 | pm_runtime_get_sync(dev); | 1950 | pm_runtime_get_sync(dev); |
1949 | rev = omap_sham_read(dd, SHA_REG_REV(dd)); | 1951 | rev = omap_sham_read(dd, SHA_REG_REV(dd)); |
1950 | pm_runtime_put_sync(&pdev->dev); | 1952 | pm_runtime_put_sync(&pdev->dev); |
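
The two omap-sham hunks above are independent fixes. The first addresses a classic scatterlist pitfall: kmap_atomic(sg_page(sg)) maps the start of the backing page, while the entry's data begins sg->offset bytes into it. A minimal hedged sketch of the corrected access pattern (the helper and dst buffer are illustrative assumptions, and the entry is assumed to lie within the single mapped page):

	#include <linux/highmem.h>
	#include <linux/scatterlist.h>
	#include <linux/string.h>

	static void copy_sg_entry(void *dst, struct scatterlist *sg)
	{
		/* Map the backing page, then skip to where this entry's data starts. */
		void *vaddr = kmap_atomic(sg_page(sg));

		memcpy(dst, vaddr + sg->offset, sg->length);
		kunmap_atomic(vaddr);
	}

The second hunk, pm_runtime_irq_safe(), marks the device's runtime-PM callbacks as safe to invoke in atomic context, so pm_runtime_get_sync() may later be called from the driver's interrupt path.
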
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 19c0efa29ab3..f22ce7169fa5 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h | |||
@@ -52,7 +52,6 @@ | |||
52 | #include <linux/io.h> | 52 | #include <linux/io.h> |
53 | #include "adf_cfg_common.h" | 53 | #include "adf_cfg_common.h" |
54 | 54 | ||
55 | #define PCI_VENDOR_ID_INTEL 0x8086 | ||
56 | #define ADF_DH895XCC_DEVICE_NAME "dh895xcc" | 55 | #define ADF_DH895XCC_DEVICE_NAME "dh895xcc" |
57 | #define ADF_DH895XCC_PCI_DEVICE_ID 0x435 | 56 | #define ADF_DH895XCC_PCI_DEVICE_ID 0x435 |
58 | #define ADF_PCI_MAX_BARS 3 | 57 | #define ADF_PCI_MAX_BARS 3 |
diff --git a/drivers/crypto/qat/qat_common/adf_accel_engine.c b/drivers/crypto/qat/qat_common/adf_accel_engine.c index c77453b900a3..7f8b66c915ed 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_engine.c +++ b/drivers/crypto/qat/qat_common/adf_accel_engine.c | |||
@@ -60,36 +60,40 @@ int adf_ae_fw_load(struct adf_accel_dev *accel_dev) | |||
60 | 60 | ||
61 | if (request_firmware(&loader_data->uof_fw, hw_device->fw_name, | 61 | if (request_firmware(&loader_data->uof_fw, hw_device->fw_name, |
62 | &accel_dev->accel_pci_dev.pci_dev->dev)) { | 62 | &accel_dev->accel_pci_dev.pci_dev->dev)) { |
63 | pr_err("QAT: Failed to load firmware %s\n", hw_device->fw_name); | 63 | dev_err(&GET_DEV(accel_dev), "Failed to load firmware %s\n", |
64 | hw_device->fw_name); | ||
64 | return -EFAULT; | 65 | return -EFAULT; |
65 | } | 66 | } |
66 | 67 | ||
67 | uof_size = loader_data->uof_fw->size; | 68 | uof_size = loader_data->uof_fw->size; |
68 | uof_addr = (void *)loader_data->uof_fw->data; | 69 | uof_addr = (void *)loader_data->uof_fw->data; |
69 | if (qat_uclo_map_uof_obj(loader_data->fw_loader, uof_addr, uof_size)) { | 70 | if (qat_uclo_map_uof_obj(loader_data->fw_loader, uof_addr, uof_size)) { |
70 | pr_err("QAT: Failed to map UOF\n"); | 71 | dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n"); |
71 | goto out_err; | 72 | goto out_err; |
72 | } | 73 | } |
73 | if (qat_uclo_wr_all_uimage(loader_data->fw_loader)) { | 74 | if (qat_uclo_wr_all_uimage(loader_data->fw_loader)) { |
74 | pr_err("QAT: Failed to map UOF\n"); | 75 | dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n"); |
75 | goto out_err; | 76 | goto out_err; |
76 | } | 77 | } |
77 | return 0; | 78 | return 0; |
78 | 79 | ||
79 | out_err: | 80 | out_err: |
80 | release_firmware(loader_data->uof_fw); | 81 | adf_ae_fw_release(accel_dev); |
81 | return -EFAULT; | 82 | return -EFAULT; |
82 | } | 83 | } |
83 | 84 | ||
84 | int adf_ae_fw_release(struct adf_accel_dev *accel_dev) | 85 | void adf_ae_fw_release(struct adf_accel_dev *accel_dev) |
85 | { | 86 | { |
86 | struct adf_fw_loader_data *loader_data = accel_dev->fw_loader; | 87 | struct adf_fw_loader_data *loader_data = accel_dev->fw_loader; |
87 | 88 | ||
88 | release_firmware(loader_data->uof_fw); | ||
89 | qat_uclo_del_uof_obj(loader_data->fw_loader); | 89 | qat_uclo_del_uof_obj(loader_data->fw_loader); |
90 | qat_hal_deinit(loader_data->fw_loader); | 90 | qat_hal_deinit(loader_data->fw_loader); |
91 | |||
92 | if (loader_data->uof_fw) | ||
93 | release_firmware(loader_data->uof_fw); | ||
94 | |||
95 | loader_data->uof_fw = NULL; | ||
91 | loader_data->fw_loader = NULL; | 96 | loader_data->fw_loader = NULL; |
92 | return 0; | ||
93 | } | 97 | } |
94 | 98 | ||
95 | int adf_ae_start(struct adf_accel_dev *accel_dev) | 99 | int adf_ae_start(struct adf_accel_dev *accel_dev) |
@@ -104,8 +108,9 @@ int adf_ae_start(struct adf_accel_dev *accel_dev) | |||
104 | ae_ctr++; | 108 | ae_ctr++; |
105 | } | 109 | } |
106 | } | 110 | } |
107 | pr_info("QAT: qat_dev%d started %d acceleration engines\n", | 111 | dev_info(&GET_DEV(accel_dev), |
108 | accel_dev->accel_id, ae_ctr); | 112 | "qat_dev%d started %d acceleration engines\n", |
113 | accel_dev->accel_id, ae_ctr); | ||
109 | return 0; | 114 | return 0; |
110 | } | 115 | } |
111 | 116 | ||
@@ -121,8 +126,9 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev) | |||
121 | ae_ctr++; | 126 | ae_ctr++; |
122 | } | 127 | } |
123 | } | 128 | } |
124 | pr_info("QAT: qat_dev%d stopped %d acceleration engines\n", | 129 | dev_info(&GET_DEV(accel_dev), |
125 | accel_dev->accel_id, ae_ctr); | 130 | "qat_dev%d stopped %d acceleration engines\n", |
131 | accel_dev->accel_id, ae_ctr); | ||
126 | return 0; | 132 | return 0; |
127 | } | 133 | } |
128 | 134 | ||
@@ -147,12 +153,12 @@ int adf_ae_init(struct adf_accel_dev *accel_dev) | |||
147 | 153 | ||
148 | accel_dev->fw_loader = loader_data; | 154 | accel_dev->fw_loader = loader_data; |
149 | if (qat_hal_init(accel_dev)) { | 155 | if (qat_hal_init(accel_dev)) { |
150 | pr_err("QAT: Failed to init the AEs\n"); | 156 | dev_err(&GET_DEV(accel_dev), "Failed to init the AEs\n"); |
151 | kfree(loader_data); | 157 | kfree(loader_data); |
152 | return -EFAULT; | 158 | return -EFAULT; |
153 | } | 159 | } |
154 | if (adf_ae_reset(accel_dev, 0)) { | 160 | if (adf_ae_reset(accel_dev, 0)) { |
155 | pr_err("QAT: Failed to reset the AEs\n"); | 161 | dev_err(&GET_DEV(accel_dev), "Failed to reset the AEs\n"); |
156 | qat_hal_deinit(loader_data->fw_loader); | 162 | qat_hal_deinit(loader_data->fw_loader); |
157 | kfree(loader_data); | 163 | kfree(loader_data); |
158 | return -EFAULT; | 164 | return -EFAULT; |
@@ -162,6 +168,9 @@ int adf_ae_init(struct adf_accel_dev *accel_dev) | |||
162 | 168 | ||
163 | int adf_ae_shutdown(struct adf_accel_dev *accel_dev) | 169 | int adf_ae_shutdown(struct adf_accel_dev *accel_dev) |
164 | { | 170 | { |
171 | struct adf_fw_loader_data *loader_data = accel_dev->fw_loader; | ||
172 | |||
173 | qat_hal_deinit(loader_data->fw_loader); | ||
165 | kfree(accel_dev->fw_loader); | 174 | kfree(accel_dev->fw_loader); |
166 | accel_dev->fw_loader = NULL; | 175 | accel_dev->fw_loader = NULL; |
167 | return 0; | 176 | return 0; |
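
A pattern worth calling out once, since it repeats across the QAT hunks that follow: bare pr_err()/pr_info() calls with a hand-rolled "QAT:" prefix are replaced by their device-aware dev_err()/dev_info() counterparts, which tag each message with the driver and device name automatically. Roughly, with the helper function being an illustrative assumption (GET_DEV() is the real QAT accessor used throughout these hunks):

	#include <linux/device.h>
	#include "adf_accel_devices.h"	/* GET_DEV() and struct adf_accel_dev */

	static void report_map_failure(struct adf_accel_dev *accel_dev)
	{
		/* old style: no device context, manual subsystem prefix */
		pr_err("QAT: Failed to map UOF\n");

		/* new style: tagged with driver and device,
		 * e.g. "dh895xcc 0000:03:00.0: Failed to map UOF" */
		dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n");
	}
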
diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index fa1fef824de2..2dbc733b8ab2 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c | |||
@@ -60,14 +60,14 @@ static pci_ers_result_t adf_error_detected(struct pci_dev *pdev, | |||
60 | { | 60 | { |
61 | struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); | 61 | struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); |
62 | 62 | ||
63 | pr_info("QAT: Acceleration driver hardware error detected.\n"); | 63 | dev_info(&pdev->dev, "Acceleration driver hardware error detected.\n"); |
64 | if (!accel_dev) { | 64 | if (!accel_dev) { |
65 | pr_err("QAT: Can't find acceleration device\n"); | 65 | dev_err(&pdev->dev, "Can't find acceleration device\n"); |
66 | return PCI_ERS_RESULT_DISCONNECT; | 66 | return PCI_ERS_RESULT_DISCONNECT; |
67 | } | 67 | } |
68 | 68 | ||
69 | if (state == pci_channel_io_perm_failure) { | 69 | if (state == pci_channel_io_perm_failure) { |
70 | pr_err("QAT: Can't recover from device error\n"); | 70 | dev_err(&pdev->dev, "Can't recover from device error\n"); |
71 | return PCI_ERS_RESULT_DISCONNECT; | 71 | return PCI_ERS_RESULT_DISCONNECT; |
72 | } | 72 | } |
73 | 73 | ||
@@ -88,10 +88,12 @@ static void adf_dev_restore(struct adf_accel_dev *accel_dev) | |||
88 | struct pci_dev *parent = pdev->bus->self; | 88 | struct pci_dev *parent = pdev->bus->self; |
89 | uint16_t bridge_ctl = 0; | 89 | uint16_t bridge_ctl = 0; |
90 | 90 | ||
91 | pr_info("QAT: Resetting device qat_dev%d\n", accel_dev->accel_id); | 91 | dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", |
92 | accel_dev->accel_id); | ||
92 | 93 | ||
93 | if (!pci_wait_for_pending_transaction(pdev)) | 94 | if (!pci_wait_for_pending_transaction(pdev)) |
94 | pr_info("QAT: Transaction still in progress. Proceeding\n"); | 95 | dev_info(&GET_DEV(accel_dev), |
96 | "Transaction still in progress. Proceeding\n"); | ||
95 | 97 | ||
96 | pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl); | 98 | pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl); |
97 | bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET; | 99 | bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET; |
@@ -158,7 +160,8 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, | |||
158 | unsigned long timeout = wait_for_completion_timeout( | 160 | unsigned long timeout = wait_for_completion_timeout( |
159 | &reset_data->compl, wait_jiffies); | 161 | &reset_data->compl, wait_jiffies); |
160 | if (!timeout) { | 162 | if (!timeout) { |
161 | pr_err("QAT: Reset device timeout expired\n"); | 163 | dev_err(&GET_DEV(accel_dev), |
164 | "Reset device timeout expired\n"); | ||
162 | ret = -EFAULT; | 165 | ret = -EFAULT; |
163 | } | 166 | } |
164 | kfree(reset_data); | 167 | kfree(reset_data); |
@@ -184,8 +187,8 @@ static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev) | |||
184 | 187 | ||
185 | static void adf_resume(struct pci_dev *pdev) | 188 | static void adf_resume(struct pci_dev *pdev) |
186 | { | 189 | { |
187 | pr_info("QAT: Acceleration driver reset completed\n"); | 190 | dev_info(&pdev->dev, "Acceleration driver reset completed\n"); |
188 | pr_info("QAT: Device is up and runnig\n"); | 191 | dev_info(&pdev->dev, "Device is up and runnig\n"); |
189 | } | 192 | } |
190 | 193 | ||
191 | static struct pci_error_handlers adf_err_handler = { | 194 | static struct pci_error_handlers adf_err_handler = { |
@@ -236,7 +239,7 @@ EXPORT_SYMBOL_GPL(adf_disable_aer); | |||
236 | int adf_init_aer(void) | 239 | int adf_init_aer(void) |
237 | { | 240 | { |
238 | device_reset_wq = create_workqueue("qat_device_reset_wq"); | 241 | device_reset_wq = create_workqueue("qat_device_reset_wq"); |
239 | return (device_reset_wq == NULL) ? -EFAULT : 0; | 242 | return !device_reset_wq ? -EFAULT : 0; |
240 | } | 243 | } |
241 | 244 | ||
242 | void adf_exit_aer(void) | 245 | void adf_exit_aer(void) |
diff --git a/drivers/crypto/qat/qat_common/adf_cfg.c b/drivers/crypto/qat/qat_common/adf_cfg.c index de16da9070a5..ab65bc274561 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg.c +++ b/drivers/crypto/qat/qat_common/adf_cfg.c | |||
@@ -142,7 +142,8 @@ int adf_cfg_dev_add(struct adf_accel_dev *accel_dev) | |||
142 | dev_cfg_data, | 142 | dev_cfg_data, |
143 | &qat_dev_cfg_fops); | 143 | &qat_dev_cfg_fops); |
144 | if (!dev_cfg_data->debug) { | 144 | if (!dev_cfg_data->debug) { |
145 | pr_err("QAT: Failed to create qat cfg debugfs entry.\n"); | 145 | dev_err(&GET_DEV(accel_dev), |
146 | "Failed to create qat cfg debugfs entry.\n"); | ||
146 | kfree(dev_cfg_data); | 147 | kfree(dev_cfg_data); |
147 | accel_dev->cfg = NULL; | 148 | accel_dev->cfg = NULL; |
148 | return -EFAULT; | 149 | return -EFAULT; |
@@ -305,7 +306,7 @@ int adf_cfg_add_key_value_param(struct adf_accel_dev *accel_dev, | |||
305 | snprintf(key_val->val, ADF_CFG_MAX_VAL_LEN_IN_BYTES, | 306 | snprintf(key_val->val, ADF_CFG_MAX_VAL_LEN_IN_BYTES, |
306 | "0x%lx", (unsigned long)val); | 307 | "0x%lx", (unsigned long)val); |
307 | } else { | 308 | } else { |
308 | pr_err("QAT: Unknown type given.\n"); | 309 | dev_err(&GET_DEV(accel_dev), "Unknown type given.\n"); |
309 | kfree(key_val); | 310 | kfree(key_val); |
310 | return -1; | 311 | return -1; |
311 | } | 312 | } |
diff --git a/drivers/crypto/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/qat/qat_common/adf_cfg_strings.h index c7ac758ebc90..13575111382c 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_strings.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_strings.h | |||
@@ -59,7 +59,7 @@ | |||
59 | #define ADF_RING_SYM_TX "RingSymTx" | 59 | #define ADF_RING_SYM_TX "RingSymTx" |
60 | #define ADF_RING_RND_TX "RingNrbgTx" | 60 | #define ADF_RING_RND_TX "RingNrbgTx" |
61 | #define ADF_RING_ASYM_RX "RingAsymRx" | 61 | #define ADF_RING_ASYM_RX "RingAsymRx" |
62 | #define ADF_RING_SYM_RX "RinSymRx" | 62 | #define ADF_RING_SYM_RX "RingSymRx" |
63 | #define ADF_RING_RND_RX "RingNrbgRx" | 63 | #define ADF_RING_RND_RX "RingNrbgRx" |
64 | #define ADF_RING_DC_TX "RingTx" | 64 | #define ADF_RING_DC_TX "RingTx" |
65 | #define ADF_RING_DC_RX "RingRx" | 65 | #define ADF_RING_DC_RX "RingRx" |
@@ -69,15 +69,15 @@ | |||
69 | #define ADF_DC "Dc" | 69 | #define ADF_DC "Dc" |
70 | #define ADF_ETRMGR_COALESCING_ENABLED "InterruptCoalescingEnabled" | 70 | #define ADF_ETRMGR_COALESCING_ENABLED "InterruptCoalescingEnabled" |
71 | #define ADF_ETRMGR_COALESCING_ENABLED_FORMAT \ | 71 | #define ADF_ETRMGR_COALESCING_ENABLED_FORMAT \ |
72 | ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCING_ENABLED | 72 | ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCING_ENABLED |
73 | #define ADF_ETRMGR_COALESCE_TIMER "InterruptCoalescingTimerNs" | 73 | #define ADF_ETRMGR_COALESCE_TIMER "InterruptCoalescingTimerNs" |
74 | #define ADF_ETRMGR_COALESCE_TIMER_FORMAT \ | 74 | #define ADF_ETRMGR_COALESCE_TIMER_FORMAT \ |
75 | ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCE_TIMER | 75 | ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCE_TIMER |
76 | #define ADF_ETRMGR_COALESCING_MSG_ENABLED "InterruptCoalescingNumResponses" | 76 | #define ADF_ETRMGR_COALESCING_MSG_ENABLED "InterruptCoalescingNumResponses" |
77 | #define ADF_ETRMGR_COALESCING_MSG_ENABLED_FORMAT \ | 77 | #define ADF_ETRMGR_COALESCING_MSG_ENABLED_FORMAT \ |
78 | ADF_ETRMGR_BANK"%d"ADF_ETRMGR_COALESCING_MSG_ENABLED | 78 | ADF_ETRMGR_BANK "%d" ADF_ETRMGR_COALESCING_MSG_ENABLED |
79 | #define ADF_ETRMGR_CORE_AFFINITY "CoreAffinity" | 79 | #define ADF_ETRMGR_CORE_AFFINITY "CoreAffinity" |
80 | #define ADF_ETRMGR_CORE_AFFINITY_FORMAT \ | 80 | #define ADF_ETRMGR_CORE_AFFINITY_FORMAT \ |
81 | ADF_ETRMGR_BANK"%d"ADF_ETRMGR_CORE_AFFINITY | 81 | ADF_ETRMGR_BANK "%d" ADF_ETRMGR_CORE_AFFINITY |
82 | #define ADF_ACCEL_STR "Accelerator%d" | 82 | #define ADF_ACCEL_STR "Accelerator%d" |
83 | #endif | 83 | #endif |
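
The adf_cfg_strings.h hunk changes no generated strings: adjacent string literals (and macros expanding to them) concatenate at compile time either way. The whitespace matters because checkpatch.pl warns about concatenated strings without spaces, and C++11-aware tooling can lex "%d"MACRO with no space as a user-defined-literal suffix. A small stand-alone illustration with made-up macro names:

	#include <stdio.h>

	#define BANK  "Bank"
	#define TIMER "InterruptCoalescingTimerNs"

	/* Adjacent literals concatenate: "Bank%dInterruptCoalescingTimerNs". */
	static const char fmt[] = BANK "%d" TIMER;

	int main(void)
	{
		printf(fmt, 3);		/* prints Bank3InterruptCoalescingTimerNs */
		putchar('\n');
		return 0;
	}
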
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index a62e485c8786..0666ee6a3360 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h | |||
@@ -115,7 +115,7 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev); | |||
115 | int adf_ae_init(struct adf_accel_dev *accel_dev); | 115 | int adf_ae_init(struct adf_accel_dev *accel_dev); |
116 | int adf_ae_shutdown(struct adf_accel_dev *accel_dev); | 116 | int adf_ae_shutdown(struct adf_accel_dev *accel_dev); |
117 | int adf_ae_fw_load(struct adf_accel_dev *accel_dev); | 117 | int adf_ae_fw_load(struct adf_accel_dev *accel_dev); |
118 | int adf_ae_fw_release(struct adf_accel_dev *accel_dev); | 118 | void adf_ae_fw_release(struct adf_accel_dev *accel_dev); |
119 | int adf_ae_start(struct adf_accel_dev *accel_dev); | 119 | int adf_ae_start(struct adf_accel_dev *accel_dev); |
120 | int adf_ae_stop(struct adf_accel_dev *accel_dev); | 120 | int adf_ae_stop(struct adf_accel_dev *accel_dev); |
121 | 121 | ||
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index 74207a6f0516..cb5f066e93a6 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c | |||
@@ -77,14 +77,14 @@ struct adf_ctl_drv_info { | |||
77 | struct class *drv_class; | 77 | struct class *drv_class; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | static struct adf_ctl_drv_info adt_ctl_drv; | 80 | static struct adf_ctl_drv_info adf_ctl_drv; |
81 | 81 | ||
82 | static void adf_chr_drv_destroy(void) | 82 | static void adf_chr_drv_destroy(void) |
83 | { | 83 | { |
84 | device_destroy(adt_ctl_drv.drv_class, MKDEV(adt_ctl_drv.major, 0)); | 84 | device_destroy(adf_ctl_drv.drv_class, MKDEV(adf_ctl_drv.major, 0)); |
85 | cdev_del(&adt_ctl_drv.drv_cdev); | 85 | cdev_del(&adf_ctl_drv.drv_cdev); |
86 | class_destroy(adt_ctl_drv.drv_class); | 86 | class_destroy(adf_ctl_drv.drv_class); |
87 | unregister_chrdev_region(MKDEV(adt_ctl_drv.major, 0), 1); | 87 | unregister_chrdev_region(MKDEV(adf_ctl_drv.major, 0), 1); |
88 | } | 88 | } |
89 | 89 | ||
90 | static int adf_chr_drv_create(void) | 90 | static int adf_chr_drv_create(void) |
@@ -97,20 +97,20 @@ static int adf_chr_drv_create(void) | |||
97 | return -EFAULT; | 97 | return -EFAULT; |
98 | } | 98 | } |
99 | 99 | ||
100 | adt_ctl_drv.drv_class = class_create(THIS_MODULE, DEVICE_NAME); | 100 | adf_ctl_drv.drv_class = class_create(THIS_MODULE, DEVICE_NAME); |
101 | if (IS_ERR(adt_ctl_drv.drv_class)) { | 101 | if (IS_ERR(adf_ctl_drv.drv_class)) { |
102 | pr_err("QAT: class_create failed for adf_ctl\n"); | 102 | pr_err("QAT: class_create failed for adf_ctl\n"); |
103 | goto err_chrdev_unreg; | 103 | goto err_chrdev_unreg; |
104 | } | 104 | } |
105 | adt_ctl_drv.major = MAJOR(dev_id); | 105 | adf_ctl_drv.major = MAJOR(dev_id); |
106 | cdev_init(&adt_ctl_drv.drv_cdev, &adf_ctl_ops); | 106 | cdev_init(&adf_ctl_drv.drv_cdev, &adf_ctl_ops); |
107 | if (cdev_add(&adt_ctl_drv.drv_cdev, dev_id, 1)) { | 107 | if (cdev_add(&adf_ctl_drv.drv_cdev, dev_id, 1)) { |
108 | pr_err("QAT: cdev add failed\n"); | 108 | pr_err("QAT: cdev add failed\n"); |
109 | goto err_class_destr; | 109 | goto err_class_destr; |
110 | } | 110 | } |
111 | 111 | ||
112 | drv_device = device_create(adt_ctl_drv.drv_class, NULL, | 112 | drv_device = device_create(adf_ctl_drv.drv_class, NULL, |
113 | MKDEV(adt_ctl_drv.major, 0), | 113 | MKDEV(adf_ctl_drv.major, 0), |
114 | NULL, DEVICE_NAME); | 114 | NULL, DEVICE_NAME); |
115 | if (IS_ERR(drv_device)) { | 115 | if (IS_ERR(drv_device)) { |
116 | pr_err("QAT: failed to create device\n"); | 116 | pr_err("QAT: failed to create device\n"); |
@@ -118,9 +118,9 @@ static int adf_chr_drv_create(void) | |||
118 | } | 118 | } |
119 | return 0; | 119 | return 0; |
120 | err_cdev_del: | 120 | err_cdev_del: |
121 | cdev_del(&adt_ctl_drv.drv_cdev); | 121 | cdev_del(&adf_ctl_drv.drv_cdev); |
122 | err_class_destr: | 122 | err_class_destr: |
123 | class_destroy(adt_ctl_drv.drv_class); | 123 | class_destroy(adf_ctl_drv.drv_class); |
124 | err_chrdev_unreg: | 124 | err_chrdev_unreg: |
125 | unregister_chrdev_region(dev_id, 1); | 125 | unregister_chrdev_region(dev_id, 1); |
126 | return -EFAULT; | 126 | return -EFAULT; |
@@ -159,14 +159,16 @@ static int adf_add_key_value_data(struct adf_accel_dev *accel_dev, | |||
159 | if (adf_cfg_add_key_value_param(accel_dev, section, | 159 | if (adf_cfg_add_key_value_param(accel_dev, section, |
160 | key_val->key, (void *)val, | 160 | key_val->key, (void *)val, |
161 | key_val->type)) { | 161 | key_val->type)) { |
162 | pr_err("QAT: failed to add keyvalue.\n"); | 162 | dev_err(&GET_DEV(accel_dev), |
163 | "failed to add hex keyvalue.\n"); | ||
163 | return -EFAULT; | 164 | return -EFAULT; |
164 | } | 165 | } |
165 | } else { | 166 | } else { |
166 | if (adf_cfg_add_key_value_param(accel_dev, section, | 167 | if (adf_cfg_add_key_value_param(accel_dev, section, |
167 | key_val->key, key_val->val, | 168 | key_val->key, key_val->val, |
168 | key_val->type)) { | 169 | key_val->type)) { |
169 | pr_err("QAT: failed to add keyvalue.\n"); | 170 | dev_err(&GET_DEV(accel_dev), |
171 | "failed to add keyvalue.\n"); | ||
170 | return -EFAULT; | 172 | return -EFAULT; |
171 | } | 173 | } |
172 | } | 174 | } |
@@ -185,12 +187,14 @@ static int adf_copy_key_value_data(struct adf_accel_dev *accel_dev, | |||
185 | while (section_head) { | 187 | while (section_head) { |
186 | if (copy_from_user(§ion, (void __user *)section_head, | 188 | if (copy_from_user(§ion, (void __user *)section_head, |
187 | sizeof(*section_head))) { | 189 | sizeof(*section_head))) { |
188 | pr_err("QAT: failed to copy section info\n"); | 190 | dev_err(&GET_DEV(accel_dev), |
191 | "failed to copy section info\n"); | ||
189 | goto out_err; | 192 | goto out_err; |
190 | } | 193 | } |
191 | 194 | ||
192 | if (adf_cfg_section_add(accel_dev, section.name)) { | 195 | if (adf_cfg_section_add(accel_dev, section.name)) { |
193 | pr_err("QAT: failed to add section.\n"); | 196 | dev_err(&GET_DEV(accel_dev), |
197 | "failed to add section.\n"); | ||
194 | goto out_err; | 198 | goto out_err; |
195 | } | 199 | } |
196 | 200 | ||
@@ -199,7 +203,8 @@ static int adf_copy_key_value_data(struct adf_accel_dev *accel_dev, | |||
199 | while (params_head) { | 203 | while (params_head) { |
200 | if (copy_from_user(&key_val, (void __user *)params_head, | 204 | if (copy_from_user(&key_val, (void __user *)params_head, |
201 | sizeof(key_val))) { | 205 | sizeof(key_val))) { |
202 | pr_err("QAT: Failed to copy keyvalue.\n"); | 206 | dev_err(&GET_DEV(accel_dev), |
207 | "Failed to copy keyvalue.\n"); | ||
203 | goto out_err; | 208 | goto out_err; |
204 | } | 209 | } |
205 | if (adf_add_key_value_data(accel_dev, section.name, | 210 | if (adf_add_key_value_data(accel_dev, section.name, |
@@ -258,8 +263,9 @@ static int adf_ctl_is_device_in_use(int id) | |||
258 | 263 | ||
259 | if (id == dev->accel_id || id == ADF_CFG_ALL_DEVICES) { | 264 | if (id == dev->accel_id || id == ADF_CFG_ALL_DEVICES) { |
260 | if (adf_devmgr_in_reset(dev) || adf_dev_in_use(dev)) { | 265 | if (adf_devmgr_in_reset(dev) || adf_dev_in_use(dev)) { |
261 | pr_info("QAT: device qat_dev%d is busy\n", | 266 | dev_info(&GET_DEV(dev), |
262 | dev->accel_id); | 267 | "device qat_dev%d is busy\n", |
268 | dev->accel_id); | ||
263 | return -EBUSY; | 269 | return -EBUSY; |
264 | } | 270 | } |
265 | } | 271 | } |
@@ -280,7 +286,8 @@ static int adf_ctl_stop_devices(uint32_t id) | |||
280 | continue; | 286 | continue; |
281 | 287 | ||
282 | if (adf_dev_stop(accel_dev)) { | 288 | if (adf_dev_stop(accel_dev)) { |
283 | pr_err("QAT: Failed to stop qat_dev%d\n", id); | 289 | dev_err(&GET_DEV(accel_dev), |
290 | "Failed to stop qat_dev%d\n", id); | ||
284 | ret = -EFAULT; | 291 | ret = -EFAULT; |
285 | } else { | 292 | } else { |
286 | adf_dev_shutdown(accel_dev); | 293 | adf_dev_shutdown(accel_dev); |
@@ -343,17 +350,20 @@ static int adf_ctl_ioctl_dev_start(struct file *fp, unsigned int cmd, | |||
343 | } | 350 | } |
344 | 351 | ||
345 | if (!adf_dev_started(accel_dev)) { | 352 | if (!adf_dev_started(accel_dev)) { |
346 | pr_info("QAT: Starting acceleration device qat_dev%d.\n", | 353 | dev_info(&GET_DEV(accel_dev), |
347 | ctl_data->device_id); | 354 | "Starting acceleration device qat_dev%d.\n", |
355 | ctl_data->device_id); | ||
348 | ret = adf_dev_init(accel_dev); | 356 | ret = adf_dev_init(accel_dev); |
349 | if (!ret) | 357 | if (!ret) |
350 | ret = adf_dev_start(accel_dev); | 358 | ret = adf_dev_start(accel_dev); |
351 | } else { | 359 | } else { |
352 | pr_info("QAT: Acceleration device qat_dev%d already started.\n", | 360 | dev_info(&GET_DEV(accel_dev), |
353 | ctl_data->device_id); | 361 | "Acceleration device qat_dev%d already started.\n", |
362 | ctl_data->device_id); | ||
354 | } | 363 | } |
355 | if (ret) { | 364 | if (ret) { |
356 | pr_err("QAT: Failed to start qat_dev%d\n", ctl_data->device_id); | 365 | dev_err(&GET_DEV(accel_dev), "Failed to start qat_dev%d\n", |
366 | ctl_data->device_id); | ||
357 | adf_dev_stop(accel_dev); | 367 | adf_dev_stop(accel_dev); |
358 | adf_dev_shutdown(accel_dev); | 368 | adf_dev_shutdown(accel_dev); |
359 | } | 369 | } |
@@ -408,7 +418,7 @@ static int adf_ctl_ioctl_get_status(struct file *fp, unsigned int cmd, | |||
408 | 418 | ||
409 | if (copy_to_user((void __user *)arg, &dev_info, | 419 | if (copy_to_user((void __user *)arg, &dev_info, |
410 | sizeof(struct adf_dev_status_info))) { | 420 | sizeof(struct adf_dev_status_info))) { |
411 | pr_err("QAT: failed to copy status.\n"); | 421 | dev_err(&GET_DEV(accel_dev), "failed to copy status.\n"); |
412 | return -EFAULT; | 422 | return -EFAULT; |
413 | } | 423 | } |
414 | return 0; | 424 | return 0; |
diff --git a/drivers/crypto/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/qat/qat_common/adf_dev_mgr.c index 4a0a829d4500..3f0ff9e7d840 100644 --- a/drivers/crypto/qat/qat_common/adf_dev_mgr.c +++ b/drivers/crypto/qat/qat_common/adf_dev_mgr.c | |||
@@ -67,7 +67,8 @@ int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev) | |||
67 | struct list_head *itr; | 67 | struct list_head *itr; |
68 | 68 | ||
69 | if (num_devices == ADF_MAX_DEVICES) { | 69 | if (num_devices == ADF_MAX_DEVICES) { |
70 | pr_err("QAT: Only support up to %d devices\n", ADF_MAX_DEVICES); | 70 | dev_err(&GET_DEV(accel_dev), "Only support up to %d devices\n", |
71 | ADF_MAX_DEVICES); | ||
71 | return -EFAULT; | 72 | return -EFAULT; |
72 | } | 73 | } |
73 | 74 | ||
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 8f0ca498ab87..245f43237a2d 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c | |||
@@ -124,12 +124,12 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) | |||
124 | 124 | ||
125 | if (!hw_data) { | 125 | if (!hw_data) { |
126 | dev_err(&GET_DEV(accel_dev), | 126 | dev_err(&GET_DEV(accel_dev), |
127 | "QAT: Failed to init device - hw_data not set\n"); | 127 | "Failed to init device - hw_data not set\n"); |
128 | return -EFAULT; | 128 | return -EFAULT; |
129 | } | 129 | } |
130 | 130 | ||
131 | if (!test_bit(ADF_STATUS_CONFIGURED, &accel_dev->status)) { | 131 | if (!test_bit(ADF_STATUS_CONFIGURED, &accel_dev->status)) { |
132 | pr_info("QAT: Device not configured\n"); | 132 | dev_err(&GET_DEV(accel_dev), "Device not configured\n"); |
133 | return -EFAULT; | 133 | return -EFAULT; |
134 | } | 134 | } |
135 | 135 | ||
@@ -151,20 +151,21 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) | |||
151 | hw_data->enable_ints(accel_dev); | 151 | hw_data->enable_ints(accel_dev); |
152 | 152 | ||
153 | if (adf_ae_init(accel_dev)) { | 153 | if (adf_ae_init(accel_dev)) { |
154 | pr_err("QAT: Failed to initialise Acceleration Engine\n"); | 154 | dev_err(&GET_DEV(accel_dev), |
155 | "Failed to initialise Acceleration Engine\n"); | ||
155 | return -EFAULT; | 156 | return -EFAULT; |
156 | } | 157 | } |
157 | set_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status); | 158 | set_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status); |
158 | 159 | ||
159 | if (adf_ae_fw_load(accel_dev)) { | 160 | if (adf_ae_fw_load(accel_dev)) { |
160 | pr_err("QAT: Failed to load acceleration FW\n"); | 161 | dev_err(&GET_DEV(accel_dev), |
161 | adf_ae_fw_release(accel_dev); | 162 | "Failed to load acceleration FW\n"); |
162 | return -EFAULT; | 163 | return -EFAULT; |
163 | } | 164 | } |
164 | set_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status); | 165 | set_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status); |
165 | 166 | ||
166 | if (hw_data->alloc_irq(accel_dev)) { | 167 | if (hw_data->alloc_irq(accel_dev)) { |
167 | pr_err("QAT: Failed to allocate interrupts\n"); | 168 | dev_err(&GET_DEV(accel_dev), "Failed to allocate interrupts\n"); |
168 | return -EFAULT; | 169 | return -EFAULT; |
169 | } | 170 | } |
170 | set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status); | 171 | set_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status); |
@@ -179,8 +180,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) | |||
179 | if (!service->admin) | 180 | if (!service->admin) |
180 | continue; | 181 | continue; |
181 | if (service->event_hld(accel_dev, ADF_EVENT_INIT)) { | 182 | if (service->event_hld(accel_dev, ADF_EVENT_INIT)) { |
182 | pr_err("QAT: Failed to initialise service %s\n", | 183 | dev_err(&GET_DEV(accel_dev), |
183 | service->name); | 184 | "Failed to initialise service %s\n", |
185 | service->name); | ||
184 | return -EFAULT; | 186 | return -EFAULT; |
185 | } | 187 | } |
186 | set_bit(accel_dev->accel_id, &service->init_status); | 188 | set_bit(accel_dev->accel_id, &service->init_status); |
@@ -190,8 +192,9 @@ int adf_dev_init(struct adf_accel_dev *accel_dev) | |||
190 | if (service->admin) | 192 | if (service->admin) |
191 | continue; | 193 | continue; |
192 | if (service->event_hld(accel_dev, ADF_EVENT_INIT)) { | 194 | if (service->event_hld(accel_dev, ADF_EVENT_INIT)) { |
193 | pr_err("QAT: Failed to initialise service %s\n", | 195 | dev_err(&GET_DEV(accel_dev), |
194 | service->name); | 196 | "Failed to initialise service %s\n", |
197 | service->name); | ||
195 | return -EFAULT; | 198 | return -EFAULT; |
196 | } | 199 | } |
197 | set_bit(accel_dev->accel_id, &service->init_status); | 200 | set_bit(accel_dev->accel_id, &service->init_status); |
@@ -221,7 +224,7 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) | |||
221 | set_bit(ADF_STATUS_STARTING, &accel_dev->status); | 224 | set_bit(ADF_STATUS_STARTING, &accel_dev->status); |
222 | 225 | ||
223 | if (adf_ae_start(accel_dev)) { | 226 | if (adf_ae_start(accel_dev)) { |
224 | pr_err("QAT: AE Start Failed\n"); | 227 | dev_err(&GET_DEV(accel_dev), "AE Start Failed\n"); |
225 | return -EFAULT; | 228 | return -EFAULT; |
226 | } | 229 | } |
227 | set_bit(ADF_STATUS_AE_STARTED, &accel_dev->status); | 230 | set_bit(ADF_STATUS_AE_STARTED, &accel_dev->status); |
@@ -231,8 +234,9 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) | |||
231 | if (!service->admin) | 234 | if (!service->admin) |
232 | continue; | 235 | continue; |
233 | if (service->event_hld(accel_dev, ADF_EVENT_START)) { | 236 | if (service->event_hld(accel_dev, ADF_EVENT_START)) { |
234 | pr_err("QAT: Failed to start service %s\n", | 237 | dev_err(&GET_DEV(accel_dev), |
235 | service->name); | 238 | "Failed to start service %s\n", |
239 | service->name); | ||
236 | return -EFAULT; | 240 | return -EFAULT; |
237 | } | 241 | } |
238 | set_bit(accel_dev->accel_id, &service->start_status); | 242 | set_bit(accel_dev->accel_id, &service->start_status); |
@@ -242,8 +246,9 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) | |||
242 | if (service->admin) | 246 | if (service->admin) |
243 | continue; | 247 | continue; |
244 | if (service->event_hld(accel_dev, ADF_EVENT_START)) { | 248 | if (service->event_hld(accel_dev, ADF_EVENT_START)) { |
245 | pr_err("QAT: Failed to start service %s\n", | 249 | dev_err(&GET_DEV(accel_dev), |
246 | service->name); | 250 | "Failed to start service %s\n", |
251 | service->name); | ||
247 | return -EFAULT; | 252 | return -EFAULT; |
248 | } | 253 | } |
249 | set_bit(accel_dev->accel_id, &service->start_status); | 254 | set_bit(accel_dev->accel_id, &service->start_status); |
@@ -253,7 +258,8 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) | |||
253 | set_bit(ADF_STATUS_STARTED, &accel_dev->status); | 258 | set_bit(ADF_STATUS_STARTED, &accel_dev->status); |
254 | 259 | ||
255 | if (qat_algs_register()) { | 260 | if (qat_algs_register()) { |
256 | pr_err("QAT: Failed to register crypto algs\n"); | 261 | dev_err(&GET_DEV(accel_dev), |
262 | "Failed to register crypto algs\n"); | ||
257 | set_bit(ADF_STATUS_STARTING, &accel_dev->status); | 263 | set_bit(ADF_STATUS_STARTING, &accel_dev->status); |
258 | clear_bit(ADF_STATUS_STARTED, &accel_dev->status); | 264 | clear_bit(ADF_STATUS_STARTED, &accel_dev->status); |
259 | return -EFAULT; | 265 | return -EFAULT; |
@@ -287,7 +293,8 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev) | |||
287 | clear_bit(ADF_STATUS_STARTED, &accel_dev->status); | 293 | clear_bit(ADF_STATUS_STARTED, &accel_dev->status); |
288 | 294 | ||
289 | if (qat_algs_unregister()) | 295 | if (qat_algs_unregister()) |
290 | pr_err("QAT: Failed to unregister crypto algs\n"); | 296 | dev_err(&GET_DEV(accel_dev), |
297 | "Failed to unregister crypto algs\n"); | ||
291 | 298 | ||
292 | list_for_each(list_itr, &service_table) { | 299 | list_for_each(list_itr, &service_table) { |
293 | service = list_entry(list_itr, struct service_hndl, list); | 300 | service = list_entry(list_itr, struct service_hndl, list); |
@@ -310,8 +317,9 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev) | |||
310 | if (!test_bit(accel_dev->accel_id, &service->start_status)) | 317 | if (!test_bit(accel_dev->accel_id, &service->start_status)) |
311 | continue; | 318 | continue; |
312 | if (service->event_hld(accel_dev, ADF_EVENT_STOP)) | 319 | if (service->event_hld(accel_dev, ADF_EVENT_STOP)) |
313 | pr_err("QAT: Failed to shutdown service %s\n", | 320 | dev_err(&GET_DEV(accel_dev), |
314 | service->name); | 321 | "Failed to shutdown service %s\n", |
322 | service->name); | ||
315 | else | 323 | else |
316 | clear_bit(accel_dev->accel_id, &service->start_status); | 324 | clear_bit(accel_dev->accel_id, &service->start_status); |
317 | } | 325 | } |
@@ -321,7 +329,7 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev) | |||
321 | 329 | ||
322 | if (test_bit(ADF_STATUS_AE_STARTED, &accel_dev->status)) { | 330 | if (test_bit(ADF_STATUS_AE_STARTED, &accel_dev->status)) { |
323 | if (adf_ae_stop(accel_dev)) | 331 | if (adf_ae_stop(accel_dev)) |
324 | pr_err("QAT: failed to stop AE\n"); | 332 | dev_err(&GET_DEV(accel_dev), "failed to stop AE\n"); |
325 | else | 333 | else |
326 | clear_bit(ADF_STATUS_AE_STARTED, &accel_dev->status); | 334 | clear_bit(ADF_STATUS_AE_STARTED, &accel_dev->status); |
327 | } | 335 | } |
@@ -350,16 +358,14 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) | |||
350 | } | 358 | } |
351 | 359 | ||
352 | if (test_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status)) { | 360 | if (test_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status)) { |
353 | if (adf_ae_fw_release(accel_dev)) | 361 | adf_ae_fw_release(accel_dev); |
354 | pr_err("QAT: Failed to release the ucode\n"); | 362 | clear_bit(ADF_STATUS_AE_UCODE_LOADED, &accel_dev->status); |
355 | else | ||
356 | clear_bit(ADF_STATUS_AE_UCODE_LOADED, | ||
357 | &accel_dev->status); | ||
358 | } | 363 | } |
359 | 364 | ||
360 | if (test_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status)) { | 365 | if (test_bit(ADF_STATUS_AE_INITIALISED, &accel_dev->status)) { |
361 | if (adf_ae_shutdown(accel_dev)) | 366 | if (adf_ae_shutdown(accel_dev)) |
362 | pr_err("QAT: Failed to shutdown Accel Engine\n"); | 367 | dev_err(&GET_DEV(accel_dev), |
368 | "Failed to shutdown Accel Engine\n"); | ||
363 | else | 369 | else |
364 | clear_bit(ADF_STATUS_AE_INITIALISED, | 370 | clear_bit(ADF_STATUS_AE_INITIALISED, |
365 | &accel_dev->status); | 371 | &accel_dev->status); |
@@ -372,8 +378,9 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) | |||
372 | if (!test_bit(accel_dev->accel_id, &service->init_status)) | 378 | if (!test_bit(accel_dev->accel_id, &service->init_status)) |
373 | continue; | 379 | continue; |
374 | if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) | 380 | if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) |
375 | pr_err("QAT: Failed to shutdown service %s\n", | 381 | dev_err(&GET_DEV(accel_dev), |
376 | service->name); | 382 | "Failed to shutdown service %s\n", |
383 | service->name); | ||
377 | else | 384 | else |
378 | clear_bit(accel_dev->accel_id, &service->init_status); | 385 | clear_bit(accel_dev->accel_id, &service->init_status); |
379 | } | 386 | } |
@@ -384,8 +391,9 @@ void adf_dev_shutdown(struct adf_accel_dev *accel_dev) | |||
384 | if (!test_bit(accel_dev->accel_id, &service->init_status)) | 391 | if (!test_bit(accel_dev->accel_id, &service->init_status)) |
385 | continue; | 392 | continue; |
386 | if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) | 393 | if (service->event_hld(accel_dev, ADF_EVENT_SHUTDOWN)) |
387 | pr_err("QAT: Failed to shutdown service %s\n", | 394 | dev_err(&GET_DEV(accel_dev), |
388 | service->name); | 395 | "Failed to shutdown service %s\n", |
396 | service->name); | ||
389 | else | 397 | else |
390 | clear_bit(accel_dev->accel_id, &service->init_status); | 398 | clear_bit(accel_dev->accel_id, &service->init_status); |
391 | } | 399 | } |
@@ -419,16 +427,18 @@ int adf_dev_restarting_notify(struct adf_accel_dev *accel_dev) | |||
419 | if (service->admin) | 427 | if (service->admin) |
420 | continue; | 428 | continue; |
421 | if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING)) | 429 | if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING)) |
422 | pr_err("QAT: Failed to restart service %s.\n", | 430 | dev_err(&GET_DEV(accel_dev), |
423 | service->name); | 431 | "Failed to restart service %s.\n", |
432 | service->name); | ||
424 | } | 433 | } |
425 | list_for_each(list_itr, &service_table) { | 434 | list_for_each(list_itr, &service_table) { |
426 | service = list_entry(list_itr, struct service_hndl, list); | 435 | service = list_entry(list_itr, struct service_hndl, list); |
427 | if (!service->admin) | 436 | if (!service->admin) |
428 | continue; | 437 | continue; |
429 | if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING)) | 438 | if (service->event_hld(accel_dev, ADF_EVENT_RESTARTING)) |
430 | pr_err("QAT: Failed to restart service %s.\n", | 439 | dev_err(&GET_DEV(accel_dev), |
431 | service->name); | 440 | "Failed to restart service %s.\n", |
441 | service->name); | ||
432 | } | 442 | } |
433 | return 0; | 443 | return 0; |
434 | } | 444 | } |
@@ -443,16 +453,18 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev) | |||
443 | if (service->admin) | 453 | if (service->admin) |
444 | continue; | 454 | continue; |
445 | if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED)) | 455 | if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED)) |
446 | pr_err("QAT: Failed to restart service %s.\n", | 456 | dev_err(&GET_DEV(accel_dev), |
447 | service->name); | 457 | "Failed to restart service %s.\n", |
458 | service->name); | ||
448 | } | 459 | } |
449 | list_for_each(list_itr, &service_table) { | 460 | list_for_each(list_itr, &service_table) { |
450 | service = list_entry(list_itr, struct service_hndl, list); | 461 | service = list_entry(list_itr, struct service_hndl, list); |
451 | if (!service->admin) | 462 | if (!service->admin) |
452 | continue; | 463 | continue; |
453 | if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED)) | 464 | if (service->event_hld(accel_dev, ADF_EVENT_RESTARTED)) |
454 | pr_err("QAT: Failed to restart service %s.\n", | 465 | dev_err(&GET_DEV(accel_dev), |
455 | service->name); | 466 | "Failed to restart service %s.\n", |
467 | service->name); | ||
456 | } | 468 | } |
457 | return 0; | 469 | return 0; |
458 | } | 470 | } |
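All of the init/start/stop/shutdown/restart paths in adf_init.c share one idiom: walk service_table twice, first the admin services and then the rest, delivering an ADF_EVENT_* to each handler and logging per-service failures. A condensed sketch of that dispatch loop as a hypothetical helper, using the names from the hunks above and assuming service->admin is a 0/1 flag:

static void adf_dispatch_event(struct adf_accel_dev *accel_dev,
			       enum adf_event event, int admin_pass)
{
	struct list_head *itr;
	struct service_hndl *service;

	list_for_each(itr, &service_table) {
		service = list_entry(itr, struct service_hndl, list);
		if (service->admin != admin_pass)
			continue;	/* handled in the other pass */
		if (service->event_hld(accel_dev, event))
			dev_err(&GET_DEV(accel_dev),
				"Failed to deliver event to service %s\n",
				service->name);
	}
}

/* callers: adf_dispatch_event(dev, ADF_EVENT_INIT, 1);  admin services first
 *          adf_dispatch_event(dev, ADF_EVENT_INIT, 0);  then the rest
 */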
diff --git a/drivers/crypto/qat/qat_common/adf_transport.c b/drivers/crypto/qat/qat_common/adf_transport.c index 7dd54aaee9fa..ccec327489da 100644 --- a/drivers/crypto/qat/qat_common/adf_transport.c +++ b/drivers/crypto/qat/qat_common/adf_transport.c | |||
@@ -195,7 +195,7 @@ static int adf_init_ring(struct adf_etr_ring_data *ring) | |||
195 | memset(ring->base_addr, 0x7F, ring_size_bytes); | 195 | memset(ring->base_addr, 0x7F, ring_size_bytes); |
196 | /* The base_addr has to be aligned to the size of the buffer */ | 196 | /* The base_addr has to be aligned to the size of the buffer */ |
197 | if (adf_check_ring_alignment(ring->dma_addr, ring_size_bytes)) { | 197 | if (adf_check_ring_alignment(ring->dma_addr, ring_size_bytes)) { |
198 | pr_err("QAT: Ring address not aligned\n"); | 198 | dev_err(&GET_DEV(accel_dev), "Ring address not aligned\n"); |
199 | dma_free_coherent(&GET_DEV(accel_dev), ring_size_bytes, | 199 | dma_free_coherent(&GET_DEV(accel_dev), ring_size_bytes, |
200 | ring->base_addr, ring->dma_addr); | 200 | ring->base_addr, ring->dma_addr); |
201 | return -EFAULT; | 201 | return -EFAULT; |
@@ -242,32 +242,33 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, | |||
242 | int ret; | 242 | int ret; |
243 | 243 | ||
244 | if (bank_num >= GET_MAX_BANKS(accel_dev)) { | 244 | if (bank_num >= GET_MAX_BANKS(accel_dev)) { |
245 | pr_err("QAT: Invalid bank number\n"); | 245 | dev_err(&GET_DEV(accel_dev), "Invalid bank number\n"); |
246 | return -EFAULT; | 246 | return -EFAULT; |
247 | } | 247 | } |
248 | if (msg_size > ADF_MSG_SIZE_TO_BYTES(ADF_MAX_MSG_SIZE)) { | 248 | if (msg_size > ADF_MSG_SIZE_TO_BYTES(ADF_MAX_MSG_SIZE)) { |
249 | pr_err("QAT: Invalid msg size\n"); | 249 | dev_err(&GET_DEV(accel_dev), "Invalid msg size\n"); |
250 | return -EFAULT; | 250 | return -EFAULT; |
251 | } | 251 | } |
252 | if (ADF_MAX_INFLIGHTS(adf_verify_ring_size(msg_size, num_msgs), | 252 | if (ADF_MAX_INFLIGHTS(adf_verify_ring_size(msg_size, num_msgs), |
253 | ADF_BYTES_TO_MSG_SIZE(msg_size)) < 2) { | 253 | ADF_BYTES_TO_MSG_SIZE(msg_size)) < 2) { |
254 | pr_err("QAT: Invalid ring size for given msg size\n"); | 254 | dev_err(&GET_DEV(accel_dev), |
255 | "Invalid ring size for given msg size\n"); | ||
255 | return -EFAULT; | 256 | return -EFAULT; |
256 | } | 257 | } |
257 | if (adf_cfg_get_param_value(accel_dev, section, ring_name, val)) { | 258 | if (adf_cfg_get_param_value(accel_dev, section, ring_name, val)) { |
258 | pr_err("QAT: Section %s, no such entry : %s\n", | 259 | dev_err(&GET_DEV(accel_dev), "Section %s, no such entry : %s\n", |
259 | section, ring_name); | 260 | section, ring_name); |
260 | return -EFAULT; | 261 | return -EFAULT; |
261 | } | 262 | } |
262 | if (kstrtouint(val, 10, &ring_num)) { | 263 | if (kstrtouint(val, 10, &ring_num)) { |
263 | pr_err("QAT: Can't get ring number\n"); | 264 | dev_err(&GET_DEV(accel_dev), "Can't get ring number\n"); |
264 | return -EFAULT; | 265 | return -EFAULT; |
265 | } | 266 | } |
266 | 267 | ||
267 | bank = &transport_data->banks[bank_num]; | 268 | bank = &transport_data->banks[bank_num]; |
268 | if (adf_reserve_ring(bank, ring_num)) { | 269 | if (adf_reserve_ring(bank, ring_num)) { |
269 | pr_err("QAT: Ring %d, %s already exists.\n", | 270 | dev_err(&GET_DEV(accel_dev), "Ring %d, %s already exists.\n", |
270 | ring_num, ring_name); | 271 | ring_num, ring_name); |
271 | return -EFAULT; | 272 | return -EFAULT; |
272 | } | 273 | } |
273 | ring = &bank->rings[ring_num]; | 274 | ring = &bank->rings[ring_num]; |
@@ -287,7 +288,8 @@ int adf_create_ring(struct adf_accel_dev *accel_dev, const char *section, | |||
287 | accel_dev->hw_device->hw_arb_ring_enable(ring); | 288 | accel_dev->hw_device->hw_arb_ring_enable(ring); |
288 | 289 | ||
289 | if (adf_ring_debugfs_add(ring, ring_name)) { | 290 | if (adf_ring_debugfs_add(ring, ring_name)) { |
290 | pr_err("QAT: Couldn't add ring debugfs entry\n"); | 291 | dev_err(&GET_DEV(accel_dev), |
292 | "Couldn't add ring debugfs entry\n"); | ||
291 | ret = -EFAULT; | 293 | ret = -EFAULT; |
292 | goto err; | 294 | goto err; |
293 | } | 295 | } |
@@ -428,7 +430,8 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev, | |||
428 | goto err; | 430 | goto err; |
429 | } else { | 431 | } else { |
430 | if (i < hw_data->tx_rx_gap) { | 432 | if (i < hw_data->tx_rx_gap) { |
431 | pr_err("QAT: Invalid tx rings mask config\n"); | 433 | dev_err(&GET_DEV(accel_dev), |
434 | "Invalid tx rings mask config\n"); | ||
432 | goto err; | 435 | goto err; |
433 | } | 436 | } |
434 | tx_ring = &bank->rings[i - hw_data->tx_rx_gap]; | 437 | tx_ring = &bank->rings[i - hw_data->tx_rx_gap]; |
@@ -436,7 +439,8 @@ static int adf_init_bank(struct adf_accel_dev *accel_dev, | |||
436 | } | 439 | } |
437 | } | 440 | } |
438 | if (adf_bank_debugfs_add(bank)) { | 441 | if (adf_bank_debugfs_add(bank)) { |
439 | pr_err("QAT: Failed to add bank debugfs entry\n"); | 442 | dev_err(&GET_DEV(accel_dev), |
443 | "Failed to add bank debugfs entry\n"); | ||
440 | goto err; | 444 | goto err; |
441 | } | 445 | } |
442 | 446 | ||
@@ -492,7 +496,8 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev) | |||
492 | etr_data->debug = debugfs_create_dir("transport", | 496 | etr_data->debug = debugfs_create_dir("transport", |
493 | accel_dev->debugfs_dir); | 497 | accel_dev->debugfs_dir); |
494 | if (!etr_data->debug) { | 498 | if (!etr_data->debug) { |
495 | pr_err("QAT: Unable to create transport debugfs entry\n"); | 499 | dev_err(&GET_DEV(accel_dev), |
500 | "Unable to create transport debugfs entry\n"); | ||
496 | ret = -ENOENT; | 501 | ret = -ENOENT; |
497 | goto err_bank_debug; | 502 | goto err_bank_debug; |
498 | } | 503 | } |
diff --git a/drivers/crypto/qat/qat_common/adf_transport_debug.c b/drivers/crypto/qat/qat_common/adf_transport_debug.c index 6b6974553514..e41986967294 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_debug.c +++ b/drivers/crypto/qat/qat_common/adf_transport_debug.c | |||
@@ -100,6 +100,8 @@ static int adf_ring_show(struct seq_file *sfile, void *v) | |||
100 | empty = READ_CSR_E_STAT(csr, bank->bank_number); | 100 | empty = READ_CSR_E_STAT(csr, bank->bank_number); |
101 | 101 | ||
102 | seq_puts(sfile, "------- Ring configuration -------\n"); | 102 | seq_puts(sfile, "------- Ring configuration -------\n"); |
103 | seq_printf(sfile, "ring name: %s\n", | ||
104 | ring->ring_debug->ring_name); | ||
103 | seq_printf(sfile, "ring num %d, bank num %d\n", | 105 | seq_printf(sfile, "ring num %d, bank num %d\n", |
104 | ring->ring_number, ring->bank->bank_number); | 106 | ring->ring_number, ring->bank->bank_number); |
105 | seq_printf(sfile, "head %x, tail %x, empty: %d\n", | 107 | seq_printf(sfile, "head %x, tail %x, empty: %d\n", |
diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw.h b/drivers/crypto/qat/qat_common/icp_qat_hw.h index 68f191b653b0..121d5e6e46ca 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_hw.h +++ b/drivers/crypto/qat/qat_common/icp_qat_hw.h | |||
@@ -145,7 +145,7 @@ struct icp_qat_hw_auth_setup { | |||
145 | }; | 145 | }; |
146 | 146 | ||
147 | #define QAT_HW_DEFAULT_ALIGNMENT 8 | 147 | #define QAT_HW_DEFAULT_ALIGNMENT 8 |
148 | #define QAT_HW_ROUND_UP(val, n) (((val) + ((n)-1)) & (~(n-1))) | 148 | #define QAT_HW_ROUND_UP(val, n) (((val) + ((n) - 1)) & (~(n - 1))) |
149 | #define ICP_QAT_HW_NULL_STATE1_SZ 32 | 149 | #define ICP_QAT_HW_NULL_STATE1_SZ 32 |
150 | #define ICP_QAT_HW_MD5_STATE1_SZ 16 | 150 | #define ICP_QAT_HW_MD5_STATE1_SZ 16 |
151 | #define ICP_QAT_HW_SHA1_STATE1_SZ 20 | 151 | #define ICP_QAT_HW_SHA1_STATE1_SZ 20 |
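The QAT_HW_ROUND_UP change is whitespace-only, but the macro is worth a second look: it is the usual add-then-mask trick for rounding up to a multiple of a power-of-two n, and n is still unparenthesized inside ~(n - 1), so it only expands safely for simple arguments. A worked example with the default alignment of 8:

#define QAT_HW_ROUND_UP(val, n) (((val) + ((n) - 1)) & (~(n - 1)))

/* QAT_HW_ROUND_UP(13, 8) -> (13 + 7) & ~7 = 20 & ~7 = 16
 * QAT_HW_ROUND_UP(16, 8) -> (16 + 7) & ~7 = 23 & ~7 = 16
 * ~(n - 1) clears the low log2(n) bits, so the result is correct
 * only when n is a power of two.
 */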
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 828f2a686aab..3bd705ca5973 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c | |||
@@ -110,13 +110,13 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node) | |||
110 | list_for_each(itr, adf_devmgr_get_head()) { | 110 | list_for_each(itr, adf_devmgr_get_head()) { |
111 | accel_dev = list_entry(itr, struct adf_accel_dev, list); | 111 | accel_dev = list_entry(itr, struct adf_accel_dev, list); |
112 | if ((node == dev_to_node(&GET_DEV(accel_dev)) || | 112 | if ((node == dev_to_node(&GET_DEV(accel_dev)) || |
113 | dev_to_node(&GET_DEV(accel_dev)) < 0) | 113 | dev_to_node(&GET_DEV(accel_dev)) < 0) && |
114 | && adf_dev_started(accel_dev)) | 114 | adf_dev_started(accel_dev)) |
115 | break; | 115 | break; |
116 | accel_dev = NULL; | 116 | accel_dev = NULL; |
117 | } | 117 | } |
118 | if (!accel_dev) { | 118 | if (!accel_dev) { |
119 | pr_err("QAT: Could not find device on node %d\n", node); | 119 | pr_err("QAT: Could not find a device on node %d\n", node); |
120 | accel_dev = adf_devmgr_get_first(); | 120 | accel_dev = adf_devmgr_get_first(); |
121 | } | 121 | } |
122 | if (!accel_dev || !adf_dev_started(accel_dev)) | 122 | if (!accel_dev || !adf_dev_started(accel_dev)) |
@@ -137,7 +137,8 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node) | |||
137 | if (atomic_add_return(1, &inst_best->refctr) == 1) { | 137 | if (atomic_add_return(1, &inst_best->refctr) == 1) { |
138 | if (adf_dev_get(accel_dev)) { | 138 | if (adf_dev_get(accel_dev)) { |
139 | atomic_dec(&inst_best->refctr); | 139 | atomic_dec(&inst_best->refctr); |
140 | pr_err("QAT: Could increment dev refctr\n"); | 140 | dev_err(&GET_DEV(accel_dev), |
141 | "Could not increment dev refctr\n"); | ||
141 | return NULL; | 142 | return NULL; |
142 | } | 143 | } |
143 | } | 144 | } |
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c index b818c19713bf..274ff7e9de6e 100644 --- a/drivers/crypto/qat/qat_common/qat_hal.c +++ b/drivers/crypto/qat/qat_common/qat_hal.c | |||
@@ -434,8 +434,8 @@ static void qat_hal_reset_timestamp(struct icp_qat_fw_loader_handle *handle) | |||
434 | SET_GLB_CSR(handle, MISC_CONTROL, misc_ctl | MC_TIMESTAMP_ENABLE); | 434 | SET_GLB_CSR(handle, MISC_CONTROL, misc_ctl | MC_TIMESTAMP_ENABLE); |
435 | } | 435 | } |
436 | 436 | ||
437 | #define ESRAM_AUTO_TINIT (1<<2) | 437 | #define ESRAM_AUTO_TINIT BIT(2) |
438 | #define ESRAM_AUTO_TINIT_DONE (1<<3) | 438 | #define ESRAM_AUTO_TINIT_DONE BIT(3) |
439 | #define ESRAM_AUTO_INIT_USED_CYCLES (1640) | 439 | #define ESRAM_AUTO_INIT_USED_CYCLES (1640) |
440 | #define ESRAM_AUTO_INIT_CSR_OFFSET 0xC1C | 440 | #define ESRAM_AUTO_INIT_CSR_OFFSET 0xC1C |
441 | static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle) | 441 | static int qat_hal_init_esram(struct icp_qat_fw_loader_handle *handle) |
@@ -718,7 +718,7 @@ int qat_hal_init(struct adf_accel_dev *accel_dev) | |||
718 | handle->hal_handle->ae_max_num = max_en_ae_id + 1; | 718 | handle->hal_handle->ae_max_num = max_en_ae_id + 1; |
719 | /* take all AEs out of reset */ | 719 | /* take all AEs out of reset */ |
720 | if (qat_hal_clr_reset(handle)) { | 720 | if (qat_hal_clr_reset(handle)) { |
721 | pr_err("QAT: qat_hal_clr_reset error\n"); | 721 | dev_err(&GET_DEV(accel_dev), "qat_hal_clr_reset error\n"); |
722 | goto out_err; | 722 | goto out_err; |
723 | } | 723 | } |
724 | if (qat_hal_clear_gpr(handle)) | 724 | if (qat_hal_clear_gpr(handle)) |
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_admin.c b/drivers/crypto/qat/qat_dh895xcc/adf_admin.c index 53c491b59f07..e4666065c399 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_admin.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_admin.c | |||
@@ -93,7 +93,8 @@ int adf_put_admin_msg_sync(struct adf_accel_dev *accel_dev, | |||
93 | memcpy(out, admin->virt_addr + offset + | 93 | memcpy(out, admin->virt_addr + offset + |
94 | ADF_ADMINMSG_LEN, ADF_ADMINMSG_LEN); | 94 | ADF_ADMINMSG_LEN, ADF_ADMINMSG_LEN); |
95 | else | 95 | else |
96 | pr_err("QAT: Failed to send admin msg to accelerator\n"); | 96 | dev_err(&GET_DEV(accel_dev), |
97 | "Failed to send admin msg to accelerator\n"); | ||
97 | 98 | ||
98 | mutex_unlock(&admin->lock); | 99 | mutex_unlock(&admin->lock); |
99 | return received ? 0 : -EFAULT; | 100 | return received ? 0 : -EFAULT; |
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 6a735d5c0e37..b1386922d7a2 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | |||
@@ -150,7 +150,8 @@ void adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev, | |||
150 | *arb_map_config = thrd_to_arb_map_sku6; | 150 | *arb_map_config = thrd_to_arb_map_sku6; |
151 | break; | 151 | break; |
152 | default: | 152 | default: |
153 | pr_err("QAT: The configuration doesn't match any SKU"); | 153 | dev_err(&GET_DEV(accel_dev), |
154 | "The configuration doesn't match any SKU"); | ||
154 | *arb_map_config = NULL; | 155 | *arb_map_config = NULL; |
155 | } | 156 | } |
156 | } | 157 | } |
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h index 01e0be21e93a..25269a9f24a2 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h | |||
@@ -73,11 +73,11 @@ | |||
73 | /* Error detection and correction */ | 73 | /* Error detection and correction */ |
74 | #define ADF_DH895XCC_AE_CTX_ENABLES(i) (i * 0x1000 + 0x20818) | 74 | #define ADF_DH895XCC_AE_CTX_ENABLES(i) (i * 0x1000 + 0x20818) |
75 | #define ADF_DH895XCC_AE_MISC_CONTROL(i) (i * 0x1000 + 0x20960) | 75 | #define ADF_DH895XCC_AE_MISC_CONTROL(i) (i * 0x1000 + 0x20960) |
76 | #define ADF_DH895XCC_ENABLE_AE_ECC_ERR (1 << 28) | 76 | #define ADF_DH895XCC_ENABLE_AE_ECC_ERR BIT(28) |
77 | #define ADF_DH895XCC_ENABLE_AE_ECC_PARITY_CORR (1 << 24 | 1 << 12) | 77 | #define ADF_DH895XCC_ENABLE_AE_ECC_PARITY_CORR (BIT(24) | BIT(12)) |
78 | #define ADF_DH895XCC_UERRSSMSH(i) (i * 0x4000 + 0x18) | 78 | #define ADF_DH895XCC_UERRSSMSH(i) (i * 0x4000 + 0x18) |
79 | #define ADF_DH895XCC_CERRSSMSH(i) (i * 0x4000 + 0x10) | 79 | #define ADF_DH895XCC_CERRSSMSH(i) (i * 0x4000 + 0x10) |
80 | #define ADF_DH895XCC_ERRSSMSH_EN (1 << 3) | 80 | #define ADF_DH895XCC_ERRSSMSH_EN BIT(3) |
81 | 81 | ||
82 | /* Admin Messages Registers */ | 82 | /* Admin Messages Registers */ |
83 | #define ADF_DH895XCC_ADMINMSGUR_OFFSET (0x3A000 + 0x574) | 83 | #define ADF_DH895XCC_ADMINMSGUR_OFFSET (0x3A000 + 0x574) |
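Here and in qat_hal.c above, open-coded (1 << n) masks become the kernel's BIT() macro from <linux/bitops.h>. BIT(nr) expands to (1UL << (nr)), which reads more clearly and avoids signed-int shift surprises at high bit positions; multi-bit masks are built by OR-ing, as in the parity-correction define above. For illustration (names abbreviated):

#include <linux/bitops.h>

#define ENABLE_AE_ECC_ERR	BIT(28)			/* 1UL << 28 */
#define ECC_PARITY_CORR		(BIT(24) | BIT(12))	/* two bits OR-ed */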
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index 8ffdb95c9804..9decea2779c6 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c | |||
@@ -236,7 +236,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) | |||
236 | } | 236 | } |
237 | 237 | ||
238 | accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL, | 238 | accel_dev = kzalloc_node(sizeof(*accel_dev), GFP_KERNEL, |
239 | dev_to_node(&pdev->dev)); | 239 | dev_to_node(&pdev->dev)); |
240 | if (!accel_dev) | 240 | if (!accel_dev) |
241 | return -ENOMEM; | 241 | return -ENOMEM; |
242 | 242 | ||
@@ -379,7 +379,7 @@ out_err: | |||
379 | return ret; | 379 | return ret; |
380 | } | 380 | } |
381 | 381 | ||
382 | static void __exit adf_remove(struct pci_dev *pdev) | 382 | static void adf_remove(struct pci_dev *pdev) |
383 | { | 383 | { |
384 | struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); | 384 | struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); |
385 | 385 | ||
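Dropping __exit from adf_remove() is a correctness fix rather than style: a PCI .remove callback also runs on hotplug removal or a sysfs-triggered unbind, not only at module unload, and __exit code may be discarded entirely for built-in drivers. Roughly, the binding looks like the sketch below; the field values are illustrative guesses at the surrounding file's names, not quoted from it:

static struct pci_driver adf_driver = {
	.id_table = adf_pci_tbl,		/* assumed name */
	.name = "dh895xcc",			/* assumed name */
	.probe = adf_probe,
	.remove = adf_remove,	/* must not be __exit: runs on unbind too */
};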
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c index fe8f89697ad8..0d03c109c2d3 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_isr.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_isr.c | |||
@@ -73,7 +73,7 @@ static int adf_enable_msix(struct adf_accel_dev *accel_dev) | |||
73 | if (pci_enable_msix_exact(pci_dev_info->pci_dev, | 73 | if (pci_enable_msix_exact(pci_dev_info->pci_dev, |
74 | pci_dev_info->msix_entries.entries, | 74 | pci_dev_info->msix_entries.entries, |
75 | msix_num_entries)) { | 75 | msix_num_entries)) { |
76 | pr_err("QAT: Failed to enable MSIX IRQ\n"); | 76 | dev_err(&GET_DEV(accel_dev), "Failed to enable MSIX IRQ\n"); |
77 | return -EFAULT; | 77 | return -EFAULT; |
78 | } | 78 | } |
79 | return 0; | 79 | return 0; |
@@ -97,7 +97,8 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr) | |||
97 | { | 97 | { |
98 | struct adf_accel_dev *accel_dev = dev_ptr; | 98 | struct adf_accel_dev *accel_dev = dev_ptr; |
99 | 99 | ||
100 | pr_info("QAT: qat_dev%d spurious AE interrupt\n", accel_dev->accel_id); | 100 | dev_info(&GET_DEV(accel_dev), "qat_dev%d spurious AE interrupt\n", |
101 | accel_dev->accel_id); | ||
101 | return IRQ_HANDLED; | 102 | return IRQ_HANDLED; |
102 | } | 103 | } |
103 | 104 | ||
@@ -121,8 +122,9 @@ static int adf_request_irqs(struct adf_accel_dev *accel_dev) | |||
121 | ret = request_irq(msixe[i].vector, | 122 | ret = request_irq(msixe[i].vector, |
122 | adf_msix_isr_bundle, 0, name, bank); | 123 | adf_msix_isr_bundle, 0, name, bank); |
123 | if (ret) { | 124 | if (ret) { |
124 | pr_err("QAT: failed to enable irq %d for %s\n", | 125 | dev_err(&GET_DEV(accel_dev), |
125 | msixe[i].vector, name); | 126 | "failed to enable irq %d for %s\n", |
127 | msixe[i].vector, name); | ||
126 | return ret; | 128 | return ret; |
127 | } | 129 | } |
128 | 130 | ||
@@ -136,8 +138,9 @@ static int adf_request_irqs(struct adf_accel_dev *accel_dev) | |||
136 | "qat%d-ae-cluster", accel_dev->accel_id); | 138 | "qat%d-ae-cluster", accel_dev->accel_id); |
137 | ret = request_irq(msixe[i].vector, adf_msix_isr_ae, 0, name, accel_dev); | 139 | ret = request_irq(msixe[i].vector, adf_msix_isr_ae, 0, name, accel_dev); |
138 | if (ret) { | 140 | if (ret) { |
139 | pr_err("QAT: failed to enable irq %d, for %s\n", | 141 | dev_err(&GET_DEV(accel_dev), |
140 | msixe[i].vector, name); | 142 | "failed to enable irq %d, for %s\n", |
143 | msixe[i].vector, name); | ||
141 | return ret; | 144 | return ret; |
142 | } | 145 | } |
143 | return ret; | 146 | return ret; |
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 290a7f0a681f..6be377f6b9e7 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c | |||
@@ -479,6 +479,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) | |||
479 | struct scatterlist *sg; | 479 | struct scatterlist *sg; |
480 | int ret; | 480 | int ret; |
481 | int i, j; | 481 | int i, j; |
482 | int idx = 0; | ||
482 | 483 | ||
483 | /* Copy new key if necessary */ | 484 | /* Copy new key if necessary */ |
484 | if (ctx->flags & FLAGS_NEW_KEY) { | 485 | if (ctx->flags & FLAGS_NEW_KEY) { |
@@ -486,17 +487,20 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) | |||
486 | ctx->flags &= ~FLAGS_NEW_KEY; | 487 | ctx->flags &= ~FLAGS_NEW_KEY; |
487 | 488 | ||
488 | if (dev->flags & FLAGS_CBC) { | 489 | if (dev->flags & FLAGS_CBC) { |
489 | dev->hw_desc[0]->len1 = AES_BLOCK_SIZE; | 490 | dev->hw_desc[idx]->len1 = AES_BLOCK_SIZE; |
490 | dev->hw_desc[0]->p1 = dev->iv_phys_base; | 491 | dev->hw_desc[idx]->p1 = dev->iv_phys_base; |
491 | } else { | 492 | } else { |
492 | dev->hw_desc[0]->len1 = 0; | 493 | dev->hw_desc[idx]->len1 = 0; |
493 | dev->hw_desc[0]->p1 = 0; | 494 | dev->hw_desc[idx]->p1 = 0; |
494 | } | 495 | } |
495 | dev->hw_desc[0]->len2 = ctx->keylen; | 496 | dev->hw_desc[idx]->len2 = ctx->keylen; |
496 | dev->hw_desc[0]->p2 = dev->key_phys_base; | 497 | dev->hw_desc[idx]->p2 = dev->key_phys_base; |
497 | dev->hw_desc[0]->next = dev->hw_phys_desc[1]; | 498 | dev->hw_desc[idx]->next = dev->hw_phys_desc[1]; |
499 | |||
500 | dev->hw_desc[idx]->hdr = sahara_aes_key_hdr(dev); | ||
501 | |||
502 | idx++; | ||
498 | } | 503 | } |
499 | dev->hw_desc[0]->hdr = sahara_aes_key_hdr(dev); | ||
500 | 504 | ||
501 | dev->nb_in_sg = sahara_sg_length(dev->in_sg, dev->total); | 505 | dev->nb_in_sg = sahara_sg_length(dev->in_sg, dev->total); |
502 | dev->nb_out_sg = sahara_sg_length(dev->out_sg, dev->total); | 506 | dev->nb_out_sg = sahara_sg_length(dev->out_sg, dev->total); |
@@ -520,7 +524,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) | |||
520 | } | 524 | } |
521 | 525 | ||
522 | /* Create input links */ | 526 | /* Create input links */ |
523 | dev->hw_desc[1]->p1 = dev->hw_phys_link[0]; | 527 | dev->hw_desc[idx]->p1 = dev->hw_phys_link[0]; |
524 | sg = dev->in_sg; | 528 | sg = dev->in_sg; |
525 | for (i = 0; i < dev->nb_in_sg; i++) { | 529 | for (i = 0; i < dev->nb_in_sg; i++) { |
526 | dev->hw_link[i]->len = sg->length; | 530 | dev->hw_link[i]->len = sg->length; |
@@ -534,7 +538,7 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) | |||
534 | } | 538 | } |
535 | 539 | ||
536 | /* Create output links */ | 540 | /* Create output links */ |
537 | dev->hw_desc[1]->p2 = dev->hw_phys_link[i]; | 541 | dev->hw_desc[idx]->p2 = dev->hw_phys_link[i]; |
538 | sg = dev->out_sg; | 542 | sg = dev->out_sg; |
539 | for (j = i; j < dev->nb_out_sg + i; j++) { | 543 | for (j = i; j < dev->nb_out_sg + i; j++) { |
540 | dev->hw_link[j]->len = sg->length; | 544 | dev->hw_link[j]->len = sg->length; |
@@ -548,10 +552,10 @@ static int sahara_hw_descriptor_create(struct sahara_dev *dev) | |||
548 | } | 552 | } |
549 | 553 | ||
550 | /* Fill remaining fields of hw_desc[1] */ | 554 | /* Fill remaining fields of hw_desc[1] */ |
551 | dev->hw_desc[1]->hdr = sahara_aes_data_link_hdr(dev); | 555 | dev->hw_desc[idx]->hdr = sahara_aes_data_link_hdr(dev); |
552 | dev->hw_desc[1]->len1 = dev->total; | 556 | dev->hw_desc[idx]->len1 = dev->total; |
553 | dev->hw_desc[1]->len2 = dev->total; | 557 | dev->hw_desc[idx]->len2 = dev->total; |
554 | dev->hw_desc[1]->next = 0; | 558 | dev->hw_desc[idx]->next = 0; |
555 | 559 | ||
556 | sahara_dump_descriptors(dev); | 560 | sahara_dump_descriptors(dev); |
557 | sahara_dump_links(dev); | 561 | sahara_dump_links(dev); |
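The idx rework changes the descriptor chain layout: previously hw_desc[0] was always treated as the key descriptor (its header was even written unconditionally), with the data-link descriptor fixed at hw_desc[1]. Now the key descriptor is emitted only when FLAGS_NEW_KEY is set, and idx tracks where the data descriptor lands:

/* After the change (sahara_hw_descriptor_create):
 *   new key pending:  hw_desc[0] = key descriptor -> hw_desc[1] = data
 *   key unchanged:    hw_desc[0] = data descriptor only
 * idx is 0 or 1 accordingly when the data-link fields are filled in.
 */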
@@ -576,6 +580,7 @@ static int sahara_aes_process(struct ablkcipher_request *req) | |||
576 | struct sahara_ctx *ctx; | 580 | struct sahara_ctx *ctx; |
577 | struct sahara_aes_reqctx *rctx; | 581 | struct sahara_aes_reqctx *rctx; |
578 | int ret; | 582 | int ret; |
583 | unsigned long timeout; | ||
579 | 584 | ||
580 | /* Request is ready to be dispatched by the device */ | 585 | /* Request is ready to be dispatched by the device */ |
581 | dev_dbg(dev->device, | 586 | dev_dbg(dev->device, |
@@ -601,10 +606,12 @@ static int sahara_aes_process(struct ablkcipher_request *req) | |||
601 | reinit_completion(&dev->dma_completion); | 606 | reinit_completion(&dev->dma_completion); |
602 | 607 | ||
603 | ret = sahara_hw_descriptor_create(dev); | 608 | ret = sahara_hw_descriptor_create(dev); |
609 | if (ret) | ||
610 | return -EINVAL; | ||
604 | 611 | ||
605 | ret = wait_for_completion_timeout(&dev->dma_completion, | 612 | timeout = wait_for_completion_timeout(&dev->dma_completion, |
606 | msecs_to_jiffies(SAHARA_TIMEOUT_MS)); | 613 | msecs_to_jiffies(SAHARA_TIMEOUT_MS)); |
607 | if (!ret) { | 614 | if (!timeout) { |
608 | dev_err(dev->device, "AES timeout\n"); | 615 | dev_err(dev->device, "AES timeout\n"); |
609 | return -ETIMEDOUT; | 616 | return -ETIMEDOUT; |
610 | } | 617 | } |
@@ -1044,7 +1051,8 @@ static int sahara_sha_process(struct ahash_request *req) | |||
1044 | { | 1051 | { |
1045 | struct sahara_dev *dev = dev_ptr; | 1052 | struct sahara_dev *dev = dev_ptr; |
1046 | struct sahara_sha_reqctx *rctx = ahash_request_ctx(req); | 1053 | struct sahara_sha_reqctx *rctx = ahash_request_ctx(req); |
1047 | int ret = -EINPROGRESS; | 1054 | int ret; |
1055 | unsigned long timeout; | ||
1048 | 1056 | ||
1049 | ret = sahara_sha_prepare_request(req); | 1057 | ret = sahara_sha_prepare_request(req); |
1050 | if (!ret) | 1058 | if (!ret) |
@@ -1070,9 +1078,9 @@ static int sahara_sha_process(struct ahash_request *req) | |||
1070 | 1078 | ||
1071 | sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR); | 1079 | sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR); |
1072 | 1080 | ||
1073 | ret = wait_for_completion_timeout(&dev->dma_completion, | 1081 | timeout = wait_for_completion_timeout(&dev->dma_completion, |
1074 | msecs_to_jiffies(SAHARA_TIMEOUT_MS)); | 1082 | msecs_to_jiffies(SAHARA_TIMEOUT_MS)); |
1075 | if (!ret) { | 1083 | if (!timeout) { |
1076 | dev_err(dev->device, "SHA timeout\n"); | 1084 | dev_err(dev->device, "SHA timeout\n"); |
1077 | return -ETIMEDOUT; | 1085 | return -ETIMEDOUT; |
1078 | } | 1086 | } |
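Both timeout fixes hinge on the return type: wait_for_completion_timeout() returns an unsigned long holding the jiffies left (0 on timeout), not a negative errno, so funnelling it through the int ret that also carries error codes conflated the two cases. A minimal helper distilling the corrected idiom (hypothetical name):

#include <linux/completion.h>
#include <linux/jiffies.h>

static int sahara_wait_done(struct completion *done, unsigned int ms)
{
	unsigned long timeout;	/* jiffies remaining; 0 means timed out */

	timeout = wait_for_completion_timeout(done, msecs_to_jiffies(ms));
	return timeout ? 0 : -ETIMEDOUT;
}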
@@ -1092,15 +1100,20 @@ static int sahara_queue_manage(void *data) | |||
1092 | { | 1100 | { |
1093 | struct sahara_dev *dev = (struct sahara_dev *)data; | 1101 | struct sahara_dev *dev = (struct sahara_dev *)data; |
1094 | struct crypto_async_request *async_req; | 1102 | struct crypto_async_request *async_req; |
1103 | struct crypto_async_request *backlog; | ||
1095 | int ret = 0; | 1104 | int ret = 0; |
1096 | 1105 | ||
1097 | do { | 1106 | do { |
1098 | __set_current_state(TASK_INTERRUPTIBLE); | 1107 | __set_current_state(TASK_INTERRUPTIBLE); |
1099 | 1108 | ||
1100 | mutex_lock(&dev->queue_mutex); | 1109 | mutex_lock(&dev->queue_mutex); |
1110 | backlog = crypto_get_backlog(&dev->queue); | ||
1101 | async_req = crypto_dequeue_request(&dev->queue); | 1111 | async_req = crypto_dequeue_request(&dev->queue); |
1102 | mutex_unlock(&dev->queue_mutex); | 1112 | mutex_unlock(&dev->queue_mutex); |
1103 | 1113 | ||
1114 | if (backlog) | ||
1115 | backlog->complete(backlog, -EINPROGRESS); | ||
1116 | |||
1104 | if (async_req) { | 1117 | if (async_req) { |
1105 | if (crypto_tfm_alg_type(async_req->tfm) == | 1118 | if (crypto_tfm_alg_type(async_req->tfm) == |
1106 | CRYPTO_ALG_TYPE_AHASH) { | 1119 | CRYPTO_ALG_TYPE_AHASH) { |
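The queue-thread hunk makes sahara honour the crypto API's backlog contract: a request accepted earlier with -EBUSY (CRYPTO_TFM_REQ_MAY_BACKLOG) must have its completion invoked with -EINPROGRESS once it actually enters the queue, so the submitter knows it may send more work. The dequeue pattern, as in the hunk above:

struct crypto_async_request *req, *backlog;

mutex_lock(&dev->queue_mutex);
backlog = crypto_get_backlog(&dev->queue);	/* peek a backlogged head */
req = crypto_dequeue_request(&dev->queue);
mutex_unlock(&dev->queue_mutex);

if (backlog)
	backlog->complete(backlog, -EINPROGRESS);	/* "queued for real now" */
if (req) {
	/* ... dispatch req, later calling req->complete(req, err) ... */
}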
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index ebbae8d3ce0d..857414afa29a 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c | |||
@@ -637,8 +637,6 @@ static void talitos_unregister_rng(struct device *dev) | |||
637 | #define TALITOS_MAX_KEY_SIZE 96 | 637 | #define TALITOS_MAX_KEY_SIZE 96 |
638 | #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ | 638 | #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ |
639 | 639 | ||
640 | #define MD5_BLOCK_SIZE 64 | ||
641 | |||
642 | struct talitos_ctx { | 640 | struct talitos_ctx { |
643 | struct device *dev; | 641 | struct device *dev; |
644 | int ch; | 642 | int ch; |
@@ -2195,7 +2193,7 @@ static struct talitos_alg_template driver_algs[] = { | |||
2195 | .halg.base = { | 2193 | .halg.base = { |
2196 | .cra_name = "md5", | 2194 | .cra_name = "md5", |
2197 | .cra_driver_name = "md5-talitos", | 2195 | .cra_driver_name = "md5-talitos", |
2198 | .cra_blocksize = MD5_BLOCK_SIZE, | 2196 | .cra_blocksize = MD5_HMAC_BLOCK_SIZE, |
2199 | .cra_flags = CRYPTO_ALG_TYPE_AHASH | | 2197 | .cra_flags = CRYPTO_ALG_TYPE_AHASH | |
2200 | CRYPTO_ALG_ASYNC, | 2198 | CRYPTO_ALG_ASYNC, |
2201 | } | 2199 | } |
@@ -2285,7 +2283,7 @@ static struct talitos_alg_template driver_algs[] = { | |||
2285 | .halg.base = { | 2283 | .halg.base = { |
2286 | .cra_name = "hmac(md5)", | 2284 | .cra_name = "hmac(md5)", |
2287 | .cra_driver_name = "hmac-md5-talitos", | 2285 | .cra_driver_name = "hmac-md5-talitos", |
2288 | .cra_blocksize = MD5_BLOCK_SIZE, | 2286 | .cra_blocksize = MD5_HMAC_BLOCK_SIZE, |
2289 | .cra_flags = CRYPTO_ALG_TYPE_AHASH | | 2287 | .cra_flags = CRYPTO_ALG_TYPE_AHASH | |
2290 | CRYPTO_ALG_ASYNC, | 2288 | CRYPTO_ALG_ASYNC, |
2291 | } | 2289 | } |
@@ -2706,20 +2704,16 @@ static int talitos_probe(struct platform_device *ofdev) | |||
2706 | goto err_out; | 2704 | goto err_out; |
2707 | } | 2705 | } |
2708 | 2706 | ||
2707 | priv->fifo_len = roundup_pow_of_two(priv->chfifo_len); | ||
2708 | |||
2709 | for (i = 0; i < priv->num_channels; i++) { | 2709 | for (i = 0; i < priv->num_channels; i++) { |
2710 | priv->chan[i].reg = priv->reg + TALITOS_CH_STRIDE * (i + 1); | 2710 | priv->chan[i].reg = priv->reg + TALITOS_CH_STRIDE * (i + 1); |
2711 | if (!priv->irq[1] || !(i & 1)) | 2711 | if (!priv->irq[1] || !(i & 1)) |
2712 | priv->chan[i].reg += TALITOS_CH_BASE_OFFSET; | 2712 | priv->chan[i].reg += TALITOS_CH_BASE_OFFSET; |
2713 | } | ||
2714 | 2713 | ||
2715 | for (i = 0; i < priv->num_channels; i++) { | ||
2716 | spin_lock_init(&priv->chan[i].head_lock); | 2714 | spin_lock_init(&priv->chan[i].head_lock); |
2717 | spin_lock_init(&priv->chan[i].tail_lock); | 2715 | spin_lock_init(&priv->chan[i].tail_lock); |
2718 | } | ||
2719 | 2716 | ||
2720 | priv->fifo_len = roundup_pow_of_two(priv->chfifo_len); | ||
2721 | |||
2722 | for (i = 0; i < priv->num_channels; i++) { | ||
2723 | priv->chan[i].fifo = kzalloc(sizeof(struct talitos_request) * | 2717 | priv->chan[i].fifo = kzalloc(sizeof(struct talitos_request) * |
2724 | priv->fifo_len, GFP_KERNEL); | 2718 | priv->fifo_len, GFP_KERNEL); |
2725 | if (!priv->chan[i].fifo) { | 2719 | if (!priv->chan[i].fifo) { |
@@ -2727,11 +2721,10 @@ static int talitos_probe(struct platform_device *ofdev) | |||
2727 | err = -ENOMEM; | 2721 | err = -ENOMEM; |
2728 | goto err_out; | 2722 | goto err_out; |
2729 | } | 2723 | } |
2730 | } | ||
2731 | 2724 | ||
2732 | for (i = 0; i < priv->num_channels; i++) | ||
2733 | atomic_set(&priv->chan[i].submit_count, | 2725 | atomic_set(&priv->chan[i].submit_count, |
2734 | -(priv->chfifo_len - 1)); | 2726 | -(priv->chfifo_len - 1)); |
2727 | } | ||
2735 | 2728 | ||
2736 | dma_set_mask(dev, DMA_BIT_MASK(36)); | 2729 | dma_set_mask(dev, DMA_BIT_MASK(36)); |
2737 | 2730 | ||
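The talitos_probe() hunk is pure consolidation: the fifo_len computation is hoisted above the channel loop, and four consecutive for (i = 0; i < priv->num_channels; i++) loops collapse into one that sets the register pointer, initialises both locks, allocates the FIFO and seeds the submit counter. The merged loop, as it reads after the patch:

priv->fifo_len = roundup_pow_of_two(priv->chfifo_len);

for (i = 0; i < priv->num_channels; i++) {
	priv->chan[i].reg = priv->reg + TALITOS_CH_STRIDE * (i + 1);
	if (!priv->irq[1] || !(i & 1))
		priv->chan[i].reg += TALITOS_CH_BASE_OFFSET;
	spin_lock_init(&priv->chan[i].head_lock);
	spin_lock_init(&priv->chan[i].tail_lock);
	priv->chan[i].fifo = kzalloc(sizeof(struct talitos_request) *
				     priv->fifo_len, GFP_KERNEL);
	if (!priv->chan[i].fifo) {
		/* error message elided in the hunk above */
		err = -ENOMEM;
		goto err_out;
	}
	atomic_set(&priv->chan[i].submit_count,
		   -(priv->chfifo_len - 1));
}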
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 187a8fd7eee7..5f5f360628fc 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c | |||
@@ -184,7 +184,7 @@ static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg, | |||
184 | direction, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); | 184 | direction, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); |
185 | if (!desc) { | 185 | if (!desc) { |
186 | dev_err(ctx->device->dev, | 186 | dev_err(ctx->device->dev, |
187 | "%s: device_prep_slave_sg() failed!\n", __func__); | 187 | "%s: dmaengine_prep_slave_sg() failed!\n", __func__); |
188 | return -EFAULT; | 188 | return -EFAULT; |
189 | } | 189 | } |
190 | 190 | ||
diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig new file mode 100644 index 000000000000..771babf16aa0 --- /dev/null +++ b/drivers/crypto/vmx/Kconfig | |||
@@ -0,0 +1,8 @@ | |||
1 | config CRYPTO_DEV_VMX_ENCRYPT | ||
2 | tristate "Encryption acceleration support on P8 CPU" | ||
3 | depends on PPC64 && CRYPTO_DEV_VMX | ||
4 | default y | ||
5 | help | ||
6 | Support for VMX cryptographic acceleration instructions on Power8 CPU. | ||
7 | This module supports acceleration for AES and GHASH in hardware. If you | ||
8 | choose 'M' here, this module will be called vmx-crypto. | ||
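With this entry in place, a modular build would use a config fragment along these lines; CRYPTO_DEV_VMX itself is added elsewhere in the series, so the fragment is illustrative:

CONFIG_PPC64=y
CONFIG_CRYPTO_DEV_VMX=y
CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m   # builds vmx-crypto.ko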
diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile new file mode 100644 index 000000000000..c699c6e6c82e --- /dev/null +++ b/drivers/crypto/vmx/Makefile | |||
@@ -0,0 +1,19 @@ | |||
1 | obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o | ||
2 | vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o ghash.o | ||
3 | |||
4 | ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) | ||
5 | TARGET := linux-ppc64le | ||
6 | else | ||
7 | TARGET := linux-ppc64 | ||
8 | endif | ||
9 | |||
10 | quiet_cmd_perl = PERL $@ | ||
11 | cmd_perl = $(PERL) $(<) $(TARGET) > $(@) | ||
12 | |||
13 | $(src)/aesp8-ppc.S: $(src)/aesp8-ppc.pl | ||
14 | $(call cmd,perl) | ||
15 | |||
16 | $(src)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl | ||
17 | $(call cmd,perl) | ||
18 | |||
19 | .PRECIOUS: $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S | ||
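aesp8-ppc.S and ghashp8-ppc.S are not shipped in the tree; they are generated at build time from OpenSSL-style perlasm sources, with $(TARGET) selecting the endianness flavour. In effect, for a little-endian kernel, make runs something equivalent to:

perl drivers/crypto/vmx/aesp8-ppc.pl linux-ppc64le > drivers/crypto/vmx/aesp8-ppc.S
perl drivers/crypto/vmx/ghashp8-ppc.pl linux-ppc64le > drivers/crypto/vmx/ghashp8-ppc.S

and the .PRECIOUS rule keeps make from deleting the generated intermediates.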
diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c new file mode 100644 index 000000000000..ab300ea19434 --- /dev/null +++ b/drivers/crypto/vmx/aes.c | |||
@@ -0,0 +1,139 @@ | |||
1 | /** | ||
2 | * AES routines supporting VMX instructions on the Power 8 | ||
3 | * | ||
4 | * Copyright (C) 2015 International Business Machines Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; version 2 only. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
18 | * | ||
19 | * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> | ||
20 | */ | ||
21 | |||
22 | #include <linux/types.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/crypto.h> | ||
25 | #include <linux/delay.h> | ||
26 | #include <linux/hardirq.h> | ||
27 | #include <asm/switch_to.h> | ||
28 | #include <crypto/aes.h> | ||
29 | |||
30 | #include "aesp8-ppc.h" | ||
31 | |||
32 | struct p8_aes_ctx { | ||
33 | struct crypto_cipher *fallback; | ||
34 | struct aes_key enc_key; | ||
35 | struct aes_key dec_key; | ||
36 | }; | ||
37 | |||
38 | static int p8_aes_init(struct crypto_tfm *tfm) | ||
39 | { | ||
40 | const char *alg; | ||
41 | struct crypto_cipher *fallback; | ||
42 | struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
43 | |||
44 | if (!(alg = crypto_tfm_alg_name(tfm))) { | ||
45 | printk(KERN_ERR "Failed to get algorithm name.\n"); | ||
46 | return -ENOENT; | ||
47 | } | ||
48 | |||
49 | fallback = crypto_alloc_cipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK); | ||
50 | if (IS_ERR(fallback)) { | ||
51 | printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", | ||
52 | alg, PTR_ERR(fallback)); | ||
53 | return PTR_ERR(fallback); | ||
54 | } | ||
55 | printk(KERN_INFO "Using '%s' as fallback implementation.\n", | ||
56 | crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); | ||
57 | |||
58 | crypto_cipher_set_flags(fallback, | ||
59 | crypto_cipher_get_flags((struct crypto_cipher *) tfm)); | ||
60 | ctx->fallback = fallback; | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | static void p8_aes_exit(struct crypto_tfm *tfm) | ||
66 | { | ||
67 | struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
68 | |||
69 | if (ctx->fallback) { | ||
70 | crypto_free_cipher(ctx->fallback); | ||
71 | ctx->fallback = NULL; | ||
72 | } | ||
73 | } | ||
74 | |||
75 | static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
76 | unsigned int keylen) | ||
77 | { | ||
78 | int ret; | ||
79 | struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
80 | |||
81 | pagefault_disable(); | ||
82 | enable_kernel_altivec(); | ||
83 | ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); | ||
84 | ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); | ||
85 | pagefault_enable(); | ||
86 | |||
87 | ret += crypto_cipher_setkey(ctx->fallback, key, keylen); | ||
88 | return ret; | ||
89 | } | ||
90 | |||
91 | static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
92 | { | ||
93 | struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
94 | |||
95 | if (in_interrupt()) { | ||
96 | crypto_cipher_encrypt_one(ctx->fallback, dst, src); | ||
97 | } else { | ||
98 | pagefault_disable(); | ||
99 | enable_kernel_altivec(); | ||
100 | aes_p8_encrypt(src, dst, &ctx->enc_key); | ||
101 | pagefault_enable(); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
106 | { | ||
107 | struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
108 | |||
109 | if (in_interrupt()) { | ||
110 | crypto_cipher_decrypt_one(ctx->fallback, dst, src); | ||
111 | } else { | ||
112 | pagefault_disable(); | ||
113 | enable_kernel_altivec(); | ||
114 | aes_p8_decrypt(src, dst, &ctx->dec_key); | ||
115 | pagefault_enable(); | ||
116 | } | ||
117 | } | ||
118 | |||
119 | struct crypto_alg p8_aes_alg = { | ||
120 | .cra_name = "aes", | ||
121 | .cra_driver_name = "p8_aes", | ||
122 | .cra_module = THIS_MODULE, | ||
123 | .cra_priority = 1000, | ||
124 | .cra_type = NULL, | ||
125 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK, | ||
126 | .cra_alignmask = 0, | ||
127 | .cra_blocksize = AES_BLOCK_SIZE, | ||
128 | .cra_ctxsize = sizeof(struct p8_aes_ctx), | ||
129 | .cra_init = p8_aes_init, | ||
130 | .cra_exit = p8_aes_exit, | ||
131 | .cra_cipher = { | ||
132 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
133 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
134 | .cia_setkey = p8_aes_setkey, | ||
135 | .cia_encrypt = p8_aes_encrypt, | ||
136 | .cia_decrypt = p8_aes_decrypt, | ||
137 | }, | ||
138 | }; | ||
139 | |||
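All four VMX glue modules follow the same shape: allocate a generic fallback transform at cra_init time (hence CRYPTO_ALG_NEED_FALLBACK and the lookup by cra_name), then at run time either take the fallback (in interrupt context, where the AltiVec register state cannot be saved and restored) or bracket the assembler primitive with pagefault_disable()/enable_kernel_altivec()/pagefault_enable(). A distilled sketch of that bracket, reusing the names from aes.c above:

static void p8_aes_encrypt_sketch(struct p8_aes_ctx *ctx,
				  u8 *dst, const u8 *src)
{
	if (in_interrupt()) {
		/* VMX state can't be touched here; take the software path */
		crypto_cipher_encrypt_one(ctx->fallback, dst, src);
		return;
	}
	pagefault_disable();		/* no faults while VMX regs are live */
	enable_kernel_altivec();	/* allow AltiVec use in kernel mode */
	aes_p8_encrypt(src, dst, &ctx->enc_key);
	pagefault_enable();
}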
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c new file mode 100644 index 000000000000..1a559b7dddb5 --- /dev/null +++ b/drivers/crypto/vmx/aes_cbc.c | |||
@@ -0,0 +1,184 @@ | |||
1 | /** | ||
2 | * AES CBC routines supporting VMX instructions on the Power 8 | ||
3 | * | ||
4 | * Copyright (C) 2015 International Business Machines Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; version 2 only. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
18 | * | ||
19 | * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> | ||
20 | */ | ||
21 | |||
22 | #include <linux/types.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/crypto.h> | ||
25 | #include <linux/delay.h> | ||
26 | #include <linux/hardirq.h> | ||
27 | #include <asm/switch_to.h> | ||
28 | #include <crypto/aes.h> | ||
29 | #include <crypto/scatterwalk.h> | ||
30 | |||
31 | #include "aesp8-ppc.h" | ||
32 | |||
33 | struct p8_aes_cbc_ctx { | ||
34 | struct crypto_blkcipher *fallback; | ||
35 | struct aes_key enc_key; | ||
36 | struct aes_key dec_key; | ||
37 | }; | ||
38 | |||
39 | static int p8_aes_cbc_init(struct crypto_tfm *tfm) | ||
40 | { | ||
41 | const char *alg; | ||
42 | struct crypto_blkcipher *fallback; | ||
43 | struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); | ||
44 | |||
45 | if (!(alg = crypto_tfm_alg_name(tfm))) { | ||
46 | printk(KERN_ERR "Failed to get algorithm name.\n"); | ||
47 | return -ENOENT; | ||
48 | } | ||
49 | |||
50 | fallback = crypto_alloc_blkcipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK); | ||
51 | if (IS_ERR(fallback)) { | ||
52 | printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", | ||
53 | alg, PTR_ERR(fallback)); | ||
54 | return PTR_ERR(fallback); | ||
55 | } | ||
56 | printk(KERN_INFO "Using '%s' as fallback implementation.\n", | ||
57 | crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); | ||
58 | |||
59 | crypto_blkcipher_set_flags(fallback, | ||
60 | crypto_blkcipher_get_flags((struct crypto_blkcipher *) tfm)); | ||
61 | ctx->fallback = fallback; | ||
62 | |||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | static void p8_aes_cbc_exit(struct crypto_tfm *tfm) | ||
67 | { | ||
68 | struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); | ||
69 | |||
70 | if (ctx->fallback) { | ||
71 | crypto_free_blkcipher(ctx->fallback); | ||
72 | ctx->fallback = NULL; | ||
73 | } | ||
74 | } | ||
75 | |||
76 | static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
77 | unsigned int keylen) | ||
78 | { | ||
79 | int ret; | ||
80 | struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); | ||
81 | |||
82 | pagefault_disable(); | ||
83 | enable_kernel_altivec(); | ||
84 | ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); | ||
85 | ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); | ||
86 | pagefault_enable(); | ||
87 | |||
88 | ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); | ||
89 | return ret; | ||
90 | } | ||
91 | |||
92 | static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, | ||
93 | struct scatterlist *dst, struct scatterlist *src, | ||
94 | unsigned int nbytes) | ||
95 | { | ||
96 | int ret; | ||
97 | struct blkcipher_walk walk; | ||
98 | struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx( | ||
99 | crypto_blkcipher_tfm(desc->tfm)); | ||
100 | struct blkcipher_desc fallback_desc = { | ||
101 | .tfm = ctx->fallback, | ||
102 | .info = desc->info, | ||
103 | .flags = desc->flags | ||
104 | }; | ||
105 | |||
106 | if (in_interrupt()) { | ||
107 | ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes); | ||
108 | } else { | ||
109 | pagefault_disable(); | ||
110 | enable_kernel_altivec(); | ||
111 | |||
112 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
113 | ret = blkcipher_walk_virt(desc, &walk); | ||
114 | while ((nbytes = walk.nbytes)) { | ||
115 | aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, | ||
116 | nbytes & AES_BLOCK_MASK, &ctx->enc_key, walk.iv, 1); | ||
117 | nbytes &= AES_BLOCK_SIZE - 1; | ||
118 | ret = blkcipher_walk_done(desc, &walk, nbytes); | ||
119 | } | ||
120 | |||
121 | pagefault_enable(); | ||
122 | } | ||
123 | |||
124 | return ret; | ||
125 | } | ||
126 | |||
127 | static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, | ||
128 | struct scatterlist *dst, struct scatterlist *src, | ||
129 | unsigned int nbytes) | ||
130 | { | ||
131 | int ret; | ||
132 | struct blkcipher_walk walk; | ||
133 | struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx( | ||
134 | crypto_blkcipher_tfm(desc->tfm)); | ||
135 | struct blkcipher_desc fallback_desc = { | ||
136 | .tfm = ctx->fallback, | ||
137 | .info = desc->info, | ||
138 | .flags = desc->flags | ||
139 | }; | ||
140 | |||
141 | if (in_interrupt()) { | ||
142 | ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); | ||
143 | } else { | ||
144 | pagefault_disable(); | ||
145 | enable_kernel_altivec(); | ||
146 | |||
147 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
148 | ret = blkcipher_walk_virt(desc, &walk); | ||
149 | while ((nbytes = walk.nbytes)) { | ||
150 | aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, | ||
151 | nbytes & AES_BLOCK_MASK, &ctx->dec_key, walk.iv, 0); | ||
152 | nbytes &= AES_BLOCK_SIZE - 1; | ||
153 | ret = blkcipher_walk_done(desc, &walk, nbytes); | ||
154 | } | ||
155 | |||
156 | pagefault_enable(); | ||
157 | } | ||
158 | |||
159 | return ret; | ||
160 | } | ||
161 | |||
162 | |||
163 | struct crypto_alg p8_aes_cbc_alg = { | ||
164 | .cra_name = "cbc(aes)", | ||
165 | .cra_driver_name = "p8_aes_cbc", | ||
166 | .cra_module = THIS_MODULE, | ||
167 | .cra_priority = 1000, | ||
168 | .cra_type = &crypto_blkcipher_type, | ||
169 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, | ||
170 | .cra_alignmask = 0, | ||
171 | .cra_blocksize = AES_BLOCK_SIZE, | ||
172 | .cra_ctxsize = sizeof(struct p8_aes_cbc_ctx), | ||
173 | .cra_init = p8_aes_cbc_init, | ||
174 | .cra_exit = p8_aes_cbc_exit, | ||
175 | .cra_blkcipher = { | ||
176 | .ivsize = AES_BLOCK_SIZE, | ||
177 | .min_keysize = AES_MIN_KEY_SIZE, | ||
178 | .max_keysize = AES_MAX_KEY_SIZE, | ||
179 | .setkey = p8_aes_cbc_setkey, | ||
180 | .encrypt = p8_aes_cbc_encrypt, | ||
181 | .decrypt = p8_aes_cbc_decrypt, | ||
182 | }, | ||
183 | }; | ||
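As a usage illustration (not part of the patch): with cra_priority 1000, a request for "cbc(aes)" resolves to this driver ahead of the generic implementation. A minimal in-place round trip through the 4.1-era blkcipher API might look like the sketch below; the function and buffer names are hypothetical, and len must be a multiple of AES_BLOCK_SIZE.

	/* Hypothetical caller-side sketch; not part of this patch. */
	static int example_cbc_encrypt(const u8 *key, unsigned int keylen,
	                               u8 *iv, u8 *buf, unsigned int len)
	{
	        struct crypto_blkcipher *tfm;
	        struct blkcipher_desc desc;
	        struct scatterlist sg;
	        int err;

	        tfm = crypto_alloc_blkcipher("cbc(aes)", 0, 0);
	        if (IS_ERR(tfm))
	                return PTR_ERR(tfm);

	        err = crypto_blkcipher_setkey(tfm, key, keylen);
	        if (!err) {
	                crypto_blkcipher_set_iv(tfm, iv, AES_BLOCK_SIZE);
	                desc.tfm = tfm;
	                desc.flags = 0;
	                sg_init_one(&sg, buf, len);   /* encrypt in place */
	                err = crypto_blkcipher_encrypt(&desc, &sg, &sg, len);
	        }

	        crypto_free_blkcipher(tfm);
	        return err;
	}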
184 | |||
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c new file mode 100644 index 000000000000..96dbee4bf4a6 --- /dev/null +++ b/drivers/crypto/vmx/aes_ctr.c | |||
@@ -0,0 +1,167 @@ | |||
1 | /** | ||
2 | * AES CTR routines supporting VMX instructions on the Power 8 | ||
3 | * | ||
4 | * Copyright (C) 2015 International Business Machines Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; version 2 only. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
18 | * | ||
19 | * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> | ||
20 | */ | ||
21 | |||
22 | #include <linux/types.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/crypto.h> | ||
25 | #include <linux/delay.h> | ||
26 | #include <linux/hardirq.h> | ||
27 | #include <asm/switch_to.h> | ||
28 | #include <crypto/aes.h> | ||
29 | #include <crypto/scatterwalk.h> | ||
30 | #include "aesp8-ppc.h" | ||
31 | |||
32 | struct p8_aes_ctr_ctx { | ||
33 | struct crypto_blkcipher *fallback; | ||
34 | struct aes_key enc_key; | ||
35 | }; | ||
36 | |||
37 | static int p8_aes_ctr_init(struct crypto_tfm *tfm) | ||
38 | { | ||
39 | const char *alg; | ||
40 | struct crypto_blkcipher *fallback; | ||
41 | struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); | ||
42 | |||
43 | if (!(alg = crypto_tfm_alg_name(tfm))) { | ||
44 | printk(KERN_ERR "Failed to get algorithm name.\n"); | ||
45 | return -ENOENT; | ||
46 | } | ||
47 | |||
48 | fallback = crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); | ||
49 | if (IS_ERR(fallback)) { | ||
50 | printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", | ||
51 | alg, PTR_ERR(fallback)); | ||
52 | return PTR_ERR(fallback); | ||
53 | } | ||
54 | printk(KERN_INFO "Using '%s' as fallback implementation.\n", | ||
55 | crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); | ||
56 | |||
57 | crypto_blkcipher_set_flags(fallback, | ||
58 | crypto_blkcipher_get_flags((struct crypto_blkcipher *) tfm)); | ||
59 | ctx->fallback = fallback; | ||
60 | |||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | static void p8_aes_ctr_exit(struct crypto_tfm *tfm) | ||
65 | { | ||
66 | struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); | ||
67 | |||
68 | if (ctx->fallback) { | ||
69 | crypto_free_blkcipher(ctx->fallback); | ||
70 | ctx->fallback = NULL; | ||
71 | } | ||
72 | } | ||
73 | |||
74 | static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
75 | unsigned int keylen) | ||
76 | { | ||
77 | int ret; | ||
78 | struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); | ||
79 | |||
80 | pagefault_disable(); | ||
81 | enable_kernel_altivec(); | ||
82 | ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); | ||
83 | pagefault_enable(); | ||
84 | |||
85 | ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); | ||
86 | return ret; | ||
87 | } | ||
88 | |||
89 | static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, | ||
90 | struct blkcipher_walk *walk) | ||
91 | { | ||
92 | u8 *ctrblk = walk->iv; | ||
93 | u8 keystream[AES_BLOCK_SIZE]; | ||
94 | u8 *src = walk->src.virt.addr; | ||
95 | u8 *dst = walk->dst.virt.addr; | ||
96 | unsigned int nbytes = walk->nbytes; | ||
97 | |||
98 | pagefault_disable(); | ||
99 | enable_kernel_altivec(); | ||
100 | aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); | ||
101 | pagefault_enable(); | ||
102 | |||
103 | crypto_xor(keystream, src, nbytes); | ||
104 | memcpy(dst, keystream, nbytes); | ||
105 | crypto_inc(ctrblk, AES_BLOCK_SIZE); | ||
106 | } | ||
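p8_aes_ctr_final() is the standard CTR tail: encrypt the counter block once, XOR only the remaining nbytes of keystream into the output, then advance the counter. The two helpers it leans on are kernel library functions; a stand-alone model of their behaviour (for illustration only) is:

	/* Illustrative models of the helpers used above. The real
	 * crypto_xor() works a word at a time and crypto_inc() lives in
	 * the crypto core, but the semantics are as modelled here. */
	static void model_crypto_xor(u8 *dst, const u8 *src, unsigned int len)
	{
	        while (len--)
	                *dst++ ^= *src++;
	}

	static void model_crypto_inc(u8 *ctr, unsigned int size)
	{
	        int i;

	        /* big-endian increment with carry propagation */
	        for (i = size - 1; i >= 0; i--)
	                if (++ctr[i])
	                        break;
	}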
107 | |||
108 | static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, | ||
109 | struct scatterlist *dst, struct scatterlist *src, | ||
110 | unsigned int nbytes) | ||
111 | { | ||
112 | int ret; | ||
113 | struct blkcipher_walk walk; | ||
114 | struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx( | ||
115 | crypto_blkcipher_tfm(desc->tfm)); | ||
116 | struct blkcipher_desc fallback_desc = { | ||
117 | .tfm = ctx->fallback, | ||
118 | .info = desc->info, | ||
119 | .flags = desc->flags | ||
120 | }; | ||
121 | |||
122 | if (in_interrupt()) { | ||
123 | ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes); | ||
124 | } else { | ||
125 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
126 | ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | ||
127 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { | ||
128 | pagefault_disable(); | ||
129 | enable_kernel_altivec(); | ||
130 | aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, walk.dst.virt.addr, | ||
131 | (nbytes & AES_BLOCK_MASK) / AES_BLOCK_SIZE, &ctx->enc_key, walk.iv); | ||
132 | pagefault_enable(); | ||
133 | |||
134 | crypto_inc(walk.iv, AES_BLOCK_SIZE); | ||
135 | nbytes &= AES_BLOCK_SIZE - 1; | ||
136 | ret = blkcipher_walk_done(desc, &walk, nbytes); | ||
137 | } | ||
138 | if (walk.nbytes) { | ||
139 | p8_aes_ctr_final(ctx, &walk); | ||
140 | ret = blkcipher_walk_done(desc, &walk, 0); | ||
141 | } | ||
142 | } | ||
143 | |||
144 | return ret; | ||
145 | } | ||
146 | |||
147 | struct crypto_alg p8_aes_ctr_alg = { | ||
148 | .cra_name = "ctr(aes)", | ||
149 | .cra_driver_name = "p8_aes_ctr", | ||
150 | .cra_module = THIS_MODULE, | ||
151 | .cra_priority = 1000, | ||
152 | .cra_type = &crypto_blkcipher_type, | ||
153 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, | ||
154 | .cra_alignmask = 0, | ||
155 | .cra_blocksize = 1, | ||
156 | .cra_ctxsize = sizeof(struct p8_aes_ctr_ctx), | ||
157 | .cra_init = p8_aes_ctr_init, | ||
158 | .cra_exit = p8_aes_ctr_exit, | ||
159 | .cra_blkcipher = { | ||
160 | .ivsize = AES_BLOCK_SIZE, | ||
161 | .min_keysize = AES_MIN_KEY_SIZE, | ||
162 | .max_keysize = AES_MAX_KEY_SIZE, | ||
163 | .setkey = p8_aes_ctr_setkey, | ||
164 | .encrypt = p8_aes_ctr_crypt, | ||
165 | .decrypt = p8_aes_ctr_crypt, | ||
166 | }, | ||
167 | }; | ||
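For reference (a sketch, not part of the patch), the per-block computation the assembly kernel performs for full blocks is plain CTR: keystream = AES_enc(counter), output = input XOR keystream, with only the low 32 bits of the big-endian counter incremented per block, which is why the routine is named ctr32.

	/* Portable C model of aes_p8_ctr32_encrypt_blocks(); sketch only. */
	static void ctr32_encrypt_model(const u8 *in, u8 *out, size_t nblocks,
	                                const struct aes_key *key,
	                                u8 ctr[AES_BLOCK_SIZE])
	{
	        u8 ks[AES_BLOCK_SIZE];
	        unsigned int j;
	        u32 c;

	        while (nblocks--) {
	                aes_p8_encrypt(ctr, ks, key);     /* keystream block */
	                for (j = 0; j < AES_BLOCK_SIZE; j++)
	                        *out++ = *in++ ^ ks[j];
	                /* increment the low 32 bits, big-endian */
	                c = (ctr[12] << 24) | (ctr[13] << 16) |
	                    (ctr[14] << 8) | ctr[15];
	                c++;
	                ctr[12] = c >> 24;
	                ctr[13] = c >> 16;
	                ctr[14] = c >> 8;
	                ctr[15] = c;
	        }
	}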
diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h new file mode 100644 index 000000000000..e963945a83e1 --- /dev/null +++ b/drivers/crypto/vmx/aesp8-ppc.h | |||
@@ -0,0 +1,20 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include <crypto/aes.h> | ||
3 | |||
4 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) | ||
5 | |||
6 | struct aes_key { | ||
7 | u8 key[AES_MAX_KEYLENGTH]; | ||
8 | int rounds; | ||
9 | }; | ||
10 | |||
11 | int aes_p8_set_encrypt_key(const u8 *userKey, const int bits, | ||
12 | struct aes_key *key); | ||
13 | int aes_p8_set_decrypt_key(const u8 *userKey, const int bits, | ||
14 | struct aes_key *key); | ||
15 | void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key); | ||
16 | void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key); | ||
17 | void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, | ||
18 | const struct aes_key *key, u8 *iv, const int enc); | ||
19 | void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, | ||
20 | size_t len, const struct aes_key *key, const u8 *iv); | ||
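The glue files above always bracket these primitives with pagefault_disable()/enable_kernel_altivec() before touching vector state. A minimal single-block round trip under that convention (a sketch, assuming a 128-bit key in key128 and a plaintext block in pt) looks like:

	/* Sketch of the calling convention this header implies; mirrors
	 * the bracketing used by the glue code. Assumes a 128-bit key. */
	struct aes_key enc, dec;
	u8 ct[AES_BLOCK_SIZE], out[AES_BLOCK_SIZE];

	pagefault_disable();
	enable_kernel_altivec();
	aes_p8_set_encrypt_key(key128, 128, &enc);   /* key128: 16 bytes */
	aes_p8_set_decrypt_key(key128, 128, &dec);
	aes_p8_encrypt(pt, ct, &enc);                /* pt: one block in */
	aes_p8_decrypt(ct, out, &dec);               /* out == pt again */
	pagefault_enable();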
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl new file mode 100644 index 000000000000..6c5c20c6108e --- /dev/null +++ b/drivers/crypto/vmx/aesp8-ppc.pl | |||
@@ -0,0 +1,1930 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | # | ||
10 | # This module implements support for AES instructions as per PowerISA | ||
11 | # specification version 2.07, first implemented by POWER8 processor. | ||
12 | # The module is endian-agnostic in the sense that it supports both | ||
13 | # big- and little-endian cases. Data alignment in parallelizable | ||
14 | # modes is handled with VSX loads and stores, which implies the | ||
15 | # MSR.VSX flag being set. Note also that the ISA specification does | ||
16 | # not prohibit alignment exceptions for these instructions on page | ||
17 | # boundaries. Initially alignment was handled in a pure AltiVec/VMX | ||
18 | # way [data aligned programmatically, which in turn guarantees | ||
19 | # exception-free execution], but that turned out to hamper | ||
20 | # performance when vcipher instructions are interleaved. Occasional | ||
21 | # misalignment penalties at page boundaries are reckoned to be on | ||
22 | # average lower than the extra overhead of the pure AltiVec approach. | ||
23 | |||
24 | $flavour = shift; | ||
25 | |||
26 | if ($flavour =~ /64/) { | ||
27 | $SIZE_T =8; | ||
28 | $LRSAVE =2*$SIZE_T; | ||
29 | $STU ="stdu"; | ||
30 | $POP ="ld"; | ||
31 | $PUSH ="std"; | ||
32 | $UCMP ="cmpld"; | ||
33 | $SHL ="sldi"; | ||
34 | } elsif ($flavour =~ /32/) { | ||
35 | $SIZE_T =4; | ||
36 | $LRSAVE =$SIZE_T; | ||
37 | $STU ="stwu"; | ||
38 | $POP ="lwz"; | ||
39 | $PUSH ="stw"; | ||
40 | $UCMP ="cmplw"; | ||
41 | $SHL ="slwi"; | ||
42 | } else { die "nonsense $flavour"; } | ||
43 | |||
44 | $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; | ||
45 | |||
46 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | ||
47 | ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or | ||
48 | ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or | ||
49 | die "can't locate ppc-xlate.pl"; | ||
50 | |||
51 | open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; | ||
52 | |||
53 | $FRAME=8*$SIZE_T; | ||
54 | $prefix="aes_p8"; | ||
55 | |||
56 | $sp="r1"; | ||
57 | $vrsave="r12"; | ||
58 | |||
59 | ######################################################################### | ||
60 | {{{ # Key setup procedures # | ||
61 | my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); | ||
62 | my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); | ||
63 | my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); | ||
64 | |||
65 | $code.=<<___; | ||
66 | .machine "any" | ||
67 | |||
68 | .text | ||
69 | |||
70 | .align 7 | ||
71 | rcon: | ||
72 | .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev | ||
73 | .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev | ||
74 | .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev | ||
75 | .long 0,0,0,0 ?asis | ||
76 | Lconsts: | ||
77 | mflr r0 | ||
78 | bcl 20,31,\$+4 | ||
79 | mflr $ptr #vvvvv distance between . and rcon | ||
80 | addi $ptr,$ptr,-0x48 | ||
81 | mtlr r0 | ||
82 | blr | ||
83 | .long 0 | ||
84 | .byte 0,12,0x14,0,0,0,0,0 | ||
85 | .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" | ||
86 | |||
87 | .globl .${prefix}_set_encrypt_key | ||
88 | Lset_encrypt_key: | ||
89 | mflr r11 | ||
90 | $PUSH r11,$LRSAVE($sp) | ||
91 | |||
92 | li $ptr,-1 | ||
93 | ${UCMP}i $inp,0 | ||
94 | beq- Lenc_key_abort # if ($inp==0) return -1; | ||
95 | ${UCMP}i $out,0 | ||
96 | beq- Lenc_key_abort # if ($out==0) return -1; | ||
97 | li $ptr,-2 | ||
98 | cmpwi $bits,128 | ||
99 | blt- Lenc_key_abort | ||
100 | cmpwi $bits,256 | ||
101 | bgt- Lenc_key_abort | ||
102 | andi. r0,$bits,0x3f | ||
103 | bne- Lenc_key_abort | ||
104 | |||
105 | lis r0,0xfff0 | ||
106 | mfspr $vrsave,256 | ||
107 | mtspr 256,r0 | ||
108 | |||
109 | bl Lconsts | ||
110 | mtlr r11 | ||
111 | |||
112 | neg r9,$inp | ||
113 | lvx $in0,0,$inp | ||
114 | addi $inp,$inp,15 # 15 is not typo | ||
115 | lvsr $key,0,r9 # borrow $key | ||
116 | li r8,0x20 | ||
117 | cmpwi $bits,192 | ||
118 | lvx $in1,0,$inp | ||
119 | le?vspltisb $mask,0x0f # borrow $mask | ||
120 | lvx $rcon,0,$ptr | ||
121 | le?vxor $key,$key,$mask # adjust for byte swap | ||
122 | lvx $mask,r8,$ptr | ||
123 | addi $ptr,$ptr,0x10 | ||
124 | vperm $in0,$in0,$in1,$key # align [and byte swap in LE] | ||
125 | li $cnt,8 | ||
126 | vxor $zero,$zero,$zero | ||
127 | mtctr $cnt | ||
128 | |||
129 | ?lvsr $outperm,0,$out | ||
130 | vspltisb $outmask,-1 | ||
131 | lvx $outhead,0,$out | ||
132 | ?vperm $outmask,$zero,$outmask,$outperm | ||
133 | |||
134 | blt Loop128 | ||
135 | addi $inp,$inp,8 | ||
136 | beq L192 | ||
137 | addi $inp,$inp,8 | ||
138 | b L256 | ||
139 | |||
140 | .align 4 | ||
141 | Loop128: | ||
142 | vperm $key,$in0,$in0,$mask # rotate-n-splat | ||
143 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
144 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
145 | vsel $stage,$outhead,$outtail,$outmask | ||
146 | vmr $outhead,$outtail | ||
147 | vcipherlast $key,$key,$rcon | ||
148 | stvx $stage,0,$out | ||
149 | addi $out,$out,16 | ||
150 | |||
151 | vxor $in0,$in0,$tmp | ||
152 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
153 | vxor $in0,$in0,$tmp | ||
154 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
155 | vxor $in0,$in0,$tmp | ||
156 | vadduwm $rcon,$rcon,$rcon | ||
157 | vxor $in0,$in0,$key | ||
158 | bdnz Loop128 | ||
159 | |||
160 | lvx $rcon,0,$ptr # last two round keys | ||
161 | |||
162 | vperm $key,$in0,$in0,$mask # rotate-n-splat | ||
163 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
164 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
165 | vsel $stage,$outhead,$outtail,$outmask | ||
166 | vmr $outhead,$outtail | ||
167 | vcipherlast $key,$key,$rcon | ||
168 | stvx $stage,0,$out | ||
169 | addi $out,$out,16 | ||
170 | |||
171 | vxor $in0,$in0,$tmp | ||
172 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
173 | vxor $in0,$in0,$tmp | ||
174 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
175 | vxor $in0,$in0,$tmp | ||
176 | vadduwm $rcon,$rcon,$rcon | ||
177 | vxor $in0,$in0,$key | ||
178 | |||
179 | vperm $key,$in0,$in0,$mask # rotate-n-splat | ||
180 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
181 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
182 | vsel $stage,$outhead,$outtail,$outmask | ||
183 | vmr $outhead,$outtail | ||
184 | vcipherlast $key,$key,$rcon | ||
185 | stvx $stage,0,$out | ||
186 | addi $out,$out,16 | ||
187 | |||
188 | vxor $in0,$in0,$tmp | ||
189 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
190 | vxor $in0,$in0,$tmp | ||
191 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
192 | vxor $in0,$in0,$tmp | ||
193 | vxor $in0,$in0,$key | ||
194 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
195 | vsel $stage,$outhead,$outtail,$outmask | ||
196 | vmr $outhead,$outtail | ||
197 | stvx $stage,0,$out | ||
198 | |||
199 | addi $inp,$out,15 # 15 is not typo | ||
200 | addi $out,$out,0x50 | ||
201 | |||
202 | li $rounds,10 | ||
203 | b Ldone | ||
204 | |||
205 | .align 4 | ||
206 | L192: | ||
207 | lvx $tmp,0,$inp | ||
208 | li $cnt,4 | ||
209 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
210 | vsel $stage,$outhead,$outtail,$outmask | ||
211 | vmr $outhead,$outtail | ||
212 | stvx $stage,0,$out | ||
213 | addi $out,$out,16 | ||
214 | vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] | ||
215 | vspltisb $key,8 # borrow $key | ||
216 | mtctr $cnt | ||
217 | vsububm $mask,$mask,$key # adjust the mask | ||
218 | |||
219 | Loop192: | ||
220 | vperm $key,$in1,$in1,$mask # rotate-n-splat | ||
221 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
222 | vcipherlast $key,$key,$rcon | ||
223 | |||
224 | vxor $in0,$in0,$tmp | ||
225 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
226 | vxor $in0,$in0,$tmp | ||
227 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
228 | vxor $in0,$in0,$tmp | ||
229 | |||
230 | vsldoi $stage,$zero,$in1,8 | ||
231 | vspltw $tmp,$in0,3 | ||
232 | vxor $tmp,$tmp,$in1 | ||
233 | vsldoi $in1,$zero,$in1,12 # >>32 | ||
234 | vadduwm $rcon,$rcon,$rcon | ||
235 | vxor $in1,$in1,$tmp | ||
236 | vxor $in0,$in0,$key | ||
237 | vxor $in1,$in1,$key | ||
238 | vsldoi $stage,$stage,$in0,8 | ||
239 | |||
240 | vperm $key,$in1,$in1,$mask # rotate-n-splat | ||
241 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
242 | vperm $outtail,$stage,$stage,$outperm # rotate | ||
243 | vsel $stage,$outhead,$outtail,$outmask | ||
244 | vmr $outhead,$outtail | ||
245 | vcipherlast $key,$key,$rcon | ||
246 | stvx $stage,0,$out | ||
247 | addi $out,$out,16 | ||
248 | |||
249 | vsldoi $stage,$in0,$in1,8 | ||
250 | vxor $in0,$in0,$tmp | ||
251 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
252 | vperm $outtail,$stage,$stage,$outperm # rotate | ||
253 | vsel $stage,$outhead,$outtail,$outmask | ||
254 | vmr $outhead,$outtail | ||
255 | vxor $in0,$in0,$tmp | ||
256 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
257 | vxor $in0,$in0,$tmp | ||
258 | stvx $stage,0,$out | ||
259 | addi $out,$out,16 | ||
260 | |||
261 | vspltw $tmp,$in0,3 | ||
262 | vxor $tmp,$tmp,$in1 | ||
263 | vsldoi $in1,$zero,$in1,12 # >>32 | ||
264 | vadduwm $rcon,$rcon,$rcon | ||
265 | vxor $in1,$in1,$tmp | ||
266 | vxor $in0,$in0,$key | ||
267 | vxor $in1,$in1,$key | ||
268 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
269 | vsel $stage,$outhead,$outtail,$outmask | ||
270 | vmr $outhead,$outtail | ||
271 | stvx $stage,0,$out | ||
272 | addi $inp,$out,15 # 15 is not typo | ||
273 | addi $out,$out,16 | ||
274 | bdnz Loop192 | ||
275 | |||
276 | li $rounds,12 | ||
277 | addi $out,$out,0x20 | ||
278 | b Ldone | ||
279 | |||
280 | .align 4 | ||
281 | L256: | ||
282 | lvx $tmp,0,$inp | ||
283 | li $cnt,7 | ||
284 | li $rounds,14 | ||
285 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
286 | vsel $stage,$outhead,$outtail,$outmask | ||
287 | vmr $outhead,$outtail | ||
288 | stvx $stage,0,$out | ||
289 | addi $out,$out,16 | ||
290 | vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] | ||
291 | mtctr $cnt | ||
292 | |||
293 | Loop256: | ||
294 | vperm $key,$in1,$in1,$mask # rotate-n-splat | ||
295 | vsldoi $tmp,$zero,$in0,12 # >>32 | ||
296 | vperm $outtail,$in1,$in1,$outperm # rotate | ||
297 | vsel $stage,$outhead,$outtail,$outmask | ||
298 | vmr $outhead,$outtail | ||
299 | vcipherlast $key,$key,$rcon | ||
300 | stvx $stage,0,$out | ||
301 | addi $out,$out,16 | ||
302 | |||
303 | vxor $in0,$in0,$tmp | ||
304 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
305 | vxor $in0,$in0,$tmp | ||
306 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
307 | vxor $in0,$in0,$tmp | ||
308 | vadduwm $rcon,$rcon,$rcon | ||
309 | vxor $in0,$in0,$key | ||
310 | vperm $outtail,$in0,$in0,$outperm # rotate | ||
311 | vsel $stage,$outhead,$outtail,$outmask | ||
312 | vmr $outhead,$outtail | ||
313 | stvx $stage,0,$out | ||
314 | addi $inp,$out,15 # 15 is not typo | ||
315 | addi $out,$out,16 | ||
316 | bdz Ldone | ||
317 | |||
318 | vspltw $key,$in0,3 # just splat | ||
319 | vsldoi $tmp,$zero,$in1,12 # >>32 | ||
320 | vsbox $key,$key | ||
321 | |||
322 | vxor $in1,$in1,$tmp | ||
323 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
324 | vxor $in1,$in1,$tmp | ||
325 | vsldoi $tmp,$zero,$tmp,12 # >>32 | ||
326 | vxor $in1,$in1,$tmp | ||
327 | |||
328 | vxor $in1,$in1,$key | ||
329 | b Loop256 | ||
330 | |||
331 | .align 4 | ||
332 | Ldone: | ||
333 | lvx $in1,0,$inp # redundant in aligned case | ||
334 | vsel $in1,$outhead,$in1,$outmask | ||
335 | stvx $in1,0,$inp | ||
336 | li $ptr,0 | ||
337 | mtspr 256,$vrsave | ||
338 | stw $rounds,0($out) | ||
339 | |||
340 | Lenc_key_abort: | ||
341 | mr r3,$ptr | ||
342 | blr | ||
343 | .long 0 | ||
344 | .byte 0,12,0x14,1,0,0,3,0 | ||
345 | .long 0 | ||
346 | .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key | ||
347 | |||
348 | .globl .${prefix}_set_decrypt_key | ||
349 | $STU $sp,-$FRAME($sp) | ||
350 | mflr r10 | ||
351 | $PUSH r10,$FRAME+$LRSAVE($sp) | ||
352 | bl Lset_encrypt_key | ||
353 | mtlr r10 | ||
354 | |||
355 | cmpwi r3,0 | ||
356 | bne- Ldec_key_abort | ||
357 | |||
358 | slwi $cnt,$rounds,4 | ||
359 | subi $inp,$out,240 # first round key | ||
360 | srwi $rounds,$rounds,1 | ||
361 | add $out,$inp,$cnt # last round key | ||
362 | mtctr $rounds | ||
363 | |||
364 | Ldeckey: | ||
365 | lwz r0, 0($inp) | ||
366 | lwz r6, 4($inp) | ||
367 | lwz r7, 8($inp) | ||
368 | lwz r8, 12($inp) | ||
369 | addi $inp,$inp,16 | ||
370 | lwz r9, 0($out) | ||
371 | lwz r10,4($out) | ||
372 | lwz r11,8($out) | ||
373 | lwz r12,12($out) | ||
374 | stw r0, 0($out) | ||
375 | stw r6, 4($out) | ||
376 | stw r7, 8($out) | ||
377 | stw r8, 12($out) | ||
378 | subi $out,$out,16 | ||
379 | stw r9, -16($inp) | ||
380 | stw r10,-12($inp) | ||
381 | stw r11,-8($inp) | ||
382 | stw r12,-4($inp) | ||
383 | bdnz Ldeckey | ||
384 | |||
385 | xor r3,r3,r3 # return value | ||
386 | Ldec_key_abort: | ||
387 | addi $sp,$sp,$FRAME | ||
388 | blr | ||
389 | .long 0 | ||
390 | .byte 0,12,4,1,0x80,0,3,0 | ||
391 | .long 0 | ||
392 | .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key | ||
393 | ___ | ||
394 | }}} | ||
395 | ######################################################################### | ||
396 | {{{ # Single block en- and decrypt procedures # | ||
397 | sub gen_block () { | ||
398 | my $dir = shift; | ||
399 | my $n = $dir eq "de" ? "n" : ""; | ||
400 | my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); | ||
401 | |||
402 | $code.=<<___; | ||
403 | .globl .${prefix}_${dir}crypt | ||
404 | lwz $rounds,240($key) | ||
405 | lis r0,0xfc00 | ||
406 | mfspr $vrsave,256 | ||
407 | li $idx,15 # 15 is not typo | ||
408 | mtspr 256,r0 | ||
409 | |||
410 | lvx v0,0,$inp | ||
411 | neg r11,$out | ||
412 | lvx v1,$idx,$inp | ||
413 | lvsl v2,0,$inp # inpperm | ||
414 | le?vspltisb v4,0x0f | ||
415 | ?lvsl v3,0,r11 # outperm | ||
416 | le?vxor v2,v2,v4 | ||
417 | li $idx,16 | ||
418 | vperm v0,v0,v1,v2 # align [and byte swap in LE] | ||
419 | lvx v1,0,$key | ||
420 | ?lvsl v5,0,$key # keyperm | ||
421 | srwi $rounds,$rounds,1 | ||
422 | lvx v2,$idx,$key | ||
423 | addi $idx,$idx,16 | ||
424 | subi $rounds,$rounds,1 | ||
425 | ?vperm v1,v1,v2,v5 # align round key | ||
426 | |||
427 | vxor v0,v0,v1 | ||
428 | lvx v1,$idx,$key | ||
429 | addi $idx,$idx,16 | ||
430 | mtctr $rounds | ||
431 | |||
432 | Loop_${dir}c: | ||
433 | ?vperm v2,v2,v1,v5 | ||
434 | v${n}cipher v0,v0,v2 | ||
435 | lvx v2,$idx,$key | ||
436 | addi $idx,$idx,16 | ||
437 | ?vperm v1,v1,v2,v5 | ||
438 | v${n}cipher v0,v0,v1 | ||
439 | lvx v1,$idx,$key | ||
440 | addi $idx,$idx,16 | ||
441 | bdnz Loop_${dir}c | ||
442 | |||
443 | ?vperm v2,v2,v1,v5 | ||
444 | v${n}cipher v0,v0,v2 | ||
445 | lvx v2,$idx,$key | ||
446 | ?vperm v1,v1,v2,v5 | ||
447 | v${n}cipherlast v0,v0,v1 | ||
448 | |||
449 | vspltisb v2,-1 | ||
450 | vxor v1,v1,v1 | ||
451 | li $idx,15 # 15 is not typo | ||
452 | ?vperm v2,v1,v2,v3 # outmask | ||
453 | le?vxor v3,v3,v4 | ||
454 | lvx v1,0,$out # outhead | ||
455 | vperm v0,v0,v0,v3 # rotate [and byte swap in LE] | ||
456 | vsel v1,v1,v0,v2 | ||
457 | lvx v4,$idx,$out | ||
458 | stvx v1,0,$out | ||
459 | vsel v0,v0,v4,v2 | ||
460 | stvx v0,$idx,$out | ||
461 | |||
462 | mtspr 256,$vrsave | ||
463 | blr | ||
464 | .long 0 | ||
465 | .byte 0,12,0x14,0,0,0,3,0 | ||
466 | .long 0 | ||
467 | .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt | ||
468 | ___ | ||
469 | } | ||
470 | &gen_block("en"); | ||
471 | &gen_block("de"); | ||
472 | }}} | ||
473 | ######################################################################### | ||
474 | {{{ # CBC en- and decrypt procedures # | ||
475 | my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); | ||
476 | my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); | ||
477 | my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= | ||
478 | map("v$_",(4..10)); | ||
479 | $code.=<<___; | ||
480 | .globl .${prefix}_cbc_encrypt | ||
481 | ${UCMP}i $len,16 | ||
482 | bltlr- | ||
483 | |||
484 | cmpwi $enc,0 # test direction | ||
485 | lis r0,0xffe0 | ||
486 | mfspr $vrsave,256 | ||
487 | mtspr 256,r0 | ||
488 | |||
489 | li $idx,15 | ||
490 | vxor $rndkey0,$rndkey0,$rndkey0 | ||
491 | le?vspltisb $tmp,0x0f | ||
492 | |||
493 | lvx $ivec,0,$ivp # load [unaligned] iv | ||
494 | lvsl $inpperm,0,$ivp | ||
495 | lvx $inptail,$idx,$ivp | ||
496 | le?vxor $inpperm,$inpperm,$tmp | ||
497 | vperm $ivec,$ivec,$inptail,$inpperm | ||
498 | |||
499 | neg r11,$inp | ||
500 | ?lvsl $keyperm,0,$key # prepare for unaligned key | ||
501 | lwz $rounds,240($key) | ||
502 | |||
503 | lvsr $inpperm,0,r11 # prepare for unaligned load | ||
504 | lvx $inptail,0,$inp | ||
505 | addi $inp,$inp,15 # 15 is not typo | ||
506 | le?vxor $inpperm,$inpperm,$tmp | ||
507 | |||
508 | ?lvsr $outperm,0,$out # prepare for unaligned store | ||
509 | vspltisb $outmask,-1 | ||
510 | lvx $outhead,0,$out | ||
511 | ?vperm $outmask,$rndkey0,$outmask,$outperm | ||
512 | le?vxor $outperm,$outperm,$tmp | ||
513 | |||
514 | srwi $rounds,$rounds,1 | ||
515 | li $idx,16 | ||
516 | subi $rounds,$rounds,1 | ||
517 | beq Lcbc_dec | ||
518 | |||
519 | Lcbc_enc: | ||
520 | vmr $inout,$inptail | ||
521 | lvx $inptail,0,$inp | ||
522 | addi $inp,$inp,16 | ||
523 | mtctr $rounds | ||
524 | subi $len,$len,16 # len-=16 | ||
525 | |||
526 | lvx $rndkey0,0,$key | ||
527 | vperm $inout,$inout,$inptail,$inpperm | ||
528 | lvx $rndkey1,$idx,$key | ||
529 | addi $idx,$idx,16 | ||
530 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
531 | vxor $inout,$inout,$rndkey0 | ||
532 | lvx $rndkey0,$idx,$key | ||
533 | addi $idx,$idx,16 | ||
534 | vxor $inout,$inout,$ivec | ||
535 | |||
536 | Loop_cbc_enc: | ||
537 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
538 | vcipher $inout,$inout,$rndkey1 | ||
539 | lvx $rndkey1,$idx,$key | ||
540 | addi $idx,$idx,16 | ||
541 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
542 | vcipher $inout,$inout,$rndkey0 | ||
543 | lvx $rndkey0,$idx,$key | ||
544 | addi $idx,$idx,16 | ||
545 | bdnz Loop_cbc_enc | ||
546 | |||
547 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
548 | vcipher $inout,$inout,$rndkey1 | ||
549 | lvx $rndkey1,$idx,$key | ||
550 | li $idx,16 | ||
551 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
552 | vcipherlast $ivec,$inout,$rndkey0 | ||
553 | ${UCMP}i $len,16 | ||
554 | |||
555 | vperm $tmp,$ivec,$ivec,$outperm | ||
556 | vsel $inout,$outhead,$tmp,$outmask | ||
557 | vmr $outhead,$tmp | ||
558 | stvx $inout,0,$out | ||
559 | addi $out,$out,16 | ||
560 | bge Lcbc_enc | ||
561 | |||
562 | b Lcbc_done | ||
563 | |||
564 | .align 4 | ||
565 | Lcbc_dec: | ||
566 | ${UCMP}i $len,128 | ||
567 | bge _aesp8_cbc_decrypt8x | ||
568 | vmr $tmp,$inptail | ||
569 | lvx $inptail,0,$inp | ||
570 | addi $inp,$inp,16 | ||
571 | mtctr $rounds | ||
572 | subi $len,$len,16 # len-=16 | ||
573 | |||
574 | lvx $rndkey0,0,$key | ||
575 | vperm $tmp,$tmp,$inptail,$inpperm | ||
576 | lvx $rndkey1,$idx,$key | ||
577 | addi $idx,$idx,16 | ||
578 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
579 | vxor $inout,$tmp,$rndkey0 | ||
580 | lvx $rndkey0,$idx,$key | ||
581 | addi $idx,$idx,16 | ||
582 | |||
583 | Loop_cbc_dec: | ||
584 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
585 | vncipher $inout,$inout,$rndkey1 | ||
586 | lvx $rndkey1,$idx,$key | ||
587 | addi $idx,$idx,16 | ||
588 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
589 | vncipher $inout,$inout,$rndkey0 | ||
590 | lvx $rndkey0,$idx,$key | ||
591 | addi $idx,$idx,16 | ||
592 | bdnz Loop_cbc_dec | ||
593 | |||
594 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
595 | vncipher $inout,$inout,$rndkey1 | ||
596 | lvx $rndkey1,$idx,$key | ||
597 | li $idx,16 | ||
598 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
599 | vncipherlast $inout,$inout,$rndkey0 | ||
600 | ${UCMP}i $len,16 | ||
601 | |||
602 | vxor $inout,$inout,$ivec | ||
603 | vmr $ivec,$tmp | ||
604 | vperm $tmp,$inout,$inout,$outperm | ||
605 | vsel $inout,$outhead,$tmp,$outmask | ||
606 | vmr $outhead,$tmp | ||
607 | stvx $inout,0,$out | ||
608 | addi $out,$out,16 | ||
609 | bge Lcbc_dec | ||
610 | |||
611 | Lcbc_done: | ||
612 | addi $out,$out,-1 | ||
613 | lvx $inout,0,$out # redundant in aligned case | ||
614 | vsel $inout,$outhead,$inout,$outmask | ||
615 | stvx $inout,0,$out | ||
616 | |||
617 | neg $enc,$ivp # write [unaligned] iv | ||
618 | li $idx,15 # 15 is not typo | ||
619 | vxor $rndkey0,$rndkey0,$rndkey0 | ||
620 | vspltisb $outmask,-1 | ||
621 | le?vspltisb $tmp,0x0f | ||
622 | ?lvsl $outperm,0,$enc | ||
623 | ?vperm $outmask,$rndkey0,$outmask,$outperm | ||
624 | le?vxor $outperm,$outperm,$tmp | ||
625 | lvx $outhead,0,$ivp | ||
626 | vperm $ivec,$ivec,$ivec,$outperm | ||
627 | vsel $inout,$outhead,$ivec,$outmask | ||
628 | lvx $inptail,$idx,$ivp | ||
629 | stvx $inout,0,$ivp | ||
630 | vsel $inout,$ivec,$inptail,$outmask | ||
631 | stvx $inout,$idx,$ivp | ||
632 | |||
633 | mtspr 256,$vrsave | ||
634 | blr | ||
635 | .long 0 | ||
636 | .byte 0,12,0x14,0,0,0,6,0 | ||
637 | .long 0 | ||
638 | ___ | ||
639 | ######################################################################### | ||
640 | {{ # Optimized CBC decrypt procedure # | ||
641 | my $key_="r11"; | ||
642 | my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); | ||
643 | my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); | ||
644 | my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); | ||
645 | my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys | ||
646 | # v26-v31 last 6 round keys | ||
647 | my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment | ||
648 | |||
649 | $code.=<<___; | ||
650 | .align 5 | ||
651 | _aesp8_cbc_decrypt8x: | ||
652 | $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) | ||
653 | li r10,`$FRAME+8*16+15` | ||
654 | li r11,`$FRAME+8*16+31` | ||
655 | stvx v20,r10,$sp # ABI says so | ||
656 | addi r10,r10,32 | ||
657 | stvx v21,r11,$sp | ||
658 | addi r11,r11,32 | ||
659 | stvx v22,r10,$sp | ||
660 | addi r10,r10,32 | ||
661 | stvx v23,r11,$sp | ||
662 | addi r11,r11,32 | ||
663 | stvx v24,r10,$sp | ||
664 | addi r10,r10,32 | ||
665 | stvx v25,r11,$sp | ||
666 | addi r11,r11,32 | ||
667 | stvx v26,r10,$sp | ||
668 | addi r10,r10,32 | ||
669 | stvx v27,r11,$sp | ||
670 | addi r11,r11,32 | ||
671 | stvx v28,r10,$sp | ||
672 | addi r10,r10,32 | ||
673 | stvx v29,r11,$sp | ||
674 | addi r11,r11,32 | ||
675 | stvx v30,r10,$sp | ||
676 | stvx v31,r11,$sp | ||
677 | li r0,-1 | ||
678 | stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave | ||
679 | li $x10,0x10 | ||
680 | $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
681 | li $x20,0x20 | ||
682 | $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
683 | li $x30,0x30 | ||
684 | $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
685 | li $x40,0x40 | ||
686 | $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
687 | li $x50,0x50 | ||
688 | $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
689 | li $x60,0x60 | ||
690 | $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
691 | li $x70,0x70 | ||
692 | mtspr 256,r0 | ||
693 | |||
694 | subi $rounds,$rounds,3 # -4 in total | ||
695 | subi $len,$len,128 # bias | ||
696 | |||
697 | lvx $rndkey0,$x00,$key # load key schedule | ||
698 | lvx v30,$x10,$key | ||
699 | addi $key,$key,0x20 | ||
700 | lvx v31,$x00,$key | ||
701 | ?vperm $rndkey0,$rndkey0,v30,$keyperm | ||
702 | addi $key_,$sp,$FRAME+15 | ||
703 | mtctr $rounds | ||
704 | |||
705 | Load_cbc_dec_key: | ||
706 | ?vperm v24,v30,v31,$keyperm | ||
707 | lvx v30,$x10,$key | ||
708 | addi $key,$key,0x20 | ||
709 | stvx v24,$x00,$key_ # off-load round[1] | ||
710 | ?vperm v25,v31,v30,$keyperm | ||
711 | lvx v31,$x00,$key | ||
712 | stvx v25,$x10,$key_ # off-load round[2] | ||
713 | addi $key_,$key_,0x20 | ||
714 | bdnz Load_cbc_dec_key | ||
715 | |||
716 | lvx v26,$x10,$key | ||
717 | ?vperm v24,v30,v31,$keyperm | ||
718 | lvx v27,$x20,$key | ||
719 | stvx v24,$x00,$key_ # off-load round[3] | ||
720 | ?vperm v25,v31,v26,$keyperm | ||
721 | lvx v28,$x30,$key | ||
722 | stvx v25,$x10,$key_ # off-load round[4] | ||
723 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
724 | ?vperm v26,v26,v27,$keyperm | ||
725 | lvx v29,$x40,$key | ||
726 | ?vperm v27,v27,v28,$keyperm | ||
727 | lvx v30,$x50,$key | ||
728 | ?vperm v28,v28,v29,$keyperm | ||
729 | lvx v31,$x60,$key | ||
730 | ?vperm v29,v29,v30,$keyperm | ||
731 | lvx $out0,$x70,$key # borrow $out0 | ||
732 | ?vperm v30,v30,v31,$keyperm | ||
733 | lvx v24,$x00,$key_ # pre-load round[1] | ||
734 | ?vperm v31,v31,$out0,$keyperm | ||
735 | lvx v25,$x10,$key_ # pre-load round[2] | ||
736 | |||
737 | #lvx $inptail,0,$inp # "caller" already did this | ||
738 | #addi $inp,$inp,15 # 15 is not typo | ||
739 | subi $inp,$inp,15 # undo "caller" | ||
740 | |||
741 | le?li $idx,8 | ||
742 | lvx_u $in0,$x00,$inp # load first 8 "words" | ||
743 | le?lvsl $inpperm,0,$idx | ||
744 | le?vspltisb $tmp,0x0f | ||
745 | lvx_u $in1,$x10,$inp | ||
746 | le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u | ||
747 | lvx_u $in2,$x20,$inp | ||
748 | le?vperm $in0,$in0,$in0,$inpperm | ||
749 | lvx_u $in3,$x30,$inp | ||
750 | le?vperm $in1,$in1,$in1,$inpperm | ||
751 | lvx_u $in4,$x40,$inp | ||
752 | le?vperm $in2,$in2,$in2,$inpperm | ||
753 | vxor $out0,$in0,$rndkey0 | ||
754 | lvx_u $in5,$x50,$inp | ||
755 | le?vperm $in3,$in3,$in3,$inpperm | ||
756 | vxor $out1,$in1,$rndkey0 | ||
757 | lvx_u $in6,$x60,$inp | ||
758 | le?vperm $in4,$in4,$in4,$inpperm | ||
759 | vxor $out2,$in2,$rndkey0 | ||
760 | lvx_u $in7,$x70,$inp | ||
761 | addi $inp,$inp,0x80 | ||
762 | le?vperm $in5,$in5,$in5,$inpperm | ||
763 | vxor $out3,$in3,$rndkey0 | ||
764 | le?vperm $in6,$in6,$in6,$inpperm | ||
765 | vxor $out4,$in4,$rndkey0 | ||
766 | le?vperm $in7,$in7,$in7,$inpperm | ||
767 | vxor $out5,$in5,$rndkey0 | ||
768 | vxor $out6,$in6,$rndkey0 | ||
769 | vxor $out7,$in7,$rndkey0 | ||
770 | |||
771 | mtctr $rounds | ||
772 | b Loop_cbc_dec8x | ||
773 | .align 5 | ||
774 | Loop_cbc_dec8x: | ||
775 | vncipher $out0,$out0,v24 | ||
776 | vncipher $out1,$out1,v24 | ||
777 | vncipher $out2,$out2,v24 | ||
778 | vncipher $out3,$out3,v24 | ||
779 | vncipher $out4,$out4,v24 | ||
780 | vncipher $out5,$out5,v24 | ||
781 | vncipher $out6,$out6,v24 | ||
782 | vncipher $out7,$out7,v24 | ||
783 | lvx v24,$x20,$key_ # round[3] | ||
784 | addi $key_,$key_,0x20 | ||
785 | |||
786 | vncipher $out0,$out0,v25 | ||
787 | vncipher $out1,$out1,v25 | ||
788 | vncipher $out2,$out2,v25 | ||
789 | vncipher $out3,$out3,v25 | ||
790 | vncipher $out4,$out4,v25 | ||
791 | vncipher $out5,$out5,v25 | ||
792 | vncipher $out6,$out6,v25 | ||
793 | vncipher $out7,$out7,v25 | ||
794 | lvx v25,$x10,$key_ # round[4] | ||
795 | bdnz Loop_cbc_dec8x | ||
796 | |||
797 | subic $len,$len,128 # $len-=128 | ||
798 | vncipher $out0,$out0,v24 | ||
799 | vncipher $out1,$out1,v24 | ||
800 | vncipher $out2,$out2,v24 | ||
801 | vncipher $out3,$out3,v24 | ||
802 | vncipher $out4,$out4,v24 | ||
803 | vncipher $out5,$out5,v24 | ||
804 | vncipher $out6,$out6,v24 | ||
805 | vncipher $out7,$out7,v24 | ||
806 | |||
807 | subfe. r0,r0,r0 # borrow?-1:0 | ||
808 | vncipher $out0,$out0,v25 | ||
809 | vncipher $out1,$out1,v25 | ||
810 | vncipher $out2,$out2,v25 | ||
811 | vncipher $out3,$out3,v25 | ||
812 | vncipher $out4,$out4,v25 | ||
813 | vncipher $out5,$out5,v25 | ||
814 | vncipher $out6,$out6,v25 | ||
815 | vncipher $out7,$out7,v25 | ||
816 | |||
817 | and r0,r0,$len | ||
818 | vncipher $out0,$out0,v26 | ||
819 | vncipher $out1,$out1,v26 | ||
820 | vncipher $out2,$out2,v26 | ||
821 | vncipher $out3,$out3,v26 | ||
822 | vncipher $out4,$out4,v26 | ||
823 | vncipher $out5,$out5,v26 | ||
824 | vncipher $out6,$out6,v26 | ||
825 | vncipher $out7,$out7,v26 | ||
826 | |||
827 | add $inp,$inp,r0 # $inp is adjusted in such a | ||
828 | # way that at exit from the | ||
829 | # loop inX-in7 are loaded | ||
830 | # with last "words" | ||
831 | vncipher $out0,$out0,v27 | ||
832 | vncipher $out1,$out1,v27 | ||
833 | vncipher $out2,$out2,v27 | ||
834 | vncipher $out3,$out3,v27 | ||
835 | vncipher $out4,$out4,v27 | ||
836 | vncipher $out5,$out5,v27 | ||
837 | vncipher $out6,$out6,v27 | ||
838 | vncipher $out7,$out7,v27 | ||
839 | |||
840 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
841 | vncipher $out0,$out0,v28 | ||
842 | vncipher $out1,$out1,v28 | ||
843 | vncipher $out2,$out2,v28 | ||
844 | vncipher $out3,$out3,v28 | ||
845 | vncipher $out4,$out4,v28 | ||
846 | vncipher $out5,$out5,v28 | ||
847 | vncipher $out6,$out6,v28 | ||
848 | vncipher $out7,$out7,v28 | ||
849 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
850 | |||
851 | vncipher $out0,$out0,v29 | ||
852 | vncipher $out1,$out1,v29 | ||
853 | vncipher $out2,$out2,v29 | ||
854 | vncipher $out3,$out3,v29 | ||
855 | vncipher $out4,$out4,v29 | ||
856 | vncipher $out5,$out5,v29 | ||
857 | vncipher $out6,$out6,v29 | ||
858 | vncipher $out7,$out7,v29 | ||
859 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
860 | |||
861 | vncipher $out0,$out0,v30 | ||
862 | vxor $ivec,$ivec,v31 # xor with last round key | ||
863 | vncipher $out1,$out1,v30 | ||
864 | vxor $in0,$in0,v31 | ||
865 | vncipher $out2,$out2,v30 | ||
866 | vxor $in1,$in1,v31 | ||
867 | vncipher $out3,$out3,v30 | ||
868 | vxor $in2,$in2,v31 | ||
869 | vncipher $out4,$out4,v30 | ||
870 | vxor $in3,$in3,v31 | ||
871 | vncipher $out5,$out5,v30 | ||
872 | vxor $in4,$in4,v31 | ||
873 | vncipher $out6,$out6,v30 | ||
874 | vxor $in5,$in5,v31 | ||
875 | vncipher $out7,$out7,v30 | ||
876 | vxor $in6,$in6,v31 | ||
877 | |||
878 | vncipherlast $out0,$out0,$ivec | ||
879 | vncipherlast $out1,$out1,$in0 | ||
880 | lvx_u $in0,$x00,$inp # load next input block | ||
881 | vncipherlast $out2,$out2,$in1 | ||
882 | lvx_u $in1,$x10,$inp | ||
883 | vncipherlast $out3,$out3,$in2 | ||
884 | le?vperm $in0,$in0,$in0,$inpperm | ||
885 | lvx_u $in2,$x20,$inp | ||
886 | vncipherlast $out4,$out4,$in3 | ||
887 | le?vperm $in1,$in1,$in1,$inpperm | ||
888 | lvx_u $in3,$x30,$inp | ||
889 | vncipherlast $out5,$out5,$in4 | ||
890 | le?vperm $in2,$in2,$in2,$inpperm | ||
891 | lvx_u $in4,$x40,$inp | ||
892 | vncipherlast $out6,$out6,$in5 | ||
893 | le?vperm $in3,$in3,$in3,$inpperm | ||
894 | lvx_u $in5,$x50,$inp | ||
895 | vncipherlast $out7,$out7,$in6 | ||
896 | le?vperm $in4,$in4,$in4,$inpperm | ||
897 | lvx_u $in6,$x60,$inp | ||
898 | vmr $ivec,$in7 | ||
899 | le?vperm $in5,$in5,$in5,$inpperm | ||
900 | lvx_u $in7,$x70,$inp | ||
901 | addi $inp,$inp,0x80 | ||
902 | |||
903 | le?vperm $out0,$out0,$out0,$inpperm | ||
904 | le?vperm $out1,$out1,$out1,$inpperm | ||
905 | stvx_u $out0,$x00,$out | ||
906 | le?vperm $in6,$in6,$in6,$inpperm | ||
907 | vxor $out0,$in0,$rndkey0 | ||
908 | le?vperm $out2,$out2,$out2,$inpperm | ||
909 | stvx_u $out1,$x10,$out | ||
910 | le?vperm $in7,$in7,$in7,$inpperm | ||
911 | vxor $out1,$in1,$rndkey0 | ||
912 | le?vperm $out3,$out3,$out3,$inpperm | ||
913 | stvx_u $out2,$x20,$out | ||
914 | vxor $out2,$in2,$rndkey0 | ||
915 | le?vperm $out4,$out4,$out4,$inpperm | ||
916 | stvx_u $out3,$x30,$out | ||
917 | vxor $out3,$in3,$rndkey0 | ||
918 | le?vperm $out5,$out5,$out5,$inpperm | ||
919 | stvx_u $out4,$x40,$out | ||
920 | vxor $out4,$in4,$rndkey0 | ||
921 | le?vperm $out6,$out6,$out6,$inpperm | ||
922 | stvx_u $out5,$x50,$out | ||
923 | vxor $out5,$in5,$rndkey0 | ||
924 | le?vperm $out7,$out7,$out7,$inpperm | ||
925 | stvx_u $out6,$x60,$out | ||
926 | vxor $out6,$in6,$rndkey0 | ||
927 | stvx_u $out7,$x70,$out | ||
928 | addi $out,$out,0x80 | ||
929 | vxor $out7,$in7,$rndkey0 | ||
930 | |||
931 | mtctr $rounds | ||
932 | beq Loop_cbc_dec8x # did $len-=128 borrow? | ||
933 | |||
934 | addic. $len,$len,128 | ||
935 | beq Lcbc_dec8x_done | ||
936 | nop | ||
937 | nop | ||
938 | |||
939 | Loop_cbc_dec8x_tail: # up to 7 "words" tail... | ||
940 | vncipher $out1,$out1,v24 | ||
941 | vncipher $out2,$out2,v24 | ||
942 | vncipher $out3,$out3,v24 | ||
943 | vncipher $out4,$out4,v24 | ||
944 | vncipher $out5,$out5,v24 | ||
945 | vncipher $out6,$out6,v24 | ||
946 | vncipher $out7,$out7,v24 | ||
947 | lvx v24,$x20,$key_ # round[3] | ||
948 | addi $key_,$key_,0x20 | ||
949 | |||
950 | vncipher $out1,$out1,v25 | ||
951 | vncipher $out2,$out2,v25 | ||
952 | vncipher $out3,$out3,v25 | ||
953 | vncipher $out4,$out4,v25 | ||
954 | vncipher $out5,$out5,v25 | ||
955 | vncipher $out6,$out6,v25 | ||
956 | vncipher $out7,$out7,v25 | ||
957 | lvx v25,$x10,$key_ # round[4] | ||
958 | bdnz Loop_cbc_dec8x_tail | ||
959 | |||
960 | vncipher $out1,$out1,v24 | ||
961 | vncipher $out2,$out2,v24 | ||
962 | vncipher $out3,$out3,v24 | ||
963 | vncipher $out4,$out4,v24 | ||
964 | vncipher $out5,$out5,v24 | ||
965 | vncipher $out6,$out6,v24 | ||
966 | vncipher $out7,$out7,v24 | ||
967 | |||
968 | vncipher $out1,$out1,v25 | ||
969 | vncipher $out2,$out2,v25 | ||
970 | vncipher $out3,$out3,v25 | ||
971 | vncipher $out4,$out4,v25 | ||
972 | vncipher $out5,$out5,v25 | ||
973 | vncipher $out6,$out6,v25 | ||
974 | vncipher $out7,$out7,v25 | ||
975 | |||
976 | vncipher $out1,$out1,v26 | ||
977 | vncipher $out2,$out2,v26 | ||
978 | vncipher $out3,$out3,v26 | ||
979 | vncipher $out4,$out4,v26 | ||
980 | vncipher $out5,$out5,v26 | ||
981 | vncipher $out6,$out6,v26 | ||
982 | vncipher $out7,$out7,v26 | ||
983 | |||
984 | vncipher $out1,$out1,v27 | ||
985 | vncipher $out2,$out2,v27 | ||
986 | vncipher $out3,$out3,v27 | ||
987 | vncipher $out4,$out4,v27 | ||
988 | vncipher $out5,$out5,v27 | ||
989 | vncipher $out6,$out6,v27 | ||
990 | vncipher $out7,$out7,v27 | ||
991 | |||
992 | vncipher $out1,$out1,v28 | ||
993 | vncipher $out2,$out2,v28 | ||
994 | vncipher $out3,$out3,v28 | ||
995 | vncipher $out4,$out4,v28 | ||
996 | vncipher $out5,$out5,v28 | ||
997 | vncipher $out6,$out6,v28 | ||
998 | vncipher $out7,$out7,v28 | ||
999 | |||
1000 | vncipher $out1,$out1,v29 | ||
1001 | vncipher $out2,$out2,v29 | ||
1002 | vncipher $out3,$out3,v29 | ||
1003 | vncipher $out4,$out4,v29 | ||
1004 | vncipher $out5,$out5,v29 | ||
1005 | vncipher $out6,$out6,v29 | ||
1006 | vncipher $out7,$out7,v29 | ||
1007 | |||
1008 | vncipher $out1,$out1,v30 | ||
1009 | vxor $ivec,$ivec,v31 # last round key | ||
1010 | vncipher $out2,$out2,v30 | ||
1011 | vxor $in1,$in1,v31 | ||
1012 | vncipher $out3,$out3,v30 | ||
1013 | vxor $in2,$in2,v31 | ||
1014 | vncipher $out4,$out4,v30 | ||
1015 | vxor $in3,$in3,v31 | ||
1016 | vncipher $out5,$out5,v30 | ||
1017 | vxor $in4,$in4,v31 | ||
1018 | vncipher $out6,$out6,v30 | ||
1019 | vxor $in5,$in5,v31 | ||
1020 | vncipher $out7,$out7,v30 | ||
1021 | vxor $in6,$in6,v31 | ||
1022 | |||
1023 | cmplwi $len,32 # switch($len) | ||
1024 | blt Lcbc_dec8x_one | ||
1025 | nop | ||
1026 | beq Lcbc_dec8x_two | ||
1027 | cmplwi $len,64 | ||
1028 | blt Lcbc_dec8x_three | ||
1029 | nop | ||
1030 | beq Lcbc_dec8x_four | ||
1031 | cmplwi $len,96 | ||
1032 | blt Lcbc_dec8x_five | ||
1033 | nop | ||
1034 | beq Lcbc_dec8x_six | ||
1035 | |||
1036 | Lcbc_dec8x_seven: | ||
1037 | vncipherlast $out1,$out1,$ivec | ||
1038 | vncipherlast $out2,$out2,$in1 | ||
1039 | vncipherlast $out3,$out3,$in2 | ||
1040 | vncipherlast $out4,$out4,$in3 | ||
1041 | vncipherlast $out5,$out5,$in4 | ||
1042 | vncipherlast $out6,$out6,$in5 | ||
1043 | vncipherlast $out7,$out7,$in6 | ||
1044 | vmr $ivec,$in7 | ||
1045 | |||
1046 | le?vperm $out1,$out1,$out1,$inpperm | ||
1047 | le?vperm $out2,$out2,$out2,$inpperm | ||
1048 | stvx_u $out1,$x00,$out | ||
1049 | le?vperm $out3,$out3,$out3,$inpperm | ||
1050 | stvx_u $out2,$x10,$out | ||
1051 | le?vperm $out4,$out4,$out4,$inpperm | ||
1052 | stvx_u $out3,$x20,$out | ||
1053 | le?vperm $out5,$out5,$out5,$inpperm | ||
1054 | stvx_u $out4,$x30,$out | ||
1055 | le?vperm $out6,$out6,$out6,$inpperm | ||
1056 | stvx_u $out5,$x40,$out | ||
1057 | le?vperm $out7,$out7,$out7,$inpperm | ||
1058 | stvx_u $out6,$x50,$out | ||
1059 | stvx_u $out7,$x60,$out | ||
1060 | addi $out,$out,0x70 | ||
1061 | b Lcbc_dec8x_done | ||
1062 | |||
1063 | .align 5 | ||
1064 | Lcbc_dec8x_six: | ||
1065 | vncipherlast $out2,$out2,$ivec | ||
1066 | vncipherlast $out3,$out3,$in2 | ||
1067 | vncipherlast $out4,$out4,$in3 | ||
1068 | vncipherlast $out5,$out5,$in4 | ||
1069 | vncipherlast $out6,$out6,$in5 | ||
1070 | vncipherlast $out7,$out7,$in6 | ||
1071 | vmr $ivec,$in7 | ||
1072 | |||
1073 | le?vperm $out2,$out2,$out2,$inpperm | ||
1074 | le?vperm $out3,$out3,$out3,$inpperm | ||
1075 | stvx_u $out2,$x00,$out | ||
1076 | le?vperm $out4,$out4,$out4,$inpperm | ||
1077 | stvx_u $out3,$x10,$out | ||
1078 | le?vperm $out5,$out5,$out5,$inpperm | ||
1079 | stvx_u $out4,$x20,$out | ||
1080 | le?vperm $out6,$out6,$out6,$inpperm | ||
1081 | stvx_u $out5,$x30,$out | ||
1082 | le?vperm $out7,$out7,$out7,$inpperm | ||
1083 | stvx_u $out6,$x40,$out | ||
1084 | stvx_u $out7,$x50,$out | ||
1085 | addi $out,$out,0x60 | ||
1086 | b Lcbc_dec8x_done | ||
1087 | |||
1088 | .align 5 | ||
1089 | Lcbc_dec8x_five: | ||
1090 | vncipherlast $out3,$out3,$ivec | ||
1091 | vncipherlast $out4,$out4,$in3 | ||
1092 | vncipherlast $out5,$out5,$in4 | ||
1093 | vncipherlast $out6,$out6,$in5 | ||
1094 | vncipherlast $out7,$out7,$in6 | ||
1095 | vmr $ivec,$in7 | ||
1096 | |||
1097 | le?vperm $out3,$out3,$out3,$inpperm | ||
1098 | le?vperm $out4,$out4,$out4,$inpperm | ||
1099 | stvx_u $out3,$x00,$out | ||
1100 | le?vperm $out5,$out5,$out5,$inpperm | ||
1101 | stvx_u $out4,$x10,$out | ||
1102 | le?vperm $out6,$out6,$out6,$inpperm | ||
1103 | stvx_u $out5,$x20,$out | ||
1104 | le?vperm $out7,$out7,$out7,$inpperm | ||
1105 | stvx_u $out6,$x30,$out | ||
1106 | stvx_u $out7,$x40,$out | ||
1107 | addi $out,$out,0x50 | ||
1108 | b Lcbc_dec8x_done | ||
1109 | |||
1110 | .align 5 | ||
1111 | Lcbc_dec8x_four: | ||
1112 | vncipherlast $out4,$out4,$ivec | ||
1113 | vncipherlast $out5,$out5,$in4 | ||
1114 | vncipherlast $out6,$out6,$in5 | ||
1115 | vncipherlast $out7,$out7,$in6 | ||
1116 | vmr $ivec,$in7 | ||
1117 | |||
1118 | le?vperm $out4,$out4,$out4,$inpperm | ||
1119 | le?vperm $out5,$out5,$out5,$inpperm | ||
1120 | stvx_u $out4,$x00,$out | ||
1121 | le?vperm $out6,$out6,$out6,$inpperm | ||
1122 | stvx_u $out5,$x10,$out | ||
1123 | le?vperm $out7,$out7,$out7,$inpperm | ||
1124 | stvx_u $out6,$x20,$out | ||
1125 | stvx_u $out7,$x30,$out | ||
1126 | addi $out,$out,0x40 | ||
1127 | b Lcbc_dec8x_done | ||
1128 | |||
1129 | .align 5 | ||
1130 | Lcbc_dec8x_three: | ||
1131 | vncipherlast $out5,$out5,$ivec | ||
1132 | vncipherlast $out6,$out6,$in5 | ||
1133 | vncipherlast $out7,$out7,$in6 | ||
1134 | vmr $ivec,$in7 | ||
1135 | |||
1136 | le?vperm $out5,$out5,$out5,$inpperm | ||
1137 | le?vperm $out6,$out6,$out6,$inpperm | ||
1138 | stvx_u $out5,$x00,$out | ||
1139 | le?vperm $out7,$out7,$out7,$inpperm | ||
1140 | stvx_u $out6,$x10,$out | ||
1141 | stvx_u $out7,$x20,$out | ||
1142 | addi $out,$out,0x30 | ||
1143 | b Lcbc_dec8x_done | ||
1144 | |||
1145 | .align 5 | ||
1146 | Lcbc_dec8x_two: | ||
1147 | vncipherlast $out6,$out6,$ivec | ||
1148 | vncipherlast $out7,$out7,$in6 | ||
1149 | vmr $ivec,$in7 | ||
1150 | |||
1151 | le?vperm $out6,$out6,$out6,$inpperm | ||
1152 | le?vperm $out7,$out7,$out7,$inpperm | ||
1153 | stvx_u $out6,$x00,$out | ||
1154 | stvx_u $out7,$x10,$out | ||
1155 | addi $out,$out,0x20 | ||
1156 | b Lcbc_dec8x_done | ||
1157 | |||
1158 | .align 5 | ||
1159 | Lcbc_dec8x_one: | ||
1160 | vncipherlast $out7,$out7,$ivec | ||
1161 | vmr $ivec,$in7 | ||
1162 | |||
1163 | le?vperm $out7,$out7,$out7,$inpperm | ||
1164 | stvx_u $out7,0,$out | ||
1165 | addi $out,$out,0x10 | ||
1166 | |||
1167 | Lcbc_dec8x_done: | ||
1168 | le?vperm $ivec,$ivec,$ivec,$inpperm | ||
1169 | stvx_u $ivec,0,$ivp # write [unaligned] iv | ||
1170 | |||
1171 | li r10,`$FRAME+15` | ||
1172 | li r11,`$FRAME+31` | ||
1173 | stvx $inpperm,r10,$sp # wipe copies of round keys | ||
1174 | addi r10,r10,32 | ||
1175 | stvx $inpperm,r11,$sp | ||
1176 | addi r11,r11,32 | ||
1177 | stvx $inpperm,r10,$sp | ||
1178 | addi r10,r10,32 | ||
1179 | stvx $inpperm,r11,$sp | ||
1180 | addi r11,r11,32 | ||
1181 | stvx $inpperm,r10,$sp | ||
1182 | addi r10,r10,32 | ||
1183 | stvx $inpperm,r11,$sp | ||
1184 | addi r11,r11,32 | ||
1185 | stvx $inpperm,r10,$sp | ||
1186 | addi r10,r10,32 | ||
1187 | stvx $inpperm,r11,$sp | ||
1188 | addi r11,r11,32 | ||
1189 | |||
1190 | mtspr 256,$vrsave | ||
1191 | lvx v20,r10,$sp # ABI says so | ||
1192 | addi r10,r10,32 | ||
1193 | lvx v21,r11,$sp | ||
1194 | addi r11,r11,32 | ||
1195 | lvx v22,r10,$sp | ||
1196 | addi r10,r10,32 | ||
1197 | lvx v23,r11,$sp | ||
1198 | addi r11,r11,32 | ||
1199 | lvx v24,r10,$sp | ||
1200 | addi r10,r10,32 | ||
1201 | lvx v25,r11,$sp | ||
1202 | addi r11,r11,32 | ||
1203 | lvx v26,r10,$sp | ||
1204 | addi r10,r10,32 | ||
1205 | lvx v27,r11,$sp | ||
1206 | addi r11,r11,32 | ||
1207 | lvx v28,r10,$sp | ||
1208 | addi r10,r10,32 | ||
1209 | lvx v29,r11,$sp | ||
1210 | addi r11,r11,32 | ||
1211 | lvx v30,r10,$sp | ||
1212 | lvx v31,r11,$sp | ||
1213 | $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
1214 | $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
1215 | $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
1216 | $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
1217 | $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
1218 | $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
1219 | addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` | ||
1220 | blr | ||
1221 | .long 0 | ||
1222 | .byte 0,12,0x14,0,0x80,6,6,0 | ||
1223 | .long 0 | ||
1224 | .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt | ||
1225 | ___ | ||
1226 | }} }}} | ||
1227 | |||
1228 | ######################################################################### | ||
1229 | {{{ # CTR procedure[s] # | ||
1230 | my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); | ||
1231 | my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); | ||
1232 | my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= | ||
1233 | map("v$_",(4..11)); | ||
1234 | my $dat=$tmp; | ||
1235 | |||
1236 | $code.=<<___; | ||
1237 | .globl .${prefix}_ctr32_encrypt_blocks | ||
1238 | ${UCMP}i $len,1 | ||
1239 | bltlr- | ||
1240 | |||
1241 | lis r0,0xfff0 | ||
1242 | mfspr $vrsave,256 | ||
1243 | mtspr 256,r0 | ||
1244 | |||
1245 | li $idx,15 | ||
1246 | vxor $rndkey0,$rndkey0,$rndkey0 | ||
1247 | le?vspltisb $tmp,0x0f | ||
1248 | |||
1249 | lvx $ivec,0,$ivp # load [unaligned] iv | ||
1250 | lvsl $inpperm,0,$ivp | ||
1251 | lvx $inptail,$idx,$ivp | ||
1252 | vspltisb $one,1 | ||
1253 | le?vxor $inpperm,$inpperm,$tmp | ||
1254 | vperm $ivec,$ivec,$inptail,$inpperm | ||
1255 | vsldoi $one,$rndkey0,$one,1 | ||
1256 | |||
1257 | neg r11,$inp | ||
1258 | ?lvsl $keyperm,0,$key # prepare for unaligned key | ||
1259 | lwz $rounds,240($key) | ||
1260 | |||
1261 | lvsr $inpperm,0,r11 # prepare for unaligned load | ||
1262 | lvx $inptail,0,$inp | ||
1263 | addi $inp,$inp,15 # 15 is not typo | ||
1264 | le?vxor $inpperm,$inpperm,$tmp | ||
1265 | |||
1266 | srwi $rounds,$rounds,1 | ||
1267 | li $idx,16 | ||
1268 | subi $rounds,$rounds,1 | ||
1269 | |||
1270 | ${UCMP}i $len,8 | ||
1271 | bge _aesp8_ctr32_encrypt8x | ||
1272 | |||
1273 | ?lvsr $outperm,0,$out # prepare for unaligned store | ||
1274 | vspltisb $outmask,-1 | ||
1275 | lvx $outhead,0,$out | ||
1276 | ?vperm $outmask,$rndkey0,$outmask,$outperm | ||
1277 | le?vxor $outperm,$outperm,$tmp | ||
1278 | |||
1279 | lvx $rndkey0,0,$key | ||
1280 | mtctr $rounds | ||
1281 | lvx $rndkey1,$idx,$key | ||
1282 | addi $idx,$idx,16 | ||
1283 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
1284 | vxor $inout,$ivec,$rndkey0 | ||
1285 | lvx $rndkey0,$idx,$key | ||
1286 | addi $idx,$idx,16 | ||
1287 | b Loop_ctr32_enc | ||
1288 | |||
1289 | .align 5 | ||
1290 | Loop_ctr32_enc: | ||
1291 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
1292 | vcipher $inout,$inout,$rndkey1 | ||
1293 | lvx $rndkey1,$idx,$key | ||
1294 | addi $idx,$idx,16 | ||
1295 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
1296 | vcipher $inout,$inout,$rndkey0 | ||
1297 | lvx $rndkey0,$idx,$key | ||
1298 | addi $idx,$idx,16 | ||
1299 | bdnz Loop_ctr32_enc | ||
1300 | |||
1301 | vadduwm $ivec,$ivec,$one | ||
1302 | vmr $dat,$inptail | ||
1303 | lvx $inptail,0,$inp | ||
1304 | addi $inp,$inp,16 | ||
1305 | subic. $len,$len,1 # blocks-- | ||
1306 | |||
1307 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | ||
1308 | vcipher $inout,$inout,$rndkey1 | ||
1309 | lvx $rndkey1,$idx,$key | ||
1310 | vperm $dat,$dat,$inptail,$inpperm | ||
1311 | li $idx,16 | ||
1312 | ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm | ||
1313 | lvx $rndkey0,0,$key | ||
1314 | vxor $dat,$dat,$rndkey1 # last round key | ||
1315 | vcipherlast $inout,$inout,$dat | ||
1316 | |||
1317 | lvx $rndkey1,$idx,$key | ||
1318 | addi $idx,$idx,16 | ||
1319 | vperm $inout,$inout,$inout,$outperm | ||
1320 | vsel $dat,$outhead,$inout,$outmask | ||
1321 | mtctr $rounds | ||
1322 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | ||
1323 | vmr $outhead,$inout | ||
1324 | vxor $inout,$ivec,$rndkey0 | ||
1325 | lvx $rndkey0,$idx,$key | ||
1326 | addi $idx,$idx,16 | ||
1327 | stvx $dat,0,$out | ||
1328 | addi $out,$out,16 | ||
1329 | bne Loop_ctr32_enc | ||
1330 | |||
1331 | addi $out,$out,-1 | ||
1332 | lvx $inout,0,$out # redundant in aligned case | ||
1333 | vsel $inout,$outhead,$inout,$outmask | ||
1334 | stvx $inout,0,$out | ||
1335 | |||
1336 | mtspr 256,$vrsave | ||
1337 | blr | ||
1338 | .long 0 | ||
1339 | .byte 0,12,0x14,0,0,0,6,0 | ||
1340 | .long 0 | ||
1341 | ___ | ||
1342 | ######################################################################### | ||
1343 | {{ # Optimized CTR procedure # | ||
1344 | my $key_="r11"; | ||
1345 | my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); | ||
1346 | my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); | ||
1347 | my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); | ||
1348 | my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys | ||
1349 | # v26-v31 last 6 round keys | ||
1350 | my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment | ||
1351 | my ($two,$three,$four)=($outhead,$outperm,$outmask); | ||
1352 | |||
1353 | $code.=<<___; | ||
1354 | .align 5 | ||
1355 | _aesp8_ctr32_encrypt8x: | ||
1356 | $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) | ||
1357 | li r10,`$FRAME+8*16+15` | ||
1358 | li r11,`$FRAME+8*16+31` | ||
1359 | stvx v20,r10,$sp # ABI says so | ||
1360 | addi r10,r10,32 | ||
1361 | stvx v21,r11,$sp | ||
1362 | addi r11,r11,32 | ||
1363 | stvx v22,r10,$sp | ||
1364 | addi r10,r10,32 | ||
1365 | stvx v23,r11,$sp | ||
1366 | addi r11,r11,32 | ||
1367 | stvx v24,r10,$sp | ||
1368 | addi r10,r10,32 | ||
1369 | stvx v25,r11,$sp | ||
1370 | addi r11,r11,32 | ||
1371 | stvx v26,r10,$sp | ||
1372 | addi r10,r10,32 | ||
1373 | stvx v27,r11,$sp | ||
1374 | addi r11,r11,32 | ||
1375 | stvx v28,r10,$sp | ||
1376 | addi r10,r10,32 | ||
1377 | stvx v29,r11,$sp | ||
1378 | addi r11,r11,32 | ||
1379 | stvx v30,r10,$sp | ||
1380 | stvx v31,r11,$sp | ||
1381 | li r0,-1 | ||
1382 | stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave | ||
1383 | li $x10,0x10 | ||
1384 | $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
1385 | li $x20,0x20 | ||
1386 | $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
1387 | li $x30,0x30 | ||
1388 | $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
1389 | li $x40,0x40 | ||
1390 | $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
1391 | li $x50,0x50 | ||
1392 | $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
1393 | li $x60,0x60 | ||
1394 | $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
1395 | li $x70,0x70 | ||
1396 | mtspr 256,r0 | ||
1397 | |||
1398 | subi $rounds,$rounds,3 # -4 in total | ||
1399 | |||
1400 | lvx $rndkey0,$x00,$key # load key schedule | ||
1401 | lvx v30,$x10,$key | ||
1402 | addi $key,$key,0x20 | ||
1403 | lvx v31,$x00,$key | ||
1404 | ?vperm $rndkey0,$rndkey0,v30,$keyperm | ||
1405 | addi $key_,$sp,$FRAME+15 | ||
1406 | mtctr $rounds | ||
1407 | |||
1408 | Load_ctr32_enc_key: | ||
1409 | ?vperm v24,v30,v31,$keyperm | ||
1410 | lvx v30,$x10,$key | ||
1411 | addi $key,$key,0x20 | ||
1412 | stvx v24,$x00,$key_ # off-load round[1] | ||
1413 | ?vperm v25,v31,v30,$keyperm | ||
1414 | lvx v31,$x00,$key | ||
1415 | stvx v25,$x10,$key_ # off-load round[2] | ||
1416 | addi $key_,$key_,0x20 | ||
1417 | bdnz Load_ctr32_enc_key | ||
1418 | |||
1419 | lvx v26,$x10,$key | ||
1420 | ?vperm v24,v30,v31,$keyperm | ||
1421 | lvx v27,$x20,$key | ||
1422 | stvx v24,$x00,$key_ # off-load round[3] | ||
1423 | ?vperm v25,v31,v26,$keyperm | ||
1424 | lvx v28,$x30,$key | ||
1425 | stvx v25,$x10,$key_ # off-load round[4] | ||
1426 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
1427 | ?vperm v26,v26,v27,$keyperm | ||
1428 | lvx v29,$x40,$key | ||
1429 | ?vperm v27,v27,v28,$keyperm | ||
1430 | lvx v30,$x50,$key | ||
1431 | ?vperm v28,v28,v29,$keyperm | ||
1432 | lvx v31,$x60,$key | ||
1433 | ?vperm v29,v29,v30,$keyperm | ||
1434 | lvx $out0,$x70,$key # borrow $out0 | ||
1435 | ?vperm v30,v30,v31,$keyperm | ||
1436 | lvx v24,$x00,$key_ # pre-load round[1] | ||
1437 | ?vperm v31,v31,$out0,$keyperm | ||
1438 | lvx v25,$x10,$key_ # pre-load round[2] | ||
1439 | |||
1440 | vadduwm $two,$one,$one | ||
1441 | subi $inp,$inp,15 # undo "caller" | ||
1442 | $SHL $len,$len,4 | ||
1443 | |||
1444 | vadduwm $out1,$ivec,$one # counter values ... | ||
1445 | vadduwm $out2,$ivec,$two | ||
1446 | vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] | ||
1447 | le?li $idx,8 | ||
1448 | vadduwm $out3,$out1,$two | ||
1449 | vxor $out1,$out1,$rndkey0 | ||
1450 | le?lvsl $inpperm,0,$idx | ||
1451 | vadduwm $out4,$out2,$two | ||
1452 | vxor $out2,$out2,$rndkey0 | ||
1453 | le?vspltisb $tmp,0x0f | ||
1454 | vadduwm $out5,$out3,$two | ||
1455 | vxor $out3,$out3,$rndkey0 | ||
1456 | le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u | ||
1457 | vadduwm $out6,$out4,$two | ||
1458 | vxor $out4,$out4,$rndkey0 | ||
1459 | vadduwm $out7,$out5,$two | ||
1460 | vxor $out5,$out5,$rndkey0 | ||
1461 | vadduwm $ivec,$out6,$two # next counter value | ||
1462 | vxor $out6,$out6,$rndkey0 | ||
1463 | vxor $out7,$out7,$rndkey0 | ||
1464 | |||
1465 | mtctr $rounds | ||
1466 | b Loop_ctr32_enc8x | ||
1467 | .align 5 | ||
1468 | Loop_ctr32_enc8x: | ||
1469 | vcipher $out0,$out0,v24 | ||
1470 | vcipher $out1,$out1,v24 | ||
1471 | vcipher $out2,$out2,v24 | ||
1472 | vcipher $out3,$out3,v24 | ||
1473 | vcipher $out4,$out4,v24 | ||
1474 | vcipher $out5,$out5,v24 | ||
1475 | vcipher $out6,$out6,v24 | ||
1476 | vcipher $out7,$out7,v24 | ||
1477 | Loop_ctr32_enc8x_middle: | ||
1478 | lvx v24,$x20,$key_ # round[3] | ||
1479 | addi $key_,$key_,0x20 | ||
1480 | |||
1481 | vcipher $out0,$out0,v25 | ||
1482 | vcipher $out1,$out1,v25 | ||
1483 | vcipher $out2,$out2,v25 | ||
1484 | vcipher $out3,$out3,v25 | ||
1485 | vcipher $out4,$out4,v25 | ||
1486 | vcipher $out5,$out5,v25 | ||
1487 | vcipher $out6,$out6,v25 | ||
1488 | vcipher $out7,$out7,v25 | ||
1489 | lvx v25,$x10,$key_ # round[4] | ||
1490 | bdnz Loop_ctr32_enc8x | ||
1491 | |||
1492 | subic r11,$len,256 # $len-256, borrow $key_ | ||
1493 | vcipher $out0,$out0,v24 | ||
1494 | vcipher $out1,$out1,v24 | ||
1495 | vcipher $out2,$out2,v24 | ||
1496 | vcipher $out3,$out3,v24 | ||
1497 | vcipher $out4,$out4,v24 | ||
1498 | vcipher $out5,$out5,v24 | ||
1499 | vcipher $out6,$out6,v24 | ||
1500 | vcipher $out7,$out7,v24 | ||
1501 | |||
1502 | subfe r0,r0,r0 # borrow?-1:0 | ||
1503 | vcipher $out0,$out0,v25 | ||
1504 | vcipher $out1,$out1,v25 | ||
1505 | vcipher $out2,$out2,v25 | ||
1506 | vcipher $out3,$out3,v25 | ||
1507 | vcipher $out4,$out4,v25 | ||
1508 | vcipher $out5,$out5,v25 | ||
1509 | vcipher $out6,$out6,v25 | ||
1510 | vcipher $out7,$out7,v25 | ||
1511 | |||
1512 | and r0,r0,r11 | ||
1513 | addi $key_,$sp,$FRAME+15 # rewind $key_ | ||
1514 | vcipher $out0,$out0,v26 | ||
1515 | vcipher $out1,$out1,v26 | ||
1516 | vcipher $out2,$out2,v26 | ||
1517 | vcipher $out3,$out3,v26 | ||
1518 | vcipher $out4,$out4,v26 | ||
1519 | vcipher $out5,$out5,v26 | ||
1520 | vcipher $out6,$out6,v26 | ||
1521 | vcipher $out7,$out7,v26 | ||
1522 | lvx v24,$x00,$key_ # re-pre-load round[1] | ||
1523 | |||
1524 | subic $len,$len,129 # $len-=129 | ||
1525 | vcipher $out0,$out0,v27 | ||
1526 | addi $len,$len,1 # $len-=128 really | ||
1527 | vcipher $out1,$out1,v27 | ||
1528 | vcipher $out2,$out2,v27 | ||
1529 | vcipher $out3,$out3,v27 | ||
1530 | vcipher $out4,$out4,v27 | ||
1531 | vcipher $out5,$out5,v27 | ||
1532 | vcipher $out6,$out6,v27 | ||
1533 | vcipher $out7,$out7,v27 | ||
1534 | lvx v25,$x10,$key_ # re-pre-load round[2] | ||
1535 | |||
1536 | vcipher $out0,$out0,v28 | ||
1537 | lvx_u $in0,$x00,$inp # load input | ||
1538 | vcipher $out1,$out1,v28 | ||
1539 | lvx_u $in1,$x10,$inp | ||
1540 | vcipher $out2,$out2,v28 | ||
1541 | lvx_u $in2,$x20,$inp | ||
1542 | vcipher $out3,$out3,v28 | ||
1543 | lvx_u $in3,$x30,$inp | ||
1544 | vcipher $out4,$out4,v28 | ||
1545 | lvx_u $in4,$x40,$inp | ||
1546 | vcipher $out5,$out5,v28 | ||
1547 | lvx_u $in5,$x50,$inp | ||
1548 | vcipher $out6,$out6,v28 | ||
1549 | lvx_u $in6,$x60,$inp | ||
1550 | vcipher $out7,$out7,v28 | ||
1551 | lvx_u $in7,$x70,$inp | ||
1552 | addi $inp,$inp,0x80 | ||
1553 | |||
1554 | vcipher $out0,$out0,v29 | ||
1555 | le?vperm $in0,$in0,$in0,$inpperm | ||
1556 | vcipher $out1,$out1,v29 | ||
1557 | le?vperm $in1,$in1,$in1,$inpperm | ||
1558 | vcipher $out2,$out2,v29 | ||
1559 | le?vperm $in2,$in2,$in2,$inpperm | ||
1560 | vcipher $out3,$out3,v29 | ||
1561 | le?vperm $in3,$in3,$in3,$inpperm | ||
1562 | vcipher $out4,$out4,v29 | ||
1563 | le?vperm $in4,$in4,$in4,$inpperm | ||
1564 | vcipher $out5,$out5,v29 | ||
1565 | le?vperm $in5,$in5,$in5,$inpperm | ||
1566 | vcipher $out6,$out6,v29 | ||
1567 | le?vperm $in6,$in6,$in6,$inpperm | ||
1568 | vcipher $out7,$out7,v29 | ||
1569 | le?vperm $in7,$in7,$in7,$inpperm | ||
1570 | |||
1571 | add $inp,$inp,r0 # $inp is adjusted in such | ||
1572 | # way that at exit from the | ||
1573 | # loop inX-in7 are loaded | ||
1574 | # with last "words" | ||
1575 | subfe. r0,r0,r0 # borrow?-1:0 | ||
1576 | vcipher $out0,$out0,v30 | ||
1577 | vxor $in0,$in0,v31 # xor with last round key | ||
1578 | vcipher $out1,$out1,v30 | ||
1579 | vxor $in1,$in1,v31 | ||
1580 | vcipher $out2,$out2,v30 | ||
1581 | vxor $in2,$in2,v31 | ||
1582 | vcipher $out3,$out3,v30 | ||
1583 | vxor $in3,$in3,v31 | ||
1584 | vcipher $out4,$out4,v30 | ||
1585 | vxor $in4,$in4,v31 | ||
1586 | vcipher $out5,$out5,v30 | ||
1587 | vxor $in5,$in5,v31 | ||
1588 | vcipher $out6,$out6,v30 | ||
1589 | vxor $in6,$in6,v31 | ||
1590 | vcipher $out7,$out7,v30 | ||
1591 | vxor $in7,$in7,v31 | ||
1592 | |||
1593 | bne Lctr32_enc8x_break # did $len-129 borrow? | ||
1594 | |||
1595 | vcipherlast $in0,$out0,$in0 | ||
1596 | vcipherlast $in1,$out1,$in1 | ||
1597 | vadduwm $out1,$ivec,$one # counter values ... | ||
1598 | vcipherlast $in2,$out2,$in2 | ||
1599 | vadduwm $out2,$ivec,$two | ||
1600 | vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] | ||
1601 | vcipherlast $in3,$out3,$in3 | ||
1602 | vadduwm $out3,$out1,$two | ||
1603 | vxor $out1,$out1,$rndkey0 | ||
1604 | vcipherlast $in4,$out4,$in4 | ||
1605 | vadduwm $out4,$out2,$two | ||
1606 | vxor $out2,$out2,$rndkey0 | ||
1607 | vcipherlast $in5,$out5,$in5 | ||
1608 | vadduwm $out5,$out3,$two | ||
1609 | vxor $out3,$out3,$rndkey0 | ||
1610 | vcipherlast $in6,$out6,$in6 | ||
1611 | vadduwm $out6,$out4,$two | ||
1612 | vxor $out4,$out4,$rndkey0 | ||
1613 | vcipherlast $in7,$out7,$in7 | ||
1614 | vadduwm $out7,$out5,$two | ||
1615 | vxor $out5,$out5,$rndkey0 | ||
1616 | le?vperm $in0,$in0,$in0,$inpperm | ||
1617 | vadduwm $ivec,$out6,$two # next counter value | ||
1618 | vxor $out6,$out6,$rndkey0 | ||
1619 | le?vperm $in1,$in1,$in1,$inpperm | ||
1620 | vxor $out7,$out7,$rndkey0 | ||
1621 | mtctr $rounds | ||
1622 | |||
1623 | vcipher $out0,$out0,v24 | ||
1624 | stvx_u $in0,$x00,$out | ||
1625 | le?vperm $in2,$in2,$in2,$inpperm | ||
1626 | vcipher $out1,$out1,v24 | ||
1627 | stvx_u $in1,$x10,$out | ||
1628 | le?vperm $in3,$in3,$in3,$inpperm | ||
1629 | vcipher $out2,$out2,v24 | ||
1630 | stvx_u $in2,$x20,$out | ||
1631 | le?vperm $in4,$in4,$in4,$inpperm | ||
1632 | vcipher $out3,$out3,v24 | ||
1633 | stvx_u $in3,$x30,$out | ||
1634 | le?vperm $in5,$in5,$in5,$inpperm | ||
1635 | vcipher $out4,$out4,v24 | ||
1636 | stvx_u $in4,$x40,$out | ||
1637 | le?vperm $in6,$in6,$in6,$inpperm | ||
1638 | vcipher $out5,$out5,v24 | ||
1639 | stvx_u $in5,$x50,$out | ||
1640 | le?vperm $in7,$in7,$in7,$inpperm | ||
1641 | vcipher $out6,$out6,v24 | ||
1642 | stvx_u $in6,$x60,$out | ||
1643 | vcipher $out7,$out7,v24 | ||
1644 | stvx_u $in7,$x70,$out | ||
1645 | addi $out,$out,0x80 | ||
1646 | |||
1647 | b Loop_ctr32_enc8x_middle | ||
1648 | |||
1649 | .align 5 | ||
1650 | Lctr32_enc8x_break: | ||
1651 | cmpwi $len,-0x60 | ||
1652 | blt Lctr32_enc8x_one | ||
1653 | nop | ||
1654 | beq Lctr32_enc8x_two | ||
1655 | cmpwi $len,-0x40 | ||
1656 | blt Lctr32_enc8x_three | ||
1657 | nop | ||
1658 | beq Lctr32_enc8x_four | ||
1659 | cmpwi $len,-0x20 | ||
1660 | blt Lctr32_enc8x_five | ||
1661 | nop | ||
1662 | beq Lctr32_enc8x_six | ||
1663 | cmpwi $len,0x00 | ||
1664 | blt Lctr32_enc8x_seven | ||
1665 | |||
1666 | Lctr32_enc8x_eight: | ||
1667 | vcipherlast $out0,$out0,$in0 | ||
1668 | vcipherlast $out1,$out1,$in1 | ||
1669 | vcipherlast $out2,$out2,$in2 | ||
1670 | vcipherlast $out3,$out3,$in3 | ||
1671 | vcipherlast $out4,$out4,$in4 | ||
1672 | vcipherlast $out5,$out5,$in5 | ||
1673 | vcipherlast $out6,$out6,$in6 | ||
1674 | vcipherlast $out7,$out7,$in7 | ||
1675 | |||
1676 | le?vperm $out0,$out0,$out0,$inpperm | ||
1677 | le?vperm $out1,$out1,$out1,$inpperm | ||
1678 | stvx_u $out0,$x00,$out | ||
1679 | le?vperm $out2,$out2,$out2,$inpperm | ||
1680 | stvx_u $out1,$x10,$out | ||
1681 | le?vperm $out3,$out3,$out3,$inpperm | ||
1682 | stvx_u $out2,$x20,$out | ||
1683 | le?vperm $out4,$out4,$out4,$inpperm | ||
1684 | stvx_u $out3,$x30,$out | ||
1685 | le?vperm $out5,$out5,$out5,$inpperm | ||
1686 | stvx_u $out4,$x40,$out | ||
1687 | le?vperm $out6,$out6,$out6,$inpperm | ||
1688 | stvx_u $out5,$x50,$out | ||
1689 | le?vperm $out7,$out7,$out7,$inpperm | ||
1690 | stvx_u $out6,$x60,$out | ||
1691 | stvx_u $out7,$x70,$out | ||
1692 | addi $out,$out,0x80 | ||
1693 | b Lctr32_enc8x_done | ||
1694 | |||
1695 | .align 5 | ||
1696 | Lctr32_enc8x_seven: | ||
1697 | vcipherlast $out0,$out0,$in1 | ||
1698 | vcipherlast $out1,$out1,$in2 | ||
1699 | vcipherlast $out2,$out2,$in3 | ||
1700 | vcipherlast $out3,$out3,$in4 | ||
1701 | vcipherlast $out4,$out4,$in5 | ||
1702 | vcipherlast $out5,$out5,$in6 | ||
1703 | vcipherlast $out6,$out6,$in7 | ||
1704 | |||
1705 | le?vperm $out0,$out0,$out0,$inpperm | ||
1706 | le?vperm $out1,$out1,$out1,$inpperm | ||
1707 | stvx_u $out0,$x00,$out | ||
1708 | le?vperm $out2,$out2,$out2,$inpperm | ||
1709 | stvx_u $out1,$x10,$out | ||
1710 | le?vperm $out3,$out3,$out3,$inpperm | ||
1711 | stvx_u $out2,$x20,$out | ||
1712 | le?vperm $out4,$out4,$out4,$inpperm | ||
1713 | stvx_u $out3,$x30,$out | ||
1714 | le?vperm $out5,$out5,$out5,$inpperm | ||
1715 | stvx_u $out4,$x40,$out | ||
1716 | le?vperm $out6,$out6,$out6,$inpperm | ||
1717 | stvx_u $out5,$x50,$out | ||
1718 | stvx_u $out6,$x60,$out | ||
1719 | addi $out,$out,0x70 | ||
1720 | b Lctr32_enc8x_done | ||
1721 | |||
1722 | .align 5 | ||
1723 | Lctr32_enc8x_six: | ||
1724 | vcipherlast $out0,$out0,$in2 | ||
1725 | vcipherlast $out1,$out1,$in3 | ||
1726 | vcipherlast $out2,$out2,$in4 | ||
1727 | vcipherlast $out3,$out3,$in5 | ||
1728 | vcipherlast $out4,$out4,$in6 | ||
1729 | vcipherlast $out5,$out5,$in7 | ||
1730 | |||
1731 | le?vperm $out0,$out0,$out0,$inpperm | ||
1732 | le?vperm $out1,$out1,$out1,$inpperm | ||
1733 | stvx_u $out0,$x00,$out | ||
1734 | le?vperm $out2,$out2,$out2,$inpperm | ||
1735 | stvx_u $out1,$x10,$out | ||
1736 | le?vperm $out3,$out3,$out3,$inpperm | ||
1737 | stvx_u $out2,$x20,$out | ||
1738 | le?vperm $out4,$out4,$out4,$inpperm | ||
1739 | stvx_u $out3,$x30,$out | ||
1740 | le?vperm $out5,$out5,$out5,$inpperm | ||
1741 | stvx_u $out4,$x40,$out | ||
1742 | stvx_u $out5,$x50,$out | ||
1743 | addi $out,$out,0x60 | ||
1744 | b Lctr32_enc8x_done | ||
1745 | |||
1746 | .align 5 | ||
1747 | Lctr32_enc8x_five: | ||
1748 | vcipherlast $out0,$out0,$in3 | ||
1749 | vcipherlast $out1,$out1,$in4 | ||
1750 | vcipherlast $out2,$out2,$in5 | ||
1751 | vcipherlast $out3,$out3,$in6 | ||
1752 | vcipherlast $out4,$out4,$in7 | ||
1753 | |||
1754 | le?vperm $out0,$out0,$out0,$inpperm | ||
1755 | le?vperm $out1,$out1,$out1,$inpperm | ||
1756 | stvx_u $out0,$x00,$out | ||
1757 | le?vperm $out2,$out2,$out2,$inpperm | ||
1758 | stvx_u $out1,$x10,$out | ||
1759 | le?vperm $out3,$out3,$out3,$inpperm | ||
1760 | stvx_u $out2,$x20,$out | ||
1761 | le?vperm $out4,$out4,$out4,$inpperm | ||
1762 | stvx_u $out3,$x30,$out | ||
1763 | stvx_u $out4,$x40,$out | ||
1764 | addi $out,$out,0x50 | ||
1765 | b Lctr32_enc8x_done | ||
1766 | |||
1767 | .align 5 | ||
1768 | Lctr32_enc8x_four: | ||
1769 | vcipherlast $out0,$out0,$in4 | ||
1770 | vcipherlast $out1,$out1,$in5 | ||
1771 | vcipherlast $out2,$out2,$in6 | ||
1772 | vcipherlast $out3,$out3,$in7 | ||
1773 | |||
1774 | le?vperm $out0,$out0,$out0,$inpperm | ||
1775 | le?vperm $out1,$out1,$out1,$inpperm | ||
1776 | stvx_u $out0,$x00,$out | ||
1777 | le?vperm $out2,$out2,$out2,$inpperm | ||
1778 | stvx_u $out1,$x10,$out | ||
1779 | le?vperm $out3,$out3,$out3,$inpperm | ||
1780 | stvx_u $out2,$x20,$out | ||
1781 | stvx_u $out3,$x30,$out | ||
1782 | addi $out,$out,0x40 | ||
1783 | b Lctr32_enc8x_done | ||
1784 | |||
1785 | .align 5 | ||
1786 | Lctr32_enc8x_three: | ||
1787 | vcipherlast $out0,$out0,$in5 | ||
1788 | vcipherlast $out1,$out1,$in6 | ||
1789 | vcipherlast $out2,$out2,$in7 | ||
1790 | |||
1791 | le?vperm $out0,$out0,$out0,$inpperm | ||
1792 | le?vperm $out1,$out1,$out1,$inpperm | ||
1793 | stvx_u $out0,$x00,$out | ||
1794 | le?vperm $out2,$out2,$out2,$inpperm | ||
1795 | stvx_u $out1,$x10,$out | ||
1796 | stvx_u $out2,$x20,$out | ||
1797 | addi $out,$out,0x30 | ||
1798 | b Lctr32_enc8x_done | ||
1799 | |||
1800 | .align 5 | ||
1801 | Lctr32_enc8x_two: | ||
1802 | vcipherlast $out0,$out0,$in6 | ||
1803 | vcipherlast $out1,$out1,$in7 | ||
1804 | |||
1805 | le?vperm $out0,$out0,$out0,$inpperm | ||
1806 | le?vperm $out1,$out1,$out1,$inpperm | ||
1807 | stvx_u $out0,$x00,$out | ||
1808 | stvx_u $out1,$x10,$out | ||
1809 | addi $out,$out,0x20 | ||
1810 | b Lctr32_enc8x_done | ||
1811 | |||
1812 | .align 5 | ||
1813 | Lctr32_enc8x_one: | ||
1814 | vcipherlast $out0,$out0,$in7 | ||
1815 | |||
1816 | le?vperm $out0,$out0,$out0,$inpperm | ||
1817 | stvx_u $out0,0,$out | ||
1818 | addi $out,$out,0x10 | ||
1819 | |||
1820 | Lctr32_enc8x_done: | ||
1821 | li r10,`$FRAME+15` | ||
1822 | li r11,`$FRAME+31` | ||
1823 | stvx $inpperm,r10,$sp # wipe copies of round keys | ||
1824 | addi r10,r10,32 | ||
1825 | stvx $inpperm,r11,$sp | ||
1826 | addi r11,r11,32 | ||
1827 | stvx $inpperm,r10,$sp | ||
1828 | addi r10,r10,32 | ||
1829 | stvx $inpperm,r11,$sp | ||
1830 | addi r11,r11,32 | ||
1831 | stvx $inpperm,r10,$sp | ||
1832 | addi r10,r10,32 | ||
1833 | stvx $inpperm,r11,$sp | ||
1834 | addi r11,r11,32 | ||
1835 | stvx $inpperm,r10,$sp | ||
1836 | addi r10,r10,32 | ||
1837 | stvx $inpperm,r11,$sp | ||
1838 | addi r11,r11,32 | ||
1839 | |||
1840 | mtspr 256,$vrsave | ||
1841 | lvx v20,r10,$sp # ABI says so | ||
1842 | addi r10,r10,32 | ||
1843 | lvx v21,r11,$sp | ||
1844 | addi r11,r11,32 | ||
1845 | lvx v22,r10,$sp | ||
1846 | addi r10,r10,32 | ||
1847 | lvx v23,r11,$sp | ||
1848 | addi r11,r11,32 | ||
1849 | lvx v24,r10,$sp | ||
1850 | addi r10,r10,32 | ||
1851 | lvx v25,r11,$sp | ||
1852 | addi r11,r11,32 | ||
1853 | lvx v26,r10,$sp | ||
1854 | addi r10,r10,32 | ||
1855 | lvx v27,r11,$sp | ||
1856 | addi r11,r11,32 | ||
1857 | lvx v28,r10,$sp | ||
1858 | addi r10,r10,32 | ||
1859 | lvx v29,r11,$sp | ||
1860 | addi r11,r11,32 | ||
1861 | lvx v30,r10,$sp | ||
1862 | lvx v31,r11,$sp | ||
1863 | $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) | ||
1864 | $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) | ||
1865 | $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) | ||
1866 | $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) | ||
1867 | $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) | ||
1868 | $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) | ||
1869 | addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` | ||
1870 | blr | ||
1871 | .long 0 | ||
1872 | .byte 0,12,0x14,0,0x80,6,6,0 | ||
1873 | .long 0 | ||
1874 | .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks | ||
1875 | ___ | ||
1876 | }} }}} | ||
1877 | |||
1878 | my $consts=1; | ||
1879 | foreach(split("\n",$code)) { | ||
1880 | s/\`([^\`]*)\`/eval($1)/geo; | ||
1881 | |||
1882 | # constants table endian-specific conversion | ||
1883 | if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { | ||
1884 | my $conv=$3; | ||
1885 | my @bytes=(); | ||
1886 | |||
1887 | # convert to endian-agnostic format | ||
1888 | if ($1 eq "long") { | ||
1889 | foreach (split(/,\s*/,$2)) { | ||
1890 | my $l = /^0/?oct:int; | ||
1891 | push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; | ||
1892 | } | ||
1893 | } else { | ||
1894 | @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); | ||
1895 | } | ||
1896 | |||
1897 | # little-endian conversion | ||
1898 | if ($flavour =~ /le$/o) { | ||
1899 | SWITCH: for($conv) { | ||
1900 | /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; | ||
1901 | /\?rev/ && do { @bytes=reverse(@bytes); last; }; | ||
1902 | } | ||
1903 | } | ||
1904 | |||
1905 | # emit | ||
1906 | print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; | ||
1907 | next; | ||
1908 | } | ||
1909 | $consts=0 if (m/Lconsts:/o); # end of table | ||
1910 | |||
1911 | # instructions prefixed with '?' are endian-specific and need | ||
1912 | # to be adjusted accordingly... | ||
1913 | if ($flavour =~ /le$/o) { # little-endian | ||
1914 | s/le\?//o or | ||
1915 | s/be\?/#be#/o or | ||
1916 | s/\?lvsr/lvsl/o or | ||
1917 | s/\?lvsl/lvsr/o or | ||
1918 | s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or | ||
1919 | s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or | ||
1920 | s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; | ||
1921 | } else { # big-endian | ||
1922 | s/le\?/#le#/o or | ||
1923 | s/be\?//o or | ||
1924 | s/\?([a-z]+)/$1/o; | ||
1925 | } | ||
1926 | |||
1927 | print $_,"\n"; | ||
1928 | } | ||
1929 | |||
1930 | close STDOUT; | ||
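The `?`-prefixed mnemonics used throughout the assembly above are a convention of this generator rather than real instructions: the post-processing loop just shown either strips the prefix on big-endian flavours or rewrites the operands for little-endian ones. For example, the substitution on `?vperm` swaps the second and third source operands, so a source line

?vperm v24,v30,v31,v25

is emitted as `vperm v24,v30,v31,v25` on big-endian and as `vperm v24,v31,v30,v25` on little-endian.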
diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c new file mode 100644 index 000000000000..d0ffe277af5c --- /dev/null +++ b/drivers/crypto/vmx/ghash.c | |||
@@ -0,0 +1,214 @@ | |||
1 | /** | ||
2 | * GHASH routines supporting VMX instructions on the Power 8 | ||
3 | * | ||
4 | * Copyright (C) 2015 International Business Machines Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; version 2 only. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
18 | * | ||
19 | * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> | ||
20 | */ | ||
21 | |||
22 | #include <linux/types.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/crypto.h> | ||
25 | #include <linux/delay.h> | ||
26 | #include <linux/hardirq.h> | ||
27 | #include <asm/switch_to.h> | ||
28 | #include <crypto/aes.h> | ||
29 | #include <crypto/scatterwalk.h> | ||
30 | #include <crypto/internal/hash.h> | ||
31 | #include <crypto/b128ops.h> | ||
32 | |||
33 | #define IN_INTERRUPT in_interrupt() | ||
34 | |||
35 | #define GHASH_BLOCK_SIZE (16) | ||
36 | #define GHASH_DIGEST_SIZE (16) | ||
37 | #define GHASH_KEY_LEN (16) | ||
38 | |||
39 | void gcm_init_p8(u128 htable[16], const u64 Xi[2]); | ||
40 | void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]); | ||
41 | void gcm_ghash_p8(u64 Xi[2], const u128 htable[16], | ||
42 | const u8 *in, size_t len); | ||
43 | |||
44 | struct p8_ghash_ctx { | ||
45 | u128 htable[16]; | ||
46 | struct crypto_shash *fallback; | ||
47 | }; | ||
48 | |||
49 | struct p8_ghash_desc_ctx { | ||
50 | u64 shash[2]; | ||
51 | u8 buffer[GHASH_DIGEST_SIZE]; | ||
52 | int bytes; | ||
53 | struct shash_desc fallback_desc; | ||
54 | }; | ||
55 | |||
56 | static int p8_ghash_init_tfm(struct crypto_tfm *tfm) | ||
57 | { | ||
58 | const char *alg; | ||
59 | struct crypto_shash *fallback; | ||
60 | struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm); | ||
61 | struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm); | ||
62 | |||
63 | if (!(alg = crypto_tfm_alg_name(tfm))) { | ||
64 | printk(KERN_ERR "Failed to get algorithm name.\n"); | ||
65 | return -ENOENT; | ||
66 | } | ||
67 | |||
68 | fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK); | ||
69 | if (IS_ERR(fallback)) { | ||
70 | printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", | ||
71 | alg, PTR_ERR(fallback)); | ||
72 | return PTR_ERR(fallback); | ||
73 | } | ||
74 | printk(KERN_INFO "Using '%s' as fallback implementation.\n", | ||
75 | crypto_tfm_alg_driver_name(crypto_shash_tfm(fallback))); | ||
76 | |||
77 | crypto_shash_set_flags(fallback, | ||
78 | crypto_shash_get_flags(shash_tfm)); | ||
79 | ctx->fallback = fallback; | ||
80 | |||
81 | shash_tfm->descsize = sizeof(struct p8_ghash_desc_ctx) | ||
82 | + crypto_shash_descsize(fallback); | ||
83 | |||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | static void p8_ghash_exit_tfm(struct crypto_tfm *tfm) | ||
88 | { | ||
89 | struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm); | ||
90 | |||
91 | if (ctx->fallback) { | ||
92 | crypto_free_shash(ctx->fallback); | ||
93 | ctx->fallback = NULL; | ||
94 | } | ||
95 | } | ||
96 | |||
97 | static int p8_ghash_init(struct shash_desc *desc) | ||
98 | { | ||
99 | struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); | ||
100 | struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); | ||
101 | |||
102 | dctx->bytes = 0; | ||
103 | memset(dctx->shash, 0, GHASH_DIGEST_SIZE); | ||
104 | dctx->fallback_desc.tfm = ctx->fallback; | ||
105 | dctx->fallback_desc.flags = desc->flags; | ||
106 | return crypto_shash_init(&dctx->fallback_desc); | ||
107 | } | ||
108 | |||
109 | static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, | ||
110 | unsigned int keylen) | ||
111 | { | ||
112 | struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm)); | ||
113 | |||
114 | if (keylen != GHASH_KEY_LEN) | ||
115 | return -EINVAL; | ||
116 | |||
117 | pagefault_disable(); | ||
118 | enable_kernel_altivec(); | ||
119 | enable_kernel_fp(); | ||
120 | gcm_init_p8(ctx->htable, (const u64 *) key); | ||
121 | pagefault_enable(); | ||
122 | return crypto_shash_setkey(ctx->fallback, key, keylen); | ||
123 | } | ||
124 | |||
125 | static int p8_ghash_update(struct shash_desc *desc, | ||
126 | const u8 *src, unsigned int srclen) | ||
127 | { | ||
128 | unsigned int len; | ||
129 | struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); | ||
130 | struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); | ||
131 | |||
132 | if (IN_INTERRUPT) { | ||
133 | return crypto_shash_update(&dctx->fallback_desc, src, srclen); | ||
134 | } else { | ||
135 | if (dctx->bytes) { | ||
136 | if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) { | ||
137 | memcpy(dctx->buffer + dctx->bytes, src, srclen); | ||
138 | dctx->bytes += srclen; | ||
139 | return 0; | ||
140 | } | ||
141 | memcpy(dctx->buffer + dctx->bytes, src, | ||
142 | GHASH_DIGEST_SIZE - dctx->bytes); | ||
143 | pagefault_disable(); | ||
144 | enable_kernel_altivec(); | ||
145 | enable_kernel_fp(); | ||
146 | gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, | ||
147 | GHASH_DIGEST_SIZE); | ||
148 | pagefault_enable(); | ||
149 | src += GHASH_DIGEST_SIZE - dctx->bytes; | ||
150 | srclen -= GHASH_DIGEST_SIZE - dctx->bytes; | ||
151 | dctx->bytes = 0; | ||
152 | } | ||
153 | len = srclen & ~(GHASH_DIGEST_SIZE - 1); | ||
154 | if (len) { | ||
155 | pagefault_disable(); | ||
156 | enable_kernel_altivec(); | ||
157 | enable_kernel_fp(); | ||
158 | gcm_ghash_p8(dctx->shash, ctx->htable, src, len); | ||
159 | pagefault_enable(); | ||
160 | src += len; | ||
161 | srclen -= len; | ||
162 | } | ||
163 | if (srclen) { | ||
164 | memcpy(dctx->buffer, src, srclen); | ||
165 | dctx->bytes = srclen; | ||
166 | } | ||
167 | return 0; | ||
168 | } | ||
169 | } | ||
170 | |||
171 | static int p8_ghash_final(struct shash_desc *desc, u8 *out) | ||
172 | { | ||
173 | int i; | ||
174 | struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); | ||
175 | struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); | ||
176 | |||
177 | if (IN_INTERRUPT) { | ||
178 | return crypto_shash_final(&dctx->fallback_desc, out); | ||
179 | } else { | ||
180 | if (dctx->bytes) { | ||
181 | for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) | ||
182 | dctx->buffer[i] = 0; | ||
183 | pagefault_disable(); | ||
184 | enable_kernel_altivec(); | ||
185 | enable_kernel_fp(); | ||
186 | gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, | ||
187 | GHASH_DIGEST_SIZE); | ||
188 | pagefault_enable(); | ||
189 | dctx->bytes = 0; | ||
190 | } | ||
191 | memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); | ||
192 | return 0; | ||
193 | } | ||
194 | } | ||
195 | |||
196 | struct shash_alg p8_ghash_alg = { | ||
197 | .digestsize = GHASH_DIGEST_SIZE, | ||
198 | .init = p8_ghash_init, | ||
199 | .update = p8_ghash_update, | ||
200 | .final = p8_ghash_final, | ||
201 | .setkey = p8_ghash_setkey, | ||
202 | .descsize = sizeof(struct p8_ghash_desc_ctx), | ||
203 | .base = { | ||
204 | .cra_name = "ghash", | ||
205 | .cra_driver_name = "p8_ghash", | ||
206 | .cra_priority = 1000, | ||
207 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_NEED_FALLBACK, | ||
208 | .cra_blocksize = GHASH_BLOCK_SIZE, | ||
209 | .cra_ctxsize = sizeof(struct p8_ghash_ctx), | ||
210 | .cra_module = THIS_MODULE, | ||
211 | .cra_init = p8_ghash_init_tfm, | ||
212 | .cra_exit = p8_ghash_exit_tfm, | ||
213 | }, | ||
214 | }; | ||
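As a reading aid, the shash registered above is reached through the regular synchronous-hash API. The sketch below is hypothetical (the function name ghash_demo and its error handling are illustrative, not part of this patch); it assumes a kernel of this era, where struct shash_desc still carries a flags field:

#include <crypto/hash.h>
#include <linux/err.h>

static int ghash_demo(const u8 key[16], const u8 block[16], u8 digest[16])
{
	struct crypto_shash *tfm;
	int ret;

	/* The core picks the highest-priority "ghash"; p8_ghash wins on
	 * POWER8 thanks to its cra_priority of 1000. */
	tfm = crypto_alloc_shash("ghash", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_shash_setkey(tfm, key, 16);
	if (!ret) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		/* init + update + final over a single 16-byte block */
		ret = crypto_shash_digest(desc, block, 16, digest);
	}
	crypto_free_shash(tfm);
	return ret;
}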
diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl new file mode 100644 index 000000000000..0a6f899839dd --- /dev/null +++ b/drivers/crypto/vmx/ghashp8-ppc.pl | |||
@@ -0,0 +1,228 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | # | ||
10 | # GHASH for PowerISA v2.07. | ||
11 | # | ||
12 | # July 2014 | ||
13 | # | ||
14 | # Accurate performance measurements are problematic, because the setup | ||
15 | # is always virtualized, with a possibly throttled processor. | ||
16 | # Relative comparison is therefore more informative. This initial | ||
17 | # version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x | ||
18 | # faster than "4-bit" integer-only compiler-generated 64-bit code. | ||
19 | # "Initial version" means that there is room for futher improvement. | ||
20 | |||
21 | $flavour=shift; | ||
22 | $output =shift; | ||
23 | |||
24 | if ($flavour =~ /64/) { | ||
25 | $SIZE_T=8; | ||
26 | $LRSAVE=2*$SIZE_T; | ||
27 | $STU="stdu"; | ||
28 | $POP="ld"; | ||
29 | $PUSH="std"; | ||
30 | } elsif ($flavour =~ /32/) { | ||
31 | $SIZE_T=4; | ||
32 | $LRSAVE=$SIZE_T; | ||
33 | $STU="stwu"; | ||
34 | $POP="lwz"; | ||
35 | $PUSH="stw"; | ||
36 | } else { die "nonsense $flavour"; } | ||
37 | |||
38 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | ||
39 | ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or | ||
40 | ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or | ||
41 | die "can't locate ppc-xlate.pl"; | ||
42 | |||
43 | open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; | ||
44 | |||
45 | my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block | ||
46 | |||
47 | my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); | ||
48 | my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); | ||
49 | my $vrsave="r12"; | ||
50 | |||
51 | $code=<<___; | ||
52 | .machine "any" | ||
53 | |||
54 | .text | ||
55 | |||
56 | .globl .gcm_init_p8 | ||
57 | lis r0,0xfff0 | ||
58 | li r8,0x10 | ||
59 | mfspr $vrsave,256 | ||
60 | li r9,0x20 | ||
61 | mtspr 256,r0 | ||
62 | li r10,0x30 | ||
63 | lvx_u $H,0,r4 # load H | ||
64 | |||
65 | vspltisb $xC2,-16 # 0xf0 | ||
66 | vspltisb $t0,1 # one | ||
67 | vaddubm $xC2,$xC2,$xC2 # 0xe0 | ||
68 | vxor $zero,$zero,$zero | ||
69 | vor $xC2,$xC2,$t0 # 0xe1 | ||
70 | vsldoi $xC2,$xC2,$zero,15 # 0xe1... | ||
71 | vsldoi $t1,$zero,$t0,1 # ...1 | ||
72 | vaddubm $xC2,$xC2,$xC2 # 0xc2... | ||
73 | vspltisb $t2,7 | ||
74 | vor $xC2,$xC2,$t1 # 0xc2....01 | ||
75 | vspltb $t1,$H,0 # most significant byte | ||
76 | vsl $H,$H,$t0 # H<<=1 | ||
77 | vsrab $t1,$t1,$t2 # broadcast carry bit | ||
78 | vand $t1,$t1,$xC2 | ||
79 | vxor $H,$H,$t1 # twisted H | ||
80 | |||
81 | vsldoi $H,$H,$H,8 # twist even more ... | ||
82 | vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 | ||
83 | vsldoi $Hl,$zero,$H,8 # ... and split | ||
84 | vsldoi $Hh,$H,$zero,8 | ||
85 | |||
86 | stvx_u $xC2,0,r3 # save pre-computed table | ||
87 | stvx_u $Hl,r8,r3 | ||
88 | stvx_u $H, r9,r3 | ||
89 | stvx_u $Hh,r10,r3 | ||
90 | |||
91 | mtspr 256,$vrsave | ||
92 | blr | ||
93 | .long 0 | ||
94 | .byte 0,12,0x14,0,0,0,2,0 | ||
95 | .long 0 | ||
96 | .size .gcm_init_p8,.-.gcm_init_p8 | ||
97 | |||
98 | .globl .gcm_gmult_p8 | ||
99 | lis r0,0xfff8 | ||
100 | li r8,0x10 | ||
101 | mfspr $vrsave,256 | ||
102 | li r9,0x20 | ||
103 | mtspr 256,r0 | ||
104 | li r10,0x30 | ||
105 | lvx_u $IN,0,$Xip # load Xi | ||
106 | |||
107 | lvx_u $Hl,r8,$Htbl # load pre-computed table | ||
108 | le?lvsl $lemask,r0,r0 | ||
109 | lvx_u $H, r9,$Htbl | ||
110 | le?vspltisb $t0,0x07 | ||
111 | lvx_u $Hh,r10,$Htbl | ||
112 | le?vxor $lemask,$lemask,$t0 | ||
113 | lvx_u $xC2,0,$Htbl | ||
114 | le?vperm $IN,$IN,$IN,$lemask | ||
115 | vxor $zero,$zero,$zero | ||
116 | |||
117 | vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo | ||
118 | vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi | ||
119 | vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi | ||
120 | |||
121 | vpmsumd $t2,$Xl,$xC2 # 1st phase | ||
122 | |||
123 | vsldoi $t0,$Xm,$zero,8 | ||
124 | vsldoi $t1,$zero,$Xm,8 | ||
125 | vxor $Xl,$Xl,$t0 | ||
126 | vxor $Xh,$Xh,$t1 | ||
127 | |||
128 | vsldoi $Xl,$Xl,$Xl,8 | ||
129 | vxor $Xl,$Xl,$t2 | ||
130 | |||
131 | vsldoi $t1,$Xl,$Xl,8 # 2nd phase | ||
132 | vpmsumd $Xl,$Xl,$xC2 | ||
133 | vxor $t1,$t1,$Xh | ||
134 | vxor $Xl,$Xl,$t1 | ||
135 | |||
136 | le?vperm $Xl,$Xl,$Xl,$lemask | ||
137 | stvx_u $Xl,0,$Xip # write out Xi | ||
138 | |||
139 | mtspr 256,$vrsave | ||
140 | blr | ||
141 | .long 0 | ||
142 | .byte 0,12,0x14,0,0,0,2,0 | ||
143 | .long 0 | ||
144 | .size .gcm_gmult_p8,.-.gcm_gmult_p8 | ||
145 | |||
146 | .globl .gcm_ghash_p8 | ||
147 | lis r0,0xfff8 | ||
148 | li r8,0x10 | ||
149 | mfspr $vrsave,256 | ||
150 | li r9,0x20 | ||
151 | mtspr 256,r0 | ||
152 | li r10,0x30 | ||
153 | lvx_u $Xl,0,$Xip # load Xi | ||
154 | |||
155 | lvx_u $Hl,r8,$Htbl # load pre-computed table | ||
156 | le?lvsl $lemask,r0,r0 | ||
157 | lvx_u $H, r9,$Htbl | ||
158 | le?vspltisb $t0,0x07 | ||
159 | lvx_u $Hh,r10,$Htbl | ||
160 | le?vxor $lemask,$lemask,$t0 | ||
161 | lvx_u $xC2,0,$Htbl | ||
162 | le?vperm $Xl,$Xl,$Xl,$lemask | ||
163 | vxor $zero,$zero,$zero | ||
164 | |||
165 | lvx_u $IN,0,$inp | ||
166 | addi $inp,$inp,16 | ||
167 | subi $len,$len,16 | ||
168 | le?vperm $IN,$IN,$IN,$lemask | ||
169 | vxor $IN,$IN,$Xl | ||
170 | b Loop | ||
171 | |||
172 | .align 5 | ||
173 | Loop: | ||
174 | subic $len,$len,16 | ||
175 | vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo | ||
176 | subfe. r0,r0,r0 # borrow?-1:0 | ||
177 | vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi | ||
178 | and r0,r0,$len | ||
179 | vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi | ||
180 | add $inp,$inp,r0 | ||
181 | |||
182 | vpmsumd $t2,$Xl,$xC2 # 1st phase | ||
183 | |||
184 | vsldoi $t0,$Xm,$zero,8 | ||
185 | vsldoi $t1,$zero,$Xm,8 | ||
186 | vxor $Xl,$Xl,$t0 | ||
187 | vxor $Xh,$Xh,$t1 | ||
188 | |||
189 | vsldoi $Xl,$Xl,$Xl,8 | ||
190 | vxor $Xl,$Xl,$t2 | ||
191 | lvx_u $IN,0,$inp | ||
192 | addi $inp,$inp,16 | ||
193 | |||
194 | vsldoi $t1,$Xl,$Xl,8 # 2nd phase | ||
195 | vpmsumd $Xl,$Xl,$xC2 | ||
196 | le?vperm $IN,$IN,$IN,$lemask | ||
197 | vxor $t1,$t1,$Xh | ||
198 | vxor $IN,$IN,$t1 | ||
199 | vxor $IN,$IN,$Xl | ||
200 | beq Loop # did $len-=16 borrow? | ||
201 | |||
202 | vxor $Xl,$Xl,$t1 | ||
203 | le?vperm $Xl,$Xl,$Xl,$lemask | ||
204 | stvx_u $Xl,0,$Xip # write out Xi | ||
205 | |||
206 | mtspr 256,$vrsave | ||
207 | blr | ||
208 | .long 0 | ||
209 | .byte 0,12,0x14,0,0,0,4,0 | ||
210 | .long 0 | ||
211 | .size .gcm_ghash_p8,.-.gcm_ghash_p8 | ||
212 | |||
213 | .asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" | ||
214 | .align 2 | ||
215 | ___ | ||
216 | |||
217 | foreach (split("\n",$code)) { | ||
218 | if ($flavour =~ /le$/o) { # little-endian | ||
219 | s/le\?//o or | ||
220 | s/be\?/#be#/o; | ||
221 | } else { | ||
222 | s/le\?/#le#/o or | ||
223 | s/be\?//o; | ||
224 | } | ||
225 | print $_,"\n"; | ||
226 | } | ||
227 | |||
228 | close STDOUT; # enforce flush | ||
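The three vpmsumd partial products commented above ("H.lo·Xi.lo", etc.) are the standard 64-bit split of a 128-bit carry-less multiply. Schematically, writing $X_i = X_h\,x^{64} \oplus X_l$ and the twisted key $H = H_h\,x^{64} \oplus H_l$ over GF(2):

$X_i \cdot H = X_h H_h\,x^{128} \oplus (X_h H_l \oplus X_l H_h)\,x^{64} \oplus X_l H_l \pmod{g(x)}, \qquad g(x) = x^{128} + x^7 + x^2 + x + 1$

$Xl, $Xm and $Xh hold the low, middle and high products, and the two multiplications by the 0xc2...01 constant fold the upper 128 bits back modulo g(x) in the two "phase" steps. This is only a sketch of the algebra; the exact lane placement and the bit reflection handled by $lemask follow the code itself.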
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl new file mode 100644 index 000000000000..a59188494af8 --- /dev/null +++ b/drivers/crypto/vmx/ppc-xlate.pl | |||
@@ -0,0 +1,207 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | |||
3 | # PowerPC assembler distiller by <appro>. | ||
4 | |||
5 | my $flavour = shift; | ||
6 | my $output = shift; | ||
7 | open STDOUT,">$output" || die "can't open $output: $!"; | ||
8 | |||
9 | my %GLOBALS; | ||
10 | my $dotinlocallabels=($flavour=~/linux/)?1:0; | ||
11 | |||
12 | ################################################################ | ||
13 | # directives which need special treatment on different platforms | ||
14 | ################################################################ | ||
15 | my $globl = sub { | ||
16 | my $junk = shift; | ||
17 | my $name = shift; | ||
18 | my $global = \$GLOBALS{$name}; | ||
19 | my $ret; | ||
20 | |||
21 | $name =~ s|^[\.\_]||; | ||
22 | |||
23 | SWITCH: for ($flavour) { | ||
24 | /aix/ && do { $name = ".$name"; | ||
25 | last; | ||
26 | }; | ||
27 | /osx/ && do { $name = "_$name"; | ||
28 | last; | ||
29 | }; | ||
30 | /linux/ | ||
31 | && do { $ret = "_GLOBAL($name)"; | ||
32 | last; | ||
33 | }; | ||
34 | } | ||
35 | |||
36 | $ret = ".globl $name\nalign 5\n$name:" if (!$ret); | ||
37 | $$global = $name; | ||
38 | $ret; | ||
39 | }; | ||
40 | my $text = sub { | ||
41 | my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; | ||
42 | $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); | ||
43 | $ret; | ||
44 | }; | ||
45 | my $machine = sub { | ||
46 | my $junk = shift; | ||
47 | my $arch = shift; | ||
48 | if ($flavour =~ /osx/) | ||
49 | { $arch =~ s/\"//g; | ||
50 | $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); | ||
51 | } | ||
52 | ".machine $arch"; | ||
53 | }; | ||
54 | my $size = sub { | ||
55 | if ($flavour =~ /linux/) | ||
56 | { shift; | ||
57 | my $name = shift; $name =~ s|^[\.\_]||; | ||
58 | my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; | ||
59 | $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); | ||
60 | $ret; | ||
61 | } | ||
62 | else | ||
63 | { ""; } | ||
64 | }; | ||
65 | my $asciz = sub { | ||
66 | shift; | ||
67 | my $line = join(",",@_); | ||
68 | if ($line =~ /^"(.*)"$/) | ||
69 | { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } | ||
70 | else | ||
71 | { ""; } | ||
72 | }; | ||
73 | my $quad = sub { | ||
74 | shift; | ||
75 | my @ret; | ||
76 | my ($hi,$lo); | ||
77 | for (@_) { | ||
78 | if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) | ||
79 | { $hi=$1?"0x$1":"0"; $lo="0x$2"; } | ||
80 | elsif (/^([0-9]+)$/o) | ||
81 | { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl | ||
82 | else | ||
83 | { $hi=undef; $lo=$_; } | ||
84 | |||
85 | if (defined($hi)) | ||
86 | { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } | ||
87 | else | ||
88 | { push(@ret,".quad $lo"); } | ||
89 | } | ||
90 | join("\n",@ret); | ||
91 | }; | ||
92 | |||
93 | ################################################################ | ||
94 | # simplified mnemonics not handled by at least one assembler | ||
95 | ################################################################ | ||
96 | my $cmplw = sub { | ||
97 | my $f = shift; | ||
98 | my $cr = 0; $cr = shift if ($#_>1); | ||
99 | # Some out-of-date 32-bit GNU assembler just can't handle cmplw... | ||
100 | ($flavour =~ /linux.*32/) ? | ||
101 | " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : | ||
102 | " cmplw ".join(',',$cr,@_); | ||
103 | }; | ||
104 | my $bdnz = sub { | ||
105 | my $f = shift; | ||
106 | my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint | ||
107 | " bc $bo,0,".shift; | ||
108 | } if ($flavour!~/linux/); | ||
109 | my $bltlr = sub { | ||
110 | my $f = shift; | ||
111 | my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint | ||
112 | ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints | ||
113 | " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : | ||
114 | " bclr $bo,0"; | ||
115 | }; | ||
116 | my $bnelr = sub { | ||
117 | my $f = shift; | ||
118 | my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint | ||
119 | ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints | ||
120 | " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : | ||
121 | " bclr $bo,2"; | ||
122 | }; | ||
123 | my $beqlr = sub { | ||
124 | my $f = shift; | ||
125 | my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint | ||
126 | ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints | ||
127 | " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : | ||
128 | " bclr $bo,2"; | ||
129 | }; | ||
130 | # GNU assembler can't handle extrdi rA,rS,16,48, i.e. any case where the | ||
131 | # sum of the last two arguments is 64; it fails with "operand out of range". | ||
132 | my $extrdi = sub { | ||
133 | my ($f,$ra,$rs,$n,$b) = @_; | ||
134 | $b = ($b+$n)&63; $n = 64-$n; | ||
135 | " rldicl $ra,$rs,$b,$n"; | ||
136 | }; | ||
137 | my $vmr = sub { | ||
138 | my ($f,$vx,$vy) = @_; | ||
139 | " vor $vx,$vy,$vy"; | ||
140 | }; | ||
141 | |||
142 | # PowerISA 2.06 stuff | ||
143 | sub vsxmem_op { | ||
144 | my ($f, $vrt, $ra, $rb, $op) = @_; | ||
145 | " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); | ||
146 | } | ||
147 | # made-up unaligned memory reference AltiVec/VMX instructions | ||
148 | my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x | ||
149 | my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x | ||
150 | my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx | ||
151 | my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx | ||
152 | my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x | ||
153 | my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x | ||
154 | |||
155 | # PowerISA 2.07 stuff | ||
156 | sub vcrypto_op { | ||
157 | my ($f, $vrt, $vra, $vrb, $op) = @_; | ||
158 | " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; | ||
159 | } | ||
160 | my $vcipher = sub { vcrypto_op(@_, 1288); }; | ||
161 | my $vcipherlast = sub { vcrypto_op(@_, 1289); }; | ||
162 | my $vncipher = sub { vcrypto_op(@_, 1352); }; | ||
163 | my $vncipherlast= sub { vcrypto_op(@_, 1353); }; | ||
164 | my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; | ||
165 | my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; | ||
166 | my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; | ||
167 | my $vpmsumb = sub { vcrypto_op(@_, 1032); }; | ||
168 | my $vpmsumd = sub { vcrypto_op(@_, 1224); }; | ||
169 | my $vpmsumh = sub { vcrypto_op(@_, 1096); }; | ||
170 | my $vpmsumw = sub { vcrypto_op(@_, 1160); }; | ||
171 | my $vaddudm = sub { vcrypto_op(@_, 192); }; | ||
172 | |||
173 | my $mtsle = sub { | ||
174 | my ($f, $arg) = @_; | ||
175 | " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); | ||
176 | }; | ||
177 | |||
178 | print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/; | ||
179 | |||
180 | while($line=<>) { | ||
181 | |||
182 | $line =~ s|[#!;].*$||; # get rid of asm-style comments... | ||
183 | $line =~ s|/\*.*\*/||; # ... and C-style comments... | ||
184 | $line =~ s|^\s+||; # ... and skip white spaces in beginning... | ||
185 | $line =~ s|\s+$||; # ... and at the end | ||
186 | |||
187 | { | ||
188 | $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel | ||
189 | $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); | ||
190 | } | ||
191 | |||
192 | { | ||
193 | $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; | ||
194 | my $c = $1; $c = "\t" if ($c eq ""); | ||
195 | my $mnemonic = $2; | ||
196 | my $f = $3; | ||
197 | my $opcode = eval("\$$mnemonic"); | ||
198 | $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); | ||
199 | if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } | ||
200 | elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } | ||
201 | } | ||
202 | |||
203 | print $line if ($line); | ||
204 | print "\n"; | ||
205 | } | ||
206 | |||
207 | close STDOUT; | ||
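Because binutils of the day lacked the v2.07 mnemonics, vcrypto_op() emits these instructions as raw .long words. A stand-alone C sketch (illustrative only, not part of the patch) reproduces the encoder so the bit layout can be checked against the ISA:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the Perl vcrypto_op(): primary opcode 4 in the top 6 bits,
 * VRT/VRA/VRB register fields, extended opcode in the low 11 bits. */
static uint32_t vcrypto_op(unsigned vrt, unsigned vra, unsigned vrb,
			   unsigned xo)
{
	return (4u << 26) | (vrt << 21) | (vra << 16) | (vrb << 11) | xo;
}

int main(void)
{
	/* vcipher v0,v1,v2: 1288 == 0x508, so this prints 0x10011508 */
	printf("vcipher v0,v1,v2 -> .long 0x%08X\n", vcrypto_op(0, 1, 2, 1288));
	return 0;
}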
diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c new file mode 100644 index 000000000000..44d8d5cfe40d --- /dev/null +++ b/drivers/crypto/vmx/vmx.c | |||
@@ -0,0 +1,88 @@ | |||
1 | /** | ||
2 | * Routines supporting VMX instructions on the Power 8 | ||
3 | * | ||
4 | * Copyright (C) 2015 International Business Machines Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; version 2 only. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
18 | * | ||
19 | * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> | ||
20 | */ | ||
21 | |||
22 | #include <linux/module.h> | ||
23 | #include <linux/moduleparam.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/err.h> | ||
26 | #include <linux/crypto.h> | ||
27 | #include <asm/cputable.h> | ||
28 | #include <crypto/internal/hash.h> | ||
29 | |||
30 | extern struct shash_alg p8_ghash_alg; | ||
31 | extern struct crypto_alg p8_aes_alg; | ||
32 | extern struct crypto_alg p8_aes_cbc_alg; | ||
33 | extern struct crypto_alg p8_aes_ctr_alg; | ||
34 | static struct crypto_alg *algs[] = { | ||
35 | &p8_aes_alg, | ||
36 | &p8_aes_cbc_alg, | ||
37 | &p8_aes_ctr_alg, | ||
38 | NULL, | ||
39 | }; | ||
40 | |||
41 | static int __init p8_init(void) | ||
42 | { | ||
43 | int ret = 0; | ||
44 | struct crypto_alg **alg_it; | ||
45 | |||
46 | if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) | ||
47 | return -ENODEV; | ||
48 | |||
49 | for (alg_it = algs; *alg_it; alg_it++) { | ||
50 | ret = crypto_register_alg(*alg_it); | ||
51 | printk(KERN_INFO "crypto_register_alg '%s' = %d\n", | ||
52 | (*alg_it)->cra_name, ret); | ||
53 | if (ret) { | ||
54 | for (alg_it--; alg_it >= algs; alg_it--) | ||
55 | crypto_unregister_alg(*alg_it); | ||
56 | break; | ||
57 | } | ||
58 | } | ||
59 | if (ret) | ||
60 | return ret; | ||
61 | |||
62 | ret = crypto_register_shash(&p8_ghash_alg); | ||
63 | if (ret) { | ||
64 | for (alg_it = algs; *alg_it; alg_it++) | ||
65 | crypto_unregister_alg(*alg_it); | ||
66 | } | ||
67 | return ret; | ||
68 | } | ||
69 | |||
70 | static void __exit p8_exit(void) | ||
71 | { | ||
72 | struct crypto_alg **alg_it; | ||
73 | |||
74 | for (alg_it = algs; *alg_it; alg_it++) { | ||
75 | printk(KERN_INFO "Removing '%s'\n", (*alg_it)->cra_name); | ||
76 | crypto_unregister_alg(*alg_it); | ||
77 | } | ||
78 | crypto_unregister_shash(&p8_ghash_alg); | ||
79 | } | ||
80 | |||
81 | module_init(p8_init); | ||
82 | module_exit(p8_exit); | ||
83 | |||
84 | MODULE_AUTHOR("Marcelo Cerri <mhcerri@br.ibm.com>"); | ||
85 | MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions support on Power 8"); | ||
86 | MODULE_LICENSE("GPL"); | ||
87 | MODULE_VERSION("1.0.0"); | ||
88 | |||
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 623a59c1ff5a..0ecb7688af71 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h | |||
@@ -137,7 +137,7 @@ struct crypto_template *crypto_lookup_template(const char *name); | |||
137 | 137 | ||
138 | int crypto_register_instance(struct crypto_template *tmpl, | 138 | int crypto_register_instance(struct crypto_template *tmpl, |
139 | struct crypto_instance *inst); | 139 | struct crypto_instance *inst); |
140 | int crypto_unregister_instance(struct crypto_alg *alg); | 140 | int crypto_unregister_instance(struct crypto_instance *inst); |
141 | 141 | ||
142 | int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, | 142 | int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, |
143 | struct crypto_instance *inst, u32 mask); | 143 | struct crypto_instance *inst, u32 mask); |
diff --git a/include/crypto/rng.h b/include/crypto/rng.h index a16fb10142bf..6e28ea5be9f1 100644 --- a/include/crypto/rng.h +++ b/include/crypto/rng.h | |||
@@ -103,8 +103,7 @@ static inline void crypto_free_rng(struct crypto_rng *tfm) | |||
103 | * This function fills the caller-allocated buffer with random numbers using the | 103 | * This function fills the caller-allocated buffer with random numbers using the |
104 | * random number generator referenced by the cipher handle. | 104 | * random number generator referenced by the cipher handle. |
105 | * | 105 | * |
106 | * Return: > 0 function was successful and returns the number of generated | 106 | * Return: 0 function was successful; < 0 if an error occurred |
107 | * bytes; < 0 if an error occurred | ||
108 | */ | 107 | */ |
109 | static inline int crypto_rng_get_bytes(struct crypto_rng *tfm, | 108 | static inline int crypto_rng_get_bytes(struct crypto_rng *tfm, |
110 | u8 *rdata, unsigned int dlen) | 109 | u8 *rdata, unsigned int dlen) |
diff --git a/include/crypto/sha.h b/include/crypto/sha.h index 190f8a0e0242..dd7905a3c22e 100644 --- a/include/crypto/sha.h +++ b/include/crypto/sha.h | |||
@@ -65,20 +65,20 @@ | |||
65 | #define SHA512_H7 0x5be0cd19137e2179ULL | 65 | #define SHA512_H7 0x5be0cd19137e2179ULL |
66 | 66 | ||
67 | struct sha1_state { | 67 | struct sha1_state { |
68 | u64 count; | ||
69 | u32 state[SHA1_DIGEST_SIZE / 4]; | 68 | u32 state[SHA1_DIGEST_SIZE / 4]; |
69 | u64 count; | ||
70 | u8 buffer[SHA1_BLOCK_SIZE]; | 70 | u8 buffer[SHA1_BLOCK_SIZE]; |
71 | }; | 71 | }; |
72 | 72 | ||
73 | struct sha256_state { | 73 | struct sha256_state { |
74 | u64 count; | ||
75 | u32 state[SHA256_DIGEST_SIZE / 4]; | 74 | u32 state[SHA256_DIGEST_SIZE / 4]; |
75 | u64 count; | ||
76 | u8 buf[SHA256_BLOCK_SIZE]; | 76 | u8 buf[SHA256_BLOCK_SIZE]; |
77 | }; | 77 | }; |
78 | 78 | ||
79 | struct sha512_state { | 79 | struct sha512_state { |
80 | u64 count[2]; | ||
81 | u64 state[SHA512_DIGEST_SIZE / 8]; | 80 | u64 state[SHA512_DIGEST_SIZE / 8]; |
81 | u64 count[2]; | ||
82 | u8 buf[SHA512_BLOCK_SIZE]; | 82 | u8 buf[SHA512_BLOCK_SIZE]; |
83 | }; | 83 | }; |
84 | 84 | ||
@@ -87,9 +87,18 @@ struct shash_desc; | |||
87 | extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, | 87 | extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, |
88 | unsigned int len); | 88 | unsigned int len); |
89 | 89 | ||
90 | extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data, | ||
91 | unsigned int len, u8 *hash); | ||
92 | |||
90 | extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data, | 93 | extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data, |
91 | unsigned int len); | 94 | unsigned int len); |
92 | 95 | ||
96 | extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data, | ||
97 | unsigned int len, u8 *hash); | ||
98 | |||
93 | extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data, | 99 | extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data, |
94 | unsigned int len); | 100 | unsigned int len); |
101 | |||
102 | extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data, | ||
103 | unsigned int len, u8 *hash); | ||
95 | #endif | 104 | #endif |
diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h new file mode 100644 index 000000000000..d0df431f9a97 --- /dev/null +++ b/include/crypto/sha1_base.h | |||
@@ -0,0 +1,106 @@ | |||
1 | /* | ||
2 | * sha1_base.h - core logic for SHA-1 implementations | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <crypto/internal/hash.h> | ||
12 | #include <crypto/sha.h> | ||
13 | #include <linux/crypto.h> | ||
14 | #include <linux/module.h> | ||
15 | |||
16 | #include <asm/unaligned.h> | ||
17 | |||
18 | typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks); | ||
19 | |||
20 | static inline int sha1_base_init(struct shash_desc *desc) | ||
21 | { | ||
22 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
23 | |||
24 | sctx->state[0] = SHA1_H0; | ||
25 | sctx->state[1] = SHA1_H1; | ||
26 | sctx->state[2] = SHA1_H2; | ||
27 | sctx->state[3] = SHA1_H3; | ||
28 | sctx->state[4] = SHA1_H4; | ||
29 | sctx->count = 0; | ||
30 | |||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | static inline int sha1_base_do_update(struct shash_desc *desc, | ||
35 | const u8 *data, | ||
36 | unsigned int len, | ||
37 | sha1_block_fn *block_fn) | ||
38 | { | ||
39 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
40 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
41 | |||
42 | sctx->count += len; | ||
43 | |||
44 | if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) { | ||
45 | int blocks; | ||
46 | |||
47 | if (partial) { | ||
48 | int p = SHA1_BLOCK_SIZE - partial; | ||
49 | |||
50 | memcpy(sctx->buffer + partial, data, p); | ||
51 | data += p; | ||
52 | len -= p; | ||
53 | |||
54 | block_fn(sctx, sctx->buffer, 1); | ||
55 | } | ||
56 | |||
57 | blocks = len / SHA1_BLOCK_SIZE; | ||
58 | len %= SHA1_BLOCK_SIZE; | ||
59 | |||
60 | if (blocks) { | ||
61 | block_fn(sctx, data, blocks); | ||
62 | data += blocks * SHA1_BLOCK_SIZE; | ||
63 | } | ||
64 | partial = 0; | ||
65 | } | ||
66 | if (len) | ||
67 | memcpy(sctx->buffer + partial, data, len); | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static inline int sha1_base_do_finalize(struct shash_desc *desc, | ||
73 | sha1_block_fn *block_fn) | ||
74 | { | ||
75 | const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64); | ||
76 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
77 | __be64 *bits = (__be64 *)(sctx->buffer + bit_offset); | ||
78 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
79 | |||
80 | sctx->buffer[partial++] = 0x80; | ||
81 | if (partial > bit_offset) { | ||
82 | memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial); | ||
83 | partial = 0; | ||
84 | |||
85 | block_fn(sctx, sctx->buffer, 1); | ||
86 | } | ||
87 | |||
88 | memset(sctx->buffer + partial, 0x0, bit_offset - partial); | ||
89 | *bits = cpu_to_be64(sctx->count << 3); | ||
90 | block_fn(sctx, sctx->buffer, 1); | ||
91 | |||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | static inline int sha1_base_finish(struct shash_desc *desc, u8 *out) | ||
96 | { | ||
97 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
98 | __be32 *digest = (__be32 *)out; | ||
99 | int i; | ||
100 | |||
101 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) | ||
102 | put_unaligned_be32(sctx->state[i], digest++); | ||
103 | |||
104 | *sctx = (struct sha1_state){}; | ||
105 | return 0; | ||
106 | } | ||
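The header above is deliberately glue-only: a driver supplies one block function and derives its update/final handlers from the base routines. A hypothetical skeleton (the my_sha1_* names are invented for illustration of how a glue implementation would plug in):

/* Illustrative glue: plug an arch-specific compression function into the
 * sha1_base_* helpers. my_sha1_block() is a placeholder. */
static void my_sha1_block(struct sha1_state *sst, u8 const *src, int blocks)
{
	/* process 'blocks' consecutive 64-byte blocks into sst->state */
}

static int my_sha1_update(struct shash_desc *desc, const u8 *data,
			  unsigned int len)
{
	return sha1_base_do_update(desc, data, len, my_sha1_block);
}

static int my_sha1_final(struct shash_desc *desc, u8 *out)
{
	sha1_base_do_finalize(desc, my_sha1_block);
	return sha1_base_finish(desc, out);
}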
diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h new file mode 100644 index 000000000000..d1f2195bb7de --- /dev/null +++ b/include/crypto/sha256_base.h | |||
@@ -0,0 +1,128 @@ | |||
1 | /* | ||
2 | * sha256_base.h - core logic for SHA-256 implementations | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <crypto/internal/hash.h> | ||
12 | #include <crypto/sha.h> | ||
13 | #include <linux/crypto.h> | ||
14 | #include <linux/module.h> | ||
15 | |||
16 | #include <asm/unaligned.h> | ||
17 | |||
18 | typedef void (sha256_block_fn)(struct sha256_state *sst, u8 const *src, | ||
19 | int blocks); | ||
20 | |||
21 | static inline int sha224_base_init(struct shash_desc *desc) | ||
22 | { | ||
23 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
24 | |||
25 | sctx->state[0] = SHA224_H0; | ||
26 | sctx->state[1] = SHA224_H1; | ||
27 | sctx->state[2] = SHA224_H2; | ||
28 | sctx->state[3] = SHA224_H3; | ||
29 | sctx->state[4] = SHA224_H4; | ||
30 | sctx->state[5] = SHA224_H5; | ||
31 | sctx->state[6] = SHA224_H6; | ||
32 | sctx->state[7] = SHA224_H7; | ||
33 | sctx->count = 0; | ||
34 | |||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | static inline int sha256_base_init(struct shash_desc *desc) | ||
39 | { | ||
40 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
41 | |||
42 | sctx->state[0] = SHA256_H0; | ||
43 | sctx->state[1] = SHA256_H1; | ||
44 | sctx->state[2] = SHA256_H2; | ||
45 | sctx->state[3] = SHA256_H3; | ||
46 | sctx->state[4] = SHA256_H4; | ||
47 | sctx->state[5] = SHA256_H5; | ||
48 | sctx->state[6] = SHA256_H6; | ||
49 | sctx->state[7] = SHA256_H7; | ||
50 | sctx->count = 0; | ||
51 | |||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | static inline int sha256_base_do_update(struct shash_desc *desc, | ||
56 | const u8 *data, | ||
57 | unsigned int len, | ||
58 | sha256_block_fn *block_fn) | ||
59 | { | ||
60 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
61 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
62 | |||
63 | sctx->count += len; | ||
64 | |||
65 | if (unlikely((partial + len) >= SHA256_BLOCK_SIZE)) { | ||
66 | int blocks; | ||
67 | |||
68 | if (partial) { | ||
69 | int p = SHA256_BLOCK_SIZE - partial; | ||
70 | |||
71 | memcpy(sctx->buf + partial, data, p); | ||
72 | data += p; | ||
73 | len -= p; | ||
74 | |||
75 | block_fn(sctx, sctx->buf, 1); | ||
76 | } | ||
77 | |||
78 | blocks = len / SHA256_BLOCK_SIZE; | ||
79 | len %= SHA256_BLOCK_SIZE; | ||
80 | |||
81 | if (blocks) { | ||
82 | block_fn(sctx, data, blocks); | ||
83 | data += blocks * SHA256_BLOCK_SIZE; | ||
84 | } | ||
85 | partial = 0; | ||
86 | } | ||
87 | if (len) | ||
88 | memcpy(sctx->buf + partial, data, len); | ||
89 | |||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | static inline int sha256_base_do_finalize(struct shash_desc *desc, | ||
94 | sha256_block_fn *block_fn) | ||
95 | { | ||
96 | const int bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64); | ||
97 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
98 | __be64 *bits = (__be64 *)(sctx->buf + bit_offset); | ||
99 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
100 | |||
101 | sctx->buf[partial++] = 0x80; | ||
102 | if (partial > bit_offset) { | ||
103 | memset(sctx->buf + partial, 0x0, SHA256_BLOCK_SIZE - partial); | ||
104 | partial = 0; | ||
105 | |||
106 | block_fn(sctx, sctx->buf, 1); | ||
107 | } | ||
108 | |||
109 | memset(sctx->buf + partial, 0x0, bit_offset - partial); | ||
110 | *bits = cpu_to_be64(sctx->count << 3); | ||
111 | block_fn(sctx, sctx->buf, 1); | ||
112 | |||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | static inline int sha256_base_finish(struct shash_desc *desc, u8 *out) | ||
117 | { | ||
118 | unsigned int digest_size = crypto_shash_digestsize(desc->tfm); | ||
119 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
120 | __be32 *digest = (__be32 *)out; | ||
121 | int i; | ||
122 | |||
123 | for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be32)) | ||
124 | put_unaligned_be32(sctx->state[i], digest++); | ||
125 | |||
126 | *sctx = (struct sha256_state){}; | ||
127 | return 0; | ||
128 | } | ||
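The base layer above factors the generic buffering and MD-style padding out of every SHA-256 driver, so arch-specific code only has to supply the block transform. A minimal sketch of how a driver would glue into it, assuming a hypothetical block function my_sha256_blocks() that is not part of this patch:

static void my_sha256_blocks(struct sha256_state *sst, u8 const *src,
			     int blocks)
{
	/* arch-specific: compress 'blocks' 64-byte blocks into sst->state */
}

static int my_sha256_update(struct shash_desc *desc, const u8 *data,
			    unsigned int len)
{
	/* the base layer handles partial-block buffering and the byte count */
	return sha256_base_do_update(desc, data, len, my_sha256_blocks);
}

static int my_sha256_final(struct shash_desc *desc, u8 *out)
{
	/* the base layer appends the 0x80/length padding before export */
	sha256_base_do_finalize(desc, my_sha256_blocks);
	return sha256_base_finish(desc, out);
}

Because sha256_base_finish() writes crypto_shash_digestsize() bytes, the same wrappers serve both the sha224 and sha256 variants; only the .init callback differs.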
diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h new file mode 100644 index 000000000000..6c5341e005ea --- /dev/null +++ b/include/crypto/sha512_base.h | |||
@@ -0,0 +1,131 @@ | |||
1 | /* | ||
2 | * sha512_base.h - core logic for SHA-512 implementations | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <crypto/internal/hash.h> | ||
12 | #include <crypto/sha.h> | ||
13 | #include <linux/crypto.h> | ||
14 | #include <linux/module.h> | ||
15 | |||
16 | #include <asm/unaligned.h> | ||
17 | |||
18 | typedef void (sha512_block_fn)(struct sha512_state *sst, u8 const *src, | ||
19 | int blocks); | ||
20 | |||
21 | static inline int sha384_base_init(struct shash_desc *desc) | ||
22 | { | ||
23 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
24 | |||
25 | sctx->state[0] = SHA384_H0; | ||
26 | sctx->state[1] = SHA384_H1; | ||
27 | sctx->state[2] = SHA384_H2; | ||
28 | sctx->state[3] = SHA384_H3; | ||
29 | sctx->state[4] = SHA384_H4; | ||
30 | sctx->state[5] = SHA384_H5; | ||
31 | sctx->state[6] = SHA384_H6; | ||
32 | sctx->state[7] = SHA384_H7; | ||
33 | sctx->count[0] = sctx->count[1] = 0; | ||
34 | |||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | static inline int sha512_base_init(struct shash_desc *desc) | ||
39 | { | ||
40 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
41 | |||
42 | sctx->state[0] = SHA512_H0; | ||
43 | sctx->state[1] = SHA512_H1; | ||
44 | sctx->state[2] = SHA512_H2; | ||
45 | sctx->state[3] = SHA512_H3; | ||
46 | sctx->state[4] = SHA512_H4; | ||
47 | sctx->state[5] = SHA512_H5; | ||
48 | sctx->state[6] = SHA512_H6; | ||
49 | sctx->state[7] = SHA512_H7; | ||
50 | sctx->count[0] = sctx->count[1] = 0; | ||
51 | |||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | static inline int sha512_base_do_update(struct shash_desc *desc, | ||
56 | const u8 *data, | ||
57 | unsigned int len, | ||
58 | sha512_block_fn *block_fn) | ||
59 | { | ||
60 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
61 | unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; | ||
62 | |||
63 | sctx->count[0] += len; | ||
64 | if (sctx->count[0] < len) | ||
65 | sctx->count[1]++; | ||
66 | |||
67 | if (unlikely((partial + len) >= SHA512_BLOCK_SIZE)) { | ||
68 | int blocks; | ||
69 | |||
70 | if (partial) { | ||
71 | int p = SHA512_BLOCK_SIZE - partial; | ||
72 | |||
73 | memcpy(sctx->buf + partial, data, p); | ||
74 | data += p; | ||
75 | len -= p; | ||
76 | |||
77 | block_fn(sctx, sctx->buf, 1); | ||
78 | } | ||
79 | |||
80 | blocks = len / SHA512_BLOCK_SIZE; | ||
81 | len %= SHA512_BLOCK_SIZE; | ||
82 | |||
83 | if (blocks) { | ||
84 | block_fn(sctx, data, blocks); | ||
85 | data += blocks * SHA512_BLOCK_SIZE; | ||
86 | } | ||
87 | partial = 0; | ||
88 | } | ||
89 | if (len) | ||
90 | memcpy(sctx->buf + partial, data, len); | ||
91 | |||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | static inline int sha512_base_do_finalize(struct shash_desc *desc, | ||
96 | sha512_block_fn *block_fn) | ||
97 | { | ||
98 | const int bit_offset = SHA512_BLOCK_SIZE - sizeof(__be64[2]); | ||
99 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
100 | __be64 *bits = (__be64 *)(sctx->buf + bit_offset); | ||
101 | unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; | ||
102 | |||
103 | sctx->buf[partial++] = 0x80; | ||
104 | if (partial > bit_offset) { | ||
105 | memset(sctx->buf + partial, 0x0, SHA512_BLOCK_SIZE - partial); | ||
106 | partial = 0; | ||
107 | |||
108 | block_fn(sctx, sctx->buf, 1); | ||
109 | } | ||
110 | |||
111 | memset(sctx->buf + partial, 0x0, bit_offset - partial); | ||
112 | bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); | ||
113 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
114 | block_fn(sctx, sctx->buf, 1); | ||
115 | |||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | static inline int sha512_base_finish(struct shash_desc *desc, u8 *out) | ||
120 | { | ||
121 | unsigned int digest_size = crypto_shash_digestsize(desc->tfm); | ||
122 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
123 | __be64 *digest = (__be64 *)out; | ||
124 | int i; | ||
125 | |||
126 | for (i = 0; digest_size > 0; i++, digest_size -= sizeof(__be64)) | ||
127 | put_unaligned_be64(sctx->state[i], digest++); | ||
128 | |||
129 | *sctx = (struct sha512_state){}; | ||
130 | return 0; | ||
131 | } | ||
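As with SHA-256, the only per-algorithm differences are the initial state and the digest size, so one driver can register SHA-384 and SHA-512 from shared wrappers. A sketch under the same assumptions as the SHA-256 example above (my_sha512_update() and my_sha512_final() are hypothetical and would mirror the wrappers shown there, built on a my_sha512_blocks() transform):

static struct shash_alg my_sha512_algs[] = { {
	.digestsize	= SHA384_DIGEST_SIZE,
	.init		= sha384_base_init,
	.update		= my_sha512_update,
	.final		= my_sha512_final,
	.descsize	= sizeof(struct sha512_state),
	.base.cra_name	= "sha384",
	.base.cra_blocksize = SHA384_BLOCK_SIZE,
	.base.cra_module = THIS_MODULE,
}, {
	.digestsize	= SHA512_DIGEST_SIZE,
	.init		= sha512_base_init,
	.update		= my_sha512_update,
	.final		= my_sha512_final,
	.descsize	= sizeof(struct sha512_state),
	.base.cra_name	= "sha512",
	.base.cra_blocksize = SHA512_BLOCK_SIZE,
	.base.cra_module = THIS_MODULE,
} };

/* registered in module init with:
 * crypto_register_shashes(my_sha512_algs, ARRAY_SIZE(my_sha512_algs));
 */

sha512_base_finish() emits digest_size / 8 big-endian words, so the SHA-384 entry is truncated correctly with no extra code. Note also the 128-bit message length: do_finalize() folds count[1] and the top three bits of count[0] into the first __be64 of the length field.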
diff --git a/include/linux/crypto.h b/include/linux/crypto.h index fb5ef16d6a12..10df5d2d093a 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h | |||
@@ -95,6 +95,12 @@ | |||
95 | #define CRYPTO_ALG_KERN_DRIVER_ONLY 0x00001000 | 95 | #define CRYPTO_ALG_KERN_DRIVER_ONLY 0x00001000 |
96 | 96 | ||
97 | /* | 97 | /* |
98 | * Mark a cipher as a service implementation only usable by another | ||
99 | * cipher and never by a normal user of the kernel crypto API | ||
100 | */ | ||
101 | #define CRYPTO_ALG_INTERNAL 0x00002000 | ||
102 | |||
103 | /* | ||
98 | * Transform masks and values (for crt_flags). | 104 | * Transform masks and values (for crt_flags). |
99 | */ | 105 | */ |
100 | #define CRYPTO_TFM_REQ_MASK 0x000fff00 | 106 | #define CRYPTO_TFM_REQ_MASK 0x000fff00 |
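CRYPTO_ALG_INTERNAL exists because several SIMD implementations register a raw inner algorithm that is only safe when driven through a wrapper (e.g. via cryptd). The flag hides such helpers from normal lookups: a caller only sees them if it requests the flag in both the type and mask arguments. A hedged fragment, with "__my-inner" as a made-up driver name:

	/* in the helper algorithm's declaration, alongside its other flags: */
	.cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,

	/* in the wrapping driver; a plain crypto_alloc_shash("__my-inner",
	 * 0, 0) would not find the helper at all: */
	tfm = crypto_alloc_shash("__my-inner", CRYPTO_ALG_INTERNAL,
				 CRYPTO_ALG_INTERNAL);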
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index eb7b414d232b..4f7d8f4b1e9a 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h | |||
@@ -50,10 +50,14 @@ struct hwrng { | |||
50 | struct completion cleanup_done; | 50 | struct completion cleanup_done; |
51 | }; | 51 | }; |
52 | 52 | ||
53 | struct device; | ||
54 | |||
53 | /** Register a new Hardware Random Number Generator driver. */ | 55 | /** Register a new Hardware Random Number Generator driver. */ |
54 | extern int hwrng_register(struct hwrng *rng); | 56 | extern int hwrng_register(struct hwrng *rng); |
57 | extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); | ||
55 | /** Unregister a Hardware Random Number Generator driver. */ | 58 | /** Unregister a Hardware Random Number Generator driver. */ |
56 | extern void hwrng_unregister(struct hwrng *rng); | 59 | extern void hwrng_unregister(struct hwrng *rng); |
60 | extern void devm_hwrng_unregister(struct device *dev, struct hwrng *rng); | ||
57 | /** Feed random bits into the pool. */ | 61 | /** Feed random bits into the pool. */ |
58 | extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); | 62 | extern void add_hwgenerator_randomness(const char *buffer, size_t count, size_t entropy); |
59 | 63 | ||
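The devm_ variants tie the hwrng registration to a struct device's lifetime, so a driver can drop the unregister call from its remove path. A probe sketch, assuming a platform driver with a hypothetical my_rng_read() callback:

static int my_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
	return 0;	/* device-specific: fill 'data', return bytes read */
}

static int my_rng_probe(struct platform_device *pdev)
{
	struct hwrng *rng;

	rng = devm_kzalloc(&pdev->dev, sizeof(*rng), GFP_KERNEL);
	if (!rng)
		return -ENOMEM;

	rng->name = "my-rng";
	rng->read = my_rng_read;

	/* unregistered automatically when the device is unbound */
	return devm_hwrng_register(&pdev->dev, rng);
}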
diff --git a/lib/string.c b/lib/string.c index ce81aaec3839..a5792019193c 100644 --- a/lib/string.c +++ b/lib/string.c | |||
@@ -607,7 +607,7 @@ EXPORT_SYMBOL(memset); | |||
607 | void memzero_explicit(void *s, size_t count) | 607 | void memzero_explicit(void *s, size_t count) |
608 | { | 608 | { |
609 | memset(s, 0, count); | 609 | memset(s, 0, count); |
610 | OPTIMIZER_HIDE_VAR(s); | 610 | barrier(); |
611 | } | 611 | } |
612 | EXPORT_SYMBOL(memzero_explicit); | 612 | EXPORT_SYMBOL(memzero_explicit); |
613 | 613 | ||
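The lib/string.c hunk swaps OPTIMIZER_HIDE_VAR() for barrier(): since barrier() is a full compiler memory clobber, the compiler cannot treat the preceding memset() as a dead store and elide it, which is the whole point of memzero_explicit() for sensitive buffers. A usage sketch, with derive_key() and do_crypto() as hypothetical helpers:

static void my_use_key(const u8 *in, unsigned int len)
{
	u8 key[32];

	derive_key(key, in, len);
	do_crypto(key);

	/* a plain memset() of a dying local may be optimized away;
	 * memzero_explicit() guarantees the scrub actually happens */
	memzero_explicit(key, sizeof(key));
}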