author     Linus Torvalds <torvalds@linux-foundation.org>  2014-01-23 21:11:00 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-01-23 21:11:00 -0500
commit     13c789a6b219aa23f917466c7e630566106b14c2
tree       ad9e096ded01f433306bcd40af3a3f8dc1ddea6f
parent     6dd9158ae8577372aa433e6b0eae3c3d4caa5439
parent     79ba451d66ca8402c8d052ceb50e359ddc5e1161
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
"Here is the crypto update for 3.14:
- Improved crypto_memneq helper
- Use crypto_memneq in arch-specific crypto code
- Replaced orphaned DCP driver with Freescale MXS DCP driver
- Added AVX/AVX2 version of AESNI-GCM encode and decode
- Added AMD Cryptographic Coprocessor (CCP) driver
- Misc fixes"
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (41 commits)
crypto: aesni - fix build on x86 (32bit)
crypto: mxs - Fix sparse non static symbol warning
crypto: ccp - CCP device enabled/disabled changes
crypto: ccp - Cleanup hash invocation calls
crypto: ccp - Change data length declarations to u64
crypto: ccp - Check for caller result area before using it
crypto: ccp - Cleanup scatterlist usage
crypto: ccp - Apply appropriate gfp_t type to memory allocations
crypto: drivers - Sort drivers/crypto/Makefile
ARM: mxs: dts: Enable DCP for MXS
crypto: mxs - Add Freescale MXS DCP driver
crypto: mxs - Remove the old DCP driver
crypto: ahash - Fully restore ahash request before completing
crypto: aesni - fix build on x86 (32bit)
crypto: talitos - Remove redundant dev_set_drvdata
crypto: ccp - Remove redundant dev_set_drvdata
crypto: crypto4xx - Remove redundant dev_set_drvdata
crypto: caam - simplify and harden key parsing
crypto: omap-sham - Fix Polling mode for larger blocks
crypto: tcrypt - Added speed tests for AEAD crypto algorithms in tcrypt test suite
...
40 files changed, 10527 insertions, 1009 deletions
diff --git a/Documentation/devicetree/bindings/crypto/fsl-dcp.txt b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
new file mode 100644
index 000000000000..6949e50f1f16
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
@@ -0,0 +1,17 @@
1 | Freescale DCP (Data Co-Processor) found on i.MX23/i.MX28. | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : Should be "fsl,<soc>-dcp" | ||
5 | - reg : Should contain MXS DCP registers location and length | ||
6 | - interrupts : Should contain MXS DCP interrupt numbers, VMI IRQ and DCP IRQ | ||
7 | must be supplied, optionally Secure IRQ can be present, but | ||
8 | is currently not implemented and not used. | ||
9 | |||
10 | Example: | ||
11 | |||
12 | dcp@80028000 { | ||
13 | compatible = "fsl,imx28-dcp", "fsl,imx23-dcp"; | ||
14 | reg = <0x80028000 0x2000>; | ||
15 | interrupts = <52 53>; | ||
16 | status = "okay"; | ||
17 | }; | ||
diff --git a/MAINTAINERS b/MAINTAINERS
index 3229945a96b3..0e13d692b176 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -538,6 +538,13 @@ F: drivers/tty/serial/altera_jtaguart.c
538 | F: include/linux/altera_uart.h | 538 | F: include/linux/altera_uart.h |
539 | F: include/linux/altera_jtaguart.h | 539 | F: include/linux/altera_jtaguart.h |
540 | 540 | ||
541 | AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER | ||
542 | M: Tom Lendacky <thomas.lendacky@amd.com> | ||
543 | L: linux-crypto@vger.kernel.org | ||
544 | S: Supported | ||
545 | F: drivers/crypto/ccp/ | ||
546 | F: include/linux/ccp.h | ||
547 | |||
541 | AMD FAM15H PROCESSOR POWER MONITORING DRIVER | 548 | AMD FAM15H PROCESSOR POWER MONITORING DRIVER |
542 | M: Andreas Herrmann <herrmann.der.user@googlemail.com> | 549 | M: Andreas Herrmann <herrmann.der.user@googlemail.com> |
543 | L: lm-sensors@lm-sensors.org | 550 | L: lm-sensors@lm-sensors.org |
diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
index c96ceaef7ddf..581b75433be6 100644
--- a/arch/arm/boot/dts/imx23.dtsi
+++ b/arch/arm/boot/dts/imx23.dtsi
@@ -337,8 +337,10 @@
337 | }; | 337 | }; |
338 | 338 | ||
339 | dcp@80028000 { | 339 | dcp@80028000 { |
340 | compatible = "fsl,imx23-dcp"; | ||
340 | reg = <0x80028000 0x2000>; | 341 | reg = <0x80028000 0x2000>; |
341 | status = "disabled"; | 342 | interrupts = <53 54>; |
343 | status = "okay"; | ||
342 | }; | 344 | }; |
343 | 345 | ||
344 | pxp@8002a000 { | 346 | pxp@8002a000 { |
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index cda19c8b0a47..f8e9b20f6982 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -813,9 +813,10 @@
813 | }; | 813 | }; |
814 | 814 | ||
815 | dcp: dcp@80028000 { | 815 | dcp: dcp@80028000 { |
816 | compatible = "fsl,imx28-dcp", "fsl,imx23-dcp"; | ||
816 | reg = <0x80028000 0x2000>; | 817 | reg = <0x80028000 0x2000>; |
817 | interrupts = <52 53 54>; | 818 | interrupts = <52 53 54>; |
818 | compatible = "fsl-dcp"; | 819 | status = "okay"; |
819 | }; | 820 | }; |
820 | 821 | ||
821 | pxp: pxp@8002a000 { | 822 | pxp: pxp@8002a000 { |
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index bcca01c9989d..200f2a1b599d 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -237,9 +237,9 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
237 | struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); | 237 | struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); |
238 | u32 *flags = &tfm->crt_flags; | 238 | u32 *flags = &tfm->crt_flags; |
239 | 239 | ||
240 | if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && | 240 | if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && |
241 | memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], | 241 | crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], |
242 | DES_KEY_SIZE)) && | 242 | DES_KEY_SIZE)) && |
243 | (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { | 243 | (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { |
244 | *flags |= CRYPTO_TFM_RES_WEAK_KEY; | 244 | *flags |= CRYPTO_TFM_RES_WEAK_KEY; |
245 | return -EINVAL; | 245 | return -EINVAL; |
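The des_s390 hunk above is one of the "use crypto_memneq in arch-specific crypto code" changes from the summary: the triple-DES setkey path now checks that the key thirds differ with crypto_memneq() instead of memcmp(). As a rough illustration of what that helper provides (this is a sketch only, not the kernel's implementation in crypto/memneq.c, and the function name below is made up), the point is a comparison whose running time does not depend on where the first mismatch occurs:

```c
/*
 * Illustrative sketch only -- not the kernel's crypto/memneq.c.  The idea
 * behind crypto_memneq() is to accumulate the XOR of every byte pair and
 * test the result once at the end, so there is no data-dependent early
 * exit and the comparison time does not reveal where the buffers differ.
 * Like crypto_memneq(), it returns non-zero when the buffers are not equal.
 */
#include <stddef.h>

static unsigned long memneq_sketch(const void *a, const void *b, size_t size)
{
	const unsigned char *pa = a;
	const unsigned char *pb = b;
	unsigned long neq = 0;

	while (size--)
		neq |= *pa++ ^ *pb++;	/* no break on first mismatch */

	return neq;
}
```

Since both memcmp() and crypto_memneq() return zero only when the buffers match, the surrounding `!( ... && ... )` weak-key logic in des3_setkey() is otherwise unchanged.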
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e0fc24db234a..6ba54d640383 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -76,6 +76,7 @@ ifeq ($(avx2_supported),yes)
76 | endif | 76 | endif |
77 | 77 | ||
78 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 78 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
79 | aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o | ||
79 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | 80 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o |
80 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o | 81 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o |
81 | crc32c-intel-y := crc32c-intel_glue.o | 82 | crc32c-intel-y := crc32c-intel_glue.o |
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
new file mode 100644
index 000000000000..522ab68d1c88
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -0,0 +1,2811 @@
1 | ######################################################################## | ||
2 | # Copyright (c) 2013, Intel Corporation | ||
3 | # | ||
4 | # This software is available to you under a choice of one of two | ||
5 | # licenses. You may choose to be licensed under the terms of the GNU | ||
6 | # General Public License (GPL) Version 2, available from the file | ||
7 | # COPYING in the main directory of this source tree, or the | ||
8 | # OpenIB.org BSD license below: | ||
9 | # | ||
10 | # Redistribution and use in source and binary forms, with or without | ||
11 | # modification, are permitted provided that the following conditions are | ||
12 | # met: | ||
13 | # | ||
14 | # * Redistributions of source code must retain the above copyright | ||
15 | # notice, this list of conditions and the following disclaimer. | ||
16 | # | ||
17 | # * Redistributions in binary form must reproduce the above copyright | ||
18 | # notice, this list of conditions and the following disclaimer in the | ||
19 | # documentation and/or other materials provided with the | ||
20 | # distribution. | ||
21 | # | ||
22 | # * Neither the name of the Intel Corporation nor the names of its | ||
23 | # contributors may be used to endorse or promote products derived from | ||
24 | # this software without specific prior written permission. | ||
25 | # | ||
26 | # | ||
27 | # THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY | ||
28 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
29 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
30 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR | ||
31 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
32 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
33 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES# LOSS OF USE, DATA, OR | ||
34 | # PROFITS# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
35 | # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
36 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
37 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | ######################################################################## | ||
39 | ## | ||
40 | ## Authors: | ||
41 | ## Erdinc Ozturk <erdinc.ozturk@intel.com> | ||
42 | ## Vinodh Gopal <vinodh.gopal@intel.com> | ||
43 | ## James Guilford <james.guilford@intel.com> | ||
44 | ## Tim Chen <tim.c.chen@linux.intel.com> | ||
45 | ## | ||
46 | ## References: | ||
47 | ## This code was derived and highly optimized from the code described in paper: | ||
48 | ## Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation | ||
49 | ## on Intel Architecture Processors. August, 2010 | ||
50 | ## The details of the implementation is explained in: | ||
51 | ## Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode | ||
52 | ## on Intel Architecture Processors. October, 2012. | ||
53 | ## | ||
54 | ## Assumptions: | ||
55 | ## | ||
56 | ## | ||
57 | ## | ||
58 | ## iv: | ||
59 | ## 0 1 2 3 | ||
60 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
61 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
62 | ## | Salt (From the SA) | | ||
63 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
64 | ## | Initialization Vector | | ||
65 | ## | (This is the sequence number from IPSec header) | | ||
66 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
67 | ## | 0x1 | | ||
68 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
69 | ## | ||
70 | ## | ||
71 | ## | ||
72 | ## AAD: | ||
73 | ## AAD padded to 128 bits with 0 | ||
74 | ## for example, assume AAD is a u32 vector | ||
75 | ## | ||
76 | ## if AAD is 8 bytes: | ||
77 | ## AAD[3] = {A0, A1}# | ||
78 | ## padded AAD in xmm register = {A1 A0 0 0} | ||
79 | ## | ||
80 | ## 0 1 2 3 | ||
81 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
82 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
83 | ## | SPI (A1) | | ||
84 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
85 | ## | 32-bit Sequence Number (A0) | | ||
86 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
87 | ## | 0x0 | | ||
88 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
89 | ## | ||
90 | ## AAD Format with 32-bit Sequence Number | ||
91 | ## | ||
92 | ## if AAD is 12 bytes: | ||
93 | ## AAD[3] = {A0, A1, A2}# | ||
94 | ## padded AAD in xmm register = {A2 A1 A0 0} | ||
95 | ## | ||
96 | ## 0 1 2 3 | ||
97 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
98 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
99 | ## | SPI (A2) | | ||
100 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
101 | ## | 64-bit Extended Sequence Number {A1,A0} | | ||
102 | ## | | | ||
103 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
104 | ## | 0x0 | | ||
105 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
106 | ## | ||
107 | ## AAD Format with 64-bit Extended Sequence Number | ||
108 | ## | ||
109 | ## | ||
110 | ## aadLen: | ||
111 | ## from the definition of the spec, aadLen can only be 8 or 12 bytes. | ||
112 | ## The code additionally supports aadLen of length 16 bytes. | ||
113 | ## | ||
114 | ## TLen: | ||
115 | ## from the definition of the spec, TLen can only be 8, 12 or 16 bytes. | ||
116 | ## | ||
117 | ## poly = x^128 + x^127 + x^126 + x^121 + 1 | ||
118 | ## throughout the code, one tab and two tab indentations are used. one tab is | ||
119 | ## for GHASH part, two tabs is for AES part. | ||
120 | ## | ||
121 | |||
122 | #include <linux/linkage.h> | ||
123 | #include <asm/inst.h> | ||
124 | |||
125 | .data | ||
126 | .align 16 | ||
127 | |||
128 | POLY: .octa 0xC2000000000000000000000000000001 | ||
129 | POLY2: .octa 0xC20000000000000000000001C2000000 | ||
130 | TWOONE: .octa 0x00000001000000000000000000000001 | ||
131 | |||
132 | # order of these constants should not change. | ||
133 | # more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F | ||
134 | |||
135 | SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F | ||
136 | SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 | ||
137 | ALL_F: .octa 0xffffffffffffffffffffffffffffffff | ||
138 | ZERO: .octa 0x00000000000000000000000000000000 | ||
139 | ONE: .octa 0x00000000000000000000000000000001 | ||
140 | ONEf: .octa 0x01000000000000000000000000000000 | ||
141 | |||
142 | .text | ||
143 | |||
144 | |||
145 | ##define the fields of the gcm aes context | ||
146 | #{ | ||
147 | # u8 expanded_keys[16*11] store expanded keys | ||
148 | # u8 shifted_hkey_1[16] store HashKey <<1 mod poly here | ||
149 | # u8 shifted_hkey_2[16] store HashKey^2 <<1 mod poly here | ||
150 | # u8 shifted_hkey_3[16] store HashKey^3 <<1 mod poly here | ||
151 | # u8 shifted_hkey_4[16] store HashKey^4 <<1 mod poly here | ||
152 | # u8 shifted_hkey_5[16] store HashKey^5 <<1 mod poly here | ||
153 | # u8 shifted_hkey_6[16] store HashKey^6 <<1 mod poly here | ||
154 | # u8 shifted_hkey_7[16] store HashKey^7 <<1 mod poly here | ||
155 | # u8 shifted_hkey_8[16] store HashKey^8 <<1 mod poly here | ||
156 | # u8 shifted_hkey_1_k[16] store XOR HashKey <<1 mod poly here (for Karatsuba purposes) | ||
157 | # u8 shifted_hkey_2_k[16] store XOR HashKey^2 <<1 mod poly here (for Karatsuba purposes) | ||
158 | # u8 shifted_hkey_3_k[16] store XOR HashKey^3 <<1 mod poly here (for Karatsuba purposes) | ||
159 | # u8 shifted_hkey_4_k[16] store XOR HashKey^4 <<1 mod poly here (for Karatsuba purposes) | ||
160 | # u8 shifted_hkey_5_k[16] store XOR HashKey^5 <<1 mod poly here (for Karatsuba purposes) | ||
161 | # u8 shifted_hkey_6_k[16] store XOR HashKey^6 <<1 mod poly here (for Karatsuba purposes) | ||
162 | # u8 shifted_hkey_7_k[16] store XOR HashKey^7 <<1 mod poly here (for Karatsuba purposes) | ||
163 | # u8 shifted_hkey_8_k[16] store XOR HashKey^8 <<1 mod poly here (for Karatsuba purposes) | ||
164 | #} gcm_ctx# | ||
165 | |||
166 | HashKey = 16*11 # store HashKey <<1 mod poly here | ||
167 | HashKey_2 = 16*12 # store HashKey^2 <<1 mod poly here | ||
168 | HashKey_3 = 16*13 # store HashKey^3 <<1 mod poly here | ||
169 | HashKey_4 = 16*14 # store HashKey^4 <<1 mod poly here | ||
170 | HashKey_5 = 16*15 # store HashKey^5 <<1 mod poly here | ||
171 | HashKey_6 = 16*16 # store HashKey^6 <<1 mod poly here | ||
172 | HashKey_7 = 16*17 # store HashKey^7 <<1 mod poly here | ||
173 | HashKey_8 = 16*18 # store HashKey^8 <<1 mod poly here | ||
174 | HashKey_k = 16*19 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes) | ||
175 | HashKey_2_k = 16*20 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes) | ||
176 | HashKey_3_k = 16*21 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes) | ||
177 | HashKey_4_k = 16*22 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes) | ||
178 | HashKey_5_k = 16*23 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes) | ||
179 | HashKey_6_k = 16*24 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes) | ||
180 | HashKey_7_k = 16*25 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes) | ||
181 | HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes) | ||
182 | |||
183 | #define arg1 %rdi | ||
184 | #define arg2 %rsi | ||
185 | #define arg3 %rdx | ||
186 | #define arg4 %rcx | ||
187 | #define arg5 %r8 | ||
188 | #define arg6 %r9 | ||
189 | #define arg7 STACK_OFFSET+8*1(%r14) | ||
190 | #define arg8 STACK_OFFSET+8*2(%r14) | ||
191 | #define arg9 STACK_OFFSET+8*3(%r14) | ||
192 | |||
193 | i = 0 | ||
194 | j = 0 | ||
195 | |||
196 | out_order = 0 | ||
197 | in_order = 1 | ||
198 | DEC = 0 | ||
199 | ENC = 1 | ||
200 | |||
201 | .macro define_reg r n | ||
202 | reg_\r = %xmm\n | ||
203 | .endm | ||
204 | |||
205 | .macro setreg | ||
206 | .altmacro | ||
207 | define_reg i %i | ||
208 | define_reg j %j | ||
209 | .noaltmacro | ||
210 | .endm | ||
211 | |||
212 | # need to push 4 registers into stack to maintain | ||
213 | STACK_OFFSET = 8*4 | ||
214 | |||
215 | TMP1 = 16*0 # Temporary storage for AAD | ||
216 | TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register) | ||
217 | TMP3 = 16*2 # Temporary storage for AES State 3 | ||
218 | TMP4 = 16*3 # Temporary storage for AES State 4 | ||
219 | TMP5 = 16*4 # Temporary storage for AES State 5 | ||
220 | TMP6 = 16*5 # Temporary storage for AES State 6 | ||
221 | TMP7 = 16*6 # Temporary storage for AES State 7 | ||
222 | TMP8 = 16*7 # Temporary storage for AES State 8 | ||
223 | |||
224 | VARIABLE_OFFSET = 16*8 | ||
225 | |||
226 | ################################ | ||
227 | # Utility Macros | ||
228 | ################################ | ||
229 | |||
230 | # Encryption of a single block | ||
231 | .macro ENCRYPT_SINGLE_BLOCK XMM0 | ||
232 | vpxor (arg1), \XMM0, \XMM0 | ||
233 | i = 1 | ||
234 | setreg | ||
235 | .rep 9 | ||
236 | vaesenc 16*i(arg1), \XMM0, \XMM0 | ||
237 | i = (i+1) | ||
238 | setreg | ||
239 | .endr | ||
240 | vaesenclast 16*10(arg1), \XMM0, \XMM0 | ||
241 | .endm | ||
242 | |||
243 | #ifdef CONFIG_AS_AVX | ||
244 | ############################################################################### | ||
245 | # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) | ||
246 | # Input: A and B (128-bits each, bit-reflected) | ||
247 | # Output: C = A*B*x mod poly, (i.e. >>1 ) | ||
248 | # To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input | ||
249 | # GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. | ||
250 | ############################################################################### | ||
251 | .macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5 | ||
252 | |||
253 | vpshufd $0b01001110, \GH, \T2 | ||
254 | vpshufd $0b01001110, \HK, \T3 | ||
255 | vpxor \GH , \T2, \T2 # T2 = (a1+a0) | ||
256 | vpxor \HK , \T3, \T3 # T3 = (b1+b0) | ||
257 | |||
258 | vpclmulqdq $0x11, \HK, \GH, \T1 # T1 = a1*b1 | ||
259 | vpclmulqdq $0x00, \HK, \GH, \GH # GH = a0*b0 | ||
260 | vpclmulqdq $0x00, \T3, \T2, \T2 # T2 = (a1+a0)*(b1+b0) | ||
261 | vpxor \GH, \T2,\T2 | ||
262 | vpxor \T1, \T2,\T2 # T2 = a0*b1+a1*b0 | ||
263 | |||
264 | vpslldq $8, \T2,\T3 # shift-L T3 2 DWs | ||
265 | vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs | ||
266 | vpxor \T3, \GH, \GH | ||
267 | vpxor \T2, \T1, \T1 # <T1:GH> = GH x HK | ||
268 | |||
269 | #first phase of the reduction | ||
270 | vpslld $31, \GH, \T2 # packed right shifting << 31 | ||
271 | vpslld $30, \GH, \T3 # packed right shifting shift << 30 | ||
272 | vpslld $25, \GH, \T4 # packed right shifting shift << 25 | ||
273 | |||
274 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
275 | vpxor \T4, \T2, \T2 | ||
276 | |||
277 | vpsrldq $4, \T2, \T5 # shift-R T5 1 DW | ||
278 | |||
279 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
280 | vpxor \T2, \GH, \GH # first phase of the reduction complete | ||
281 | |||
282 | #second phase of the reduction | ||
283 | |||
284 | vpsrld $1,\GH, \T2 # packed left shifting >> 1 | ||
285 | vpsrld $2,\GH, \T3 # packed left shifting >> 2 | ||
286 | vpsrld $7,\GH, \T4 # packed left shifting >> 7 | ||
287 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
288 | vpxor \T4, \T2, \T2 | ||
289 | |||
290 | vpxor \T5, \T2, \T2 | ||
291 | vpxor \T2, \GH, \GH | ||
292 | vpxor \T1, \GH, \GH # the result is in GH | ||
293 | |||
294 | |||
295 | .endm | ||
296 | |||
297 | .macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6 | ||
298 | |||
299 | # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i | ||
300 | vmovdqa \HK, \T5 | ||
301 | |||
302 | vpshufd $0b01001110, \T5, \T1 | ||
303 | vpxor \T5, \T1, \T1 | ||
304 | vmovdqa \T1, HashKey_k(arg1) | ||
305 | |||
306 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly | ||
307 | vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly | ||
308 | vpshufd $0b01001110, \T5, \T1 | ||
309 | vpxor \T5, \T1, \T1 | ||
310 | vmovdqa \T1, HashKey_2_k(arg1) | ||
311 | |||
312 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly | ||
313 | vmovdqa \T5, HashKey_3(arg1) | ||
314 | vpshufd $0b01001110, \T5, \T1 | ||
315 | vpxor \T5, \T1, \T1 | ||
316 | vmovdqa \T1, HashKey_3_k(arg1) | ||
317 | |||
318 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly | ||
319 | vmovdqa \T5, HashKey_4(arg1) | ||
320 | vpshufd $0b01001110, \T5, \T1 | ||
321 | vpxor \T5, \T1, \T1 | ||
322 | vmovdqa \T1, HashKey_4_k(arg1) | ||
323 | |||
324 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly | ||
325 | vmovdqa \T5, HashKey_5(arg1) | ||
326 | vpshufd $0b01001110, \T5, \T1 | ||
327 | vpxor \T5, \T1, \T1 | ||
328 | vmovdqa \T1, HashKey_5_k(arg1) | ||
329 | |||
330 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly | ||
331 | vmovdqa \T5, HashKey_6(arg1) | ||
332 | vpshufd $0b01001110, \T5, \T1 | ||
333 | vpxor \T5, \T1, \T1 | ||
334 | vmovdqa \T1, HashKey_6_k(arg1) | ||
335 | |||
336 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly | ||
337 | vmovdqa \T5, HashKey_7(arg1) | ||
338 | vpshufd $0b01001110, \T5, \T1 | ||
339 | vpxor \T5, \T1, \T1 | ||
340 | vmovdqa \T1, HashKey_7_k(arg1) | ||
341 | |||
342 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly | ||
343 | vmovdqa \T5, HashKey_8(arg1) | ||
344 | vpshufd $0b01001110, \T5, \T1 | ||
345 | vpxor \T5, \T1, \T1 | ||
346 | vmovdqa \T1, HashKey_8_k(arg1) | ||
347 | |||
348 | .endm | ||
349 | |||
350 | ## if a = number of total plaintext bytes | ||
351 | ## b = floor(a/16) | ||
352 | ## num_initial_blocks = b mod 4# | ||
353 | ## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext | ||
354 | ## r10, r11, r12, rax are clobbered | ||
355 | ## arg1, arg2, arg3, r14 are used as a pointer only, not modified | ||
356 | |||
357 | .macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC | ||
358 | i = (8-\num_initial_blocks) | ||
359 | setreg | ||
360 | |||
361 | mov arg6, %r10 # r10 = AAD | ||
362 | mov arg7, %r12 # r12 = aadLen | ||
363 | |||
364 | |||
365 | mov %r12, %r11 | ||
366 | |||
367 | vpxor reg_i, reg_i, reg_i | ||
368 | _get_AAD_loop\@: | ||
369 | vmovd (%r10), \T1 | ||
370 | vpslldq $12, \T1, \T1 | ||
371 | vpsrldq $4, reg_i, reg_i | ||
372 | vpxor \T1, reg_i, reg_i | ||
373 | |||
374 | add $4, %r10 | ||
375 | sub $4, %r12 | ||
376 | jg _get_AAD_loop\@ | ||
377 | |||
378 | |||
379 | cmp $16, %r11 | ||
380 | je _get_AAD_loop2_done\@ | ||
381 | mov $16, %r12 | ||
382 | |||
383 | _get_AAD_loop2\@: | ||
384 | vpsrldq $4, reg_i, reg_i | ||
385 | sub $4, %r12 | ||
386 | cmp %r11, %r12 | ||
387 | jg _get_AAD_loop2\@ | ||
388 | |||
389 | _get_AAD_loop2_done\@: | ||
390 | |||
391 | #byte-reflect the AAD data | ||
392 | vpshufb SHUF_MASK(%rip), reg_i, reg_i | ||
393 | |||
394 | # initialize the data pointer offset as zero | ||
395 | xor %r11, %r11 | ||
396 | |||
397 | # start AES for num_initial_blocks blocks | ||
398 | mov arg5, %rax # rax = *Y0 | ||
399 | vmovdqu (%rax), \CTR # CTR = Y0 | ||
400 | vpshufb SHUF_MASK(%rip), \CTR, \CTR | ||
401 | |||
402 | |||
403 | i = (9-\num_initial_blocks) | ||
404 | setreg | ||
405 | .rep \num_initial_blocks | ||
406 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
407 | vmovdqa \CTR, reg_i | ||
408 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap | ||
409 | i = (i+1) | ||
410 | setreg | ||
411 | .endr | ||
412 | |||
413 | vmovdqa (arg1), \T_key | ||
414 | i = (9-\num_initial_blocks) | ||
415 | setreg | ||
416 | .rep \num_initial_blocks | ||
417 | vpxor \T_key, reg_i, reg_i | ||
418 | i = (i+1) | ||
419 | setreg | ||
420 | .endr | ||
421 | |||
422 | j = 1 | ||
423 | setreg | ||
424 | .rep 9 | ||
425 | vmovdqa 16*j(arg1), \T_key | ||
426 | i = (9-\num_initial_blocks) | ||
427 | setreg | ||
428 | .rep \num_initial_blocks | ||
429 | vaesenc \T_key, reg_i, reg_i | ||
430 | i = (i+1) | ||
431 | setreg | ||
432 | .endr | ||
433 | |||
434 | j = (j+1) | ||
435 | setreg | ||
436 | .endr | ||
437 | |||
438 | |||
439 | vmovdqa 16*10(arg1), \T_key | ||
440 | i = (9-\num_initial_blocks) | ||
441 | setreg | ||
442 | .rep \num_initial_blocks | ||
443 | vaesenclast \T_key, reg_i, reg_i | ||
444 | i = (i+1) | ||
445 | setreg | ||
446 | .endr | ||
447 | |||
448 | i = (9-\num_initial_blocks) | ||
449 | setreg | ||
450 | .rep \num_initial_blocks | ||
451 | vmovdqu (arg3, %r11), \T1 | ||
452 | vpxor \T1, reg_i, reg_i | ||
453 | vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for num_initial_blocks blocks | ||
454 | add $16, %r11 | ||
455 | .if \ENC_DEC == DEC | ||
456 | vmovdqa \T1, reg_i | ||
457 | .endif | ||
458 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations | ||
459 | i = (i+1) | ||
460 | setreg | ||
461 | .endr | ||
462 | |||
463 | |||
464 | i = (8-\num_initial_blocks) | ||
465 | j = (9-\num_initial_blocks) | ||
466 | setreg | ||
467 | GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6 | ||
468 | |||
469 | .rep \num_initial_blocks | ||
470 | vpxor reg_i, reg_j, reg_j | ||
471 | GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks | ||
472 | i = (i+1) | ||
473 | j = (j+1) | ||
474 | setreg | ||
475 | .endr | ||
476 | # XMM8 has the combined result here | ||
477 | |||
478 | vmovdqa \XMM8, TMP1(%rsp) | ||
479 | vmovdqa \XMM8, \T3 | ||
480 | |||
481 | cmp $128, %r13 | ||
482 | jl _initial_blocks_done\@ # no need for precomputed constants | ||
483 | |||
484 | ############################################################################### | ||
485 | # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i | ||
486 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
487 | vmovdqa \CTR, \XMM1 | ||
488 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
489 | |||
490 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
491 | vmovdqa \CTR, \XMM2 | ||
492 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
493 | |||
494 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
495 | vmovdqa \CTR, \XMM3 | ||
496 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
497 | |||
498 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
499 | vmovdqa \CTR, \XMM4 | ||
500 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
501 | |||
502 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
503 | vmovdqa \CTR, \XMM5 | ||
504 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
505 | |||
506 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
507 | vmovdqa \CTR, \XMM6 | ||
508 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
509 | |||
510 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
511 | vmovdqa \CTR, \XMM7 | ||
512 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
513 | |||
514 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
515 | vmovdqa \CTR, \XMM8 | ||
516 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
517 | |||
518 | vmovdqa (arg1), \T_key | ||
519 | vpxor \T_key, \XMM1, \XMM1 | ||
520 | vpxor \T_key, \XMM2, \XMM2 | ||
521 | vpxor \T_key, \XMM3, \XMM3 | ||
522 | vpxor \T_key, \XMM4, \XMM4 | ||
523 | vpxor \T_key, \XMM5, \XMM5 | ||
524 | vpxor \T_key, \XMM6, \XMM6 | ||
525 | vpxor \T_key, \XMM7, \XMM7 | ||
526 | vpxor \T_key, \XMM8, \XMM8 | ||
527 | |||
528 | i = 1 | ||
529 | setreg | ||
530 | .rep 9 # do 9 rounds | ||
531 | vmovdqa 16*i(arg1), \T_key | ||
532 | vaesenc \T_key, \XMM1, \XMM1 | ||
533 | vaesenc \T_key, \XMM2, \XMM2 | ||
534 | vaesenc \T_key, \XMM3, \XMM3 | ||
535 | vaesenc \T_key, \XMM4, \XMM4 | ||
536 | vaesenc \T_key, \XMM5, \XMM5 | ||
537 | vaesenc \T_key, \XMM6, \XMM6 | ||
538 | vaesenc \T_key, \XMM7, \XMM7 | ||
539 | vaesenc \T_key, \XMM8, \XMM8 | ||
540 | i = (i+1) | ||
541 | setreg | ||
542 | .endr | ||
543 | |||
544 | |||
545 | vmovdqa 16*i(arg1), \T_key | ||
546 | vaesenclast \T_key, \XMM1, \XMM1 | ||
547 | vaesenclast \T_key, \XMM2, \XMM2 | ||
548 | vaesenclast \T_key, \XMM3, \XMM3 | ||
549 | vaesenclast \T_key, \XMM4, \XMM4 | ||
550 | vaesenclast \T_key, \XMM5, \XMM5 | ||
551 | vaesenclast \T_key, \XMM6, \XMM6 | ||
552 | vaesenclast \T_key, \XMM7, \XMM7 | ||
553 | vaesenclast \T_key, \XMM8, \XMM8 | ||
554 | |||
555 | vmovdqu (arg3, %r11), \T1 | ||
556 | vpxor \T1, \XMM1, \XMM1 | ||
557 | vmovdqu \XMM1, (arg2 , %r11) | ||
558 | .if \ENC_DEC == DEC | ||
559 | vmovdqa \T1, \XMM1 | ||
560 | .endif | ||
561 | |||
562 | vmovdqu 16*1(arg3, %r11), \T1 | ||
563 | vpxor \T1, \XMM2, \XMM2 | ||
564 | vmovdqu \XMM2, 16*1(arg2 , %r11) | ||
565 | .if \ENC_DEC == DEC | ||
566 | vmovdqa \T1, \XMM2 | ||
567 | .endif | ||
568 | |||
569 | vmovdqu 16*2(arg3, %r11), \T1 | ||
570 | vpxor \T1, \XMM3, \XMM3 | ||
571 | vmovdqu \XMM3, 16*2(arg2 , %r11) | ||
572 | .if \ENC_DEC == DEC | ||
573 | vmovdqa \T1, \XMM3 | ||
574 | .endif | ||
575 | |||
576 | vmovdqu 16*3(arg3, %r11), \T1 | ||
577 | vpxor \T1, \XMM4, \XMM4 | ||
578 | vmovdqu \XMM4, 16*3(arg2 , %r11) | ||
579 | .if \ENC_DEC == DEC | ||
580 | vmovdqa \T1, \XMM4 | ||
581 | .endif | ||
582 | |||
583 | vmovdqu 16*4(arg3, %r11), \T1 | ||
584 | vpxor \T1, \XMM5, \XMM5 | ||
585 | vmovdqu \XMM5, 16*4(arg2 , %r11) | ||
586 | .if \ENC_DEC == DEC | ||
587 | vmovdqa \T1, \XMM5 | ||
588 | .endif | ||
589 | |||
590 | vmovdqu 16*5(arg3, %r11), \T1 | ||
591 | vpxor \T1, \XMM6, \XMM6 | ||
592 | vmovdqu \XMM6, 16*5(arg2 , %r11) | ||
593 | .if \ENC_DEC == DEC | ||
594 | vmovdqa \T1, \XMM6 | ||
595 | .endif | ||
596 | |||
597 | vmovdqu 16*6(arg3, %r11), \T1 | ||
598 | vpxor \T1, \XMM7, \XMM7 | ||
599 | vmovdqu \XMM7, 16*6(arg2 , %r11) | ||
600 | .if \ENC_DEC == DEC | ||
601 | vmovdqa \T1, \XMM7 | ||
602 | .endif | ||
603 | |||
604 | vmovdqu 16*7(arg3, %r11), \T1 | ||
605 | vpxor \T1, \XMM8, \XMM8 | ||
606 | vmovdqu \XMM8, 16*7(arg2 , %r11) | ||
607 | .if \ENC_DEC == DEC | ||
608 | vmovdqa \T1, \XMM8 | ||
609 | .endif | ||
610 | |||
611 | add $128, %r11 | ||
612 | |||
613 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
614 | vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with the corresponding ciphertext | ||
615 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
616 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
617 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
618 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
619 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
620 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
621 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
622 | |||
623 | ############################################################################### | ||
624 | |||
625 | _initial_blocks_done\@: | ||
626 | |||
627 | .endm | ||
628 | |||
629 | # encrypt 8 blocks at a time | ||
630 | # ghash the 8 previously encrypted ciphertext blocks | ||
631 | # arg1, arg2, arg3 are used as pointers only, not modified | ||
632 | # r11 is the data offset value | ||
633 | .macro GHASH_8_ENCRYPT_8_PARALLEL_AVX T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC | ||
634 | |||
635 | vmovdqa \XMM1, \T2 | ||
636 | vmovdqa \XMM2, TMP2(%rsp) | ||
637 | vmovdqa \XMM3, TMP3(%rsp) | ||
638 | vmovdqa \XMM4, TMP4(%rsp) | ||
639 | vmovdqa \XMM5, TMP5(%rsp) | ||
640 | vmovdqa \XMM6, TMP6(%rsp) | ||
641 | vmovdqa \XMM7, TMP7(%rsp) | ||
642 | vmovdqa \XMM8, TMP8(%rsp) | ||
643 | |||
644 | .if \loop_idx == in_order | ||
645 | vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT | ||
646 | vpaddd ONE(%rip), \XMM1, \XMM2 | ||
647 | vpaddd ONE(%rip), \XMM2, \XMM3 | ||
648 | vpaddd ONE(%rip), \XMM3, \XMM4 | ||
649 | vpaddd ONE(%rip), \XMM4, \XMM5 | ||
650 | vpaddd ONE(%rip), \XMM5, \XMM6 | ||
651 | vpaddd ONE(%rip), \XMM6, \XMM7 | ||
652 | vpaddd ONE(%rip), \XMM7, \XMM8 | ||
653 | vmovdqa \XMM8, \CTR | ||
654 | |||
655 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
656 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
657 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
658 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
659 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
660 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
661 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
662 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
663 | .else | ||
664 | vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT | ||
665 | vpaddd ONEf(%rip), \XMM1, \XMM2 | ||
666 | vpaddd ONEf(%rip), \XMM2, \XMM3 | ||
667 | vpaddd ONEf(%rip), \XMM3, \XMM4 | ||
668 | vpaddd ONEf(%rip), \XMM4, \XMM5 | ||
669 | vpaddd ONEf(%rip), \XMM5, \XMM6 | ||
670 | vpaddd ONEf(%rip), \XMM6, \XMM7 | ||
671 | vpaddd ONEf(%rip), \XMM7, \XMM8 | ||
672 | vmovdqa \XMM8, \CTR | ||
673 | .endif | ||
674 | |||
675 | |||
676 | ####################################################################### | ||
677 | |||
678 | vmovdqu (arg1), \T1 | ||
679 | vpxor \T1, \XMM1, \XMM1 | ||
680 | vpxor \T1, \XMM2, \XMM2 | ||
681 | vpxor \T1, \XMM3, \XMM3 | ||
682 | vpxor \T1, \XMM4, \XMM4 | ||
683 | vpxor \T1, \XMM5, \XMM5 | ||
684 | vpxor \T1, \XMM6, \XMM6 | ||
685 | vpxor \T1, \XMM7, \XMM7 | ||
686 | vpxor \T1, \XMM8, \XMM8 | ||
687 | |||
688 | ####################################################################### | ||
689 | |||
690 | |||
691 | |||
692 | |||
693 | |||
694 | vmovdqu 16*1(arg1), \T1 | ||
695 | vaesenc \T1, \XMM1, \XMM1 | ||
696 | vaesenc \T1, \XMM2, \XMM2 | ||
697 | vaesenc \T1, \XMM3, \XMM3 | ||
698 | vaesenc \T1, \XMM4, \XMM4 | ||
699 | vaesenc \T1, \XMM5, \XMM5 | ||
700 | vaesenc \T1, \XMM6, \XMM6 | ||
701 | vaesenc \T1, \XMM7, \XMM7 | ||
702 | vaesenc \T1, \XMM8, \XMM8 | ||
703 | |||
704 | vmovdqu 16*2(arg1), \T1 | ||
705 | vaesenc \T1, \XMM1, \XMM1 | ||
706 | vaesenc \T1, \XMM2, \XMM2 | ||
707 | vaesenc \T1, \XMM3, \XMM3 | ||
708 | vaesenc \T1, \XMM4, \XMM4 | ||
709 | vaesenc \T1, \XMM5, \XMM5 | ||
710 | vaesenc \T1, \XMM6, \XMM6 | ||
711 | vaesenc \T1, \XMM7, \XMM7 | ||
712 | vaesenc \T1, \XMM8, \XMM8 | ||
713 | |||
714 | |||
715 | ####################################################################### | ||
716 | |||
717 | vmovdqa HashKey_8(arg1), \T5 | ||
718 | vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 | ||
719 | vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 | ||
720 | |||
721 | vpshufd $0b01001110, \T2, \T6 | ||
722 | vpxor \T2, \T6, \T6 | ||
723 | |||
724 | vmovdqa HashKey_8_k(arg1), \T5 | ||
725 | vpclmulqdq $0x00, \T5, \T6, \T6 | ||
726 | |||
727 | vmovdqu 16*3(arg1), \T1 | ||
728 | vaesenc \T1, \XMM1, \XMM1 | ||
729 | vaesenc \T1, \XMM2, \XMM2 | ||
730 | vaesenc \T1, \XMM3, \XMM3 | ||
731 | vaesenc \T1, \XMM4, \XMM4 | ||
732 | vaesenc \T1, \XMM5, \XMM5 | ||
733 | vaesenc \T1, \XMM6, \XMM6 | ||
734 | vaesenc \T1, \XMM7, \XMM7 | ||
735 | vaesenc \T1, \XMM8, \XMM8 | ||
736 | |||
737 | vmovdqa TMP2(%rsp), \T1 | ||
738 | vmovdqa HashKey_7(arg1), \T5 | ||
739 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
740 | vpxor \T3, \T4, \T4 | ||
741 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
742 | vpxor \T3, \T7, \T7 | ||
743 | |||
744 | vpshufd $0b01001110, \T1, \T3 | ||
745 | vpxor \T1, \T3, \T3 | ||
746 | vmovdqa HashKey_7_k(arg1), \T5 | ||
747 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
748 | vpxor \T3, \T6, \T6 | ||
749 | |||
750 | vmovdqu 16*4(arg1), \T1 | ||
751 | vaesenc \T1, \XMM1, \XMM1 | ||
752 | vaesenc \T1, \XMM2, \XMM2 | ||
753 | vaesenc \T1, \XMM3, \XMM3 | ||
754 | vaesenc \T1, \XMM4, \XMM4 | ||
755 | vaesenc \T1, \XMM5, \XMM5 | ||
756 | vaesenc \T1, \XMM6, \XMM6 | ||
757 | vaesenc \T1, \XMM7, \XMM7 | ||
758 | vaesenc \T1, \XMM8, \XMM8 | ||
759 | |||
760 | ####################################################################### | ||
761 | |||
762 | vmovdqa TMP3(%rsp), \T1 | ||
763 | vmovdqa HashKey_6(arg1), \T5 | ||
764 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
765 | vpxor \T3, \T4, \T4 | ||
766 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
767 | vpxor \T3, \T7, \T7 | ||
768 | |||
769 | vpshufd $0b01001110, \T1, \T3 | ||
770 | vpxor \T1, \T3, \T3 | ||
771 | vmovdqa HashKey_6_k(arg1), \T5 | ||
772 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
773 | vpxor \T3, \T6, \T6 | ||
774 | |||
775 | vmovdqu 16*5(arg1), \T1 | ||
776 | vaesenc \T1, \XMM1, \XMM1 | ||
777 | vaesenc \T1, \XMM2, \XMM2 | ||
778 | vaesenc \T1, \XMM3, \XMM3 | ||
779 | vaesenc \T1, \XMM4, \XMM4 | ||
780 | vaesenc \T1, \XMM5, \XMM5 | ||
781 | vaesenc \T1, \XMM6, \XMM6 | ||
782 | vaesenc \T1, \XMM7, \XMM7 | ||
783 | vaesenc \T1, \XMM8, \XMM8 | ||
784 | |||
785 | vmovdqa TMP4(%rsp), \T1 | ||
786 | vmovdqa HashKey_5(arg1), \T5 | ||
787 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
788 | vpxor \T3, \T4, \T4 | ||
789 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
790 | vpxor \T3, \T7, \T7 | ||
791 | |||
792 | vpshufd $0b01001110, \T1, \T3 | ||
793 | vpxor \T1, \T3, \T3 | ||
794 | vmovdqa HashKey_5_k(arg1), \T5 | ||
795 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
796 | vpxor \T3, \T6, \T6 | ||
797 | |||
798 | vmovdqu 16*6(arg1), \T1 | ||
799 | vaesenc \T1, \XMM1, \XMM1 | ||
800 | vaesenc \T1, \XMM2, \XMM2 | ||
801 | vaesenc \T1, \XMM3, \XMM3 | ||
802 | vaesenc \T1, \XMM4, \XMM4 | ||
803 | vaesenc \T1, \XMM5, \XMM5 | ||
804 | vaesenc \T1, \XMM6, \XMM6 | ||
805 | vaesenc \T1, \XMM7, \XMM7 | ||
806 | vaesenc \T1, \XMM8, \XMM8 | ||
807 | |||
808 | |||
809 | vmovdqa TMP5(%rsp), \T1 | ||
810 | vmovdqa HashKey_4(arg1), \T5 | ||
811 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
812 | vpxor \T3, \T4, \T4 | ||
813 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
814 | vpxor \T3, \T7, \T7 | ||
815 | |||
816 | vpshufd $0b01001110, \T1, \T3 | ||
817 | vpxor \T1, \T3, \T3 | ||
818 | vmovdqa HashKey_4_k(arg1), \T5 | ||
819 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
820 | vpxor \T3, \T6, \T6 | ||
821 | |||
822 | vmovdqu 16*7(arg1), \T1 | ||
823 | vaesenc \T1, \XMM1, \XMM1 | ||
824 | vaesenc \T1, \XMM2, \XMM2 | ||
825 | vaesenc \T1, \XMM3, \XMM3 | ||
826 | vaesenc \T1, \XMM4, \XMM4 | ||
827 | vaesenc \T1, \XMM5, \XMM5 | ||
828 | vaesenc \T1, \XMM6, \XMM6 | ||
829 | vaesenc \T1, \XMM7, \XMM7 | ||
830 | vaesenc \T1, \XMM8, \XMM8 | ||
831 | |||
832 | vmovdqa TMP6(%rsp), \T1 | ||
833 | vmovdqa HashKey_3(arg1), \T5 | ||
834 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
835 | vpxor \T3, \T4, \T4 | ||
836 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
837 | vpxor \T3, \T7, \T7 | ||
838 | |||
839 | vpshufd $0b01001110, \T1, \T3 | ||
840 | vpxor \T1, \T3, \T3 | ||
841 | vmovdqa HashKey_3_k(arg1), \T5 | ||
842 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
843 | vpxor \T3, \T6, \T6 | ||
844 | |||
845 | |||
846 | vmovdqu 16*8(arg1), \T1 | ||
847 | vaesenc \T1, \XMM1, \XMM1 | ||
848 | vaesenc \T1, \XMM2, \XMM2 | ||
849 | vaesenc \T1, \XMM3, \XMM3 | ||
850 | vaesenc \T1, \XMM4, \XMM4 | ||
851 | vaesenc \T1, \XMM5, \XMM5 | ||
852 | vaesenc \T1, \XMM6, \XMM6 | ||
853 | vaesenc \T1, \XMM7, \XMM7 | ||
854 | vaesenc \T1, \XMM8, \XMM8 | ||
855 | |||
856 | vmovdqa TMP7(%rsp), \T1 | ||
857 | vmovdqa HashKey_2(arg1), \T5 | ||
858 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
859 | vpxor \T3, \T4, \T4 | ||
860 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
861 | vpxor \T3, \T7, \T7 | ||
862 | |||
863 | vpshufd $0b01001110, \T1, \T3 | ||
864 | vpxor \T1, \T3, \T3 | ||
865 | vmovdqa HashKey_2_k(arg1), \T5 | ||
866 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
867 | vpxor \T3, \T6, \T6 | ||
868 | |||
869 | ####################################################################### | ||
870 | |||
871 | vmovdqu 16*9(arg1), \T5 | ||
872 | vaesenc \T5, \XMM1, \XMM1 | ||
873 | vaesenc \T5, \XMM2, \XMM2 | ||
874 | vaesenc \T5, \XMM3, \XMM3 | ||
875 | vaesenc \T5, \XMM4, \XMM4 | ||
876 | vaesenc \T5, \XMM5, \XMM5 | ||
877 | vaesenc \T5, \XMM6, \XMM6 | ||
878 | vaesenc \T5, \XMM7, \XMM7 | ||
879 | vaesenc \T5, \XMM8, \XMM8 | ||
880 | |||
881 | vmovdqa TMP8(%rsp), \T1 | ||
882 | vmovdqa HashKey(arg1), \T5 | ||
883 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
884 | vpxor \T3, \T4, \T4 | ||
885 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
886 | vpxor \T3, \T7, \T7 | ||
887 | |||
888 | vpshufd $0b01001110, \T1, \T3 | ||
889 | vpxor \T1, \T3, \T3 | ||
890 | vmovdqa HashKey_k(arg1), \T5 | ||
891 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
892 | vpxor \T3, \T6, \T6 | ||
893 | |||
894 | vpxor \T4, \T6, \T6 | ||
895 | vpxor \T7, \T6, \T6 | ||
896 | |||
897 | vmovdqu 16*10(arg1), \T5 | ||
898 | |||
899 | i = 0 | ||
900 | j = 1 | ||
901 | setreg | ||
902 | .rep 8 | ||
903 | vpxor 16*i(arg3, %r11), \T5, \T2 | ||
904 | .if \ENC_DEC == ENC | ||
905 | vaesenclast \T2, reg_j, reg_j | ||
906 | .else | ||
907 | vaesenclast \T2, reg_j, \T3 | ||
908 | vmovdqu 16*i(arg3, %r11), reg_j | ||
909 | vmovdqu \T3, 16*i(arg2, %r11) | ||
910 | .endif | ||
911 | i = (i+1) | ||
912 | j = (j+1) | ||
913 | setreg | ||
914 | .endr | ||
915 | ####################################################################### | ||
916 | |||
917 | |||
918 | vpslldq $8, \T6, \T3 # shift-L T3 2 DWs | ||
919 | vpsrldq $8, \T6, \T6 # shift-R T2 2 DWs | ||
920 | vpxor \T3, \T7, \T7 | ||
921 | vpxor \T4, \T6, \T6 # accumulate the results in T6:T7 | ||
922 | |||
923 | |||
924 | |||
925 | ####################################################################### | ||
926 | #first phase of the reduction | ||
927 | ####################################################################### | ||
928 | vpslld $31, \T7, \T2 # packed right shifting << 31 | ||
929 | vpslld $30, \T7, \T3 # packed right shifting shift << 30 | ||
930 | vpslld $25, \T7, \T4 # packed right shifting shift << 25 | ||
931 | |||
932 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
933 | vpxor \T4, \T2, \T2 | ||
934 | |||
935 | vpsrldq $4, \T2, \T1 # shift-R T1 1 DW | ||
936 | |||
937 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
938 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
939 | ####################################################################### | ||
940 | .if \ENC_DEC == ENC | ||
941 | vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer | ||
942 | vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer | ||
943 | vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer | ||
944 | vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer | ||
945 | vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer | ||
946 | vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer | ||
947 | vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer | ||
948 | vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer | ||
949 | .endif | ||
950 | |||
951 | ####################################################################### | ||
952 | #second phase of the reduction | ||
953 | vpsrld $1, \T7, \T2 # packed left shifting >> 1 | ||
954 | vpsrld $2, \T7, \T3 # packed left shifting >> 2 | ||
955 | vpsrld $7, \T7, \T4 # packed left shifting >> 7 | ||
956 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
957 | vpxor \T4, \T2, \T2 | ||
958 | |||
959 | vpxor \T1, \T2, \T2 | ||
960 | vpxor \T2, \T7, \T7 | ||
961 | vpxor \T7, \T6, \T6 # the result is in T6 | ||
962 | ####################################################################### | ||
963 | |||
964 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
965 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
966 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
967 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
968 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
969 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
970 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
971 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
972 | |||
973 | |||
974 | vpxor \T6, \XMM1, \XMM1 | ||
975 | |||
976 | |||
977 | |||
978 | .endm | ||
979 | |||
980 | |||
981 | # GHASH the last 4 ciphertext blocks. | ||
982 | .macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 | ||
983 | |||
984 | ## Karatsuba Method | ||
985 | |||
986 | |||
987 | vpshufd $0b01001110, \XMM1, \T2 | ||
988 | vpxor \XMM1, \T2, \T2 | ||
989 | vmovdqa HashKey_8(arg1), \T5 | ||
990 | vpclmulqdq $0x11, \T5, \XMM1, \T6 | ||
991 | vpclmulqdq $0x00, \T5, \XMM1, \T7 | ||
992 | |||
993 | vmovdqa HashKey_8_k(arg1), \T3 | ||
994 | vpclmulqdq $0x00, \T3, \T2, \XMM1 | ||
995 | |||
996 | ###################### | ||
997 | |||
998 | vpshufd $0b01001110, \XMM2, \T2 | ||
999 | vpxor \XMM2, \T2, \T2 | ||
1000 | vmovdqa HashKey_7(arg1), \T5 | ||
1001 | vpclmulqdq $0x11, \T5, \XMM2, \T4 | ||
1002 | vpxor \T4, \T6, \T6 | ||
1003 | |||
1004 | vpclmulqdq $0x00, \T5, \XMM2, \T4 | ||
1005 | vpxor \T4, \T7, \T7 | ||
1006 | |||
1007 | vmovdqa HashKey_7_k(arg1), \T3 | ||
1008 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1009 | vpxor \T2, \XMM1, \XMM1 | ||
1010 | |||
1011 | ###################### | ||
1012 | |||
1013 | vpshufd $0b01001110, \XMM3, \T2 | ||
1014 | vpxor \XMM3, \T2, \T2 | ||
1015 | vmovdqa HashKey_6(arg1), \T5 | ||
1016 | vpclmulqdq $0x11, \T5, \XMM3, \T4 | ||
1017 | vpxor \T4, \T6, \T6 | ||
1018 | |||
1019 | vpclmulqdq $0x00, \T5, \XMM3, \T4 | ||
1020 | vpxor \T4, \T7, \T7 | ||
1021 | |||
1022 | vmovdqa HashKey_6_k(arg1), \T3 | ||
1023 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1024 | vpxor \T2, \XMM1, \XMM1 | ||
1025 | |||
1026 | ###################### | ||
1027 | |||
1028 | vpshufd $0b01001110, \XMM4, \T2 | ||
1029 | vpxor \XMM4, \T2, \T2 | ||
1030 | vmovdqa HashKey_5(arg1), \T5 | ||
1031 | vpclmulqdq $0x11, \T5, \XMM4, \T4 | ||
1032 | vpxor \T4, \T6, \T6 | ||
1033 | |||
1034 | vpclmulqdq $0x00, \T5, \XMM4, \T4 | ||
1035 | vpxor \T4, \T7, \T7 | ||
1036 | |||
1037 | vmovdqa HashKey_5_k(arg1), \T3 | ||
1038 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1039 | vpxor \T2, \XMM1, \XMM1 | ||
1040 | |||
1041 | ###################### | ||
1042 | |||
1043 | vpshufd $0b01001110, \XMM5, \T2 | ||
1044 | vpxor \XMM5, \T2, \T2 | ||
1045 | vmovdqa HashKey_4(arg1), \T5 | ||
1046 | vpclmulqdq $0x11, \T5, \XMM5, \T4 | ||
1047 | vpxor \T4, \T6, \T6 | ||
1048 | |||
1049 | vpclmulqdq $0x00, \T5, \XMM5, \T4 | ||
1050 | vpxor \T4, \T7, \T7 | ||
1051 | |||
1052 | vmovdqa HashKey_4_k(arg1), \T3 | ||
1053 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1054 | vpxor \T2, \XMM1, \XMM1 | ||
1055 | |||
1056 | ###################### | ||
1057 | |||
1058 | vpshufd $0b01001110, \XMM6, \T2 | ||
1059 | vpxor \XMM6, \T2, \T2 | ||
1060 | vmovdqa HashKey_3(arg1), \T5 | ||
1061 | vpclmulqdq $0x11, \T5, \XMM6, \T4 | ||
1062 | vpxor \T4, \T6, \T6 | ||
1063 | |||
1064 | vpclmulqdq $0x00, \T5, \XMM6, \T4 | ||
1065 | vpxor \T4, \T7, \T7 | ||
1066 | |||
1067 | vmovdqa HashKey_3_k(arg1), \T3 | ||
1068 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1069 | vpxor \T2, \XMM1, \XMM1 | ||
1070 | |||
1071 | ###################### | ||
1072 | |||
1073 | vpshufd $0b01001110, \XMM7, \T2 | ||
1074 | vpxor \XMM7, \T2, \T2 | ||
1075 | vmovdqa HashKey_2(arg1), \T5 | ||
1076 | vpclmulqdq $0x11, \T5, \XMM7, \T4 | ||
1077 | vpxor \T4, \T6, \T6 | ||
1078 | |||
1079 | vpclmulqdq $0x00, \T5, \XMM7, \T4 | ||
1080 | vpxor \T4, \T7, \T7 | ||
1081 | |||
1082 | vmovdqa HashKey_2_k(arg1), \T3 | ||
1083 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1084 | vpxor \T2, \XMM1, \XMM1 | ||
1085 | |||
1086 | ###################### | ||
1087 | |||
1088 | vpshufd $0b01001110, \XMM8, \T2 | ||
1089 | vpxor \XMM8, \T2, \T2 | ||
1090 | vmovdqa HashKey(arg1), \T5 | ||
1091 | vpclmulqdq $0x11, \T5, \XMM8, \T4 | ||
1092 | vpxor \T4, \T6, \T6 | ||
1093 | |||
1094 | vpclmulqdq $0x00, \T5, \XMM8, \T4 | ||
1095 | vpxor \T4, \T7, \T7 | ||
1096 | |||
1097 | vmovdqa HashKey_k(arg1), \T3 | ||
1098 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1099 | |||
1100 | vpxor \T2, \XMM1, \XMM1 | ||
1101 | vpxor \T6, \XMM1, \XMM1 | ||
1102 | vpxor \T7, \XMM1, \T2 | ||
1103 | |||
1104 | |||
1105 | |||
1106 | |||
1107 | vpslldq $8, \T2, \T4 | ||
1108 | vpsrldq $8, \T2, \T2 | ||
1109 | |||
1110 | vpxor \T4, \T7, \T7 | ||
1111 | vpxor \T2, \T6, \T6 # <T6:T7> holds the result of | ||
1112 | # the accumulated carry-less multiplications | ||
1113 | |||
1114 | ####################################################################### | ||
1115 | #first phase of the reduction | ||
1116 | vpslld $31, \T7, \T2 # packed right shifting << 31 | ||
1117 | vpslld $30, \T7, \T3 # packed right shifting shift << 30 | ||
1118 | vpslld $25, \T7, \T4 # packed right shifting shift << 25 | ||
1119 | |||
1120 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
1121 | vpxor \T4, \T2, \T2 | ||
1122 | |||
1123 | vpsrldq $4, \T2, \T1 # shift-R T1 1 DW | ||
1124 | |||
1125 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
1126 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
1127 | ####################################################################### | ||
1128 | |||
1129 | |||
1130 | #second phase of the reduction | ||
1131 | vpsrld $1, \T7, \T2 # packed left shifting >> 1 | ||
1132 | vpsrld $2, \T7, \T3 # packed left shifting >> 2 | ||
1133 | vpsrld $7, \T7, \T4 # packed left shifting >> 7 | ||
1134 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
1135 | vpxor \T4, \T2, \T2 | ||
1136 | |||
1137 | vpxor \T1, \T2, \T2 | ||
1138 | vpxor \T2, \T7, \T7 | ||
1139 | vpxor \T7, \T6, \T6 # the result is in T6 | ||
1140 | |||
1141 | .endm | ||
1142 | |||
1143 | |||
1144 | # combined for GCM encrypt and decrypt functions | ||
1145 | # clobbering all xmm registers | ||
1146 | # clobbering r10, r11, r12, r13, r14, r15 | ||
1147 | .macro GCM_ENC_DEC_AVX ENC_DEC | ||
1148 | |||
1149 | #the number of pushes must equal STACK_OFFSET | ||
1150 | push %r12 | ||
1151 | push %r13 | ||
1152 | push %r14 | ||
1153 | push %r15 | ||
1154 | |||
1155 | mov %rsp, %r14 | ||
1156 | |||
1157 | |||
1158 | |||
1159 | |||
1160 | sub $VARIABLE_OFFSET, %rsp | ||
1161 | and $~63, %rsp # align rsp to 64 bytes | ||
1162 | |||
1163 | |||
1164 | vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey | ||
1165 | |||
1166 | mov arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
1167 | and $-16, %r13 # r13 = r13 - (r13 mod 16) | ||
1168 | |||
1169 | mov %r13, %r12 | ||
1170 | shr $4, %r12 | ||
1171 | and $7, %r12 | ||
1172 | jz _initial_num_blocks_is_0\@ | ||
1173 | |||
1174 | cmp $7, %r12 | ||
1175 | je _initial_num_blocks_is_7\@ | ||
1176 | cmp $6, %r12 | ||
1177 | je _initial_num_blocks_is_6\@ | ||
1178 | cmp $5, %r12 | ||
1179 | je _initial_num_blocks_is_5\@ | ||
1180 | cmp $4, %r12 | ||
1181 | je _initial_num_blocks_is_4\@ | ||
1182 | cmp $3, %r12 | ||
1183 | je _initial_num_blocks_is_3\@ | ||
1184 | cmp $2, %r12 | ||
1185 | je _initial_num_blocks_is_2\@ | ||
1186 | |||
1187 | jmp _initial_num_blocks_is_1\@ | ||
1188 | |||
1189 | _initial_num_blocks_is_7\@: | ||
1190 | INITIAL_BLOCKS_AVX 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1191 | sub $16*7, %r13 | ||
1192 | jmp _initial_blocks_encrypted\@ | ||
1193 | |||
1194 | _initial_num_blocks_is_6\@: | ||
1195 | INITIAL_BLOCKS_AVX 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1196 | sub $16*6, %r13 | ||
1197 | jmp _initial_blocks_encrypted\@ | ||
1198 | |||
1199 | _initial_num_blocks_is_5\@: | ||
1200 | INITIAL_BLOCKS_AVX 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1201 | sub $16*5, %r13 | ||
1202 | jmp _initial_blocks_encrypted\@ | ||
1203 | |||
1204 | _initial_num_blocks_is_4\@: | ||
1205 | INITIAL_BLOCKS_AVX 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1206 | sub $16*4, %r13 | ||
1207 | jmp _initial_blocks_encrypted\@ | ||
1208 | |||
1209 | _initial_num_blocks_is_3\@: | ||
1210 | INITIAL_BLOCKS_AVX 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1211 | sub $16*3, %r13 | ||
1212 | jmp _initial_blocks_encrypted\@ | ||
1213 | |||
1214 | _initial_num_blocks_is_2\@: | ||
1215 | INITIAL_BLOCKS_AVX 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1216 | sub $16*2, %r13 | ||
1217 | jmp _initial_blocks_encrypted\@ | ||
1218 | |||
1219 | _initial_num_blocks_is_1\@: | ||
1220 | INITIAL_BLOCKS_AVX 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1221 | sub $16*1, %r13 | ||
1222 | jmp _initial_blocks_encrypted\@ | ||
1223 | |||
1224 | _initial_num_blocks_is_0\@: | ||
1225 | INITIAL_BLOCKS_AVX 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1226 | |||
1227 | |||
1228 | _initial_blocks_encrypted\@: | ||
1229 | cmp $0, %r13 | ||
1230 | je _zero_cipher_left\@ | ||
1231 | |||
1232 | sub $128, %r13 | ||
1233 | je _eight_cipher_left\@ | ||
1234 | |||
1235 | |||
1236 | |||
1237 | |||
1238 | vmovd %xmm9, %r15d | ||
1239 | and $255, %r15d | ||
1240 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1241 | |||
1242 | |||
1243 | _encrypt_by_8_new\@: | ||
1244 | cmp $(255-8), %r15d | ||
1245 | jg _encrypt_by_8\@ | ||
1246 | |||
1247 | |||
1248 | |||
1249 | add $8, %r15b | ||
1250 | GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC | ||
1251 | add $128, %r11 | ||
1252 | sub $128, %r13 | ||
1253 | jne _encrypt_by_8_new\@ | ||
1254 | |||
1255 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1256 | jmp _eight_cipher_left\@ | ||
1257 | |||
1258 | _encrypt_by_8\@: | ||
1259 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1260 | add $8, %r15b | ||
1261 | GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC | ||
1262 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1263 | add $128, %r11 | ||
1264 | sub $128, %r13 | ||
1265 | jne _encrypt_by_8_new\@ | ||
1266 | |||
1267 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1268 | |||
1269 | |||
1270 | |||
1271 | |||
1272 | _eight_cipher_left\@: | ||
1273 | GHASH_LAST_8_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 | ||
1274 | |||
1275 | |||
1276 | _zero_cipher_left\@: | ||
1277 | cmp $16, arg4 | ||
1278 | jl _only_less_than_16\@ | ||
1279 | |||
1280 | mov arg4, %r13 | ||
1281 | and $15, %r13 # r13 = (arg4 mod 16) | ||
1282 | |||
1283 | je _multiple_of_16_bytes\@ | ||
1284 | |||
1285 | # handle the last <16 Byte block seperately | ||
1286 | |||
1287 | |||
1288 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
1289 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1290 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
1291 | |||
1292 | sub $16, %r11 | ||
1293 | add %r13, %r11 | ||
1294 | vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block | ||
1295 | |||
1296 | lea SHIFT_MASK+16(%rip), %r12 | ||
1297 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
1298 | # able to shift 16-r13 bytes (r13 is the | ||
1299 | # number of bytes in plaintext mod 16) | ||
1300 | vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
1301 | vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes | ||
1302 | jmp _final_ghash_mul\@ | ||
1303 | |||
1304 | _only_less_than_16\@: | ||
1305 | # check for 0 length | ||
1306 | mov arg4, %r13 | ||
1307 | and $15, %r13 # r13 = (arg4 mod 16) | ||
1308 | |||
1309 | je _multiple_of_16_bytes\@ | ||
1310 | |||
1311 | # handle the last <16 Byte block separately | ||
1312 | |||
1313 | |||
1314 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
1315 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1316 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
1317 | |||
1318 | |||
1319 | lea SHIFT_MASK+16(%rip), %r12 | ||
1320 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
1321 | # able to shift 16-r13 bytes (r13 is the | ||
1322 | # number of bytes in plaintext mod 16) | ||
1323 | |||
1324 | _get_last_16_byte_loop\@: | ||
1325 | movb (arg3, %r11), %al | ||
1326 | movb %al, TMP1 (%rsp , %r11) | ||
1327 | add $1, %r11 | ||
1328 | cmp %r13, %r11 | ||
1329 | jne _get_last_16_byte_loop\@ | ||
1330 | |||
1331 | vmovdqu TMP1(%rsp), %xmm1 | ||
1332 | |||
1333 | sub $16, %r11 | ||
1334 | |||
1335 | _final_ghash_mul\@: | ||
1336 | .if \ENC_DEC == DEC | ||
1337 | vmovdqa %xmm1, %xmm2 | ||
1338 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
1339 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to | ||
1340 | # mask out top 16-r13 bytes of xmm9 | ||
1341 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
1342 | vpand %xmm1, %xmm2, %xmm2 | ||
1343 | vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 | ||
1344 | vpxor %xmm2, %xmm14, %xmm14 | ||
1345 | #GHASH computation for the last <16 Byte block | ||
1346 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1347 | sub %r13, %r11 | ||
1348 | add $16, %r11 | ||
1349 | .else | ||
1350 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
1351 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to | ||
1352 | # mask out top 16-r13 bytes of xmm9 | ||
1353 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
1354 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1355 | vpxor %xmm9, %xmm14, %xmm14 | ||
1356 | #GHASH computation for the last <16 Byte block | ||
1357 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1358 | sub %r13, %r11 | ||
1359 | add $16, %r11 | ||
1360 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext | ||
1361 | .endif | ||
1362 | |||
1363 | |||
1364 | ############################# | ||
1365 | # output r13 Bytes | ||
1366 | vmovq %xmm9, %rax | ||
1367 | cmp $8, %r13 | ||
1368 | jle _less_than_8_bytes_left\@ | ||
1369 | |||
1370 | mov %rax, (arg2 , %r11) | ||
1371 | add $8, %r11 | ||
1372 | vpsrldq $8, %xmm9, %xmm9 | ||
1373 | vmovq %xmm9, %rax | ||
1374 | sub $8, %r13 | ||
1375 | |||
1376 | _less_than_8_bytes_left\@: | ||
1377 | movb %al, (arg2 , %r11) | ||
1378 | add $1, %r11 | ||
1379 | shr $8, %rax | ||
1380 | sub $1, %r13 | ||
1381 | jne _less_than_8_bytes_left\@ | ||
1382 | ############################# | ||
1383 | |||
1384 | _multiple_of_16_bytes\@: | ||
1385 | mov arg7, %r12 # r12 = aadLen (number of bytes) | ||
1386 | shl $3, %r12 # convert into number of bits | ||
1387 | vmovd %r12d, %xmm15 # len(A) in xmm15 | ||
1388 | |||
1389 | shl $3, arg4 # len(C) in bits (*8) | ||
1390 | vmovq arg4, %xmm1 | ||
1391 | vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 | ||
1392 | vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) | ||
1393 | |||
1394 | vpxor %xmm15, %xmm14, %xmm14 | ||
1395 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation | ||
1396 | vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap | ||
1397 | |||
1398 | mov arg5, %rax # rax = *Y0 | ||
1399 | vmovdqu (%rax), %xmm9 # xmm9 = Y0 | ||
1400 | |||
1401 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0) | ||
1402 | |||
1403 | vpxor %xmm14, %xmm9, %xmm9 | ||
1404 | |||
1405 | |||
1406 | |||
1407 | _return_T\@: | ||
1408 | mov arg8, %r10 # r10 = authTag | ||
1409 | mov arg9, %r11 # r11 = auth_tag_len | ||
1410 | |||
1411 | cmp $16, %r11 | ||
1412 | je _T_16\@ | ||
1413 | |||
1414 | cmp $12, %r11 | ||
1415 | je _T_12\@ | ||
1416 | |||
1417 | _T_8\@: | ||
1418 | vmovq %xmm9, %rax | ||
1419 | mov %rax, (%r10) | ||
1420 | jmp _return_T_done\@ | ||
1421 | _T_12\@: | ||
1422 | vmovq %xmm9, %rax | ||
1423 | mov %rax, (%r10) | ||
1424 | vpsrldq $8, %xmm9, %xmm9 | ||
1425 | vmovd %xmm9, %eax | ||
1426 | mov %eax, 8(%r10) | ||
1427 | jmp _return_T_done\@ | ||
1428 | |||
1429 | _T_16\@: | ||
1430 | vmovdqu %xmm9, (%r10) | ||
1431 | |||
1432 | _return_T_done\@: | ||
1433 | mov %r14, %rsp | ||
1434 | |||
1435 | pop %r15 | ||
1436 | pop %r14 | ||
1437 | pop %r13 | ||
1438 | pop %r12 | ||
1439 | .endm | ||
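For reference, the _zero_cipher_left / _final_ghash_mul path above can be read as the following byte-oriented C model: encrypt the counter block one more time, XOR it into the trailing (len mod 16) bytes, emit only those bytes, and feed the masked block into GHASH. This is only an illustrative sketch under assumed names (gcm_last_partial_block and ek_yn are not kernel symbols); the assembly avoids the copies by loading a shifted block and masking with ALL_F-SHIFT_MASK instead.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Rough model of the partial-block handling above; rem = plaintext_len mod 16. */
static void gcm_last_partial_block(uint8_t *dst, const uint8_t *src,
				   size_t rem, const uint8_t ek_yn[16])
{
	uint8_t block[16] = { 0 };
	size_t i;

	memcpy(block, src, rem);		/* last rem (<16) input bytes  */
	for (i = 0; i < rem; i++)
		block[i] ^= ek_yn[i];		/* XOR with E(K, Yn)           */
	memcpy(dst, block, rem);		/* only rem output bytes exist */
	/* the zero-padded/masked block is then folded into GHASH like a
	 * full block, matching the vpand/vpshufb/vpxor sequence above */
}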
1440 | |||
1441 | |||
1442 | ############################################################# | ||
1443 | #void aesni_gcm_precomp_avx_gen2 | ||
1444 | # (gcm_data *my_ctx_data, | ||
1445 | # u8 *hash_subkey)# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */ | ||
1446 | ############################################################# | ||
1447 | ENTRY(aesni_gcm_precomp_avx_gen2) | ||
1448 | #the number of pushes must equal STACK_OFFSET | ||
1449 | push %r12 | ||
1450 | push %r13 | ||
1451 | push %r14 | ||
1452 | push %r15 | ||
1453 | |||
1454 | mov %rsp, %r14 | ||
1455 | |||
1456 | |||
1457 | |||
1458 | sub $VARIABLE_OFFSET, %rsp | ||
1459 | and $~63, %rsp # align rsp to 64 bytes | ||
1460 | |||
1461 | vmovdqu (arg2), %xmm6 # xmm6 = HashKey | ||
1462 | |||
1463 | vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 | ||
1464 | ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey | ||
1465 | vmovdqa %xmm6, %xmm2 | ||
1466 | vpsllq $1, %xmm6, %xmm6 | ||
1467 | vpsrlq $63, %xmm2, %xmm2 | ||
1468 | vmovdqa %xmm2, %xmm1 | ||
1469 | vpslldq $8, %xmm2, %xmm2 | ||
1470 | vpsrldq $8, %xmm1, %xmm1 | ||
1471 | vpor %xmm2, %xmm6, %xmm6 | ||
1472 | #reduction | ||
1473 | vpshufd $0b00100100, %xmm1, %xmm2 | ||
1474 | vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 | ||
1475 | vpand POLY(%rip), %xmm2, %xmm2 | ||
1476 | vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly | ||
1477 | ####################################################################### | ||
1478 | vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly | ||
1479 | |||
1480 | |||
1481 | PRECOMPUTE_AVX %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 | ||
1482 | |||
1483 | mov %r14, %rsp | ||
1484 | |||
1485 | pop %r15 | ||
1486 | pop %r14 | ||
1487 | pop %r13 | ||
1488 | pop %r12 | ||
1489 | ret | ||
1490 | ENDPROC(aesni_gcm_precomp_avx_gen2) | ||
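The precomputation above derives HashKey<<1 mod poly with a 128-bit left shift followed by a branch-free conditional reduction (the vpshufd/vpcmpeqd/vpand/vpxor sequence). A minimal C sketch of the same arithmetic, assuming the key is held as two 64-bit halves with h[0] = bits 127..64; the constants are the two halves of the POLY value the assembly uses:

#include <stdint.h>

/* HashKey <<= 1 in GF(2^128): if bit 127 falls out, fold the GCM polynomial
 * back in.  The assembly computes the same thing without branching. */
static void hashkey_shl1_mod_poly(uint64_t h[2])
{
	uint64_t carry = h[0] >> 63;

	h[0] = (h[0] << 1) | (h[1] >> 63);
	h[1] <<= 1;

	if (carry) {
		h[0] ^= 0xC200000000000000ULL;	/* high half of POLY */
		h[1] ^= 0x0000000000000001ULL;	/* low half of POLY  */
	}
}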
1491 | |||
1492 | ############################################################################### | ||
1493 | #void aesni_gcm_enc_avx_gen2( | ||
1494 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
1495 | # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ | ||
1496 | # const u8 *in, /* Plaintext input */ | ||
1497 | # u64 plaintext_len, /* Length of data in Bytes for encryption. */ | ||
1498 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
1499 | # (from Security Association) concatenated with 8 byte | ||
1500 | # Initialisation Vector (from IPSec ESP Payload) | ||
1501 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
1502 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
1503 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
1504 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
1505 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
1506 | # Valid values are 16 (most likely), 12 or 8. */ | ||
1507 | ############################################################################### | ||
1508 | ENTRY(aesni_gcm_enc_avx_gen2) | ||
1509 | GCM_ENC_DEC_AVX ENC | ||
1510 | ret | ||
1511 | ENDPROC(aesni_gcm_enc_avx_gen2) | ||
1512 | |||
1513 | ############################################################################### | ||
1514 | #void aesni_gcm_dec_avx_gen2( | ||
1515 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
1516 | # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ | ||
1517 | # const u8 *in, /* Ciphertext input */ | ||
1518 | # u64 plaintext_len, /* Length of data in Bytes for decryption. */ | ||
1519 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
1520 | # (from Security Association) concatenated with 8 byte | ||
1521 | # Initialisation Vector (from IPSec ESP Payload) | ||
1522 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
1523 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
1524 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
1525 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
1526 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
1527 | # Valid values are 16 (most likely), 12 or 8. */ | ||
1528 | ############################################################################### | ||
1529 | ENTRY(aesni_gcm_dec_avx_gen2) | ||
1530 | GCM_ENC_DEC_AVX DEC | ||
1531 | ret | ||
1532 | ENDPROC(aesni_gcm_dec_avx_gen2) | ||
1533 | #endif /* CONFIG_AS_AVX */ | ||
1534 | |||
1535 | #ifdef CONFIG_AS_AVX2 | ||
1536 | ############################################################################### | ||
1537 | # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) | ||
1538 | # Input: A and B (128-bits each, bit-reflected) | ||
1539 | # Output: C = A*B*x mod poly, (i.e. >>1 ) | ||
1540 | # To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input | ||
1541 | # GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. | ||
1542 | ############################################################################### | ||
1543 | .macro GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5 | ||
1544 | |||
1545 | vpclmulqdq $0x11,\HK,\GH,\T1 # T1 = a1*b1 | ||
1546 | vpclmulqdq $0x00,\HK,\GH,\T2 # T2 = a0*b0 | ||
1547 | vpclmulqdq $0x01,\HK,\GH,\T3 # T3 = a1*b0 | ||
1548 | vpclmulqdq $0x10,\HK,\GH,\GH # GH = a0*b1 | ||
1549 | vpxor \T3, \GH, \GH | ||
1550 | |||
1551 | |||
1552 | vpsrldq $8 , \GH, \T3 # shift-R GH 2 DWs | ||
1553 | vpslldq $8 , \GH, \GH # shift-L GH 2 DWs | ||
1554 | |||
1555 | vpxor \T3, \T1, \T1 | ||
1556 | vpxor \T2, \GH, \GH | ||
1557 | |||
1558 | ####################################################################### | ||
1559 | #first phase of the reduction | ||
1560 | vmovdqa POLY2(%rip), \T3 | ||
1561 | |||
1562 | vpclmulqdq $0x01, \GH, \T3, \T2 | ||
1563 | vpslldq $8, \T2, \T2 # shift-L T2 2 DWs | ||
1564 | |||
1565 | vpxor \T2, \GH, \GH # first phase of the reduction complete | ||
1566 | ####################################################################### | ||
1567 | #second phase of the reduction | ||
1568 | vpclmulqdq $0x00, \GH, \T3, \T2 | ||
1569 | vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
1570 | |||
1571 | vpclmulqdq $0x10, \GH, \T3, \GH | ||
1572 | vpslldq $4, \GH, \GH # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
1573 | |||
1574 | vpxor \T2, \GH, \GH # second phase of the reduction complete | ||
1575 | ####################################################################### | ||
1576 | vpxor \T1, \GH, \GH # the result is in GH | ||
1577 | |||
1578 | |||
1579 | .endm | ||
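As a plain-C reference for the math this macro implements with PCLMULQDQ, the multiplication in GF(2^128) can be written bit by bit as in NIST SP 800-38D, Algorithm 1. This sketch uses the ordinary MSB-first representation, whereas the assembly works on bit-reflected operands with the extra factor of x already folded in (as the header comment notes), so the two are not interchangeable bit for bit; ghash_mul_ref is an illustrative name, not a kernel symbol.

#include <stdint.h>

/* z = x * y in GF(2^128) with the GCM polynomial; x[0]/y[0]/z[0] hold bits
 * 127..64, x[1]/y[1]/z[1] hold bits 63..0. */
void ghash_mul_ref(const uint64_t x[2], const uint64_t y[2], uint64_t z[2])
{
	uint64_t v_hi = y[0], v_lo = y[1];
	uint64_t z_hi = 0, z_lo = 0;
	int i;

	for (i = 0; i < 128; i++) {
		uint64_t xbit = (i < 64) ? (x[0] >> (63 - i)) & 1
					 : (x[1] >> (127 - i)) & 1;
		uint64_t lsb;

		if (xbit) {
			z_hi ^= v_hi;
			z_lo ^= v_lo;
		}
		/* multiply V by x: shift right, reduce by R = 11100001 || 0^120 */
		lsb  = v_lo & 1;
		v_lo = (v_lo >> 1) | (v_hi << 63);
		v_hi >>= 1;
		if (lsb)
			v_hi ^= 0xE100000000000000ULL;
	}
	z[0] = z_hi;
	z[1] = z_lo;
}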
1580 | |||
1581 | .macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6 | ||
1582 | |||
1583 | # HashKey_i_k holds XORed values of the low and high parts of HashKey_i | ||
1584 | vmovdqa \HK, \T5 | ||
1585 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly | ||
1586 | vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly | ||
1587 | |||
1588 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly | ||
1589 | vmovdqa \T5, HashKey_3(arg1) | ||
1590 | |||
1591 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly | ||
1592 | vmovdqa \T5, HashKey_4(arg1) | ||
1593 | |||
1594 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly | ||
1595 | vmovdqa \T5, HashKey_5(arg1) | ||
1596 | |||
1597 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly | ||
1598 | vmovdqa \T5, HashKey_6(arg1) | ||
1599 | |||
1600 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly | ||
1601 | vmovdqa \T5, HashKey_7(arg1) | ||
1602 | |||
1603 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly | ||
1604 | vmovdqa \T5, HashKey_8(arg1) | ||
1605 | |||
1606 | .endm | ||
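PRECOMPUTE_AVX2 fills HashKey_2..HashKey_8 with consecutive powers of the hash key so that eight ciphertext blocks can be folded into GHASH per pass. Below is a sketch of the same table build using the reference multiply from the previous snippet (ghash_mul_ref and precompute_hashkey_powers are illustrative names; in the assembly each entry is additionally kept in the <<1 mod poly form expected by PCLMULQDQ):

#include <stdint.h>

void ghash_mul_ref(const uint64_t x[2], const uint64_t y[2], uint64_t z[2]);

/* table[i] ~ HashKey_(i+1): table[0] = H, table[1] = H^2, ... table[7] = H^8 */
void precompute_hashkey_powers(const uint64_t h[2], uint64_t table[8][2])
{
	int i;

	table[0][0] = h[0];
	table[0][1] = h[1];
	for (i = 1; i < 8; i++)
		ghash_mul_ref(table[i - 1], h, table[i]);
}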
1607 | |||
1608 | |||
1609 | ## if a = number of total plaintext bytes | ||
1610 | ## b = floor(a/16) | ||
1611 | ## num_initial_blocks = b mod 8 | ||
1612 | ## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext | ||
1613 | ## r10, r11, r12, rax are clobbered | ||
1614 | ## arg1, arg2, arg3, r14 are used as pointers only, not modified | ||
1615 | |||
1616 | .macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER | ||
1617 | i = (8-\num_initial_blocks) | ||
1618 | setreg | ||
1619 | |||
1620 | mov arg6, %r10 # r10 = AAD | ||
1621 | mov arg7, %r12 # r12 = aadLen | ||
1622 | |||
1623 | |||
1624 | mov %r12, %r11 | ||
1625 | |||
1626 | vpxor reg_i, reg_i, reg_i | ||
1627 | _get_AAD_loop\@: | ||
1628 | vmovd (%r10), \T1 | ||
1629 | vpslldq $12, \T1, \T1 | ||
1630 | vpsrldq $4, reg_i, reg_i | ||
1631 | vpxor \T1, reg_i, reg_i | ||
1632 | |||
1633 | add $4, %r10 | ||
1634 | sub $4, %r12 | ||
1635 | jg _get_AAD_loop\@ | ||
1636 | |||
1637 | |||
1638 | cmp $16, %r11 | ||
1639 | je _get_AAD_loop2_done\@ | ||
1640 | mov $16, %r12 | ||
1641 | |||
1642 | _get_AAD_loop2\@: | ||
1643 | vpsrldq $4, reg_i, reg_i | ||
1644 | sub $4, %r12 | ||
1645 | cmp %r11, %r12 | ||
1646 | jg _get_AAD_loop2\@ | ||
1647 | |||
1648 | _get_AAD_loop2_done\@: | ||
1649 | |||
1650 | #byte-reflect the AAD data | ||
1651 | vpshufb SHUF_MASK(%rip), reg_i, reg_i | ||
1652 | |||
1653 | # initialize the data pointer offset as zero | ||
1654 | xor %r11, %r11 | ||
1655 | |||
1656 | # start AES for num_initial_blocks blocks | ||
1657 | mov arg5, %rax # rax = *Y0 | ||
1658 | vmovdqu (%rax), \CTR # CTR = Y0 | ||
1659 | vpshufb SHUF_MASK(%rip), \CTR, \CTR | ||
1660 | |||
1661 | |||
1662 | i = (9-\num_initial_blocks) | ||
1663 | setreg | ||
1664 | .rep \num_initial_blocks | ||
1665 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1666 | vmovdqa \CTR, reg_i | ||
1667 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap | ||
1668 | i = (i+1) | ||
1669 | setreg | ||
1670 | .endr | ||
1671 | |||
1672 | vmovdqa (arg1), \T_key | ||
1673 | i = (9-\num_initial_blocks) | ||
1674 | setreg | ||
1675 | .rep \num_initial_blocks | ||
1676 | vpxor \T_key, reg_i, reg_i | ||
1677 | i = (i+1) | ||
1678 | setreg | ||
1679 | .endr | ||
1680 | |||
1681 | j = 1 | ||
1682 | setreg | ||
1683 | .rep 9 | ||
1684 | vmovdqa 16*j(arg1), \T_key | ||
1685 | i = (9-\num_initial_blocks) | ||
1686 | setreg | ||
1687 | .rep \num_initial_blocks | ||
1688 | vaesenc \T_key, reg_i, reg_i | ||
1689 | i = (i+1) | ||
1690 | setreg | ||
1691 | .endr | ||
1692 | |||
1693 | j = (j+1) | ||
1694 | setreg | ||
1695 | .endr | ||
1696 | |||
1697 | |||
1698 | vmovdqa 16*10(arg1), \T_key | ||
1699 | i = (9-\num_initial_blocks) | ||
1700 | setreg | ||
1701 | .rep \num_initial_blocks | ||
1702 | vaesenclast \T_key, reg_i, reg_i | ||
1703 | i = (i+1) | ||
1704 | setreg | ||
1705 | .endr | ||
1706 | |||
1707 | i = (9-\num_initial_blocks) | ||
1708 | setreg | ||
1709 | .rep \num_initial_blocks | ||
1710 | vmovdqu (arg3, %r11), \T1 | ||
1711 | vpxor \T1, reg_i, reg_i | ||
1712 | vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for | ||
1713 | # num_initial_blocks blocks | ||
1714 | add $16, %r11 | ||
1715 | .if \ENC_DEC == DEC | ||
1716 | vmovdqa \T1, reg_i | ||
1717 | .endif | ||
1718 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations | ||
1719 | i = (i+1) | ||
1720 | setreg | ||
1721 | .endr | ||
1722 | |||
1723 | |||
1724 | i = (8-\num_initial_blocks) | ||
1725 | j = (9-\num_initial_blocks) | ||
1726 | setreg | ||
1727 | GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6 | ||
1728 | |||
1729 | .rep \num_initial_blocks | ||
1730 | vpxor reg_i, reg_j, reg_j | ||
1731 | GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks | ||
1732 | i = (i+1) | ||
1733 | j = (j+1) | ||
1734 | setreg | ||
1735 | .endr | ||
1736 | # XMM8 has the combined result here | ||
1737 | |||
1738 | vmovdqa \XMM8, TMP1(%rsp) | ||
1739 | vmovdqa \XMM8, \T3 | ||
1740 | |||
1741 | cmp $128, %r13 | ||
1742 | jl _initial_blocks_done\@ # no need for precomputed constants | ||
1743 | |||
1744 | ############################################################################### | ||
1745 | # HashKey_i_k holds XORed values of the low and high parts of HashKey_i | ||
1746 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1747 | vmovdqa \CTR, \XMM1 | ||
1748 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1749 | |||
1750 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1751 | vmovdqa \CTR, \XMM2 | ||
1752 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1753 | |||
1754 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1755 | vmovdqa \CTR, \XMM3 | ||
1756 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1757 | |||
1758 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1759 | vmovdqa \CTR, \XMM4 | ||
1760 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1761 | |||
1762 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1763 | vmovdqa \CTR, \XMM5 | ||
1764 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1765 | |||
1766 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1767 | vmovdqa \CTR, \XMM6 | ||
1768 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1769 | |||
1770 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1771 | vmovdqa \CTR, \XMM7 | ||
1772 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1773 | |||
1774 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1775 | vmovdqa \CTR, \XMM8 | ||
1776 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1777 | |||
1778 | vmovdqa (arg1), \T_key | ||
1779 | vpxor \T_key, \XMM1, \XMM1 | ||
1780 | vpxor \T_key, \XMM2, \XMM2 | ||
1781 | vpxor \T_key, \XMM3, \XMM3 | ||
1782 | vpxor \T_key, \XMM4, \XMM4 | ||
1783 | vpxor \T_key, \XMM5, \XMM5 | ||
1784 | vpxor \T_key, \XMM6, \XMM6 | ||
1785 | vpxor \T_key, \XMM7, \XMM7 | ||
1786 | vpxor \T_key, \XMM8, \XMM8 | ||
1787 | |||
1788 | i = 1 | ||
1789 | setreg | ||
1790 | .rep 9 # do 9 rounds | ||
1791 | vmovdqa 16*i(arg1), \T_key | ||
1792 | vaesenc \T_key, \XMM1, \XMM1 | ||
1793 | vaesenc \T_key, \XMM2, \XMM2 | ||
1794 | vaesenc \T_key, \XMM3, \XMM3 | ||
1795 | vaesenc \T_key, \XMM4, \XMM4 | ||
1796 | vaesenc \T_key, \XMM5, \XMM5 | ||
1797 | vaesenc \T_key, \XMM6, \XMM6 | ||
1798 | vaesenc \T_key, \XMM7, \XMM7 | ||
1799 | vaesenc \T_key, \XMM8, \XMM8 | ||
1800 | i = (i+1) | ||
1801 | setreg | ||
1802 | .endr | ||
1803 | |||
1804 | |||
1805 | vmovdqa 16*i(arg1), \T_key | ||
1806 | vaesenclast \T_key, \XMM1, \XMM1 | ||
1807 | vaesenclast \T_key, \XMM2, \XMM2 | ||
1808 | vaesenclast \T_key, \XMM3, \XMM3 | ||
1809 | vaesenclast \T_key, \XMM4, \XMM4 | ||
1810 | vaesenclast \T_key, \XMM5, \XMM5 | ||
1811 | vaesenclast \T_key, \XMM6, \XMM6 | ||
1812 | vaesenclast \T_key, \XMM7, \XMM7 | ||
1813 | vaesenclast \T_key, \XMM8, \XMM8 | ||
1814 | |||
1815 | vmovdqu (arg3, %r11), \T1 | ||
1816 | vpxor \T1, \XMM1, \XMM1 | ||
1817 | vmovdqu \XMM1, (arg2 , %r11) | ||
1818 | .if \ENC_DEC == DEC | ||
1819 | vmovdqa \T1, \XMM1 | ||
1820 | .endif | ||
1821 | |||
1822 | vmovdqu 16*1(arg3, %r11), \T1 | ||
1823 | vpxor \T1, \XMM2, \XMM2 | ||
1824 | vmovdqu \XMM2, 16*1(arg2 , %r11) | ||
1825 | .if \ENC_DEC == DEC | ||
1826 | vmovdqa \T1, \XMM2 | ||
1827 | .endif | ||
1828 | |||
1829 | vmovdqu 16*2(arg3, %r11), \T1 | ||
1830 | vpxor \T1, \XMM3, \XMM3 | ||
1831 | vmovdqu \XMM3, 16*2(arg2 , %r11) | ||
1832 | .if \ENC_DEC == DEC | ||
1833 | vmovdqa \T1, \XMM3 | ||
1834 | .endif | ||
1835 | |||
1836 | vmovdqu 16*3(arg3, %r11), \T1 | ||
1837 | vpxor \T1, \XMM4, \XMM4 | ||
1838 | vmovdqu \XMM4, 16*3(arg2 , %r11) | ||
1839 | .if \ENC_DEC == DEC | ||
1840 | vmovdqa \T1, \XMM4 | ||
1841 | .endif | ||
1842 | |||
1843 | vmovdqu 16*4(arg3, %r11), \T1 | ||
1844 | vpxor \T1, \XMM5, \XMM5 | ||
1845 | vmovdqu \XMM5, 16*4(arg2 , %r11) | ||
1846 | .if \ENC_DEC == DEC | ||
1847 | vmovdqa \T1, \XMM5 | ||
1848 | .endif | ||
1849 | |||
1850 | vmovdqu 16*5(arg3, %r11), \T1 | ||
1851 | vpxor \T1, \XMM6, \XMM6 | ||
1852 | vmovdqu \XMM6, 16*5(arg2 , %r11) | ||
1853 | .if \ENC_DEC == DEC | ||
1854 | vmovdqa \T1, \XMM6 | ||
1855 | .endif | ||
1856 | |||
1857 | vmovdqu 16*6(arg3, %r11), \T1 | ||
1858 | vpxor \T1, \XMM7, \XMM7 | ||
1859 | vmovdqu \XMM7, 16*6(arg2 , %r11) | ||
1860 | .if \ENC_DEC == DEC | ||
1861 | vmovdqa \T1, \XMM7 | ||
1862 | .endif | ||
1863 | |||
1864 | vmovdqu 16*7(arg3, %r11), \T1 | ||
1865 | vpxor \T1, \XMM8, \XMM8 | ||
1866 | vmovdqu \XMM8, 16*7(arg2 , %r11) | ||
1867 | .if \ENC_DEC == DEC | ||
1868 | vmovdqa \T1, \XMM8 | ||
1869 | .endif | ||
1870 | |||
1871 | add $128, %r11 | ||
1872 | |||
1873 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1874 | vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with | ||
1875 | # the corresponding ciphertext | ||
1876 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1877 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1878 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1879 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1880 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1881 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1882 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1883 | |||
1884 | ############################################################################### | ||
1885 | |||
1886 | _initial_blocks_done\@: | ||
1887 | |||
1888 | |||
1889 | .endm | ||
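The value of num_initial_blocks passed to this macro is chosen by GCM_ENC_DEC_AVX2 below: the byte count is rounded down to whole 16-byte blocks and the first (blocks mod 8) of them are peeled off here so that the main loop always runs on exactly eight blocks. A one-line helper mirroring that r12/r13 arithmetic (the function name and parameter are illustrative only):

#include <stddef.h>

/* (plaintext_len / 16) mod 8, i.e. "and $-16" then "shr $4; and $7" above. */
static unsigned int gcm_num_initial_blocks(size_t plaintext_len)
{
	size_t full_block_bytes = plaintext_len & ~(size_t)15;

	return (unsigned int)((full_block_bytes >> 4) & 7);
}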
1890 | |||
1891 | |||
1892 | |||
1893 | # encrypt 8 blocks at a time | ||
1894 | # ghash the 8 previously encrypted ciphertext blocks | ||
1895 | # arg1, arg2, arg3 are used as pointers only, not modified | ||
1896 | # r11 is the data offset value | ||
1897 | .macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC | ||
1898 | |||
1899 | vmovdqa \XMM1, \T2 | ||
1900 | vmovdqa \XMM2, TMP2(%rsp) | ||
1901 | vmovdqa \XMM3, TMP3(%rsp) | ||
1902 | vmovdqa \XMM4, TMP4(%rsp) | ||
1903 | vmovdqa \XMM5, TMP5(%rsp) | ||
1904 | vmovdqa \XMM6, TMP6(%rsp) | ||
1905 | vmovdqa \XMM7, TMP7(%rsp) | ||
1906 | vmovdqa \XMM8, TMP8(%rsp) | ||
1907 | |||
1908 | .if \loop_idx == in_order | ||
1909 | vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT | ||
1910 | vpaddd ONE(%rip), \XMM1, \XMM2 | ||
1911 | vpaddd ONE(%rip), \XMM2, \XMM3 | ||
1912 | vpaddd ONE(%rip), \XMM3, \XMM4 | ||
1913 | vpaddd ONE(%rip), \XMM4, \XMM5 | ||
1914 | vpaddd ONE(%rip), \XMM5, \XMM6 | ||
1915 | vpaddd ONE(%rip), \XMM6, \XMM7 | ||
1916 | vpaddd ONE(%rip), \XMM7, \XMM8 | ||
1917 | vmovdqa \XMM8, \CTR | ||
1918 | |||
1919 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1920 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1921 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1922 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1923 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1924 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1925 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1926 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1927 | .else | ||
1928 | vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT | ||
1929 | vpaddd ONEf(%rip), \XMM1, \XMM2 | ||
1930 | vpaddd ONEf(%rip), \XMM2, \XMM3 | ||
1931 | vpaddd ONEf(%rip), \XMM3, \XMM4 | ||
1932 | vpaddd ONEf(%rip), \XMM4, \XMM5 | ||
1933 | vpaddd ONEf(%rip), \XMM5, \XMM6 | ||
1934 | vpaddd ONEf(%rip), \XMM6, \XMM7 | ||
1935 | vpaddd ONEf(%rip), \XMM7, \XMM8 | ||
1936 | vmovdqa \XMM8, \CTR | ||
1937 | .endif | ||
1938 | |||
1939 | |||
1940 | ####################################################################### | ||
1941 | |||
1942 | vmovdqu (arg1), \T1 | ||
1943 | vpxor \T1, \XMM1, \XMM1 | ||
1944 | vpxor \T1, \XMM2, \XMM2 | ||
1945 | vpxor \T1, \XMM3, \XMM3 | ||
1946 | vpxor \T1, \XMM4, \XMM4 | ||
1947 | vpxor \T1, \XMM5, \XMM5 | ||
1948 | vpxor \T1, \XMM6, \XMM6 | ||
1949 | vpxor \T1, \XMM7, \XMM7 | ||
1950 | vpxor \T1, \XMM8, \XMM8 | ||
1951 | |||
1952 | ####################################################################### | ||
1953 | |||
1954 | |||
1955 | |||
1956 | |||
1957 | |||
1958 | vmovdqu 16*1(arg1), \T1 | ||
1959 | vaesenc \T1, \XMM1, \XMM1 | ||
1960 | vaesenc \T1, \XMM2, \XMM2 | ||
1961 | vaesenc \T1, \XMM3, \XMM3 | ||
1962 | vaesenc \T1, \XMM4, \XMM4 | ||
1963 | vaesenc \T1, \XMM5, \XMM5 | ||
1964 | vaesenc \T1, \XMM6, \XMM6 | ||
1965 | vaesenc \T1, \XMM7, \XMM7 | ||
1966 | vaesenc \T1, \XMM8, \XMM8 | ||
1967 | |||
1968 | vmovdqu 16*2(arg1), \T1 | ||
1969 | vaesenc \T1, \XMM1, \XMM1 | ||
1970 | vaesenc \T1, \XMM2, \XMM2 | ||
1971 | vaesenc \T1, \XMM3, \XMM3 | ||
1972 | vaesenc \T1, \XMM4, \XMM4 | ||
1973 | vaesenc \T1, \XMM5, \XMM5 | ||
1974 | vaesenc \T1, \XMM6, \XMM6 | ||
1975 | vaesenc \T1, \XMM7, \XMM7 | ||
1976 | vaesenc \T1, \XMM8, \XMM8 | ||
1977 | |||
1978 | |||
1979 | ####################################################################### | ||
1980 | |||
1981 | vmovdqa HashKey_8(arg1), \T5 | ||
1982 | vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 | ||
1983 | vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 | ||
1984 | vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0 | ||
1985 | vpclmulqdq $0x10, \T5, \T2, \T5 # T5 = a0*b1 | ||
1986 | vpxor \T5, \T6, \T6 | ||
1987 | |||
1988 | vmovdqu 16*3(arg1), \T1 | ||
1989 | vaesenc \T1, \XMM1, \XMM1 | ||
1990 | vaesenc \T1, \XMM2, \XMM2 | ||
1991 | vaesenc \T1, \XMM3, \XMM3 | ||
1992 | vaesenc \T1, \XMM4, \XMM4 | ||
1993 | vaesenc \T1, \XMM5, \XMM5 | ||
1994 | vaesenc \T1, \XMM6, \XMM6 | ||
1995 | vaesenc \T1, \XMM7, \XMM7 | ||
1996 | vaesenc \T1, \XMM8, \XMM8 | ||
1997 | |||
1998 | vmovdqa TMP2(%rsp), \T1 | ||
1999 | vmovdqa HashKey_7(arg1), \T5 | ||
2000 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2001 | vpxor \T3, \T4, \T4 | ||
2002 | |||
2003 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2004 | vpxor \T3, \T7, \T7 | ||
2005 | |||
2006 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2007 | vpxor \T3, \T6, \T6 | ||
2008 | |||
2009 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2010 | vpxor \T3, \T6, \T6 | ||
2011 | |||
2012 | vmovdqu 16*4(arg1), \T1 | ||
2013 | vaesenc \T1, \XMM1, \XMM1 | ||
2014 | vaesenc \T1, \XMM2, \XMM2 | ||
2015 | vaesenc \T1, \XMM3, \XMM3 | ||
2016 | vaesenc \T1, \XMM4, \XMM4 | ||
2017 | vaesenc \T1, \XMM5, \XMM5 | ||
2018 | vaesenc \T1, \XMM6, \XMM6 | ||
2019 | vaesenc \T1, \XMM7, \XMM7 | ||
2020 | vaesenc \T1, \XMM8, \XMM8 | ||
2021 | |||
2022 | ####################################################################### | ||
2023 | |||
2024 | vmovdqa TMP3(%rsp), \T1 | ||
2025 | vmovdqa HashKey_6(arg1), \T5 | ||
2026 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2027 | vpxor \T3, \T4, \T4 | ||
2028 | |||
2029 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2030 | vpxor \T3, \T7, \T7 | ||
2031 | |||
2032 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2033 | vpxor \T3, \T6, \T6 | ||
2034 | |||
2035 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2036 | vpxor \T3, \T6, \T6 | ||
2037 | |||
2038 | vmovdqu 16*5(arg1), \T1 | ||
2039 | vaesenc \T1, \XMM1, \XMM1 | ||
2040 | vaesenc \T1, \XMM2, \XMM2 | ||
2041 | vaesenc \T1, \XMM3, \XMM3 | ||
2042 | vaesenc \T1, \XMM4, \XMM4 | ||
2043 | vaesenc \T1, \XMM5, \XMM5 | ||
2044 | vaesenc \T1, \XMM6, \XMM6 | ||
2045 | vaesenc \T1, \XMM7, \XMM7 | ||
2046 | vaesenc \T1, \XMM8, \XMM8 | ||
2047 | |||
2048 | vmovdqa TMP4(%rsp), \T1 | ||
2049 | vmovdqa HashKey_5(arg1), \T5 | ||
2050 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2051 | vpxor \T3, \T4, \T4 | ||
2052 | |||
2053 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2054 | vpxor \T3, \T7, \T7 | ||
2055 | |||
2056 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2057 | vpxor \T3, \T6, \T6 | ||
2058 | |||
2059 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2060 | vpxor \T3, \T6, \T6 | ||
2061 | |||
2062 | vmovdqu 16*6(arg1), \T1 | ||
2063 | vaesenc \T1, \XMM1, \XMM1 | ||
2064 | vaesenc \T1, \XMM2, \XMM2 | ||
2065 | vaesenc \T1, \XMM3, \XMM3 | ||
2066 | vaesenc \T1, \XMM4, \XMM4 | ||
2067 | vaesenc \T1, \XMM5, \XMM5 | ||
2068 | vaesenc \T1, \XMM6, \XMM6 | ||
2069 | vaesenc \T1, \XMM7, \XMM7 | ||
2070 | vaesenc \T1, \XMM8, \XMM8 | ||
2071 | |||
2072 | |||
2073 | vmovdqa TMP5(%rsp), \T1 | ||
2074 | vmovdqa HashKey_4(arg1), \T5 | ||
2075 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2076 | vpxor \T3, \T4, \T4 | ||
2077 | |||
2078 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2079 | vpxor \T3, \T7, \T7 | ||
2080 | |||
2081 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2082 | vpxor \T3, \T6, \T6 | ||
2083 | |||
2084 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2085 | vpxor \T3, \T6, \T6 | ||
2086 | |||
2087 | vmovdqu 16*7(arg1), \T1 | ||
2088 | vaesenc \T1, \XMM1, \XMM1 | ||
2089 | vaesenc \T1, \XMM2, \XMM2 | ||
2090 | vaesenc \T1, \XMM3, \XMM3 | ||
2091 | vaesenc \T1, \XMM4, \XMM4 | ||
2092 | vaesenc \T1, \XMM5, \XMM5 | ||
2093 | vaesenc \T1, \XMM6, \XMM6 | ||
2094 | vaesenc \T1, \XMM7, \XMM7 | ||
2095 | vaesenc \T1, \XMM8, \XMM8 | ||
2096 | |||
2097 | vmovdqa TMP6(%rsp), \T1 | ||
2098 | vmovdqa HashKey_3(arg1), \T5 | ||
2099 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2100 | vpxor \T3, \T4, \T4 | ||
2101 | |||
2102 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2103 | vpxor \T3, \T7, \T7 | ||
2104 | |||
2105 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2106 | vpxor \T3, \T6, \T6 | ||
2107 | |||
2108 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2109 | vpxor \T3, \T6, \T6 | ||
2110 | |||
2111 | vmovdqu 16*8(arg1), \T1 | ||
2112 | vaesenc \T1, \XMM1, \XMM1 | ||
2113 | vaesenc \T1, \XMM2, \XMM2 | ||
2114 | vaesenc \T1, \XMM3, \XMM3 | ||
2115 | vaesenc \T1, \XMM4, \XMM4 | ||
2116 | vaesenc \T1, \XMM5, \XMM5 | ||
2117 | vaesenc \T1, \XMM6, \XMM6 | ||
2118 | vaesenc \T1, \XMM7, \XMM7 | ||
2119 | vaesenc \T1, \XMM8, \XMM8 | ||
2120 | |||
2121 | vmovdqa TMP7(%rsp), \T1 | ||
2122 | vmovdqa HashKey_2(arg1), \T5 | ||
2123 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2124 | vpxor \T3, \T4, \T4 | ||
2125 | |||
2126 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2127 | vpxor \T3, \T7, \T7 | ||
2128 | |||
2129 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2130 | vpxor \T3, \T6, \T6 | ||
2131 | |||
2132 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2133 | vpxor \T3, \T6, \T6 | ||
2134 | |||
2135 | |||
2136 | ####################################################################### | ||
2137 | |||
2138 | vmovdqu 16*9(arg1), \T5 | ||
2139 | vaesenc \T5, \XMM1, \XMM1 | ||
2140 | vaesenc \T5, \XMM2, \XMM2 | ||
2141 | vaesenc \T5, \XMM3, \XMM3 | ||
2142 | vaesenc \T5, \XMM4, \XMM4 | ||
2143 | vaesenc \T5, \XMM5, \XMM5 | ||
2144 | vaesenc \T5, \XMM6, \XMM6 | ||
2145 | vaesenc \T5, \XMM7, \XMM7 | ||
2146 | vaesenc \T5, \XMM8, \XMM8 | ||
2147 | |||
2148 | vmovdqa TMP8(%rsp), \T1 | ||
2149 | vmovdqa HashKey(arg1), \T5 | ||
2150 | |||
2151 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2152 | vpxor \T3, \T7, \T7 | ||
2153 | |||
2154 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2155 | vpxor \T3, \T6, \T6 | ||
2156 | |||
2157 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2158 | vpxor \T3, \T6, \T6 | ||
2159 | |||
2160 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2161 | vpxor \T3, \T4, \T1 | ||
2162 | |||
2163 | |||
2164 | vmovdqu 16*10(arg1), \T5 | ||
2165 | |||
2166 | i = 0 | ||
2167 | j = 1 | ||
2168 | setreg | ||
2169 | .rep 8 | ||
2170 | vpxor 16*i(arg3, %r11), \T5, \T2 | ||
2171 | .if \ENC_DEC == ENC | ||
2172 | vaesenclast \T2, reg_j, reg_j | ||
2173 | .else | ||
2174 | vaesenclast \T2, reg_j, \T3 | ||
2175 | vmovdqu 16*i(arg3, %r11), reg_j | ||
2176 | vmovdqu \T3, 16*i(arg2, %r11) | ||
2177 | .endif | ||
2178 | i = (i+1) | ||
2179 | j = (j+1) | ||
2180 | setreg | ||
2181 | .endr | ||
2182 | ####################################################################### | ||
2183 | |||
2184 | |||
2185 | vpslldq $8, \T6, \T3 # shift-L T3 2 DWs | ||
2186 | vpsrldq $8, \T6, \T6 # shift-R T6 2 DWs | ||
2187 | vpxor \T3, \T7, \T7 | ||
2188 | vpxor \T6, \T1, \T1 # accumulate the results in T1:T7 | ||
2189 | |||
2190 | |||
2191 | |||
2192 | ####################################################################### | ||
2193 | #first phase of the reduction | ||
2194 | vmovdqa POLY2(%rip), \T3 | ||
2195 | |||
2196 | vpclmulqdq $0x01, \T7, \T3, \T2 | ||
2197 | vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs | ||
2198 | |||
2199 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
2200 | ####################################################################### | ||
2201 | .if \ENC_DEC == ENC | ||
2202 | vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer | ||
2203 | vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer | ||
2204 | vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer | ||
2205 | vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer | ||
2206 | vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer | ||
2207 | vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer | ||
2208 | vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer | ||
2209 | vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer | ||
2210 | .endif | ||
2211 | |||
2212 | ####################################################################### | ||
2213 | #second phase of the reduction | ||
2214 | vpclmulqdq $0x00, \T7, \T3, \T2 | ||
2215 | vpsrldq $4, \T2, \T2 # shift-R xmm2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
2216 | |||
2217 | vpclmulqdq $0x10, \T7, \T3, \T4 | ||
2218 | vpslldq $4, \T4, \T4 # shift-L xmm0 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
2219 | |||
2220 | vpxor \T2, \T4, \T4 # second phase of the reduction complete | ||
2221 | ####################################################################### | ||
2222 | vpxor \T4, \T1, \T1 # the result is in T1 | ||
2223 | |||
2224 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
2225 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
2226 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
2227 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
2228 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
2229 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
2230 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
2231 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
2232 | |||
2233 | |||
2234 | vpxor \T1, \XMM1, \XMM1 | ||
2235 | |||
2236 | |||
2237 | |||
2238 | .endm | ||
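The eight-block loop that drives this macro (see _encrypt_by_8_new below) first checks whether the low byte of the counter can absorb eight increments without carrying (cmp $(255-8), %r15d); only when it cannot does it take the path that byte-swaps the counter and increments it as a full big-endian word before calling the in_order variant. A trivial sketch of that guard, assuming the caller mirrors the low counter byte in an integer the way the assembly does in r15:

#include <stdint.h>

/* Non-zero when eight more increments cannot carry out of the low counter
 * byte, i.e. the cheaper out_order increment path is safe to use. */
static int ctr_can_add_8_without_carry(uint8_t ctr_low_byte)
{
	return ctr_low_byte <= 255 - 8;
}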
2239 | |||
2240 | |||
2241 | # GHASH the last 8 ciphertext blocks. | ||
2242 | .macro GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 | ||
2243 | |||
2244 | ## Karatsuba Method | ||
2245 | |||
2246 | vmovdqa HashKey_8(arg1), \T5 | ||
2247 | |||
2248 | vpshufd $0b01001110, \XMM1, \T2 | ||
2249 | vpshufd $0b01001110, \T5, \T3 | ||
2250 | vpxor \XMM1, \T2, \T2 | ||
2251 | vpxor \T5, \T3, \T3 | ||
2252 | |||
2253 | vpclmulqdq $0x11, \T5, \XMM1, \T6 | ||
2254 | vpclmulqdq $0x00, \T5, \XMM1, \T7 | ||
2255 | |||
2256 | vpclmulqdq $0x00, \T3, \T2, \XMM1 | ||
2257 | |||
2258 | ###################### | ||
2259 | |||
2260 | vmovdqa HashKey_7(arg1), \T5 | ||
2261 | vpshufd $0b01001110, \XMM2, \T2 | ||
2262 | vpshufd $0b01001110, \T5, \T3 | ||
2263 | vpxor \XMM2, \T2, \T2 | ||
2264 | vpxor \T5, \T3, \T3 | ||
2265 | |||
2266 | vpclmulqdq $0x11, \T5, \XMM2, \T4 | ||
2267 | vpxor \T4, \T6, \T6 | ||
2268 | |||
2269 | vpclmulqdq $0x00, \T5, \XMM2, \T4 | ||
2270 | vpxor \T4, \T7, \T7 | ||
2271 | |||
2272 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2273 | |||
2274 | vpxor \T2, \XMM1, \XMM1 | ||
2275 | |||
2276 | ###################### | ||
2277 | |||
2278 | vmovdqa HashKey_6(arg1), \T5 | ||
2279 | vpshufd $0b01001110, \XMM3, \T2 | ||
2280 | vpshufd $0b01001110, \T5, \T3 | ||
2281 | vpxor \XMM3, \T2, \T2 | ||
2282 | vpxor \T5, \T3, \T3 | ||
2283 | |||
2284 | vpclmulqdq $0x11, \T5, \XMM3, \T4 | ||
2285 | vpxor \T4, \T6, \T6 | ||
2286 | |||
2287 | vpclmulqdq $0x00, \T5, \XMM3, \T4 | ||
2288 | vpxor \T4, \T7, \T7 | ||
2289 | |||
2290 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2291 | |||
2292 | vpxor \T2, \XMM1, \XMM1 | ||
2293 | |||
2294 | ###################### | ||
2295 | |||
2296 | vmovdqa HashKey_5(arg1), \T5 | ||
2297 | vpshufd $0b01001110, \XMM4, \T2 | ||
2298 | vpshufd $0b01001110, \T5, \T3 | ||
2299 | vpxor \XMM4, \T2, \T2 | ||
2300 | vpxor \T5, \T3, \T3 | ||
2301 | |||
2302 | vpclmulqdq $0x11, \T5, \XMM4, \T4 | ||
2303 | vpxor \T4, \T6, \T6 | ||
2304 | |||
2305 | vpclmulqdq $0x00, \T5, \XMM4, \T4 | ||
2306 | vpxor \T4, \T7, \T7 | ||
2307 | |||
2308 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2309 | |||
2310 | vpxor \T2, \XMM1, \XMM1 | ||
2311 | |||
2312 | ###################### | ||
2313 | |||
2314 | vmovdqa HashKey_4(arg1), \T5 | ||
2315 | vpshufd $0b01001110, \XMM5, \T2 | ||
2316 | vpshufd $0b01001110, \T5, \T3 | ||
2317 | vpxor \XMM5, \T2, \T2 | ||
2318 | vpxor \T5, \T3, \T3 | ||
2319 | |||
2320 | vpclmulqdq $0x11, \T5, \XMM5, \T4 | ||
2321 | vpxor \T4, \T6, \T6 | ||
2322 | |||
2323 | vpclmulqdq $0x00, \T5, \XMM5, \T4 | ||
2324 | vpxor \T4, \T7, \T7 | ||
2325 | |||
2326 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2327 | |||
2328 | vpxor \T2, \XMM1, \XMM1 | ||
2329 | |||
2330 | ###################### | ||
2331 | |||
2332 | vmovdqa HashKey_3(arg1), \T5 | ||
2333 | vpshufd $0b01001110, \XMM6, \T2 | ||
2334 | vpshufd $0b01001110, \T5, \T3 | ||
2335 | vpxor \XMM6, \T2, \T2 | ||
2336 | vpxor \T5, \T3, \T3 | ||
2337 | |||
2338 | vpclmulqdq $0x11, \T5, \XMM6, \T4 | ||
2339 | vpxor \T4, \T6, \T6 | ||
2340 | |||
2341 | vpclmulqdq $0x00, \T5, \XMM6, \T4 | ||
2342 | vpxor \T4, \T7, \T7 | ||
2343 | |||
2344 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2345 | |||
2346 | vpxor \T2, \XMM1, \XMM1 | ||
2347 | |||
2348 | ###################### | ||
2349 | |||
2350 | vmovdqa HashKey_2(arg1), \T5 | ||
2351 | vpshufd $0b01001110, \XMM7, \T2 | ||
2352 | vpshufd $0b01001110, \T5, \T3 | ||
2353 | vpxor \XMM7, \T2, \T2 | ||
2354 | vpxor \T5, \T3, \T3 | ||
2355 | |||
2356 | vpclmulqdq $0x11, \T5, \XMM7, \T4 | ||
2357 | vpxor \T4, \T6, \T6 | ||
2358 | |||
2359 | vpclmulqdq $0x00, \T5, \XMM7, \T4 | ||
2360 | vpxor \T4, \T7, \T7 | ||
2361 | |||
2362 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2363 | |||
2364 | vpxor \T2, \XMM1, \XMM1 | ||
2365 | |||
2366 | ###################### | ||
2367 | |||
2368 | vmovdqa HashKey(arg1), \T5 | ||
2369 | vpshufd $0b01001110, \XMM8, \T2 | ||
2370 | vpshufd $0b01001110, \T5, \T3 | ||
2371 | vpxor \XMM8, \T2, \T2 | ||
2372 | vpxor \T5, \T3, \T3 | ||
2373 | |||
2374 | vpclmulqdq $0x11, \T5, \XMM8, \T4 | ||
2375 | vpxor \T4, \T6, \T6 | ||
2376 | |||
2377 | vpclmulqdq $0x00, \T5, \XMM8, \T4 | ||
2378 | vpxor \T4, \T7, \T7 | ||
2379 | |||
2380 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2381 | |||
2382 | vpxor \T2, \XMM1, \XMM1 | ||
2383 | vpxor \T6, \XMM1, \XMM1 | ||
2384 | vpxor \T7, \XMM1, \T2 | ||
2385 | |||
2386 | |||
2387 | |||
2388 | |||
2389 | vpslldq $8, \T2, \T4 | ||
2390 | vpsrldq $8, \T2, \T2 | ||
2391 | |||
2392 | vpxor \T4, \T7, \T7 | ||
2393 | vpxor \T2, \T6, \T6 # <T6:T7> holds the result of the | ||
2394 | # accumulated carry-less multiplications | ||
2395 | |||
2396 | ####################################################################### | ||
2397 | #first phase of the reduction | ||
2398 | vmovdqa POLY2(%rip), \T3 | ||
2399 | |||
2400 | vpclmulqdq $0x01, \T7, \T3, \T2 | ||
2401 | vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs | ||
2402 | |||
2403 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
2404 | ####################################################################### | ||
2405 | |||
2406 | |||
2407 | #second phase of the reduction | ||
2408 | vpclmulqdq $0x00, \T7, \T3, \T2 | ||
2409 | vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
2410 | |||
2411 | vpclmulqdq $0x10, \T7, \T3, \T4 | ||
2412 | vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
2413 | |||
2414 | vpxor \T2, \T4, \T4 # second phase of the reduction complete | ||
2415 | ####################################################################### | ||
2416 | vpxor \T4, \T6, \T6 # the result is in T6 | ||
2417 | .endm | ||
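The Karatsuba method above needs only three carry-less multiplies per block instead of four. With A = a1*x^64 + a0 and B = b1*x^64 + b0 (64-bit halves, addition meaning XOR):

    A*B = a1*b1*x^128 + [ (a1+a0)*(b1+b0) + a1*b1 + a0*b0 ]*x^64 + a0*b0

which is why each block contributes the three products accumulated in T6 (high), T7 (low) and XMM1 (the XORed-halves product), recombined only once before the final reduction.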
2418 | |||
2419 | |||
2420 | |||
2421 | # combined for GCM encrypt and decrypt functions | ||
2422 | # clobbering all xmm registers | ||
2423 | # clobbering r10, r11, r12, r13, r14, r15 | ||
2424 | .macro GCM_ENC_DEC_AVX2 ENC_DEC | ||
2425 | |||
2426 | #the number of pushes must equal STACK_OFFSET | ||
2427 | push %r12 | ||
2428 | push %r13 | ||
2429 | push %r14 | ||
2430 | push %r15 | ||
2431 | |||
2432 | mov %rsp, %r14 | ||
2433 | |||
2434 | |||
2435 | |||
2436 | |||
2437 | sub $VARIABLE_OFFSET, %rsp | ||
2438 | and $~63, %rsp # align rsp to 64 bytes | ||
2439 | |||
2440 | |||
2441 | vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey | ||
2442 | |||
2443 | mov arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
2444 | and $-16, %r13 # r13 = r13 - (r13 mod 16) | ||
2445 | |||
2446 | mov %r13, %r12 | ||
2447 | shr $4, %r12 | ||
2448 | and $7, %r12 | ||
2449 | jz _initial_num_blocks_is_0\@ | ||
2450 | |||
2451 | cmp $7, %r12 | ||
2452 | je _initial_num_blocks_is_7\@ | ||
2453 | cmp $6, %r12 | ||
2454 | je _initial_num_blocks_is_6\@ | ||
2455 | cmp $5, %r12 | ||
2456 | je _initial_num_blocks_is_5\@ | ||
2457 | cmp $4, %r12 | ||
2458 | je _initial_num_blocks_is_4\@ | ||
2459 | cmp $3, %r12 | ||
2460 | je _initial_num_blocks_is_3\@ | ||
2461 | cmp $2, %r12 | ||
2462 | je _initial_num_blocks_is_2\@ | ||
2463 | |||
2464 | jmp _initial_num_blocks_is_1\@ | ||
2465 | |||
2466 | _initial_num_blocks_is_7\@: | ||
2467 | INITIAL_BLOCKS_AVX2 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2468 | sub $16*7, %r13 | ||
2469 | jmp _initial_blocks_encrypted\@ | ||
2470 | |||
2471 | _initial_num_blocks_is_6\@: | ||
2472 | INITIAL_BLOCKS_AVX2 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2473 | sub $16*6, %r13 | ||
2474 | jmp _initial_blocks_encrypted\@ | ||
2475 | |||
2476 | _initial_num_blocks_is_5\@: | ||
2477 | INITIAL_BLOCKS_AVX2 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2478 | sub $16*5, %r13 | ||
2479 | jmp _initial_blocks_encrypted\@ | ||
2480 | |||
2481 | _initial_num_blocks_is_4\@: | ||
2482 | INITIAL_BLOCKS_AVX2 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2483 | sub $16*4, %r13 | ||
2484 | jmp _initial_blocks_encrypted\@ | ||
2485 | |||
2486 | _initial_num_blocks_is_3\@: | ||
2487 | INITIAL_BLOCKS_AVX2 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2488 | sub $16*3, %r13 | ||
2489 | jmp _initial_blocks_encrypted\@ | ||
2490 | |||
2491 | _initial_num_blocks_is_2\@: | ||
2492 | INITIAL_BLOCKS_AVX2 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2493 | sub $16*2, %r13 | ||
2494 | jmp _initial_blocks_encrypted\@ | ||
2495 | |||
2496 | _initial_num_blocks_is_1\@: | ||
2497 | INITIAL_BLOCKS_AVX2 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2498 | sub $16*1, %r13 | ||
2499 | jmp _initial_blocks_encrypted\@ | ||
2500 | |||
2501 | _initial_num_blocks_is_0\@: | ||
2502 | INITIAL_BLOCKS_AVX2 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2503 | |||
2504 | |||
2505 | _initial_blocks_encrypted\@: | ||
2506 | cmp $0, %r13 | ||
2507 | je _zero_cipher_left\@ | ||
2508 | |||
2509 | sub $128, %r13 | ||
2510 | je _eight_cipher_left\@ | ||
2511 | |||
2512 | |||
2513 | |||
2514 | |||
2515 | vmovd %xmm9, %r15d | ||
2516 | and $255, %r15d | ||
2517 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2518 | |||
2519 | |||
2520 | _encrypt_by_8_new\@: | ||
2521 | cmp $(255-8), %r15d | ||
2522 | jg _encrypt_by_8\@ | ||
2523 | |||
2524 | |||
2525 | |||
2526 | add $8, %r15b | ||
2527 | GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC | ||
2528 | add $128, %r11 | ||
2529 | sub $128, %r13 | ||
2530 | jne _encrypt_by_8_new\@ | ||
2531 | |||
2532 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2533 | jmp _eight_cipher_left\@ | ||
2534 | |||
2535 | _encrypt_by_8\@: | ||
2536 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2537 | add $8, %r15b | ||
2538 | GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC | ||
2539 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2540 | add $128, %r11 | ||
2541 | sub $128, %r13 | ||
2542 | jne _encrypt_by_8_new\@ | ||
2543 | |||
2544 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2545 | |||
2546 | |||
2547 | |||
2548 | |||
2549 | _eight_cipher_left\@: | ||
2550 | GHASH_LAST_8_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 | ||
2551 | |||
2552 | |||
2553 | _zero_cipher_left\@: | ||
2554 | cmp $16, arg4 | ||
2555 | jl _only_less_than_16\@ | ||
2556 | |||
2557 | mov arg4, %r13 | ||
2558 | and $15, %r13 # r13 = (arg4 mod 16) | ||
2559 | |||
2560 | je _multiple_of_16_bytes\@ | ||
2561 | |||
2562 | # handle the last <16 Byte block separately | ||
2563 | |||
2564 | |||
2565 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
2566 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2567 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
2568 | |||
2569 | sub $16, %r11 | ||
2570 | add %r13, %r11 | ||
2571 | vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block | ||
2572 | |||
2573 | lea SHIFT_MASK+16(%rip), %r12 | ||
2574 | sub %r13, %r12 # adjust the shuffle mask pointer | ||
2575 | # to be able to shift 16-r13 bytes | ||
2576 | # (r13 is the number of bytes in plaintext mod 16) | ||
2577 | vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
2578 | vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes | ||
2579 | jmp _final_ghash_mul\@ | ||
2580 | |||
2581 | _only_less_than_16\@: | ||
2582 | # check for 0 length | ||
2583 | mov arg4, %r13 | ||
2584 | and $15, %r13 # r13 = (arg4 mod 16) | ||
2585 | |||
2586 | je _multiple_of_16_bytes\@ | ||
2587 | |||
2588 | # handle the last <16 Byte block separately | ||
2589 | |||
2590 | |||
2591 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
2592 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2593 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
2594 | |||
2595 | |||
2596 | lea SHIFT_MASK+16(%rip), %r12 | ||
2597 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
2598 | # able to shift 16-r13 bytes (r13 is the | ||
2599 | # number of bytes in plaintext mod 16) | ||
2600 | |||
2601 | _get_last_16_byte_loop\@: | ||
2602 | movb (arg3, %r11), %al | ||
2603 | movb %al, TMP1 (%rsp , %r11) | ||
2604 | add $1, %r11 | ||
2605 | cmp %r13, %r11 | ||
2606 | jne _get_last_16_byte_loop\@ | ||
2607 | |||
2608 | vmovdqu TMP1(%rsp), %xmm1 | ||
2609 | |||
2610 | sub $16, %r11 | ||
2611 | |||
2612 | _final_ghash_mul\@: | ||
2613 | .if \ENC_DEC == DEC | ||
2614 | vmovdqa %xmm1, %xmm2 | ||
2615 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
2616 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9 | ||
2617 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
2618 | vpand %xmm1, %xmm2, %xmm2 | ||
2619 | vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 | ||
2620 | vpxor %xmm2, %xmm14, %xmm14 | ||
2621 | #GHASH computation for the last <16 Byte block | ||
2622 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
2623 | sub %r13, %r11 | ||
2624 | add $16, %r11 | ||
2625 | .else | ||
2626 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
2627 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9 | ||
2628 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
2629 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2630 | vpxor %xmm9, %xmm14, %xmm14 | ||
2631 | #GHASH computation for the last <16 Byte block | ||
2632 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
2633 | sub %r13, %r11 | ||
2634 | add $16, %r11 | ||
2635 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext | ||
2636 | .endif | ||
2637 | |||
2638 | |||
2639 | ############################# | ||
2640 | # output r13 Bytes | ||
2641 | vmovq %xmm9, %rax | ||
2642 | cmp $8, %r13 | ||
2643 | jle _less_than_8_bytes_left\@ | ||
2644 | |||
2645 | mov %rax, (arg2 , %r11) | ||
2646 | add $8, %r11 | ||
2647 | vpsrldq $8, %xmm9, %xmm9 | ||
2648 | vmovq %xmm9, %rax | ||
2649 | sub $8, %r13 | ||
2650 | |||
2651 | _less_than_8_bytes_left\@: | ||
2652 | movb %al, (arg2 , %r11) | ||
2653 | add $1, %r11 | ||
2654 | shr $8, %rax | ||
2655 | sub $1, %r13 | ||
2656 | jne _less_than_8_bytes_left\@ | ||
2657 | ############################# | ||
2658 | |||
2659 | _multiple_of_16_bytes\@: | ||
2660 | mov arg7, %r12 # r12 = aadLen (number of bytes) | ||
2661 | shl $3, %r12 # convert into number of bits | ||
2662 | vmovd %r12d, %xmm15 # len(A) in xmm15 | ||
2663 | |||
2664 | shl $3, arg4 # len(C) in bits (*8) | ||
2665 | vmovq arg4, %xmm1 | ||
2666 | vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 | ||
2667 | vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) | ||
2668 | |||
2669 | vpxor %xmm15, %xmm14, %xmm14 | ||
2670 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation | ||
2671 | vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap | ||
2672 | |||
2673 | mov arg5, %rax # rax = *Y0 | ||
2674 | vmovdqu (%rax), %xmm9 # xmm9 = Y0 | ||
2675 | |||
2676 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0) | ||
2677 | |||
2678 | vpxor %xmm14, %xmm9, %xmm9 | ||
2679 | |||
2680 | |||
2681 | |||
2682 | _return_T\@: | ||
2683 | mov arg8, %r10 # r10 = authTag | ||
2684 | mov arg9, %r11 # r11 = auth_tag_len | ||
2685 | |||
2686 | cmp $16, %r11 | ||
2687 | je _T_16\@ | ||
2688 | |||
2689 | cmp $12, %r11 | ||
2690 | je _T_12\@ | ||
2691 | |||
2692 | _T_8\@: | ||
2693 | vmovq %xmm9, %rax | ||
2694 | mov %rax, (%r10) | ||
2695 | jmp _return_T_done\@ | ||
2696 | _T_12\@: | ||
2697 | vmovq %xmm9, %rax | ||
2698 | mov %rax, (%r10) | ||
2699 | vpsrldq $8, %xmm9, %xmm9 | ||
2700 | vmovd %xmm9, %eax | ||
2701 | mov %eax, 8(%r10) | ||
2702 | jmp _return_T_done\@ | ||
2703 | |||
2704 | _T_16\@: | ||
2705 | vmovdqu %xmm9, (%r10) | ||
2706 | |||
2707 | _return_T_done\@: | ||
2708 | mov %r14, %rsp | ||
2709 | |||
2710 | pop %r15 | ||
2711 | pop %r14 | ||
2712 | pop %r13 | ||
2713 | pop %r12 | ||
2714 | .endm | ||
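Both GCM_ENC_DEC macros end the same way: len(A)||len(C) in bits is folded into GHASH, the result is byte-swapped and XORed with E(K, Y0), and _return_T stores only the requested 8, 12 or 16 tag bytes. A condensed C model of that last step (gcm_emit_tag and its arguments are illustrative names, not kernel symbols):

#include <stdint.h>
#include <string.h>

/* tag = first auth_tag_len bytes of E(K, Y0) XOR GHASH(A, C, lengths);
 * auth_tag_len is expected to be 16, 12 or 8, as in the prototypes below. */
static void gcm_emit_tag(uint8_t *auth_tag, unsigned long auth_tag_len,
			 const uint8_t ek_y0[16], const uint8_t ghash_final[16])
{
	uint8_t full_tag[16];
	int i;

	for (i = 0; i < 16; i++)
		full_tag[i] = ek_y0[i] ^ ghash_final[i];
	memcpy(auth_tag, full_tag, auth_tag_len);
}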
2715 | |||
2716 | |||
2717 | ############################################################# | ||
2718 | #void aesni_gcm_precomp_avx_gen4 | ||
2719 | # (gcm_data *my_ctx_data, | ||
2720 | # u8 *hash_subkey)# /* H, the Hash sub key input. | ||
2721 | # Data starts on a 16-byte boundary. */ | ||
2722 | ############################################################# | ||
2723 | ENTRY(aesni_gcm_precomp_avx_gen4) | ||
2724 | #the number of pushes must equal STACK_OFFSET | ||
2725 | push %r12 | ||
2726 | push %r13 | ||
2727 | push %r14 | ||
2728 | push %r15 | ||
2729 | |||
2730 | mov %rsp, %r14 | ||
2731 | |||
2732 | |||
2733 | |||
2734 | sub $VARIABLE_OFFSET, %rsp | ||
2735 | and $~63, %rsp # align rsp to 64 bytes | ||
2736 | |||
2737 | vmovdqu (arg2), %xmm6 # xmm6 = HashKey | ||
2738 | |||
2739 | vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 | ||
2740 | ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey | ||
2741 | vmovdqa %xmm6, %xmm2 | ||
2742 | vpsllq $1, %xmm6, %xmm6 | ||
2743 | vpsrlq $63, %xmm2, %xmm2 | ||
2744 | vmovdqa %xmm2, %xmm1 | ||
2745 | vpslldq $8, %xmm2, %xmm2 | ||
2746 | vpsrldq $8, %xmm1, %xmm1 | ||
2747 | vpor %xmm2, %xmm6, %xmm6 | ||
2748 | #reduction | ||
2749 | vpshufd $0b00100100, %xmm1, %xmm2 | ||
2750 | vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 | ||
2751 | vpand POLY(%rip), %xmm2, %xmm2 | ||
2752 | vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly | ||
2753 | ####################################################################### | ||
2754 | vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly | ||
2755 | |||
2756 | |||
2757 | PRECOMPUTE_AVX2 %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 | ||
2758 | |||
2759 | mov %r14, %rsp | ||
2760 | |||
2761 | pop %r15 | ||
2762 | pop %r14 | ||
2763 | pop %r13 | ||
2764 | pop %r12 | ||
2765 | ret | ||
2766 | ENDPROC(aesni_gcm_precomp_avx_gen4) | ||
2767 | |||
2768 | |||
2769 | ############################################################################### | ||
2770 | #void aesni_gcm_enc_avx_gen4( | ||
2771 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
2772 | # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ | ||
2773 | # const u8 *in, /* Plaintext input */ | ||
2774 | # u64 plaintext_len, /* Length of data in Bytes for encryption. */ | ||
2775 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
2776 | # (from Security Association) concatenated with 8 byte | ||
2777 | # Initialisation Vector (from IPSec ESP Payload) | ||
2778 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
2779 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
2780 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
2781 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
2782 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
2783 | # Valid values are 16 (most likely), 12 or 8. */ | ||
2784 | ############################################################################### | ||
2785 | ENTRY(aesni_gcm_enc_avx_gen4) | ||
2786 | GCM_ENC_DEC_AVX2 ENC | ||
2787 | ret | ||
2788 | ENDPROC(aesni_gcm_enc_avx_gen4) | ||
2789 | |||
2790 | ############################################################################### | ||
2791 | #void aesni_gcm_dec_avx_gen4( | ||
2792 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
2793 | # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ | ||
2794 | # const u8 *in, /* Ciphertext input */ | ||
2795 | # u64 plaintext_len, /* Length of data in Bytes for decryption. */ | ||
2796 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
2797 | # (from Security Association) concatenated with 8 byte | ||
2798 | # Initialisation Vector (from IPSec ESP Payload) | ||
2799 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
2800 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
2801 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
2802 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
2803 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
2804 | # Valid values are 16 (most likely), 12 or 8. */ | ||
2805 | ############################################################################### | ||
2806 | ENTRY(aesni_gcm_dec_avx_gen4) | ||
2807 | GCM_ENC_DEC_AVX2 DEC | ||
2808 | ret | ||
2809 | ENDPROC(aesni_gcm_dec_avx_gen4) | ||
2810 | |||
2811 | #endif /* CONFIG_AS_AVX2 */ | ||
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 835488b745ee..948ad0e77741 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -101,6 +101,9 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | |||
101 | int crypto_fpu_init(void); | 101 | int crypto_fpu_init(void); |
102 | void crypto_fpu_exit(void); | 102 | void crypto_fpu_exit(void); |
103 | 103 | ||
104 | #define AVX_GEN2_OPTSIZE 640 | ||
105 | #define AVX_GEN4_OPTSIZE 4096 | ||
106 | |||
104 | #ifdef CONFIG_X86_64 | 107 | #ifdef CONFIG_X86_64 |
105 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | 108 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, |
106 | const u8 *in, unsigned int len, u8 *iv); | 109 | const u8 *in, unsigned int len, u8 *iv); |
@@ -150,6 +153,123 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, | |||
150 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | 153 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, |
151 | u8 *auth_tag, unsigned long auth_tag_len); | 154 | u8 *auth_tag, unsigned long auth_tag_len); |
152 | 155 | ||
156 | |||
157 | #ifdef CONFIG_AS_AVX | ||
158 | /* | ||
159 | * asmlinkage void aesni_gcm_precomp_avx_gen2() | ||
160 | * gcm_data *my_ctx_data, context data | ||
161 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
162 | */ | ||
163 | asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data, u8 *hash_subkey); | ||
164 | |||
165 | asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, u8 *out, | ||
166 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
167 | const u8 *aad, unsigned long aad_len, | ||
168 | u8 *auth_tag, unsigned long auth_tag_len); | ||
169 | |||
170 | asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out, | ||
171 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
172 | const u8 *aad, unsigned long aad_len, | ||
173 | u8 *auth_tag, unsigned long auth_tag_len); | ||
174 | |||
175 | static void aesni_gcm_enc_avx(void *ctx, u8 *out, | ||
176 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
177 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
178 | u8 *auth_tag, unsigned long auth_tag_len) | ||
179 | { | ||
180 | if (plaintext_len < AVX_GEN2_OPTSIZE) { | ||
181 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, | ||
182 | aad_len, auth_tag, auth_tag_len); | ||
183 | } else { | ||
184 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
185 | aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, | ||
186 | aad_len, auth_tag, auth_tag_len); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | static void aesni_gcm_dec_avx(void *ctx, u8 *out, | ||
191 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
192 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
193 | u8 *auth_tag, unsigned long auth_tag_len) | ||
194 | { | ||
195 | if (ciphertext_len < AVX_GEN2_OPTSIZE) { | ||
196 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, | ||
197 | aad_len, auth_tag, auth_tag_len); | ||
198 | } else { | ||
199 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
200 | aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, | ||
201 | aad_len, auth_tag, auth_tag_len); | ||
202 | } | ||
203 | } | ||
204 | #endif | ||
205 | |||
206 | #ifdef CONFIG_AS_AVX2 | ||
207 | /* | ||
208 | * asmlinkage void aesni_gcm_precomp_avx_gen4() | ||
209 | * gcm_data *my_ctx_data, context data | ||
210 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
211 | */ | ||
212 | asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data, u8 *hash_subkey); | ||
213 | |||
214 | asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, u8 *out, | ||
215 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
216 | const u8 *aad, unsigned long aad_len, | ||
217 | u8 *auth_tag, unsigned long auth_tag_len); | ||
218 | |||
219 | asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out, | ||
220 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
221 | const u8 *aad, unsigned long aad_len, | ||
222 | u8 *auth_tag, unsigned long auth_tag_len); | ||
223 | |||
224 | static void aesni_gcm_enc_avx2(void *ctx, u8 *out, | ||
225 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
226 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
227 | u8 *auth_tag, unsigned long auth_tag_len) | ||
228 | { | ||
229 | if (plaintext_len < AVX_GEN2_OPTSIZE) { | ||
230 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, | ||
231 | aad_len, auth_tag, auth_tag_len); | ||
232 | } else if (plaintext_len < AVX_GEN4_OPTSIZE) { | ||
233 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
234 | aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, | ||
235 | aad_len, auth_tag, auth_tag_len); | ||
236 | } else { | ||
237 | aesni_gcm_precomp_avx_gen4(ctx, hash_subkey); | ||
238 | aesni_gcm_enc_avx_gen4(ctx, out, in, plaintext_len, iv, aad, | ||
239 | aad_len, auth_tag, auth_tag_len); | ||
240 | } | ||
241 | } | ||
242 | |||
243 | static void aesni_gcm_dec_avx2(void *ctx, u8 *out, | ||
244 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
245 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
246 | u8 *auth_tag, unsigned long auth_tag_len) | ||
247 | { | ||
248 | if (ciphertext_len < AVX_GEN2_OPTSIZE) { | ||
249 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, | ||
250 | aad, aad_len, auth_tag, auth_tag_len); | ||
251 | } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { | ||
252 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
253 | aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, | ||
254 | aad_len, auth_tag, auth_tag_len); | ||
255 | } else { | ||
256 | aesni_gcm_precomp_avx_gen4(ctx, hash_subkey); | ||
257 | aesni_gcm_dec_avx_gen4(ctx, out, in, ciphertext_len, iv, aad, | ||
258 | aad_len, auth_tag, auth_tag_len); | ||
259 | } | ||
260 | } | ||
261 | #endif | ||
262 | |||
263 | static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out, | ||
264 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
265 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
266 | u8 *auth_tag, unsigned long auth_tag_len); | ||
267 | |||
268 | static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out, | ||
269 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
270 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
271 | u8 *auth_tag, unsigned long auth_tag_len); | ||
272 | |||
153 | static inline struct | 273 | static inline struct |
154 | aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) | 274 | aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) |
155 | { | 275 | { |
@@ -915,7 +1035,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) | |||
915 | dst = src; | 1035 | dst = src; |
916 | } | 1036 | } |
917 | 1037 | ||
918 | aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, | 1038 | aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, |
919 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst | 1039 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst |
920 | + ((unsigned long)req->cryptlen), auth_tag_len); | 1040 | + ((unsigned long)req->cryptlen), auth_tag_len); |
921 | 1041 | ||
@@ -996,12 +1116,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
996 | dst = src; | 1116 | dst = src; |
997 | } | 1117 | } |
998 | 1118 | ||
999 | aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, | 1119 | aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv, |
1000 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, | 1120 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, |
1001 | authTag, auth_tag_len); | 1121 | authTag, auth_tag_len); |
1002 | 1122 | ||
1003 | /* Compare generated tag with passed in tag. */ | 1123 | /* Compare generated tag with passed in tag. */ |
1004 | retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? | 1124 | retval = crypto_memneq(src + tempCipherLen, authTag, auth_tag_len) ? |
1005 | -EBADMSG : 0; | 1125 | -EBADMSG : 0; |
1006 | 1126 | ||
1007 | if (one_entry_in_sg) { | 1127 | if (one_entry_in_sg) { |
@@ -1353,6 +1473,27 @@ static int __init aesni_init(void) | |||
1353 | 1473 | ||
1354 | if (!x86_match_cpu(aesni_cpu_id)) | 1474 | if (!x86_match_cpu(aesni_cpu_id)) |
1355 | return -ENODEV; | 1475 | return -ENODEV; |
1476 | #ifdef CONFIG_X86_64 | ||
1477 | #ifdef CONFIG_AS_AVX2 | ||
1478 | if (boot_cpu_has(X86_FEATURE_AVX2)) { | ||
1479 | pr_info("AVX2 version of gcm_enc/dec engaged.\n"); | ||
1480 | aesni_gcm_enc_tfm = aesni_gcm_enc_avx2; | ||
1481 | aesni_gcm_dec_tfm = aesni_gcm_dec_avx2; | ||
1482 | } else | ||
1483 | #endif | ||
1484 | #ifdef CONFIG_AS_AVX | ||
1485 | if (boot_cpu_has(X86_FEATURE_AVX)) { | ||
1486 | pr_info("AVX version of gcm_enc/dec engaged.\n"); | ||
1487 | aesni_gcm_enc_tfm = aesni_gcm_enc_avx; | ||
1488 | aesni_gcm_dec_tfm = aesni_gcm_dec_avx; | ||
1489 | } else | ||
1490 | #endif | ||
1491 | { | ||
1492 | pr_info("SSE version of gcm_enc/dec engaged.\n"); | ||
1493 | aesni_gcm_enc_tfm = aesni_gcm_enc; | ||
1494 | aesni_gcm_dec_tfm = aesni_gcm_dec; | ||
1495 | } | ||
1496 | #endif | ||
1356 | 1497 | ||
1357 | err = crypto_fpu_init(); | 1498 | err = crypto_fpu_init(); |
1358 | if (err) | 1499 | if (err) |
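
The glue changes above add two layers of dispatch: aesni_init() selects an implementation once, based on CPU features (SSE, AVX or AVX2), and the AVX/AVX2 wrappers additionally fall back to the plain AES-NI path for short inputs, where the wider code's setup cost outweighs its throughput gain. Below is a minimal user-space sketch of that pattern; the two size thresholds are taken from the patch, but the function names and printf output are placeholders rather than the kernel symbols.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define AVX_GEN2_OPTSIZE  640
#define AVX_GEN4_OPTSIZE 4096

static void gcm_enc_sse(size_t len)  { printf("SSE path,  %zu bytes\n", len); }
static void gcm_enc_avx(size_t len)  { printf("AVX path,  %zu bytes\n", len); }
static void gcm_enc_avx2(size_t len) { printf("AVX2 path, %zu bytes\n", len); }

/* Request-time dispatch used when AVX2 was selected: short buffers still
 * take the cheaper path. */
static void gcm_enc_avx2_dispatch(size_t len)
{
	if (len < AVX_GEN2_OPTSIZE)
		gcm_enc_sse(len);
	else if (len < AVX_GEN4_OPTSIZE)
		gcm_enc_avx(len);
	else
		gcm_enc_avx2(len);
}

/* Init-time dispatch: pick the function pointer once. */
static void (*gcm_enc_tfm)(size_t len);

static void pick_impl(bool has_avx, bool has_avx2)
{
	if (has_avx2)
		gcm_enc_tfm = gcm_enc_avx2_dispatch;
	else if (has_avx)
		gcm_enc_tfm = gcm_enc_avx;	/* the real AVX wrapper also size-checks */
	else
		gcm_enc_tfm = gcm_enc_sse;
}

int main(void)
{
	pick_impl(true, true);	/* pretend the CPU advertises AVX2 */
	gcm_enc_tfm(256);	/* < 640   -> SSE path  */
	gcm_enc_tfm(2048);	/* < 4096  -> AVX path  */
	gcm_enc_tfm(8192);	/* >= 4096 -> AVX2 path */
	return 0;
}
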
diff --git a/crypto/Makefile b/crypto/Makefile index 989c510da8cc..b29402a7b9b5 100644 --- a/crypto/Makefile +++ b/crypto/Makefile | |||
@@ -2,11 +2,6 @@ | |||
2 | # Cryptographic API | 2 | # Cryptographic API |
3 | # | 3 | # |
4 | 4 | ||
5 | # memneq MUST be built with -Os or -O0 to prevent early-return optimizations | ||
6 | # that will defeat memneq's actual purpose to prevent timing attacks. | ||
7 | CFLAGS_REMOVE_memneq.o := -O1 -O2 -O3 | ||
8 | CFLAGS_memneq.o := -Os | ||
9 | |||
10 | obj-$(CONFIG_CRYPTO) += crypto.o | 5 | obj-$(CONFIG_CRYPTO) += crypto.o |
11 | crypto-y := api.o cipher.o compress.o memneq.o | 6 | crypto-y := api.o cipher.o compress.o memneq.o |
12 | 7 | ||
diff --git a/crypto/ahash.c b/crypto/ahash.c index 793a27f2493e..a92dc382f781 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c | |||
@@ -213,7 +213,10 @@ static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) | |||
213 | 213 | ||
214 | ahash_op_unaligned_finish(areq, err); | 214 | ahash_op_unaligned_finish(areq, err); |
215 | 215 | ||
216 | complete(data, err); | 216 | areq->base.complete = complete; |
217 | areq->base.data = data; | ||
218 | |||
219 | complete(&areq->base, err); | ||
217 | } | 220 | } |
218 | 221 | ||
219 | static int ahash_op_unaligned(struct ahash_request *req, | 222 | static int ahash_op_unaligned(struct ahash_request *req, |
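
The two added assignments matter because the unaligned-buffer path replaces the request's completion callback and context with its own before submitting the operation; when that private handler runs, it has to put the caller's complete/data back before invoking them, so that callers which inspect or re-submit the request from their callback see it exactly as they submitted it. A simplified sketch of the pattern, with placeholder struct and function names standing in for the ahash internals:

struct async_req {
	void (*complete)(struct async_req *req, int err);
	void *data;
};

struct op_ctx {
	struct async_req base;
	/* caller's original callback and context, saved when they were replaced */
	void (*orig_complete)(struct async_req *req, int err);
	void *orig_data;
};

static void op_unaligned_done(struct op_ctx *ctx, int err)
{
	/* ... private fixups (copying results out of the aligned bounce
	 * buffer) would happen here ... */

	/* restore the request before handing it back ... */
	ctx->base.complete = ctx->orig_complete;
	ctx->base.data = ctx->orig_data;

	/* ... and only then run the caller's completion handler */
	ctx->base.complete(&ctx->base, err);
}
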
diff --git a/crypto/memneq.c b/crypto/memneq.c index cd0162221c14..afed1bd16aee 100644 --- a/crypto/memneq.c +++ b/crypto/memneq.c | |||
@@ -72,6 +72,7 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size) | |||
72 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) | 72 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) |
73 | while (size >= sizeof(unsigned long)) { | 73 | while (size >= sizeof(unsigned long)) { |
74 | neq |= *(unsigned long *)a ^ *(unsigned long *)b; | 74 | neq |= *(unsigned long *)a ^ *(unsigned long *)b; |
75 | OPTIMIZER_HIDE_VAR(neq); | ||
75 | a += sizeof(unsigned long); | 76 | a += sizeof(unsigned long); |
76 | b += sizeof(unsigned long); | 77 | b += sizeof(unsigned long); |
77 | size -= sizeof(unsigned long); | 78 | size -= sizeof(unsigned long); |
@@ -79,6 +80,7 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size) | |||
79 | #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ | 80 | #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ |
80 | while (size > 0) { | 81 | while (size > 0) { |
81 | neq |= *(unsigned char *)a ^ *(unsigned char *)b; | 82 | neq |= *(unsigned char *)a ^ *(unsigned char *)b; |
83 | OPTIMIZER_HIDE_VAR(neq); | ||
82 | a += 1; | 84 | a += 1; |
83 | b += 1; | 85 | b += 1; |
84 | size -= 1; | 86 | size -= 1; |
@@ -89,33 +91,61 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size) | |||
89 | /* Loop-free fast-path for frequently used 16-byte size */ | 91 | /* Loop-free fast-path for frequently used 16-byte size */ |
90 | static inline unsigned long __crypto_memneq_16(const void *a, const void *b) | 92 | static inline unsigned long __crypto_memneq_16(const void *a, const void *b) |
91 | { | 93 | { |
94 | unsigned long neq = 0; | ||
95 | |||
92 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | 96 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS |
93 | if (sizeof(unsigned long) == 8) | 97 | if (sizeof(unsigned long) == 8) { |
94 | return ((*(unsigned long *)(a) ^ *(unsigned long *)(b)) | 98 | neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b); |
95 | | (*(unsigned long *)(a+8) ^ *(unsigned long *)(b+8))); | 99 | OPTIMIZER_HIDE_VAR(neq); |
96 | else if (sizeof(unsigned int) == 4) | 100 | neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8); |
97 | return ((*(unsigned int *)(a) ^ *(unsigned int *)(b)) | 101 | OPTIMIZER_HIDE_VAR(neq); |
98 | | (*(unsigned int *)(a+4) ^ *(unsigned int *)(b+4)) | 102 | } else if (sizeof(unsigned int) == 4) { |
99 | | (*(unsigned int *)(a+8) ^ *(unsigned int *)(b+8)) | 103 | neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b); |
100 | | (*(unsigned int *)(a+12) ^ *(unsigned int *)(b+12))); | 104 | OPTIMIZER_HIDE_VAR(neq); |
101 | else | 105 | neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4); |
106 | OPTIMIZER_HIDE_VAR(neq); | ||
107 | neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8); | ||
108 | OPTIMIZER_HIDE_VAR(neq); | ||
109 | neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12); | ||
110 | OPTIMIZER_HIDE_VAR(neq); | ||
111 | } else | ||
102 | #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ | 112 | #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ |
103 | return ((*(unsigned char *)(a) ^ *(unsigned char *)(b)) | 113 | { |
104 | | (*(unsigned char *)(a+1) ^ *(unsigned char *)(b+1)) | 114 | neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b); |
105 | | (*(unsigned char *)(a+2) ^ *(unsigned char *)(b+2)) | 115 | OPTIMIZER_HIDE_VAR(neq); |
106 | | (*(unsigned char *)(a+3) ^ *(unsigned char *)(b+3)) | 116 | neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1); |
107 | | (*(unsigned char *)(a+4) ^ *(unsigned char *)(b+4)) | 117 | OPTIMIZER_HIDE_VAR(neq); |
108 | | (*(unsigned char *)(a+5) ^ *(unsigned char *)(b+5)) | 118 | neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2); |
109 | | (*(unsigned char *)(a+6) ^ *(unsigned char *)(b+6)) | 119 | OPTIMIZER_HIDE_VAR(neq); |
110 | | (*(unsigned char *)(a+7) ^ *(unsigned char *)(b+7)) | 120 | neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3); |
111 | | (*(unsigned char *)(a+8) ^ *(unsigned char *)(b+8)) | 121 | OPTIMIZER_HIDE_VAR(neq); |
112 | | (*(unsigned char *)(a+9) ^ *(unsigned char *)(b+9)) | 122 | neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4); |
113 | | (*(unsigned char *)(a+10) ^ *(unsigned char *)(b+10)) | 123 | OPTIMIZER_HIDE_VAR(neq); |
114 | | (*(unsigned char *)(a+11) ^ *(unsigned char *)(b+11)) | 124 | neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5); |
115 | | (*(unsigned char *)(a+12) ^ *(unsigned char *)(b+12)) | 125 | OPTIMIZER_HIDE_VAR(neq); |
116 | | (*(unsigned char *)(a+13) ^ *(unsigned char *)(b+13)) | 126 | neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6); |
117 | | (*(unsigned char *)(a+14) ^ *(unsigned char *)(b+14)) | 127 | OPTIMIZER_HIDE_VAR(neq); |
118 | | (*(unsigned char *)(a+15) ^ *(unsigned char *)(b+15))); | 128 | neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7); |
129 | OPTIMIZER_HIDE_VAR(neq); | ||
130 | neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8); | ||
131 | OPTIMIZER_HIDE_VAR(neq); | ||
132 | neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9); | ||
133 | OPTIMIZER_HIDE_VAR(neq); | ||
134 | neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10); | ||
135 | OPTIMIZER_HIDE_VAR(neq); | ||
136 | neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11); | ||
137 | OPTIMIZER_HIDE_VAR(neq); | ||
138 | neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12); | ||
139 | OPTIMIZER_HIDE_VAR(neq); | ||
140 | neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13); | ||
141 | OPTIMIZER_HIDE_VAR(neq); | ||
142 | neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14); | ||
143 | OPTIMIZER_HIDE_VAR(neq); | ||
144 | neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15); | ||
145 | OPTIMIZER_HIDE_VAR(neq); | ||
146 | } | ||
147 | |||
148 | return neq; | ||
119 | } | 149 | } |
120 | 150 | ||
121 | /* Compare two areas of memory without leaking timing information, | 151 | /* Compare two areas of memory without leaking timing information, |
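
The OPTIMIZER_HIDE_VAR() calls are inserted after every accumulation step so the compiler cannot observe that neq is already non-zero and short-circuit the remaining loads, which would reintroduce a data-dependent timing difference; the barriers are also what make the special -Os/-O1..-O3 handling removed from crypto/Makefile above unnecessary. A stand-alone sketch of the idea follows; the HIDE() macro is an assumed user-space stand-in for the kernel's OPTIMIZER_HIDE_VAR(), not its actual definition.

#include <stddef.h>

/* Assumed stand-in for OPTIMIZER_HIDE_VAR(): an empty asm the compiler must
 * treat as possibly modifying the variable, so it cannot reason about its
 * value across the barrier. */
#define HIDE(var) __asm__ __volatile__("" : "+r" (var))

/* Returns zero iff the two buffers are equal; runtime depends only on len,
 * never on where (or whether) the buffers differ. */
static unsigned long memneq_sketch(const unsigned char *a,
				   const unsigned char *b, size_t len)
{
	unsigned long neq = 0;
	size_t i;

	for (i = 0; i < len; i++) {
		neq |= a[i] ^ b[i];
		HIDE(neq);	/* defeat early-return optimizations */
	}
	return neq;
}
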
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index f8c920cafe63..309d345ead95 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c | |||
@@ -78,7 +78,7 @@ static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu, | |||
78 | cpu = *cb_cpu; | 78 | cpu = *cb_cpu; |
79 | 79 | ||
80 | rcu_read_lock_bh(); | 80 | rcu_read_lock_bh(); |
81 | cpumask = rcu_dereference(pcrypt->cb_cpumask); | 81 | cpumask = rcu_dereference_bh(pcrypt->cb_cpumask); |
82 | if (cpumask_test_cpu(cpu, cpumask->mask)) | 82 | if (cpumask_test_cpu(cpu, cpumask->mask)) |
83 | goto out; | 83 | goto out; |
84 | 84 | ||
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 001f07cdb828..0d9003ae8c61 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c | |||
@@ -137,7 +137,272 @@ out: | |||
137 | return ret; | 137 | return ret; |
138 | } | 138 | } |
139 | 139 | ||
140 | static int test_aead_jiffies(struct aead_request *req, int enc, | ||
141 | int blen, int sec) | ||
142 | { | ||
143 | unsigned long start, end; | ||
144 | int bcount; | ||
145 | int ret; | ||
146 | |||
147 | for (start = jiffies, end = start + sec * HZ, bcount = 0; | ||
148 | time_before(jiffies, end); bcount++) { | ||
149 | if (enc) | ||
150 | ret = crypto_aead_encrypt(req); | ||
151 | else | ||
152 | ret = crypto_aead_decrypt(req); | ||
153 | |||
154 | if (ret) | ||
155 | return ret; | ||
156 | } | ||
157 | |||
158 | printk("%d operations in %d seconds (%ld bytes)\n", | ||
159 | bcount, sec, (long)bcount * blen); | ||
160 | return 0; | ||
161 | } | ||
162 | |||
163 | static int test_aead_cycles(struct aead_request *req, int enc, int blen) | ||
164 | { | ||
165 | unsigned long cycles = 0; | ||
166 | int ret = 0; | ||
167 | int i; | ||
168 | |||
169 | local_irq_disable(); | ||
170 | |||
171 | /* Warm-up run. */ | ||
172 | for (i = 0; i < 4; i++) { | ||
173 | if (enc) | ||
174 | ret = crypto_aead_encrypt(req); | ||
175 | else | ||
176 | ret = crypto_aead_decrypt(req); | ||
177 | |||
178 | if (ret) | ||
179 | goto out; | ||
180 | } | ||
181 | |||
182 | /* The real thing. */ | ||
183 | for (i = 0; i < 8; i++) { | ||
184 | cycles_t start, end; | ||
185 | |||
186 | start = get_cycles(); | ||
187 | if (enc) | ||
188 | ret = crypto_aead_encrypt(req); | ||
189 | else | ||
190 | ret = crypto_aead_decrypt(req); | ||
191 | end = get_cycles(); | ||
192 | |||
193 | if (ret) | ||
194 | goto out; | ||
195 | |||
196 | cycles += end - start; | ||
197 | } | ||
198 | |||
199 | out: | ||
200 | local_irq_enable(); | ||
201 | |||
202 | if (ret == 0) | ||
203 | printk("1 operation in %lu cycles (%d bytes)\n", | ||
204 | (cycles + 4) / 8, blen); | ||
205 | |||
206 | return ret; | ||
207 | } | ||
208 | |||
140 | static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 }; | 209 | static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 }; |
210 | static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 }; | ||
211 | |||
212 | #define XBUFSIZE 8 | ||
213 | #define MAX_IVLEN 32 | ||
214 | |||
215 | static int testmgr_alloc_buf(char *buf[XBUFSIZE]) | ||
216 | { | ||
217 | int i; | ||
218 | |||
219 | for (i = 0; i < XBUFSIZE; i++) { | ||
220 | buf[i] = (void *)__get_free_page(GFP_KERNEL); | ||
221 | if (!buf[i]) | ||
222 | goto err_free_buf; | ||
223 | } | ||
224 | |||
225 | return 0; | ||
226 | |||
227 | err_free_buf: | ||
228 | while (i-- > 0) | ||
229 | free_page((unsigned long)buf[i]); | ||
230 | |||
231 | return -ENOMEM; | ||
232 | } | ||
233 | |||
234 | static void testmgr_free_buf(char *buf[XBUFSIZE]) | ||
235 | { | ||
236 | int i; | ||
237 | |||
238 | for (i = 0; i < XBUFSIZE; i++) | ||
239 | free_page((unsigned long)buf[i]); | ||
240 | } | ||
241 | |||
242 | static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE], | ||
243 | unsigned int buflen) | ||
244 | { | ||
245 | int np = (buflen + PAGE_SIZE - 1)/PAGE_SIZE; | ||
246 | int k, rem; | ||
247 | |||
248 | if (np > XBUFSIZE) { | ||
249 | rem = PAGE_SIZE; | ||
250 | np = XBUFSIZE; | ||
251 | } else { | ||
252 | rem = buflen % PAGE_SIZE; | ||
253 | } | ||
254 | sg_init_table(sg, np); | ||
255 | for (k = 0; k < np; ++k) { | ||
256 | if (k == (np-1)) | ||
257 | sg_set_buf(&sg[k], xbuf[k], rem); | ||
258 | else | ||
259 | sg_set_buf(&sg[k], xbuf[k], PAGE_SIZE); | ||
260 | } | ||
261 | } | ||
262 | |||
263 | static void test_aead_speed(const char *algo, int enc, unsigned int sec, | ||
264 | struct aead_speed_template *template, | ||
265 | unsigned int tcount, u8 authsize, | ||
266 | unsigned int aad_size, u8 *keysize) | ||
267 | { | ||
268 | unsigned int i, j; | ||
269 | struct crypto_aead *tfm; | ||
270 | int ret = -ENOMEM; | ||
271 | const char *key; | ||
272 | struct aead_request *req; | ||
273 | struct scatterlist *sg; | ||
274 | struct scatterlist *asg; | ||
275 | struct scatterlist *sgout; | ||
276 | const char *e; | ||
277 | void *assoc; | ||
278 | char iv[MAX_IVLEN]; | ||
279 | char *xbuf[XBUFSIZE]; | ||
280 | char *xoutbuf[XBUFSIZE]; | ||
281 | char *axbuf[XBUFSIZE]; | ||
282 | unsigned int *b_size; | ||
283 | unsigned int iv_len; | ||
284 | |||
285 | if (enc == ENCRYPT) | ||
286 | e = "encryption"; | ||
287 | else | ||
288 | e = "decryption"; | ||
289 | |||
290 | if (testmgr_alloc_buf(xbuf)) | ||
291 | goto out_noxbuf; | ||
292 | if (testmgr_alloc_buf(axbuf)) | ||
293 | goto out_noaxbuf; | ||
294 | if (testmgr_alloc_buf(xoutbuf)) | ||
295 | goto out_nooutbuf; | ||
296 | |||
297 | sg = kmalloc(sizeof(*sg) * 8 * 3, GFP_KERNEL); | ||
298 | if (!sg) | ||
299 | goto out_nosg; | ||
300 | asg = &sg[8]; | ||
301 | sgout = &asg[8]; | ||
302 | |||
303 | |||
304 | printk(KERN_INFO "\ntesting speed of %s %s\n", algo, e); | ||
305 | |||
306 | tfm = crypto_alloc_aead(algo, 0, 0); | ||
307 | |||
308 | if (IS_ERR(tfm)) { | ||
309 | pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo, | ||
310 | PTR_ERR(tfm)); | ||
311 | return; | ||
312 | } | ||
313 | |||
314 | req = aead_request_alloc(tfm, GFP_KERNEL); | ||
315 | if (!req) { | ||
316 | pr_err("alg: aead: Failed to allocate request for %s\n", | ||
317 | algo); | ||
318 | goto out; | ||
319 | } | ||
320 | |||
321 | i = 0; | ||
322 | do { | ||
323 | b_size = aead_sizes; | ||
324 | do { | ||
325 | assoc = axbuf[0]; | ||
326 | |||
327 | if (aad_size < PAGE_SIZE) | ||
328 | memset(assoc, 0xff, aad_size); | ||
329 | else { | ||
330 | pr_err("associated data length (%u) too big\n", | ||
331 | aad_size); | ||
332 | goto out_nosg; | ||
333 | } | ||
334 | sg_init_one(&asg[0], assoc, aad_size); | ||
335 | |||
336 | if ((*keysize + *b_size) > TVMEMSIZE * PAGE_SIZE) { | ||
337 | pr_err("template (%u) too big for tvmem (%lu)\n", | ||
338 | *keysize + *b_size, | ||
339 | TVMEMSIZE * PAGE_SIZE); | ||
340 | goto out; | ||
341 | } | ||
342 | |||
343 | key = tvmem[0]; | ||
344 | for (j = 0; j < tcount; j++) { | ||
345 | if (template[j].klen == *keysize) { | ||
346 | key = template[j].key; | ||
347 | break; | ||
348 | } | ||
349 | } | ||
350 | ret = crypto_aead_setkey(tfm, key, *keysize); | ||
351 | ret = crypto_aead_setauthsize(tfm, authsize); | ||
352 | |||
353 | iv_len = crypto_aead_ivsize(tfm); | ||
354 | if (iv_len) | ||
355 | memset(&iv, 0xff, iv_len); | ||
356 | |||
357 | crypto_aead_clear_flags(tfm, ~0); | ||
358 | printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ", | ||
359 | i, *keysize * 8, *b_size); | ||
360 | |||
361 | |||
362 | memset(tvmem[0], 0xff, PAGE_SIZE); | ||
363 | |||
364 | if (ret) { | ||
365 | pr_err("setkey() failed flags=%x\n", | ||
366 | crypto_aead_get_flags(tfm)); | ||
367 | goto out; | ||
368 | } | ||
369 | |||
370 | sg_init_aead(&sg[0], xbuf, | ||
371 | *b_size + (enc ? authsize : 0)); | ||
372 | |||
373 | sg_init_aead(&sgout[0], xoutbuf, | ||
374 | *b_size + (enc ? authsize : 0)); | ||
375 | |||
376 | aead_request_set_crypt(req, sg, sgout, *b_size, iv); | ||
377 | aead_request_set_assoc(req, asg, aad_size); | ||
378 | |||
379 | if (sec) | ||
380 | ret = test_aead_jiffies(req, enc, *b_size, sec); | ||
381 | else | ||
382 | ret = test_aead_cycles(req, enc, *b_size); | ||
383 | |||
384 | if (ret) { | ||
385 | pr_err("%s() failed return code=%d\n", e, ret); | ||
386 | break; | ||
387 | } | ||
388 | b_size++; | ||
389 | i++; | ||
390 | } while (*b_size); | ||
391 | keysize++; | ||
392 | } while (*keysize); | ||
393 | |||
394 | out: | ||
395 | crypto_free_aead(tfm); | ||
396 | kfree(sg); | ||
397 | out_nosg: | ||
398 | testmgr_free_buf(xoutbuf); | ||
399 | out_nooutbuf: | ||
400 | testmgr_free_buf(axbuf); | ||
401 | out_noaxbuf: | ||
402 | testmgr_free_buf(xbuf); | ||
403 | out_noxbuf: | ||
404 | return; | ||
405 | } | ||
141 | 406 | ||
142 | static void test_cipher_speed(const char *algo, int enc, unsigned int sec, | 407 | static void test_cipher_speed(const char *algo, int enc, unsigned int sec, |
143 | struct cipher_speed_template *template, | 408 | struct cipher_speed_template *template, |
@@ -1427,6 +1692,11 @@ static int do_test(int m) | |||
1427 | speed_template_32_64); | 1692 | speed_template_32_64); |
1428 | break; | 1693 | break; |
1429 | 1694 | ||
1695 | case 211: | ||
1696 | test_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec, | ||
1697 | NULL, 0, 16, 8, aead_speed_template_20); | ||
1698 | break; | ||
1699 | |||
1430 | case 300: | 1700 | case 300: |
1431 | /* fall through */ | 1701 | /* fall through */ |
1432 | 1702 | ||
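
Mode 211 hooks the new test_aead_speed() into tcrypt's dispatcher for rfc4106(gcm(aes)) encryption with a 16-byte tag and 8 bytes of AAD, using the 20-byte key template defined in tcrypt.h below; it is typically exercised with something like "modprobe tcrypt mode=211 sec=1". Further AEAD cases could be added in the same style; the case below (number 212, 12-byte AAD) is purely illustrative and is not part of this patch.

case 212:	/* hypothetical example, not in this patch */
	test_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec,
			NULL, 0, 16, 12, aead_speed_template_20);
	break;
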
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index ecdeeb1a7b05..6c7e21a09f78 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h | |||
@@ -22,6 +22,11 @@ struct cipher_speed_template { | |||
22 | unsigned int klen; | 22 | unsigned int klen; |
23 | }; | 23 | }; |
24 | 24 | ||
25 | struct aead_speed_template { | ||
26 | const char *key; | ||
27 | unsigned int klen; | ||
28 | }; | ||
29 | |||
25 | struct hash_speed { | 30 | struct hash_speed { |
26 | unsigned int blen; /* buffer length */ | 31 | unsigned int blen; /* buffer length */ |
27 | unsigned int plen; /* per-update length */ | 32 | unsigned int plen; /* per-update length */ |
@@ -58,6 +63,11 @@ static u8 speed_template_32_48_64[] = {32, 48, 64, 0}; | |||
58 | static u8 speed_template_32_64[] = {32, 64, 0}; | 63 | static u8 speed_template_32_64[] = {32, 64, 0}; |
59 | 64 | ||
60 | /* | 65 | /* |
66 | * AEAD speed tests | ||
67 | */ | ||
68 | static u8 aead_speed_template_20[] = {20, 0}; | ||
69 | |||
70 | /* | ||
61 | * Digest speed tests | 71 | * Digest speed tests |
62 | */ | 72 | */ |
63 | static struct hash_speed generic_hash_speed_template[] = { | 73 | static struct hash_speed generic_hash_speed_template[] = { |
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index f4fd837bcb82..13857f5d28f7 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig | |||
@@ -289,16 +289,6 @@ config CRYPTO_DEV_SAHARA | |||
289 | This option enables support for the SAHARA HW crypto accelerator | 289 | This option enables support for the SAHARA HW crypto accelerator |
290 | found in some Freescale i.MX chips. | 290 | found in some Freescale i.MX chips. |
291 | 291 | ||
292 | config CRYPTO_DEV_DCP | ||
293 | tristate "Support for the DCP engine" | ||
294 | depends on ARCH_MXS && OF | ||
295 | select CRYPTO_BLKCIPHER | ||
296 | select CRYPTO_AES | ||
297 | select CRYPTO_CBC | ||
298 | help | ||
299 | This options enables support for the hardware crypto-acceleration | ||
300 | capabilities of the DCP co-processor | ||
301 | |||
302 | config CRYPTO_DEV_S5P | 292 | config CRYPTO_DEV_S5P |
303 | tristate "Support for Samsung S5PV210 crypto accelerator" | 293 | tristate "Support for Samsung S5PV210 crypto accelerator" |
304 | depends on ARCH_S5PV210 | 294 | depends on ARCH_S5PV210 |
@@ -399,4 +389,33 @@ config CRYPTO_DEV_ATMEL_SHA | |||
399 | To compile this driver as a module, choose M here: the module | 389 | To compile this driver as a module, choose M here: the module |
400 | will be called atmel-sha. | 390 | will be called atmel-sha. |
401 | 391 | ||
392 | config CRYPTO_DEV_CCP | ||
393 | bool "Support for AMD Cryptographic Coprocessor" | ||
394 | depends on X86 && PCI | ||
395 | default n | ||
396 | help | ||
397 | The AMD Cryptographic Coprocessor provides hardware support | ||
398 | for encryption, hashing and related operations. | ||
399 | |||
400 | if CRYPTO_DEV_CCP | ||
401 | source "drivers/crypto/ccp/Kconfig" | ||
402 | endif | ||
403 | |||
404 | config CRYPTO_DEV_MXS_DCP | ||
405 | tristate "Support for Freescale MXS DCP" | ||
406 | depends on ARCH_MXS | ||
407 | select CRYPTO_SHA1 | ||
408 | select CRYPTO_SHA256 | ||
409 | select CRYPTO_CBC | ||
410 | select CRYPTO_ECB | ||
411 | select CRYPTO_AES | ||
412 | select CRYPTO_BLKCIPHER | ||
413 | select CRYPTO_ALGAPI | ||
414 | help | ||
415 | The Freescale i.MX23/i.MX28 has a SHA1/SHA256 and AES128 CBC/ECB | ||
416 | co-processor on the die. | ||
417 | |||
418 | To compile this driver as a module, choose M here: the module | ||
419 | will be called mxs-dcp. | ||
420 | |||
402 | endif # CRYPTO_HW | 421 | endif # CRYPTO_HW |
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index b4946ddd2550..0bc6aa0a54d7 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile | |||
@@ -1,24 +1,25 @@ | |||
1 | obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o | 1 | obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o |
2 | obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o | 2 | obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o |
3 | obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o | ||
4 | obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o | ||
5 | obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/ | ||
6 | obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ | ||
3 | obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o | 7 | obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o |
4 | obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o | ||
5 | n2_crypto-y := n2_core.o n2_asm.o | ||
6 | obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o | 8 | obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o |
7 | obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o | ||
8 | obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o | ||
9 | obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/ | ||
10 | obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o | 9 | obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o |
11 | obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ | 10 | obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o |
12 | obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o | 11 | obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o |
12 | obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o | ||
13 | n2_crypto-y := n2_core.o n2_asm.o | ||
14 | obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ | ||
13 | obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o | 15 | obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o |
16 | obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o | ||
17 | obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o | ||
18 | obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o | ||
14 | obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o | 19 | obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o |
15 | obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o | 20 | obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ |
16 | obj-$(CONFIG_CRYPTO_DEV_DCP) += dcp.o | ||
17 | obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o | 21 | obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o |
22 | obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o | ||
23 | obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o | ||
18 | obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o | 24 | obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o |
19 | obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ | 25 | obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ |
20 | obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o | ||
21 | obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ | ||
22 | obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o | ||
23 | obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o | ||
24 | obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o | ||
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index efaf6302405f..37f9cc98ba17 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c | |||
@@ -724,7 +724,6 @@ static void crypto4xx_stop_all(struct crypto4xx_core_device *core_dev) | |||
724 | crypto4xx_destroy_pdr(core_dev->dev); | 724 | crypto4xx_destroy_pdr(core_dev->dev); |
725 | crypto4xx_destroy_gdr(core_dev->dev); | 725 | crypto4xx_destroy_gdr(core_dev->dev); |
726 | crypto4xx_destroy_sdr(core_dev->dev); | 726 | crypto4xx_destroy_sdr(core_dev->dev); |
727 | dev_set_drvdata(core_dev->device, NULL); | ||
728 | iounmap(core_dev->dev->ce_base); | 727 | iounmap(core_dev->dev->ce_base); |
729 | kfree(core_dev->dev); | 728 | kfree(core_dev->dev); |
730 | kfree(core_dev); | 729 | kfree(core_dev); |
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 4cf5dec826e1..b71f2fd749df 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c | |||
@@ -467,24 +467,10 @@ static int aead_setkey(struct crypto_aead *aead, | |||
467 | static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; | 467 | static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; |
468 | struct caam_ctx *ctx = crypto_aead_ctx(aead); | 468 | struct caam_ctx *ctx = crypto_aead_ctx(aead); |
469 | struct device *jrdev = ctx->jrdev; | 469 | struct device *jrdev = ctx->jrdev; |
470 | struct rtattr *rta = (void *)key; | 470 | struct crypto_authenc_keys keys; |
471 | struct crypto_authenc_key_param *param; | ||
472 | unsigned int authkeylen; | ||
473 | unsigned int enckeylen; | ||
474 | int ret = 0; | 471 | int ret = 0; |
475 | 472 | ||
476 | param = RTA_DATA(rta); | 473 | if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) |
477 | enckeylen = be32_to_cpu(param->enckeylen); | ||
478 | |||
479 | key += RTA_ALIGN(rta->rta_len); | ||
480 | keylen -= RTA_ALIGN(rta->rta_len); | ||
481 | |||
482 | if (keylen < enckeylen) | ||
483 | goto badkey; | ||
484 | |||
485 | authkeylen = keylen - enckeylen; | ||
486 | |||
487 | if (keylen > CAAM_MAX_KEY_SIZE) | ||
488 | goto badkey; | 474 | goto badkey; |
489 | 475 | ||
490 | /* Pick class 2 key length from algorithm submask */ | 476 | /* Pick class 2 key length from algorithm submask */ |
@@ -492,25 +478,29 @@ static int aead_setkey(struct crypto_aead *aead, | |||
492 | OP_ALG_ALGSEL_SHIFT] * 2; | 478 | OP_ALG_ALGSEL_SHIFT] * 2; |
493 | ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16); | 479 | ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16); |
494 | 480 | ||
481 | if (ctx->split_key_pad_len + keys.enckeylen > CAAM_MAX_KEY_SIZE) | ||
482 | goto badkey; | ||
483 | |||
495 | #ifdef DEBUG | 484 | #ifdef DEBUG |
496 | printk(KERN_ERR "keylen %d enckeylen %d authkeylen %d\n", | 485 | printk(KERN_ERR "keylen %d enckeylen %d authkeylen %d\n", |
497 | keylen, enckeylen, authkeylen); | 486 | keys.authkeylen + keys.enckeylen, keys.enckeylen, |
487 | keys.authkeylen); | ||
498 | printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n", | 488 | printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n", |
499 | ctx->split_key_len, ctx->split_key_pad_len); | 489 | ctx->split_key_len, ctx->split_key_pad_len); |
500 | print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", | 490 | print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", |
501 | DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); | 491 | DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); |
502 | #endif | 492 | #endif |
503 | 493 | ||
504 | ret = gen_split_aead_key(ctx, key, authkeylen); | 494 | ret = gen_split_aead_key(ctx, keys.authkey, keys.authkeylen); |
505 | if (ret) { | 495 | if (ret) { |
506 | goto badkey; | 496 | goto badkey; |
507 | } | 497 | } |
508 | 498 | ||
509 | /* append encryption key to auth split key */ | 499 | /* append encryption key to auth split key */ |
510 | memcpy(ctx->key + ctx->split_key_pad_len, key + authkeylen, enckeylen); | 500 | memcpy(ctx->key + ctx->split_key_pad_len, keys.enckey, keys.enckeylen); |
511 | 501 | ||
512 | ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len + | 502 | ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len + |
513 | enckeylen, DMA_TO_DEVICE); | 503 | keys.enckeylen, DMA_TO_DEVICE); |
514 | if (dma_mapping_error(jrdev, ctx->key_dma)) { | 504 | if (dma_mapping_error(jrdev, ctx->key_dma)) { |
515 | dev_err(jrdev, "unable to map key i/o memory\n"); | 505 | dev_err(jrdev, "unable to map key i/o memory\n"); |
516 | return -ENOMEM; | 506 | return -ENOMEM; |
@@ -518,15 +508,15 @@ static int aead_setkey(struct crypto_aead *aead, | |||
518 | #ifdef DEBUG | 508 | #ifdef DEBUG |
519 | print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", | 509 | print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", |
520 | DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, | 510 | DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, |
521 | ctx->split_key_pad_len + enckeylen, 1); | 511 | ctx->split_key_pad_len + keys.enckeylen, 1); |
522 | #endif | 512 | #endif |
523 | 513 | ||
524 | ctx->enckeylen = enckeylen; | 514 | ctx->enckeylen = keys.enckeylen; |
525 | 515 | ||
526 | ret = aead_set_sh_desc(aead); | 516 | ret = aead_set_sh_desc(aead); |
527 | if (ret) { | 517 | if (ret) { |
528 | dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len + | 518 | dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len + |
529 | enckeylen, DMA_TO_DEVICE); | 519 | keys.enckeylen, DMA_TO_DEVICE); |
530 | } | 520 | } |
531 | 521 | ||
532 | return ret; | 522 | return ret; |
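
The rewritten aead_setkey() replaces the open-coded rtattr walk with crypto_authenc_extractkeys(), letting the crypto core split and bounds-check the authenc() key blob (an rtattr-encoded enckeylen parameter followed by the authentication key and the encryption key) in one hardened place. A simplified sketch of the calling pattern drivers use after this change; the function name and error handling here are illustrative only.

#include <linux/types.h>
#include <crypto/authenc.h>

static int example_authenc_setkey(const u8 *key, unsigned int keylen)
{
	struct crypto_authenc_keys keys;

	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
		return -EINVAL;		/* malformed or truncated key blob */

	/*
	 * keys.authkey/keys.authkeylen and keys.enckey/keys.enckeylen are now
	 * validated views into the blob; the driver consumes them instead of
	 * doing RTA_DATA()/RTA_ALIGN() arithmetic by hand.
	 */
	return 0;
}
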
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig new file mode 100644 index 000000000000..7639ffc36c68 --- /dev/null +++ b/drivers/crypto/ccp/Kconfig | |||
@@ -0,0 +1,24 @@ | |||
1 | config CRYPTO_DEV_CCP_DD | ||
2 | tristate "Cryptographic Coprocessor device driver" | ||
3 | depends on CRYPTO_DEV_CCP | ||
4 | default m | ||
5 | select HW_RANDOM | ||
6 | help | ||
7 | Provides the interface to use the AMD Cryptographic Coprocessor | ||
8 | which can be used to accelerate or offload cryptographic operations | ||
9 | such as SHA, AES and more. If you choose 'M' here, this module | ||
10 | will be called ccp. | ||
11 | |||
12 | config CRYPTO_DEV_CCP_CRYPTO | ||
13 | tristate "Encryption and hashing acceleration support" | ||
14 | depends on CRYPTO_DEV_CCP_DD | ||
15 | default m | ||
16 | select CRYPTO_ALGAPI | ||
17 | select CRYPTO_HASH | ||
18 | select CRYPTO_BLKCIPHER | ||
19 | select CRYPTO_AUTHENC | ||
20 | help | ||
21 | Support for using the cryptographic API with the AMD Cryptographic | ||
22 | Coprocessor. This module supports acceleration and offload of SHA | ||
23 | and AES algorithms. If you choose 'M' here, this module will be | ||
24 | called ccp_crypto. | ||
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile new file mode 100644 index 000000000000..d3505a018720 --- /dev/null +++ b/drivers/crypto/ccp/Makefile | |||
@@ -0,0 +1,10 @@ | |||
1 | obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o | ||
2 | ccp-objs := ccp-dev.o ccp-ops.o | ||
3 | ccp-objs += ccp-pci.o | ||
4 | |||
5 | obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o | ||
6 | ccp-crypto-objs := ccp-crypto-main.o \ | ||
7 | ccp-crypto-aes.o \ | ||
8 | ccp-crypto-aes-cmac.o \ | ||
9 | ccp-crypto-aes-xts.o \ | ||
10 | ccp-crypto-sha.o | ||
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c new file mode 100644 index 000000000000..8e162ad82085 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c | |||
@@ -0,0 +1,365 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) AES CMAC crypto API support | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/delay.h> | ||
16 | #include <linux/scatterlist.h> | ||
17 | #include <linux/crypto.h> | ||
18 | #include <crypto/algapi.h> | ||
19 | #include <crypto/aes.h> | ||
20 | #include <crypto/hash.h> | ||
21 | #include <crypto/internal/hash.h> | ||
22 | #include <crypto/scatterwalk.h> | ||
23 | |||
24 | #include "ccp-crypto.h" | ||
25 | |||
26 | |||
27 | static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, | ||
28 | int ret) | ||
29 | { | ||
30 | struct ahash_request *req = ahash_request_cast(async_req); | ||
31 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
32 | struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); | ||
33 | unsigned int digest_size = crypto_ahash_digestsize(tfm); | ||
34 | |||
35 | if (ret) | ||
36 | goto e_free; | ||
37 | |||
38 | if (rctx->hash_rem) { | ||
39 | /* Save remaining data to buffer */ | ||
40 | unsigned int offset = rctx->nbytes - rctx->hash_rem; | ||
41 | scatterwalk_map_and_copy(rctx->buf, rctx->src, | ||
42 | offset, rctx->hash_rem, 0); | ||
43 | rctx->buf_count = rctx->hash_rem; | ||
44 | } else | ||
45 | rctx->buf_count = 0; | ||
46 | |||
47 | /* Update result area if supplied */ | ||
48 | if (req->result) | ||
49 | memcpy(req->result, rctx->iv, digest_size); | ||
50 | |||
51 | e_free: | ||
52 | sg_free_table(&rctx->data_sg); | ||
53 | |||
54 | return ret; | ||
55 | } | ||
56 | |||
57 | static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes, | ||
58 | unsigned int final) | ||
59 | { | ||
60 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
61 | struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); | ||
62 | struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); | ||
63 | struct scatterlist *sg, *cmac_key_sg = NULL; | ||
64 | unsigned int block_size = | ||
65 | crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); | ||
66 | unsigned int need_pad, sg_count; | ||
67 | gfp_t gfp; | ||
68 | u64 len; | ||
69 | int ret; | ||
70 | |||
71 | if (!ctx->u.aes.key_len) | ||
72 | return -EINVAL; | ||
73 | |||
74 | if (nbytes) | ||
75 | rctx->null_msg = 0; | ||
76 | |||
77 | len = (u64)rctx->buf_count + (u64)nbytes; | ||
78 | |||
79 | if (!final && (len <= block_size)) { | ||
80 | scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src, | ||
81 | 0, nbytes, 0); | ||
82 | rctx->buf_count += nbytes; | ||
83 | |||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | rctx->src = req->src; | ||
88 | rctx->nbytes = nbytes; | ||
89 | |||
90 | rctx->final = final; | ||
91 | rctx->hash_rem = final ? 0 : len & (block_size - 1); | ||
92 | rctx->hash_cnt = len - rctx->hash_rem; | ||
93 | if (!final && !rctx->hash_rem) { | ||
94 | /* CCP can't do zero length final, so keep some data around */ | ||
95 | rctx->hash_cnt -= block_size; | ||
96 | rctx->hash_rem = block_size; | ||
97 | } | ||
98 | |||
99 | if (final && (rctx->null_msg || (len & (block_size - 1)))) | ||
100 | need_pad = 1; | ||
101 | else | ||
102 | need_pad = 0; | ||
103 | |||
104 | sg_init_one(&rctx->iv_sg, rctx->iv, sizeof(rctx->iv)); | ||
105 | |||
106 | /* Build the data scatterlist table - allocate enough entries for all | ||
107 | * possible data pieces (buffer, input data, padding) | ||
108 | */ | ||
109 | sg_count = (nbytes) ? sg_nents(req->src) + 2 : 2; | ||
110 | gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? | ||
111 | GFP_KERNEL : GFP_ATOMIC; | ||
112 | ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp); | ||
113 | if (ret) | ||
114 | return ret; | ||
115 | |||
116 | sg = NULL; | ||
117 | if (rctx->buf_count) { | ||
118 | sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); | ||
119 | sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg); | ||
120 | } | ||
121 | |||
122 | if (nbytes) | ||
123 | sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src); | ||
124 | |||
125 | if (need_pad) { | ||
126 | int pad_length = block_size - (len & (block_size - 1)); | ||
127 | |||
128 | rctx->hash_cnt += pad_length; | ||
129 | |||
130 | memset(rctx->pad, 0, sizeof(rctx->pad)); | ||
131 | rctx->pad[0] = 0x80; | ||
132 | sg_init_one(&rctx->pad_sg, rctx->pad, pad_length); | ||
133 | sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->pad_sg); | ||
134 | } | ||
135 | if (sg) { | ||
136 | sg_mark_end(sg); | ||
137 | sg = rctx->data_sg.sgl; | ||
138 | } | ||
139 | |||
140 | /* Initialize the K1/K2 scatterlist */ | ||
141 | if (final) | ||
142 | cmac_key_sg = (need_pad) ? &ctx->u.aes.k2_sg | ||
143 | : &ctx->u.aes.k1_sg; | ||
144 | |||
145 | memset(&rctx->cmd, 0, sizeof(rctx->cmd)); | ||
146 | INIT_LIST_HEAD(&rctx->cmd.entry); | ||
147 | rctx->cmd.engine = CCP_ENGINE_AES; | ||
148 | rctx->cmd.u.aes.type = ctx->u.aes.type; | ||
149 | rctx->cmd.u.aes.mode = ctx->u.aes.mode; | ||
150 | rctx->cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT; | ||
151 | rctx->cmd.u.aes.key = &ctx->u.aes.key_sg; | ||
152 | rctx->cmd.u.aes.key_len = ctx->u.aes.key_len; | ||
153 | rctx->cmd.u.aes.iv = &rctx->iv_sg; | ||
154 | rctx->cmd.u.aes.iv_len = AES_BLOCK_SIZE; | ||
155 | rctx->cmd.u.aes.src = sg; | ||
156 | rctx->cmd.u.aes.src_len = rctx->hash_cnt; | ||
157 | rctx->cmd.u.aes.dst = NULL; | ||
158 | rctx->cmd.u.aes.cmac_key = cmac_key_sg; | ||
159 | rctx->cmd.u.aes.cmac_key_len = ctx->u.aes.kn_len; | ||
160 | rctx->cmd.u.aes.cmac_final = final; | ||
161 | |||
162 | ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); | ||
163 | |||
164 | return ret; | ||
165 | } | ||
166 | |||
167 | static int ccp_aes_cmac_init(struct ahash_request *req) | ||
168 | { | ||
169 | struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); | ||
170 | |||
171 | memset(rctx, 0, sizeof(*rctx)); | ||
172 | |||
173 | rctx->null_msg = 1; | ||
174 | |||
175 | return 0; | ||
176 | } | ||
177 | |||
178 | static int ccp_aes_cmac_update(struct ahash_request *req) | ||
179 | { | ||
180 | return ccp_do_cmac_update(req, req->nbytes, 0); | ||
181 | } | ||
182 | |||
183 | static int ccp_aes_cmac_final(struct ahash_request *req) | ||
184 | { | ||
185 | return ccp_do_cmac_update(req, 0, 1); | ||
186 | } | ||
187 | |||
188 | static int ccp_aes_cmac_finup(struct ahash_request *req) | ||
189 | { | ||
190 | return ccp_do_cmac_update(req, req->nbytes, 1); | ||
191 | } | ||
192 | |||
193 | static int ccp_aes_cmac_digest(struct ahash_request *req) | ||
194 | { | ||
195 | int ret; | ||
196 | |||
197 | ret = ccp_aes_cmac_init(req); | ||
198 | if (ret) | ||
199 | return ret; | ||
200 | |||
201 | return ccp_aes_cmac_finup(req); | ||
202 | } | ||
203 | |||
204 | static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, | ||
205 | unsigned int key_len) | ||
206 | { | ||
207 | struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); | ||
208 | struct ccp_crypto_ahash_alg *alg = | ||
209 | ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); | ||
210 | u64 k0_hi, k0_lo, k1_hi, k1_lo, k2_hi, k2_lo; | ||
211 | u64 rb_hi = 0x00, rb_lo = 0x87; | ||
212 | __be64 *gk; | ||
213 | int ret; | ||
214 | |||
215 | switch (key_len) { | ||
216 | case AES_KEYSIZE_128: | ||
217 | ctx->u.aes.type = CCP_AES_TYPE_128; | ||
218 | break; | ||
219 | case AES_KEYSIZE_192: | ||
220 | ctx->u.aes.type = CCP_AES_TYPE_192; | ||
221 | break; | ||
222 | case AES_KEYSIZE_256: | ||
223 | ctx->u.aes.type = CCP_AES_TYPE_256; | ||
224 | break; | ||
225 | default: | ||
226 | crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
227 | return -EINVAL; | ||
228 | } | ||
229 | ctx->u.aes.mode = alg->mode; | ||
230 | |||
231 | /* Set to zero until complete */ | ||
232 | ctx->u.aes.key_len = 0; | ||
233 | |||
234 | /* Set the key for the AES cipher used to generate the keys */ | ||
235 | ret = crypto_cipher_setkey(ctx->u.aes.tfm_cipher, key, key_len); | ||
236 | if (ret) | ||
237 | return ret; | ||
238 | |||
239 | /* Encrypt a block of zeroes - use key area in context */ | ||
240 | memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key)); | ||
241 | crypto_cipher_encrypt_one(ctx->u.aes.tfm_cipher, ctx->u.aes.key, | ||
242 | ctx->u.aes.key); | ||
243 | |||
244 | /* Generate K1 and K2 */ | ||
245 | k0_hi = be64_to_cpu(*((__be64 *)ctx->u.aes.key)); | ||
246 | k0_lo = be64_to_cpu(*((__be64 *)ctx->u.aes.key + 1)); | ||
247 | |||
248 | k1_hi = (k0_hi << 1) | (k0_lo >> 63); | ||
249 | k1_lo = k0_lo << 1; | ||
250 | if (ctx->u.aes.key[0] & 0x80) { | ||
251 | k1_hi ^= rb_hi; | ||
252 | k1_lo ^= rb_lo; | ||
253 | } | ||
254 | gk = (__be64 *)ctx->u.aes.k1; | ||
255 | *gk = cpu_to_be64(k1_hi); | ||
256 | gk++; | ||
257 | *gk = cpu_to_be64(k1_lo); | ||
258 | |||
259 | k2_hi = (k1_hi << 1) | (k1_lo >> 63); | ||
260 | k2_lo = k1_lo << 1; | ||
261 | if (ctx->u.aes.k1[0] & 0x80) { | ||
262 | k2_hi ^= rb_hi; | ||
263 | k2_lo ^= rb_lo; | ||
264 | } | ||
265 | gk = (__be64 *)ctx->u.aes.k2; | ||
266 | *gk = cpu_to_be64(k2_hi); | ||
267 | gk++; | ||
268 | *gk = cpu_to_be64(k2_lo); | ||
269 | |||
270 | ctx->u.aes.kn_len = sizeof(ctx->u.aes.k1); | ||
271 | sg_init_one(&ctx->u.aes.k1_sg, ctx->u.aes.k1, sizeof(ctx->u.aes.k1)); | ||
272 | sg_init_one(&ctx->u.aes.k2_sg, ctx->u.aes.k2, sizeof(ctx->u.aes.k2)); | ||
273 | |||
274 | /* Save the supplied key */ | ||
275 | memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key)); | ||
276 | memcpy(ctx->u.aes.key, key, key_len); | ||
277 | ctx->u.aes.key_len = key_len; | ||
278 | sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); | ||
279 | |||
280 | return ret; | ||
281 | } | ||
282 | |||
283 | static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm) | ||
284 | { | ||
285 | struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); | ||
286 | struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); | ||
287 | struct crypto_cipher *cipher_tfm; | ||
288 | |||
289 | ctx->complete = ccp_aes_cmac_complete; | ||
290 | ctx->u.aes.key_len = 0; | ||
291 | |||
292 | crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); | ||
293 | |||
294 | cipher_tfm = crypto_alloc_cipher("aes", 0, | ||
295 | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); | ||
296 | if (IS_ERR(cipher_tfm)) { | ||
297 | pr_warn("could not load aes cipher driver\n"); | ||
298 | return PTR_ERR(cipher_tfm); | ||
299 | } | ||
300 | ctx->u.aes.tfm_cipher = cipher_tfm; | ||
301 | |||
302 | return 0; | ||
303 | } | ||
304 | |||
305 | static void ccp_aes_cmac_cra_exit(struct crypto_tfm *tfm) | ||
306 | { | ||
307 | struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); | ||
308 | |||
309 | if (ctx->u.aes.tfm_cipher) | ||
310 | crypto_free_cipher(ctx->u.aes.tfm_cipher); | ||
311 | ctx->u.aes.tfm_cipher = NULL; | ||
312 | } | ||
313 | |||
314 | int ccp_register_aes_cmac_algs(struct list_head *head) | ||
315 | { | ||
316 | struct ccp_crypto_ahash_alg *ccp_alg; | ||
317 | struct ahash_alg *alg; | ||
318 | struct hash_alg_common *halg; | ||
319 | struct crypto_alg *base; | ||
320 | int ret; | ||
321 | |||
322 | ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); | ||
323 | if (!ccp_alg) | ||
324 | return -ENOMEM; | ||
325 | |||
326 | INIT_LIST_HEAD(&ccp_alg->entry); | ||
327 | ccp_alg->mode = CCP_AES_MODE_CMAC; | ||
328 | |||
329 | alg = &ccp_alg->alg; | ||
330 | alg->init = ccp_aes_cmac_init; | ||
331 | alg->update = ccp_aes_cmac_update; | ||
332 | alg->final = ccp_aes_cmac_final; | ||
333 | alg->finup = ccp_aes_cmac_finup; | ||
334 | alg->digest = ccp_aes_cmac_digest; | ||
335 | alg->setkey = ccp_aes_cmac_setkey; | ||
336 | |||
337 | halg = &alg->halg; | ||
338 | halg->digestsize = AES_BLOCK_SIZE; | ||
339 | |||
340 | base = &halg->base; | ||
341 | snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)"); | ||
342 | snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "cmac-aes-ccp"); | ||
343 | base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | | ||
344 | CRYPTO_ALG_KERN_DRIVER_ONLY | | ||
345 | CRYPTO_ALG_NEED_FALLBACK; | ||
346 | base->cra_blocksize = AES_BLOCK_SIZE; | ||
347 | base->cra_ctxsize = sizeof(struct ccp_ctx); | ||
348 | base->cra_priority = CCP_CRA_PRIORITY; | ||
349 | base->cra_type = &crypto_ahash_type; | ||
350 | base->cra_init = ccp_aes_cmac_cra_init; | ||
351 | base->cra_exit = ccp_aes_cmac_cra_exit; | ||
352 | base->cra_module = THIS_MODULE; | ||
353 | |||
354 | ret = crypto_register_ahash(alg); | ||
355 | if (ret) { | ||
356 | pr_err("%s ahash algorithm registration error (%d)\n", | ||
357 | base->cra_name, ret); | ||
358 | kfree(ccp_alg); | ||
359 | return ret; | ||
360 | } | ||
361 | |||
362 | list_add(&ccp_alg->entry, head); | ||
363 | |||
364 | return 0; | ||
365 | } | ||
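
The K1/K2 computation in ccp_aes_cmac_setkey() above is the standard CMAC subkey derivation from NIST SP 800-38B: encrypt an all-zero block with the AES key, then "double" the result once for K1 and again for K2, where doubling is a one-bit left shift of the 128-bit value with the constant Rb = 0x87 XORed into the low byte when a bit shifts out. The driver does this on two big-endian 64-bit halves; the stand-alone sketch below performs the same operation byte-wise (the AES encryption of the zero block is omitted).

#include <stdint.h>

/* dbl() step of NIST SP 800-38B: shift the 128-bit block left by one bit and
 * XOR Rb = 0x87 into the low byte if the top bit was shifted out.
 * K1 = dbl(E_K(0^128)), K2 = dbl(K1). */
static void cmac_dbl(uint8_t out[16], const uint8_t in[16])
{
	uint8_t carry = 0;
	int i;

	for (i = 15; i >= 0; i--) {
		uint8_t next_carry = in[i] >> 7;

		out[i] = (uint8_t)((in[i] << 1) | carry);
		carry = next_carry;
	}
	if (carry)
		out[15] ^= 0x87;	/* Rb for a 128-bit block size */
}
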
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c new file mode 100644 index 000000000000..0237ab58f242 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c | |||
@@ -0,0 +1,279 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/delay.h> | ||
16 | #include <linux/scatterlist.h> | ||
17 | #include <linux/crypto.h> | ||
18 | #include <crypto/algapi.h> | ||
19 | #include <crypto/aes.h> | ||
20 | #include <crypto/scatterwalk.h> | ||
21 | |||
22 | #include "ccp-crypto.h" | ||
23 | |||
24 | |||
25 | struct ccp_aes_xts_def { | ||
26 | const char *name; | ||
27 | const char *drv_name; | ||
28 | }; | ||
29 | |||
30 | static struct ccp_aes_xts_def aes_xts_algs[] = { | ||
31 | { | ||
32 | .name = "xts(aes)", | ||
33 | .drv_name = "xts-aes-ccp", | ||
34 | }, | ||
35 | }; | ||
36 | |||
37 | struct ccp_unit_size_map { | ||
38 | unsigned int size; | ||
39 | u32 value; | ||
40 | }; | ||
41 | |||
42 | static struct ccp_unit_size_map unit_size_map[] = { | ||
43 | { | ||
44 | .size = 4096, | ||
45 | .value = CCP_XTS_AES_UNIT_SIZE_4096, | ||
46 | }, | ||
47 | { | ||
48 | .size = 2048, | ||
49 | .value = CCP_XTS_AES_UNIT_SIZE_2048, | ||
50 | }, | ||
51 | { | ||
52 | .size = 1024, | ||
53 | .value = CCP_XTS_AES_UNIT_SIZE_1024, | ||
54 | }, | ||
55 | { | ||
56 | .size = 512, | ||
57 | .value = CCP_XTS_AES_UNIT_SIZE_512, | ||
58 | }, | ||
59 | { | ||
60 | .size = 256, | ||
61 | .value = CCP_XTS_AES_UNIT_SIZE__LAST, | ||
62 | }, | ||
63 | { | ||
64 | .size = 128, | ||
65 | .value = CCP_XTS_AES_UNIT_SIZE__LAST, | ||
66 | }, | ||
67 | { | ||
68 | .size = 64, | ||
69 | .value = CCP_XTS_AES_UNIT_SIZE__LAST, | ||
70 | }, | ||
71 | { | ||
72 | .size = 32, | ||
73 | .value = CCP_XTS_AES_UNIT_SIZE__LAST, | ||
74 | }, | ||
75 | { | ||
76 | .size = 16, | ||
77 | .value = CCP_XTS_AES_UNIT_SIZE_16, | ||
78 | }, | ||
79 | { | ||
80 | .size = 1, | ||
81 | .value = CCP_XTS_AES_UNIT_SIZE__LAST, | ||
82 | }, | ||
83 | }; | ||
84 | |||
85 | static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret) | ||
86 | { | ||
87 | struct ablkcipher_request *req = ablkcipher_request_cast(async_req); | ||
88 | struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); | ||
89 | |||
90 | if (ret) | ||
91 | return ret; | ||
92 | |||
93 | memcpy(req->info, rctx->iv, AES_BLOCK_SIZE); | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, | ||
99 | unsigned int key_len) | ||
100 | { | ||
101 | struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm)); | ||
102 | |||
103 | /* Only support 128-bit AES key with a 128-bit Tweak key, | ||
104 | * otherwise use the fallback | ||
105 | */ | ||
106 | switch (key_len) { | ||
107 | case AES_KEYSIZE_128 * 2: | ||
108 | memcpy(ctx->u.aes.key, key, key_len); | ||
109 | break; | ||
110 | } | ||
111 | ctx->u.aes.key_len = key_len / 2; | ||
112 | sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); | ||
113 | |||
114 | return crypto_ablkcipher_setkey(ctx->u.aes.tfm_ablkcipher, key, | ||
115 | key_len); | ||
116 | } | ||
117 | |||
118 | static int ccp_aes_xts_crypt(struct ablkcipher_request *req, | ||
119 | unsigned int encrypt) | ||
120 | { | ||
121 | struct crypto_tfm *tfm = | ||
122 | crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); | ||
123 | struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); | ||
124 | struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); | ||
125 | unsigned int unit; | ||
126 | int ret; | ||
127 | |||
128 | if (!ctx->u.aes.key_len) | ||
129 | return -EINVAL; | ||
130 | |||
131 | if (req->nbytes & (AES_BLOCK_SIZE - 1)) | ||
132 | return -EINVAL; | ||
133 | |||
134 | if (!req->info) | ||
135 | return -EINVAL; | ||
136 | |||
137 | for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) | ||
138 | if (!(req->nbytes & (unit_size_map[unit].size - 1))) | ||
139 | break; | ||
140 | |||
141 | if ((unit_size_map[unit].value == CCP_XTS_AES_UNIT_SIZE__LAST) || | ||
142 | (ctx->u.aes.key_len != AES_KEYSIZE_128)) { | ||
143 | /* Use the fallback to process the request for any | ||
144 | * unsupported unit sizes or key sizes | ||
145 | */ | ||
146 | ablkcipher_request_set_tfm(req, ctx->u.aes.tfm_ablkcipher); | ||
147 | ret = (encrypt) ? crypto_ablkcipher_encrypt(req) : | ||
148 | crypto_ablkcipher_decrypt(req); | ||
149 | ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); | ||
150 | |||
151 | return ret; | ||
152 | } | ||
153 | |||
154 | memcpy(rctx->iv, req->info, AES_BLOCK_SIZE); | ||
155 | sg_init_one(&rctx->iv_sg, rctx->iv, AES_BLOCK_SIZE); | ||
156 | |||
157 | memset(&rctx->cmd, 0, sizeof(rctx->cmd)); | ||
158 | INIT_LIST_HEAD(&rctx->cmd.entry); | ||
159 | rctx->cmd.engine = CCP_ENGINE_XTS_AES_128; | ||
160 | rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT | ||
161 | : CCP_AES_ACTION_DECRYPT; | ||
162 | rctx->cmd.u.xts.unit_size = unit_size_map[unit].value; | ||
163 | rctx->cmd.u.xts.key = &ctx->u.aes.key_sg; | ||
164 | rctx->cmd.u.xts.key_len = ctx->u.aes.key_len; | ||
165 | rctx->cmd.u.xts.iv = &rctx->iv_sg; | ||
166 | rctx->cmd.u.xts.iv_len = AES_BLOCK_SIZE; | ||
167 | rctx->cmd.u.xts.src = req->src; | ||
168 | rctx->cmd.u.xts.src_len = req->nbytes; | ||
169 | rctx->cmd.u.xts.dst = req->dst; | ||
170 | |||
171 | ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); | ||
172 | |||
173 | return ret; | ||
174 | } | ||
175 | |||
176 | static int ccp_aes_xts_encrypt(struct ablkcipher_request *req) | ||
177 | { | ||
178 | return ccp_aes_xts_crypt(req, 1); | ||
179 | } | ||
180 | |||
181 | static int ccp_aes_xts_decrypt(struct ablkcipher_request *req) | ||
182 | { | ||
183 | return ccp_aes_xts_crypt(req, 0); | ||
184 | } | ||
185 | |||
186 | static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm) | ||
187 | { | ||
188 | struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); | ||
189 | struct crypto_ablkcipher *fallback_tfm; | ||
190 | |||
191 | ctx->complete = ccp_aes_xts_complete; | ||
192 | ctx->u.aes.key_len = 0; | ||
193 | |||
194 | fallback_tfm = crypto_alloc_ablkcipher(tfm->__crt_alg->cra_name, 0, | ||
195 | CRYPTO_ALG_ASYNC | | ||
196 | CRYPTO_ALG_NEED_FALLBACK); | ||
197 | if (IS_ERR(fallback_tfm)) { | ||
198 | pr_warn("could not load fallback driver %s\n", | ||
199 | tfm->__crt_alg->cra_name); | ||
200 | return PTR_ERR(fallback_tfm); | ||
201 | } | ||
202 | ctx->u.aes.tfm_ablkcipher = fallback_tfm; | ||
203 | |||
204 | tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx) + | ||
205 | fallback_tfm->base.crt_ablkcipher.reqsize; | ||
206 | |||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) | ||
211 | { | ||
212 | struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); | ||
213 | |||
214 | if (ctx->u.aes.tfm_ablkcipher) | ||
215 | crypto_free_ablkcipher(ctx->u.aes.tfm_ablkcipher); | ||
216 | ctx->u.aes.tfm_ablkcipher = NULL; | ||
217 | } | ||
218 | |||
219 | |||
220 | static int ccp_register_aes_xts_alg(struct list_head *head, | ||
221 | const struct ccp_aes_xts_def *def) | ||
222 | { | ||
223 | struct ccp_crypto_ablkcipher_alg *ccp_alg; | ||
224 | struct crypto_alg *alg; | ||
225 | int ret; | ||
226 | |||
227 | ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); | ||
228 | if (!ccp_alg) | ||
229 | return -ENOMEM; | ||
230 | |||
231 | INIT_LIST_HEAD(&ccp_alg->entry); | ||
232 | |||
233 | alg = &ccp_alg->alg; | ||
234 | |||
235 | snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); | ||
236 | snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", | ||
237 | def->drv_name); | ||
238 | alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC | | ||
239 | CRYPTO_ALG_KERN_DRIVER_ONLY | | ||
240 | CRYPTO_ALG_NEED_FALLBACK; | ||
241 | alg->cra_blocksize = AES_BLOCK_SIZE; | ||
242 | alg->cra_ctxsize = sizeof(struct ccp_ctx); | ||
243 | alg->cra_priority = CCP_CRA_PRIORITY; | ||
244 | alg->cra_type = &crypto_ablkcipher_type; | ||
245 | alg->cra_ablkcipher.setkey = ccp_aes_xts_setkey; | ||
246 | alg->cra_ablkcipher.encrypt = ccp_aes_xts_encrypt; | ||
247 | alg->cra_ablkcipher.decrypt = ccp_aes_xts_decrypt; | ||
248 | alg->cra_ablkcipher.min_keysize = AES_MIN_KEY_SIZE * 2; | ||
249 | alg->cra_ablkcipher.max_keysize = AES_MAX_KEY_SIZE * 2; | ||
250 | alg->cra_ablkcipher.ivsize = AES_BLOCK_SIZE; | ||
251 | alg->cra_init = ccp_aes_xts_cra_init; | ||
252 | alg->cra_exit = ccp_aes_xts_cra_exit; | ||
253 | alg->cra_module = THIS_MODULE; | ||
254 | |||
255 | ret = crypto_register_alg(alg); | ||
256 | if (ret) { | ||
257 | pr_err("%s ablkcipher algorithm registration error (%d)\n", | ||
258 | alg->cra_name, ret); | ||
259 | kfree(ccp_alg); | ||
260 | return ret; | ||
261 | } | ||
262 | |||
263 | list_add(&ccp_alg->entry, head); | ||
264 | |||
265 | return 0; | ||
266 | } | ||
267 | |||
268 | int ccp_register_aes_xts_algs(struct list_head *head) | ||
269 | { | ||
270 | int i, ret; | ||
271 | |||
272 | for (i = 0; i < ARRAY_SIZE(aes_xts_algs); i++) { | ||
273 | ret = ccp_register_aes_xts_alg(head, &aes_xts_algs[i]); | ||
274 | if (ret) | ||
275 | return ret; | ||
276 | } | ||
277 | |||
278 | return 0; | ||
279 | } | ||
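
For reference, the unit-size lookup in ccp_aes_xts_crypt above picks the first (largest) table entry whose size evenly divides req->nbytes; a request that lands on a CCP_XTS_AES_UNIT_SIZE__LAST entry, or that does not use a 2 x 128-bit key, is redirected to the software xts(aes) fallback. A minimal, self-contained sketch of that selection rule (userspace C with an illustrative table, not the driver code itself):

#include <stdio.h>

struct unit_size_map { unsigned int size; int supported; };

/* Illustrative table: assume only 4096/2048/1024/512/16-byte units are
 * handled in hardware; other sizes fall through to the fallback. */
static const struct unit_size_map map[] = {
	{ 4096, 1 }, { 2048, 1 }, { 1024, 1 }, { 512, 1 },
	{ 256, 0 }, { 128, 0 }, { 64, 0 }, { 32, 0 },
	{ 16, 1 }, { 1, 0 },
};

int main(void)
{
	unsigned int nbytes = 8192;	/* hypothetical request length */
	unsigned int i;

	/* Each size is a power of two, so (nbytes & (size - 1)) == 0 holds
	 * exactly when size divides nbytes; the size-1 entry always
	 * matches, so the loop always terminates within the table. */
	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
		if (!(nbytes & (map[i].size - 1)))
			break;

	if (map[i].supported)
		printf("hardware path, unit size %u\n", map[i].size);
	else
		printf("software xts(aes) fallback\n");
	return 0;
}
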
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c new file mode 100644 index 000000000000..e46490db0f63 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes.c | |||
@@ -0,0 +1,369 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) AES crypto API support | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/delay.h> | ||
16 | #include <linux/scatterlist.h> | ||
17 | #include <linux/crypto.h> | ||
18 | #include <crypto/algapi.h> | ||
19 | #include <crypto/aes.h> | ||
20 | #include <crypto/ctr.h> | ||
21 | #include <crypto/scatterwalk.h> | ||
22 | |||
23 | #include "ccp-crypto.h" | ||
24 | |||
25 | |||
26 | static int ccp_aes_complete(struct crypto_async_request *async_req, int ret) | ||
27 | { | ||
28 | struct ablkcipher_request *req = ablkcipher_request_cast(async_req); | ||
29 | struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); | ||
30 | struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); | ||
31 | |||
32 | if (ret) | ||
33 | return ret; | ||
34 | |||
35 | if (ctx->u.aes.mode != CCP_AES_MODE_ECB) | ||
36 | memcpy(req->info, rctx->iv, AES_BLOCK_SIZE); | ||
37 | |||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | static int ccp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, | ||
42 | unsigned int key_len) | ||
43 | { | ||
44 | struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm)); | ||
45 | struct ccp_crypto_ablkcipher_alg *alg = | ||
46 | ccp_crypto_ablkcipher_alg(crypto_ablkcipher_tfm(tfm)); | ||
47 | |||
48 | switch (key_len) { | ||
49 | case AES_KEYSIZE_128: | ||
50 | ctx->u.aes.type = CCP_AES_TYPE_128; | ||
51 | break; | ||
52 | case AES_KEYSIZE_192: | ||
53 | ctx->u.aes.type = CCP_AES_TYPE_192; | ||
54 | break; | ||
55 | case AES_KEYSIZE_256: | ||
56 | ctx->u.aes.type = CCP_AES_TYPE_256; | ||
57 | break; | ||
58 | default: | ||
59 | crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
60 | return -EINVAL; | ||
61 | } | ||
62 | ctx->u.aes.mode = alg->mode; | ||
63 | ctx->u.aes.key_len = key_len; | ||
64 | |||
65 | memcpy(ctx->u.aes.key, key, key_len); | ||
66 | sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); | ||
67 | |||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt) | ||
72 | { | ||
73 | struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); | ||
74 | struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); | ||
75 | struct scatterlist *iv_sg = NULL; | ||
76 | unsigned int iv_len = 0; | ||
77 | int ret; | ||
78 | |||
79 | if (!ctx->u.aes.key_len) | ||
80 | return -EINVAL; | ||
81 | |||
82 | if (((ctx->u.aes.mode == CCP_AES_MODE_ECB) || | ||
83 | (ctx->u.aes.mode == CCP_AES_MODE_CBC) || | ||
84 | (ctx->u.aes.mode == CCP_AES_MODE_CFB)) && | ||
85 | (req->nbytes & (AES_BLOCK_SIZE - 1))) | ||
86 | return -EINVAL; | ||
87 | |||
88 | if (ctx->u.aes.mode != CCP_AES_MODE_ECB) { | ||
89 | if (!req->info) | ||
90 | return -EINVAL; | ||
91 | |||
92 | memcpy(rctx->iv, req->info, AES_BLOCK_SIZE); | ||
93 | iv_sg = &rctx->iv_sg; | ||
94 | iv_len = AES_BLOCK_SIZE; | ||
95 | sg_init_one(iv_sg, rctx->iv, iv_len); | ||
96 | } | ||
97 | |||
98 | memset(&rctx->cmd, 0, sizeof(rctx->cmd)); | ||
99 | INIT_LIST_HEAD(&rctx->cmd.entry); | ||
100 | rctx->cmd.engine = CCP_ENGINE_AES; | ||
101 | rctx->cmd.u.aes.type = ctx->u.aes.type; | ||
102 | rctx->cmd.u.aes.mode = ctx->u.aes.mode; | ||
103 | rctx->cmd.u.aes.action = | ||
104 | (encrypt) ? CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT; | ||
105 | rctx->cmd.u.aes.key = &ctx->u.aes.key_sg; | ||
106 | rctx->cmd.u.aes.key_len = ctx->u.aes.key_len; | ||
107 | rctx->cmd.u.aes.iv = iv_sg; | ||
108 | rctx->cmd.u.aes.iv_len = iv_len; | ||
109 | rctx->cmd.u.aes.src = req->src; | ||
110 | rctx->cmd.u.aes.src_len = req->nbytes; | ||
111 | rctx->cmd.u.aes.dst = req->dst; | ||
112 | |||
113 | ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); | ||
114 | |||
115 | return ret; | ||
116 | } | ||
117 | |||
118 | static int ccp_aes_encrypt(struct ablkcipher_request *req) | ||
119 | { | ||
120 | return ccp_aes_crypt(req, true); | ||
121 | } | ||
122 | |||
123 | static int ccp_aes_decrypt(struct ablkcipher_request *req) | ||
124 | { | ||
125 | return ccp_aes_crypt(req, false); | ||
126 | } | ||
127 | |||
128 | static int ccp_aes_cra_init(struct crypto_tfm *tfm) | ||
129 | { | ||
130 | struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); | ||
131 | |||
132 | ctx->complete = ccp_aes_complete; | ||
133 | ctx->u.aes.key_len = 0; | ||
134 | |||
135 | tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); | ||
136 | |||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | static void ccp_aes_cra_exit(struct crypto_tfm *tfm) | ||
141 | { | ||
142 | } | ||
143 | |||
144 | static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req, | ||
145 | int ret) | ||
146 | { | ||
147 | struct ablkcipher_request *req = ablkcipher_request_cast(async_req); | ||
148 | struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); | ||
149 | |||
150 | /* Restore the original pointer */ | ||
151 | req->info = rctx->rfc3686_info; | ||
152 | |||
153 | return ccp_aes_complete(async_req, ret); | ||
154 | } | ||
155 | |||
156 | static int ccp_aes_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key, | ||
157 | unsigned int key_len) | ||
158 | { | ||
159 | struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm)); | ||
160 | |||
161 | if (key_len < CTR_RFC3686_NONCE_SIZE) | ||
162 | return -EINVAL; | ||
163 | |||
164 | key_len -= CTR_RFC3686_NONCE_SIZE; | ||
165 | memcpy(ctx->u.aes.nonce, key + key_len, CTR_RFC3686_NONCE_SIZE); | ||
166 | |||
167 | return ccp_aes_setkey(tfm, key, key_len); | ||
168 | } | ||
169 | |||
170 | static int ccp_aes_rfc3686_crypt(struct ablkcipher_request *req, bool encrypt) | ||
171 | { | ||
172 | struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); | ||
173 | struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); | ||
174 | u8 *iv; | ||
175 | |||
176 | /* Initialize the CTR block */ | ||
177 | iv = rctx->rfc3686_iv; | ||
178 | memcpy(iv, ctx->u.aes.nonce, CTR_RFC3686_NONCE_SIZE); | ||
179 | |||
180 | iv += CTR_RFC3686_NONCE_SIZE; | ||
181 | memcpy(iv, req->info, CTR_RFC3686_IV_SIZE); | ||
182 | |||
183 | iv += CTR_RFC3686_IV_SIZE; | ||
184 | *(__be32 *)iv = cpu_to_be32(1); | ||
185 | |||
186 | /* Point to the new IV */ | ||
187 | rctx->rfc3686_info = req->info; | ||
188 | req->info = rctx->rfc3686_iv; | ||
189 | |||
190 | return ccp_aes_crypt(req, encrypt); | ||
191 | } | ||
192 | |||
193 | static int ccp_aes_rfc3686_encrypt(struct ablkcipher_request *req) | ||
194 | { | ||
195 | return ccp_aes_rfc3686_crypt(req, true); | ||
196 | } | ||
197 | |||
198 | static int ccp_aes_rfc3686_decrypt(struct ablkcipher_request *req) | ||
199 | { | ||
200 | return ccp_aes_rfc3686_crypt(req, false); | ||
201 | } | ||
202 | |||
203 | static int ccp_aes_rfc3686_cra_init(struct crypto_tfm *tfm) | ||
204 | { | ||
205 | struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); | ||
206 | |||
207 | ctx->complete = ccp_aes_rfc3686_complete; | ||
208 | ctx->u.aes.key_len = 0; | ||
209 | |||
210 | tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); | ||
211 | |||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static void ccp_aes_rfc3686_cra_exit(struct crypto_tfm *tfm) | ||
216 | { | ||
217 | } | ||
218 | |||
219 | static struct crypto_alg ccp_aes_defaults = { | ||
220 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | | ||
221 | CRYPTO_ALG_ASYNC | | ||
222 | CRYPTO_ALG_KERN_DRIVER_ONLY | | ||
223 | CRYPTO_ALG_NEED_FALLBACK, | ||
224 | .cra_blocksize = AES_BLOCK_SIZE, | ||
225 | .cra_ctxsize = sizeof(struct ccp_ctx), | ||
226 | .cra_priority = CCP_CRA_PRIORITY, | ||
227 | .cra_type = &crypto_ablkcipher_type, | ||
228 | .cra_init = ccp_aes_cra_init, | ||
229 | .cra_exit = ccp_aes_cra_exit, | ||
230 | .cra_module = THIS_MODULE, | ||
231 | .cra_ablkcipher = { | ||
232 | .setkey = ccp_aes_setkey, | ||
233 | .encrypt = ccp_aes_encrypt, | ||
234 | .decrypt = ccp_aes_decrypt, | ||
235 | .min_keysize = AES_MIN_KEY_SIZE, | ||
236 | .max_keysize = AES_MAX_KEY_SIZE, | ||
237 | }, | ||
238 | }; | ||
239 | |||
240 | static struct crypto_alg ccp_aes_rfc3686_defaults = { | ||
241 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | | ||
242 | CRYPTO_ALG_ASYNC | | ||
243 | CRYPTO_ALG_KERN_DRIVER_ONLY | | ||
244 | CRYPTO_ALG_NEED_FALLBACK, | ||
245 | .cra_blocksize = CTR_RFC3686_BLOCK_SIZE, | ||
246 | .cra_ctxsize = sizeof(struct ccp_ctx), | ||
247 | .cra_priority = CCP_CRA_PRIORITY, | ||
248 | .cra_type = &crypto_ablkcipher_type, | ||
249 | .cra_init = ccp_aes_rfc3686_cra_init, | ||
250 | .cra_exit = ccp_aes_rfc3686_cra_exit, | ||
251 | .cra_module = THIS_MODULE, | ||
252 | .cra_ablkcipher = { | ||
253 | .setkey = ccp_aes_rfc3686_setkey, | ||
254 | .encrypt = ccp_aes_rfc3686_encrypt, | ||
255 | .decrypt = ccp_aes_rfc3686_decrypt, | ||
256 | .min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, | ||
257 | .max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, | ||
258 | }, | ||
259 | }; | ||
260 | |||
261 | struct ccp_aes_def { | ||
262 | enum ccp_aes_mode mode; | ||
263 | const char *name; | ||
264 | const char *driver_name; | ||
265 | unsigned int blocksize; | ||
266 | unsigned int ivsize; | ||
267 | struct crypto_alg *alg_defaults; | ||
268 | }; | ||
269 | |||
270 | static struct ccp_aes_def aes_algs[] = { | ||
271 | { | ||
272 | .mode = CCP_AES_MODE_ECB, | ||
273 | .name = "ecb(aes)", | ||
274 | .driver_name = "ecb-aes-ccp", | ||
275 | .blocksize = AES_BLOCK_SIZE, | ||
276 | .ivsize = 0, | ||
277 | .alg_defaults = &ccp_aes_defaults, | ||
278 | }, | ||
279 | { | ||
280 | .mode = CCP_AES_MODE_CBC, | ||
281 | .name = "cbc(aes)", | ||
282 | .driver_name = "cbc-aes-ccp", | ||
283 | .blocksize = AES_BLOCK_SIZE, | ||
284 | .ivsize = AES_BLOCK_SIZE, | ||
285 | .alg_defaults = &ccp_aes_defaults, | ||
286 | }, | ||
287 | { | ||
288 | .mode = CCP_AES_MODE_CFB, | ||
289 | .name = "cfb(aes)", | ||
290 | .driver_name = "cfb-aes-ccp", | ||
291 | .blocksize = AES_BLOCK_SIZE, | ||
292 | .ivsize = AES_BLOCK_SIZE, | ||
293 | .alg_defaults = &ccp_aes_defaults, | ||
294 | }, | ||
295 | { | ||
296 | .mode = CCP_AES_MODE_OFB, | ||
297 | .name = "ofb(aes)", | ||
298 | .driver_name = "ofb-aes-ccp", | ||
299 | .blocksize = 1, | ||
300 | .ivsize = AES_BLOCK_SIZE, | ||
301 | .alg_defaults = &ccp_aes_defaults, | ||
302 | }, | ||
303 | { | ||
304 | .mode = CCP_AES_MODE_CTR, | ||
305 | .name = "ctr(aes)", | ||
306 | .driver_name = "ctr-aes-ccp", | ||
307 | .blocksize = 1, | ||
308 | .ivsize = AES_BLOCK_SIZE, | ||
309 | .alg_defaults = &ccp_aes_defaults, | ||
310 | }, | ||
311 | { | ||
312 | .mode = CCP_AES_MODE_CTR, | ||
313 | .name = "rfc3686(ctr(aes))", | ||
314 | .driver_name = "rfc3686-ctr-aes-ccp", | ||
315 | .blocksize = 1, | ||
316 | .ivsize = CTR_RFC3686_IV_SIZE, | ||
317 | .alg_defaults = &ccp_aes_rfc3686_defaults, | ||
318 | }, | ||
319 | }; | ||
320 | |||
321 | static int ccp_register_aes_alg(struct list_head *head, | ||
322 | const struct ccp_aes_def *def) | ||
323 | { | ||
324 | struct ccp_crypto_ablkcipher_alg *ccp_alg; | ||
325 | struct crypto_alg *alg; | ||
326 | int ret; | ||
327 | |||
328 | ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); | ||
329 | if (!ccp_alg) | ||
330 | return -ENOMEM; | ||
331 | |||
332 | INIT_LIST_HEAD(&ccp_alg->entry); | ||
333 | |||
334 | ccp_alg->mode = def->mode; | ||
335 | |||
336 | /* Copy the defaults and override as necessary */ | ||
337 | alg = &ccp_alg->alg; | ||
338 | *alg = *def->alg_defaults; | ||
339 | snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); | ||
340 | snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", | ||
341 | def->driver_name); | ||
342 | alg->cra_blocksize = def->blocksize; | ||
343 | alg->cra_ablkcipher.ivsize = def->ivsize; | ||
344 | |||
345 | ret = crypto_register_alg(alg); | ||
346 | if (ret) { | ||
347 | pr_err("%s ablkcipher algorithm registration error (%d)\n", | ||
348 | alg->cra_name, ret); | ||
349 | kfree(ccp_alg); | ||
350 | return ret; | ||
351 | } | ||
352 | |||
353 | list_add(&ccp_alg->entry, head); | ||
354 | |||
355 | return 0; | ||
356 | } | ||
357 | |||
358 | int ccp_register_aes_algs(struct list_head *head) | ||
359 | { | ||
360 | int i, ret; | ||
361 | |||
362 | for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { | ||
363 | ret = ccp_register_aes_alg(head, &aes_algs[i]); | ||
364 | if (ret) | ||
365 | return ret; | ||
366 | } | ||
367 | |||
368 | return 0; | ||
369 | } | ||
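
For reference, rfc3686(ctr(aes)) above differs from plain ctr(aes) only in how the counter block is assembled: ccp_aes_rfc3686_setkey keeps the last 4 bytes of the key as a nonce, and ccp_aes_rfc3686_crypt builds nonce || 8-byte IV || 32-bit big-endian counter starting at 1. A standalone sketch of that layout, with hypothetical nonce and IV values:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define NONCE_SIZE 4	/* CTR_RFC3686_NONCE_SIZE */
#define IV_SIZE    8	/* CTR_RFC3686_IV_SIZE */
#define BLOCK_SIZE 16	/* AES_BLOCK_SIZE */

int main(void)
{
	/* Hypothetical nonce (last 4 bytes of the supplied key) and IV */
	uint8_t nonce[NONCE_SIZE] = { 0x00, 0x11, 0x22, 0x33 };
	uint8_t iv[IV_SIZE] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint8_t ctrblk[BLOCK_SIZE];
	int i;

	memcpy(ctrblk, nonce, NONCE_SIZE);		/* bytes  0..3  */
	memcpy(ctrblk + NONCE_SIZE, iv, IV_SIZE);	/* bytes  4..11 */
	ctrblk[12] = 0;					/* bytes 12..15: */
	ctrblk[13] = 0;					/* big-endian    */
	ctrblk[14] = 0;					/* counter = 1   */
	ctrblk[15] = 1;

	for (i = 0; i < BLOCK_SIZE; i++)
		printf("%02x", ctrblk[i]);
	printf("\n");
	return 0;
}
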
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c new file mode 100644 index 000000000000..2636f044789d --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-main.c | |||
@@ -0,0 +1,432 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) crypto API support | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/list.h> | ||
16 | #include <linux/ccp.h> | ||
17 | #include <linux/scatterlist.h> | ||
18 | #include <crypto/internal/hash.h> | ||
19 | |||
20 | #include "ccp-crypto.h" | ||
21 | |||
22 | MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); | ||
23 | MODULE_LICENSE("GPL"); | ||
24 | MODULE_VERSION("1.0.0"); | ||
25 | MODULE_DESCRIPTION("AMD Cryptographic Coprocessor crypto API support"); | ||
26 | |||
27 | |||
28 | /* List heads for the supported algorithms */ | ||
29 | static LIST_HEAD(hash_algs); | ||
30 | static LIST_HEAD(cipher_algs); | ||
31 | |||
32 | /* For any tfm, requests for that tfm on the same CPU must be returned | ||
33 | * in the order received. With multiple queues available, the CCP can | ||
34 | * process more than one cmd at a time. Therefore we must maintain | ||
35 | * a cmd list to ensure the proper ordering of requests on a given tfm/cpu | ||
36 | * combination. | ||
37 | */ | ||
38 | struct ccp_crypto_cpu_queue { | ||
39 | struct list_head cmds; | ||
40 | struct list_head *backlog; | ||
41 | unsigned int cmd_count; | ||
42 | }; | ||
43 | #define CCP_CRYPTO_MAX_QLEN 50 | ||
44 | |||
45 | struct ccp_crypto_percpu_queue { | ||
46 | struct ccp_crypto_cpu_queue __percpu *cpu_queue; | ||
47 | }; | ||
48 | static struct ccp_crypto_percpu_queue req_queue; | ||
49 | |||
50 | struct ccp_crypto_cmd { | ||
51 | struct list_head entry; | ||
52 | |||
53 | struct ccp_cmd *cmd; | ||
54 | |||
55 | /* Save the crypto_tfm and crypto_async_request addresses | ||
56 | * separately to avoid any reference to a possibly invalid | ||
57 | * crypto_async_request structure after invoking the request | ||
58 | * callback | ||
59 | */ | ||
60 | struct crypto_async_request *req; | ||
61 | struct crypto_tfm *tfm; | ||
62 | |||
63 | /* Used for held command processing to determine state */ | ||
64 | int ret; | ||
65 | |||
66 | int cpu; | ||
67 | }; | ||
68 | |||
69 | struct ccp_crypto_cpu { | ||
70 | struct work_struct work; | ||
71 | struct completion completion; | ||
72 | struct ccp_crypto_cmd *crypto_cmd; | ||
73 | int err; | ||
74 | }; | ||
75 | |||
76 | |||
77 | static inline bool ccp_crypto_success(int err) | ||
78 | { | ||
79 | if (err && (err != -EINPROGRESS) && (err != -EBUSY)) | ||
80 | return false; | ||
81 | |||
82 | return true; | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * ccp_crypto_cmd_complete must be called while running on the appropriate | ||
87 | * cpu and the caller must have done a get_cpu to disable preemption | ||
88 | */ | ||
89 | static struct ccp_crypto_cmd *ccp_crypto_cmd_complete( | ||
90 | struct ccp_crypto_cmd *crypto_cmd, struct ccp_crypto_cmd **backlog) | ||
91 | { | ||
92 | struct ccp_crypto_cpu_queue *cpu_queue; | ||
93 | struct ccp_crypto_cmd *held = NULL, *tmp; | ||
94 | |||
95 | *backlog = NULL; | ||
96 | |||
97 | cpu_queue = this_cpu_ptr(req_queue.cpu_queue); | ||
98 | |||
99 | /* Held cmds will be after the current cmd in the queue, so start | ||
100 | * searching for a cmd with a matching tfm for submission. | ||
101 | */ | ||
102 | tmp = crypto_cmd; | ||
103 | list_for_each_entry_continue(tmp, &cpu_queue->cmds, entry) { | ||
104 | if (crypto_cmd->tfm != tmp->tfm) | ||
105 | continue; | ||
106 | held = tmp; | ||
107 | break; | ||
108 | } | ||
109 | |||
110 | /* Process the backlog: | ||
111 | * Because cmds can be executed from any point in the cmd list, | ||
112 | * special precautions have to be taken when handling the backlog. | ||
113 | */ | ||
114 | if (cpu_queue->backlog != &cpu_queue->cmds) { | ||
115 | /* Skip over this cmd if it is the next backlog cmd */ | ||
116 | if (cpu_queue->backlog == &crypto_cmd->entry) | ||
117 | cpu_queue->backlog = crypto_cmd->entry.next; | ||
118 | |||
119 | *backlog = container_of(cpu_queue->backlog, | ||
120 | struct ccp_crypto_cmd, entry); | ||
121 | cpu_queue->backlog = cpu_queue->backlog->next; | ||
122 | |||
123 | /* Skip over this cmd if it is now the next backlog cmd */ | ||
124 | if (cpu_queue->backlog == &crypto_cmd->entry) | ||
125 | cpu_queue->backlog = crypto_cmd->entry.next; | ||
126 | } | ||
127 | |||
128 | /* Remove the cmd entry from the list of cmds */ | ||
129 | cpu_queue->cmd_count--; | ||
130 | list_del(&crypto_cmd->entry); | ||
131 | |||
132 | return held; | ||
133 | } | ||
134 | |||
135 | static void ccp_crypto_complete_on_cpu(struct work_struct *work) | ||
136 | { | ||
137 | struct ccp_crypto_cpu *cpu_work = | ||
138 | container_of(work, struct ccp_crypto_cpu, work); | ||
139 | struct ccp_crypto_cmd *crypto_cmd = cpu_work->crypto_cmd; | ||
140 | struct ccp_crypto_cmd *held, *next, *backlog; | ||
141 | struct crypto_async_request *req = crypto_cmd->req; | ||
142 | struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm); | ||
143 | int cpu, ret; | ||
144 | |||
145 | cpu = get_cpu(); | ||
146 | |||
147 | if (cpu_work->err == -EINPROGRESS) { | ||
148 | /* Only propagate the -EINPROGRESS if necessary */ | ||
149 | if (crypto_cmd->ret == -EBUSY) { | ||
150 | crypto_cmd->ret = -EINPROGRESS; | ||
151 | req->complete(req, -EINPROGRESS); | ||
152 | } | ||
153 | |||
154 | goto e_cpu; | ||
155 | } | ||
156 | |||
157 | /* Operation has completed - update the queue before invoking | ||
158 | * the completion callbacks and retrieve the next cmd (cmd with | ||
159 | * a matching tfm) that can be submitted to the CCP. | ||
160 | */ | ||
161 | held = ccp_crypto_cmd_complete(crypto_cmd, &backlog); | ||
162 | if (backlog) { | ||
163 | backlog->ret = -EINPROGRESS; | ||
164 | backlog->req->complete(backlog->req, -EINPROGRESS); | ||
165 | } | ||
166 | |||
167 | /* Transition the state from -EBUSY to -EINPROGRESS first */ | ||
168 | if (crypto_cmd->ret == -EBUSY) | ||
169 | req->complete(req, -EINPROGRESS); | ||
170 | |||
171 | /* Completion callbacks */ | ||
172 | ret = cpu_work->err; | ||
173 | if (ctx->complete) | ||
174 | ret = ctx->complete(req, ret); | ||
175 | req->complete(req, ret); | ||
176 | |||
177 | /* Submit the next cmd */ | ||
178 | while (held) { | ||
179 | ret = ccp_enqueue_cmd(held->cmd); | ||
180 | if (ccp_crypto_success(ret)) | ||
181 | break; | ||
182 | |||
183 | /* Error occurred, report it and get the next entry */ | ||
184 | held->req->complete(held->req, ret); | ||
185 | |||
186 | next = ccp_crypto_cmd_complete(held, &backlog); | ||
187 | if (backlog) { | ||
188 | backlog->ret = -EINPROGRESS; | ||
189 | backlog->req->complete(backlog->req, -EINPROGRESS); | ||
190 | } | ||
191 | |||
192 | kfree(held); | ||
193 | held = next; | ||
194 | } | ||
195 | |||
196 | kfree(crypto_cmd); | ||
197 | |||
198 | e_cpu: | ||
199 | put_cpu(); | ||
200 | |||
201 | complete(&cpu_work->completion); | ||
202 | } | ||
203 | |||
204 | static void ccp_crypto_complete(void *data, int err) | ||
205 | { | ||
206 | struct ccp_crypto_cmd *crypto_cmd = data; | ||
207 | struct ccp_crypto_cpu cpu_work; | ||
208 | |||
209 | INIT_WORK(&cpu_work.work, ccp_crypto_complete_on_cpu); | ||
210 | init_completion(&cpu_work.completion); | ||
211 | cpu_work.crypto_cmd = crypto_cmd; | ||
212 | cpu_work.err = err; | ||
213 | |||
214 | schedule_work_on(crypto_cmd->cpu, &cpu_work.work); | ||
215 | |||
216 | /* Keep the completion call synchronous */ | ||
217 | wait_for_completion(&cpu_work.completion); | ||
218 | } | ||
219 | |||
220 | static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) | ||
221 | { | ||
222 | struct ccp_crypto_cpu_queue *cpu_queue; | ||
223 | struct ccp_crypto_cmd *active = NULL, *tmp; | ||
224 | int cpu, ret; | ||
225 | |||
226 | cpu = get_cpu(); | ||
227 | crypto_cmd->cpu = cpu; | ||
228 | |||
229 | cpu_queue = this_cpu_ptr(req_queue.cpu_queue); | ||
230 | |||
231 | /* Check if the cmd can/should be queued */ | ||
232 | if (cpu_queue->cmd_count >= CCP_CRYPTO_MAX_QLEN) { | ||
233 | ret = -EBUSY; | ||
234 | if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) | ||
235 | goto e_cpu; | ||
236 | } | ||
237 | |||
238 | /* Look for an entry with the same tfm. If there is a cmd | ||
239 | * with the same tfm in the list for this cpu then the current | ||
240 | * cmd cannot be submitted to the CCP yet. | ||
241 | */ | ||
242 | list_for_each_entry(tmp, &cpu_queue->cmds, entry) { | ||
243 | if (crypto_cmd->tfm != tmp->tfm) | ||
244 | continue; | ||
245 | active = tmp; | ||
246 | break; | ||
247 | } | ||
248 | |||
249 | ret = -EINPROGRESS; | ||
250 | if (!active) { | ||
251 | ret = ccp_enqueue_cmd(crypto_cmd->cmd); | ||
252 | if (!ccp_crypto_success(ret)) | ||
253 | goto e_cpu; | ||
254 | } | ||
255 | |||
256 | if (cpu_queue->cmd_count >= CCP_CRYPTO_MAX_QLEN) { | ||
257 | ret = -EBUSY; | ||
258 | if (cpu_queue->backlog == &cpu_queue->cmds) | ||
259 | cpu_queue->backlog = &crypto_cmd->entry; | ||
260 | } | ||
261 | crypto_cmd->ret = ret; | ||
262 | |||
263 | cpu_queue->cmd_count++; | ||
264 | list_add_tail(&crypto_cmd->entry, &cpu_queue->cmds); | ||
265 | |||
266 | e_cpu: | ||
267 | put_cpu(); | ||
268 | |||
269 | return ret; | ||
270 | } | ||
271 | |||
272 | /** | ||
273 | * ccp_crypto_enqueue_request - queue a crypto async request for processing | ||
274 | * by the CCP | ||
275 | * | ||
276 | * @req: crypto_async_request struct to be processed | ||
277 | * @cmd: ccp_cmd struct to be sent to the CCP | ||
278 | */ | ||
279 | int ccp_crypto_enqueue_request(struct crypto_async_request *req, | ||
280 | struct ccp_cmd *cmd) | ||
281 | { | ||
282 | struct ccp_crypto_cmd *crypto_cmd; | ||
283 | gfp_t gfp; | ||
284 | int ret; | ||
285 | |||
286 | gfp = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; | ||
287 | |||
288 | crypto_cmd = kzalloc(sizeof(*crypto_cmd), gfp); | ||
289 | if (!crypto_cmd) | ||
290 | return -ENOMEM; | ||
291 | |||
292 | /* The tfm pointer must be saved and not referenced from the | ||
293 | * crypto_async_request (req) pointer because it is used after | ||
294 | * the completion callback for the request, when the req pointer | ||
295 | * might no longer be valid. | ||
296 | */ | ||
297 | crypto_cmd->cmd = cmd; | ||
298 | crypto_cmd->req = req; | ||
299 | crypto_cmd->tfm = req->tfm; | ||
300 | |||
301 | cmd->callback = ccp_crypto_complete; | ||
302 | cmd->data = crypto_cmd; | ||
303 | |||
304 | if (req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG) | ||
305 | cmd->flags |= CCP_CMD_MAY_BACKLOG; | ||
306 | else | ||
307 | cmd->flags &= ~CCP_CMD_MAY_BACKLOG; | ||
308 | |||
309 | ret = ccp_crypto_enqueue_cmd(crypto_cmd); | ||
310 | if (!ccp_crypto_success(ret)) | ||
311 | kfree(crypto_cmd); | ||
312 | |||
313 | return ret; | ||
314 | } | ||
315 | |||
316 | struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table, | ||
317 | struct scatterlist *sg_add) | ||
318 | { | ||
319 | struct scatterlist *sg, *sg_last = NULL; | ||
320 | |||
321 | for (sg = table->sgl; sg; sg = sg_next(sg)) | ||
322 | if (!sg_page(sg)) | ||
323 | break; | ||
324 | BUG_ON(!sg); | ||
325 | |||
326 | for (; sg && sg_add; sg = sg_next(sg), sg_add = sg_next(sg_add)) { | ||
327 | sg_set_page(sg, sg_page(sg_add), sg_add->length, | ||
328 | sg_add->offset); | ||
329 | sg_last = sg; | ||
330 | } | ||
331 | BUG_ON(sg_add); | ||
332 | |||
333 | return sg_last; | ||
334 | } | ||
335 | |||
336 | static int ccp_register_algs(void) | ||
337 | { | ||
338 | int ret; | ||
339 | |||
340 | ret = ccp_register_aes_algs(&cipher_algs); | ||
341 | if (ret) | ||
342 | return ret; | ||
343 | |||
344 | ret = ccp_register_aes_cmac_algs(&hash_algs); | ||
345 | if (ret) | ||
346 | return ret; | ||
347 | |||
348 | ret = ccp_register_aes_xts_algs(&cipher_algs); | ||
349 | if (ret) | ||
350 | return ret; | ||
351 | |||
352 | ret = ccp_register_sha_algs(&hash_algs); | ||
353 | if (ret) | ||
354 | return ret; | ||
355 | |||
356 | return 0; | ||
357 | } | ||
358 | |||
359 | static void ccp_unregister_algs(void) | ||
360 | { | ||
361 | struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp; | ||
362 | struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp; | ||
363 | |||
364 | list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) { | ||
365 | crypto_unregister_ahash(&ahash_alg->alg); | ||
366 | list_del(&ahash_alg->entry); | ||
367 | kfree(ahash_alg); | ||
368 | } | ||
369 | |||
370 | list_for_each_entry_safe(ablk_alg, ablk_tmp, &cipher_algs, entry) { | ||
371 | crypto_unregister_alg(&ablk_alg->alg); | ||
372 | list_del(&ablk_alg->entry); | ||
373 | kfree(ablk_alg); | ||
374 | } | ||
375 | } | ||
376 | |||
377 | static int ccp_init_queues(void) | ||
378 | { | ||
379 | struct ccp_crypto_cpu_queue *cpu_queue; | ||
380 | int cpu; | ||
381 | |||
382 | req_queue.cpu_queue = alloc_percpu(struct ccp_crypto_cpu_queue); | ||
383 | if (!req_queue.cpu_queue) | ||
384 | return -ENOMEM; | ||
385 | |||
386 | for_each_possible_cpu(cpu) { | ||
387 | cpu_queue = per_cpu_ptr(req_queue.cpu_queue, cpu); | ||
388 | INIT_LIST_HEAD(&cpu_queue->cmds); | ||
389 | cpu_queue->backlog = &cpu_queue->cmds; | ||
390 | cpu_queue->cmd_count = 0; | ||
391 | } | ||
392 | |||
393 | return 0; | ||
394 | } | ||
395 | |||
396 | static void ccp_fini_queue(void) | ||
397 | { | ||
398 | struct ccp_crypto_cpu_queue *cpu_queue; | ||
399 | int cpu; | ||
400 | |||
401 | for_each_possible_cpu(cpu) { | ||
402 | cpu_queue = per_cpu_ptr(req_queue.cpu_queue, cpu); | ||
403 | BUG_ON(!list_empty(&cpu_queue->cmds)); | ||
404 | } | ||
405 | free_percpu(req_queue.cpu_queue); | ||
406 | } | ||
407 | |||
408 | static int ccp_crypto_init(void) | ||
409 | { | ||
410 | int ret; | ||
411 | |||
412 | ret = ccp_init_queues(); | ||
413 | if (ret) | ||
414 | return ret; | ||
415 | |||
416 | ret = ccp_register_algs(); | ||
417 | if (ret) { | ||
418 | ccp_unregister_algs(); | ||
419 | ccp_fini_queue(); | ||
420 | } | ||
421 | |||
422 | return ret; | ||
423 | } | ||
424 | |||
425 | static void ccp_crypto_exit(void) | ||
426 | { | ||
427 | ccp_unregister_algs(); | ||
428 | ccp_fini_queue(); | ||
429 | } | ||
430 | |||
431 | module_init(ccp_crypto_init); | ||
432 | module_exit(ccp_crypto_exit); | ||
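
The queueing rule implemented above is that a cmd is handed to the CCP only when no other cmd for the same tfm is already on this CPU's list; otherwise it is appended and later resubmitted by the completion path, which is what preserves per-tfm ordering. The sketch below illustrates only that submit-now-or-hold decision (plain C; the real driver backlogs with -EBUSY rather than failing when the queue is full):

#include <stdio.h>

#define MAX_QLEN 50	/* mirrors CCP_CRYPTO_MAX_QLEN */

struct cmd {
	const void *tfm;	/* identifies the transform */
	struct cmd *next;
};

struct cpu_queue {
	struct cmd *head, *tail;
	int count;
};

/* Append a cmd; report whether it may be submitted to the hardware now
 * (no earlier cmd with the same tfm) or must be held for the completion
 * path to submit later. */
static int enqueue(struct cpu_queue *q, struct cmd *c, int *submit_now)
{
	struct cmd *t;

	if (q->count >= MAX_QLEN)
		return -1;	/* the driver backlogs here instead (-EBUSY) */

	*submit_now = 1;
	for (t = q->head; t; t = t->next)
		if (t->tfm == c->tfm) {
			*submit_now = 0;
			break;
		}

	c->next = NULL;
	if (q->tail)
		q->tail->next = c;
	else
		q->head = c;
	q->tail = c;
	q->count++;
	return 0;
}

int main(void)
{
	struct cpu_queue q = { 0 };
	int tfm_a, tfm_b, now;
	struct cmd c1 = { &tfm_a }, c2 = { &tfm_a }, c3 = { &tfm_b };

	enqueue(&q, &c1, &now); printf("c1 submit_now=%d\n", now);	/* 1 */
	enqueue(&q, &c2, &now); printf("c2 submit_now=%d\n", now);	/* 0 */
	enqueue(&q, &c3, &now); printf("c3 submit_now=%d\n", now);	/* 1 */
	return 0;
}
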
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c new file mode 100644 index 000000000000..3867290b3531 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-sha.c | |||
@@ -0,0 +1,517 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) SHA crypto API support | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/delay.h> | ||
16 | #include <linux/scatterlist.h> | ||
17 | #include <linux/crypto.h> | ||
18 | #include <crypto/algapi.h> | ||
19 | #include <crypto/hash.h> | ||
20 | #include <crypto/internal/hash.h> | ||
21 | #include <crypto/sha.h> | ||
22 | #include <crypto/scatterwalk.h> | ||
23 | |||
24 | #include "ccp-crypto.h" | ||
25 | |||
26 | |||
27 | struct ccp_sha_result { | ||
28 | struct completion completion; | ||
29 | int err; | ||
30 | }; | ||
31 | |||
32 | static void ccp_sync_hash_complete(struct crypto_async_request *req, int err) | ||
33 | { | ||
34 | struct ccp_sha_result *result = req->data; | ||
35 | |||
36 | if (err == -EINPROGRESS) | ||
37 | return; | ||
38 | |||
39 | result->err = err; | ||
40 | complete(&result->completion); | ||
41 | } | ||
42 | |||
43 | static int ccp_sync_hash(struct crypto_ahash *tfm, u8 *buf, | ||
44 | struct scatterlist *sg, unsigned int len) | ||
45 | { | ||
46 | struct ccp_sha_result result; | ||
47 | struct ahash_request *req; | ||
48 | int ret; | ||
49 | |||
50 | init_completion(&result.completion); | ||
51 | |||
52 | req = ahash_request_alloc(tfm, GFP_KERNEL); | ||
53 | if (!req) | ||
54 | return -ENOMEM; | ||
55 | |||
56 | ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, | ||
57 | ccp_sync_hash_complete, &result); | ||
58 | ahash_request_set_crypt(req, sg, buf, len); | ||
59 | |||
60 | ret = crypto_ahash_digest(req); | ||
61 | if ((ret == -EINPROGRESS) || (ret == -EBUSY)) { | ||
62 | ret = wait_for_completion_interruptible(&result.completion); | ||
63 | if (!ret) | ||
64 | ret = result.err; | ||
65 | } | ||
66 | |||
67 | ahash_request_free(req); | ||
68 | |||
69 | return ret; | ||
70 | } | ||
71 | |||
72 | static int ccp_sha_finish_hmac(struct crypto_async_request *async_req) | ||
73 | { | ||
74 | struct ahash_request *req = ahash_request_cast(async_req); | ||
75 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
76 | struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); | ||
77 | struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); | ||
78 | struct scatterlist sg[2]; | ||
79 | unsigned int block_size = | ||
80 | crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); | ||
81 | unsigned int digest_size = crypto_ahash_digestsize(tfm); | ||
82 | |||
83 | sg_init_table(sg, ARRAY_SIZE(sg)); | ||
84 | sg_set_buf(&sg[0], ctx->u.sha.opad, block_size); | ||
85 | sg_set_buf(&sg[1], rctx->ctx, digest_size); | ||
86 | |||
87 | return ccp_sync_hash(ctx->u.sha.hmac_tfm, req->result, sg, | ||
88 | block_size + digest_size); | ||
89 | } | ||
90 | |||
91 | static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) | ||
92 | { | ||
93 | struct ahash_request *req = ahash_request_cast(async_req); | ||
94 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
95 | struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); | ||
96 | struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); | ||
97 | unsigned int digest_size = crypto_ahash_digestsize(tfm); | ||
98 | |||
99 | if (ret) | ||
100 | goto e_free; | ||
101 | |||
102 | if (rctx->hash_rem) { | ||
103 | /* Save remaining data to buffer */ | ||
104 | unsigned int offset = rctx->nbytes - rctx->hash_rem; | ||
105 | scatterwalk_map_and_copy(rctx->buf, rctx->src, | ||
106 | offset, rctx->hash_rem, 0); | ||
107 | rctx->buf_count = rctx->hash_rem; | ||
108 | } else | ||
109 | rctx->buf_count = 0; | ||
110 | |||
111 | /* Update result area if supplied */ | ||
112 | if (req->result) | ||
113 | memcpy(req->result, rctx->ctx, digest_size); | ||
114 | |||
115 | /* If we're doing an HMAC, we need to perform that on the final op */ | ||
116 | if (rctx->final && ctx->u.sha.key_len) | ||
117 | ret = ccp_sha_finish_hmac(async_req); | ||
118 | |||
119 | e_free: | ||
120 | sg_free_table(&rctx->data_sg); | ||
121 | |||
122 | return ret; | ||
123 | } | ||
124 | |||
125 | static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes, | ||
126 | unsigned int final) | ||
127 | { | ||
128 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
129 | struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); | ||
130 | struct scatterlist *sg; | ||
131 | unsigned int block_size = | ||
132 | crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); | ||
133 | unsigned int sg_count; | ||
134 | gfp_t gfp; | ||
135 | u64 len; | ||
136 | int ret; | ||
137 | |||
138 | len = (u64)rctx->buf_count + (u64)nbytes; | ||
139 | |||
140 | if (!final && (len <= block_size)) { | ||
141 | scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src, | ||
142 | 0, nbytes, 0); | ||
143 | rctx->buf_count += nbytes; | ||
144 | |||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | rctx->src = req->src; | ||
149 | rctx->nbytes = nbytes; | ||
150 | |||
151 | rctx->final = final; | ||
152 | rctx->hash_rem = final ? 0 : len & (block_size - 1); | ||
153 | rctx->hash_cnt = len - rctx->hash_rem; | ||
154 | if (!final && !rctx->hash_rem) { | ||
155 | /* CCP can't do zero length final, so keep some data around */ | ||
156 | rctx->hash_cnt -= block_size; | ||
157 | rctx->hash_rem = block_size; | ||
158 | } | ||
159 | |||
160 | /* Initialize the context scatterlist */ | ||
161 | sg_init_one(&rctx->ctx_sg, rctx->ctx, sizeof(rctx->ctx)); | ||
162 | |||
163 | sg = NULL; | ||
164 | if (rctx->buf_count && nbytes) { | ||
165 | /* Build the data scatterlist table - allocate enough entries | ||
166 | * for both data pieces (buffer and input data) | ||
167 | */ | ||
168 | gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? | ||
169 | GFP_KERNEL : GFP_ATOMIC; | ||
170 | sg_count = sg_nents(req->src) + 1; | ||
171 | ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp); | ||
172 | if (ret) | ||
173 | return ret; | ||
174 | |||
175 | sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); | ||
176 | sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg); | ||
177 | sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src); | ||
178 | sg_mark_end(sg); | ||
179 | |||
180 | sg = rctx->data_sg.sgl; | ||
181 | } else if (rctx->buf_count) { | ||
182 | sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); | ||
183 | |||
184 | sg = &rctx->buf_sg; | ||
185 | } else if (nbytes) { | ||
186 | sg = req->src; | ||
187 | } | ||
188 | |||
189 | rctx->msg_bits += (rctx->hash_cnt << 3); /* Total in bits */ | ||
190 | |||
191 | memset(&rctx->cmd, 0, sizeof(rctx->cmd)); | ||
192 | INIT_LIST_HEAD(&rctx->cmd.entry); | ||
193 | rctx->cmd.engine = CCP_ENGINE_SHA; | ||
194 | rctx->cmd.u.sha.type = rctx->type; | ||
195 | rctx->cmd.u.sha.ctx = &rctx->ctx_sg; | ||
196 | rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx); | ||
197 | rctx->cmd.u.sha.src = sg; | ||
198 | rctx->cmd.u.sha.src_len = rctx->hash_cnt; | ||
199 | rctx->cmd.u.sha.final = rctx->final; | ||
200 | rctx->cmd.u.sha.msg_bits = rctx->msg_bits; | ||
201 | |||
202 | rctx->first = 0; | ||
203 | |||
204 | ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); | ||
205 | |||
206 | return ret; | ||
207 | } | ||
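
To make the buffering arithmetic in ccp_do_sha_update concrete: a non-final update whose total length (buffered plus new bytes) does not exceed the block size is simply copied into the buffer; otherwise hash_rem = len mod block_size is carried to the next call, and if that remainder is zero a whole block is held back because the CCP cannot perform a zero-length final pass. A small worked example with hypothetical lengths and the SHA-1/224/256 block size:

#include <stdio.h>

int main(void)
{
	unsigned long long buf_count = 10, nbytes = 118;	/* hypothetical */
	unsigned int block_size = 64;				/* SHA-1/224/256 */
	int final = 0;

	unsigned long long len = buf_count + nbytes;		/* 128 */
	unsigned int hash_rem = final ? 0 : len & (block_size - 1);
	unsigned long long hash_cnt = len - hash_rem;

	if (!final && !hash_rem) {
		/* Keep one full block buffered for the eventual final op */
		hash_cnt -= block_size;
		hash_rem = block_size;
	}

	/* Prints: hash 64 bytes now, buffer 64 bytes for later */
	printf("hash %llu bytes now, buffer %u bytes for later\n",
	       hash_cnt, hash_rem);
	return 0;
}
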
208 | |||
209 | static int ccp_sha_init(struct ahash_request *req) | ||
210 | { | ||
211 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
212 | struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); | ||
213 | struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); | ||
214 | struct ccp_crypto_ahash_alg *alg = | ||
215 | ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); | ||
216 | unsigned int block_size = | ||
217 | crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); | ||
218 | |||
219 | memset(rctx, 0, sizeof(*rctx)); | ||
220 | |||
221 | memcpy(rctx->ctx, alg->init, sizeof(rctx->ctx)); | ||
222 | rctx->type = alg->type; | ||
223 | rctx->first = 1; | ||
224 | |||
225 | if (ctx->u.sha.key_len) { | ||
226 | /* Buffer the HMAC key for first update */ | ||
227 | memcpy(rctx->buf, ctx->u.sha.ipad, block_size); | ||
228 | rctx->buf_count = block_size; | ||
229 | } | ||
230 | |||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static int ccp_sha_update(struct ahash_request *req) | ||
235 | { | ||
236 | return ccp_do_sha_update(req, req->nbytes, 0); | ||
237 | } | ||
238 | |||
239 | static int ccp_sha_final(struct ahash_request *req) | ||
240 | { | ||
241 | return ccp_do_sha_update(req, 0, 1); | ||
242 | } | ||
243 | |||
244 | static int ccp_sha_finup(struct ahash_request *req) | ||
245 | { | ||
246 | return ccp_do_sha_update(req, req->nbytes, 1); | ||
247 | } | ||
248 | |||
249 | static int ccp_sha_digest(struct ahash_request *req) | ||
250 | { | ||
251 | int ret; | ||
252 | |||
253 | ret = ccp_sha_init(req); | ||
254 | if (ret) | ||
255 | return ret; | ||
256 | |||
257 | return ccp_sha_finup(req); | ||
258 | } | ||
259 | |||
260 | static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, | ||
261 | unsigned int key_len) | ||
262 | { | ||
263 | struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); | ||
264 | struct scatterlist sg; | ||
265 | unsigned int block_size = | ||
266 | crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); | ||
267 | unsigned int digest_size = crypto_ahash_digestsize(tfm); | ||
268 | int i, ret; | ||
269 | |||
270 | /* Set to zero until complete */ | ||
271 | ctx->u.sha.key_len = 0; | ||
272 | |||
273 | /* Clear key area to provide zero padding for keys smaller | ||
274 | * than the block size | ||
275 | */ | ||
276 | memset(ctx->u.sha.key, 0, sizeof(ctx->u.sha.key)); | ||
277 | |||
278 | if (key_len > block_size) { | ||
279 | /* Must hash the input key */ | ||
280 | sg_init_one(&sg, key, key_len); | ||
281 | ret = ccp_sync_hash(tfm, ctx->u.sha.key, &sg, key_len); | ||
282 | if (ret) { | ||
283 | crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
284 | return -EINVAL; | ||
285 | } | ||
286 | |||
287 | key_len = digest_size; | ||
288 | } else | ||
289 | memcpy(ctx->u.sha.key, key, key_len); | ||
290 | |||
291 | for (i = 0; i < block_size; i++) { | ||
292 | ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36; | ||
293 | ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c; | ||
294 | } | ||
295 | |||
296 | ctx->u.sha.key_len = key_len; | ||
297 | |||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | static int ccp_sha_cra_init(struct crypto_tfm *tfm) | ||
302 | { | ||
303 | struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); | ||
304 | struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); | ||
305 | |||
306 | ctx->complete = ccp_sha_complete; | ||
307 | ctx->u.sha.key_len = 0; | ||
308 | |||
309 | crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_sha_req_ctx)); | ||
310 | |||
311 | return 0; | ||
312 | } | ||
313 | |||
314 | static void ccp_sha_cra_exit(struct crypto_tfm *tfm) | ||
315 | { | ||
316 | } | ||
317 | |||
318 | static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm) | ||
319 | { | ||
320 | struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); | ||
321 | struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm); | ||
322 | struct crypto_ahash *hmac_tfm; | ||
323 | |||
324 | hmac_tfm = crypto_alloc_ahash(alg->child_alg, | ||
325 | CRYPTO_ALG_TYPE_AHASH, 0); | ||
326 | if (IS_ERR(hmac_tfm)) { | ||
327 | pr_warn("could not load driver %s needed for HMAC support\n", | ||
328 | alg->child_alg); | ||
329 | return PTR_ERR(hmac_tfm); | ||
330 | } | ||
331 | |||
332 | ctx->u.sha.hmac_tfm = hmac_tfm; | ||
333 | |||
334 | return ccp_sha_cra_init(tfm); | ||
335 | } | ||
336 | |||
337 | static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm) | ||
338 | { | ||
339 | struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); | ||
340 | |||
341 | if (ctx->u.sha.hmac_tfm) | ||
342 | crypto_free_ahash(ctx->u.sha.hmac_tfm); | ||
343 | |||
344 | ccp_sha_cra_exit(tfm); | ||
345 | } | ||
346 | |||
347 | static const __be32 sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { | ||
348 | cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1), | ||
349 | cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3), | ||
350 | cpu_to_be32(SHA1_H4), 0, 0, 0, | ||
351 | }; | ||
352 | |||
353 | static const __be32 sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { | ||
354 | cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1), | ||
355 | cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3), | ||
356 | cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5), | ||
357 | cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7), | ||
358 | }; | ||
359 | |||
360 | static const __be32 sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { | ||
361 | cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1), | ||
362 | cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3), | ||
363 | cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5), | ||
364 | cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7), | ||
365 | }; | ||
366 | |||
367 | struct ccp_sha_def { | ||
368 | const char *name; | ||
369 | const char *drv_name; | ||
370 | const __be32 *init; | ||
371 | enum ccp_sha_type type; | ||
372 | u32 digest_size; | ||
373 | u32 block_size; | ||
374 | }; | ||
375 | |||
376 | static struct ccp_sha_def sha_algs[] = { | ||
377 | { | ||
378 | .name = "sha1", | ||
379 | .drv_name = "sha1-ccp", | ||
380 | .init = sha1_init, | ||
381 | .type = CCP_SHA_TYPE_1, | ||
382 | .digest_size = SHA1_DIGEST_SIZE, | ||
383 | .block_size = SHA1_BLOCK_SIZE, | ||
384 | }, | ||
385 | { | ||
386 | .name = "sha224", | ||
387 | .drv_name = "sha224-ccp", | ||
388 | .init = sha224_init, | ||
389 | .type = CCP_SHA_TYPE_224, | ||
390 | .digest_size = SHA224_DIGEST_SIZE, | ||
391 | .block_size = SHA224_BLOCK_SIZE, | ||
392 | }, | ||
393 | { | ||
394 | .name = "sha256", | ||
395 | .drv_name = "sha256-ccp", | ||
396 | .init = sha256_init, | ||
397 | .type = CCP_SHA_TYPE_256, | ||
398 | .digest_size = SHA256_DIGEST_SIZE, | ||
399 | .block_size = SHA256_BLOCK_SIZE, | ||
400 | }, | ||
401 | }; | ||
402 | |||
403 | static int ccp_register_hmac_alg(struct list_head *head, | ||
404 | const struct ccp_sha_def *def, | ||
405 | const struct ccp_crypto_ahash_alg *base_alg) | ||
406 | { | ||
407 | struct ccp_crypto_ahash_alg *ccp_alg; | ||
408 | struct ahash_alg *alg; | ||
409 | struct hash_alg_common *halg; | ||
410 | struct crypto_alg *base; | ||
411 | int ret; | ||
412 | |||
413 | ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); | ||
414 | if (!ccp_alg) | ||
415 | return -ENOMEM; | ||
416 | |||
417 | /* Copy the base algorithm and only change what's necessary */ | ||
418 | *ccp_alg = *base_alg; | ||
419 | INIT_LIST_HEAD(&ccp_alg->entry); | ||
420 | |||
421 | strncpy(ccp_alg->child_alg, def->name, CRYPTO_MAX_ALG_NAME); | ||
422 | |||
423 | alg = &ccp_alg->alg; | ||
424 | alg->setkey = ccp_sha_setkey; | ||
425 | |||
426 | halg = &alg->halg; | ||
427 | |||
428 | base = &halg->base; | ||
429 | snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", def->name); | ||
430 | snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "hmac-%s", | ||
431 | def->drv_name); | ||
432 | base->cra_init = ccp_hmac_sha_cra_init; | ||
433 | base->cra_exit = ccp_hmac_sha_cra_exit; | ||
434 | |||
435 | ret = crypto_register_ahash(alg); | ||
436 | if (ret) { | ||
437 | pr_err("%s ahash algorithm registration error (%d)\n", | ||
438 | base->cra_name, ret); | ||
439 | kfree(ccp_alg); | ||
440 | return ret; | ||
441 | } | ||
442 | |||
443 | list_add(&ccp_alg->entry, head); | ||
444 | |||
445 | return ret; | ||
446 | } | ||
447 | |||
448 | static int ccp_register_sha_alg(struct list_head *head, | ||
449 | const struct ccp_sha_def *def) | ||
450 | { | ||
451 | struct ccp_crypto_ahash_alg *ccp_alg; | ||
452 | struct ahash_alg *alg; | ||
453 | struct hash_alg_common *halg; | ||
454 | struct crypto_alg *base; | ||
455 | int ret; | ||
456 | |||
457 | ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); | ||
458 | if (!ccp_alg) | ||
459 | return -ENOMEM; | ||
460 | |||
461 | INIT_LIST_HEAD(&ccp_alg->entry); | ||
462 | |||
463 | ccp_alg->init = def->init; | ||
464 | ccp_alg->type = def->type; | ||
465 | |||
466 | alg = &ccp_alg->alg; | ||
467 | alg->init = ccp_sha_init; | ||
468 | alg->update = ccp_sha_update; | ||
469 | alg->final = ccp_sha_final; | ||
470 | alg->finup = ccp_sha_finup; | ||
471 | alg->digest = ccp_sha_digest; | ||
472 | |||
473 | halg = &alg->halg; | ||
474 | halg->digestsize = def->digest_size; | ||
475 | |||
476 | base = &halg->base; | ||
477 | snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); | ||
478 | snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", | ||
479 | def->drv_name); | ||
480 | base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | | ||
481 | CRYPTO_ALG_KERN_DRIVER_ONLY | | ||
482 | CRYPTO_ALG_NEED_FALLBACK; | ||
483 | base->cra_blocksize = def->block_size; | ||
484 | base->cra_ctxsize = sizeof(struct ccp_ctx); | ||
485 | base->cra_priority = CCP_CRA_PRIORITY; | ||
486 | base->cra_type = &crypto_ahash_type; | ||
487 | base->cra_init = ccp_sha_cra_init; | ||
488 | base->cra_exit = ccp_sha_cra_exit; | ||
489 | base->cra_module = THIS_MODULE; | ||
490 | |||
491 | ret = crypto_register_ahash(alg); | ||
492 | if (ret) { | ||
493 | pr_err("%s ahash algorithm registration error (%d)\n", | ||
494 | base->cra_name, ret); | ||
495 | kfree(ccp_alg); | ||
496 | return ret; | ||
497 | } | ||
498 | |||
499 | list_add(&ccp_alg->entry, head); | ||
500 | |||
501 | ret = ccp_register_hmac_alg(head, def, ccp_alg); | ||
502 | |||
503 | return ret; | ||
504 | } | ||
505 | |||
506 | int ccp_register_sha_algs(struct list_head *head) | ||
507 | { | ||
508 | int i, ret; | ||
509 | |||
510 | for (i = 0; i < ARRAY_SIZE(sha_algs); i++) { | ||
511 | ret = ccp_register_sha_alg(head, &sha_algs[i]); | ||
512 | if (ret) | ||
513 | return ret; | ||
514 | } | ||
515 | |||
516 | return 0; | ||
517 | } | ||
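
The setkey and finish-HMAC paths above follow the standard HMAC construction: a key longer than the block size is hashed down first, the zero-padded key is XORed with 0x36 and 0x5c to produce ipad and opad, the inner digest of ipad || message runs on the CCP, and the outer digest of opad || inner digest goes through the synchronous ccp_sync_hash helper. A minimal sketch of just the pad derivation (plain C, hypothetical key):

#include <stdio.h>
#include <string.h>

#define BLOCK_SIZE 64	/* SHA-1/224/256 block size */

int main(void)
{
	const unsigned char key[] = "hypothetical-hmac-key";	/* shorter than a block */
	unsigned char kpad[BLOCK_SIZE], ipad[BLOCK_SIZE], opad[BLOCK_SIZE];
	int i;

	/* Zero-pad the key to the block size; a key longer than the block
	 * would first be hashed to digest size, as in ccp_sha_setkey. */
	memset(kpad, 0, sizeof(kpad));
	memcpy(kpad, key, sizeof(key) - 1);

	for (i = 0; i < BLOCK_SIZE; i++) {
		ipad[i] = kpad[i] ^ 0x36;
		opad[i] = kpad[i] ^ 0x5c;
	}

	/* HMAC(K, m) = H(opad || H(ipad || m)) */
	printf("ipad[0]=%02x opad[0]=%02x\n", ipad[0], opad[0]);
	return 0;
}
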
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h new file mode 100644 index 000000000000..b222231b6169 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto.h | |||
@@ -0,0 +1,197 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) crypto API support | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #ifndef __CCP_CRYPTO_H__ | ||
14 | #define __CCP_CRYPTO_H__ | ||
15 | |||
16 | |||
17 | #include <linux/list.h> | ||
18 | #include <linux/wait.h> | ||
19 | #include <linux/pci.h> | ||
20 | #include <linux/ccp.h> | ||
21 | #include <linux/crypto.h> | ||
22 | #include <crypto/algapi.h> | ||
23 | #include <crypto/aes.h> | ||
24 | #include <crypto/ctr.h> | ||
25 | #include <crypto/hash.h> | ||
26 | #include <crypto/sha.h> | ||
27 | |||
28 | |||
29 | #define CCP_CRA_PRIORITY 300 | ||
30 | |||
31 | struct ccp_crypto_ablkcipher_alg { | ||
32 | struct list_head entry; | ||
33 | |||
34 | u32 mode; | ||
35 | |||
36 | struct crypto_alg alg; | ||
37 | }; | ||
38 | |||
39 | struct ccp_crypto_ahash_alg { | ||
40 | struct list_head entry; | ||
41 | |||
42 | const __be32 *init; | ||
43 | u32 type; | ||
44 | u32 mode; | ||
45 | |||
46 | /* Child algorithm used for HMAC, CMAC, etc */ | ||
47 | char child_alg[CRYPTO_MAX_ALG_NAME]; | ||
48 | |||
49 | struct ahash_alg alg; | ||
50 | }; | ||
51 | |||
52 | static inline struct ccp_crypto_ablkcipher_alg * | ||
53 | ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm) | ||
54 | { | ||
55 | struct crypto_alg *alg = tfm->__crt_alg; | ||
56 | |||
57 | return container_of(alg, struct ccp_crypto_ablkcipher_alg, alg); | ||
58 | } | ||
59 | |||
60 | static inline struct ccp_crypto_ahash_alg * | ||
61 | ccp_crypto_ahash_alg(struct crypto_tfm *tfm) | ||
62 | { | ||
63 | struct crypto_alg *alg = tfm->__crt_alg; | ||
64 | struct ahash_alg *ahash_alg; | ||
65 | |||
66 | ahash_alg = container_of(alg, struct ahash_alg, halg.base); | ||
67 | |||
68 | return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg); | ||
69 | } | ||
70 | |||
71 | |||
72 | /***** AES related defines *****/ | ||
73 | struct ccp_aes_ctx { | ||
74 | /* Fallback cipher for XTS with unsupported unit sizes */ | ||
75 | struct crypto_ablkcipher *tfm_ablkcipher; | ||
76 | |||
77 | /* Cipher used to generate CMAC K1/K2 keys */ | ||
78 | struct crypto_cipher *tfm_cipher; | ||
79 | |||
80 | enum ccp_engine engine; | ||
81 | enum ccp_aes_type type; | ||
82 | enum ccp_aes_mode mode; | ||
83 | |||
84 | struct scatterlist key_sg; | ||
85 | unsigned int key_len; | ||
86 | u8 key[AES_MAX_KEY_SIZE]; | ||
87 | |||
88 | u8 nonce[CTR_RFC3686_NONCE_SIZE]; | ||
89 | |||
90 | /* CMAC key structures */ | ||
91 | struct scatterlist k1_sg; | ||
92 | struct scatterlist k2_sg; | ||
93 | unsigned int kn_len; | ||
94 | u8 k1[AES_BLOCK_SIZE]; | ||
95 | u8 k2[AES_BLOCK_SIZE]; | ||
96 | }; | ||
97 | |||
98 | struct ccp_aes_req_ctx { | ||
99 | struct scatterlist iv_sg; | ||
100 | u8 iv[AES_BLOCK_SIZE]; | ||
101 | |||
102 | /* Fields used for RFC3686 requests */ | ||
103 | u8 *rfc3686_info; | ||
104 | u8 rfc3686_iv[AES_BLOCK_SIZE]; | ||
105 | |||
106 | struct ccp_cmd cmd; | ||
107 | }; | ||
108 | |||
109 | struct ccp_aes_cmac_req_ctx { | ||
110 | unsigned int null_msg; | ||
111 | unsigned int final; | ||
112 | |||
113 | struct scatterlist *src; | ||
114 | unsigned int nbytes; | ||
115 | |||
116 | u64 hash_cnt; | ||
117 | unsigned int hash_rem; | ||
118 | |||
119 | struct sg_table data_sg; | ||
120 | |||
121 | struct scatterlist iv_sg; | ||
122 | u8 iv[AES_BLOCK_SIZE]; | ||
123 | |||
124 | struct scatterlist buf_sg; | ||
125 | unsigned int buf_count; | ||
126 | u8 buf[AES_BLOCK_SIZE]; | ||
127 | |||
128 | struct scatterlist pad_sg; | ||
129 | unsigned int pad_count; | ||
130 | u8 pad[AES_BLOCK_SIZE]; | ||
131 | |||
132 | struct ccp_cmd cmd; | ||
133 | }; | ||
134 | |||
135 | /***** SHA related defines *****/ | ||
136 | #define MAX_SHA_CONTEXT_SIZE SHA256_DIGEST_SIZE | ||
137 | #define MAX_SHA_BLOCK_SIZE SHA256_BLOCK_SIZE | ||
138 | |||
139 | struct ccp_sha_ctx { | ||
140 | unsigned int key_len; | ||
141 | u8 key[MAX_SHA_BLOCK_SIZE]; | ||
142 | u8 ipad[MAX_SHA_BLOCK_SIZE]; | ||
143 | u8 opad[MAX_SHA_BLOCK_SIZE]; | ||
144 | struct crypto_ahash *hmac_tfm; | ||
145 | }; | ||
146 | |||
147 | struct ccp_sha_req_ctx { | ||
148 | enum ccp_sha_type type; | ||
149 | |||
150 | u64 msg_bits; | ||
151 | |||
152 | unsigned int first; | ||
153 | unsigned int final; | ||
154 | |||
155 | struct scatterlist *src; | ||
156 | unsigned int nbytes; | ||
157 | |||
158 | u64 hash_cnt; | ||
159 | unsigned int hash_rem; | ||
160 | |||
161 | struct sg_table data_sg; | ||
162 | |||
163 | struct scatterlist ctx_sg; | ||
164 | u8 ctx[MAX_SHA_CONTEXT_SIZE]; | ||
165 | |||
166 | struct scatterlist buf_sg; | ||
167 | unsigned int buf_count; | ||
168 | u8 buf[MAX_SHA_BLOCK_SIZE]; | ||
169 | |||
170 | /* HMAC support field */ | ||
171 | struct scatterlist pad_sg; | ||
172 | |||
173 | /* CCP driver command */ | ||
174 | struct ccp_cmd cmd; | ||
175 | }; | ||
176 | |||
177 | /***** Common Context Structure *****/ | ||
178 | struct ccp_ctx { | ||
179 | int (*complete)(struct crypto_async_request *req, int ret); | ||
180 | |||
181 | union { | ||
182 | struct ccp_aes_ctx aes; | ||
183 | struct ccp_sha_ctx sha; | ||
184 | } u; | ||
185 | }; | ||
186 | |||
187 | int ccp_crypto_enqueue_request(struct crypto_async_request *req, | ||
188 | struct ccp_cmd *cmd); | ||
189 | struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table, | ||
190 | struct scatterlist *sg_add); | ||
191 | |||
192 | int ccp_register_aes_algs(struct list_head *head); | ||
193 | int ccp_register_aes_cmac_algs(struct list_head *head); | ||
194 | int ccp_register_aes_xts_algs(struct list_head *head); | ||
195 | int ccp_register_sha_algs(struct list_head *head); | ||
196 | |||
197 | #endif | ||
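
The two inline helpers above recover the driver-private wrapper from the embedded crypto_alg or ahash_alg with container_of. A stripped-down illustration of the same pattern in standalone C, using simplified stand-in types rather than the kernel structures:

#include <stdio.h>
#include <stddef.h>

/* Simplified container_of: recover the outer struct from a member pointer */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct crypto_alg_stub { int priority; };

struct ccp_alg_wrapper {
	int mode;			/* driver-private field */
	struct crypto_alg_stub alg;	/* what the crypto core sees */
};

int main(void)
{
	struct ccp_alg_wrapper w = { .mode = 2, .alg = { .priority = 300 } };
	struct crypto_alg_stub *alg = &w.alg;	/* the core only hands back this */
	struct ccp_alg_wrapper *back;

	back = container_of(alg, struct ccp_alg_wrapper, alg);
	printf("mode=%d priority=%d\n", back->mode, back->alg.priority);
	return 0;
}
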
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c new file mode 100644 index 000000000000..c3bc21264600 --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev.c | |||
@@ -0,0 +1,595 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) driver | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/kthread.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/interrupt.h> | ||
18 | #include <linux/spinlock.h> | ||
19 | #include <linux/mutex.h> | ||
20 | #include <linux/delay.h> | ||
21 | #include <linux/hw_random.h> | ||
22 | #include <linux/cpu.h> | ||
23 | #include <asm/cpu_device_id.h> | ||
24 | #include <linux/ccp.h> | ||
25 | |||
26 | #include "ccp-dev.h" | ||
27 | |||
28 | MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); | ||
29 | MODULE_LICENSE("GPL"); | ||
30 | MODULE_VERSION("1.0.0"); | ||
31 | MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver"); | ||
32 | |||
33 | |||
34 | static struct ccp_device *ccp_dev; | ||
35 | static inline struct ccp_device *ccp_get_device(void) | ||
36 | { | ||
37 | return ccp_dev; | ||
38 | } | ||
39 | |||
40 | static inline void ccp_add_device(struct ccp_device *ccp) | ||
41 | { | ||
42 | ccp_dev = ccp; | ||
43 | } | ||
44 | |||
45 | static inline void ccp_del_device(struct ccp_device *ccp) | ||
46 | { | ||
47 | ccp_dev = NULL; | ||
48 | } | ||
49 | |||
50 | /** | ||
51 | * ccp_enqueue_cmd - queue an operation for processing by the CCP | ||
52 | * | ||
53 | * @cmd: ccp_cmd struct to be processed | ||
54 | * | ||
55 | * Queue a cmd to be processed by the CCP. If queueing the cmd | ||
56 | * would exceed the defined length of the cmd queue, the cmd will | ||
57 | * be queued only if the CCP_CMD_MAY_BACKLOG flag is set, and | ||
58 | * queueing it will result in a return code of -EBUSY. | ||
59 | * | ||
60 | * The callback routine specified in the ccp_cmd struct will be | ||
61 | * called to notify the caller of completion (if the cmd was not | ||
62 | * backlogged) or advancement out of the backlog. If the cmd has | ||
63 | * advanced out of the backlog the "err" value of the callback | ||
64 | * will be -EINPROGRESS. Any other "err" value during callback is | ||
65 | * the result of the operation. | ||
66 | * | ||
67 | * The cmd has been successfully queued if: | ||
68 | * the return code is -EINPROGRESS or | ||
69 | * the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set | ||
70 | */ | ||
71 | int ccp_enqueue_cmd(struct ccp_cmd *cmd) | ||
72 | { | ||
73 | struct ccp_device *ccp = ccp_get_device(); | ||
74 | unsigned long flags; | ||
75 | unsigned int i; | ||
76 | int ret; | ||
77 | |||
78 | if (!ccp) | ||
79 | return -ENODEV; | ||
80 | |||
81 | /* Caller must supply a callback routine */ | ||
82 | if (!cmd->callback) | ||
83 | return -EINVAL; | ||
84 | |||
85 | cmd->ccp = ccp; | ||
86 | |||
87 | spin_lock_irqsave(&ccp->cmd_lock, flags); | ||
88 | |||
89 | i = ccp->cmd_q_count; | ||
90 | |||
91 | if (ccp->cmd_count >= MAX_CMD_QLEN) { | ||
92 | ret = -EBUSY; | ||
93 | if (cmd->flags & CCP_CMD_MAY_BACKLOG) | ||
94 | list_add_tail(&cmd->entry, &ccp->backlog); | ||
95 | } else { | ||
96 | ret = -EINPROGRESS; | ||
97 | ccp->cmd_count++; | ||
98 | list_add_tail(&cmd->entry, &ccp->cmd); | ||
99 | |||
100 | /* Find an idle queue */ | ||
101 | if (!ccp->suspending) { | ||
102 | for (i = 0; i < ccp->cmd_q_count; i++) { | ||
103 | if (ccp->cmd_q[i].active) | ||
104 | continue; | ||
105 | |||
106 | break; | ||
107 | } | ||
108 | } | ||
109 | } | ||
110 | |||
111 | spin_unlock_irqrestore(&ccp->cmd_lock, flags); | ||
112 | |||
113 | /* If we found an idle queue, wake it up */ | ||
114 | if (i < ccp->cmd_q_count) | ||
115 | wake_up_process(ccp->cmd_q[i].kthread); | ||
116 | |||
117 | return ret; | ||
118 | } | ||
119 | EXPORT_SYMBOL_GPL(ccp_enqueue_cmd); | ||
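The return-code contract documented above is easiest to see from a caller's point of view. The following minimal sketch is illustrative only and not part of this patch: my_ctx, my_callback and my_submit are hypothetical names, while the ccp_cmd callback/data/flags fields and CCP_CMD_MAY_BACKLOG are taken from this driver.

	/* Hypothetical caller sketch (not part of this patch): submit one
	 * cmd and wait for its completion callback.  Needs
	 * <linux/completion.h> and <linux/ccp.h>.
	 */
	struct my_ctx {
		struct completion done;
		int err;
	};

	static void my_callback(void *data, int err)
	{
		struct my_ctx *ctx = data;

		if (err == -EINPROGRESS)	/* cmd advanced out of the backlog */
			return;

		ctx->err = err;			/* final result of the operation */
		complete(&ctx->done);
	}

	static int my_submit(struct ccp_cmd *cmd, struct my_ctx *ctx)
	{
		int ret;

		init_completion(&ctx->done);
		cmd->callback = my_callback;	/* required by ccp_enqueue_cmd() */
		cmd->data = ctx;
		cmd->flags |= CCP_CMD_MAY_BACKLOG;

		ret = ccp_enqueue_cmd(cmd);
		if ((ret != -EINPROGRESS) && (ret != -EBUSY))
			return ret;	/* not queued, e.g. -ENODEV or -EINVAL */

		/* -EBUSY here means backlogged (we set MAY_BACKLOG above) */
		wait_for_completion(&ctx->done);
		return ctx->err;
	}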
120 | |||
121 | static void ccp_do_cmd_backlog(struct work_struct *work) | ||
122 | { | ||
123 | struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work); | ||
124 | struct ccp_device *ccp = cmd->ccp; | ||
125 | unsigned long flags; | ||
126 | unsigned int i; | ||
127 | |||
128 | cmd->callback(cmd->data, -EINPROGRESS); | ||
129 | |||
130 | spin_lock_irqsave(&ccp->cmd_lock, flags); | ||
131 | |||
132 | ccp->cmd_count++; | ||
133 | list_add_tail(&cmd->entry, &ccp->cmd); | ||
134 | |||
135 | /* Find an idle queue */ | ||
136 | for (i = 0; i < ccp->cmd_q_count; i++) { | ||
137 | if (ccp->cmd_q[i].active) | ||
138 | continue; | ||
139 | |||
140 | break; | ||
141 | } | ||
142 | |||
143 | spin_unlock_irqrestore(&ccp->cmd_lock, flags); | ||
144 | |||
145 | /* If we found an idle queue, wake it up */ | ||
146 | if (i < ccp->cmd_q_count) | ||
147 | wake_up_process(ccp->cmd_q[i].kthread); | ||
148 | } | ||
149 | |||
150 | static struct ccp_cmd *ccp_dequeue_cmd(struct ccp_cmd_queue *cmd_q) | ||
151 | { | ||
152 | struct ccp_device *ccp = cmd_q->ccp; | ||
153 | struct ccp_cmd *cmd = NULL; | ||
154 | struct ccp_cmd *backlog = NULL; | ||
155 | unsigned long flags; | ||
156 | |||
157 | spin_lock_irqsave(&ccp->cmd_lock, flags); | ||
158 | |||
159 | cmd_q->active = 0; | ||
160 | |||
161 | if (ccp->suspending) { | ||
162 | cmd_q->suspended = 1; | ||
163 | |||
164 | spin_unlock_irqrestore(&ccp->cmd_lock, flags); | ||
165 | wake_up_interruptible(&ccp->suspend_queue); | ||
166 | |||
167 | return NULL; | ||
168 | } | ||
169 | |||
170 | if (ccp->cmd_count) { | ||
171 | cmd_q->active = 1; | ||
172 | |||
173 | cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); | ||
174 | list_del(&cmd->entry); | ||
175 | |||
176 | ccp->cmd_count--; | ||
177 | } | ||
178 | |||
179 | if (!list_empty(&ccp->backlog)) { | ||
180 | backlog = list_first_entry(&ccp->backlog, struct ccp_cmd, | ||
181 | entry); | ||
182 | list_del(&backlog->entry); | ||
183 | } | ||
184 | |||
185 | spin_unlock_irqrestore(&ccp->cmd_lock, flags); | ||
186 | |||
187 | if (backlog) { | ||
188 | INIT_WORK(&backlog->work, ccp_do_cmd_backlog); | ||
189 | schedule_work(&backlog->work); | ||
190 | } | ||
191 | |||
192 | return cmd; | ||
193 | } | ||
194 | |||
195 | static void ccp_do_cmd_complete(struct work_struct *work) | ||
196 | { | ||
197 | struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work); | ||
198 | |||
199 | cmd->callback(cmd->data, cmd->ret); | ||
200 | } | ||
201 | |||
202 | static int ccp_cmd_queue_thread(void *data) | ||
203 | { | ||
204 | struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data; | ||
205 | struct ccp_cmd *cmd; | ||
206 | |||
207 | set_current_state(TASK_INTERRUPTIBLE); | ||
208 | while (!kthread_should_stop()) { | ||
209 | schedule(); | ||
210 | |||
211 | set_current_state(TASK_INTERRUPTIBLE); | ||
212 | |||
213 | cmd = ccp_dequeue_cmd(cmd_q); | ||
214 | if (!cmd) | ||
215 | continue; | ||
216 | |||
217 | __set_current_state(TASK_RUNNING); | ||
218 | |||
219 | /* Execute the command */ | ||
220 | cmd->ret = ccp_run_cmd(cmd_q, cmd); | ||
221 | |||
222 | /* Schedule the completion callback */ | ||
223 | INIT_WORK(&cmd->work, ccp_do_cmd_complete); | ||
224 | schedule_work(&cmd->work); | ||
225 | } | ||
226 | |||
227 | __set_current_state(TASK_RUNNING); | ||
228 | |||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait) | ||
233 | { | ||
234 | struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng); | ||
235 | u32 trng_value; | ||
236 | int len = min_t(int, sizeof(trng_value), max); | ||
237 | |||
238 | /* | ||
239 | * Locking is provided by the caller so we can update device | ||
240 | * hwrng-related fields safely | ||
241 | */ | ||
242 | trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG); | ||
243 | if (!trng_value) { | ||
244 | /* Zero is returned if no data is available or if a | ||
245 | * bad-entropy error is present. Assume an error if | ||
246 | * we exceed TRNG_RETRIES reads of zero. | ||
247 | */ | ||
248 | if (ccp->hwrng_retries++ > TRNG_RETRIES) | ||
249 | return -EIO; | ||
250 | |||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | /* Reset the counter and save the rng value */ | ||
255 | ccp->hwrng_retries = 0; | ||
256 | memcpy(data, &trng_value, len); | ||
257 | |||
258 | return len; | ||
259 | } | ||
260 | |||
261 | /** | ||
262 | * ccp_alloc_struct - allocate and initialize the ccp_device struct | ||
263 | * | ||
264 | * @dev: device struct of the CCP | ||
265 | */ | ||
266 | struct ccp_device *ccp_alloc_struct(struct device *dev) | ||
267 | { | ||
268 | struct ccp_device *ccp; | ||
269 | |||
270 | ccp = kzalloc(sizeof(*ccp), GFP_KERNEL); | ||
271 | if (ccp == NULL) { | ||
272 | dev_err(dev, "unable to allocate device struct\n"); | ||
273 | return NULL; | ||
274 | } | ||
275 | ccp->dev = dev; | ||
276 | |||
277 | INIT_LIST_HEAD(&ccp->cmd); | ||
278 | INIT_LIST_HEAD(&ccp->backlog); | ||
279 | |||
280 | spin_lock_init(&ccp->cmd_lock); | ||
281 | mutex_init(&ccp->req_mutex); | ||
282 | mutex_init(&ccp->ksb_mutex); | ||
283 | ccp->ksb_count = KSB_COUNT; | ||
284 | ccp->ksb_start = 0; | ||
285 | |||
286 | return ccp; | ||
287 | } | ||
288 | |||
289 | /** | ||
290 | * ccp_init - initialize the CCP device | ||
291 | * | ||
292 | * @ccp: ccp_device struct | ||
293 | */ | ||
294 | int ccp_init(struct ccp_device *ccp) | ||
295 | { | ||
296 | struct device *dev = ccp->dev; | ||
297 | struct ccp_cmd_queue *cmd_q; | ||
298 | struct dma_pool *dma_pool; | ||
299 | char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; | ||
300 | unsigned int qmr, qim, i; | ||
301 | int ret; | ||
302 | |||
303 | /* Find available queues */ | ||
304 | qim = 0; | ||
305 | qmr = ioread32(ccp->io_regs + Q_MASK_REG); | ||
306 | for (i = 0; i < MAX_HW_QUEUES; i++) { | ||
307 | if (!(qmr & (1 << i))) | ||
308 | continue; | ||
309 | |||
310 | /* Allocate a dma pool for this queue */ | ||
311 | snprintf(dma_pool_name, sizeof(dma_pool_name), "ccp_q%d", i); | ||
312 | dma_pool = dma_pool_create(dma_pool_name, dev, | ||
313 | CCP_DMAPOOL_MAX_SIZE, | ||
314 | CCP_DMAPOOL_ALIGN, 0); | ||
315 | if (!dma_pool) { | ||
316 | dev_err(dev, "unable to allocate dma pool\n"); | ||
317 | ret = -ENOMEM; | ||
318 | goto e_pool; | ||
319 | } | ||
320 | |||
321 | cmd_q = &ccp->cmd_q[ccp->cmd_q_count]; | ||
322 | ccp->cmd_q_count++; | ||
323 | |||
324 | cmd_q->ccp = ccp; | ||
325 | cmd_q->id = i; | ||
326 | cmd_q->dma_pool = dma_pool; | ||
327 | |||
328 | /* Reserve 2 KSB regions for the queue */ | ||
329 | cmd_q->ksb_key = KSB_START + ccp->ksb_start++; | ||
330 | cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++; | ||
331 | ccp->ksb_count -= 2; | ||
332 | |||
333 | /* Preset some register values and masks that are queue | ||
334 | * number dependent | ||
335 | */ | ||
336 | cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE + | ||
337 | (CMD_Q_STATUS_INCR * i); | ||
338 | cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE + | ||
339 | (CMD_Q_STATUS_INCR * i); | ||
340 | cmd_q->int_ok = 1 << (i * 2); | ||
341 | cmd_q->int_err = 1 << ((i * 2) + 1); | ||
342 | |||
343 | cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); | ||
344 | |||
345 | init_waitqueue_head(&cmd_q->int_queue); | ||
346 | |||
347 | /* Build queue interrupt mask (two interrupts per queue) */ | ||
348 | qim |= cmd_q->int_ok | cmd_q->int_err; | ||
349 | |||
350 | dev_dbg(dev, "queue #%u available\n", i); | ||
351 | } | ||
352 | if (ccp->cmd_q_count == 0) { | ||
353 | dev_notice(dev, "no command queues available\n"); | ||
354 | ret = -EIO; | ||
355 | goto e_pool; | ||
356 | } | ||
357 | dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count); | ||
358 | |||
359 | /* Disable and clear interrupts until ready */ | ||
360 | iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); | ||
361 | for (i = 0; i < ccp->cmd_q_count; i++) { | ||
362 | cmd_q = &ccp->cmd_q[i]; | ||
363 | |||
364 | ioread32(cmd_q->reg_int_status); | ||
365 | ioread32(cmd_q->reg_status); | ||
366 | } | ||
367 | iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); | ||
368 | |||
369 | /* Request an irq */ | ||
370 | ret = ccp->get_irq(ccp); | ||
371 | if (ret) { | ||
372 | dev_err(dev, "unable to allocate an IRQ\n"); | ||
373 | goto e_pool; | ||
374 | } | ||
375 | |||
376 | /* Initialize the queues used to wait for KSB space and suspend */ | ||
377 | init_waitqueue_head(&ccp->ksb_queue); | ||
378 | init_waitqueue_head(&ccp->suspend_queue); | ||
379 | |||
380 | /* Create a kthread for each queue */ | ||
381 | for (i = 0; i < ccp->cmd_q_count; i++) { | ||
382 | struct task_struct *kthread; | ||
383 | |||
384 | cmd_q = &ccp->cmd_q[i]; | ||
385 | |||
386 | kthread = kthread_create(ccp_cmd_queue_thread, cmd_q, | ||
387 | "ccp-q%u", cmd_q->id); | ||
388 | if (IS_ERR(kthread)) { | ||
389 | dev_err(dev, "error creating queue thread (%ld)\n", | ||
390 | PTR_ERR(kthread)); | ||
391 | ret = PTR_ERR(kthread); | ||
392 | goto e_kthread; | ||
393 | } | ||
394 | |||
395 | cmd_q->kthread = kthread; | ||
396 | wake_up_process(kthread); | ||
397 | } | ||
398 | |||
399 | /* Register the RNG */ | ||
400 | ccp->hwrng.name = "ccp-rng"; | ||
401 | ccp->hwrng.read = ccp_trng_read; | ||
402 | ret = hwrng_register(&ccp->hwrng); | ||
403 | if (ret) { | ||
404 | dev_err(dev, "error registering hwrng (%d)\n", ret); | ||
405 | goto e_kthread; | ||
406 | } | ||
407 | |||
408 | /* Make the device struct available before enabling interrupts */ | ||
409 | ccp_add_device(ccp); | ||
410 | |||
411 | /* Enable interrupts */ | ||
412 | iowrite32(qim, ccp->io_regs + IRQ_MASK_REG); | ||
413 | |||
414 | return 0; | ||
415 | |||
416 | e_kthread: | ||
417 | for (i = 0; i < ccp->cmd_q_count; i++) | ||
418 | if (ccp->cmd_q[i].kthread) | ||
419 | kthread_stop(ccp->cmd_q[i].kthread); | ||
420 | |||
421 | ccp->free_irq(ccp); | ||
422 | |||
423 | e_pool: | ||
424 | for (i = 0; i < ccp->cmd_q_count; i++) | ||
425 | dma_pool_destroy(ccp->cmd_q[i].dma_pool); | ||
426 | |||
427 | return ret; | ||
428 | } | ||
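For reference, the interrupt mask built in ccp_init() above uses two bits per queue: the even bit (int_ok) signals normal completion and the odd bit (int_err) signals an error. A small stand-alone worked example (plain user-space C, not driver code) reproduces the arithmetic:

	#include <stdio.h>

	int main(void)
	{
		unsigned int qim = 0, i;

		/* Mirrors ccp_init(): int_ok = 1 << (i * 2),
		 * int_err = 1 << ((i * 2) + 1), and qim ORs them all together.
		 */
		for (i = 0; i < 5; i++) {		/* MAX_HW_QUEUES is 5 */
			unsigned int int_ok  = 1u << (i * 2);
			unsigned int int_err = 1u << ((i * 2) + 1);

			qim |= int_ok | int_err;
			printf("queue %u: ok=0x%03x err=0x%03x\n", i, int_ok, int_err);
		}
		printf("qim=0x%03x\n", qim);	/* 0x3ff when all 5 queues exist */
		return 0;
	}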
429 | |||
430 | /** | ||
431 | * ccp_destroy - tear down the CCP device | ||
432 | * | ||
433 | * @ccp: ccp_device struct | ||
434 | */ | ||
435 | void ccp_destroy(struct ccp_device *ccp) | ||
436 | { | ||
437 | struct ccp_cmd_queue *cmd_q; | ||
438 | struct ccp_cmd *cmd; | ||
439 | unsigned int qim, i; | ||
440 | |||
441 | /* Remove general access to the device struct */ | ||
442 | ccp_del_device(ccp); | ||
443 | |||
444 | /* Unregister the RNG */ | ||
445 | hwrng_unregister(&ccp->hwrng); | ||
446 | |||
447 | /* Stop the queue kthreads */ | ||
448 | for (i = 0; i < ccp->cmd_q_count; i++) | ||
449 | if (ccp->cmd_q[i].kthread) | ||
450 | kthread_stop(ccp->cmd_q[i].kthread); | ||
451 | |||
452 | /* Build queue interrupt mask (two interrupt masks per queue) */ | ||
453 | qim = 0; | ||
454 | for (i = 0; i < ccp->cmd_q_count; i++) { | ||
455 | cmd_q = &ccp->cmd_q[i]; | ||
456 | qim |= cmd_q->int_ok | cmd_q->int_err; | ||
457 | } | ||
458 | |||
459 | /* Disable and clear interrupts */ | ||
460 | iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); | ||
461 | for (i = 0; i < ccp->cmd_q_count; i++) { | ||
462 | cmd_q = &ccp->cmd_q[i]; | ||
463 | |||
464 | ioread32(cmd_q->reg_int_status); | ||
465 | ioread32(cmd_q->reg_status); | ||
466 | } | ||
467 | iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); | ||
468 | |||
469 | ccp->free_irq(ccp); | ||
470 | |||
471 | for (i = 0; i < ccp->cmd_q_count; i++) | ||
472 | dma_pool_destroy(ccp->cmd_q[i].dma_pool); | ||
473 | |||
474 | /* Flush the cmd and backlog queue */ | ||
475 | while (!list_empty(&ccp->cmd)) { | ||
476 | /* Invoke the callback directly with an error code */ | ||
477 | cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); | ||
478 | list_del(&cmd->entry); | ||
479 | cmd->callback(cmd->data, -ENODEV); | ||
480 | } | ||
481 | while (!list_empty(&ccp->backlog)) { | ||
482 | /* Invoke the callback directly with an error code */ | ||
483 | cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry); | ||
484 | list_del(&cmd->entry); | ||
485 | cmd->callback(cmd->data, -ENODEV); | ||
486 | } | ||
487 | } | ||
488 | |||
489 | /** | ||
490 | * ccp_irq_handler - handle interrupts generated by the CCP device | ||
491 | * | ||
492 | * @irq: the irq associated with the interrupt | ||
493 | * @data: the data value supplied when the irq was created | ||
494 | */ | ||
495 | irqreturn_t ccp_irq_handler(int irq, void *data) | ||
496 | { | ||
497 | struct device *dev = data; | ||
498 | struct ccp_device *ccp = dev_get_drvdata(dev); | ||
499 | struct ccp_cmd_queue *cmd_q; | ||
500 | u32 q_int, status; | ||
501 | unsigned int i; | ||
502 | |||
503 | status = ioread32(ccp->io_regs + IRQ_STATUS_REG); | ||
504 | |||
505 | for (i = 0; i < ccp->cmd_q_count; i++) { | ||
506 | cmd_q = &ccp->cmd_q[i]; | ||
507 | |||
508 | q_int = status & (cmd_q->int_ok | cmd_q->int_err); | ||
509 | if (q_int) { | ||
510 | cmd_q->int_status = status; | ||
511 | cmd_q->q_status = ioread32(cmd_q->reg_status); | ||
512 | cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); | ||
513 | |||
514 | /* On error, only save the first error value */ | ||
515 | if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error) | ||
516 | cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); | ||
517 | |||
518 | cmd_q->int_rcvd = 1; | ||
519 | |||
520 | /* Acknowledge the interrupt and wake the kthread */ | ||
521 | iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG); | ||
522 | wake_up_interruptible(&cmd_q->int_queue); | ||
523 | } | ||
524 | } | ||
525 | |||
526 | return IRQ_HANDLED; | ||
527 | } | ||
528 | |||
529 | #ifdef CONFIG_PM | ||
530 | bool ccp_queues_suspended(struct ccp_device *ccp) | ||
531 | { | ||
532 | unsigned int suspended = 0; | ||
533 | unsigned long flags; | ||
534 | unsigned int i; | ||
535 | |||
536 | spin_lock_irqsave(&ccp->cmd_lock, flags); | ||
537 | |||
538 | for (i = 0; i < ccp->cmd_q_count; i++) | ||
539 | if (ccp->cmd_q[i].suspended) | ||
540 | suspended++; | ||
541 | |||
542 | spin_unlock_irqrestore(&ccp->cmd_lock, flags); | ||
543 | |||
544 | return ccp->cmd_q_count == suspended; | ||
545 | } | ||
546 | #endif | ||
547 | |||
548 | static const struct x86_cpu_id ccp_support[] = { | ||
549 | { X86_VENDOR_AMD, 22, }, | ||
550 | }; | ||
551 | |||
552 | static int __init ccp_mod_init(void) | ||
553 | { | ||
554 | struct cpuinfo_x86 *cpuinfo = &boot_cpu_data; | ||
555 | int ret; | ||
556 | |||
557 | if (!x86_match_cpu(ccp_support)) | ||
558 | return -ENODEV; | ||
559 | |||
560 | switch (cpuinfo->x86) { | ||
561 | case 22: | ||
562 | if ((cpuinfo->x86_model < 48) || (cpuinfo->x86_model > 63)) | ||
563 | return -ENODEV; | ||
564 | |||
565 | ret = ccp_pci_init(); | ||
566 | if (ret) | ||
567 | return ret; | ||
568 | |||
569 | /* Don't leave the driver loaded if init failed */ | ||
570 | if (!ccp_get_device()) { | ||
571 | ccp_pci_exit(); | ||
572 | return -ENODEV; | ||
573 | } | ||
574 | |||
575 | return 0; | ||
576 | |||
577 | break; | ||
578 | } | ||
579 | |||
580 | return -ENODEV; | ||
581 | } | ||
582 | |||
583 | static void __exit ccp_mod_exit(void) | ||
584 | { | ||
585 | struct cpuinfo_x86 *cpuinfo = &boot_cpu_data; | ||
586 | |||
587 | switch (cpuinfo->x86) { | ||
588 | case 22: | ||
589 | ccp_pci_exit(); | ||
590 | break; | ||
591 | } | ||
592 | } | ||
593 | |||
594 | module_init(ccp_mod_init); | ||
595 | module_exit(ccp_mod_exit); | ||
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h new file mode 100644 index 000000000000..7ec536e702ec --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev.h | |||
@@ -0,0 +1,272 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) driver | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #ifndef __CCP_DEV_H__ | ||
14 | #define __CCP_DEV_H__ | ||
15 | |||
16 | #include <linux/device.h> | ||
17 | #include <linux/pci.h> | ||
18 | #include <linux/spinlock.h> | ||
19 | #include <linux/mutex.h> | ||
20 | #include <linux/list.h> | ||
21 | #include <linux/wait.h> | ||
22 | #include <linux/dmapool.h> | ||
23 | #include <linux/hw_random.h> | ||
24 | |||
25 | |||
26 | #define IO_OFFSET 0x20000 | ||
27 | |||
28 | #define MAX_DMAPOOL_NAME_LEN 32 | ||
29 | |||
30 | #define MAX_HW_QUEUES 5 | ||
31 | #define MAX_CMD_QLEN 100 | ||
32 | |||
33 | #define TRNG_RETRIES 10 | ||
34 | |||
35 | |||
36 | /****** Register Mappings ******/ | ||
37 | #define Q_MASK_REG 0x000 | ||
38 | #define TRNG_OUT_REG 0x00c | ||
39 | #define IRQ_MASK_REG 0x040 | ||
40 | #define IRQ_STATUS_REG 0x200 | ||
41 | |||
42 | #define DEL_CMD_Q_JOB 0x124 | ||
43 | #define DEL_Q_ACTIVE 0x00000200 | ||
44 | #define DEL_Q_ID_SHIFT 6 | ||
45 | |||
46 | #define CMD_REQ0 0x180 | ||
47 | #define CMD_REQ_INCR 0x04 | ||
48 | |||
49 | #define CMD_Q_STATUS_BASE 0x210 | ||
50 | #define CMD_Q_INT_STATUS_BASE 0x214 | ||
51 | #define CMD_Q_STATUS_INCR 0x20 | ||
52 | |||
53 | #define CMD_Q_CACHE 0x228 | ||
54 | #define CMD_Q_CACHE_INC 0x20 | ||
55 | |||
56 | #define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f) | ||
57 | #define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f) | ||
58 | |||
59 | /****** REQ0 Related Values ******/ | ||
60 | #define REQ0_WAIT_FOR_WRITE 0x00000004 | ||
61 | #define REQ0_INT_ON_COMPLETE 0x00000002 | ||
62 | #define REQ0_STOP_ON_COMPLETE 0x00000001 | ||
63 | |||
64 | #define REQ0_CMD_Q_SHIFT 9 | ||
65 | #define REQ0_JOBID_SHIFT 3 | ||
66 | |||
67 | /****** REQ1 Related Values ******/ | ||
68 | #define REQ1_PROTECT_SHIFT 27 | ||
69 | #define REQ1_ENGINE_SHIFT 23 | ||
70 | #define REQ1_KEY_KSB_SHIFT 2 | ||
71 | |||
72 | #define REQ1_EOM 0x00000002 | ||
73 | #define REQ1_INIT 0x00000001 | ||
74 | |||
75 | /* AES Related Values */ | ||
76 | #define REQ1_AES_TYPE_SHIFT 21 | ||
77 | #define REQ1_AES_MODE_SHIFT 18 | ||
78 | #define REQ1_AES_ACTION_SHIFT 17 | ||
79 | #define REQ1_AES_CFB_SIZE_SHIFT 10 | ||
80 | |||
81 | /* XTS-AES Related Values */ | ||
82 | #define REQ1_XTS_AES_SIZE_SHIFT 10 | ||
83 | |||
84 | /* SHA Related Values */ | ||
85 | #define REQ1_SHA_TYPE_SHIFT 21 | ||
86 | |||
87 | /* RSA Related Values */ | ||
88 | #define REQ1_RSA_MOD_SIZE_SHIFT 10 | ||
89 | |||
90 | /* Pass-Through Related Values */ | ||
91 | #define REQ1_PT_BW_SHIFT 12 | ||
92 | #define REQ1_PT_BS_SHIFT 10 | ||
93 | |||
94 | /* ECC Related Values */ | ||
95 | #define REQ1_ECC_AFFINE_CONVERT 0x00200000 | ||
96 | #define REQ1_ECC_FUNCTION_SHIFT 18 | ||
97 | |||
98 | /****** REQ4 Related Values ******/ | ||
99 | #define REQ4_KSB_SHIFT 18 | ||
100 | #define REQ4_MEMTYPE_SHIFT 16 | ||
101 | |||
102 | /****** REQ6 Related Values ******/ | ||
103 | #define REQ6_MEMTYPE_SHIFT 16 | ||
104 | |||
105 | |||
106 | /****** Key Storage Block ******/ | ||
107 | #define KSB_START 77 | ||
108 | #define KSB_END 127 | ||
109 | #define KSB_COUNT (KSB_END - KSB_START + 1) | ||
110 | #define CCP_KSB_BITS 256 | ||
111 | #define CCP_KSB_BYTES 32 | ||
112 | |||
113 | #define CCP_JOBID_MASK 0x0000003f | ||
114 | |||
115 | #define CCP_DMAPOOL_MAX_SIZE 64 | ||
116 | #define CCP_DMAPOOL_ALIGN (1 << 5) | ||
117 | |||
118 | #define CCP_REVERSE_BUF_SIZE 64 | ||
119 | |||
120 | #define CCP_AES_KEY_KSB_COUNT 1 | ||
121 | #define CCP_AES_CTX_KSB_COUNT 1 | ||
122 | |||
123 | #define CCP_XTS_AES_KEY_KSB_COUNT 1 | ||
124 | #define CCP_XTS_AES_CTX_KSB_COUNT 1 | ||
125 | |||
126 | #define CCP_SHA_KSB_COUNT 1 | ||
127 | |||
128 | #define CCP_RSA_MAX_WIDTH 4096 | ||
129 | |||
130 | #define CCP_PASSTHRU_BLOCKSIZE 256 | ||
131 | #define CCP_PASSTHRU_MASKSIZE 32 | ||
132 | #define CCP_PASSTHRU_KSB_COUNT 1 | ||
133 | |||
134 | #define CCP_ECC_MODULUS_BYTES 48 /* 384-bits */ | ||
135 | #define CCP_ECC_MAX_OPERANDS 6 | ||
136 | #define CCP_ECC_MAX_OUTPUTS 3 | ||
137 | #define CCP_ECC_SRC_BUF_SIZE 448 | ||
138 | #define CCP_ECC_DST_BUF_SIZE 192 | ||
139 | #define CCP_ECC_OPERAND_SIZE 64 | ||
140 | #define CCP_ECC_OUTPUT_SIZE 64 | ||
141 | #define CCP_ECC_RESULT_OFFSET 60 | ||
142 | #define CCP_ECC_RESULT_SUCCESS 0x0001 | ||
143 | |||
144 | |||
145 | struct ccp_device; | ||
146 | struct ccp_cmd; | ||
147 | |||
148 | struct ccp_cmd_queue { | ||
149 | struct ccp_device *ccp; | ||
150 | |||
151 | /* Queue identifier */ | ||
152 | u32 id; | ||
153 | |||
154 | /* Queue dma pool */ | ||
155 | struct dma_pool *dma_pool; | ||
156 | |||
157 | /* Queue reserved KSB regions */ | ||
158 | u32 ksb_key; | ||
159 | u32 ksb_ctx; | ||
160 | |||
161 | /* Queue processing thread */ | ||
162 | struct task_struct *kthread; | ||
163 | unsigned int active; | ||
164 | unsigned int suspended; | ||
165 | |||
166 | /* Number of free command slots available */ | ||
167 | unsigned int free_slots; | ||
168 | |||
169 | /* Interrupt masks */ | ||
170 | u32 int_ok; | ||
171 | u32 int_err; | ||
172 | |||
173 | /* Register addresses for queue */ | ||
174 | void __iomem *reg_status; | ||
175 | void __iomem *reg_int_status; | ||
176 | |||
177 | /* Status values from job */ | ||
178 | u32 int_status; | ||
179 | u32 q_status; | ||
180 | u32 q_int_status; | ||
181 | u32 cmd_error; | ||
182 | |||
183 | /* Interrupt wait queue */ | ||
184 | wait_queue_head_t int_queue; | ||
185 | unsigned int int_rcvd; | ||
186 | } ____cacheline_aligned; | ||
187 | |||
188 | struct ccp_device { | ||
189 | struct device *dev; | ||
190 | |||
191 | /* | ||
192 | * Bus specific device information | ||
193 | */ | ||
194 | void *dev_specific; | ||
195 | int (*get_irq)(struct ccp_device *ccp); | ||
196 | void (*free_irq)(struct ccp_device *ccp); | ||
197 | |||
198 | /* | ||
199 | * I/O area used for device communication. The register mapping | ||
200 | * starts at an offset into the mapped bar. | ||
201 | * The CMD_REQx registers and the Delete_Cmd_Queue_Job register | ||
202 | * need to be protected while a command queue thread is accessing | ||
203 | * them. | ||
204 | */ | ||
205 | struct mutex req_mutex ____cacheline_aligned; | ||
206 | void __iomem *io_map; | ||
207 | void __iomem *io_regs; | ||
208 | |||
209 | /* | ||
210 | * Master lists that all cmds are queued on. Because there can be | ||
211 | * more than one CCP command queue that can process a cmd a separate | ||
212 | * backlog list is needed so that the backlog completion call | ||
213 | * completes before the cmd is available for execution. | ||
214 | */ | ||
215 | spinlock_t cmd_lock ____cacheline_aligned; | ||
216 | unsigned int cmd_count; | ||
217 | struct list_head cmd; | ||
218 | struct list_head backlog; | ||
219 | |||
220 | /* | ||
221 | * The command queues. These represent the queues available on the | ||
222 | * CCP that are available for processing cmds | ||
223 | */ | ||
224 | struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES]; | ||
225 | unsigned int cmd_q_count; | ||
226 | |||
227 | /* | ||
228 | * Support for the CCP True RNG | ||
229 | */ | ||
230 | struct hwrng hwrng; | ||
231 | unsigned int hwrng_retries; | ||
232 | |||
233 | /* | ||
234 | * A counter used to generate job-ids for cmds submitted to the CCP | ||
235 | */ | ||
236 | atomic_t current_id ____cacheline_aligned; | ||
237 | |||
238 | /* | ||
239 | * The CCP uses key storage blocks (KSB) to maintain context for certain | ||
240 | * operations. To prevent multiple cmds from using the same KSB range | ||
241 | * a command queue reserves a KSB range for the duration of the cmd. | ||
242 | * Each queue will, however, reserve 2 KSB blocks for operations that | ||
243 | * only require single KSB entries (e.g. AES context/iv and key) in order | ||
244 | * to avoid allocation contention. This will reserve at most 10 KSB | ||
245 | * entries, leaving 40 KSB entries available for dynamic allocation. | ||
246 | */ | ||
247 | struct mutex ksb_mutex ____cacheline_aligned; | ||
248 | DECLARE_BITMAP(ksb, KSB_COUNT); | ||
249 | wait_queue_head_t ksb_queue; | ||
250 | unsigned int ksb_avail; | ||
251 | unsigned int ksb_count; | ||
252 | u32 ksb_start; | ||
253 | |||
254 | /* Suspend support */ | ||
255 | unsigned int suspending; | ||
256 | wait_queue_head_t suspend_queue; | ||
257 | }; | ||
258 | |||
259 | |||
260 | int ccp_pci_init(void); | ||
261 | void ccp_pci_exit(void); | ||
262 | |||
263 | struct ccp_device *ccp_alloc_struct(struct device *dev); | ||
264 | int ccp_init(struct ccp_device *ccp); | ||
265 | void ccp_destroy(struct ccp_device *ccp); | ||
266 | bool ccp_queues_suspended(struct ccp_device *ccp); | ||
267 | |||
268 | irqreturn_t ccp_irq_handler(int irq, void *data); | ||
269 | |||
270 | int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd); | ||
271 | |||
272 | #endif | ||
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c new file mode 100644 index 000000000000..71ed3ade7e12 --- /dev/null +++ b/drivers/crypto/ccp/ccp-ops.c | |||
@@ -0,0 +1,2024 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) driver | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/pci.h> | ||
16 | #include <linux/pci_ids.h> | ||
17 | #include <linux/kthread.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/spinlock.h> | ||
21 | #include <linux/mutex.h> | ||
22 | #include <linux/delay.h> | ||
23 | #include <linux/ccp.h> | ||
24 | #include <linux/scatterlist.h> | ||
25 | #include <crypto/scatterwalk.h> | ||
26 | |||
27 | #include "ccp-dev.h" | ||
28 | |||
29 | |||
30 | enum ccp_memtype { | ||
31 | CCP_MEMTYPE_SYSTEM = 0, | ||
32 | CCP_MEMTYPE_KSB, | ||
33 | CCP_MEMTYPE_LOCAL, | ||
34 | CCP_MEMTYPE__LAST, | ||
35 | }; | ||
36 | |||
37 | struct ccp_dma_info { | ||
38 | dma_addr_t address; | ||
39 | unsigned int offset; | ||
40 | unsigned int length; | ||
41 | enum dma_data_direction dir; | ||
42 | }; | ||
43 | |||
44 | struct ccp_dm_workarea { | ||
45 | struct device *dev; | ||
46 | struct dma_pool *dma_pool; | ||
47 | unsigned int length; | ||
48 | |||
49 | u8 *address; | ||
50 | struct ccp_dma_info dma; | ||
51 | }; | ||
52 | |||
53 | struct ccp_sg_workarea { | ||
54 | struct scatterlist *sg; | ||
55 | unsigned int nents; | ||
56 | unsigned int length; | ||
57 | |||
58 | struct scatterlist *dma_sg; | ||
59 | struct device *dma_dev; | ||
60 | unsigned int dma_count; | ||
61 | enum dma_data_direction dma_dir; | ||
62 | |||
63 | unsigned int sg_used; | ||
64 | |||
65 | u64 bytes_left; | ||
66 | }; | ||
67 | |||
68 | struct ccp_data { | ||
69 | struct ccp_sg_workarea sg_wa; | ||
70 | struct ccp_dm_workarea dm_wa; | ||
71 | }; | ||
72 | |||
73 | struct ccp_mem { | ||
74 | enum ccp_memtype type; | ||
75 | union { | ||
76 | struct ccp_dma_info dma; | ||
77 | u32 ksb; | ||
78 | } u; | ||
79 | }; | ||
80 | |||
81 | struct ccp_aes_op { | ||
82 | enum ccp_aes_type type; | ||
83 | enum ccp_aes_mode mode; | ||
84 | enum ccp_aes_action action; | ||
85 | }; | ||
86 | |||
87 | struct ccp_xts_aes_op { | ||
88 | enum ccp_aes_action action; | ||
89 | enum ccp_xts_aes_unit_size unit_size; | ||
90 | }; | ||
91 | |||
92 | struct ccp_sha_op { | ||
93 | enum ccp_sha_type type; | ||
94 | u64 msg_bits; | ||
95 | }; | ||
96 | |||
97 | struct ccp_rsa_op { | ||
98 | u32 mod_size; | ||
99 | u32 input_len; | ||
100 | }; | ||
101 | |||
102 | struct ccp_passthru_op { | ||
103 | enum ccp_passthru_bitwise bit_mod; | ||
104 | enum ccp_passthru_byteswap byte_swap; | ||
105 | }; | ||
106 | |||
107 | struct ccp_ecc_op { | ||
108 | enum ccp_ecc_function function; | ||
109 | }; | ||
110 | |||
111 | struct ccp_op { | ||
112 | struct ccp_cmd_queue *cmd_q; | ||
113 | |||
114 | u32 jobid; | ||
115 | u32 ioc; | ||
116 | u32 soc; | ||
117 | u32 ksb_key; | ||
118 | u32 ksb_ctx; | ||
119 | u32 init; | ||
120 | u32 eom; | ||
121 | |||
122 | struct ccp_mem src; | ||
123 | struct ccp_mem dst; | ||
124 | |||
125 | union { | ||
126 | struct ccp_aes_op aes; | ||
127 | struct ccp_xts_aes_op xts; | ||
128 | struct ccp_sha_op sha; | ||
129 | struct ccp_rsa_op rsa; | ||
130 | struct ccp_passthru_op passthru; | ||
131 | struct ccp_ecc_op ecc; | ||
132 | } u; | ||
133 | }; | ||
134 | |||
135 | /* The CCP cannot perform zero-length SHA operations, so the caller | ||
136 | * is required to buffer data for the final operation. However, a | ||
137 | * SHA operation for a message with a total length of zero is valid, | ||
138 | * so known values are required to supply the result. | ||
139 | */ | ||
140 | static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = { | ||
141 | 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, | ||
142 | 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, | ||
143 | 0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00, | ||
144 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||
145 | }; | ||
146 | |||
147 | static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = { | ||
148 | 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9, | ||
149 | 0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4, | ||
150 | 0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a, | ||
151 | 0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00, | ||
152 | }; | ||
153 | |||
154 | static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = { | ||
155 | 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, | ||
156 | 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, | ||
157 | 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, | ||
158 | 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55, | ||
159 | }; | ||
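A hedged sketch of how these constants could be consumed: when a request's total message length is zero, the final digest can be copied from the matching table instead of issuing a CCP operation. The helper below is illustrative only; the actual handling lives in ccp_run_sha_cmd() further down in this file, and the CCP_SHA_TYPE_* values are assumed from the ccp_sha_type enum in include/linux/ccp.h.

	/* Illustrative only: select the precomputed empty-message digest. */
	static const u8 *ccp_zero_message_digest(enum ccp_sha_type type)
	{
		switch (type) {
		case CCP_SHA_TYPE_1:
			return ccp_sha1_zero;
		case CCP_SHA_TYPE_224:
			return ccp_sha224_zero;
		case CCP_SHA_TYPE_256:
			return ccp_sha256_zero;
		default:
			return NULL;
		}
	}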
160 | |||
161 | static u32 ccp_addr_lo(struct ccp_dma_info *info) | ||
162 | { | ||
163 | return lower_32_bits(info->address + info->offset); | ||
164 | } | ||
165 | |||
166 | static u32 ccp_addr_hi(struct ccp_dma_info *info) | ||
167 | { | ||
168 | return upper_32_bits(info->address + info->offset) & 0x0000ffff; | ||
169 | } | ||
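A quick worked example of the address split performed by the two helpers above (the CCP command registers carry the low 32 bits and the next 16 bits of a DMA address in separate words):

	/* For info->address + info->offset == 0x0000123456789abc:
	 *   ccp_addr_lo() -> 0x56789abc   (lower_32_bits)
	 *   ccp_addr_hi() -> 0x00001234   (upper_32_bits masked with 0x0000ffff)
	 */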
170 | |||
171 | static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count) | ||
172 | { | ||
173 | struct ccp_cmd_queue *cmd_q = op->cmd_q; | ||
174 | struct ccp_device *ccp = cmd_q->ccp; | ||
175 | void __iomem *cr_addr; | ||
176 | u32 cr0, cmd; | ||
177 | unsigned int i; | ||
178 | int ret = 0; | ||
179 | |||
180 | /* We could read a status register to see how many free slots | ||
181 | * are actually available, but reading that register resets it | ||
182 | * and you could lose some error information. | ||
183 | */ | ||
184 | cmd_q->free_slots--; | ||
185 | |||
186 | cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT) | ||
187 | | (op->jobid << REQ0_JOBID_SHIFT) | ||
188 | | REQ0_WAIT_FOR_WRITE; | ||
189 | |||
190 | if (op->soc) | ||
191 | cr0 |= REQ0_STOP_ON_COMPLETE | ||
192 | | REQ0_INT_ON_COMPLETE; | ||
193 | |||
194 | if (op->ioc || !cmd_q->free_slots) | ||
195 | cr0 |= REQ0_INT_ON_COMPLETE; | ||
196 | |||
197 | /* Start at CMD_REQ1 */ | ||
198 | cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR; | ||
199 | |||
200 | mutex_lock(&ccp->req_mutex); | ||
201 | |||
202 | /* Write CMD_REQ1 through CMD_REQx first */ | ||
203 | for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR) | ||
204 | iowrite32(*(cr + i), cr_addr); | ||
205 | |||
206 | /* Tell the CCP to start */ | ||
207 | wmb(); | ||
208 | iowrite32(cr0, ccp->io_regs + CMD_REQ0); | ||
209 | |||
210 | mutex_unlock(&ccp->req_mutex); | ||
211 | |||
212 | if (cr0 & REQ0_INT_ON_COMPLETE) { | ||
213 | /* Wait for the job to complete */ | ||
214 | ret = wait_event_interruptible(cmd_q->int_queue, | ||
215 | cmd_q->int_rcvd); | ||
216 | if (ret || cmd_q->cmd_error) { | ||
217 | /* On error delete all related jobs from the queue */ | ||
218 | cmd = (cmd_q->id << DEL_Q_ID_SHIFT) | ||
219 | | op->jobid; | ||
220 | |||
221 | iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); | ||
222 | |||
223 | if (!ret) | ||
224 | ret = -EIO; | ||
225 | } else if (op->soc) { | ||
226 | /* Delete just head job from the queue on SoC */ | ||
227 | cmd = DEL_Q_ACTIVE | ||
228 | | (cmd_q->id << DEL_Q_ID_SHIFT) | ||
229 | | op->jobid; | ||
230 | |||
231 | iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); | ||
232 | } | ||
233 | |||
234 | cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status); | ||
235 | |||
236 | cmd_q->int_rcvd = 0; | ||
237 | } | ||
238 | |||
239 | return ret; | ||
240 | } | ||
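To make the CMD_REQ0 encoding above concrete, here is a small worked example using the REQ0_* shifts and flags defined in ccp-dev.h; the queue id and job id values are arbitrary illustrations:

	/* Example: queue id 2, job id 5, interrupt on completion:
	 *   cr0 = (2 << REQ0_CMD_Q_SHIFT)      -> 0x400
	 *       | (5 << REQ0_JOBID_SHIFT)      -> 0x028
	 *       | REQ0_WAIT_FOR_WRITE          -> 0x004
	 *       | REQ0_INT_ON_COMPLETE         -> 0x002
	 *       = 0x42e
	 */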
241 | |||
242 | static int ccp_perform_aes(struct ccp_op *op) | ||
243 | { | ||
244 | u32 cr[6]; | ||
245 | |||
246 | /* Fill out the register contents for REQ1 through REQ6 */ | ||
247 | cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT) | ||
248 | | (op->u.aes.type << REQ1_AES_TYPE_SHIFT) | ||
249 | | (op->u.aes.mode << REQ1_AES_MODE_SHIFT) | ||
250 | | (op->u.aes.action << REQ1_AES_ACTION_SHIFT) | ||
251 | | (op->ksb_key << REQ1_KEY_KSB_SHIFT); | ||
252 | cr[1] = op->src.u.dma.length - 1; | ||
253 | cr[2] = ccp_addr_lo(&op->src.u.dma); | ||
254 | cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) | ||
255 | | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ||
256 | | ccp_addr_hi(&op->src.u.dma); | ||
257 | cr[4] = ccp_addr_lo(&op->dst.u.dma); | ||
258 | cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ||
259 | | ccp_addr_hi(&op->dst.u.dma); | ||
260 | |||
261 | if (op->u.aes.mode == CCP_AES_MODE_CFB) | ||
262 | cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT); | ||
263 | |||
264 | if (op->eom) | ||
265 | cr[0] |= REQ1_EOM; | ||
266 | |||
267 | if (op->init) | ||
268 | cr[0] |= REQ1_INIT; | ||
269 | |||
270 | return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); | ||
271 | } | ||
272 | |||
273 | static int ccp_perform_xts_aes(struct ccp_op *op) | ||
274 | { | ||
275 | u32 cr[6]; | ||
276 | |||
277 | /* Fill out the register contents for REQ1 through REQ6 */ | ||
278 | cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT) | ||
279 | | (op->u.xts.action << REQ1_AES_ACTION_SHIFT) | ||
280 | | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT) | ||
281 | | (op->ksb_key << REQ1_KEY_KSB_SHIFT); | ||
282 | cr[1] = op->src.u.dma.length - 1; | ||
283 | cr[2] = ccp_addr_lo(&op->src.u.dma); | ||
284 | cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) | ||
285 | | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ||
286 | | ccp_addr_hi(&op->src.u.dma); | ||
287 | cr[4] = ccp_addr_lo(&op->dst.u.dma); | ||
288 | cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ||
289 | | ccp_addr_hi(&op->dst.u.dma); | ||
290 | |||
291 | if (op->eom) | ||
292 | cr[0] |= REQ1_EOM; | ||
293 | |||
294 | if (op->init) | ||
295 | cr[0] |= REQ1_INIT; | ||
296 | |||
297 | return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); | ||
298 | } | ||
299 | |||
300 | static int ccp_perform_sha(struct ccp_op *op) | ||
301 | { | ||
302 | u32 cr[6]; | ||
303 | |||
304 | /* Fill out the register contents for REQ1 through REQ6 */ | ||
305 | cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT) | ||
306 | | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT) | ||
307 | | REQ1_INIT; | ||
308 | cr[1] = op->src.u.dma.length - 1; | ||
309 | cr[2] = ccp_addr_lo(&op->src.u.dma); | ||
310 | cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) | ||
311 | | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ||
312 | | ccp_addr_hi(&op->src.u.dma); | ||
313 | |||
314 | if (op->eom) { | ||
315 | cr[0] |= REQ1_EOM; | ||
316 | cr[4] = lower_32_bits(op->u.sha.msg_bits); | ||
317 | cr[5] = upper_32_bits(op->u.sha.msg_bits); | ||
318 | } else { | ||
319 | cr[4] = 0; | ||
320 | cr[5] = 0; | ||
321 | } | ||
322 | |||
323 | return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); | ||
324 | } | ||
325 | |||
326 | static int ccp_perform_rsa(struct ccp_op *op) | ||
327 | { | ||
328 | u32 cr[6]; | ||
329 | |||
330 | /* Fill out the register contents for REQ1 through REQ6 */ | ||
331 | cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT) | ||
332 | | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT) | ||
333 | | (op->ksb_key << REQ1_KEY_KSB_SHIFT) | ||
334 | | REQ1_EOM; | ||
335 | cr[1] = op->u.rsa.input_len - 1; | ||
336 | cr[2] = ccp_addr_lo(&op->src.u.dma); | ||
337 | cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) | ||
338 | | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ||
339 | | ccp_addr_hi(&op->src.u.dma); | ||
340 | cr[4] = ccp_addr_lo(&op->dst.u.dma); | ||
341 | cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ||
342 | | ccp_addr_hi(&op->dst.u.dma); | ||
343 | |||
344 | return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); | ||
345 | } | ||
346 | |||
347 | static int ccp_perform_passthru(struct ccp_op *op) | ||
348 | { | ||
349 | u32 cr[6]; | ||
350 | |||
351 | /* Fill out the register contents for REQ1 through REQ6 */ | ||
352 | cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT) | ||
353 | | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT) | ||
354 | | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT); | ||
355 | |||
356 | if (op->src.type == CCP_MEMTYPE_SYSTEM) | ||
357 | cr[1] = op->src.u.dma.length - 1; | ||
358 | else | ||
359 | cr[1] = op->dst.u.dma.length - 1; | ||
360 | |||
361 | if (op->src.type == CCP_MEMTYPE_SYSTEM) { | ||
362 | cr[2] = ccp_addr_lo(&op->src.u.dma); | ||
363 | cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ||
364 | | ccp_addr_hi(&op->src.u.dma); | ||
365 | |||
366 | if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP) | ||
367 | cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT); | ||
368 | } else { | ||
369 | cr[2] = op->src.u.ksb * CCP_KSB_BYTES; | ||
370 | cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT); | ||
371 | } | ||
372 | |||
373 | if (op->dst.type == CCP_MEMTYPE_SYSTEM) { | ||
374 | cr[4] = ccp_addr_lo(&op->dst.u.dma); | ||
375 | cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ||
376 | | ccp_addr_hi(&op->dst.u.dma); | ||
377 | } else { | ||
378 | cr[4] = op->dst.u.ksb * CCP_KSB_BYTES; | ||
379 | cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT); | ||
380 | } | ||
381 | |||
382 | if (op->eom) | ||
383 | cr[0] |= REQ1_EOM; | ||
384 | |||
385 | return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); | ||
386 | } | ||
387 | |||
388 | static int ccp_perform_ecc(struct ccp_op *op) | ||
389 | { | ||
390 | u32 cr[6]; | ||
391 | |||
392 | /* Fill out the register contents for REQ1 through REQ6 */ | ||
393 | cr[0] = REQ1_ECC_AFFINE_CONVERT | ||
394 | | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT) | ||
395 | | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT) | ||
396 | | REQ1_EOM; | ||
397 | cr[1] = op->src.u.dma.length - 1; | ||
398 | cr[2] = ccp_addr_lo(&op->src.u.dma); | ||
399 | cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ||
400 | | ccp_addr_hi(&op->src.u.dma); | ||
401 | cr[4] = ccp_addr_lo(&op->dst.u.dma); | ||
402 | cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ||
403 | | ccp_addr_hi(&op->dst.u.dma); | ||
404 | |||
405 | return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); | ||
406 | } | ||
407 | |||
408 | static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count) | ||
409 | { | ||
410 | int start; | ||
411 | |||
412 | for (;;) { | ||
413 | mutex_lock(&ccp->ksb_mutex); | ||
414 | |||
415 | start = (u32)bitmap_find_next_zero_area(ccp->ksb, | ||
416 | ccp->ksb_count, | ||
417 | ccp->ksb_start, | ||
418 | count, 0); | ||
419 | if (start <= ccp->ksb_count) { | ||
420 | bitmap_set(ccp->ksb, start, count); | ||
421 | |||
422 | mutex_unlock(&ccp->ksb_mutex); | ||
423 | break; | ||
424 | } | ||
425 | |||
426 | ccp->ksb_avail = 0; | ||
427 | |||
428 | mutex_unlock(&ccp->ksb_mutex); | ||
429 | |||
430 | /* Wait for KSB entries to become available */ | ||
431 | if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail)) | ||
432 | return 0; | ||
433 | } | ||
434 | |||
435 | return KSB_START + start; | ||
436 | } | ||
437 | |||
438 | static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start, | ||
439 | unsigned int count) | ||
440 | { | ||
441 | if (!start) | ||
442 | return; | ||
443 | |||
444 | mutex_lock(&ccp->ksb_mutex); | ||
445 | |||
446 | bitmap_clear(ccp->ksb, start - KSB_START, count); | ||
447 | |||
448 | ccp->ksb_avail = 1; | ||
449 | |||
450 | mutex_unlock(&ccp->ksb_mutex); | ||
451 | |||
452 | wake_up_interruptible_all(&ccp->ksb_queue); | ||
453 | } | ||
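Taken together, ccp_alloc_ksb() and ccp_free_ksb() form a simple blocking allocator over the KSB bitmap: allocation sleeps until enough contiguous entries are free, and freeing wakes any waiters. A hedged usage sketch follows; my_op_with_dynamic_ksb is a hypothetical caller, not part of this patch.

	/* Hypothetical caller sketch; error handling kept minimal. */
	static int my_op_with_dynamic_ksb(struct ccp_device *ccp, unsigned int count)
	{
		u32 ksb;

		ksb = ccp_alloc_ksb(ccp, count);  /* may sleep waiting for space */
		if (!ksb)
			return -EINTR;		  /* interrupted while waiting */

		/* ... load key material into entries ksb .. ksb + count - 1
		 * and run the operation against them ...
		 */

		ccp_free_ksb(ccp, ksb, count);	  /* clears the bitmap, wakes waiters */
		return 0;
	}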
454 | |||
455 | static u32 ccp_gen_jobid(struct ccp_device *ccp) | ||
456 | { | ||
457 | return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK; | ||
458 | } | ||
459 | |||
460 | static void ccp_sg_free(struct ccp_sg_workarea *wa) | ||
461 | { | ||
462 | if (wa->dma_count) | ||
463 | dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir); | ||
464 | |||
465 | wa->dma_count = 0; | ||
466 | } | ||
467 | |||
468 | static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev, | ||
469 | struct scatterlist *sg, u64 len, | ||
470 | enum dma_data_direction dma_dir) | ||
471 | { | ||
472 | memset(wa, 0, sizeof(*wa)); | ||
473 | |||
474 | wa->sg = sg; | ||
475 | if (!sg) | ||
476 | return 0; | ||
477 | |||
478 | wa->nents = sg_nents(sg); | ||
479 | wa->length = sg->length; | ||
480 | wa->bytes_left = len; | ||
481 | wa->sg_used = 0; | ||
482 | |||
483 | if (len == 0) | ||
484 | return 0; | ||
485 | |||
486 | if (dma_dir == DMA_NONE) | ||
487 | return 0; | ||
488 | |||
489 | wa->dma_sg = sg; | ||
490 | wa->dma_dev = dev; | ||
491 | wa->dma_dir = dma_dir; | ||
492 | wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir); | ||
493 | if (!wa->dma_count) | ||
494 | return -ENOMEM; | ||
495 | |||
496 | |||
497 | return 0; | ||
498 | } | ||
499 | |||
500 | static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len) | ||
501 | { | ||
502 | unsigned int nbytes = min_t(u64, len, wa->bytes_left); | ||
503 | |||
504 | if (!wa->sg) | ||
505 | return; | ||
506 | |||
507 | wa->sg_used += nbytes; | ||
508 | wa->bytes_left -= nbytes; | ||
509 | if (wa->sg_used == wa->sg->length) { | ||
510 | wa->sg = sg_next(wa->sg); | ||
511 | wa->sg_used = 0; | ||
512 | } | ||
513 | } | ||
514 | |||
515 | static void ccp_dm_free(struct ccp_dm_workarea *wa) | ||
516 | { | ||
517 | if (wa->length <= CCP_DMAPOOL_MAX_SIZE) { | ||
518 | if (wa->address) | ||
519 | dma_pool_free(wa->dma_pool, wa->address, | ||
520 | wa->dma.address); | ||
521 | } else { | ||
522 | if (wa->dma.address) | ||
523 | dma_unmap_single(wa->dev, wa->dma.address, wa->length, | ||
524 | wa->dma.dir); | ||
525 | kfree(wa->address); | ||
526 | } | ||
527 | |||
528 | wa->address = NULL; | ||
529 | wa->dma.address = 0; | ||
530 | } | ||
531 | |||
532 | static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa, | ||
533 | struct ccp_cmd_queue *cmd_q, | ||
534 | unsigned int len, | ||
535 | enum dma_data_direction dir) | ||
536 | { | ||
537 | memset(wa, 0, sizeof(*wa)); | ||
538 | |||
539 | if (!len) | ||
540 | return 0; | ||
541 | |||
542 | wa->dev = cmd_q->ccp->dev; | ||
543 | wa->length = len; | ||
544 | |||
545 | if (len <= CCP_DMAPOOL_MAX_SIZE) { | ||
546 | wa->dma_pool = cmd_q->dma_pool; | ||
547 | |||
548 | wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL, | ||
549 | &wa->dma.address); | ||
550 | if (!wa->address) | ||
551 | return -ENOMEM; | ||
552 | |||
553 | wa->dma.length = CCP_DMAPOOL_MAX_SIZE; | ||
554 | |||
555 | memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE); | ||
556 | } else { | ||
557 | wa->address = kzalloc(len, GFP_KERNEL); | ||
558 | if (!wa->address) | ||
559 | return -ENOMEM; | ||
560 | |||
561 | wa->dma.address = dma_map_single(wa->dev, wa->address, len, | ||
562 | dir); | ||
563 | if (!wa->dma.address) | ||
564 | return -ENOMEM; | ||
565 | |||
566 | wa->dma.length = len; | ||
567 | } | ||
568 | wa->dma.dir = dir; | ||
569 | |||
570 | return 0; | ||
571 | } | ||
572 | |||
573 | static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, | ||
574 | struct scatterlist *sg, unsigned int sg_offset, | ||
575 | unsigned int len) | ||
576 | { | ||
577 | WARN_ON(!wa->address); | ||
578 | |||
579 | scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, | ||
580 | 0); | ||
581 | } | ||
582 | |||
583 | static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, | ||
584 | struct scatterlist *sg, unsigned int sg_offset, | ||
585 | unsigned int len) | ||
586 | { | ||
587 | WARN_ON(!wa->address); | ||
588 | |||
589 | scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, | ||
590 | 1); | ||
591 | } | ||
592 | |||
593 | static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa, | ||
594 | struct scatterlist *sg, | ||
595 | unsigned int len, unsigned int se_len, | ||
596 | bool sign_extend) | ||
597 | { | ||
598 | unsigned int nbytes, sg_offset, dm_offset, ksb_len, i; | ||
599 | u8 buffer[CCP_REVERSE_BUF_SIZE]; | ||
600 | |||
601 | BUG_ON(se_len > sizeof(buffer)); | ||
602 | |||
603 | sg_offset = len; | ||
604 | dm_offset = 0; | ||
605 | nbytes = len; | ||
606 | while (nbytes) { | ||
607 | ksb_len = min_t(unsigned int, nbytes, se_len); | ||
608 | sg_offset -= ksb_len; | ||
609 | |||
610 | scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0); | ||
611 | for (i = 0; i < ksb_len; i++) | ||
612 | wa->address[dm_offset + i] = buffer[ksb_len - i - 1]; | ||
613 | |||
614 | dm_offset += ksb_len; | ||
615 | nbytes -= ksb_len; | ||
616 | |||
617 | if ((ksb_len != se_len) && sign_extend) { | ||
618 | /* Must sign-extend to nearest sign-extend length */ | ||
619 | if (wa->address[dm_offset - 1] & 0x80) | ||
620 | memset(wa->address + dm_offset, 0xff, | ||
621 | se_len - ksb_len); | ||
622 | } | ||
623 | } | ||
624 | } | ||
625 | |||
626 | static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa, | ||
627 | struct scatterlist *sg, | ||
628 | unsigned int len) | ||
629 | { | ||
630 | unsigned int nbytes, sg_offset, dm_offset, ksb_len, i; | ||
631 | u8 buffer[CCP_REVERSE_BUF_SIZE]; | ||
632 | |||
633 | sg_offset = 0; | ||
634 | dm_offset = len; | ||
635 | nbytes = len; | ||
636 | while (nbytes) { | ||
637 | ksb_len = min_t(unsigned int, nbytes, sizeof(buffer)); | ||
638 | dm_offset -= ksb_len; | ||
639 | |||
640 | for (i = 0; i < ksb_len; i++) | ||
641 | buffer[ksb_len - i - 1] = wa->address[dm_offset + i]; | ||
642 | scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1); | ||
643 | |||
644 | sg_offset += ksb_len; | ||
645 | nbytes -= ksb_len; | ||
646 | } | ||
647 | } | ||
648 | |||
649 | static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q) | ||
650 | { | ||
651 | ccp_dm_free(&data->dm_wa); | ||
652 | ccp_sg_free(&data->sg_wa); | ||
653 | } | ||
654 | |||
655 | static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q, | ||
656 | struct scatterlist *sg, u64 sg_len, | ||
657 | unsigned int dm_len, | ||
658 | enum dma_data_direction dir) | ||
659 | { | ||
660 | int ret; | ||
661 | |||
662 | memset(data, 0, sizeof(*data)); | ||
663 | |||
664 | ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len, | ||
665 | dir); | ||
666 | if (ret) | ||
667 | goto e_err; | ||
668 | |||
669 | ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir); | ||
670 | if (ret) | ||
671 | goto e_err; | ||
672 | |||
673 | return 0; | ||
674 | |||
675 | e_err: | ||
676 | ccp_free_data(data, cmd_q); | ||
677 | |||
678 | return ret; | ||
679 | } | ||
680 | |||
681 | static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from) | ||
682 | { | ||
683 | struct ccp_sg_workarea *sg_wa = &data->sg_wa; | ||
684 | struct ccp_dm_workarea *dm_wa = &data->dm_wa; | ||
685 | unsigned int buf_count, nbytes; | ||
686 | |||
687 | /* Clear the buffer if setting it */ | ||
688 | if (!from) | ||
689 | memset(dm_wa->address, 0, dm_wa->length); | ||
690 | |||
691 | if (!sg_wa->sg) | ||
692 | return 0; | ||
693 | |||
694 | /* Perform the copy operation | ||
695 | * nbytes will always be <= UINT_MAX because dm_wa->length is | ||
696 | * an unsigned int | ||
697 | */ | ||
698 | nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length); | ||
699 | scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used, | ||
700 | nbytes, from); | ||
701 | |||
702 | /* Update the structures and generate the count */ | ||
703 | buf_count = 0; | ||
704 | while (sg_wa->bytes_left && (buf_count < dm_wa->length)) { | ||
705 | nbytes = min(sg_wa->sg->length - sg_wa->sg_used, | ||
706 | dm_wa->length - buf_count); | ||
707 | nbytes = min_t(u64, sg_wa->bytes_left, nbytes); | ||
708 | |||
709 | buf_count += nbytes; | ||
710 | ccp_update_sg_workarea(sg_wa, nbytes); | ||
711 | } | ||
712 | |||
713 | return buf_count; | ||
714 | } | ||
715 | |||
716 | static unsigned int ccp_fill_queue_buf(struct ccp_data *data) | ||
717 | { | ||
718 | return ccp_queue_buf(data, 0); | ||
719 | } | ||
720 | |||
721 | static unsigned int ccp_empty_queue_buf(struct ccp_data *data) | ||
722 | { | ||
723 | return ccp_queue_buf(data, 1); | ||
724 | } | ||
725 | |||
726 | static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst, | ||
727 | struct ccp_op *op, unsigned int block_size, | ||
728 | bool blocksize_op) | ||
729 | { | ||
730 | unsigned int sg_src_len, sg_dst_len, op_len; | ||
731 | |||
732 | /* The CCP can only DMA from/to one address each per operation. This | ||
733 | * requires that we find the smallest DMA area between the source | ||
734 | * and destination. The resulting len values will always be <= UINT_MAX | ||
735 | * because the dma length is an unsigned int. | ||
736 | */ | ||
737 | sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used; | ||
738 | sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len); | ||
739 | |||
740 | if (dst) { | ||
741 | sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used; | ||
742 | sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len); | ||
743 | op_len = min(sg_src_len, sg_dst_len); | ||
744 | } else | ||
745 | op_len = sg_src_len; | ||
746 | |||
747 | /* The data operation length will be the smaller of the sg room | ||
748 | * remaining for the source and the destination, but never less | ||
749 | * than block_size | ||
750 | */ | ||
751 | op_len = max(op_len, block_size); | ||
752 | |||
753 | /* Unless we have to buffer data, there's no reason to wait */ | ||
754 | op->soc = 0; | ||
755 | |||
756 | if (sg_src_len < block_size) { | ||
757 | /* Not enough data in the sg element, so it | ||
758 | * needs to be buffered into a blocksize chunk | ||
759 | */ | ||
760 | int cp_len = ccp_fill_queue_buf(src); | ||
761 | |||
762 | op->soc = 1; | ||
763 | op->src.u.dma.address = src->dm_wa.dma.address; | ||
764 | op->src.u.dma.offset = 0; | ||
765 | op->src.u.dma.length = (blocksize_op) ? block_size : cp_len; | ||
766 | } else { | ||
767 | /* Enough data in the sg element, but we need to | ||
768 | * adjust for any previously copied data | ||
769 | */ | ||
770 | op->src.u.dma.address = sg_dma_address(src->sg_wa.sg); | ||
771 | op->src.u.dma.offset = src->sg_wa.sg_used; | ||
772 | op->src.u.dma.length = op_len & ~(block_size - 1); | ||
773 | |||
774 | ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length); | ||
775 | } | ||
776 | |||
777 | if (dst) { | ||
778 | if (sg_dst_len < block_size) { | ||
779 | /* Not enough room in the sg element or we're on the | ||
780 | * last piece of data (when using padding), so the | ||
781 | * output needs to be buffered into a blocksize chunk | ||
782 | */ | ||
783 | op->soc = 1; | ||
784 | op->dst.u.dma.address = dst->dm_wa.dma.address; | ||
785 | op->dst.u.dma.offset = 0; | ||
786 | op->dst.u.dma.length = op->src.u.dma.length; | ||
787 | } else { | ||
788 | /* Enough room in the sg element, but we need to | ||
789 | * adjust for any previously used area | ||
790 | */ | ||
791 | op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg); | ||
792 | op->dst.u.dma.offset = dst->sg_wa.sg_used; | ||
793 | op->dst.u.dma.length = op->src.u.dma.length; | ||
794 | } | ||
795 | } | ||
796 | } | ||
797 | |||
798 | static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst, | ||
799 | struct ccp_op *op) | ||
800 | { | ||
801 | op->init = 0; | ||
802 | |||
803 | if (dst) { | ||
804 | if (op->dst.u.dma.address == dst->dm_wa.dma.address) | ||
805 | ccp_empty_queue_buf(dst); | ||
806 | else | ||
807 | ccp_update_sg_workarea(&dst->sg_wa, | ||
808 | op->dst.u.dma.length); | ||
809 | } | ||
810 | } | ||
811 | |||
812 | static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q, | ||
813 | struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, | ||
814 | u32 byte_swap, bool from) | ||
815 | { | ||
816 | struct ccp_op op; | ||
817 | |||
818 | memset(&op, 0, sizeof(op)); | ||
819 | |||
820 | op.cmd_q = cmd_q; | ||
821 | op.jobid = jobid; | ||
822 | op.eom = 1; | ||
823 | |||
824 | if (from) { | ||
825 | op.soc = 1; | ||
826 | op.src.type = CCP_MEMTYPE_KSB; | ||
827 | op.src.u.ksb = ksb; | ||
828 | op.dst.type = CCP_MEMTYPE_SYSTEM; | ||
829 | op.dst.u.dma.address = wa->dma.address; | ||
830 | op.dst.u.dma.length = wa->length; | ||
831 | } else { | ||
832 | op.src.type = CCP_MEMTYPE_SYSTEM; | ||
833 | op.src.u.dma.address = wa->dma.address; | ||
834 | op.src.u.dma.length = wa->length; | ||
835 | op.dst.type = CCP_MEMTYPE_KSB; | ||
836 | op.dst.u.ksb = ksb; | ||
837 | } | ||
838 | |||
839 | op.u.passthru.byte_swap = byte_swap; | ||
840 | |||
841 | return ccp_perform_passthru(&op); | ||
842 | } | ||
843 | |||
844 | static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q, | ||
845 | struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, | ||
846 | u32 byte_swap) | ||
847 | { | ||
848 | return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false); | ||
849 | } | ||
850 | |||
851 | static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q, | ||
852 | struct ccp_dm_workarea *wa, u32 jobid, u32 ksb, | ||
853 | u32 byte_swap) | ||
854 | { | ||
855 | return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true); | ||
856 | } | ||
857 | |||
858 | static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, | ||
859 | struct ccp_cmd *cmd) | ||
860 | { | ||
861 | struct ccp_aes_engine *aes = &cmd->u.aes; | ||
862 | struct ccp_dm_workarea key, ctx; | ||
863 | struct ccp_data src; | ||
864 | struct ccp_op op; | ||
865 | unsigned int dm_offset; | ||
866 | int ret; | ||
867 | |||
868 | if (!((aes->key_len == AES_KEYSIZE_128) || | ||
869 | (aes->key_len == AES_KEYSIZE_192) || | ||
870 | (aes->key_len == AES_KEYSIZE_256))) | ||
871 | return -EINVAL; | ||
872 | |||
873 | if (aes->src_len & (AES_BLOCK_SIZE - 1)) | ||
874 | return -EINVAL; | ||
875 | |||
876 | if (aes->iv_len != AES_BLOCK_SIZE) | ||
877 | return -EINVAL; | ||
878 | |||
879 | if (!aes->key || !aes->iv || !aes->src) | ||
880 | return -EINVAL; | ||
881 | |||
882 | if (aes->cmac_final) { | ||
883 | if (aes->cmac_key_len != AES_BLOCK_SIZE) | ||
884 | return -EINVAL; | ||
885 | |||
886 | if (!aes->cmac_key) | ||
887 | return -EINVAL; | ||
888 | } | ||
889 | |||
890 | BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1); | ||
891 | BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1); | ||
892 | |||
893 | ret = -EIO; | ||
894 | memset(&op, 0, sizeof(op)); | ||
895 | op.cmd_q = cmd_q; | ||
896 | op.jobid = ccp_gen_jobid(cmd_q->ccp); | ||
897 | op.ksb_key = cmd_q->ksb_key; | ||
898 | op.ksb_ctx = cmd_q->ksb_ctx; | ||
899 | op.init = 1; | ||
900 | op.u.aes.type = aes->type; | ||
901 | op.u.aes.mode = aes->mode; | ||
902 | op.u.aes.action = aes->action; | ||
903 | |||
904 | /* All supported key sizes fit in a single (32-byte) KSB entry | ||
905 | * and must be in little endian format. Use the 256-bit byte | ||
906 | * swap passthru option to convert from big endian to little | ||
907 | * endian. | ||
908 | */ | ||
909 | ret = ccp_init_dm_workarea(&key, cmd_q, | ||
910 | CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, | ||
911 | DMA_TO_DEVICE); | ||
912 | if (ret) | ||
913 | return ret; | ||
914 | |||
915 | dm_offset = CCP_KSB_BYTES - aes->key_len; | ||
916 | ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); | ||
917 | ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, | ||
918 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
919 | if (ret) { | ||
920 | cmd->engine_error = cmd_q->cmd_error; | ||
921 | goto e_key; | ||
922 | } | ||
923 | |||
924 | /* The AES context fits in a single (32-byte) KSB entry and | ||
925 | * must be in little endian format. Use the 256-bit byte swap | ||
926 | * passthru option to convert from big endian to little endian. | ||
927 | */ | ||
928 | ret = ccp_init_dm_workarea(&ctx, cmd_q, | ||
929 | CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, | ||
930 | DMA_BIDIRECTIONAL); | ||
931 | if (ret) | ||
932 | goto e_key; | ||
933 | |||
934 | dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; | ||
935 | ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); | ||
936 | ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, | ||
937 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
938 | if (ret) { | ||
939 | cmd->engine_error = cmd_q->cmd_error; | ||
940 | goto e_ctx; | ||
941 | } | ||
942 | |||
943 | /* Send data to the CCP AES engine */ | ||
944 | ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len, | ||
945 | AES_BLOCK_SIZE, DMA_TO_DEVICE); | ||
946 | if (ret) | ||
947 | goto e_ctx; | ||
948 | |||
949 | while (src.sg_wa.bytes_left) { | ||
950 | ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true); | ||
951 | if (aes->cmac_final && !src.sg_wa.bytes_left) { | ||
952 | op.eom = 1; | ||
953 | |||
954 | /* Push the K1/K2 key to the CCP now */ | ||
955 | ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, | ||
956 | op.ksb_ctx, | ||
957 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
958 | if (ret) { | ||
959 | cmd->engine_error = cmd_q->cmd_error; | ||
960 | goto e_src; | ||
961 | } | ||
962 | |||
963 | ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0, | ||
964 | aes->cmac_key_len); | ||
965 | ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, | ||
966 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
967 | if (ret) { | ||
968 | cmd->engine_error = cmd_q->cmd_error; | ||
969 | goto e_src; | ||
970 | } | ||
971 | } | ||
972 | |||
973 | ret = ccp_perform_aes(&op); | ||
974 | if (ret) { | ||
975 | cmd->engine_error = cmd_q->cmd_error; | ||
976 | goto e_src; | ||
977 | } | ||
978 | |||
979 | ccp_process_data(&src, NULL, &op); | ||
980 | } | ||
981 | |||
982 | /* Retrieve the AES context - convert from LE to BE using | ||
983 | * 32-byte (256-bit) byteswapping | ||
984 | */ | ||
985 | ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, | ||
986 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
987 | if (ret) { | ||
988 | cmd->engine_error = cmd_q->cmd_error; | ||
989 | goto e_src; | ||
990 | } | ||
991 | |||
992 | /* ...but we only need AES_BLOCK_SIZE bytes */ | ||
993 | dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; | ||
994 | ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); | ||
995 | |||
996 | e_src: | ||
997 | ccp_free_data(&src, cmd_q); | ||
998 | |||
999 | e_ctx: | ||
1000 | ccp_dm_free(&ctx); | ||
1001 | |||
1002 | e_key: | ||
1003 | ccp_dm_free(&key); | ||
1004 | |||
1005 | return ret; | ||
1006 | } | ||
1007 | |||
1008 | static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | ||
1009 | { | ||
1010 | struct ccp_aes_engine *aes = &cmd->u.aes; | ||
1011 | struct ccp_dm_workarea key, ctx; | ||
1012 | struct ccp_data src, dst; | ||
1013 | struct ccp_op op; | ||
1014 | unsigned int dm_offset; | ||
1015 | bool in_place = false; | ||
1016 | int ret; | ||
1017 | |||
1018 | if (aes->mode == CCP_AES_MODE_CMAC) | ||
1019 | return ccp_run_aes_cmac_cmd(cmd_q, cmd); | ||
1020 | |||
1021 | if (!((aes->key_len == AES_KEYSIZE_128) || | ||
1022 | (aes->key_len == AES_KEYSIZE_192) || | ||
1023 | (aes->key_len == AES_KEYSIZE_256))) | ||
1024 | return -EINVAL; | ||
1025 | |||
1026 | if (((aes->mode == CCP_AES_MODE_ECB) || | ||
1027 | (aes->mode == CCP_AES_MODE_CBC) || | ||
1028 | (aes->mode == CCP_AES_MODE_CFB)) && | ||
1029 | (aes->src_len & (AES_BLOCK_SIZE - 1))) | ||
1030 | return -EINVAL; | ||
1031 | |||
1032 | if (!aes->key || !aes->src || !aes->dst) | ||
1033 | return -EINVAL; | ||
1034 | |||
1035 | if (aes->mode != CCP_AES_MODE_ECB) { | ||
1036 | if (aes->iv_len != AES_BLOCK_SIZE) | ||
1037 | return -EINVAL; | ||
1038 | |||
1039 | if (!aes->iv) | ||
1040 | return -EINVAL; | ||
1041 | } | ||
1042 | |||
1043 | BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1); | ||
1044 | BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1); | ||
1045 | |||
1046 | ret = -EIO; | ||
1047 | memset(&op, 0, sizeof(op)); | ||
1048 | op.cmd_q = cmd_q; | ||
1049 | op.jobid = ccp_gen_jobid(cmd_q->ccp); | ||
1050 | op.ksb_key = cmd_q->ksb_key; | ||
1051 | op.ksb_ctx = cmd_q->ksb_ctx; | ||
1052 | op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1; | ||
1053 | op.u.aes.type = aes->type; | ||
1054 | op.u.aes.mode = aes->mode; | ||
1055 | op.u.aes.action = aes->action; | ||
1056 | |||
1057 | /* All supported key sizes fit in a single (32-byte) KSB entry | ||
1058 | * and must be in little endian format. Use the 256-bit byte | ||
1059 | * swap passthru option to convert from big endian to little | ||
1060 | * endian. | ||
1061 | */ | ||
1062 | ret = ccp_init_dm_workarea(&key, cmd_q, | ||
1063 | CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, | ||
1064 | DMA_TO_DEVICE); | ||
1065 | if (ret) | ||
1066 | return ret; | ||
1067 | |||
1068 | dm_offset = CCP_KSB_BYTES - aes->key_len; | ||
1069 | ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); | ||
1070 | ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, | ||
1071 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
1072 | if (ret) { | ||
1073 | cmd->engine_error = cmd_q->cmd_error; | ||
1074 | goto e_key; | ||
1075 | } | ||
1076 | |||
1077 | /* The AES context fits in a single (32-byte) KSB entry and | ||
1078 | * must be in little endian format. Use the 256-bit byte swap | ||
1079 | * passthru option to convert from big endian to little endian. | ||
1080 | */ | ||
1081 | ret = ccp_init_dm_workarea(&ctx, cmd_q, | ||
1082 | CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, | ||
1083 | DMA_BIDIRECTIONAL); | ||
1084 | if (ret) | ||
1085 | goto e_key; | ||
1086 | |||
1087 | if (aes->mode != CCP_AES_MODE_ECB) { | ||
1088 | /* Load the AES context - convert to LE */ | ||
1089 | dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; | ||
1090 | ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); | ||
1091 | ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, | ||
1092 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
1093 | if (ret) { | ||
1094 | cmd->engine_error = cmd_q->cmd_error; | ||
1095 | goto e_ctx; | ||
1096 | } | ||
1097 | } | ||
1098 | |||
1099 | /* Prepare the input and output data workareas. For in-place | ||
1100 | * operations we need to set the dma direction to BIDIRECTIONAL | ||
1101 | * and copy the src workarea to the dst workarea. | ||
1102 | */ | ||
1103 | if (sg_virt(aes->src) == sg_virt(aes->dst)) | ||
1104 | in_place = true; | ||
1105 | |||
1106 | ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len, | ||
1107 | AES_BLOCK_SIZE, | ||
1108 | in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); | ||
1109 | if (ret) | ||
1110 | goto e_ctx; | ||
1111 | |||
1112 | if (in_place) | ||
1113 | dst = src; | ||
1114 | else { | ||
1115 | ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len, | ||
1116 | AES_BLOCK_SIZE, DMA_FROM_DEVICE); | ||
1117 | if (ret) | ||
1118 | goto e_src; | ||
1119 | } | ||
1120 | |||
1121 | /* Send data to the CCP AES engine */ | ||
1122 | while (src.sg_wa.bytes_left) { | ||
1123 | ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true); | ||
1124 | if (!src.sg_wa.bytes_left) { | ||
1125 | op.eom = 1; | ||
1126 | |||
1127 | /* Since we don't retrieve the AES context in ECB | ||
1128 | * mode we have to wait for the operation to complete | ||
1129 | * on the last piece of data | ||
1130 | */ | ||
1131 | if (aes->mode == CCP_AES_MODE_ECB) | ||
1132 | op.soc = 1; | ||
1133 | } | ||
1134 | |||
1135 | ret = ccp_perform_aes(&op); | ||
1136 | if (ret) { | ||
1137 | cmd->engine_error = cmd_q->cmd_error; | ||
1138 | goto e_dst; | ||
1139 | } | ||
1140 | |||
1141 | ccp_process_data(&src, &dst, &op); | ||
1142 | } | ||
1143 | |||
1144 | if (aes->mode != CCP_AES_MODE_ECB) { | ||
1145 | /* Retrieve the AES context - convert from LE to BE using | ||
1146 | * 32-byte (256-bit) byteswapping | ||
1147 | */ | ||
1148 | ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, | ||
1149 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
1150 | if (ret) { | ||
1151 | cmd->engine_error = cmd_q->cmd_error; | ||
1152 | goto e_dst; | ||
1153 | } | ||
1154 | |||
1155 | /* ...but we only need AES_BLOCK_SIZE bytes */ | ||
1156 | dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; | ||
1157 | ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); | ||
1158 | } | ||
1159 | |||
1160 | e_dst: | ||
1161 | if (!in_place) | ||
1162 | ccp_free_data(&dst, cmd_q); | ||
1163 | |||
1164 | e_src: | ||
1165 | ccp_free_data(&src, cmd_q); | ||
1166 | |||
1167 | e_ctx: | ||
1168 | ccp_dm_free(&ctx); | ||
1169 | |||
1170 | e_key: | ||
1171 | ccp_dm_free(&key); | ||
1172 | |||
1173 | return ret; | ||
1174 | } | ||
1175 | |||
1176 | static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, | ||
1177 | struct ccp_cmd *cmd) | ||
1178 | { | ||
1179 | struct ccp_xts_aes_engine *xts = &cmd->u.xts; | ||
1180 | struct ccp_dm_workarea key, ctx; | ||
1181 | struct ccp_data src, dst; | ||
1182 | struct ccp_op op; | ||
1183 | unsigned int unit_size, dm_offset; | ||
1184 | bool in_place = false; | ||
1185 | int ret; | ||
1186 | |||
1187 | switch (xts->unit_size) { | ||
1188 | case CCP_XTS_AES_UNIT_SIZE_16: | ||
1189 | unit_size = 16; | ||
1190 | break; | ||
1191 | case CCP_XTS_AES_UNIT_SIZE_512: | ||
1192 | unit_size = 512; | ||
1193 | break; | ||
1194 | case CCP_XTS_AES_UNIT_SIZE_1024: | ||
1195 | unit_size = 1024; | ||
1196 | break; | ||
1197 | case CCP_XTS_AES_UNIT_SIZE_2048: | ||
1198 | unit_size = 2048; | ||
1199 | break; | ||
1200 | case CCP_XTS_AES_UNIT_SIZE_4096: | ||
1201 | unit_size = 4096; | ||
1202 | break; | ||
1203 | |||
1204 | default: | ||
1205 | return -EINVAL; | ||
1206 | } | ||
1207 | |||
1208 | if (xts->key_len != AES_KEYSIZE_128) | ||
1209 | return -EINVAL; | ||
1210 | |||
1211 | if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1))) | ||
1212 | return -EINVAL; | ||
1213 | |||
1214 | if (xts->iv_len != AES_BLOCK_SIZE) | ||
1215 | return -EINVAL; | ||
1216 | |||
1217 | if (!xts->key || !xts->iv || !xts->src || !xts->dst) | ||
1218 | return -EINVAL; | ||
1219 | |||
1220 | BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1); | ||
1221 | BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1); | ||
1222 | |||
1223 | ret = -EIO; | ||
1224 | memset(&op, 0, sizeof(op)); | ||
1225 | op.cmd_q = cmd_q; | ||
1226 | op.jobid = ccp_gen_jobid(cmd_q->ccp); | ||
1227 | op.ksb_key = cmd_q->ksb_key; | ||
1228 | op.ksb_ctx = cmd_q->ksb_ctx; | ||
1229 | op.init = 1; | ||
1230 | op.u.xts.action = xts->action; | ||
1231 | op.u.xts.unit_size = xts->unit_size; | ||
1232 | |||
1233 | /* All supported key sizes fit in a single (32-byte) KSB entry | ||
1234 | * and must be in little endian format. Use the 256-bit byte | ||
1235 | * swap passthru option to convert from big endian to little | ||
1236 | * endian. | ||
1237 | */ | ||
1238 | ret = ccp_init_dm_workarea(&key, cmd_q, | ||
1239 | CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES, | ||
1240 | DMA_TO_DEVICE); | ||
1241 | if (ret) | ||
1242 | return ret; | ||
1243 | |||
1244 | dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128; | ||
1245 | ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len); | ||
1246 | ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len); | ||
1247 | ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key, | ||
1248 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
1249 | if (ret) { | ||
1250 | cmd->engine_error = cmd_q->cmd_error; | ||
1251 | goto e_key; | ||
1252 | } | ||
1253 | |||
1254 | /* The AES context fits in a single (32-byte) KSB entry and | ||
1255 | * for XTS is already in little endian format so no byte swapping | ||
1256 | * is needed. | ||
1257 | */ | ||
1258 | ret = ccp_init_dm_workarea(&ctx, cmd_q, | ||
1259 | CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES, | ||
1260 | DMA_BIDIRECTIONAL); | ||
1261 | if (ret) | ||
1262 | goto e_key; | ||
1263 | |||
1264 | ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len); | ||
1265 | ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, | ||
1266 | CCP_PASSTHRU_BYTESWAP_NOOP); | ||
1267 | if (ret) { | ||
1268 | cmd->engine_error = cmd_q->cmd_error; | ||
1269 | goto e_ctx; | ||
1270 | } | ||
1271 | |||
1272 | /* Prepare the input and output data workareas. For in-place | ||
1273 | * operations we need to set the dma direction to BIDIRECTIONAL | ||
1274 | * and copy the src workarea to the dst workarea. | ||
1275 | */ | ||
1276 | if (sg_virt(xts->src) == sg_virt(xts->dst)) | ||
1277 | in_place = true; | ||
1278 | |||
1279 | ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len, | ||
1280 | unit_size, | ||
1281 | in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); | ||
1282 | if (ret) | ||
1283 | goto e_ctx; | ||
1284 | |||
1285 | if (in_place) | ||
1286 | dst = src; | ||
1287 | else { | ||
1288 | ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len, | ||
1289 | unit_size, DMA_FROM_DEVICE); | ||
1290 | if (ret) | ||
1291 | goto e_src; | ||
1292 | } | ||
1293 | |||
1294 | /* Send data to the CCP AES engine */ | ||
1295 | while (src.sg_wa.bytes_left) { | ||
1296 | ccp_prepare_data(&src, &dst, &op, unit_size, true); | ||
1297 | if (!src.sg_wa.bytes_left) | ||
1298 | op.eom = 1; | ||
1299 | |||
1300 | ret = ccp_perform_xts_aes(&op); | ||
1301 | if (ret) { | ||
1302 | cmd->engine_error = cmd_q->cmd_error; | ||
1303 | goto e_dst; | ||
1304 | } | ||
1305 | |||
1306 | ccp_process_data(&src, &dst, &op); | ||
1307 | } | ||
1308 | |||
1309 | /* Retrieve the AES context - convert from LE to BE using | ||
1310 | * 32-byte (256-bit) byteswapping | ||
1311 | */ | ||
1312 | ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, | ||
1313 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
1314 | if (ret) { | ||
1315 | cmd->engine_error = cmd_q->cmd_error; | ||
1316 | goto e_dst; | ||
1317 | } | ||
1318 | |||
1319 | /* ...but we only need AES_BLOCK_SIZE bytes */ | ||
1320 | dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE; | ||
1321 | ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len); | ||
1322 | |||
1323 | e_dst: | ||
1324 | if (!in_place) | ||
1325 | ccp_free_data(&dst, cmd_q); | ||
1326 | |||
1327 | e_src: | ||
1328 | ccp_free_data(&src, cmd_q); | ||
1329 | |||
1330 | e_ctx: | ||
1331 | ccp_dm_free(&ctx); | ||
1332 | |||
1333 | e_key: | ||
1334 | ccp_dm_free(&key); | ||
1335 | |||
1336 | return ret; | ||
1337 | } | ||
1338 | |||
1339 | static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | ||
1340 | { | ||
1341 | struct ccp_sha_engine *sha = &cmd->u.sha; | ||
1342 | struct ccp_dm_workarea ctx; | ||
1343 | struct ccp_data src; | ||
1344 | struct ccp_op op; | ||
1345 | int ret; | ||
1346 | |||
1347 | if (sha->ctx_len != CCP_SHA_CTXSIZE) | ||
1348 | return -EINVAL; | ||
1349 | |||
1350 | if (!sha->ctx) | ||
1351 | return -EINVAL; | ||
1352 | |||
1353 | if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1))) | ||
1354 | return -EINVAL; | ||
1355 | |||
1356 | if (!sha->src_len) { | ||
1357 | const u8 *sha_zero; | ||
1358 | |||
1359 | /* Not final, just return */ | ||
1360 | if (!sha->final) | ||
1361 | return 0; | ||
1362 | |||
1363 | /* CCP can't do a zero length sha operation so the caller | ||
1364 | * must buffer the data. | ||
1365 | */ | ||
1366 | if (sha->msg_bits) | ||
1367 | return -EINVAL; | ||
1368 | |||
1369 | /* A sha operation on a message with a total length of zero | ||
1370 | * returns a known result. | ||
1371 | */ | ||
1372 | switch (sha->type) { | ||
1373 | case CCP_SHA_TYPE_1: | ||
1374 | sha_zero = ccp_sha1_zero; | ||
1375 | break; | ||
1376 | case CCP_SHA_TYPE_224: | ||
1377 | sha_zero = ccp_sha224_zero; | ||
1378 | break; | ||
1379 | case CCP_SHA_TYPE_256: | ||
1380 | sha_zero = ccp_sha256_zero; | ||
1381 | break; | ||
1382 | default: | ||
1383 | return -EINVAL; | ||
1384 | } | ||
1385 | |||
1386 | scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0, | ||
1387 | sha->ctx_len, 1); | ||
1388 | |||
1389 | return 0; | ||
1390 | } | ||
1391 | |||
1392 | if (!sha->src) | ||
1393 | return -EINVAL; | ||
1394 | |||
1395 | BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1); | ||
1396 | |||
1397 | memset(&op, 0, sizeof(op)); | ||
1398 | op.cmd_q = cmd_q; | ||
1399 | op.jobid = ccp_gen_jobid(cmd_q->ccp); | ||
1400 | op.ksb_ctx = cmd_q->ksb_ctx; | ||
1401 | op.u.sha.type = sha->type; | ||
1402 | op.u.sha.msg_bits = sha->msg_bits; | ||
1403 | |||
1404 | /* The SHA context fits in a single (32-byte) KSB entry and | ||
1405 | * must be in little endian format. Use the 256-bit byte swap | ||
1406 | * passthru option to convert from big endian to little endian. | ||
1407 | */ | ||
1408 | ret = ccp_init_dm_workarea(&ctx, cmd_q, | ||
1409 | CCP_SHA_KSB_COUNT * CCP_KSB_BYTES, | ||
1410 | DMA_BIDIRECTIONAL); | ||
1411 | if (ret) | ||
1412 | return ret; | ||
1413 | |||
1414 | ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); | ||
1415 | ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, | ||
1416 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
1417 | if (ret) { | ||
1418 | cmd->engine_error = cmd_q->cmd_error; | ||
1419 | goto e_ctx; | ||
1420 | } | ||
1421 | |||
1422 | /* Send data to the CCP SHA engine */ | ||
1423 | ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len, | ||
1424 | CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE); | ||
1425 | if (ret) | ||
1426 | goto e_ctx; | ||
1427 | |||
1428 | while (src.sg_wa.bytes_left) { | ||
1429 | ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false); | ||
1430 | if (sha->final && !src.sg_wa.bytes_left) | ||
1431 | op.eom = 1; | ||
1432 | |||
1433 | ret = ccp_perform_sha(&op); | ||
1434 | if (ret) { | ||
1435 | cmd->engine_error = cmd_q->cmd_error; | ||
1436 | goto e_data; | ||
1437 | } | ||
1438 | |||
1439 | ccp_process_data(&src, NULL, &op); | ||
1440 | } | ||
1441 | |||
1442 | /* Retrieve the SHA context - convert from LE to BE using | ||
1443 | * 32-byte (256-bit) byteswapping | ||
1444 | */ | ||
1445 | ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, | ||
1446 | CCP_PASSTHRU_BYTESWAP_256BIT); | ||
1447 | if (ret) { | ||
1448 | cmd->engine_error = cmd_q->cmd_error; | ||
1449 | goto e_data; | ||
1450 | } | ||
1451 | |||
1452 | ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); | ||
1453 | |||
1454 | e_data: | ||
1455 | ccp_free_data(&src, cmd_q); | ||
1456 | |||
1457 | e_ctx: | ||
1458 | ccp_dm_free(&ctx); | ||
1459 | |||
1460 | return ret; | ||
1461 | } | ||
1462 | |||
1463 | static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | ||
1464 | { | ||
1465 | struct ccp_rsa_engine *rsa = &cmd->u.rsa; | ||
1466 | struct ccp_dm_workarea exp, src; | ||
1467 | struct ccp_data dst; | ||
1468 | struct ccp_op op; | ||
1469 | unsigned int ksb_count, i_len, o_len; | ||
1470 | int ret; | ||
1471 | |||
1472 | if (rsa->key_size > CCP_RSA_MAX_WIDTH) | ||
1473 | return -EINVAL; | ||
1474 | |||
1475 | if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst) | ||
1476 | return -EINVAL; | ||
1477 | |||
1478 | /* The RSA modulus must precede the message being acted upon, so | ||
1479 | * it must be copied to a DMA area where the message and the | ||
1480 | * modulus can be concatenated. Therefore the input buffer | ||
1481 | * length required is twice the output buffer length (which | ||
1482 | * must be a multiple of 256-bits). | ||
1483 | */ | ||
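	/* Worked example, added for illustration (not part of the original
	 * source): for a 2048-bit key, o_len = ((2048 + 255) / 256) * 32 =
	 * 256 bytes, i_len = 512 bytes and ksb_count = 256 / 32 = 8 KSB
	 * entries.
	 */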
1484 | o_len = ((rsa->key_size + 255) / 256) * 32; | ||
1485 | i_len = o_len * 2; | ||
1486 | |||
1487 | ksb_count = o_len / CCP_KSB_BYTES; | ||
1488 | |||
1489 | memset(&op, 0, sizeof(op)); | ||
1490 | op.cmd_q = cmd_q; | ||
1491 | op.jobid = ccp_gen_jobid(cmd_q->ccp); | ||
1492 | op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count); | ||
1493 | if (!op.ksb_key) | ||
1494 | return -EIO; | ||
1495 | |||
1496 | /* The RSA exponent may span multiple (32-byte) KSB entries and must | ||
1497 | * be in little endian format. Reverse copy each 32-byte chunk | ||
1498 | * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk) | ||
1499 | * and each byte within that chunk and do not perform any byte swap | ||
1500 | * operations on the passthru operation. | ||
1501 | */ | ||
1502 | ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE); | ||
1503 | if (ret) | ||
1504 | goto e_ksb; | ||
1505 | |||
1506 | ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES, | ||
1507 | true); | ||
1508 | ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key, | ||
1509 | CCP_PASSTHRU_BYTESWAP_NOOP); | ||
1510 | if (ret) { | ||
1511 | cmd->engine_error = cmd_q->cmd_error; | ||
1512 | goto e_exp; | ||
1513 | } | ||
1514 | |||
1515 | /* Concatenate the modulus and the message. Both the modulus and | ||
1516 | * the operands must be in little endian format. Since the input | ||
1517 | * is in big endian format it must be converted. | ||
1518 | */ | ||
1519 | ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE); | ||
1520 | if (ret) | ||
1521 | goto e_exp; | ||
1522 | |||
1523 | ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES, | ||
1524 | true); | ||
1525 | src.address += o_len; /* Adjust the address for the copy operation */ | ||
1526 | ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES, | ||
1527 | true); | ||
1528 | src.address -= o_len; /* Reset the address to original value */ | ||
1529 | |||
1530 | /* Prepare the output area for the operation */ | ||
1531 | ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len, | ||
1532 | o_len, DMA_FROM_DEVICE); | ||
1533 | if (ret) | ||
1534 | goto e_src; | ||
1535 | |||
1536 | op.soc = 1; | ||
1537 | op.src.u.dma.address = src.dma.address; | ||
1538 | op.src.u.dma.offset = 0; | ||
1539 | op.src.u.dma.length = i_len; | ||
1540 | op.dst.u.dma.address = dst.dm_wa.dma.address; | ||
1541 | op.dst.u.dma.offset = 0; | ||
1542 | op.dst.u.dma.length = o_len; | ||
1543 | |||
1544 | op.u.rsa.mod_size = rsa->key_size; | ||
1545 | op.u.rsa.input_len = i_len; | ||
1546 | |||
1547 | ret = ccp_perform_rsa(&op); | ||
1548 | if (ret) { | ||
1549 | cmd->engine_error = cmd_q->cmd_error; | ||
1550 | goto e_dst; | ||
1551 | } | ||
1552 | |||
1553 | ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len); | ||
1554 | |||
1555 | e_dst: | ||
1556 | ccp_free_data(&dst, cmd_q); | ||
1557 | |||
1558 | e_src: | ||
1559 | ccp_dm_free(&src); | ||
1560 | |||
1561 | e_exp: | ||
1562 | ccp_dm_free(&exp); | ||
1563 | |||
1564 | e_ksb: | ||
1565 | ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count); | ||
1566 | |||
1567 | return ret; | ||
1568 | } | ||
1569 | |||
1570 | static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, | ||
1571 | struct ccp_cmd *cmd) | ||
1572 | { | ||
1573 | struct ccp_passthru_engine *pt = &cmd->u.passthru; | ||
1574 | struct ccp_dm_workarea mask; | ||
1575 | struct ccp_data src, dst; | ||
1576 | struct ccp_op op; | ||
1577 | bool in_place = false; | ||
1578 | unsigned int i; | ||
1579 | int ret; | ||
1580 | |||
1581 | if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1))) | ||
1582 | return -EINVAL; | ||
1583 | |||
1584 | if (!pt->src || !pt->dst) | ||
1585 | return -EINVAL; | ||
1586 | |||
1587 | if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { | ||
1588 | if (pt->mask_len != CCP_PASSTHRU_MASKSIZE) | ||
1589 | return -EINVAL; | ||
1590 | if (!pt->mask) | ||
1591 | return -EINVAL; | ||
1592 | } | ||
1593 | |||
1594 | BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1); | ||
1595 | |||
1596 | memset(&op, 0, sizeof(op)); | ||
1597 | op.cmd_q = cmd_q; | ||
1598 | op.jobid = ccp_gen_jobid(cmd_q->ccp); | ||
1599 | |||
1600 | if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { | ||
1601 | /* Load the mask */ | ||
1602 | op.ksb_key = cmd_q->ksb_key; | ||
1603 | |||
1604 | ret = ccp_init_dm_workarea(&mask, cmd_q, | ||
1605 | CCP_PASSTHRU_KSB_COUNT * | ||
1606 | CCP_KSB_BYTES, | ||
1607 | DMA_TO_DEVICE); | ||
1608 | if (ret) | ||
1609 | return ret; | ||
1610 | |||
1611 | ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len); | ||
1612 | ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key, | ||
1613 | CCP_PASSTHRU_BYTESWAP_NOOP); | ||
1614 | if (ret) { | ||
1615 | cmd->engine_error = cmd_q->cmd_error; | ||
1616 | goto e_mask; | ||
1617 | } | ||
1618 | } | ||
1619 | |||
1620 | /* Prepare the input and output data workareas. For in-place | ||
1621 | * operations we need to set the dma direction to BIDIRECTIONAL | ||
1622 | * and copy the src workarea to the dst workarea. | ||
1623 | */ | ||
1624 | if (sg_virt(pt->src) == sg_virt(pt->dst)) | ||
1625 | in_place = true; | ||
1626 | |||
1627 | ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len, | ||
1628 | CCP_PASSTHRU_MASKSIZE, | ||
1629 | in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); | ||
1630 | if (ret) | ||
1631 | goto e_mask; | ||
1632 | |||
1633 | if (in_place) | ||
1634 | dst = src; | ||
1635 | else { | ||
1636 | ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len, | ||
1637 | CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE); | ||
1638 | if (ret) | ||
1639 | goto e_src; | ||
1640 | } | ||
1641 | |||
1642 | /* Send data to the CCP Passthru engine | ||
1643 | * Because the CCP engine works on a single source and destination | ||
1644 | * dma address at a time, each entry in the source scatterlist | ||
1645 | * (after the dma_map_sg call) must be less than or equal to the | ||
1646 | * (remaining) length in the destination scatterlist entry and the | ||
1647 | * length must be a multiple of CCP_PASSTHRU_BLOCKSIZE | ||
1648 | */ | ||
1649 | dst.sg_wa.sg_used = 0; | ||
1650 | for (i = 1; i <= src.sg_wa.dma_count; i++) { | ||
1651 | if (!dst.sg_wa.sg || | ||
1652 | (dst.sg_wa.sg->length < src.sg_wa.sg->length)) { | ||
1653 | ret = -EINVAL; | ||
1654 | goto e_dst; | ||
1655 | } | ||
1656 | |||
1657 | if (i == src.sg_wa.dma_count) { | ||
1658 | op.eom = 1; | ||
1659 | op.soc = 1; | ||
1660 | } | ||
1661 | |||
1662 | op.src.type = CCP_MEMTYPE_SYSTEM; | ||
1663 | op.src.u.dma.address = sg_dma_address(src.sg_wa.sg); | ||
1664 | op.src.u.dma.offset = 0; | ||
1665 | op.src.u.dma.length = sg_dma_len(src.sg_wa.sg); | ||
1666 | |||
1667 | op.dst.type = CCP_MEMTYPE_SYSTEM; | ||
1668 | op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg); | ||
1669 | op.dst.u.dma.offset = dst.sg_wa.sg_used; | ||
1670 | op.dst.u.dma.length = op.src.u.dma.length; | ||
1671 | |||
1672 | ret = ccp_perform_passthru(&op); | ||
1673 | if (ret) { | ||
1674 | cmd->engine_error = cmd_q->cmd_error; | ||
1675 | goto e_dst; | ||
1676 | } | ||
1677 | |||
1678 | dst.sg_wa.sg_used += src.sg_wa.sg->length; | ||
1679 | if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) { | ||
1680 | dst.sg_wa.sg = sg_next(dst.sg_wa.sg); | ||
1681 | dst.sg_wa.sg_used = 0; | ||
1682 | } | ||
1683 | src.sg_wa.sg = sg_next(src.sg_wa.sg); | ||
1684 | } | ||
1685 | |||
1686 | e_dst: | ||
1687 | if (!in_place) | ||
1688 | ccp_free_data(&dst, cmd_q); | ||
1689 | |||
1690 | e_src: | ||
1691 | ccp_free_data(&src, cmd_q); | ||
1692 | |||
1693 | e_mask: | ||
1694 | if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) | ||
1695 | ccp_dm_free(&mask); | ||
1696 | |||
1697 | return ret; | ||
1698 | } | ||
1699 | |||
1700 | static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | ||
1701 | { | ||
1702 | struct ccp_ecc_engine *ecc = &cmd->u.ecc; | ||
1703 | struct ccp_dm_workarea src, dst; | ||
1704 | struct ccp_op op; | ||
1705 | int ret; | ||
1706 | u8 *save; | ||
1707 | |||
1708 | if (!ecc->u.mm.operand_1 || | ||
1709 | (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES)) | ||
1710 | return -EINVAL; | ||
1711 | |||
1712 | if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) | ||
1713 | if (!ecc->u.mm.operand_2 || | ||
1714 | (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES)) | ||
1715 | return -EINVAL; | ||
1716 | |||
1717 | if (!ecc->u.mm.result || | ||
1718 | (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES)) | ||
1719 | return -EINVAL; | ||
1720 | |||
1721 | memset(&op, 0, sizeof(op)); | ||
1722 | op.cmd_q = cmd_q; | ||
1723 | op.jobid = ccp_gen_jobid(cmd_q->ccp); | ||
1724 | |||
1725 | /* Concatenate the modulus and the operands. Both the modulus and | ||
1726 | * the operands must be in little endian format. Since the input | ||
1727 | * is in big endian format it must be converted and placed in a | ||
1728 | * fixed length buffer. | ||
1729 | */ | ||
1730 | ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE, | ||
1731 | DMA_TO_DEVICE); | ||
1732 | if (ret) | ||
1733 | return ret; | ||
1734 | |||
1735 | /* Save the workarea address since it is updated in order to perform | ||
1736 | * the concatenation | ||
1737 | */ | ||
1738 | save = src.address; | ||
1739 | |||
1740 | /* Copy the ECC modulus */ | ||
1741 | ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len, | ||
1742 | CCP_ECC_OPERAND_SIZE, true); | ||
1743 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1744 | |||
1745 | /* Copy the first operand */ | ||
1746 | ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1, | ||
1747 | ecc->u.mm.operand_1_len, | ||
1748 | CCP_ECC_OPERAND_SIZE, true); | ||
1749 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1750 | |||
1751 | if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) { | ||
1752 | /* Copy the second operand */ | ||
1753 | ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2, | ||
1754 | ecc->u.mm.operand_2_len, | ||
1755 | CCP_ECC_OPERAND_SIZE, true); | ||
1756 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1757 | } | ||
1758 | |||
1759 | /* Restore the workarea address */ | ||
1760 | src.address = save; | ||
1761 | |||
1762 | /* Prepare the output area for the operation */ | ||
1763 | ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE, | ||
1764 | DMA_FROM_DEVICE); | ||
1765 | if (ret) | ||
1766 | goto e_src; | ||
1767 | |||
1768 | op.soc = 1; | ||
1769 | op.src.u.dma.address = src.dma.address; | ||
1770 | op.src.u.dma.offset = 0; | ||
1771 | op.src.u.dma.length = src.length; | ||
1772 | op.dst.u.dma.address = dst.dma.address; | ||
1773 | op.dst.u.dma.offset = 0; | ||
1774 | op.dst.u.dma.length = dst.length; | ||
1775 | |||
1776 | op.u.ecc.function = cmd->u.ecc.function; | ||
1777 | |||
1778 | ret = ccp_perform_ecc(&op); | ||
1779 | if (ret) { | ||
1780 | cmd->engine_error = cmd_q->cmd_error; | ||
1781 | goto e_dst; | ||
1782 | } | ||
1783 | |||
1784 | ecc->ecc_result = le16_to_cpup( | ||
1785 | (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET)); | ||
1786 | if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) { | ||
1787 | ret = -EIO; | ||
1788 | goto e_dst; | ||
1789 | } | ||
1790 | |||
1791 | /* Save the ECC result */ | ||
1792 | ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES); | ||
1793 | |||
1794 | e_dst: | ||
1795 | ccp_dm_free(&dst); | ||
1796 | |||
1797 | e_src: | ||
1798 | ccp_dm_free(&src); | ||
1799 | |||
1800 | return ret; | ||
1801 | } | ||
1802 | |||
1803 | static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | ||
1804 | { | ||
1805 | struct ccp_ecc_engine *ecc = &cmd->u.ecc; | ||
1806 | struct ccp_dm_workarea src, dst; | ||
1807 | struct ccp_op op; | ||
1808 | int ret; | ||
1809 | u8 *save; | ||
1810 | |||
1811 | if (!ecc->u.pm.point_1.x || | ||
1812 | (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) || | ||
1813 | !ecc->u.pm.point_1.y || | ||
1814 | (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES)) | ||
1815 | return -EINVAL; | ||
1816 | |||
1817 | if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { | ||
1818 | if (!ecc->u.pm.point_2.x || | ||
1819 | (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) || | ||
1820 | !ecc->u.pm.point_2.y || | ||
1821 | (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES)) | ||
1822 | return -EINVAL; | ||
1823 | } else { | ||
1824 | if (!ecc->u.pm.domain_a || | ||
1825 | (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES)) | ||
1826 | return -EINVAL; | ||
1827 | |||
1828 | if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) | ||
1829 | if (!ecc->u.pm.scalar || | ||
1830 | (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES)) | ||
1831 | return -EINVAL; | ||
1832 | } | ||
1833 | |||
1834 | if (!ecc->u.pm.result.x || | ||
1835 | (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) || | ||
1836 | !ecc->u.pm.result.y || | ||
1837 | (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES)) | ||
1838 | return -EINVAL; | ||
1839 | |||
1840 | memset(&op, 0, sizeof(op)); | ||
1841 | op.cmd_q = cmd_q; | ||
1842 | op.jobid = ccp_gen_jobid(cmd_q->ccp); | ||
1843 | |||
1844 | /* Concatenate the modulus and the operands. Both the modulus and | ||
1845 | * the operands must be in little endian format. Since the input | ||
1846 | * is in big endian format it must be converted and placed in a | ||
1847 | * fixed length buffer. | ||
1848 | */ | ||
1849 | ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE, | ||
1850 | DMA_TO_DEVICE); | ||
1851 | if (ret) | ||
1852 | return ret; | ||
1853 | |||
1854 | /* Save the workarea address since it is updated in order to perform | ||
1855 | * the concatenation | ||
1856 | */ | ||
1857 | save = src.address; | ||
1858 | |||
1859 | /* Copy the ECC modulus */ | ||
1860 | ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len, | ||
1861 | CCP_ECC_OPERAND_SIZE, true); | ||
1862 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1863 | |||
1864 | /* Copy the first point X and Y coordinate */ | ||
1865 | ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x, | ||
1866 | ecc->u.pm.point_1.x_len, | ||
1867 | CCP_ECC_OPERAND_SIZE, true); | ||
1868 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1869 | ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y, | ||
1870 | ecc->u.pm.point_1.y_len, | ||
1871 | CCP_ECC_OPERAND_SIZE, true); | ||
1872 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1873 | |||
1874 | /* Set the first point Z coordinate to 1 */ | ||
1875 | *(src.address) = 0x01; | ||
1876 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1877 | |||
1878 | if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { | ||
1879 | /* Copy the second point X and Y coordinate */ | ||
1880 | ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x, | ||
1881 | ecc->u.pm.point_2.x_len, | ||
1882 | CCP_ECC_OPERAND_SIZE, true); | ||
1883 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1884 | ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y, | ||
1885 | ecc->u.pm.point_2.y_len, | ||
1886 | CCP_ECC_OPERAND_SIZE, true); | ||
1887 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1888 | |||
1889 | /* Set the second point Z coordinate to 1 */ | ||
1890 | *(src.address) = 0x01; | ||
1891 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1892 | } else { | ||
1893 | /* Copy the Domain "a" parameter */ | ||
1894 | ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a, | ||
1895 | ecc->u.pm.domain_a_len, | ||
1896 | CCP_ECC_OPERAND_SIZE, true); | ||
1897 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1898 | |||
1899 | if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) { | ||
1900 | /* Copy the scalar value */ | ||
1901 | ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar, | ||
1902 | ecc->u.pm.scalar_len, | ||
1903 | CCP_ECC_OPERAND_SIZE, true); | ||
1904 | src.address += CCP_ECC_OPERAND_SIZE; | ||
1905 | } | ||
1906 | } | ||
1907 | |||
1908 | /* Restore the workarea address */ | ||
1909 | src.address = save; | ||
1910 | |||
1911 | /* Prepare the output area for the operation */ | ||
1912 | ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE, | ||
1913 | DMA_FROM_DEVICE); | ||
1914 | if (ret) | ||
1915 | goto e_src; | ||
1916 | |||
1917 | op.soc = 1; | ||
1918 | op.src.u.dma.address = src.dma.address; | ||
1919 | op.src.u.dma.offset = 0; | ||
1920 | op.src.u.dma.length = src.length; | ||
1921 | op.dst.u.dma.address = dst.dma.address; | ||
1922 | op.dst.u.dma.offset = 0; | ||
1923 | op.dst.u.dma.length = dst.length; | ||
1924 | |||
1925 | op.u.ecc.function = cmd->u.ecc.function; | ||
1926 | |||
1927 | ret = ccp_perform_ecc(&op); | ||
1928 | if (ret) { | ||
1929 | cmd->engine_error = cmd_q->cmd_error; | ||
1930 | goto e_dst; | ||
1931 | } | ||
1932 | |||
1933 | ecc->ecc_result = le16_to_cpup( | ||
1934 | (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET)); | ||
1935 | if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) { | ||
1936 | ret = -EIO; | ||
1937 | goto e_dst; | ||
1938 | } | ||
1939 | |||
1940 | /* Save the workarea address since it is updated as we walk through | ||
1941 | * to copy the point math result | ||
1942 | */ | ||
1943 | save = dst.address; | ||
1944 | |||
1945 | /* Save the ECC result X and Y coordinates */ | ||
1946 | ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x, | ||
1947 | CCP_ECC_MODULUS_BYTES); | ||
1948 | dst.address += CCP_ECC_OUTPUT_SIZE; | ||
1949 | ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y, | ||
1950 | CCP_ECC_MODULUS_BYTES); | ||
1951 | dst.address += CCP_ECC_OUTPUT_SIZE; | ||
1952 | |||
1953 | /* Restore the workarea address */ | ||
1954 | dst.address = save; | ||
1955 | |||
1956 | e_dst: | ||
1957 | ccp_dm_free(&dst); | ||
1958 | |||
1959 | e_src: | ||
1960 | ccp_dm_free(&src); | ||
1961 | |||
1962 | return ret; | ||
1963 | } | ||
1964 | |||
1965 | static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | ||
1966 | { | ||
1967 | struct ccp_ecc_engine *ecc = &cmd->u.ecc; | ||
1968 | |||
1969 | ecc->ecc_result = 0; | ||
1970 | |||
1971 | if (!ecc->mod || | ||
1972 | (ecc->mod_len > CCP_ECC_MODULUS_BYTES)) | ||
1973 | return -EINVAL; | ||
1974 | |||
1975 | switch (ecc->function) { | ||
1976 | case CCP_ECC_FUNCTION_MMUL_384BIT: | ||
1977 | case CCP_ECC_FUNCTION_MADD_384BIT: | ||
1978 | case CCP_ECC_FUNCTION_MINV_384BIT: | ||
1979 | return ccp_run_ecc_mm_cmd(cmd_q, cmd); | ||
1980 | |||
1981 | case CCP_ECC_FUNCTION_PADD_384BIT: | ||
1982 | case CCP_ECC_FUNCTION_PMUL_384BIT: | ||
1983 | case CCP_ECC_FUNCTION_PDBL_384BIT: | ||
1984 | return ccp_run_ecc_pm_cmd(cmd_q, cmd); | ||
1985 | |||
1986 | default: | ||
1987 | return -EINVAL; | ||
1988 | } | ||
1989 | } | ||
1990 | |||
1991 | int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) | ||
1992 | { | ||
1993 | int ret; | ||
1994 | |||
1995 | cmd->engine_error = 0; | ||
1996 | cmd_q->cmd_error = 0; | ||
1997 | cmd_q->int_rcvd = 0; | ||
1998 | cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); | ||
1999 | |||
2000 | switch (cmd->engine) { | ||
2001 | case CCP_ENGINE_AES: | ||
2002 | ret = ccp_run_aes_cmd(cmd_q, cmd); | ||
2003 | break; | ||
2004 | case CCP_ENGINE_XTS_AES_128: | ||
2005 | ret = ccp_run_xts_aes_cmd(cmd_q, cmd); | ||
2006 | break; | ||
2007 | case CCP_ENGINE_SHA: | ||
2008 | ret = ccp_run_sha_cmd(cmd_q, cmd); | ||
2009 | break; | ||
2010 | case CCP_ENGINE_RSA: | ||
2011 | ret = ccp_run_rsa_cmd(cmd_q, cmd); | ||
2012 | break; | ||
2013 | case CCP_ENGINE_PASSTHRU: | ||
2014 | ret = ccp_run_passthru_cmd(cmd_q, cmd); | ||
2015 | break; | ||
2016 | case CCP_ENGINE_ECC: | ||
2017 | ret = ccp_run_ecc_cmd(cmd_q, cmd); | ||
2018 | break; | ||
2019 | default: | ||
2020 | ret = -EINVAL; | ||
2021 | } | ||
2022 | |||
2023 | return ret; | ||
2024 | } | ||
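ccp_run_cmd() above is the per-queue dispatcher; in-kernel users are expected to go through the exported ccp_enqueue_cmd() API declared in include/linux/ccp.h. As a rough sketch (not taken from this patch set), a caller might fill in a ccp_cmd for an in-place AES-128-CBC encrypt along these lines; sg_key, sg_iv, sg_data, data_len, aes_done and my_ctx are hypothetical placeholders and error handling is omitted:

	struct ccp_cmd cmd;
	int ret;

	memset(&cmd, 0, sizeof(cmd));
	cmd.engine = CCP_ENGINE_AES;
	cmd.flags = CCP_CMD_MAY_BACKLOG;
	cmd.u.aes.type = CCP_AES_TYPE_128;
	cmd.u.aes.mode = CCP_AES_MODE_CBC;
	cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT;
	cmd.u.aes.key = sg_key;            /* key material, big endian */
	cmd.u.aes.key_len = AES_KEYSIZE_128;
	cmd.u.aes.iv = sg_iv;              /* 16-byte IV, big endian */
	cmd.u.aes.iv_len = AES_BLOCK_SIZE;
	cmd.u.aes.src = sg_data;           /* src == dst selects the in-place path */
	cmd.u.aes.dst = sg_data;
	cmd.u.aes.src_len = data_len;      /* multiple of AES_BLOCK_SIZE for CBC */
	cmd.callback = aes_done;           /* hypothetical completion callback */
	cmd.data = my_ctx;

	ret = ccp_enqueue_cmd(&cmd);       /* -EINPROGRESS once queued; result via callback */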
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c new file mode 100644 index 000000000000..93319f9db753 --- /dev/null +++ b/drivers/crypto/ccp/ccp-pci.c | |||
@@ -0,0 +1,361 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) driver | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/pci.h> | ||
16 | #include <linux/pci_ids.h> | ||
17 | #include <linux/kthread.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/spinlock.h> | ||
21 | #include <linux/delay.h> | ||
22 | #include <linux/ccp.h> | ||
23 | |||
24 | #include "ccp-dev.h" | ||
25 | |||
26 | #define IO_BAR 2 | ||
27 | #define MSIX_VECTORS 2 | ||
28 | |||
29 | struct ccp_msix { | ||
30 | u32 vector; | ||
31 | char name[16]; | ||
32 | }; | ||
33 | |||
34 | struct ccp_pci { | ||
35 | int msix_count; | ||
36 | struct ccp_msix msix[MSIX_VECTORS]; | ||
37 | }; | ||
38 | |||
39 | static int ccp_get_msix_irqs(struct ccp_device *ccp) | ||
40 | { | ||
41 | struct ccp_pci *ccp_pci = ccp->dev_specific; | ||
42 | struct device *dev = ccp->dev; | ||
43 | struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); | ||
44 | struct msix_entry msix_entry[MSIX_VECTORS]; | ||
45 | unsigned int name_len = sizeof(ccp_pci->msix[0].name) - 1; | ||
46 | int v, ret; | ||
47 | |||
48 | for (v = 0; v < ARRAY_SIZE(msix_entry); v++) | ||
49 | msix_entry[v].entry = v; | ||
50 | |||
51 | while ((ret = pci_enable_msix(pdev, msix_entry, v)) > 0) | ||
52 | v = ret; | ||
53 | if (ret) | ||
54 | return ret; | ||
55 | |||
56 | ccp_pci->msix_count = v; | ||
57 | for (v = 0; v < ccp_pci->msix_count; v++) { | ||
58 | /* Set the interrupt names and request the irqs */ | ||
59 | snprintf(ccp_pci->msix[v].name, name_len, "ccp-%u", v); | ||
60 | ccp_pci->msix[v].vector = msix_entry[v].vector; | ||
61 | ret = request_irq(ccp_pci->msix[v].vector, ccp_irq_handler, | ||
62 | 0, ccp_pci->msix[v].name, dev); | ||
63 | if (ret) { | ||
64 | dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n", | ||
65 | ret); | ||
66 | goto e_irq; | ||
67 | } | ||
68 | } | ||
69 | |||
70 | return 0; | ||
71 | |||
72 | e_irq: | ||
73 | while (v--) | ||
74 | free_irq(ccp_pci->msix[v].vector, dev); | ||
75 | |||
76 | pci_disable_msix(pdev); | ||
77 | |||
78 | ccp_pci->msix_count = 0; | ||
79 | |||
80 | return ret; | ||
81 | } | ||
82 | |||
83 | static int ccp_get_msi_irq(struct ccp_device *ccp) | ||
84 | { | ||
85 | struct device *dev = ccp->dev; | ||
86 | struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); | ||
87 | int ret; | ||
88 | |||
89 | ret = pci_enable_msi(pdev); | ||
90 | if (ret) | ||
91 | return ret; | ||
92 | |||
93 | ret = request_irq(pdev->irq, ccp_irq_handler, 0, "ccp", dev); | ||
94 | if (ret) { | ||
95 | dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret); | ||
96 | goto e_msi; | ||
97 | } | ||
98 | |||
99 | return 0; | ||
100 | |||
101 | e_msi: | ||
102 | pci_disable_msi(pdev); | ||
103 | |||
104 | return ret; | ||
105 | } | ||
106 | |||
107 | static int ccp_get_irqs(struct ccp_device *ccp) | ||
108 | { | ||
109 | struct device *dev = ccp->dev; | ||
110 | int ret; | ||
111 | |||
112 | ret = ccp_get_msix_irqs(ccp); | ||
113 | if (!ret) | ||
114 | return 0; | ||
115 | |||
116 | /* Couldn't get MSI-X vectors, try MSI */ | ||
117 | dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret); | ||
118 | ret = ccp_get_msi_irq(ccp); | ||
119 | if (!ret) | ||
120 | return 0; | ||
121 | |||
122 | /* Couldn't get MSI interrupt */ | ||
123 | dev_notice(dev, "could not enable MSI (%d)\n", ret); | ||
124 | |||
125 | return ret; | ||
126 | } | ||
127 | |||
128 | static void ccp_free_irqs(struct ccp_device *ccp) | ||
129 | { | ||
130 | struct ccp_pci *ccp_pci = ccp->dev_specific; | ||
131 | struct device *dev = ccp->dev; | ||
132 | struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); | ||
133 | |||
134 | if (ccp_pci->msix_count) { | ||
135 | while (ccp_pci->msix_count--) | ||
136 | free_irq(ccp_pci->msix[ccp_pci->msix_count].vector, | ||
137 | dev); | ||
138 | pci_disable_msix(pdev); | ||
139 | } else { | ||
140 | free_irq(pdev->irq, dev); | ||
141 | pci_disable_msi(pdev); | ||
142 | } | ||
143 | } | ||
144 | |||
145 | static int ccp_find_mmio_area(struct ccp_device *ccp) | ||
146 | { | ||
147 | struct device *dev = ccp->dev; | ||
148 | struct pci_dev *pdev = container_of(dev, struct pci_dev, dev); | ||
149 | resource_size_t io_len; | ||
150 | unsigned long io_flags; | ||
151 | int bar; | ||
152 | |||
153 | io_flags = pci_resource_flags(pdev, IO_BAR); | ||
154 | io_len = pci_resource_len(pdev, IO_BAR); | ||
155 | if ((io_flags & IORESOURCE_MEM) && (io_len >= (IO_OFFSET + 0x800))) | ||
156 | return IO_BAR; | ||
157 | |||
158 | for (bar = 0; bar < PCI_STD_RESOURCE_END; bar++) { | ||
159 | io_flags = pci_resource_flags(pdev, bar); | ||
160 | io_len = pci_resource_len(pdev, bar); | ||
161 | if ((io_flags & IORESOURCE_MEM) && | ||
162 | (io_len >= (IO_OFFSET + 0x800))) | ||
163 | return bar; | ||
164 | } | ||
165 | |||
166 | return -EIO; | ||
167 | } | ||
168 | |||
169 | static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) | ||
170 | { | ||
171 | struct ccp_device *ccp; | ||
172 | struct ccp_pci *ccp_pci; | ||
173 | struct device *dev = &pdev->dev; | ||
174 | unsigned int bar; | ||
175 | int ret; | ||
176 | |||
177 | ret = -ENOMEM; | ||
178 | ccp = ccp_alloc_struct(dev); | ||
179 | if (!ccp) | ||
180 | goto e_err; | ||
181 | |||
182 | ccp_pci = kzalloc(sizeof(*ccp_pci), GFP_KERNEL); | ||
183 | if (!ccp_pci) { | ||
184 | ret = -ENOMEM; | ||
185 | goto e_free1; | ||
186 | } | ||
187 | ccp->dev_specific = ccp_pci; | ||
188 | ccp->get_irq = ccp_get_irqs; | ||
189 | ccp->free_irq = ccp_free_irqs; | ||
190 | |||
191 | ret = pci_request_regions(pdev, "ccp"); | ||
192 | if (ret) { | ||
193 | dev_err(dev, "pci_request_regions failed (%d)\n", ret); | ||
194 | goto e_free2; | ||
195 | } | ||
196 | |||
197 | ret = pci_enable_device(pdev); | ||
198 | if (ret) { | ||
199 | dev_err(dev, "pci_enable_device failed (%d)\n", ret); | ||
200 | goto e_regions; | ||
201 | } | ||
202 | |||
203 | pci_set_master(pdev); | ||
204 | |||
205 | ret = ccp_find_mmio_area(ccp); | ||
206 | if (ret < 0) | ||
207 | goto e_device; | ||
208 | bar = ret; | ||
209 | |||
210 | ret = -EIO; | ||
211 | ccp->io_map = pci_iomap(pdev, bar, 0); | ||
212 | if (ccp->io_map == NULL) { | ||
213 | dev_err(dev, "pci_iomap failed\n"); | ||
214 | goto e_device; | ||
215 | } | ||
216 | ccp->io_regs = ccp->io_map + IO_OFFSET; | ||
217 | |||
218 | ret = dma_set_mask(dev, DMA_BIT_MASK(48)); | ||
219 | if (ret == 0) { | ||
220 | ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(48)); | ||
221 | if (ret) { | ||
222 | dev_err(dev, | ||
223 | "pci_set_consistent_dma_mask failed (%d)\n", | ||
224 | ret); | ||
225 | goto e_bar0; | ||
226 | } | ||
227 | } else { | ||
228 | ret = dma_set_mask(dev, DMA_BIT_MASK(32)); | ||
229 | if (ret) { | ||
230 | dev_err(dev, "pci_set_dma_mask failed (%d)\n", ret); | ||
231 | goto e_bar0; | ||
232 | } | ||
233 | } | ||
234 | |||
235 | dev_set_drvdata(dev, ccp); | ||
236 | |||
237 | ret = ccp_init(ccp); | ||
238 | if (ret) | ||
239 | goto e_bar0; | ||
240 | |||
241 | dev_notice(dev, "enabled\n"); | ||
242 | |||
243 | return 0; | ||
244 | |||
245 | e_bar0: | ||
246 | pci_iounmap(pdev, ccp->io_map); | ||
247 | |||
248 | e_device: | ||
249 | pci_disable_device(pdev); | ||
250 | |||
251 | e_regions: | ||
252 | pci_release_regions(pdev); | ||
253 | |||
254 | e_free2: | ||
255 | kfree(ccp_pci); | ||
256 | |||
257 | e_free1: | ||
258 | kfree(ccp); | ||
259 | |||
260 | e_err: | ||
261 | dev_notice(dev, "initialization failed\n"); | ||
262 | return ret; | ||
263 | } | ||
264 | |||
265 | static void ccp_pci_remove(struct pci_dev *pdev) | ||
266 | { | ||
267 | struct device *dev = &pdev->dev; | ||
268 | struct ccp_device *ccp = dev_get_drvdata(dev); | ||
269 | |||
270 | if (!ccp) | ||
271 | return; | ||
272 | |||
273 | ccp_destroy(ccp); | ||
274 | |||
275 | pci_iounmap(pdev, ccp->io_map); | ||
276 | |||
277 | pci_disable_device(pdev); | ||
278 | |||
279 | pci_release_regions(pdev); | ||
280 | |||
281 | kfree(ccp); | ||
282 | |||
283 | dev_notice(dev, "disabled\n"); | ||
284 | } | ||
285 | |||
286 | #ifdef CONFIG_PM | ||
287 | static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state) | ||
288 | { | ||
289 | struct device *dev = &pdev->dev; | ||
290 | struct ccp_device *ccp = dev_get_drvdata(dev); | ||
291 | unsigned long flags; | ||
292 | unsigned int i; | ||
293 | |||
294 | spin_lock_irqsave(&ccp->cmd_lock, flags); | ||
295 | |||
296 | ccp->suspending = 1; | ||
297 | |||
298 | /* Wake all the queue kthreads to prepare for suspend */ | ||
299 | for (i = 0; i < ccp->cmd_q_count; i++) | ||
300 | wake_up_process(ccp->cmd_q[i].kthread); | ||
301 | |||
302 | spin_unlock_irqrestore(&ccp->cmd_lock, flags); | ||
303 | |||
304 | /* Wait for all queue kthreads to say they're done */ | ||
305 | while (!ccp_queues_suspended(ccp)) | ||
306 | wait_event_interruptible(ccp->suspend_queue, | ||
307 | ccp_queues_suspended(ccp)); | ||
308 | |||
309 | return 0; | ||
310 | } | ||
311 | |||
312 | static int ccp_pci_resume(struct pci_dev *pdev) | ||
313 | { | ||
314 | struct device *dev = &pdev->dev; | ||
315 | struct ccp_device *ccp = dev_get_drvdata(dev); | ||
316 | unsigned long flags; | ||
317 | unsigned int i; | ||
318 | |||
319 | spin_lock_irqsave(&ccp->cmd_lock, flags); | ||
320 | |||
321 | ccp->suspending = 0; | ||
322 | |||
323 | /* Wake up all the kthreads */ | ||
324 | for (i = 0; i < ccp->cmd_q_count; i++) { | ||
325 | ccp->cmd_q[i].suspended = 0; | ||
326 | wake_up_process(ccp->cmd_q[i].kthread); | ||
327 | } | ||
328 | |||
329 | spin_unlock_irqrestore(&ccp->cmd_lock, flags); | ||
330 | |||
331 | return 0; | ||
332 | } | ||
333 | #endif | ||
334 | |||
335 | static DEFINE_PCI_DEVICE_TABLE(ccp_pci_table) = { | ||
336 | { PCI_VDEVICE(AMD, 0x1537), }, | ||
337 | /* Last entry must be zero */ | ||
338 | { 0, } | ||
339 | }; | ||
340 | MODULE_DEVICE_TABLE(pci, ccp_pci_table); | ||
341 | |||
342 | static struct pci_driver ccp_pci_driver = { | ||
343 | .name = "AMD Cryptographic Coprocessor", | ||
344 | .id_table = ccp_pci_table, | ||
345 | .probe = ccp_pci_probe, | ||
346 | .remove = ccp_pci_remove, | ||
347 | #ifdef CONFIG_PM | ||
348 | .suspend = ccp_pci_suspend, | ||
349 | .resume = ccp_pci_resume, | ||
350 | #endif | ||
351 | }; | ||
352 | |||
353 | int ccp_pci_init(void) | ||
354 | { | ||
355 | return pci_register_driver(&ccp_pci_driver); | ||
356 | } | ||
357 | |||
358 | void ccp_pci_exit(void) | ||
359 | { | ||
360 | pci_unregister_driver(&ccp_pci_driver); | ||
361 | } | ||
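The ccp_pci_init()/ccp_pci_exit() pair above is meant to be consumed by the core driver's module init/exit path (ccp-dev.c in this series). A simplified sketch of that wiring, shown for illustration only and not copied from the actual file, might look like:

	static int __init ccp_mod_init(void)
	{
		/* Register the PCI front end; the real ccp-dev.c also checks
		 * that a device was actually found before leaving the module
		 * loaded.
		 */
		return ccp_pci_init();
	}

	static void __exit ccp_mod_exit(void)
	{
		ccp_pci_exit();
	}

	module_init(ccp_mod_init);
	module_exit(ccp_mod_exit);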
diff --git a/drivers/crypto/dcp.c b/drivers/crypto/dcp.c deleted file mode 100644 index 247ab8048f5b..000000000000 --- a/drivers/crypto/dcp.c +++ /dev/null | |||
@@ -1,903 +0,0 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * Support for DCP cryptographic accelerator. | ||
5 | * | ||
6 | * Copyright (c) 2013 | ||
7 | * Author: Tobias Rauter <tobias.rauter@gmail.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License version 2 as published | ||
11 | * by the Free Software Foundation. | ||
12 | * | ||
13 | * Based on tegra-aes.c, dcp.c (from freescale SDK) and sahara.c | ||
14 | */ | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/platform_device.h> | ||
20 | #include <linux/dma-mapping.h> | ||
21 | #include <linux/io.h> | ||
22 | #include <linux/mutex.h> | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/completion.h> | ||
25 | #include <linux/workqueue.h> | ||
26 | #include <linux/delay.h> | ||
27 | #include <linux/crypto.h> | ||
28 | #include <linux/miscdevice.h> | ||
29 | |||
30 | #include <crypto/scatterwalk.h> | ||
31 | #include <crypto/aes.h> | ||
32 | |||
33 | |||
34 | /* IOCTL for DCP OTP Key AES - taken from Freescale's SDK */ | ||
35 | #define DBS_IOCTL_BASE 'd' | ||
36 | #define DBS_ENC _IOW(DBS_IOCTL_BASE, 0x00, uint8_t[16]) | ||
37 | #define DBS_DEC _IOW(DBS_IOCTL_BASE, 0x01, uint8_t[16]) | ||
38 | |||
39 | /* DCP channel used for AES */ | ||
40 | #define USED_CHANNEL 1 | ||
41 | /* Ring Buffers' maximum size */ | ||
42 | #define DCP_MAX_PKG 20 | ||
43 | |||
44 | /* Control Register */ | ||
45 | #define DCP_REG_CTRL 0x000 | ||
46 | #define DCP_CTRL_SFRST (1<<31) | ||
47 | #define DCP_CTRL_CLKGATE (1<<30) | ||
48 | #define DCP_CTRL_CRYPTO_PRESENT (1<<29) | ||
49 | #define DCP_CTRL_SHA_PRESENT (1<<28) | ||
50 | #define DCP_CTRL_GATHER_RES_WRITE (1<<23) | ||
51 | #define DCP_CTRL_ENABLE_CONTEXT_CACHE (1<<22) | ||
52 | #define DCP_CTRL_ENABLE_CONTEXT_SWITCH (1<<21) | ||
53 | #define DCP_CTRL_CH_IRQ_E_0 0x01 | ||
54 | #define DCP_CTRL_CH_IRQ_E_1 0x02 | ||
55 | #define DCP_CTRL_CH_IRQ_E_2 0x04 | ||
56 | #define DCP_CTRL_CH_IRQ_E_3 0x08 | ||
57 | |||
58 | /* Status register */ | ||
59 | #define DCP_REG_STAT 0x010 | ||
60 | #define DCP_STAT_OTP_KEY_READY (1<<28) | ||
61 | #define DCP_STAT_CUR_CHANNEL(stat) ((stat>>24)&0x0F) | ||
62 | #define DCP_STAT_READY_CHANNEL(stat) ((stat>>16)&0x0F) | ||
63 | #define DCP_STAT_IRQ(stat) (stat&0x0F) | ||
64 | #define DCP_STAT_CHAN_0 (0x01) | ||
65 | #define DCP_STAT_CHAN_1 (0x02) | ||
66 | #define DCP_STAT_CHAN_2 (0x04) | ||
67 | #define DCP_STAT_CHAN_3 (0x08) | ||
68 | |||
69 | /* Channel Control Register */ | ||
70 | #define DCP_REG_CHAN_CTRL 0x020 | ||
71 | #define DCP_CHAN_CTRL_CH0_IRQ_MERGED (1<<16) | ||
72 | #define DCP_CHAN_CTRL_HIGH_PRIO_0 (0x0100) | ||
73 | #define DCP_CHAN_CTRL_HIGH_PRIO_1 (0x0200) | ||
74 | #define DCP_CHAN_CTRL_HIGH_PRIO_2 (0x0400) | ||
75 | #define DCP_CHAN_CTRL_HIGH_PRIO_3 (0x0800) | ||
76 | #define DCP_CHAN_CTRL_ENABLE_0 (0x01) | ||
77 | #define DCP_CHAN_CTRL_ENABLE_1 (0x02) | ||
78 | #define DCP_CHAN_CTRL_ENABLE_2 (0x04) | ||
79 | #define DCP_CHAN_CTRL_ENABLE_3 (0x08) | ||
80 | |||
81 | /* | ||
82 | * Channel Registers: | ||
83 | * The DCP has 4 channels. Each of these channels | ||
84 | * has 4 registers (command pointer, semaphore, status and options). | ||
85 | * The address of register REG of channel CHAN is obtained by | ||
86 | * dcp_chan_reg(REG, CHAN) | ||
87 | */ | ||
88 | #define DCP_REG_CHAN_PTR 0x00000100 | ||
89 | #define DCP_REG_CHAN_SEMA 0x00000110 | ||
90 | #define DCP_REG_CHAN_STAT 0x00000120 | ||
91 | #define DCP_REG_CHAN_OPT 0x00000130 | ||
92 | |||
93 | #define DCP_CHAN_STAT_NEXT_CHAIN_IS_0 0x010000 | ||
94 | #define DCP_CHAN_STAT_NO_CHAIN 0x020000 | ||
95 | #define DCP_CHAN_STAT_CONTEXT_ERROR 0x030000 | ||
96 | #define DCP_CHAN_STAT_PAYLOAD_ERROR 0x040000 | ||
97 | #define DCP_CHAN_STAT_INVALID_MODE 0x050000 | ||
98 | #define DCP_CHAN_STAT_PAGEFAULT 0x40 | ||
99 | #define DCP_CHAN_STAT_DST 0x20 | ||
100 | #define DCP_CHAN_STAT_SRC 0x10 | ||
101 | #define DCP_CHAN_STAT_PACKET 0x08 | ||
102 | #define DCP_CHAN_STAT_SETUP 0x04 | ||
103 | #define DCP_CHAN_STAT_MISMATCH 0x02 | ||
104 | |||
105 | /* hw packet control */ | ||
106 | |||
107 | #define DCP_PKT_PAYLOAD_KEY (1<<11) | ||
108 | #define DCP_PKT_OTP_KEY (1<<10) | ||
109 | #define DCP_PKT_CIPHER_INIT (1<<9) | ||
110 | #define DCP_PKG_CIPHER_ENCRYPT (1<<8) | ||
111 | #define DCP_PKT_CIPHER_ENABLE (1<<5) | ||
112 | #define DCP_PKT_DECR_SEM (1<<1) | ||
113 | #define DCP_PKT_CHAIN (1<<2) | ||
114 | #define DCP_PKT_IRQ 1 | ||
115 | |||
116 | #define DCP_PKT_MODE_CBC (1<<4) | ||
117 | #define DCP_PKT_KEYSELECT_OTP (0xFF<<8) | ||
118 | |||
119 | /* cipher flags */ | ||
120 | #define DCP_ENC 0x0001 | ||
121 | #define DCP_DEC 0x0002 | ||
122 | #define DCP_ECB 0x0004 | ||
123 | #define DCP_CBC 0x0008 | ||
124 | #define DCP_CBC_INIT 0x0010 | ||
125 | #define DCP_NEW_KEY 0x0040 | ||
126 | #define DCP_OTP_KEY 0x0080 | ||
127 | #define DCP_AES 0x1000 | ||
128 | |||
129 | /* DCP Flags */ | ||
130 | #define DCP_FLAG_BUSY 0x01 | ||
131 | #define DCP_FLAG_PRODUCING 0x02 | ||
132 | |||
133 | /* clock defines */ | ||
134 | #define CLOCK_ON 1 | ||
135 | #define CLOCK_OFF 0 | ||
136 | |||
137 | struct dcp_dev_req_ctx { | ||
138 | int mode; | ||
139 | }; | ||
140 | |||
141 | struct dcp_op { | ||
142 | unsigned int flags; | ||
143 | u8 key[AES_KEYSIZE_128]; | ||
144 | int keylen; | ||
145 | |||
146 | struct ablkcipher_request *req; | ||
147 | struct crypto_ablkcipher *fallback; | ||
148 | |||
149 | uint32_t stat; | ||
150 | uint32_t pkt1; | ||
151 | uint32_t pkt2; | ||
152 | struct ablkcipher_walk walk; | ||
153 | }; | ||
154 | |||
155 | struct dcp_dev { | ||
156 | struct device *dev; | ||
157 | void __iomem *dcp_regs_base; | ||
158 | |||
159 | int dcp_vmi_irq; | ||
160 | int dcp_irq; | ||
161 | |||
162 | spinlock_t queue_lock; | ||
163 | struct crypto_queue queue; | ||
164 | |||
165 | uint32_t pkt_produced; | ||
166 | uint32_t pkt_consumed; | ||
167 | |||
168 | struct dcp_hw_packet *hw_pkg[DCP_MAX_PKG]; | ||
169 | dma_addr_t hw_phys_pkg; | ||
170 | |||
171 | /* [KEY][IV] Both with 16 Bytes */ | ||
172 | u8 *payload_base; | ||
173 | dma_addr_t payload_base_dma; | ||
174 | |||
175 | |||
176 | struct tasklet_struct done_task; | ||
177 | struct tasklet_struct queue_task; | ||
178 | struct timer_list watchdog; | ||
179 | |||
180 | unsigned long flags; | ||
181 | |||
182 | struct dcp_op *ctx; | ||
183 | |||
184 | struct miscdevice dcp_bootstream_misc; | ||
185 | }; | ||
186 | |||
187 | struct dcp_hw_packet { | ||
188 | uint32_t next; | ||
189 | uint32_t pkt1; | ||
190 | uint32_t pkt2; | ||
191 | uint32_t src; | ||
192 | uint32_t dst; | ||
193 | uint32_t size; | ||
194 | uint32_t payload; | ||
195 | uint32_t stat; | ||
196 | }; | ||
197 | |||
198 | static struct dcp_dev *global_dev; | ||
199 | |||
200 | static inline u32 dcp_chan_reg(u32 reg, int chan) | ||
201 | { | ||
202 | return reg + (chan) * 0x40; | ||
203 | } | ||
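As a concrete check of the channel-register layout described in the comment block above (an editor's illustration, not part of the patch): each channel's register bank is spaced 0x40 bytes apart, so the semaphore register of channel 1, the channel this driver uses for AES, resolves to 0x150.

    /* Hypothetical standalone check of the dcp_chan_reg() address arithmetic. */
    #include <assert.h>

    #define DCP_REG_CHAN_SEMA	0x00000110

    static unsigned int dcp_chan_reg(unsigned int reg, int chan)
    {
    	return reg + chan * 0x40;
    }

    int main(void)
    {
    	/* Channel 1 semaphore register: 0x110 + 1 * 0x40 == 0x150. */
    	assert(dcp_chan_reg(DCP_REG_CHAN_SEMA, 1) == 0x150);
    	return 0;
    }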
204 | |||
205 | static inline void dcp_write(struct dcp_dev *dev, u32 data, u32 reg) | ||
206 | { | ||
207 | writel(data, dev->dcp_regs_base + reg); | ||
208 | } | ||
209 | |||
210 | static inline void dcp_set(struct dcp_dev *dev, u32 data, u32 reg) | ||
211 | { | ||
212 | writel(data, dev->dcp_regs_base + (reg | 0x04)); | ||
213 | } | ||
214 | |||
215 | static inline void dcp_clear(struct dcp_dev *dev, u32 data, u32 reg) | ||
216 | { | ||
217 | writel(data, dev->dcp_regs_base + (reg | 0x08)); | ||
218 | } | ||
219 | |||
220 | static inline void dcp_toggle(struct dcp_dev *dev, u32 data, u32 reg) | ||
221 | { | ||
222 | writel(data, dev->dcp_regs_base + (reg | 0x0C)); | ||
223 | } | ||
224 | |||
225 | static inline unsigned int dcp_read(struct dcp_dev *dev, u32 reg) | ||
226 | { | ||
227 | return readl(dev->dcp_regs_base + reg); | ||
228 | } | ||
229 | |||
230 | static void dcp_dma_unmap(struct dcp_dev *dev, struct dcp_hw_packet *pkt) | ||
231 | { | ||
232 | dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE); | ||
233 | dma_unmap_page(dev->dev, pkt->dst, pkt->size, DMA_FROM_DEVICE); | ||
234 | dev_dbg(dev->dev, "unmap packet %x", (unsigned int) pkt); | ||
235 | } | ||
236 | |||
237 | static int dcp_dma_map(struct dcp_dev *dev, | ||
238 | struct ablkcipher_walk *walk, struct dcp_hw_packet *pkt) | ||
239 | { | ||
240 | dev_dbg(dev->dev, "map packet %x", (unsigned int) pkt); | ||
241 | /* align to length = 16 */ | ||
242 | pkt->size = walk->nbytes - (walk->nbytes % 16); | ||
243 | |||
244 | pkt->src = dma_map_page(dev->dev, walk->src.page, walk->src.offset, | ||
245 | pkt->size, DMA_TO_DEVICE); | ||
246 | |||
247 | if (pkt->src == 0) { | ||
248 | dev_err(dev->dev, "Unable to map src"); | ||
249 | return -ENOMEM; | ||
250 | } | ||
251 | |||
252 | pkt->dst = dma_map_page(dev->dev, walk->dst.page, walk->dst.offset, | ||
253 | pkt->size, DMA_FROM_DEVICE); | ||
254 | |||
255 | if (pkt->dst == 0) { | ||
256 | dev_err(dev->dev, "Unable to map dst"); | ||
257 | dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE); | ||
258 | return -ENOMEM; | ||
259 | } | ||
260 | |||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | static void dcp_op_one(struct dcp_dev *dev, struct dcp_hw_packet *pkt, | ||
265 | uint8_t last) | ||
266 | { | ||
267 | struct dcp_op *ctx = dev->ctx; | ||
268 | pkt->pkt1 = ctx->pkt1; | ||
269 | pkt->pkt2 = ctx->pkt2; | ||
270 | |||
271 | pkt->payload = (u32) dev->payload_base_dma; | ||
272 | pkt->stat = 0; | ||
273 | |||
274 | if (ctx->flags & DCP_CBC_INIT) { | ||
275 | pkt->pkt1 |= DCP_PKT_CIPHER_INIT; | ||
276 | ctx->flags &= ~DCP_CBC_INIT; | ||
277 | } | ||
278 | |||
279 | mod_timer(&dev->watchdog, jiffies + msecs_to_jiffies(500)); | ||
280 | pkt->pkt1 |= DCP_PKT_IRQ; | ||
281 | if (!last) | ||
282 | pkt->pkt1 |= DCP_PKT_CHAIN; | ||
283 | |||
284 | dev->pkt_produced++; | ||
285 | |||
286 | dcp_write(dev, 1, | ||
287 | dcp_chan_reg(DCP_REG_CHAN_SEMA, USED_CHANNEL)); | ||
288 | } | ||
289 | |||
290 | static void dcp_op_proceed(struct dcp_dev *dev) | ||
291 | { | ||
292 | struct dcp_op *ctx = dev->ctx; | ||
293 | struct dcp_hw_packet *pkt; | ||
294 | |||
295 | while (ctx->walk.nbytes) { | ||
296 | int err = 0; | ||
297 | |||
298 | pkt = dev->hw_pkg[dev->pkt_produced % DCP_MAX_PKG]; | ||
299 | err = dcp_dma_map(dev, &ctx->walk, pkt); | ||
300 | if (err) { | ||
301 | dev->ctx->stat |= err; | ||
302 | /* start timer to wait for already set up calls */ | ||
303 | mod_timer(&dev->watchdog, | ||
304 | jiffies + msecs_to_jiffies(500)); | ||
305 | break; | ||
306 | } | ||
307 | |||
308 | |||
309 | err = ctx->walk.nbytes - pkt->size; | ||
310 | ablkcipher_walk_done(dev->ctx->req, &dev->ctx->walk, err); | ||
311 | |||
312 | dcp_op_one(dev, pkt, ctx->walk.nbytes == 0); | ||
313 | /* we have to wait if no space is left in buffer */ | ||
314 | if (dev->pkt_produced - dev->pkt_consumed == DCP_MAX_PKG) | ||
315 | break; | ||
316 | } | ||
317 | clear_bit(DCP_FLAG_PRODUCING, &dev->flags); | ||
318 | } | ||
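The loop above, together with dcp_done_task() further down, treats the DCP_MAX_PKG hardware packets as a ring: pkt_produced and pkt_consumed only ever increase, the slot index is taken modulo DCP_MAX_PKG, and the producer stops once it is a full ring ahead of the consumer. A minimal sketch of that occupancy test (editor's illustration, not driver code):

    #include <stdbool.h>
    #include <stdint.h>

    #define DCP_MAX_PKG 20

    /* Full when the producer is exactly one ring ahead of the consumer;
     * unsigned arithmetic keeps the test correct across counter wrap. */
    static bool dcp_ring_full(uint32_t produced, uint32_t consumed)
    {
    	return produced - consumed == DCP_MAX_PKG;
    }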
319 | |||
320 | static void dcp_op_start(struct dcp_dev *dev, uint8_t use_walk) | ||
321 | { | ||
322 | struct dcp_op *ctx = dev->ctx; | ||
323 | |||
324 | if (ctx->flags & DCP_NEW_KEY) { | ||
325 | memcpy(dev->payload_base, ctx->key, ctx->keylen); | ||
326 | ctx->flags &= ~DCP_NEW_KEY; | ||
327 | } | ||
328 | |||
329 | ctx->pkt1 = 0; | ||
330 | ctx->pkt1 |= DCP_PKT_CIPHER_ENABLE; | ||
331 | ctx->pkt1 |= DCP_PKT_DECR_SEM; | ||
332 | |||
333 | if (ctx->flags & DCP_OTP_KEY) | ||
334 | ctx->pkt1 |= DCP_PKT_OTP_KEY; | ||
335 | else | ||
336 | ctx->pkt1 |= DCP_PKT_PAYLOAD_KEY; | ||
337 | |||
338 | if (ctx->flags & DCP_ENC) | ||
339 | ctx->pkt1 |= DCP_PKG_CIPHER_ENCRYPT; | ||
340 | |||
341 | ctx->pkt2 = 0; | ||
342 | if (ctx->flags & DCP_CBC) | ||
343 | ctx->pkt2 |= DCP_PKT_MODE_CBC; | ||
344 | |||
345 | dev->pkt_produced = 0; | ||
346 | dev->pkt_consumed = 0; | ||
347 | |||
348 | ctx->stat = 0; | ||
349 | dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL)); | ||
350 | dcp_write(dev, (u32) dev->hw_phys_pkg, | ||
351 | dcp_chan_reg(DCP_REG_CHAN_PTR, USED_CHANNEL)); | ||
352 | |||
353 | set_bit(DCP_FLAG_PRODUCING, &dev->flags); | ||
354 | |||
355 | if (use_walk) { | ||
356 | ablkcipher_walk_init(&ctx->walk, ctx->req->dst, | ||
357 | ctx->req->src, ctx->req->nbytes); | ||
358 | ablkcipher_walk_phys(ctx->req, &ctx->walk); | ||
359 | dcp_op_proceed(dev); | ||
360 | } else { | ||
361 | dcp_op_one(dev, dev->hw_pkg[0], 1); | ||
362 | clear_bit(DCP_FLAG_PRODUCING, &dev->flags); | ||
363 | } | ||
364 | } | ||
365 | |||
366 | static void dcp_done_task(unsigned long data) | ||
367 | { | ||
368 | struct dcp_dev *dev = (struct dcp_dev *)data; | ||
369 | struct dcp_hw_packet *last_packet; | ||
370 | int fin; | ||
371 | fin = 0; | ||
372 | |||
373 | for (last_packet = dev->hw_pkg[(dev->pkt_consumed) % DCP_MAX_PKG]; | ||
374 | last_packet->stat == 1; | ||
375 | last_packet = | ||
376 | dev->hw_pkg[++(dev->pkt_consumed) % DCP_MAX_PKG]) { | ||
377 | |||
378 | dcp_dma_unmap(dev, last_packet); | ||
379 | last_packet->stat = 0; | ||
380 | fin++; | ||
381 | } | ||
382 | /* the last call of this function already consumed this IRQ's packet */ | ||
383 | if (fin == 0) | ||
384 | return; | ||
385 | |||
386 | dev_dbg(dev->dev, | ||
387 | "Packet(s) done with status %x; finished: %d, produced:%d, complete consumed: %d", | ||
388 | dev->ctx->stat, fin, dev->pkt_produced, dev->pkt_consumed); | ||
389 | |||
390 | last_packet = dev->hw_pkg[(dev->pkt_consumed - 1) % DCP_MAX_PKG]; | ||
391 | if (!dev->ctx->stat && last_packet->pkt1 & DCP_PKT_CHAIN) { | ||
392 | if (!test_and_set_bit(DCP_FLAG_PRODUCING, &dev->flags)) | ||
393 | dcp_op_proceed(dev); | ||
394 | return; | ||
395 | } | ||
396 | |||
397 | while (unlikely(dev->pkt_consumed < dev->pkt_produced)) { | ||
398 | dcp_dma_unmap(dev, | ||
399 | dev->hw_pkg[dev->pkt_consumed++ % DCP_MAX_PKG]); | ||
400 | } | ||
401 | |||
402 | if (dev->ctx->flags & DCP_OTP_KEY) { | ||
403 | /* we used the miscdevice, no walk to finish */ | ||
404 | clear_bit(DCP_FLAG_BUSY, &dev->flags); | ||
405 | return; | ||
406 | } | ||
407 | |||
408 | ablkcipher_walk_complete(&dev->ctx->walk); | ||
409 | dev->ctx->req->base.complete(&dev->ctx->req->base, | ||
410 | dev->ctx->stat); | ||
411 | dev->ctx->req = NULL; | ||
412 | /* in case there are other requests in the queue */ | ||
413 | tasklet_schedule(&dev->queue_task); | ||
414 | } | ||
415 | |||
416 | static void dcp_watchdog(unsigned long data) | ||
417 | { | ||
418 | struct dcp_dev *dev = (struct dcp_dev *)data; | ||
419 | dev->ctx->stat |= dcp_read(dev, | ||
420 | dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL)); | ||
421 | |||
422 | dev_err(dev->dev, "Timeout, Channel status: %x", dev->ctx->stat); | ||
423 | |||
424 | if (!dev->ctx->stat) | ||
425 | dev->ctx->stat = -ETIMEDOUT; | ||
426 | |||
427 | dcp_done_task(data); | ||
428 | } | ||
429 | |||
430 | |||
431 | static irqreturn_t dcp_common_irq(int irq, void *context) | ||
432 | { | ||
433 | u32 msk; | ||
434 | struct dcp_dev *dev = (struct dcp_dev *) context; | ||
435 | |||
436 | del_timer(&dev->watchdog); | ||
437 | |||
438 | msk = DCP_STAT_IRQ(dcp_read(dev, DCP_REG_STAT)); | ||
439 | dcp_clear(dev, msk, DCP_REG_STAT); | ||
440 | if (msk == 0) | ||
441 | return IRQ_NONE; | ||
442 | |||
443 | dev->ctx->stat |= dcp_read(dev, | ||
444 | dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL)); | ||
445 | |||
446 | if (msk & DCP_STAT_CHAN_1) | ||
447 | tasklet_schedule(&dev->done_task); | ||
448 | |||
449 | return IRQ_HANDLED; | ||
450 | } | ||
451 | |||
452 | static irqreturn_t dcp_vmi_irq(int irq, void *context) | ||
453 | { | ||
454 | return dcp_common_irq(irq, context); | ||
455 | } | ||
456 | |||
457 | static irqreturn_t dcp_irq(int irq, void *context) | ||
458 | { | ||
459 | return dcp_common_irq(irq, context); | ||
460 | } | ||
461 | |||
462 | static void dcp_crypt(struct dcp_dev *dev, struct dcp_op *ctx) | ||
463 | { | ||
464 | dev->ctx = ctx; | ||
465 | |||
466 | if ((ctx->flags & DCP_CBC) && ctx->req->info) { | ||
467 | ctx->flags |= DCP_CBC_INIT; | ||
468 | memcpy(dev->payload_base + AES_KEYSIZE_128, | ||
469 | ctx->req->info, AES_KEYSIZE_128); | ||
470 | } | ||
471 | |||
472 | dcp_op_start(dev, 1); | ||
473 | } | ||
474 | |||
475 | static void dcp_queue_task(unsigned long data) | ||
476 | { | ||
477 | struct dcp_dev *dev = (struct dcp_dev *) data; | ||
478 | struct crypto_async_request *async_req, *backlog; | ||
479 | struct crypto_ablkcipher *tfm; | ||
480 | struct dcp_op *ctx; | ||
481 | struct dcp_dev_req_ctx *rctx; | ||
482 | struct ablkcipher_request *req; | ||
483 | unsigned long flags; | ||
484 | |||
485 | spin_lock_irqsave(&dev->queue_lock, flags); | ||
486 | |||
487 | backlog = crypto_get_backlog(&dev->queue); | ||
488 | async_req = crypto_dequeue_request(&dev->queue); | ||
489 | |||
490 | spin_unlock_irqrestore(&dev->queue_lock, flags); | ||
491 | |||
492 | if (!async_req) | ||
493 | goto ret_nothing_done; | ||
494 | |||
495 | if (backlog) | ||
496 | backlog->complete(backlog, -EINPROGRESS); | ||
497 | |||
498 | req = ablkcipher_request_cast(async_req); | ||
499 | tfm = crypto_ablkcipher_reqtfm(req); | ||
500 | rctx = ablkcipher_request_ctx(req); | ||
501 | ctx = crypto_ablkcipher_ctx(tfm); | ||
502 | |||
503 | if (!req->src || !req->dst) | ||
504 | goto ret_nothing_done; | ||
505 | |||
506 | ctx->flags |= rctx->mode; | ||
507 | ctx->req = req; | ||
508 | |||
509 | dcp_crypt(dev, ctx); | ||
510 | |||
511 | return; | ||
512 | |||
513 | ret_nothing_done: | ||
514 | clear_bit(DCP_FLAG_BUSY, &dev->flags); | ||
515 | } | ||
516 | |||
517 | |||
518 | static int dcp_cra_init(struct crypto_tfm *tfm) | ||
519 | { | ||
520 | const char *name = tfm->__crt_alg->cra_name; | ||
521 | struct dcp_op *ctx = crypto_tfm_ctx(tfm); | ||
522 | |||
523 | tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_dev_req_ctx); | ||
524 | |||
525 | ctx->fallback = crypto_alloc_ablkcipher(name, 0, | ||
526 | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); | ||
527 | |||
528 | if (IS_ERR(ctx->fallback)) { | ||
529 | dev_err(global_dev->dev, "Error allocating fallback algo %s\n", | ||
530 | name); | ||
531 | return PTR_ERR(ctx->fallback); | ||
532 | } | ||
533 | |||
534 | return 0; | ||
535 | } | ||
536 | |||
537 | static void dcp_cra_exit(struct crypto_tfm *tfm) | ||
538 | { | ||
539 | struct dcp_op *ctx = crypto_tfm_ctx(tfm); | ||
540 | |||
541 | if (ctx->fallback) | ||
542 | crypto_free_ablkcipher(ctx->fallback); | ||
543 | |||
544 | ctx->fallback = NULL; | ||
545 | } | ||
546 | |||
547 | /* async interface */ | ||
548 | static int dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, | ||
549 | unsigned int len) | ||
550 | { | ||
551 | struct dcp_op *ctx = crypto_ablkcipher_ctx(tfm); | ||
552 | unsigned int ret = 0; | ||
553 | ctx->keylen = len; | ||
554 | ctx->flags = 0; | ||
555 | if (len == AES_KEYSIZE_128) { | ||
556 | if (memcmp(ctx->key, key, AES_KEYSIZE_128)) { | ||
557 | memcpy(ctx->key, key, len); | ||
558 | ctx->flags |= DCP_NEW_KEY; | ||
559 | } | ||
560 | return 0; | ||
561 | } | ||
562 | |||
563 | ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; | ||
564 | ctx->fallback->base.crt_flags |= | ||
565 | (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); | ||
566 | |||
567 | ret = crypto_ablkcipher_setkey(ctx->fallback, key, len); | ||
568 | if (ret) { | ||
569 | struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm); | ||
570 | |||
571 | tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK; | ||
572 | tfm_aux->crt_flags |= | ||
573 | (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK); | ||
574 | } | ||
575 | return ret; | ||
576 | } | ||
577 | |||
578 | static int dcp_aes_cbc_crypt(struct ablkcipher_request *req, int mode) | ||
579 | { | ||
580 | struct dcp_dev_req_ctx *rctx = ablkcipher_request_ctx(req); | ||
581 | struct dcp_dev *dev = global_dev; | ||
582 | unsigned long flags; | ||
583 | int err = 0; | ||
584 | |||
585 | if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) | ||
586 | return -EINVAL; | ||
587 | |||
588 | rctx->mode = mode; | ||
589 | |||
590 | spin_lock_irqsave(&dev->queue_lock, flags); | ||
591 | err = ablkcipher_enqueue_request(&dev->queue, req); | ||
592 | spin_unlock_irqrestore(&dev->queue_lock, flags); | ||
593 | |||
594 | flags = test_and_set_bit(DCP_FLAG_BUSY, &dev->flags); | ||
595 | |||
596 | if (!(flags & DCP_FLAG_BUSY)) | ||
597 | tasklet_schedule(&dev->queue_task); | ||
598 | |||
599 | return err; | ||
600 | } | ||
601 | |||
602 | static int dcp_aes_cbc_encrypt(struct ablkcipher_request *req) | ||
603 | { | ||
604 | struct crypto_tfm *tfm = | ||
605 | crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); | ||
606 | struct dcp_op *ctx = crypto_ablkcipher_ctx( | ||
607 | crypto_ablkcipher_reqtfm(req)); | ||
608 | |||
609 | if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { | ||
610 | int err = 0; | ||
611 | ablkcipher_request_set_tfm(req, ctx->fallback); | ||
612 | err = crypto_ablkcipher_encrypt(req); | ||
613 | ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); | ||
614 | return err; | ||
615 | } | ||
616 | |||
617 | return dcp_aes_cbc_crypt(req, DCP_AES | DCP_ENC | DCP_CBC); | ||
618 | } | ||
619 | |||
620 | static int dcp_aes_cbc_decrypt(struct ablkcipher_request *req) | ||
621 | { | ||
622 | struct crypto_tfm *tfm = | ||
623 | crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); | ||
624 | struct dcp_op *ctx = crypto_ablkcipher_ctx( | ||
625 | crypto_ablkcipher_reqtfm(req)); | ||
626 | |||
627 | if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { | ||
628 | int err = 0; | ||
629 | ablkcipher_request_set_tfm(req, ctx->fallback); | ||
630 | err = crypto_ablkcipher_decrypt(req); | ||
631 | ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); | ||
632 | return err; | ||
633 | } | ||
634 | return dcp_aes_cbc_crypt(req, DCP_AES | DCP_DEC | DCP_CBC); | ||
635 | } | ||
636 | |||
637 | static struct crypto_alg algs[] = { | ||
638 | { | ||
639 | .cra_name = "cbc(aes)", | ||
640 | .cra_driver_name = "dcp-cbc-aes", | ||
641 | .cra_alignmask = 3, | ||
642 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC | | ||
643 | CRYPTO_ALG_NEED_FALLBACK, | ||
644 | .cra_blocksize = AES_KEYSIZE_128, | ||
645 | .cra_type = &crypto_ablkcipher_type, | ||
646 | .cra_priority = 300, | ||
647 | .cra_u.ablkcipher = { | ||
648 | .min_keysize = AES_KEYSIZE_128, | ||
649 | .max_keysize = AES_KEYSIZE_128, | ||
650 | .setkey = dcp_aes_setkey, | ||
651 | .encrypt = dcp_aes_cbc_encrypt, | ||
652 | .decrypt = dcp_aes_cbc_decrypt, | ||
653 | .ivsize = AES_KEYSIZE_128, | ||
654 | } | ||
655 | |||
656 | }, | ||
657 | }; | ||
658 | |||
659 | /* DCP bootstream verification interface: uses OTP key for crypto */ | ||
660 | static int dcp_bootstream_open(struct inode *inode, struct file *file) | ||
661 | { | ||
662 | file->private_data = container_of((file->private_data), | ||
663 | struct dcp_dev, dcp_bootstream_misc); | ||
664 | return 0; | ||
665 | } | ||
666 | |||
667 | static long dcp_bootstream_ioctl(struct file *file, | ||
668 | unsigned int cmd, unsigned long arg) | ||
669 | { | ||
670 | struct dcp_dev *dev = (struct dcp_dev *) file->private_data; | ||
671 | void __user *argp = (void __user *)arg; | ||
672 | int ret; | ||
673 | |||
674 | if (dev == NULL) | ||
675 | return -EBADF; | ||
676 | |||
677 | if (cmd != DBS_ENC && cmd != DBS_DEC) | ||
678 | return -EINVAL; | ||
679 | |||
680 | if (copy_from_user(dev->payload_base, argp, 16)) | ||
681 | return -EFAULT; | ||
682 | |||
683 | if (test_and_set_bit(DCP_FLAG_BUSY, &dev->flags)) | ||
684 | return -EAGAIN; | ||
685 | |||
686 | dev->ctx = kzalloc(sizeof(struct dcp_op), GFP_KERNEL); | ||
687 | if (!dev->ctx) { | ||
688 | dev_err(dev->dev, | ||
689 | "cannot allocate context for OTP crypto"); | ||
690 | clear_bit(DCP_FLAG_BUSY, &dev->flags); | ||
691 | return -ENOMEM; | ||
692 | } | ||
693 | |||
694 | dev->ctx->flags = DCP_AES | DCP_ECB | DCP_OTP_KEY | DCP_CBC_INIT; | ||
695 | dev->ctx->flags |= (cmd == DBS_ENC) ? DCP_ENC : DCP_DEC; | ||
696 | dev->hw_pkg[0]->src = dev->payload_base_dma; | ||
697 | dev->hw_pkg[0]->dst = dev->payload_base_dma; | ||
698 | dev->hw_pkg[0]->size = 16; | ||
699 | |||
700 | dcp_op_start(dev, 0); | ||
701 | |||
702 | while (test_bit(DCP_FLAG_BUSY, &dev->flags)) | ||
703 | cpu_relax(); | ||
704 | |||
705 | ret = dev->ctx->stat; | ||
706 | if (!ret && copy_to_user(argp, dev->payload_base, 16)) | ||
707 | ret = -EFAULT; | ||
708 | |||
709 | kfree(dev->ctx); | ||
710 | |||
711 | return ret; | ||
712 | } | ||
713 | |||
714 | static const struct file_operations dcp_bootstream_fops = { | ||
715 | .owner = THIS_MODULE, | ||
716 | .unlocked_ioctl = dcp_bootstream_ioctl, | ||
717 | .open = dcp_bootstream_open, | ||
718 | }; | ||
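For context, a user-space caller of this bootstream interface opens the misc device registered below and issues one of the two 16-byte ioctls declared at the top of the file; the data is transformed in place with the OTP key. The following is an editor's sketch only: the /dev/dcpboot path is assumed from the miscdevice name "dcpboot" and is not spelled out in the patch.

    /* enc_block.c - encrypt one 16-byte block with the DCP OTP key (sketch). */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    #define DBS_IOCTL_BASE	'd'
    #define DBS_ENC		_IOW(DBS_IOCTL_BASE, 0x00, uint8_t[16])

    int main(void)
    {
    	uint8_t blk[16] = { 0 };		/* transformed in place */
    	int fd = open("/dev/dcpboot", O_RDWR);	/* path is an assumption */

    	if (fd < 0 || ioctl(fd, DBS_ENC, blk) < 0) {
    		perror("dcpboot");
    		return 1;
    	}
    	/* blk now holds the ciphertext produced with the OTP key. */
    	close(fd);
    	return 0;
    }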
719 | |||
720 | static int dcp_probe(struct platform_device *pdev) | ||
721 | { | ||
722 | struct dcp_dev *dev = NULL; | ||
723 | struct resource *r; | ||
724 | int i, ret, j; | ||
725 | |||
726 | dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); | ||
727 | if (!dev) | ||
728 | return -ENOMEM; | ||
729 | |||
730 | global_dev = dev; | ||
731 | dev->dev = &pdev->dev; | ||
732 | |||
733 | platform_set_drvdata(pdev, dev); | ||
734 | |||
735 | r = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
736 | dev->dcp_regs_base = devm_ioremap_resource(&pdev->dev, r); | ||
737 | if (IS_ERR(dev->dcp_regs_base)) | ||
738 | return PTR_ERR(dev->dcp_regs_base); | ||
739 | |||
740 | dcp_set(dev, DCP_CTRL_SFRST, DCP_REG_CTRL); | ||
741 | udelay(10); | ||
742 | dcp_clear(dev, DCP_CTRL_SFRST | DCP_CTRL_CLKGATE, DCP_REG_CTRL); | ||
743 | |||
744 | dcp_write(dev, DCP_CTRL_GATHER_RES_WRITE | | ||
745 | DCP_CTRL_ENABLE_CONTEXT_CACHE | DCP_CTRL_CH_IRQ_E_1, | ||
746 | DCP_REG_CTRL); | ||
747 | |||
748 | dcp_write(dev, DCP_CHAN_CTRL_ENABLE_1, DCP_REG_CHAN_CTRL); | ||
749 | |||
750 | for (i = 0; i < 4; i++) | ||
751 | dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, i)); | ||
752 | |||
753 | dcp_clear(dev, -1, DCP_REG_STAT); | ||
754 | |||
755 | |||
756 | r = platform_get_resource(pdev, IORESOURCE_IRQ, 0); | ||
757 | if (!r) { | ||
758 | dev_err(&pdev->dev, "can't get IRQ resource (0)\n"); | ||
759 | return -EIO; | ||
760 | } | ||
761 | dev->dcp_vmi_irq = r->start; | ||
762 | ret = devm_request_irq(&pdev->dev, dev->dcp_vmi_irq, dcp_vmi_irq, 0, | ||
763 | "dcp", dev); | ||
764 | if (ret != 0) { | ||
765 | dev_err(&pdev->dev, "can't request_irq (0)\n"); | ||
766 | return -EIO; | ||
767 | } | ||
768 | |||
769 | r = platform_get_resource(pdev, IORESOURCE_IRQ, 1); | ||
770 | if (!r) { | ||
771 | dev_err(&pdev->dev, "can't get IRQ resource (1)\n"); | ||
772 | return -EIO; | ||
773 | } | ||
774 | dev->dcp_irq = r->start; | ||
775 | ret = devm_request_irq(&pdev->dev, dev->dcp_irq, dcp_irq, 0, "dcp", | ||
776 | dev); | ||
777 | if (ret != 0) { | ||
778 | dev_err(&pdev->dev, "can't request_irq (1)\n"); | ||
779 | return -EIO; | ||
780 | } | ||
781 | |||
782 | dev->hw_pkg[0] = dma_alloc_coherent(&pdev->dev, | ||
783 | DCP_MAX_PKG * sizeof(struct dcp_hw_packet), | ||
784 | &dev->hw_phys_pkg, | ||
785 | GFP_KERNEL); | ||
786 | if (!dev->hw_pkg[0]) { | ||
787 | dev_err(&pdev->dev, "Could not allocate hw descriptors\n"); | ||
788 | return -ENOMEM; | ||
789 | } | ||
790 | |||
791 | for (i = 1; i < DCP_MAX_PKG; i++) { | ||
792 | dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg | ||
793 | + i * sizeof(struct dcp_hw_packet); | ||
794 | dev->hw_pkg[i] = dev->hw_pkg[i - 1] + 1; | ||
795 | } | ||
796 | dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg; | ||
797 | |||
798 | |||
799 | dev->payload_base = dma_alloc_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, | ||
800 | &dev->payload_base_dma, GFP_KERNEL); | ||
801 | if (!dev->payload_base) { | ||
802 | dev_err(&pdev->dev, "Could not allocate memory for key\n"); | ||
803 | ret = -ENOMEM; | ||
804 | goto err_free_hw_packet; | ||
805 | } | ||
806 | tasklet_init(&dev->queue_task, dcp_queue_task, | ||
807 | (unsigned long) dev); | ||
808 | tasklet_init(&dev->done_task, dcp_done_task, | ||
809 | (unsigned long) dev); | ||
810 | spin_lock_init(&dev->queue_lock); | ||
811 | |||
812 | crypto_init_queue(&dev->queue, 10); | ||
813 | |||
814 | init_timer(&dev->watchdog); | ||
815 | dev->watchdog.function = &dcp_watchdog; | ||
816 | dev->watchdog.data = (unsigned long)dev; | ||
817 | |||
818 | dev->dcp_bootstream_misc.minor = MISC_DYNAMIC_MINOR, | ||
819 | dev->dcp_bootstream_misc.name = "dcpboot", | ||
820 | dev->dcp_bootstream_misc.fops = &dcp_bootstream_fops, | ||
821 | ret = misc_register(&dev->dcp_bootstream_misc); | ||
822 | if (ret != 0) { | ||
823 | dev_err(dev->dev, "Unable to register misc device\n"); | ||
824 | goto err_free_key_iv; | ||
825 | } | ||
826 | |||
827 | for (i = 0; i < ARRAY_SIZE(algs); i++) { | ||
828 | algs[i].cra_priority = 300; | ||
829 | algs[i].cra_ctxsize = sizeof(struct dcp_op); | ||
830 | algs[i].cra_module = THIS_MODULE; | ||
831 | algs[i].cra_init = dcp_cra_init; | ||
832 | algs[i].cra_exit = dcp_cra_exit; | ||
833 | if (crypto_register_alg(&algs[i])) { | ||
834 | dev_err(&pdev->dev, "register algorithm failed\n"); | ||
835 | ret = -ENOMEM; | ||
836 | goto err_unregister; | ||
837 | } | ||
838 | } | ||
839 | dev_notice(&pdev->dev, "DCP crypto enabled!\n"); | ||
840 | |||
841 | return 0; | ||
842 | |||
843 | err_unregister: | ||
844 | for (j = 0; j < i; j++) | ||
845 | crypto_unregister_alg(&algs[j]); | ||
846 | err_free_key_iv: | ||
847 | tasklet_kill(&dev->done_task); | ||
848 | tasklet_kill(&dev->queue_task); | ||
849 | dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base, | ||
850 | dev->payload_base_dma); | ||
851 | err_free_hw_packet: | ||
852 | dma_free_coherent(&pdev->dev, DCP_MAX_PKG * | ||
853 | sizeof(struct dcp_hw_packet), dev->hw_pkg[0], | ||
854 | dev->hw_phys_pkg); | ||
855 | |||
856 | return ret; | ||
857 | } | ||
858 | |||
859 | static int dcp_remove(struct platform_device *pdev) | ||
860 | { | ||
861 | struct dcp_dev *dev; | ||
862 | int j; | ||
863 | dev = platform_get_drvdata(pdev); | ||
864 | |||
865 | misc_deregister(&dev->dcp_bootstream_misc); | ||
866 | |||
867 | for (j = 0; j < ARRAY_SIZE(algs); j++) | ||
868 | crypto_unregister_alg(&algs[j]); | ||
869 | |||
870 | tasklet_kill(&dev->done_task); | ||
871 | tasklet_kill(&dev->queue_task); | ||
872 | |||
873 | dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base, | ||
874 | dev->payload_base_dma); | ||
875 | |||
876 | dma_free_coherent(&pdev->dev, | ||
877 | DCP_MAX_PKG * sizeof(struct dcp_hw_packet), | ||
878 | dev->hw_pkg[0], dev->hw_phys_pkg); | ||
879 | |||
880 | return 0; | ||
881 | } | ||
882 | |||
883 | static struct of_device_id fs_dcp_of_match[] = { | ||
884 | { .compatible = "fsl-dcp"}, | ||
885 | {}, | ||
886 | }; | ||
887 | |||
888 | static struct platform_driver fs_dcp_driver = { | ||
889 | .probe = dcp_probe, | ||
890 | .remove = dcp_remove, | ||
891 | .driver = { | ||
892 | .name = "fsl-dcp", | ||
893 | .owner = THIS_MODULE, | ||
894 | .of_match_table = fs_dcp_of_match | ||
895 | } | ||
896 | }; | ||
897 | |||
898 | module_platform_driver(fs_dcp_driver); | ||
899 | |||
900 | |||
901 | MODULE_AUTHOR("Tobias Rauter <tobias.rauter@gmail.com>"); | ||
902 | MODULE_DESCRIPTION("Freescale DCP Crypto Driver"); | ||
903 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c new file mode 100644 index 000000000000..a6db7fa6f891 --- /dev/null +++ b/drivers/crypto/mxs-dcp.c | |||
@@ -0,0 +1,1100 @@ | |||
1 | /* | ||
2 | * Freescale i.MX23/i.MX28 Data Co-Processor driver | ||
3 | * | ||
4 | * Copyright (C) 2013 Marek Vasut <marex@denx.de> | ||
5 | * | ||
6 | * The code contained herein is licensed under the GNU General Public | ||
7 | * License. You may obtain a copy of the GNU General Public License | ||
8 | * Version 2 or later at the following locations: | ||
9 | * | ||
10 | * http://www.opensource.org/licenses/gpl-license.html | ||
11 | * http://www.gnu.org/copyleft/gpl.html | ||
12 | */ | ||
13 | |||
14 | #include <linux/crypto.h> | ||
15 | #include <linux/dma-mapping.h> | ||
16 | #include <linux/interrupt.h> | ||
17 | #include <linux/io.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/kthread.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/of.h> | ||
22 | #include <linux/platform_device.h> | ||
23 | #include <linux/stmp_device.h> | ||
24 | |||
25 | #include <crypto/aes.h> | ||
26 | #include <crypto/sha.h> | ||
27 | #include <crypto/internal/hash.h> | ||
28 | |||
29 | #define DCP_MAX_CHANS 4 | ||
30 | #define DCP_BUF_SZ PAGE_SIZE | ||
31 | |||
32 | /* DCP DMA descriptor. */ | ||
33 | struct dcp_dma_desc { | ||
34 | uint32_t next_cmd_addr; | ||
35 | uint32_t control0; | ||
36 | uint32_t control1; | ||
37 | uint32_t source; | ||
38 | uint32_t destination; | ||
39 | uint32_t size; | ||
40 | uint32_t payload; | ||
41 | uint32_t status; | ||
42 | }; | ||
43 | |||
44 | /* Coherent aligned block for bounce buffering. */ | ||
45 | struct dcp_coherent_block { | ||
46 | uint8_t aes_in_buf[DCP_BUF_SZ]; | ||
47 | uint8_t aes_out_buf[DCP_BUF_SZ]; | ||
48 | uint8_t sha_in_buf[DCP_BUF_SZ]; | ||
49 | |||
50 | uint8_t aes_key[2 * AES_KEYSIZE_128]; | ||
51 | uint8_t sha_digest[SHA256_DIGEST_SIZE]; | ||
52 | |||
53 | struct dcp_dma_desc desc[DCP_MAX_CHANS]; | ||
54 | }; | ||
55 | |||
56 | struct dcp { | ||
57 | struct device *dev; | ||
58 | void __iomem *base; | ||
59 | |||
60 | uint32_t caps; | ||
61 | |||
62 | struct dcp_coherent_block *coh; | ||
63 | |||
64 | struct completion completion[DCP_MAX_CHANS]; | ||
65 | struct mutex mutex[DCP_MAX_CHANS]; | ||
66 | struct task_struct *thread[DCP_MAX_CHANS]; | ||
67 | struct crypto_queue queue[DCP_MAX_CHANS]; | ||
68 | }; | ||
69 | |||
70 | enum dcp_chan { | ||
71 | DCP_CHAN_HASH_SHA = 0, | ||
72 | DCP_CHAN_CRYPTO = 2, | ||
73 | }; | ||
74 | |||
75 | struct dcp_async_ctx { | ||
76 | /* Common context */ | ||
77 | enum dcp_chan chan; | ||
78 | uint32_t fill; | ||
79 | |||
80 | /* SHA Hash-specific context */ | ||
81 | struct mutex mutex; | ||
82 | uint32_t alg; | ||
83 | unsigned int hot:1; | ||
84 | |||
85 | /* Crypto-specific context */ | ||
86 | unsigned int enc:1; | ||
87 | unsigned int ecb:1; | ||
88 | struct crypto_ablkcipher *fallback; | ||
89 | unsigned int key_len; | ||
90 | uint8_t key[AES_KEYSIZE_128]; | ||
91 | }; | ||
92 | |||
93 | struct dcp_sha_req_ctx { | ||
94 | unsigned int init:1; | ||
95 | unsigned int fini:1; | ||
96 | }; | ||
97 | |||
98 | /* | ||
99 | * There can be only one instance of the MXS DCP due to the | ||
100 | * design of the Linux Crypto API. | ||
101 | */ | ||
102 | static struct dcp *global_sdcp; | ||
103 | static DEFINE_MUTEX(global_mutex); | ||
104 | |||
105 | /* DCP register layout. */ | ||
106 | #define MXS_DCP_CTRL 0x00 | ||
107 | #define MXS_DCP_CTRL_GATHER_RESIDUAL_WRITES (1 << 23) | ||
108 | #define MXS_DCP_CTRL_ENABLE_CONTEXT_CACHING (1 << 22) | ||
109 | |||
110 | #define MXS_DCP_STAT 0x10 | ||
111 | #define MXS_DCP_STAT_CLR 0x18 | ||
112 | #define MXS_DCP_STAT_IRQ_MASK 0xf | ||
113 | |||
114 | #define MXS_DCP_CHANNELCTRL 0x20 | ||
115 | #define MXS_DCP_CHANNELCTRL_ENABLE_CHANNEL_MASK 0xff | ||
116 | |||
117 | #define MXS_DCP_CAPABILITY1 0x40 | ||
118 | #define MXS_DCP_CAPABILITY1_SHA256 (4 << 16) | ||
119 | #define MXS_DCP_CAPABILITY1_SHA1 (1 << 16) | ||
120 | #define MXS_DCP_CAPABILITY1_AES128 (1 << 0) | ||
121 | |||
122 | #define MXS_DCP_CONTEXT 0x50 | ||
123 | |||
124 | #define MXS_DCP_CH_N_CMDPTR(n) (0x100 + ((n) * 0x40)) | ||
125 | |||
126 | #define MXS_DCP_CH_N_SEMA(n) (0x110 + ((n) * 0x40)) | ||
127 | |||
128 | #define MXS_DCP_CH_N_STAT(n) (0x120 + ((n) * 0x40)) | ||
129 | #define MXS_DCP_CH_N_STAT_CLR(n) (0x128 + ((n) * 0x40)) | ||
130 | |||
131 | /* DMA descriptor bits. */ | ||
132 | #define MXS_DCP_CONTROL0_HASH_TERM (1 << 13) | ||
133 | #define MXS_DCP_CONTROL0_HASH_INIT (1 << 12) | ||
134 | #define MXS_DCP_CONTROL0_PAYLOAD_KEY (1 << 11) | ||
135 | #define MXS_DCP_CONTROL0_CIPHER_ENCRYPT (1 << 8) | ||
136 | #define MXS_DCP_CONTROL0_CIPHER_INIT (1 << 9) | ||
137 | #define MXS_DCP_CONTROL0_ENABLE_HASH (1 << 6) | ||
138 | #define MXS_DCP_CONTROL0_ENABLE_CIPHER (1 << 5) | ||
139 | #define MXS_DCP_CONTROL0_DECR_SEMAPHORE (1 << 1) | ||
140 | #define MXS_DCP_CONTROL0_INTERRUPT (1 << 0) | ||
141 | |||
142 | #define MXS_DCP_CONTROL1_HASH_SELECT_SHA256 (2 << 16) | ||
143 | #define MXS_DCP_CONTROL1_HASH_SELECT_SHA1 (0 << 16) | ||
144 | #define MXS_DCP_CONTROL1_CIPHER_MODE_CBC (1 << 4) | ||
145 | #define MXS_DCP_CONTROL1_CIPHER_MODE_ECB (0 << 4) | ||
146 | #define MXS_DCP_CONTROL1_CIPHER_SELECT_AES128 (0 << 0) | ||
147 | |||
148 | static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) | ||
149 | { | ||
150 | struct dcp *sdcp = global_sdcp; | ||
151 | const int chan = actx->chan; | ||
152 | uint32_t stat; | ||
153 | int ret; | ||
154 | struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; | ||
155 | |||
156 | dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc), | ||
157 | DMA_TO_DEVICE); | ||
158 | |||
159 | reinit_completion(&sdcp->completion[chan]); | ||
160 | |||
161 | /* Clear status register. */ | ||
162 | writel(0xffffffff, sdcp->base + MXS_DCP_CH_N_STAT_CLR(chan)); | ||
163 | |||
164 | /* Load the DMA descriptor. */ | ||
165 | writel(desc_phys, sdcp->base + MXS_DCP_CH_N_CMDPTR(chan)); | ||
166 | |||
167 | /* Increment the semaphore to start the DMA transfer. */ | ||
168 | writel(1, sdcp->base + MXS_DCP_CH_N_SEMA(chan)); | ||
169 | |||
170 | ret = wait_for_completion_timeout(&sdcp->completion[chan], | ||
171 | msecs_to_jiffies(1000)); | ||
172 | if (!ret) { | ||
173 | dev_err(sdcp->dev, "Channel %i timeout (DCP_STAT=0x%08x)\n", | ||
174 | chan, readl(sdcp->base + MXS_DCP_STAT)); | ||
175 | return -ETIMEDOUT; | ||
176 | } | ||
177 | |||
178 | stat = readl(sdcp->base + MXS_DCP_CH_N_STAT(chan)); | ||
179 | if (stat & 0xff) { | ||
180 | dev_err(sdcp->dev, "Channel %i error (CH_STAT=0x%08x)\n", | ||
181 | chan, stat); | ||
182 | return -EINVAL; | ||
183 | } | ||
184 | |||
185 | dma_unmap_single(sdcp->dev, desc_phys, sizeof(*desc), DMA_TO_DEVICE); | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * Encryption (AES128) | ||
192 | */ | ||
193 | static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, int init) | ||
194 | { | ||
195 | struct dcp *sdcp = global_sdcp; | ||
196 | struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; | ||
197 | int ret; | ||
198 | |||
199 | dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key, | ||
200 | 2 * AES_KEYSIZE_128, | ||
201 | DMA_TO_DEVICE); | ||
202 | dma_addr_t src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf, | ||
203 | DCP_BUF_SZ, DMA_TO_DEVICE); | ||
204 | dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf, | ||
205 | DCP_BUF_SZ, DMA_FROM_DEVICE); | ||
206 | |||
207 | /* Fill in the DMA descriptor. */ | ||
208 | desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE | | ||
209 | MXS_DCP_CONTROL0_INTERRUPT | | ||
210 | MXS_DCP_CONTROL0_ENABLE_CIPHER; | ||
211 | |||
212 | /* Payload contains the key. */ | ||
213 | desc->control0 |= MXS_DCP_CONTROL0_PAYLOAD_KEY; | ||
214 | |||
215 | if (actx->enc) | ||
216 | desc->control0 |= MXS_DCP_CONTROL0_CIPHER_ENCRYPT; | ||
217 | if (init) | ||
218 | desc->control0 |= MXS_DCP_CONTROL0_CIPHER_INIT; | ||
219 | |||
220 | desc->control1 = MXS_DCP_CONTROL1_CIPHER_SELECT_AES128; | ||
221 | |||
222 | if (actx->ecb) | ||
223 | desc->control1 |= MXS_DCP_CONTROL1_CIPHER_MODE_ECB; | ||
224 | else | ||
225 | desc->control1 |= MXS_DCP_CONTROL1_CIPHER_MODE_CBC; | ||
226 | |||
227 | desc->next_cmd_addr = 0; | ||
228 | desc->source = src_phys; | ||
229 | desc->destination = dst_phys; | ||
230 | desc->size = actx->fill; | ||
231 | desc->payload = key_phys; | ||
232 | desc->status = 0; | ||
233 | |||
234 | ret = mxs_dcp_start_dma(actx); | ||
235 | |||
236 | dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128, | ||
237 | DMA_TO_DEVICE); | ||
238 | dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE); | ||
239 | dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE); | ||
240 | |||
241 | return ret; | ||
242 | } | ||
243 | |||
244 | static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) | ||
245 | { | ||
246 | struct dcp *sdcp = global_sdcp; | ||
247 | |||
248 | struct ablkcipher_request *req = ablkcipher_request_cast(arq); | ||
249 | struct dcp_async_ctx *actx = crypto_tfm_ctx(arq->tfm); | ||
250 | |||
251 | struct scatterlist *dst = req->dst; | ||
252 | struct scatterlist *src = req->src; | ||
253 | const int nents = sg_nents(req->src); | ||
254 | |||
255 | const int out_off = DCP_BUF_SZ; | ||
256 | uint8_t *in_buf = sdcp->coh->aes_in_buf; | ||
257 | uint8_t *out_buf = sdcp->coh->aes_out_buf; | ||
258 | |||
259 | uint8_t *out_tmp, *src_buf, *dst_buf = NULL; | ||
260 | uint32_t dst_off = 0; | ||
261 | |||
262 | uint8_t *key = sdcp->coh->aes_key; | ||
263 | |||
264 | int ret = 0; | ||
265 | int split = 0; | ||
266 | unsigned int i, len, clen, rem = 0; | ||
267 | int init = 0; | ||
268 | |||
269 | actx->fill = 0; | ||
270 | |||
271 | /* Copy the key from the temporary location. */ | ||
272 | memcpy(key, actx->key, actx->key_len); | ||
273 | |||
274 | if (!actx->ecb) { | ||
275 | /* Copy the CBC IV just past the key. */ | ||
276 | memcpy(key + AES_KEYSIZE_128, req->info, AES_KEYSIZE_128); | ||
277 | /* CBC needs the INIT set. */ | ||
278 | init = 1; | ||
279 | } else { | ||
280 | memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128); | ||
281 | } | ||
282 | |||
283 | for_each_sg(req->src, src, nents, i) { | ||
284 | src_buf = sg_virt(src); | ||
285 | len = sg_dma_len(src); | ||
286 | |||
287 | do { | ||
288 | if (actx->fill + len > out_off) | ||
289 | clen = out_off - actx->fill; | ||
290 | else | ||
291 | clen = len; | ||
292 | |||
293 | memcpy(in_buf + actx->fill, src_buf, clen); | ||
294 | len -= clen; | ||
295 | src_buf += clen; | ||
296 | actx->fill += clen; | ||
297 | |||
298 | /* | ||
299 | * If we filled the buffer or this is the last SG, | ||
300 | * submit the buffer. | ||
301 | */ | ||
302 | if (actx->fill == out_off || sg_is_last(src)) { | ||
303 | ret = mxs_dcp_run_aes(actx, init); | ||
304 | if (ret) | ||
305 | return ret; | ||
306 | init = 0; | ||
307 | |||
308 | out_tmp = out_buf; | ||
309 | while (dst && actx->fill) { | ||
310 | if (!split) { | ||
311 | dst_buf = sg_virt(dst); | ||
312 | dst_off = 0; | ||
313 | } | ||
314 | rem = min(sg_dma_len(dst) - dst_off, | ||
315 | actx->fill); | ||
316 | |||
317 | memcpy(dst_buf + dst_off, out_tmp, rem); | ||
318 | out_tmp += rem; | ||
319 | dst_off += rem; | ||
320 | actx->fill -= rem; | ||
321 | |||
322 | if (dst_off == sg_dma_len(dst)) { | ||
323 | dst = sg_next(dst); | ||
324 | split = 0; | ||
325 | } else { | ||
326 | split = 1; | ||
327 | } | ||
328 | } | ||
329 | } | ||
330 | } while (len); | ||
331 | } | ||
332 | |||
333 | return ret; | ||
334 | } | ||
335 | |||
336 | static int dcp_chan_thread_aes(void *data) | ||
337 | { | ||
338 | struct dcp *sdcp = global_sdcp; | ||
339 | const int chan = DCP_CHAN_CRYPTO; | ||
340 | |||
341 | struct crypto_async_request *backlog; | ||
342 | struct crypto_async_request *arq; | ||
343 | |||
344 | int ret; | ||
345 | |||
346 | do { | ||
347 | __set_current_state(TASK_INTERRUPTIBLE); | ||
348 | |||
349 | mutex_lock(&sdcp->mutex[chan]); | ||
350 | backlog = crypto_get_backlog(&sdcp->queue[chan]); | ||
351 | arq = crypto_dequeue_request(&sdcp->queue[chan]); | ||
352 | mutex_unlock(&sdcp->mutex[chan]); | ||
353 | |||
354 | if (backlog) | ||
355 | backlog->complete(backlog, -EINPROGRESS); | ||
356 | |||
357 | if (arq) { | ||
358 | ret = mxs_dcp_aes_block_crypt(arq); | ||
359 | arq->complete(arq, ret); | ||
360 | continue; | ||
361 | } | ||
362 | |||
363 | schedule(); | ||
364 | } while (!kthread_should_stop()); | ||
365 | |||
366 | return 0; | ||
367 | } | ||
368 | |||
369 | static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc) | ||
370 | { | ||
371 | struct crypto_tfm *tfm = | ||
372 | crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); | ||
373 | struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx( | ||
374 | crypto_ablkcipher_reqtfm(req)); | ||
375 | int ret; | ||
376 | |||
377 | ablkcipher_request_set_tfm(req, ctx->fallback); | ||
378 | |||
379 | if (enc) | ||
380 | ret = crypto_ablkcipher_encrypt(req); | ||
381 | else | ||
382 | ret = crypto_ablkcipher_decrypt(req); | ||
383 | |||
384 | ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); | ||
385 | |||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | static int mxs_dcp_aes_enqueue(struct ablkcipher_request *req, int enc, int ecb) | ||
390 | { | ||
391 | struct dcp *sdcp = global_sdcp; | ||
392 | struct crypto_async_request *arq = &req->base; | ||
393 | struct dcp_async_ctx *actx = crypto_tfm_ctx(arq->tfm); | ||
394 | int ret; | ||
395 | |||
396 | if (unlikely(actx->key_len != AES_KEYSIZE_128)) | ||
397 | return mxs_dcp_block_fallback(req, enc); | ||
398 | |||
399 | actx->enc = enc; | ||
400 | actx->ecb = ecb; | ||
401 | actx->chan = DCP_CHAN_CRYPTO; | ||
402 | |||
403 | mutex_lock(&sdcp->mutex[actx->chan]); | ||
404 | ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); | ||
405 | mutex_unlock(&sdcp->mutex[actx->chan]); | ||
406 | |||
407 | wake_up_process(sdcp->thread[actx->chan]); | ||
408 | |||
409 | return -EINPROGRESS; | ||
410 | } | ||
411 | |||
412 | static int mxs_dcp_aes_ecb_decrypt(struct ablkcipher_request *req) | ||
413 | { | ||
414 | return mxs_dcp_aes_enqueue(req, 0, 1); | ||
415 | } | ||
416 | |||
417 | static int mxs_dcp_aes_ecb_encrypt(struct ablkcipher_request *req) | ||
418 | { | ||
419 | return mxs_dcp_aes_enqueue(req, 1, 1); | ||
420 | } | ||
421 | |||
422 | static int mxs_dcp_aes_cbc_decrypt(struct ablkcipher_request *req) | ||
423 | { | ||
424 | return mxs_dcp_aes_enqueue(req, 0, 0); | ||
425 | } | ||
426 | |||
427 | static int mxs_dcp_aes_cbc_encrypt(struct ablkcipher_request *req) | ||
428 | { | ||
429 | return mxs_dcp_aes_enqueue(req, 1, 0); | ||
430 | } | ||
431 | |||
432 | static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, | ||
433 | unsigned int len) | ||
434 | { | ||
435 | struct dcp_async_ctx *actx = crypto_ablkcipher_ctx(tfm); | ||
436 | unsigned int ret; | ||
437 | |||
438 | /* | ||
439 | * AES 128 is supported by the hardware; store the key into the temporary | ||
440 | * buffer and exit. We must use the temporary buffer here, since | ||
441 | * there can still be an operation in progress. | ||
442 | */ | ||
443 | actx->key_len = len; | ||
444 | if (len == AES_KEYSIZE_128) { | ||
445 | memcpy(actx->key, key, len); | ||
446 | return 0; | ||
447 | } | ||
448 | |||
449 | /* Check if the key size is supported by kernel at all. */ | ||
450 | if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256) { | ||
451 | tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
452 | return -EINVAL; | ||
453 | } | ||
454 | |||
455 | /* | ||
456 | * If the requested AES key size is not supported by the hardware, | ||
457 | * but is supported by the in-kernel software implementation, we use | ||
458 | * software fallback. | ||
459 | */ | ||
460 | actx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; | ||
461 | actx->fallback->base.crt_flags |= | ||
462 | tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK; | ||
463 | |||
464 | ret = crypto_ablkcipher_setkey(actx->fallback, key, len); | ||
465 | if (!ret) | ||
466 | return 0; | ||
467 | |||
468 | tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; | ||
469 | tfm->base.crt_flags |= | ||
470 | actx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK; | ||
471 | |||
472 | return ret; | ||
473 | } | ||
474 | |||
475 | static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm) | ||
476 | { | ||
477 | const char *name = tfm->__crt_alg->cra_name; | ||
478 | const uint32_t flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK; | ||
479 | struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); | ||
480 | struct crypto_ablkcipher *blk; | ||
481 | |||
482 | blk = crypto_alloc_ablkcipher(name, 0, flags); | ||
483 | if (IS_ERR(blk)) | ||
484 | return PTR_ERR(blk); | ||
485 | |||
486 | actx->fallback = blk; | ||
487 | tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_async_ctx); | ||
488 | return 0; | ||
489 | } | ||
490 | |||
491 | static void mxs_dcp_aes_fallback_exit(struct crypto_tfm *tfm) | ||
492 | { | ||
493 | struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); | ||
494 | |||
495 | crypto_free_ablkcipher(actx->fallback); | ||
496 | actx->fallback = NULL; | ||
497 | } | ||
498 | |||
499 | /* | ||
500 | * Hashing (SHA1/SHA256) | ||
501 | */ | ||
502 | static int mxs_dcp_run_sha(struct ahash_request *req) | ||
503 | { | ||
504 | struct dcp *sdcp = global_sdcp; | ||
505 | int ret; | ||
506 | |||
507 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
508 | struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); | ||
509 | struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); | ||
510 | |||
511 | struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; | ||
512 | dma_addr_t digest_phys = dma_map_single(sdcp->dev, | ||
513 | sdcp->coh->sha_digest, | ||
514 | SHA256_DIGEST_SIZE, | ||
515 | DMA_FROM_DEVICE); | ||
516 | |||
517 | dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf, | ||
518 | DCP_BUF_SZ, DMA_TO_DEVICE); | ||
519 | |||
520 | /* Fill in the DMA descriptor. */ | ||
521 | desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE | | ||
522 | MXS_DCP_CONTROL0_INTERRUPT | | ||
523 | MXS_DCP_CONTROL0_ENABLE_HASH; | ||
524 | if (rctx->init) | ||
525 | desc->control0 |= MXS_DCP_CONTROL0_HASH_INIT; | ||
526 | |||
527 | desc->control1 = actx->alg; | ||
528 | desc->next_cmd_addr = 0; | ||
529 | desc->source = buf_phys; | ||
530 | desc->destination = 0; | ||
531 | desc->size = actx->fill; | ||
532 | desc->payload = 0; | ||
533 | desc->status = 0; | ||
534 | |||
535 | /* Set HASH_TERM bit for last transfer block. */ | ||
536 | if (rctx->fini) { | ||
537 | desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM; | ||
538 | desc->payload = digest_phys; | ||
539 | } | ||
540 | |||
541 | ret = mxs_dcp_start_dma(actx); | ||
542 | |||
543 | dma_unmap_single(sdcp->dev, digest_phys, SHA256_DIGEST_SIZE, | ||
544 | DMA_FROM_DEVICE); | ||
545 | dma_unmap_single(sdcp->dev, buf_phys, DCP_BUF_SZ, DMA_TO_DEVICE); | ||
546 | |||
547 | return ret; | ||
548 | } | ||
549 | |||
550 | static int dcp_sha_req_to_buf(struct crypto_async_request *arq) | ||
551 | { | ||
552 | struct dcp *sdcp = global_sdcp; | ||
553 | |||
554 | struct ahash_request *req = ahash_request_cast(arq); | ||
555 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
556 | struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); | ||
557 | struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); | ||
558 | struct hash_alg_common *halg = crypto_hash_alg_common(tfm); | ||
559 | const int nents = sg_nents(req->src); | ||
560 | |||
561 | uint8_t *digest = sdcp->coh->sha_digest; | ||
562 | uint8_t *in_buf = sdcp->coh->sha_in_buf; | ||
563 | |||
564 | uint8_t *src_buf; | ||
565 | |||
566 | struct scatterlist *src; | ||
567 | |||
568 | unsigned int i, len, clen; | ||
569 | int ret; | ||
570 | |||
571 | int fin = rctx->fini; | ||
572 | if (fin) | ||
573 | rctx->fini = 0; | ||
574 | |||
575 | for_each_sg(req->src, src, nents, i) { | ||
576 | src_buf = sg_virt(src); | ||
577 | len = sg_dma_len(src); | ||
578 | |||
579 | do { | ||
580 | if (actx->fill + len > DCP_BUF_SZ) | ||
581 | clen = DCP_BUF_SZ - actx->fill; | ||
582 | else | ||
583 | clen = len; | ||
584 | |||
585 | memcpy(in_buf + actx->fill, src_buf, clen); | ||
586 | len -= clen; | ||
587 | src_buf += clen; | ||
588 | actx->fill += clen; | ||
589 | |||
590 | /* | ||
591 | * If we filled the buffer and still have some | ||
592 | * more data, submit the buffer. | ||
593 | */ | ||
594 | if (len && actx->fill == DCP_BUF_SZ) { | ||
595 | ret = mxs_dcp_run_sha(req); | ||
596 | if (ret) | ||
597 | return ret; | ||
598 | actx->fill = 0; | ||
599 | rctx->init = 0; | ||
600 | } | ||
601 | } while (len); | ||
602 | } | ||
603 | |||
604 | if (fin) { | ||
605 | rctx->fini = 1; | ||
606 | |||
607 | /* Submit whatever is left. */ | ||
608 | ret = mxs_dcp_run_sha(req); | ||
609 | if (ret || !req->result) | ||
610 | return ret; | ||
611 | actx->fill = 0; | ||
612 | |||
613 | /* For some reason, the result is flipped. */ | ||
614 | for (i = 0; i < halg->digestsize; i++) | ||
615 | req->result[i] = digest[halg->digestsize - i - 1]; | ||
616 | } | ||
617 | |||
618 | return 0; | ||
619 | } | ||
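To make the "flipped" result above concrete: the DCP returns the digest back-to-front, so the loop maps hardware byte digestsize-1 to result byte 0. The same reversal as a standalone helper (editor's illustration only):

    #include <stddef.h>
    #include <stdint.h>

    /* Copy an n-byte hardware digest into the canonical byte order. */
    static void reverse_digest(uint8_t *result, const uint8_t *hw, size_t n)
    {
    	size_t i;

    	for (i = 0; i < n; i++)
    		result[i] = hw[n - i - 1];
    }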
620 | |||
621 | static int dcp_chan_thread_sha(void *data) | ||
622 | { | ||
623 | struct dcp *sdcp = global_sdcp; | ||
624 | const int chan = DCP_CHAN_HASH_SHA; | ||
625 | |||
626 | struct crypto_async_request *backlog; | ||
627 | struct crypto_async_request *arq; | ||
628 | |||
629 | struct dcp_sha_req_ctx *rctx; | ||
630 | |||
631 | struct ahash_request *req; | ||
632 | int ret, fini; | ||
633 | |||
634 | do { | ||
635 | __set_current_state(TASK_INTERRUPTIBLE); | ||
636 | |||
637 | mutex_lock(&sdcp->mutex[chan]); | ||
638 | backlog = crypto_get_backlog(&sdcp->queue[chan]); | ||
639 | arq = crypto_dequeue_request(&sdcp->queue[chan]); | ||
640 | mutex_unlock(&sdcp->mutex[chan]); | ||
641 | |||
642 | if (backlog) | ||
643 | backlog->complete(backlog, -EINPROGRESS); | ||
644 | |||
645 | if (arq) { | ||
646 | req = ahash_request_cast(arq); | ||
647 | rctx = ahash_request_ctx(req); | ||
648 | |||
649 | ret = dcp_sha_req_to_buf(arq); | ||
650 | fini = rctx->fini; | ||
651 | arq->complete(arq, ret); | ||
652 | if (!fini) | ||
653 | continue; | ||
654 | } | ||
655 | |||
656 | schedule(); | ||
657 | } while (!kthread_should_stop()); | ||
658 | |||
659 | return 0; | ||
660 | } | ||
661 | |||
662 | static int dcp_sha_init(struct ahash_request *req) | ||
663 | { | ||
664 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
665 | struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); | ||
666 | |||
667 | struct hash_alg_common *halg = crypto_hash_alg_common(tfm); | ||
668 | |||
669 | /* | ||
670 | * Start hashing session. The code below only inits the | ||
671 | * hashing session context, nothing more. | ||
672 | */ | ||
673 | memset(actx, 0, sizeof(*actx)); | ||
674 | |||
675 | if (strcmp(halg->base.cra_name, "sha1") == 0) | ||
676 | actx->alg = MXS_DCP_CONTROL1_HASH_SELECT_SHA1; | ||
677 | else | ||
678 | actx->alg = MXS_DCP_CONTROL1_HASH_SELECT_SHA256; | ||
679 | |||
680 | actx->fill = 0; | ||
681 | actx->hot = 0; | ||
682 | actx->chan = DCP_CHAN_HASH_SHA; | ||
683 | |||
684 | mutex_init(&actx->mutex); | ||
685 | |||
686 | return 0; | ||
687 | } | ||
688 | |||
689 | static int dcp_sha_update_fx(struct ahash_request *req, int fini) | ||
690 | { | ||
691 | struct dcp *sdcp = global_sdcp; | ||
692 | |||
693 | struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); | ||
694 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
695 | struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); | ||
696 | |||
697 | int ret; | ||
698 | |||
699 | /* | ||
700 | * Ignore requests that have no data in them and are not | ||
701 | * the trailing requests in the stream of requests. | ||
702 | */ | ||
703 | if (!req->nbytes && !fini) | ||
704 | return 0; | ||
705 | |||
706 | mutex_lock(&actx->mutex); | ||
707 | |||
708 | rctx->fini = fini; | ||
709 | |||
710 | if (!actx->hot) { | ||
711 | actx->hot = 1; | ||
712 | rctx->init = 1; | ||
713 | } | ||
714 | |||
715 | mutex_lock(&sdcp->mutex[actx->chan]); | ||
716 | ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); | ||
717 | mutex_unlock(&sdcp->mutex[actx->chan]); | ||
718 | |||
719 | wake_up_process(sdcp->thread[actx->chan]); | ||
720 | mutex_unlock(&actx->mutex); | ||
721 | |||
722 | return -EINPROGRESS; | ||
723 | } | ||
724 | |||
725 | static int dcp_sha_update(struct ahash_request *req) | ||
726 | { | ||
727 | return dcp_sha_update_fx(req, 0); | ||
728 | } | ||
729 | |||
730 | static int dcp_sha_final(struct ahash_request *req) | ||
731 | { | ||
732 | ahash_request_set_crypt(req, NULL, req->result, 0); | ||
733 | req->nbytes = 0; | ||
734 | return dcp_sha_update_fx(req, 1); | ||
735 | } | ||
736 | |||
737 | static int dcp_sha_finup(struct ahash_request *req) | ||
738 | { | ||
739 | return dcp_sha_update_fx(req, 1); | ||
740 | } | ||
741 | |||
742 | static int dcp_sha_digest(struct ahash_request *req) | ||
743 | { | ||
744 | int ret; | ||
745 | |||
746 | ret = dcp_sha_init(req); | ||
747 | if (ret) | ||
748 | return ret; | ||
749 | |||
750 | return dcp_sha_finup(req); | ||
751 | } | ||
752 | |||
753 | static int dcp_sha_cra_init(struct crypto_tfm *tfm) | ||
754 | { | ||
755 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
756 | sizeof(struct dcp_sha_req_ctx)); | ||
757 | return 0; | ||
758 | } | ||
759 | |||
760 | static void dcp_sha_cra_exit(struct crypto_tfm *tfm) | ||
761 | { | ||
762 | } | ||
763 | |||
764 | /* AES 128 ECB and AES 128 CBC */ | ||
765 | static struct crypto_alg dcp_aes_algs[] = { | ||
766 | { | ||
767 | .cra_name = "ecb(aes)", | ||
768 | .cra_driver_name = "ecb-aes-dcp", | ||
769 | .cra_priority = 400, | ||
770 | .cra_alignmask = 15, | ||
771 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | | ||
772 | CRYPTO_ALG_ASYNC | | ||
773 | CRYPTO_ALG_NEED_FALLBACK, | ||
774 | .cra_init = mxs_dcp_aes_fallback_init, | ||
775 | .cra_exit = mxs_dcp_aes_fallback_exit, | ||
776 | .cra_blocksize = AES_BLOCK_SIZE, | ||
777 | .cra_ctxsize = sizeof(struct dcp_async_ctx), | ||
778 | .cra_type = &crypto_ablkcipher_type, | ||
779 | .cra_module = THIS_MODULE, | ||
780 | .cra_u = { | ||
781 | .ablkcipher = { | ||
782 | .min_keysize = AES_MIN_KEY_SIZE, | ||
783 | .max_keysize = AES_MAX_KEY_SIZE, | ||
784 | .setkey = mxs_dcp_aes_setkey, | ||
785 | .encrypt = mxs_dcp_aes_ecb_encrypt, | ||
786 | .decrypt = mxs_dcp_aes_ecb_decrypt | ||
787 | }, | ||
788 | }, | ||
789 | }, { | ||
790 | .cra_name = "cbc(aes)", | ||
791 | .cra_driver_name = "cbc-aes-dcp", | ||
792 | .cra_priority = 400, | ||
793 | .cra_alignmask = 15, | ||
794 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | | ||
795 | CRYPTO_ALG_ASYNC | | ||
796 | CRYPTO_ALG_NEED_FALLBACK, | ||
797 | .cra_init = mxs_dcp_aes_fallback_init, | ||
798 | .cra_exit = mxs_dcp_aes_fallback_exit, | ||
799 | .cra_blocksize = AES_BLOCK_SIZE, | ||
800 | .cra_ctxsize = sizeof(struct dcp_async_ctx), | ||
801 | .cra_type = &crypto_ablkcipher_type, | ||
802 | .cra_module = THIS_MODULE, | ||
803 | .cra_u = { | ||
804 | .ablkcipher = { | ||
805 | .min_keysize = AES_MIN_KEY_SIZE, | ||
806 | .max_keysize = AES_MAX_KEY_SIZE, | ||
807 | .setkey = mxs_dcp_aes_setkey, | ||
808 | .encrypt = mxs_dcp_aes_cbc_encrypt, | ||
809 | .decrypt = mxs_dcp_aes_cbc_decrypt, | ||
810 | .ivsize = AES_BLOCK_SIZE, | ||
811 | }, | ||
812 | }, | ||
813 | }, | ||
814 | }; | ||
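With these two algorithms registered at priority 400, an in-kernel user that allocates "cbc(aes)" or "ecb(aes)" can be served by the DCP. The sketch below shows such a caller using the ablkcipher API of this kernel generation; all example_* names are the editor's and the snippet is illustrative, not part of the driver.

    /* Editor's sketch: in-kernel consumer of the "cbc(aes)" cipher above. */
    #include <linux/completion.h>
    #include <linux/crypto.h>
    #include <linux/err.h>
    #include <linux/gfp.h>
    #include <linux/scatterlist.h>
    #include <crypto/aes.h>

    struct example_result {
    	struct completion completion;
    	int err;
    };

    static void example_done(struct crypto_async_request *areq, int err)
    {
    	struct example_result *res = areq->data;

    	if (err == -EINPROGRESS)
    		return;
    	res->err = err;
    	complete(&res->completion);
    }

    /* len must be a multiple of AES_BLOCK_SIZE; buf is encrypted in place. */
    static int example_cbc_encrypt(u8 *buf, unsigned int len,
    			       const u8 key[AES_KEYSIZE_128],
    			       u8 iv[AES_BLOCK_SIZE])
    {
    	struct crypto_ablkcipher *tfm;
    	struct ablkcipher_request *req;
    	struct example_result res;
    	struct scatterlist sg;
    	int ret;

    	tfm = crypto_alloc_ablkcipher("cbc(aes)", 0, 0);
    	if (IS_ERR(tfm))
    		return PTR_ERR(tfm);

    	ret = crypto_ablkcipher_setkey(tfm, key, AES_KEYSIZE_128);
    	if (ret)
    		goto out_tfm;

    	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
    	if (!req) {
    		ret = -ENOMEM;
    		goto out_tfm;
    	}

    	init_completion(&res.completion);
    	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
    					example_done, &res);
    	sg_init_one(&sg, buf, len);
    	ablkcipher_request_set_crypt(req, &sg, &sg, len, iv);

    	ret = crypto_ablkcipher_encrypt(req);
    	if (ret == -EINPROGRESS || ret == -EBUSY) {
    		wait_for_completion(&res.completion);
    		ret = res.err;
    	}

    	ablkcipher_request_free(req);
    out_tfm:
    	crypto_free_ablkcipher(tfm);
    	return ret;
    }

Because the DCP completes requests asynchronously on its channel thread, the request, scatterlist, and IV must stay valid until the completion callback fires, which is why the sketch waits on a completion rather than returning early.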
815 | |||
816 | /* SHA1 */ | ||
817 | static struct ahash_alg dcp_sha1_alg = { | ||
818 | .init = dcp_sha_init, | ||
819 | .update = dcp_sha_update, | ||
820 | .final = dcp_sha_final, | ||
821 | .finup = dcp_sha_finup, | ||
822 | .digest = dcp_sha_digest, | ||
823 | .halg = { | ||
824 | .digestsize = SHA1_DIGEST_SIZE, | ||
825 | .base = { | ||
826 | .cra_name = "sha1", | ||
827 | .cra_driver_name = "sha1-dcp", | ||
828 | .cra_priority = 400, | ||
829 | .cra_alignmask = 63, | ||
830 | .cra_flags = CRYPTO_ALG_ASYNC, | ||
831 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
832 | .cra_ctxsize = sizeof(struct dcp_async_ctx), | ||
833 | .cra_module = THIS_MODULE, | ||
834 | .cra_init = dcp_sha_cra_init, | ||
835 | .cra_exit = dcp_sha_cra_exit, | ||
836 | }, | ||
837 | }, | ||
838 | }; | ||
839 | |||
840 | /* SHA256 */ | ||
841 | static struct ahash_alg dcp_sha256_alg = { | ||
842 | .init = dcp_sha_init, | ||
843 | .update = dcp_sha_update, | ||
844 | .final = dcp_sha_final, | ||
845 | .finup = dcp_sha_finup, | ||
846 | .digest = dcp_sha_digest, | ||
847 | .halg = { | ||
848 | .digestsize = SHA256_DIGEST_SIZE, | ||
849 | .base = { | ||
850 | .cra_name = "sha256", | ||
851 | .cra_driver_name = "sha256-dcp", | ||
852 | .cra_priority = 400, | ||
853 | .cra_alignmask = 63, | ||
854 | .cra_flags = CRYPTO_ALG_ASYNC, | ||
855 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
856 | .cra_ctxsize = sizeof(struct dcp_async_ctx), | ||
857 | .cra_module = THIS_MODULE, | ||
858 | .cra_init = dcp_sha_cra_init, | ||
859 | .cra_exit = dcp_sha_cra_exit, | ||
860 | }, | ||
861 | }, | ||
862 | }; | ||
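The hashing side can be exercised the same way: an ahash user that allocates "sha256" (or "sha1") may land on the DCP at this priority. Again an editor's sketch with illustrative example_* names, not code from the patch.

    /* Editor's sketch: one-shot SHA-256 digest through the ahash API. */
    #include <linux/completion.h>
    #include <linux/err.h>
    #include <linux/gfp.h>
    #include <linux/scatterlist.h>
    #include <crypto/hash.h>
    #include <crypto/sha.h>

    struct example_hash_wait {
    	struct completion completion;
    	int err;
    };

    static void example_hash_done(struct crypto_async_request *areq, int err)
    {
    	struct example_hash_wait *w = areq->data;

    	if (err == -EINPROGRESS)
    		return;
    	w->err = err;
    	complete(&w->completion);
    }

    static int example_sha256(u8 *buf, unsigned int len,
    			  u8 out[SHA256_DIGEST_SIZE])
    {
    	struct crypto_ahash *tfm;
    	struct ahash_request *req;
    	struct example_hash_wait w;
    	struct scatterlist sg;
    	int ret;

    	tfm = crypto_alloc_ahash("sha256", 0, 0);
    	if (IS_ERR(tfm))
    		return PTR_ERR(tfm);

    	req = ahash_request_alloc(tfm, GFP_KERNEL);
    	if (!req) {
    		ret = -ENOMEM;
    		goto out_tfm;
    	}

    	init_completion(&w.completion);
    	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
    				   example_hash_done, &w);
    	sg_init_one(&sg, buf, len);
    	ahash_request_set_crypt(req, &sg, out, len);

    	ret = crypto_ahash_digest(req);
    	if (ret == -EINPROGRESS || ret == -EBUSY) {
    		wait_for_completion(&w.completion);
    		ret = w.err;
    	}

    	ahash_request_free(req);
    out_tfm:
    	crypto_free_ahash(tfm);
    	return ret;
    }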
863 | |||
864 | static irqreturn_t mxs_dcp_irq(int irq, void *context) | ||
865 | { | ||
866 | struct dcp *sdcp = context; | ||
867 | uint32_t stat; | ||
868 | int i; | ||
869 | |||
870 | stat = readl(sdcp->base + MXS_DCP_STAT); | ||
871 | stat &= MXS_DCP_STAT_IRQ_MASK; | ||
872 | if (!stat) | ||
873 | return IRQ_NONE; | ||
874 | |||
875 | /* Clear the interrupts. */ | ||
876 | writel(stat, sdcp->base + MXS_DCP_STAT_CLR); | ||
877 | |||
878 | /* Complete the DMA requests that finished. */ | ||
879 | for (i = 0; i < DCP_MAX_CHANS; i++) | ||
880 | if (stat & (1 << i)) | ||
881 | complete(&sdcp->completion[i]); | ||
882 | |||
883 | return IRQ_HANDLED; | ||
884 | } | ||
885 | |||
886 | static int mxs_dcp_probe(struct platform_device *pdev) | ||
887 | { | ||
888 | struct device *dev = &pdev->dev; | ||
889 | struct dcp *sdcp = NULL; | ||
890 | int i, ret; | ||
891 | |||
892 | struct resource *iores; | ||
893 | int dcp_vmi_irq, dcp_irq; | ||
894 | |||
895 | mutex_lock(&global_mutex); | ||
896 | if (global_sdcp) { | ||
897 | dev_err(dev, "Only one DCP instance allowed!\n"); | ||
898 | ret = -ENODEV; | ||
899 | goto err_mutex; | ||
900 | } | ||
901 | |||
902 | iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
903 | dcp_vmi_irq = platform_get_irq(pdev, 0); | ||
904 | dcp_irq = platform_get_irq(pdev, 1); | ||
905 | if (dcp_vmi_irq < 0 || dcp_irq < 0) { | ||
906 | ret = -EINVAL; | ||
907 | goto err_mutex; | ||
908 | } | ||
909 | |||
910 | sdcp = devm_kzalloc(dev, sizeof(*sdcp), GFP_KERNEL); | ||
911 | if (!sdcp) { | ||
912 | ret = -ENOMEM; | ||
913 | goto err_mutex; | ||
914 | } | ||
915 | |||
916 | sdcp->dev = dev; | ||
917 | sdcp->base = devm_ioremap_resource(dev, iores); | ||
918 | if (IS_ERR(sdcp->base)) { | ||
919 | ret = PTR_ERR(sdcp->base); | ||
920 | goto err_mutex; | ||
921 | } | ||
922 | |||
923 | ret = devm_request_irq(dev, dcp_vmi_irq, mxs_dcp_irq, 0, | ||
924 | "dcp-vmi-irq", sdcp); | ||
925 | if (ret) { | ||
926 | dev_err(dev, "Failed to claim DCP VMI IRQ!\n"); | ||
927 | goto err_mutex; | ||
928 | } | ||
929 | |||
930 | ret = devm_request_irq(dev, dcp_irq, mxs_dcp_irq, 0, | ||
931 | "dcp-irq", sdcp); | ||
932 | if (ret) { | ||
933 | dev_err(dev, "Failed to claim DCP IRQ!\n"); | ||
934 | goto err_mutex; | ||
935 | } | ||
936 | |||
937 | /* Allocate coherent helper block. */ | ||
938 | sdcp->coh = kzalloc(sizeof(struct dcp_coherent_block), GFP_KERNEL); | ||
939 | if (!sdcp->coh) { | ||
940 | dev_err(dev, "Error allocating coherent block\n"); | ||
941 | ret = -ENOMEM; | ||
942 | goto err_mutex; | ||
943 | } | ||
944 | |||
945 | /* Restart the DCP block. */ | ||
946 | stmp_reset_block(sdcp->base); | ||
947 | |||
948 | /* Initialize control register. */ | ||
949 | writel(MXS_DCP_CTRL_GATHER_RESIDUAL_WRITES | | ||
950 | MXS_DCP_CTRL_ENABLE_CONTEXT_CACHING | 0xf, | ||
951 | sdcp->base + MXS_DCP_CTRL); | ||
952 | |||
953 | /* Enable all DCP DMA channels. */ | ||
954 | writel(MXS_DCP_CHANNELCTRL_ENABLE_CHANNEL_MASK, | ||
955 | sdcp->base + MXS_DCP_CHANNELCTRL); | ||
956 | |||
957 | /* | ||
958 | * We do not enable context switching. Give the context buffer a | ||
959 | * pointer to an illegal address so if context switching is | ||
960 | * inadvertently enabled, the DCP will return an error instead of | ||
961 | * trashing good memory. The DCP DMA cannot access ROM, so any ROM | ||
962 | * address will do. | ||
963 | */ | ||
964 | writel(0xffff0000, sdcp->base + MXS_DCP_CONTEXT); | ||
965 | for (i = 0; i < DCP_MAX_CHANS; i++) | ||
966 | writel(0xffffffff, sdcp->base + MXS_DCP_CH_N_STAT_CLR(i)); | ||
967 | writel(0xffffffff, sdcp->base + MXS_DCP_STAT_CLR); | ||
968 | |||
969 | global_sdcp = sdcp; | ||
970 | |||
971 | platform_set_drvdata(pdev, sdcp); | ||
972 | |||
973 | for (i = 0; i < DCP_MAX_CHANS; i++) { | ||
974 | mutex_init(&sdcp->mutex[i]); | ||
975 | init_completion(&sdcp->completion[i]); | ||
976 | crypto_init_queue(&sdcp->queue[i], 50); | ||
977 | } | ||
978 | |||
979 | /* Create the SHA and AES handler threads. */ | ||
980 | sdcp->thread[DCP_CHAN_HASH_SHA] = kthread_run(dcp_chan_thread_sha, | ||
981 | NULL, "mxs_dcp_chan/sha"); | ||
982 | if (IS_ERR(sdcp->thread[DCP_CHAN_HASH_SHA])) { | ||
983 | dev_err(dev, "Error starting SHA thread!\n"); | ||
984 | ret = PTR_ERR(sdcp->thread[DCP_CHAN_HASH_SHA]); | ||
985 | goto err_free_coherent; | ||
986 | } | ||
987 | |||
988 | sdcp->thread[DCP_CHAN_CRYPTO] = kthread_run(dcp_chan_thread_aes, | ||
989 | NULL, "mxs_dcp_chan/aes"); | ||
990 | if (IS_ERR(sdcp->thread[DCP_CHAN_CRYPTO])) { | ||
991 | dev_err(dev, "Error starting crypto thread!\n"); | ||
992 | ret = PTR_ERR(sdcp->thread[DCP_CHAN_CRYPTO]); | ||
993 | goto err_destroy_sha_thread; | ||
994 | } | ||
995 | |||
996 | /* Register the various crypto algorithms. */ | ||
997 | sdcp->caps = readl(sdcp->base + MXS_DCP_CAPABILITY1); | ||
998 | |||
999 | if (sdcp->caps & MXS_DCP_CAPABILITY1_AES128) { | ||
1000 | ret = crypto_register_algs(dcp_aes_algs, | ||
1001 | ARRAY_SIZE(dcp_aes_algs)); | ||
1002 | if (ret) { | ||
1003 | /* Failed to register algorithm. */ | ||
1004 | dev_err(dev, "Failed to register AES crypto!\n"); | ||
1005 | goto err_destroy_aes_thread; | ||
1006 | } | ||
1007 | } | ||
1008 | |||
1009 | if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA1) { | ||
1010 | ret = crypto_register_ahash(&dcp_sha1_alg); | ||
1011 | if (ret) { | ||
1012 | dev_err(dev, "Failed to register %s hash!\n", | ||
1013 | dcp_sha1_alg.halg.base.cra_name); | ||
1014 | goto err_unregister_aes; | ||
1015 | } | ||
1016 | } | ||
1017 | |||
1018 | if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA256) { | ||
1019 | ret = crypto_register_ahash(&dcp_sha256_alg); | ||
1020 | if (ret) { | ||
1021 | dev_err(dev, "Failed to register %s hash!\n", | ||
1022 | dcp_sha256_alg.halg.base.cra_name); | ||
1023 | goto err_unregister_sha1; | ||
1024 | } | ||
1025 | } | ||
1026 | |||
1027 | return 0; | ||
1028 | |||
1029 | err_unregister_sha1: | ||
1030 | if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA1) | ||
1031 | crypto_unregister_ahash(&dcp_sha1_alg); | ||
1032 | |||
1033 | err_unregister_aes: | ||
1034 | if (sdcp->caps & MXS_DCP_CAPABILITY1_AES128) | ||
1035 | crypto_unregister_algs(dcp_aes_algs, ARRAY_SIZE(dcp_aes_algs)); | ||
1036 | |||
1037 | err_destroy_aes_thread: | ||
1038 | kthread_stop(sdcp->thread[DCP_CHAN_CRYPTO]); | ||
1039 | |||
1040 | err_destroy_sha_thread: | ||
1041 | kthread_stop(sdcp->thread[DCP_CHAN_HASH_SHA]); | ||
1042 | |||
1043 | err_free_coherent: | ||
1044 | kfree(sdcp->coh); | ||
1045 | err_mutex: | ||
1046 | mutex_unlock(&global_mutex); | ||
1047 | return ret; | ||
1048 | } | ||
1049 | |||
1050 | static int mxs_dcp_remove(struct platform_device *pdev) | ||
1051 | { | ||
1052 | struct dcp *sdcp = platform_get_drvdata(pdev); | ||
1053 | |||
1054 | kfree(sdcp->coh); | ||
1055 | |||
1056 | if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA256) | ||
1057 | crypto_unregister_ahash(&dcp_sha256_alg); | ||
1058 | |||
1059 | if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA1) | ||
1060 | crypto_unregister_ahash(&dcp_sha1_alg); | ||
1061 | |||
1062 | if (sdcp->caps & MXS_DCP_CAPABILITY1_AES128) | ||
1063 | crypto_unregister_algs(dcp_aes_algs, ARRAY_SIZE(dcp_aes_algs)); | ||
1064 | |||
1065 | kthread_stop(sdcp->thread[DCP_CHAN_HASH_SHA]); | ||
1066 | kthread_stop(sdcp->thread[DCP_CHAN_CRYPTO]); | ||
1067 | |||
1068 | platform_set_drvdata(pdev, NULL); | ||
1069 | |||
1070 | mutex_lock(&global_mutex); | ||
1071 | global_sdcp = NULL; | ||
1072 | mutex_unlock(&global_mutex); | ||
1073 | |||
1074 | return 0; | ||
1075 | } | ||
1076 | |||
1077 | static const struct of_device_id mxs_dcp_dt_ids[] = { | ||
1078 | { .compatible = "fsl,imx23-dcp", .data = NULL, }, | ||
1079 | { .compatible = "fsl,imx28-dcp", .data = NULL, }, | ||
1080 | { /* sentinel */ } | ||
1081 | }; | ||
1082 | |||
1083 | MODULE_DEVICE_TABLE(of, mxs_dcp_dt_ids); | ||
1084 | |||
1085 | static struct platform_driver mxs_dcp_driver = { | ||
1086 | .probe = mxs_dcp_probe, | ||
1087 | .remove = mxs_dcp_remove, | ||
1088 | .driver = { | ||
1089 | .name = "mxs-dcp", | ||
1090 | .owner = THIS_MODULE, | ||
1091 | .of_match_table = mxs_dcp_dt_ids, | ||
1092 | }, | ||
1093 | }; | ||
1094 | |||
1095 | module_platform_driver(mxs_dcp_driver); | ||
1096 | |||
1097 | MODULE_AUTHOR("Marek Vasut <marex@denx.de>"); | ||
1098 | MODULE_DESCRIPTION("Freescale MXS DCP Driver"); | ||
1099 | MODULE_LICENSE("GPL"); | ||
1100 | MODULE_ALIAS("platform:mxs-dcp"); | ||
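The driver above exposes the DCP only through the generic crypto API: the "ecb(aes)"/"cbc(aes)" ablkcipher algorithms and the "sha1"/"sha256" ahash algorithms registered in mxs_dcp_probe(). In-kernel users therefore drive it like any other asynchronous implementation. A minimal sketch of hashing a buffer through the registered "sha256" algorithm follows; the helper names and completion bookkeeping are illustrative (not part of this driver), and the source buffer is assumed to be DMA-able rather than on the stack.

#include <linux/err.h>
#include <linux/completion.h>
#include <linux/scatterlist.h>
#include <crypto/hash.h>

struct dcp_test_result {
	struct completion completion;
	int err;
};

/* Async completion callback: the DCP algs are CRYPTO_ALG_ASYNC, so the
 * digest normally finishes on a channel kthread and signals us here. */
static void dcp_test_done(struct crypto_async_request *req, int err)
{
	struct dcp_test_result *res = req->data;

	if (err == -EINPROGRESS)
		return;
	res->err = err;
	complete(&res->completion);
}

static int dcp_test_sha256(const u8 *buf, unsigned int len, u8 *digest)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	struct dcp_test_result res;
	int ret;

	tfm = crypto_alloc_ahash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_free_tfm;
	}

	init_completion(&res.completion);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   dcp_test_done, &res);
	sg_init_one(&sg, buf, len);
	ahash_request_set_crypt(req, &sg, digest, len);

	ret = crypto_ahash_digest(req);
	if (ret == -EINPROGRESS || ret == -EBUSY) {
		wait_for_completion(&res.completion);
		ret = res.err;
	}

	ahash_request_free(req);
out_free_tfm:
	crypto_free_ahash(tfm);
	return ret;
}

With the DCP present, the priority-400 "sha256-dcp" implementation registered above outranks the generic C implementation, so a request like this is routed to the hardware channel threads.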
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index a9ccbf14096e..dde41f1df608 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c | |||
@@ -784,6 +784,7 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req) | |||
784 | static int omap_aes_cra_init(struct crypto_tfm *tfm) | 784 | static int omap_aes_cra_init(struct crypto_tfm *tfm) |
785 | { | 785 | { |
786 | struct omap_aes_dev *dd = NULL; | 786 | struct omap_aes_dev *dd = NULL; |
787 | int err; | ||
787 | 788 | ||
788 | /* Find AES device, currently picks the first device */ | 789 | /* Find AES device, currently picks the first device */ |
789 | spin_lock_bh(&list_lock); | 790 | spin_lock_bh(&list_lock); |
@@ -792,7 +793,13 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm) | |||
792 | } | 793 | } |
793 | spin_unlock_bh(&list_lock); | 794 | spin_unlock_bh(&list_lock); |
794 | 795 | ||
795 | pm_runtime_get_sync(dd->dev); | 796 | err = pm_runtime_get_sync(dd->dev); |
797 | if (err < 0) { | ||
798 | dev_err(dd->dev, "%s: failed to get_sync(%d)\n", | ||
799 | __func__, err); | ||
800 | return err; | ||
801 | } | ||
802 | |||
796 | tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx); | 803 | tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx); |
797 | 804 | ||
798 | return 0; | 805 | return 0; |
@@ -1182,7 +1189,12 @@ static int omap_aes_probe(struct platform_device *pdev) | |||
1182 | dd->phys_base = res.start; | 1189 | dd->phys_base = res.start; |
1183 | 1190 | ||
1184 | pm_runtime_enable(dev); | 1191 | pm_runtime_enable(dev); |
1185 | pm_runtime_get_sync(dev); | 1192 | err = pm_runtime_get_sync(dev); |
1193 | if (err < 0) { | ||
1194 | dev_err(dev, "%s: failed to get_sync(%d)\n", | ||
1195 | __func__, err); | ||
1196 | goto err_res; | ||
1197 | } | ||
1186 | 1198 | ||
1187 | omap_aes_dma_stop(dd); | 1199 | omap_aes_dma_stop(dd); |
1188 | 1200 | ||
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index e45aaaf0db30..a727a6a59653 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c | |||
@@ -789,10 +789,13 @@ static int omap_sham_update_cpu(struct omap_sham_dev *dd) | |||
789 | dev_dbg(dd->dev, "cpu: bufcnt: %u, digcnt: %d, final: %d\n", | 789 | dev_dbg(dd->dev, "cpu: bufcnt: %u, digcnt: %d, final: %d\n", |
790 | ctx->bufcnt, ctx->digcnt, final); | 790 | ctx->bufcnt, ctx->digcnt, final); |
791 | 791 | ||
792 | bufcnt = ctx->bufcnt; | 792 | if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) { |
793 | ctx->bufcnt = 0; | 793 | bufcnt = ctx->bufcnt; |
794 | ctx->bufcnt = 0; | ||
795 | return omap_sham_xmit_cpu(dd, ctx->buffer, bufcnt, final); | ||
796 | } | ||
794 | 797 | ||
795 | return omap_sham_xmit_cpu(dd, ctx->buffer, bufcnt, final); | 798 | return 0; |
796 | } | 799 | } |
797 | 800 | ||
798 | static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) | 801 | static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) |
@@ -1103,6 +1106,9 @@ static int omap_sham_update(struct ahash_request *req) | |||
1103 | return 0; | 1106 | return 0; |
1104 | } | 1107 | } |
1105 | 1108 | ||
1109 | if (dd->polling_mode) | ||
1110 | ctx->flags |= BIT(FLAGS_CPU); | ||
1111 | |||
1106 | return omap_sham_enqueue(req, OP_UPDATE); | 1112 | return omap_sham_enqueue(req, OP_UPDATE); |
1107 | } | 1113 | } |
1108 | 1114 | ||
@@ -1970,7 +1976,8 @@ err_algs: | |||
1970 | crypto_unregister_ahash( | 1976 | crypto_unregister_ahash( |
1971 | &dd->pdata->algs_info[i].algs_list[j]); | 1977 | &dd->pdata->algs_info[i].algs_list[j]); |
1972 | pm_runtime_disable(dev); | 1978 | pm_runtime_disable(dev); |
1973 | dma_release_channel(dd->dma_lch); | 1979 | if (dd->dma_lch) |
1980 | dma_release_channel(dd->dma_lch); | ||
1974 | data_err: | 1981 | data_err: |
1975 | dev_err(dev, "initialization failed.\n"); | 1982 | dev_err(dev, "initialization failed.\n"); |
1976 | 1983 | ||
@@ -1994,7 +2001,9 @@ static int omap_sham_remove(struct platform_device *pdev) | |||
1994 | &dd->pdata->algs_info[i].algs_list[j]); | 2001 | &dd->pdata->algs_info[i].algs_list[j]); |
1995 | tasklet_kill(&dd->done_task); | 2002 | tasklet_kill(&dd->done_task); |
1996 | pm_runtime_disable(&pdev->dev); | 2003 | pm_runtime_disable(&pdev->dev); |
1997 | dma_release_channel(dd->dma_lch); | 2004 | |
2005 | if (dd->dma_lch) | ||
2006 | dma_release_channel(dd->dma_lch); | ||
1998 | 2007 | ||
1999 | return 0; | 2008 | return 0; |
2000 | } | 2009 | } |
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index b44f4ddc565c..5967667e1a8f 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c | |||
@@ -338,20 +338,29 @@ DEF_TALITOS_DONE(ch1_3, TALITOS_ISR_CH_1_3_DONE) | |||
338 | static u32 current_desc_hdr(struct device *dev, int ch) | 338 | static u32 current_desc_hdr(struct device *dev, int ch) |
339 | { | 339 | { |
340 | struct talitos_private *priv = dev_get_drvdata(dev); | 340 | struct talitos_private *priv = dev_get_drvdata(dev); |
341 | int tail = priv->chan[ch].tail; | 341 | int tail, iter; |
342 | dma_addr_t cur_desc; | 342 | dma_addr_t cur_desc; |
343 | 343 | ||
344 | cur_desc = in_be32(priv->chan[ch].reg + TALITOS_CDPR_LO); | 344 | cur_desc = ((u64)in_be32(priv->chan[ch].reg + TALITOS_CDPR)) << 32; |
345 | cur_desc |= in_be32(priv->chan[ch].reg + TALITOS_CDPR_LO); | ||
345 | 346 | ||
346 | while (priv->chan[ch].fifo[tail].dma_desc != cur_desc) { | 347 | if (!cur_desc) { |
347 | tail = (tail + 1) & (priv->fifo_len - 1); | 348 | dev_err(dev, "CDPR is NULL, giving up search for offending descriptor\n"); |
348 | if (tail == priv->chan[ch].tail) { | 349 | return 0; |
350 | } | ||
351 | |||
352 | tail = priv->chan[ch].tail; | ||
353 | |||
354 | iter = tail; | ||
355 | while (priv->chan[ch].fifo[iter].dma_desc != cur_desc) { | ||
356 | iter = (iter + 1) & (priv->fifo_len - 1); | ||
357 | if (iter == tail) { | ||
349 | dev_err(dev, "couldn't locate current descriptor\n"); | 358 | dev_err(dev, "couldn't locate current descriptor\n"); |
350 | return 0; | 359 | return 0; |
351 | } | 360 | } |
352 | } | 361 | } |
353 | 362 | ||
354 | return priv->chan[ch].fifo[tail].desc->hdr; | 363 | return priv->chan[ch].fifo[iter].desc->hdr; |
355 | } | 364 | } |
356 | 365 | ||
357 | /* | 366 | /* |
@@ -2486,8 +2495,6 @@ static int talitos_remove(struct platform_device *ofdev) | |||
2486 | 2495 | ||
2487 | iounmap(priv->reg); | 2496 | iounmap(priv->reg); |
2488 | 2497 | ||
2489 | dev_set_drvdata(dev, NULL); | ||
2490 | |||
2491 | kfree(priv); | 2498 | kfree(priv); |
2492 | 2499 | ||
2493 | return 0; | 2500 | return 0; |
diff --git a/include/linux/ccp.h b/include/linux/ccp.h new file mode 100644 index 000000000000..b941ab9f762b --- /dev/null +++ b/include/linux/ccp.h | |||
@@ -0,0 +1,537 @@ | |||
1 | /* | ||
2 | * AMD Cryptographic Coprocessor (CCP) driver | ||
3 | * | ||
4 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #ifndef __CCP_H__ | ||
14 | #define __CCP_H__ | ||
15 | |||
16 | #include <linux/scatterlist.h> | ||
17 | #include <linux/workqueue.h> | ||
18 | #include <linux/list.h> | ||
19 | #include <crypto/aes.h> | ||
20 | #include <crypto/sha.h> | ||
21 | |||
22 | |||
23 | struct ccp_device; | ||
24 | struct ccp_cmd; | ||
25 | |||
26 | #if defined(CONFIG_CRYPTO_DEV_CCP_DD) || \ | ||
27 | defined(CONFIG_CRYPTO_DEV_CCP_DD_MODULE) | ||
28 | |||
29 | /** | ||
30 | * ccp_enqueue_cmd - queue an operation for processing by the CCP | ||
31 | * | ||
32 | * @cmd: ccp_cmd struct to be processed | ||
33 | * | ||
34 | * Refer to the ccp_cmd struct below for required fields. | ||
35 | * | ||
36 | * Queue a cmd to be processed by the CCP. If queueing the cmd | ||
37 | * would exceed the defined length of the cmd queue, the cmd will | ||
38 | * only be queued if the CCP_CMD_MAY_BACKLOG flag is set, in which | ||
39 | * case the return code will be -EBUSY. | ||
40 | * | ||
41 | * The callback routine specified in the ccp_cmd struct will be | ||
42 | * called to notify the caller of completion (if the cmd was not | ||
43 | * backlogged) or advancement out of the backlog. If the cmd has | ||
44 | * advanced out of the backlog the "err" value of the callback | ||
45 | * will be -EINPROGRESS. Any other "err" value during callback is | ||
46 | * the result of the operation. | ||
47 | * | ||
48 | * The cmd has been successfully queued if: | ||
49 | * the return code is -EINPROGRESS or | ||
50 | * the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set | ||
51 | */ | ||
52 | int ccp_enqueue_cmd(struct ccp_cmd *cmd); | ||
53 | |||
54 | #else /* CONFIG_CRYPTO_DEV_CCP_DD is not enabled */ | ||
55 | |||
56 | static inline int ccp_enqueue_cmd(struct ccp_cmd *cmd) | ||
57 | { | ||
58 | return -ENODEV; | ||
59 | } | ||
60 | |||
61 | #endif /* CONFIG_CRYPTO_DEV_CCP_DD */ | ||
62 | |||
63 | |||
64 | /***** AES engine *****/ | ||
65 | /** | ||
66 | * ccp_aes_type - AES key size | ||
67 | * | ||
68 | * @CCP_AES_TYPE_128: 128-bit key | ||
69 | * @CCP_AES_TYPE_192: 192-bit key | ||
70 | * @CCP_AES_TYPE_256: 256-bit key | ||
71 | */ | ||
72 | enum ccp_aes_type { | ||
73 | CCP_AES_TYPE_128 = 0, | ||
74 | CCP_AES_TYPE_192, | ||
75 | CCP_AES_TYPE_256, | ||
76 | CCP_AES_TYPE__LAST, | ||
77 | }; | ||
78 | |||
79 | /** | ||
80 | * ccp_aes_mode - AES operation mode | ||
81 | * | ||
82 | * @CCP_AES_MODE_ECB: ECB mode | ||
83 | * @CCP_AES_MODE_CBC: CBC mode | ||
84 | * @CCP_AES_MODE_OFB: OFB mode | ||
85 | * @CCP_AES_MODE_CFB: CFB mode | ||
86 | * @CCP_AES_MODE_CTR: CTR mode | ||
87 | * @CCP_AES_MODE_CMAC: CMAC mode | ||
88 | */ | ||
89 | enum ccp_aes_mode { | ||
90 | CCP_AES_MODE_ECB = 0, | ||
91 | CCP_AES_MODE_CBC, | ||
92 | CCP_AES_MODE_OFB, | ||
93 | CCP_AES_MODE_CFB, | ||
94 | CCP_AES_MODE_CTR, | ||
95 | CCP_AES_MODE_CMAC, | ||
96 | CCP_AES_MODE__LAST, | ||
97 | }; | ||
98 | |||
99 | /** | ||
100 | * ccp_aes_action - AES operation | ||
101 | * | ||
102 | * @CCP_AES_ACTION_DECRYPT: AES decrypt operation | ||
103 | * @CCP_AES_ACTION_ENCRYPT: AES encrypt operation | ||
104 | */ | ||
105 | enum ccp_aes_action { | ||
106 | CCP_AES_ACTION_DECRYPT = 0, | ||
107 | CCP_AES_ACTION_ENCRYPT, | ||
108 | CCP_AES_ACTION__LAST, | ||
109 | }; | ||
110 | |||
111 | /** | ||
112 | * struct ccp_aes_engine - CCP AES operation | ||
113 | * @type: AES operation key size | ||
114 | * @mode: AES operation mode | ||
115 | * @action: AES operation (decrypt/encrypt) | ||
116 | * @key: key to be used for this AES operation | ||
117 | * @key_len: length in bytes of key | ||
118 | * @iv: IV to be used for this AES operation | ||
119 | * @iv_len: length in bytes of iv | ||
120 | * @src: data to be used for this operation | ||
121 | * @dst: data produced by this operation | ||
122 | * @src_len: length in bytes of data used for this operation | ||
123 | * @cmac_final: indicates final operation when running in CMAC mode | ||
124 | * @cmac_key: K1/K2 key used in final CMAC operation | ||
125 | * @cmac_key_len: length in bytes of cmac_key | ||
126 | * | ||
127 | * Variables required to be set when calling ccp_enqueue_cmd(): | ||
128 | * - type, mode, action, key, key_len, src, dst, src_len | ||
129 | * - iv, iv_len for any mode other than ECB | ||
130 | * - cmac_final for CMAC mode | ||
131 | * - cmac_key, cmac_key_len for CMAC mode if cmac_final is non-zero | ||
132 | * | ||
133 | * The iv variable is used as both input and output. On completion of the | ||
134 | * AES operation the new IV overwrites the old IV. | ||
135 | */ | ||
136 | struct ccp_aes_engine { | ||
137 | enum ccp_aes_type type; | ||
138 | enum ccp_aes_mode mode; | ||
139 | enum ccp_aes_action action; | ||
140 | |||
141 | struct scatterlist *key; | ||
142 | u32 key_len; /* In bytes */ | ||
143 | |||
144 | struct scatterlist *iv; | ||
145 | u32 iv_len; /* In bytes */ | ||
146 | |||
147 | struct scatterlist *src, *dst; | ||
148 | u64 src_len; /* In bytes */ | ||
149 | |||
150 | u32 cmac_final; /* Indicates final cmac cmd */ | ||
151 | struct scatterlist *cmac_key; /* K1/K2 cmac key required for | ||
152 | * final cmac cmd */ | ||
153 | u32 cmac_key_len; /* In bytes */ | ||
154 | }; | ||
155 | |||
156 | /***** XTS-AES engine *****/ | ||
157 | /** | ||
158 | * ccp_xts_aes_unit_size - XTS unit size | ||
159 | * | ||
160 | * @CCP_XTS_AES_UNIT_SIZE_16: Unit size of 16 bytes | ||
161 | * @CCP_XTS_AES_UNIT_SIZE_512: Unit size of 512 bytes | ||
162 | * @CCP_XTS_AES_UNIT_SIZE_1024: Unit size of 1024 bytes | ||
163 | * @CCP_XTS_AES_UNIT_SIZE_2048: Unit size of 2048 bytes | ||
164 | * @CCP_XTS_AES_UNIT_SIZE_4096: Unit size of 4096 bytes | ||
165 | */ | ||
166 | enum ccp_xts_aes_unit_size { | ||
167 | CCP_XTS_AES_UNIT_SIZE_16 = 0, | ||
168 | CCP_XTS_AES_UNIT_SIZE_512, | ||
169 | CCP_XTS_AES_UNIT_SIZE_1024, | ||
170 | CCP_XTS_AES_UNIT_SIZE_2048, | ||
171 | CCP_XTS_AES_UNIT_SIZE_4096, | ||
172 | CCP_XTS_AES_UNIT_SIZE__LAST, | ||
173 | }; | ||
174 | |||
175 | /** | ||
176 | * struct ccp_xts_aes_engine - CCP XTS AES operation | ||
177 | * @action: AES operation (decrypt/encrypt) | ||
178 | * @unit_size: unit size of the XTS operation | ||
179 | * @key: key to be used for this XTS AES operation | ||
180 | * @key_len: length in bytes of key | ||
181 | * @iv: IV to be used for this XTS AES operation | ||
182 | * @iv_len: length in bytes of iv | ||
183 | * @src: data to be used for this operation | ||
184 | * @dst: data produced by this operation | ||
185 | * @src_len: length in bytes of data used for this operation | ||
186 | * @final: indicates final XTS operation | ||
187 | * | ||
188 | * Variables required to be set when calling ccp_enqueue_cmd(): | ||
189 | * - action, unit_size, key, key_len, iv, iv_len, src, dst, src_len, final | ||
190 | * | ||
191 | * The iv variable is used as both input and output. On completion of the | ||
192 | * AES operation the new IV overwrites the old IV. | ||
193 | */ | ||
194 | struct ccp_xts_aes_engine { | ||
195 | enum ccp_aes_action action; | ||
196 | enum ccp_xts_aes_unit_size unit_size; | ||
197 | |||
198 | struct scatterlist *key; | ||
199 | u32 key_len; /* In bytes */ | ||
200 | |||
201 | struct scatterlist *iv; | ||
202 | u32 iv_len; /* In bytes */ | ||
203 | |||
204 | struct scatterlist *src, *dst; | ||
205 | u64 src_len; /* In bytes */ | ||
206 | |||
207 | u32 final; | ||
208 | }; | ||
209 | |||
210 | /***** SHA engine *****/ | ||
211 | #define CCP_SHA_BLOCKSIZE SHA256_BLOCK_SIZE | ||
212 | #define CCP_SHA_CTXSIZE SHA256_DIGEST_SIZE | ||
213 | |||
214 | /** | ||
215 | * ccp_sha_type - type of SHA operation | ||
216 | * | ||
217 | * @CCP_SHA_TYPE_1: SHA-1 operation | ||
218 | * @CCP_SHA_TYPE_224: SHA-224 operation | ||
219 | * @CCP_SHA_TYPE_256: SHA-256 operation | ||
220 | */ | ||
221 | enum ccp_sha_type { | ||
222 | CCP_SHA_TYPE_1 = 1, | ||
223 | CCP_SHA_TYPE_224, | ||
224 | CCP_SHA_TYPE_256, | ||
225 | CCP_SHA_TYPE__LAST, | ||
226 | }; | ||
227 | |||
228 | /** | ||
229 | * struct ccp_sha_engine - CCP SHA operation | ||
230 | * @type: Type of SHA operation | ||
231 | * @ctx: current hash value | ||
232 | * @ctx_len: length in bytes of hash value | ||
233 | * @src: data to be used for this operation | ||
234 | * @src_len: length in bytes of data used for this operation | ||
235 | * @final: indicates final SHA operation | ||
236 | * @msg_bits: total length of the message in bits used in final SHA operation | ||
237 | * | ||
238 | * Variables required to be set when calling ccp_enqueue_cmd(): | ||
239 | * - type, ctx, ctx_len, src, src_len, final | ||
240 | * - msg_bits if final is non-zero | ||
241 | * | ||
242 | * The ctx variable is used as both input and output. On completion of the | ||
243 | * SHA operation the new hash value overwrites the old hash value. | ||
244 | */ | ||
245 | struct ccp_sha_engine { | ||
246 | enum ccp_sha_type type; | ||
247 | |||
248 | struct scatterlist *ctx; | ||
249 | u32 ctx_len; /* In bytes */ | ||
250 | |||
251 | struct scatterlist *src; | ||
252 | u64 src_len; /* In bytes */ | ||
253 | |||
254 | u32 final; /* Indicates final sha cmd */ | ||
255 | u64 msg_bits; /* Message length in bits required for | ||
256 | * final sha cmd */ | ||
257 | }; | ||
258 | |||
259 | /***** RSA engine *****/ | ||
260 | /** | ||
261 | * struct ccp_rsa_engine - CCP RSA operation | ||
262 | * @key_size: length in bits of RSA key | ||
263 | * @exp: RSA exponent | ||
264 | * @exp_len: length in bytes of exponent | ||
265 | * @mod: RSA modulus | ||
266 | * @mod_len: length in bytes of modulus | ||
267 | * @src: data to be used for this operation | ||
268 | * @dst: data produced by this operation | ||
269 | * @src_len: length in bytes of data used for this operation | ||
270 | * | ||
271 | * Variables required to be set when calling ccp_enqueue_cmd(): | ||
272 | * - key_size, exp, exp_len, mod, mod_len, src, dst, src_len | ||
273 | */ | ||
274 | struct ccp_rsa_engine { | ||
275 | u32 key_size; /* In bits */ | ||
276 | |||
277 | struct scatterlist *exp; | ||
278 | u32 exp_len; /* In bytes */ | ||
279 | |||
280 | struct scatterlist *mod; | ||
281 | u32 mod_len; /* In bytes */ | ||
282 | |||
283 | struct scatterlist *src, *dst; | ||
284 | u32 src_len; /* In bytes */ | ||
285 | }; | ||
286 | |||
287 | /***** Passthru engine *****/ | ||
288 | /** | ||
289 | * ccp_passthru_bitwise - type of bitwise passthru operation | ||
290 | * | ||
291 | * @CCP_PASSTHRU_BITWISE_NOOP: no bitwise operation performed | ||
292 | * @CCP_PASSTHRU_BITWISE_AND: perform bitwise AND of src with mask | ||
293 | * @CCP_PASSTHRU_BITWISE_OR: perform bitwise OR of src with mask | ||
294 | * @CCP_PASSTHRU_BITWISE_XOR: perform bitwise XOR of src with mask | ||
295 | * @CCP_PASSTHRU_BITWISE_MASK: overwrite with mask | ||
296 | */ | ||
297 | enum ccp_passthru_bitwise { | ||
298 | CCP_PASSTHRU_BITWISE_NOOP = 0, | ||
299 | CCP_PASSTHRU_BITWISE_AND, | ||
300 | CCP_PASSTHRU_BITWISE_OR, | ||
301 | CCP_PASSTHRU_BITWISE_XOR, | ||
302 | CCP_PASSTHRU_BITWISE_MASK, | ||
303 | CCP_PASSTHRU_BITWISE__LAST, | ||
304 | }; | ||
305 | |||
306 | /** | ||
307 | * ccp_passthru_byteswap - type of byteswap passthru operation | ||
308 | * | ||
309 | * @CCP_PASSTHRU_BYTESWAP_NOOP: no byte swapping performed | ||
310 | * @CCP_PASSTHRU_BYTESWAP_32BIT: swap bytes within 32-bit words | ||
311 | * @CCP_PASSTHRU_BYTESWAP_256BIT: swap bytes within 256-bit words | ||
312 | */ | ||
313 | enum ccp_passthru_byteswap { | ||
314 | CCP_PASSTHRU_BYTESWAP_NOOP = 0, | ||
315 | CCP_PASSTHRU_BYTESWAP_32BIT, | ||
316 | CCP_PASSTHRU_BYTESWAP_256BIT, | ||
317 | CCP_PASSTHRU_BYTESWAP__LAST, | ||
318 | }; | ||
319 | |||
320 | /** | ||
321 | * struct ccp_passthru_engine - CCP pass-through operation | ||
322 | * @bit_mod: bitwise operation to perform | ||
323 | * @byte_swap: byteswap operation to perform | ||
324 | * @mask: mask to be applied to data | ||
325 | * @mask_len: length in bytes of mask | ||
326 | * @src: data to be used for this operation | ||
327 | * @dst: data produced by this operation | ||
328 | * @src_len: length in bytes of data used for this operation | ||
329 | * @final: indicate final pass-through operation | ||
330 | * | ||
331 | * Variables required to be set when calling ccp_enqueue_cmd(): | ||
332 | * - bit_mod, byte_swap, src, dst, src_len | ||
333 | * - mask, mask_len if bit_mod is not CCP_PASSTHRU_BITWISE_NOOP | ||
334 | */ | ||
335 | struct ccp_passthru_engine { | ||
336 | enum ccp_passthru_bitwise bit_mod; | ||
337 | enum ccp_passthru_byteswap byte_swap; | ||
338 | |||
339 | struct scatterlist *mask; | ||
340 | u32 mask_len; /* In bytes */ | ||
341 | |||
342 | struct scatterlist *src, *dst; | ||
343 | u64 src_len; /* In bytes */ | ||
344 | |||
345 | u32 final; | ||
346 | }; | ||
347 | |||
348 | /***** ECC engine *****/ | ||
349 | #define CCP_ECC_MODULUS_BYTES 48 /* 384-bits */ | ||
350 | #define CCP_ECC_MAX_OPERANDS 6 | ||
351 | #define CCP_ECC_MAX_OUTPUTS 3 | ||
352 | |||
353 | /** | ||
354 | * ccp_ecc_function - type of ECC function | ||
355 | * | ||
356 | * @CCP_ECC_FUNCTION_MMUL_384BIT: 384-bit modular multiplication | ||
357 | * @CCP_ECC_FUNCTION_MADD_384BIT: 384-bit modular addition | ||
358 | * @CCP_ECC_FUNCTION_MINV_384BIT: 384-bit multiplicative inverse | ||
359 | * @CCP_ECC_FUNCTION_PADD_384BIT: 384-bit point addition | ||
360 | * @CCP_ECC_FUNCTION_PMUL_384BIT: 384-bit point multiplication | ||
361 | * @CCP_ECC_FUNCTION_PDBL_384BIT: 384-bit point doubling | ||
362 | */ | ||
363 | enum ccp_ecc_function { | ||
364 | CCP_ECC_FUNCTION_MMUL_384BIT = 0, | ||
365 | CCP_ECC_FUNCTION_MADD_384BIT, | ||
366 | CCP_ECC_FUNCTION_MINV_384BIT, | ||
367 | CCP_ECC_FUNCTION_PADD_384BIT, | ||
368 | CCP_ECC_FUNCTION_PMUL_384BIT, | ||
369 | CCP_ECC_FUNCTION_PDBL_384BIT, | ||
370 | }; | ||
371 | |||
372 | /** | ||
373 | * struct ccp_ecc_modular_math - CCP ECC modular math parameters | ||
374 | * @operand_1: first operand for the modular math operation | ||
375 | * @operand_1_len: length of the first operand | ||
376 | * @operand_2: second operand for the modular math operation | ||
377 | * (not used for CCP_ECC_FUNCTION_MINV_384BIT) | ||
378 | * @operand_2_len: length of the second operand | ||
379 | * (not used for CCP_ECC_FUNCTION_MINV_384BIT) | ||
380 | * @result: result of the modular math operation | ||
381 | * @result_len: length of the supplied result buffer | ||
382 | */ | ||
383 | struct ccp_ecc_modular_math { | ||
384 | struct scatterlist *operand_1; | ||
385 | unsigned int operand_1_len; /* In bytes */ | ||
386 | |||
387 | struct scatterlist *operand_2; | ||
388 | unsigned int operand_2_len; /* In bytes */ | ||
389 | |||
390 | struct scatterlist *result; | ||
391 | unsigned int result_len; /* In bytes */ | ||
392 | }; | ||
393 | |||
394 | /** | ||
395 | * struct ccp_ecc_point - CCP ECC point definition | ||
396 | * @x: the x coordinate of the ECC point | ||
397 | * @x_len: the length of the x coordinate | ||
398 | * @y: the y coordinate of the ECC point | ||
399 | * @y_len: the length of the y coordinate | ||
400 | */ | ||
401 | struct ccp_ecc_point { | ||
402 | struct scatterlist *x; | ||
403 | unsigned int x_len; /* In bytes */ | ||
404 | |||
405 | struct scatterlist *y; | ||
406 | unsigned int y_len; /* In bytes */ | ||
407 | }; | ||
408 | |||
409 | /** | ||
410 | * struct ccp_ecc_point_math - CCP ECC point math parameters | ||
411 | * @point_1: the first point of the ECC point math operation | ||
412 | * @point_2: the second point of the ECC point math operation | ||
413 | * (only used for CCP_ECC_FUNCTION_PADD_384BIT) | ||
414 | * @domain_a: the a parameter of the ECC curve | ||
415 | * @domain_a_len: the length of the a parameter | ||
416 | * @scalar: the scalar parameter for the point math operation | ||
417 | * (only used for CCP_ECC_FUNCTION_PMUL_384BIT) | ||
418 | * @scalar_len: the length of the scalar parameter | ||
419 | * (only used for CCP_ECC_FUNCTION_PMUL_384BIT) | ||
420 | * @result: the point resulting from the point math operation | ||
421 | */ | ||
422 | struct ccp_ecc_point_math { | ||
423 | struct ccp_ecc_point point_1; | ||
424 | struct ccp_ecc_point point_2; | ||
425 | |||
426 | struct scatterlist *domain_a; | ||
427 | unsigned int domain_a_len; /* In bytes */ | ||
428 | |||
429 | struct scatterlist *scalar; | ||
430 | unsigned int scalar_len; /* In bytes */ | ||
431 | |||
432 | struct ccp_ecc_point result; | ||
433 | }; | ||
434 | |||
435 | /** | ||
436 | * struct ccp_ecc_engine - CCP ECC operation | ||
437 | * @function: ECC function to perform | ||
438 | * @mod: ECC modulus | ||
439 | * @mod_len: length in bytes of modulus | ||
440 | * @mm: modular math parameters | ||
441 | * @pm: point math parameters | ||
442 | * @ecc_result: result of the ECC operation | ||
443 | * | ||
444 | * Variables required to be set when calling ccp_enqueue_cmd(): | ||
445 | * - function, mod, mod_len | ||
446 | * - mm or pm parameters for the selected function | ||
447 | * - ecc_result | ||
448 | */ | ||
449 | struct ccp_ecc_engine { | ||
450 | enum ccp_ecc_function function; | ||
451 | |||
452 | struct scatterlist *mod; | ||
453 | u32 mod_len; /* In bytes */ | ||
454 | |||
455 | union { | ||
456 | struct ccp_ecc_modular_math mm; | ||
457 | struct ccp_ecc_point_math pm; | ||
458 | } u; | ||
459 | |||
460 | u16 ecc_result; | ||
461 | }; | ||
462 | |||
463 | |||
464 | /** | ||
465 | * ccp_engine - CCP operation identifiers | ||
466 | * | ||
467 | * @CCP_ENGINE_AES: AES operation | ||
468 | * @CCP_ENGINE_XTS_AES_128: 128-bit XTS AES operation | ||
469 | * @CCP_ENGINE_RSVD1: unused | ||
470 | * @CCP_ENGINE_SHA: SHA operation | ||
471 | * @CCP_ENGINE_RSA: RSA operation | ||
472 | * @CCP_ENGINE_PASSTHRU: pass-through operation | ||
473 | * @CCP_ENGINE_ZLIB_DECOMPRESS: unused | ||
474 | * @CCP_ENGINE_ECC: ECC operation | ||
475 | */ | ||
476 | enum ccp_engine { | ||
477 | CCP_ENGINE_AES = 0, | ||
478 | CCP_ENGINE_XTS_AES_128, | ||
479 | CCP_ENGINE_RSVD1, | ||
480 | CCP_ENGINE_SHA, | ||
481 | CCP_ENGINE_RSA, | ||
482 | CCP_ENGINE_PASSTHRU, | ||
483 | CCP_ENGINE_ZLIB_DECOMPRESS, | ||
484 | CCP_ENGINE_ECC, | ||
485 | CCP_ENGINE__LAST, | ||
486 | }; | ||
487 | |||
488 | /* Flag values for flags member of ccp_cmd */ | ||
489 | #define CCP_CMD_MAY_BACKLOG 0x00000001 | ||
490 | |||
491 | /** | ||
492 | * struct ccp_cmd - CCP operation request | ||
493 | * @entry: list element (ccp driver use only) | ||
494 | * @work: work element used for callbacks (ccp driver use only) | ||
495 | * @ccp: CCP device to be run on (ccp driver use only) | ||
496 | * @ret: operation return code (ccp driver use only) | ||
497 | * @flags: cmd processing flags | ||
498 | * @engine: CCP operation to perform | ||
499 | * @engine_error: CCP engine return code | ||
500 | * @u: engine specific structures, refer to specific engine struct below | ||
501 | * @callback: operation completion callback function | ||
502 | * @data: parameter value to be supplied to the callback function | ||
503 | * | ||
504 | * Variables required to be set when calling ccp_enqueue_cmd(): | ||
505 | * - engine, callback | ||
506 | * - See the operation structures below for what is required for each | ||
507 | * operation. | ||
508 | */ | ||
509 | struct ccp_cmd { | ||
510 | /* The list_head, work_struct, ccp and ret variables are for use | ||
511 | * by the CCP driver only. | ||
512 | */ | ||
513 | struct list_head entry; | ||
514 | struct work_struct work; | ||
515 | struct ccp_device *ccp; | ||
516 | int ret; | ||
517 | |||
518 | u32 flags; | ||
519 | |||
520 | enum ccp_engine engine; | ||
521 | u32 engine_error; | ||
522 | |||
523 | union { | ||
524 | struct ccp_aes_engine aes; | ||
525 | struct ccp_xts_aes_engine xts; | ||
526 | struct ccp_sha_engine sha; | ||
527 | struct ccp_rsa_engine rsa; | ||
528 | struct ccp_passthru_engine passthru; | ||
529 | struct ccp_ecc_engine ecc; | ||
530 | } u; | ||
531 | |||
532 | /* Completion callback support */ | ||
533 | void (*callback)(void *data, int err); | ||
534 | void *data; | ||
535 | }; | ||
536 | |||
537 | #endif | ||
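The kernel-doc above lists which fields must be populated per engine; a minimal sketch of submitting a single AES-CBC encrypt through ccp_enqueue_cmd() could look like the following. The wrapper and callback names are illustrative, the scatterlists are assumed to be prepared by the caller, and only the AES engine fields documented above are filled in.

#include <linux/ccp.h>
#include <linux/completion.h>
#include <linux/scatterlist.h>
#include <linux/string.h>

struct ccp_test_ctx {
	struct completion done;
	int err;
};

/* Per the header above, a callback err of -EINPROGRESS only means the cmd
 * advanced out of the backlog, so keep waiting in that case. */
static void ccp_test_callback(void *data, int err)
{
	struct ccp_test_ctx *ctx = data;

	if (err == -EINPROGRESS)
		return;
	ctx->err = err;
	complete(&ctx->done);
}

static int ccp_test_aes_cbc_encrypt(struct scatterlist *key, u32 key_len,
				    struct scatterlist *iv,
				    struct scatterlist *src,
				    struct scatterlist *dst, u64 len)
{
	struct ccp_cmd cmd;
	struct ccp_test_ctx ctx;
	int ret;

	memset(&cmd, 0, sizeof(cmd));
	cmd.engine = CCP_ENGINE_AES;
	cmd.flags = CCP_CMD_MAY_BACKLOG;
	cmd.u.aes.type = CCP_AES_TYPE_128;
	cmd.u.aes.mode = CCP_AES_MODE_CBC;
	cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT;
	cmd.u.aes.key = key;
	cmd.u.aes.key_len = key_len;
	cmd.u.aes.iv = iv;		/* updated in place on completion */
	cmd.u.aes.iv_len = AES_BLOCK_SIZE;
	cmd.u.aes.src = src;
	cmd.u.aes.dst = dst;
	cmd.u.aes.src_len = len;
	cmd.callback = ccp_test_callback;
	cmd.data = &ctx;

	init_completion(&ctx.done);

	/* -EINPROGRESS, or -EBUSY with CCP_CMD_MAY_BACKLOG set, means the
	 * cmd was accepted and the callback will eventually run. */
	ret = ccp_enqueue_cmd(&cmd);
	if (ret == -EINPROGRESS || ret == -EBUSY) {
		wait_for_completion(&ctx.done);
		ret = ctx.err;
	}

	return ret;
}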
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 24545cd90a25..02ae99e8e6d3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h | |||
@@ -37,6 +37,9 @@ | |||
37 | __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ | 37 | __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ |
38 | (typeof(ptr)) (__ptr + (off)); }) | 38 | (typeof(ptr)) (__ptr + (off)); }) |
39 | 39 | ||
40 | /* Make the optimizer believe the variable can be manipulated arbitrarily. */ | ||
41 | #define OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var)) | ||
42 | |||
40 | #ifdef __CHECKER__ | 43 | #ifdef __CHECKER__ |
41 | #define __must_be_array(arr) 0 | 44 | #define __must_be_array(arr) 0 |
42 | #else | 45 | #else |
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index dc1bd3dcf11f..5529c5239421 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h | |||
@@ -15,6 +15,7 @@ | |||
15 | */ | 15 | */ |
16 | #undef barrier | 16 | #undef barrier |
17 | #undef RELOC_HIDE | 17 | #undef RELOC_HIDE |
18 | #undef OPTIMIZER_HIDE_VAR | ||
18 | 19 | ||
19 | #define barrier() __memory_barrier() | 20 | #define barrier() __memory_barrier() |
20 | 21 | ||
@@ -23,6 +24,12 @@ | |||
23 | __ptr = (unsigned long) (ptr); \ | 24 | __ptr = (unsigned long) (ptr); \ |
24 | (typeof(ptr)) (__ptr + (off)); }) | 25 | (typeof(ptr)) (__ptr + (off)); }) |
25 | 26 | ||
27 | /* This should act as an optimization barrier on var. | ||
28 | * Given that this compiler does not have inline assembly, a compiler barrier | ||
29 | * is the best we can do. | ||
30 | */ | ||
31 | #define OPTIMIZER_HIDE_VAR(var) barrier() | ||
32 | |||
26 | /* Intel ECC compiler doesn't support __builtin_types_compatible_p() */ | 33 | /* Intel ECC compiler doesn't support __builtin_types_compatible_p() */ |
27 | #define __must_be_array(a) 0 | 34 | #define __must_be_array(a) 0 |
28 | 35 | ||
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index fe7a686dfd8d..2472740d7ab2 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h | |||
@@ -170,6 +170,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); | |||
170 | (typeof(ptr)) (__ptr + (off)); }) | 170 | (typeof(ptr)) (__ptr + (off)); }) |
171 | #endif | 171 | #endif |
172 | 172 | ||
173 | #ifndef OPTIMIZER_HIDE_VAR | ||
174 | #define OPTIMIZER_HIDE_VAR(var) barrier() | ||
175 | #endif | ||
176 | |||
173 | /* Not-quite-unique ID. */ | 177 | /* Not-quite-unique ID. */ |
174 | #ifndef __UNIQUE_ID | 178 | #ifndef __UNIQUE_ID |
175 | # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) | 179 | # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) |
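OPTIMIZER_HIDE_VAR() is added here so that helpers such as the improved crypto_memneq() can keep the compiler from reasoning about the accumulated difference and re-introducing data-dependent branches. A minimal sketch of the intended usage pattern (not the actual crypto_memneq() code) is:

#include <linux/compiler.h>
#include <linux/types.h>

/* Compare two buffers without an early exit: every byte is always read,
 * and hiding the accumulator after each step stops the compiler from
 * proving it can only hold a few values and short-circuiting the loop.
 * Returns zero iff the buffers are equal. */
static unsigned long memneq_sketch(const void *a, const void *b, size_t size)
{
	const u8 *pa = a, *pb = b;
	unsigned long neq = 0;

	while (size--) {
		neq |= *pa++ ^ *pb++;
		OPTIMIZER_HIDE_VAR(neq);
	}

	return neq;
}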
diff --git a/kernel/padata.c b/kernel/padata.c index 2abd25d79cc8..161402f0b517 100644 --- a/kernel/padata.c +++ b/kernel/padata.c | |||
@@ -112,7 +112,7 @@ int padata_do_parallel(struct padata_instance *pinst, | |||
112 | 112 | ||
113 | rcu_read_lock_bh(); | 113 | rcu_read_lock_bh(); |
114 | 114 | ||
115 | pd = rcu_dereference(pinst->pd); | 115 | pd = rcu_dereference_bh(pinst->pd); |
116 | 116 | ||
117 | err = -EINVAL; | 117 | err = -EINVAL; |
118 | if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID) | 118 | if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID) |
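The padata fix above matches the dereference flavour to the rcu_read_lock_bh() critical section it runs in, which is what lockdep's RCU checking expects. The general pattern, with illustrative names, is:

#include <linux/rcupdate.h>

struct cfg {
	int value;
};

/* Hypothetical RCU-protected pointer, updated elsewhere under a lock
 * with rcu_assign_pointer(). */
static struct cfg __rcu *active_cfg;

static int read_cfg_value(void)
{
	struct cfg *c;
	int val = -1;

	/* A reader that disables BH must use the _bh flavours so the
	 * dereference is validated against the matching read-side lock. */
	rcu_read_lock_bh();
	c = rcu_dereference_bh(active_cfg);
	if (c)
		val = c->value;
	rcu_read_unlock_bh();

	return val;
}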