author		Linus Torvalds <torvalds@linux-foundation.org>	2014-01-23 21:11:00 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-23 21:11:00 -0500
commit		13c789a6b219aa23f917466c7e630566106b14c2
tree		ad9e096ded01f433306bcd40af3a3f8dc1ddea6f
parent		6dd9158ae8577372aa433e6b0eae3c3d4caa5439
parent		79ba451d66ca8402c8d052ceb50e359ddc5e1161
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
 "Here is the crypto update for 3.14:

   - Improved crypto_memneq helper
   - Use crypto_memneq in arch-specific crypto code
   - Replaced orphaned DCP driver with Freescale MXS DCP driver
   - Added AVX/AVX2 version of AESNI-GCM encode and decode
   - Added AMD Cryptographic Coprocessor (CCP) driver
   - Misc fixes"

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (41 commits)
  crypto: aesni - fix build on x86 (32bit)
  crypto: mxs - Fix sparse non static symbol warning
  crypto: ccp - CCP device enabled/disabled changes
  crypto: ccp - Cleanup hash invocation calls
  crypto: ccp - Change data length declarations to u64
  crypto: ccp - Check for caller result area before using it
  crypto: ccp - Cleanup scatterlist usage
  crypto: ccp - Apply appropriate gfp_t type to memory allocations
  crypto: drivers - Sort drivers/crypto/Makefile
  ARM: mxs: dts: Enable DCP for MXS
  crypto: mxs - Add Freescale MXS DCP driver
  crypto: mxs - Remove the old DCP driver
  crypto: ahash - Fully restore ahash request before completing
  crypto: aesni - fix build on x86 (32bit)
  crypto: talitos - Remove redundant dev_set_drvdata
  crypto: ccp - Remove redundant dev_set_drvdata
  crypto: crypto4xx - Remove redundant dev_set_drvdata
  crypto: caam - simplify and harden key parsing
  crypto: omap-sham - Fix Polling mode for larger blocks
  crypto: tcrypt - Added speed tests for AEAD crypto algorithms in tcrypt test suite
  ...
-rw-r--r--  Documentation/devicetree/bindings/crypto/fsl-dcp.txt  17
-rw-r--r--  MAINTAINERS  7
-rw-r--r--  arch/arm/boot/dts/imx23.dtsi  4
-rw-r--r--  arch/arm/boot/dts/imx28.dtsi  3
-rw-r--r--  arch/s390/crypto/des_s390.c  6
-rw-r--r--  arch/x86/crypto/Makefile  1
-rw-r--r--  arch/x86/crypto/aesni-intel_avx-x86_64.S  2811
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c  147
-rw-r--r--  crypto/Makefile  5
-rw-r--r--  crypto/ahash.c  5
-rw-r--r--  crypto/memneq.c  80
-rw-r--r--  crypto/pcrypt.c  2
-rw-r--r--  crypto/tcrypt.c  270
-rw-r--r--  crypto/tcrypt.h  10
-rw-r--r--  drivers/crypto/Kconfig  39
-rw-r--r--  drivers/crypto/Makefile  33
-rw-r--r--  drivers/crypto/amcc/crypto4xx_core.c  1
-rw-r--r--  drivers/crypto/caam/caamalg.c  36
-rw-r--r--  drivers/crypto/ccp/Kconfig  24
-rw-r--r--  drivers/crypto/ccp/Makefile  10
-rw-r--r--  drivers/crypto/ccp/ccp-crypto-aes-cmac.c  365
-rw-r--r--  drivers/crypto/ccp/ccp-crypto-aes-xts.c  279
-rw-r--r--  drivers/crypto/ccp/ccp-crypto-aes.c  369
-rw-r--r--  drivers/crypto/ccp/ccp-crypto-main.c  432
-rw-r--r--  drivers/crypto/ccp/ccp-crypto-sha.c  517
-rw-r--r--  drivers/crypto/ccp/ccp-crypto.h  197
-rw-r--r--  drivers/crypto/ccp/ccp-dev.c  595
-rw-r--r--  drivers/crypto/ccp/ccp-dev.h  272
-rw-r--r--  drivers/crypto/ccp/ccp-ops.c  2024
-rw-r--r--  drivers/crypto/ccp/ccp-pci.c  361
-rw-r--r--  drivers/crypto/dcp.c  903
-rw-r--r--  drivers/crypto/mxs-dcp.c  1100
-rw-r--r--  drivers/crypto/omap-aes.c  16
-rw-r--r--  drivers/crypto/omap-sham.c  19
-rw-r--r--  drivers/crypto/talitos.c  23
-rw-r--r--  include/linux/ccp.h  537
-rw-r--r--  include/linux/compiler-gcc.h  3
-rw-r--r--  include/linux/compiler-intel.h  7
-rw-r--r--  include/linux/compiler.h  4
-rw-r--r--  kernel/padata.c  2
40 files changed, 10527 insertions, 1009 deletions
diff --git a/Documentation/devicetree/bindings/crypto/fsl-dcp.txt b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
new file mode 100644
index 000000000000..6949e50f1f16
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
@@ -0,0 +1,17 @@
+Freescale DCP (Data Co-Processor) found on i.MX23/i.MX28.
+
+Required properties:
+- compatible : Should be "fsl,<soc>-dcp"
+- reg : Should contain MXS DCP registers location and length
+- interrupts : Should contain MXS DCP interrupt numbers, VMI IRQ and DCP IRQ
+               must be supplied, optionally Secure IRQ can be present, but
+               is currently not implemented and not used.
+
+Example:
+
+dcp@80028000 {
+	compatible = "fsl,imx28-dcp", "fsl,imx23-dcp";
+	reg = <0x80028000 0x2000>;
+	interrupts = <52 53>;
+	status = "okay";
+};
diff --git a/MAINTAINERS b/MAINTAINERS
index 3229945a96b3..0e13d692b176 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -538,6 +538,13 @@ F: drivers/tty/serial/altera_jtaguart.c
 F:	include/linux/altera_uart.h
 F:	include/linux/altera_jtaguart.h
 
+AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
+M:	Tom Lendacky <thomas.lendacky@amd.com>
+L:	linux-crypto@vger.kernel.org
+S:	Supported
+F:	drivers/crypto/ccp/
+F:	include/linux/ccp.h
+
 AMD FAM15H PROCESSOR POWER MONITORING DRIVER
 M:	Andreas Herrmann <herrmann.der.user@googlemail.com>
 L:	lm-sensors@lm-sensors.org
diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi
index c96ceaef7ddf..581b75433be6 100644
--- a/arch/arm/boot/dts/imx23.dtsi
+++ b/arch/arm/boot/dts/imx23.dtsi
@@ -337,8 +337,10 @@
 		};
 
 		dcp@80028000 {
+			compatible = "fsl,imx23-dcp";
 			reg = <0x80028000 0x2000>;
-			status = "disabled";
+			interrupts = <53 54>;
+			status = "okay";
 		};
 
 		pxp@8002a000 {
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index cda19c8b0a47..f8e9b20f6982 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -813,9 +813,10 @@
 		};
 
 		dcp: dcp@80028000 {
+			compatible = "fsl,imx28-dcp", "fsl,imx23-dcp";
 			reg = <0x80028000 0x2000>;
 			interrupts = <52 53 54>;
-			compatible = "fsl-dcp";
+			status = "okay";
 		};
 
 		pxp: pxp@8002a000 {
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index bcca01c9989d..200f2a1b599d 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -237,9 +237,9 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *flags = &tfm->crt_flags;
 
-	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
-	      memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
+	if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
+	      crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
 		     DES_KEY_SIZE)) &&
 	    (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
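
The hunk above replaces memcmp() with crypto_memneq() so that the 3DES key-uniqueness check runs in constant time rather than bailing out at the first differing byte. A minimal userspace sketch of the idea behind crypto_memneq (illustrative only, not the kernel implementation in crypto/memneq.c):

#include <stddef.h>
#include <stdio.h>

/* Returns nonzero if the buffers differ, zero if they are equal.
 * Unlike memcmp(), every byte is always examined, so the running time
 * does not reveal where the first difference occurs. */
static unsigned long memneq_sketch(const void *a, const void *b, size_t size)
{
	const unsigned char *pa = a, *pb = b;
	unsigned long neq = 0;

	while (size--)
		neq |= *pa++ ^ *pb++;	/* accumulate differences, no early exit */
	return neq;
}

int main(void)
{
	unsigned char k1[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	unsigned char k2[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

	printf("differ: %lu\n", memneq_sketch(k1, k2, sizeof(k1)));	/* prints 0 */
	return 0;
}
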
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e0fc24db234a..6ba54d640383 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -76,6 +76,7 @@ ifeq ($(avx2_supported),yes)
 endif
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
+aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
 crc32c-intel-y := crc32c-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
new file mode 100644
index 000000000000..522ab68d1c88
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -0,0 +1,2811 @@
1########################################################################
2# Copyright (c) 2013, Intel Corporation
3#
4# This software is available to you under a choice of one of two
5# licenses. You may choose to be licensed under the terms of the GNU
6# General Public License (GPL) Version 2, available from the file
7# COPYING in the main directory of this source tree, or the
8# OpenIB.org BSD license below:
9#
10# Redistribution and use in source and binary forms, with or without
11# modification, are permitted provided that the following conditions are
12# met:
13#
14# * Redistributions of source code must retain the above copyright
15# notice, this list of conditions and the following disclaimer.
16#
17# * Redistributions in binary form must reproduce the above copyright
18# notice, this list of conditions and the following disclaimer in the
19# documentation and/or other materials provided with the
20# distribution.
21#
22# * Neither the name of the Intel Corporation nor the names of its
23# contributors may be used to endorse or promote products derived from
24# this software without specific prior written permission.
25#
26#
27# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
28# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
31# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES# LOSS OF USE, DATA, OR
34# PROFITS# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38########################################################################
39##
40## Authors:
41## Erdinc Ozturk <erdinc.ozturk@intel.com>
42## Vinodh Gopal <vinodh.gopal@intel.com>
43## James Guilford <james.guilford@intel.com>
44## Tim Chen <tim.c.chen@linux.intel.com>
45##
46## References:
47## This code was derived and highly optimized from the code described in paper:
48## Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation
49## on Intel Architecture Processors. August, 2010
50## The details of the implementation is explained in:
51## Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode
52## on Intel Architecture Processors. October, 2012.
53##
54## Assumptions:
55##
56##
57##
58## iv:
59## 0 1 2 3
60## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
61## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
62## | Salt (From the SA) |
63## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
64## | Initialization Vector |
65## | (This is the sequence number from IPSec header) |
66## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
67## | 0x1 |
68## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
69##
70##
71##
72## AAD:
73## AAD padded to 128 bits with 0
74## for example, assume AAD is a u32 vector
75##
76## if AAD is 8 bytes:
77## AAD[3] = {A0, A1}#
78## padded AAD in xmm register = {A1 A0 0 0}
79##
80## 0 1 2 3
81## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
82## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
83## | SPI (A1) |
84## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
85## | 32-bit Sequence Number (A0) |
86## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
87## | 0x0 |
88## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
89##
90## AAD Format with 32-bit Sequence Number
91##
92## if AAD is 12 bytes:
93## AAD[3] = {A0, A1, A2}#
94## padded AAD in xmm register = {A2 A1 A0 0}
95##
96## 0 1 2 3
97## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
98## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
99## | SPI (A2) |
100## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
101## | 64-bit Extended Sequence Number {A1,A0} |
102## | |
103## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
104## | 0x0 |
105## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
106##
107## AAD Format with 64-bit Extended Sequence Number
108##
109##
110## aadLen:
111## from the definition of the spec, aadLen can only be 8 or 12 bytes.
112## The code additionally supports aadLen of length 16 bytes.
113##
114## TLen:
115## from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
116##
117## poly = x^128 + x^127 + x^126 + x^121 + 1
118## throughout the code, one tab and two tab indentations are used. one tab is
119## for GHASH part, two tabs is for AES part.
120##
121
122#include <linux/linkage.h>
123#include <asm/inst.h>
124
125.data
126.align 16
127
128POLY: .octa 0xC2000000000000000000000000000001
129POLY2: .octa 0xC20000000000000000000001C2000000
130TWOONE: .octa 0x00000001000000000000000000000001
131
132# order of these constants should not change.
133# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F
134
135SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
136SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
137ALL_F: .octa 0xffffffffffffffffffffffffffffffff
138ZERO: .octa 0x00000000000000000000000000000000
139ONE: .octa 0x00000000000000000000000000000001
140ONEf: .octa 0x01000000000000000000000000000000
141
142.text
143
144
145##define the fields of the gcm aes context
146#{
147# u8 expanded_keys[16*11] store expanded keys
148# u8 shifted_hkey_1[16] store HashKey <<1 mod poly here
149# u8 shifted_hkey_2[16] store HashKey^2 <<1 mod poly here
150# u8 shifted_hkey_3[16] store HashKey^3 <<1 mod poly here
151# u8 shifted_hkey_4[16] store HashKey^4 <<1 mod poly here
152# u8 shifted_hkey_5[16] store HashKey^5 <<1 mod poly here
153# u8 shifted_hkey_6[16] store HashKey^6 <<1 mod poly here
154# u8 shifted_hkey_7[16] store HashKey^7 <<1 mod poly here
155# u8 shifted_hkey_8[16] store HashKey^8 <<1 mod poly here
156# u8 shifted_hkey_1_k[16] store XOR HashKey <<1 mod poly here (for Karatsuba purposes)
157# u8 shifted_hkey_2_k[16] store XOR HashKey^2 <<1 mod poly here (for Karatsuba purposes)
158# u8 shifted_hkey_3_k[16] store XOR HashKey^3 <<1 mod poly here (for Karatsuba purposes)
159# u8 shifted_hkey_4_k[16] store XOR HashKey^4 <<1 mod poly here (for Karatsuba purposes)
160# u8 shifted_hkey_5_k[16] store XOR HashKey^5 <<1 mod poly here (for Karatsuba purposes)
161# u8 shifted_hkey_6_k[16] store XOR HashKey^6 <<1 mod poly here (for Karatsuba purposes)
162# u8 shifted_hkey_7_k[16] store XOR HashKey^7 <<1 mod poly here (for Karatsuba purposes)
163# u8 shifted_hkey_8_k[16] store XOR HashKey^8 <<1 mod poly here (for Karatsuba purposes)
164#} gcm_ctx#
165
166HashKey = 16*11 # store HashKey <<1 mod poly here
167HashKey_2 = 16*12 # store HashKey^2 <<1 mod poly here
168HashKey_3 = 16*13 # store HashKey^3 <<1 mod poly here
169HashKey_4 = 16*14 # store HashKey^4 <<1 mod poly here
170HashKey_5 = 16*15 # store HashKey^5 <<1 mod poly here
171HashKey_6 = 16*16 # store HashKey^6 <<1 mod poly here
172HashKey_7 = 16*17 # store HashKey^7 <<1 mod poly here
173HashKey_8 = 16*18 # store HashKey^8 <<1 mod poly here
174HashKey_k = 16*19 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes)
175HashKey_2_k = 16*20 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
176HashKey_3_k = 16*21 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
177HashKey_4_k = 16*22 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
178HashKey_5_k = 16*23 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
179HashKey_6_k = 16*24 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
180HashKey_7_k = 16*25 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
181HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes)
182
183#define arg1 %rdi
184#define arg2 %rsi
185#define arg3 %rdx
186#define arg4 %rcx
187#define arg5 %r8
188#define arg6 %r9
189#define arg7 STACK_OFFSET+8*1(%r14)
190#define arg8 STACK_OFFSET+8*2(%r14)
191#define arg9 STACK_OFFSET+8*3(%r14)
192
193i = 0
194j = 0
195
196out_order = 0
197in_order = 1
198DEC = 0
199ENC = 1
200
201.macro define_reg r n
202reg_\r = %xmm\n
203.endm
204
205.macro setreg
206.altmacro
207define_reg i %i
208define_reg j %j
209.noaltmacro
210.endm
211
212# need to push 4 registers into stack to maintain
213STACK_OFFSET = 8*4
214
215TMP1 = 16*0 # Temporary storage for AAD
216TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register)
217TMP3 = 16*2 # Temporary storage for AES State 3
218TMP4 = 16*3 # Temporary storage for AES State 4
219TMP5 = 16*4 # Temporary storage for AES State 5
220TMP6 = 16*5 # Temporary storage for AES State 6
221TMP7 = 16*6 # Temporary storage for AES State 7
222TMP8 = 16*7 # Temporary storage for AES State 8
223
224VARIABLE_OFFSET = 16*8
225
226################################
227# Utility Macros
228################################
229
230# Encryption of a single block
231.macro ENCRYPT_SINGLE_BLOCK XMM0
232 vpxor (arg1), \XMM0, \XMM0
233 i = 1
234 setreg
235.rep 9
236 vaesenc 16*i(arg1), \XMM0, \XMM0
237 i = (i+1)
238 setreg
239.endr
240 vaesenclast 16*10(arg1), \XMM0, \XMM0
241.endm
242
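
# ENCRYPT_SINGLE_BLOCK above pushes one block through the AES-128 key schedule
# stored at arg1: an initial whitening XOR, nine vaesenc rounds, then vaesenclast.
# A hedged userspace C equivalent using AES-NI intrinsics (assumes the 11 round
# keys are already expanded; a sketch, not the kernel glue code):

#include <wmmintrin.h>	/* AES-NI intrinsics; build with -maes */

/* One AES-128 block: whitening, 9 full rounds, 1 final round,
 * mirroring the macro's walk over the key schedule. */
static __m128i aes128_encrypt_block(const __m128i rk[11], __m128i block)
{
	int i;

	block = _mm_xor_si128(block, rk[0]);		/* vpxor (arg1), XMM0, XMM0 */
	for (i = 1; i < 10; i++)
		block = _mm_aesenc_si128(block, rk[i]);	/* vaesenc 16*i(arg1) */
	return _mm_aesenclast_si128(block, rk[10]);	/* vaesenclast 16*10(arg1) */
}
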
243#ifdef CONFIG_AS_AVX
244###############################################################################
245# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
246# Input: A and B (128-bits each, bit-reflected)
247# Output: C = A*B*x mod poly, (i.e. >>1 )
248# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
249# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
250###############################################################################
251.macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5
252
253 vpshufd $0b01001110, \GH, \T2
254 vpshufd $0b01001110, \HK, \T3
255 vpxor \GH , \T2, \T2 # T2 = (a1+a0)
256 vpxor \HK , \T3, \T3 # T3 = (b1+b0)
257
258 vpclmulqdq $0x11, \HK, \GH, \T1 # T1 = a1*b1
259 vpclmulqdq $0x00, \HK, \GH, \GH # GH = a0*b0
260 vpclmulqdq $0x00, \T3, \T2, \T2 # T2 = (a1+a0)*(b1+b0)
261 vpxor \GH, \T2,\T2
262 vpxor \T1, \T2,\T2 # T2 = a0*b1+a1*b0
263
264 vpslldq $8, \T2,\T3 # shift-L T3 2 DWs
265 vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs
266 vpxor \T3, \GH, \GH
267 vpxor \T2, \T1, \T1 # <T1:GH> = GH x HK
268
269 #first phase of the reduction
270 vpslld $31, \GH, \T2 # packed right shifting << 31
271 vpslld $30, \GH, \T3 # packed right shifting shift << 30
272 vpslld $25, \GH, \T4 # packed right shifting shift << 25
273
274 vpxor \T3, \T2, \T2 # xor the shifted versions
275 vpxor \T4, \T2, \T2
276
277 vpsrldq $4, \T2, \T5 # shift-R T5 1 DW
278
279 vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
280 vpxor \T2, \GH, \GH # first phase of the reduction complete
281
282 #second phase of the reduction
283
284 vpsrld $1,\GH, \T2 # packed left shifting >> 1
285 vpsrld $2,\GH, \T3 # packed left shifting >> 2
286 vpsrld $7,\GH, \T4 # packed left shifting >> 7
287 vpxor \T3, \T2, \T2 # xor the shifted versions
288 vpxor \T4, \T2, \T2
289
290 vpxor \T5, \T2, \T2
291 vpxor \T2, \GH, \GH
292 vpxor \T1, \GH, \GH # the result is in GH
293
294
295.endm
296
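
# GHASH_MUL_AVX above computes the 128x128-bit carry-less product with a single
# Karatsuba split (a1*b1, a0*b0 and (a1+a0)*(b1+b0)) before the two-phase
# shift-based reduction. A hedged C sketch of just the multiplication step using
# PCLMULQDQ intrinsics (reduction omitted; assumes -mpclmul, not the kernel code):

#include <immintrin.h>	/* PCLMULQDQ/SSE intrinsics; build with -mpclmul */

/* 128x128 -> 256-bit carry-less multiply, split exactly as in the macro:
 * hi = a1*b1, lo = a0*b0, and the middle term derived from
 * (a1^a0)*(b1^b0) ^ hi ^ lo is folded into both halves. */
static void clmul_karatsuba(__m128i a, __m128i b, __m128i *hi, __m128i *lo)
{
	__m128i t1  = _mm_clmulepi64_si128(a, b, 0x11);			/* a1*b1 */
	__m128i t0  = _mm_clmulepi64_si128(a, b, 0x00);			/* a0*b0 */
	__m128i am  = _mm_xor_si128(a, _mm_shuffle_epi32(a, 0x4e));	/* a1^a0 (vpshufd $0b01001110) */
	__m128i bm  = _mm_xor_si128(b, _mm_shuffle_epi32(b, 0x4e));	/* b1^b0 */
	__m128i mid = _mm_clmulepi64_si128(am, bm, 0x00);		/* (a1^a0)*(b1^b0) */

	mid = _mm_xor_si128(mid, _mm_xor_si128(t0, t1));	/* a0*b1 ^ a1*b0 */
	*lo = _mm_xor_si128(t0, _mm_slli_si128(mid, 8));	/* low 128 bits  */
	*hi = _mm_xor_si128(t1, _mm_srli_si128(mid, 8));	/* high 128 bits */
}
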
297.macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6
298
299 # Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
300 vmovdqa \HK, \T5
301
302 vpshufd $0b01001110, \T5, \T1
303 vpxor \T5, \T1, \T1
304 vmovdqa \T1, HashKey_k(arg1)
305
306 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly
307 vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly
308 vpshufd $0b01001110, \T5, \T1
309 vpxor \T5, \T1, \T1
310 vmovdqa \T1, HashKey_2_k(arg1)
311
312 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly
313 vmovdqa \T5, HashKey_3(arg1)
314 vpshufd $0b01001110, \T5, \T1
315 vpxor \T5, \T1, \T1
316 vmovdqa \T1, HashKey_3_k(arg1)
317
318 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly
319 vmovdqa \T5, HashKey_4(arg1)
320 vpshufd $0b01001110, \T5, \T1
321 vpxor \T5, \T1, \T1
322 vmovdqa \T1, HashKey_4_k(arg1)
323
324 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly
325 vmovdqa \T5, HashKey_5(arg1)
326 vpshufd $0b01001110, \T5, \T1
327 vpxor \T5, \T1, \T1
328 vmovdqa \T1, HashKey_5_k(arg1)
329
330 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly
331 vmovdqa \T5, HashKey_6(arg1)
332 vpshufd $0b01001110, \T5, \T1
333 vpxor \T5, \T1, \T1
334 vmovdqa \T1, HashKey_6_k(arg1)
335
336 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly
337 vmovdqa \T5, HashKey_7(arg1)
338 vpshufd $0b01001110, \T5, \T1
339 vpxor \T5, \T1, \T1
340 vmovdqa \T1, HashKey_7_k(arg1)
341
342 GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly
343 vmovdqa \T5, HashKey_8(arg1)
344 vpshufd $0b01001110, \T5, \T1
345 vpxor \T5, \T1, \T1
346 vmovdqa \T1, HashKey_8_k(arg1)
347
348.endm
349
350## if a = number of total plaintext bytes
351## b = floor(a/16)
352## num_initial_blocks = b mod 4#
353## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
354## r10, r11, r12, rax are clobbered
355## arg1, arg2, arg3, r14 are used as a pointer only, not modified
356
357.macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
358 i = (8-\num_initial_blocks)
359 setreg
360
361 mov arg6, %r10 # r10 = AAD
362 mov arg7, %r12 # r12 = aadLen
363
364
365 mov %r12, %r11
366
367 vpxor reg_i, reg_i, reg_i
368_get_AAD_loop\@:
369 vmovd (%r10), \T1
370 vpslldq $12, \T1, \T1
371 vpsrldq $4, reg_i, reg_i
372 vpxor \T1, reg_i, reg_i
373
374 add $4, %r10
375 sub $4, %r12
376 jg _get_AAD_loop\@
377
378
379 cmp $16, %r11
380 je _get_AAD_loop2_done\@
381 mov $16, %r12
382
383_get_AAD_loop2\@:
384 vpsrldq $4, reg_i, reg_i
385 sub $4, %r12
386 cmp %r11, %r12
387 jg _get_AAD_loop2\@
388
389_get_AAD_loop2_done\@:
390
391 #byte-reflect the AAD data
392 vpshufb SHUF_MASK(%rip), reg_i, reg_i
393
394 # initialize the data pointer offset as zero
395 xor %r11, %r11
396
397 # start AES for num_initial_blocks blocks
398 mov arg5, %rax # rax = *Y0
399 vmovdqu (%rax), \CTR # CTR = Y0
400 vpshufb SHUF_MASK(%rip), \CTR, \CTR
401
402
403 i = (9-\num_initial_blocks)
404 setreg
405.rep \num_initial_blocks
406 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
407 vmovdqa \CTR, reg_i
408 vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap
409 i = (i+1)
410 setreg
411.endr
412
413 vmovdqa (arg1), \T_key
414 i = (9-\num_initial_blocks)
415 setreg
416.rep \num_initial_blocks
417 vpxor \T_key, reg_i, reg_i
418 i = (i+1)
419 setreg
420.endr
421
422 j = 1
423 setreg
424.rep 9
425 vmovdqa 16*j(arg1), \T_key
426 i = (9-\num_initial_blocks)
427 setreg
428.rep \num_initial_blocks
429 vaesenc \T_key, reg_i, reg_i
430 i = (i+1)
431 setreg
432.endr
433
434 j = (j+1)
435 setreg
436.endr
437
438
439 vmovdqa 16*10(arg1), \T_key
440 i = (9-\num_initial_blocks)
441 setreg
442.rep \num_initial_blocks
443 vaesenclast \T_key, reg_i, reg_i
444 i = (i+1)
445 setreg
446.endr
447
448 i = (9-\num_initial_blocks)
449 setreg
450.rep \num_initial_blocks
451 vmovdqu (arg3, %r11), \T1
452 vpxor \T1, reg_i, reg_i
453 vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for num_initial_blocks blocks
454 add $16, %r11
455.if \ENC_DEC == DEC
456 vmovdqa \T1, reg_i
457.endif
458 vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations
459 i = (i+1)
460 setreg
461.endr
462
463
464 i = (8-\num_initial_blocks)
465 j = (9-\num_initial_blocks)
466 setreg
467 GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6
468
469.rep \num_initial_blocks
470 vpxor reg_i, reg_j, reg_j
471 GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks
472 i = (i+1)
473 j = (j+1)
474 setreg
475.endr
476 # XMM8 has the combined result here
477
478 vmovdqa \XMM8, TMP1(%rsp)
479 vmovdqa \XMM8, \T3
480
481 cmp $128, %r13
482 jl _initial_blocks_done\@ # no need for precomputed constants
483
484###############################################################################
485# Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
486 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
487 vmovdqa \CTR, \XMM1
488 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
489
490 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
491 vmovdqa \CTR, \XMM2
492 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
493
494 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
495 vmovdqa \CTR, \XMM3
496 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
497
498 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
499 vmovdqa \CTR, \XMM4
500 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
501
502 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
503 vmovdqa \CTR, \XMM5
504 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
505
506 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
507 vmovdqa \CTR, \XMM6
508 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
509
510 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
511 vmovdqa \CTR, \XMM7
512 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
513
514 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
515 vmovdqa \CTR, \XMM8
516 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
517
518 vmovdqa (arg1), \T_key
519 vpxor \T_key, \XMM1, \XMM1
520 vpxor \T_key, \XMM2, \XMM2
521 vpxor \T_key, \XMM3, \XMM3
522 vpxor \T_key, \XMM4, \XMM4
523 vpxor \T_key, \XMM5, \XMM5
524 vpxor \T_key, \XMM6, \XMM6
525 vpxor \T_key, \XMM7, \XMM7
526 vpxor \T_key, \XMM8, \XMM8
527
528 i = 1
529 setreg
530.rep 9 # do 9 rounds
531 vmovdqa 16*i(arg1), \T_key
532 vaesenc \T_key, \XMM1, \XMM1
533 vaesenc \T_key, \XMM2, \XMM2
534 vaesenc \T_key, \XMM3, \XMM3
535 vaesenc \T_key, \XMM4, \XMM4
536 vaesenc \T_key, \XMM5, \XMM5
537 vaesenc \T_key, \XMM6, \XMM6
538 vaesenc \T_key, \XMM7, \XMM7
539 vaesenc \T_key, \XMM8, \XMM8
540 i = (i+1)
541 setreg
542.endr
543
544
545 vmovdqa 16*i(arg1), \T_key
546 vaesenclast \T_key, \XMM1, \XMM1
547 vaesenclast \T_key, \XMM2, \XMM2
548 vaesenclast \T_key, \XMM3, \XMM3
549 vaesenclast \T_key, \XMM4, \XMM4
550 vaesenclast \T_key, \XMM5, \XMM5
551 vaesenclast \T_key, \XMM6, \XMM6
552 vaesenclast \T_key, \XMM7, \XMM7
553 vaesenclast \T_key, \XMM8, \XMM8
554
555 vmovdqu (arg3, %r11), \T1
556 vpxor \T1, \XMM1, \XMM1
557 vmovdqu \XMM1, (arg2 , %r11)
558 .if \ENC_DEC == DEC
559 vmovdqa \T1, \XMM1
560 .endif
561
562 vmovdqu 16*1(arg3, %r11), \T1
563 vpxor \T1, \XMM2, \XMM2
564 vmovdqu \XMM2, 16*1(arg2 , %r11)
565 .if \ENC_DEC == DEC
566 vmovdqa \T1, \XMM2
567 .endif
568
569 vmovdqu 16*2(arg3, %r11), \T1
570 vpxor \T1, \XMM3, \XMM3
571 vmovdqu \XMM3, 16*2(arg2 , %r11)
572 .if \ENC_DEC == DEC
573 vmovdqa \T1, \XMM3
574 .endif
575
576 vmovdqu 16*3(arg3, %r11), \T1
577 vpxor \T1, \XMM4, \XMM4
578 vmovdqu \XMM4, 16*3(arg2 , %r11)
579 .if \ENC_DEC == DEC
580 vmovdqa \T1, \XMM4
581 .endif
582
583 vmovdqu 16*4(arg3, %r11), \T1
584 vpxor \T1, \XMM5, \XMM5
585 vmovdqu \XMM5, 16*4(arg2 , %r11)
586 .if \ENC_DEC == DEC
587 vmovdqa \T1, \XMM5
588 .endif
589
590 vmovdqu 16*5(arg3, %r11), \T1
591 vpxor \T1, \XMM6, \XMM6
592 vmovdqu \XMM6, 16*5(arg2 , %r11)
593 .if \ENC_DEC == DEC
594 vmovdqa \T1, \XMM6
595 .endif
596
597 vmovdqu 16*6(arg3, %r11), \T1
598 vpxor \T1, \XMM7, \XMM7
599 vmovdqu \XMM7, 16*6(arg2 , %r11)
600 .if \ENC_DEC == DEC
601 vmovdqa \T1, \XMM7
602 .endif
603
604 vmovdqu 16*7(arg3, %r11), \T1
605 vpxor \T1, \XMM8, \XMM8
606 vmovdqu \XMM8, 16*7(arg2 , %r11)
607 .if \ENC_DEC == DEC
608 vmovdqa \T1, \XMM8
609 .endif
610
611 add $128, %r11
612
613 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
614 vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with the corresponding ciphertext
615 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
616 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
617 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
618 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
619 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
620 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
621 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
622
623###############################################################################
624
625_initial_blocks_done\@:
626
627.endm
628
629# encrypt 8 blocks at a time
630# ghash the 8 previously encrypted ciphertext blocks
631# arg1, arg2, arg3 are used as pointers only, not modified
632# r11 is the data offset value
633.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
634
635 vmovdqa \XMM1, \T2
636 vmovdqa \XMM2, TMP2(%rsp)
637 vmovdqa \XMM3, TMP3(%rsp)
638 vmovdqa \XMM4, TMP4(%rsp)
639 vmovdqa \XMM5, TMP5(%rsp)
640 vmovdqa \XMM6, TMP6(%rsp)
641 vmovdqa \XMM7, TMP7(%rsp)
642 vmovdqa \XMM8, TMP8(%rsp)
643
644.if \loop_idx == in_order
645 vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT
646 vpaddd ONE(%rip), \XMM1, \XMM2
647 vpaddd ONE(%rip), \XMM2, \XMM3
648 vpaddd ONE(%rip), \XMM3, \XMM4
649 vpaddd ONE(%rip), \XMM4, \XMM5
650 vpaddd ONE(%rip), \XMM5, \XMM6
651 vpaddd ONE(%rip), \XMM6, \XMM7
652 vpaddd ONE(%rip), \XMM7, \XMM8
653 vmovdqa \XMM8, \CTR
654
655 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
656 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
657 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
658 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
659 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
660 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
661 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
662 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
663.else
664 vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT
665 vpaddd ONEf(%rip), \XMM1, \XMM2
666 vpaddd ONEf(%rip), \XMM2, \XMM3
667 vpaddd ONEf(%rip), \XMM3, \XMM4
668 vpaddd ONEf(%rip), \XMM4, \XMM5
669 vpaddd ONEf(%rip), \XMM5, \XMM6
670 vpaddd ONEf(%rip), \XMM6, \XMM7
671 vpaddd ONEf(%rip), \XMM7, \XMM8
672 vmovdqa \XMM8, \CTR
673.endif
674
675
676 #######################################################################
677
678 vmovdqu (arg1), \T1
679 vpxor \T1, \XMM1, \XMM1
680 vpxor \T1, \XMM2, \XMM2
681 vpxor \T1, \XMM3, \XMM3
682 vpxor \T1, \XMM4, \XMM4
683 vpxor \T1, \XMM5, \XMM5
684 vpxor \T1, \XMM6, \XMM6
685 vpxor \T1, \XMM7, \XMM7
686 vpxor \T1, \XMM8, \XMM8
687
688 #######################################################################
689
690
691
692
693
694 vmovdqu 16*1(arg1), \T1
695 vaesenc \T1, \XMM1, \XMM1
696 vaesenc \T1, \XMM2, \XMM2
697 vaesenc \T1, \XMM3, \XMM3
698 vaesenc \T1, \XMM4, \XMM4
699 vaesenc \T1, \XMM5, \XMM5
700 vaesenc \T1, \XMM6, \XMM6
701 vaesenc \T1, \XMM7, \XMM7
702 vaesenc \T1, \XMM8, \XMM8
703
704 vmovdqu 16*2(arg1), \T1
705 vaesenc \T1, \XMM1, \XMM1
706 vaesenc \T1, \XMM2, \XMM2
707 vaesenc \T1, \XMM3, \XMM3
708 vaesenc \T1, \XMM4, \XMM4
709 vaesenc \T1, \XMM5, \XMM5
710 vaesenc \T1, \XMM6, \XMM6
711 vaesenc \T1, \XMM7, \XMM7
712 vaesenc \T1, \XMM8, \XMM8
713
714
715 #######################################################################
716
717 vmovdqa HashKey_8(arg1), \T5
718 vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1
719 vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0
720
721 vpshufd $0b01001110, \T2, \T6
722 vpxor \T2, \T6, \T6
723
724 vmovdqa HashKey_8_k(arg1), \T5
725 vpclmulqdq $0x00, \T5, \T6, \T6
726
727 vmovdqu 16*3(arg1), \T1
728 vaesenc \T1, \XMM1, \XMM1
729 vaesenc \T1, \XMM2, \XMM2
730 vaesenc \T1, \XMM3, \XMM3
731 vaesenc \T1, \XMM4, \XMM4
732 vaesenc \T1, \XMM5, \XMM5
733 vaesenc \T1, \XMM6, \XMM6
734 vaesenc \T1, \XMM7, \XMM7
735 vaesenc \T1, \XMM8, \XMM8
736
737 vmovdqa TMP2(%rsp), \T1
738 vmovdqa HashKey_7(arg1), \T5
739 vpclmulqdq $0x11, \T5, \T1, \T3
740 vpxor \T3, \T4, \T4
741 vpclmulqdq $0x00, \T5, \T1, \T3
742 vpxor \T3, \T7, \T7
743
744 vpshufd $0b01001110, \T1, \T3
745 vpxor \T1, \T3, \T3
746 vmovdqa HashKey_7_k(arg1), \T5
747 vpclmulqdq $0x10, \T5, \T3, \T3
748 vpxor \T3, \T6, \T6
749
750 vmovdqu 16*4(arg1), \T1
751 vaesenc \T1, \XMM1, \XMM1
752 vaesenc \T1, \XMM2, \XMM2
753 vaesenc \T1, \XMM3, \XMM3
754 vaesenc \T1, \XMM4, \XMM4
755 vaesenc \T1, \XMM5, \XMM5
756 vaesenc \T1, \XMM6, \XMM6
757 vaesenc \T1, \XMM7, \XMM7
758 vaesenc \T1, \XMM8, \XMM8
759
760 #######################################################################
761
762 vmovdqa TMP3(%rsp), \T1
763 vmovdqa HashKey_6(arg1), \T5
764 vpclmulqdq $0x11, \T5, \T1, \T3
765 vpxor \T3, \T4, \T4
766 vpclmulqdq $0x00, \T5, \T1, \T3
767 vpxor \T3, \T7, \T7
768
769 vpshufd $0b01001110, \T1, \T3
770 vpxor \T1, \T3, \T3
771 vmovdqa HashKey_6_k(arg1), \T5
772 vpclmulqdq $0x10, \T5, \T3, \T3
773 vpxor \T3, \T6, \T6
774
775 vmovdqu 16*5(arg1), \T1
776 vaesenc \T1, \XMM1, \XMM1
777 vaesenc \T1, \XMM2, \XMM2
778 vaesenc \T1, \XMM3, \XMM3
779 vaesenc \T1, \XMM4, \XMM4
780 vaesenc \T1, \XMM5, \XMM5
781 vaesenc \T1, \XMM6, \XMM6
782 vaesenc \T1, \XMM7, \XMM7
783 vaesenc \T1, \XMM8, \XMM8
784
785 vmovdqa TMP4(%rsp), \T1
786 vmovdqa HashKey_5(arg1), \T5
787 vpclmulqdq $0x11, \T5, \T1, \T3
788 vpxor \T3, \T4, \T4
789 vpclmulqdq $0x00, \T5, \T1, \T3
790 vpxor \T3, \T7, \T7
791
792 vpshufd $0b01001110, \T1, \T3
793 vpxor \T1, \T3, \T3
794 vmovdqa HashKey_5_k(arg1), \T5
795 vpclmulqdq $0x10, \T5, \T3, \T3
796 vpxor \T3, \T6, \T6
797
798 vmovdqu 16*6(arg1), \T1
799 vaesenc \T1, \XMM1, \XMM1
800 vaesenc \T1, \XMM2, \XMM2
801 vaesenc \T1, \XMM3, \XMM3
802 vaesenc \T1, \XMM4, \XMM4
803 vaesenc \T1, \XMM5, \XMM5
804 vaesenc \T1, \XMM6, \XMM6
805 vaesenc \T1, \XMM7, \XMM7
806 vaesenc \T1, \XMM8, \XMM8
807
808
809 vmovdqa TMP5(%rsp), \T1
810 vmovdqa HashKey_4(arg1), \T5
811 vpclmulqdq $0x11, \T5, \T1, \T3
812 vpxor \T3, \T4, \T4
813 vpclmulqdq $0x00, \T5, \T1, \T3
814 vpxor \T3, \T7, \T7
815
816 vpshufd $0b01001110, \T1, \T3
817 vpxor \T1, \T3, \T3
818 vmovdqa HashKey_4_k(arg1), \T5
819 vpclmulqdq $0x10, \T5, \T3, \T3
820 vpxor \T3, \T6, \T6
821
822 vmovdqu 16*7(arg1), \T1
823 vaesenc \T1, \XMM1, \XMM1
824 vaesenc \T1, \XMM2, \XMM2
825 vaesenc \T1, \XMM3, \XMM3
826 vaesenc \T1, \XMM4, \XMM4
827 vaesenc \T1, \XMM5, \XMM5
828 vaesenc \T1, \XMM6, \XMM6
829 vaesenc \T1, \XMM7, \XMM7
830 vaesenc \T1, \XMM8, \XMM8
831
832 vmovdqa TMP6(%rsp), \T1
833 vmovdqa HashKey_3(arg1), \T5
834 vpclmulqdq $0x11, \T5, \T1, \T3
835 vpxor \T3, \T4, \T4
836 vpclmulqdq $0x00, \T5, \T1, \T3
837 vpxor \T3, \T7, \T7
838
839 vpshufd $0b01001110, \T1, \T3
840 vpxor \T1, \T3, \T3
841 vmovdqa HashKey_3_k(arg1), \T5
842 vpclmulqdq $0x10, \T5, \T3, \T3
843 vpxor \T3, \T6, \T6
844
845
846 vmovdqu 16*8(arg1), \T1
847 vaesenc \T1, \XMM1, \XMM1
848 vaesenc \T1, \XMM2, \XMM2
849 vaesenc \T1, \XMM3, \XMM3
850 vaesenc \T1, \XMM4, \XMM4
851 vaesenc \T1, \XMM5, \XMM5
852 vaesenc \T1, \XMM6, \XMM6
853 vaesenc \T1, \XMM7, \XMM7
854 vaesenc \T1, \XMM8, \XMM8
855
856 vmovdqa TMP7(%rsp), \T1
857 vmovdqa HashKey_2(arg1), \T5
858 vpclmulqdq $0x11, \T5, \T1, \T3
859 vpxor \T3, \T4, \T4
860 vpclmulqdq $0x00, \T5, \T1, \T3
861 vpxor \T3, \T7, \T7
862
863 vpshufd $0b01001110, \T1, \T3
864 vpxor \T1, \T3, \T3
865 vmovdqa HashKey_2_k(arg1), \T5
866 vpclmulqdq $0x10, \T5, \T3, \T3
867 vpxor \T3, \T6, \T6
868
869 #######################################################################
870
871 vmovdqu 16*9(arg1), \T5
872 vaesenc \T5, \XMM1, \XMM1
873 vaesenc \T5, \XMM2, \XMM2
874 vaesenc \T5, \XMM3, \XMM3
875 vaesenc \T5, \XMM4, \XMM4
876 vaesenc \T5, \XMM5, \XMM5
877 vaesenc \T5, \XMM6, \XMM6
878 vaesenc \T5, \XMM7, \XMM7
879 vaesenc \T5, \XMM8, \XMM8
880
881 vmovdqa TMP8(%rsp), \T1
882 vmovdqa HashKey(arg1), \T5
883 vpclmulqdq $0x11, \T5, \T1, \T3
884 vpxor \T3, \T4, \T4
885 vpclmulqdq $0x00, \T5, \T1, \T3
886 vpxor \T3, \T7, \T7
887
888 vpshufd $0b01001110, \T1, \T3
889 vpxor \T1, \T3, \T3
890 vmovdqa HashKey_k(arg1), \T5
891 vpclmulqdq $0x10, \T5, \T3, \T3
892 vpxor \T3, \T6, \T6
893
894 vpxor \T4, \T6, \T6
895 vpxor \T7, \T6, \T6
896
897 vmovdqu 16*10(arg1), \T5
898
899 i = 0
900 j = 1
901 setreg
902.rep 8
903 vpxor 16*i(arg3, %r11), \T5, \T2
904 .if \ENC_DEC == ENC
905 vaesenclast \T2, reg_j, reg_j
906 .else
907 vaesenclast \T2, reg_j, \T3
908 vmovdqu 16*i(arg3, %r11), reg_j
909 vmovdqu \T3, 16*i(arg2, %r11)
910 .endif
911 i = (i+1)
912 j = (j+1)
913 setreg
914.endr
915 #######################################################################
916
917
918 vpslldq $8, \T6, \T3 # shift-L T3 2 DWs
919 vpsrldq $8, \T6, \T6 # shift-R T2 2 DWs
920 vpxor \T3, \T7, \T7
921 vpxor \T4, \T6, \T6 # accumulate the results in T6:T7
922
923
924
925 #######################################################################
926 #first phase of the reduction
927 #######################################################################
928 vpslld $31, \T7, \T2 # packed right shifting << 31
929 vpslld $30, \T7, \T3 # packed right shifting shift << 30
930 vpslld $25, \T7, \T4 # packed right shifting shift << 25
931
932 vpxor \T3, \T2, \T2 # xor the shifted versions
933 vpxor \T4, \T2, \T2
934
935 vpsrldq $4, \T2, \T1 # shift-R T1 1 DW
936
937 vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
938 vpxor \T2, \T7, \T7 # first phase of the reduction complete
939 #######################################################################
940 .if \ENC_DEC == ENC
941 vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer
942 vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer
943 vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer
944 vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer
945 vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer
946 vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer
947 vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer
948 vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer
949 .endif
950
951 #######################################################################
952 #second phase of the reduction
953 vpsrld $1, \T7, \T2 # packed left shifting >> 1
954 vpsrld $2, \T7, \T3 # packed left shifting >> 2
955 vpsrld $7, \T7, \T4 # packed left shifting >> 7
956 vpxor \T3, \T2, \T2 # xor the shifted versions
957 vpxor \T4, \T2, \T2
958
959 vpxor \T1, \T2, \T2
960 vpxor \T2, \T7, \T7
961 vpxor \T7, \T6, \T6 # the result is in T6
962 #######################################################################
963
964 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
965 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
966 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
967 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
968 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
969 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
970 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
971 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
972
973
974 vpxor \T6, \XMM1, \XMM1
975
976
977
978.endm
979
980
981# GHASH the last 4 ciphertext blocks.
982.macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8
983
984 ## Karatsuba Method
985
986
987 vpshufd $0b01001110, \XMM1, \T2
988 vpxor \XMM1, \T2, \T2
989 vmovdqa HashKey_8(arg1), \T5
990 vpclmulqdq $0x11, \T5, \XMM1, \T6
991 vpclmulqdq $0x00, \T5, \XMM1, \T7
992
993 vmovdqa HashKey_8_k(arg1), \T3
994 vpclmulqdq $0x00, \T3, \T2, \XMM1
995
996 ######################
997
998 vpshufd $0b01001110, \XMM2, \T2
999 vpxor \XMM2, \T2, \T2
1000 vmovdqa HashKey_7(arg1), \T5
1001 vpclmulqdq $0x11, \T5, \XMM2, \T4
1002 vpxor \T4, \T6, \T6
1003
1004 vpclmulqdq $0x00, \T5, \XMM2, \T4
1005 vpxor \T4, \T7, \T7
1006
1007 vmovdqa HashKey_7_k(arg1), \T3
1008 vpclmulqdq $0x00, \T3, \T2, \T2
1009 vpxor \T2, \XMM1, \XMM1
1010
1011 ######################
1012
1013 vpshufd $0b01001110, \XMM3, \T2
1014 vpxor \XMM3, \T2, \T2
1015 vmovdqa HashKey_6(arg1), \T5
1016 vpclmulqdq $0x11, \T5, \XMM3, \T4
1017 vpxor \T4, \T6, \T6
1018
1019 vpclmulqdq $0x00, \T5, \XMM3, \T4
1020 vpxor \T4, \T7, \T7
1021
1022 vmovdqa HashKey_6_k(arg1), \T3
1023 vpclmulqdq $0x00, \T3, \T2, \T2
1024 vpxor \T2, \XMM1, \XMM1
1025
1026 ######################
1027
1028 vpshufd $0b01001110, \XMM4, \T2
1029 vpxor \XMM4, \T2, \T2
1030 vmovdqa HashKey_5(arg1), \T5
1031 vpclmulqdq $0x11, \T5, \XMM4, \T4
1032 vpxor \T4, \T6, \T6
1033
1034 vpclmulqdq $0x00, \T5, \XMM4, \T4
1035 vpxor \T4, \T7, \T7
1036
1037 vmovdqa HashKey_5_k(arg1), \T3
1038 vpclmulqdq $0x00, \T3, \T2, \T2
1039 vpxor \T2, \XMM1, \XMM1
1040
1041 ######################
1042
1043 vpshufd $0b01001110, \XMM5, \T2
1044 vpxor \XMM5, \T2, \T2
1045 vmovdqa HashKey_4(arg1), \T5
1046 vpclmulqdq $0x11, \T5, \XMM5, \T4
1047 vpxor \T4, \T6, \T6
1048
1049 vpclmulqdq $0x00, \T5, \XMM5, \T4
1050 vpxor \T4, \T7, \T7
1051
1052 vmovdqa HashKey_4_k(arg1), \T3
1053 vpclmulqdq $0x00, \T3, \T2, \T2
1054 vpxor \T2, \XMM1, \XMM1
1055
1056 ######################
1057
1058 vpshufd $0b01001110, \XMM6, \T2
1059 vpxor \XMM6, \T2, \T2
1060 vmovdqa HashKey_3(arg1), \T5
1061 vpclmulqdq $0x11, \T5, \XMM6, \T4
1062 vpxor \T4, \T6, \T6
1063
1064 vpclmulqdq $0x00, \T5, \XMM6, \T4
1065 vpxor \T4, \T7, \T7
1066
1067 vmovdqa HashKey_3_k(arg1), \T3
1068 vpclmulqdq $0x00, \T3, \T2, \T2
1069 vpxor \T2, \XMM1, \XMM1
1070
1071 ######################
1072
1073 vpshufd $0b01001110, \XMM7, \T2
1074 vpxor \XMM7, \T2, \T2
1075 vmovdqa HashKey_2(arg1), \T5
1076 vpclmulqdq $0x11, \T5, \XMM7, \T4
1077 vpxor \T4, \T6, \T6
1078
1079 vpclmulqdq $0x00, \T5, \XMM7, \T4
1080 vpxor \T4, \T7, \T7
1081
1082 vmovdqa HashKey_2_k(arg1), \T3
1083 vpclmulqdq $0x00, \T3, \T2, \T2
1084 vpxor \T2, \XMM1, \XMM1
1085
1086 ######################
1087
1088 vpshufd $0b01001110, \XMM8, \T2
1089 vpxor \XMM8, \T2, \T2
1090 vmovdqa HashKey(arg1), \T5
1091 vpclmulqdq $0x11, \T5, \XMM8, \T4
1092 vpxor \T4, \T6, \T6
1093
1094 vpclmulqdq $0x00, \T5, \XMM8, \T4
1095 vpxor \T4, \T7, \T7
1096
1097 vmovdqa HashKey_k(arg1), \T3
1098 vpclmulqdq $0x00, \T3, \T2, \T2
1099
1100 vpxor \T2, \XMM1, \XMM1
1101 vpxor \T6, \XMM1, \XMM1
1102 vpxor \T7, \XMM1, \T2
1103
1104
1105
1106
1107 vpslldq $8, \T2, \T4
1108 vpsrldq $8, \T2, \T2
1109
1110 vpxor \T4, \T7, \T7
1111 vpxor \T2, \T6, \T6 # <T6:T7> holds the result of
1112 # the accumulated carry-less multiplications
1113
1114 #######################################################################
1115 #first phase of the reduction
1116 vpslld $31, \T7, \T2 # packed right shifting << 31
1117 vpslld $30, \T7, \T3 # packed right shifting shift << 30
1118 vpslld $25, \T7, \T4 # packed right shifting shift << 25
1119
1120 vpxor \T3, \T2, \T2 # xor the shifted versions
1121 vpxor \T4, \T2, \T2
1122
1123 vpsrldq $4, \T2, \T1 # shift-R T1 1 DW
1124
1125 vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
1126 vpxor \T2, \T7, \T7 # first phase of the reduction complete
1127 #######################################################################
1128
1129
1130 #second phase of the reduction
1131 vpsrld $1, \T7, \T2 # packed left shifting >> 1
1132 vpsrld $2, \T7, \T3 # packed left shifting >> 2
1133 vpsrld $7, \T7, \T4 # packed left shifting >> 7
1134 vpxor \T3, \T2, \T2 # xor the shifted versions
1135 vpxor \T4, \T2, \T2
1136
1137 vpxor \T1, \T2, \T2
1138 vpxor \T2, \T7, \T7
1139 vpxor \T7, \T6, \T6 # the result is in T6
1140
1141.endm
1142
1143
1144# combined for GCM encrypt and decrypt functions
1145# clobbering all xmm registers
1146# clobbering r10, r11, r12, r13, r14, r15
1147.macro GCM_ENC_DEC_AVX ENC_DEC
1148
1149 #the number of pushes must equal STACK_OFFSET
1150 push %r12
1151 push %r13
1152 push %r14
1153 push %r15
1154
1155 mov %rsp, %r14
1156
1157
1158
1159
1160 sub $VARIABLE_OFFSET, %rsp
1161 and $~63, %rsp # align rsp to 64 bytes
1162
1163
1164 vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey
1165
1166 mov arg4, %r13 # save the number of bytes of plaintext/ciphertext
1167 and $-16, %r13 # r13 = r13 - (r13 mod 16)
1168
1169 mov %r13, %r12
1170 shr $4, %r12
1171 and $7, %r12
1172 jz _initial_num_blocks_is_0\@
1173
1174 cmp $7, %r12
1175 je _initial_num_blocks_is_7\@
1176 cmp $6, %r12
1177 je _initial_num_blocks_is_6\@
1178 cmp $5, %r12
1179 je _initial_num_blocks_is_5\@
1180 cmp $4, %r12
1181 je _initial_num_blocks_is_4\@
1182 cmp $3, %r12
1183 je _initial_num_blocks_is_3\@
1184 cmp $2, %r12
1185 je _initial_num_blocks_is_2\@
1186
1187 jmp _initial_num_blocks_is_1\@
1188
1189_initial_num_blocks_is_7\@:
1190 INITIAL_BLOCKS_AVX 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1191 sub $16*7, %r13
1192 jmp _initial_blocks_encrypted\@
1193
1194_initial_num_blocks_is_6\@:
1195 INITIAL_BLOCKS_AVX 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1196 sub $16*6, %r13
1197 jmp _initial_blocks_encrypted\@
1198
1199_initial_num_blocks_is_5\@:
1200 INITIAL_BLOCKS_AVX 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1201 sub $16*5, %r13
1202 jmp _initial_blocks_encrypted\@
1203
1204_initial_num_blocks_is_4\@:
1205 INITIAL_BLOCKS_AVX 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1206 sub $16*4, %r13
1207 jmp _initial_blocks_encrypted\@
1208
1209_initial_num_blocks_is_3\@:
1210 INITIAL_BLOCKS_AVX 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1211 sub $16*3, %r13
1212 jmp _initial_blocks_encrypted\@
1213
1214_initial_num_blocks_is_2\@:
1215 INITIAL_BLOCKS_AVX 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1216 sub $16*2, %r13
1217 jmp _initial_blocks_encrypted\@
1218
1219_initial_num_blocks_is_1\@:
1220 INITIAL_BLOCKS_AVX 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1221 sub $16*1, %r13
1222 jmp _initial_blocks_encrypted\@
1223
1224_initial_num_blocks_is_0\@:
1225 INITIAL_BLOCKS_AVX 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
1226
1227
1228_initial_blocks_encrypted\@:
1229 cmp $0, %r13
1230 je _zero_cipher_left\@
1231
1232 sub $128, %r13
1233 je _eight_cipher_left\@
1234
1235
1236
1237
1238 vmovd %xmm9, %r15d
1239 and $255, %r15d
1240 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1241
1242
1243_encrypt_by_8_new\@:
1244 cmp $(255-8), %r15d
1245 jg _encrypt_by_8\@
1246
1247
1248
1249 add $8, %r15b
1250 GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
1251 add $128, %r11
1252 sub $128, %r13
1253 jne _encrypt_by_8_new\@
1254
1255 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1256 jmp _eight_cipher_left\@
1257
1258_encrypt_by_8\@:
1259 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1260 add $8, %r15b
1261 GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
1262 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1263 add $128, %r11
1264 sub $128, %r13
1265 jne _encrypt_by_8_new\@
1266
1267 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1268
1269
1270
1271
1272_eight_cipher_left\@:
1273 GHASH_LAST_8_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
1274
1275
1276_zero_cipher_left\@:
1277 cmp $16, arg4
1278 jl _only_less_than_16\@
1279
1280 mov arg4, %r13
1281 and $15, %r13 # r13 = (arg4 mod 16)
1282
1283 je _multiple_of_16_bytes\@
1284
1285 # handle the last <16 Byte block seperately
1286
1287
1288 vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
1289 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1290 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
1291
1292 sub $16, %r11
1293 add %r13, %r11
1294 vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block
1295
1296 lea SHIFT_MASK+16(%rip), %r12
1297 sub %r13, %r12 # adjust the shuffle mask pointer to be
1298 # able to shift 16-r13 bytes (r13 is the
1299 # number of bytes in plaintext mod 16)
1300 vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
1301 vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes
1302 jmp _final_ghash_mul\@
1303
1304_only_less_than_16\@:
1305 # check for 0 length
1306 mov arg4, %r13
1307 and $15, %r13 # r13 = (arg4 mod 16)
1308
1309 je _multiple_of_16_bytes\@
1310
1311 # handle the last <16 Byte block seperately
1312
1313
1314 vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
1315 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1316 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
1317
1318
1319 lea SHIFT_MASK+16(%rip), %r12
1320 sub %r13, %r12 # adjust the shuffle mask pointer to be
1321 # able to shift 16-r13 bytes (r13 is the
1322 # number of bytes in plaintext mod 16)
1323
1324_get_last_16_byte_loop\@:
1325 movb (arg3, %r11), %al
1326 movb %al, TMP1 (%rsp , %r11)
1327 add $1, %r11
1328 cmp %r13, %r11
1329 jne _get_last_16_byte_loop\@
1330
1331 vmovdqu TMP1(%rsp), %xmm1
1332
1333 sub $16, %r11
1334
1335_final_ghash_mul\@:
1336 .if \ENC_DEC == DEC
1337 vmovdqa %xmm1, %xmm2
1338 vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
1339 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
1340 # mask out top 16-r13 bytes of xmm9
1341 vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
1342 vpand %xmm1, %xmm2, %xmm2
1343 vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
1344 vpxor %xmm2, %xmm14, %xmm14
1345 #GHASH computation for the last <16 Byte block
1346 GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
1347 sub %r13, %r11
1348 add $16, %r11
1349 .else
1350 vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
1351 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
1352 # mask out top 16-r13 bytes of xmm9
1353 vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
1354 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
1355 vpxor %xmm9, %xmm14, %xmm14
1356 #GHASH computation for the last <16 Byte block
1357 GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
1358 sub %r13, %r11
1359 add $16, %r11
1360 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
1361 .endif
1362
1363
1364 #############################
1365 # output r13 Bytes
1366 vmovq %xmm9, %rax
1367 cmp $8, %r13
1368 jle _less_than_8_bytes_left\@
1369
1370 mov %rax, (arg2 , %r11)
1371 add $8, %r11
1372 vpsrldq $8, %xmm9, %xmm9
1373 vmovq %xmm9, %rax
1374 sub $8, %r13
1375
1376_less_than_8_bytes_left\@:
1377 movb %al, (arg2 , %r11)
1378 add $1, %r11
1379 shr $8, %rax
1380 sub $1, %r13
1381 jne _less_than_8_bytes_left\@
1382 #############################
1383
1384_multiple_of_16_bytes\@:
1385 mov arg7, %r12 # r12 = aadLen (number of bytes)
1386 shl $3, %r12 # convert into number of bits
1387 vmovd %r12d, %xmm15 # len(A) in xmm15
1388
1389 shl $3, arg4 # len(C) in bits (*128)
1390 vmovq arg4, %xmm1
1391 vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000
1392 vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C)
1393
1394 vpxor %xmm15, %xmm14, %xmm14
1395 GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation
1396 vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap
1397
1398 mov arg5, %rax # rax = *Y0
1399 vmovdqu (%rax), %xmm9 # xmm9 = Y0
1400
1401 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0)
1402
1403 vpxor %xmm14, %xmm9, %xmm9
1404
1405
1406
1407_return_T\@:
1408 mov arg8, %r10 # r10 = authTag
1409 mov arg9, %r11 # r11 = auth_tag_len
1410
1411 cmp $16, %r11
1412 je _T_16\@
1413
1414 cmp $12, %r11
1415 je _T_12\@
1416
1417_T_8\@:
1418 vmovq %xmm9, %rax
1419 mov %rax, (%r10)
1420 jmp _return_T_done\@
1421_T_12\@:
1422 vmovq %xmm9, %rax
1423 mov %rax, (%r10)
1424 vpsrldq $8, %xmm9, %xmm9
1425 vmovd %xmm9, %eax
1426 mov %eax, 8(%r10)
1427 jmp _return_T_done\@
1428
1429_T_16\@:
1430 vmovdqu %xmm9, (%r10)
1431
1432_return_T_done\@:
1433 mov %r14, %rsp
1434
1435 pop %r15
1436 pop %r14
1437 pop %r13
1438 pop %r12
1439.endm
1440
1441
1442#############################################################
1443#void aesni_gcm_precomp_avx_gen2
1444# (gcm_data *my_ctx_data,
1445# u8 *hash_subkey)# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
1446#############################################################
1447ENTRY(aesni_gcm_precomp_avx_gen2)
1448 #the number of pushes must equal STACK_OFFSET
1449 push %r12
1450 push %r13
1451 push %r14
1452 push %r15
1453
1454 mov %rsp, %r14
1455
1456
1457
1458 sub $VARIABLE_OFFSET, %rsp
1459 and $~63, %rsp # align rsp to 64 bytes
1460
1461 vmovdqu (arg2), %xmm6 # xmm6 = HashKey
1462
1463 vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
1464 ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
1465 vmovdqa %xmm6, %xmm2
1466 vpsllq $1, %xmm6, %xmm6
1467 vpsrlq $63, %xmm2, %xmm2
1468 vmovdqa %xmm2, %xmm1
1469 vpslldq $8, %xmm2, %xmm2
1470 vpsrldq $8, %xmm1, %xmm1
1471 vpor %xmm2, %xmm6, %xmm6
1472 #reduction
1473 vpshufd $0b00100100, %xmm1, %xmm2
1474 vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
1475 vpand POLY(%rip), %xmm2, %xmm2
1476 vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly
1477 #######################################################################
1478 vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly
1479
1480
1481 PRECOMPUTE_AVX %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
1482
1483 mov %r14, %rsp
1484
1485 pop %r15
1486 pop %r14
1487 pop %r13
1488 pop %r12
1489 ret
1490ENDPROC(aesni_gcm_precomp_avx_gen2)
1491
1492###############################################################################
1493#void aesni_gcm_enc_avx_gen2(
1494# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
1495# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */
1496# const u8 *in, /* Plaintext input */
1497# u64 plaintext_len, /* Length of data in Bytes for encryption. */
1498# u8 *iv, /* Pre-counter block j0: 4 byte salt
1499# (from Security Association) concatenated with 8 byte
1500# Initialisation Vector (from IPSec ESP Payload)
1501# concatenated with 0x00000001. 16-byte aligned pointer. */
1502# const u8 *aad, /* Additional Authentication Data (AAD)*/
1503# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
1504# u8 *auth_tag, /* Authenticated Tag output. */
1505# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
1506# Valid values are 16 (most likely), 12 or 8. */
1507###############################################################################
1508ENTRY(aesni_gcm_enc_avx_gen2)
1509 GCM_ENC_DEC_AVX ENC
1510 ret
1511ENDPROC(aesni_gcm_enc_avx_gen2)
1512
1513###############################################################################
1514#void aesni_gcm_dec_avx_gen2(
1515# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
1516# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */
1517# const u8 *in, /* Ciphertext input */
1518# u64 plaintext_len, /* Length of data in Bytes for encryption. */
1519# u8 *iv, /* Pre-counter block j0: 4 byte salt
1520# (from Security Association) concatenated with 8 byte
1521# Initialisation Vector (from IPSec ESP Payload)
1522# concatenated with 0x00000001. 16-byte aligned pointer. */
1523# const u8 *aad, /* Additional Authentication Data (AAD)*/
1524# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
1525# u8 *auth_tag, /* Authenticated Tag output. */
1526# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
1527# Valid values are 16 (most likely), 12 or 8. */
1528###############################################################################
1529ENTRY(aesni_gcm_dec_avx_gen2)
1530 GCM_ENC_DEC_AVX DEC
1531 ret
1532ENDPROC(aesni_gcm_dec_avx_gen2)
1533#endif /* CONFIG_AS_AVX */
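
# For reference, the prototype comments above map to C-side declarations along
# these lines (a sketch inferred from the comments in this file, not copied from
# aesni-intel_glue.c, where the exact types may differ):

#include <linux/types.h>

struct gcm_data;	/* AES-GCM context laid out as described at the top of this file */

void aesni_gcm_precomp_avx_gen2(struct gcm_data *my_ctx_data, u8 *hash_subkey);

void aesni_gcm_enc_avx_gen2(struct gcm_data *my_ctx_data, u8 *out, const u8 *in,
			    u64 plaintext_len, u8 *iv, const u8 *aad, u64 aad_len,
			    u8 *auth_tag, u64 auth_tag_len);

void aesni_gcm_dec_avx_gen2(struct gcm_data *my_ctx_data, u8 *out, const u8 *in,
			    u64 plaintext_len, u8 *iv, const u8 *aad, u64 aad_len,
			    u8 *auth_tag, u64 auth_tag_len);
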
1534
1535#ifdef CONFIG_AS_AVX2
1536###############################################################################
1537# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
1538# Input: A and B (128-bits each, bit-reflected)
1539# Output: C = A*B*x mod poly, (i.e. >>1 )
1540# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
1541# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
1542###############################################################################
1543.macro GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5
1544
1545 vpclmulqdq $0x11,\HK,\GH,\T1 # T1 = a1*b1
1546 vpclmulqdq $0x00,\HK,\GH,\T2 # T2 = a0*b0
1547 vpclmulqdq $0x01,\HK,\GH,\T3 # T3 = a1*b0
1548 vpclmulqdq $0x10,\HK,\GH,\GH # GH = a0*b1
1549 vpxor \T3, \GH, \GH
1550
1551
1552 vpsrldq $8 , \GH, \T3 # shift-R GH 2 DWs
1553 vpslldq $8 , \GH, \GH # shift-L GH 2 DWs
1554
1555 vpxor \T3, \T1, \T1
1556 vpxor \T2, \GH, \GH
1557
1558 #######################################################################
1559 #first phase of the reduction
1560 vmovdqa POLY2(%rip), \T3
1561
1562 vpclmulqdq $0x01, \GH, \T3, \T2
1563 vpslldq $8, \T2, \T2 # shift-L T2 2 DWs
1564
1565 vpxor \T2, \GH, \GH # first phase of the reduction complete
1566 #######################################################################
1567 #second phase of the reduction
1568 vpclmulqdq $0x00, \GH, \T3, \T2
1569 vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
1570
1571 vpclmulqdq $0x10, \GH, \T3, \GH
1572 vpslldq $4, \GH, \GH # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts)
1573
1574 vpxor \T2, \GH, \GH # second phase of the reduction complete
1575 #######################################################################
1576 vpxor \T1, \GH, \GH # the result is in GH
1577
1578
1579.endm
1580
1581.macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6
1582
1583        # HashKey_i_k holds XORed values of the low and high parts of HashKey_i
1584 vmovdqa \HK, \T5
1585 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly
1586 vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly
1587
1588 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly
1589 vmovdqa \T5, HashKey_3(arg1)
1590
1591 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly
1592 vmovdqa \T5, HashKey_4(arg1)
1593
1594 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly
1595 vmovdqa \T5, HashKey_5(arg1)
1596
1597 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly
1598 vmovdqa \T5, HashKey_6(arg1)
1599
1600 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly
1601 vmovdqa \T5, HashKey_7(arg1)
1602
1603 GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly
1604 vmovdqa \T5, HashKey_8(arg1)
1605
1606.endm
1607
1608
1609## if a = number of total plaintext bytes
1610## b = floor(a/16)
1611 ## num_initial_blocks = b mod 8
1612## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
1613## r10, r11, r12, rax are clobbered
1614## arg1, arg2, arg3, r14 are used as a pointer only, not modified
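## Editorial worked example: with a = 200 plaintext bytes, b = floor(200/16) = 12,
## so num_initial_blocks = 12 mod 8 = 4; those 4 blocks are handled here, the next
## 8 full blocks go through the 8-way parallel loop, and the trailing 8 bytes are
## processed as the final partial block.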
1615
1616.macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
1617 i = (8-\num_initial_blocks)
1618 setreg
1619
1620 mov arg6, %r10 # r10 = AAD
1621 mov arg7, %r12 # r12 = aadLen
1622
1623
1624 mov %r12, %r11
1625
1626 vpxor reg_i, reg_i, reg_i
1627_get_AAD_loop\@:
1628 vmovd (%r10), \T1
1629 vpslldq $12, \T1, \T1
1630 vpsrldq $4, reg_i, reg_i
1631 vpxor \T1, reg_i, reg_i
1632
1633 add $4, %r10
1634 sub $4, %r12
1635 jg _get_AAD_loop\@
1636
1637
1638 cmp $16, %r11
1639 je _get_AAD_loop2_done\@
1640 mov $16, %r12
1641
1642_get_AAD_loop2\@:
1643 vpsrldq $4, reg_i, reg_i
1644 sub $4, %r12
1645 cmp %r11, %r12
1646 jg _get_AAD_loop2\@
1647
1648_get_AAD_loop2_done\@:
1649
1650 #byte-reflect the AAD data
1651 vpshufb SHUF_MASK(%rip), reg_i, reg_i
1652
1653 # initialize the data pointer offset as zero
1654 xor %r11, %r11
1655
1656 # start AES for num_initial_blocks blocks
1657 mov arg5, %rax # rax = *Y0
1658 vmovdqu (%rax), \CTR # CTR = Y0
1659 vpshufb SHUF_MASK(%rip), \CTR, \CTR
1660
1661
1662 i = (9-\num_initial_blocks)
1663 setreg
1664.rep \num_initial_blocks
1665 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1666 vmovdqa \CTR, reg_i
1667 vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap
1668 i = (i+1)
1669 setreg
1670.endr
1671
1672 vmovdqa (arg1), \T_key
1673 i = (9-\num_initial_blocks)
1674 setreg
1675.rep \num_initial_blocks
1676 vpxor \T_key, reg_i, reg_i
1677 i = (i+1)
1678 setreg
1679.endr
1680
1681 j = 1
1682 setreg
1683.rep 9
1684 vmovdqa 16*j(arg1), \T_key
1685 i = (9-\num_initial_blocks)
1686 setreg
1687.rep \num_initial_blocks
1688 vaesenc \T_key, reg_i, reg_i
1689 i = (i+1)
1690 setreg
1691.endr
1692
1693 j = (j+1)
1694 setreg
1695.endr
1696
1697
1698 vmovdqa 16*10(arg1), \T_key
1699 i = (9-\num_initial_blocks)
1700 setreg
1701.rep \num_initial_blocks
1702 vaesenclast \T_key, reg_i, reg_i
1703 i = (i+1)
1704 setreg
1705.endr
1706
1707 i = (9-\num_initial_blocks)
1708 setreg
1709.rep \num_initial_blocks
1710 vmovdqu (arg3, %r11), \T1
1711 vpxor \T1, reg_i, reg_i
1712 vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for
1713 # num_initial_blocks blocks
1714 add $16, %r11
1715.if \ENC_DEC == DEC
1716 vmovdqa \T1, reg_i
1717.endif
1718 vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations
1719 i = (i+1)
1720 setreg
1721.endr
1722
1723
1724 i = (8-\num_initial_blocks)
1725 j = (9-\num_initial_blocks)
1726 setreg
1727 GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6
1728
1729.rep \num_initial_blocks
1730 vpxor reg_i, reg_j, reg_j
1731 GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks
1732 i = (i+1)
1733 j = (j+1)
1734 setreg
1735.endr
1736 # XMM8 has the combined result here
1737
1738 vmovdqa \XMM8, TMP1(%rsp)
1739 vmovdqa \XMM8, \T3
1740
1741 cmp $128, %r13
1742 jl _initial_blocks_done\@ # no need for precomputed constants
1743
1744###############################################################################
1745        # Encrypt the first full 8-block chunk up front; the main loop will
            # GHASH its ciphertext while encrypting the next chunk
1746 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1747 vmovdqa \CTR, \XMM1
1748 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
1749
1750 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1751 vmovdqa \CTR, \XMM2
1752 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
1753
1754 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1755 vmovdqa \CTR, \XMM3
1756 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
1757
1758 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1759 vmovdqa \CTR, \XMM4
1760 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
1761
1762 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1763 vmovdqa \CTR, \XMM5
1764 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
1765
1766 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1767 vmovdqa \CTR, \XMM6
1768 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
1769
1770 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1771 vmovdqa \CTR, \XMM7
1772 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
1773
1774 vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
1775 vmovdqa \CTR, \XMM8
1776 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
1777
1778 vmovdqa (arg1), \T_key
1779 vpxor \T_key, \XMM1, \XMM1
1780 vpxor \T_key, \XMM2, \XMM2
1781 vpxor \T_key, \XMM3, \XMM3
1782 vpxor \T_key, \XMM4, \XMM4
1783 vpxor \T_key, \XMM5, \XMM5
1784 vpxor \T_key, \XMM6, \XMM6
1785 vpxor \T_key, \XMM7, \XMM7
1786 vpxor \T_key, \XMM8, \XMM8
1787
1788 i = 1
1789 setreg
1790.rep 9 # do 9 rounds
1791 vmovdqa 16*i(arg1), \T_key
1792 vaesenc \T_key, \XMM1, \XMM1
1793 vaesenc \T_key, \XMM2, \XMM2
1794 vaesenc \T_key, \XMM3, \XMM3
1795 vaesenc \T_key, \XMM4, \XMM4
1796 vaesenc \T_key, \XMM5, \XMM5
1797 vaesenc \T_key, \XMM6, \XMM6
1798 vaesenc \T_key, \XMM7, \XMM7
1799 vaesenc \T_key, \XMM8, \XMM8
1800 i = (i+1)
1801 setreg
1802.endr
1803
1804
1805 vmovdqa 16*i(arg1), \T_key
1806 vaesenclast \T_key, \XMM1, \XMM1
1807 vaesenclast \T_key, \XMM2, \XMM2
1808 vaesenclast \T_key, \XMM3, \XMM3
1809 vaesenclast \T_key, \XMM4, \XMM4
1810 vaesenclast \T_key, \XMM5, \XMM5
1811 vaesenclast \T_key, \XMM6, \XMM6
1812 vaesenclast \T_key, \XMM7, \XMM7
1813 vaesenclast \T_key, \XMM8, \XMM8
1814
1815 vmovdqu (arg3, %r11), \T1
1816 vpxor \T1, \XMM1, \XMM1
1817 vmovdqu \XMM1, (arg2 , %r11)
1818 .if \ENC_DEC == DEC
1819 vmovdqa \T1, \XMM1
1820 .endif
1821
1822 vmovdqu 16*1(arg3, %r11), \T1
1823 vpxor \T1, \XMM2, \XMM2
1824 vmovdqu \XMM2, 16*1(arg2 , %r11)
1825 .if \ENC_DEC == DEC
1826 vmovdqa \T1, \XMM2
1827 .endif
1828
1829 vmovdqu 16*2(arg3, %r11), \T1
1830 vpxor \T1, \XMM3, \XMM3
1831 vmovdqu \XMM3, 16*2(arg2 , %r11)
1832 .if \ENC_DEC == DEC
1833 vmovdqa \T1, \XMM3
1834 .endif
1835
1836 vmovdqu 16*3(arg3, %r11), \T1
1837 vpxor \T1, \XMM4, \XMM4
1838 vmovdqu \XMM4, 16*3(arg2 , %r11)
1839 .if \ENC_DEC == DEC
1840 vmovdqa \T1, \XMM4
1841 .endif
1842
1843 vmovdqu 16*4(arg3, %r11), \T1
1844 vpxor \T1, \XMM5, \XMM5
1845 vmovdqu \XMM5, 16*4(arg2 , %r11)
1846 .if \ENC_DEC == DEC
1847 vmovdqa \T1, \XMM5
1848 .endif
1849
1850 vmovdqu 16*5(arg3, %r11), \T1
1851 vpxor \T1, \XMM6, \XMM6
1852 vmovdqu \XMM6, 16*5(arg2 , %r11)
1853 .if \ENC_DEC == DEC
1854 vmovdqa \T1, \XMM6
1855 .endif
1856
1857 vmovdqu 16*6(arg3, %r11), \T1
1858 vpxor \T1, \XMM7, \XMM7
1859 vmovdqu \XMM7, 16*6(arg2 , %r11)
1860 .if \ENC_DEC == DEC
1861 vmovdqa \T1, \XMM7
1862 .endif
1863
1864 vmovdqu 16*7(arg3, %r11), \T1
1865 vpxor \T1, \XMM8, \XMM8
1866 vmovdqu \XMM8, 16*7(arg2 , %r11)
1867 .if \ENC_DEC == DEC
1868 vmovdqa \T1, \XMM8
1869 .endif
1870
1871 add $128, %r11
1872
1873 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
1874 vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with
1875 # the corresponding ciphertext
1876 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
1877 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
1878 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
1879 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
1880 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
1881 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
1882 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
1883
1884###############################################################################
1885
1886_initial_blocks_done\@:
1887
1888
1889.endm
1890
1891
1892
1893# encrypt 8 blocks at a time
1894# ghash the 8 previously encrypted ciphertext blocks
1895# arg1, arg2, arg3 are used as pointers only, not modified
1896# r11 is the data offset value
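# Editorial note: the AES rounds for the eight fresh counter blocks below are
# interleaved with the VPCLMULQDQ partial products of the eight previous
# ciphertext blocks against HashKey^8..HashKey^1, so the GHASH of the prior
# chunk is folded in while the current chunk is still being encrypted.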
1897.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
1898
1899 vmovdqa \XMM1, \T2
1900 vmovdqa \XMM2, TMP2(%rsp)
1901 vmovdqa \XMM3, TMP3(%rsp)
1902 vmovdqa \XMM4, TMP4(%rsp)
1903 vmovdqa \XMM5, TMP5(%rsp)
1904 vmovdqa \XMM6, TMP6(%rsp)
1905 vmovdqa \XMM7, TMP7(%rsp)
1906 vmovdqa \XMM8, TMP8(%rsp)
1907
1908.if \loop_idx == in_order
1909 vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT
1910 vpaddd ONE(%rip), \XMM1, \XMM2
1911 vpaddd ONE(%rip), \XMM2, \XMM3
1912 vpaddd ONE(%rip), \XMM3, \XMM4
1913 vpaddd ONE(%rip), \XMM4, \XMM5
1914 vpaddd ONE(%rip), \XMM5, \XMM6
1915 vpaddd ONE(%rip), \XMM6, \XMM7
1916 vpaddd ONE(%rip), \XMM7, \XMM8
1917 vmovdqa \XMM8, \CTR
1918
1919 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
1920 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
1921 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
1922 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
1923 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
1924 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
1925 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
1926 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
1927.else
1928 vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT
1929 vpaddd ONEf(%rip), \XMM1, \XMM2
1930 vpaddd ONEf(%rip), \XMM2, \XMM3
1931 vpaddd ONEf(%rip), \XMM3, \XMM4
1932 vpaddd ONEf(%rip), \XMM4, \XMM5
1933 vpaddd ONEf(%rip), \XMM5, \XMM6
1934 vpaddd ONEf(%rip), \XMM6, \XMM7
1935 vpaddd ONEf(%rip), \XMM7, \XMM8
1936 vmovdqa \XMM8, \CTR
1937.endif
1938
1939
1940 #######################################################################
1941
1942 vmovdqu (arg1), \T1
1943 vpxor \T1, \XMM1, \XMM1
1944 vpxor \T1, \XMM2, \XMM2
1945 vpxor \T1, \XMM3, \XMM3
1946 vpxor \T1, \XMM4, \XMM4
1947 vpxor \T1, \XMM5, \XMM5
1948 vpxor \T1, \XMM6, \XMM6
1949 vpxor \T1, \XMM7, \XMM7
1950 vpxor \T1, \XMM8, \XMM8
1951
1952 #######################################################################
1953
1954
1955
1956
1957
1958 vmovdqu 16*1(arg1), \T1
1959 vaesenc \T1, \XMM1, \XMM1
1960 vaesenc \T1, \XMM2, \XMM2
1961 vaesenc \T1, \XMM3, \XMM3
1962 vaesenc \T1, \XMM4, \XMM4
1963 vaesenc \T1, \XMM5, \XMM5
1964 vaesenc \T1, \XMM6, \XMM6
1965 vaesenc \T1, \XMM7, \XMM7
1966 vaesenc \T1, \XMM8, \XMM8
1967
1968 vmovdqu 16*2(arg1), \T1
1969 vaesenc \T1, \XMM1, \XMM1
1970 vaesenc \T1, \XMM2, \XMM2
1971 vaesenc \T1, \XMM3, \XMM3
1972 vaesenc \T1, \XMM4, \XMM4
1973 vaesenc \T1, \XMM5, \XMM5
1974 vaesenc \T1, \XMM6, \XMM6
1975 vaesenc \T1, \XMM7, \XMM7
1976 vaesenc \T1, \XMM8, \XMM8
1977
1978
1979 #######################################################################
1980
1981 vmovdqa HashKey_8(arg1), \T5
1982 vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1
1983 vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0
1984 vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0
1985 vpclmulqdq $0x10, \T5, \T2, \T5 # T5 = a0*b1
1986 vpxor \T5, \T6, \T6
1987
1988 vmovdqu 16*3(arg1), \T1
1989 vaesenc \T1, \XMM1, \XMM1
1990 vaesenc \T1, \XMM2, \XMM2
1991 vaesenc \T1, \XMM3, \XMM3
1992 vaesenc \T1, \XMM4, \XMM4
1993 vaesenc \T1, \XMM5, \XMM5
1994 vaesenc \T1, \XMM6, \XMM6
1995 vaesenc \T1, \XMM7, \XMM7
1996 vaesenc \T1, \XMM8, \XMM8
1997
1998 vmovdqa TMP2(%rsp), \T1
1999 vmovdqa HashKey_7(arg1), \T5
2000 vpclmulqdq $0x11, \T5, \T1, \T3
2001 vpxor \T3, \T4, \T4
2002
2003 vpclmulqdq $0x00, \T5, \T1, \T3
2004 vpxor \T3, \T7, \T7
2005
2006 vpclmulqdq $0x01, \T5, \T1, \T3
2007 vpxor \T3, \T6, \T6
2008
2009 vpclmulqdq $0x10, \T5, \T1, \T3
2010 vpxor \T3, \T6, \T6
2011
2012 vmovdqu 16*4(arg1), \T1
2013 vaesenc \T1, \XMM1, \XMM1
2014 vaesenc \T1, \XMM2, \XMM2
2015 vaesenc \T1, \XMM3, \XMM3
2016 vaesenc \T1, \XMM4, \XMM4
2017 vaesenc \T1, \XMM5, \XMM5
2018 vaesenc \T1, \XMM6, \XMM6
2019 vaesenc \T1, \XMM7, \XMM7
2020 vaesenc \T1, \XMM8, \XMM8
2021
2022 #######################################################################
2023
2024 vmovdqa TMP3(%rsp), \T1
2025 vmovdqa HashKey_6(arg1), \T5
2026 vpclmulqdq $0x11, \T5, \T1, \T3
2027 vpxor \T3, \T4, \T4
2028
2029 vpclmulqdq $0x00, \T5, \T1, \T3
2030 vpxor \T3, \T7, \T7
2031
2032 vpclmulqdq $0x01, \T5, \T1, \T3
2033 vpxor \T3, \T6, \T6
2034
2035 vpclmulqdq $0x10, \T5, \T1, \T3
2036 vpxor \T3, \T6, \T6
2037
2038 vmovdqu 16*5(arg1), \T1
2039 vaesenc \T1, \XMM1, \XMM1
2040 vaesenc \T1, \XMM2, \XMM2
2041 vaesenc \T1, \XMM3, \XMM3
2042 vaesenc \T1, \XMM4, \XMM4
2043 vaesenc \T1, \XMM5, \XMM5
2044 vaesenc \T1, \XMM6, \XMM6
2045 vaesenc \T1, \XMM7, \XMM7
2046 vaesenc \T1, \XMM8, \XMM8
2047
2048 vmovdqa TMP4(%rsp), \T1
2049 vmovdqa HashKey_5(arg1), \T5
2050 vpclmulqdq $0x11, \T5, \T1, \T3
2051 vpxor \T3, \T4, \T4
2052
2053 vpclmulqdq $0x00, \T5, \T1, \T3
2054 vpxor \T3, \T7, \T7
2055
2056 vpclmulqdq $0x01, \T5, \T1, \T3
2057 vpxor \T3, \T6, \T6
2058
2059 vpclmulqdq $0x10, \T5, \T1, \T3
2060 vpxor \T3, \T6, \T6
2061
2062 vmovdqu 16*6(arg1), \T1
2063 vaesenc \T1, \XMM1, \XMM1
2064 vaesenc \T1, \XMM2, \XMM2
2065 vaesenc \T1, \XMM3, \XMM3
2066 vaesenc \T1, \XMM4, \XMM4
2067 vaesenc \T1, \XMM5, \XMM5
2068 vaesenc \T1, \XMM6, \XMM6
2069 vaesenc \T1, \XMM7, \XMM7
2070 vaesenc \T1, \XMM8, \XMM8
2071
2072
2073 vmovdqa TMP5(%rsp), \T1
2074 vmovdqa HashKey_4(arg1), \T5
2075 vpclmulqdq $0x11, \T5, \T1, \T3
2076 vpxor \T3, \T4, \T4
2077
2078 vpclmulqdq $0x00, \T5, \T1, \T3
2079 vpxor \T3, \T7, \T7
2080
2081 vpclmulqdq $0x01, \T5, \T1, \T3
2082 vpxor \T3, \T6, \T6
2083
2084 vpclmulqdq $0x10, \T5, \T1, \T3
2085 vpxor \T3, \T6, \T6
2086
2087 vmovdqu 16*7(arg1), \T1
2088 vaesenc \T1, \XMM1, \XMM1
2089 vaesenc \T1, \XMM2, \XMM2
2090 vaesenc \T1, \XMM3, \XMM3
2091 vaesenc \T1, \XMM4, \XMM4
2092 vaesenc \T1, \XMM5, \XMM5
2093 vaesenc \T1, \XMM6, \XMM6
2094 vaesenc \T1, \XMM7, \XMM7
2095 vaesenc \T1, \XMM8, \XMM8
2096
2097 vmovdqa TMP6(%rsp), \T1
2098 vmovdqa HashKey_3(arg1), \T5
2099 vpclmulqdq $0x11, \T5, \T1, \T3
2100 vpxor \T3, \T4, \T4
2101
2102 vpclmulqdq $0x00, \T5, \T1, \T3
2103 vpxor \T3, \T7, \T7
2104
2105 vpclmulqdq $0x01, \T5, \T1, \T3
2106 vpxor \T3, \T6, \T6
2107
2108 vpclmulqdq $0x10, \T5, \T1, \T3
2109 vpxor \T3, \T6, \T6
2110
2111 vmovdqu 16*8(arg1), \T1
2112 vaesenc \T1, \XMM1, \XMM1
2113 vaesenc \T1, \XMM2, \XMM2
2114 vaesenc \T1, \XMM3, \XMM3
2115 vaesenc \T1, \XMM4, \XMM4
2116 vaesenc \T1, \XMM5, \XMM5
2117 vaesenc \T1, \XMM6, \XMM6
2118 vaesenc \T1, \XMM7, \XMM7
2119 vaesenc \T1, \XMM8, \XMM8
2120
2121 vmovdqa TMP7(%rsp), \T1
2122 vmovdqa HashKey_2(arg1), \T5
2123 vpclmulqdq $0x11, \T5, \T1, \T3
2124 vpxor \T3, \T4, \T4
2125
2126 vpclmulqdq $0x00, \T5, \T1, \T3
2127 vpxor \T3, \T7, \T7
2128
2129 vpclmulqdq $0x01, \T5, \T1, \T3
2130 vpxor \T3, \T6, \T6
2131
2132 vpclmulqdq $0x10, \T5, \T1, \T3
2133 vpxor \T3, \T6, \T6
2134
2135
2136 #######################################################################
2137
2138 vmovdqu 16*9(arg1), \T5
2139 vaesenc \T5, \XMM1, \XMM1
2140 vaesenc \T5, \XMM2, \XMM2
2141 vaesenc \T5, \XMM3, \XMM3
2142 vaesenc \T5, \XMM4, \XMM4
2143 vaesenc \T5, \XMM5, \XMM5
2144 vaesenc \T5, \XMM6, \XMM6
2145 vaesenc \T5, \XMM7, \XMM7
2146 vaesenc \T5, \XMM8, \XMM8
2147
2148 vmovdqa TMP8(%rsp), \T1
2149 vmovdqa HashKey(arg1), \T5
2150
2151 vpclmulqdq $0x00, \T5, \T1, \T3
2152 vpxor \T3, \T7, \T7
2153
2154 vpclmulqdq $0x01, \T5, \T1, \T3
2155 vpxor \T3, \T6, \T6
2156
2157 vpclmulqdq $0x10, \T5, \T1, \T3
2158 vpxor \T3, \T6, \T6
2159
2160 vpclmulqdq $0x11, \T5, \T1, \T3
2161 vpxor \T3, \T4, \T1
2162
2163
2164 vmovdqu 16*10(arg1), \T5
2165
2166 i = 0
2167 j = 1
2168 setreg
2169.rep 8
2170 vpxor 16*i(arg3, %r11), \T5, \T2
2171 .if \ENC_DEC == ENC
2172 vaesenclast \T2, reg_j, reg_j
2173 .else
2174 vaesenclast \T2, reg_j, \T3
2175 vmovdqu 16*i(arg3, %r11), reg_j
2176 vmovdqu \T3, 16*i(arg2, %r11)
2177 .endif
2178 i = (i+1)
2179 j = (j+1)
2180 setreg
2181.endr
2182 #######################################################################
2183
2184
2185 vpslldq $8, \T6, \T3 # shift-L T3 2 DWs
2186        vpsrldq $8 , \T6, \T6                  # shift-R T6 2 DWs
2187 vpxor \T3, \T7, \T7
2188 vpxor \T6, \T1, \T1 # accumulate the results in T1:T7
2189
2190
2191
2192 #######################################################################
2193 #first phase of the reduction
2194 vmovdqa POLY2(%rip), \T3
2195
2196 vpclmulqdq $0x01, \T7, \T3, \T2
2197 vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs
2198
2199 vpxor \T2, \T7, \T7 # first phase of the reduction complete
2200 #######################################################################
2201 .if \ENC_DEC == ENC
2202 vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer
2203 vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer
2204 vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer
2205 vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer
2206 vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer
2207 vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer
2208 vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer
2209 vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer
2210 .endif
2211
2212 #######################################################################
2213 #second phase of the reduction
2214 vpclmulqdq $0x00, \T7, \T3, \T2
2215 vpsrldq $4, \T2, \T2 # shift-R xmm2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
2216
2217 vpclmulqdq $0x10, \T7, \T3, \T4
2218 vpslldq $4, \T4, \T4 # shift-L xmm0 1 DW (Shift-L 1-DW to obtain result with no shifts)
2219
2220 vpxor \T2, \T4, \T4 # second phase of the reduction complete
2221 #######################################################################
2222 vpxor \T4, \T1, \T1 # the result is in T1
2223
2224 vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
2225 vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
2226 vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
2227 vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
2228 vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
2229 vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
2230 vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
2231 vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
2232
2233
2234 vpxor \T1, \XMM1, \XMM1
2235
2236
2237
2238.endm
2239
2240
2241# GHASH the last 8 ciphertext blocks.
2242.macro GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8
2243
2244 ## Karatsuba Method
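        ## Editorial illustration: writing each 128-bit operand as A = A1*x^64 + A0
        ## and B = B1*x^64 + B0, the carry-less product over GF(2) is
        ##   A*B = A1*B1*x^128 + [(A1^A0)*(B1^B0) ^ A1*B1 ^ A0*B0]*x^64 + A0*B0,
        ## so only three VPCLMULQDQ are needed per block: T6/T7 accumulate the
        ## high/low halves and XMM1 accumulates the middle (Karatsuba) terms.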
2245
2246 vmovdqa HashKey_8(arg1), \T5
2247
2248 vpshufd $0b01001110, \XMM1, \T2
2249 vpshufd $0b01001110, \T5, \T3
2250 vpxor \XMM1, \T2, \T2
2251 vpxor \T5, \T3, \T3
2252
2253 vpclmulqdq $0x11, \T5, \XMM1, \T6
2254 vpclmulqdq $0x00, \T5, \XMM1, \T7
2255
2256 vpclmulqdq $0x00, \T3, \T2, \XMM1
2257
2258 ######################
2259
2260 vmovdqa HashKey_7(arg1), \T5
2261 vpshufd $0b01001110, \XMM2, \T2
2262 vpshufd $0b01001110, \T5, \T3
2263 vpxor \XMM2, \T2, \T2
2264 vpxor \T5, \T3, \T3
2265
2266 vpclmulqdq $0x11, \T5, \XMM2, \T4
2267 vpxor \T4, \T6, \T6
2268
2269 vpclmulqdq $0x00, \T5, \XMM2, \T4
2270 vpxor \T4, \T7, \T7
2271
2272 vpclmulqdq $0x00, \T3, \T2, \T2
2273
2274 vpxor \T2, \XMM1, \XMM1
2275
2276 ######################
2277
2278 vmovdqa HashKey_6(arg1), \T5
2279 vpshufd $0b01001110, \XMM3, \T2
2280 vpshufd $0b01001110, \T5, \T3
2281 vpxor \XMM3, \T2, \T2
2282 vpxor \T5, \T3, \T3
2283
2284 vpclmulqdq $0x11, \T5, \XMM3, \T4
2285 vpxor \T4, \T6, \T6
2286
2287 vpclmulqdq $0x00, \T5, \XMM3, \T4
2288 vpxor \T4, \T7, \T7
2289
2290 vpclmulqdq $0x00, \T3, \T2, \T2
2291
2292 vpxor \T2, \XMM1, \XMM1
2293
2294 ######################
2295
2296 vmovdqa HashKey_5(arg1), \T5
2297 vpshufd $0b01001110, \XMM4, \T2
2298 vpshufd $0b01001110, \T5, \T3
2299 vpxor \XMM4, \T2, \T2
2300 vpxor \T5, \T3, \T3
2301
2302 vpclmulqdq $0x11, \T5, \XMM4, \T4
2303 vpxor \T4, \T6, \T6
2304
2305 vpclmulqdq $0x00, \T5, \XMM4, \T4
2306 vpxor \T4, \T7, \T7
2307
2308 vpclmulqdq $0x00, \T3, \T2, \T2
2309
2310 vpxor \T2, \XMM1, \XMM1
2311
2312 ######################
2313
2314 vmovdqa HashKey_4(arg1), \T5
2315 vpshufd $0b01001110, \XMM5, \T2
2316 vpshufd $0b01001110, \T5, \T3
2317 vpxor \XMM5, \T2, \T2
2318 vpxor \T5, \T3, \T3
2319
2320 vpclmulqdq $0x11, \T5, \XMM5, \T4
2321 vpxor \T4, \T6, \T6
2322
2323 vpclmulqdq $0x00, \T5, \XMM5, \T4
2324 vpxor \T4, \T7, \T7
2325
2326 vpclmulqdq $0x00, \T3, \T2, \T2
2327
2328 vpxor \T2, \XMM1, \XMM1
2329
2330 ######################
2331
2332 vmovdqa HashKey_3(arg1), \T5
2333 vpshufd $0b01001110, \XMM6, \T2
2334 vpshufd $0b01001110, \T5, \T3
2335 vpxor \XMM6, \T2, \T2
2336 vpxor \T5, \T3, \T3
2337
2338 vpclmulqdq $0x11, \T5, \XMM6, \T4
2339 vpxor \T4, \T6, \T6
2340
2341 vpclmulqdq $0x00, \T5, \XMM6, \T4
2342 vpxor \T4, \T7, \T7
2343
2344 vpclmulqdq $0x00, \T3, \T2, \T2
2345
2346 vpxor \T2, \XMM1, \XMM1
2347
2348 ######################
2349
2350 vmovdqa HashKey_2(arg1), \T5
2351 vpshufd $0b01001110, \XMM7, \T2
2352 vpshufd $0b01001110, \T5, \T3
2353 vpxor \XMM7, \T2, \T2
2354 vpxor \T5, \T3, \T3
2355
2356 vpclmulqdq $0x11, \T5, \XMM7, \T4
2357 vpxor \T4, \T6, \T6
2358
2359 vpclmulqdq $0x00, \T5, \XMM7, \T4
2360 vpxor \T4, \T7, \T7
2361
2362 vpclmulqdq $0x00, \T3, \T2, \T2
2363
2364 vpxor \T2, \XMM1, \XMM1
2365
2366 ######################
2367
2368 vmovdqa HashKey(arg1), \T5
2369 vpshufd $0b01001110, \XMM8, \T2
2370 vpshufd $0b01001110, \T5, \T3
2371 vpxor \XMM8, \T2, \T2
2372 vpxor \T5, \T3, \T3
2373
2374 vpclmulqdq $0x11, \T5, \XMM8, \T4
2375 vpxor \T4, \T6, \T6
2376
2377 vpclmulqdq $0x00, \T5, \XMM8, \T4
2378 vpxor \T4, \T7, \T7
2379
2380 vpclmulqdq $0x00, \T3, \T2, \T2
2381
2382 vpxor \T2, \XMM1, \XMM1
2383 vpxor \T6, \XMM1, \XMM1
2384 vpxor \T7, \XMM1, \T2
2385
2386
2387
2388
2389 vpslldq $8, \T2, \T4
2390 vpsrldq $8, \T2, \T2
2391
2392 vpxor \T4, \T7, \T7
2393 vpxor \T2, \T6, \T6 # <T6:T7> holds the result of the
2394 # accumulated carry-less multiplications
2395
2396 #######################################################################
2397 #first phase of the reduction
2398 vmovdqa POLY2(%rip), \T3
2399
2400 vpclmulqdq $0x01, \T7, \T3, \T2
2401 vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs
2402
2403 vpxor \T2, \T7, \T7 # first phase of the reduction complete
2404 #######################################################################
2405
2406
2407 #second phase of the reduction
2408 vpclmulqdq $0x00, \T7, \T3, \T2
2409 vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
2410
2411 vpclmulqdq $0x10, \T7, \T3, \T4
2412 vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts)
2413
2414 vpxor \T2, \T4, \T4 # second phase of the reduction complete
2415 #######################################################################
2416 vpxor \T4, \T6, \T6 # the result is in T6
2417.endm
2418
2419
2420
2421# combined for GCM encrypt and decrypt functions
2422# clobbering all xmm registers
2423# clobbering r10, r11, r12, r13, r14, r15
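# Editorial summary of the flow: encrypt the leading (blocks mod 8) blocks via
# INITIAL_BLOCKS_AVX2, run the 8-block parallel encrypt+GHASH loop, finish any
# trailing partial block with a masked single-block encryption, GHASH in
# len(AAD)||len(C), and XOR the result with E(K,Y0) to form the auth tag.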
2424.macro GCM_ENC_DEC_AVX2 ENC_DEC
2425
2426 #the number of pushes must equal STACK_OFFSET
2427 push %r12
2428 push %r13
2429 push %r14
2430 push %r15
2431
2432 mov %rsp, %r14
2433
2434
2435
2436
2437 sub $VARIABLE_OFFSET, %rsp
2438 and $~63, %rsp # align rsp to 64 bytes
2439
2440
2441 vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey
2442
2443 mov arg4, %r13 # save the number of bytes of plaintext/ciphertext
2444 and $-16, %r13 # r13 = r13 - (r13 mod 16)
2445
2446 mov %r13, %r12
2447 shr $4, %r12
2448 and $7, %r12
2449 jz _initial_num_blocks_is_0\@
2450
2451 cmp $7, %r12
2452 je _initial_num_blocks_is_7\@
2453 cmp $6, %r12
2454 je _initial_num_blocks_is_6\@
2455 cmp $5, %r12
2456 je _initial_num_blocks_is_5\@
2457 cmp $4, %r12
2458 je _initial_num_blocks_is_4\@
2459 cmp $3, %r12
2460 je _initial_num_blocks_is_3\@
2461 cmp $2, %r12
2462 je _initial_num_blocks_is_2\@
2463
2464 jmp _initial_num_blocks_is_1\@
2465
2466_initial_num_blocks_is_7\@:
2467 INITIAL_BLOCKS_AVX2 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2468 sub $16*7, %r13
2469 jmp _initial_blocks_encrypted\@
2470
2471_initial_num_blocks_is_6\@:
2472 INITIAL_BLOCKS_AVX2 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2473 sub $16*6, %r13
2474 jmp _initial_blocks_encrypted\@
2475
2476_initial_num_blocks_is_5\@:
2477 INITIAL_BLOCKS_AVX2 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2478 sub $16*5, %r13
2479 jmp _initial_blocks_encrypted\@
2480
2481_initial_num_blocks_is_4\@:
2482 INITIAL_BLOCKS_AVX2 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2483 sub $16*4, %r13
2484 jmp _initial_blocks_encrypted\@
2485
2486_initial_num_blocks_is_3\@:
2487 INITIAL_BLOCKS_AVX2 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2488 sub $16*3, %r13
2489 jmp _initial_blocks_encrypted\@
2490
2491_initial_num_blocks_is_2\@:
2492 INITIAL_BLOCKS_AVX2 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2493 sub $16*2, %r13
2494 jmp _initial_blocks_encrypted\@
2495
2496_initial_num_blocks_is_1\@:
2497 INITIAL_BLOCKS_AVX2 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2498 sub $16*1, %r13
2499 jmp _initial_blocks_encrypted\@
2500
2501_initial_num_blocks_is_0\@:
2502 INITIAL_BLOCKS_AVX2 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
2503
2504
2505_initial_blocks_encrypted\@:
2506 cmp $0, %r13
2507 je _zero_cipher_left\@
2508
2509 sub $128, %r13
2510 je _eight_cipher_left\@
2511
2512
2513
2514
2515 vmovd %xmm9, %r15d
2516 and $255, %r15d
2517 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2518
2519
2520_encrypt_by_8_new\@:
2521 cmp $(255-8), %r15d
2522 jg _encrypt_by_8\@
2523
2524
2525
2526 add $8, %r15b
2527 GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
2528 add $128, %r11
2529 sub $128, %r13
2530 jne _encrypt_by_8_new\@
2531
2532 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2533 jmp _eight_cipher_left\@
2534
2535_encrypt_by_8\@:
2536 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2537 add $8, %r15b
2538 GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
2539 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2540 add $128, %r11
2541 sub $128, %r13
2542 jne _encrypt_by_8_new\@
2543
2544 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2545
2546
2547
2548
2549_eight_cipher_left\@:
2550 GHASH_LAST_8_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
2551
2552
2553_zero_cipher_left\@:
2554 cmp $16, arg4
2555 jl _only_less_than_16\@
2556
2557 mov arg4, %r13
2558 and $15, %r13 # r13 = (arg4 mod 16)
2559
2560 je _multiple_of_16_bytes\@
2561
2562        # handle the last <16 Byte block separately
2563
2564
2565 vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
2566 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2567 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
2568
2569 sub $16, %r11
2570 add %r13, %r11
2571 vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block
2572
2573 lea SHIFT_MASK+16(%rip), %r12
2574 sub %r13, %r12 # adjust the shuffle mask pointer
2575 # to be able to shift 16-r13 bytes
2576 # (r13 is the number of bytes in plaintext mod 16)
2577 vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
2578 vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes
2579 jmp _final_ghash_mul\@
2580
2581_only_less_than_16\@:
2582 # check for 0 length
2583 mov arg4, %r13
2584 and $15, %r13 # r13 = (arg4 mod 16)
2585
2586 je _multiple_of_16_bytes\@
2587
2588        # handle the last <16 Byte block separately
2589
2590
2591 vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
2592 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2593 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
2594
2595
2596 lea SHIFT_MASK+16(%rip), %r12
2597 sub %r13, %r12 # adjust the shuffle mask pointer to be
2598 # able to shift 16-r13 bytes (r13 is the
2599 # number of bytes in plaintext mod 16)
2600
2601_get_last_16_byte_loop\@:
2602 movb (arg3, %r11), %al
2603 movb %al, TMP1 (%rsp , %r11)
2604 add $1, %r11
2605 cmp %r13, %r11
2606 jne _get_last_16_byte_loop\@
2607
2608 vmovdqu TMP1(%rsp), %xmm1
2609
2610 sub $16, %r11
2611
2612_final_ghash_mul\@:
2613 .if \ENC_DEC == DEC
2614 vmovdqa %xmm1, %xmm2
2615 vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
2616 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9
2617 vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
2618 vpand %xmm1, %xmm2, %xmm2
2619 vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
2620 vpxor %xmm2, %xmm14, %xmm14
2621 #GHASH computation for the last <16 Byte block
2622 GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
2623 sub %r13, %r11
2624 add $16, %r11
2625 .else
2626 vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
2627 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9
2628 vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
2629 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
2630 vpxor %xmm9, %xmm14, %xmm14
2631 #GHASH computation for the last <16 Byte block
2632 GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
2633 sub %r13, %r11
2634 add $16, %r11
2635 vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
2636 .endif
2637
2638
2639 #############################
2640 # output r13 Bytes
2641 vmovq %xmm9, %rax
2642 cmp $8, %r13
2643 jle _less_than_8_bytes_left\@
2644
2645 mov %rax, (arg2 , %r11)
2646 add $8, %r11
2647 vpsrldq $8, %xmm9, %xmm9
2648 vmovq %xmm9, %rax
2649 sub $8, %r13
2650
2651_less_than_8_bytes_left\@:
2652 movb %al, (arg2 , %r11)
2653 add $1, %r11
2654 shr $8, %rax
2655 sub $1, %r13
2656 jne _less_than_8_bytes_left\@
2657 #############################
2658
2659_multiple_of_16_bytes\@:
2660 mov arg7, %r12 # r12 = aadLen (number of bytes)
2661 shl $3, %r12 # convert into number of bits
2662 vmovd %r12d, %xmm15 # len(A) in xmm15
2663
2664        shl     $3, arg4                             # len(C) in bits (*8)
2665 vmovq arg4, %xmm1
2666 vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000
2667 vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C)
2668
2669 vpxor %xmm15, %xmm14, %xmm14
2670 GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation
2671 vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap
2672
2673 mov arg5, %rax # rax = *Y0
2674 vmovdqu (%rax), %xmm9 # xmm9 = Y0
2675
2676 ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0)
2677
2678 vpxor %xmm14, %xmm9, %xmm9
2679
2680
2681
2682_return_T\@:
2683 mov arg8, %r10 # r10 = authTag
2684 mov arg9, %r11 # r11 = auth_tag_len
2685
2686 cmp $16, %r11
2687 je _T_16\@
2688
2689 cmp $12, %r11
2690 je _T_12\@
2691
2692_T_8\@:
2693 vmovq %xmm9, %rax
2694 mov %rax, (%r10)
2695 jmp _return_T_done\@
2696_T_12\@:
2697 vmovq %xmm9, %rax
2698 mov %rax, (%r10)
2699 vpsrldq $8, %xmm9, %xmm9
2700 vmovd %xmm9, %eax
2701 mov %eax, 8(%r10)
2702 jmp _return_T_done\@
2703
2704_T_16\@:
2705 vmovdqu %xmm9, (%r10)
2706
2707_return_T_done\@:
2708 mov %r14, %rsp
2709
2710 pop %r15
2711 pop %r14
2712 pop %r13
2713 pop %r12
2714.endm
2715
2716
2717#############################################################
2718#void aesni_gcm_precomp_avx_gen4
2719# (gcm_data *my_ctx_data,
2720# u8 *hash_subkey)# /* H, the Hash sub key input.
2721# Data starts on a 16-byte boundary. */
2722#############################################################
2723ENTRY(aesni_gcm_precomp_avx_gen4)
2724 #the number of pushes must equal STACK_OFFSET
2725 push %r12
2726 push %r13
2727 push %r14
2728 push %r15
2729
2730 mov %rsp, %r14
2731
2732
2733
2734 sub $VARIABLE_OFFSET, %rsp
2735 and $~63, %rsp # align rsp to 64 bytes
2736
2737 vmovdqu (arg2), %xmm6 # xmm6 = HashKey
2738
2739 vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
2740 ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
2741 vmovdqa %xmm6, %xmm2
2742 vpsllq $1, %xmm6, %xmm6
2743 vpsrlq $63, %xmm2, %xmm2
2744 vmovdqa %xmm2, %xmm1
2745 vpslldq $8, %xmm2, %xmm2
2746 vpsrldq $8, %xmm1, %xmm1
2747 vpor %xmm2, %xmm6, %xmm6
2748 #reduction
2749 vpshufd $0b00100100, %xmm1, %xmm2
2750 vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
2751 vpand POLY(%rip), %xmm2, %xmm2
2752 vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly
2753 #######################################################################
2754 vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly
2755
2756
2757 PRECOMPUTE_AVX2 %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
2758
2759 mov %r14, %rsp
2760
2761 pop %r15
2762 pop %r14
2763 pop %r13
2764 pop %r12
2765 ret
2766ENDPROC(aesni_gcm_precomp_avx_gen4)
2767
2768
2769###############################################################################
2770#void aesni_gcm_enc_avx_gen4(
2771# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
2772# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */
2773# const u8 *in, /* Plaintext input */
2774# u64 plaintext_len, /* Length of data in Bytes for encryption. */
2775# u8 *iv, /* Pre-counter block j0: 4 byte salt
2776# (from Security Association) concatenated with 8 byte
2777# Initialisation Vector (from IPSec ESP Payload)
2778# concatenated with 0x00000001. 16-byte aligned pointer. */
2779# const u8 *aad, /* Additional Authentication Data (AAD)*/
2780# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
2781# u8 *auth_tag, /* Authenticated Tag output. */
2782# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
2783# Valid values are 16 (most likely), 12 or 8. */
2784###############################################################################
2785ENTRY(aesni_gcm_enc_avx_gen4)
2786 GCM_ENC_DEC_AVX2 ENC
2787 ret
2788ENDPROC(aesni_gcm_enc_avx_gen4)
2789
2790###############################################################################
2791#void aesni_gcm_dec_avx_gen4(
2792# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
2793# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */
2794# const u8 *in, /* Ciphertext input */
2795# u64 plaintext_len, /* Length of data in Bytes for encryption. */
2796# u8 *iv, /* Pre-counter block j0: 4 byte salt
2797# (from Security Association) concatenated with 8 byte
2798# Initialisation Vector (from IPSec ESP Payload)
2799# concatenated with 0x00000001. 16-byte aligned pointer. */
2800# const u8 *aad, /* Additional Authentication Data (AAD)*/
2801# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
2802# u8 *auth_tag, /* Authenticated Tag output. */
2803# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
2804# Valid values are 16 (most likely), 12 or 8. */
2805###############################################################################
2806ENTRY(aesni_gcm_dec_avx_gen4)
2807 GCM_ENC_DEC_AVX2 DEC
2808 ret
2809ENDPROC(aesni_gcm_dec_avx_gen4)
2810
2811#endif /* CONFIG_AS_AVX2 */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 835488b745ee..948ad0e77741 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -101,6 +101,9 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
101int crypto_fpu_init(void); 101int crypto_fpu_init(void);
102void crypto_fpu_exit(void); 102void crypto_fpu_exit(void);
103 103
104#define AVX_GEN2_OPTSIZE 640
105#define AVX_GEN4_OPTSIZE 4096
106
104#ifdef CONFIG_X86_64 107#ifdef CONFIG_X86_64
105asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, 108asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
106 const u8 *in, unsigned int len, u8 *iv); 109 const u8 *in, unsigned int len, u8 *iv);
@@ -150,6 +153,123 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
150 u8 *hash_subkey, const u8 *aad, unsigned long aad_len, 153 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
151 u8 *auth_tag, unsigned long auth_tag_len); 154 u8 *auth_tag, unsigned long auth_tag_len);
152 155
156
157#ifdef CONFIG_AS_AVX
158/*
159 * asmlinkage void aesni_gcm_precomp_avx_gen2()
160 * gcm_data *my_ctx_data, context data
161 * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
162 */
163asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data, u8 *hash_subkey);
164
165asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, u8 *out,
166 const u8 *in, unsigned long plaintext_len, u8 *iv,
167 const u8 *aad, unsigned long aad_len,
168 u8 *auth_tag, unsigned long auth_tag_len);
169
170asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out,
171 const u8 *in, unsigned long ciphertext_len, u8 *iv,
172 const u8 *aad, unsigned long aad_len,
173 u8 *auth_tag, unsigned long auth_tag_len);
174
175static void aesni_gcm_enc_avx(void *ctx, u8 *out,
176 const u8 *in, unsigned long plaintext_len, u8 *iv,
177 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
178 u8 *auth_tag, unsigned long auth_tag_len)
179{
180 if (plaintext_len < AVX_GEN2_OPTSIZE) {
181 aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
182 aad_len, auth_tag, auth_tag_len);
183 } else {
184 aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
185 aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
186 aad_len, auth_tag, auth_tag_len);
187 }
188}
189
190static void aesni_gcm_dec_avx(void *ctx, u8 *out,
191 const u8 *in, unsigned long ciphertext_len, u8 *iv,
192 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
193 u8 *auth_tag, unsigned long auth_tag_len)
194{
195 if (ciphertext_len < AVX_GEN2_OPTSIZE) {
196 aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
197 aad_len, auth_tag, auth_tag_len);
198 } else {
199 aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
200 aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
201 aad_len, auth_tag, auth_tag_len);
202 }
203}
204#endif
205
206#ifdef CONFIG_AS_AVX2
207/*
208 * asmlinkage void aesni_gcm_precomp_avx_gen4()
209 * gcm_data *my_ctx_data, context data
210 * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
211 */
212asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data, u8 *hash_subkey);
213
214asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, u8 *out,
215 const u8 *in, unsigned long plaintext_len, u8 *iv,
216 const u8 *aad, unsigned long aad_len,
217 u8 *auth_tag, unsigned long auth_tag_len);
218
219asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out,
220 const u8 *in, unsigned long ciphertext_len, u8 *iv,
221 const u8 *aad, unsigned long aad_len,
222 u8 *auth_tag, unsigned long auth_tag_len);
223
224static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
225 const u8 *in, unsigned long plaintext_len, u8 *iv,
226 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
227 u8 *auth_tag, unsigned long auth_tag_len)
228{
229 if (plaintext_len < AVX_GEN2_OPTSIZE) {
230 aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
231 aad_len, auth_tag, auth_tag_len);
232 } else if (plaintext_len < AVX_GEN4_OPTSIZE) {
233 aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
234 aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
235 aad_len, auth_tag, auth_tag_len);
236 } else {
237 aesni_gcm_precomp_avx_gen4(ctx, hash_subkey);
238 aesni_gcm_enc_avx_gen4(ctx, out, in, plaintext_len, iv, aad,
239 aad_len, auth_tag, auth_tag_len);
240 }
241}
242
243static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
244 const u8 *in, unsigned long ciphertext_len, u8 *iv,
245 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
246 u8 *auth_tag, unsigned long auth_tag_len)
247{
248 if (ciphertext_len < AVX_GEN2_OPTSIZE) {
249 aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
250 aad, aad_len, auth_tag, auth_tag_len);
251 } else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
252 aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
253 aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
254 aad_len, auth_tag, auth_tag_len);
255 } else {
256 aesni_gcm_precomp_avx_gen4(ctx, hash_subkey);
257 aesni_gcm_dec_avx_gen4(ctx, out, in, ciphertext_len, iv, aad,
258 aad_len, auth_tag, auth_tag_len);
259 }
260}
261#endif
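/* Editorial note, not part of the original patch: with these thresholds a
 * request shorter than 640 bytes keeps using the original SSE aesni_gcm_enc/dec,
 * one between 640 and 4095 bytes (e.g. a ~1500-byte IPsec packet) takes the
 * gen2 (AVX) routines, and only requests of 4 KiB and up use the gen4 (AVX2)
 * path. */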
262
263static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out,
264 const u8 *in, unsigned long plaintext_len, u8 *iv,
265 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
266 u8 *auth_tag, unsigned long auth_tag_len);
267
268static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out,
269 const u8 *in, unsigned long ciphertext_len, u8 *iv,
270 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
271 u8 *auth_tag, unsigned long auth_tag_len);
272
153static inline struct 273static inline struct
154aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) 274aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
155{ 275{
@@ -915,7 +1035,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
915 dst = src; 1035 dst = src;
916 } 1036 }
917 1037
918 aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, 1038 aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
919 ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst 1039 ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst
920 + ((unsigned long)req->cryptlen), auth_tag_len); 1040 + ((unsigned long)req->cryptlen), auth_tag_len);
921 1041
@@ -996,12 +1116,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
996 dst = src; 1116 dst = src;
997 } 1117 }
998 1118
999 aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, 1119 aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
1000 ctx->hash_subkey, assoc, (unsigned long)req->assoclen, 1120 ctx->hash_subkey, assoc, (unsigned long)req->assoclen,
1001 authTag, auth_tag_len); 1121 authTag, auth_tag_len);
1002 1122
1003 /* Compare generated tag with passed in tag. */ 1123 /* Compare generated tag with passed in tag. */
1004 retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? 1124 retval = crypto_memneq(src + tempCipherLen, authTag, auth_tag_len) ?
1005 -EBADMSG : 0; 1125 -EBADMSG : 0;
1006 1126
1007 if (one_entry_in_sg) { 1127 if (one_entry_in_sg) {
@@ -1353,6 +1473,27 @@ static int __init aesni_init(void)
1353 1473
1354 if (!x86_match_cpu(aesni_cpu_id)) 1474 if (!x86_match_cpu(aesni_cpu_id))
1355 return -ENODEV; 1475 return -ENODEV;
1476#ifdef CONFIG_X86_64
1477#ifdef CONFIG_AS_AVX2
1478 if (boot_cpu_has(X86_FEATURE_AVX2)) {
1479 pr_info("AVX2 version of gcm_enc/dec engaged.\n");
1480 aesni_gcm_enc_tfm = aesni_gcm_enc_avx2;
1481 aesni_gcm_dec_tfm = aesni_gcm_dec_avx2;
1482 } else
1483#endif
1484#ifdef CONFIG_AS_AVX
1485 if (boot_cpu_has(X86_FEATURE_AVX)) {
1486 pr_info("AVX version of gcm_enc/dec engaged.\n");
1487 aesni_gcm_enc_tfm = aesni_gcm_enc_avx;
1488 aesni_gcm_dec_tfm = aesni_gcm_dec_avx;
1489 } else
1490#endif
1491 {
1492 pr_info("SSE version of gcm_enc/dec engaged.\n");
1493 aesni_gcm_enc_tfm = aesni_gcm_enc;
1494 aesni_gcm_dec_tfm = aesni_gcm_dec;
1495 }
1496#endif
1356 1497
1357 err = crypto_fpu_init(); 1498 err = crypto_fpu_init();
1358 if (err) 1499 if (err)
diff --git a/crypto/Makefile b/crypto/Makefile
index 989c510da8cc..b29402a7b9b5 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -2,11 +2,6 @@
2# Cryptographic API 2# Cryptographic API
3# 3#
4 4
5# memneq MUST be built with -Os or -O0 to prevent early-return optimizations
6# that will defeat memneq's actual purpose to prevent timing attacks.
7CFLAGS_REMOVE_memneq.o := -O1 -O2 -O3
8CFLAGS_memneq.o := -Os
9
10obj-$(CONFIG_CRYPTO) += crypto.o 5obj-$(CONFIG_CRYPTO) += crypto.o
11crypto-y := api.o cipher.o compress.o memneq.o 6crypto-y := api.o cipher.o compress.o memneq.o
12 7
diff --git a/crypto/ahash.c b/crypto/ahash.c
index 793a27f2493e..a92dc382f781 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -213,7 +213,10 @@ static void ahash_op_unaligned_done(struct crypto_async_request *req, int err)
213 213
214 ahash_op_unaligned_finish(areq, err); 214 ahash_op_unaligned_finish(areq, err);
215 215
216 complete(data, err); 216 areq->base.complete = complete;
217 areq->base.data = data;
218
219 complete(&areq->base, err);
217} 220}
218 221
219static int ahash_op_unaligned(struct ahash_request *req, 222static int ahash_op_unaligned(struct ahash_request *req,
diff --git a/crypto/memneq.c b/crypto/memneq.c
index cd0162221c14..afed1bd16aee 100644
--- a/crypto/memneq.c
+++ b/crypto/memneq.c
@@ -72,6 +72,7 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size)
72#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) 72#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
73 while (size >= sizeof(unsigned long)) { 73 while (size >= sizeof(unsigned long)) {
74 neq |= *(unsigned long *)a ^ *(unsigned long *)b; 74 neq |= *(unsigned long *)a ^ *(unsigned long *)b;
75 OPTIMIZER_HIDE_VAR(neq);
75 a += sizeof(unsigned long); 76 a += sizeof(unsigned long);
76 b += sizeof(unsigned long); 77 b += sizeof(unsigned long);
77 size -= sizeof(unsigned long); 78 size -= sizeof(unsigned long);
@@ -79,6 +80,7 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size)
79#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ 80#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
80 while (size > 0) { 81 while (size > 0) {
81 neq |= *(unsigned char *)a ^ *(unsigned char *)b; 82 neq |= *(unsigned char *)a ^ *(unsigned char *)b;
83 OPTIMIZER_HIDE_VAR(neq);
82 a += 1; 84 a += 1;
83 b += 1; 85 b += 1;
84 size -= 1; 86 size -= 1;
@@ -89,33 +91,61 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size)
89/* Loop-free fast-path for frequently used 16-byte size */ 91/* Loop-free fast-path for frequently used 16-byte size */
90static inline unsigned long __crypto_memneq_16(const void *a, const void *b) 92static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
91{ 93{
94 unsigned long neq = 0;
95
92#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 96#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
93 if (sizeof(unsigned long) == 8) 97 if (sizeof(unsigned long) == 8) {
94 return ((*(unsigned long *)(a) ^ *(unsigned long *)(b)) 98 neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b);
95 | (*(unsigned long *)(a+8) ^ *(unsigned long *)(b+8))); 99 OPTIMIZER_HIDE_VAR(neq);
96 else if (sizeof(unsigned int) == 4) 100 neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
97 return ((*(unsigned int *)(a) ^ *(unsigned int *)(b)) 101 OPTIMIZER_HIDE_VAR(neq);
98 | (*(unsigned int *)(a+4) ^ *(unsigned int *)(b+4)) 102 } else if (sizeof(unsigned int) == 4) {
99 | (*(unsigned int *)(a+8) ^ *(unsigned int *)(b+8)) 103 neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b);
100 | (*(unsigned int *)(a+12) ^ *(unsigned int *)(b+12))); 104 OPTIMIZER_HIDE_VAR(neq);
101 else 105 neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4);
106 OPTIMIZER_HIDE_VAR(neq);
107 neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8);
108 OPTIMIZER_HIDE_VAR(neq);
109 neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
110 OPTIMIZER_HIDE_VAR(neq);
111 } else
102#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ 112#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
103 return ((*(unsigned char *)(a) ^ *(unsigned char *)(b)) 113 {
104 | (*(unsigned char *)(a+1) ^ *(unsigned char *)(b+1)) 114 neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b);
105 | (*(unsigned char *)(a+2) ^ *(unsigned char *)(b+2)) 115 OPTIMIZER_HIDE_VAR(neq);
106 | (*(unsigned char *)(a+3) ^ *(unsigned char *)(b+3)) 116 neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1);
107 | (*(unsigned char *)(a+4) ^ *(unsigned char *)(b+4)) 117 OPTIMIZER_HIDE_VAR(neq);
108 | (*(unsigned char *)(a+5) ^ *(unsigned char *)(b+5)) 118 neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2);
109 | (*(unsigned char *)(a+6) ^ *(unsigned char *)(b+6)) 119 OPTIMIZER_HIDE_VAR(neq);
110 | (*(unsigned char *)(a+7) ^ *(unsigned char *)(b+7)) 120 neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3);
111 | (*(unsigned char *)(a+8) ^ *(unsigned char *)(b+8)) 121 OPTIMIZER_HIDE_VAR(neq);
112 | (*(unsigned char *)(a+9) ^ *(unsigned char *)(b+9)) 122 neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4);
113 | (*(unsigned char *)(a+10) ^ *(unsigned char *)(b+10)) 123 OPTIMIZER_HIDE_VAR(neq);
114 | (*(unsigned char *)(a+11) ^ *(unsigned char *)(b+11)) 124 neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5);
115 | (*(unsigned char *)(a+12) ^ *(unsigned char *)(b+12)) 125 OPTIMIZER_HIDE_VAR(neq);
116 | (*(unsigned char *)(a+13) ^ *(unsigned char *)(b+13)) 126 neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6);
117 | (*(unsigned char *)(a+14) ^ *(unsigned char *)(b+14)) 127 OPTIMIZER_HIDE_VAR(neq);
118 | (*(unsigned char *)(a+15) ^ *(unsigned char *)(b+15))); 128 neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7);
129 OPTIMIZER_HIDE_VAR(neq);
130 neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8);
131 OPTIMIZER_HIDE_VAR(neq);
132 neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9);
133 OPTIMIZER_HIDE_VAR(neq);
134 neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
135 OPTIMIZER_HIDE_VAR(neq);
136 neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
137 OPTIMIZER_HIDE_VAR(neq);
138 neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
139 OPTIMIZER_HIDE_VAR(neq);
140 neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
141 OPTIMIZER_HIDE_VAR(neq);
142 neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
143 OPTIMIZER_HIDE_VAR(neq);
144 neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
145 OPTIMIZER_HIDE_VAR(neq);
146 }
147
148 return neq;
119} 149}
120 150
121/* Compare two areas of memory without leaking timing information, 151/* Compare two areas of memory without leaking timing information,
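/* Editorial note: OPTIMIZER_HIDE_VAR() comes from the compiler headers touched
 * elsewhere in this series (include/linux/compiler*.h). On gcc it is roughly an
 * empty asm that launders the value through a register, along the lines of
 *
 *   #define OPTIMIZER_HIDE_VAR(var)  __asm__ ("" : "=r" (var) : "0" (var))
 *
 * which keeps the compiler from collapsing the neq accumulation into an
 * early-exiting comparison and thereby reintroducing a timing side channel. */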
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index f8c920cafe63..309d345ead95 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -78,7 +78,7 @@ static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu,
78 cpu = *cb_cpu; 78 cpu = *cb_cpu;
79 79
80 rcu_read_lock_bh(); 80 rcu_read_lock_bh();
81 cpumask = rcu_dereference(pcrypt->cb_cpumask); 81 cpumask = rcu_dereference_bh(pcrypt->cb_cpumask);
82 if (cpumask_test_cpu(cpu, cpumask->mask)) 82 if (cpumask_test_cpu(cpu, cpumask->mask))
83 goto out; 83 goto out;
84 84
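/* Editorial note: the read side above runs under rcu_read_lock_bh(), so
 * rcu_dereference_bh() is the matching accessor; plain rcu_dereference()
 * would trigger a lockdep-RCU "suspicious usage" warning here. */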
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 001f07cdb828..0d9003ae8c61 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -137,7 +137,272 @@ out:
137 return ret; 137 return ret;
138} 138}
139 139
140static int test_aead_jiffies(struct aead_request *req, int enc,
141 int blen, int sec)
142{
143 unsigned long start, end;
144 int bcount;
145 int ret;
146
147 for (start = jiffies, end = start + sec * HZ, bcount = 0;
148 time_before(jiffies, end); bcount++) {
149 if (enc)
150 ret = crypto_aead_encrypt(req);
151 else
152 ret = crypto_aead_decrypt(req);
153
154 if (ret)
155 return ret;
156 }
157
158 printk("%d operations in %d seconds (%ld bytes)\n",
159 bcount, sec, (long)bcount * blen);
160 return 0;
161}
162
163static int test_aead_cycles(struct aead_request *req, int enc, int blen)
164{
165 unsigned long cycles = 0;
166 int ret = 0;
167 int i;
168
169 local_irq_disable();
170
171 /* Warm-up run. */
172 for (i = 0; i < 4; i++) {
173 if (enc)
174 ret = crypto_aead_encrypt(req);
175 else
176 ret = crypto_aead_decrypt(req);
177
178 if (ret)
179 goto out;
180 }
181
182 /* The real thing. */
183 for (i = 0; i < 8; i++) {
184 cycles_t start, end;
185
186 start = get_cycles();
187 if (enc)
188 ret = crypto_aead_encrypt(req);
189 else
190 ret = crypto_aead_decrypt(req);
191 end = get_cycles();
192
193 if (ret)
194 goto out;
195
196 cycles += end - start;
197 }
198
199out:
200 local_irq_enable();
201
202 if (ret == 0)
203 printk("1 operation in %lu cycles (%d bytes)\n",
204 (cycles + 4) / 8, blen);
205
206 return ret;
207}
208
140static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 }; 209static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
210static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 };
211
212#define XBUFSIZE 8
213#define MAX_IVLEN 32
214
215static int testmgr_alloc_buf(char *buf[XBUFSIZE])
216{
217 int i;
218
219 for (i = 0; i < XBUFSIZE; i++) {
220 buf[i] = (void *)__get_free_page(GFP_KERNEL);
221 if (!buf[i])
222 goto err_free_buf;
223 }
224
225 return 0;
226
227err_free_buf:
228 while (i-- > 0)
229 free_page((unsigned long)buf[i]);
230
231 return -ENOMEM;
232}
233
234static void testmgr_free_buf(char *buf[XBUFSIZE])
235{
236 int i;
237
238 for (i = 0; i < XBUFSIZE; i++)
239 free_page((unsigned long)buf[i]);
240}
241
242static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE],
243 unsigned int buflen)
244{
245 int np = (buflen + PAGE_SIZE - 1)/PAGE_SIZE;
246 int k, rem;
247
248 np = (np > XBUFSIZE) ? XBUFSIZE : np;
249 rem = buflen % PAGE_SIZE;
250 if (np > XBUFSIZE) {
251 rem = PAGE_SIZE;
252 np = XBUFSIZE;
253 }
254 sg_init_table(sg, np);
255 for (k = 0; k < np; ++k) {
256 if (k == (np-1))
257 sg_set_buf(&sg[k], xbuf[k], rem);
258 else
259 sg_set_buf(&sg[k], xbuf[k], PAGE_SIZE);
260 }
261}
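/* Editorial example: for buflen = 2*PAGE_SIZE + 100, np = 3 and rem = 100, so
 * the scatterlist becomes two full-page entries followed by a 100-byte tail. */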
262
263static void test_aead_speed(const char *algo, int enc, unsigned int sec,
264 struct aead_speed_template *template,
265 unsigned int tcount, u8 authsize,
266 unsigned int aad_size, u8 *keysize)
267{
268 unsigned int i, j;
269 struct crypto_aead *tfm;
270 int ret = -ENOMEM;
271 const char *key;
272 struct aead_request *req;
273 struct scatterlist *sg;
274 struct scatterlist *asg;
275 struct scatterlist *sgout;
276 const char *e;
277 void *assoc;
278 char iv[MAX_IVLEN];
279 char *xbuf[XBUFSIZE];
280 char *xoutbuf[XBUFSIZE];
281 char *axbuf[XBUFSIZE];
282 unsigned int *b_size;
283 unsigned int iv_len;
284
285 if (enc == ENCRYPT)
286 e = "encryption";
287 else
288 e = "decryption";
289
290 if (testmgr_alloc_buf(xbuf))
291 goto out_noxbuf;
292 if (testmgr_alloc_buf(axbuf))
293 goto out_noaxbuf;
294 if (testmgr_alloc_buf(xoutbuf))
295 goto out_nooutbuf;
296
297 sg = kmalloc(sizeof(*sg) * 8 * 3, GFP_KERNEL);
298 if (!sg)
299 goto out_nosg;
300 asg = &sg[8];
301 sgout = &asg[8];
302
303
304 printk(KERN_INFO "\ntesting speed of %s %s\n", algo, e);
305
306 tfm = crypto_alloc_aead(algo, 0, 0);
307
308 if (IS_ERR(tfm)) {
309 pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo,
310 PTR_ERR(tfm));
311 return;
312 }
313
314 req = aead_request_alloc(tfm, GFP_KERNEL);
315 if (!req) {
316 pr_err("alg: aead: Failed to allocate request for %s\n",
317 algo);
318 goto out;
319 }
320
321 i = 0;
322 do {
323 b_size = aead_sizes;
324 do {
325 assoc = axbuf[0];
326
327 if (aad_size < PAGE_SIZE)
328 memset(assoc, 0xff, aad_size);
329 else {
330 pr_err("associate data length (%u) too big\n",
331 aad_size);
332 goto out_nosg;
333 }
334 sg_init_one(&asg[0], assoc, aad_size);
335
336 if ((*keysize + *b_size) > TVMEMSIZE * PAGE_SIZE) {
337 pr_err("template (%u) too big for tvmem (%lu)\n",
338 *keysize + *b_size,
339 TVMEMSIZE * PAGE_SIZE);
340 goto out;
341 }
342
343 key = tvmem[0];
344 for (j = 0; j < tcount; j++) {
345 if (template[j].klen == *keysize) {
346 key = template[j].key;
347 break;
348 }
349 }
350 ret = crypto_aead_setkey(tfm, key, *keysize);
351 ret = crypto_aead_setauthsize(tfm, authsize);
352
353 iv_len = crypto_aead_ivsize(tfm);
354 if (iv_len)
355 memset(&iv, 0xff, iv_len);
356
357 crypto_aead_clear_flags(tfm, ~0);
358 printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ",
359 i, *keysize * 8, *b_size);
360
361
362 memset(tvmem[0], 0xff, PAGE_SIZE);
363
364 if (ret) {
365 pr_err("setkey() failed flags=%x\n",
366 crypto_aead_get_flags(tfm));
367 goto out;
368 }
369
370 sg_init_aead(&sg[0], xbuf,
371 *b_size + (enc ? authsize : 0));
372
373 sg_init_aead(&sgout[0], xoutbuf,
374 *b_size + (enc ? authsize : 0));
375
376 aead_request_set_crypt(req, sg, sgout, *b_size, iv);
377 aead_request_set_assoc(req, asg, aad_size);
378
379 if (sec)
380 ret = test_aead_jiffies(req, enc, *b_size, sec);
381 else
382 ret = test_aead_cycles(req, enc, *b_size);
383
384 if (ret) {
385 pr_err("%s() failed return code=%d\n", e, ret);
386 break;
387 }
388 b_size++;
389 i++;
390 } while (*b_size);
391 keysize++;
392 } while (*keysize);
393
394out:
395 crypto_free_aead(tfm);
396 kfree(sg);
397out_nosg:
398 testmgr_free_buf(xoutbuf);
399out_nooutbuf:
400 testmgr_free_buf(axbuf);
401out_noaxbuf:
402 testmgr_free_buf(xbuf);
403out_noxbuf:
404 return;
405}
141 406
142static void test_cipher_speed(const char *algo, int enc, unsigned int sec, 407static void test_cipher_speed(const char *algo, int enc, unsigned int sec,
143 struct cipher_speed_template *template, 408 struct cipher_speed_template *template,
@@ -1427,6 +1692,11 @@ static int do_test(int m)
1427 speed_template_32_64); 1692 speed_template_32_64);
1428 break; 1693 break;
1429 1694
1695 case 211:
1696 test_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec,
1697 NULL, 0, 16, 8, aead_speed_template_20);
1698 break;
1699
1430 case 300: 1700 case 300:
1431 /* fall through */ 1701 /* fall through */
1432 1702
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index ecdeeb1a7b05..6c7e21a09f78 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -22,6 +22,11 @@ struct cipher_speed_template {
22 unsigned int klen; 22 unsigned int klen;
23}; 23};
24 24
25struct aead_speed_template {
26 const char *key;
27 unsigned int klen;
28};
29
25struct hash_speed { 30struct hash_speed {
26 unsigned int blen; /* buffer length */ 31 unsigned int blen; /* buffer length */
27 unsigned int plen; /* per-update length */ 32 unsigned int plen; /* per-update length */
@@ -58,6 +63,11 @@ static u8 speed_template_32_48_64[] = {32, 48, 64, 0};
58static u8 speed_template_32_64[] = {32, 64, 0}; 63static u8 speed_template_32_64[] = {32, 64, 0};
59 64
60/* 65/*
66 * AEAD speed tests
67 */
68static u8 aead_speed_template_20[] = {20, 0};
69
70/*
61 * Digest speed tests 71 * Digest speed tests
62 */ 72 */
63static struct hash_speed generic_hash_speed_template[] = { 73static struct hash_speed generic_hash_speed_template[] = {
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index f4fd837bcb82..13857f5d28f7 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -289,16 +289,6 @@ config CRYPTO_DEV_SAHARA
289 This option enables support for the SAHARA HW crypto accelerator 289 This option enables support for the SAHARA HW crypto accelerator
290 found in some Freescale i.MX chips. 290 found in some Freescale i.MX chips.
291 291
292config CRYPTO_DEV_DCP
293 tristate "Support for the DCP engine"
294 depends on ARCH_MXS && OF
295 select CRYPTO_BLKCIPHER
296 select CRYPTO_AES
297 select CRYPTO_CBC
298 help
299 This options enables support for the hardware crypto-acceleration
300 capabilities of the DCP co-processor
301
302config CRYPTO_DEV_S5P 292config CRYPTO_DEV_S5P
303 tristate "Support for Samsung S5PV210 crypto accelerator" 293 tristate "Support for Samsung S5PV210 crypto accelerator"
304 depends on ARCH_S5PV210 294 depends on ARCH_S5PV210
@@ -399,4 +389,33 @@ config CRYPTO_DEV_ATMEL_SHA
399 To compile this driver as a module, choose M here: the module 389 To compile this driver as a module, choose M here: the module
400 will be called atmel-sha. 390 will be called atmel-sha.
401 391
392config CRYPTO_DEV_CCP
393 bool "Support for AMD Cryptographic Coprocessor"
394 depends on X86 && PCI
395 default n
396 help
397 The AMD Cryptographic Coprocessor provides hardware support
398 for encryption, hashing and related operations.
399
400if CRYPTO_DEV_CCP
401 source "drivers/crypto/ccp/Kconfig"
402endif
403
404config CRYPTO_DEV_MXS_DCP
405 tristate "Support for Freescale MXS DCP"
406 depends on ARCH_MXS
407 select CRYPTO_SHA1
408 select CRYPTO_SHA256
409 select CRYPTO_CBC
410 select CRYPTO_ECB
411 select CRYPTO_AES
412 select CRYPTO_BLKCIPHER
413 select CRYPTO_ALGAPI
414 help
415 The Freescale i.MX23/i.MX28 has SHA1/SHA256 and AES128 CBC/ECB
416 co-processor on the die.
417
418 To compile this driver as a module, choose M here: the module
419 will be called mxs-dcp.
420
402endif # CRYPTO_HW 421endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index b4946ddd2550..0bc6aa0a54d7 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -1,24 +1,25 @@
1obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o 1obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
2obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o 2obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
3obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
4obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o
5obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
6obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
3obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o 7obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
4obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
5n2_crypto-y := n2_core.o n2_asm.o
6obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o 8obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
7obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
8obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
9obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
10obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o 9obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
11obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ 10obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
12obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o 11obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
12obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
13n2_crypto-y := n2_core.o n2_asm.o
14obj-$(CONFIG_CRYPTO_DEV_NX) += nx/
13obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o 15obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
16obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
17obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
18obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
14obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o 19obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
15obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o 20obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
16obj-$(CONFIG_CRYPTO_DEV_DCP) += dcp.o
17obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o 21obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
22obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
23obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
18obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o 24obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
19obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ 25obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
20obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o
21obj-$(CONFIG_CRYPTO_DEV_NX) += nx/
22obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
23obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
24obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index efaf6302405f..37f9cc98ba17 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -724,7 +724,6 @@ static void crypto4xx_stop_all(struct crypto4xx_core_device *core_dev)
724 crypto4xx_destroy_pdr(core_dev->dev); 724 crypto4xx_destroy_pdr(core_dev->dev);
725 crypto4xx_destroy_gdr(core_dev->dev); 725 crypto4xx_destroy_gdr(core_dev->dev);
726 crypto4xx_destroy_sdr(core_dev->dev); 726 crypto4xx_destroy_sdr(core_dev->dev);
727 dev_set_drvdata(core_dev->device, NULL);
728 iounmap(core_dev->dev->ce_base); 727 iounmap(core_dev->dev->ce_base);
729 kfree(core_dev->dev); 728 kfree(core_dev->dev);
730 kfree(core_dev); 729 kfree(core_dev);
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 4cf5dec826e1..b71f2fd749df 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -467,24 +467,10 @@ static int aead_setkey(struct crypto_aead *aead,
467 static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; 467 static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 };
468 struct caam_ctx *ctx = crypto_aead_ctx(aead); 468 struct caam_ctx *ctx = crypto_aead_ctx(aead);
469 struct device *jrdev = ctx->jrdev; 469 struct device *jrdev = ctx->jrdev;
470 struct rtattr *rta = (void *)key; 470 struct crypto_authenc_keys keys;
471 struct crypto_authenc_key_param *param;
472 unsigned int authkeylen;
473 unsigned int enckeylen;
474 int ret = 0; 471 int ret = 0;
475 472
476 param = RTA_DATA(rta); 473 if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
477 enckeylen = be32_to_cpu(param->enckeylen);
478
479 key += RTA_ALIGN(rta->rta_len);
480 keylen -= RTA_ALIGN(rta->rta_len);
481
482 if (keylen < enckeylen)
483 goto badkey;
484
485 authkeylen = keylen - enckeylen;
486
487 if (keylen > CAAM_MAX_KEY_SIZE)
488 goto badkey; 474 goto badkey;
489 475
490 /* Pick class 2 key length from algorithm submask */ 476 /* Pick class 2 key length from algorithm submask */
@@ -492,25 +478,29 @@ static int aead_setkey(struct crypto_aead *aead,
492 OP_ALG_ALGSEL_SHIFT] * 2; 478 OP_ALG_ALGSEL_SHIFT] * 2;
493 ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16); 479 ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16);
494 480
481 if (ctx->split_key_pad_len + keys.enckeylen > CAAM_MAX_KEY_SIZE)
482 goto badkey;
483
495#ifdef DEBUG 484#ifdef DEBUG
496 printk(KERN_ERR "keylen %d enckeylen %d authkeylen %d\n", 485 printk(KERN_ERR "keylen %d enckeylen %d authkeylen %d\n",
497 keylen, enckeylen, authkeylen); 486 keys.authkeylen + keys.enckeylen, keys.enckeylen,
487 keys.authkeylen);
498 printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n", 488 printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n",
499 ctx->split_key_len, ctx->split_key_pad_len); 489 ctx->split_key_len, ctx->split_key_pad_len);
500 print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", 490 print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
501 DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); 491 DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
502#endif 492#endif
503 493
504 ret = gen_split_aead_key(ctx, key, authkeylen); 494 ret = gen_split_aead_key(ctx, keys.authkey, keys.authkeylen);
505 if (ret) { 495 if (ret) {
506 goto badkey; 496 goto badkey;
507 } 497 }
508 498
509 /* postpend encryption key to auth split key */ 499 /* postpend encryption key to auth split key */
510 memcpy(ctx->key + ctx->split_key_pad_len, key + authkeylen, enckeylen); 500 memcpy(ctx->key + ctx->split_key_pad_len, keys.enckey, keys.enckeylen);
511 501
512 ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len + 502 ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len +
513 enckeylen, DMA_TO_DEVICE); 503 keys.enckeylen, DMA_TO_DEVICE);
514 if (dma_mapping_error(jrdev, ctx->key_dma)) { 504 if (dma_mapping_error(jrdev, ctx->key_dma)) {
515 dev_err(jrdev, "unable to map key i/o memory\n"); 505 dev_err(jrdev, "unable to map key i/o memory\n");
516 return -ENOMEM; 506 return -ENOMEM;
@@ -518,15 +508,15 @@ static int aead_setkey(struct crypto_aead *aead,
518#ifdef DEBUG 508#ifdef DEBUG
519 print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", 509 print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
520 DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, 510 DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
521 ctx->split_key_pad_len + enckeylen, 1); 511 ctx->split_key_pad_len + keys.enckeylen, 1);
522#endif 512#endif
523 513
524 ctx->enckeylen = enckeylen; 514 ctx->enckeylen = keys.enckeylen;
525 515
526 ret = aead_set_sh_desc(aead); 516 ret = aead_set_sh_desc(aead);
527 if (ret) { 517 if (ret) {
528 dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len + 518 dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len +
529 enckeylen, DMA_TO_DEVICE); 519 keys.enckeylen, DMA_TO_DEVICE);
530 } 520 }
531 521
532 return ret; 522 return ret;
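The caam change above replaces hand-rolled rtattr parsing with crypto_authenc_extractkeys(). The authenc() key blob that helper parses is, in outline, an rtattr header carrying a big-endian encryption-key length, followed by the authentication key and then the encryption key. Below is a simplified userspace sketch of that layout; struct fake_rtattr and extract_keys() are illustrative stand-ins for the kernel's rtattr/crypto_authenc_key_param handling, and RTA_ALIGN() padding is ignored.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>	/* ntohl()/htonl() as stand-ins for be32 helpers */

struct fake_rtattr {		/* simplified stand-in for struct rtattr */
	uint16_t rta_len;	/* header + payload length */
	uint16_t rta_type;
};

/* Split key blob into (authkey, authkeylen) and (enckey, enckeylen). */
static int extract_keys(const uint8_t *key, unsigned int keylen,
			const uint8_t **authkey, unsigned int *authkeylen,
			const uint8_t **enckey, unsigned int *enckeylen)
{
	struct fake_rtattr rta;
	uint32_t be_enckeylen;

	if (keylen < sizeof(rta) + sizeof(be_enckeylen))
		return -1;
	memcpy(&rta, key, sizeof(rta));
	memcpy(&be_enckeylen, key + sizeof(rta), sizeof(be_enckeylen));
	if (rta.rta_len < sizeof(rta) + sizeof(be_enckeylen) ||
	    rta.rta_len > keylen)
		return -1;

	*enckeylen = ntohl(be_enckeylen);
	key += rta.rta_len;		/* skip header + length parameter */
	keylen -= rta.rta_len;

	if (keylen < *enckeylen)
		return -1;
	*authkeylen = keylen - *enckeylen;
	*authkey = key;			/* auth key comes first ... */
	*enckey = key + *authkeylen;	/* ... then the encryption key */
	return 0;
}

int main(void)
{
	uint8_t blob[32];
	struct fake_rtattr rta = { 8, 1 };	/* 4-byte header + 4-byte param */
	uint32_t be_enckeylen = htonl(3);	/* 3-byte encryption key */
	const uint8_t *ak, *ek;
	unsigned int akl, ekl;

	memcpy(blob, &rta, sizeof(rta));
	memcpy(blob + 4, &be_enckeylen, 4);
	memcpy(blob + 8, "AAAA", 4);		/* authentication key */
	memcpy(blob + 12, "EEE", 3);		/* encryption key */

	if (!extract_keys(blob, 15, &ak, &akl, &ek, &ekl))
		printf("authkeylen=%u enckeylen=%u\n", akl, ekl);	/* 4, 3 */
	return 0;
}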
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
new file mode 100644
index 000000000000..7639ffc36c68
--- /dev/null
+++ b/drivers/crypto/ccp/Kconfig
@@ -0,0 +1,24 @@
1config CRYPTO_DEV_CCP_DD
2 tristate "Cryptographic Coprocessor device driver"
3 depends on CRYPTO_DEV_CCP
4 default m
5 select HW_RANDOM
6 help
7 Provides the interface to use the AMD Cryptographic Coprocessor
8 which can be used to accelerate or offload encryption operations
9 such as SHA, AES and more. If you choose 'M' here, this module
10 will be called ccp.
11
12config CRYPTO_DEV_CCP_CRYPTO
13 tristate "Encryption and hashing acceleration support"
14 depends on CRYPTO_DEV_CCP_DD
15 default m
16 select CRYPTO_ALGAPI
17 select CRYPTO_HASH
18 select CRYPTO_BLKCIPHER
19 select CRYPTO_AUTHENC
20 help
21 Support for using the cryptographic API with the AMD Cryptographic
22 Coprocessor. This module supports acceleration and offload of SHA
23 and AES algorithms. If you choose 'M' here, this module will be
24 called ccp_crypto.
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
new file mode 100644
index 000000000000..d3505a018720
--- /dev/null
+++ b/drivers/crypto/ccp/Makefile
@@ -0,0 +1,10 @@
1obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
2ccp-objs := ccp-dev.o ccp-ops.o
3ccp-objs += ccp-pci.o
4
5obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
6ccp-crypto-objs := ccp-crypto-main.o \
7 ccp-crypto-aes.o \
8 ccp-crypto-aes-cmac.o \
9 ccp-crypto-aes-xts.o \
10 ccp-crypto-sha.o
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
new file mode 100644
index 000000000000..8e162ad82085
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -0,0 +1,365 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) AES CMAC crypto API support
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/sched.h>
15#include <linux/delay.h>
16#include <linux/scatterlist.h>
17#include <linux/crypto.h>
18#include <crypto/algapi.h>
19#include <crypto/aes.h>
20#include <crypto/hash.h>
21#include <crypto/internal/hash.h>
22#include <crypto/scatterwalk.h>
23
24#include "ccp-crypto.h"
25
26
27static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
28 int ret)
29{
30 struct ahash_request *req = ahash_request_cast(async_req);
31 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
32 struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
33 unsigned int digest_size = crypto_ahash_digestsize(tfm);
34
35 if (ret)
36 goto e_free;
37
38 if (rctx->hash_rem) {
39 /* Save remaining data to buffer */
40 unsigned int offset = rctx->nbytes - rctx->hash_rem;
41 scatterwalk_map_and_copy(rctx->buf, rctx->src,
42 offset, rctx->hash_rem, 0);
43 rctx->buf_count = rctx->hash_rem;
44 } else
45 rctx->buf_count = 0;
46
47 /* Update result area if supplied */
48 if (req->result)
49 memcpy(req->result, rctx->iv, digest_size);
50
51e_free:
52 sg_free_table(&rctx->data_sg);
53
54 return ret;
55}
56
57static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes,
58 unsigned int final)
59{
60 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
61 struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
62 struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
63 struct scatterlist *sg, *cmac_key_sg = NULL;
64 unsigned int block_size =
65 crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
66 unsigned int need_pad, sg_count;
67 gfp_t gfp;
68 u64 len;
69 int ret;
70
71 if (!ctx->u.aes.key_len)
72 return -EINVAL;
73
74 if (nbytes)
75 rctx->null_msg = 0;
76
77 len = (u64)rctx->buf_count + (u64)nbytes;
78
79 if (!final && (len <= block_size)) {
80 scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src,
81 0, nbytes, 0);
82 rctx->buf_count += nbytes;
83
84 return 0;
85 }
86
87 rctx->src = req->src;
88 rctx->nbytes = nbytes;
89
90 rctx->final = final;
91 rctx->hash_rem = final ? 0 : len & (block_size - 1);
92 rctx->hash_cnt = len - rctx->hash_rem;
93 if (!final && !rctx->hash_rem) {
94 /* CCP can't do zero length final, so keep some data around */
95 rctx->hash_cnt -= block_size;
96 rctx->hash_rem = block_size;
97 }
98
99 if (final && (rctx->null_msg || (len & (block_size - 1))))
100 need_pad = 1;
101 else
102 need_pad = 0;
103
104 sg_init_one(&rctx->iv_sg, rctx->iv, sizeof(rctx->iv));
105
106 /* Build the data scatterlist table - allocate enough entries for all
107 * possible data pieces (buffer, input data, padding)
108 */
109 sg_count = (nbytes) ? sg_nents(req->src) + 2 : 2;
110 gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
111 GFP_KERNEL : GFP_ATOMIC;
112 ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp);
113 if (ret)
114 return ret;
115
116 sg = NULL;
117 if (rctx->buf_count) {
118 sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
119 sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg);
120 }
121
122 if (nbytes)
123 sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src);
124
125 if (need_pad) {
126 int pad_length = block_size - (len & (block_size - 1));
127
128 rctx->hash_cnt += pad_length;
129
130 memset(rctx->pad, 0, sizeof(rctx->pad));
131 rctx->pad[0] = 0x80;
132 sg_init_one(&rctx->pad_sg, rctx->pad, pad_length);
133 sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->pad_sg);
134 }
135 if (sg) {
136 sg_mark_end(sg);
137 sg = rctx->data_sg.sgl;
138 }
139
140 /* Initialize the K1/K2 scatterlist */
141 if (final)
142 cmac_key_sg = (need_pad) ? &ctx->u.aes.k2_sg
143 : &ctx->u.aes.k1_sg;
144
145 memset(&rctx->cmd, 0, sizeof(rctx->cmd));
146 INIT_LIST_HEAD(&rctx->cmd.entry);
147 rctx->cmd.engine = CCP_ENGINE_AES;
148 rctx->cmd.u.aes.type = ctx->u.aes.type;
149 rctx->cmd.u.aes.mode = ctx->u.aes.mode;
150 rctx->cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT;
151 rctx->cmd.u.aes.key = &ctx->u.aes.key_sg;
152 rctx->cmd.u.aes.key_len = ctx->u.aes.key_len;
153 rctx->cmd.u.aes.iv = &rctx->iv_sg;
154 rctx->cmd.u.aes.iv_len = AES_BLOCK_SIZE;
155 rctx->cmd.u.aes.src = sg;
156 rctx->cmd.u.aes.src_len = rctx->hash_cnt;
157 rctx->cmd.u.aes.dst = NULL;
158 rctx->cmd.u.aes.cmac_key = cmac_key_sg;
159 rctx->cmd.u.aes.cmac_key_len = ctx->u.aes.kn_len;
160 rctx->cmd.u.aes.cmac_final = final;
161
162 ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
163
164 return ret;
165}
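The update path above hashes only whole blocks and carries the remainder over to the next call; when the data ends exactly on a block boundary and this is not the final call, one full block is held back, because the CCP cannot perform a zero-length final operation. A small standalone sketch of that split (the early-return buffering for len <= block_size is omitted; block_size must be a power of two, 16 for AES):

#include <stdio.h>
#include <stdint.h>

/* Split buffered + new data into "hash now" (hash_cnt) and "keep for
 * later" (hash_rem), following the rules used by ccp_do_cmac_update(). */
static void split_update(uint64_t buf_count, uint64_t nbytes,
			 unsigned int block_size, int final,
			 uint64_t *hash_cnt, uint64_t *hash_rem)
{
	uint64_t len = buf_count + nbytes;

	*hash_rem = final ? 0 : len & (block_size - 1);
	*hash_cnt = len - *hash_rem;
	if (!final && !*hash_rem) {
		/* Engine can't do a zero-length final: hold one block back. */
		*hash_cnt -= block_size;
		*hash_rem = block_size;
	}
}

int main(void)
{
	uint64_t cnt, rem;

	split_update(10, 70, 16, 0, &cnt, &rem);   /* 80 bytes, not final */
	printf("not final: hash %llu, keep %llu\n",
	       (unsigned long long)cnt, (unsigned long long)rem);	/* 64, 16 */

	split_update(10, 70, 16, 1, &cnt, &rem);   /* 80 bytes, final */
	printf("final:     hash %llu, keep %llu\n",
	       (unsigned long long)cnt, (unsigned long long)rem);	/* 80, 0 */
	return 0;
}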
166
167static int ccp_aes_cmac_init(struct ahash_request *req)
168{
169 struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
170
171 memset(rctx, 0, sizeof(*rctx));
172
173 rctx->null_msg = 1;
174
175 return 0;
176}
177
178static int ccp_aes_cmac_update(struct ahash_request *req)
179{
180 return ccp_do_cmac_update(req, req->nbytes, 0);
181}
182
183static int ccp_aes_cmac_final(struct ahash_request *req)
184{
185 return ccp_do_cmac_update(req, 0, 1);
186}
187
188static int ccp_aes_cmac_finup(struct ahash_request *req)
189{
190 return ccp_do_cmac_update(req, req->nbytes, 1);
191}
192
193static int ccp_aes_cmac_digest(struct ahash_request *req)
194{
195 int ret;
196
197 ret = ccp_aes_cmac_init(req);
198 if (ret)
199 return ret;
200
201 return ccp_aes_cmac_finup(req);
202}
203
204static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
205 unsigned int key_len)
206{
207 struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
208 struct ccp_crypto_ahash_alg *alg =
209 ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm));
210 u64 k0_hi, k0_lo, k1_hi, k1_lo, k2_hi, k2_lo;
211 u64 rb_hi = 0x00, rb_lo = 0x87;
212 __be64 *gk;
213 int ret;
214
215 switch (key_len) {
216 case AES_KEYSIZE_128:
217 ctx->u.aes.type = CCP_AES_TYPE_128;
218 break;
219 case AES_KEYSIZE_192:
220 ctx->u.aes.type = CCP_AES_TYPE_192;
221 break;
222 case AES_KEYSIZE_256:
223 ctx->u.aes.type = CCP_AES_TYPE_256;
224 break;
225 default:
226 crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
227 return -EINVAL;
228 }
229 ctx->u.aes.mode = alg->mode;
230
231 /* Set to zero until complete */
232 ctx->u.aes.key_len = 0;
233
234 /* Set the key for the AES cipher used to generate the keys */
235 ret = crypto_cipher_setkey(ctx->u.aes.tfm_cipher, key, key_len);
236 if (ret)
237 return ret;
238
239 /* Encrypt a block of zeroes - use key area in context */
240 memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key));
241 crypto_cipher_encrypt_one(ctx->u.aes.tfm_cipher, ctx->u.aes.key,
242 ctx->u.aes.key);
243
244 /* Generate K1 and K2 */
245 k0_hi = be64_to_cpu(*((__be64 *)ctx->u.aes.key));
246 k0_lo = be64_to_cpu(*((__be64 *)ctx->u.aes.key + 1));
247
248 k1_hi = (k0_hi << 1) | (k0_lo >> 63);
249 k1_lo = k0_lo << 1;
250 if (ctx->u.aes.key[0] & 0x80) {
251 k1_hi ^= rb_hi;
252 k1_lo ^= rb_lo;
253 }
254 gk = (__be64 *)ctx->u.aes.k1;
255 *gk = cpu_to_be64(k1_hi);
256 gk++;
257 *gk = cpu_to_be64(k1_lo);
258
259 k2_hi = (k1_hi << 1) | (k1_lo >> 63);
260 k2_lo = k1_lo << 1;
261 if (ctx->u.aes.k1[0] & 0x80) {
262 k2_hi ^= rb_hi;
263 k2_lo ^= rb_lo;
264 }
265 gk = (__be64 *)ctx->u.aes.k2;
266 *gk = cpu_to_be64(k2_hi);
267 gk++;
268 *gk = cpu_to_be64(k2_lo);
269
270 ctx->u.aes.kn_len = sizeof(ctx->u.aes.k1);
271 sg_init_one(&ctx->u.aes.k1_sg, ctx->u.aes.k1, sizeof(ctx->u.aes.k1));
272 sg_init_one(&ctx->u.aes.k2_sg, ctx->u.aes.k2, sizeof(ctx->u.aes.k2));
273
274 /* Save the supplied key */
275 memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key));
276 memcpy(ctx->u.aes.key, key, key_len);
277 ctx->u.aes.key_len = key_len;
278 sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
279
280 return ret;
281}
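The setkey routine above derives the CMAC subkeys K1 and K2 as described in NIST SP 800-38B: encrypt an all-zero block to obtain L, then each subkey is a 128-bit left shift of the previous value, XORed with the constant Rb = 0x87 when the shifted-out bit was set. Here is a standalone sketch of that doubling step using the same 64-bit-halves arithmetic; the zero-block AES encryption is replaced by a fixed example value, so the printed subkeys are illustrative only.

#include <stdio.h>
#include <stdint.h>

/* One GF(2^128) doubling step as used for CMAC subkey generation:
 * out = in << 1, XORed with Rb (0x87 in the low byte) if in's MSB was set. */
static void cmac_double(const uint64_t in[2], uint64_t out[2])
{
	const uint64_t rb_hi = 0x00, rb_lo = 0x87;

	out[0] = (in[0] << 1) | (in[1] >> 63);	/* high 64 bits */
	out[1] = in[1] << 1;			/* low 64 bits */
	if (in[0] & 0x8000000000000000ULL) {
		out[0] ^= rb_hi;
		out[1] ^= rb_lo;
	}
}

int main(void)
{
	/* Stand-in for L = AES_K(0^128); the driver obtains this by
	 * encrypting a zero block with the supplied key. */
	uint64_t l[2]  = { 0x8000000000000000ULL, 0x0000000000000001ULL };
	uint64_t k1[2], k2[2];

	cmac_double(l, k1);	/* K1 = double(L)  */
	cmac_double(k1, k2);	/* K2 = double(K1) */

	printf("K1 = %016llx%016llx\n",
	       (unsigned long long)k1[0], (unsigned long long)k1[1]);
	printf("K2 = %016llx%016llx\n",
	       (unsigned long long)k2[0], (unsigned long long)k2[1]);
	return 0;
}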
282
283static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm)
284{
285 struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
286 struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
287 struct crypto_cipher *cipher_tfm;
288
289 ctx->complete = ccp_aes_cmac_complete;
290 ctx->u.aes.key_len = 0;
291
292 crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx));
293
294 cipher_tfm = crypto_alloc_cipher("aes", 0,
295 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
296 if (IS_ERR(cipher_tfm)) {
297 pr_warn("could not load aes cipher driver\n");
298 return PTR_ERR(cipher_tfm);
299 }
300 ctx->u.aes.tfm_cipher = cipher_tfm;
301
302 return 0;
303}
304
305static void ccp_aes_cmac_cra_exit(struct crypto_tfm *tfm)
306{
307 struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
308
309 if (ctx->u.aes.tfm_cipher)
310 crypto_free_cipher(ctx->u.aes.tfm_cipher);
311 ctx->u.aes.tfm_cipher = NULL;
312}
313
314int ccp_register_aes_cmac_algs(struct list_head *head)
315{
316 struct ccp_crypto_ahash_alg *ccp_alg;
317 struct ahash_alg *alg;
318 struct hash_alg_common *halg;
319 struct crypto_alg *base;
320 int ret;
321
322 ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
323 if (!ccp_alg)
324 return -ENOMEM;
325
326 INIT_LIST_HEAD(&ccp_alg->entry);
327 ccp_alg->mode = CCP_AES_MODE_CMAC;
328
329 alg = &ccp_alg->alg;
330 alg->init = ccp_aes_cmac_init;
331 alg->update = ccp_aes_cmac_update;
332 alg->final = ccp_aes_cmac_final;
333 alg->finup = ccp_aes_cmac_finup;
334 alg->digest = ccp_aes_cmac_digest;
335 alg->setkey = ccp_aes_cmac_setkey;
336
337 halg = &alg->halg;
338 halg->digestsize = AES_BLOCK_SIZE;
339
340 base = &halg->base;
341 snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)");
342 snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "cmac-aes-ccp");
343 base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC |
344 CRYPTO_ALG_KERN_DRIVER_ONLY |
345 CRYPTO_ALG_NEED_FALLBACK;
346 base->cra_blocksize = AES_BLOCK_SIZE;
347 base->cra_ctxsize = sizeof(struct ccp_ctx);
348 base->cra_priority = CCP_CRA_PRIORITY;
349 base->cra_type = &crypto_ahash_type;
350 base->cra_init = ccp_aes_cmac_cra_init;
351 base->cra_exit = ccp_aes_cmac_cra_exit;
352 base->cra_module = THIS_MODULE;
353
354 ret = crypto_register_ahash(alg);
355 if (ret) {
356 pr_err("%s ahash algorithm registration error (%d)\n",
357 base->cra_name, ret);
358 kfree(ccp_alg);
359 return ret;
360 }
361
362 list_add(&ccp_alg->entry, head);
363
364 return 0;
365}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
new file mode 100644
index 000000000000..0237ab58f242
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -0,0 +1,279 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/sched.h>
15#include <linux/delay.h>
16#include <linux/scatterlist.h>
17#include <linux/crypto.h>
18#include <crypto/algapi.h>
19#include <crypto/aes.h>
20#include <crypto/scatterwalk.h>
21
22#include "ccp-crypto.h"
23
24
25struct ccp_aes_xts_def {
26 const char *name;
27 const char *drv_name;
28};
29
30static struct ccp_aes_xts_def aes_xts_algs[] = {
31 {
32 .name = "xts(aes)",
33 .drv_name = "xts-aes-ccp",
34 },
35};
36
37struct ccp_unit_size_map {
38 unsigned int size;
39 u32 value;
40};
41
42static struct ccp_unit_size_map unit_size_map[] = {
43 {
44 .size = 4096,
45 .value = CCP_XTS_AES_UNIT_SIZE_4096,
46 },
47 {
48 .size = 2048,
49 .value = CCP_XTS_AES_UNIT_SIZE_2048,
50 },
51 {
52 .size = 1024,
53 .value = CCP_XTS_AES_UNIT_SIZE_1024,
54 },
55 {
56 .size = 512,
57 .value = CCP_XTS_AES_UNIT_SIZE_512,
58 },
59 {
60 .size = 256,
61 .value = CCP_XTS_AES_UNIT_SIZE__LAST,
62 },
63 {
64 .size = 128,
65 .value = CCP_XTS_AES_UNIT_SIZE__LAST,
66 },
67 {
68 .size = 64,
69 .value = CCP_XTS_AES_UNIT_SIZE__LAST,
70 },
71 {
72 .size = 32,
73 .value = CCP_XTS_AES_UNIT_SIZE__LAST,
74 },
75 {
76 .size = 16,
77 .value = CCP_XTS_AES_UNIT_SIZE_16,
78 },
79 {
80 .size = 1,
81 .value = CCP_XTS_AES_UNIT_SIZE__LAST,
82 },
83};
84
85static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret)
86{
87 struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
88 struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
89
90 if (ret)
91 return ret;
92
93 memcpy(req->info, rctx->iv, AES_BLOCK_SIZE);
94
95 return 0;
96}
97
98static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
99 unsigned int key_len)
100{
101 struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
102
103 /* Only support 128-bit AES key with a 128-bit Tweak key,
104 * otherwise use the fallback
105 */
106 switch (key_len) {
107 case AES_KEYSIZE_128 * 2:
108 memcpy(ctx->u.aes.key, key, key_len);
109 break;
110 }
111 ctx->u.aes.key_len = key_len / 2;
112 sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
113
114 return crypto_ablkcipher_setkey(ctx->u.aes.tfm_ablkcipher, key,
115 key_len);
116}
117
118static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
119 unsigned int encrypt)
120{
121 struct crypto_tfm *tfm =
122 crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
123 struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
124 struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
125 unsigned int unit;
126 int ret;
127
128 if (!ctx->u.aes.key_len)
129 return -EINVAL;
130
131 if (req->nbytes & (AES_BLOCK_SIZE - 1))
132 return -EINVAL;
133
134 if (!req->info)
135 return -EINVAL;
136
137 for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++)
138 if (!(req->nbytes & (unit_size_map[unit].size - 1)))
139 break;
140
141 if ((unit_size_map[unit].value == CCP_XTS_AES_UNIT_SIZE__LAST) ||
142 (ctx->u.aes.key_len != AES_KEYSIZE_128)) {
143 /* Use the fallback to process the request for any
144 * unsupported unit sizes or key sizes
145 */
146 ablkcipher_request_set_tfm(req, ctx->u.aes.tfm_ablkcipher);
147 ret = (encrypt) ? crypto_ablkcipher_encrypt(req) :
148 crypto_ablkcipher_decrypt(req);
149 ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
150
151 return ret;
152 }
153
154 memcpy(rctx->iv, req->info, AES_BLOCK_SIZE);
155 sg_init_one(&rctx->iv_sg, rctx->iv, AES_BLOCK_SIZE);
156
157 memset(&rctx->cmd, 0, sizeof(rctx->cmd));
158 INIT_LIST_HEAD(&rctx->cmd.entry);
159 rctx->cmd.engine = CCP_ENGINE_XTS_AES_128;
160 rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT
161 : CCP_AES_ACTION_DECRYPT;
162 rctx->cmd.u.xts.unit_size = unit_size_map[unit].value;
163 rctx->cmd.u.xts.key = &ctx->u.aes.key_sg;
164 rctx->cmd.u.xts.key_len = ctx->u.aes.key_len;
165 rctx->cmd.u.xts.iv = &rctx->iv_sg;
166 rctx->cmd.u.xts.iv_len = AES_BLOCK_SIZE;
167 rctx->cmd.u.xts.src = req->src;
168 rctx->cmd.u.xts.src_len = req->nbytes;
169 rctx->cmd.u.xts.dst = req->dst;
170
171 ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
172
173 return ret;
174}
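The XTS path above walks unit_size_map from largest to smallest and picks the first unit size that evenly divides the request length; a request that only matches a __LAST entry, or that uses a key other than 128-bit, is handed to the software fallback instead. A minimal sketch of that selection loop (the table is reduced to sizes plus a supported flag standing in for CCP_XTS_AES_UNIT_SIZE__LAST):

#include <stdio.h>

static const struct {
	unsigned int size;
	int supported;		/* 0 stands in for the __LAST sentinel */
} unit_size_map[] = {
	{ 4096, 1 }, { 2048, 1 }, { 1024, 1 }, { 512, 1 },
	{ 256, 0 }, { 128, 0 }, { 64, 0 }, { 32, 0 },
	{ 16, 1 }, { 1, 0 },
};

/* Return the selected unit size, or -1 to indicate "use the fallback".
 * The power-of-two sizes allow the divisibility test via a mask. */
static int pick_unit_size(unsigned int nbytes)
{
	unsigned int i;

	for (i = 0; i < sizeof(unit_size_map) / sizeof(unit_size_map[0]); i++)
		if (!(nbytes & (unit_size_map[i].size - 1)))
			return unit_size_map[i].supported ?
			       (int)unit_size_map[i].size : -1;
	return -1;
}

int main(void)
{
	printf("4096 -> %d\n", pick_unit_size(4096));	/* 4096 */
	printf("1040 -> %d\n", pick_unit_size(1040));	/* 16   */
	printf(" 768 -> %d\n", pick_unit_size(768));	/* -1: 256 matches but is unsupported */
	return 0;
}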
175
176static int ccp_aes_xts_encrypt(struct ablkcipher_request *req)
177{
178 return ccp_aes_xts_crypt(req, 1);
179}
180
181static int ccp_aes_xts_decrypt(struct ablkcipher_request *req)
182{
183 return ccp_aes_xts_crypt(req, 0);
184}
185
186static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm)
187{
188 struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
189 struct crypto_ablkcipher *fallback_tfm;
190
191 ctx->complete = ccp_aes_xts_complete;
192 ctx->u.aes.key_len = 0;
193
194 fallback_tfm = crypto_alloc_ablkcipher(tfm->__crt_alg->cra_name, 0,
195 CRYPTO_ALG_ASYNC |
196 CRYPTO_ALG_NEED_FALLBACK);
197 if (IS_ERR(fallback_tfm)) {
198 pr_warn("could not load fallback driver %s\n",
199 tfm->__crt_alg->cra_name);
200 return PTR_ERR(fallback_tfm);
201 }
202 ctx->u.aes.tfm_ablkcipher = fallback_tfm;
203
204 tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx) +
205 fallback_tfm->base.crt_ablkcipher.reqsize;
206
207 return 0;
208}
209
210static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm)
211{
212 struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
213
214 if (ctx->u.aes.tfm_ablkcipher)
215 crypto_free_ablkcipher(ctx->u.aes.tfm_ablkcipher);
216 ctx->u.aes.tfm_ablkcipher = NULL;
217}
218
219
220static int ccp_register_aes_xts_alg(struct list_head *head,
221 const struct ccp_aes_xts_def *def)
222{
223 struct ccp_crypto_ablkcipher_alg *ccp_alg;
224 struct crypto_alg *alg;
225 int ret;
226
227 ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
228 if (!ccp_alg)
229 return -ENOMEM;
230
231 INIT_LIST_HEAD(&ccp_alg->entry);
232
233 alg = &ccp_alg->alg;
234
235 snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
236 snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
237 def->drv_name);
238 alg->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
239 CRYPTO_ALG_KERN_DRIVER_ONLY |
240 CRYPTO_ALG_NEED_FALLBACK;
241 alg->cra_blocksize = AES_BLOCK_SIZE;
242 alg->cra_ctxsize = sizeof(struct ccp_ctx);
243 alg->cra_priority = CCP_CRA_PRIORITY;
244 alg->cra_type = &crypto_ablkcipher_type;
245 alg->cra_ablkcipher.setkey = ccp_aes_xts_setkey;
246 alg->cra_ablkcipher.encrypt = ccp_aes_xts_encrypt;
247 alg->cra_ablkcipher.decrypt = ccp_aes_xts_decrypt;
248 alg->cra_ablkcipher.min_keysize = AES_MIN_KEY_SIZE * 2;
249 alg->cra_ablkcipher.max_keysize = AES_MAX_KEY_SIZE * 2;
250 alg->cra_ablkcipher.ivsize = AES_BLOCK_SIZE;
251 alg->cra_init = ccp_aes_xts_cra_init;
252 alg->cra_exit = ccp_aes_xts_cra_exit;
253 alg->cra_module = THIS_MODULE;
254
255 ret = crypto_register_alg(alg);
256 if (ret) {
257 pr_err("%s ablkcipher algorithm registration error (%d)\n",
258 alg->cra_name, ret);
259 kfree(ccp_alg);
260 return ret;
261 }
262
263 list_add(&ccp_alg->entry, head);
264
265 return 0;
266}
267
268int ccp_register_aes_xts_algs(struct list_head *head)
269{
270 int i, ret;
271
272 for (i = 0; i < ARRAY_SIZE(aes_xts_algs); i++) {
273 ret = ccp_register_aes_xts_alg(head, &aes_xts_algs[i]);
274 if (ret)
275 return ret;
276 }
277
278 return 0;
279}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
new file mode 100644
index 000000000000..e46490db0f63
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -0,0 +1,369 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) AES crypto API support
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/sched.h>
15#include <linux/delay.h>
16#include <linux/scatterlist.h>
17#include <linux/crypto.h>
18#include <crypto/algapi.h>
19#include <crypto/aes.h>
20#include <crypto/ctr.h>
21#include <crypto/scatterwalk.h>
22
23#include "ccp-crypto.h"
24
25
26static int ccp_aes_complete(struct crypto_async_request *async_req, int ret)
27{
28 struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
29 struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
30 struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
31
32 if (ret)
33 return ret;
34
35 if (ctx->u.aes.mode != CCP_AES_MODE_ECB)
36 memcpy(req->info, rctx->iv, AES_BLOCK_SIZE);
37
38 return 0;
39}
40
41static int ccp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
42 unsigned int key_len)
43{
44 struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
45 struct ccp_crypto_ablkcipher_alg *alg =
46 ccp_crypto_ablkcipher_alg(crypto_ablkcipher_tfm(tfm));
47
48 switch (key_len) {
49 case AES_KEYSIZE_128:
50 ctx->u.aes.type = CCP_AES_TYPE_128;
51 break;
52 case AES_KEYSIZE_192:
53 ctx->u.aes.type = CCP_AES_TYPE_192;
54 break;
55 case AES_KEYSIZE_256:
56 ctx->u.aes.type = CCP_AES_TYPE_256;
57 break;
58 default:
59 crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
60 return -EINVAL;
61 }
62 ctx->u.aes.mode = alg->mode;
63 ctx->u.aes.key_len = key_len;
64
65 memcpy(ctx->u.aes.key, key, key_len);
66 sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
67
68 return 0;
69}
70
71static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt)
72{
73 struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
74 struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
75 struct scatterlist *iv_sg = NULL;
76 unsigned int iv_len = 0;
77 int ret;
78
79 if (!ctx->u.aes.key_len)
80 return -EINVAL;
81
82 if (((ctx->u.aes.mode == CCP_AES_MODE_ECB) ||
83 (ctx->u.aes.mode == CCP_AES_MODE_CBC) ||
84 (ctx->u.aes.mode == CCP_AES_MODE_CFB)) &&
85 (req->nbytes & (AES_BLOCK_SIZE - 1)))
86 return -EINVAL;
87
88 if (ctx->u.aes.mode != CCP_AES_MODE_ECB) {
89 if (!req->info)
90 return -EINVAL;
91
92 memcpy(rctx->iv, req->info, AES_BLOCK_SIZE);
93 iv_sg = &rctx->iv_sg;
94 iv_len = AES_BLOCK_SIZE;
95 sg_init_one(iv_sg, rctx->iv, iv_len);
96 }
97
98 memset(&rctx->cmd, 0, sizeof(rctx->cmd));
99 INIT_LIST_HEAD(&rctx->cmd.entry);
100 rctx->cmd.engine = CCP_ENGINE_AES;
101 rctx->cmd.u.aes.type = ctx->u.aes.type;
102 rctx->cmd.u.aes.mode = ctx->u.aes.mode;
103 rctx->cmd.u.aes.action =
104 (encrypt) ? CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT;
105 rctx->cmd.u.aes.key = &ctx->u.aes.key_sg;
106 rctx->cmd.u.aes.key_len = ctx->u.aes.key_len;
107 rctx->cmd.u.aes.iv = iv_sg;
108 rctx->cmd.u.aes.iv_len = iv_len;
109 rctx->cmd.u.aes.src = req->src;
110 rctx->cmd.u.aes.src_len = req->nbytes;
111 rctx->cmd.u.aes.dst = req->dst;
112
113 ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
114
115 return ret;
116}
117
118static int ccp_aes_encrypt(struct ablkcipher_request *req)
119{
120 return ccp_aes_crypt(req, true);
121}
122
123static int ccp_aes_decrypt(struct ablkcipher_request *req)
124{
125 return ccp_aes_crypt(req, false);
126}
127
128static int ccp_aes_cra_init(struct crypto_tfm *tfm)
129{
130 struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
131
132 ctx->complete = ccp_aes_complete;
133 ctx->u.aes.key_len = 0;
134
135 tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx);
136
137 return 0;
138}
139
140static void ccp_aes_cra_exit(struct crypto_tfm *tfm)
141{
142}
143
144static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req,
145 int ret)
146{
147 struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
148 struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
149
150 /* Restore the original pointer */
151 req->info = rctx->rfc3686_info;
152
153 return ccp_aes_complete(async_req, ret);
154}
155
156static int ccp_aes_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
157 unsigned int key_len)
158{
159 struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
160
161 if (key_len < CTR_RFC3686_NONCE_SIZE)
162 return -EINVAL;
163
164 key_len -= CTR_RFC3686_NONCE_SIZE;
165 memcpy(ctx->u.aes.nonce, key + key_len, CTR_RFC3686_NONCE_SIZE);
166
167 return ccp_aes_setkey(tfm, key, key_len);
168}
169
170static int ccp_aes_rfc3686_crypt(struct ablkcipher_request *req, bool encrypt)
171{
172 struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
173 struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
174 u8 *iv;
175
176 /* Initialize the CTR block */
177 iv = rctx->rfc3686_iv;
178 memcpy(iv, ctx->u.aes.nonce, CTR_RFC3686_NONCE_SIZE);
179
180 iv += CTR_RFC3686_NONCE_SIZE;
181 memcpy(iv, req->info, CTR_RFC3686_IV_SIZE);
182
183 iv += CTR_RFC3686_IV_SIZE;
184 *(__be32 *)iv = cpu_to_be32(1);
185
186 /* Point to the new IV */
187 rctx->rfc3686_info = req->info;
188 req->info = rctx->rfc3686_iv;
189
190 return ccp_aes_crypt(req, encrypt);
191}
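ccp_aes_rfc3686_crypt() above builds the 16-byte initial counter block defined by RFC 3686: a 4-byte nonce taken from the key material, the 8-byte per-request IV, and a 32-bit big-endian block counter starting at 1. A standalone sketch of that layout, with the sizes hard-coded to the RFC values:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htonl() as a stand-in for cpu_to_be32() */

#define RFC3686_NONCE_SIZE 4
#define RFC3686_IV_SIZE    8
#define AES_BLOCK_SIZE     16

/* Assemble nonce || iv || be32(1) into a 16-byte counter block. */
static void build_ctr_block(uint8_t ctrblk[AES_BLOCK_SIZE],
			    const uint8_t nonce[RFC3686_NONCE_SIZE],
			    const uint8_t iv[RFC3686_IV_SIZE])
{
	uint32_t one = htonl(1);

	memcpy(ctrblk, nonce, RFC3686_NONCE_SIZE);
	memcpy(ctrblk + RFC3686_NONCE_SIZE, iv, RFC3686_IV_SIZE);
	memcpy(ctrblk + RFC3686_NONCE_SIZE + RFC3686_IV_SIZE, &one, sizeof(one));
}

int main(void)
{
	uint8_t nonce[RFC3686_NONCE_SIZE] = { 0xde, 0xad, 0xbe, 0xef };
	uint8_t iv[RFC3686_IV_SIZE] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint8_t ctrblk[AES_BLOCK_SIZE];
	int i;

	build_ctr_block(ctrblk, nonce, iv);
	for (i = 0; i < AES_BLOCK_SIZE; i++)	/* nonce, then IV, then counter = 1 */
		printf("%02x", ctrblk[i]);
	printf("\n");
	return 0;
}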
192
193static int ccp_aes_rfc3686_encrypt(struct ablkcipher_request *req)
194{
195 return ccp_aes_rfc3686_crypt(req, true);
196}
197
198static int ccp_aes_rfc3686_decrypt(struct ablkcipher_request *req)
199{
200 return ccp_aes_rfc3686_crypt(req, false);
201}
202
203static int ccp_aes_rfc3686_cra_init(struct crypto_tfm *tfm)
204{
205 struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
206
207 ctx->complete = ccp_aes_rfc3686_complete;
208 ctx->u.aes.key_len = 0;
209
210 tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx);
211
212 return 0;
213}
214
215static void ccp_aes_rfc3686_cra_exit(struct crypto_tfm *tfm)
216{
217}
218
219static struct crypto_alg ccp_aes_defaults = {
220 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
221 CRYPTO_ALG_ASYNC |
222 CRYPTO_ALG_KERN_DRIVER_ONLY |
223 CRYPTO_ALG_NEED_FALLBACK,
224 .cra_blocksize = AES_BLOCK_SIZE,
225 .cra_ctxsize = sizeof(struct ccp_ctx),
226 .cra_priority = CCP_CRA_PRIORITY,
227 .cra_type = &crypto_ablkcipher_type,
228 .cra_init = ccp_aes_cra_init,
229 .cra_exit = ccp_aes_cra_exit,
230 .cra_module = THIS_MODULE,
231 .cra_ablkcipher = {
232 .setkey = ccp_aes_setkey,
233 .encrypt = ccp_aes_encrypt,
234 .decrypt = ccp_aes_decrypt,
235 .min_keysize = AES_MIN_KEY_SIZE,
236 .max_keysize = AES_MAX_KEY_SIZE,
237 },
238};
239
240static struct crypto_alg ccp_aes_rfc3686_defaults = {
241 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
242 CRYPTO_ALG_ASYNC |
243 CRYPTO_ALG_KERN_DRIVER_ONLY |
244 CRYPTO_ALG_NEED_FALLBACK,
245 .cra_blocksize = CTR_RFC3686_BLOCK_SIZE,
246 .cra_ctxsize = sizeof(struct ccp_ctx),
247 .cra_priority = CCP_CRA_PRIORITY,
248 .cra_type = &crypto_ablkcipher_type,
249 .cra_init = ccp_aes_rfc3686_cra_init,
250 .cra_exit = ccp_aes_rfc3686_cra_exit,
251 .cra_module = THIS_MODULE,
252 .cra_ablkcipher = {
253 .setkey = ccp_aes_rfc3686_setkey,
254 .encrypt = ccp_aes_rfc3686_encrypt,
255 .decrypt = ccp_aes_rfc3686_decrypt,
256 .min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
257 .max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
258 },
259};
260
261struct ccp_aes_def {
262 enum ccp_aes_mode mode;
263 const char *name;
264 const char *driver_name;
265 unsigned int blocksize;
266 unsigned int ivsize;
267 struct crypto_alg *alg_defaults;
268};
269
270static struct ccp_aes_def aes_algs[] = {
271 {
272 .mode = CCP_AES_MODE_ECB,
273 .name = "ecb(aes)",
274 .driver_name = "ecb-aes-ccp",
275 .blocksize = AES_BLOCK_SIZE,
276 .ivsize = 0,
277 .alg_defaults = &ccp_aes_defaults,
278 },
279 {
280 .mode = CCP_AES_MODE_CBC,
281 .name = "cbc(aes)",
282 .driver_name = "cbc-aes-ccp",
283 .blocksize = AES_BLOCK_SIZE,
284 .ivsize = AES_BLOCK_SIZE,
285 .alg_defaults = &ccp_aes_defaults,
286 },
287 {
288 .mode = CCP_AES_MODE_CFB,
289 .name = "cfb(aes)",
290 .driver_name = "cfb-aes-ccp",
291 .blocksize = AES_BLOCK_SIZE,
292 .ivsize = AES_BLOCK_SIZE,
293 .alg_defaults = &ccp_aes_defaults,
294 },
295 {
296 .mode = CCP_AES_MODE_OFB,
297 .name = "ofb(aes)",
298 .driver_name = "ofb-aes-ccp",
299 .blocksize = 1,
300 .ivsize = AES_BLOCK_SIZE,
301 .alg_defaults = &ccp_aes_defaults,
302 },
303 {
304 .mode = CCP_AES_MODE_CTR,
305 .name = "ctr(aes)",
306 .driver_name = "ctr-aes-ccp",
307 .blocksize = 1,
308 .ivsize = AES_BLOCK_SIZE,
309 .alg_defaults = &ccp_aes_defaults,
310 },
311 {
312 .mode = CCP_AES_MODE_CTR,
313 .name = "rfc3686(ctr(aes))",
314 .driver_name = "rfc3686-ctr-aes-ccp",
315 .blocksize = 1,
316 .ivsize = CTR_RFC3686_IV_SIZE,
317 .alg_defaults = &ccp_aes_rfc3686_defaults,
318 },
319};
320
321static int ccp_register_aes_alg(struct list_head *head,
322 const struct ccp_aes_def *def)
323{
324 struct ccp_crypto_ablkcipher_alg *ccp_alg;
325 struct crypto_alg *alg;
326 int ret;
327
328 ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
329 if (!ccp_alg)
330 return -ENOMEM;
331
332 INIT_LIST_HEAD(&ccp_alg->entry);
333
334 ccp_alg->mode = def->mode;
335
336 /* Copy the defaults and override as necessary */
337 alg = &ccp_alg->alg;
338 *alg = *def->alg_defaults;
339 snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
340 snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
341 def->driver_name);
342 alg->cra_blocksize = def->blocksize;
343 alg->cra_ablkcipher.ivsize = def->ivsize;
344
345 ret = crypto_register_alg(alg);
346 if (ret) {
347 pr_err("%s ablkcipher algorithm registration error (%d)\n",
348 alg->cra_name, ret);
349 kfree(ccp_alg);
350 return ret;
351 }
352
353 list_add(&ccp_alg->entry, head);
354
355 return 0;
356}
357
358int ccp_register_aes_algs(struct list_head *head)
359{
360 int i, ret;
361
362 for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
363 ret = ccp_register_aes_alg(head, &aes_algs[i]);
364 if (ret)
365 return ret;
366 }
367
368 return 0;
369}
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
new file mode 100644
index 000000000000..2636f044789d
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -0,0 +1,432 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) crypto API support
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/list.h>
16#include <linux/ccp.h>
17#include <linux/scatterlist.h>
18#include <crypto/internal/hash.h>
19
20#include "ccp-crypto.h"
21
22MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
23MODULE_LICENSE("GPL");
24MODULE_VERSION("1.0.0");
25MODULE_DESCRIPTION("AMD Cryptographic Coprocessor crypto API support");
26
27
28/* List heads for the supported algorithms */
29static LIST_HEAD(hash_algs);
30static LIST_HEAD(cipher_algs);
31
32/* For any tfm, requests for that tfm on the same CPU must be returned
33 * in the order received. With multiple queues available, the CCP can
34 * process more than one cmd at a time. Therefore we must maintain
35 * a cmd list to ensure the proper ordering of requests on a given tfm/cpu
36 * combination.
37 */
38struct ccp_crypto_cpu_queue {
39 struct list_head cmds;
40 struct list_head *backlog;
41 unsigned int cmd_count;
42};
43#define CCP_CRYPTO_MAX_QLEN 50
44
45struct ccp_crypto_percpu_queue {
46 struct ccp_crypto_cpu_queue __percpu *cpu_queue;
47};
48static struct ccp_crypto_percpu_queue req_queue;
49
50struct ccp_crypto_cmd {
51 struct list_head entry;
52
53 struct ccp_cmd *cmd;
54
55 /* Save the crypto_tfm and crypto_async_request addresses
56 * separately to avoid any reference to a possibly invalid
57 * crypto_async_request structure after invoking the request
58 * callback
59 */
60 struct crypto_async_request *req;
61 struct crypto_tfm *tfm;
62
63 /* Used for held command processing to determine state */
64 int ret;
65
66 int cpu;
67};
68
69struct ccp_crypto_cpu {
70 struct work_struct work;
71 struct completion completion;
72 struct ccp_crypto_cmd *crypto_cmd;
73 int err;
74};
75
76
77static inline bool ccp_crypto_success(int err)
78{
79 if (err && (err != -EINPROGRESS) && (err != -EBUSY))
80 return false;
81
82 return true;
83}
84
85/*
86 * ccp_crypto_cmd_complete must be called while running on the appropriate
87 * cpu and the caller must have done a get_cpu to disable preemption
88 */
89static struct ccp_crypto_cmd *ccp_crypto_cmd_complete(
90 struct ccp_crypto_cmd *crypto_cmd, struct ccp_crypto_cmd **backlog)
91{
92 struct ccp_crypto_cpu_queue *cpu_queue;
93 struct ccp_crypto_cmd *held = NULL, *tmp;
94
95 *backlog = NULL;
96
97 cpu_queue = this_cpu_ptr(req_queue.cpu_queue);
98
99 /* Held cmds will be after the current cmd in the queue so start
100 * searching for a cmd with a matching tfm for submission.
101 */
102 tmp = crypto_cmd;
103 list_for_each_entry_continue(tmp, &cpu_queue->cmds, entry) {
104 if (crypto_cmd->tfm != tmp->tfm)
105 continue;
106 held = tmp;
107 break;
108 }
109
110 /* Process the backlog:
111 * Because cmds can be executed from any point in the cmd list
112 * special precautions have to be taken when handling the backlog.
113 */
114 if (cpu_queue->backlog != &cpu_queue->cmds) {
115 /* Skip over this cmd if it is the next backlog cmd */
116 if (cpu_queue->backlog == &crypto_cmd->entry)
117 cpu_queue->backlog = crypto_cmd->entry.next;
118
119 *backlog = container_of(cpu_queue->backlog,
120 struct ccp_crypto_cmd, entry);
121 cpu_queue->backlog = cpu_queue->backlog->next;
122
123 /* Skip over this cmd if it is now the next backlog cmd */
124 if (cpu_queue->backlog == &crypto_cmd->entry)
125 cpu_queue->backlog = crypto_cmd->entry.next;
126 }
127
128 /* Remove the cmd entry from the list of cmds */
129 cpu_queue->cmd_count--;
130 list_del(&crypto_cmd->entry);
131
132 return held;
133}
134
135static void ccp_crypto_complete_on_cpu(struct work_struct *work)
136{
137 struct ccp_crypto_cpu *cpu_work =
138 container_of(work, struct ccp_crypto_cpu, work);
139 struct ccp_crypto_cmd *crypto_cmd = cpu_work->crypto_cmd;
140 struct ccp_crypto_cmd *held, *next, *backlog;
141 struct crypto_async_request *req = crypto_cmd->req;
142 struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm);
143 int cpu, ret;
144
145 cpu = get_cpu();
146
147 if (cpu_work->err == -EINPROGRESS) {
148		/* Only propagate the -EINPROGRESS if necessary */
149 if (crypto_cmd->ret == -EBUSY) {
150 crypto_cmd->ret = -EINPROGRESS;
151 req->complete(req, -EINPROGRESS);
152 }
153
154 goto e_cpu;
155 }
156
157 /* Operation has completed - update the queue before invoking
158 * the completion callbacks and retrieve the next cmd (cmd with
159 * a matching tfm) that can be submitted to the CCP.
160 */
161 held = ccp_crypto_cmd_complete(crypto_cmd, &backlog);
162 if (backlog) {
163 backlog->ret = -EINPROGRESS;
164 backlog->req->complete(backlog->req, -EINPROGRESS);
165 }
166
167 /* Transition the state from -EBUSY to -EINPROGRESS first */
168 if (crypto_cmd->ret == -EBUSY)
169 req->complete(req, -EINPROGRESS);
170
171 /* Completion callbacks */
172 ret = cpu_work->err;
173 if (ctx->complete)
174 ret = ctx->complete(req, ret);
175 req->complete(req, ret);
176
177 /* Submit the next cmd */
178 while (held) {
179 ret = ccp_enqueue_cmd(held->cmd);
180 if (ccp_crypto_success(ret))
181 break;
182
183 /* Error occurred, report it and get the next entry */
184 held->req->complete(held->req, ret);
185
186 next = ccp_crypto_cmd_complete(held, &backlog);
187 if (backlog) {
188 backlog->ret = -EINPROGRESS;
189 backlog->req->complete(backlog->req, -EINPROGRESS);
190 }
191
192 kfree(held);
193 held = next;
194 }
195
196 kfree(crypto_cmd);
197
198e_cpu:
199 put_cpu();
200
201 complete(&cpu_work->completion);
202}
203
204static void ccp_crypto_complete(void *data, int err)
205{
206 struct ccp_crypto_cmd *crypto_cmd = data;
207 struct ccp_crypto_cpu cpu_work;
208
209 INIT_WORK(&cpu_work.work, ccp_crypto_complete_on_cpu);
210 init_completion(&cpu_work.completion);
211 cpu_work.crypto_cmd = crypto_cmd;
212 cpu_work.err = err;
213
214 schedule_work_on(crypto_cmd->cpu, &cpu_work.work);
215
216 /* Keep the completion call synchronous */
217 wait_for_completion(&cpu_work.completion);
218}
219
220static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd)
221{
222 struct ccp_crypto_cpu_queue *cpu_queue;
223 struct ccp_crypto_cmd *active = NULL, *tmp;
224 int cpu, ret;
225
226 cpu = get_cpu();
227 crypto_cmd->cpu = cpu;
228
229 cpu_queue = this_cpu_ptr(req_queue.cpu_queue);
230
231 /* Check if the cmd can/should be queued */
232 if (cpu_queue->cmd_count >= CCP_CRYPTO_MAX_QLEN) {
233 ret = -EBUSY;
234 if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG))
235 goto e_cpu;
236 }
237
238 /* Look for an entry with the same tfm. If there is a cmd
239 * with the same tfm in the list for this cpu then the current
240 * cmd cannot be submitted to the CCP yet.
241 */
242 list_for_each_entry(tmp, &cpu_queue->cmds, entry) {
243 if (crypto_cmd->tfm != tmp->tfm)
244 continue;
245 active = tmp;
246 break;
247 }
248
249 ret = -EINPROGRESS;
250 if (!active) {
251 ret = ccp_enqueue_cmd(crypto_cmd->cmd);
252 if (!ccp_crypto_success(ret))
253 goto e_cpu;
254 }
255
256 if (cpu_queue->cmd_count >= CCP_CRYPTO_MAX_QLEN) {
257 ret = -EBUSY;
258 if (cpu_queue->backlog == &cpu_queue->cmds)
259 cpu_queue->backlog = &crypto_cmd->entry;
260 }
261 crypto_cmd->ret = ret;
262
263 cpu_queue->cmd_count++;
264 list_add_tail(&crypto_cmd->entry, &cpu_queue->cmds);
265
266e_cpu:
267 put_cpu();
268
269 return ret;
270}
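ccp_crypto_enqueue_cmd() above submits a command to the CCP immediately only when no other command for the same tfm is already queued on this CPU; otherwise the command waits on the per-CPU list and is released later by the completion path, preserving per-tfm ordering. A toy, single-threaded sketch of just that queueing rule (no real hardware, per-CPU data, or backlog handling):

#include <stdio.h>
#include <string.h>

#define MAX_QLEN 50

struct cmd {
	const char *tfm;	/* stands in for the crypto_tfm pointer */
	int submitted;		/* 1 if handed to the (pretend) CCP */
};

static struct cmd queue[MAX_QLEN];
static int cmd_count;

/* Queue a command; submit it right away only if no earlier command
 * for the same tfm is still pending. */
static int enqueue(const char *tfm)
{
	int i, active = 0;

	if (cmd_count >= MAX_QLEN)
		return -1;	/* roughly -EBUSY in the driver */

	for (i = 0; i < cmd_count; i++)
		if (!strcmp(queue[i].tfm, tfm))
			active = 1;

	queue[cmd_count].tfm = tfm;
	queue[cmd_count].submitted = !active;
	cmd_count++;
	return !active;		/* 1 = submitted now, 0 = held for later */
}

int main(void)
{
	printf("aes-1: %d\n", enqueue("cbc-aes-ccp"));	/* submitted now */
	printf("aes-2: %d\n", enqueue("cbc-aes-ccp"));	/* held: same tfm pending */
	printf("sha-1: %d\n", enqueue("sha1-ccp"));	/* submitted now */
	return 0;
}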
271
272/**
273 * ccp_crypto_enqueue_request - queue a crypto async request for processing
274 * by the CCP
275 *
276 * @req: crypto_async_request struct to be processed
277 * @cmd: ccp_cmd struct to be sent to the CCP
278 */
279int ccp_crypto_enqueue_request(struct crypto_async_request *req,
280 struct ccp_cmd *cmd)
281{
282 struct ccp_crypto_cmd *crypto_cmd;
283 gfp_t gfp;
284 int ret;
285
286 gfp = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
287
288 crypto_cmd = kzalloc(sizeof(*crypto_cmd), gfp);
289 if (!crypto_cmd)
290 return -ENOMEM;
291
292 /* The tfm pointer must be saved and not referenced from the
293 * crypto_async_request (req) pointer because it is used after
294 * completion callback for the request and the req pointer
295 * might not be valid anymore.
296 */
297 crypto_cmd->cmd = cmd;
298 crypto_cmd->req = req;
299 crypto_cmd->tfm = req->tfm;
300
301 cmd->callback = ccp_crypto_complete;
302 cmd->data = crypto_cmd;
303
304 if (req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
305 cmd->flags |= CCP_CMD_MAY_BACKLOG;
306 else
307 cmd->flags &= ~CCP_CMD_MAY_BACKLOG;
308
309 ret = ccp_crypto_enqueue_cmd(crypto_cmd);
310 if (!ccp_crypto_success(ret))
311 kfree(crypto_cmd);
312
313 return ret;
314}
315
316struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table,
317 struct scatterlist *sg_add)
318{
319 struct scatterlist *sg, *sg_last = NULL;
320
321 for (sg = table->sgl; sg; sg = sg_next(sg))
322 if (!sg_page(sg))
323 break;
324 BUG_ON(!sg);
325
326 for (; sg && sg_add; sg = sg_next(sg), sg_add = sg_next(sg_add)) {
327 sg_set_page(sg, sg_page(sg_add), sg_add->length,
328 sg_add->offset);
329 sg_last = sg;
330 }
331 BUG_ON(sg_add);
332
333 return sg_last;
334}
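ccp_crypto_sg_table_add() above copies the entries of one scatterlist into the first unused slots of a preallocated table and returns the last entry written, so several pieces (buffer, source data, padding) can be chained into a single list. A toy array-based sketch of the same append-and-return-last idea (no scatterlist API; a NULL page pointer marks an unused slot or terminates the list being added):

#include <stdio.h>
#include <stddef.h>

struct toy_sg {
	const void *page;	/* NULL marks an unused slot / end of list */
	unsigned int length;
};

/* Append 'add' (NULL-page terminated) to the first free slots of 'table'
 * and return a pointer to the last entry written, or NULL if none. */
static struct toy_sg *sg_table_add(struct toy_sg *table, size_t table_len,
				   const struct toy_sg *add)
{
	struct toy_sg *last = NULL;
	size_t i = 0;

	while (i < table_len && table[i].page)	/* find first unused slot */
		i++;

	for (; i < table_len && add->page; i++, add++) {
		table[i] = *add;
		last = &table[i];
	}
	return last;
}

int main(void)
{
	struct toy_sg table[4] = { { "buf", 10 } };	/* one slot already used */
	struct toy_sg src[] = { { "pg0", 4096 }, { "pg1", 100 }, { NULL, 0 } };
	struct toy_sg *last = sg_table_add(table, 4, src);

	if (last)
		printf("last entry: %u bytes\n", last->length);	/* 100 */
	return 0;
}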
335
336static int ccp_register_algs(void)
337{
338 int ret;
339
340 ret = ccp_register_aes_algs(&cipher_algs);
341 if (ret)
342 return ret;
343
344 ret = ccp_register_aes_cmac_algs(&hash_algs);
345 if (ret)
346 return ret;
347
348 ret = ccp_register_aes_xts_algs(&cipher_algs);
349 if (ret)
350 return ret;
351
352 ret = ccp_register_sha_algs(&hash_algs);
353 if (ret)
354 return ret;
355
356 return 0;
357}
358
359static void ccp_unregister_algs(void)
360{
361 struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp;
362 struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp;
363
364 list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) {
365 crypto_unregister_ahash(&ahash_alg->alg);
366 list_del(&ahash_alg->entry);
367 kfree(ahash_alg);
368 }
369
370 list_for_each_entry_safe(ablk_alg, ablk_tmp, &cipher_algs, entry) {
371 crypto_unregister_alg(&ablk_alg->alg);
372 list_del(&ablk_alg->entry);
373 kfree(ablk_alg);
374 }
375}
376
377static int ccp_init_queues(void)
378{
379 struct ccp_crypto_cpu_queue *cpu_queue;
380 int cpu;
381
382 req_queue.cpu_queue = alloc_percpu(struct ccp_crypto_cpu_queue);
383 if (!req_queue.cpu_queue)
384 return -ENOMEM;
385
386 for_each_possible_cpu(cpu) {
387 cpu_queue = per_cpu_ptr(req_queue.cpu_queue, cpu);
388 INIT_LIST_HEAD(&cpu_queue->cmds);
389 cpu_queue->backlog = &cpu_queue->cmds;
390 cpu_queue->cmd_count = 0;
391 }
392
393 return 0;
394}
395
396static void ccp_fini_queue(void)
397{
398 struct ccp_crypto_cpu_queue *cpu_queue;
399 int cpu;
400
401 for_each_possible_cpu(cpu) {
402 cpu_queue = per_cpu_ptr(req_queue.cpu_queue, cpu);
403 BUG_ON(!list_empty(&cpu_queue->cmds));
404 }
405 free_percpu(req_queue.cpu_queue);
406}
407
408static int ccp_crypto_init(void)
409{
410 int ret;
411
412 ret = ccp_init_queues();
413 if (ret)
414 return ret;
415
416 ret = ccp_register_algs();
417 if (ret) {
418 ccp_unregister_algs();
419 ccp_fini_queue();
420 }
421
422 return ret;
423}
424
425static void ccp_crypto_exit(void)
426{
427 ccp_unregister_algs();
428 ccp_fini_queue();
429}
430
431module_init(ccp_crypto_init);
432module_exit(ccp_crypto_exit);
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
new file mode 100644
index 000000000000..3867290b3531
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -0,0 +1,517 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) SHA crypto API support
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/sched.h>
15#include <linux/delay.h>
16#include <linux/scatterlist.h>
17#include <linux/crypto.h>
18#include <crypto/algapi.h>
19#include <crypto/hash.h>
20#include <crypto/internal/hash.h>
21#include <crypto/sha.h>
22#include <crypto/scatterwalk.h>
23
24#include "ccp-crypto.h"
25
26
27struct ccp_sha_result {
28 struct completion completion;
29 int err;
30};
31
32static void ccp_sync_hash_complete(struct crypto_async_request *req, int err)
33{
34 struct ccp_sha_result *result = req->data;
35
36 if (err == -EINPROGRESS)
37 return;
38
39 result->err = err;
40 complete(&result->completion);
41}
42
43static int ccp_sync_hash(struct crypto_ahash *tfm, u8 *buf,
44 struct scatterlist *sg, unsigned int len)
45{
46 struct ccp_sha_result result;
47 struct ahash_request *req;
48 int ret;
49
50 init_completion(&result.completion);
51
52 req = ahash_request_alloc(tfm, GFP_KERNEL);
53 if (!req)
54 return -ENOMEM;
55
56 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
57 ccp_sync_hash_complete, &result);
58 ahash_request_set_crypt(req, sg, buf, len);
59
60 ret = crypto_ahash_digest(req);
61 if ((ret == -EINPROGRESS) || (ret == -EBUSY)) {
62 ret = wait_for_completion_interruptible(&result.completion);
63 if (!ret)
64 ret = result.err;
65 }
66
67 ahash_request_free(req);
68
69 return ret;
70}
71
72static int ccp_sha_finish_hmac(struct crypto_async_request *async_req)
73{
74 struct ahash_request *req = ahash_request_cast(async_req);
75 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
76 struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
77 struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
78 struct scatterlist sg[2];
79 unsigned int block_size =
80 crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
81 unsigned int digest_size = crypto_ahash_digestsize(tfm);
82
83 sg_init_table(sg, ARRAY_SIZE(sg));
84 sg_set_buf(&sg[0], ctx->u.sha.opad, block_size);
85 sg_set_buf(&sg[1], rctx->ctx, digest_size);
86
87 return ccp_sync_hash(ctx->u.sha.hmac_tfm, req->result, sg,
88 block_size + digest_size);
89}
90
91static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
92{
93 struct ahash_request *req = ahash_request_cast(async_req);
94 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
95 struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
96 struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
97 unsigned int digest_size = crypto_ahash_digestsize(tfm);
98
99 if (ret)
100 goto e_free;
101
102 if (rctx->hash_rem) {
103 /* Save remaining data to buffer */
104 unsigned int offset = rctx->nbytes - rctx->hash_rem;
105 scatterwalk_map_and_copy(rctx->buf, rctx->src,
106 offset, rctx->hash_rem, 0);
107 rctx->buf_count = rctx->hash_rem;
108 } else
109 rctx->buf_count = 0;
110
111 /* Update result area if supplied */
112 if (req->result)
113 memcpy(req->result, rctx->ctx, digest_size);
114
115 /* If we're doing an HMAC, we need to perform that on the final op */
116 if (rctx->final && ctx->u.sha.key_len)
117 ret = ccp_sha_finish_hmac(async_req);
118
119e_free:
120 sg_free_table(&rctx->data_sg);
121
122 return ret;
123}
124
125static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes,
126 unsigned int final)
127{
128 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
129 struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
130 struct scatterlist *sg;
131 unsigned int block_size =
132 crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
133 unsigned int sg_count;
134 gfp_t gfp;
135 u64 len;
136 int ret;
137
138 len = (u64)rctx->buf_count + (u64)nbytes;
139
140 if (!final && (len <= block_size)) {
141 scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src,
142 0, nbytes, 0);
143 rctx->buf_count += nbytes;
144
145 return 0;
146 }
147
148 rctx->src = req->src;
149 rctx->nbytes = nbytes;
150
151 rctx->final = final;
152 rctx->hash_rem = final ? 0 : len & (block_size - 1);
153 rctx->hash_cnt = len - rctx->hash_rem;
154 if (!final && !rctx->hash_rem) {
155 /* CCP can't do zero length final, so keep some data around */
156 rctx->hash_cnt -= block_size;
157 rctx->hash_rem = block_size;
158 }
159
160 /* Initialize the context scatterlist */
161 sg_init_one(&rctx->ctx_sg, rctx->ctx, sizeof(rctx->ctx));
162
163 sg = NULL;
164 if (rctx->buf_count && nbytes) {
165 /* Build the data scatterlist table - allocate enough entries
166 * for both data pieces (buffer and input data)
167 */
168 gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
169 GFP_KERNEL : GFP_ATOMIC;
170 sg_count = sg_nents(req->src) + 1;
171 ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp);
172 if (ret)
173 return ret;
174
175 sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
176 sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg);
177 sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src);
178 sg_mark_end(sg);
179
180 sg = rctx->data_sg.sgl;
181 } else if (rctx->buf_count) {
182 sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
183
184 sg = &rctx->buf_sg;
185 } else if (nbytes) {
186 sg = req->src;
187 }
188
189 rctx->msg_bits += (rctx->hash_cnt << 3); /* Total in bits */
190
191 memset(&rctx->cmd, 0, sizeof(rctx->cmd));
192 INIT_LIST_HEAD(&rctx->cmd.entry);
193 rctx->cmd.engine = CCP_ENGINE_SHA;
194 rctx->cmd.u.sha.type = rctx->type;
195 rctx->cmd.u.sha.ctx = &rctx->ctx_sg;
196 rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx);
197 rctx->cmd.u.sha.src = sg;
198 rctx->cmd.u.sha.src_len = rctx->hash_cnt;
199 rctx->cmd.u.sha.final = rctx->final;
200 rctx->cmd.u.sha.msg_bits = rctx->msg_bits;
201
202 rctx->first = 0;
203
204 ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
205
206 return ret;
207}
208
209static int ccp_sha_init(struct ahash_request *req)
210{
211 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
212 struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
213 struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
214 struct ccp_crypto_ahash_alg *alg =
215 ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm));
216 unsigned int block_size =
217 crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
218
219 memset(rctx, 0, sizeof(*rctx));
220
221 memcpy(rctx->ctx, alg->init, sizeof(rctx->ctx));
222 rctx->type = alg->type;
223 rctx->first = 1;
224
225 if (ctx->u.sha.key_len) {
226 /* Buffer the HMAC key for first update */
227 memcpy(rctx->buf, ctx->u.sha.ipad, block_size);
228 rctx->buf_count = block_size;
229 }
230
231 return 0;
232}
233
234static int ccp_sha_update(struct ahash_request *req)
235{
236 return ccp_do_sha_update(req, req->nbytes, 0);
237}
238
239static int ccp_sha_final(struct ahash_request *req)
240{
241 return ccp_do_sha_update(req, 0, 1);
242}
243
244static int ccp_sha_finup(struct ahash_request *req)
245{
246 return ccp_do_sha_update(req, req->nbytes, 1);
247}
248
249static int ccp_sha_digest(struct ahash_request *req)
250{
251 int ret;
252
253 ret = ccp_sha_init(req);
254 if (ret)
255 return ret;
256
257 return ccp_sha_finup(req);
258}
259
260static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
261 unsigned int key_len)
262{
263 struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
264 struct scatterlist sg;
265 unsigned int block_size =
266 crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
267 unsigned int digest_size = crypto_ahash_digestsize(tfm);
268 int i, ret;
269
270 /* Set to zero until complete */
271 ctx->u.sha.key_len = 0;
272
273 /* Clear key area to provide zero padding for keys smaller
274 * than the block size
275 */
276 memset(ctx->u.sha.key, 0, sizeof(ctx->u.sha.key));
277
278 if (key_len > block_size) {
279 /* Must hash the input key */
280 sg_init_one(&sg, key, key_len);
281 ret = ccp_sync_hash(tfm, ctx->u.sha.key, &sg, key_len);
282 if (ret) {
283 crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
284 return -EINVAL;
285 }
286
287 key_len = digest_size;
288 } else
289 memcpy(ctx->u.sha.key, key, key_len);
290
291 for (i = 0; i < block_size; i++) {
292 ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36;
293 ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c;
294 }
295
296 ctx->u.sha.key_len = key_len;
297
298 return 0;
299}
300
301static int ccp_sha_cra_init(struct crypto_tfm *tfm)
302{
303 struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
304 struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
305
306 ctx->complete = ccp_sha_complete;
307 ctx->u.sha.key_len = 0;
308
309 crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_sha_req_ctx));
310
311 return 0;
312}
313
314static void ccp_sha_cra_exit(struct crypto_tfm *tfm)
315{
316}
317
318static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm)
319{
320 struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
321 struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm);
322 struct crypto_ahash *hmac_tfm;
323
324 hmac_tfm = crypto_alloc_ahash(alg->child_alg,
325 CRYPTO_ALG_TYPE_AHASH, 0);
326 if (IS_ERR(hmac_tfm)) {
327 pr_warn("could not load driver %s needed for HMAC support\n",
328 alg->child_alg);
329 return PTR_ERR(hmac_tfm);
330 }
331
332 ctx->u.sha.hmac_tfm = hmac_tfm;
333
334 return ccp_sha_cra_init(tfm);
335}
336
337static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm)
338{
339 struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
340
341 if (ctx->u.sha.hmac_tfm)
342 crypto_free_ahash(ctx->u.sha.hmac_tfm);
343
344 ccp_sha_cra_exit(tfm);
345}
346
347static const __be32 sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
348 cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
349 cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
350 cpu_to_be32(SHA1_H4), 0, 0, 0,
351};
352
353static const __be32 sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
354 cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
355 cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
356 cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
357 cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
358};
359
360static const __be32 sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
361 cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
362 cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
363 cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
364 cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
365};
366
367struct ccp_sha_def {
368 const char *name;
369 const char *drv_name;
370 const __be32 *init;
371 enum ccp_sha_type type;
372 u32 digest_size;
373 u32 block_size;
374};
375
376static struct ccp_sha_def sha_algs[] = {
377 {
378 .name = "sha1",
379 .drv_name = "sha1-ccp",
380 .init = sha1_init,
381 .type = CCP_SHA_TYPE_1,
382 .digest_size = SHA1_DIGEST_SIZE,
383 .block_size = SHA1_BLOCK_SIZE,
384 },
385 {
386 .name = "sha224",
387 .drv_name = "sha224-ccp",
388 .init = sha224_init,
389 .type = CCP_SHA_TYPE_224,
390 .digest_size = SHA224_DIGEST_SIZE,
391 .block_size = SHA224_BLOCK_SIZE,
392 },
393 {
394 .name = "sha256",
395 .drv_name = "sha256-ccp",
396 .init = sha256_init,
397 .type = CCP_SHA_TYPE_256,
398 .digest_size = SHA256_DIGEST_SIZE,
399 .block_size = SHA256_BLOCK_SIZE,
400 },
401};
402
403static int ccp_register_hmac_alg(struct list_head *head,
404 const struct ccp_sha_def *def,
405 const struct ccp_crypto_ahash_alg *base_alg)
406{
407 struct ccp_crypto_ahash_alg *ccp_alg;
408 struct ahash_alg *alg;
409 struct hash_alg_common *halg;
410 struct crypto_alg *base;
411 int ret;
412
413 ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
414 if (!ccp_alg)
415 return -ENOMEM;
416
417 /* Copy the base algorithm and only change what's necessary */
418 *ccp_alg = *base_alg;
419 INIT_LIST_HEAD(&ccp_alg->entry);
420
421 strncpy(ccp_alg->child_alg, def->name, CRYPTO_MAX_ALG_NAME);
422
423 alg = &ccp_alg->alg;
424 alg->setkey = ccp_sha_setkey;
425
426 halg = &alg->halg;
427
428 base = &halg->base;
429 snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", def->name);
430 snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "hmac-%s",
431 def->drv_name);
432 base->cra_init = ccp_hmac_sha_cra_init;
433 base->cra_exit = ccp_hmac_sha_cra_exit;
434
435 ret = crypto_register_ahash(alg);
436 if (ret) {
437 pr_err("%s ahash algorithm registration error (%d)\n",
438 base->cra_name, ret);
439 kfree(ccp_alg);
440 return ret;
441 }
442
443 list_add(&ccp_alg->entry, head);
444
445 return ret;
446}
447
448static int ccp_register_sha_alg(struct list_head *head,
449 const struct ccp_sha_def *def)
450{
451 struct ccp_crypto_ahash_alg *ccp_alg;
452 struct ahash_alg *alg;
453 struct hash_alg_common *halg;
454 struct crypto_alg *base;
455 int ret;
456
457 ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
458 if (!ccp_alg)
459 return -ENOMEM;
460
461 INIT_LIST_HEAD(&ccp_alg->entry);
462
463 ccp_alg->init = def->init;
464 ccp_alg->type = def->type;
465
466 alg = &ccp_alg->alg;
467 alg->init = ccp_sha_init;
468 alg->update = ccp_sha_update;
469 alg->final = ccp_sha_final;
470 alg->finup = ccp_sha_finup;
471 alg->digest = ccp_sha_digest;
472
473 halg = &alg->halg;
474 halg->digestsize = def->digest_size;
475
476 base = &halg->base;
477 snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
478 snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
479 def->drv_name);
480 base->cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC |
481 CRYPTO_ALG_KERN_DRIVER_ONLY |
482 CRYPTO_ALG_NEED_FALLBACK;
483 base->cra_blocksize = def->block_size;
484 base->cra_ctxsize = sizeof(struct ccp_ctx);
485 base->cra_priority = CCP_CRA_PRIORITY;
486 base->cra_type = &crypto_ahash_type;
487 base->cra_init = ccp_sha_cra_init;
488 base->cra_exit = ccp_sha_cra_exit;
489 base->cra_module = THIS_MODULE;
490
491 ret = crypto_register_ahash(alg);
492 if (ret) {
493 pr_err("%s ahash algorithm registration error (%d)\n",
494 base->cra_name, ret);
495 kfree(ccp_alg);
496 return ret;
497 }
498
499 list_add(&ccp_alg->entry, head);
500
501 ret = ccp_register_hmac_alg(head, def, ccp_alg);
502
503 return ret;
504}
505
506int ccp_register_sha_algs(struct list_head *head)
507{
508 int i, ret;
509
510 for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
511 ret = ccp_register_sha_alg(head, &sha_algs[i]);
512 if (ret)
513 return ret;
514 }
515
516 return 0;
517}
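
For reference, the HMAC key schedule computed in ccp_sha_setkey() is the standard RFC 2104 construction: a key longer than the block size is first hashed down to the digest size, the result is zero-padded to a full block, and the ipad/opad blocks are the padded key XOR-ed with 0x36 and 0x5c respectively. Below is a stand-alone illustration of just the padding/XOR step; hmac_prepare_pads and DEMO_BLOCK_SIZE are hypothetical names, and the pre-hashing of oversized keys is deliberately omitted:

#include <stdint.h>
#include <string.h>

#define DEMO_BLOCK_SIZE 64	/* SHA-1/SHA-224/SHA-256 block size */

static void hmac_prepare_pads(const uint8_t *key, size_t key_len,
			      uint8_t ipad[DEMO_BLOCK_SIZE],
			      uint8_t opad[DEMO_BLOCK_SIZE])
{
	uint8_t k[DEMO_BLOCK_SIZE] = { 0 };	/* zero padding for short keys */
	size_t i;

	/* Keys longer than the block size must first be hashed down to
	 * the digest size; that step is omitted here for brevity.
	 */
	memcpy(k, key, key_len < DEMO_BLOCK_SIZE ? key_len : DEMO_BLOCK_SIZE);

	for (i = 0; i < DEMO_BLOCK_SIZE; i++) {
		ipad[i] = k[i] ^ 0x36;
		opad[i] = k[i] ^ 0x5c;
	}
}

In the driver, ccp_sha_init() then buffers the ipad block as the first data of the inner hash, and ccp_sha_finish_hmac() hashes opad concatenated with the inner digest to produce the final HMAC value.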
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
new file mode 100644
index 000000000000..b222231b6169
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -0,0 +1,197 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) crypto API support
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#ifndef __CCP_CRYPTO_H__
14#define __CCP_CRYPTO_H__
15
16
17#include <linux/list.h>
18#include <linux/wait.h>
19#include <linux/pci.h>
20#include <linux/ccp.h>
21#include <linux/crypto.h>
22#include <crypto/algapi.h>
23#include <crypto/aes.h>
24#include <crypto/ctr.h>
25#include <crypto/hash.h>
26#include <crypto/sha.h>
27
28
29#define CCP_CRA_PRIORITY 300
30
31struct ccp_crypto_ablkcipher_alg {
32 struct list_head entry;
33
34 u32 mode;
35
36 struct crypto_alg alg;
37};
38
39struct ccp_crypto_ahash_alg {
40 struct list_head entry;
41
42 const __be32 *init;
43 u32 type;
44 u32 mode;
45
46 /* Child algorithm used for HMAC, CMAC, etc */
47 char child_alg[CRYPTO_MAX_ALG_NAME];
48
49 struct ahash_alg alg;
50};
51
52static inline struct ccp_crypto_ablkcipher_alg *
53 ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm)
54{
55 struct crypto_alg *alg = tfm->__crt_alg;
56
57 return container_of(alg, struct ccp_crypto_ablkcipher_alg, alg);
58}
59
60static inline struct ccp_crypto_ahash_alg *
61 ccp_crypto_ahash_alg(struct crypto_tfm *tfm)
62{
63 struct crypto_alg *alg = tfm->__crt_alg;
64 struct ahash_alg *ahash_alg;
65
66 ahash_alg = container_of(alg, struct ahash_alg, halg.base);
67
68 return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg);
69}
70
71
72/***** AES related defines *****/
73struct ccp_aes_ctx {
74 /* Fallback cipher for XTS with unsupported unit sizes */
75 struct crypto_ablkcipher *tfm_ablkcipher;
76
77 /* Cipher used to generate CMAC K1/K2 keys */
78 struct crypto_cipher *tfm_cipher;
79
80 enum ccp_engine engine;
81 enum ccp_aes_type type;
82 enum ccp_aes_mode mode;
83
84 struct scatterlist key_sg;
85 unsigned int key_len;
86 u8 key[AES_MAX_KEY_SIZE];
87
88 u8 nonce[CTR_RFC3686_NONCE_SIZE];
89
90 /* CMAC key structures */
91 struct scatterlist k1_sg;
92 struct scatterlist k2_sg;
93 unsigned int kn_len;
94 u8 k1[AES_BLOCK_SIZE];
95 u8 k2[AES_BLOCK_SIZE];
96};
97
98struct ccp_aes_req_ctx {
99 struct scatterlist iv_sg;
100 u8 iv[AES_BLOCK_SIZE];
101
102 /* Fields used for RFC3686 requests */
103 u8 *rfc3686_info;
104 u8 rfc3686_iv[AES_BLOCK_SIZE];
105
106 struct ccp_cmd cmd;
107};
108
109struct ccp_aes_cmac_req_ctx {
110 unsigned int null_msg;
111 unsigned int final;
112
113 struct scatterlist *src;
114 unsigned int nbytes;
115
116 u64 hash_cnt;
117 unsigned int hash_rem;
118
119 struct sg_table data_sg;
120
121 struct scatterlist iv_sg;
122 u8 iv[AES_BLOCK_SIZE];
123
124 struct scatterlist buf_sg;
125 unsigned int buf_count;
126 u8 buf[AES_BLOCK_SIZE];
127
128 struct scatterlist pad_sg;
129 unsigned int pad_count;
130 u8 pad[AES_BLOCK_SIZE];
131
132 struct ccp_cmd cmd;
133};
134
135/***** SHA related defines *****/
136#define MAX_SHA_CONTEXT_SIZE SHA256_DIGEST_SIZE
137#define MAX_SHA_BLOCK_SIZE SHA256_BLOCK_SIZE
138
139struct ccp_sha_ctx {
140 unsigned int key_len;
141 u8 key[MAX_SHA_BLOCK_SIZE];
142 u8 ipad[MAX_SHA_BLOCK_SIZE];
143 u8 opad[MAX_SHA_BLOCK_SIZE];
144 struct crypto_ahash *hmac_tfm;
145};
146
147struct ccp_sha_req_ctx {
148 enum ccp_sha_type type;
149
150 u64 msg_bits;
151
152 unsigned int first;
153 unsigned int final;
154
155 struct scatterlist *src;
156 unsigned int nbytes;
157
158 u64 hash_cnt;
159 unsigned int hash_rem;
160
161 struct sg_table data_sg;
162
163 struct scatterlist ctx_sg;
164 u8 ctx[MAX_SHA_CONTEXT_SIZE];
165
166 struct scatterlist buf_sg;
167 unsigned int buf_count;
168 u8 buf[MAX_SHA_BLOCK_SIZE];
169
170 /* HMAC support field */
171 struct scatterlist pad_sg;
172
173 /* CCP driver command */
174 struct ccp_cmd cmd;
175};
176
177/***** Common Context Structure *****/
178struct ccp_ctx {
179 int (*complete)(struct crypto_async_request *req, int ret);
180
181 union {
182 struct ccp_aes_ctx aes;
183 struct ccp_sha_ctx sha;
184 } u;
185};
186
187int ccp_crypto_enqueue_request(struct crypto_async_request *req,
188 struct ccp_cmd *cmd);
189struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table,
190 struct scatterlist *sg_add);
191
192int ccp_register_aes_algs(struct list_head *head);
193int ccp_register_aes_cmac_algs(struct list_head *head);
194int ccp_register_aes_xts_algs(struct list_head *head);
195int ccp_register_sha_algs(struct list_head *head);
196
197#endif
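
The two inline helpers above rely on container_of() to step outward from an embedded structure to the driver-private wrapper: from the generic crypto_alg embedded at ahash_alg.halg.base, and from there to the enclosing ccp_crypto_ahash_alg. A minimal userspace illustration of the same double-nesting technique, using hypothetical inner/middle/outer names and a simplified container_of definition:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct inner { int id; };
struct middle { struct inner base; };
struct outer { const char *name; struct middle mid; };

int main(void)
{
	struct outer o = { .name = "demo", .mid = { .base = { .id = 7 } } };
	struct inner *i = &o.mid.base;		/* all the callback is handed */

	/* Recover the wrappers exactly as ccp_crypto_ahash_alg() does */
	struct middle *m = container_of(i, struct middle, base);
	struct outer *recovered = container_of(m, struct outer, mid);

	printf("%s %d\n", recovered->name, recovered->mid.base.id);
	return 0;
}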
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
new file mode 100644
index 000000000000..c3bc21264600
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -0,0 +1,595 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) driver
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/kthread.h>
16#include <linux/sched.h>
17#include <linux/interrupt.h>
18#include <linux/spinlock.h>
19#include <linux/mutex.h>
20#include <linux/delay.h>
21#include <linux/hw_random.h>
22#include <linux/cpu.h>
23#include <asm/cpu_device_id.h>
24#include <linux/ccp.h>
25
26#include "ccp-dev.h"
27
28MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
29MODULE_LICENSE("GPL");
30MODULE_VERSION("1.0.0");
31MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
32
33
34static struct ccp_device *ccp_dev;
35static inline struct ccp_device *ccp_get_device(void)
36{
37 return ccp_dev;
38}
39
40static inline void ccp_add_device(struct ccp_device *ccp)
41{
42 ccp_dev = ccp;
43}
44
45static inline void ccp_del_device(struct ccp_device *ccp)
46{
47 ccp_dev = NULL;
48}
49
50/**
51 * ccp_enqueue_cmd - queue an operation for processing by the CCP
52 *
53 * @cmd: ccp_cmd struct to be processed
54 *
55 * Queue a cmd to be processed by the CCP. If queueing the cmd
56 * would exceed the defined length of the cmd queue, the cmd will
57 * only be queued if the CCP_CMD_MAY_BACKLOG flag is set and will
58 * result in a return code of -EBUSY.
59 *
60 * The callback routine specified in the ccp_cmd struct will be
61 * called to notify the caller of completion (if the cmd was not
62 * backlogged) or advancement out of the backlog. If the cmd has
63 * advanced out of the backlog the "err" value of the callback
64 * will be -EINPROGRESS. Any other "err" value during callback is
65 * the result of the operation.
66 *
67 * The cmd has been successfully queued if:
68 * the return code is -EINPROGRESS or
69 * the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set
70 */
71int ccp_enqueue_cmd(struct ccp_cmd *cmd)
72{
73 struct ccp_device *ccp = ccp_get_device();
74 unsigned long flags;
75 unsigned int i;
76 int ret;
77
78 if (!ccp)
79 return -ENODEV;
80
81 /* Caller must supply a callback routine */
82 if (!cmd->callback)
83 return -EINVAL;
84
85 cmd->ccp = ccp;
86
87 spin_lock_irqsave(&ccp->cmd_lock, flags);
88
89 i = ccp->cmd_q_count;
90
91 if (ccp->cmd_count >= MAX_CMD_QLEN) {
92 ret = -EBUSY;
93 if (cmd->flags & CCP_CMD_MAY_BACKLOG)
94 list_add_tail(&cmd->entry, &ccp->backlog);
95 } else {
96 ret = -EINPROGRESS;
97 ccp->cmd_count++;
98 list_add_tail(&cmd->entry, &ccp->cmd);
99
100 /* Find an idle queue */
101 if (!ccp->suspending) {
102 for (i = 0; i < ccp->cmd_q_count; i++) {
103 if (ccp->cmd_q[i].active)
104 continue;
105
106 break;
107 }
108 }
109 }
110
111 spin_unlock_irqrestore(&ccp->cmd_lock, flags);
112
113 /* If we found an idle queue, wake it up */
114 if (i < ccp->cmd_q_count)
115 wake_up_process(ccp->cmd_q[i].kthread);
116
117 return ret;
118}
119EXPORT_SYMBOL_GPL(ccp_enqueue_cmd);
120
121static void ccp_do_cmd_backlog(struct work_struct *work)
122{
123 struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work);
124 struct ccp_device *ccp = cmd->ccp;
125 unsigned long flags;
126 unsigned int i;
127
128 cmd->callback(cmd->data, -EINPROGRESS);
129
130 spin_lock_irqsave(&ccp->cmd_lock, flags);
131
132 ccp->cmd_count++;
133 list_add_tail(&cmd->entry, &ccp->cmd);
134
135 /* Find an idle queue */
136 for (i = 0; i < ccp->cmd_q_count; i++) {
137 if (ccp->cmd_q[i].active)
138 continue;
139
140 break;
141 }
142
143 spin_unlock_irqrestore(&ccp->cmd_lock, flags);
144
145 /* If we found an idle queue, wake it up */
146 if (i < ccp->cmd_q_count)
147 wake_up_process(ccp->cmd_q[i].kthread);
148}
149
150static struct ccp_cmd *ccp_dequeue_cmd(struct ccp_cmd_queue *cmd_q)
151{
152 struct ccp_device *ccp = cmd_q->ccp;
153 struct ccp_cmd *cmd = NULL;
154 struct ccp_cmd *backlog = NULL;
155 unsigned long flags;
156
157 spin_lock_irqsave(&ccp->cmd_lock, flags);
158
159 cmd_q->active = 0;
160
161 if (ccp->suspending) {
162 cmd_q->suspended = 1;
163
164 spin_unlock_irqrestore(&ccp->cmd_lock, flags);
165 wake_up_interruptible(&ccp->suspend_queue);
166
167 return NULL;
168 }
169
170 if (ccp->cmd_count) {
171 cmd_q->active = 1;
172
173 cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
174 list_del(&cmd->entry);
175
176 ccp->cmd_count--;
177 }
178
179 if (!list_empty(&ccp->backlog)) {
180 backlog = list_first_entry(&ccp->backlog, struct ccp_cmd,
181 entry);
182 list_del(&backlog->entry);
183 }
184
185 spin_unlock_irqrestore(&ccp->cmd_lock, flags);
186
187 if (backlog) {
188 INIT_WORK(&backlog->work, ccp_do_cmd_backlog);
189 schedule_work(&backlog->work);
190 }
191
192 return cmd;
193}
194
195static void ccp_do_cmd_complete(struct work_struct *work)
196{
197 struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work);
198
199 cmd->callback(cmd->data, cmd->ret);
200}
201
202static int ccp_cmd_queue_thread(void *data)
203{
204 struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data;
205 struct ccp_cmd *cmd;
206
207 set_current_state(TASK_INTERRUPTIBLE);
208 while (!kthread_should_stop()) {
209 schedule();
210
211 set_current_state(TASK_INTERRUPTIBLE);
212
213 cmd = ccp_dequeue_cmd(cmd_q);
214 if (!cmd)
215 continue;
216
217 __set_current_state(TASK_RUNNING);
218
219 /* Execute the command */
220 cmd->ret = ccp_run_cmd(cmd_q, cmd);
221
222 /* Schedule the completion callback */
223 INIT_WORK(&cmd->work, ccp_do_cmd_complete);
224 schedule_work(&cmd->work);
225 }
226
227 __set_current_state(TASK_RUNNING);
228
229 return 0;
230}
231
232static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
233{
234 struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
235 u32 trng_value;
236 int len = min_t(int, sizeof(trng_value), max);
237
238 /*
239 * Locking is provided by the caller so we can update device
240 * hwrng-related fields safely
241 */
242 trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
243 if (!trng_value) {
244 /* Zero is returned if no data is available or if a
245 * bad-entropy error is present. Assume an error if
246 * we exceed TRNG_RETRIES reads of zero.
247 */
248 if (ccp->hwrng_retries++ > TRNG_RETRIES)
249 return -EIO;
250
251 return 0;
252 }
253
254 /* Reset the counter and save the rng value */
255 ccp->hwrng_retries = 0;
256 memcpy(data, &trng_value, len);
257
258 return len;
259}
260
261/**
262 * ccp_alloc_struct - allocate and initialize the ccp_device struct
263 *
264 * @dev: device struct of the CCP
265 */
266struct ccp_device *ccp_alloc_struct(struct device *dev)
267{
268 struct ccp_device *ccp;
269
270 ccp = kzalloc(sizeof(*ccp), GFP_KERNEL);
271 if (ccp == NULL) {
272 dev_err(dev, "unable to allocate device struct\n");
273 return NULL;
274 }
275 ccp->dev = dev;
276
277 INIT_LIST_HEAD(&ccp->cmd);
278 INIT_LIST_HEAD(&ccp->backlog);
279
280 spin_lock_init(&ccp->cmd_lock);
281 mutex_init(&ccp->req_mutex);
282 mutex_init(&ccp->ksb_mutex);
283 ccp->ksb_count = KSB_COUNT;
284 ccp->ksb_start = 0;
285
286 return ccp;
287}
288
289/**
290 * ccp_init - initialize the CCP device
291 *
292 * @ccp: ccp_device struct
293 */
294int ccp_init(struct ccp_device *ccp)
295{
296 struct device *dev = ccp->dev;
297 struct ccp_cmd_queue *cmd_q;
298 struct dma_pool *dma_pool;
299 char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
300 unsigned int qmr, qim, i;
301 int ret;
302
303 /* Find available queues */
304 qim = 0;
305 qmr = ioread32(ccp->io_regs + Q_MASK_REG);
306 for (i = 0; i < MAX_HW_QUEUES; i++) {
307 if (!(qmr & (1 << i)))
308 continue;
309
310 /* Allocate a dma pool for this queue */
311 snprintf(dma_pool_name, sizeof(dma_pool_name), "ccp_q%d", i);
312 dma_pool = dma_pool_create(dma_pool_name, dev,
313 CCP_DMAPOOL_MAX_SIZE,
314 CCP_DMAPOOL_ALIGN, 0);
315 if (!dma_pool) {
316 dev_err(dev, "unable to allocate dma pool\n");
317 ret = -ENOMEM;
318 goto e_pool;
319 }
320
321 cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
322 ccp->cmd_q_count++;
323
324 cmd_q->ccp = ccp;
325 cmd_q->id = i;
326 cmd_q->dma_pool = dma_pool;
327
328 /* Reserve 2 KSB regions for the queue */
329 cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
330 cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
331 ccp->ksb_count -= 2;
332
333 /* Preset some register values and masks that are queue
334 * number dependent
335 */
336 cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
337 (CMD_Q_STATUS_INCR * i);
338 cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
339 (CMD_Q_STATUS_INCR * i);
340 cmd_q->int_ok = 1 << (i * 2);
341 cmd_q->int_err = 1 << ((i * 2) + 1);
342
343 cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
344
345 init_waitqueue_head(&cmd_q->int_queue);
346
347 /* Build queue interrupt mask (two interrupts per queue) */
348 qim |= cmd_q->int_ok | cmd_q->int_err;
349
350 dev_dbg(dev, "queue #%u available\n", i);
351 }
352 if (ccp->cmd_q_count == 0) {
353 dev_notice(dev, "no command queues available\n");
354 ret = -EIO;
355 goto e_pool;
356 }
357 dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
358
359 /* Disable and clear interrupts until ready */
360 iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
361 for (i = 0; i < ccp->cmd_q_count; i++) {
362 cmd_q = &ccp->cmd_q[i];
363
364 ioread32(cmd_q->reg_int_status);
365 ioread32(cmd_q->reg_status);
366 }
367 iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
368
369 /* Request an irq */
370 ret = ccp->get_irq(ccp);
371 if (ret) {
372 dev_err(dev, "unable to allocate an IRQ\n");
373 goto e_pool;
374 }
375
376 /* Initialize the queues used to wait for KSB space and suspend */
377 init_waitqueue_head(&ccp->ksb_queue);
378 init_waitqueue_head(&ccp->suspend_queue);
379
380 /* Create a kthread for each queue */
381 for (i = 0; i < ccp->cmd_q_count; i++) {
382 struct task_struct *kthread;
383
384 cmd_q = &ccp->cmd_q[i];
385
386 kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
387 "ccp-q%u", cmd_q->id);
388 if (IS_ERR(kthread)) {
389 dev_err(dev, "error creating queue thread (%ld)\n",
390 PTR_ERR(kthread));
391 ret = PTR_ERR(kthread);
392 goto e_kthread;
393 }
394
395 cmd_q->kthread = kthread;
396 wake_up_process(kthread);
397 }
398
399 /* Register the RNG */
400 ccp->hwrng.name = "ccp-rng";
401 ccp->hwrng.read = ccp_trng_read;
402 ret = hwrng_register(&ccp->hwrng);
403 if (ret) {
404 dev_err(dev, "error registering hwrng (%d)\n", ret);
405 goto e_kthread;
406 }
407
408 /* Make the device struct available before enabling interrupts */
409 ccp_add_device(ccp);
410
411 /* Enable interrupts */
412 iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
413
414 return 0;
415
416e_kthread:
417 for (i = 0; i < ccp->cmd_q_count; i++)
418 if (ccp->cmd_q[i].kthread)
419 kthread_stop(ccp->cmd_q[i].kthread);
420
421 ccp->free_irq(ccp);
422
423e_pool:
424 for (i = 0; i < ccp->cmd_q_count; i++)
425 dma_pool_destroy(ccp->cmd_q[i].dma_pool);
426
427 return ret;
428}
429
430/**
431 * ccp_destroy - tear down the CCP device
432 *
433 * @ccp: ccp_device struct
434 */
435void ccp_destroy(struct ccp_device *ccp)
436{
437 struct ccp_cmd_queue *cmd_q;
438 struct ccp_cmd *cmd;
439 unsigned int qim, i;
440
441 /* Remove general access to the device struct */
442 ccp_del_device(ccp);
443
444 /* Unregister the RNG */
445 hwrng_unregister(&ccp->hwrng);
446
447 /* Stop the queue kthreads */
448 for (i = 0; i < ccp->cmd_q_count; i++)
449 if (ccp->cmd_q[i].kthread)
450 kthread_stop(ccp->cmd_q[i].kthread);
451
452 /* Build queue interrupt mask (two interrupt masks per queue) */
453 qim = 0;
454 for (i = 0; i < ccp->cmd_q_count; i++) {
455 cmd_q = &ccp->cmd_q[i];
456 qim |= cmd_q->int_ok | cmd_q->int_err;
457 }
458
459 /* Disable and clear interrupts */
460 iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
461 for (i = 0; i < ccp->cmd_q_count; i++) {
462 cmd_q = &ccp->cmd_q[i];
463
464 ioread32(cmd_q->reg_int_status);
465 ioread32(cmd_q->reg_status);
466 }
467 iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
468
469 ccp->free_irq(ccp);
470
471 for (i = 0; i < ccp->cmd_q_count; i++)
472 dma_pool_destroy(ccp->cmd_q[i].dma_pool);
473
474 /* Flush the cmd and backlog queue */
475 while (!list_empty(&ccp->cmd)) {
476 /* Invoke the callback directly with an error code */
477 cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
478 list_del(&cmd->entry);
479 cmd->callback(cmd->data, -ENODEV);
480 }
481 while (!list_empty(&ccp->backlog)) {
482 /* Invoke the callback directly with an error code */
483 cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
484 list_del(&cmd->entry);
485 cmd->callback(cmd->data, -ENODEV);
486 }
487}
488
489/**
490 * ccp_irq_handler - handle interrupts generated by the CCP device
491 *
492 * @irq: the irq associated with the interrupt
493 * @data: the data value supplied when the irq was created
494 */
495irqreturn_t ccp_irq_handler(int irq, void *data)
496{
497 struct device *dev = data;
498 struct ccp_device *ccp = dev_get_drvdata(dev);
499 struct ccp_cmd_queue *cmd_q;
500 u32 q_int, status;
501 unsigned int i;
502
503 status = ioread32(ccp->io_regs + IRQ_STATUS_REG);
504
505 for (i = 0; i < ccp->cmd_q_count; i++) {
506 cmd_q = &ccp->cmd_q[i];
507
508 q_int = status & (cmd_q->int_ok | cmd_q->int_err);
509 if (q_int) {
510 cmd_q->int_status = status;
511 cmd_q->q_status = ioread32(cmd_q->reg_status);
512 cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
513
514 /* On error, only save the first error value */
515 if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
516 cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
517
518 cmd_q->int_rcvd = 1;
519
520 /* Acknowledge the interrupt and wake the kthread */
521 iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
522 wake_up_interruptible(&cmd_q->int_queue);
523 }
524 }
525
526 return IRQ_HANDLED;
527}
528
529#ifdef CONFIG_PM
530bool ccp_queues_suspended(struct ccp_device *ccp)
531{
532 unsigned int suspended = 0;
533 unsigned long flags;
534 unsigned int i;
535
536 spin_lock_irqsave(&ccp->cmd_lock, flags);
537
538 for (i = 0; i < ccp->cmd_q_count; i++)
539 if (ccp->cmd_q[i].suspended)
540 suspended++;
541
542 spin_unlock_irqrestore(&ccp->cmd_lock, flags);
543
544 return ccp->cmd_q_count == suspended;
545}
546#endif
547
548static const struct x86_cpu_id ccp_support[] = {
549 { X86_VENDOR_AMD, 22, }, { }, /* empty terminator required by x86_match_cpu() */
550};
551
552static int __init ccp_mod_init(void)
553{
554 struct cpuinfo_x86 *cpuinfo = &boot_cpu_data;
555 int ret;
556
557 if (!x86_match_cpu(ccp_support))
558 return -ENODEV;
559
560 switch (cpuinfo->x86) {
561 case 22:
562 if ((cpuinfo->x86_model < 48) || (cpuinfo->x86_model > 63))
563 return -ENODEV;
564
565 ret = ccp_pci_init();
566 if (ret)
567 return ret;
568
569 /* Don't leave the driver loaded if init failed */
570 if (!ccp_get_device()) {
571 ccp_pci_exit();
572 return -ENODEV;
573 }
574
575 return 0;
576
577 break;
578 }
579
580 return -ENODEV;
581}
582
583static void __exit ccp_mod_exit(void)
584{
585 struct cpuinfo_x86 *cpuinfo = &boot_cpu_data;
586
587 switch (cpuinfo->x86) {
588 case 22:
589 ccp_pci_exit();
590 break;
591 }
592}
593
594module_init(ccp_mod_init);
595module_exit(ccp_mod_exit);
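
Per the kernel-doc on ccp_enqueue_cmd() above, a cmd has been accepted when the call returns -EINPROGRESS, or -EBUSY if it was backlogged under CCP_CMD_MAY_BACKLOG; the callback later fires with -EINPROGRESS when a backlogged cmd advances, and with the final status on completion. The following is a hedged sketch of a synchronous caller built on those semantics; the demo_* names are illustrative only (the real in-tree consumer is the ccp-crypto layer), and the engine-specific fields of the cmd are assumed to have been filled in already:

#include <linux/ccp.h>
#include <linux/completion.h>
#include <linux/errno.h>

struct demo_ctx {
	struct completion done;
	int status;
};

static void demo_callback(void *data, int err)
{
	struct demo_ctx *ctx = data;

	if (err == -EINPROGRESS)	/* advanced out of the backlog */
		return;

	ctx->status = err;
	complete(&ctx->done);
}

static int demo_run(struct ccp_cmd *cmd)
{
	struct demo_ctx ctx;
	int ret;

	init_completion(&ctx.done);
	cmd->callback = demo_callback;
	cmd->data = &ctx;
	cmd->flags |= CCP_CMD_MAY_BACKLOG;

	ret = ccp_enqueue_cmd(cmd);
	if (ret != -EINPROGRESS && ret != -EBUSY)
		return ret;		/* not queued at all */

	wait_for_completion(&ctx.done);
	return ctx.status;
}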
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
new file mode 100644
index 000000000000..7ec536e702ec
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -0,0 +1,272 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) driver
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#ifndef __CCP_DEV_H__
14#define __CCP_DEV_H__
15
16#include <linux/device.h>
17#include <linux/pci.h>
18#include <linux/spinlock.h>
19#include <linux/mutex.h>
20#include <linux/list.h>
21#include <linux/wait.h>
22#include <linux/dmapool.h>
23#include <linux/hw_random.h>
24
25
26#define IO_OFFSET 0x20000
27
28#define MAX_DMAPOOL_NAME_LEN 32
29
30#define MAX_HW_QUEUES 5
31#define MAX_CMD_QLEN 100
32
33#define TRNG_RETRIES 10
34
35
36/****** Register Mappings ******/
37#define Q_MASK_REG 0x000
38#define TRNG_OUT_REG 0x00c
39#define IRQ_MASK_REG 0x040
40#define IRQ_STATUS_REG 0x200
41
42#define DEL_CMD_Q_JOB 0x124
43#define DEL_Q_ACTIVE 0x00000200
44#define DEL_Q_ID_SHIFT 6
45
46#define CMD_REQ0 0x180
47#define CMD_REQ_INCR 0x04
48
49#define CMD_Q_STATUS_BASE 0x210
50#define CMD_Q_INT_STATUS_BASE 0x214
51#define CMD_Q_STATUS_INCR 0x20
52
53#define CMD_Q_CACHE 0x228
54#define CMD_Q_CACHE_INC 0x20
55
56#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f)
57#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f)
58
59/****** REQ0 Related Values ******/
60#define REQ0_WAIT_FOR_WRITE 0x00000004
61#define REQ0_INT_ON_COMPLETE 0x00000002
62#define REQ0_STOP_ON_COMPLETE 0x00000001
63
64#define REQ0_CMD_Q_SHIFT 9
65#define REQ0_JOBID_SHIFT 3
66
67/****** REQ1 Related Values ******/
68#define REQ1_PROTECT_SHIFT 27
69#define REQ1_ENGINE_SHIFT 23
70#define REQ1_KEY_KSB_SHIFT 2
71
72#define REQ1_EOM 0x00000002
73#define REQ1_INIT 0x00000001
74
75/* AES Related Values */
76#define REQ1_AES_TYPE_SHIFT 21
77#define REQ1_AES_MODE_SHIFT 18
78#define REQ1_AES_ACTION_SHIFT 17
79#define REQ1_AES_CFB_SIZE_SHIFT 10
80
81/* XTS-AES Related Values */
82#define REQ1_XTS_AES_SIZE_SHIFT 10
83
84/* SHA Related Values */
85#define REQ1_SHA_TYPE_SHIFT 21
86
87/* RSA Related Values */
88#define REQ1_RSA_MOD_SIZE_SHIFT 10
89
90/* Pass-Through Related Values */
91#define REQ1_PT_BW_SHIFT 12
92#define REQ1_PT_BS_SHIFT 10
93
94/* ECC Related Values */
95#define REQ1_ECC_AFFINE_CONVERT 0x00200000
96#define REQ1_ECC_FUNCTION_SHIFT 18
97
98/****** REQ4 Related Values ******/
99#define REQ4_KSB_SHIFT 18
100#define REQ4_MEMTYPE_SHIFT 16
101
102/****** REQ6 Related Values ******/
103#define REQ6_MEMTYPE_SHIFT 16
104
105
106/****** Key Storage Block ******/
107#define KSB_START 77
108#define KSB_END 127
109#define KSB_COUNT (KSB_END - KSB_START + 1)
110#define CCP_KSB_BITS 256
111#define CCP_KSB_BYTES 32
112
113#define CCP_JOBID_MASK 0x0000003f
114
115#define CCP_DMAPOOL_MAX_SIZE 64
116#define CCP_DMAPOOL_ALIGN (1 << 5)
117
118#define CCP_REVERSE_BUF_SIZE 64
119
120#define CCP_AES_KEY_KSB_COUNT 1
121#define CCP_AES_CTX_KSB_COUNT 1
122
123#define CCP_XTS_AES_KEY_KSB_COUNT 1
124#define CCP_XTS_AES_CTX_KSB_COUNT 1
125
126#define CCP_SHA_KSB_COUNT 1
127
128#define CCP_RSA_MAX_WIDTH 4096
129
130#define CCP_PASSTHRU_BLOCKSIZE 256
131#define CCP_PASSTHRU_MASKSIZE 32
132#define CCP_PASSTHRU_KSB_COUNT 1
133
134#define CCP_ECC_MODULUS_BYTES 48 /* 384-bits */
135#define CCP_ECC_MAX_OPERANDS 6
136#define CCP_ECC_MAX_OUTPUTS 3
137#define CCP_ECC_SRC_BUF_SIZE 448
138#define CCP_ECC_DST_BUF_SIZE 192
139#define CCP_ECC_OPERAND_SIZE 64
140#define CCP_ECC_OUTPUT_SIZE 64
141#define CCP_ECC_RESULT_OFFSET 60
142#define CCP_ECC_RESULT_SUCCESS 0x0001
143
144
145struct ccp_device;
146struct ccp_cmd;
147
148struct ccp_cmd_queue {
149 struct ccp_device *ccp;
150
151 /* Queue identifier */
152 u32 id;
153
154 /* Queue dma pool */
155 struct dma_pool *dma_pool;
156
157 /* Queue reserved KSB regions */
158 u32 ksb_key;
159 u32 ksb_ctx;
160
161 /* Queue processing thread */
162 struct task_struct *kthread;
163 unsigned int active;
164 unsigned int suspended;
165
166 /* Number of free command slots available */
167 unsigned int free_slots;
168
169 /* Interrupt masks */
170 u32 int_ok;
171 u32 int_err;
172
173 /* Register addresses for queue */
174 void __iomem *reg_status;
175 void __iomem *reg_int_status;
176
177 /* Status values from job */
178 u32 int_status;
179 u32 q_status;
180 u32 q_int_status;
181 u32 cmd_error;
182
183 /* Interrupt wait queue */
184 wait_queue_head_t int_queue;
185 unsigned int int_rcvd;
186} ____cacheline_aligned;
187
188struct ccp_device {
189 struct device *dev;
190
191 /*
192 * Bus specific device information
193 */
194 void *dev_specific;
195 int (*get_irq)(struct ccp_device *ccp);
196 void (*free_irq)(struct ccp_device *ccp);
197
198 /*
199 * I/O area used for device communication. The register mapping
200 * starts at an offset into the mapped bar.
201 * The CMD_REQx registers and the Delete_Cmd_Queue_Job register
202 * need to be protected while a command queue thread is accessing
203 * them.
204 */
205 struct mutex req_mutex ____cacheline_aligned;
206 void __iomem *io_map;
207 void __iomem *io_regs;
208
209 /*
210 * Master lists that all cmds are queued on. Because there can be
211 * more than one CCP command queue that can process a cmd, a separate
212 * backlog list is needed so that the backlog completion call
213 * completes before the cmd is available for execution.
214 */
215 spinlock_t cmd_lock ____cacheline_aligned;
216 unsigned int cmd_count;
217 struct list_head cmd;
218 struct list_head backlog;
219
220 /*
221 * The command queues. These represent the queues on the CCP that
222 * are available for processing cmds
223 */
224 struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES];
225 unsigned int cmd_q_count;
226
227 /*
228 * Support for the CCP True RNG
229 */
230 struct hwrng hwrng;
231 unsigned int hwrng_retries;
232
233 /*
234 * A counter used to generate job-ids for cmds submitted to the CCP
235 */
236 atomic_t current_id ____cacheline_aligned;
237
238 /*
239 * The CCP uses key storage blocks (KSB) to maintain context for certain
240 * operations. To prevent multiple cmds from using the same KSB range
241 * a command queue reserves a KSB range for the duration of the cmd.
242 * Each queue will, however, reserve 2 KSB blocks for operations that
243 * only require single KSB entries (e.g. AES context/iv and key) in order
244 * to avoid allocation contention. This will reserve at most 10 KSB
245 * entries, leaving 40 KSB entries available for dynamic allocation.
246 */
247 struct mutex ksb_mutex ____cacheline_aligned;
248 DECLARE_BITMAP(ksb, KSB_COUNT);
249 wait_queue_head_t ksb_queue;
250 unsigned int ksb_avail;
251 unsigned int ksb_count;
252 u32 ksb_start;
253
254 /* Suspend support */
255 unsigned int suspending;
256 wait_queue_head_t suspend_queue;
257};
258
259
260int ccp_pci_init(void);
261void ccp_pci_exit(void);
262
263struct ccp_device *ccp_alloc_struct(struct device *dev);
264int ccp_init(struct ccp_device *ccp);
265void ccp_destroy(struct ccp_device *ccp);
266bool ccp_queues_suspended(struct ccp_device *ccp);
267
268irqreturn_t ccp_irq_handler(int irq, void *data);
269
270int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd);
271
272#endif
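
The CMD_Q_ERROR()/CMD_Q_DEPTH() macros above carve two fields out of a queue status word: the error code in bits 0-5 and the free-slot depth in bits 12-15. A small stand-alone decoding example; the status value 0x5007 is hypothetical, not taken from real hardware:

#include <stdio.h>
#include <stdint.h>

#define DEMO_CMD_Q_ERROR(qs) ((qs) & 0x0000003f)
#define DEMO_CMD_Q_DEPTH(qs) (((qs) >> 12) & 0x0000000f)

int main(void)
{
	uint32_t q_status = 0x5007;	/* hypothetical register read */

	printf("error code: %u\n", DEMO_CMD_Q_ERROR(q_status));	/* prints 7 */
	printf("free slots: %u\n", DEMO_CMD_Q_DEPTH(q_status));	/* prints 5 */
	return 0;
}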
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
new file mode 100644
index 000000000000..71ed3ade7e12
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -0,0 +1,2024 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) driver
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/kthread.h>
18#include <linux/sched.h>
19#include <linux/interrupt.h>
20#include <linux/spinlock.h>
21#include <linux/mutex.h>
22#include <linux/delay.h>
23#include <linux/ccp.h>
24#include <linux/scatterlist.h>
25#include <crypto/scatterwalk.h>
26
27#include "ccp-dev.h"
28
29
30enum ccp_memtype {
31 CCP_MEMTYPE_SYSTEM = 0,
32 CCP_MEMTYPE_KSB,
33 CCP_MEMTYPE_LOCAL,
34 CCP_MEMTYPE__LAST,
35};
36
37struct ccp_dma_info {
38 dma_addr_t address;
39 unsigned int offset;
40 unsigned int length;
41 enum dma_data_direction dir;
42};
43
44struct ccp_dm_workarea {
45 struct device *dev;
46 struct dma_pool *dma_pool;
47 unsigned int length;
48
49 u8 *address;
50 struct ccp_dma_info dma;
51};
52
53struct ccp_sg_workarea {
54 struct scatterlist *sg;
55 unsigned int nents;
56 unsigned int length;
57
58 struct scatterlist *dma_sg;
59 struct device *dma_dev;
60 unsigned int dma_count;
61 enum dma_data_direction dma_dir;
62
63 unsigned int sg_used;
64
65 u64 bytes_left;
66};
67
68struct ccp_data {
69 struct ccp_sg_workarea sg_wa;
70 struct ccp_dm_workarea dm_wa;
71};
72
73struct ccp_mem {
74 enum ccp_memtype type;
75 union {
76 struct ccp_dma_info dma;
77 u32 ksb;
78 } u;
79};
80
81struct ccp_aes_op {
82 enum ccp_aes_type type;
83 enum ccp_aes_mode mode;
84 enum ccp_aes_action action;
85};
86
87struct ccp_xts_aes_op {
88 enum ccp_aes_action action;
89 enum ccp_xts_aes_unit_size unit_size;
90};
91
92struct ccp_sha_op {
93 enum ccp_sha_type type;
94 u64 msg_bits;
95};
96
97struct ccp_rsa_op {
98 u32 mod_size;
99 u32 input_len;
100};
101
102struct ccp_passthru_op {
103 enum ccp_passthru_bitwise bit_mod;
104 enum ccp_passthru_byteswap byte_swap;
105};
106
107struct ccp_ecc_op {
108 enum ccp_ecc_function function;
109};
110
111struct ccp_op {
112 struct ccp_cmd_queue *cmd_q;
113
114 u32 jobid;
115 u32 ioc;
116 u32 soc;
117 u32 ksb_key;
118 u32 ksb_ctx;
119 u32 init;
120 u32 eom;
121
122 struct ccp_mem src;
123 struct ccp_mem dst;
124
125 union {
126 struct ccp_aes_op aes;
127 struct ccp_xts_aes_op xts;
128 struct ccp_sha_op sha;
129 struct ccp_rsa_op rsa;
130 struct ccp_passthru_op passthru;
131 struct ccp_ecc_op ecc;
132 } u;
133};
134
135/* The CCP cannot perform zero-length sha operations so the caller
136 * is required to buffer data for the final operation. However, a
137 * sha operation for a message with a total length of zero is valid
138 * so known values are required to supply the result.
139 */
140static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
141 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
142 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
143 0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
144 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
145};
146
147static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
148 0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
149 0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
150 0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
151 0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
152};
153
154static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
155 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
156 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
157 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
158 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
159};
160
161static u32 ccp_addr_lo(struct ccp_dma_info *info)
162{
163 return lower_32_bits(info->address + info->offset);
164}
165
166static u32 ccp_addr_hi(struct ccp_dma_info *info)
167{
168 return upper_32_bits(info->address + info->offset) & 0x0000ffff;
169}
170
171static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
172{
173 struct ccp_cmd_queue *cmd_q = op->cmd_q;
174 struct ccp_device *ccp = cmd_q->ccp;
175 void __iomem *cr_addr;
176 u32 cr0, cmd;
177 unsigned int i;
178 int ret = 0;
179
180 /* We could read a status register to see how many free slots
181 * are actually available, but reading that register resets it
182 * and you could lose some error information.
183 */
184 cmd_q->free_slots--;
185
186 cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
187 | (op->jobid << REQ0_JOBID_SHIFT)
188 | REQ0_WAIT_FOR_WRITE;
189
190 if (op->soc)
191 cr0 |= REQ0_STOP_ON_COMPLETE
192 | REQ0_INT_ON_COMPLETE;
193
194 if (op->ioc || !cmd_q->free_slots)
195 cr0 |= REQ0_INT_ON_COMPLETE;
196
197 /* Start at CMD_REQ1 */
198 cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
199
200 mutex_lock(&ccp->req_mutex);
201
202 /* Write CMD_REQ1 through CMD_REQx first */
203 for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
204 iowrite32(*(cr + i), cr_addr);
205
206 /* Tell the CCP to start */
207 wmb();
208 iowrite32(cr0, ccp->io_regs + CMD_REQ0);
209
210 mutex_unlock(&ccp->req_mutex);
211
212 if (cr0 & REQ0_INT_ON_COMPLETE) {
213 /* Wait for the job to complete */
214 ret = wait_event_interruptible(cmd_q->int_queue,
215 cmd_q->int_rcvd);
216 if (ret || cmd_q->cmd_error) {
217 /* On error delete all related jobs from the queue */
218 cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
219 | op->jobid;
220
221 iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
222
223 if (!ret)
224 ret = -EIO;
225 } else if (op->soc) {
226 /* Delete just head job from the queue on SoC */
227 cmd = DEL_Q_ACTIVE
228 | (cmd_q->id << DEL_Q_ID_SHIFT)
229 | op->jobid;
230
231 iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
232 }
233
234 cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
235
236 cmd_q->int_rcvd = 0;
237 }
238
239 return ret;
240}
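
ccp_do_cmd() above packs the queue number, job id and control flags into the CMD_REQ0 word using the REQ0_* shifts defined in ccp-dev.h. A stand-alone illustration of that packing; queue 2 and job id 5 are arbitrary example values, not part of the driver:

#include <stdio.h>
#include <stdint.h>

#define REQ0_WAIT_FOR_WRITE	0x00000004
#define REQ0_INT_ON_COMPLETE	0x00000002
#define REQ0_CMD_Q_SHIFT	9
#define REQ0_JOBID_SHIFT	3

int main(void)
{
	uint32_t queue_id = 2, jobid = 5;	/* arbitrary example values */
	uint32_t cr0;

	cr0 = (queue_id << REQ0_CMD_Q_SHIFT)
	    | (jobid << REQ0_JOBID_SHIFT)
	    | REQ0_WAIT_FOR_WRITE
	    | REQ0_INT_ON_COMPLETE;		/* ask for a completion IRQ */

	printf("CMD_REQ0 = 0x%08x\n", cr0);	/* prints 0x0000042e */
	return 0;
}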
241
242static int ccp_perform_aes(struct ccp_op *op)
243{
244 u32 cr[6];
245
246 /* Fill out the register contents for REQ1 through REQ6 */
247 cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
248 | (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
249 | (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
250 | (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
251 | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
252 cr[1] = op->src.u.dma.length - 1;
253 cr[2] = ccp_addr_lo(&op->src.u.dma);
254 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
255 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
256 | ccp_addr_hi(&op->src.u.dma);
257 cr[4] = ccp_addr_lo(&op->dst.u.dma);
258 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
259 | ccp_addr_hi(&op->dst.u.dma);
260
261 if (op->u.aes.mode == CCP_AES_MODE_CFB)
262 cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
263
264 if (op->eom)
265 cr[0] |= REQ1_EOM;
266
267 if (op->init)
268 cr[0] |= REQ1_INIT;
269
270 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
271}
272
273static int ccp_perform_xts_aes(struct ccp_op *op)
274{
275 u32 cr[6];
276
277 /* Fill out the register contents for REQ1 through REQ6 */
278 cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
279 | (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
280 | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
281 | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
282 cr[1] = op->src.u.dma.length - 1;
283 cr[2] = ccp_addr_lo(&op->src.u.dma);
284 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
285 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
286 | ccp_addr_hi(&op->src.u.dma);
287 cr[4] = ccp_addr_lo(&op->dst.u.dma);
288 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
289 | ccp_addr_hi(&op->dst.u.dma);
290
291 if (op->eom)
292 cr[0] |= REQ1_EOM;
293
294 if (op->init)
295 cr[0] |= REQ1_INIT;
296
297 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
298}
299
300static int ccp_perform_sha(struct ccp_op *op)
301{
302 u32 cr[6];
303
304 /* Fill out the register contents for REQ1 through REQ6 */
305 cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
306 | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
307 | REQ1_INIT;
308 cr[1] = op->src.u.dma.length - 1;
309 cr[2] = ccp_addr_lo(&op->src.u.dma);
310 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
311 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
312 | ccp_addr_hi(&op->src.u.dma);
313
314 if (op->eom) {
315 cr[0] |= REQ1_EOM;
316 cr[4] = lower_32_bits(op->u.sha.msg_bits);
317 cr[5] = upper_32_bits(op->u.sha.msg_bits);
318 } else {
319 cr[4] = 0;
320 cr[5] = 0;
321 }
322
323 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
324}
325
326static int ccp_perform_rsa(struct ccp_op *op)
327{
328 u32 cr[6];
329
330 /* Fill out the register contents for REQ1 through REQ6 */
331 cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
332 | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
333 | (op->ksb_key << REQ1_KEY_KSB_SHIFT)
334 | REQ1_EOM;
335 cr[1] = op->u.rsa.input_len - 1;
336 cr[2] = ccp_addr_lo(&op->src.u.dma);
337 cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
338 | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
339 | ccp_addr_hi(&op->src.u.dma);
340 cr[4] = ccp_addr_lo(&op->dst.u.dma);
341 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
342 | ccp_addr_hi(&op->dst.u.dma);
343
344 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
345}
346
347static int ccp_perform_passthru(struct ccp_op *op)
348{
349 u32 cr[6];
350
351 /* Fill out the register contents for REQ1 through REQ6 */
352 cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
353 | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
354 | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
355
356 if (op->src.type == CCP_MEMTYPE_SYSTEM)
357 cr[1] = op->src.u.dma.length - 1;
358 else
359 cr[1] = op->dst.u.dma.length - 1;
360
361 if (op->src.type == CCP_MEMTYPE_SYSTEM) {
362 cr[2] = ccp_addr_lo(&op->src.u.dma);
363 cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
364 | ccp_addr_hi(&op->src.u.dma);
365
366 if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
367 cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
368 } else {
369 cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
370 cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
371 }
372
373 if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
374 cr[4] = ccp_addr_lo(&op->dst.u.dma);
375 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
376 | ccp_addr_hi(&op->dst.u.dma);
377 } else {
378 cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
379 cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
380 }
381
382 if (op->eom)
383 cr[0] |= REQ1_EOM;
384
385 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
386}
387
388static int ccp_perform_ecc(struct ccp_op *op)
389{
390 u32 cr[6];
391
392 /* Fill out the register contents for REQ1 through REQ6 */
393 cr[0] = REQ1_ECC_AFFINE_CONVERT
394 | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
395 | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
396 | REQ1_EOM;
397 cr[1] = op->src.u.dma.length - 1;
398 cr[2] = ccp_addr_lo(&op->src.u.dma);
399 cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
400 | ccp_addr_hi(&op->src.u.dma);
401 cr[4] = ccp_addr_lo(&op->dst.u.dma);
402 cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
403 | ccp_addr_hi(&op->dst.u.dma);
404
405 return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
406}
407
408static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
409{
410 int start;
411
412 for (;;) {
413 mutex_lock(&ccp->ksb_mutex);
414
415 start = (u32)bitmap_find_next_zero_area(ccp->ksb,
416 ccp->ksb_count,
417 ccp->ksb_start,
418 count, 0);
419 if (start <= ccp->ksb_count) {
420 bitmap_set(ccp->ksb, start, count);
421
422 mutex_unlock(&ccp->ksb_mutex);
423 break;
424 }
425
426 ccp->ksb_avail = 0;
427
428 mutex_unlock(&ccp->ksb_mutex);
429
430 /* Wait for KSB entries to become available */
431 if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
432 return 0;
433 }
434
435 return KSB_START + start;
436}
437
438static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
439 unsigned int count)
440{
441 if (!start)
442 return;
443
444 mutex_lock(&ccp->ksb_mutex);
445
446 bitmap_clear(ccp->ksb, start - KSB_START, count);
447
448 ccp->ksb_avail = 1;
449
450 mutex_unlock(&ccp->ksb_mutex);
451
452 wake_up_interruptible_all(&ccp->ksb_queue);
453}
454
455static u32 ccp_gen_jobid(struct ccp_device *ccp)
456{
457 return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
458}
459
460static void ccp_sg_free(struct ccp_sg_workarea *wa)
461{
462 if (wa->dma_count)
463 dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
464
465 wa->dma_count = 0;
466}
467
468static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
469 struct scatterlist *sg, u64 len,
470 enum dma_data_direction dma_dir)
471{
472 memset(wa, 0, sizeof(*wa));
473
474 wa->sg = sg;
475 if (!sg)
476 return 0;
477
478 wa->nents = sg_nents(sg);
479 wa->length = sg->length;
480 wa->bytes_left = len;
481 wa->sg_used = 0;
482
483 if (len == 0)
484 return 0;
485
486 if (dma_dir == DMA_NONE)
487 return 0;
488
489 wa->dma_sg = sg;
490 wa->dma_dev = dev;
491 wa->dma_dir = dma_dir;
492 wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
493 if (!wa->dma_count)
494 return -ENOMEM;
495
496
497 return 0;
498}
499
500static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
501{
502 unsigned int nbytes = min_t(u64, len, wa->bytes_left);
503
504 if (!wa->sg)
505 return;
506
507 wa->sg_used += nbytes;
508 wa->bytes_left -= nbytes;
509 if (wa->sg_used == wa->sg->length) {
510 wa->sg = sg_next(wa->sg);
511 wa->sg_used = 0;
512 }
513}
514
515static void ccp_dm_free(struct ccp_dm_workarea *wa)
516{
517 if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
518 if (wa->address)
519 dma_pool_free(wa->dma_pool, wa->address,
520 wa->dma.address);
521 } else {
522 if (wa->dma.address)
523 dma_unmap_single(wa->dev, wa->dma.address, wa->length,
524 wa->dma.dir);
525 kfree(wa->address);
526 }
527
528 wa->address = NULL;
529 wa->dma.address = 0;
530}
531
532static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
533 struct ccp_cmd_queue *cmd_q,
534 unsigned int len,
535 enum dma_data_direction dir)
536{
537 memset(wa, 0, sizeof(*wa));
538
539 if (!len)
540 return 0;
541
542 wa->dev = cmd_q->ccp->dev;
543 wa->length = len;
544
545 if (len <= CCP_DMAPOOL_MAX_SIZE) {
546 wa->dma_pool = cmd_q->dma_pool;
547
548 wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
549 &wa->dma.address);
550 if (!wa->address)
551 return -ENOMEM;
552
553 wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
554
555 memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
556 } else {
557 wa->address = kzalloc(len, GFP_KERNEL);
558 if (!wa->address)
559 return -ENOMEM;
560
561 wa->dma.address = dma_map_single(wa->dev, wa->address, len,
562 dir);
563 if (!wa->dma.address)
564 return -ENOMEM;
565
566 wa->dma.length = len;
567 }
568 wa->dma.dir = dir;
569
570 return 0;
571}
572
573static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
574 struct scatterlist *sg, unsigned int sg_offset,
575 unsigned int len)
576{
577 WARN_ON(!wa->address);
578
579 scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
580 0);
581}
582
583static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
584 struct scatterlist *sg, unsigned int sg_offset,
585 unsigned int len)
586{
587 WARN_ON(!wa->address);
588
589 scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
590 1);
591}
592
593static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
594 struct scatterlist *sg,
595 unsigned int len, unsigned int se_len,
596 bool sign_extend)
597{
598 unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
599 u8 buffer[CCP_REVERSE_BUF_SIZE];
600
601 BUG_ON(se_len > sizeof(buffer));
602
603 sg_offset = len;
604 dm_offset = 0;
605 nbytes = len;
606 while (nbytes) {
607 ksb_len = min_t(unsigned int, nbytes, se_len);
608 sg_offset -= ksb_len;
609
610 scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
611 for (i = 0; i < ksb_len; i++)
612 wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
613
614 dm_offset += ksb_len;
615 nbytes -= ksb_len;
616
617 if ((ksb_len != se_len) && sign_extend) {
618 /* Must sign-extend to nearest sign-extend length */
619 if (wa->address[dm_offset - 1] & 0x80)
620 memset(wa->address + dm_offset, 0xff,
621 se_len - ksb_len);
622 }
623 }
624}
625
626static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
627 struct scatterlist *sg,
628 unsigned int len)
629{
630 unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
631 u8 buffer[CCP_REVERSE_BUF_SIZE];
632
633 sg_offset = 0;
634 dm_offset = len;
635 nbytes = len;
636 while (nbytes) {
637 ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
638 dm_offset -= ksb_len;
639
640 for (i = 0; i < ksb_len; i++)
641 buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
642 scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
643
644 sg_offset += ksb_len;
645 nbytes -= ksb_len;
646 }
647}
648
649static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
650{
651 ccp_dm_free(&data->dm_wa);
652 ccp_sg_free(&data->sg_wa);
653}
654
655static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
656 struct scatterlist *sg, u64 sg_len,
657 unsigned int dm_len,
658 enum dma_data_direction dir)
659{
660 int ret;
661
662 memset(data, 0, sizeof(*data));
663
664 ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
665 dir);
666 if (ret)
667 goto e_err;
668
669 ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
670 if (ret)
671 goto e_err;
672
673 return 0;
674
675e_err:
676 ccp_free_data(data, cmd_q);
677
678 return ret;
679}
680
681static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
682{
683 struct ccp_sg_workarea *sg_wa = &data->sg_wa;
684 struct ccp_dm_workarea *dm_wa = &data->dm_wa;
685 unsigned int buf_count, nbytes;
686
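	/* from == 0: fill the queue buffer from the sg;
	 * from != 0: empty the queue buffer back out to the sg.
	 */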
687 /* Clear the buffer if setting it */
688 if (!from)
689 memset(dm_wa->address, 0, dm_wa->length);
690
691 if (!sg_wa->sg)
692 return 0;
693
694 /* Perform the copy operation
695 * nbytes will always be <= UINT_MAX because dm_wa->length is
696 * an unsigned int
697 */
698 nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
699 scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
700 nbytes, from);
701
702 /* Update the structures and generate the count */
703 buf_count = 0;
704 while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
705 nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
706 dm_wa->length - buf_count);
707 nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
708
709 buf_count += nbytes;
710 ccp_update_sg_workarea(sg_wa, nbytes);
711 }
712
713 return buf_count;
714}
715
716static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
717{
718 return ccp_queue_buf(data, 0);
719}
720
721static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
722{
723 return ccp_queue_buf(data, 1);
724}
725
726static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
727 struct ccp_op *op, unsigned int block_size,
728 bool blocksize_op)
729{
730 unsigned int sg_src_len, sg_dst_len, op_len;
731
732	/* The CCP can only DMA from/to one source and one destination
733	 * address per operation, so find the smallest DMA area between
734	 * the source and the destination. The resulting len values will
735	 * always be <= UINT_MAX because the dma length is an unsigned int.
736	 */
737 sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
738 sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
739
740 if (dst) {
741 sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
742 sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
743 op_len = min(sg_src_len, sg_dst_len);
744 } else
745 op_len = sg_src_len;
746
747	/* The data operation length will be the smaller of the sg room
748	 * remaining for the source and the destination, but never less
749	 * than block_size
750	 */
751 op_len = max(op_len, block_size);
752
753 /* Unless we have to buffer data, there's no reason to wait */
754 op->soc = 0;
755
756 if (sg_src_len < block_size) {
757 /* Not enough data in the sg element, so it
758 * needs to be buffered into a blocksize chunk
759 */
760 int cp_len = ccp_fill_queue_buf(src);
761
762 op->soc = 1;
763 op->src.u.dma.address = src->dm_wa.dma.address;
764 op->src.u.dma.offset = 0;
765 op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
766 } else {
767 /* Enough data in the sg element, but we need to
768 * adjust for any previously copied data
769 */
770 op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
771 op->src.u.dma.offset = src->sg_wa.sg_used;
772 op->src.u.dma.length = op_len & ~(block_size - 1);
773
774 ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
775 }
776
777 if (dst) {
778 if (sg_dst_len < block_size) {
779 /* Not enough room in the sg element or we're on the
780 * last piece of data (when using padding), so the
781 * output needs to be buffered into a blocksize chunk
782 */
783 op->soc = 1;
784 op->dst.u.dma.address = dst->dm_wa.dma.address;
785 op->dst.u.dma.offset = 0;
786 op->dst.u.dma.length = op->src.u.dma.length;
787 } else {
788 /* Enough room in the sg element, but we need to
789 * adjust for any previously used area
790 */
791 op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
792 op->dst.u.dma.offset = dst->sg_wa.sg_used;
793 op->dst.u.dma.length = op->src.u.dma.length;
794 }
795 }
796}
797
798static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
799 struct ccp_op *op)
800{
801 op->init = 0;
802
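	/* If the bounce buffer was used as the destination, copy its
	 * contents back out to the sg; otherwise just advance the
	 * destination sg workarea past the data that was written.
	 */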
803 if (dst) {
804 if (op->dst.u.dma.address == dst->dm_wa.dma.address)
805 ccp_empty_queue_buf(dst);
806 else
807 ccp_update_sg_workarea(&dst->sg_wa,
808 op->dst.u.dma.length);
809 }
810}
811
812static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
813 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
814 u32 byte_swap, bool from)
815{
816 struct ccp_op op;
817
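	/* Move the workarea between system memory and the key storage
	 * block using a passthru operation; 'from' selects the KSB as
	 * the source.
	 */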
818 memset(&op, 0, sizeof(op));
819
820 op.cmd_q = cmd_q;
821 op.jobid = jobid;
822 op.eom = 1;
823
824 if (from) {
825 op.soc = 1;
826 op.src.type = CCP_MEMTYPE_KSB;
827 op.src.u.ksb = ksb;
828 op.dst.type = CCP_MEMTYPE_SYSTEM;
829 op.dst.u.dma.address = wa->dma.address;
830 op.dst.u.dma.length = wa->length;
831 } else {
832 op.src.type = CCP_MEMTYPE_SYSTEM;
833 op.src.u.dma.address = wa->dma.address;
834 op.src.u.dma.length = wa->length;
835 op.dst.type = CCP_MEMTYPE_KSB;
836 op.dst.u.ksb = ksb;
837 }
838
839 op.u.passthru.byte_swap = byte_swap;
840
841 return ccp_perform_passthru(&op);
842}
843
844static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
845 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
846 u32 byte_swap)
847{
848 return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
849}
850
851static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
852 struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
853 u32 byte_swap)
854{
855 return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
856}
857
858static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
859 struct ccp_cmd *cmd)
860{
861 struct ccp_aes_engine *aes = &cmd->u.aes;
862 struct ccp_dm_workarea key, ctx;
863 struct ccp_data src;
864 struct ccp_op op;
865 unsigned int dm_offset;
866 int ret;
867
868 if (!((aes->key_len == AES_KEYSIZE_128) ||
869 (aes->key_len == AES_KEYSIZE_192) ||
870 (aes->key_len == AES_KEYSIZE_256)))
871 return -EINVAL;
872
873 if (aes->src_len & (AES_BLOCK_SIZE - 1))
874 return -EINVAL;
875
876 if (aes->iv_len != AES_BLOCK_SIZE)
877 return -EINVAL;
878
879 if (!aes->key || !aes->iv || !aes->src)
880 return -EINVAL;
881
882 if (aes->cmac_final) {
883 if (aes->cmac_key_len != AES_BLOCK_SIZE)
884 return -EINVAL;
885
886 if (!aes->cmac_key)
887 return -EINVAL;
888 }
889
890 BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
891 BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
892
893 ret = -EIO;
894 memset(&op, 0, sizeof(op));
895 op.cmd_q = cmd_q;
896 op.jobid = ccp_gen_jobid(cmd_q->ccp);
897 op.ksb_key = cmd_q->ksb_key;
898 op.ksb_ctx = cmd_q->ksb_ctx;
899 op.init = 1;
900 op.u.aes.type = aes->type;
901 op.u.aes.mode = aes->mode;
902 op.u.aes.action = aes->action;
903
904 /* All supported key sizes fit in a single (32-byte) KSB entry
905 * and must be in little endian format. Use the 256-bit byte
906 * swap passthru option to convert from big endian to little
907 * endian.
908 */
909 ret = ccp_init_dm_workarea(&key, cmd_q,
910 CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
911 DMA_TO_DEVICE);
912 if (ret)
913 return ret;
914
915 dm_offset = CCP_KSB_BYTES - aes->key_len;
916 ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
917 ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
918 CCP_PASSTHRU_BYTESWAP_256BIT);
919 if (ret) {
920 cmd->engine_error = cmd_q->cmd_error;
921 goto e_key;
922 }
923
924 /* The AES context fits in a single (32-byte) KSB entry and
925 * must be in little endian format. Use the 256-bit byte swap
926 * passthru option to convert from big endian to little endian.
927 */
928 ret = ccp_init_dm_workarea(&ctx, cmd_q,
929 CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
930 DMA_BIDIRECTIONAL);
931 if (ret)
932 goto e_key;
933
934 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
935 ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
936 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
937 CCP_PASSTHRU_BYTESWAP_256BIT);
938 if (ret) {
939 cmd->engine_error = cmd_q->cmd_error;
940 goto e_ctx;
941 }
942
943 /* Send data to the CCP AES engine */
944 ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
945 AES_BLOCK_SIZE, DMA_TO_DEVICE);
946 if (ret)
947 goto e_ctx;
948
949 while (src.sg_wa.bytes_left) {
950 ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
951 if (aes->cmac_final && !src.sg_wa.bytes_left) {
952 op.eom = 1;
953
954 /* Push the K1/K2 key to the CCP now */
955 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
956 op.ksb_ctx,
957 CCP_PASSTHRU_BYTESWAP_256BIT);
958 if (ret) {
959 cmd->engine_error = cmd_q->cmd_error;
960 goto e_src;
961 }
962
963 ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
964 aes->cmac_key_len);
965 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
966 CCP_PASSTHRU_BYTESWAP_256BIT);
967 if (ret) {
968 cmd->engine_error = cmd_q->cmd_error;
969 goto e_src;
970 }
971 }
972
973 ret = ccp_perform_aes(&op);
974 if (ret) {
975 cmd->engine_error = cmd_q->cmd_error;
976 goto e_src;
977 }
978
979 ccp_process_data(&src, NULL, &op);
980 }
981
982 /* Retrieve the AES context - convert from LE to BE using
983 * 32-byte (256-bit) byteswapping
984 */
985 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
986 CCP_PASSTHRU_BYTESWAP_256BIT);
987 if (ret) {
988 cmd->engine_error = cmd_q->cmd_error;
989 goto e_src;
990 }
991
992 /* ...but we only need AES_BLOCK_SIZE bytes */
993 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
994 ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
995
996e_src:
997 ccp_free_data(&src, cmd_q);
998
999e_ctx:
1000 ccp_dm_free(&ctx);
1001
1002e_key:
1003 ccp_dm_free(&key);
1004
1005 return ret;
1006}
1007
1008static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1009{
1010 struct ccp_aes_engine *aes = &cmd->u.aes;
1011 struct ccp_dm_workarea key, ctx;
1012 struct ccp_data src, dst;
1013 struct ccp_op op;
1014 unsigned int dm_offset;
1015 bool in_place = false;
1016 int ret;
1017
1018 if (aes->mode == CCP_AES_MODE_CMAC)
1019 return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1020
1021 if (!((aes->key_len == AES_KEYSIZE_128) ||
1022 (aes->key_len == AES_KEYSIZE_192) ||
1023 (aes->key_len == AES_KEYSIZE_256)))
1024 return -EINVAL;
1025
1026 if (((aes->mode == CCP_AES_MODE_ECB) ||
1027 (aes->mode == CCP_AES_MODE_CBC) ||
1028 (aes->mode == CCP_AES_MODE_CFB)) &&
1029 (aes->src_len & (AES_BLOCK_SIZE - 1)))
1030 return -EINVAL;
1031
1032 if (!aes->key || !aes->src || !aes->dst)
1033 return -EINVAL;
1034
1035 if (aes->mode != CCP_AES_MODE_ECB) {
1036 if (aes->iv_len != AES_BLOCK_SIZE)
1037 return -EINVAL;
1038
1039 if (!aes->iv)
1040 return -EINVAL;
1041 }
1042
1043 BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1044 BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1045
1046 ret = -EIO;
1047 memset(&op, 0, sizeof(op));
1048 op.cmd_q = cmd_q;
1049 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1050 op.ksb_key = cmd_q->ksb_key;
1051 op.ksb_ctx = cmd_q->ksb_ctx;
1052 op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1053 op.u.aes.type = aes->type;
1054 op.u.aes.mode = aes->mode;
1055 op.u.aes.action = aes->action;
1056
1057 /* All supported key sizes fit in a single (32-byte) KSB entry
1058 * and must be in little endian format. Use the 256-bit byte
1059 * swap passthru option to convert from big endian to little
1060 * endian.
1061 */
1062 ret = ccp_init_dm_workarea(&key, cmd_q,
1063 CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1064 DMA_TO_DEVICE);
1065 if (ret)
1066 return ret;
1067
1068 dm_offset = CCP_KSB_BYTES - aes->key_len;
1069 ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1070 ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1071 CCP_PASSTHRU_BYTESWAP_256BIT);
1072 if (ret) {
1073 cmd->engine_error = cmd_q->cmd_error;
1074 goto e_key;
1075 }
1076
1077 /* The AES context fits in a single (32-byte) KSB entry and
1078 * must be in little endian format. Use the 256-bit byte swap
1079 * passthru option to convert from big endian to little endian.
1080 */
1081 ret = ccp_init_dm_workarea(&ctx, cmd_q,
1082 CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1083 DMA_BIDIRECTIONAL);
1084 if (ret)
1085 goto e_key;
1086
1087 if (aes->mode != CCP_AES_MODE_ECB) {
1088		/* Load the AES context - convert to LE */
1089 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1090 ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1091 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1092 CCP_PASSTHRU_BYTESWAP_256BIT);
1093 if (ret) {
1094 cmd->engine_error = cmd_q->cmd_error;
1095 goto e_ctx;
1096 }
1097 }
1098
1099 /* Prepare the input and output data workareas. For in-place
1100 * operations we need to set the dma direction to BIDIRECTIONAL
1101 * and copy the src workarea to the dst workarea.
1102 */
1103 if (sg_virt(aes->src) == sg_virt(aes->dst))
1104 in_place = true;
1105
1106 ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1107 AES_BLOCK_SIZE,
1108 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1109 if (ret)
1110 goto e_ctx;
1111
1112 if (in_place)
1113 dst = src;
1114 else {
1115 ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1116 AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1117 if (ret)
1118 goto e_src;
1119 }
1120
1121 /* Send data to the CCP AES engine */
1122 while (src.sg_wa.bytes_left) {
1123 ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1124 if (!src.sg_wa.bytes_left) {
1125 op.eom = 1;
1126
1127 /* Since we don't retrieve the AES context in ECB
1128 * mode we have to wait for the operation to complete
1129 * on the last piece of data
1130 */
1131 if (aes->mode == CCP_AES_MODE_ECB)
1132 op.soc = 1;
1133 }
1134
1135 ret = ccp_perform_aes(&op);
1136 if (ret) {
1137 cmd->engine_error = cmd_q->cmd_error;
1138 goto e_dst;
1139 }
1140
1141 ccp_process_data(&src, &dst, &op);
1142 }
1143
1144 if (aes->mode != CCP_AES_MODE_ECB) {
1145 /* Retrieve the AES context - convert from LE to BE using
1146 * 32-byte (256-bit) byteswapping
1147 */
1148 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1149 CCP_PASSTHRU_BYTESWAP_256BIT);
1150 if (ret) {
1151 cmd->engine_error = cmd_q->cmd_error;
1152 goto e_dst;
1153 }
1154
1155 /* ...but we only need AES_BLOCK_SIZE bytes */
1156 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1157 ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1158 }
1159
1160e_dst:
1161 if (!in_place)
1162 ccp_free_data(&dst, cmd_q);
1163
1164e_src:
1165 ccp_free_data(&src, cmd_q);
1166
1167e_ctx:
1168 ccp_dm_free(&ctx);
1169
1170e_key:
1171 ccp_dm_free(&key);
1172
1173 return ret;
1174}
1175
1176static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1177 struct ccp_cmd *cmd)
1178{
1179 struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1180 struct ccp_dm_workarea key, ctx;
1181 struct ccp_data src, dst;
1182 struct ccp_op op;
1183 unsigned int unit_size, dm_offset;
1184 bool in_place = false;
1185 int ret;
1186
1187 switch (xts->unit_size) {
1188 case CCP_XTS_AES_UNIT_SIZE_16:
1189 unit_size = 16;
1190 break;
1191 case CCP_XTS_AES_UNIT_SIZE_512:
1192 unit_size = 512;
1193 break;
1194 case CCP_XTS_AES_UNIT_SIZE_1024:
1195 unit_size = 1024;
1196 break;
1197 case CCP_XTS_AES_UNIT_SIZE_2048:
1198 unit_size = 2048;
1199 break;
1200 case CCP_XTS_AES_UNIT_SIZE_4096:
1201 unit_size = 4096;
1202 break;
1203
1204 default:
1205 return -EINVAL;
1206 }
1207
1208 if (xts->key_len != AES_KEYSIZE_128)
1209 return -EINVAL;
1210
1211 if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1212 return -EINVAL;
1213
1214 if (xts->iv_len != AES_BLOCK_SIZE)
1215 return -EINVAL;
1216
1217 if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1218 return -EINVAL;
1219
1220 BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1221 BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1222
1223 ret = -EIO;
1224 memset(&op, 0, sizeof(op));
1225 op.cmd_q = cmd_q;
1226 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1227 op.ksb_key = cmd_q->ksb_key;
1228 op.ksb_ctx = cmd_q->ksb_ctx;
1229 op.init = 1;
1230 op.u.xts.action = xts->action;
1231 op.u.xts.unit_size = xts->unit_size;
1232
1233 /* All supported key sizes fit in a single (32-byte) KSB entry
1234 * and must be in little endian format. Use the 256-bit byte
1235 * swap passthru option to convert from big endian to little
1236 * endian.
1237 */
1238 ret = ccp_init_dm_workarea(&key, cmd_q,
1239 CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1240 DMA_TO_DEVICE);
1241 if (ret)
1242 return ret;
1243
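	/* xts->key holds both key halves (key_len bytes each); copy them
	 * into the single KSB-sized workarea in swapped order ahead of
	 * the 256-bit byte-swap passthru below.
	 */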
1244 dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1245 ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1246 ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1247 ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1248 CCP_PASSTHRU_BYTESWAP_256BIT);
1249 if (ret) {
1250 cmd->engine_error = cmd_q->cmd_error;
1251 goto e_key;
1252 }
1253
1254 /* The AES context fits in a single (32-byte) KSB entry and
1255 * for XTS is already in little endian format so no byte swapping
1256 * is needed.
1257 */
1258 ret = ccp_init_dm_workarea(&ctx, cmd_q,
1259 CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1260 DMA_BIDIRECTIONAL);
1261 if (ret)
1262 goto e_key;
1263
1264 ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1265 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1266 CCP_PASSTHRU_BYTESWAP_NOOP);
1267 if (ret) {
1268 cmd->engine_error = cmd_q->cmd_error;
1269 goto e_ctx;
1270 }
1271
1272 /* Prepare the input and output data workareas. For in-place
1273 * operations we need to set the dma direction to BIDIRECTIONAL
1274 * and copy the src workarea to the dst workarea.
1275 */
1276 if (sg_virt(xts->src) == sg_virt(xts->dst))
1277 in_place = true;
1278
1279 ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1280 unit_size,
1281 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1282 if (ret)
1283 goto e_ctx;
1284
1285 if (in_place)
1286 dst = src;
1287 else {
1288 ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1289 unit_size, DMA_FROM_DEVICE);
1290 if (ret)
1291 goto e_src;
1292 }
1293
1294 /* Send data to the CCP AES engine */
1295 while (src.sg_wa.bytes_left) {
1296 ccp_prepare_data(&src, &dst, &op, unit_size, true);
1297 if (!src.sg_wa.bytes_left)
1298 op.eom = 1;
1299
1300 ret = ccp_perform_xts_aes(&op);
1301 if (ret) {
1302 cmd->engine_error = cmd_q->cmd_error;
1303 goto e_dst;
1304 }
1305
1306 ccp_process_data(&src, &dst, &op);
1307 }
1308
1309 /* Retrieve the AES context - convert from LE to BE using
1310 * 32-byte (256-bit) byteswapping
1311 */
1312 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1313 CCP_PASSTHRU_BYTESWAP_256BIT);
1314 if (ret) {
1315 cmd->engine_error = cmd_q->cmd_error;
1316 goto e_dst;
1317 }
1318
1319 /* ...but we only need AES_BLOCK_SIZE bytes */
1320 dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1321 ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1322
1323e_dst:
1324 if (!in_place)
1325 ccp_free_data(&dst, cmd_q);
1326
1327e_src:
1328 ccp_free_data(&src, cmd_q);
1329
1330e_ctx:
1331 ccp_dm_free(&ctx);
1332
1333e_key:
1334 ccp_dm_free(&key);
1335
1336 return ret;
1337}
1338
1339static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1340{
1341 struct ccp_sha_engine *sha = &cmd->u.sha;
1342 struct ccp_dm_workarea ctx;
1343 struct ccp_data src;
1344 struct ccp_op op;
1345 int ret;
1346
1347 if (sha->ctx_len != CCP_SHA_CTXSIZE)
1348 return -EINVAL;
1349
1350 if (!sha->ctx)
1351 return -EINVAL;
1352
1353 if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1354 return -EINVAL;
1355
1356 if (!sha->src_len) {
1357 const u8 *sha_zero;
1358
1359 /* Not final, just return */
1360 if (!sha->final)
1361 return 0;
1362
1363 /* CCP can't do a zero length sha operation so the caller
1364 * must buffer the data.
1365 */
1366 if (sha->msg_bits)
1367 return -EINVAL;
1368
1369		/* For a sha operation on a message with a total length
1370		 * of zero, return the known result.
1371 */
1372 switch (sha->type) {
1373 case CCP_SHA_TYPE_1:
1374 sha_zero = ccp_sha1_zero;
1375 break;
1376 case CCP_SHA_TYPE_224:
1377 sha_zero = ccp_sha224_zero;
1378 break;
1379 case CCP_SHA_TYPE_256:
1380 sha_zero = ccp_sha256_zero;
1381 break;
1382 default:
1383 return -EINVAL;
1384 }
1385
1386 scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1387 sha->ctx_len, 1);
1388
1389 return 0;
1390 }
1391
1392 if (!sha->src)
1393 return -EINVAL;
1394
1395 BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1396
1397 memset(&op, 0, sizeof(op));
1398 op.cmd_q = cmd_q;
1399 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1400 op.ksb_ctx = cmd_q->ksb_ctx;
1401 op.u.sha.type = sha->type;
1402 op.u.sha.msg_bits = sha->msg_bits;
1403
1404 /* The SHA context fits in a single (32-byte) KSB entry and
1405 * must be in little endian format. Use the 256-bit byte swap
1406 * passthru option to convert from big endian to little endian.
1407 */
1408 ret = ccp_init_dm_workarea(&ctx, cmd_q,
1409 CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1410 DMA_BIDIRECTIONAL);
1411 if (ret)
1412 return ret;
1413
1414 ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1415 ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1416 CCP_PASSTHRU_BYTESWAP_256BIT);
1417 if (ret) {
1418 cmd->engine_error = cmd_q->cmd_error;
1419 goto e_ctx;
1420 }
1421
1422 /* Send data to the CCP SHA engine */
1423 ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1424 CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1425 if (ret)
1426 goto e_ctx;
1427
1428 while (src.sg_wa.bytes_left) {
1429 ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1430 if (sha->final && !src.sg_wa.bytes_left)
1431 op.eom = 1;
1432
1433 ret = ccp_perform_sha(&op);
1434 if (ret) {
1435 cmd->engine_error = cmd_q->cmd_error;
1436 goto e_data;
1437 }
1438
1439 ccp_process_data(&src, NULL, &op);
1440 }
1441
1442 /* Retrieve the SHA context - convert from LE to BE using
1443	 * 32-byte (256-bit) byteswapping
1444 */
1445 ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1446 CCP_PASSTHRU_BYTESWAP_256BIT);
1447 if (ret) {
1448 cmd->engine_error = cmd_q->cmd_error;
1449 goto e_data;
1450 }
1451
1452 ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1453
1454e_data:
1455 ccp_free_data(&src, cmd_q);
1456
1457e_ctx:
1458 ccp_dm_free(&ctx);
1459
1460 return ret;
1461}
1462
1463static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1464{
1465 struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1466 struct ccp_dm_workarea exp, src;
1467 struct ccp_data dst;
1468 struct ccp_op op;
1469 unsigned int ksb_count, i_len, o_len;
1470 int ret;
1471
1472 if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1473 return -EINVAL;
1474
1475 if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1476 return -EINVAL;
1477
1478 /* The RSA modulus must precede the message being acted upon, so
1479 * it must be copied to a DMA area where the message and the
1480 * modulus can be concatenated. Therefore the input buffer
1481 * length required is twice the output buffer length (which
1482 * must be a multiple of 256-bits).
1483 */
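	/* o_len: the key size rounded up to the next 256-bit boundary,
	 * expressed in bytes
	 */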
1484 o_len = ((rsa->key_size + 255) / 256) * 32;
1485 i_len = o_len * 2;
1486
1487 ksb_count = o_len / CCP_KSB_BYTES;
1488
1489 memset(&op, 0, sizeof(op));
1490 op.cmd_q = cmd_q;
1491 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1492 op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1493 if (!op.ksb_key)
1494 return -EIO;
1495
1496 /* The RSA exponent may span multiple (32-byte) KSB entries and must
1497 * be in little endian format. Reverse copy each 32-byte chunk
1498 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1499 * and each byte within that chunk and do not perform any byte swap
1500 * operations on the passthru operation.
1501 */
1502 ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1503 if (ret)
1504 goto e_ksb;
1505
1506 ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
1507 true);
1508 ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1509 CCP_PASSTHRU_BYTESWAP_NOOP);
1510 if (ret) {
1511 cmd->engine_error = cmd_q->cmd_error;
1512 goto e_exp;
1513 }
1514
1515 /* Concatenate the modulus and the message. Both the modulus and
1516 * the operands must be in little endian format. Since the input
1517 * is in big endian format it must be converted.
1518 */
1519 ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1520 if (ret)
1521 goto e_exp;
1522
1523 ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
1524 true);
1525 src.address += o_len; /* Adjust the address for the copy operation */
1526 ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
1527 true);
1528 src.address -= o_len; /* Reset the address to original value */
1529
1530 /* Prepare the output area for the operation */
1531 ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1532 o_len, DMA_FROM_DEVICE);
1533 if (ret)
1534 goto e_src;
1535
1536 op.soc = 1;
1537 op.src.u.dma.address = src.dma.address;
1538 op.src.u.dma.offset = 0;
1539 op.src.u.dma.length = i_len;
1540 op.dst.u.dma.address = dst.dm_wa.dma.address;
1541 op.dst.u.dma.offset = 0;
1542 op.dst.u.dma.length = o_len;
1543
1544 op.u.rsa.mod_size = rsa->key_size;
1545 op.u.rsa.input_len = i_len;
1546
1547 ret = ccp_perform_rsa(&op);
1548 if (ret) {
1549 cmd->engine_error = cmd_q->cmd_error;
1550 goto e_dst;
1551 }
1552
1553 ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1554
1555e_dst:
1556 ccp_free_data(&dst, cmd_q);
1557
1558e_src:
1559 ccp_dm_free(&src);
1560
1561e_exp:
1562 ccp_dm_free(&exp);
1563
1564e_ksb:
1565 ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1566
1567 return ret;
1568}
1569
1570static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1571 struct ccp_cmd *cmd)
1572{
1573 struct ccp_passthru_engine *pt = &cmd->u.passthru;
1574 struct ccp_dm_workarea mask;
1575 struct ccp_data src, dst;
1576 struct ccp_op op;
1577 bool in_place = false;
1578 unsigned int i;
1579 int ret;
1580
1581 if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1582 return -EINVAL;
1583
1584 if (!pt->src || !pt->dst)
1585 return -EINVAL;
1586
1587 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1588 if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1589 return -EINVAL;
1590 if (!pt->mask)
1591 return -EINVAL;
1592 }
1593
1594 BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1595
1596 memset(&op, 0, sizeof(op));
1597 op.cmd_q = cmd_q;
1598 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1599
1600 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1601 /* Load the mask */
1602 op.ksb_key = cmd_q->ksb_key;
1603
1604 ret = ccp_init_dm_workarea(&mask, cmd_q,
1605 CCP_PASSTHRU_KSB_COUNT *
1606 CCP_KSB_BYTES,
1607 DMA_TO_DEVICE);
1608 if (ret)
1609 return ret;
1610
1611 ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1612 ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1613 CCP_PASSTHRU_BYTESWAP_NOOP);
1614 if (ret) {
1615 cmd->engine_error = cmd_q->cmd_error;
1616 goto e_mask;
1617 }
1618 }
1619
1620 /* Prepare the input and output data workareas. For in-place
1621 * operations we need to set the dma direction to BIDIRECTIONAL
1622 * and copy the src workarea to the dst workarea.
1623 */
1624 if (sg_virt(pt->src) == sg_virt(pt->dst))
1625 in_place = true;
1626
1627 ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1628 CCP_PASSTHRU_MASKSIZE,
1629 in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1630 if (ret)
1631 goto e_mask;
1632
1633 if (in_place)
1634 dst = src;
1635 else {
1636 ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1637 CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1638 if (ret)
1639 goto e_src;
1640 }
1641
1642 /* Send data to the CCP Passthru engine
1643 * Because the CCP engine works on a single source and destination
1644 * dma address at a time, each entry in the source scatterlist
1645 * (after the dma_map_sg call) must be less than or equal to the
1646 * (remaining) length in the destination scatterlist entry and the
1647 * length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1648 */
1649 dst.sg_wa.sg_used = 0;
1650 for (i = 1; i <= src.sg_wa.dma_count; i++) {
1651 if (!dst.sg_wa.sg ||
1652 (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1653 ret = -EINVAL;
1654 goto e_dst;
1655 }
1656
1657 if (i == src.sg_wa.dma_count) {
1658 op.eom = 1;
1659 op.soc = 1;
1660 }
1661
1662 op.src.type = CCP_MEMTYPE_SYSTEM;
1663 op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1664 op.src.u.dma.offset = 0;
1665 op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1666
1667 op.dst.type = CCP_MEMTYPE_SYSTEM;
1668 op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1669		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1670		op.dst.u.dma.length = op.src.u.dma.length;
1671
1672 ret = ccp_perform_passthru(&op);
1673 if (ret) {
1674 cmd->engine_error = cmd_q->cmd_error;
1675 goto e_dst;
1676 }
1677
1678 dst.sg_wa.sg_used += src.sg_wa.sg->length;
1679 if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1680 dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1681 dst.sg_wa.sg_used = 0;
1682 }
1683 src.sg_wa.sg = sg_next(src.sg_wa.sg);
1684 }
1685
1686e_dst:
1687 if (!in_place)
1688 ccp_free_data(&dst, cmd_q);
1689
1690e_src:
1691 ccp_free_data(&src, cmd_q);
1692
1693e_mask:
1694 if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1695 ccp_dm_free(&mask);
1696
1697 return ret;
1698}
1699
1700static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1701{
1702 struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1703 struct ccp_dm_workarea src, dst;
1704 struct ccp_op op;
1705 int ret;
1706 u8 *save;
1707
1708 if (!ecc->u.mm.operand_1 ||
1709 (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1710 return -EINVAL;
1711
1712 if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1713 if (!ecc->u.mm.operand_2 ||
1714 (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1715 return -EINVAL;
1716
1717 if (!ecc->u.mm.result ||
1718 (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1719 return -EINVAL;
1720
1721 memset(&op, 0, sizeof(op));
1722 op.cmd_q = cmd_q;
1723 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1724
1725 /* Concatenate the modulus and the operands. Both the modulus and
1726 * the operands must be in little endian format. Since the input
1727 * is in big endian format it must be converted and placed in a
1728 * fixed length buffer.
1729 */
1730 ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1731 DMA_TO_DEVICE);
1732 if (ret)
1733 return ret;
1734
1735 /* Save the workarea address since it is updated in order to perform
1736 * the concatenation
1737 */
1738 save = src.address;
1739
1740 /* Copy the ECC modulus */
1741 ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1742 CCP_ECC_OPERAND_SIZE, true);
1743 src.address += CCP_ECC_OPERAND_SIZE;
1744
1745 /* Copy the first operand */
1746 ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1747 ecc->u.mm.operand_1_len,
1748 CCP_ECC_OPERAND_SIZE, true);
1749 src.address += CCP_ECC_OPERAND_SIZE;
1750
1751 if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1752 /* Copy the second operand */
1753 ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1754 ecc->u.mm.operand_2_len,
1755 CCP_ECC_OPERAND_SIZE, true);
1756 src.address += CCP_ECC_OPERAND_SIZE;
1757 }
1758
1759 /* Restore the workarea address */
1760 src.address = save;
1761
1762 /* Prepare the output area for the operation */
1763 ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1764 DMA_FROM_DEVICE);
1765 if (ret)
1766 goto e_src;
1767
1768 op.soc = 1;
1769 op.src.u.dma.address = src.dma.address;
1770 op.src.u.dma.offset = 0;
1771 op.src.u.dma.length = src.length;
1772 op.dst.u.dma.address = dst.dma.address;
1773 op.dst.u.dma.offset = 0;
1774 op.dst.u.dma.length = dst.length;
1775
1776 op.u.ecc.function = cmd->u.ecc.function;
1777
1778 ret = ccp_perform_ecc(&op);
1779 if (ret) {
1780 cmd->engine_error = cmd_q->cmd_error;
1781 goto e_dst;
1782 }
1783
1784 ecc->ecc_result = le16_to_cpup(
1785 (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1786 if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1787 ret = -EIO;
1788 goto e_dst;
1789 }
1790
1791 /* Save the ECC result */
1792 ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1793
1794e_dst:
1795 ccp_dm_free(&dst);
1796
1797e_src:
1798 ccp_dm_free(&src);
1799
1800 return ret;
1801}
1802
1803static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1804{
1805 struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1806 struct ccp_dm_workarea src, dst;
1807 struct ccp_op op;
1808 int ret;
1809 u8 *save;
1810
1811 if (!ecc->u.pm.point_1.x ||
1812 (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1813 !ecc->u.pm.point_1.y ||
1814 (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1815 return -EINVAL;
1816
1817 if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1818 if (!ecc->u.pm.point_2.x ||
1819 (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1820 !ecc->u.pm.point_2.y ||
1821 (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1822 return -EINVAL;
1823 } else {
1824 if (!ecc->u.pm.domain_a ||
1825 (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1826 return -EINVAL;
1827
1828 if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1829 if (!ecc->u.pm.scalar ||
1830 (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1831 return -EINVAL;
1832 }
1833
1834 if (!ecc->u.pm.result.x ||
1835 (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1836 !ecc->u.pm.result.y ||
1837 (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1838 return -EINVAL;
1839
1840 memset(&op, 0, sizeof(op));
1841 op.cmd_q = cmd_q;
1842 op.jobid = ccp_gen_jobid(cmd_q->ccp);
1843
1844 /* Concatenate the modulus and the operands. Both the modulus and
1845 * the operands must be in little endian format. Since the input
1846 * is in big endian format it must be converted and placed in a
1847 * fixed length buffer.
1848 */
1849 ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1850 DMA_TO_DEVICE);
1851 if (ret)
1852 return ret;
1853
1854 /* Save the workarea address since it is updated in order to perform
1855 * the concatenation
1856 */
1857 save = src.address;
1858
1859 /* Copy the ECC modulus */
1860 ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1861 CCP_ECC_OPERAND_SIZE, true);
1862 src.address += CCP_ECC_OPERAND_SIZE;
1863
1864 /* Copy the first point X and Y coordinate */
1865 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1866 ecc->u.pm.point_1.x_len,
1867 CCP_ECC_OPERAND_SIZE, true);
1868 src.address += CCP_ECC_OPERAND_SIZE;
1869 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1870 ecc->u.pm.point_1.y_len,
1871 CCP_ECC_OPERAND_SIZE, true);
1872 src.address += CCP_ECC_OPERAND_SIZE;
1873
1874	/* Set the first point Z coordinate to 1 */
1875 *(src.address) = 0x01;
1876 src.address += CCP_ECC_OPERAND_SIZE;
1877
1878 if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1879 /* Copy the second point X and Y coordinate */
1880 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1881 ecc->u.pm.point_2.x_len,
1882 CCP_ECC_OPERAND_SIZE, true);
1883 src.address += CCP_ECC_OPERAND_SIZE;
1884 ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1885 ecc->u.pm.point_2.y_len,
1886 CCP_ECC_OPERAND_SIZE, true);
1887 src.address += CCP_ECC_OPERAND_SIZE;
1888
1889		/* Set the second point Z coordinate to 1 */
1890 *(src.address) = 0x01;
1891 src.address += CCP_ECC_OPERAND_SIZE;
1892 } else {
1893 /* Copy the Domain "a" parameter */
1894 ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1895 ecc->u.pm.domain_a_len,
1896 CCP_ECC_OPERAND_SIZE, true);
1897 src.address += CCP_ECC_OPERAND_SIZE;
1898
1899 if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
1900 /* Copy the scalar value */
1901 ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
1902 ecc->u.pm.scalar_len,
1903 CCP_ECC_OPERAND_SIZE, true);
1904 src.address += CCP_ECC_OPERAND_SIZE;
1905 }
1906 }
1907
1908 /* Restore the workarea address */
1909 src.address = save;
1910
1911 /* Prepare the output area for the operation */
1912 ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1913 DMA_FROM_DEVICE);
1914 if (ret)
1915 goto e_src;
1916
1917 op.soc = 1;
1918 op.src.u.dma.address = src.dma.address;
1919 op.src.u.dma.offset = 0;
1920 op.src.u.dma.length = src.length;
1921 op.dst.u.dma.address = dst.dma.address;
1922 op.dst.u.dma.offset = 0;
1923 op.dst.u.dma.length = dst.length;
1924
1925 op.u.ecc.function = cmd->u.ecc.function;
1926
1927 ret = ccp_perform_ecc(&op);
1928 if (ret) {
1929 cmd->engine_error = cmd_q->cmd_error;
1930 goto e_dst;
1931 }
1932
1933 ecc->ecc_result = le16_to_cpup(
1934 (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1935 if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1936 ret = -EIO;
1937 goto e_dst;
1938 }
1939
1940 /* Save the workarea address since it is updated as we walk through
1941 * to copy the point math result
1942 */
1943 save = dst.address;
1944
1945 /* Save the ECC result X and Y coordinates */
1946 ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
1947 CCP_ECC_MODULUS_BYTES);
1948 dst.address += CCP_ECC_OUTPUT_SIZE;
1949 ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
1950 CCP_ECC_MODULUS_BYTES);
1951 dst.address += CCP_ECC_OUTPUT_SIZE;
1952
1953 /* Restore the workarea address */
1954 dst.address = save;
1955
1956e_dst:
1957 ccp_dm_free(&dst);
1958
1959e_src:
1960 ccp_dm_free(&src);
1961
1962 return ret;
1963}
1964
1965static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1966{
1967 struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1968
1969 ecc->ecc_result = 0;
1970
1971 if (!ecc->mod ||
1972 (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
1973 return -EINVAL;
1974
1975 switch (ecc->function) {
1976 case CCP_ECC_FUNCTION_MMUL_384BIT:
1977 case CCP_ECC_FUNCTION_MADD_384BIT:
1978 case CCP_ECC_FUNCTION_MINV_384BIT:
1979 return ccp_run_ecc_mm_cmd(cmd_q, cmd);
1980
1981 case CCP_ECC_FUNCTION_PADD_384BIT:
1982 case CCP_ECC_FUNCTION_PMUL_384BIT:
1983 case CCP_ECC_FUNCTION_PDBL_384BIT:
1984 return ccp_run_ecc_pm_cmd(cmd_q, cmd);
1985
1986 default:
1987 return -EINVAL;
1988 }
1989}
1990
1991int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1992{
1993 int ret;
1994
1995 cmd->engine_error = 0;
1996 cmd_q->cmd_error = 0;
1997 cmd_q->int_rcvd = 0;
1998 cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
1999
2000 switch (cmd->engine) {
2001 case CCP_ENGINE_AES:
2002 ret = ccp_run_aes_cmd(cmd_q, cmd);
2003 break;
2004 case CCP_ENGINE_XTS_AES_128:
2005 ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2006 break;
2007 case CCP_ENGINE_SHA:
2008 ret = ccp_run_sha_cmd(cmd_q, cmd);
2009 break;
2010 case CCP_ENGINE_RSA:
2011 ret = ccp_run_rsa_cmd(cmd_q, cmd);
2012 break;
2013 case CCP_ENGINE_PASSTHRU:
2014 ret = ccp_run_passthru_cmd(cmd_q, cmd);
2015 break;
2016 case CCP_ENGINE_ECC:
2017 ret = ccp_run_ecc_cmd(cmd_q, cmd);
2018 break;
2019 default:
2020 ret = -EINVAL;
2021 }
2022
2023 return ret;
2024}
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
new file mode 100644
index 000000000000..93319f9db753
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -0,0 +1,361 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) driver
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/kthread.h>
18#include <linux/sched.h>
19#include <linux/interrupt.h>
20#include <linux/spinlock.h>
21#include <linux/delay.h>
22#include <linux/ccp.h>
23
24#include "ccp-dev.h"
25
26#define IO_BAR 2
27#define MSIX_VECTORS 2
28
29struct ccp_msix {
30 u32 vector;
31 char name[16];
32};
33
34struct ccp_pci {
35 int msix_count;
36 struct ccp_msix msix[MSIX_VECTORS];
37};
38
39static int ccp_get_msix_irqs(struct ccp_device *ccp)
40{
41 struct ccp_pci *ccp_pci = ccp->dev_specific;
42 struct device *dev = ccp->dev;
43 struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
44 struct msix_entry msix_entry[MSIX_VECTORS];
45 unsigned int name_len = sizeof(ccp_pci->msix[0].name) - 1;
46 int v, ret;
47
48 for (v = 0; v < ARRAY_SIZE(msix_entry); v++)
49 msix_entry[v].entry = v;
50
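	/* pci_enable_msix() returns a positive count when fewer vectors
	 * are available than requested; retry with that count until it
	 * succeeds or fails outright.
	 */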
51 while ((ret = pci_enable_msix(pdev, msix_entry, v)) > 0)
52 v = ret;
53 if (ret)
54 return ret;
55
56 ccp_pci->msix_count = v;
57 for (v = 0; v < ccp_pci->msix_count; v++) {
58 /* Set the interrupt names and request the irqs */
59 snprintf(ccp_pci->msix[v].name, name_len, "ccp-%u", v);
60 ccp_pci->msix[v].vector = msix_entry[v].vector;
61 ret = request_irq(ccp_pci->msix[v].vector, ccp_irq_handler,
62 0, ccp_pci->msix[v].name, dev);
63 if (ret) {
64 dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n",
65 ret);
66 goto e_irq;
67 }
68 }
69
70 return 0;
71
72e_irq:
73 while (v--)
74 free_irq(ccp_pci->msix[v].vector, dev);
75
76 pci_disable_msix(pdev);
77
78 ccp_pci->msix_count = 0;
79
80 return ret;
81}
82
83static int ccp_get_msi_irq(struct ccp_device *ccp)
84{
85 struct device *dev = ccp->dev;
86 struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
87 int ret;
88
89 ret = pci_enable_msi(pdev);
90 if (ret)
91 return ret;
92
93 ret = request_irq(pdev->irq, ccp_irq_handler, 0, "ccp", dev);
94 if (ret) {
95 dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret);
96 goto e_msi;
97 }
98
99 return 0;
100
101e_msi:
102 pci_disable_msi(pdev);
103
104 return ret;
105}
106
107static int ccp_get_irqs(struct ccp_device *ccp)
108{
109 struct device *dev = ccp->dev;
110 int ret;
111
112 ret = ccp_get_msix_irqs(ccp);
113 if (!ret)
114 return 0;
115
116 /* Couldn't get MSI-X vectors, try MSI */
117 dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
118 ret = ccp_get_msi_irq(ccp);
119 if (!ret)
120 return 0;
121
122 /* Couldn't get MSI interrupt */
123 dev_notice(dev, "could not enable MSI (%d)\n", ret);
124
125 return ret;
126}
127
128static void ccp_free_irqs(struct ccp_device *ccp)
129{
130 struct ccp_pci *ccp_pci = ccp->dev_specific;
131 struct device *dev = ccp->dev;
132 struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
133
134 if (ccp_pci->msix_count) {
135 while (ccp_pci->msix_count--)
136 free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
137 dev);
138 pci_disable_msix(pdev);
139 } else {
140 free_irq(pdev->irq, dev);
141 pci_disable_msi(pdev);
142 }
143}
144
145static int ccp_find_mmio_area(struct ccp_device *ccp)
146{
147 struct device *dev = ccp->dev;
148 struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
149 resource_size_t io_len;
150 unsigned long io_flags;
151 int bar;
152
153 io_flags = pci_resource_flags(pdev, IO_BAR);
154 io_len = pci_resource_len(pdev, IO_BAR);
155 if ((io_flags & IORESOURCE_MEM) && (io_len >= (IO_OFFSET + 0x800)))
156 return IO_BAR;
157
158 for (bar = 0; bar < PCI_STD_RESOURCE_END; bar++) {
159 io_flags = pci_resource_flags(pdev, bar);
160 io_len = pci_resource_len(pdev, bar);
161 if ((io_flags & IORESOURCE_MEM) &&
162 (io_len >= (IO_OFFSET + 0x800)))
163 return bar;
164 }
165
166 return -EIO;
167}
168
169static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
170{
171 struct ccp_device *ccp;
172 struct ccp_pci *ccp_pci;
173 struct device *dev = &pdev->dev;
174 unsigned int bar;
175 int ret;
176
177 ret = -ENOMEM;
178 ccp = ccp_alloc_struct(dev);
179 if (!ccp)
180 goto e_err;
181
182 ccp_pci = kzalloc(sizeof(*ccp_pci), GFP_KERNEL);
183 if (!ccp_pci) {
184 ret = -ENOMEM;
185 goto e_free1;
186 }
187 ccp->dev_specific = ccp_pci;
188 ccp->get_irq = ccp_get_irqs;
189 ccp->free_irq = ccp_free_irqs;
190
191 ret = pci_request_regions(pdev, "ccp");
192 if (ret) {
193 dev_err(dev, "pci_request_regions failed (%d)\n", ret);
194 goto e_free2;
195 }
196
197 ret = pci_enable_device(pdev);
198 if (ret) {
199 dev_err(dev, "pci_enable_device failed (%d)\n", ret);
200 goto e_regions;
201 }
202
203 pci_set_master(pdev);
204
205 ret = ccp_find_mmio_area(ccp);
206 if (ret < 0)
207 goto e_device;
208 bar = ret;
209
210 ret = -EIO;
211 ccp->io_map = pci_iomap(pdev, bar, 0);
212 if (ccp->io_map == NULL) {
213 dev_err(dev, "pci_iomap failed\n");
214 goto e_device;
215 }
216 ccp->io_regs = ccp->io_map + IO_OFFSET;
217
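	/* Prefer a 48-bit DMA mask, falling back to 32 bits if that fails */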
218 ret = dma_set_mask(dev, DMA_BIT_MASK(48));
219 if (ret == 0) {
220 ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(48));
221 if (ret) {
222 dev_err(dev,
223				"dma_set_coherent_mask failed (%d)\n",
224 ret);
225 goto e_bar0;
226 }
227 } else {
228 ret = dma_set_mask(dev, DMA_BIT_MASK(32));
229 if (ret) {
230			dev_err(dev, "dma_set_mask failed (%d)\n", ret);
231 goto e_bar0;
232 }
233 }
234
235 dev_set_drvdata(dev, ccp);
236
237 ret = ccp_init(ccp);
238 if (ret)
239 goto e_bar0;
240
241 dev_notice(dev, "enabled\n");
242
243 return 0;
244
245e_bar0:
246 pci_iounmap(pdev, ccp->io_map);
247
248e_device:
249 pci_disable_device(pdev);
250
251e_regions:
252 pci_release_regions(pdev);
253
254e_free2:
255 kfree(ccp_pci);
256
257e_free1:
258 kfree(ccp);
259
260e_err:
261 dev_notice(dev, "initialization failed\n");
262 return ret;
263}
264
265static void ccp_pci_remove(struct pci_dev *pdev)
266{
267 struct device *dev = &pdev->dev;
268 struct ccp_device *ccp = dev_get_drvdata(dev);
269
270 if (!ccp)
271 return;
272
273 ccp_destroy(ccp);
274
275 pci_iounmap(pdev, ccp->io_map);
276
277 pci_disable_device(pdev);
278
279 pci_release_regions(pdev);
280
281 kfree(ccp);
282
283 dev_notice(dev, "disabled\n");
284}
285
286#ifdef CONFIG_PM
287static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
288{
289 struct device *dev = &pdev->dev;
290 struct ccp_device *ccp = dev_get_drvdata(dev);
291 unsigned long flags;
292 unsigned int i;
293
294 spin_lock_irqsave(&ccp->cmd_lock, flags);
295
296 ccp->suspending = 1;
297
298 /* Wake all the queue kthreads to prepare for suspend */
299 for (i = 0; i < ccp->cmd_q_count; i++)
300 wake_up_process(ccp->cmd_q[i].kthread);
301
302 spin_unlock_irqrestore(&ccp->cmd_lock, flags);
303
304 /* Wait for all queue kthreads to say they're done */
305 while (!ccp_queues_suspended(ccp))
306 wait_event_interruptible(ccp->suspend_queue,
307 ccp_queues_suspended(ccp));
308
309 return 0;
310}
311
312static int ccp_pci_resume(struct pci_dev *pdev)
313{
314 struct device *dev = &pdev->dev;
315 struct ccp_device *ccp = dev_get_drvdata(dev);
316 unsigned long flags;
317 unsigned int i;
318
319 spin_lock_irqsave(&ccp->cmd_lock, flags);
320
321 ccp->suspending = 0;
322
323 /* Wake up all the kthreads */
324 for (i = 0; i < ccp->cmd_q_count; i++) {
325 ccp->cmd_q[i].suspended = 0;
326 wake_up_process(ccp->cmd_q[i].kthread);
327 }
328
329 spin_unlock_irqrestore(&ccp->cmd_lock, flags);
330
331 return 0;
332}
333#endif
334
335static DEFINE_PCI_DEVICE_TABLE(ccp_pci_table) = {
336 { PCI_VDEVICE(AMD, 0x1537), },
337 /* Last entry must be zero */
338 { 0, }
339};
340MODULE_DEVICE_TABLE(pci, ccp_pci_table);
341
342static struct pci_driver ccp_pci_driver = {
343 .name = "AMD Cryptographic Coprocessor",
344 .id_table = ccp_pci_table,
345 .probe = ccp_pci_probe,
346 .remove = ccp_pci_remove,
347#ifdef CONFIG_PM
348 .suspend = ccp_pci_suspend,
349 .resume = ccp_pci_resume,
350#endif
351};
352
353int ccp_pci_init(void)
354{
355 return pci_register_driver(&ccp_pci_driver);
356}
357
358void ccp_pci_exit(void)
359{
360 pci_unregister_driver(&ccp_pci_driver);
361}
diff --git a/drivers/crypto/dcp.c b/drivers/crypto/dcp.c
deleted file mode 100644
index 247ab8048f5b..000000000000
--- a/drivers/crypto/dcp.c
+++ /dev/null
@@ -1,903 +0,0 @@
1/*
2 * Cryptographic API.
3 *
4 * Support for DCP cryptographic accelerator.
5 *
6 * Copyright (c) 2013
7 * Author: Tobias Rauter <tobias.rauter@gmail.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as published
11 * by the Free Software Foundation.
12 *
13 * Based on tegra-aes.c, dcp.c (from freescale SDK) and sahara.c
14 */
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/errno.h>
18#include <linux/kernel.h>
19#include <linux/platform_device.h>
20#include <linux/dma-mapping.h>
21#include <linux/io.h>
22#include <linux/mutex.h>
23#include <linux/interrupt.h>
24#include <linux/completion.h>
25#include <linux/workqueue.h>
26#include <linux/delay.h>
27#include <linux/crypto.h>
28#include <linux/miscdevice.h>
29
30#include <crypto/scatterwalk.h>
31#include <crypto/aes.h>
32
33
34/* IOCTL for DCP OTP Key AES - taken from Freescale's SDK*/
35#define DBS_IOCTL_BASE 'd'
36#define DBS_ENC _IOW(DBS_IOCTL_BASE, 0x00, uint8_t[16])
37#define DBS_DEC _IOW(DBS_IOCTL_BASE, 0x01, uint8_t[16])
38
39/* DCP channel used for AES */
40#define USED_CHANNEL 1
41/* Ring Buffers' maximum size */
42#define DCP_MAX_PKG 20
43
44/* Control Register */
45#define DCP_REG_CTRL 0x000
46#define DCP_CTRL_SFRST (1<<31)
47#define DCP_CTRL_CLKGATE (1<<30)
48#define DCP_CTRL_CRYPTO_PRESENT (1<<29)
49#define DCP_CTRL_SHA_PRESENT (1<<28)
50#define DCP_CTRL_GATHER_RES_WRITE (1<<23)
51#define DCP_CTRL_ENABLE_CONTEXT_CACHE (1<<22)
52#define DCP_CTRL_ENABLE_CONTEXT_SWITCH (1<<21)
53#define DCP_CTRL_CH_IRQ_E_0 0x01
54#define DCP_CTRL_CH_IRQ_E_1 0x02
55#define DCP_CTRL_CH_IRQ_E_2 0x04
56#define DCP_CTRL_CH_IRQ_E_3 0x08
57
58/* Status register */
59#define DCP_REG_STAT 0x010
60#define DCP_STAT_OTP_KEY_READY (1<<28)
61#define DCP_STAT_CUR_CHANNEL(stat) ((stat>>24)&0x0F)
62#define DCP_STAT_READY_CHANNEL(stat) ((stat>>16)&0x0F)
63#define DCP_STAT_IRQ(stat) (stat&0x0F)
64#define DCP_STAT_CHAN_0 (0x01)
65#define DCP_STAT_CHAN_1 (0x02)
66#define DCP_STAT_CHAN_2 (0x04)
67#define DCP_STAT_CHAN_3 (0x08)
68
69/* Channel Control Register */
70#define DCP_REG_CHAN_CTRL 0x020
71#define DCP_CHAN_CTRL_CH0_IRQ_MERGED (1<<16)
72#define DCP_CHAN_CTRL_HIGH_PRIO_0 (0x0100)
73#define DCP_CHAN_CTRL_HIGH_PRIO_1 (0x0200)
74#define DCP_CHAN_CTRL_HIGH_PRIO_2 (0x0400)
75#define DCP_CHAN_CTRL_HIGH_PRIO_3 (0x0800)
76#define DCP_CHAN_CTRL_ENABLE_0 (0x01)
77#define DCP_CHAN_CTRL_ENABLE_1 (0x02)
78#define DCP_CHAN_CTRL_ENABLE_2 (0x04)
79#define DCP_CHAN_CTRL_ENABLE_3 (0x08)
80
81/*
82 * Channel Registers:
83 * The DCP has 4 channels. Each of these channels
84 * has 4 registers (command pointer, semaphore, status and options).
85 * The address of register REG of channel CHAN is obtained by
86 * dcp_chan_reg(REG, CHAN)
87 */
88#define DCP_REG_CHAN_PTR 0x00000100
89#define DCP_REG_CHAN_SEMA 0x00000110
90#define DCP_REG_CHAN_STAT 0x00000120
91#define DCP_REG_CHAN_OPT 0x00000130
92
93#define DCP_CHAN_STAT_NEXT_CHAIN_IS_0 0x010000
94#define DCP_CHAN_STAT_NO_CHAIN 0x020000
95#define DCP_CHAN_STAT_CONTEXT_ERROR 0x030000
96#define DCP_CHAN_STAT_PAYLOAD_ERROR 0x040000
97#define DCP_CHAN_STAT_INVALID_MODE 0x050000
98#define DCP_CHAN_STAT_PAGEFAULT 0x40
99#define DCP_CHAN_STAT_DST 0x20
100#define DCP_CHAN_STAT_SRC 0x10
101#define DCP_CHAN_STAT_PACKET 0x08
102#define DCP_CHAN_STAT_SETUP 0x04
103#define DCP_CHAN_STAT_MISMATCH 0x02
104
105/* hw packet control*/
106
107#define DCP_PKT_PAYLOAD_KEY (1<<11)
108#define DCP_PKT_OTP_KEY (1<<10)
109#define DCP_PKT_CIPHER_INIT (1<<9)
110#define DCP_PKG_CIPHER_ENCRYPT (1<<8)
111#define DCP_PKT_CIPHER_ENABLE (1<<5)
112#define DCP_PKT_DECR_SEM (1<<1)
113#define DCP_PKT_CHAIN (1<<2)
114#define DCP_PKT_IRQ 1
115
116#define DCP_PKT_MODE_CBC (1<<4)
117#define DCP_PKT_KEYSELECT_OTP (0xFF<<8)
118
119/* cipher flags */
120#define DCP_ENC 0x0001
121#define DCP_DEC 0x0002
122#define DCP_ECB 0x0004
123#define DCP_CBC 0x0008
124#define DCP_CBC_INIT 0x0010
125#define DCP_NEW_KEY 0x0040
126#define DCP_OTP_KEY 0x0080
127#define DCP_AES 0x1000
128
129/* DCP Flags */
130#define DCP_FLAG_BUSY 0x01
131#define DCP_FLAG_PRODUCING 0x02
132
133/* clock defines */
134#define CLOCK_ON 1
135#define CLOCK_OFF 0
136
137struct dcp_dev_req_ctx {
138 int mode;
139};
140
141struct dcp_op {
142 unsigned int flags;
143 u8 key[AES_KEYSIZE_128];
144 int keylen;
145
146 struct ablkcipher_request *req;
147 struct crypto_ablkcipher *fallback;
148
149 uint32_t stat;
150 uint32_t pkt1;
151 uint32_t pkt2;
152 struct ablkcipher_walk walk;
153};
154
155struct dcp_dev {
156 struct device *dev;
157 void __iomem *dcp_regs_base;
158
159 int dcp_vmi_irq;
160 int dcp_irq;
161
162 spinlock_t queue_lock;
163 struct crypto_queue queue;
164
165 uint32_t pkt_produced;
166 uint32_t pkt_consumed;
167
168 struct dcp_hw_packet *hw_pkg[DCP_MAX_PKG];
169 dma_addr_t hw_phys_pkg;
170
171 /* [KEY][IV] Both with 16 Bytes */
172 u8 *payload_base;
173 dma_addr_t payload_base_dma;
174
175
176 struct tasklet_struct done_task;
177 struct tasklet_struct queue_task;
178 struct timer_list watchdog;
179
180 unsigned long flags;
181
182 struct dcp_op *ctx;
183
184 struct miscdevice dcp_bootstream_misc;
185};
186
187struct dcp_hw_packet {
188 uint32_t next;
189 uint32_t pkt1;
190 uint32_t pkt2;
191 uint32_t src;
192 uint32_t dst;
193 uint32_t size;
194 uint32_t payload;
195 uint32_t stat;
196};
197
198static struct dcp_dev *global_dev;
199
200static inline u32 dcp_chan_reg(u32 reg, int chan)
201{
202 return reg + (chan) * 0x40;
203}
204
205static inline void dcp_write(struct dcp_dev *dev, u32 data, u32 reg)
206{
207 writel(data, dev->dcp_regs_base + reg);
208}
209
210static inline void dcp_set(struct dcp_dev *dev, u32 data, u32 reg)
211{
212 writel(data, dev->dcp_regs_base + (reg | 0x04));
213}
214
215static inline void dcp_clear(struct dcp_dev *dev, u32 data, u32 reg)
216{
217 writel(data, dev->dcp_regs_base + (reg | 0x08));
218}
219
220static inline void dcp_toggle(struct dcp_dev *dev, u32 data, u32 reg)
221{
222 writel(data, dev->dcp_regs_base + (reg | 0x0C));
223}
224
225static inline unsigned int dcp_read(struct dcp_dev *dev, u32 reg)
226{
227 return readl(dev->dcp_regs_base + reg);
228}
229
230static void dcp_dma_unmap(struct dcp_dev *dev, struct dcp_hw_packet *pkt)
231{
232 dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE);
233 dma_unmap_page(dev->dev, pkt->dst, pkt->size, DMA_FROM_DEVICE);
234 dev_dbg(dev->dev, "unmap packet %x", (unsigned int) pkt);
235}
236
237static int dcp_dma_map(struct dcp_dev *dev,
238 struct ablkcipher_walk *walk, struct dcp_hw_packet *pkt)
239{
240 dev_dbg(dev->dev, "map packet %x", (unsigned int) pkt);
241 /* align to length = 16 */
242 pkt->size = walk->nbytes - (walk->nbytes % 16);
243
244 pkt->src = dma_map_page(dev->dev, walk->src.page, walk->src.offset,
245 pkt->size, DMA_TO_DEVICE);
246
247 if (pkt->src == 0) {
248 dev_err(dev->dev, "Unable to map src");
249 return -ENOMEM;
250 }
251
252 pkt->dst = dma_map_page(dev->dev, walk->dst.page, walk->dst.offset,
253 pkt->size, DMA_FROM_DEVICE);
254
255 if (pkt->dst == 0) {
256 dev_err(dev->dev, "Unable to map dst");
257 dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE);
258 return -ENOMEM;
259 }
260
261 return 0;
262}
263
264static void dcp_op_one(struct dcp_dev *dev, struct dcp_hw_packet *pkt,
265 uint8_t last)
266{
267 struct dcp_op *ctx = dev->ctx;
268 pkt->pkt1 = ctx->pkt1;
269 pkt->pkt2 = ctx->pkt2;
270
271 pkt->payload = (u32) dev->payload_base_dma;
272 pkt->stat = 0;
273
274 if (ctx->flags & DCP_CBC_INIT) {
275 pkt->pkt1 |= DCP_PKT_CIPHER_INIT;
276 ctx->flags &= ~DCP_CBC_INIT;
277 }
278
279 mod_timer(&dev->watchdog, jiffies + msecs_to_jiffies(500));
280 pkt->pkt1 |= DCP_PKT_IRQ;
281 if (!last)
282 pkt->pkt1 |= DCP_PKT_CHAIN;
283
284 dev->pkt_produced++;
285
286 dcp_write(dev, 1,
287 dcp_chan_reg(DCP_REG_CHAN_SEMA, USED_CHANNEL));
288}
289
290static void dcp_op_proceed(struct dcp_dev *dev)
291{
292 struct dcp_op *ctx = dev->ctx;
293 struct dcp_hw_packet *pkt;
294
295 while (ctx->walk.nbytes) {
296 int err = 0;
297
298 pkt = dev->hw_pkg[dev->pkt_produced % DCP_MAX_PKG];
299 err = dcp_dma_map(dev, &ctx->walk, pkt);
300 if (err) {
301 dev->ctx->stat |= err;
302 /* start timer to wait for already set up calls */
303 mod_timer(&dev->watchdog,
304 jiffies + msecs_to_jiffies(500));
305 break;
306 }
307
308
309 err = ctx->walk.nbytes - pkt->size;
310 ablkcipher_walk_done(dev->ctx->req, &dev->ctx->walk, err);
311
312 dcp_op_one(dev, pkt, ctx->walk.nbytes == 0);
313 /* we have to wait if no space is left in buffer */
314 if (dev->pkt_produced - dev->pkt_consumed == DCP_MAX_PKG)
315 break;
316 }
317 clear_bit(DCP_FLAG_PRODUCING, &dev->flags);
318}
319
320static void dcp_op_start(struct dcp_dev *dev, uint8_t use_walk)
321{
322 struct dcp_op *ctx = dev->ctx;
323
324 if (ctx->flags & DCP_NEW_KEY) {
325 memcpy(dev->payload_base, ctx->key, ctx->keylen);
326 ctx->flags &= ~DCP_NEW_KEY;
327 }
328
329 ctx->pkt1 = 0;
330 ctx->pkt1 |= DCP_PKT_CIPHER_ENABLE;
331 ctx->pkt1 |= DCP_PKT_DECR_SEM;
332
333 if (ctx->flags & DCP_OTP_KEY)
334 ctx->pkt1 |= DCP_PKT_OTP_KEY;
335 else
336 ctx->pkt1 |= DCP_PKT_PAYLOAD_KEY;
337
338 if (ctx->flags & DCP_ENC)
339 ctx->pkt1 |= DCP_PKG_CIPHER_ENCRYPT;
340
341 ctx->pkt2 = 0;
342 if (ctx->flags & DCP_CBC)
343 ctx->pkt2 |= DCP_PKT_MODE_CBC;
344
345 dev->pkt_produced = 0;
346 dev->pkt_consumed = 0;
347
348 ctx->stat = 0;
349 dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
350 dcp_write(dev, (u32) dev->hw_phys_pkg,
351 dcp_chan_reg(DCP_REG_CHAN_PTR, USED_CHANNEL));
352
353 set_bit(DCP_FLAG_PRODUCING, &dev->flags);
354
355 if (use_walk) {
356 ablkcipher_walk_init(&ctx->walk, ctx->req->dst,
357 ctx->req->src, ctx->req->nbytes);
358 ablkcipher_walk_phys(ctx->req, &ctx->walk);
359 dcp_op_proceed(dev);
360 } else {
361 dcp_op_one(dev, dev->hw_pkg[0], 1);
362 clear_bit(DCP_FLAG_PRODUCING, &dev->flags);
363 }
364}
365
366static void dcp_done_task(unsigned long data)
367{
368 struct dcp_dev *dev = (struct dcp_dev *)data;
369 struct dcp_hw_packet *last_packet;
370 int fin;
371 fin = 0;
372
373 for (last_packet = dev->hw_pkg[(dev->pkt_consumed) % DCP_MAX_PKG];
374 last_packet->stat == 1;
375 last_packet =
376 dev->hw_pkg[++(dev->pkt_consumed) % DCP_MAX_PKG]) {
377
378 dcp_dma_unmap(dev, last_packet);
379 last_packet->stat = 0;
380 fin++;
381 }
382 /* the last call of this function already consumed this IRQ's packet */
383 if (fin == 0)
384 return;
385
386 dev_dbg(dev->dev,
387 "Packet(s) done with status %x; finished: %d, produced:%d, complete consumed: %d",
388 dev->ctx->stat, fin, dev->pkt_produced, dev->pkt_consumed);
389
390 last_packet = dev->hw_pkg[(dev->pkt_consumed - 1) % DCP_MAX_PKG];
391 if (!dev->ctx->stat && last_packet->pkt1 & DCP_PKT_CHAIN) {
392 if (!test_and_set_bit(DCP_FLAG_PRODUCING, &dev->flags))
393 dcp_op_proceed(dev);
394 return;
395 }
396
397 while (unlikely(dev->pkt_consumed < dev->pkt_produced)) {
398 dcp_dma_unmap(dev,
399 dev->hw_pkg[dev->pkt_consumed++ % DCP_MAX_PKG]);
400 }
401
402 if (dev->ctx->flags & DCP_OTP_KEY) {
403 /* we used the miscdevice, no walk to finish */
404 clear_bit(DCP_FLAG_BUSY, &dev->flags);
405 return;
406 }
407
408 ablkcipher_walk_complete(&dev->ctx->walk);
409 dev->ctx->req->base.complete(&dev->ctx->req->base,
410 dev->ctx->stat);
411 dev->ctx->req = NULL;
412 /* in case there are other requests in the queue */
413 tasklet_schedule(&dev->queue_task);
414}
415
416static void dcp_watchdog(unsigned long data)
417{
418 struct dcp_dev *dev = (struct dcp_dev *)data;
419 dev->ctx->stat |= dcp_read(dev,
420 dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
421
422 dev_err(dev->dev, "Timeout, Channel status: %x", dev->ctx->stat);
423
424 if (!dev->ctx->stat)
425 dev->ctx->stat = -ETIMEDOUT;
426
427 dcp_done_task(data);
428}
429
430
431static irqreturn_t dcp_common_irq(int irq, void *context)
432{
433 u32 msk;
434 struct dcp_dev *dev = (struct dcp_dev *) context;
435
436 del_timer(&dev->watchdog);
437
438 msk = DCP_STAT_IRQ(dcp_read(dev, DCP_REG_STAT));
439 dcp_clear(dev, msk, DCP_REG_STAT);
440 if (msk == 0)
441 return IRQ_NONE;
442
443 dev->ctx->stat |= dcp_read(dev,
444 dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
445
446 if (msk & DCP_STAT_CHAN_1)
447 tasklet_schedule(&dev->done_task);
448
449 return IRQ_HANDLED;
450}
451
452static irqreturn_t dcp_vmi_irq(int irq, void *context)
453{
454 return dcp_common_irq(irq, context);
455}
456
457static irqreturn_t dcp_irq(int irq, void *context)
458{
459 return dcp_common_irq(irq, context);
460}
461
462static void dcp_crypt(struct dcp_dev *dev, struct dcp_op *ctx)
463{
464 dev->ctx = ctx;
465
466 if ((ctx->flags & DCP_CBC) && ctx->req->info) {
467 ctx->flags |= DCP_CBC_INIT;
468 memcpy(dev->payload_base + AES_KEYSIZE_128,
469 ctx->req->info, AES_KEYSIZE_128);
470 }
471
472 dcp_op_start(dev, 1);
473}
474
475static void dcp_queue_task(unsigned long data)
476{
477 struct dcp_dev *dev = (struct dcp_dev *) data;
478 struct crypto_async_request *async_req, *backlog;
479 struct crypto_ablkcipher *tfm;
480 struct dcp_op *ctx;
481 struct dcp_dev_req_ctx *rctx;
482 struct ablkcipher_request *req;
483 unsigned long flags;
484
485 spin_lock_irqsave(&dev->queue_lock, flags);
486
487 backlog = crypto_get_backlog(&dev->queue);
488 async_req = crypto_dequeue_request(&dev->queue);
489
490 spin_unlock_irqrestore(&dev->queue_lock, flags);
491
492 if (!async_req)
493 goto ret_nothing_done;
494
495 if (backlog)
496 backlog->complete(backlog, -EINPROGRESS);
497
498 req = ablkcipher_request_cast(async_req);
499 tfm = crypto_ablkcipher_reqtfm(req);
500 rctx = ablkcipher_request_ctx(req);
501 ctx = crypto_ablkcipher_ctx(tfm);
502
503 if (!req->src || !req->dst)
504 goto ret_nothing_done;
505
506 ctx->flags |= rctx->mode;
507 ctx->req = req;
508
509 dcp_crypt(dev, ctx);
510
511 return;
512
513ret_nothing_done:
514 clear_bit(DCP_FLAG_BUSY, &dev->flags);
515}
516
517
518static int dcp_cra_init(struct crypto_tfm *tfm)
519{
520 const char *name = tfm->__crt_alg->cra_name;
521 struct dcp_op *ctx = crypto_tfm_ctx(tfm);
522
523 tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_dev_req_ctx);
524
525 ctx->fallback = crypto_alloc_ablkcipher(name, 0,
526 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
527
528 if (IS_ERR(ctx->fallback)) {
529 dev_err(global_dev->dev, "Error allocating fallback algo %s\n",
530 name);
531 return PTR_ERR(ctx->fallback);
532 }
533
534 return 0;
535}
536
537static void dcp_cra_exit(struct crypto_tfm *tfm)
538{
539 struct dcp_op *ctx = crypto_tfm_ctx(tfm);
540
541 if (ctx->fallback)
542 crypto_free_ablkcipher(ctx->fallback);
543
544 ctx->fallback = NULL;
545}
546
547/* async interface */
548static int dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
549 unsigned int len)
550{
551 struct dcp_op *ctx = crypto_ablkcipher_ctx(tfm);
552 unsigned int ret = 0;
553 ctx->keylen = len;
554 ctx->flags = 0;
555 if (len == AES_KEYSIZE_128) {
556 if (memcmp(ctx->key, key, AES_KEYSIZE_128)) {
557 memcpy(ctx->key, key, len);
558 ctx->flags |= DCP_NEW_KEY;
559 }
560 return 0;
561 }
562
563 ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
564 ctx->fallback->base.crt_flags |=
565 (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
566
567 ret = crypto_ablkcipher_setkey(ctx->fallback, key, len);
568 if (ret) {
569 struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm);
570
571 tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK;
572 tfm_aux->crt_flags |=
573 (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK);
574 }
575 return ret;
576}
577
578static int dcp_aes_cbc_crypt(struct ablkcipher_request *req, int mode)
579{
580 struct dcp_dev_req_ctx *rctx = ablkcipher_request_ctx(req);
581 struct dcp_dev *dev = global_dev;
582 unsigned long flags;
583 int err = 0;
584
585 if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE))
586 return -EINVAL;
587
588 rctx->mode = mode;
589
590 spin_lock_irqsave(&dev->queue_lock, flags);
591 err = ablkcipher_enqueue_request(&dev->queue, req);
592 spin_unlock_irqrestore(&dev->queue_lock, flags);
593
594 flags = test_and_set_bit(DCP_FLAG_BUSY, &dev->flags);
595
596 if (!(flags & DCP_FLAG_BUSY))
597 tasklet_schedule(&dev->queue_task);
598
599 return err;
600}
601
602static int dcp_aes_cbc_encrypt(struct ablkcipher_request *req)
603{
604 struct crypto_tfm *tfm =
605 crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
606 struct dcp_op *ctx = crypto_ablkcipher_ctx(
607 crypto_ablkcipher_reqtfm(req));
608
609 if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
610 int err = 0;
611 ablkcipher_request_set_tfm(req, ctx->fallback);
612 err = crypto_ablkcipher_encrypt(req);
613 ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
614 return err;
615 }
616
617 return dcp_aes_cbc_crypt(req, DCP_AES | DCP_ENC | DCP_CBC);
618}
619
620static int dcp_aes_cbc_decrypt(struct ablkcipher_request *req)
621{
622 struct crypto_tfm *tfm =
623 crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
624 struct dcp_op *ctx = crypto_ablkcipher_ctx(
625 crypto_ablkcipher_reqtfm(req));
626
627 if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
628 int err = 0;
629 ablkcipher_request_set_tfm(req, ctx->fallback);
630 err = crypto_ablkcipher_decrypt(req);
631 ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
632 return err;
633 }
634 return dcp_aes_cbc_crypt(req, DCP_AES | DCP_DEC | DCP_CBC);
635}
636
637static struct crypto_alg algs[] = {
638 {
639 .cra_name = "cbc(aes)",
640 .cra_driver_name = "dcp-cbc-aes",
641 .cra_alignmask = 3,
642 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
643 CRYPTO_ALG_NEED_FALLBACK,
644 .cra_blocksize = AES_KEYSIZE_128,
645 .cra_type = &crypto_ablkcipher_type,
646 .cra_priority = 300,
647 .cra_u.ablkcipher = {
648 .min_keysize = AES_KEYSIZE_128,
649 .max_keysize = AES_KEYSIZE_128,
650 .setkey = dcp_aes_setkey,
651 .encrypt = dcp_aes_cbc_encrypt,
652 .decrypt = dcp_aes_cbc_decrypt,
653 .ivsize = AES_KEYSIZE_128,
654 }
655
656 },
657};
658
659/* DCP bootstream verification interface: uses OTP key for crypto */
660static int dcp_bootstream_open(struct inode *inode, struct file *file)
661{
662 file->private_data = container_of((file->private_data),
663 struct dcp_dev, dcp_bootstream_misc);
664 return 0;
665}
666
667static long dcp_bootstream_ioctl(struct file *file,
668 unsigned int cmd, unsigned long arg)
669{
670 struct dcp_dev *dev = (struct dcp_dev *) file->private_data;
671 void __user *argp = (void __user *)arg;
672 int ret;
673
674 if (dev == NULL)
675 return -EBADF;
676
677 if (cmd != DBS_ENC && cmd != DBS_DEC)
678 return -EINVAL;
679
680 if (copy_from_user(dev->payload_base, argp, 16))
681 return -EFAULT;
682
683 if (test_and_set_bit(DCP_FLAG_BUSY, &dev->flags))
684 return -EAGAIN;
685
686 dev->ctx = kzalloc(sizeof(struct dcp_op), GFP_KERNEL);
687 if (!dev->ctx) {
688 dev_err(dev->dev,
689 "cannot allocate context for OTP crypto");
690 clear_bit(DCP_FLAG_BUSY, &dev->flags);
691 return -ENOMEM;
692 }
693
694 dev->ctx->flags = DCP_AES | DCP_ECB | DCP_OTP_KEY | DCP_CBC_INIT;
695 dev->ctx->flags |= (cmd == DBS_ENC) ? DCP_ENC : DCP_DEC;
696 dev->hw_pkg[0]->src = dev->payload_base_dma;
697 dev->hw_pkg[0]->dst = dev->payload_base_dma;
698 dev->hw_pkg[0]->size = 16;
699
700 dcp_op_start(dev, 0);
701
702 while (test_bit(DCP_FLAG_BUSY, &dev->flags))
703 cpu_relax();
704
705 ret = dev->ctx->stat;
706 if (!ret && copy_to_user(argp, dev->payload_base, 16))
707 ret = -EFAULT;
708
709 kfree(dev->ctx);
710
711 return ret;
712}
713
714static const struct file_operations dcp_bootstream_fops = {
715 .owner = THIS_MODULE,
716 .unlocked_ioctl = dcp_bootstream_ioctl,
717 .open = dcp_bootstream_open,
718};
719
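The bootstream interface above is a thin ioctl protocol: user space hands the driver one 16-byte AES block, the block is run through the on-chip OTP key (DBS_ENC to encrypt, DBS_DEC to decrypt), and the result is copied back into the caller's buffer. A minimal user-space sketch of that protocol follows; the /dev/dcpboot path assumes the usual udev naming for the misc device registered in dcp_probe() below, and the DBS_ENC/DBS_DEC ioctl numbers live in a driver header that is not part of this diff, so the command value is passed in by the caller here.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

/*
 * Sketch of the dcpboot ioctl protocol implemented above.  @cmd must be
 * DBS_ENC or DBS_DEC from the driver's ioctl header (not shown in this
 * diff); @block is exactly 16 bytes and is transformed in place.
 */
static int dcpboot_crypt_block(unsigned long cmd, unsigned char block[16])
{
	int fd, ret;

	fd = open("/dev/dcpboot", O_RDWR);
	if (fd < 0)
		return -1;

	/* The driver copies 16 bytes in, runs AES with the OTP key and
	 * copies the 16-byte result back into the same buffer. */
	ret = ioctl(fd, cmd, block);

	close(fd);
	return ret;
}
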
720static int dcp_probe(struct platform_device *pdev)
721{
722 struct dcp_dev *dev = NULL;
723 struct resource *r;
724 int i, ret, j;
725
726 dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
727 if (!dev)
728 return -ENOMEM;
729
730 global_dev = dev;
731 dev->dev = &pdev->dev;
732
733 platform_set_drvdata(pdev, dev);
734
735 r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
736 dev->dcp_regs_base = devm_ioremap_resource(&pdev->dev, r);
737 if (IS_ERR(dev->dcp_regs_base))
738 return PTR_ERR(dev->dcp_regs_base);
739
740 dcp_set(dev, DCP_CTRL_SFRST, DCP_REG_CTRL);
741 udelay(10);
742 dcp_clear(dev, DCP_CTRL_SFRST | DCP_CTRL_CLKGATE, DCP_REG_CTRL);
743
744 dcp_write(dev, DCP_CTRL_GATHER_RES_WRITE |
745 DCP_CTRL_ENABLE_CONTEXT_CACHE | DCP_CTRL_CH_IRQ_E_1,
746 DCP_REG_CTRL);
747
748 dcp_write(dev, DCP_CHAN_CTRL_ENABLE_1, DCP_REG_CHAN_CTRL);
749
750 for (i = 0; i < 4; i++)
751 dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, i));
752
753 dcp_clear(dev, -1, DCP_REG_STAT);
754
755
756 r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
757 if (!r) {
758 dev_err(&pdev->dev, "can't get IRQ resource (0)\n");
759 return -EIO;
760 }
761 dev->dcp_vmi_irq = r->start;
762 ret = devm_request_irq(&pdev->dev, dev->dcp_vmi_irq, dcp_vmi_irq, 0,
763 "dcp", dev);
764 if (ret != 0) {
765 dev_err(&pdev->dev, "can't request_irq (0)\n");
766 return -EIO;
767 }
768
769 r = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
770 if (!r) {
771 dev_err(&pdev->dev, "can't get IRQ resource (1)\n");
772 return -EIO;
773 }
774 dev->dcp_irq = r->start;
775 ret = devm_request_irq(&pdev->dev, dev->dcp_irq, dcp_irq, 0, "dcp",
776 dev);
777 if (ret != 0) {
778 dev_err(&pdev->dev, "can't request_irq (1)\n");
779 return -EIO;
780 }
781
782 dev->hw_pkg[0] = dma_alloc_coherent(&pdev->dev,
783 DCP_MAX_PKG * sizeof(struct dcp_hw_packet),
784 &dev->hw_phys_pkg,
785 GFP_KERNEL);
786 if (!dev->hw_pkg[0]) {
787 dev_err(&pdev->dev, "Could not allocate hw descriptors\n");
788 return -ENOMEM;
789 }
790
791 for (i = 1; i < DCP_MAX_PKG; i++) {
792 dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg
793 + i * sizeof(struct dcp_hw_packet);
794 dev->hw_pkg[i] = dev->hw_pkg[i - 1] + 1;
795 }
796 dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg;
797
798
799 dev->payload_base = dma_alloc_coherent(&pdev->dev, 2 * AES_KEYSIZE_128,
800 &dev->payload_base_dma, GFP_KERNEL);
801 if (!dev->payload_base) {
802 dev_err(&pdev->dev, "Could not allocate memory for key\n");
803 ret = -ENOMEM;
804 goto err_free_hw_packet;
805 }
806 tasklet_init(&dev->queue_task, dcp_queue_task,
807 (unsigned long) dev);
808 tasklet_init(&dev->done_task, dcp_done_task,
809 (unsigned long) dev);
810 spin_lock_init(&dev->queue_lock);
811
812 crypto_init_queue(&dev->queue, 10);
813
814 init_timer(&dev->watchdog);
815 dev->watchdog.function = &dcp_watchdog;
816 dev->watchdog.data = (unsigned long)dev;
817
818 dev->dcp_bootstream_misc.minor = MISC_DYNAMIC_MINOR,
819 dev->dcp_bootstream_misc.name = "dcpboot",
820 dev->dcp_bootstream_misc.fops = &dcp_bootstream_fops,
821 ret = misc_register(&dev->dcp_bootstream_misc);
822 if (ret != 0) {
823 dev_err(dev->dev, "Unable to register misc device\n");
824 goto err_free_key_iv;
825 }
826
827 for (i = 0; i < ARRAY_SIZE(algs); i++) {
828 algs[i].cra_priority = 300;
829 algs[i].cra_ctxsize = sizeof(struct dcp_op);
830 algs[i].cra_module = THIS_MODULE;
831 algs[i].cra_init = dcp_cra_init;
832 algs[i].cra_exit = dcp_cra_exit;
833 if (crypto_register_alg(&algs[i])) {
834 dev_err(&pdev->dev, "register algorithm failed\n");
835 ret = -ENOMEM;
836 goto err_unregister;
837 }
838 }
839 dev_notice(&pdev->dev, "DCP crypto enabled!\n");
840
841 return 0;
842
843err_unregister:
844 for (j = 0; j < i; j++)
845 crypto_unregister_alg(&algs[j]);
846err_free_key_iv:
847 tasklet_kill(&dev->done_task);
848 tasklet_kill(&dev->queue_task);
849 dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base,
850 dev->payload_base_dma);
851err_free_hw_packet:
852 dma_free_coherent(&pdev->dev, DCP_MAX_PKG *
853 sizeof(struct dcp_hw_packet), dev->hw_pkg[0],
854 dev->hw_phys_pkg);
855
856 return ret;
857}
858
859static int dcp_remove(struct platform_device *pdev)
860{
861 struct dcp_dev *dev;
862 int j;
863 dev = platform_get_drvdata(pdev);
864
865 misc_deregister(&dev->dcp_bootstream_misc);
866
867 for (j = 0; j < ARRAY_SIZE(algs); j++)
868 crypto_unregister_alg(&algs[j]);
869
870 tasklet_kill(&dev->done_task);
871 tasklet_kill(&dev->queue_task);
872
873 dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base,
874 dev->payload_base_dma);
875
876 dma_free_coherent(&pdev->dev,
877 DCP_MAX_PKG * sizeof(struct dcp_hw_packet),
878 dev->hw_pkg[0], dev->hw_phys_pkg);
879
880 return 0;
881}
882
883static struct of_device_id fs_dcp_of_match[] = {
884 { .compatible = "fsl-dcp"},
885 {},
886};
887
888static struct platform_driver fs_dcp_driver = {
889 .probe = dcp_probe,
890 .remove = dcp_remove,
891 .driver = {
892 .name = "fsl-dcp",
893 .owner = THIS_MODULE,
894 .of_match_table = fs_dcp_of_match
895 }
896};
897
898module_platform_driver(fs_dcp_driver);
899
900
901MODULE_AUTHOR("Tobias Rauter <tobias.rauter@gmail.com>");
902MODULE_DESCRIPTION("Freescale DCP Crypto Driver");
903MODULE_LICENSE("GPL");
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
new file mode 100644
index 000000000000..a6db7fa6f891
--- /dev/null
+++ b/drivers/crypto/mxs-dcp.c
@@ -0,0 +1,1100 @@
1/*
2 * Freescale i.MX23/i.MX28 Data Co-Processor driver
3 *
4 * Copyright (C) 2013 Marek Vasut <marex@denx.de>
5 *
6 * The code contained herein is licensed under the GNU General Public
7 * License. You may obtain a copy of the GNU General Public License
8 * Version 2 or later at the following locations:
9 *
10 * http://www.opensource.org/licenses/gpl-license.html
11 * http://www.gnu.org/copyleft/gpl.html
12 */
13
14#include <linux/crypto.h>
15#include <linux/dma-mapping.h>
16#include <linux/interrupt.h>
17#include <linux/io.h>
18#include <linux/kernel.h>
19#include <linux/kthread.h>
20#include <linux/module.h>
21#include <linux/of.h>
22#include <linux/platform_device.h>
23#include <linux/stmp_device.h>
24
25#include <crypto/aes.h>
26#include <crypto/sha.h>
27#include <crypto/internal/hash.h>
28
29#define DCP_MAX_CHANS 4
30#define DCP_BUF_SZ PAGE_SIZE
31
32/* DCP DMA descriptor. */
33struct dcp_dma_desc {
34 uint32_t next_cmd_addr;
35 uint32_t control0;
36 uint32_t control1;
37 uint32_t source;
38 uint32_t destination;
39 uint32_t size;
40 uint32_t payload;
41 uint32_t status;
42};
43
44/* Coherent aligned block for bounce buffering. */
45struct dcp_coherent_block {
46 uint8_t aes_in_buf[DCP_BUF_SZ];
47 uint8_t aes_out_buf[DCP_BUF_SZ];
48 uint8_t sha_in_buf[DCP_BUF_SZ];
49
50 uint8_t aes_key[2 * AES_KEYSIZE_128];
51 uint8_t sha_digest[SHA256_DIGEST_SIZE];
52
53 struct dcp_dma_desc desc[DCP_MAX_CHANS];
54};
55
56struct dcp {
57 struct device *dev;
58 void __iomem *base;
59
60 uint32_t caps;
61
62 struct dcp_coherent_block *coh;
63
64 struct completion completion[DCP_MAX_CHANS];
65 struct mutex mutex[DCP_MAX_CHANS];
66 struct task_struct *thread[DCP_MAX_CHANS];
67 struct crypto_queue queue[DCP_MAX_CHANS];
68};
69
70enum dcp_chan {
71 DCP_CHAN_HASH_SHA = 0,
72 DCP_CHAN_CRYPTO = 2,
73};
74
75struct dcp_async_ctx {
76 /* Common context */
77 enum dcp_chan chan;
78 uint32_t fill;
79
80 /* SHA Hash-specific context */
81 struct mutex mutex;
82 uint32_t alg;
83 unsigned int hot:1;
84
85 /* Crypto-specific context */
86 unsigned int enc:1;
87 unsigned int ecb:1;
88 struct crypto_ablkcipher *fallback;
89 unsigned int key_len;
90 uint8_t key[AES_KEYSIZE_128];
91};
92
93struct dcp_sha_req_ctx {
94 unsigned int init:1;
95 unsigned int fini:1;
96};
97
98/*
 99 * There can be only one instance of the MXS DCP, due to the
100 * design of the Linux Crypto API.
101 */
102static struct dcp *global_sdcp;
103static DEFINE_MUTEX(global_mutex);
104
105/* DCP register layout. */
106#define MXS_DCP_CTRL 0x00
107#define MXS_DCP_CTRL_GATHER_RESIDUAL_WRITES (1 << 23)
108#define MXS_DCP_CTRL_ENABLE_CONTEXT_CACHING (1 << 22)
109
110#define MXS_DCP_STAT 0x10
111#define MXS_DCP_STAT_CLR 0x18
112#define MXS_DCP_STAT_IRQ_MASK 0xf
113
114#define MXS_DCP_CHANNELCTRL 0x20
115#define MXS_DCP_CHANNELCTRL_ENABLE_CHANNEL_MASK 0xff
116
117#define MXS_DCP_CAPABILITY1 0x40
118#define MXS_DCP_CAPABILITY1_SHA256 (4 << 16)
119#define MXS_DCP_CAPABILITY1_SHA1 (1 << 16)
120#define MXS_DCP_CAPABILITY1_AES128 (1 << 0)
121
122#define MXS_DCP_CONTEXT 0x50
123
124#define MXS_DCP_CH_N_CMDPTR(n) (0x100 + ((n) * 0x40))
125
126#define MXS_DCP_CH_N_SEMA(n) (0x110 + ((n) * 0x40))
127
128#define MXS_DCP_CH_N_STAT(n) (0x120 + ((n) * 0x40))
129#define MXS_DCP_CH_N_STAT_CLR(n) (0x128 + ((n) * 0x40))
130
131/* DMA descriptor bits. */
132#define MXS_DCP_CONTROL0_HASH_TERM (1 << 13)
133#define MXS_DCP_CONTROL0_HASH_INIT (1 << 12)
134#define MXS_DCP_CONTROL0_PAYLOAD_KEY (1 << 11)
135#define MXS_DCP_CONTROL0_CIPHER_ENCRYPT (1 << 8)
136#define MXS_DCP_CONTROL0_CIPHER_INIT (1 << 9)
137#define MXS_DCP_CONTROL0_ENABLE_HASH (1 << 6)
138#define MXS_DCP_CONTROL0_ENABLE_CIPHER (1 << 5)
139#define MXS_DCP_CONTROL0_DECR_SEMAPHORE (1 << 1)
140#define MXS_DCP_CONTROL0_INTERRUPT (1 << 0)
141
142#define MXS_DCP_CONTROL1_HASH_SELECT_SHA256 (2 << 16)
143#define MXS_DCP_CONTROL1_HASH_SELECT_SHA1 (0 << 16)
144#define MXS_DCP_CONTROL1_CIPHER_MODE_CBC (1 << 4)
145#define MXS_DCP_CONTROL1_CIPHER_MODE_ECB (0 << 4)
146#define MXS_DCP_CONTROL1_CIPHER_SELECT_AES128 (0 << 0)
147
148static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
149{
150 struct dcp *sdcp = global_sdcp;
151 const int chan = actx->chan;
152 uint32_t stat;
153 int ret;
154 struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
155
156 dma_addr_t desc_phys = dma_map_single(sdcp->dev, desc, sizeof(*desc),
157 DMA_TO_DEVICE);
158
159 reinit_completion(&sdcp->completion[chan]);
160
161 /* Clear status register. */
162 writel(0xffffffff, sdcp->base + MXS_DCP_CH_N_STAT_CLR(chan));
163
164 /* Load the DMA descriptor. */
165 writel(desc_phys, sdcp->base + MXS_DCP_CH_N_CMDPTR(chan));
166
167 /* Increment the semaphore to start the DMA transfer. */
168 writel(1, sdcp->base + MXS_DCP_CH_N_SEMA(chan));
169
170 ret = wait_for_completion_timeout(&sdcp->completion[chan],
171 msecs_to_jiffies(1000));
172 if (!ret) {
173 dev_err(sdcp->dev, "Channel %i timeout (DCP_STAT=0x%08x)\n",
174 chan, readl(sdcp->base + MXS_DCP_STAT));
175 return -ETIMEDOUT;
176 }
177
178 stat = readl(sdcp->base + MXS_DCP_CH_N_STAT(chan));
179 if (stat & 0xff) {
180 dev_err(sdcp->dev, "Channel %i error (CH_STAT=0x%08x)\n",
181 chan, stat);
182 return -EINVAL;
183 }
184
185 dma_unmap_single(sdcp->dev, desc_phys, sizeof(*desc), DMA_TO_DEVICE);
186
187 return 0;
188}
189
190/*
191 * Encryption (AES128)
192 */
193static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, int init)
194{
195 struct dcp *sdcp = global_sdcp;
196 struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
197 int ret;
198
199 dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
200 2 * AES_KEYSIZE_128,
201 DMA_TO_DEVICE);
202 dma_addr_t src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
203 DCP_BUF_SZ, DMA_TO_DEVICE);
204 dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf,
205 DCP_BUF_SZ, DMA_FROM_DEVICE);
206
207 /* Fill in the DMA descriptor. */
208 desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE |
209 MXS_DCP_CONTROL0_INTERRUPT |
210 MXS_DCP_CONTROL0_ENABLE_CIPHER;
211
212 /* Payload contains the key. */
213 desc->control0 |= MXS_DCP_CONTROL0_PAYLOAD_KEY;
214
215 if (actx->enc)
216 desc->control0 |= MXS_DCP_CONTROL0_CIPHER_ENCRYPT;
217 if (init)
218 desc->control0 |= MXS_DCP_CONTROL0_CIPHER_INIT;
219
220 desc->control1 = MXS_DCP_CONTROL1_CIPHER_SELECT_AES128;
221
222 if (actx->ecb)
223 desc->control1 |= MXS_DCP_CONTROL1_CIPHER_MODE_ECB;
224 else
225 desc->control1 |= MXS_DCP_CONTROL1_CIPHER_MODE_CBC;
226
227 desc->next_cmd_addr = 0;
228 desc->source = src_phys;
229 desc->destination = dst_phys;
230 desc->size = actx->fill;
231 desc->payload = key_phys;
232 desc->status = 0;
233
234 ret = mxs_dcp_start_dma(actx);
235
236 dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128,
237 DMA_TO_DEVICE);
238 dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
239 dma_unmap_single(sdcp->dev, dst_phys, DCP_BUF_SZ, DMA_FROM_DEVICE);
240
241 return ret;
242}
243
244static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq)
245{
246 struct dcp *sdcp = global_sdcp;
247
248 struct ablkcipher_request *req = ablkcipher_request_cast(arq);
249 struct dcp_async_ctx *actx = crypto_tfm_ctx(arq->tfm);
250
251 struct scatterlist *dst = req->dst;
252 struct scatterlist *src = req->src;
253 const int nents = sg_nents(req->src);
254
255 const int out_off = DCP_BUF_SZ;
256 uint8_t *in_buf = sdcp->coh->aes_in_buf;
257 uint8_t *out_buf = sdcp->coh->aes_out_buf;
258
259 uint8_t *out_tmp, *src_buf, *dst_buf = NULL;
260 uint32_t dst_off = 0;
261
262 uint8_t *key = sdcp->coh->aes_key;
263
264 int ret = 0;
265 int split = 0;
266 unsigned int i, len, clen, rem = 0;
267 int init = 0;
268
269 actx->fill = 0;
270
271 /* Copy the key from the temporary location. */
272 memcpy(key, actx->key, actx->key_len);
273
274 if (!actx->ecb) {
275 /* Copy the CBC IV just past the key. */
276 memcpy(key + AES_KEYSIZE_128, req->info, AES_KEYSIZE_128);
277 /* CBC needs the INIT set. */
278 init = 1;
279 } else {
280 memset(key + AES_KEYSIZE_128, 0, AES_KEYSIZE_128);
281 }
282
283 for_each_sg(req->src, src, nents, i) {
284 src_buf = sg_virt(src);
285 len = sg_dma_len(src);
286
287 do {
288 if (actx->fill + len > out_off)
289 clen = out_off - actx->fill;
290 else
291 clen = len;
292
293 memcpy(in_buf + actx->fill, src_buf, clen);
294 len -= clen;
295 src_buf += clen;
296 actx->fill += clen;
297
298 /*
299 * If we filled the buffer or this is the last SG,
300 * submit the buffer.
301 */
302 if (actx->fill == out_off || sg_is_last(src)) {
303 ret = mxs_dcp_run_aes(actx, init);
304 if (ret)
305 return ret;
306 init = 0;
307
308 out_tmp = out_buf;
309 while (dst && actx->fill) {
310 if (!split) {
311 dst_buf = sg_virt(dst);
312 dst_off = 0;
313 }
314 rem = min(sg_dma_len(dst) - dst_off,
315 actx->fill);
316
317 memcpy(dst_buf + dst_off, out_tmp, rem);
318 out_tmp += rem;
319 dst_off += rem;
320 actx->fill -= rem;
321
322 if (dst_off == sg_dma_len(dst)) {
323 dst = sg_next(dst);
324 split = 0;
325 } else {
326 split = 1;
327 }
328 }
329 }
330 } while (len);
331 }
332
333 return ret;
334}
335
336static int dcp_chan_thread_aes(void *data)
337{
338 struct dcp *sdcp = global_sdcp;
339 const int chan = DCP_CHAN_CRYPTO;
340
341 struct crypto_async_request *backlog;
342 struct crypto_async_request *arq;
343
344 int ret;
345
346 do {
347 __set_current_state(TASK_INTERRUPTIBLE);
348
349 mutex_lock(&sdcp->mutex[chan]);
350 backlog = crypto_get_backlog(&sdcp->queue[chan]);
351 arq = crypto_dequeue_request(&sdcp->queue[chan]);
352 mutex_unlock(&sdcp->mutex[chan]);
353
354 if (backlog)
355 backlog->complete(backlog, -EINPROGRESS);
356
357 if (arq) {
358 ret = mxs_dcp_aes_block_crypt(arq);
359 arq->complete(arq, ret);
360 continue;
361 }
362
363 schedule();
364 } while (!kthread_should_stop());
365
366 return 0;
367}
368
369static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc)
370{
371 struct crypto_tfm *tfm =
372 crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
373 struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx(
374 crypto_ablkcipher_reqtfm(req));
375 int ret;
376
377 ablkcipher_request_set_tfm(req, ctx->fallback);
378
379 if (enc)
380 ret = crypto_ablkcipher_encrypt(req);
381 else
382 ret = crypto_ablkcipher_decrypt(req);
383
384 ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
385
386 return ret;
387}
388
389static int mxs_dcp_aes_enqueue(struct ablkcipher_request *req, int enc, int ecb)
390{
391 struct dcp *sdcp = global_sdcp;
392 struct crypto_async_request *arq = &req->base;
393 struct dcp_async_ctx *actx = crypto_tfm_ctx(arq->tfm);
394 int ret;
395
396 if (unlikely(actx->key_len != AES_KEYSIZE_128))
397 return mxs_dcp_block_fallback(req, enc);
398
399 actx->enc = enc;
400 actx->ecb = ecb;
401 actx->chan = DCP_CHAN_CRYPTO;
402
403 mutex_lock(&sdcp->mutex[actx->chan]);
404 ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base);
405 mutex_unlock(&sdcp->mutex[actx->chan]);
406
407 wake_up_process(sdcp->thread[actx->chan]);
408
409 return -EINPROGRESS;
410}
411
412static int mxs_dcp_aes_ecb_decrypt(struct ablkcipher_request *req)
413{
414 return mxs_dcp_aes_enqueue(req, 0, 1);
415}
416
417static int mxs_dcp_aes_ecb_encrypt(struct ablkcipher_request *req)
418{
419 return mxs_dcp_aes_enqueue(req, 1, 1);
420}
421
422static int mxs_dcp_aes_cbc_decrypt(struct ablkcipher_request *req)
423{
424 return mxs_dcp_aes_enqueue(req, 0, 0);
425}
426
427static int mxs_dcp_aes_cbc_encrypt(struct ablkcipher_request *req)
428{
429 return mxs_dcp_aes_enqueue(req, 1, 0);
430}
431
432static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
433 unsigned int len)
434{
435 struct dcp_async_ctx *actx = crypto_ablkcipher_ctx(tfm);
436 int ret;
437
438 /*
439 * AES 128 is supported by the hardware; store the key into the
440 * temporary buffer and exit. We must use the temporary buffer here,
441 * since there can still be an operation in progress.
442 */
443 actx->key_len = len;
444 if (len == AES_KEYSIZE_128) {
445 memcpy(actx->key, key, len);
446 return 0;
447 }
448
449 /* Check if the key size is supported by kernel at all. */
450 if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256) {
451 tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
452 return -EINVAL;
453 }
454
455 /*
456 * If the requested AES key size is not supported by the hardware,
457 * but is supported by in-kernel software implementation, we use
458 * software fallback.
459 */
460 actx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
461 actx->fallback->base.crt_flags |=
462 tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK;
463
464 ret = crypto_ablkcipher_setkey(actx->fallback, key, len);
465 if (!ret)
466 return 0;
467
468 tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK;
469 tfm->base.crt_flags |=
470 actx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK;
471
472 return ret;
473}
474
475static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm)
476{
477 const char *name = tfm->__crt_alg->cra_name;
478 const uint32_t flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK;
479 struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm);
480 struct crypto_ablkcipher *blk;
481
482 blk = crypto_alloc_ablkcipher(name, 0, flags);
483 if (IS_ERR(blk))
484 return PTR_ERR(blk);
485
486 actx->fallback = blk;
487 tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_async_ctx);
488 return 0;
489}
490
491static void mxs_dcp_aes_fallback_exit(struct crypto_tfm *tfm)
492{
493 struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm);
494
495 crypto_free_ablkcipher(actx->fallback);
496 actx->fallback = NULL;
497}
498
499/*
500 * Hashing (SHA1/SHA256)
501 */
502static int mxs_dcp_run_sha(struct ahash_request *req)
503{
504 struct dcp *sdcp = global_sdcp;
505 int ret;
506
507 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
508 struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm);
509 struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req);
510
511 struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
512 dma_addr_t digest_phys = dma_map_single(sdcp->dev,
513 sdcp->coh->sha_digest,
514 SHA256_DIGEST_SIZE,
515 DMA_FROM_DEVICE);
516
517 dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf,
518 DCP_BUF_SZ, DMA_TO_DEVICE);
519
520 /* Fill in the DMA descriptor. */
521 desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE |
522 MXS_DCP_CONTROL0_INTERRUPT |
523 MXS_DCP_CONTROL0_ENABLE_HASH;
524 if (rctx->init)
525 desc->control0 |= MXS_DCP_CONTROL0_HASH_INIT;
526
527 desc->control1 = actx->alg;
528 desc->next_cmd_addr = 0;
529 desc->source = buf_phys;
530 desc->destination = 0;
531 desc->size = actx->fill;
532 desc->payload = 0;
533 desc->status = 0;
534
535 /* Set HASH_TERM bit for last transfer block. */
536 if (rctx->fini) {
537 desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM;
538 desc->payload = digest_phys;
539 }
540
541 ret = mxs_dcp_start_dma(actx);
542
543 dma_unmap_single(sdcp->dev, digest_phys, SHA256_DIGEST_SIZE,
544 DMA_FROM_DEVICE);
545 dma_unmap_single(sdcp->dev, buf_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
546
547 return ret;
548}
549
550static int dcp_sha_req_to_buf(struct crypto_async_request *arq)
551{
552 struct dcp *sdcp = global_sdcp;
553
554 struct ahash_request *req = ahash_request_cast(arq);
555 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
556 struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm);
557 struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req);
558 struct hash_alg_common *halg = crypto_hash_alg_common(tfm);
559 const int nents = sg_nents(req->src);
560
561 uint8_t *digest = sdcp->coh->sha_digest;
562 uint8_t *in_buf = sdcp->coh->sha_in_buf;
563
564 uint8_t *src_buf;
565
566 struct scatterlist *src;
567
568 unsigned int i, len, clen;
569 int ret;
570
571 int fin = rctx->fini;
572 if (fin)
573 rctx->fini = 0;
574
575 for_each_sg(req->src, src, nents, i) {
576 src_buf = sg_virt(src);
577 len = sg_dma_len(src);
578
579 do {
580 if (actx->fill + len > DCP_BUF_SZ)
581 clen = DCP_BUF_SZ - actx->fill;
582 else
583 clen = len;
584
585 memcpy(in_buf + actx->fill, src_buf, clen);
586 len -= clen;
587 src_buf += clen;
588 actx->fill += clen;
589
590 /*
591 * If we filled the buffer and still have some
592 * more data, submit the buffer.
593 */
594 if (len && actx->fill == DCP_BUF_SZ) {
595 ret = mxs_dcp_run_sha(req);
596 if (ret)
597 return ret;
598 actx->fill = 0;
599 rctx->init = 0;
600 }
601 } while (len);
602 }
603
604 if (fin) {
605 rctx->fini = 1;
606
607 /* Submit whatever is left. */
608 ret = mxs_dcp_run_sha(req);
609 if (ret || !req->result)
610 return ret;
611 actx->fill = 0;
612
613 /* The hardware returns the digest byte-reversed, so flip it here. */
614 for (i = 0; i < halg->digestsize; i++)
615 req->result[i] = digest[halg->digestsize - i - 1];
616 }
617
618 return 0;
619}
620
621static int dcp_chan_thread_sha(void *data)
622{
623 struct dcp *sdcp = global_sdcp;
624 const int chan = DCP_CHAN_HASH_SHA;
625
626 struct crypto_async_request *backlog;
627 struct crypto_async_request *arq;
628
629 struct dcp_sha_req_ctx *rctx;
630
631 struct ahash_request *req;
632 int ret, fini;
633
634 do {
635 __set_current_state(TASK_INTERRUPTIBLE);
636
637 mutex_lock(&sdcp->mutex[chan]);
638 backlog = crypto_get_backlog(&sdcp->queue[chan]);
639 arq = crypto_dequeue_request(&sdcp->queue[chan]);
640 mutex_unlock(&sdcp->mutex[chan]);
641
642 if (backlog)
643 backlog->complete(backlog, -EINPROGRESS);
644
645 if (arq) {
646 req = ahash_request_cast(arq);
647 rctx = ahash_request_ctx(req);
648
649 ret = dcp_sha_req_to_buf(arq);
650 fini = rctx->fini;
651 arq->complete(arq, ret);
652 if (!fini)
653 continue;
654 }
655
656 schedule();
657 } while (!kthread_should_stop());
658
659 return 0;
660}
661
662static int dcp_sha_init(struct ahash_request *req)
663{
664 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
665 struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm);
666
667 struct hash_alg_common *halg = crypto_hash_alg_common(tfm);
668
669 /*
670 * Start a hashing session. The code below only initialises the
671 * hashing session context, nothing more.
672 */
673 memset(actx, 0, sizeof(*actx));
674
675 if (strcmp(halg->base.cra_name, "sha1") == 0)
676 actx->alg = MXS_DCP_CONTROL1_HASH_SELECT_SHA1;
677 else
678 actx->alg = MXS_DCP_CONTROL1_HASH_SELECT_SHA256;
679
680 actx->fill = 0;
681 actx->hot = 0;
682 actx->chan = DCP_CHAN_HASH_SHA;
683
684 mutex_init(&actx->mutex);
685
686 return 0;
687}
688
689static int dcp_sha_update_fx(struct ahash_request *req, int fini)
690{
691 struct dcp *sdcp = global_sdcp;
692
693 struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req);
694 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
695 struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm);
696
697 int ret;
698
699 /*
700 * Ignore requests that have no data in them and are not
701 * the trailing requests in the stream of requests.
702 */
703 if (!req->nbytes && !fini)
704 return 0;
705
706 mutex_lock(&actx->mutex);
707
708 rctx->fini = fini;
709
710 if (!actx->hot) {
711 actx->hot = 1;
712 rctx->init = 1;
713 }
714
715 mutex_lock(&sdcp->mutex[actx->chan]);
716 ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base);
717 mutex_unlock(&sdcp->mutex[actx->chan]);
718
719 wake_up_process(sdcp->thread[actx->chan]);
720 mutex_unlock(&actx->mutex);
721
722 return -EINPROGRESS;
723}
724
725static int dcp_sha_update(struct ahash_request *req)
726{
727 return dcp_sha_update_fx(req, 0);
728}
729
730static int dcp_sha_final(struct ahash_request *req)
731{
732 ahash_request_set_crypt(req, NULL, req->result, 0);
733 req->nbytes = 0;
734 return dcp_sha_update_fx(req, 1);
735}
736
737static int dcp_sha_finup(struct ahash_request *req)
738{
739 return dcp_sha_update_fx(req, 1);
740}
741
742static int dcp_sha_digest(struct ahash_request *req)
743{
744 int ret;
745
746 ret = dcp_sha_init(req);
747 if (ret)
748 return ret;
749
750 return dcp_sha_finup(req);
751}
752
753static int dcp_sha_cra_init(struct crypto_tfm *tfm)
754{
755 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
756 sizeof(struct dcp_sha_req_ctx));
757 return 0;
758}
759
760static void dcp_sha_cra_exit(struct crypto_tfm *tfm)
761{
762}
763
764/* AES 128 ECB and AES 128 CBC */
765static struct crypto_alg dcp_aes_algs[] = {
766 {
767 .cra_name = "ecb(aes)",
768 .cra_driver_name = "ecb-aes-dcp",
769 .cra_priority = 400,
770 .cra_alignmask = 15,
771 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
772 CRYPTO_ALG_ASYNC |
773 CRYPTO_ALG_NEED_FALLBACK,
774 .cra_init = mxs_dcp_aes_fallback_init,
775 .cra_exit = mxs_dcp_aes_fallback_exit,
776 .cra_blocksize = AES_BLOCK_SIZE,
777 .cra_ctxsize = sizeof(struct dcp_async_ctx),
778 .cra_type = &crypto_ablkcipher_type,
779 .cra_module = THIS_MODULE,
780 .cra_u = {
781 .ablkcipher = {
782 .min_keysize = AES_MIN_KEY_SIZE,
783 .max_keysize = AES_MAX_KEY_SIZE,
784 .setkey = mxs_dcp_aes_setkey,
785 .encrypt = mxs_dcp_aes_ecb_encrypt,
786 .decrypt = mxs_dcp_aes_ecb_decrypt
787 },
788 },
789 }, {
790 .cra_name = "cbc(aes)",
791 .cra_driver_name = "cbc-aes-dcp",
792 .cra_priority = 400,
793 .cra_alignmask = 15,
794 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
795 CRYPTO_ALG_ASYNC |
796 CRYPTO_ALG_NEED_FALLBACK,
797 .cra_init = mxs_dcp_aes_fallback_init,
798 .cra_exit = mxs_dcp_aes_fallback_exit,
799 .cra_blocksize = AES_BLOCK_SIZE,
800 .cra_ctxsize = sizeof(struct dcp_async_ctx),
801 .cra_type = &crypto_ablkcipher_type,
802 .cra_module = THIS_MODULE,
803 .cra_u = {
804 .ablkcipher = {
805 .min_keysize = AES_MIN_KEY_SIZE,
806 .max_keysize = AES_MAX_KEY_SIZE,
807 .setkey = mxs_dcp_aes_setkey,
808 .encrypt = mxs_dcp_aes_cbc_encrypt,
809 .decrypt = mxs_dcp_aes_cbc_decrypt,
810 .ivsize = AES_BLOCK_SIZE,
811 },
812 },
813 },
814};
815
816/* SHA1 */
817static struct ahash_alg dcp_sha1_alg = {
818 .init = dcp_sha_init,
819 .update = dcp_sha_update,
820 .final = dcp_sha_final,
821 .finup = dcp_sha_finup,
822 .digest = dcp_sha_digest,
823 .halg = {
824 .digestsize = SHA1_DIGEST_SIZE,
825 .base = {
826 .cra_name = "sha1",
827 .cra_driver_name = "sha1-dcp",
828 .cra_priority = 400,
829 .cra_alignmask = 63,
830 .cra_flags = CRYPTO_ALG_ASYNC,
831 .cra_blocksize = SHA1_BLOCK_SIZE,
832 .cra_ctxsize = sizeof(struct dcp_async_ctx),
833 .cra_module = THIS_MODULE,
834 .cra_init = dcp_sha_cra_init,
835 .cra_exit = dcp_sha_cra_exit,
836 },
837 },
838};
839
840/* SHA256 */
841static struct ahash_alg dcp_sha256_alg = {
842 .init = dcp_sha_init,
843 .update = dcp_sha_update,
844 .final = dcp_sha_final,
845 .finup = dcp_sha_finup,
846 .digest = dcp_sha_digest,
847 .halg = {
848 .digestsize = SHA256_DIGEST_SIZE,
849 .base = {
850 .cra_name = "sha256",
851 .cra_driver_name = "sha256-dcp",
852 .cra_priority = 400,
853 .cra_alignmask = 63,
854 .cra_flags = CRYPTO_ALG_ASYNC,
855 .cra_blocksize = SHA256_BLOCK_SIZE,
856 .cra_ctxsize = sizeof(struct dcp_async_ctx),
857 .cra_module = THIS_MODULE,
858 .cra_init = dcp_sha_cra_init,
859 .cra_exit = dcp_sha_cra_exit,
860 },
861 },
862};
863
864static irqreturn_t mxs_dcp_irq(int irq, void *context)
865{
866 struct dcp *sdcp = context;
867 uint32_t stat;
868 int i;
869
870 stat = readl(sdcp->base + MXS_DCP_STAT);
871 stat &= MXS_DCP_STAT_IRQ_MASK;
872 if (!stat)
873 return IRQ_NONE;
874
875 /* Clear the interrupts. */
876 writel(stat, sdcp->base + MXS_DCP_STAT_CLR);
877
878 /* Complete the DMA requests that finished. */
879 for (i = 0; i < DCP_MAX_CHANS; i++)
880 if (stat & (1 << i))
881 complete(&sdcp->completion[i]);
882
883 return IRQ_HANDLED;
884}
885
886static int mxs_dcp_probe(struct platform_device *pdev)
887{
888 struct device *dev = &pdev->dev;
889 struct dcp *sdcp = NULL;
890 int i, ret;
891
892 struct resource *iores;
893 int dcp_vmi_irq, dcp_irq;
894
895 mutex_lock(&global_mutex);
896 if (global_sdcp) {
897 dev_err(dev, "Only one DCP instance allowed!\n");
898 ret = -ENODEV;
899 goto err_mutex;
900 }
901
902 iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
903 dcp_vmi_irq = platform_get_irq(pdev, 0);
904 dcp_irq = platform_get_irq(pdev, 1);
905 if (dcp_vmi_irq < 0 || dcp_irq < 0) {
906 ret = -EINVAL;
907 goto err_mutex;
908 }
909
910 sdcp = devm_kzalloc(dev, sizeof(*sdcp), GFP_KERNEL);
911 if (!sdcp) {
912 ret = -ENOMEM;
913 goto err_mutex;
914 }
915
916 sdcp->dev = dev;
917 sdcp->base = devm_ioremap_resource(dev, iores);
918 if (IS_ERR(sdcp->base)) {
919 ret = PTR_ERR(sdcp->base);
920 goto err_mutex;
921 }
922
923 ret = devm_request_irq(dev, dcp_vmi_irq, mxs_dcp_irq, 0,
924 "dcp-vmi-irq", sdcp);
925 if (ret) {
926 dev_err(dev, "Failed to claim DCP VMI IRQ!\n");
927 goto err_mutex;
928 }
929
930 ret = devm_request_irq(dev, dcp_irq, mxs_dcp_irq, 0,
931 "dcp-irq", sdcp);
932 if (ret) {
933 dev_err(dev, "Failed to claim DCP IRQ!\n");
934 goto err_mutex;
935 }
936
937 /* Allocate coherent helper block. */
938 sdcp->coh = kzalloc(sizeof(struct dcp_coherent_block), GFP_KERNEL);
939 if (!sdcp->coh) {
940 dev_err(dev, "Error allocating coherent block\n");
941 ret = -ENOMEM;
942 goto err_mutex;
943 }
944
945 /* Restart the DCP block. */
946 stmp_reset_block(sdcp->base);
947
948 /* Initialize control register. */
949 writel(MXS_DCP_CTRL_GATHER_RESIDUAL_WRITES |
950 MXS_DCP_CTRL_ENABLE_CONTEXT_CACHING | 0xf,
951 sdcp->base + MXS_DCP_CTRL);
952
953 /* Enable all DCP DMA channels. */
954 writel(MXS_DCP_CHANNELCTRL_ENABLE_CHANNEL_MASK,
955 sdcp->base + MXS_DCP_CHANNELCTRL);
956
957 /*
958 * We do not enable context switching. Give the context buffer a
959 * pointer to an illegal address so if context switching is
960 * inadvertently enabled, the DCP will return an error instead of
961 * trashing good memory. The DCP DMA cannot access ROM, so any ROM
962 * address will do.
963 */
964 writel(0xffff0000, sdcp->base + MXS_DCP_CONTEXT);
965 for (i = 0; i < DCP_MAX_CHANS; i++)
966 writel(0xffffffff, sdcp->base + MXS_DCP_CH_N_STAT_CLR(i));
967 writel(0xffffffff, sdcp->base + MXS_DCP_STAT_CLR);
968
969 global_sdcp = sdcp;
970
971 platform_set_drvdata(pdev, sdcp);
972
973 for (i = 0; i < DCP_MAX_CHANS; i++) {
974 mutex_init(&sdcp->mutex[i]);
975 init_completion(&sdcp->completion[i]);
976 crypto_init_queue(&sdcp->queue[i], 50);
977 }
978
979 /* Create the SHA and AES handler threads. */
980 sdcp->thread[DCP_CHAN_HASH_SHA] = kthread_run(dcp_chan_thread_sha,
981 NULL, "mxs_dcp_chan/sha");
982 if (IS_ERR(sdcp->thread[DCP_CHAN_HASH_SHA])) {
983 dev_err(dev, "Error starting SHA thread!\n");
984 ret = PTR_ERR(sdcp->thread[DCP_CHAN_HASH_SHA]);
985 goto err_free_coherent;
986 }
987
988 sdcp->thread[DCP_CHAN_CRYPTO] = kthread_run(dcp_chan_thread_aes,
989 NULL, "mxs_dcp_chan/aes");
990 if (IS_ERR(sdcp->thread[DCP_CHAN_CRYPTO])) {
991 dev_err(dev, "Error starting SHA thread!\n");
992 ret = PTR_ERR(sdcp->thread[DCP_CHAN_CRYPTO]);
993 goto err_destroy_sha_thread;
994 }
995
996 /* Register the various crypto algorithms. */
997 sdcp->caps = readl(sdcp->base + MXS_DCP_CAPABILITY1);
998
999 if (sdcp->caps & MXS_DCP_CAPABILITY1_AES128) {
1000 ret = crypto_register_algs(dcp_aes_algs,
1001 ARRAY_SIZE(dcp_aes_algs));
1002 if (ret) {
1003 /* Failed to register algorithm. */
1004 dev_err(dev, "Failed to register AES crypto!\n");
1005 goto err_destroy_aes_thread;
1006 }
1007 }
1008
1009 if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA1) {
1010 ret = crypto_register_ahash(&dcp_sha1_alg);
1011 if (ret) {
1012 dev_err(dev, "Failed to register %s hash!\n",
1013 dcp_sha1_alg.halg.base.cra_name);
1014 goto err_unregister_aes;
1015 }
1016 }
1017
1018 if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA256) {
1019 ret = crypto_register_ahash(&dcp_sha256_alg);
1020 if (ret) {
1021 dev_err(dev, "Failed to register %s hash!\n",
1022 dcp_sha256_alg.halg.base.cra_name);
1023 goto err_unregister_sha1;
1024 }
1025 }
1026
1027 return 0;
1028
1029err_unregister_sha1:
1030 if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA1)
1031 crypto_unregister_ahash(&dcp_sha1_alg);
1032
1033err_unregister_aes:
1034 if (sdcp->caps & MXS_DCP_CAPABILITY1_AES128)
1035 crypto_unregister_algs(dcp_aes_algs, ARRAY_SIZE(dcp_aes_algs));
1036
1037err_destroy_aes_thread:
1038 kthread_stop(sdcp->thread[DCP_CHAN_CRYPTO]);
1039
1040err_destroy_sha_thread:
1041 kthread_stop(sdcp->thread[DCP_CHAN_HASH_SHA]);
1042
1043err_free_coherent:
1044 kfree(sdcp->coh);
1045err_mutex:
1046 mutex_unlock(&global_mutex);
1047 return ret;
1048}
1049
1050static int mxs_dcp_remove(struct platform_device *pdev)
1051{
1052 struct dcp *sdcp = platform_get_drvdata(pdev);
1053
1054 kfree(sdcp->coh);
1055
1056 if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA256)
1057 crypto_unregister_ahash(&dcp_sha256_alg);
1058
1059 if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA1)
1060 crypto_unregister_ahash(&dcp_sha1_alg);
1061
1062 if (sdcp->caps & MXS_DCP_CAPABILITY1_AES128)
1063 crypto_unregister_algs(dcp_aes_algs, ARRAY_SIZE(dcp_aes_algs));
1064
1065 kthread_stop(sdcp->thread[DCP_CHAN_HASH_SHA]);
1066 kthread_stop(sdcp->thread[DCP_CHAN_CRYPTO]);
1067
1068 platform_set_drvdata(pdev, NULL);
1069
1070 mutex_lock(&global_mutex);
1071 global_sdcp = NULL;
1072 mutex_unlock(&global_mutex);
1073
1074 return 0;
1075}
1076
1077static const struct of_device_id mxs_dcp_dt_ids[] = {
1078 { .compatible = "fsl,imx23-dcp", .data = NULL, },
1079 { .compatible = "fsl,imx28-dcp", .data = NULL, },
1080 { /* sentinel */ }
1081};
1082
1083MODULE_DEVICE_TABLE(of, mxs_dcp_dt_ids);
1084
1085static struct platform_driver mxs_dcp_driver = {
1086 .probe = mxs_dcp_probe,
1087 .remove = mxs_dcp_remove,
1088 .driver = {
1089 .name = "mxs-dcp",
1090 .owner = THIS_MODULE,
1091 .of_match_table = mxs_dcp_dt_ids,
1092 },
1093};
1094
1095module_platform_driver(mxs_dcp_driver);
1096
1097MODULE_AUTHOR("Marek Vasut <marex@denx.de>");
1098MODULE_DESCRIPTION("Freescale MXS DCP Driver");
1099MODULE_LICENSE("GPL");
1100MODULE_ALIAS("platform:mxs-dcp");
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index a9ccbf14096e..dde41f1df608 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -784,6 +784,7 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
 static int omap_aes_cra_init(struct crypto_tfm *tfm)
 {
 	struct omap_aes_dev *dd = NULL;
+	int err;
 
 	/* Find AES device, currently picks the first device */
 	spin_lock_bh(&list_lock);
@@ -792,7 +793,13 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
 	}
 	spin_unlock_bh(&list_lock);
 
-	pm_runtime_get_sync(dd->dev);
+	err = pm_runtime_get_sync(dd->dev);
+	if (err < 0) {
+		dev_err(dd->dev, "%s: failed to get_sync(%d)\n",
+			__func__, err);
+		return err;
+	}
+
 	tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
 
 	return 0;
@@ -1182,7 +1189,12 @@ static int omap_aes_probe(struct platform_device *pdev)
 	dd->phys_base = res.start;
 
 	pm_runtime_enable(dev);
-	pm_runtime_get_sync(dev);
+	err = pm_runtime_get_sync(dev);
+	if (err < 0) {
+		dev_err(dev, "%s: failed to get_sync(%d)\n",
+			__func__, err);
+		goto err_res;
+	}
 
 	omap_aes_dma_stop(dd);
 
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index e45aaaf0db30..a727a6a59653 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -789,10 +789,13 @@ static int omap_sham_update_cpu(struct omap_sham_dev *dd)
 	dev_dbg(dd->dev, "cpu: bufcnt: %u, digcnt: %d, final: %d\n",
 		ctx->bufcnt, ctx->digcnt, final);
 
-	bufcnt = ctx->bufcnt;
-	ctx->bufcnt = 0;
+	if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
+		bufcnt = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		return omap_sham_xmit_cpu(dd, ctx->buffer, bufcnt, final);
+	}
 
-	return omap_sham_xmit_cpu(dd, ctx->buffer, bufcnt, final);
+	return 0;
 }
 
 static int omap_sham_update_dma_stop(struct omap_sham_dev *dd)
@@ -1103,6 +1106,9 @@ static int omap_sham_update(struct ahash_request *req)
 		return 0;
 	}
 
+	if (dd->polling_mode)
+		ctx->flags |= BIT(FLAGS_CPU);
+
 	return omap_sham_enqueue(req, OP_UPDATE);
 }
 
@@ -1970,7 +1976,8 @@ err_algs:
 		crypto_unregister_ahash(
 				&dd->pdata->algs_info[i].algs_list[j]);
 	pm_runtime_disable(dev);
-	dma_release_channel(dd->dma_lch);
+	if (dd->dma_lch)
+		dma_release_channel(dd->dma_lch);
 data_err:
 	dev_err(dev, "initialization failed.\n");
 
@@ -1994,7 +2001,9 @@ static int omap_sham_remove(struct platform_device *pdev)
 				&dd->pdata->algs_info[i].algs_list[j]);
 	tasklet_kill(&dd->done_task);
 	pm_runtime_disable(&pdev->dev);
-	dma_release_channel(dd->dma_lch);
+
+	if (dd->dma_lch)
+		dma_release_channel(dd->dma_lch);
 
 	return 0;
 }
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index b44f4ddc565c..5967667e1a8f 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -338,20 +338,29 @@ DEF_TALITOS_DONE(ch1_3, TALITOS_ISR_CH_1_3_DONE)
 static u32 current_desc_hdr(struct device *dev, int ch)
 {
 	struct talitos_private *priv = dev_get_drvdata(dev);
-	int tail = priv->chan[ch].tail;
+	int tail, iter;
 	dma_addr_t cur_desc;
 
-	cur_desc = in_be32(priv->chan[ch].reg + TALITOS_CDPR_LO);
+	cur_desc = ((u64)in_be32(priv->chan[ch].reg + TALITOS_CDPR)) << 32;
+	cur_desc |= in_be32(priv->chan[ch].reg + TALITOS_CDPR_LO);
 
-	while (priv->chan[ch].fifo[tail].dma_desc != cur_desc) {
-		tail = (tail + 1) & (priv->fifo_len - 1);
-		if (tail == priv->chan[ch].tail) {
+	if (!cur_desc) {
+		dev_err(dev, "CDPR is NULL, giving up search for offending descriptor\n");
+		return 0;
+	}
+
+	tail = priv->chan[ch].tail;
+
+	iter = tail;
+	while (priv->chan[ch].fifo[iter].dma_desc != cur_desc) {
+		iter = (iter + 1) & (priv->fifo_len - 1);
+		if (iter == tail) {
 			dev_err(dev, "couldn't locate current descriptor\n");
 			return 0;
 		}
 	}
 
-	return priv->chan[ch].fifo[tail].desc->hdr;
+	return priv->chan[ch].fifo[iter].desc->hdr;
 }
 
 /*
@@ -2486,8 +2495,6 @@ static int talitos_remove(struct platform_device *ofdev)
 
 	iounmap(priv->reg);
 
-	dev_set_drvdata(dev, NULL);
-
 	kfree(priv);
 
 	return 0;
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
new file mode 100644
index 000000000000..b941ab9f762b
--- /dev/null
+++ b/include/linux/ccp.h
@@ -0,0 +1,537 @@
1/*
2 * AMD Cryptographic Coprocessor (CCP) driver
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#ifndef __CCP_H__
14#define __CCP_H__
15
16#include <linux/scatterlist.h>
17#include <linux/workqueue.h>
18#include <linux/list.h>
19#include <crypto/aes.h>
20#include <crypto/sha.h>
21
22
23struct ccp_device;
24struct ccp_cmd;
25
26#if defined(CONFIG_CRYPTO_DEV_CCP_DD) || \
27 defined(CONFIG_CRYPTO_DEV_CCP_DD_MODULE)
28
29/**
30 * ccp_enqueue_cmd - queue an operation for processing by the CCP
31 *
32 * @cmd: ccp_cmd struct to be processed
33 *
34 * Refer to the ccp_cmd struct below for required fields.
35 *
36 * Queue a cmd to be processed by the CCP. If queueing the cmd
37 * would exceed the defined length of the cmd queue, the cmd will
38 * only be queued if the CCP_CMD_MAY_BACKLOG flag is set, in which
39 * case the return code will be -EBUSY.
40 *
41 * The callback routine specified in the ccp_cmd struct will be
42 * called to notify the caller of completion (if the cmd was not
43 * backlogged) or advancement out of the backlog. If the cmd has
44 * advanced out of the backlog the "err" value of the callback
45 * will be -EINPROGRESS. Any other "err" value during callback is
46 * the result of the operation.
47 *
48 * The cmd has been successfully queued if:
49 * the return code is -EINPROGRESS or
50 * the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set
51 */
52int ccp_enqueue_cmd(struct ccp_cmd *cmd);
53
54#else /* CONFIG_CRYPTO_DEV_CCP_DD is not enabled */
55
56static inline int ccp_enqueue_cmd(struct ccp_cmd *cmd)
57{
58 return -ENODEV;
59}
60
61#endif /* CONFIG_CRYPTO_DEV_CCP_DD */
62
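The kernel-doc above captures the whole asynchronous contract: a cmd that was accepted returns -EINPROGRESS (or -EBUSY when it was backlogged under CCP_CMD_MAY_BACKLOG), and the final status arrives through the cmd's callback, with -EINPROGRESS in the callback merely signalling advancement out of the backlog. The sketch below shows one way a caller could drive that contract synchronously with a completion; the field names flags, callback and data are assumptions inferred from the documentation here, and should be checked against the actual struct ccp_cmd declaration further down in this header.

#include <linux/ccp.h>
#include <linux/completion.h>

/* Context handed back to us through the (assumed) cmd->data pointer. */
struct ccp_sync_ctx {
	struct completion done;
	int err;
};

static void ccp_sync_complete(void *data, int err)
{
	struct ccp_sync_ctx *ctx = data;

	/* -EINPROGRESS only means the cmd left the backlog; keep waiting. */
	if (err == -EINPROGRESS)
		return;

	ctx->err = err;		/* final result of the operation */
	complete(&ctx->done);
}

/* Submit a fully prepared ccp_cmd and wait for its final status. */
static int ccp_run_cmd_sync(struct ccp_cmd *cmd, struct ccp_sync_ctx *ctx)
{
	int ret;

	init_completion(&ctx->done);
	cmd->flags |= CCP_CMD_MAY_BACKLOG;	/* allow queueing when full */
	cmd->callback = ccp_sync_complete;
	cmd->data = ctx;

	ret = ccp_enqueue_cmd(cmd);
	if (ret != -EINPROGRESS && ret != -EBUSY)
		return ret;			/* cmd was never queued */

	wait_for_completion(&ctx->done);
	return ctx->err;
}
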
63
64/***** AES engine *****/
65/**
66 * ccp_aes_type - AES key size
67 *
68 * @CCP_AES_TYPE_128: 128-bit key
69 * @CCP_AES_TYPE_192: 192-bit key
70 * @CCP_AES_TYPE_256: 256-bit key
71 */
72enum ccp_aes_type {
73 CCP_AES_TYPE_128 = 0,
74 CCP_AES_TYPE_192,
75 CCP_AES_TYPE_256,
76 CCP_AES_TYPE__LAST,
77};
78
79/**
80 * ccp_aes_mode - AES operation mode
81 *
82 * @CCP_AES_MODE_ECB: ECB mode
83 * @CCP_AES_MODE_CBC: CBC mode
84 * @CCP_AES_MODE_OFB: OFB mode
85 * @CCP_AES_MODE_CFB: CFB mode
86 * @CCP_AES_MODE_CTR: CTR mode
87 * @CCP_AES_MODE_CMAC: CMAC mode
88 */
89enum ccp_aes_mode {
90 CCP_AES_MODE_ECB = 0,
91 CCP_AES_MODE_CBC,
92 CCP_AES_MODE_OFB,
93 CCP_AES_MODE_CFB,
94 CCP_AES_MODE_CTR,
95 CCP_AES_MODE_CMAC,
96 CCP_AES_MODE__LAST,
97};
98
99/**
100 * ccp_aes_action - AES operation
101 *
102 * @CCP_AES_ACTION_DECRYPT: AES decrypt operation
103 * @CCP_AES_ACTION_ENCRYPT: AES encrypt operation
104 */
105enum ccp_aes_action {
106 CCP_AES_ACTION_DECRYPT = 0,
107 CCP_AES_ACTION_ENCRYPT,
108 CCP_AES_ACTION__LAST,
109};
110
111/**
112 * struct ccp_aes_engine - CCP AES operation
113 * @type: AES operation key size
114 * @mode: AES operation mode
115 * @action: AES operation (decrypt/encrypt)
116 * @key: key to be used for this AES operation
117 * @key_len: length in bytes of key
118 * @iv: IV to be used for this AES operation
119 * @iv_len: length in bytes of iv
120 * @src: data to be used for this operation
121 * @dst: data produced by this operation
122 * @src_len: length in bytes of data used for this operation
123 * @cmac_final: indicates final operation when running in CMAC mode
124 * @cmac_key: K1/K2 key used in final CMAC operation
125 * @cmac_key_len: length in bytes of cmac_key
126 *
127 * Variables required to be set when calling ccp_enqueue_cmd():
128 * - type, mode, action, key, key_len, src, dst, src_len
129 * - iv, iv_len for any mode other than ECB
130 * - cmac_final for CMAC mode
131 * - cmac_key, cmac_key_len for CMAC mode if cmac_final is non-zero
132 *
133 * The iv variable is used as both input and output. On completion of the
134 * AES operation the new IV overwrites the old IV.
135 */
136struct ccp_aes_engine {
137 enum ccp_aes_type type;
138 enum ccp_aes_mode mode;
139 enum ccp_aes_action action;
140
141 struct scatterlist *key;
142 u32 key_len; /* In bytes */
143
144 struct scatterlist *iv;
145 u32 iv_len; /* In bytes */
146
147 struct scatterlist *src, *dst;
148 u64 src_len; /* In bytes */
149
150 u32 cmac_final; /* Indicates final cmac cmd */
151 struct scatterlist *cmac_key; /* K1/K2 cmac key required for
152 * final cmac cmd */
153 u32 cmac_key_len; /* In bytes */
154};
155
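As a worked example of the required-field list above, the sketch below prepares a ccp_aes_engine for a single AES-128-CBC encryption over linear buffers. It only fills in the engine description; embedding it into a struct ccp_cmd (the engine selector and union member names) is declared further down in this header and is only indicated as a comment here.

#include <linux/ccp.h>
#include <linux/scatterlist.h>
#include <crypto/aes.h>

/*
 * Sketch: describe one AES-128-CBC encryption for the CCP.
 * Assumes *aes has been zeroed by the caller and that key/iv/src/dst
 * point to linear kernel buffers suitable for sg_init_one().
 */
static void ccp_fill_aes_cbc_encrypt(struct ccp_aes_engine *aes,
				     struct scatterlist sg[4],
				     u8 *key, u8 *iv,
				     u8 *src, u8 *dst, unsigned int len)
{
	sg_init_one(&sg[0], key, AES_KEYSIZE_128);
	sg_init_one(&sg[1], iv, AES_BLOCK_SIZE);
	sg_init_one(&sg[2], src, len);
	sg_init_one(&sg[3], dst, len);

	aes->type = CCP_AES_TYPE_128;
	aes->mode = CCP_AES_MODE_CBC;
	aes->action = CCP_AES_ACTION_ENCRYPT;

	aes->key = &sg[0];
	aes->key_len = AES_KEYSIZE_128;		/* in bytes */
	aes->iv = &sg[1];
	aes->iv_len = AES_BLOCK_SIZE;		/* required for non-ECB modes */
	aes->src = &sg[2];
	aes->dst = &sg[3];
	aes->src_len = len;

	/*
	 * The filled engine is then placed in a struct ccp_cmd and handed
	 * to ccp_enqueue_cmd(); on completion the IV behind sg[1] has been
	 * overwritten with the new IV, as documented above.
	 */
}
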
156/***** XTS-AES engine *****/
157/**
158 * ccp_xts_aes_unit_size - XTS unit size
159 *
160 * @CCP_XTS_AES_UNIT_SIZE_16: Unit size of 16 bytes
161 * @CCP_XTS_AES_UNIT_SIZE_512: Unit size of 512 bytes
162 * @CCP_XTS_AES_UNIT_SIZE_1024: Unit size of 1024 bytes
163 * @CCP_XTS_AES_UNIT_SIZE_2048: Unit size of 2048 bytes
164 * @CCP_XTS_AES_UNIT_SIZE_4096: Unit size of 4096 bytes
165 */
166enum ccp_xts_aes_unit_size {
167 CCP_XTS_AES_UNIT_SIZE_16 = 0,
168 CCP_XTS_AES_UNIT_SIZE_512,
169 CCP_XTS_AES_UNIT_SIZE_1024,
170 CCP_XTS_AES_UNIT_SIZE_2048,
171 CCP_XTS_AES_UNIT_SIZE_4096,
172 CCP_XTS_AES_UNIT_SIZE__LAST,
173};
174
175/**
176 * struct ccp_xts_aes_engine - CCP XTS AES operation
177 * @action: AES operation (decrypt/encrypt)
178 * @unit_size: unit size of the XTS operation
179 * @key: key to be used for this XTS AES operation
180 * @key_len: length in bytes of key
181 * @iv: IV to be used for this XTS AES operation
182 * @iv_len: length in bytes of iv
183 * @src: data to be used for this operation
184 * @dst: data produced by this operation
185 * @src_len: length in bytes of data used for this operation
186 * @final: indicates final XTS operation
187 *
188 * Variables required to be set when calling ccp_enqueue_cmd():
189 * - action, unit_size, key, key_len, iv, iv_len, src, dst, src_len, final
190 *
191 * The iv variable is used as both input and output. On completion of the
192 * AES operation the new IV overwrites the old IV.
193 */
194struct ccp_xts_aes_engine {
195 enum ccp_aes_action action;
196 enum ccp_xts_aes_unit_size unit_size;
197
198 struct scatterlist *key;
199 u32 key_len; /* In bytes */
200
201 struct scatterlist *iv;
202 u32 iv_len; /* In bytes */
203
204 struct scatterlist *src, *dst;
205 u64 src_len; /* In bytes */
206
207 u32 final;
208};
209
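The XTS case differs mainly in the extra unit_size field and in the key, which carries two AES keys back to back. A hedged sketch of the engine-specific assignments only, assuming the same includes and scatterlist preparation as the AES sketch above:

/* Engine-specific fields for an XTS encrypt; unit_size is the data-unit
 * ("sector") granularity of the XTS tweak.
 */
static void fill_xts_cmd(struct ccp_cmd *cmd, struct scatterlist *key,
			 u32 key_len, struct scatterlist *iv, u32 iv_len,
			 struct scatterlist *src, struct scatterlist *dst,
			 u64 len)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->engine = CCP_ENGINE_XTS_AES_128;
	cmd->u.xts.action = CCP_AES_ACTION_ENCRYPT;
	cmd->u.xts.unit_size = CCP_XTS_AES_UNIT_SIZE_512;
	cmd->u.xts.key = key;
	cmd->u.xts.key_len = key_len;	/* both halves of the XTS key */
	cmd->u.xts.iv = iv;		/* the XTS tweak; updated on completion */
	cmd->u.xts.iv_len = iv_len;
	cmd->u.xts.src = src;
	cmd->u.xts.dst = dst;
	cmd->u.xts.src_len = len;
	cmd->u.xts.final = 1;
}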
210/***** SHA engine *****/
211#define CCP_SHA_BLOCKSIZE SHA256_BLOCK_SIZE
212#define CCP_SHA_CTXSIZE SHA256_DIGEST_SIZE
213
214/**
215 * ccp_sha_type - type of SHA operation
216 *
217 * @CCP_SHA_TYPE_1: SHA-1 operation
218 * @CCP_SHA_TYPE_224: SHA-224 operation
219 * @CCP_SHA_TYPE_256: SHA-256 operation
220 */
221enum ccp_sha_type {
222 CCP_SHA_TYPE_1 = 1,
223 CCP_SHA_TYPE_224,
224 CCP_SHA_TYPE_256,
225 CCP_SHA_TYPE__LAST,
226};
227
228/**
229 * struct ccp_sha_engine - CCP SHA operation
230 * @type: Type of SHA operation
231 * @ctx: current hash value
232 * @ctx_len: length in bytes of hash value
233 * @src: data to be used for this operation
234 * @src_len: length in bytes of data used for this operation
235 * @final: indicates final SHA operation
236 * @msg_bits: total length of the message in bits used in final SHA operation
237 *
238 * Variables required to be set when calling ccp_enqueue_cmd():
239 * - type, ctx, ctx_len, src, src_len, final
240 * - msg_bits if final is non-zero
241 *
242 * The ctx variable is used as both input and output. On completion of the
243 * SHA operation the new hash value overwrites the old hash value.
244 */
245struct ccp_sha_engine {
246 enum ccp_sha_type type;
247
248 struct scatterlist *ctx;
249 u32 ctx_len; /* In bytes */
250
251 struct scatterlist *src;
252 u64 src_len; /* In bytes */
253
254 u32 final; /* Indicates final sha cmd */
255 u64 msg_bits; /* Message length in bits required for
256 * final sha cmd */
257};
258
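Hashing is driven the same way, but the context is carried between passes. The sketch below (variable names invented, includes as in the AES sketch) shows how an intermediate pass and the final pass differ only in the final and msg_bits fields:

/* One pass of a (possibly multi-part) SHA-256; ctx is CCP_SHA_CTXSIZE bytes
 * and is both read and written. Non-final passes would normally cover a
 * multiple of CCP_SHA_BLOCKSIZE bytes.
 */
static void fill_sha_cmd(struct ccp_cmd *cmd, struct scatterlist *ctx,
			 struct scatterlist *src, u64 len,
			 bool last, u64 total_msg_bytes)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->engine = CCP_ENGINE_SHA;
	cmd->u.sha.type = CCP_SHA_TYPE_256;
	cmd->u.sha.ctx = ctx;
	cmd->u.sha.ctx_len = CCP_SHA_CTXSIZE;
	cmd->u.sha.src = src;
	cmd->u.sha.src_len = len;
	cmd->u.sha.final = last;
	if (last)
		cmd->u.sha.msg_bits = total_msg_bytes * 8;
}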
259/***** RSA engine *****/
260/**
261 * struct ccp_rsa_engine - CCP RSA operation
262 * @key_size: length in bits of RSA key
263 * @exp: RSA exponent
264 * @exp_len: length in bytes of exponent
265 * @mod: RSA modulus
266 * @mod_len: length in bytes of modulus
267 * @src: data to be used for this operation
268 * @dst: data produced by this operation
269 * @src_len: length in bytes of data used for this operation
270 *
271 * Variables required to be set when calling ccp_enqueue_cmd():
272 * - key_size, exp, exp_len, mod, mod_len, src, dst, src_len
273 */
274struct ccp_rsa_engine {
275 u32 key_size; /* In bits */
276
277 struct scatterlist *exp;
278 u32 exp_len; /* In bytes */
279
280 struct scatterlist *mod;
281 u32 mod_len; /* In bytes */
282
283 struct scatterlist *src, *dst;
284 u32 src_len; /* In bytes */
285};
286
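For RSA the operand lengths track the key size; a hedged sketch for a 2048-bit key (so 256-byte operands, an assumption of this example), with scatterlist setup and includes as in the AES sketch:

/* Modular exponentiation with a 2048-bit key: exp, mod, src and dst are
 * scatterlists over 256-byte number buffers prepared by the caller.
 */
static void fill_rsa_cmd(struct ccp_cmd *cmd, struct scatterlist *exp,
			 struct scatterlist *mod, struct scatterlist *src,
			 struct scatterlist *dst)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->engine = CCP_ENGINE_RSA;
	cmd->u.rsa.key_size = 2048;	/* in bits */
	cmd->u.rsa.exp = exp;
	cmd->u.rsa.exp_len = 256;	/* in bytes */
	cmd->u.rsa.mod = mod;
	cmd->u.rsa.mod_len = 256;
	cmd->u.rsa.src = src;
	cmd->u.rsa.dst = dst;
	cmd->u.rsa.src_len = 256;
}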
287/***** Passthru engine *****/
288/**
289 * ccp_passthru_bitwise - type of bitwise passthru operation
290 *
291 * @CCP_PASSTHRU_BITWISE_NOOP: no bitwise operation performed
292 * @CCP_PASSTHRU_BITWISE_AND: perform bitwise AND of src with mask
293 * @CCP_PASSTHRU_BITWISE_OR: perform bitwise OR of src with mask
294 * @CCP_PASSTHRU_BITWISE_XOR: perform bitwise XOR of src with mask
295 * @CCP_PASSTHRU_BITWISE_MASK: overwrite with mask
296 */
297enum ccp_passthru_bitwise {
298 CCP_PASSTHRU_BITWISE_NOOP = 0,
299 CCP_PASSTHRU_BITWISE_AND,
300 CCP_PASSTHRU_BITWISE_OR,
301 CCP_PASSTHRU_BITWISE_XOR,
302 CCP_PASSTHRU_BITWISE_MASK,
303 CCP_PASSTHRU_BITWISE__LAST,
304};
305
306/**
307 * ccp_passthru_byteswap - type of byteswap passthru operation
308 *
309 * @CCP_PASSTHRU_BYTESWAP_NOOP: no byte swapping performed
310 * @CCP_PASSTHRU_BYTESWAP_32BIT: swap bytes within 32-bit words
311 * @CCP_PASSTHRU_BYTESWAP_256BIT: swap bytes within 256-bit words
312 */
313enum ccp_passthru_byteswap {
314 CCP_PASSTHRU_BYTESWAP_NOOP = 0,
315 CCP_PASSTHRU_BYTESWAP_32BIT,
316 CCP_PASSTHRU_BYTESWAP_256BIT,
317 CCP_PASSTHRU_BYTESWAP__LAST,
318};
319
320/**
321 * struct ccp_passthru_engine - CCP pass-through operation
322 * @bit_mod: bitwise operation to perform
323 * @byte_swap: byteswap operation to perform
324 * @mask: mask to be applied to data
325 * @mask_len: length in bytes of mask
326 * @src: data to be used for this operation
327 * @dst: data produced by this operation
328 * @src_len: length in bytes of data used for this operation
 329 * @final: indicates final pass-through operation
330 *
331 * Variables required to be set when calling ccp_enqueue_cmd():
332 * - bit_mod, byte_swap, src, dst, src_len
333 * - mask, mask_len if bit_mod is not CCP_PASSTHRU_BITWISE_NOOP
334 */
335struct ccp_passthru_engine {
336 enum ccp_passthru_bitwise bit_mod;
337 enum ccp_passthru_byteswap byte_swap;
338
339 struct scatterlist *mask;
340 u32 mask_len; /* In bytes */
341
342 struct scatterlist *src, *dst;
343 u64 src_len; /* In bytes */
344
345 u32 final;
346};
347
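A pass-through command simply moves data while optionally masking or byte-swapping it. For example, the following sketch (buffer setup and includes as in the AES sketch) copies src to dst while swapping bytes within 256-bit words:

/* No bitwise masking here, so mask/mask_len can stay unset. */
static void fill_byteswap_cmd(struct ccp_cmd *cmd, struct scatterlist *src,
			      struct scatterlist *dst, u64 len)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->engine = CCP_ENGINE_PASSTHRU;
	cmd->u.passthru.bit_mod = CCP_PASSTHRU_BITWISE_NOOP;
	cmd->u.passthru.byte_swap = CCP_PASSTHRU_BYTESWAP_256BIT;
	cmd->u.passthru.src = src;
	cmd->u.passthru.dst = dst;
	cmd->u.passthru.src_len = len;
}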
348/***** ECC engine *****/
349#define CCP_ECC_MODULUS_BYTES 48 /* 384-bits */
350#define CCP_ECC_MAX_OPERANDS 6
351#define CCP_ECC_MAX_OUTPUTS 3
352
353/**
354 * ccp_ecc_function - type of ECC function
355 *
356 * @CCP_ECC_FUNCTION_MMUL_384BIT: 384-bit modular multiplication
357 * @CCP_ECC_FUNCTION_MADD_384BIT: 384-bit modular addition
358 * @CCP_ECC_FUNCTION_MINV_384BIT: 384-bit multiplicative inverse
359 * @CCP_ECC_FUNCTION_PADD_384BIT: 384-bit point addition
360 * @CCP_ECC_FUNCTION_PMUL_384BIT: 384-bit point multiplication
361 * @CCP_ECC_FUNCTION_PDBL_384BIT: 384-bit point doubling
362 */
363enum ccp_ecc_function {
364 CCP_ECC_FUNCTION_MMUL_384BIT = 0,
365 CCP_ECC_FUNCTION_MADD_384BIT,
366 CCP_ECC_FUNCTION_MINV_384BIT,
367 CCP_ECC_FUNCTION_PADD_384BIT,
368 CCP_ECC_FUNCTION_PMUL_384BIT,
369 CCP_ECC_FUNCTION_PDBL_384BIT,
370};
371
372/**
373 * struct ccp_ecc_modular_math - CCP ECC modular math parameters
374 * @operand_1: first operand for the modular math operation
375 * @operand_1_len: length of the first operand
376 * @operand_2: second operand for the modular math operation
377 * (not used for CCP_ECC_FUNCTION_MINV_384BIT)
378 * @operand_2_len: length of the second operand
379 * (not used for CCP_ECC_FUNCTION_MINV_384BIT)
380 * @result: result of the modular math operation
381 * @result_len: length of the supplied result buffer
382 */
383struct ccp_ecc_modular_math {
384 struct scatterlist *operand_1;
385 unsigned int operand_1_len; /* In bytes */
386
387 struct scatterlist *operand_2;
388 unsigned int operand_2_len; /* In bytes */
389
390 struct scatterlist *result;
391 unsigned int result_len; /* In bytes */
392};
393
394/**
395 * struct ccp_ecc_point - CCP ECC point definition
396 * @x: the x coordinate of the ECC point
397 * @x_len: the length of the x coordinate
398 * @y: the y coordinate of the ECC point
399 * @y_len: the length of the y coordinate
400 */
401struct ccp_ecc_point {
402 struct scatterlist *x;
403 unsigned int x_len; /* In bytes */
404
405 struct scatterlist *y;
406 unsigned int y_len; /* In bytes */
407};
408
409/**
410 * struct ccp_ecc_point_math - CCP ECC point math parameters
411 * @point_1: the first point of the ECC point math operation
412 * @point_2: the second point of the ECC point math operation
413 * (only used for CCP_ECC_FUNCTION_PADD_384BIT)
414 * @domain_a: the a parameter of the ECC curve
415 * @domain_a_len: the length of the a parameter
 416 * @scalar: the scalar parameter for the point math operation
417 * (only used for CCP_ECC_FUNCTION_PMUL_384BIT)
418 * @scalar_len: the length of the scalar parameter
419 * (only used for CCP_ECC_FUNCTION_PMUL_384BIT)
420 * @result: the point resulting from the point math operation
421 */
422struct ccp_ecc_point_math {
423 struct ccp_ecc_point point_1;
424 struct ccp_ecc_point point_2;
425
426 struct scatterlist *domain_a;
427 unsigned int domain_a_len; /* In bytes */
428
429 struct scatterlist *scalar;
430 unsigned int scalar_len; /* In bytes */
431
432 struct ccp_ecc_point result;
433};
434
435/**
436 * struct ccp_ecc_engine - CCP ECC operation
437 * @function: ECC function to perform
438 * @mod: ECC modulus
439 * @mod_len: length in bytes of modulus
 440 * @mm: modular math parameters
441 * @pm: point math parameters
442 * @ecc_result: result of the ECC operation
443 *
444 * Variables required to be set when calling ccp_enqueue_cmd():
445 * - function, mod, mod_len
 446 * - mm (for modular math functions) or pm (for point math functions) of u
447 * - ecc_result
448 */
449struct ccp_ecc_engine {
450 enum ccp_ecc_function function;
451
452 struct scatterlist *mod;
453 u32 mod_len; /* In bytes */
454
455 union {
456 struct ccp_ecc_modular_math mm;
457 struct ccp_ecc_point_math pm;
458 } u;
459
460 u16 ecc_result;
461};
462
463
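For ECC the union selects between the modular math and point math parameter blocks. A hedged sketch of a 384-bit point multiplication, with every big-number scatterlist (prime, curve parameter a, coordinates, scalar) assumed to be prepared by the caller over CCP_ECC_MODULUS_BYTES buffers, and includes as in the AES sketch:

static void fill_pmul_cmd(struct ccp_cmd *cmd, struct scatterlist *prime,
			  struct scatterlist *a, struct ccp_ecc_point *p,
			  struct scatterlist *k, struct ccp_ecc_point *result)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->engine = CCP_ENGINE_ECC;
	cmd->u.ecc.function = CCP_ECC_FUNCTION_PMUL_384BIT;
	cmd->u.ecc.mod = prime;
	cmd->u.ecc.mod_len = CCP_ECC_MODULUS_BYTES;
	cmd->u.ecc.u.pm.point_1 = *p;
	cmd->u.ecc.u.pm.domain_a = a;
	cmd->u.ecc.u.pm.domain_a_len = CCP_ECC_MODULUS_BYTES;
	cmd->u.ecc.u.pm.scalar = k;
	cmd->u.ecc.u.pm.scalar_len = CCP_ECC_MODULUS_BYTES;
	cmd->u.ecc.u.pm.result = *result;
	/* ecc_result is filled in by the driver when the command completes */
}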
464/**
465 * ccp_engine - CCP operation identifiers
466 *
467 * @CCP_ENGINE_AES: AES operation
 468 * @CCP_ENGINE_XTS_AES_128: 128-bit XTS AES operation
469 * @CCP_ENGINE_RSVD1: unused
470 * @CCP_ENGINE_SHA: SHA operation
471 * @CCP_ENGINE_RSA: RSA operation
472 * @CCP_ENGINE_PASSTHRU: pass-through operation
473 * @CCP_ENGINE_ZLIB_DECOMPRESS: unused
474 * @CCP_ENGINE_ECC: ECC operation
475 */
476enum ccp_engine {
477 CCP_ENGINE_AES = 0,
478 CCP_ENGINE_XTS_AES_128,
479 CCP_ENGINE_RSVD1,
480 CCP_ENGINE_SHA,
481 CCP_ENGINE_RSA,
482 CCP_ENGINE_PASSTHRU,
483 CCP_ENGINE_ZLIB_DECOMPRESS,
484 CCP_ENGINE_ECC,
485 CCP_ENGINE__LAST,
486};
487
488/* Flag values for flags member of ccp_cmd */
489#define CCP_CMD_MAY_BACKLOG 0x00000001
490
491/**
 492 * struct ccp_cmd - CCP operation request
493 * @entry: list element (ccp driver use only)
494 * @work: work element used for callbacks (ccp driver use only)
495 * @ccp: CCP device to be run on (ccp driver use only)
496 * @ret: operation return code (ccp driver use only)
497 * @flags: cmd processing flags
498 * @engine: CCP operation to perform
499 * @engine_error: CCP engine return code
 500 * @u: engine specific structures, refer to specific engine struct above
501 * @callback: operation completion callback function
502 * @data: parameter value to be supplied to the callback function
503 *
504 * Variables required to be set when calling ccp_enqueue_cmd():
505 * - engine, callback
 506 * - See the operation structures above for what is required for each
507 * operation.
508 */
509struct ccp_cmd {
510 /* The list_head, work_struct, ccp and ret variables are for use
511 * by the CCP driver only.
512 */
513 struct list_head entry;
514 struct work_struct work;
515 struct ccp_device *ccp;
516 int ret;
517
518 u32 flags;
519
520 enum ccp_engine engine;
521 u32 engine_error;
522
523 union {
524 struct ccp_aes_engine aes;
525 struct ccp_xts_aes_engine xts;
526 struct ccp_sha_engine sha;
527 struct ccp_rsa_engine rsa;
528 struct ccp_passthru_engine passthru;
529 struct ccp_ecc_engine ecc;
530 } u;
531
532 /* Completion callback support */
533 void (*callback)(void *data, int err);
534 void *data;
535};
536
537#endif
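Since ccp_enqueue_cmd() completes asynchronously through the callback, a caller that wants synchronous behaviour typically wraps it with a completion. The sketch below shows that pattern; the exact return-code convention of ccp_enqueue_cmd() is an assumption here and should be checked against the driver.

#include <linux/ccp.h>
#include <linux/completion.h>
#include <linux/errno.h>

struct sync_ctx {
	struct completion done;
	int err;
};

static void sync_complete(void *data, int err)
{
	struct sync_ctx *ctx = data;

	ctx->err = err;
	complete(&ctx->done);
}

/* Submit a prepared cmd and wait for it. Assumes the command is accepted
 * asynchronously (e.g. -EINPROGRESS, or -EBUSY when backlogged); check the
 * driver for the exact convention.
 */
static int sync_run(struct ccp_cmd *cmd, struct sync_ctx *ctx)
{
	int ret;

	init_completion(&ctx->done);
	cmd->flags = CCP_CMD_MAY_BACKLOG;
	cmd->callback = sync_complete;
	cmd->data = ctx;

	ret = ccp_enqueue_cmd(cmd);
	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
		return ret;

	wait_for_completion(&ctx->done);
	return ctx->err;
}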
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 24545cd90a25..02ae99e8e6d3 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -37,6 +37,9 @@
 	__asm__ ("" : "=r"(__ptr) : "0"(ptr)); \
 	(typeof(ptr)) (__ptr + (off)); })
 
+/* Make the optimizer believe the variable can be manipulated arbitrarily. */
+#define OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var))
+
 #ifdef __CHECKER__
 #define __must_be_array(arr) 0
 #else
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index dc1bd3dcf11f..5529c5239421 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -15,6 +15,7 @@
  */
 #undef barrier
 #undef RELOC_HIDE
+#undef OPTIMIZER_HIDE_VAR
 
 #define barrier() __memory_barrier()
 
@@ -23,6 +24,12 @@
 	__ptr = (unsigned long) (ptr); \
 	(typeof(ptr)) (__ptr + (off)); })
 
+/* This should act as an optimization barrier on var.
+ * Given that this compiler does not have inline assembly, a compiler barrier
+ * is the best we can do.
+ */
+#define OPTIMIZER_HIDE_VAR(var) barrier()
+
 /* Intel ECC compiler doesn't support __builtin_types_compatible_p() */
 #define __must_be_array(a) 0
 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index fe7a686dfd8d..2472740d7ab2 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -170,6 +170,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 	(typeof(ptr)) (__ptr + (off)); })
 #endif
 
+#ifndef OPTIMIZER_HIDE_VAR
+#define OPTIMIZER_HIDE_VAR(var) barrier()
+#endif
+
 /* Not-quite-unique ID. */
 #ifndef __UNIQUE_ID
 # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
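OPTIMIZER_HIDE_VAR() is what lets the improved crypto_memneq() mentioned in the pull summary accumulate differences without the compiler collapsing the loop into an early-exit comparison. A simplified, byte-wise illustration of the idea, not the kernel's actual implementation:

#include <linux/compiler.h>
#include <linux/types.h>

/* Accumulate differences and hide the running value from the optimizer so it
 * cannot turn the loop into an early-exit memcmp()-style comparison.
 */
static unsigned long const_time_neq(const void *a, const void *b, size_t size)
{
	const u8 *pa = a, *pb = b;
	unsigned long neq = 0;

	while (size--) {
		neq |= *pa++ ^ *pb++;
		OPTIMIZER_HIDE_VAR(neq);
	}
	return neq;	/* zero if equal, non-zero otherwise */
}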
diff --git a/kernel/padata.c b/kernel/padata.c
index 2abd25d79cc8..161402f0b517 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -112,7 +112,7 @@ int padata_do_parallel(struct padata_instance *pinst,
 
 	rcu_read_lock_bh();
 
-	pd = rcu_dereference(pinst->pd);
+	pd = rcu_dereference_bh(pinst->pd);
 
 	err = -EINVAL;
 	if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
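The padata change pairs the dereference with the rcu_read_lock_bh() taken a few lines earlier, so that sparse/lockdep check against the BH-disabled read-side critical section. A generic sketch of the pattern, using a hypothetical struct my_cfg rather than the padata types:

#include <linux/rcupdate.h>

struct my_cfg {
	int value;
};

/* When the read side is protected by rcu_read_lock_bh(), the matching
 * rcu_dereference_bh() must be used for pointers updated under that
 * protection.
 */
static int read_cfg_value(struct my_cfg __rcu **slot)
{
	struct my_cfg *cfg;
	int val;

	rcu_read_lock_bh();
	cfg = rcu_dereference_bh(*slot);
	val = cfg ? cfg->value : -1;
	rcu_read_unlock_bh();

	return val;
}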