author    Linus Torvalds <torvalds@linux-foundation.org>    2013-05-02 17:53:12 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2013-05-02 17:53:12 -0400
commit    797994f81a8b2bdca2eecffa415c1e7a89a4f961
tree      1383dc469c26ad37fdf960f682d9a48c782935c5
parent    c8d8566952fda026966784a62f324c8352f77430
parent    3862de1f6c442d53bd828d39f86d07d933a70605
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:

 - XTS mode optimisation for twofish/cast6/camellia/aes on x86
 - AVX2/x86_64 implementation for blowfish/twofish/serpent/camellia
 - SSSE3/AVX/AVX2 optimisations for sha256/sha512
 - Added driver for SAHARA2 crypto accelerator
 - Fix for GMAC when used in non-IPsec scenarios
 - Added generic CMAC implementation (including IPsec glue)
 - IP update for crypto/atmel
 - Support for more than one device in hwrng/timeriomem
 - Added Broadcom BCM2835 RNG driver
 - Misc fixes

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (59 commits)
  crypto: caam - fix job ring cleanup code
  crypto: camellia - add AVX2/AES-NI/x86_64 assembler implementation of camellia cipher
  crypto: serpent - add AVX2/x86_64 assembler implementation of serpent cipher
  crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher
  crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher
  crypto: tcrypt - add async cipher speed tests for blowfish
  crypto: testmgr - extend camellia test-vectors for camellia-aesni/avx2
  crypto: aesni_intel - fix Kconfig problem with CRYPTO_GLUE_HELPER_X86
  crypto: aesni_intel - add more optimized XTS mode for x86-64
  crypto: x86/camellia-aesni-avx - add more optimized XTS code
  crypto: cast6-avx: use new optimized XTS code
  crypto: x86/twofish-avx - use optimized XTS code
  crypto: x86 - add more optimized XTS-mode for serpent-avx
  xfrm: add rfc4494 AES-CMAC-96 support
  crypto: add CMAC support to CryptoAPI
  crypto: testmgr - add empty test vectors for null ciphers
  crypto: testmgr - add AES GMAC test vectors
  crypto: gcm - fix rfc4543 to handle async crypto correctly
  crypto: gcm - make GMAC work when dst and src are different
  hwrng: timeriomem - added devicetree hooks
  ...
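The generic CMAC implementation added here registers as an ordinary shash transform, so kernel code can request it by name once the series is applied. Below is a minimal sketch of in-kernel use, assuming the "cmac(aes)" algorithm name introduced by this series; the function and buffer names are illustrative and error handling is abbreviated.

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>

/* Hedged sketch: compute an AES-CMAC over a linear buffer via the shash API. */
static int cmac_digest_example(const u8 *key, unsigned int keylen,
			       const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int err;

	tfm = crypto_alloc_shash("cmac(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	/* shash_desc must be followed by the transform's private state. */
	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		err = -ENOMEM;
		goto out_free_tfm;
	}
	desc->tfm = tfm;

	err = crypto_shash_digest(desc, data, len, out);

	kfree(desc);
out_free_tfm:
	crypto_free_shash(tfm);
	return err;
}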
-rw-r--r-- Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt | 15
-rw-r--r-- Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt | 18
-rw-r--r-- Documentation/devicetree/bindings/rng/brcm,bcm2835.txt | 13
-rw-r--r-- Documentation/hw_random.txt | 2
-rw-r--r-- arch/arm/mach-at91/at91sam9g45_devices.c | 14
-rw-r--r-- arch/x86/crypto/Makefile | 57
-rw-r--r-- arch/x86/crypto/aesni-intel_asm.S | 117
-rw-r--r-- arch/x86/crypto/aesni-intel_glue.c | 80
-rw-r--r-- arch/x86/crypto/blowfish-avx2-asm_64.S | 449
-rw-r--r-- arch/x86/crypto/blowfish_avx2_glue.c | 585
-rw-r--r-- arch/x86/crypto/blowfish_glue.c | 32
-rw-r--r-- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 180
-rw-r--r-- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 1368
-rw-r--r-- arch/x86/crypto/camellia_aesni_avx2_glue.c | 586
-rw-r--r-- arch/x86/crypto/camellia_aesni_avx_glue.c | 104
-rw-r--r-- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 48
-rw-r--r-- arch/x86/crypto/cast6_avx_glue.c | 91
-rw-r--r-- arch/x86/crypto/crc32-pclmul_asm.S | 6
-rw-r--r-- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 10
-rw-r--r-- arch/x86/crypto/glue_helper-asm-avx.S | 61
-rw-r--r-- arch/x86/crypto/glue_helper-asm-avx2.S | 180
-rw-r--r-- arch/x86/crypto/glue_helper.c | 97
-rw-r--r-- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 45
-rw-r--r-- arch/x86/crypto/serpent-avx2-asm_64.S | 800
-rw-r--r-- arch/x86/crypto/serpent_avx2_glue.c | 562
-rw-r--r-- arch/x86/crypto/serpent_avx_glue.c | 145
-rw-r--r-- arch/x86/crypto/sha256-avx-asm.S | 496
-rw-r--r-- arch/x86/crypto/sha256-avx2-asm.S | 772
-rw-r--r-- arch/x86/crypto/sha256-ssse3-asm.S | 506
-rw-r--r-- arch/x86/crypto/sha256_ssse3_glue.c | 275
-rw-r--r-- arch/x86/crypto/sha512-avx-asm.S | 423
-rw-r--r-- arch/x86/crypto/sha512-avx2-asm.S | 743
-rw-r--r-- arch/x86/crypto/sha512-ssse3-asm.S | 421
-rw-r--r-- arch/x86/crypto/sha512_ssse3_glue.c | 282
-rw-r--r-- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 48
-rw-r--r-- arch/x86/crypto/twofish-avx2-asm_64.S | 600
-rw-r--r-- arch/x86/crypto/twofish_avx2_glue.c | 584
-rw-r--r-- arch/x86/crypto/twofish_avx_glue.c | 101
-rw-r--r-- arch/x86/include/asm/cpufeature.h | 1
-rw-r--r-- arch/x86/include/asm/crypto/blowfish.h | 43
-rw-r--r-- arch/x86/include/asm/crypto/camellia.h | 19
-rw-r--r-- arch/x86/include/asm/crypto/glue_helper.h | 24
-rw-r--r-- arch/x86/include/asm/crypto/serpent-avx.h | 29
-rw-r--r-- arch/x86/include/asm/crypto/twofish.h | 18
-rw-r--r-- crypto/Kconfig | 133
-rw-r--r-- crypto/Makefile | 1
-rw-r--r-- crypto/cmac.c | 315
-rw-r--r-- crypto/crypto_user.c | 4
-rw-r--r-- crypto/gcm.c | 116
-rw-r--r-- crypto/sha256_generic.c | 11
-rw-r--r-- crypto/sha512_generic.c | 13
-rw-r--r-- crypto/tcrypt.c | 30
-rw-r--r-- crypto/testmgr.c | 95
-rw-r--r-- crypto/testmgr.h | 1314
-rw-r--r-- drivers/char/hw_random/Kconfig | 12
-rw-r--r-- drivers/char/hw_random/Makefile | 1
-rw-r--r-- drivers/char/hw_random/bcm2835-rng.c | 113
-rw-r--r-- drivers/char/hw_random/exynos-rng.c | 3
-rw-r--r-- drivers/char/hw_random/mxc-rnga.c | 21
-rw-r--r-- drivers/char/hw_random/timeriomem-rng.c | 190
-rw-r--r-- drivers/crypto/Kconfig | 18
-rw-r--r-- drivers/crypto/Makefile | 1
-rw-r--r-- drivers/crypto/atmel-aes.c | 471
-rw-r--r-- drivers/crypto/atmel-sha-regs.h | 7
-rw-r--r-- drivers/crypto/atmel-sha.c | 586
-rw-r--r-- drivers/crypto/atmel-tdes-regs.h | 2
-rw-r--r-- drivers/crypto/atmel-tdes.c | 394
-rw-r--r-- drivers/crypto/bfin_crc.c | 6
-rw-r--r-- drivers/crypto/caam/Kconfig | 2
-rw-r--r-- drivers/crypto/caam/caamalg.c | 6
-rw-r--r-- drivers/crypto/caam/caamhash.c | 4
-rw-r--r-- drivers/crypto/caam/ctrl.c | 3
-rw-r--r-- drivers/crypto/caam/error.c | 10
-rw-r--r-- drivers/crypto/caam/intern.h | 1
-rw-r--r-- drivers/crypto/caam/jr.c | 4
-rw-r--r-- drivers/crypto/caam/key_gen.c | 2
-rw-r--r-- drivers/crypto/caam/key_gen.h | 2
-rw-r--r-- drivers/crypto/caam/regs.h | 4
-rw-r--r-- drivers/crypto/omap-aes.c | 15
-rw-r--r-- drivers/crypto/omap-sham.c | 15
-rw-r--r-- drivers/crypto/picoxcell_crypto.c | 4
-rw-r--r-- drivers/crypto/sahara.c | 1070
-rw-r--r-- drivers/crypto/ux500/hash/hash_core.c | 6
-rw-r--r-- include/crypto/sha.h | 5
-rw-r--r-- include/linux/platform_data/atmel-aes.h | 22
-rw-r--r-- include/linux/platform_data/crypto-atmel.h | 22
-rw-r--r-- include/linux/timeriomem-rng.h | 5
-rw-r--r-- net/xfrm/xfrm_algo.c | 13
88 files changed, 15378 insertions, 744 deletions
diff --git a/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt
new file mode 100644
index 000000000000..5c65eccd0e56
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt
@@ -0,0 +1,15 @@
+Freescale SAHARA Cryptographic Accelerator included in some i.MX chips.
+Currently only i.MX27 is supported.
+
+Required properties:
+- compatible : Should be "fsl,<soc>-sahara"
+- reg : Should contain SAHARA registers location and length
+- interrupts : Should contain SAHARA interrupt number
+
+Example:
+
+sah@10025000 {
+	compatible = "fsl,imx27-sahara";
+	reg = < 0x10025000 0x800>;
+	interrupts = <75>;
+};
diff --git a/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt b/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt
new file mode 100644
index 000000000000..6616d15866a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt
@@ -0,0 +1,18 @@
+HWRNG support for the timeriomem_rng driver
+
+Required properties:
+- compatible : "timeriomem_rng"
+- reg : base address to sample from
+- period : wait time in microseconds to use between samples
+
+N.B. currently 'reg' must be four bytes wide and aligned
+
+Example:
+
+hwrng@44 {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "timeriomem_rng";
+	reg = <0x44 0x04>;
+	period = <1000000>;
+};
diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
new file mode 100644
index 000000000000..07ccdaa68324
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
@@ -0,0 +1,13 @@
+BCM2835 Random number generator
+
+Required properties:
+
+- compatible : should be "brcm,bcm2835-rng"
+- reg : Specifies base physical address and size of the registers.
+
+Example:
+
+rng {
+	compatible = "brcm,bcm2835-rng";
+	reg = <0x7e104000 0x10>;
+};
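Both RNG bindings above sit behind the common hwrng framework, which hands collected entropy to user space through the /dev/hwrng character device. A small illustrative reader, assuming that device node exists on the running system (buffer size is arbitrary):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Hedged sketch: pull a few random bytes from the kernel hwrng framework. */
int main(void)
{
	unsigned char buf[16];
	ssize_t n;
	int fd = open("/dev/hwrng", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/hwrng");
		return 1;
	}
	n = read(fd, buf, sizeof(buf));
	if (n > 0)
		printf("read %zd random bytes\n", n);
	close(fd);
	return 0;
}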
diff --git a/Documentation/hw_random.txt b/Documentation/hw_random.txt
index 690f52550c80..026e237bbc87 100644
--- a/Documentation/hw_random.txt
+++ b/Documentation/hw_random.txt
@@ -63,7 +63,7 @@ Intel RNG Driver notes:
 
 	* FIXME: support poll(2)
 
-	NOTE: request_mem_region was removed, for two reasons:
+	NOTE: request_mem_region was removed, for three reasons:
 	1) Only one RNG is supported by this driver, 2) The location
 	used by the RNG is a fixed location in MMIO-addressable memory,
 	3) users with properly working BIOS e820 handling will always
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c
index 827c9f2a70fb..f0bf68268ca2 100644
--- a/arch/arm/mach-at91/at91sam9g45_devices.c
+++ b/arch/arm/mach-at91/at91sam9g45_devices.c
@@ -18,7 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 #include <linux/atmel-mci.h>
-#include <linux/platform_data/atmel-aes.h>
+#include <linux/platform_data/crypto-atmel.h>
 
 #include <linux/platform_data/at91_adc.h>
 
@@ -1900,7 +1900,8 @@ static void __init at91_add_device_tdes(void) {}
  * -------------------------------------------------------------------- */
 
 #if defined(CONFIG_CRYPTO_DEV_ATMEL_AES) || defined(CONFIG_CRYPTO_DEV_ATMEL_AES_MODULE)
-static struct aes_platform_data aes_data;
+static struct crypto_platform_data aes_data;
+static struct crypto_dma_data alt_atslave;
 static u64 aes_dmamask = DMA_BIT_MASK(32);
 
 static struct resource aes_resources[] = {
@@ -1931,23 +1932,20 @@ static struct platform_device at91sam9g45_aes_device = {
 static void __init at91_add_device_aes(void)
 {
 	struct at_dma_slave *atslave;
-	struct aes_dma_data *alt_atslave;
-
-	alt_atslave = kzalloc(sizeof(struct aes_dma_data), GFP_KERNEL);
 
 	/* DMA TX slave channel configuration */
-	atslave = &alt_atslave->txdata;
+	atslave = &alt_atslave.txdata;
 	atslave->dma_dev = &at_hdmac_device.dev;
 	atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_SRC_H2SEL_HW |
 		       ATC_SRC_PER(AT_DMA_ID_AES_RX);
 
 	/* DMA RX slave channel configuration */
-	atslave = &alt_atslave->rxdata;
+	atslave = &alt_atslave.rxdata;
 	atslave->dma_dev = &at_hdmac_device.dev;
 	atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_DST_H2SEL_HW |
 		       ATC_DST_PER(AT_DMA_ID_AES_TX);
 
-	aes_data.dma_slave = alt_atslave;
+	aes_data.dma_slave = &alt_atslave;
 	platform_device_register(&at91sam9g45_aes_device);
 }
 #else
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 63947a8f9f0f..a3a0ed80f17c 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,10 @@
 # Arch-specific CryptoAPI modules.
 #
 
+avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
+avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
+			$(comma)4)$(comma)%ymm2,yes,no)
+
 obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
 obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
 
@@ -12,22 +16,37 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
-obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o
-obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
-obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
-obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
 obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
 obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
+obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
+obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
+
+# These modules require assembler to support AVX.
+ifeq ($(avx_supported),yes)
+	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
+		camellia-aesni-avx-x86_64.o
+	obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
+	obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
+	obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
+	obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
+endif
+
+# These modules require assembler to support AVX2.
+ifeq ($(avx2_supported),yes)
+	obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
+	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
+	obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
+	obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
+endif
 
 aes-i586-y := aes-i586-asm_32.o aes_glue.o
 twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
@@ -36,21 +55,35 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
-camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
-				camellia_aesni_avx_glue.o
-cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
-cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
-serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
+
+ifeq ($(avx_supported),yes)
+	camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
+					camellia_aesni_avx_glue.o
+	cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
+	cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
+	twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o \
+				twofish_avx_glue.o
+	serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o \
+				serpent_avx_glue.o
+endif
+
+ifeq ($(avx2_supported),yes)
+	blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
+	camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
+	serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
+	twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
+endif
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
 crc32c-intel-y := crc32c-intel_glue.o
-crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
+crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
+sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
+sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 04b797767b9e..62fe22cd4cba 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -34,6 +34,10 @@
34 34
35#ifdef __x86_64__ 35#ifdef __x86_64__
36.data 36.data
37.align 16
38.Lgf128mul_x_ble_mask:
39 .octa 0x00000000000000010000000000000087
40
37POLY: .octa 0xC2000000000000000000000000000001 41POLY: .octa 0xC2000000000000000000000000000001
38TWOONE: .octa 0x00000001000000000000000000000001 42TWOONE: .octa 0x00000001000000000000000000000001
39 43
@@ -105,6 +109,8 @@ enc: .octa 0x2
105#define CTR %xmm11 109#define CTR %xmm11
106#define INC %xmm12 110#define INC %xmm12
107 111
112#define GF128MUL_MASK %xmm10
113
108#ifdef __x86_64__ 114#ifdef __x86_64__
109#define AREG %rax 115#define AREG %rax
110#define KEYP %rdi 116#define KEYP %rdi
@@ -2636,4 +2642,115 @@ ENTRY(aesni_ctr_enc)
2636.Lctr_enc_just_ret: 2642.Lctr_enc_just_ret:
2637 ret 2643 ret
2638ENDPROC(aesni_ctr_enc) 2644ENDPROC(aesni_ctr_enc)
2645
2646/*
2647 * _aesni_gf128mul_x_ble: internal ABI
2648 * Multiply in GF(2^128) for XTS IVs
2649 * input:
2650 * IV: current IV
2651 * GF128MUL_MASK == mask with 0x87 and 0x01
2652 * output:
2653 * IV: next IV
2654 * changed:
2655 * CTR: == temporary value
2656 */
2657#define _aesni_gf128mul_x_ble() \
2658 pshufd $0x13, IV, CTR; \
2659 paddq IV, IV; \
2660 psrad $31, CTR; \
2661 pand GF128MUL_MASK, CTR; \
2662 pxor CTR, IV;
2663
2664/*
2665 * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
2666 * bool enc, u8 *iv)
2667 */
2668ENTRY(aesni_xts_crypt8)
2669 cmpb $0, %cl
2670 movl $0, %ecx
2671 movl $240, %r10d
2672 leaq _aesni_enc4, %r11
2673 leaq _aesni_dec4, %rax
2674 cmovel %r10d, %ecx
2675 cmoveq %rax, %r11
2676
2677 movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
2678 movups (IVP), IV
2679
2680 mov 480(KEYP), KLEN
2681 addq %rcx, KEYP
2682
2683 movdqa IV, STATE1
2684 pxor 0x00(INP), STATE1
2685 movdqu IV, 0x00(OUTP)
2686
2687 _aesni_gf128mul_x_ble()
2688 movdqa IV, STATE2
2689 pxor 0x10(INP), STATE2
2690 movdqu IV, 0x10(OUTP)
2691
2692 _aesni_gf128mul_x_ble()
2693 movdqa IV, STATE3
2694 pxor 0x20(INP), STATE3
2695 movdqu IV, 0x20(OUTP)
2696
2697 _aesni_gf128mul_x_ble()
2698 movdqa IV, STATE4
2699 pxor 0x30(INP), STATE4
2700 movdqu IV, 0x30(OUTP)
2701
2702 call *%r11
2703
2704 pxor 0x00(OUTP), STATE1
2705 movdqu STATE1, 0x00(OUTP)
2706
2707 _aesni_gf128mul_x_ble()
2708 movdqa IV, STATE1
2709 pxor 0x40(INP), STATE1
2710 movdqu IV, 0x40(OUTP)
2711
2712 pxor 0x10(OUTP), STATE2
2713 movdqu STATE2, 0x10(OUTP)
2714
2715 _aesni_gf128mul_x_ble()
2716 movdqa IV, STATE2
2717 pxor 0x50(INP), STATE2
2718 movdqu IV, 0x50(OUTP)
2719
2720 pxor 0x20(OUTP), STATE3
2721 movdqu STATE3, 0x20(OUTP)
2722
2723 _aesni_gf128mul_x_ble()
2724 movdqa IV, STATE3
2725 pxor 0x60(INP), STATE3
2726 movdqu IV, 0x60(OUTP)
2727
2728 pxor 0x30(OUTP), STATE4
2729 movdqu STATE4, 0x30(OUTP)
2730
2731 _aesni_gf128mul_x_ble()
2732 movdqa IV, STATE4
2733 pxor 0x70(INP), STATE4
2734 movdqu IV, 0x70(OUTP)
2735
2736 _aesni_gf128mul_x_ble()
2737 movups IV, (IVP)
2738
2739 call *%r11
2740
2741 pxor 0x40(OUTP), STATE1
2742 movdqu STATE1, 0x40(OUTP)
2743
2744 pxor 0x50(OUTP), STATE2
2745 movdqu STATE2, 0x50(OUTP)
2746
2747 pxor 0x60(OUTP), STATE3
2748 movdqu STATE3, 0x60(OUTP)
2749
2750 pxor 0x70(OUTP), STATE4
2751 movdqu STATE4, 0x70(OUTP)
2752
2753 ret
2754ENDPROC(aesni_xts_crypt8)
2755
2639#endif 2756#endif
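The _aesni_gf128mul_x_ble macro above steps the XTS tweak by multiplying it by x in GF(2^128) under the little-endian block convention of IEEE P1619. For reference, a standalone, byte-wise C sketch of the same doubling (not the SSE2 form used in the assembly; the function name is made up for illustration):

#include <stdint.h>

/*
 * Hedged sketch: multiply a 128-bit XTS tweak by x in GF(2^128), treating the
 * block as a little-endian integer.  On carry out of bit 127, reduce with the
 * polynomial x^128 + x^7 + x^2 + x + 1, i.e. the 0x87 constant that the
 * assembly keeps in GF128MUL_MASK.
 */
void gf128mul_x_ble_sketch(uint8_t t[16])
{
	uint8_t carry = 0;
	int i;

	for (i = 0; i < 16; i++) {
		uint8_t next = t[i] >> 7;

		t[i] = (uint8_t)((t[i] << 1) | carry);
		carry = next;
	}
	if (carry)
		t[0] ^= 0x87;
}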
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index a0795da22c02..f80e668785c0 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -39,6 +39,9 @@
39#include <crypto/internal/aead.h> 39#include <crypto/internal/aead.h>
40#include <linux/workqueue.h> 40#include <linux/workqueue.h>
41#include <linux/spinlock.h> 41#include <linux/spinlock.h>
42#ifdef CONFIG_X86_64
43#include <asm/crypto/glue_helper.h>
44#endif
42 45
43#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) 46#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
44#define HAS_PCBC 47#define HAS_PCBC
@@ -102,6 +105,9 @@ void crypto_fpu_exit(void);
102asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, 105asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
103 const u8 *in, unsigned int len, u8 *iv); 106 const u8 *in, unsigned int len, u8 *iv);
104 107
108asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
109 const u8 *in, bool enc, u8 *iv);
110
105/* asmlinkage void aesni_gcm_enc() 111/* asmlinkage void aesni_gcm_enc()
106 * void *ctx, AES Key schedule. Starts on a 16 byte boundary. 112 * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
107 * u8 *out, Ciphertext output. Encrypt in-place is allowed. 113 * u8 *out, Ciphertext output. Encrypt in-place is allowed.
@@ -510,6 +516,78 @@ static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
510 aesni_enc(ctx, out, in); 516 aesni_enc(ctx, out, in);
511} 517}
512 518
519#ifdef CONFIG_X86_64
520
521static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
522{
523 glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
524}
525
526static void aesni_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
527{
528 glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec));
529}
530
531static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
532{
533 aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv);
534}
535
536static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
537{
538 aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv);
539}
540
541static const struct common_glue_ctx aesni_enc_xts = {
542 .num_funcs = 2,
543 .fpu_blocks_limit = 1,
544
545 .funcs = { {
546 .num_blocks = 8,
547 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) }
548 }, {
549 .num_blocks = 1,
550 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) }
551 } }
552};
553
554static const struct common_glue_ctx aesni_dec_xts = {
555 .num_funcs = 2,
556 .fpu_blocks_limit = 1,
557
558 .funcs = { {
559 .num_blocks = 8,
560 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) }
561 }, {
562 .num_blocks = 1,
563 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) }
564 } }
565};
566
567static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
568 struct scatterlist *src, unsigned int nbytes)
569{
570 struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
571
572 return glue_xts_crypt_128bit(&aesni_enc_xts, desc, dst, src, nbytes,
573 XTS_TWEAK_CAST(aesni_xts_tweak),
574 aes_ctx(ctx->raw_tweak_ctx),
575 aes_ctx(ctx->raw_crypt_ctx));
576}
577
578static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
579 struct scatterlist *src, unsigned int nbytes)
580{
581 struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
582
583 return glue_xts_crypt_128bit(&aesni_dec_xts, desc, dst, src, nbytes,
584 XTS_TWEAK_CAST(aesni_xts_tweak),
585 aes_ctx(ctx->raw_tweak_ctx),
586 aes_ctx(ctx->raw_crypt_ctx));
587}
588
589#else
590
513static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 591static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
514 struct scatterlist *src, unsigned int nbytes) 592 struct scatterlist *src, unsigned int nbytes)
515{ 593{
@@ -560,6 +638,8 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
560 return ret; 638 return ret;
561} 639}
562 640
641#endif
642
563#ifdef CONFIG_X86_64 643#ifdef CONFIG_X86_64
564static int rfc4106_init(struct crypto_tfm *tfm) 644static int rfc4106_init(struct crypto_tfm *tfm)
565{ 645{
diff --git a/arch/x86/crypto/blowfish-avx2-asm_64.S b/arch/x86/crypto/blowfish-avx2-asm_64.S
new file mode 100644
index 000000000000..784452e0d05d
--- /dev/null
+++ b/arch/x86/crypto/blowfish-avx2-asm_64.S
@@ -0,0 +1,449 @@
1/*
2 * x86_64/AVX2 assembler optimized version of Blowfish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/linkage.h>
14
15.file "blowfish-avx2-asm_64.S"
16
17.data
18.align 32
19
20.Lprefetch_mask:
21.long 0*64
22.long 1*64
23.long 2*64
24.long 3*64
25.long 4*64
26.long 5*64
27.long 6*64
28.long 7*64
29
30.Lbswap32_mask:
31.long 0x00010203
32.long 0x04050607
33.long 0x08090a0b
34.long 0x0c0d0e0f
35
36.Lbswap128_mask:
37 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
38.Lbswap_iv_mask:
39 .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
40
41.text
42/* structure of crypto context */
43#define p 0
44#define s0 ((16 + 2) * 4)
45#define s1 ((16 + 2 + (1 * 256)) * 4)
46#define s2 ((16 + 2 + (2 * 256)) * 4)
47#define s3 ((16 + 2 + (3 * 256)) * 4)
48
49/* register macros */
50#define CTX %rdi
51#define RIO %rdx
52
53#define RS0 %rax
54#define RS1 %r8
55#define RS2 %r9
56#define RS3 %r10
57
58#define RLOOP %r11
59#define RLOOPd %r11d
60
61#define RXr0 %ymm8
62#define RXr1 %ymm9
63#define RXr2 %ymm10
64#define RXr3 %ymm11
65#define RXl0 %ymm12
66#define RXl1 %ymm13
67#define RXl2 %ymm14
68#define RXl3 %ymm15
69
70/* temp regs */
71#define RT0 %ymm0
72#define RT0x %xmm0
73#define RT1 %ymm1
74#define RT1x %xmm1
75#define RIDX0 %ymm2
76#define RIDX1 %ymm3
77#define RIDX1x %xmm3
78#define RIDX2 %ymm4
79#define RIDX3 %ymm5
80
81/* vpgatherdd mask and '-1' */
82#define RNOT %ymm6
83
84/* byte mask, (-1 >> 24) */
85#define RBYTE %ymm7
86
87/***********************************************************************
88 * 32-way AVX2 blowfish
89 ***********************************************************************/
90#define F(xl, xr) \
91 vpsrld $24, xl, RIDX0; \
92 vpsrld $16, xl, RIDX1; \
93 vpsrld $8, xl, RIDX2; \
94 vpand RBYTE, RIDX1, RIDX1; \
95 vpand RBYTE, RIDX2, RIDX2; \
96 vpand RBYTE, xl, RIDX3; \
97 \
98 vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \
99 vpcmpeqd RNOT, RNOT, RNOT; \
100 vpcmpeqd RIDX0, RIDX0, RIDX0; \
101 \
102 vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \
103 vpcmpeqd RIDX1, RIDX1, RIDX1; \
104 vpaddd RT0, RT1, RT0; \
105 \
106 vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \
107 vpxor RT0, RT1, RT0; \
108 \
109 vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \
110 vpcmpeqd RNOT, RNOT, RNOT; \
111 vpaddd RT0, RT1, RT0; \
112 \
113 vpxor RT0, xr, xr;
114
115#define add_roundkey(xl, nmem) \
116 vpbroadcastd nmem, RT0; \
117 vpxor RT0, xl ## 0, xl ## 0; \
118 vpxor RT0, xl ## 1, xl ## 1; \
119 vpxor RT0, xl ## 2, xl ## 2; \
120 vpxor RT0, xl ## 3, xl ## 3;
121
122#define round_enc() \
123 add_roundkey(RXr, p(CTX,RLOOP,4)); \
124 F(RXl0, RXr0); \
125 F(RXl1, RXr1); \
126 F(RXl2, RXr2); \
127 F(RXl3, RXr3); \
128 \
129 add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
130 F(RXr0, RXl0); \
131 F(RXr1, RXl1); \
132 F(RXr2, RXl2); \
133 F(RXr3, RXl3);
134
135#define round_dec() \
136 add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \
137 F(RXl0, RXr0); \
138 F(RXl1, RXr1); \
139 F(RXl2, RXr2); \
140 F(RXl3, RXr3); \
141 \
142 add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
143 F(RXr0, RXl0); \
144 F(RXr1, RXl1); \
145 F(RXr2, RXl2); \
146 F(RXr3, RXl3);
147
148#define init_round_constants() \
149 vpcmpeqd RNOT, RNOT, RNOT; \
150 leaq s0(CTX), RS0; \
151 leaq s1(CTX), RS1; \
152 leaq s2(CTX), RS2; \
153 leaq s3(CTX), RS3; \
154 vpsrld $24, RNOT, RBYTE;
155
156#define transpose_2x2(x0, x1, t0) \
157 vpunpckldq x0, x1, t0; \
158 vpunpckhdq x0, x1, x1; \
159 \
160 vpunpcklqdq t0, x1, x0; \
161 vpunpckhqdq t0, x1, x1;
162
163#define read_block(xl, xr) \
164 vbroadcasti128 .Lbswap32_mask, RT1; \
165 \
166 vpshufb RT1, xl ## 0, xl ## 0; \
167 vpshufb RT1, xr ## 0, xr ## 0; \
168 vpshufb RT1, xl ## 1, xl ## 1; \
169 vpshufb RT1, xr ## 1, xr ## 1; \
170 vpshufb RT1, xl ## 2, xl ## 2; \
171 vpshufb RT1, xr ## 2, xr ## 2; \
172 vpshufb RT1, xl ## 3, xl ## 3; \
173 vpshufb RT1, xr ## 3, xr ## 3; \
174 \
175 transpose_2x2(xl ## 0, xr ## 0, RT0); \
176 transpose_2x2(xl ## 1, xr ## 1, RT0); \
177 transpose_2x2(xl ## 2, xr ## 2, RT0); \
178 transpose_2x2(xl ## 3, xr ## 3, RT0);
179
180#define write_block(xl, xr) \
181 vbroadcasti128 .Lbswap32_mask, RT1; \
182 \
183 transpose_2x2(xl ## 0, xr ## 0, RT0); \
184 transpose_2x2(xl ## 1, xr ## 1, RT0); \
185 transpose_2x2(xl ## 2, xr ## 2, RT0); \
186 transpose_2x2(xl ## 3, xr ## 3, RT0); \
187 \
188 vpshufb RT1, xl ## 0, xl ## 0; \
189 vpshufb RT1, xr ## 0, xr ## 0; \
190 vpshufb RT1, xl ## 1, xl ## 1; \
191 vpshufb RT1, xr ## 1, xr ## 1; \
192 vpshufb RT1, xl ## 2, xl ## 2; \
193 vpshufb RT1, xr ## 2, xr ## 2; \
194 vpshufb RT1, xl ## 3, xl ## 3; \
195 vpshufb RT1, xr ## 3, xr ## 3;
196
197.align 8
198__blowfish_enc_blk32:
199 /* input:
200 * %rdi: ctx, CTX
201 * RXl0..4, RXr0..4: plaintext
202 * output:
203 * RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped)
204 */
205 init_round_constants();
206
207 read_block(RXl, RXr);
208
209 movl $1, RLOOPd;
210 add_roundkey(RXl, p+4*(0)(CTX));
211
212.align 4
213.L__enc_loop:
214 round_enc();
215
216 leal 2(RLOOPd), RLOOPd;
217 cmpl $17, RLOOPd;
218 jne .L__enc_loop;
219
220 add_roundkey(RXr, p+4*(17)(CTX));
221
222 write_block(RXl, RXr);
223
224 ret;
225ENDPROC(__blowfish_enc_blk32)
226
227.align 8
228__blowfish_dec_blk32:
229 /* input:
230 * %rdi: ctx, CTX
231 * RXl0..4, RXr0..4: ciphertext
232 * output:
233 * RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped)
234 */
235 init_round_constants();
236
237 read_block(RXl, RXr);
238
239 movl $14, RLOOPd;
240 add_roundkey(RXl, p+4*(17)(CTX));
241
242.align 4
243.L__dec_loop:
244 round_dec();
245
246 addl $-2, RLOOPd;
247 jns .L__dec_loop;
248
249 add_roundkey(RXr, p+4*(0)(CTX));
250
251 write_block(RXl, RXr);
252
253 ret;
254ENDPROC(__blowfish_dec_blk32)
255
256ENTRY(blowfish_ecb_enc_32way)
257 /* input:
258 * %rdi: ctx, CTX
259 * %rsi: dst
260 * %rdx: src
261 */
262
263 vzeroupper;
264
265 vmovdqu 0*32(%rdx), RXl0;
266 vmovdqu 1*32(%rdx), RXr0;
267 vmovdqu 2*32(%rdx), RXl1;
268 vmovdqu 3*32(%rdx), RXr1;
269 vmovdqu 4*32(%rdx), RXl2;
270 vmovdqu 5*32(%rdx), RXr2;
271 vmovdqu 6*32(%rdx), RXl3;
272 vmovdqu 7*32(%rdx), RXr3;
273
274 call __blowfish_enc_blk32;
275
276 vmovdqu RXr0, 0*32(%rsi);
277 vmovdqu RXl0, 1*32(%rsi);
278 vmovdqu RXr1, 2*32(%rsi);
279 vmovdqu RXl1, 3*32(%rsi);
280 vmovdqu RXr2, 4*32(%rsi);
281 vmovdqu RXl2, 5*32(%rsi);
282 vmovdqu RXr3, 6*32(%rsi);
283 vmovdqu RXl3, 7*32(%rsi);
284
285 vzeroupper;
286
287 ret;
288ENDPROC(blowfish_ecb_enc_32way)
289
290ENTRY(blowfish_ecb_dec_32way)
291 /* input:
292 * %rdi: ctx, CTX
293 * %rsi: dst
294 * %rdx: src
295 */
296
297 vzeroupper;
298
299 vmovdqu 0*32(%rdx), RXl0;
300 vmovdqu 1*32(%rdx), RXr0;
301 vmovdqu 2*32(%rdx), RXl1;
302 vmovdqu 3*32(%rdx), RXr1;
303 vmovdqu 4*32(%rdx), RXl2;
304 vmovdqu 5*32(%rdx), RXr2;
305 vmovdqu 6*32(%rdx), RXl3;
306 vmovdqu 7*32(%rdx), RXr3;
307
308 call __blowfish_dec_blk32;
309
310 vmovdqu RXr0, 0*32(%rsi);
311 vmovdqu RXl0, 1*32(%rsi);
312 vmovdqu RXr1, 2*32(%rsi);
313 vmovdqu RXl1, 3*32(%rsi);
314 vmovdqu RXr2, 4*32(%rsi);
315 vmovdqu RXl2, 5*32(%rsi);
316 vmovdqu RXr3, 6*32(%rsi);
317 vmovdqu RXl3, 7*32(%rsi);
318
319 vzeroupper;
320
321 ret;
322ENDPROC(blowfish_ecb_dec_32way)
323
324ENTRY(blowfish_cbc_dec_32way)
325 /* input:
326 * %rdi: ctx, CTX
327 * %rsi: dst
328 * %rdx: src
329 */
330
331 vzeroupper;
332
333 vmovdqu 0*32(%rdx), RXl0;
334 vmovdqu 1*32(%rdx), RXr0;
335 vmovdqu 2*32(%rdx), RXl1;
336 vmovdqu 3*32(%rdx), RXr1;
337 vmovdqu 4*32(%rdx), RXl2;
338 vmovdqu 5*32(%rdx), RXr2;
339 vmovdqu 6*32(%rdx), RXl3;
340 vmovdqu 7*32(%rdx), RXr3;
341
342 call __blowfish_dec_blk32;
343
344 /* xor with src */
345 vmovq (%rdx), RT0x;
346 vpshufd $0x4f, RT0x, RT0x;
347 vinserti128 $1, 8(%rdx), RT0, RT0;
348 vpxor RT0, RXr0, RXr0;
349 vpxor 0*32+24(%rdx), RXl0, RXl0;
350 vpxor 1*32+24(%rdx), RXr1, RXr1;
351 vpxor 2*32+24(%rdx), RXl1, RXl1;
352 vpxor 3*32+24(%rdx), RXr2, RXr2;
353 vpxor 4*32+24(%rdx), RXl2, RXl2;
354 vpxor 5*32+24(%rdx), RXr3, RXr3;
355 vpxor 6*32+24(%rdx), RXl3, RXl3;
356
357 vmovdqu RXr0, (0*32)(%rsi);
358 vmovdqu RXl0, (1*32)(%rsi);
359 vmovdqu RXr1, (2*32)(%rsi);
360 vmovdqu RXl1, (3*32)(%rsi);
361 vmovdqu RXr2, (4*32)(%rsi);
362 vmovdqu RXl2, (5*32)(%rsi);
363 vmovdqu RXr3, (6*32)(%rsi);
364 vmovdqu RXl3, (7*32)(%rsi);
365
366 vzeroupper;
367
368 ret;
369ENDPROC(blowfish_cbc_dec_32way)
370
371ENTRY(blowfish_ctr_32way)
372 /* input:
373 * %rdi: ctx, CTX
374 * %rsi: dst
375 * %rdx: src
376 * %rcx: iv (big endian, 64bit)
377 */
378
379 vzeroupper;
380
381 vpcmpeqd RT0, RT0, RT0;
382 vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */
383
384 vpcmpeqd RT1x, RT1x, RT1x;
385 vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */
386 vpxor RIDX0, RIDX0, RIDX0;
387 vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */
388
389 vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */
390
391 vpcmpeqd RT1, RT1, RT1;
392 vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */
393 vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */
394
395 vbroadcasti128 .Lbswap_iv_mask, RIDX0;
396 vbroadcasti128 .Lbswap128_mask, RIDX1;
397
398 /* load IV and byteswap */
399 vmovq (%rcx), RT1x;
400 vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */
401 vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */
402
403 /* construct IVs */
404 vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */
405 vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */
406 vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */
407 vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */
408 vpsubq RIDX2, RT1, RT1;
409 vpshufb RIDX1, RT1, RXl1;
410 vpsubq RIDX2, RT1, RT1;
411 vpshufb RIDX1, RT1, RXr1;
412 vpsubq RIDX2, RT1, RT1;
413 vpshufb RIDX1, RT1, RXl2;
414 vpsubq RIDX2, RT1, RT1;
415 vpshufb RIDX1, RT1, RXr2;
416 vpsubq RIDX2, RT1, RT1;
417 vpshufb RIDX1, RT1, RXl3;
418 vpsubq RIDX2, RT1, RT1;
419 vpshufb RIDX1, RT1, RXr3;
420
421 /* store last IV */
422 vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */
423 vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */
424 vmovq RT1x, (%rcx);
425
426 call __blowfish_enc_blk32;
427
428 /* dst = src ^ iv */
429 vpxor 0*32(%rdx), RXr0, RXr0;
430 vpxor 1*32(%rdx), RXl0, RXl0;
431 vpxor 2*32(%rdx), RXr1, RXr1;
432 vpxor 3*32(%rdx), RXl1, RXl1;
433 vpxor 4*32(%rdx), RXr2, RXr2;
434 vpxor 5*32(%rdx), RXl2, RXl2;
435 vpxor 6*32(%rdx), RXr3, RXr3;
436 vpxor 7*32(%rdx), RXl3, RXl3;
437 vmovdqu RXr0, (0*32)(%rsi);
438 vmovdqu RXl0, (1*32)(%rsi);
439 vmovdqu RXr1, (2*32)(%rsi);
440 vmovdqu RXl1, (3*32)(%rsi);
441 vmovdqu RXr2, (4*32)(%rsi);
442 vmovdqu RXl2, (5*32)(%rsi);
443 vmovdqu RXr3, (6*32)(%rsi);
444 vmovdqu RXl3, (7*32)(%rsi);
445
446 vzeroupper;
447
448 ret;
449ENDPROC(blowfish_ctr_32way)
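blowfish_ctr_32way() above materialises 32 consecutive big-endian 64-bit counter blocks from the IV before encrypting them in parallel. A scalar C sketch of the same counter expansion, roughly what the glue code's leftover path does one block at a time; the helper name is illustrative:

#include <stdint.h>

/* Hedged sketch: derive n consecutive 64-bit big-endian CTR blocks and
 * advance the IV past them, as CTR mode expects. */
void ctr64_expand(uint8_t iv[8], uint8_t *ctrblks, unsigned int n)
{
	uint64_t ctr = 0;
	unsigned int i, j;

	/* read the IV as a big-endian 64-bit counter */
	for (i = 0; i < 8; i++)
		ctr = (ctr << 8) | iv[i];

	/* emit counters ctr, ctr + 1, ..., ctr + n - 1 in big-endian form */
	for (i = 0; i < n; i++, ctr++)
		for (j = 0; j < 8; j++)
			ctrblks[i * 8 + j] = (uint8_t)(ctr >> (56 - 8 * j));

	/* write back the updated counter */
	for (j = 0; j < 8; j++)
		iv[j] = (uint8_t)(ctr >> (56 - 8 * j));
}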
diff --git a/arch/x86/crypto/blowfish_avx2_glue.c b/arch/x86/crypto/blowfish_avx2_glue.c
new file mode 100644
index 000000000000..4417e9aea78d
--- /dev/null
+++ b/arch/x86/crypto/blowfish_avx2_glue.c
@@ -0,0 +1,585 @@
1/*
2 * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
8 * CTR part based on code (crypto/ctr.c) by:
9 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/types.h>
25#include <linux/crypto.h>
26#include <linux/err.h>
27#include <crypto/algapi.h>
28#include <crypto/blowfish.h>
29#include <crypto/cryptd.h>
30#include <crypto/ctr.h>
31#include <asm/i387.h>
32#include <asm/xcr.h>
33#include <asm/xsave.h>
34#include <asm/crypto/blowfish.h>
35#include <asm/crypto/ablk_helper.h>
36#include <crypto/scatterwalk.h>
37
38#define BF_AVX2_PARALLEL_BLOCKS 32
39
40/* 32-way AVX2 parallel cipher functions */
41asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst,
42 const u8 *src);
43asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst,
44 const u8 *src);
45asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst,
46 const u8 *src);
47asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src,
48 __be64 *iv);
49
50static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes)
51{
52 if (fpu_enabled)
53 return true;
54
55 /* FPU is only used when chunk to be processed is large enough, so
56 * do not enable FPU until it is necessary.
57 */
58 if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS)
59 return false;
60
61 kernel_fpu_begin();
62 return true;
63}
64
65static inline void bf_fpu_end(bool fpu_enabled)
66{
67 if (fpu_enabled)
68 kernel_fpu_end();
69}
70
71static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
72 bool enc)
73{
74 bool fpu_enabled = false;
75 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
76 const unsigned int bsize = BF_BLOCK_SIZE;
77 unsigned int nbytes;
78 int err;
79
80 err = blkcipher_walk_virt(desc, walk);
81 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
82
83 while ((nbytes = walk->nbytes)) {
84 u8 *wsrc = walk->src.virt.addr;
85 u8 *wdst = walk->dst.virt.addr;
86
87 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
88
89 /* Process multi-block AVX2 batch */
90 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
91 do {
92 if (enc)
93 blowfish_ecb_enc_32way(ctx, wdst, wsrc);
94 else
95 blowfish_ecb_dec_32way(ctx, wdst, wsrc);
96
97 wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS;
98 wdst += bsize * BF_AVX2_PARALLEL_BLOCKS;
99 nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
100 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
101
102 if (nbytes < bsize)
103 goto done;
104 }
105
106 /* Process multi-block batch */
107 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
108 do {
109 if (enc)
110 blowfish_enc_blk_4way(ctx, wdst, wsrc);
111 else
112 blowfish_dec_blk_4way(ctx, wdst, wsrc);
113
114 wsrc += bsize * BF_PARALLEL_BLOCKS;
115 wdst += bsize * BF_PARALLEL_BLOCKS;
116 nbytes -= bsize * BF_PARALLEL_BLOCKS;
117 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
118
119 if (nbytes < bsize)
120 goto done;
121 }
122
123 /* Handle leftovers */
124 do {
125 if (enc)
126 blowfish_enc_blk(ctx, wdst, wsrc);
127 else
128 blowfish_dec_blk(ctx, wdst, wsrc);
129
130 wsrc += bsize;
131 wdst += bsize;
132 nbytes -= bsize;
133 } while (nbytes >= bsize);
134
135done:
136 err = blkcipher_walk_done(desc, walk, nbytes);
137 }
138
139 bf_fpu_end(fpu_enabled);
140 return err;
141}
142
143static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
144 struct scatterlist *src, unsigned int nbytes)
145{
146 struct blkcipher_walk walk;
147
148 blkcipher_walk_init(&walk, dst, src, nbytes);
149 return ecb_crypt(desc, &walk, true);
150}
151
152static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
153 struct scatterlist *src, unsigned int nbytes)
154{
155 struct blkcipher_walk walk;
156
157 blkcipher_walk_init(&walk, dst, src, nbytes);
158 return ecb_crypt(desc, &walk, false);
159}
160
161static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
162 struct blkcipher_walk *walk)
163{
164 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
165 unsigned int bsize = BF_BLOCK_SIZE;
166 unsigned int nbytes = walk->nbytes;
167 u64 *src = (u64 *)walk->src.virt.addr;
168 u64 *dst = (u64 *)walk->dst.virt.addr;
169 u64 *iv = (u64 *)walk->iv;
170
171 do {
172 *dst = *src ^ *iv;
173 blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
174 iv = dst;
175
176 src += 1;
177 dst += 1;
178 nbytes -= bsize;
179 } while (nbytes >= bsize);
180
181 *(u64 *)walk->iv = *iv;
182 return nbytes;
183}
184
185static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
186 struct scatterlist *src, unsigned int nbytes)
187{
188 struct blkcipher_walk walk;
189 int err;
190
191 blkcipher_walk_init(&walk, dst, src, nbytes);
192 err = blkcipher_walk_virt(desc, &walk);
193
194 while ((nbytes = walk.nbytes)) {
195 nbytes = __cbc_encrypt(desc, &walk);
196 err = blkcipher_walk_done(desc, &walk, nbytes);
197 }
198
199 return err;
200}
201
202static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
203 struct blkcipher_walk *walk)
204{
205 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
206 const unsigned int bsize = BF_BLOCK_SIZE;
207 unsigned int nbytes = walk->nbytes;
208 u64 *src = (u64 *)walk->src.virt.addr;
209 u64 *dst = (u64 *)walk->dst.virt.addr;
210 u64 last_iv;
211 int i;
212
213 /* Start of the last block. */
214 src += nbytes / bsize - 1;
215 dst += nbytes / bsize - 1;
216
217 last_iv = *src;
218
219 /* Process multi-block AVX2 batch */
220 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
221 do {
222 nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1);
223 src -= BF_AVX2_PARALLEL_BLOCKS - 1;
224 dst -= BF_AVX2_PARALLEL_BLOCKS - 1;
225
226 blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src);
227
228 nbytes -= bsize;
229 if (nbytes < bsize)
230 goto done;
231
232 *dst ^= *(src - 1);
233 src -= 1;
234 dst -= 1;
235 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
236
237 if (nbytes < bsize)
238 goto done;
239 }
240
241 /* Process multi-block batch */
242 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
243 u64 ivs[BF_PARALLEL_BLOCKS - 1];
244
245 do {
246 nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1);
247 src -= BF_PARALLEL_BLOCKS - 1;
248 dst -= BF_PARALLEL_BLOCKS - 1;
249
250 for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
251 ivs[i] = src[i];
252
253 blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
254
255 for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
256 dst[i + 1] ^= ivs[i];
257
258 nbytes -= bsize;
259 if (nbytes < bsize)
260 goto done;
261
262 *dst ^= *(src - 1);
263 src -= 1;
264 dst -= 1;
265 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
266
267 if (nbytes < bsize)
268 goto done;
269 }
270
271 /* Handle leftovers */
272 for (;;) {
273 blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
274
275 nbytes -= bsize;
276 if (nbytes < bsize)
277 break;
278
279 *dst ^= *(src - 1);
280 src -= 1;
281 dst -= 1;
282 }
283
284done:
285 *dst ^= *(u64 *)walk->iv;
286 *(u64 *)walk->iv = last_iv;
287
288 return nbytes;
289}
290
291static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
292 struct scatterlist *src, unsigned int nbytes)
293{
294 bool fpu_enabled = false;
295 struct blkcipher_walk walk;
296 int err;
297
298 blkcipher_walk_init(&walk, dst, src, nbytes);
299 err = blkcipher_walk_virt(desc, &walk);
300 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
301
302 while ((nbytes = walk.nbytes)) {
303 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
304 nbytes = __cbc_decrypt(desc, &walk);
305 err = blkcipher_walk_done(desc, &walk, nbytes);
306 }
307
308 bf_fpu_end(fpu_enabled);
309 return err;
310}
311
312static void ctr_crypt_final(struct blkcipher_desc *desc,
313 struct blkcipher_walk *walk)
314{
315 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
316 u8 *ctrblk = walk->iv;
317 u8 keystream[BF_BLOCK_SIZE];
318 u8 *src = walk->src.virt.addr;
319 u8 *dst = walk->dst.virt.addr;
320 unsigned int nbytes = walk->nbytes;
321
322 blowfish_enc_blk(ctx, keystream, ctrblk);
323 crypto_xor(keystream, src, nbytes);
324 memcpy(dst, keystream, nbytes);
325
326 crypto_inc(ctrblk, BF_BLOCK_SIZE);
327}
328
329static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
330 struct blkcipher_walk *walk)
331{
332 struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
333 unsigned int bsize = BF_BLOCK_SIZE;
334 unsigned int nbytes = walk->nbytes;
335 u64 *src = (u64 *)walk->src.virt.addr;
336 u64 *dst = (u64 *)walk->dst.virt.addr;
337 int i;
338
339 /* Process multi-block AVX2 batch */
340 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
341 do {
342 blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src,
343 (__be64 *)walk->iv);
344
345 src += BF_AVX2_PARALLEL_BLOCKS;
346 dst += BF_AVX2_PARALLEL_BLOCKS;
347 nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
348 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
349
350 if (nbytes < bsize)
351 goto done;
352 }
353
354 /* Process four block batch */
355 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
356 __be64 ctrblocks[BF_PARALLEL_BLOCKS];
357 u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
358
359 do {
360 /* create ctrblks for parallel encrypt */
361 for (i = 0; i < BF_PARALLEL_BLOCKS; i++) {
362 if (dst != src)
363 dst[i] = src[i];
364
365 ctrblocks[i] = cpu_to_be64(ctrblk++);
366 }
367
368 blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
369 (u8 *)ctrblocks);
370
371 src += BF_PARALLEL_BLOCKS;
372 dst += BF_PARALLEL_BLOCKS;
373 nbytes -= bsize * BF_PARALLEL_BLOCKS;
374 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
375
376 *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
377
378 if (nbytes < bsize)
379 goto done;
380 }
381
382 /* Handle leftovers */
383 do {
384 u64 ctrblk;
385
386 if (dst != src)
387 *dst = *src;
388
389 ctrblk = *(u64 *)walk->iv;
390 be64_add_cpu((__be64 *)walk->iv, 1);
391
392 blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
393
394 src += 1;
395 dst += 1;
396 } while ((nbytes -= bsize) >= bsize);
397
398done:
399 return nbytes;
400}
401
402static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
403 struct scatterlist *src, unsigned int nbytes)
404{
405 bool fpu_enabled = false;
406 struct blkcipher_walk walk;
407 int err;
408
409 blkcipher_walk_init(&walk, dst, src, nbytes);
410 err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
411 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
412
413 while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
414 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
415 nbytes = __ctr_crypt(desc, &walk);
416 err = blkcipher_walk_done(desc, &walk, nbytes);
417 }
418
419 bf_fpu_end(fpu_enabled);
420
421 if (walk.nbytes) {
422 ctr_crypt_final(desc, &walk);
423 err = blkcipher_walk_done(desc, &walk, 0);
424 }
425
426 return err;
427}
428
429static struct crypto_alg bf_algs[6] = { {
430 .cra_name = "__ecb-blowfish-avx2",
431 .cra_driver_name = "__driver-ecb-blowfish-avx2",
432 .cra_priority = 0,
433 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
434 .cra_blocksize = BF_BLOCK_SIZE,
435 .cra_ctxsize = sizeof(struct bf_ctx),
436 .cra_alignmask = 0,
437 .cra_type = &crypto_blkcipher_type,
438 .cra_module = THIS_MODULE,
439 .cra_u = {
440 .blkcipher = {
441 .min_keysize = BF_MIN_KEY_SIZE,
442 .max_keysize = BF_MAX_KEY_SIZE,
443 .setkey = blowfish_setkey,
444 .encrypt = ecb_encrypt,
445 .decrypt = ecb_decrypt,
446 },
447 },
448}, {
449 .cra_name = "__cbc-blowfish-avx2",
450 .cra_driver_name = "__driver-cbc-blowfish-avx2",
451 .cra_priority = 0,
452 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
453 .cra_blocksize = BF_BLOCK_SIZE,
454 .cra_ctxsize = sizeof(struct bf_ctx),
455 .cra_alignmask = 0,
456 .cra_type = &crypto_blkcipher_type,
457 .cra_module = THIS_MODULE,
458 .cra_u = {
459 .blkcipher = {
460 .min_keysize = BF_MIN_KEY_SIZE,
461 .max_keysize = BF_MAX_KEY_SIZE,
462 .setkey = blowfish_setkey,
463 .encrypt = cbc_encrypt,
464 .decrypt = cbc_decrypt,
465 },
466 },
467}, {
468 .cra_name = "__ctr-blowfish-avx2",
469 .cra_driver_name = "__driver-ctr-blowfish-avx2",
470 .cra_priority = 0,
471 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
472 .cra_blocksize = 1,
473 .cra_ctxsize = sizeof(struct bf_ctx),
474 .cra_alignmask = 0,
475 .cra_type = &crypto_blkcipher_type,
476 .cra_module = THIS_MODULE,
477 .cra_u = {
478 .blkcipher = {
479 .min_keysize = BF_MIN_KEY_SIZE,
480 .max_keysize = BF_MAX_KEY_SIZE,
481 .ivsize = BF_BLOCK_SIZE,
482 .setkey = blowfish_setkey,
483 .encrypt = ctr_crypt,
484 .decrypt = ctr_crypt,
485 },
486 },
487}, {
488 .cra_name = "ecb(blowfish)",
489 .cra_driver_name = "ecb-blowfish-avx2",
490 .cra_priority = 400,
491 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
492 .cra_blocksize = BF_BLOCK_SIZE,
493 .cra_ctxsize = sizeof(struct async_helper_ctx),
494 .cra_alignmask = 0,
495 .cra_type = &crypto_ablkcipher_type,
496 .cra_module = THIS_MODULE,
497 .cra_init = ablk_init,
498 .cra_exit = ablk_exit,
499 .cra_u = {
500 .ablkcipher = {
501 .min_keysize = BF_MIN_KEY_SIZE,
502 .max_keysize = BF_MAX_KEY_SIZE,
503 .setkey = ablk_set_key,
504 .encrypt = ablk_encrypt,
505 .decrypt = ablk_decrypt,
506 },
507 },
508}, {
509 .cra_name = "cbc(blowfish)",
510 .cra_driver_name = "cbc-blowfish-avx2",
511 .cra_priority = 400,
512 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
513 .cra_blocksize = BF_BLOCK_SIZE,
514 .cra_ctxsize = sizeof(struct async_helper_ctx),
515 .cra_alignmask = 0,
516 .cra_type = &crypto_ablkcipher_type,
517 .cra_module = THIS_MODULE,
518 .cra_init = ablk_init,
519 .cra_exit = ablk_exit,
520 .cra_u = {
521 .ablkcipher = {
522 .min_keysize = BF_MIN_KEY_SIZE,
523 .max_keysize = BF_MAX_KEY_SIZE,
524 .ivsize = BF_BLOCK_SIZE,
525 .setkey = ablk_set_key,
526 .encrypt = __ablk_encrypt,
527 .decrypt = ablk_decrypt,
528 },
529 },
530}, {
531 .cra_name = "ctr(blowfish)",
532 .cra_driver_name = "ctr-blowfish-avx2",
533 .cra_priority = 400,
534 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
535 .cra_blocksize = 1,
536 .cra_ctxsize = sizeof(struct async_helper_ctx),
537 .cra_alignmask = 0,
538 .cra_type = &crypto_ablkcipher_type,
539 .cra_module = THIS_MODULE,
540 .cra_init = ablk_init,
541 .cra_exit = ablk_exit,
542 .cra_u = {
543 .ablkcipher = {
544 .min_keysize = BF_MIN_KEY_SIZE,
545 .max_keysize = BF_MAX_KEY_SIZE,
546 .ivsize = BF_BLOCK_SIZE,
547 .setkey = ablk_set_key,
548 .encrypt = ablk_encrypt,
549 .decrypt = ablk_encrypt,
550 .geniv = "chainiv",
551 },
552 },
553} };
554
555
556static int __init init(void)
557{
558 u64 xcr0;
559
560 if (!cpu_has_avx2 || !cpu_has_osxsave) {
561 pr_info("AVX2 instructions are not detected.\n");
562 return -ENODEV;
563 }
564
565 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
566 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
567 pr_info("AVX detected but unusable.\n");
568 return -ENODEV;
569 }
570
571 return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
572}
573
574static void __exit fini(void)
575{
576 crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
577}
578
579module_init(init);
580module_exit(fini);
581
582MODULE_LICENSE("GPL");
583MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized");
584MODULE_ALIAS("blowfish");
585MODULE_ALIAS("blowfish-asm");
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 50ec333b70e6..3548d76dbaa9 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -1,7 +1,7 @@
 /*
  * Glue Code for assembler optimized version of Blowfish
  *
- * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
  * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
@@ -32,40 +32,24 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <crypto/algapi.h>
+#include <asm/crypto/blowfish.h>
 
 /* regular block cipher functions */
 asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
 				   bool xor);
+EXPORT_SYMBOL_GPL(__blowfish_enc_blk);
+
 asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
+EXPORT_SYMBOL_GPL(blowfish_dec_blk);
 
 /* 4-way parallel cipher functions */
 asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
 					const u8 *src, bool xor);
+EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way);
+
 asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
 				      const u8 *src);
-
-static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
-{
-	__blowfish_enc_blk(ctx, dst, src, false);
-}
-
-static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
-					const u8 *src)
-{
-	__blowfish_enc_blk(ctx, dst, src, true);
-}
-
-static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
-					 const u8 *src)
-{
-	__blowfish_enc_blk_4way(ctx, dst, src, false);
-}
-
-static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
-					     const u8 *src)
-{
-	__blowfish_enc_blk_4way(ctx, dst, src, true);
-}
+EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way);
 
 static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index cfc163469c71..ce71f9212409 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -1,7 +1,7 @@
 /*
  * x86_64/AVX/AES-NI assembler implementation of Camellia
  *
- * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -589,6 +589,10 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 .Lbswap128_mask:
 	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
 
+/* For XTS mode IV generation */
+.Lxts_gf128mul_and_shl1_mask:
+	.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
+
 /*
  * pre-SubByte transform
  *
@@ -1090,3 +1094,177 @@ ENTRY(camellia_ctr_16way)
 
 	ret;
 ENDPROC(camellia_ctr_16way)
1097
1098#define gf128mul_x_ble(iv, mask, tmp) \
1099 vpsrad $31, iv, tmp; \
1100 vpaddq iv, iv, iv; \
1101 vpshufd $0x13, tmp, tmp; \
1102 vpand mask, tmp, tmp; \
1103 vpxor tmp, iv, iv;
1104
1105.align 8
1106camellia_xts_crypt_16way:
1107 /* input:
1108 * %rdi: ctx, CTX
1109 * %rsi: dst (16 blocks)
1110 * %rdx: src (16 blocks)
1111 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
1112 * %r8: index for input whitening key
1113 * %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16
1114 */
1115
1116 subq $(16 * 16), %rsp;
1117 movq %rsp, %rax;
1118
1119 vmovdqa .Lxts_gf128mul_and_shl1_mask, %xmm14;
1120
1121 /* load IV */
1122 vmovdqu (%rcx), %xmm0;
1123 vpxor 0 * 16(%rdx), %xmm0, %xmm15;
1124 vmovdqu %xmm15, 15 * 16(%rax);
1125 vmovdqu %xmm0, 0 * 16(%rsi);
1126
1127 /* construct IVs */
1128 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1129 vpxor 1 * 16(%rdx), %xmm0, %xmm15;
1130 vmovdqu %xmm15, 14 * 16(%rax);
1131 vmovdqu %xmm0, 1 * 16(%rsi);
1132
1133 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1134 vpxor 2 * 16(%rdx), %xmm0, %xmm13;
1135 vmovdqu %xmm0, 2 * 16(%rsi);
1136
1137 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1138 vpxor 3 * 16(%rdx), %xmm0, %xmm12;
1139 vmovdqu %xmm0, 3 * 16(%rsi);
1140
1141 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1142 vpxor 4 * 16(%rdx), %xmm0, %xmm11;
1143 vmovdqu %xmm0, 4 * 16(%rsi);
1144
1145 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1146 vpxor 5 * 16(%rdx), %xmm0, %xmm10;
1147 vmovdqu %xmm0, 5 * 16(%rsi);
1148
1149 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1150 vpxor 6 * 16(%rdx), %xmm0, %xmm9;
1151 vmovdqu %xmm0, 6 * 16(%rsi);
1152
1153 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1154 vpxor 7 * 16(%rdx), %xmm0, %xmm8;
1155 vmovdqu %xmm0, 7 * 16(%rsi);
1156
1157 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1158 vpxor 8 * 16(%rdx), %xmm0, %xmm7;
1159 vmovdqu %xmm0, 8 * 16(%rsi);
1160
1161 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1162 vpxor 9 * 16(%rdx), %xmm0, %xmm6;
1163 vmovdqu %xmm0, 9 * 16(%rsi);
1164
1165 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1166 vpxor 10 * 16(%rdx), %xmm0, %xmm5;
1167 vmovdqu %xmm0, 10 * 16(%rsi);
1168
1169 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1170 vpxor 11 * 16(%rdx), %xmm0, %xmm4;
1171 vmovdqu %xmm0, 11 * 16(%rsi);
1172
1173 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1174 vpxor 12 * 16(%rdx), %xmm0, %xmm3;
1175 vmovdqu %xmm0, 12 * 16(%rsi);
1176
1177 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1178 vpxor 13 * 16(%rdx), %xmm0, %xmm2;
1179 vmovdqu %xmm0, 13 * 16(%rsi);
1180
1181 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1182 vpxor 14 * 16(%rdx), %xmm0, %xmm1;
1183 vmovdqu %xmm0, 14 * 16(%rsi);
1184
1185 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1186 vpxor 15 * 16(%rdx), %xmm0, %xmm15;
1187 vmovdqu %xmm15, 0 * 16(%rax);
1188 vmovdqu %xmm0, 15 * 16(%rsi);
1189
1190 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1191 vmovdqu %xmm0, (%rcx);
1192
1193 /* inpack16_pre: */
1194 vmovq (key_table)(CTX, %r8, 8), %xmm15;
1195 vpshufb .Lpack_bswap, %xmm15, %xmm15;
1196 vpxor 0 * 16(%rax), %xmm15, %xmm0;
1197 vpxor %xmm1, %xmm15, %xmm1;
1198 vpxor %xmm2, %xmm15, %xmm2;
1199 vpxor %xmm3, %xmm15, %xmm3;
1200 vpxor %xmm4, %xmm15, %xmm4;
1201 vpxor %xmm5, %xmm15, %xmm5;
1202 vpxor %xmm6, %xmm15, %xmm6;
1203 vpxor %xmm7, %xmm15, %xmm7;
1204 vpxor %xmm8, %xmm15, %xmm8;
1205 vpxor %xmm9, %xmm15, %xmm9;
1206 vpxor %xmm10, %xmm15, %xmm10;
1207 vpxor %xmm11, %xmm15, %xmm11;
1208 vpxor %xmm12, %xmm15, %xmm12;
1209 vpxor %xmm13, %xmm15, %xmm13;
1210 vpxor 14 * 16(%rax), %xmm15, %xmm14;
1211 vpxor 15 * 16(%rax), %xmm15, %xmm15;
1212
1213 call *%r9;
1214
1215 addq $(16 * 16), %rsp;
1216
1217 vpxor 0 * 16(%rsi), %xmm7, %xmm7;
1218 vpxor 1 * 16(%rsi), %xmm6, %xmm6;
1219 vpxor 2 * 16(%rsi), %xmm5, %xmm5;
1220 vpxor 3 * 16(%rsi), %xmm4, %xmm4;
1221 vpxor 4 * 16(%rsi), %xmm3, %xmm3;
1222 vpxor 5 * 16(%rsi), %xmm2, %xmm2;
1223 vpxor 6 * 16(%rsi), %xmm1, %xmm1;
1224 vpxor 7 * 16(%rsi), %xmm0, %xmm0;
1225 vpxor 8 * 16(%rsi), %xmm15, %xmm15;
1226 vpxor 9 * 16(%rsi), %xmm14, %xmm14;
1227 vpxor 10 * 16(%rsi), %xmm13, %xmm13;
1228 vpxor 11 * 16(%rsi), %xmm12, %xmm12;
1229 vpxor 12 * 16(%rsi), %xmm11, %xmm11;
1230 vpxor 13 * 16(%rsi), %xmm10, %xmm10;
1231 vpxor 14 * 16(%rsi), %xmm9, %xmm9;
1232 vpxor 15 * 16(%rsi), %xmm8, %xmm8;
1233 write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
1234 %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
1235 %xmm8, %rsi);
1236
1237 ret;
1238ENDPROC(camellia_xts_crypt_16way)
1239
1240ENTRY(camellia_xts_enc_16way)
1241 /* input:
1242 * %rdi: ctx, CTX
1243 * %rsi: dst (16 blocks)
1244 * %rdx: src (16 blocks)
1245 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
1246 */
1247 xorl %r8d, %r8d; /* input whitening key, 0 for enc */
1248
1249 leaq __camellia_enc_blk16, %r9;
1250
1251 jmp camellia_xts_crypt_16way;
1252ENDPROC(camellia_xts_enc_16way)
1253
1254ENTRY(camellia_xts_dec_16way)
1255 /* input:
1256 * %rdi: ctx, CTX
1257 * %rsi: dst (16 blocks)
1258 * %rdx: src (16 blocks)
1259 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
1260 */
1261
1262 cmpl $16, key_length(CTX);
1263 movl $32, %r8d;
1264 movl $24, %eax;
1265 cmovel %eax, %r8d; /* input whitening key, last for dec */
1266
1267 leaq __camellia_dec_blk16, %r9;
1268
1269 jmp camellia_xts_crypt_16way;
1270ENDPROC(camellia_xts_dec_16way)
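
In the XTS helpers added above, gf128mul_x_ble() advances the tweak by one multiplication by α in GF(2¹²⁸): vpaddq doubles each 64-bit half, and the .Lxts_gf128mul_and_shl1_mask constant (0x87 in byte 0, 1 in byte 8) lets the vpand/vpxor pair fold in both the carry from the low qword and the x¹²⁸ reduction polynomial. A plain C sketch of the same per-block step (illustrative only; the struct and function names here are made up, not the kernel's gf128mul helpers):

#include <stdint.h>

/* 128-bit XTS tweak as two little-endian 64-bit limbs (low, high) */
struct xts_tweak {
	uint64_t lo;
	uint64_t hi;
};

/* Multiply the tweak by alpha in GF(2^128), little-endian block convention. */
static void xts_tweak_mul_alpha(struct xts_tweak *t)
{
	uint64_t carry_lo = t->lo >> 63;	/* bit that moves into the high limb */
	uint64_t carry_hi = t->hi >> 63;	/* bit shifted out of the 128-bit value */

	t->hi = (t->hi << 1) | carry_lo;
	t->lo = (t->lo << 1) ^ (carry_hi ? 0x87 : 0);	/* x^128 = x^7 + x^2 + x + 1 */
}
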
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
new file mode 100644
index 000000000000..91a1878fcc3e
--- /dev/null
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -0,0 +1,1368 @@
1/*
2 * x86_64/AVX2/AES-NI assembler implementation of Camellia
3 *
4 * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/linkage.h>
14
15#define CAMELLIA_TABLE_BYTE_LEN 272
16
17/* struct camellia_ctx: */
18#define key_table 0
19#define key_length CAMELLIA_TABLE_BYTE_LEN
20
21/* register macros */
22#define CTX %rdi
23#define RIO %r8
24
25/**********************************************************************
26 helper macros
27 **********************************************************************/
28#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
29 vpand x, mask4bit, tmp0; \
30 vpandn x, mask4bit, x; \
31 vpsrld $4, x, x; \
32 \
33 vpshufb tmp0, lo_t, tmp0; \
34 vpshufb x, hi_t, x; \
35 vpxor tmp0, x, x;
36
37#define ymm0_x xmm0
38#define ymm1_x xmm1
39#define ymm2_x xmm2
40#define ymm3_x xmm3
41#define ymm4_x xmm4
42#define ymm5_x xmm5
43#define ymm6_x xmm6
44#define ymm7_x xmm7
45#define ymm8_x xmm8
46#define ymm9_x xmm9
47#define ymm10_x xmm10
48#define ymm11_x xmm11
49#define ymm12_x xmm12
50#define ymm13_x xmm13
51#define ymm14_x xmm14
52#define ymm15_x xmm15
53
54/*
55 * AES-NI instructions do not support ymmX registers, so we need splitting and
56 * merging.
57 */
58#define vaesenclast256(zero, yreg, tmp) \
59 vextracti128 $1, yreg, tmp##_x; \
60 vaesenclast zero##_x, yreg##_x, yreg##_x; \
61 vaesenclast zero##_x, tmp##_x, tmp##_x; \
62 vinserti128 $1, tmp##_x, yreg, yreg;
63
64/**********************************************************************
65 32-way camellia
66 **********************************************************************/
67
68/*
69 * IN:
70 * x0..x7: byte-sliced AB state
71 * mem_cd: register pointer storing CD state
72 * key: index for key material
73 * OUT:
74 * x0..x7: new byte-sliced CD state
75 */
76#define roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \
77 t7, mem_cd, key) \
78 /* \
79 * S-function with AES subbytes \
80 */ \
81 vbroadcasti128 .Linv_shift_row, t4; \
82 vpbroadcastb .L0f0f0f0f, t7; \
83 vbroadcasti128 .Lpre_tf_lo_s1, t0; \
84 vbroadcasti128 .Lpre_tf_hi_s1, t1; \
85 \
86 /* AES inverse shift rows */ \
87 vpshufb t4, x0, x0; \
88 vpshufb t4, x7, x7; \
89 vpshufb t4, x1, x1; \
90 vpshufb t4, x4, x4; \
91 vpshufb t4, x2, x2; \
92 vpshufb t4, x5, x5; \
93 vpshufb t4, x3, x3; \
94 vpshufb t4, x6, x6; \
95 \
96 /* prefilter sboxes 1, 2 and 3 */ \
97 vbroadcasti128 .Lpre_tf_lo_s4, t2; \
98 vbroadcasti128 .Lpre_tf_hi_s4, t3; \
99 filter_8bit(x0, t0, t1, t7, t6); \
100 filter_8bit(x7, t0, t1, t7, t6); \
101 filter_8bit(x1, t0, t1, t7, t6); \
102 filter_8bit(x4, t0, t1, t7, t6); \
103 filter_8bit(x2, t0, t1, t7, t6); \
104 filter_8bit(x5, t0, t1, t7, t6); \
105 \
106 /* prefilter sbox 4 */ \
107 vpxor t4##_x, t4##_x, t4##_x; \
108 filter_8bit(x3, t2, t3, t7, t6); \
109 filter_8bit(x6, t2, t3, t7, t6); \
110 \
111 /* AES subbytes + AES shift rows */ \
112 vbroadcasti128 .Lpost_tf_lo_s1, t0; \
113 vbroadcasti128 .Lpost_tf_hi_s1, t1; \
114 vaesenclast256(t4, x0, t5); \
115 vaesenclast256(t4, x7, t5); \
116 vaesenclast256(t4, x1, t5); \
117 vaesenclast256(t4, x4, t5); \
118 vaesenclast256(t4, x2, t5); \
119 vaesenclast256(t4, x5, t5); \
120 vaesenclast256(t4, x3, t5); \
121 vaesenclast256(t4, x6, t5); \
122 \
123 /* postfilter sboxes 1 and 4 */ \
124 vbroadcasti128 .Lpost_tf_lo_s3, t2; \
125 vbroadcasti128 .Lpost_tf_hi_s3, t3; \
126 filter_8bit(x0, t0, t1, t7, t6); \
127 filter_8bit(x7, t0, t1, t7, t6); \
128 filter_8bit(x3, t0, t1, t7, t6); \
129 filter_8bit(x6, t0, t1, t7, t6); \
130 \
131 /* postfilter sbox 3 */ \
132 vbroadcasti128 .Lpost_tf_lo_s2, t4; \
133 vbroadcasti128 .Lpost_tf_hi_s2, t5; \
134 filter_8bit(x2, t2, t3, t7, t6); \
135 filter_8bit(x5, t2, t3, t7, t6); \
136 \
137 vpbroadcastq key, t0; /* higher 64-bit duplicate ignored */ \
138 \
139 /* postfilter sbox 2 */ \
140 filter_8bit(x1, t4, t5, t7, t2); \
141 filter_8bit(x4, t4, t5, t7, t2); \
142 \
143 vpsrldq $1, t0, t1; \
144 vpsrldq $2, t0, t2; \
145 vpsrldq $3, t0, t3; \
146 vpsrldq $4, t0, t4; \
147 vpsrldq $5, t0, t5; \
148 vpsrldq $6, t0, t6; \
149 vpsrldq $7, t0, t7; \
150 vpbroadcastb t0##_x, t0; \
151 vpbroadcastb t1##_x, t1; \
152 vpbroadcastb t2##_x, t2; \
153 vpbroadcastb t3##_x, t3; \
154 vpbroadcastb t4##_x, t4; \
155 vpbroadcastb t6##_x, t6; \
156 vpbroadcastb t5##_x, t5; \
157 vpbroadcastb t7##_x, t7; \
158 \
159 /* P-function */ \
160 vpxor x5, x0, x0; \
161 vpxor x6, x1, x1; \
162 vpxor x7, x2, x2; \
163 vpxor x4, x3, x3; \
164 \
165 vpxor x2, x4, x4; \
166 vpxor x3, x5, x5; \
167 vpxor x0, x6, x6; \
168 vpxor x1, x7, x7; \
169 \
170 vpxor x7, x0, x0; \
171 vpxor x4, x1, x1; \
172 vpxor x5, x2, x2; \
173 vpxor x6, x3, x3; \
174 \
175 vpxor x3, x4, x4; \
176 vpxor x0, x5, x5; \
177 vpxor x1, x6, x6; \
178 vpxor x2, x7, x7; /* note: high and low parts swapped */ \
179 \
180 /* Add key material and result to CD (x becomes new CD) */ \
181 \
182 vpxor t7, x0, x0; \
183 vpxor 4 * 32(mem_cd), x0, x0; \
184 \
185 vpxor t6, x1, x1; \
186 vpxor 5 * 32(mem_cd), x1, x1; \
187 \
188 vpxor t5, x2, x2; \
189 vpxor 6 * 32(mem_cd), x2, x2; \
190 \
191 vpxor t4, x3, x3; \
192 vpxor 7 * 32(mem_cd), x3, x3; \
193 \
194 vpxor t3, x4, x4; \
195 vpxor 0 * 32(mem_cd), x4, x4; \
196 \
197 vpxor t2, x5, x5; \
198 vpxor 1 * 32(mem_cd), x5, x5; \
199 \
200 vpxor t1, x6, x6; \
201 vpxor 2 * 32(mem_cd), x6, x6; \
202 \
203 vpxor t0, x7, x7; \
204 vpxor 3 * 32(mem_cd), x7, x7;
205
206/*
207 * Size optimization... with inlined roundsm32 binary would be over 5 times
208 * larger and would be only marginally faster.
209 */
210.align 8
211roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
212 roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
213 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
214 %rcx, (%r9));
215 ret;
216ENDPROC(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
217
218.align 8
219roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
220 roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
221 %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
222 %rax, (%r9));
223 ret;
224ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
225
226/*
227 * IN/OUT:
228 * x0..x7: byte-sliced AB state preloaded
229 * mem_ab: byte-sliced AB state in memory
230 * mem_cd: byte-sliced CD state in memory
231 */
232#define two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
233 y6, y7, mem_ab, mem_cd, i, dir, store_ab) \
234 leaq (key_table + (i) * 8)(CTX), %r9; \
235 call roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd; \
236 \
237 vmovdqu x0, 4 * 32(mem_cd); \
238 vmovdqu x1, 5 * 32(mem_cd); \
239 vmovdqu x2, 6 * 32(mem_cd); \
240 vmovdqu x3, 7 * 32(mem_cd); \
241 vmovdqu x4, 0 * 32(mem_cd); \
242 vmovdqu x5, 1 * 32(mem_cd); \
243 vmovdqu x6, 2 * 32(mem_cd); \
244 vmovdqu x7, 3 * 32(mem_cd); \
245 \
246 leaq (key_table + ((i) + (dir)) * 8)(CTX), %r9; \
247 call roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab; \
248 \
249 store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab);
250
251#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */
252
253#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \
254 /* Store new AB state */ \
255 vmovdqu x4, 4 * 32(mem_ab); \
256 vmovdqu x5, 5 * 32(mem_ab); \
257 vmovdqu x6, 6 * 32(mem_ab); \
258 vmovdqu x7, 7 * 32(mem_ab); \
259 vmovdqu x0, 0 * 32(mem_ab); \
260 vmovdqu x1, 1 * 32(mem_ab); \
261 vmovdqu x2, 2 * 32(mem_ab); \
262 vmovdqu x3, 3 * 32(mem_ab);
263
264#define enc_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
265 y6, y7, mem_ab, mem_cd, i) \
266 two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
267 y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \
268 two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
269 y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \
270 two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
271 y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store);
272
273#define dec_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
274 y6, y7, mem_ab, mem_cd, i) \
275 two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
276 y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \
277 two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
278 y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \
279 two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
280 y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store);
281
282/*
283 * IN:
284 * v0..3: byte-sliced 32-bit integers
285 * OUT:
286 * v0..3: (IN <<< 1)
287 */
288#define rol32_1_32(v0, v1, v2, v3, t0, t1, t2, zero) \
289 vpcmpgtb v0, zero, t0; \
290 vpaddb v0, v0, v0; \
291 vpabsb t0, t0; \
292 \
293 vpcmpgtb v1, zero, t1; \
294 vpaddb v1, v1, v1; \
295 vpabsb t1, t1; \
296 \
297 vpcmpgtb v2, zero, t2; \
298 vpaddb v2, v2, v2; \
299 vpabsb t2, t2; \
300 \
301 vpor t0, v1, v1; \
302 \
303 vpcmpgtb v3, zero, t0; \
304 vpaddb v3, v3, v3; \
305 vpabsb t0, t0; \
306 \
307 vpor t1, v2, v2; \
308 vpor t2, v3, v3; \
309 vpor t0, v0, v0;
310
311/*
312 * IN:
313 * r: byte-sliced AB state in memory
314 * l: byte-sliced CD state in memory
315 * OUT:
316 * x0..x7: new byte-sliced CD state
317 */
318#define fls32(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \
319 tt1, tt2, tt3, kll, klr, krl, krr) \
320 /* \
321 * t0 = kll; \
322 * t0 &= ll; \
323 * lr ^= rol32(t0, 1); \
324 */ \
325 vpbroadcastd kll, t0; /* only lowest 32-bit used */ \
326 vpxor tt0, tt0, tt0; \
327 vpbroadcastb t0##_x, t3; \
328 vpsrldq $1, t0, t0; \
329 vpbroadcastb t0##_x, t2; \
330 vpsrldq $1, t0, t0; \
331 vpbroadcastb t0##_x, t1; \
332 vpsrldq $1, t0, t0; \
333 vpbroadcastb t0##_x, t0; \
334 \
335 vpand l0, t0, t0; \
336 vpand l1, t1, t1; \
337 vpand l2, t2, t2; \
338 vpand l3, t3, t3; \
339 \
340 rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
341 \
342 vpxor l4, t0, l4; \
343 vmovdqu l4, 4 * 32(l); \
344 vpxor l5, t1, l5; \
345 vmovdqu l5, 5 * 32(l); \
346 vpxor l6, t2, l6; \
347 vmovdqu l6, 6 * 32(l); \
348 vpxor l7, t3, l7; \
349 vmovdqu l7, 7 * 32(l); \
350 \
351 /* \
352 * t2 = krr; \
353 * t2 |= rr; \
354 * rl ^= t2; \
355 */ \
356 \
357 vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
358 vpbroadcastb t0##_x, t3; \
359 vpsrldq $1, t0, t0; \
360 vpbroadcastb t0##_x, t2; \
361 vpsrldq $1, t0, t0; \
362 vpbroadcastb t0##_x, t1; \
363 vpsrldq $1, t0, t0; \
364 vpbroadcastb t0##_x, t0; \
365 \
366 vpor 4 * 32(r), t0, t0; \
367 vpor 5 * 32(r), t1, t1; \
368 vpor 6 * 32(r), t2, t2; \
369 vpor 7 * 32(r), t3, t3; \
370 \
371 vpxor 0 * 32(r), t0, t0; \
372 vpxor 1 * 32(r), t1, t1; \
373 vpxor 2 * 32(r), t2, t2; \
374 vpxor 3 * 32(r), t3, t3; \
375 vmovdqu t0, 0 * 32(r); \
376 vmovdqu t1, 1 * 32(r); \
377 vmovdqu t2, 2 * 32(r); \
378 vmovdqu t3, 3 * 32(r); \
379 \
380 /* \
381 * t2 = krl; \
382 * t2 &= rl; \
383 * rr ^= rol32(t2, 1); \
384 */ \
385 vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
386 vpbroadcastb t0##_x, t3; \
387 vpsrldq $1, t0, t0; \
388 vpbroadcastb t0##_x, t2; \
389 vpsrldq $1, t0, t0; \
390 vpbroadcastb t0##_x, t1; \
391 vpsrldq $1, t0, t0; \
392 vpbroadcastb t0##_x, t0; \
393 \
394 vpand 0 * 32(r), t0, t0; \
395 vpand 1 * 32(r), t1, t1; \
396 vpand 2 * 32(r), t2, t2; \
397 vpand 3 * 32(r), t3, t3; \
398 \
399 rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
400 \
401 vpxor 4 * 32(r), t0, t0; \
402 vpxor 5 * 32(r), t1, t1; \
403 vpxor 6 * 32(r), t2, t2; \
404 vpxor 7 * 32(r), t3, t3; \
405 vmovdqu t0, 4 * 32(r); \
406 vmovdqu t1, 5 * 32(r); \
407 vmovdqu t2, 6 * 32(r); \
408 vmovdqu t3, 7 * 32(r); \
409 \
410 /* \
411 * t0 = klr; \
412 * t0 |= lr; \
413 * ll ^= t0; \
414 */ \
415 \
416 vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
417 vpbroadcastb t0##_x, t3; \
418 vpsrldq $1, t0, t0; \
419 vpbroadcastb t0##_x, t2; \
420 vpsrldq $1, t0, t0; \
421 vpbroadcastb t0##_x, t1; \
422 vpsrldq $1, t0, t0; \
423 vpbroadcastb t0##_x, t0; \
424 \
425 vpor l4, t0, t0; \
426 vpor l5, t1, t1; \
427 vpor l6, t2, t2; \
428 vpor l7, t3, t3; \
429 \
430 vpxor l0, t0, l0; \
431 vmovdqu l0, 0 * 32(l); \
432 vpxor l1, t1, l1; \
433 vmovdqu l1, 1 * 32(l); \
434 vpxor l2, t2, l2; \
435 vmovdqu l2, 2 * 32(l); \
436 vpxor l3, t3, l3; \
437 vmovdqu l3, 3 * 32(l);
438
439#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
440 vpunpckhdq x1, x0, t2; \
441 vpunpckldq x1, x0, x0; \
442 \
443 vpunpckldq x3, x2, t1; \
444 vpunpckhdq x3, x2, x2; \
445 \
446 vpunpckhqdq t1, x0, x1; \
447 vpunpcklqdq t1, x0, x0; \
448 \
449 vpunpckhqdq x2, t2, x3; \
450 vpunpcklqdq x2, t2, x2;
451
452#define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \
453 a3, b3, c3, d3, st0, st1) \
454 vmovdqu d2, st0; \
455 vmovdqu d3, st1; \
456 transpose_4x4(a0, a1, a2, a3, d2, d3); \
457 transpose_4x4(b0, b1, b2, b3, d2, d3); \
458 vmovdqu st0, d2; \
459 vmovdqu st1, d3; \
460 \
461 vmovdqu a0, st0; \
462 vmovdqu a1, st1; \
463 transpose_4x4(c0, c1, c2, c3, a0, a1); \
464 transpose_4x4(d0, d1, d2, d3, a0, a1); \
465 \
466 vbroadcasti128 .Lshufb_16x16b, a0; \
467 vmovdqu st1, a1; \
468 vpshufb a0, a2, a2; \
469 vpshufb a0, a3, a3; \
470 vpshufb a0, b0, b0; \
471 vpshufb a0, b1, b1; \
472 vpshufb a0, b2, b2; \
473 vpshufb a0, b3, b3; \
474 vpshufb a0, a1, a1; \
475 vpshufb a0, c0, c0; \
476 vpshufb a0, c1, c1; \
477 vpshufb a0, c2, c2; \
478 vpshufb a0, c3, c3; \
479 vpshufb a0, d0, d0; \
480 vpshufb a0, d1, d1; \
481 vpshufb a0, d2, d2; \
482 vpshufb a0, d3, d3; \
483 vmovdqu d3, st1; \
484 vmovdqu st0, d3; \
485 vpshufb a0, d3, a0; \
486 vmovdqu d2, st0; \
487 \
488 transpose_4x4(a0, b0, c0, d0, d2, d3); \
489 transpose_4x4(a1, b1, c1, d1, d2, d3); \
490 vmovdqu st0, d2; \
491 vmovdqu st1, d3; \
492 \
493 vmovdqu b0, st0; \
494 vmovdqu b1, st1; \
495 transpose_4x4(a2, b2, c2, d2, b0, b1); \
496 transpose_4x4(a3, b3, c3, d3, b0, b1); \
497 vmovdqu st0, b0; \
498 vmovdqu st1, b1; \
499 /* does not adjust output bytes inside vectors */
500
501/* load blocks to registers and apply pre-whitening */
502#define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
503 y6, y7, rio, key) \
504 vpbroadcastq key, x0; \
505 vpshufb .Lpack_bswap, x0, x0; \
506 \
507 vpxor 0 * 32(rio), x0, y7; \
508 vpxor 1 * 32(rio), x0, y6; \
509 vpxor 2 * 32(rio), x0, y5; \
510 vpxor 3 * 32(rio), x0, y4; \
511 vpxor 4 * 32(rio), x0, y3; \
512 vpxor 5 * 32(rio), x0, y2; \
513 vpxor 6 * 32(rio), x0, y1; \
514 vpxor 7 * 32(rio), x0, y0; \
515 vpxor 8 * 32(rio), x0, x7; \
516 vpxor 9 * 32(rio), x0, x6; \
517 vpxor 10 * 32(rio), x0, x5; \
518 vpxor 11 * 32(rio), x0, x4; \
519 vpxor 12 * 32(rio), x0, x3; \
520 vpxor 13 * 32(rio), x0, x2; \
521 vpxor 14 * 32(rio), x0, x1; \
522 vpxor 15 * 32(rio), x0, x0;
523
524/* byteslice pre-whitened blocks and store to temporary memory */
525#define inpack32_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
526 y6, y7, mem_ab, mem_cd) \
527 byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \
528 y4, y5, y6, y7, (mem_ab), (mem_cd)); \
529 \
530 vmovdqu x0, 0 * 32(mem_ab); \
531 vmovdqu x1, 1 * 32(mem_ab); \
532 vmovdqu x2, 2 * 32(mem_ab); \
533 vmovdqu x3, 3 * 32(mem_ab); \
534 vmovdqu x4, 4 * 32(mem_ab); \
535 vmovdqu x5, 5 * 32(mem_ab); \
536 vmovdqu x6, 6 * 32(mem_ab); \
537 vmovdqu x7, 7 * 32(mem_ab); \
538 vmovdqu y0, 0 * 32(mem_cd); \
539 vmovdqu y1, 1 * 32(mem_cd); \
540 vmovdqu y2, 2 * 32(mem_cd); \
541 vmovdqu y3, 3 * 32(mem_cd); \
542 vmovdqu y4, 4 * 32(mem_cd); \
543 vmovdqu y5, 5 * 32(mem_cd); \
544 vmovdqu y6, 6 * 32(mem_cd); \
545 vmovdqu y7, 7 * 32(mem_cd);
546
547/* de-byteslice, apply post-whitening and store blocks */
548#define outunpack32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
549 y5, y6, y7, key, stack_tmp0, stack_tmp1) \
550 byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \
551 y3, y7, x3, x7, stack_tmp0, stack_tmp1); \
552 \
553 vmovdqu x0, stack_tmp0; \
554 \
555 vpbroadcastq key, x0; \
556 vpshufb .Lpack_bswap, x0, x0; \
557 \
558 vpxor x0, y7, y7; \
559 vpxor x0, y6, y6; \
560 vpxor x0, y5, y5; \
561 vpxor x0, y4, y4; \
562 vpxor x0, y3, y3; \
563 vpxor x0, y2, y2; \
564 vpxor x0, y1, y1; \
565 vpxor x0, y0, y0; \
566 vpxor x0, x7, x7; \
567 vpxor x0, x6, x6; \
568 vpxor x0, x5, x5; \
569 vpxor x0, x4, x4; \
570 vpxor x0, x3, x3; \
571 vpxor x0, x2, x2; \
572 vpxor x0, x1, x1; \
573 vpxor stack_tmp0, x0, x0;
574
575#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
576 y6, y7, rio) \
577 vmovdqu x0, 0 * 32(rio); \
578 vmovdqu x1, 1 * 32(rio); \
579 vmovdqu x2, 2 * 32(rio); \
580 vmovdqu x3, 3 * 32(rio); \
581 vmovdqu x4, 4 * 32(rio); \
582 vmovdqu x5, 5 * 32(rio); \
583 vmovdqu x6, 6 * 32(rio); \
584 vmovdqu x7, 7 * 32(rio); \
585 vmovdqu y0, 8 * 32(rio); \
586 vmovdqu y1, 9 * 32(rio); \
587 vmovdqu y2, 10 * 32(rio); \
588 vmovdqu y3, 11 * 32(rio); \
589 vmovdqu y4, 12 * 32(rio); \
590 vmovdqu y5, 13 * 32(rio); \
591 vmovdqu y6, 14 * 32(rio); \
592 vmovdqu y7, 15 * 32(rio);
593
594.data
595.align 32
596
597#define SHUFB_BYTES(idx) \
598 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
599
600.Lshufb_16x16b:
601 .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
602 .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
603
604.Lpack_bswap:
605 .long 0x00010203, 0x04050607, 0x80808080, 0x80808080
606 .long 0x00010203, 0x04050607, 0x80808080, 0x80808080
607
608/* For CTR-mode IV byteswap */
609.Lbswap128_mask:
610 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
611
612/* For XTS mode */
613.Lxts_gf128mul_and_shl1_mask_0:
614 .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
615.Lxts_gf128mul_and_shl1_mask_1:
616 .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
617
618/*
619 * pre-SubByte transform
620 *
621 * pre-lookup for sbox1, sbox2, sbox3:
622 * swap_bitendianness(
623 * isom_map_camellia_to_aes(
624 * camellia_f(
625 * swap_bitendianness(in)
626 * )
627 * )
628 * )
629 *
630 * (note: '⊕ 0xc5' inside camellia_f())
631 */
632.Lpre_tf_lo_s1:
633 .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86
634 .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88
635.Lpre_tf_hi_s1:
636 .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a
637 .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23
638
639/*
640 * pre-SubByte transform
641 *
642 * pre-lookup for sbox4:
643 * swap_bitendianness(
644 * isom_map_camellia_to_aes(
645 * camellia_f(
646 * swap_bitendianness(in <<< 1)
647 * )
648 * )
649 * )
650 *
651 * (note: '⊕ 0xc5' inside camellia_f())
652 */
653.Lpre_tf_lo_s4:
654 .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25
655 .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74
656.Lpre_tf_hi_s4:
657 .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72
658 .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf
659
660/*
661 * post-SubByte transform
662 *
663 * post-lookup for sbox1, sbox4:
664 * swap_bitendianness(
665 * camellia_h(
666 * isom_map_aes_to_camellia(
667 * swap_bitendianness(
668 * aes_inverse_affine_transform(in)
669 * )
670 * )
671 * )
672 * )
673 *
674 * (note: '⊕ 0x6e' inside camellia_h())
675 */
676.Lpost_tf_lo_s1:
677 .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31
678 .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1
679.Lpost_tf_hi_s1:
680 .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8
681 .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c
682
683/*
684 * post-SubByte transform
685 *
686 * post-lookup for sbox2:
687 * swap_bitendianness(
688 * camellia_h(
689 * isom_map_aes_to_camellia(
690 * swap_bitendianness(
691 * aes_inverse_affine_transform(in)
692 * )
693 * )
694 * )
695 * ) <<< 1
696 *
697 * (note: '⊕ 0x6e' inside camellia_h())
698 */
699.Lpost_tf_lo_s2:
700 .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62
701 .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3
702.Lpost_tf_hi_s2:
703 .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51
704 .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18
705
706/*
707 * post-SubByte transform
708 *
709 * post-lookup for sbox3:
710 * swap_bitendianness(
711 * camellia_h(
712 * isom_map_aes_to_camellia(
713 * swap_bitendianness(
714 * aes_inverse_affine_transform(in)
715 * )
716 * )
717 * )
718 * ) >>> 1
719 *
720 * (note: '⊕ 0x6e' inside camellia_h())
721 */
722.Lpost_tf_lo_s3:
723 .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98
724 .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8
725.Lpost_tf_hi_s3:
726 .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54
727 .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06
728
729/* For isolating SubBytes from AESENCLAST, inverse shift row */
730.Linv_shift_row:
731 .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
732 .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
733
734.align 4
735/* 4-bit mask */
736.L0f0f0f0f:
737 .long 0x0f0f0f0f
738
739.text
740
741.align 8
742__camellia_enc_blk32:
743 /* input:
744 * %rdi: ctx, CTX
745 * %rax: temporary storage, 512 bytes
746 * %ymm0..%ymm15: 32 plaintext blocks
747 * output:
748 * %ymm0..%ymm15: 32 encrypted blocks, order swapped:
749 * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
750 */
751
752 leaq 8 * 32(%rax), %rcx;
753
754 inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
755 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
756 %ymm15, %rax, %rcx);
757
758 enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
759 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
760 %ymm15, %rax, %rcx, 0);
761
762 fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
763 %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
764 %ymm15,
765 ((key_table + (8) * 8) + 0)(CTX),
766 ((key_table + (8) * 8) + 4)(CTX),
767 ((key_table + (8) * 8) + 8)(CTX),
768 ((key_table + (8) * 8) + 12)(CTX));
769
770 enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
771 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
772 %ymm15, %rax, %rcx, 8);
773
774 fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
775 %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
776 %ymm15,
777 ((key_table + (16) * 8) + 0)(CTX),
778 ((key_table + (16) * 8) + 4)(CTX),
779 ((key_table + (16) * 8) + 8)(CTX),
780 ((key_table + (16) * 8) + 12)(CTX));
781
782 enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
783 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
784 %ymm15, %rax, %rcx, 16);
785
786 movl $24, %r8d;
787 cmpl $16, key_length(CTX);
788 jne .Lenc_max32;
789
790.Lenc_done:
791 /* load CD for output */
792 vmovdqu 0 * 32(%rcx), %ymm8;
793 vmovdqu 1 * 32(%rcx), %ymm9;
794 vmovdqu 2 * 32(%rcx), %ymm10;
795 vmovdqu 3 * 32(%rcx), %ymm11;
796 vmovdqu 4 * 32(%rcx), %ymm12;
797 vmovdqu 5 * 32(%rcx), %ymm13;
798 vmovdqu 6 * 32(%rcx), %ymm14;
799 vmovdqu 7 * 32(%rcx), %ymm15;
800
801 outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
802 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
803 %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax));
804
805 ret;
806
807.align 8
808.Lenc_max32:
809 movl $32, %r8d;
810
811 fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
812 %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
813 %ymm15,
814 ((key_table + (24) * 8) + 0)(CTX),
815 ((key_table + (24) * 8) + 4)(CTX),
816 ((key_table + (24) * 8) + 8)(CTX),
817 ((key_table + (24) * 8) + 12)(CTX));
818
819 enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
820 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
821 %ymm15, %rax, %rcx, 24);
822
823 jmp .Lenc_done;
824ENDPROC(__camellia_enc_blk32)
825
826.align 8
827__camellia_dec_blk32:
828 /* input:
829 * %rdi: ctx, CTX
830 * %rax: temporary storage, 512 bytes
831 * %r8d: 24 for 16 byte key, 32 for larger
832 * %ymm0..%ymm15: 32 encrypted blocks
833 * output:
834 * %ymm0..%ymm15: 32 plaintext blocks, order swapped:
835 * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
836 */
837
838 leaq 8 * 32(%rax), %rcx;
839
840 inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
841 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
842 %ymm15, %rax, %rcx);
843
844 cmpl $32, %r8d;
845 je .Ldec_max32;
846
847.Ldec_max24:
848 dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
849 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
850 %ymm15, %rax, %rcx, 16);
851
852 fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
853 %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
854 %ymm15,
855 ((key_table + (16) * 8) + 8)(CTX),
856 ((key_table + (16) * 8) + 12)(CTX),
857 ((key_table + (16) * 8) + 0)(CTX),
858 ((key_table + (16) * 8) + 4)(CTX));
859
860 dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
861 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
862 %ymm15, %rax, %rcx, 8);
863
864 fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
865 %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
866 %ymm15,
867 ((key_table + (8) * 8) + 8)(CTX),
868 ((key_table + (8) * 8) + 12)(CTX),
869 ((key_table + (8) * 8) + 0)(CTX),
870 ((key_table + (8) * 8) + 4)(CTX));
871
872 dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
873 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
874 %ymm15, %rax, %rcx, 0);
875
876 /* load CD for output */
877 vmovdqu 0 * 32(%rcx), %ymm8;
878 vmovdqu 1 * 32(%rcx), %ymm9;
879 vmovdqu 2 * 32(%rcx), %ymm10;
880 vmovdqu 3 * 32(%rcx), %ymm11;
881 vmovdqu 4 * 32(%rcx), %ymm12;
882 vmovdqu 5 * 32(%rcx), %ymm13;
883 vmovdqu 6 * 32(%rcx), %ymm14;
884 vmovdqu 7 * 32(%rcx), %ymm15;
885
886 outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
887 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
888 %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
889
890 ret;
891
892.align 8
893.Ldec_max32:
894 dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
895 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
896 %ymm15, %rax, %rcx, 24);
897
898 fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
899 %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
900 %ymm15,
901 ((key_table + (24) * 8) + 8)(CTX),
902 ((key_table + (24) * 8) + 12)(CTX),
903 ((key_table + (24) * 8) + 0)(CTX),
904 ((key_table + (24) * 8) + 4)(CTX));
905
906 jmp .Ldec_max24;
907ENDPROC(__camellia_dec_blk32)
908
909ENTRY(camellia_ecb_enc_32way)
910 /* input:
911 * %rdi: ctx, CTX
912 * %rsi: dst (32 blocks)
913 * %rdx: src (32 blocks)
914 */
915
916 vzeroupper;
917
918 inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
919 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
920 %ymm15, %rdx, (key_table)(CTX));
921
922 /* now dst can be used as temporary buffer (even in src == dst case) */
923 movq %rsi, %rax;
924
925 call __camellia_enc_blk32;
926
927 write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
928 %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
929 %ymm8, %rsi);
930
931 vzeroupper;
932
933 ret;
934ENDPROC(camellia_ecb_enc_32way)
935
936ENTRY(camellia_ecb_dec_32way)
937 /* input:
938 * %rdi: ctx, CTX
939 * %rsi: dst (32 blocks)
940 * %rdx: src (32 blocks)
941 */
942
943 vzeroupper;
944
945 cmpl $16, key_length(CTX);
946 movl $32, %r8d;
947 movl $24, %eax;
948 cmovel %eax, %r8d; /* max */
949
950 inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
951 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
952 %ymm15, %rdx, (key_table)(CTX, %r8, 8));
953
954 /* now dst can be used as temporary buffer (even in src == dst case) */
955 movq %rsi, %rax;
956
957 call __camellia_dec_blk32;
958
959 write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
960 %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
961 %ymm8, %rsi);
962
963 vzeroupper;
964
965 ret;
966ENDPROC(camellia_ecb_dec_32way)
967
968ENTRY(camellia_cbc_dec_32way)
969 /* input:
970 * %rdi: ctx, CTX
971 * %rsi: dst (32 blocks)
972 * %rdx: src (32 blocks)
973 */
974
975 vzeroupper;
976
977 cmpl $16, key_length(CTX);
978 movl $32, %r8d;
979 movl $24, %eax;
980 cmovel %eax, %r8d; /* max */
981
982 inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
983 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
984 %ymm15, %rdx, (key_table)(CTX, %r8, 8));
985
986 movq %rsp, %r10;
987 cmpq %rsi, %rdx;
988 je .Lcbc_dec_use_stack;
989
990 /* dst can be used as temporary storage, src is not overwritten. */
991 movq %rsi, %rax;
992 jmp .Lcbc_dec_continue;
993
994.Lcbc_dec_use_stack:
995 /*
996 * dst still in-use (because dst == src), so use stack for temporary
997 * storage.
998 */
999 subq $(16 * 32), %rsp;
1000 movq %rsp, %rax;
1001
1002.Lcbc_dec_continue:
1003 call __camellia_dec_blk32;
1004
1005 vmovdqu %ymm7, (%rax);
1006 vpxor %ymm7, %ymm7, %ymm7;
1007 vinserti128 $1, (%rdx), %ymm7, %ymm7;
1008 vpxor (%rax), %ymm7, %ymm7;
1009 movq %r10, %rsp;
1010 vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6;
1011 vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5;
1012 vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4;
1013 vpxor (3 * 32 + 16)(%rdx), %ymm3, %ymm3;
1014 vpxor (4 * 32 + 16)(%rdx), %ymm2, %ymm2;
1015 vpxor (5 * 32 + 16)(%rdx), %ymm1, %ymm1;
1016 vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm0;
1017 vpxor (7 * 32 + 16)(%rdx), %ymm15, %ymm15;
1018 vpxor (8 * 32 + 16)(%rdx), %ymm14, %ymm14;
1019 vpxor (9 * 32 + 16)(%rdx), %ymm13, %ymm13;
1020 vpxor (10 * 32 + 16)(%rdx), %ymm12, %ymm12;
1021 vpxor (11 * 32 + 16)(%rdx), %ymm11, %ymm11;
1022 vpxor (12 * 32 + 16)(%rdx), %ymm10, %ymm10;
1023 vpxor (13 * 32 + 16)(%rdx), %ymm9, %ymm9;
1024 vpxor (14 * 32 + 16)(%rdx), %ymm8, %ymm8;
1025 write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
1026 %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
1027 %ymm8, %rsi);
1028
1029 vzeroupper;
1030
1031 ret;
1032ENDPROC(camellia_cbc_dec_32way)
1033
1034#define inc_le128(x, minus_one, tmp) \
1035 vpcmpeqq minus_one, x, tmp; \
1036 vpsubq minus_one, x, x; \
1037 vpslldq $8, tmp, tmp; \
1038 vpsubq tmp, x, x;
1039
1040#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \
1041 vpcmpeqq minus_one, x, tmp1; \
1042 vpcmpeqq minus_two, x, tmp2; \
1043 vpsubq minus_two, x, x; \
1044 vpor tmp2, tmp1, tmp1; \
1045 vpslldq $8, tmp1, tmp1; \
1046 vpsubq tmp1, x, x;
1047
1048ENTRY(camellia_ctr_32way)
1049 /* input:
1050 * %rdi: ctx, CTX
1051 * %rsi: dst (32 blocks)
1052 * %rdx: src (32 blocks)
1053 * %rcx: iv (little endian, 128bit)
1054 */
1055
1056 vzeroupper;
1057
1058 movq %rsp, %r10;
1059 cmpq %rsi, %rdx;
1060 je .Lctr_use_stack;
1061
1062 /* dst can be used as temporary storage, src is not overwritten. */
1063 movq %rsi, %rax;
1064 jmp .Lctr_continue;
1065
1066.Lctr_use_stack:
1067 subq $(16 * 32), %rsp;
1068 movq %rsp, %rax;
1069
1070.Lctr_continue:
1071 vpcmpeqd %ymm15, %ymm15, %ymm15;
1072 vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */
1073 vpaddq %ymm15, %ymm15, %ymm12; /* ab: -2:0 ; cd: -2:0 */
1074
1075 /* load IV and byteswap */
1076 vmovdqu (%rcx), %xmm0;
1077 vmovdqa %xmm0, %xmm1;
1078 inc_le128(%xmm0, %xmm15, %xmm14);
1079 vbroadcasti128 .Lbswap128_mask, %ymm14;
1080 vinserti128 $1, %xmm0, %ymm1, %ymm0;
1081 vpshufb %ymm14, %ymm0, %ymm13;
1082 vmovdqu %ymm13, 15 * 32(%rax);
1083
1084 /* construct IVs */
1085 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); /* ab:le2 ; cd:le3 */
1086 vpshufb %ymm14, %ymm0, %ymm13;
1087 vmovdqu %ymm13, 14 * 32(%rax);
1088 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1089 vpshufb %ymm14, %ymm0, %ymm13;
1090 vmovdqu %ymm13, 13 * 32(%rax);
1091 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1092 vpshufb %ymm14, %ymm0, %ymm13;
1093 vmovdqu %ymm13, 12 * 32(%rax);
1094 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1095 vpshufb %ymm14, %ymm0, %ymm13;
1096 vmovdqu %ymm13, 11 * 32(%rax);
1097 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1098 vpshufb %ymm14, %ymm0, %ymm10;
1099 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1100 vpshufb %ymm14, %ymm0, %ymm9;
1101 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1102 vpshufb %ymm14, %ymm0, %ymm8;
1103 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1104 vpshufb %ymm14, %ymm0, %ymm7;
1105 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1106 vpshufb %ymm14, %ymm0, %ymm6;
1107 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1108 vpshufb %ymm14, %ymm0, %ymm5;
1109 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1110 vpshufb %ymm14, %ymm0, %ymm4;
1111 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1112 vpshufb %ymm14, %ymm0, %ymm3;
1113 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1114 vpshufb %ymm14, %ymm0, %ymm2;
1115 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1116 vpshufb %ymm14, %ymm0, %ymm1;
1117 add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
1118 vextracti128 $1, %ymm0, %xmm13;
1119 vpshufb %ymm14, %ymm0, %ymm0;
1120 inc_le128(%xmm13, %xmm15, %xmm14);
1121 vmovdqu %xmm13, (%rcx);
1122
1123 /* inpack32_pre: */
1124 vpbroadcastq (key_table)(CTX), %ymm15;
1125 vpshufb .Lpack_bswap, %ymm15, %ymm15;
1126 vpxor %ymm0, %ymm15, %ymm0;
1127 vpxor %ymm1, %ymm15, %ymm1;
1128 vpxor %ymm2, %ymm15, %ymm2;
1129 vpxor %ymm3, %ymm15, %ymm3;
1130 vpxor %ymm4, %ymm15, %ymm4;
1131 vpxor %ymm5, %ymm15, %ymm5;
1132 vpxor %ymm6, %ymm15, %ymm6;
1133 vpxor %ymm7, %ymm15, %ymm7;
1134 vpxor %ymm8, %ymm15, %ymm8;
1135 vpxor %ymm9, %ymm15, %ymm9;
1136 vpxor %ymm10, %ymm15, %ymm10;
1137 vpxor 11 * 32(%rax), %ymm15, %ymm11;
1138 vpxor 12 * 32(%rax), %ymm15, %ymm12;
1139 vpxor 13 * 32(%rax), %ymm15, %ymm13;
1140 vpxor 14 * 32(%rax), %ymm15, %ymm14;
1141 vpxor 15 * 32(%rax), %ymm15, %ymm15;
1142
1143 call __camellia_enc_blk32;
1144
1145 movq %r10, %rsp;
1146
1147 vpxor 0 * 32(%rdx), %ymm7, %ymm7;
1148 vpxor 1 * 32(%rdx), %ymm6, %ymm6;
1149 vpxor 2 * 32(%rdx), %ymm5, %ymm5;
1150 vpxor 3 * 32(%rdx), %ymm4, %ymm4;
1151 vpxor 4 * 32(%rdx), %ymm3, %ymm3;
1152 vpxor 5 * 32(%rdx), %ymm2, %ymm2;
1153 vpxor 6 * 32(%rdx), %ymm1, %ymm1;
1154 vpxor 7 * 32(%rdx), %ymm0, %ymm0;
1155 vpxor 8 * 32(%rdx), %ymm15, %ymm15;
1156 vpxor 9 * 32(%rdx), %ymm14, %ymm14;
1157 vpxor 10 * 32(%rdx), %ymm13, %ymm13;
1158 vpxor 11 * 32(%rdx), %ymm12, %ymm12;
1159 vpxor 12 * 32(%rdx), %ymm11, %ymm11;
1160 vpxor 13 * 32(%rdx), %ymm10, %ymm10;
1161 vpxor 14 * 32(%rdx), %ymm9, %ymm9;
1162 vpxor 15 * 32(%rdx), %ymm8, %ymm8;
1163 write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
1164 %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
1165 %ymm8, %rsi);
1166
1167 vzeroupper;
1168
1169 ret;
1170ENDPROC(camellia_ctr_32way)
1171
1172#define gf128mul_x_ble(iv, mask, tmp) \
1173 vpsrad $31, iv, tmp; \
1174 vpaddq iv, iv, iv; \
1175 vpshufd $0x13, tmp, tmp; \
1176 vpand mask, tmp, tmp; \
1177 vpxor tmp, iv, iv;
1178
1179#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \
1180 vpsrad $31, iv, tmp0; \
1181 vpaddq iv, iv, tmp1; \
1182 vpsllq $2, iv, iv; \
1183 vpshufd $0x13, tmp0, tmp0; \
1184 vpsrad $31, tmp1, tmp1; \
1185 vpand mask2, tmp0, tmp0; \
1186 vpshufd $0x13, tmp1, tmp1; \
1187 vpxor tmp0, iv, iv; \
1188 vpand mask1, tmp1, tmp1; \
1189 vpxor tmp1, iv, iv;
1190
1191.align 8
1192camellia_xts_crypt_32way:
1193 /* input:
1194 * %rdi: ctx, CTX
1195 * %rsi: dst (32 blocks)
1196 * %rdx: src (32 blocks)
1197 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
1198 * %r8: index for input whitening key
1199 * %r9: pointer to __camellia_enc_blk32 or __camellia_dec_blk32
1200 */
1201
1202 vzeroupper;
1203
1204 subq $(16 * 32), %rsp;
1205 movq %rsp, %rax;
1206
1207 vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_0, %ymm12;
1208
1209 /* load IV and construct second IV */
1210 vmovdqu (%rcx), %xmm0;
1211 vmovdqa %xmm0, %xmm15;
1212 gf128mul_x_ble(%xmm0, %xmm12, %xmm13);
1213 vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_1, %ymm13;
1214 vinserti128 $1, %xmm0, %ymm15, %ymm0;
1215 vpxor 0 * 32(%rdx), %ymm0, %ymm15;
1216 vmovdqu %ymm15, 15 * 32(%rax);
1217 vmovdqu %ymm0, 0 * 32(%rsi);
1218
1219 /* construct IVs */
1220 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1221 vpxor 1 * 32(%rdx), %ymm0, %ymm15;
1222 vmovdqu %ymm15, 14 * 32(%rax);
1223 vmovdqu %ymm0, 1 * 32(%rsi);
1224
1225 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1226 vpxor 2 * 32(%rdx), %ymm0, %ymm15;
1227 vmovdqu %ymm15, 13 * 32(%rax);
1228 vmovdqu %ymm0, 2 * 32(%rsi);
1229
1230 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1231 vpxor 3 * 32(%rdx), %ymm0, %ymm15;
1232 vmovdqu %ymm15, 12 * 32(%rax);
1233 vmovdqu %ymm0, 3 * 32(%rsi);
1234
1235 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1236 vpxor 4 * 32(%rdx), %ymm0, %ymm11;
1237 vmovdqu %ymm0, 4 * 32(%rsi);
1238
1239 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1240 vpxor 5 * 32(%rdx), %ymm0, %ymm10;
1241 vmovdqu %ymm0, 5 * 32(%rsi);
1242
1243 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1244 vpxor 6 * 32(%rdx), %ymm0, %ymm9;
1245 vmovdqu %ymm0, 6 * 32(%rsi);
1246
1247 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1248 vpxor 7 * 32(%rdx), %ymm0, %ymm8;
1249 vmovdqu %ymm0, 7 * 32(%rsi);
1250
1251 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1252 vpxor 8 * 32(%rdx), %ymm0, %ymm7;
1253 vmovdqu %ymm0, 8 * 32(%rsi);
1254
1255 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1256 vpxor 9 * 32(%rdx), %ymm0, %ymm6;
1257 vmovdqu %ymm0, 9 * 32(%rsi);
1258
1259 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1260 vpxor 10 * 32(%rdx), %ymm0, %ymm5;
1261 vmovdqu %ymm0, 10 * 32(%rsi);
1262
1263 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1264 vpxor 11 * 32(%rdx), %ymm0, %ymm4;
1265 vmovdqu %ymm0, 11 * 32(%rsi);
1266
1267 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1268 vpxor 12 * 32(%rdx), %ymm0, %ymm3;
1269 vmovdqu %ymm0, 12 * 32(%rsi);
1270
1271 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1272 vpxor 13 * 32(%rdx), %ymm0, %ymm2;
1273 vmovdqu %ymm0, 13 * 32(%rsi);
1274
1275 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1276 vpxor 14 * 32(%rdx), %ymm0, %ymm1;
1277 vmovdqu %ymm0, 14 * 32(%rsi);
1278
1279 gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
1280 vpxor 15 * 32(%rdx), %ymm0, %ymm15;
1281 vmovdqu %ymm15, 0 * 32(%rax);
1282 vmovdqu %ymm0, 15 * 32(%rsi);
1283
1284 vextracti128 $1, %ymm0, %xmm0;
1285 gf128mul_x_ble(%xmm0, %xmm12, %xmm15);
1286 vmovdqu %xmm0, (%rcx);
1287
1288 /* inpack32_pre: */
1289 vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15;
1290 vpshufb .Lpack_bswap, %ymm15, %ymm15;
1291 vpxor 0 * 32(%rax), %ymm15, %ymm0;
1292 vpxor %ymm1, %ymm15, %ymm1;
1293 vpxor %ymm2, %ymm15, %ymm2;
1294 vpxor %ymm3, %ymm15, %ymm3;
1295 vpxor %ymm4, %ymm15, %ymm4;
1296 vpxor %ymm5, %ymm15, %ymm5;
1297 vpxor %ymm6, %ymm15, %ymm6;
1298 vpxor %ymm7, %ymm15, %ymm7;
1299 vpxor %ymm8, %ymm15, %ymm8;
1300 vpxor %ymm9, %ymm15, %ymm9;
1301 vpxor %ymm10, %ymm15, %ymm10;
1302 vpxor %ymm11, %ymm15, %ymm11;
1303 vpxor 12 * 32(%rax), %ymm15, %ymm12;
1304 vpxor 13 * 32(%rax), %ymm15, %ymm13;
1305 vpxor 14 * 32(%rax), %ymm15, %ymm14;
1306 vpxor 15 * 32(%rax), %ymm15, %ymm15;
1307
1308 call *%r9;
1309
1310 addq $(16 * 32), %rsp;
1311
1312 vpxor 0 * 32(%rsi), %ymm7, %ymm7;
1313 vpxor 1 * 32(%rsi), %ymm6, %ymm6;
1314 vpxor 2 * 32(%rsi), %ymm5, %ymm5;
1315 vpxor 3 * 32(%rsi), %ymm4, %ymm4;
1316 vpxor 4 * 32(%rsi), %ymm3, %ymm3;
1317 vpxor 5 * 32(%rsi), %ymm2, %ymm2;
1318 vpxor 6 * 32(%rsi), %ymm1, %ymm1;
1319 vpxor 7 * 32(%rsi), %ymm0, %ymm0;
1320 vpxor 8 * 32(%rsi), %ymm15, %ymm15;
1321 vpxor 9 * 32(%rsi), %ymm14, %ymm14;
1322 vpxor 10 * 32(%rsi), %ymm13, %ymm13;
1323 vpxor 11 * 32(%rsi), %ymm12, %ymm12;
1324 vpxor 12 * 32(%rsi), %ymm11, %ymm11;
1325 vpxor 13 * 32(%rsi), %ymm10, %ymm10;
1326 vpxor 14 * 32(%rsi), %ymm9, %ymm9;
1327 vpxor 15 * 32(%rsi), %ymm8, %ymm8;
1328 write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
1329 %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
1330 %ymm8, %rsi);
1331
1332 vzeroupper;
1333
1334 ret;
1335ENDPROC(camellia_xts_crypt_32way)
1336
1337ENTRY(camellia_xts_enc_32way)
1338 /* input:
1339 * %rdi: ctx, CTX
1340 * %rsi: dst (32 blocks)
1341 * %rdx: src (32 blocks)
1342 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
1343 */
1344
1345 xorl %r8d, %r8d; /* input whitening key, 0 for enc */
1346
1347 leaq __camellia_enc_blk32, %r9;
1348
1349 jmp camellia_xts_crypt_32way;
1350ENDPROC(camellia_xts_enc_32way)
1351
1352ENTRY(camellia_xts_dec_32way)
1353 /* input:
1354 * %rdi: ctx, CTX
1355 * %rsi: dst (32 blocks)
1356 * %rdx: src (32 blocks)
1357 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
1358 */
1359
1360 cmpl $16, key_length(CTX);
1361 movl $32, %r8d;
1362 movl $24, %eax;
1363 cmovel %eax, %r8d; /* input whitening key, last for dec */
1364
1365 leaq __camellia_dec_blk32, %r9;
1366
1367 jmp camellia_xts_crypt_32way;
1368ENDPROC(camellia_xts_dec_32way)
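
The 32-way CTR code above keeps the counter as a 128-bit little-endian value and steps it with the inc_le128/add2_le128 macros: the constant register holds -1 (or -2) in its low qword and 0 in its high qword, so vpsubq adds the step to the low qword, while vpcmpeqq/vpslldq/vpsubq add 1 to the high qword when the low qword wraps. A scalar sketch of that 128-bit increment (illustrative only; names are assumptions, not kernel helpers):

#include <stdint.h>

/* 128-bit little-endian counter as two 64-bit limbs (low, high) */
struct ctr128 {
	uint64_t lo;
	uint64_t hi;
};

/* Add a small step (1 for inc_le128, 2 for add2_le128) with carry. */
static void ctr128_add(struct ctr128 *c, uint64_t step)
{
	uint64_t old_lo = c->lo;

	c->lo += step;
	if (c->lo < old_lo)	/* low limb wrapped: carry into the high limb */
		c->hi++;
}
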
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
new file mode 100644
index 000000000000..414fe5d7946b
--- /dev/null
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -0,0 +1,586 @@
1/*
2 * Glue Code for x86_64/AVX2/AES-NI assembler optimized version of Camellia
3 *
4 * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/crypto.h>
16#include <linux/err.h>
17#include <crypto/algapi.h>
18#include <crypto/ctr.h>
19#include <crypto/lrw.h>
20#include <crypto/xts.h>
21#include <asm/xcr.h>
22#include <asm/xsave.h>
23#include <asm/crypto/camellia.h>
24#include <asm/crypto/ablk_helper.h>
25#include <asm/crypto/glue_helper.h>
26
27#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
28#define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
29
30/* 32-way AVX2/AES-NI parallel cipher functions */
31asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst,
32 const u8 *src);
33asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst,
34 const u8 *src);
35
36asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst,
37 const u8 *src);
38asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst,
39 const u8 *src, le128 *iv);
40
41asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst,
42 const u8 *src, le128 *iv);
43asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst,
44 const u8 *src, le128 *iv);
45
46static const struct common_glue_ctx camellia_enc = {
47 .num_funcs = 4,
48 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
49
50 .funcs = { {
51 .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
52 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) }
53 }, {
54 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
55 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
56 }, {
57 .num_blocks = 2,
58 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
59 }, {
60 .num_blocks = 1,
61 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
62 } }
63};
64
65static const struct common_glue_ctx camellia_ctr = {
66 .num_funcs = 4,
67 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
68
69 .funcs = { {
70 .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
71 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) }
72 }, {
73 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
74 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
75 }, {
76 .num_blocks = 2,
77 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
78 }, {
79 .num_blocks = 1,
80 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
81 } }
82};
83
84static const struct common_glue_ctx camellia_enc_xts = {
85 .num_funcs = 3,
86 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
87
88 .funcs = { {
89 .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
90 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) }
91 }, {
92 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
93 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
94 }, {
95 .num_blocks = 1,
96 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
97 } }
98};
99
100static const struct common_glue_ctx camellia_dec = {
101 .num_funcs = 4,
102 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
103
104 .funcs = { {
105 .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
106 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) }
107 }, {
108 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
109 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
110 }, {
111 .num_blocks = 2,
112 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
113 }, {
114 .num_blocks = 1,
115 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
116 } }
117};
118
119static const struct common_glue_ctx camellia_dec_cbc = {
120 .num_funcs = 4,
121 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
122
123 .funcs = { {
124 .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
125 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) }
126 }, {
127 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
128 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
129 }, {
130 .num_blocks = 2,
131 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
132 }, {
133 .num_blocks = 1,
134 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
135 } }
136};
137
138static const struct common_glue_ctx camellia_dec_xts = {
139 .num_funcs = 3,
140 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
141
142 .funcs = { {
143 .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
144 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) }
145 }, {
146 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
147 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
148 }, {
149 .num_blocks = 1,
150 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
151 } }
152};
153
154static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
155 struct scatterlist *src, unsigned int nbytes)
156{
157 return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
158}
159
160static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
161 struct scatterlist *src, unsigned int nbytes)
162{
163 return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
164}
165
166static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
167 struct scatterlist *src, unsigned int nbytes)
168{
169 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
170 dst, src, nbytes);
171}
172
173static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
174 struct scatterlist *src, unsigned int nbytes)
175{
176 return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
177 nbytes);
178}
179
180static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
181 struct scatterlist *src, unsigned int nbytes)
182{
183 return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
184}
185
186static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
187{
188 return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
189 CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
190 nbytes);
191}
192
193static inline void camellia_fpu_end(bool fpu_enabled)
194{
195 glue_fpu_end(fpu_enabled);
196}
197
198static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
199 unsigned int key_len)
200{
201 return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
202 &tfm->crt_flags);
203}
204
205struct crypt_priv {
206 struct camellia_ctx *ctx;
207 bool fpu_enabled;
208};
209
210static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
211{
212 const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
213 struct crypt_priv *ctx = priv;
214 int i;
215
216 ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
217
218 if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
219 camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst);
220 srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
221 nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
222 }
223
224 if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
225 camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
226 srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
227 nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
228 }
229
230 while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
231 camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
232 srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
233 nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
234 }
235
236 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
237 camellia_enc_blk(ctx->ctx, srcdst, srcdst);
238}
239
240static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
241{
242 const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
243 struct crypt_priv *ctx = priv;
244 int i;
245
246 ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
247
248 if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
249 camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst);
250 srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
251 nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
252 }
253
254 if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
255 camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
256 srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
257 nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
258 }
259
260 while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
261 camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
262 srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
263 nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
264 }
265
266 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
267 camellia_dec_blk(ctx->ctx, srcdst, srcdst);
268}
269
270static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
271 struct scatterlist *src, unsigned int nbytes)
272{
273 struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
274 be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
275 struct crypt_priv crypt_ctx = {
276 .ctx = &ctx->camellia_ctx,
277 .fpu_enabled = false,
278 };
279 struct lrw_crypt_req req = {
280 .tbuf = buf,
281 .tbuflen = sizeof(buf),
282
283 .table_ctx = &ctx->lrw_table,
284 .crypt_ctx = &crypt_ctx,
285 .crypt_fn = encrypt_callback,
286 };
287 int ret;
288
289 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
290 ret = lrw_crypt(desc, dst, src, nbytes, &req);
291 camellia_fpu_end(crypt_ctx.fpu_enabled);
292
293 return ret;
294}
295
296static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
297 struct scatterlist *src, unsigned int nbytes)
298{
299 struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
300 be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
301 struct crypt_priv crypt_ctx = {
302 .ctx = &ctx->camellia_ctx,
303 .fpu_enabled = false,
304 };
305 struct lrw_crypt_req req = {
306 .tbuf = buf,
307 .tbuflen = sizeof(buf),
308
309 .table_ctx = &ctx->lrw_table,
310 .crypt_ctx = &crypt_ctx,
311 .crypt_fn = decrypt_callback,
312 };
313 int ret;
314
315 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
316 ret = lrw_crypt(desc, dst, src, nbytes, &req);
317 camellia_fpu_end(crypt_ctx.fpu_enabled);
318
319 return ret;
320}
321
322static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
323 struct scatterlist *src, unsigned int nbytes)
324{
325 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
326
327 return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
328 XTS_TWEAK_CAST(camellia_enc_blk),
329 &ctx->tweak_ctx, &ctx->crypt_ctx);
330}
331
332static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
333 struct scatterlist *src, unsigned int nbytes)
334{
335 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
336
337 return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
338 XTS_TWEAK_CAST(camellia_enc_blk),
339 &ctx->tweak_ctx, &ctx->crypt_ctx);
340}
341
342static struct crypto_alg cmll_algs[10] = { {
343 .cra_name = "__ecb-camellia-aesni-avx2",
344 .cra_driver_name = "__driver-ecb-camellia-aesni-avx2",
345 .cra_priority = 0,
346 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
347 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
348 .cra_ctxsize = sizeof(struct camellia_ctx),
349 .cra_alignmask = 0,
350 .cra_type = &crypto_blkcipher_type,
351 .cra_module = THIS_MODULE,
352 .cra_u = {
353 .blkcipher = {
354 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
355 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
356 .setkey = camellia_setkey,
357 .encrypt = ecb_encrypt,
358 .decrypt = ecb_decrypt,
359 },
360 },
361}, {
362 .cra_name = "__cbc-camellia-aesni-avx2",
363 .cra_driver_name = "__driver-cbc-camellia-aesni-avx2",
364 .cra_priority = 0,
365 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
366 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
367 .cra_ctxsize = sizeof(struct camellia_ctx),
368 .cra_alignmask = 0,
369 .cra_type = &crypto_blkcipher_type,
370 .cra_module = THIS_MODULE,
371 .cra_u = {
372 .blkcipher = {
373 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
374 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
375 .setkey = camellia_setkey,
376 .encrypt = cbc_encrypt,
377 .decrypt = cbc_decrypt,
378 },
379 },
380}, {
381 .cra_name = "__ctr-camellia-aesni-avx2",
382 .cra_driver_name = "__driver-ctr-camellia-aesni-avx2",
383 .cra_priority = 0,
384 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
385 .cra_blocksize = 1,
386 .cra_ctxsize = sizeof(struct camellia_ctx),
387 .cra_alignmask = 0,
388 .cra_type = &crypto_blkcipher_type,
389 .cra_module = THIS_MODULE,
390 .cra_u = {
391 .blkcipher = {
392 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
393 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
394 .ivsize = CAMELLIA_BLOCK_SIZE,
395 .setkey = camellia_setkey,
396 .encrypt = ctr_crypt,
397 .decrypt = ctr_crypt,
398 },
399 },
400}, {
401 .cra_name = "__lrw-camellia-aesni-avx2",
402 .cra_driver_name = "__driver-lrw-camellia-aesni-avx2",
403 .cra_priority = 0,
404 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
405 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
406 .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
407 .cra_alignmask = 0,
408 .cra_type = &crypto_blkcipher_type,
409 .cra_module = THIS_MODULE,
410 .cra_exit = lrw_camellia_exit_tfm,
411 .cra_u = {
412 .blkcipher = {
413 .min_keysize = CAMELLIA_MIN_KEY_SIZE +
414 CAMELLIA_BLOCK_SIZE,
415 .max_keysize = CAMELLIA_MAX_KEY_SIZE +
416 CAMELLIA_BLOCK_SIZE,
417 .ivsize = CAMELLIA_BLOCK_SIZE,
418 .setkey = lrw_camellia_setkey,
419 .encrypt = lrw_encrypt,
420 .decrypt = lrw_decrypt,
421 },
422 },
423}, {
424 .cra_name = "__xts-camellia-aesni-avx2",
425 .cra_driver_name = "__driver-xts-camellia-aesni-avx2",
426 .cra_priority = 0,
427 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
428 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
429 .cra_ctxsize = sizeof(struct camellia_xts_ctx),
430 .cra_alignmask = 0,
431 .cra_type = &crypto_blkcipher_type,
432 .cra_module = THIS_MODULE,
433 .cra_u = {
434 .blkcipher = {
435 .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
436 .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
437 .ivsize = CAMELLIA_BLOCK_SIZE,
438 .setkey = xts_camellia_setkey,
439 .encrypt = xts_encrypt,
440 .decrypt = xts_decrypt,
441 },
442 },
443}, {
444 .cra_name = "ecb(camellia)",
445 .cra_driver_name = "ecb-camellia-aesni-avx2",
446 .cra_priority = 500,
447 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
448 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
449 .cra_ctxsize = sizeof(struct async_helper_ctx),
450 .cra_alignmask = 0,
451 .cra_type = &crypto_ablkcipher_type,
452 .cra_module = THIS_MODULE,
453 .cra_init = ablk_init,
454 .cra_exit = ablk_exit,
455 .cra_u = {
456 .ablkcipher = {
457 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
458 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
459 .setkey = ablk_set_key,
460 .encrypt = ablk_encrypt,
461 .decrypt = ablk_decrypt,
462 },
463 },
464}, {
465 .cra_name = "cbc(camellia)",
466 .cra_driver_name = "cbc-camellia-aesni-avx2",
467 .cra_priority = 500,
468 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
469 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
470 .cra_ctxsize = sizeof(struct async_helper_ctx),
471 .cra_alignmask = 0,
472 .cra_type = &crypto_ablkcipher_type,
473 .cra_module = THIS_MODULE,
474 .cra_init = ablk_init,
475 .cra_exit = ablk_exit,
476 .cra_u = {
477 .ablkcipher = {
478 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
479 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
480 .ivsize = CAMELLIA_BLOCK_SIZE,
481 .setkey = ablk_set_key,
482 .encrypt = __ablk_encrypt,
483 .decrypt = ablk_decrypt,
484 },
485 },
486}, {
487 .cra_name = "ctr(camellia)",
488 .cra_driver_name = "ctr-camellia-aesni-avx2",
489 .cra_priority = 500,
490 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
491 .cra_blocksize = 1,
492 .cra_ctxsize = sizeof(struct async_helper_ctx),
493 .cra_alignmask = 0,
494 .cra_type = &crypto_ablkcipher_type,
495 .cra_module = THIS_MODULE,
496 .cra_init = ablk_init,
497 .cra_exit = ablk_exit,
498 .cra_u = {
499 .ablkcipher = {
500 .min_keysize = CAMELLIA_MIN_KEY_SIZE,
501 .max_keysize = CAMELLIA_MAX_KEY_SIZE,
502 .ivsize = CAMELLIA_BLOCK_SIZE,
503 .setkey = ablk_set_key,
504 .encrypt = ablk_encrypt,
505 .decrypt = ablk_encrypt,
506 .geniv = "chainiv",
507 },
508 },
509}, {
510 .cra_name = "lrw(camellia)",
511 .cra_driver_name = "lrw-camellia-aesni-avx2",
512 .cra_priority = 500,
513 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
514 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
515 .cra_ctxsize = sizeof(struct async_helper_ctx),
516 .cra_alignmask = 0,
517 .cra_type = &crypto_ablkcipher_type,
518 .cra_module = THIS_MODULE,
519 .cra_init = ablk_init,
520 .cra_exit = ablk_exit,
521 .cra_u = {
522 .ablkcipher = {
523 .min_keysize = CAMELLIA_MIN_KEY_SIZE +
524 CAMELLIA_BLOCK_SIZE,
525 .max_keysize = CAMELLIA_MAX_KEY_SIZE +
526 CAMELLIA_BLOCK_SIZE,
527 .ivsize = CAMELLIA_BLOCK_SIZE,
528 .setkey = ablk_set_key,
529 .encrypt = ablk_encrypt,
530 .decrypt = ablk_decrypt,
531 },
532 },
533}, {
534 .cra_name = "xts(camellia)",
535 .cra_driver_name = "xts-camellia-aesni-avx2",
536 .cra_priority = 500,
537 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
538 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
539 .cra_ctxsize = sizeof(struct async_helper_ctx),
540 .cra_alignmask = 0,
541 .cra_type = &crypto_ablkcipher_type,
542 .cra_module = THIS_MODULE,
543 .cra_init = ablk_init,
544 .cra_exit = ablk_exit,
545 .cra_u = {
546 .ablkcipher = {
547 .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
548 .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
549 .ivsize = CAMELLIA_BLOCK_SIZE,
550 .setkey = ablk_set_key,
551 .encrypt = ablk_encrypt,
552 .decrypt = ablk_decrypt,
553 },
554 },
555} };
556
557static int __init camellia_aesni_init(void)
558{
559 u64 xcr0;
560
561 if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
562 pr_info("AVX2 or AES-NI instructions are not detected.\n");
563 return -ENODEV;
564 }
565
566 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
567 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
568 pr_info("AVX2 detected but unusable.\n");
569 return -ENODEV;
570 }
571
572 return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
573}
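
camellia_aesni_init() only registers the algorithms when the CPU advertises AVX2, AVX, AES-NI and OSXSAVE, and when XCR0 confirms that the OS actually saves SSE and YMM state on context switches. A rough user-space equivalent of that probe, assuming a GCC/Clang toolchain on x86 (the feature-bit positions are architectural; the rest is an illustrative sketch, not kernel API):

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int a, b, c, d;
	uint32_t xcr0_lo = 0, xcr0_hi = 0;

	__cpuid(1, a, b, c, d);
	int aesni   = (c >> 25) & 1;	/* CPUID.1:ECX.AESNI */
	int osxsave = (c >> 27) & 1;	/* CPUID.1:ECX.OSXSAVE */
	int avx     = (c >> 28) & 1;	/* CPUID.1:ECX.AVX */

	__cpuid_count(7, 0, a, b, c, d);
	int avx2 = (b >> 5) & 1;	/* CPUID.(7,0):EBX.AVX2 */

	if (osxsave)	/* xgetbv with ECX = 0 reads XCR0 */
		__asm__ volatile (".byte 0x0f, 0x01, 0xd0"
				  : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));

	/* XCR0 bit 1 = SSE state, bit 2 = YMM state (XSTATE_SSE | XSTATE_YMM) */
	int ymm_usable = (xcr0_lo & 0x6) == 0x6;

	printf("aesni=%d avx=%d avx2=%d osxsave=%d ymm_usable=%d\n",
	       aesni, avx, avx2, osxsave, ymm_usable);
	return 0;
}
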
574
575static void __exit camellia_aesni_fini(void)
576{
577 crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
578}
579
580module_init(camellia_aesni_init);
581module_exit(camellia_aesni_fini);
582
583MODULE_LICENSE("GPL");
584MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized");
585MODULE_ALIAS("camellia");
586MODULE_ALIAS("camellia-asm");
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 96cbb6068fce..37fd0c0a81ea 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia 2 * Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia
3 * 3 *
4 * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -26,16 +26,44 @@
26 26
27#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 27#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
28 28
29/* 16-way AES-NI parallel cipher functions */ 29/* 16-way parallel cipher functions (avx/aes-ni) */
30asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, 30asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
31 const u8 *src); 31 const u8 *src);
32EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way);
33
32asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, 34asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
33 const u8 *src); 35 const u8 *src);
36EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way);
34 37
35asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, 38asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
36 const u8 *src); 39 const u8 *src);
40EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way);
41
37asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, 42asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
38 const u8 *src, le128 *iv); 43 const u8 *src, le128 *iv);
44EXPORT_SYMBOL_GPL(camellia_ctr_16way);
45
46asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
47 const u8 *src, le128 *iv);
48EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
49
50asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
51 const u8 *src, le128 *iv);
52EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
53
54void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
55{
56 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
57 GLUE_FUNC_CAST(camellia_enc_blk));
58}
59EXPORT_SYMBOL_GPL(camellia_xts_enc);
60
61void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
62{
63 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
64 GLUE_FUNC_CAST(camellia_dec_blk));
65}
66EXPORT_SYMBOL_GPL(camellia_xts_dec);
39 67
40static const struct common_glue_ctx camellia_enc = { 68static const struct common_glue_ctx camellia_enc = {
41 .num_funcs = 3, 69 .num_funcs = 3,
@@ -69,6 +97,19 @@ static const struct common_glue_ctx camellia_ctr = {
69 } } 97 } }
70}; 98};
71 99
100static const struct common_glue_ctx camellia_enc_xts = {
101 .num_funcs = 2,
102 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
103
104 .funcs = { {
105 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
106 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
107 }, {
108 .num_blocks = 1,
109 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
110 } }
111};
112
72static const struct common_glue_ctx camellia_dec = { 113static const struct common_glue_ctx camellia_dec = {
73 .num_funcs = 3, 114 .num_funcs = 3,
74 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, 115 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
@@ -101,6 +142,19 @@ static const struct common_glue_ctx camellia_dec_cbc = {
101 } } 142 } }
102}; 143};
103 144
145static const struct common_glue_ctx camellia_dec_xts = {
146 .num_funcs = 2,
147 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
148
149 .funcs = { {
150 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
151 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
152 }, {
153 .num_blocks = 1,
154 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
155 } }
156};
157
104static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 158static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
105 struct scatterlist *src, unsigned int nbytes) 159 struct scatterlist *src, unsigned int nbytes)
106{ 160{
@@ -261,54 +315,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
261 struct scatterlist *src, unsigned int nbytes) 315 struct scatterlist *src, unsigned int nbytes)
262{ 316{
263 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 317 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
264 be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
265 struct crypt_priv crypt_ctx = {
266 .ctx = &ctx->crypt_ctx,
267 .fpu_enabled = false,
268 };
269 struct xts_crypt_req req = {
270 .tbuf = buf,
271 .tbuflen = sizeof(buf),
272 318
273 .tweak_ctx = &ctx->tweak_ctx, 319 return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
274 .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), 320 XTS_TWEAK_CAST(camellia_enc_blk),
275 .crypt_ctx = &crypt_ctx, 321 &ctx->tweak_ctx, &ctx->crypt_ctx);
276 .crypt_fn = encrypt_callback,
277 };
278 int ret;
279
280 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
281 ret = xts_crypt(desc, dst, src, nbytes, &req);
282 camellia_fpu_end(crypt_ctx.fpu_enabled);
283
284 return ret;
285} 322}
286 323
287static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 324static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
288 struct scatterlist *src, unsigned int nbytes) 325 struct scatterlist *src, unsigned int nbytes)
289{ 326{
290 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 327 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
291 be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
292 struct crypt_priv crypt_ctx = {
293 .ctx = &ctx->crypt_ctx,
294 .fpu_enabled = false,
295 };
296 struct xts_crypt_req req = {
297 .tbuf = buf,
298 .tbuflen = sizeof(buf),
299
300 .tweak_ctx = &ctx->tweak_ctx,
301 .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
302 .crypt_ctx = &crypt_ctx,
303 .crypt_fn = decrypt_callback,
304 };
305 int ret;
306 328
307 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 329 return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
308 ret = xts_crypt(desc, dst, src, nbytes, &req); 330 XTS_TWEAK_CAST(camellia_enc_blk),
309 camellia_fpu_end(crypt_ctx.fpu_enabled); 331 &ctx->tweak_ctx, &ctx->crypt_ctx);
310
311 return ret;
312} 332}
313 333
314static struct crypto_alg cmll_algs[10] = { { 334static struct crypto_alg cmll_algs[10] = { {
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index f93b6105a0ce..e3531f833951 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -4,7 +4,7 @@
4 * Copyright (C) 2012 Johannes Goetzfried 4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> 5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 * 6 *
7 * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 7 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -227,6 +227,8 @@
227.data 227.data
228 228
229.align 16 229.align 16
230.Lxts_gf128mul_and_shl1_mask:
231 .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
230.Lbswap_mask: 232.Lbswap_mask:
231 .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 233 .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
232.Lbswap128_mask: 234.Lbswap128_mask:
@@ -424,3 +426,47 @@ ENTRY(cast6_ctr_8way)
424 426
425 ret; 427 ret;
426ENDPROC(cast6_ctr_8way) 428ENDPROC(cast6_ctr_8way)
429
430ENTRY(cast6_xts_enc_8way)
431 /* input:
432 * %rdi: ctx, CTX
433 * %rsi: dst
434 * %rdx: src
435 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
436 */
437
438 movq %rsi, %r11;
439
440 /* regs <= src, dst <= IVs, regs <= regs xor IVs */
441 load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
442 RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
443
444 call __cast6_enc_blk8;
445
446 /* dst <= regs xor IVs(in dst) */
447 store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
448
449 ret;
450ENDPROC(cast6_xts_enc_8way)
451
452ENTRY(cast6_xts_dec_8way)
453 /* input:
454 * %rdi: ctx, CTX
455 * %rsi: dst
456 * %rdx: src
457 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
458 */
459
460 movq %rsi, %r11;
461
462 /* regs <= src, dst <= IVs, regs <= regs xor IVs */
463 load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
464 RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
465
466 call __cast6_dec_blk8;
467
468 /* dst <= regs xor IVs(in dst) */
469 store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
470
471 ret;
472ENDPROC(cast6_xts_dec_8way)
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 92f7ca24790a..8d0dfb86a559 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -4,6 +4,8 @@
4 * Copyright (C) 2012 Johannes Goetzfried 4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> 5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 * 6 *
7 * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
8 *
7 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or 11 * the Free Software Foundation; either version 2 of the License, or
@@ -50,6 +52,23 @@ asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
50asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, 52asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
51 le128 *iv); 53 le128 *iv);
52 54
55asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
56 const u8 *src, le128 *iv);
57asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
58 const u8 *src, le128 *iv);
59
60static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
61{
62 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
63 GLUE_FUNC_CAST(__cast6_encrypt));
64}
65
66static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
67{
68 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
69 GLUE_FUNC_CAST(__cast6_decrypt));
70}
71
53static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) 72static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
54{ 73{
55 be128 ctrblk; 74 be128 ctrblk;
@@ -87,6 +106,19 @@ static const struct common_glue_ctx cast6_ctr = {
87 } } 106 } }
88}; 107};
89 108
109static const struct common_glue_ctx cast6_enc_xts = {
110 .num_funcs = 2,
111 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
112
113 .funcs = { {
114 .num_blocks = CAST6_PARALLEL_BLOCKS,
115 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
116 }, {
117 .num_blocks = 1,
118 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
119 } }
120};
121
90static const struct common_glue_ctx cast6_dec = { 122static const struct common_glue_ctx cast6_dec = {
91 .num_funcs = 2, 123 .num_funcs = 2,
92 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, 124 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
@@ -113,6 +145,19 @@ static const struct common_glue_ctx cast6_dec_cbc = {
113 } } 145 } }
114}; 146};
115 147
148static const struct common_glue_ctx cast6_dec_xts = {
149 .num_funcs = 2,
150 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
151
152 .funcs = { {
153 .num_blocks = CAST6_PARALLEL_BLOCKS,
154 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
155 }, {
156 .num_blocks = 1,
157 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
158 } }
159};
160
116static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 161static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
117 struct scatterlist *src, unsigned int nbytes) 162 struct scatterlist *src, unsigned int nbytes)
118{ 163{
@@ -307,54 +352,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
307 struct scatterlist *src, unsigned int nbytes) 352 struct scatterlist *src, unsigned int nbytes)
308{ 353{
309 struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 354 struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
310 be128 buf[CAST6_PARALLEL_BLOCKS];
311 struct crypt_priv crypt_ctx = {
312 .ctx = &ctx->crypt_ctx,
313 .fpu_enabled = false,
314 };
315 struct xts_crypt_req req = {
316 .tbuf = buf,
317 .tbuflen = sizeof(buf),
318 355
319 .tweak_ctx = &ctx->tweak_ctx, 356 return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes,
320 .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), 357 XTS_TWEAK_CAST(__cast6_encrypt),
321 .crypt_ctx = &crypt_ctx, 358 &ctx->tweak_ctx, &ctx->crypt_ctx);
322 .crypt_fn = encrypt_callback,
323 };
324 int ret;
325
326 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
327 ret = xts_crypt(desc, dst, src, nbytes, &req);
328 cast6_fpu_end(crypt_ctx.fpu_enabled);
329
330 return ret;
331} 359}
332 360
333static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 361static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
334 struct scatterlist *src, unsigned int nbytes) 362 struct scatterlist *src, unsigned int nbytes)
335{ 363{
336 struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 364 struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
337 be128 buf[CAST6_PARALLEL_BLOCKS];
338 struct crypt_priv crypt_ctx = {
339 .ctx = &ctx->crypt_ctx,
340 .fpu_enabled = false,
341 };
342 struct xts_crypt_req req = {
343 .tbuf = buf,
344 .tbuflen = sizeof(buf),
345
346 .tweak_ctx = &ctx->tweak_ctx,
347 .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
348 .crypt_ctx = &crypt_ctx,
349 .crypt_fn = decrypt_callback,
350 };
351 int ret;
352 365
353 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 366 return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes,
354 ret = xts_crypt(desc, dst, src, nbytes, &req); 367 XTS_TWEAK_CAST(__cast6_encrypt),
355 cast6_fpu_end(crypt_ctx.fpu_enabled); 368 &ctx->tweak_ctx, &ctx->crypt_ctx);
356
357 return ret;
358} 369}
359 370
360static struct crypto_alg cast6_algs[10] = { { 371static struct crypto_alg cast6_algs[10] = { {
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
index c8335014a044..94c27df8a549 100644
--- a/arch/x86/crypto/crc32-pclmul_asm.S
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -101,9 +101,8 @@
101 * uint crc32_pclmul_le_16(unsigned char const *buffer, 101 * uint crc32_pclmul_le_16(unsigned char const *buffer,
102 * size_t len, uint crc32) 102 * size_t len, uint crc32)
103 */ 103 */
104.globl crc32_pclmul_le_16 104
105.align 4, 0x90 105ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
106crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */
107 movdqa (BUF), %xmm1 106 movdqa (BUF), %xmm1
108 movdqa 0x10(BUF), %xmm2 107 movdqa 0x10(BUF), %xmm2
109 movdqa 0x20(BUF), %xmm3 108 movdqa 0x20(BUF), %xmm3
@@ -244,3 +243,4 @@ fold_64:
244 pextrd $0x01, %xmm1, %eax 243 pextrd $0x01, %xmm1, %eax
245 244
246 ret 245 ret
246ENDPROC(crc32_pclmul_le_16)
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index cf1a7ec4cc3a..dbc4339b5417 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -1,9 +1,10 @@
1/* 1/*
2 * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64) 2 * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
3 * 3 *
4 * The white paper on CRC32C calculations with PCLMULQDQ instruction can be 4 * The white papers on CRC32C calculations with PCLMULQDQ instruction can be
5 * downloaded from: 5 * downloaded from:
6 * http://download.intel.com/design/intarch/papers/323405.pdf 6 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
7 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
7 * 8 *
8 * Copyright (C) 2012 Intel Corporation. 9 * Copyright (C) 2012 Intel Corporation.
9 * 10 *
@@ -42,6 +43,7 @@
42 * SOFTWARE. 43 * SOFTWARE.
43 */ 44 */
44 45
46#include <asm/inst.h>
45#include <linux/linkage.h> 47#include <linux/linkage.h>
46 48
47## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction 49## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
@@ -225,10 +227,10 @@ LABEL crc_ %i
225 movdqa (bufp), %xmm0 # 2 consts: K1:K2 227 movdqa (bufp), %xmm0 # 2 consts: K1:K2
226 228
227 movq crc_init, %xmm1 # CRC for block 1 229 movq crc_init, %xmm1 # CRC for block 1
228 pclmulqdq $0x00,%xmm0,%xmm1 # Multiply by K2 230 PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2
229 231
230 movq crc1, %xmm2 # CRC for block 2 232 movq crc1, %xmm2 # CRC for block 2
231 pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1 233 PCLMULQDQ 0x10, %xmm0, %xmm2 # Multiply by K1
232 234
233 pxor %xmm2,%xmm1 235 pxor %xmm2,%xmm1
234 movq %xmm1, %rax 236 movq %xmm1, %rax
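
The PCLMULQDQ folding above combines partial CRCs computed over independent chunks of the buffer. Ignoring the initial/final XOR and the bit-reflection details (CRC32C here is reflected, which changes the constants but not the structure), a CRC defined as a polynomial remainder over GF(2) satisfies

    crc(M_1 || M_2) = ( crc(M_1) * x^{8|M_2|} mod P(x) ) xor crc(M_2)

so a leading chunk's CRC only needs one carry-less multiplication by a precomputed constant x^{8|M_2|} mod P(x) before being XORed with the next chunk's CRC; K1 and K2 in the code are two such constants for the fixed chunk offsets used by this implementation.
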
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S
index f7b6ea2ddfdb..02ee2308fb38 100644
--- a/arch/x86/crypto/glue_helper-asm-avx.S
+++ b/arch/x86/crypto/glue_helper-asm-avx.S
@@ -1,7 +1,7 @@
1/* 1/*
2 * Shared glue code for 128bit block ciphers, AVX assembler macros 2 * Shared glue code for 128bit block ciphers, AVX assembler macros
3 * 3 *
4 * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -89,3 +89,62 @@
89 vpxor (6*16)(src), x6, x6; \ 89 vpxor (6*16)(src), x6, x6; \
90 vpxor (7*16)(src), x7, x7; \ 90 vpxor (7*16)(src), x7, x7; \
91 store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); 91 store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
92
93#define gf128mul_x_ble(iv, mask, tmp) \
94 vpsrad $31, iv, tmp; \
95 vpaddq iv, iv, iv; \
96 vpshufd $0x13, tmp, tmp; \
97 vpand mask, tmp, tmp; \
98 vpxor tmp, iv, iv;
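
gf128mul_x_ble multiplies the 128-bit tweak by x (alpha) in GF(2^128) using the XTS little-endian block convention: the tweak is treated as a 128-bit little-endian integer, shifted left one bit, and reduced modulo x^128 + x^7 + x^2 + x + 1, which is where the 0x87 byte in .Lxts_gf128mul_and_shl1_mask comes from. A scalar C sketch of the same operation, using two 64-bit limbs (the struct and its names are illustrative):

#include <stdint.h>
#include <stdio.h>

struct le128 { uint64_t lo, hi; };	/* lo holds bytes 0..7, hi holds bytes 8..15 */

/* Multiply t by x in GF(2^128), reduction polynomial x^128 + x^7 + x^2 + x + 1. */
static void gf128mul_x_ble(struct le128 *t)
{
	uint64_t carry = t->hi >> 63;		/* bit 127, shifted out by the doubling */

	t->hi = (t->hi << 1) | (t->lo >> 63);	/* propagate bit 63 into the high limb */
	t->lo = (t->lo << 1) ^ (carry * 0x87);	/* fold the shifted-out bit back in as 0x87 */
}

int main(void)
{
	struct le128 t = { .lo = 1, .hi = 0 };	/* start from alpha^0 = 1 */
	int i;

	for (i = 0; i < 4; i++) {
		gf128mul_x_ble(&t);
		printf("alpha^%d: hi=%016llx lo=%016llx\n", i + 1,
		       (unsigned long long)t.hi, (unsigned long long)t.lo);
	}
	return 0;
}

The vector macro computes the same thing on one 128-bit lane: vpaddq doubles each 64-bit half, while vpsrad/vpshufd/vpand build the correction word (0x87 in the low byte, the inter-limb carry at byte 8) that vpxor folds back in.
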
99
100#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
101 t1, xts_gf128mul_and_shl1_mask) \
102 vmovdqa xts_gf128mul_and_shl1_mask, t0; \
103 \
104 /* load IV */ \
105 vmovdqu (iv), tiv; \
106 vpxor (0*16)(src), tiv, x0; \
107 vmovdqu tiv, (0*16)(dst); \
108 \
109 /* construct and store IVs, also xor with source */ \
110 gf128mul_x_ble(tiv, t0, t1); \
111 vpxor (1*16)(src), tiv, x1; \
112 vmovdqu tiv, (1*16)(dst); \
113 \
114 gf128mul_x_ble(tiv, t0, t1); \
115 vpxor (2*16)(src), tiv, x2; \
116 vmovdqu tiv, (2*16)(dst); \
117 \
118 gf128mul_x_ble(tiv, t0, t1); \
119 vpxor (3*16)(src), tiv, x3; \
120 vmovdqu tiv, (3*16)(dst); \
121 \
122 gf128mul_x_ble(tiv, t0, t1); \
123 vpxor (4*16)(src), tiv, x4; \
124 vmovdqu tiv, (4*16)(dst); \
125 \
126 gf128mul_x_ble(tiv, t0, t1); \
127 vpxor (5*16)(src), tiv, x5; \
128 vmovdqu tiv, (5*16)(dst); \
129 \
130 gf128mul_x_ble(tiv, t0, t1); \
131 vpxor (6*16)(src), tiv, x6; \
132 vmovdqu tiv, (6*16)(dst); \
133 \
134 gf128mul_x_ble(tiv, t0, t1); \
135 vpxor (7*16)(src), tiv, x7; \
136 vmovdqu tiv, (7*16)(dst); \
137 \
138 gf128mul_x_ble(tiv, t0, t1); \
139 vmovdqu tiv, (iv);
140
141#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
142 vpxor (0*16)(dst), x0, x0; \
143 vpxor (1*16)(dst), x1, x1; \
144 vpxor (2*16)(dst), x2, x2; \
145 vpxor (3*16)(dst), x3, x3; \
146 vpxor (4*16)(dst), x4, x4; \
147 vpxor (5*16)(dst), x5, x5; \
148 vpxor (6*16)(dst), x6, x6; \
149 vpxor (7*16)(dst), x7, x7; \
150 store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S
new file mode 100644
index 000000000000..a53ac11dd385
--- /dev/null
+++ b/arch/x86/crypto/glue_helper-asm-avx2.S
@@ -0,0 +1,180 @@
1/*
2 * Shared glue code for 128bit block ciphers, AVX2 assembler macros
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#define load_16way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
14 vmovdqu (0*32)(src), x0; \
15 vmovdqu (1*32)(src), x1; \
16 vmovdqu (2*32)(src), x2; \
17 vmovdqu (3*32)(src), x3; \
18 vmovdqu (4*32)(src), x4; \
19 vmovdqu (5*32)(src), x5; \
20 vmovdqu (6*32)(src), x6; \
21 vmovdqu (7*32)(src), x7;
22
23#define store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
24 vmovdqu x0, (0*32)(dst); \
25 vmovdqu x1, (1*32)(dst); \
26 vmovdqu x2, (2*32)(dst); \
27 vmovdqu x3, (3*32)(dst); \
28 vmovdqu x4, (4*32)(dst); \
29 vmovdqu x5, (5*32)(dst); \
30 vmovdqu x6, (6*32)(dst); \
31 vmovdqu x7, (7*32)(dst);
32
33#define store_cbc_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7, t0) \
34 vpxor t0, t0, t0; \
35 vinserti128 $1, (src), t0, t0; \
36 vpxor t0, x0, x0; \
37 vpxor (0*32+16)(src), x1, x1; \
38 vpxor (1*32+16)(src), x2, x2; \
39 vpxor (2*32+16)(src), x3, x3; \
40 vpxor (3*32+16)(src), x4, x4; \
41 vpxor (4*32+16)(src), x5, x5; \
42 vpxor (5*32+16)(src), x6, x6; \
43 vpxor (6*32+16)(src), x7, x7; \
44 store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
45
46#define inc_le128(x, minus_one, tmp) \
47 vpcmpeqq minus_one, x, tmp; \
48 vpsubq minus_one, x, x; \
49 vpslldq $8, tmp, tmp; \
50 vpsubq tmp, x, x;
51
52#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \
53 vpcmpeqq minus_one, x, tmp1; \
54 vpcmpeqq minus_two, x, tmp2; \
55 vpsubq minus_two, x, x; \
56 vpor tmp2, tmp1, tmp1; \
57 vpslldq $8, tmp1, tmp1; \
58 vpsubq tmp1, x, x;
59
60#define load_ctr_16way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t0x, t1, \
61 t1x, t2, t2x, t3, t3x, t4, t5) \
62 vpcmpeqd t0, t0, t0; \
63 vpsrldq $8, t0, t0; /* ab: -1:0 ; cd: -1:0 */ \
64 vpaddq t0, t0, t4; /* ab: -2:0 ; cd: -2:0 */\
65 \
66 /* load IV and byteswap */ \
67 vmovdqu (iv), t2x; \
68 vmovdqa t2x, t3x; \
69 inc_le128(t2x, t0x, t1x); \
70 vbroadcasti128 bswap, t1; \
71 vinserti128 $1, t2x, t3, t2; /* ab: le0 ; cd: le1 */ \
72 vpshufb t1, t2, x0; \
73 \
74 /* construct IVs */ \
75 add2_le128(t2, t0, t4, t3, t5); /* ab: le2 ; cd: le3 */ \
76 vpshufb t1, t2, x1; \
77 add2_le128(t2, t0, t4, t3, t5); \
78 vpshufb t1, t2, x2; \
79 add2_le128(t2, t0, t4, t3, t5); \
80 vpshufb t1, t2, x3; \
81 add2_le128(t2, t0, t4, t3, t5); \
82 vpshufb t1, t2, x4; \
83 add2_le128(t2, t0, t4, t3, t5); \
84 vpshufb t1, t2, x5; \
85 add2_le128(t2, t0, t4, t3, t5); \
86 vpshufb t1, t2, x6; \
87 add2_le128(t2, t0, t4, t3, t5); \
88 vpshufb t1, t2, x7; \
89 vextracti128 $1, t2, t2x; \
90 inc_le128(t2x, t0x, t3x); \
91 vmovdqu t2x, (iv);
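
load_ctr_16way keeps the counter as a 128-bit little-endian integer (matching the le128 *iv in the glue code) and byte-swaps it with vpshufb only when materialising the big-endian counter blocks in the ymm registers; inc_le128 and add2_le128 are branch-free 128-bit increments built from vpcmpeqq/vpsubq. A plain C sketch of the 128-bit increment (the struct and its limb names are illustrative):

#include <stdint.h>
#include <stdio.h>

struct u128le { uint64_t lo, hi; };	/* counter in little-endian limb order */

static void inc_le128(struct u128le *x)
{
	if (++x->lo == 0)	/* carry out of the low 64 bits */
		x->hi++;
}

int main(void)
{
	struct u128le ctr = { .lo = UINT64_MAX, .hi = 0 };

	inc_le128(&ctr);	/* wraps the low limb, carries into the high limb */
	printf("hi=%llu lo=%llu\n",
	       (unsigned long long)ctr.hi, (unsigned long long)ctr.lo);
	return 0;
}

The vector macro gets the same effect without a branch: vpcmpeqq detects a low half that is about to wrap, and the shifted compare mask is subtracted from the high half to apply the carry.
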
92
93#define store_ctr_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
94 vpxor (0*32)(src), x0, x0; \
95 vpxor (1*32)(src), x1, x1; \
96 vpxor (2*32)(src), x2, x2; \
97 vpxor (3*32)(src), x3, x3; \
98 vpxor (4*32)(src), x4, x4; \
99 vpxor (5*32)(src), x5, x5; \
100 vpxor (6*32)(src), x6, x6; \
101 vpxor (7*32)(src), x7, x7; \
102 store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
103
104#define gf128mul_x_ble(iv, mask, tmp) \
105 vpsrad $31, iv, tmp; \
106 vpaddq iv, iv, iv; \
107 vpshufd $0x13, tmp, tmp; \
108 vpand mask, tmp, tmp; \
109 vpxor tmp, iv, iv;
110
111#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \
112 vpsrad $31, iv, tmp0; \
113 vpaddq iv, iv, tmp1; \
114 vpsllq $2, iv, iv; \
115 vpshufd $0x13, tmp0, tmp0; \
116 vpsrad $31, tmp1, tmp1; \
117 vpand mask2, tmp0, tmp0; \
118 vpshufd $0x13, tmp1, tmp1; \
119 vpxor tmp0, iv, iv; \
120 vpand mask1, tmp1, tmp1; \
121 vpxor tmp1, iv, iv;
122
123#define load_xts_16way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, \
124 tivx, t0, t0x, t1, t1x, t2, t2x, t3, \
125 xts_gf128mul_and_shl1_mask_0, \
126 xts_gf128mul_and_shl1_mask_1) \
127 vbroadcasti128 xts_gf128mul_and_shl1_mask_0, t1; \
128 \
129 /* load IV and construct second IV */ \
130 vmovdqu (iv), tivx; \
131 vmovdqa tivx, t0x; \
132 gf128mul_x_ble(tivx, t1x, t2x); \
133 vbroadcasti128 xts_gf128mul_and_shl1_mask_1, t2; \
134 vinserti128 $1, tivx, t0, tiv; \
135 vpxor (0*32)(src), tiv, x0; \
136 vmovdqu tiv, (0*32)(dst); \
137 \
138 /* construct and store IVs, also xor with source */ \
139 gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
140 vpxor (1*32)(src), tiv, x1; \
141 vmovdqu tiv, (1*32)(dst); \
142 \
143 gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
144 vpxor (2*32)(src), tiv, x2; \
145 vmovdqu tiv, (2*32)(dst); \
146 \
147 gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
148 vpxor (3*32)(src), tiv, x3; \
149 vmovdqu tiv, (3*32)(dst); \
150 \
151 gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
152 vpxor (4*32)(src), tiv, x4; \
153 vmovdqu tiv, (4*32)(dst); \
154 \
155 gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
156 vpxor (5*32)(src), tiv, x5; \
157 vmovdqu tiv, (5*32)(dst); \
158 \
159 gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
160 vpxor (6*32)(src), tiv, x6; \
161 vmovdqu tiv, (6*32)(dst); \
162 \
163 gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
164 vpxor (7*32)(src), tiv, x7; \
165 vmovdqu tiv, (7*32)(dst); \
166 \
167 vextracti128 $1, tiv, tivx; \
168 gf128mul_x_ble(tivx, t1x, t2x); \
169 vmovdqu tivx, (iv);
170
171#define store_xts_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
172 vpxor (0*32)(dst), x0, x0; \
173 vpxor (1*32)(dst), x1, x1; \
174 vpxor (2*32)(dst), x2, x2; \
175 vpxor (3*32)(dst), x3, x3; \
176 vpxor (4*32)(dst), x4, x4; \
177 vpxor (5*32)(dst), x5, x5; \
178 vpxor (6*32)(dst), x6, x6; \
179 vpxor (7*32)(dst), x7, x7; \
180 store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 22ce4f683e55..432f1d76ceb8 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Shared glue code for 128bit block ciphers 2 * Shared glue code for 128bit block ciphers
3 * 3 *
4 * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 * 5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: 6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> 7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
@@ -304,4 +304,99 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
304} 304}
305EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); 305EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
306 306
307static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
308 void *ctx,
309 struct blkcipher_desc *desc,
310 struct blkcipher_walk *walk)
311{
312 const unsigned int bsize = 128 / 8;
313 unsigned int nbytes = walk->nbytes;
314 u128 *src = (u128 *)walk->src.virt.addr;
315 u128 *dst = (u128 *)walk->dst.virt.addr;
316 unsigned int num_blocks, func_bytes;
317 unsigned int i;
318
319 /* Process multi-block batch */
320 for (i = 0; i < gctx->num_funcs; i++) {
321 num_blocks = gctx->funcs[i].num_blocks;
322 func_bytes = bsize * num_blocks;
323
324 if (nbytes >= func_bytes) {
325 do {
326 gctx->funcs[i].fn_u.xts(ctx, dst, src,
327 (le128 *)walk->iv);
328
329 src += num_blocks;
330 dst += num_blocks;
331 nbytes -= func_bytes;
332 } while (nbytes >= func_bytes);
333
334 if (nbytes < bsize)
335 goto done;
336 }
337 }
338
339done:
340 return nbytes;
341}
342
343/* for implementations implementing faster XTS IV generator */
344int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
345 struct blkcipher_desc *desc, struct scatterlist *dst,
346 struct scatterlist *src, unsigned int nbytes,
347 void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src),
348 void *tweak_ctx, void *crypt_ctx)
349{
350 const unsigned int bsize = 128 / 8;
351 bool fpu_enabled = false;
352 struct blkcipher_walk walk;
353 int err;
354
355 blkcipher_walk_init(&walk, dst, src, nbytes);
356
357 err = blkcipher_walk_virt(desc, &walk);
358 nbytes = walk.nbytes;
359 if (!nbytes)
360 return err;
361
362 /* set minimum length to bsize, for tweak_fn */
363 fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
364 desc, fpu_enabled,
365 nbytes < bsize ? bsize : nbytes);
366
367 /* calculate first value of T */
368 tweak_fn(tweak_ctx, walk.iv, walk.iv);
369
370 while (nbytes) {
371 nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
372
373 err = blkcipher_walk_done(desc, &walk, nbytes);
374 nbytes = walk.nbytes;
375 }
376
377 glue_fpu_end(fpu_enabled);
378
379 return err;
380}
381EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
382
383void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
384 common_glue_func_t fn)
385{
386 le128 ivblk = *iv;
387
388 /* generate next IV */
389 le128_gf128mul_x_ble(iv, &ivblk);
390
391 /* CC <- T xor C */
392 u128_xor(dst, src, (u128 *)&ivblk);
393
394 /* PP <- D(Key2,CC) */
395 fn(ctx, (u8 *)dst, (u8 *)dst);
396
397 /* P <- T xor PP */
398 u128_xor(dst, dst, (u128 *)&ivblk);
399}
400EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
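
Spelled out in the usual XTS (IEEE P1619) notation, with K_crypt the data key (crypt_ctx), K_tweak the tweak key (tweak_ctx) and i the 128-bit IV/sector number, this is a summary of what the two functions above implement, not new code:

    T_1 = E_{K_tweak}(i),    T_{j+1} = T_j * alpha   (multiplication by x in GF(2^128), le128_gf128mul_x_ble)
    C_j = E_{K_crypt}(P_j xor T_j) xor T_j,    P_j = D_{K_crypt}(C_j xor T_j) xor T_j

glue_xts_crypt_128bit() computes T_1 once with tweak_fn and lets the per-width handlers consume whole batches of (block, tweak) pairs, while glue_xts_crypt_128bit_one() handles a single block and advances the tweak. Because the tweak is always produced with the cipher's encryption direction, both the xts_encrypt and xts_decrypt glue entry points pass the encryption routine (camellia_enc_blk, __cast6_encrypt) as tweak_fn.
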
401
307MODULE_LICENSE("GPL"); 402MODULE_LICENSE("GPL");
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 43c938612b74..2f202f49872b 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -4,8 +4,7 @@
4 * Copyright (C) 2012 Johannes Goetzfried 4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> 5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 * 6 *
7 * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by 7 * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
8 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
9 * 8 *
10 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -34,6 +33,8 @@
34 33
35.Lbswap128_mask: 34.Lbswap128_mask:
36 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 35 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
36.Lxts_gf128mul_and_shl1_mask:
37 .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
37 38
38.text 39.text
39 40
@@ -739,3 +740,43 @@ ENTRY(serpent_ctr_8way_avx)
739 740
740 ret; 741 ret;
741ENDPROC(serpent_ctr_8way_avx) 742ENDPROC(serpent_ctr_8way_avx)
743
744ENTRY(serpent_xts_enc_8way_avx)
745 /* input:
746 * %rdi: ctx, CTX
747 * %rsi: dst
748 * %rdx: src
749 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
750 */
751
752 /* regs <= src, dst <= IVs, regs <= regs xor IVs */
753 load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
754 RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask);
755
756 call __serpent_enc_blk8_avx;
757
758 /* dst <= regs xor IVs(in dst) */
759 store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
760
761 ret;
762ENDPROC(serpent_xts_enc_8way_avx)
763
764ENTRY(serpent_xts_dec_8way_avx)
765 /* input:
766 * %rdi: ctx, CTX
767 * %rsi: dst
768 * %rdx: src
769 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
770 */
771
772 /* regs <= src, dst <= IVs, regs <= regs xor IVs */
773 load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
774 RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask);
775
776 call __serpent_dec_blk8_avx;
777
778 /* dst <= regs xor IVs(in dst) */
779 store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
780
781 ret;
782ENDPROC(serpent_xts_dec_8way_avx)
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S
new file mode 100644
index 000000000000..b222085cccac
--- /dev/null
+++ b/arch/x86/crypto/serpent-avx2-asm_64.S
@@ -0,0 +1,800 @@
1/*
2 * x86_64/AVX2 assembler optimized version of Serpent
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * Based on AVX assembler implementation of Serpent by:
7 * Copyright © 2012 Johannes Goetzfried
8 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 */
16
17#include <linux/linkage.h>
18#include "glue_helper-asm-avx2.S"
19
20.file "serpent-avx2-asm_64.S"
21
22.data
23.align 16
24
25.Lbswap128_mask:
26 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
27.Lxts_gf128mul_and_shl1_mask_0:
28 .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
29.Lxts_gf128mul_and_shl1_mask_1:
30 .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
31
32.text
33
34#define CTX %rdi
35
36#define RNOT %ymm0
37#define tp %ymm1
38
39#define RA1 %ymm2
40#define RA2 %ymm3
41#define RB1 %ymm4
42#define RB2 %ymm5
43#define RC1 %ymm6
44#define RC2 %ymm7
45#define RD1 %ymm8
46#define RD2 %ymm9
47#define RE1 %ymm10
48#define RE2 %ymm11
49
50#define RK0 %ymm12
51#define RK1 %ymm13
52#define RK2 %ymm14
53#define RK3 %ymm15
54
55#define RK0x %xmm12
56#define RK1x %xmm13
57#define RK2x %xmm14
58#define RK3x %xmm15
59
60#define S0_1(x0, x1, x2, x3, x4) \
61 vpor x0, x3, tp; \
62 vpxor x3, x0, x0; \
63 vpxor x2, x3, x4; \
64 vpxor RNOT, x4, x4; \
65 vpxor x1, tp, x3; \
66 vpand x0, x1, x1; \
67 vpxor x4, x1, x1; \
68 vpxor x0, x2, x2;
69#define S0_2(x0, x1, x2, x3, x4) \
70 vpxor x3, x0, x0; \
71 vpor x0, x4, x4; \
72 vpxor x2, x0, x0; \
73 vpand x1, x2, x2; \
74 vpxor x2, x3, x3; \
75 vpxor RNOT, x1, x1; \
76 vpxor x4, x2, x2; \
77 vpxor x2, x1, x1;
78
79#define S1_1(x0, x1, x2, x3, x4) \
80 vpxor x0, x1, tp; \
81 vpxor x3, x0, x0; \
82 vpxor RNOT, x3, x3; \
83 vpand tp, x1, x4; \
84 vpor tp, x0, x0; \
85 vpxor x2, x3, x3; \
86 vpxor x3, x0, x0; \
87 vpxor x3, tp, x1;
88#define S1_2(x0, x1, x2, x3, x4) \
89 vpxor x4, x3, x3; \
90 vpor x4, x1, x1; \
91 vpxor x2, x4, x4; \
92 vpand x0, x2, x2; \
93 vpxor x1, x2, x2; \
94 vpor x0, x1, x1; \
95 vpxor RNOT, x0, x0; \
96 vpxor x2, x0, x0; \
97 vpxor x1, x4, x4;
98
99#define S2_1(x0, x1, x2, x3, x4) \
100 vpxor RNOT, x3, x3; \
101 vpxor x0, x1, x1; \
102 vpand x2, x0, tp; \
103 vpxor x3, tp, tp; \
104 vpor x0, x3, x3; \
105 vpxor x1, x2, x2; \
106 vpxor x1, x3, x3; \
107 vpand tp, x1, x1;
108#define S2_2(x0, x1, x2, x3, x4) \
109 vpxor x2, tp, tp; \
110 vpand x3, x2, x2; \
111 vpor x1, x3, x3; \
112 vpxor RNOT, tp, tp; \
113 vpxor tp, x3, x3; \
114 vpxor tp, x0, x4; \
115 vpxor x2, tp, x0; \
116 vpor x2, x1, x1;
117
118#define S3_1(x0, x1, x2, x3, x4) \
119 vpxor x3, x1, tp; \
120 vpor x0, x3, x3; \
121 vpand x0, x1, x4; \
122 vpxor x2, x0, x0; \
123 vpxor tp, x2, x2; \
124 vpand x3, tp, x1; \
125 vpxor x3, x2, x2; \
126 vpor x4, x0, x0; \
127 vpxor x3, x4, x4;
128#define S3_2(x0, x1, x2, x3, x4) \
129 vpxor x0, x1, x1; \
130 vpand x3, x0, x0; \
131 vpand x4, x3, x3; \
132 vpxor x2, x3, x3; \
133 vpor x1, x4, x4; \
134 vpand x1, x2, x2; \
135 vpxor x3, x4, x4; \
136 vpxor x3, x0, x0; \
137 vpxor x2, x3, x3;
138
139#define S4_1(x0, x1, x2, x3, x4) \
140 vpand x0, x3, tp; \
141 vpxor x3, x0, x0; \
142 vpxor x2, tp, tp; \
143 vpor x3, x2, x2; \
144 vpxor x1, x0, x0; \
145 vpxor tp, x3, x4; \
146 vpor x0, x2, x2; \
147 vpxor x1, x2, x2;
148#define S4_2(x0, x1, x2, x3, x4) \
149 vpand x0, x1, x1; \
150 vpxor x4, x1, x1; \
151 vpand x2, x4, x4; \
152 vpxor tp, x2, x2; \
153 vpxor x0, x4, x4; \
154 vpor x1, tp, x3; \
155 vpxor RNOT, x1, x1; \
156 vpxor x0, x3, x3;
157
158#define S5_1(x0, x1, x2, x3, x4) \
159 vpor x0, x1, tp; \
160 vpxor tp, x2, x2; \
161 vpxor RNOT, x3, x3; \
162 vpxor x0, x1, x4; \
163 vpxor x2, x0, x0; \
164 vpand x4, tp, x1; \
165 vpor x3, x4, x4; \
166 vpxor x0, x4, x4;
167#define S5_2(x0, x1, x2, x3, x4) \
168 vpand x3, x0, x0; \
169 vpxor x3, x1, x1; \
170 vpxor x2, x3, x3; \
171 vpxor x1, x0, x0; \
172 vpand x4, x2, x2; \
173 vpxor x2, x1, x1; \
174 vpand x0, x2, x2; \
175 vpxor x2, x3, x3;
176
177#define S6_1(x0, x1, x2, x3, x4) \
178 vpxor x0, x3, x3; \
179 vpxor x2, x1, tp; \
180 vpxor x0, x2, x2; \
181 vpand x3, x0, x0; \
182 vpor x3, tp, tp; \
183 vpxor RNOT, x1, x4; \
184 vpxor tp, x0, x0; \
185 vpxor x2, tp, x1;
186#define S6_2(x0, x1, x2, x3, x4) \
187 vpxor x4, x3, x3; \
188 vpxor x0, x4, x4; \
189 vpand x0, x2, x2; \
190 vpxor x1, x4, x4; \
191 vpxor x3, x2, x2; \
192 vpand x1, x3, x3; \
193 vpxor x0, x3, x3; \
194 vpxor x2, x1, x1;
195
196#define S7_1(x0, x1, x2, x3, x4) \
197 vpxor RNOT, x1, tp; \
198 vpxor RNOT, x0, x0; \
199 vpand x2, tp, x1; \
200 vpxor x3, x1, x1; \
201 vpor tp, x3, x3; \
202 vpxor x2, tp, x4; \
203 vpxor x3, x2, x2; \
204 vpxor x0, x3, x3; \
205 vpor x1, x0, x0;
206#define S7_2(x0, x1, x2, x3, x4) \
207 vpand x0, x2, x2; \
208 vpxor x4, x0, x0; \
209 vpxor x3, x4, x4; \
210 vpand x0, x3, x3; \
211 vpxor x1, x4, x4; \
212 vpxor x4, x2, x2; \
213 vpxor x1, x3, x3; \
214 vpor x0, x4, x4; \
215 vpxor x1, x4, x4;
216
217#define SI0_1(x0, x1, x2, x3, x4) \
218 vpxor x0, x1, x1; \
219 vpor x1, x3, tp; \
220 vpxor x1, x3, x4; \
221 vpxor RNOT, x0, x0; \
222 vpxor tp, x2, x2; \
223 vpxor x0, tp, x3; \
224 vpand x1, x0, x0; \
225 vpxor x2, x0, x0;
226#define SI0_2(x0, x1, x2, x3, x4) \
227 vpand x3, x2, x2; \
228 vpxor x4, x3, x3; \
229 vpxor x3, x2, x2; \
230 vpxor x3, x1, x1; \
231 vpand x0, x3, x3; \
232 vpxor x0, x1, x1; \
233 vpxor x2, x0, x0; \
234 vpxor x3, x4, x4;
235
236#define SI1_1(x0, x1, x2, x3, x4) \
237 vpxor x3, x1, x1; \
238 vpxor x2, x0, tp; \
239 vpxor RNOT, x2, x2; \
240 vpor x1, x0, x4; \
241 vpxor x3, x4, x4; \
242 vpand x1, x3, x3; \
243 vpxor x2, x1, x1; \
244 vpand x4, x2, x2;
245#define SI1_2(x0, x1, x2, x3, x4) \
246 vpxor x1, x4, x4; \
247 vpor x3, x1, x1; \
248 vpxor tp, x3, x3; \
249 vpxor tp, x2, x2; \
250 vpor x4, tp, x0; \
251 vpxor x4, x2, x2; \
252 vpxor x0, x1, x1; \
253 vpxor x1, x4, x4;
254
255#define SI2_1(x0, x1, x2, x3, x4) \
256 vpxor x1, x2, x2; \
257 vpxor RNOT, x3, tp; \
258 vpor x2, tp, tp; \
259 vpxor x3, x2, x2; \
260 vpxor x0, x3, x4; \
261 vpxor x1, tp, x3; \
262 vpor x2, x1, x1; \
263 vpxor x0, x2, x2;
264#define SI2_2(x0, x1, x2, x3, x4) \
265 vpxor x4, x1, x1; \
266 vpor x3, x4, x4; \
267 vpxor x3, x2, x2; \
268 vpxor x2, x4, x4; \
269 vpand x1, x2, x2; \
270 vpxor x3, x2, x2; \
271 vpxor x4, x3, x3; \
272 vpxor x0, x4, x4;
273
274#define SI3_1(x0, x1, x2, x3, x4) \
275 vpxor x1, x2, x2; \
276 vpand x2, x1, tp; \
277 vpxor x0, tp, tp; \
278 vpor x1, x0, x0; \
279 vpxor x3, x1, x4; \
280 vpxor x3, x0, x0; \
281 vpor tp, x3, x3; \
282 vpxor x2, tp, x1;
283#define SI3_2(x0, x1, x2, x3, x4) \
284 vpxor x3, x1, x1; \
285 vpxor x2, x0, x0; \
286 vpxor x3, x2, x2; \
287 vpand x1, x3, x3; \
288 vpxor x0, x1, x1; \
289 vpand x2, x0, x0; \
290 vpxor x3, x4, x4; \
291 vpxor x0, x3, x3; \
292 vpxor x1, x0, x0;
293
294#define SI4_1(x0, x1, x2, x3, x4) \
295 vpxor x3, x2, x2; \
296 vpand x1, x0, tp; \
297 vpxor x2, tp, tp; \
298 vpor x3, x2, x2; \
299 vpxor RNOT, x0, x4; \
300 vpxor tp, x1, x1; \
301 vpxor x2, tp, x0; \
302 vpand x4, x2, x2;
303#define SI4_2(x0, x1, x2, x3, x4) \
304 vpxor x0, x2, x2; \
305 vpor x4, x0, x0; \
306 vpxor x3, x0, x0; \
307 vpand x2, x3, x3; \
308 vpxor x3, x4, x4; \
309 vpxor x1, x3, x3; \
310 vpand x0, x1, x1; \
311 vpxor x1, x4, x4; \
312 vpxor x3, x0, x0;
313
314#define SI5_1(x0, x1, x2, x3, x4) \
315 vpor x2, x1, tp; \
316 vpxor x1, x2, x2; \
317 vpxor x3, tp, tp; \
318 vpand x1, x3, x3; \
319 vpxor x3, x2, x2; \
320 vpor x0, x3, x3; \
321 vpxor RNOT, x0, x0; \
322 vpxor x2, x3, x3; \
323 vpor x0, x2, x2;
324#define SI5_2(x0, x1, x2, x3, x4) \
325 vpxor tp, x1, x4; \
326 vpxor x4, x2, x2; \
327 vpand x0, x4, x4; \
328 vpxor tp, x0, x0; \
329 vpxor x3, tp, x1; \
330 vpand x2, x0, x0; \
331 vpxor x3, x2, x2; \
332 vpxor x2, x0, x0; \
333 vpxor x4, x2, x2; \
334 vpxor x3, x4, x4;
335
336#define SI6_1(x0, x1, x2, x3, x4) \
337 vpxor x2, x0, x0; \
338 vpand x3, x0, tp; \
339 vpxor x3, x2, x2; \
340 vpxor x2, tp, tp; \
341 vpxor x1, x3, x3; \
342 vpor x0, x2, x2; \
343 vpxor x3, x2, x2; \
344 vpand tp, x3, x3;
345#define SI6_2(x0, x1, x2, x3, x4) \
346 vpxor RNOT, tp, tp; \
347 vpxor x1, x3, x3; \
348 vpand x2, x1, x1; \
349 vpxor tp, x0, x4; \
350 vpxor x4, x3, x3; \
351 vpxor x2, x4, x4; \
352 vpxor x1, tp, x0; \
353 vpxor x0, x2, x2;
354
355#define SI7_1(x0, x1, x2, x3, x4) \
356 vpand x0, x3, tp; \
357 vpxor x2, x0, x0; \
358 vpor x3, x2, x2; \
359 vpxor x1, x3, x4; \
360 vpxor RNOT, x0, x0; \
361 vpor tp, x1, x1; \
362 vpxor x0, x4, x4; \
363 vpand x2, x0, x0; \
364 vpxor x1, x0, x0;
365#define SI7_2(x0, x1, x2, x3, x4) \
366 vpand x2, x1, x1; \
367 vpxor x2, tp, x3; \
368 vpxor x3, x4, x4; \
369 vpand x3, x2, x2; \
370 vpor x0, x3, x3; \
371 vpxor x4, x1, x1; \
372 vpxor x4, x3, x3; \
373 vpand x0, x4, x4; \
374 vpxor x2, x4, x4;
375
376#define get_key(i,j,t) \
377 vpbroadcastd (4*(i)+(j))*4(CTX), t;
378
379#define K2(x0, x1, x2, x3, x4, i) \
380 get_key(i, 0, RK0); \
381 get_key(i, 1, RK1); \
382 get_key(i, 2, RK2); \
383 get_key(i, 3, RK3); \
384 vpxor RK0, x0 ## 1, x0 ## 1; \
385 vpxor RK1, x1 ## 1, x1 ## 1; \
386 vpxor RK2, x2 ## 1, x2 ## 1; \
387 vpxor RK3, x3 ## 1, x3 ## 1; \
388 vpxor RK0, x0 ## 2, x0 ## 2; \
389 vpxor RK1, x1 ## 2, x1 ## 2; \
390 vpxor RK2, x2 ## 2, x2 ## 2; \
391 vpxor RK3, x3 ## 2, x3 ## 2;
392
393#define LK2(x0, x1, x2, x3, x4, i) \
394 vpslld $13, x0 ## 1, x4 ## 1; \
395 vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \
396 vpor x4 ## 1, x0 ## 1, x0 ## 1; \
397 vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
398 vpslld $3, x2 ## 1, x4 ## 1; \
399 vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \
400 vpor x4 ## 1, x2 ## 1, x2 ## 1; \
401 vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
402 vpslld $13, x0 ## 2, x4 ## 2; \
403 vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \
404 vpor x4 ## 2, x0 ## 2, x0 ## 2; \
405 vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
406 vpslld $3, x2 ## 2, x4 ## 2; \
407 vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \
408 vpor x4 ## 2, x2 ## 2, x2 ## 2; \
409 vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
410 vpslld $1, x1 ## 1, x4 ## 1; \
411 vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \
412 vpor x4 ## 1, x1 ## 1, x1 ## 1; \
413 vpslld $3, x0 ## 1, x4 ## 1; \
414 vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
415 vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
416 get_key(i, 1, RK1); \
417 vpslld $1, x1 ## 2, x4 ## 2; \
418 vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \
419 vpor x4 ## 2, x1 ## 2, x1 ## 2; \
420 vpslld $3, x0 ## 2, x4 ## 2; \
421 vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
422 vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
423 get_key(i, 3, RK3); \
424 vpslld $7, x3 ## 1, x4 ## 1; \
425 vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \
426 vpor x4 ## 1, x3 ## 1, x3 ## 1; \
427 vpslld $7, x1 ## 1, x4 ## 1; \
428 vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
429 vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
430 vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
431 vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
432 get_key(i, 0, RK0); \
433 vpslld $7, x3 ## 2, x4 ## 2; \
434 vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \
435 vpor x4 ## 2, x3 ## 2, x3 ## 2; \
436 vpslld $7, x1 ## 2, x4 ## 2; \
437 vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
438 vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
439 vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
440 vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
441 get_key(i, 2, RK2); \
442 vpxor RK1, x1 ## 1, x1 ## 1; \
443 vpxor RK3, x3 ## 1, x3 ## 1; \
444 vpslld $5, x0 ## 1, x4 ## 1; \
445 vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \
446 vpor x4 ## 1, x0 ## 1, x0 ## 1; \
447 vpslld $22, x2 ## 1, x4 ## 1; \
448 vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \
449 vpor x4 ## 1, x2 ## 1, x2 ## 1; \
450 vpxor RK0, x0 ## 1, x0 ## 1; \
451 vpxor RK2, x2 ## 1, x2 ## 1; \
452 vpxor RK1, x1 ## 2, x1 ## 2; \
453 vpxor RK3, x3 ## 2, x3 ## 2; \
454 vpslld $5, x0 ## 2, x4 ## 2; \
455 vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \
456 vpor x4 ## 2, x0 ## 2, x0 ## 2; \
457 vpslld $22, x2 ## 2, x4 ## 2; \
458 vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \
459 vpor x4 ## 2, x2 ## 2, x2 ## 2; \
460 vpxor RK0, x0 ## 2, x0 ## 2; \
461 vpxor RK2, x2 ## 2, x2 ## 2;
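
K2 is plain round-key mixing; LK2 applies Serpent's linear transformation to two 8-block groups at once (the "## 1"/"## 2" register pairs) and then XORs in round key i. In the usual Serpent notation the linear transformation is (a reference summary, not new code; <<< is rotation, << is a plain shift):

    X0 <- X0 <<< 13        X2 <- X2 <<< 3
    X1 <- X1 ^ X0 ^ X2     X3 <- X3 ^ X2 ^ (X0 << 3)
    X1 <- X1 <<< 1         X3 <- X3 <<< 7
    X0 <- X0 ^ X1 ^ X3     X2 <- X2 ^ X3 ^ (X1 << 7)
    X0 <- X0 <<< 5         X2 <- X2 <<< 22

KL2 is the corresponding inverse sequence used on the decryption path, with the round-key XOR performed first.
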
462
463#define KL2(x0, x1, x2, x3, x4, i) \
464 vpxor RK0, x0 ## 1, x0 ## 1; \
465 vpxor RK2, x2 ## 1, x2 ## 1; \
466 vpsrld $5, x0 ## 1, x4 ## 1; \
467 vpslld $(32 - 5), x0 ## 1, x0 ## 1; \
468 vpor x4 ## 1, x0 ## 1, x0 ## 1; \
469 vpxor RK3, x3 ## 1, x3 ## 1; \
470 vpxor RK1, x1 ## 1, x1 ## 1; \
471 vpsrld $22, x2 ## 1, x4 ## 1; \
472 vpslld $(32 - 22), x2 ## 1, x2 ## 1; \
473 vpor x4 ## 1, x2 ## 1, x2 ## 1; \
474 vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
475 vpxor RK0, x0 ## 2, x0 ## 2; \
476 vpxor RK2, x2 ## 2, x2 ## 2; \
477 vpsrld $5, x0 ## 2, x4 ## 2; \
478 vpslld $(32 - 5), x0 ## 2, x0 ## 2; \
479 vpor x4 ## 2, x0 ## 2, x0 ## 2; \
480 vpxor RK3, x3 ## 2, x3 ## 2; \
481 vpxor RK1, x1 ## 2, x1 ## 2; \
482 vpsrld $22, x2 ## 2, x4 ## 2; \
483 vpslld $(32 - 22), x2 ## 2, x2 ## 2; \
484 vpor x4 ## 2, x2 ## 2, x2 ## 2; \
485 vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
486 vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
487 vpslld $7, x1 ## 1, x4 ## 1; \
488 vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
489 vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
490 vpsrld $1, x1 ## 1, x4 ## 1; \
491 vpslld $(32 - 1), x1 ## 1, x1 ## 1; \
492 vpor x4 ## 1, x1 ## 1, x1 ## 1; \
493 vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
494 vpslld $7, x1 ## 2, x4 ## 2; \
495 vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
496 vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
497 vpsrld $1, x1 ## 2, x4 ## 2; \
498 vpslld $(32 - 1), x1 ## 2, x1 ## 2; \
499 vpor x4 ## 2, x1 ## 2, x1 ## 2; \
500 vpsrld $7, x3 ## 1, x4 ## 1; \
501 vpslld $(32 - 7), x3 ## 1, x3 ## 1; \
502 vpor x4 ## 1, x3 ## 1, x3 ## 1; \
503 vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
504 vpslld $3, x0 ## 1, x4 ## 1; \
505 vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
506 vpsrld $7, x3 ## 2, x4 ## 2; \
507 vpslld $(32 - 7), x3 ## 2, x3 ## 2; \
508 vpor x4 ## 2, x3 ## 2, x3 ## 2; \
509 vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
510 vpslld $3, x0 ## 2, x4 ## 2; \
511 vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
512 vpsrld $13, x0 ## 1, x4 ## 1; \
513 vpslld $(32 - 13), x0 ## 1, x0 ## 1; \
514 vpor x4 ## 1, x0 ## 1, x0 ## 1; \
515 vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
516 vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
517 vpsrld $3, x2 ## 1, x4 ## 1; \
518 vpslld $(32 - 3), x2 ## 1, x2 ## 1; \
519 vpor x4 ## 1, x2 ## 1, x2 ## 1; \
520 vpsrld $13, x0 ## 2, x4 ## 2; \
521 vpslld $(32 - 13), x0 ## 2, x0 ## 2; \
522 vpor x4 ## 2, x0 ## 2, x0 ## 2; \
523 vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
524 vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
525 vpsrld $3, x2 ## 2, x4 ## 2; \
526 vpslld $(32 - 3), x2 ## 2, x2 ## 2; \
527 vpor x4 ## 2, x2 ## 2, x2 ## 2;
528
529#define S(SBOX, x0, x1, x2, x3, x4) \
530 SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
531 SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
532 SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
533 SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
534
535#define SP(SBOX, x0, x1, x2, x3, x4, i) \
536 get_key(i, 0, RK0); \
537 SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
538 get_key(i, 2, RK2); \
539 SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
540 get_key(i, 3, RK3); \
541 SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
542 get_key(i, 1, RK1); \
543 SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
544
545#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
546 vpunpckldq x1, x0, t0; \
547 vpunpckhdq x1, x0, t2; \
548 vpunpckldq x3, x2, t1; \
549 vpunpckhdq x3, x2, x3; \
550 \
551 vpunpcklqdq t1, t0, x0; \
552 vpunpckhqdq t1, t0, x1; \
553 vpunpcklqdq x3, t2, x2; \
554 vpunpckhqdq x3, t2, x3;
555
556#define read_blocks(x0, x1, x2, x3, t0, t1, t2) \
557 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
558
559#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
560 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
561
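read_blocks/write_blocks are simply transpose_4x4: the four 32-bit words of four blocks are rearranged so that each register ends up holding the same word position from every block, which is what lets the S-box macros process many blocks in parallel. A plain C sketch of the same 4x4 dword transpose (hypothetical helper, not part of the patch):

static void transpose_4x4_sketch(u32 x[4][4])
{
	u32 t[4][4];
	int i, j;

	for (i = 0; i < 4; i++)
		for (j = 0; j < 4; j++)
			t[j][i] = x[i][j];	/* word j of block i -> lane i of row j */

	for (i = 0; i < 4; i++)
		for (j = 0; j < 4; j++)
			x[i][j] = t[i][j];
}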
562.align 8
563__serpent_enc_blk16:
564 /* input:
565 * %rdi: ctx, CTX
566 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext
567 * output:
568 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
569 */
570
571 vpcmpeqd RNOT, RNOT, RNOT;
572
573 read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
574 read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
575
576 K2(RA, RB, RC, RD, RE, 0);
577 S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
578 S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2);
579 S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3);
580 S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4);
581 S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5);
582 S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6);
583 S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7);
584 S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8);
585 S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9);
586 S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10);
587 S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11);
588 S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12);
589 S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13);
590 S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14);
591 S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15);
592 S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16);
593 S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17);
594 S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18);
595 S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19);
596 S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20);
597 S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21);
598 S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22);
599 S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23);
600 S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24);
601 S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25);
602 S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26);
603 S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27);
604 S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28);
605 S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29);
606 S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30);
607 S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
608 S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
609
610 write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
611 write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
612
613 ret;
614ENDPROC(__serpent_enc_blk16)
615
616.align 8
617__serpent_dec_blk16:
618 /* input:
619 * %rdi: ctx, CTX
620 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
621 * output:
622 * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: plaintext
623 */
624
625 vpcmpeqd RNOT, RNOT, RNOT;
626
627 read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
628 read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
629
630 K2(RA, RB, RC, RD, RE, 32);
631 SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
632 SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30);
633 SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29);
634 SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28);
635 SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27);
636 SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26);
637 SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25);
638 SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24);
639 SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23);
640 SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22);
641 SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21);
642 SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20);
643 SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19);
644 SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18);
645 SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17);
646 SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16);
647 SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15);
648 SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14);
649 SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13);
650 SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12);
651 SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11);
652 SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10);
653 SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9);
654 SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8);
655 SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7);
656 SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6);
657 SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5);
658 SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4);
659 SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3);
660 SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2);
661 SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
662 S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
663
664 write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
665 write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
666
667 ret;
668ENDPROC(__serpent_dec_blk16)
669
670ENTRY(serpent_ecb_enc_16way)
671 /* input:
672 * %rdi: ctx, CTX
673 * %rsi: dst
674 * %rdx: src
675 */
676
677 vzeroupper;
678
679 load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
680
681 call __serpent_enc_blk16;
682
683 store_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
684
685 vzeroupper;
686
687 ret;
688ENDPROC(serpent_ecb_enc_16way)
689
690ENTRY(serpent_ecb_dec_16way)
691 /* input:
692 * %rdi: ctx, CTX
693 * %rsi: dst
694 * %rdx: src
695 */
696
697 vzeroupper;
698
699 load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
700
701 call __serpent_dec_blk16;
702
703 store_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
704
705 vzeroupper;
706
707 ret;
708ENDPROC(serpent_ecb_dec_16way)
709
710ENTRY(serpent_cbc_dec_16way)
711 /* input:
712 * %rdi: ctx, CTX
713 * %rsi: dst
714 * %rdx: src
715 */
716
717 vzeroupper;
718
719 load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
720
721 call __serpent_dec_blk16;
722
723 store_cbc_16way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2,
724 RK0);
725
726 vzeroupper;
727
728 ret;
729ENDPROC(serpent_cbc_dec_16way)
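Unlike CBC encryption, CBC decryption parallelises: each plaintext block is the raw decryption of its own ciphertext block XORed with the previous ciphertext block (or the IV for the first block), which is the chaining that store_cbc_16way applies after the 16 parallel decryptions. A scalar sketch of that chaining step (hypothetical helper; 16-byte blocks; src holds the ciphertext and dst already holds the raw block decryptions in a separate buffer):

static void cbc_dec_chain_sketch(const u8 *iv, const u8 *src, u8 *dst,
				 unsigned int nblocks)
{
	unsigned int i, j;

	for (i = 0; i < nblocks; i++) {
		const u8 *prev = i ? src + (i - 1) * 16 : iv;

		for (j = 0; j < 16; j++)
			dst[i * 16 + j] ^= prev[j];
	}
}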
730
731ENTRY(serpent_ctr_16way)
732 /* input:
733 * %rdi: ctx, CTX
734 * %rsi: dst (16 blocks)
735 * %rdx: src (16 blocks)
736 * %rcx: iv (little endian, 128bit)
737 */
738
739 vzeroupper;
740
741 load_ctr_16way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
742 RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
743 tp);
744
745 call __serpent_enc_blk16;
746
747 store_ctr_16way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
748
749 vzeroupper;
750
751 ret;
752ENDPROC(serpent_ctr_16way)
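load_ctr_16way expands the 128-bit little-endian counter in %rcx into 16 consecutive big-endian counter blocks (byte-swapped via .Lbswap128_mask) and advances the IV past them; the encrypted counters are then XORed over the data by store_ctr_16way. A hypothetical C sketch of the counter expansion, with the counter viewed as a low/high pair of 64-bit words:

static void ctr_expand_16_sketch(u64 iv[2], u8 blocks[16][16])
{
	int i, j;

	for (i = 0; i < 16; i++) {
		/* store big endian: high qword first, most significant byte first */
		for (j = 0; j < 8; j++) {
			blocks[i][j]     = iv[1] >> (56 - 8 * j);
			blocks[i][8 + j] = iv[0] >> (56 - 8 * j);
		}
		if (++iv[0] == 0)	/* carry from the low into the high qword */
			iv[1]++;
	}
}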
753
754ENTRY(serpent_xts_enc_16way)
755 /* input:
756 * %rdi: ctx, CTX
757 * %rsi: dst (16 blocks)
758 * %rdx: src (16 blocks)
759 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
760 */
761
762 vzeroupper;
763
764 load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
765 RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
766 .Lxts_gf128mul_and_shl1_mask_0,
767 .Lxts_gf128mul_and_shl1_mask_1);
768
769 call __serpent_enc_blk16;
770
771 store_xts_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
772
773 vzeroupper;
774
775 ret;
776ENDPROC(serpent_xts_enc_16way)
777
778ENTRY(serpent_xts_dec_16way)
779 /* input:
780 * %rdi: ctx, CTX
781 * %rsi: dst (16 blocks)
782 * %rdx: src (16 blocks)
783 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
784 */
785
786 vzeroupper;
787
788 load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
789 RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
790 .Lxts_gf128mul_and_shl1_mask_0,
791 .Lxts_gf128mul_and_shl1_mask_1);
792
793 call __serpent_dec_blk16;
794
795 store_xts_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
796
797 vzeroupper;
798
799 ret;
800ENDPROC(serpent_xts_dec_16way)
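In XTS the per-block tweak is t·αⁿ in GF(2¹²⁸); the .Lxts_gf128mul_and_shl1_mask constants let the assembler compute the next tweak with vector shifts. The scalar equivalent is the standard multiply-by-α step: shift the 128-bit tweak left one bit and fold the carry back in with the reduction polynomial x¹²⁸ + x⁷ + x² + x + 1 (0x87). Sketch with the tweak as a low/high pair of 64-bit words (cf. gf128mul_x_ble among the kernel's GF(2¹²⁸) helpers; the name below is illustrative):

static void xts_next_tweak_sketch(u64 t[2])
{
	u64 carry = t[1] >> 63;

	t[1] = (t[1] << 1) | (t[0] >> 63);
	t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
}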
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
new file mode 100644
index 000000000000..23aabc6c20a5
--- /dev/null
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -0,0 +1,562 @@
1/*
2 * Glue Code for x86_64/AVX2 assembler optimized version of Serpent
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/crypto.h>
16#include <linux/err.h>
17#include <crypto/algapi.h>
18#include <crypto/ctr.h>
19#include <crypto/lrw.h>
20#include <crypto/xts.h>
21#include <crypto/serpent.h>
22#include <asm/xcr.h>
23#include <asm/xsave.h>
24#include <asm/crypto/serpent-avx.h>
25#include <asm/crypto/ablk_helper.h>
26#include <asm/crypto/glue_helper.h>
27
28#define SERPENT_AVX2_PARALLEL_BLOCKS 16
29
30/* 16-way AVX2 parallel cipher functions */
31asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst,
32 const u8 *src);
33asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst,
34 const u8 *src);
35asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
36
37asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src,
38 le128 *iv);
39asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
40 const u8 *src, le128 *iv);
41asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
42 const u8 *src, le128 *iv);
43
44static const struct common_glue_ctx serpent_enc = {
45 .num_funcs = 3,
46 .fpu_blocks_limit = 8,
47
48 .funcs = { {
49 .num_blocks = 16,
50 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) }
51 }, {
52 .num_blocks = 8,
53 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
54 }, {
55 .num_blocks = 1,
56 .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
57 } }
58};
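Each common_glue_ctx lists its implementations in descending num_blocks order; the glue helpers walk the list and, for the data still remaining, use the first entry that can be fed a full batch, so the 16-way AVX2 routine handles the bulk, the 8-way AVX routine the next chunk, and the scalar routine the tail. Roughly (a simplified sketch of the loop in glue_helper.c, not the exact code):

static unsigned int glue_dispatch_sketch(const struct common_glue_ctx *gctx,
					 void *ctx, u8 *dst, const u8 *src,
					 unsigned int nblocks)
{
	unsigned int i;

	for (i = 0; i < gctx->num_funcs; i++) {
		unsigned int n = gctx->funcs[i].num_blocks;

		if (nblocks >= n) {
			gctx->funcs[i].fn_u.ecb(ctx, dst, src);
			return n;	/* blocks consumed in this step */
		}
	}
	return 0;
}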
59
60static const struct common_glue_ctx serpent_ctr = {
61 .num_funcs = 3,
62 .fpu_blocks_limit = 8,
63
64 .funcs = { {
65 .num_blocks = 16,
66 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) }
67 }, {
68 .num_blocks = 8,
69 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
70 }, {
71 .num_blocks = 1,
72 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
73 } }
74};
75
76static const struct common_glue_ctx serpent_enc_xts = {
77 .num_funcs = 3,
78 .fpu_blocks_limit = 8,
79
80 .funcs = { {
81 .num_blocks = 16,
82 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) }
83 }, {
84 .num_blocks = 8,
85 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
86 }, {
87 .num_blocks = 1,
88 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
89 } }
90};
91
92static const struct common_glue_ctx serpent_dec = {
93 .num_funcs = 3,
94 .fpu_blocks_limit = 8,
95
96 .funcs = { {
97 .num_blocks = 16,
98 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) }
99 }, {
100 .num_blocks = 8,
101 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
102 }, {
103 .num_blocks = 1,
104 .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
105 } }
106};
107
108static const struct common_glue_ctx serpent_dec_cbc = {
109 .num_funcs = 3,
110 .fpu_blocks_limit = 8,
111
112 .funcs = { {
113 .num_blocks = 16,
114 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) }
115 }, {
116 .num_blocks = 8,
117 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
118 }, {
119 .num_blocks = 1,
120 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
121 } }
122};
123
124static const struct common_glue_ctx serpent_dec_xts = {
125 .num_funcs = 3,
126 .fpu_blocks_limit = 8,
127
128 .funcs = { {
129 .num_blocks = 16,
130 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) }
131 }, {
132 .num_blocks = 8,
133 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
134 }, {
135 .num_blocks = 1,
136 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
137 } }
138};
139
140static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
141 struct scatterlist *src, unsigned int nbytes)
142{
143 return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
144}
145
146static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
147 struct scatterlist *src, unsigned int nbytes)
148{
149 return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
150}
151
152static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
153 struct scatterlist *src, unsigned int nbytes)
154{
155 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
156 dst, src, nbytes);
157}
158
159static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
160 struct scatterlist *src, unsigned int nbytes)
161{
162 return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
163 nbytes);
164}
165
166static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
167 struct scatterlist *src, unsigned int nbytes)
168{
169 return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
170}
171
172static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
173{
174	/* since we reuse the AVX functions, start using the FPU at 8 parallel blocks */
175 return glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
176}
177
178static inline void serpent_fpu_end(bool fpu_enabled)
179{
180 glue_fpu_end(fpu_enabled);
181}
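glue_fpu_begin only enters the FPU/SIMD region when at least fpu_blocks_limit blocks are queued (8 here, because the 8-way AVX routines are reused for medium-sized requests), so short requests avoid the kernel_fpu_begin()/kernel_fpu_end() save/restore cost. A simplified sketch of that decision (the real helper also clears CRYPTO_TFM_REQ_MAY_SLEEP on the descriptor):

static bool fpu_begin_sketch(bool fpu_enabled, unsigned int nbytes)
{
	if (fpu_enabled)
		return true;
	if (nbytes < 8 * SERPENT_BLOCK_SIZE)
		return false;

	kernel_fpu_begin();
	return true;
}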
182
183struct crypt_priv {
184 struct serpent_ctx *ctx;
185 bool fpu_enabled;
186};
187
188static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
189{
190 const unsigned int bsize = SERPENT_BLOCK_SIZE;
191 struct crypt_priv *ctx = priv;
192 int i;
193
194 ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
195
196 if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
197 serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
198 srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
199 nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
200 }
201
202 while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
203 serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
204 srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
205 nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
206 }
207
208 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
209 __serpent_encrypt(ctx->ctx, srcdst, srcdst);
210}
211
212static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
213{
214 const unsigned int bsize = SERPENT_BLOCK_SIZE;
215 struct crypt_priv *ctx = priv;
216 int i;
217
218 ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
219
220 if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
221 serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
222 srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
223 nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
224 }
225
226 while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
227 serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
228 srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
229 nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
230 }
231
232 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
233 __serpent_decrypt(ctx->ctx, srcdst, srcdst);
234}
235
236static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
237 struct scatterlist *src, unsigned int nbytes)
238{
239 struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
240 be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
241 struct crypt_priv crypt_ctx = {
242 .ctx = &ctx->serpent_ctx,
243 .fpu_enabled = false,
244 };
245 struct lrw_crypt_req req = {
246 .tbuf = buf,
247 .tbuflen = sizeof(buf),
248
249 .table_ctx = &ctx->lrw_table,
250 .crypt_ctx = &crypt_ctx,
251 .crypt_fn = encrypt_callback,
252 };
253 int ret;
254
255 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
256 ret = lrw_crypt(desc, dst, src, nbytes, &req);
257 serpent_fpu_end(crypt_ctx.fpu_enabled);
258
259 return ret;
260}
261
262static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
263 struct scatterlist *src, unsigned int nbytes)
264{
265 struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
266 be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
267 struct crypt_priv crypt_ctx = {
268 .ctx = &ctx->serpent_ctx,
269 .fpu_enabled = false,
270 };
271 struct lrw_crypt_req req = {
272 .tbuf = buf,
273 .tbuflen = sizeof(buf),
274
275 .table_ctx = &ctx->lrw_table,
276 .crypt_ctx = &crypt_ctx,
277 .crypt_fn = decrypt_callback,
278 };
279 int ret;
280
281 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
282 ret = lrw_crypt(desc, dst, src, nbytes, &req);
283 serpent_fpu_end(crypt_ctx.fpu_enabled);
284
285 return ret;
286}
287
288static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
289 struct scatterlist *src, unsigned int nbytes)
290{
291 struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
292
293 return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
294 XTS_TWEAK_CAST(__serpent_encrypt),
295 &ctx->tweak_ctx, &ctx->crypt_ctx);
296}
297
298static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
299 struct scatterlist *src, unsigned int nbytes)
300{
301 struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
302
303 return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
304 XTS_TWEAK_CAST(__serpent_encrypt),
305 &ctx->tweak_ctx, &ctx->crypt_ctx);
306}
307
308static struct crypto_alg srp_algs[10] = { {
309 .cra_name = "__ecb-serpent-avx2",
310 .cra_driver_name = "__driver-ecb-serpent-avx2",
311 .cra_priority = 0,
312 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
313 .cra_blocksize = SERPENT_BLOCK_SIZE,
314 .cra_ctxsize = sizeof(struct serpent_ctx),
315 .cra_alignmask = 0,
316 .cra_type = &crypto_blkcipher_type,
317 .cra_module = THIS_MODULE,
318 .cra_list = LIST_HEAD_INIT(srp_algs[0].cra_list),
319 .cra_u = {
320 .blkcipher = {
321 .min_keysize = SERPENT_MIN_KEY_SIZE,
322 .max_keysize = SERPENT_MAX_KEY_SIZE,
323 .setkey = serpent_setkey,
324 .encrypt = ecb_encrypt,
325 .decrypt = ecb_decrypt,
326 },
327 },
328}, {
329 .cra_name = "__cbc-serpent-avx2",
330 .cra_driver_name = "__driver-cbc-serpent-avx2",
331 .cra_priority = 0,
332 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
333 .cra_blocksize = SERPENT_BLOCK_SIZE,
334 .cra_ctxsize = sizeof(struct serpent_ctx),
335 .cra_alignmask = 0,
336 .cra_type = &crypto_blkcipher_type,
337 .cra_module = THIS_MODULE,
338 .cra_list = LIST_HEAD_INIT(srp_algs[1].cra_list),
339 .cra_u = {
340 .blkcipher = {
341 .min_keysize = SERPENT_MIN_KEY_SIZE,
342 .max_keysize = SERPENT_MAX_KEY_SIZE,
343 .setkey = serpent_setkey,
344 .encrypt = cbc_encrypt,
345 .decrypt = cbc_decrypt,
346 },
347 },
348}, {
349 .cra_name = "__ctr-serpent-avx2",
350 .cra_driver_name = "__driver-ctr-serpent-avx2",
351 .cra_priority = 0,
352 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
353 .cra_blocksize = 1,
354 .cra_ctxsize = sizeof(struct serpent_ctx),
355 .cra_alignmask = 0,
356 .cra_type = &crypto_blkcipher_type,
357 .cra_module = THIS_MODULE,
358 .cra_list = LIST_HEAD_INIT(srp_algs[2].cra_list),
359 .cra_u = {
360 .blkcipher = {
361 .min_keysize = SERPENT_MIN_KEY_SIZE,
362 .max_keysize = SERPENT_MAX_KEY_SIZE,
363 .ivsize = SERPENT_BLOCK_SIZE,
364 .setkey = serpent_setkey,
365 .encrypt = ctr_crypt,
366 .decrypt = ctr_crypt,
367 },
368 },
369}, {
370 .cra_name = "__lrw-serpent-avx2",
371 .cra_driver_name = "__driver-lrw-serpent-avx2",
372 .cra_priority = 0,
373 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
374 .cra_blocksize = SERPENT_BLOCK_SIZE,
375 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
376 .cra_alignmask = 0,
377 .cra_type = &crypto_blkcipher_type,
378 .cra_module = THIS_MODULE,
379 .cra_list = LIST_HEAD_INIT(srp_algs[3].cra_list),
380 .cra_exit = lrw_serpent_exit_tfm,
381 .cra_u = {
382 .blkcipher = {
383 .min_keysize = SERPENT_MIN_KEY_SIZE +
384 SERPENT_BLOCK_SIZE,
385 .max_keysize = SERPENT_MAX_KEY_SIZE +
386 SERPENT_BLOCK_SIZE,
387 .ivsize = SERPENT_BLOCK_SIZE,
388 .setkey = lrw_serpent_setkey,
389 .encrypt = lrw_encrypt,
390 .decrypt = lrw_decrypt,
391 },
392 },
393}, {
394 .cra_name = "__xts-serpent-avx2",
395 .cra_driver_name = "__driver-xts-serpent-avx2",
396 .cra_priority = 0,
397 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
398 .cra_blocksize = SERPENT_BLOCK_SIZE,
399 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
400 .cra_alignmask = 0,
401 .cra_type = &crypto_blkcipher_type,
402 .cra_module = THIS_MODULE,
403 .cra_list = LIST_HEAD_INIT(srp_algs[4].cra_list),
404 .cra_u = {
405 .blkcipher = {
406 .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
407 .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
408 .ivsize = SERPENT_BLOCK_SIZE,
409 .setkey = xts_serpent_setkey,
410 .encrypt = xts_encrypt,
411 .decrypt = xts_decrypt,
412 },
413 },
414}, {
415 .cra_name = "ecb(serpent)",
416 .cra_driver_name = "ecb-serpent-avx2",
417 .cra_priority = 600,
418 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
419 .cra_blocksize = SERPENT_BLOCK_SIZE,
420 .cra_ctxsize = sizeof(struct async_helper_ctx),
421 .cra_alignmask = 0,
422 .cra_type = &crypto_ablkcipher_type,
423 .cra_module = THIS_MODULE,
424 .cra_list = LIST_HEAD_INIT(srp_algs[5].cra_list),
425 .cra_init = ablk_init,
426 .cra_exit = ablk_exit,
427 .cra_u = {
428 .ablkcipher = {
429 .min_keysize = SERPENT_MIN_KEY_SIZE,
430 .max_keysize = SERPENT_MAX_KEY_SIZE,
431 .setkey = ablk_set_key,
432 .encrypt = ablk_encrypt,
433 .decrypt = ablk_decrypt,
434 },
435 },
436}, {
437 .cra_name = "cbc(serpent)",
438 .cra_driver_name = "cbc-serpent-avx2",
439 .cra_priority = 600,
440 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
441 .cra_blocksize = SERPENT_BLOCK_SIZE,
442 .cra_ctxsize = sizeof(struct async_helper_ctx),
443 .cra_alignmask = 0,
444 .cra_type = &crypto_ablkcipher_type,
445 .cra_module = THIS_MODULE,
446 .cra_list = LIST_HEAD_INIT(srp_algs[6].cra_list),
447 .cra_init = ablk_init,
448 .cra_exit = ablk_exit,
449 .cra_u = {
450 .ablkcipher = {
451 .min_keysize = SERPENT_MIN_KEY_SIZE,
452 .max_keysize = SERPENT_MAX_KEY_SIZE,
453 .ivsize = SERPENT_BLOCK_SIZE,
454 .setkey = ablk_set_key,
455 .encrypt = __ablk_encrypt,
456 .decrypt = ablk_decrypt,
457 },
458 },
459}, {
460 .cra_name = "ctr(serpent)",
461 .cra_driver_name = "ctr-serpent-avx2",
462 .cra_priority = 600,
463 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
464 .cra_blocksize = 1,
465 .cra_ctxsize = sizeof(struct async_helper_ctx),
466 .cra_alignmask = 0,
467 .cra_type = &crypto_ablkcipher_type,
468 .cra_module = THIS_MODULE,
469 .cra_list = LIST_HEAD_INIT(srp_algs[7].cra_list),
470 .cra_init = ablk_init,
471 .cra_exit = ablk_exit,
472 .cra_u = {
473 .ablkcipher = {
474 .min_keysize = SERPENT_MIN_KEY_SIZE,
475 .max_keysize = SERPENT_MAX_KEY_SIZE,
476 .ivsize = SERPENT_BLOCK_SIZE,
477 .setkey = ablk_set_key,
478 .encrypt = ablk_encrypt,
479 .decrypt = ablk_encrypt,
480 .geniv = "chainiv",
481 },
482 },
483}, {
484 .cra_name = "lrw(serpent)",
485 .cra_driver_name = "lrw-serpent-avx2",
486 .cra_priority = 600,
487 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
488 .cra_blocksize = SERPENT_BLOCK_SIZE,
489 .cra_ctxsize = sizeof(struct async_helper_ctx),
490 .cra_alignmask = 0,
491 .cra_type = &crypto_ablkcipher_type,
492 .cra_module = THIS_MODULE,
493 .cra_list = LIST_HEAD_INIT(srp_algs[8].cra_list),
494 .cra_init = ablk_init,
495 .cra_exit = ablk_exit,
496 .cra_u = {
497 .ablkcipher = {
498 .min_keysize = SERPENT_MIN_KEY_SIZE +
499 SERPENT_BLOCK_SIZE,
500 .max_keysize = SERPENT_MAX_KEY_SIZE +
501 SERPENT_BLOCK_SIZE,
502 .ivsize = SERPENT_BLOCK_SIZE,
503 .setkey = ablk_set_key,
504 .encrypt = ablk_encrypt,
505 .decrypt = ablk_decrypt,
506 },
507 },
508}, {
509 .cra_name = "xts(serpent)",
510 .cra_driver_name = "xts-serpent-avx2",
511 .cra_priority = 600,
512 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
513 .cra_blocksize = SERPENT_BLOCK_SIZE,
514 .cra_ctxsize = sizeof(struct async_helper_ctx),
515 .cra_alignmask = 0,
516 .cra_type = &crypto_ablkcipher_type,
517 .cra_module = THIS_MODULE,
518 .cra_list = LIST_HEAD_INIT(srp_algs[9].cra_list),
519 .cra_init = ablk_init,
520 .cra_exit = ablk_exit,
521 .cra_u = {
522 .ablkcipher = {
523 .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
524 .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
525 .ivsize = SERPENT_BLOCK_SIZE,
526 .setkey = ablk_set_key,
527 .encrypt = ablk_encrypt,
528 .decrypt = ablk_decrypt,
529 },
530 },
531} };
532
533static int __init init(void)
534{
535 u64 xcr0;
536
537 if (!cpu_has_avx2 || !cpu_has_osxsave) {
538 pr_info("AVX2 instructions are not detected.\n");
539 return -ENODEV;
540 }
541
542 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
543 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
544		pr_info("AVX2 detected but unusable.\n");
545 return -ENODEV;
546 }
547
548 return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs));
549}
550
551static void __exit fini(void)
552{
553 crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs));
554}
555
556module_init(init);
557module_exit(fini);
558
559MODULE_LICENSE("GPL");
560MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized");
561MODULE_ALIAS("serpent");
562MODULE_ALIAS("serpent-asm");
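Illustrative only: with this module loaded, the higher-priority "*-serpent-avx2" drivers are what the crypto API hands back for the generic algorithm names. A minimal sketch using the ablkcipher interface of this kernel generation (the example_* name is hypothetical; request setup and error paths are shortened):

static int example_serpent_xts_tfm(const u8 *key, unsigned int keylen)
{
	struct crypto_ablkcipher *tfm;
	int err;

	tfm = crypto_alloc_ablkcipher("xts(serpent)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_ablkcipher_setkey(tfm, key, keylen);
	/* ... build and queue ablkcipher requests against tfm ... */
	crypto_free_ablkcipher(tfm);
	return err;
}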
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 52abaaf28e7f..9ae83cf8d21e 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -4,8 +4,7 @@
4 * Copyright (C) 2012 Johannes Goetzfried 4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> 5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 * 6 *
7 * Glue code based on serpent_sse2_glue.c by: 7 * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
8 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
9 * 8 *
10 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -42,7 +41,32 @@
42#include <asm/crypto/ablk_helper.h> 41#include <asm/crypto/ablk_helper.h>
43#include <asm/crypto/glue_helper.h> 42#include <asm/crypto/glue_helper.h>
44 43
45static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) 44/* 8-way parallel cipher functions */
45asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
46 const u8 *src);
47EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx);
48
49asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
50 const u8 *src);
51EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx);
52
53asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
54 const u8 *src);
55EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx);
56
57asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
58 const u8 *src, le128 *iv);
59EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx);
60
61asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
62 const u8 *src, le128 *iv);
63EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx);
64
65asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
66 const u8 *src, le128 *iv);
67EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx);
68
69void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
46{ 70{
47 be128 ctrblk; 71 be128 ctrblk;
48 72
@@ -52,6 +76,22 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
52 __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); 76 __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
53 u128_xor(dst, src, (u128 *)&ctrblk); 77 u128_xor(dst, src, (u128 *)&ctrblk);
54} 78}
79EXPORT_SYMBOL_GPL(__serpent_crypt_ctr);
80
81void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
82{
83 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
84 GLUE_FUNC_CAST(__serpent_encrypt));
85}
86EXPORT_SYMBOL_GPL(serpent_xts_enc);
87
88void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
89{
90 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
91 GLUE_FUNC_CAST(__serpent_decrypt));
92}
93EXPORT_SYMBOL_GPL(serpent_xts_dec);
94
55 95
56static const struct common_glue_ctx serpent_enc = { 96static const struct common_glue_ctx serpent_enc = {
57 .num_funcs = 2, 97 .num_funcs = 2,
@@ -75,7 +115,20 @@ static const struct common_glue_ctx serpent_ctr = {
75 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } 115 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
76 }, { 116 }, {
77 .num_blocks = 1, 117 .num_blocks = 1,
78 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } 118 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
119 } }
120};
121
122static const struct common_glue_ctx serpent_enc_xts = {
123 .num_funcs = 2,
124 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
125
126 .funcs = { {
127 .num_blocks = SERPENT_PARALLEL_BLOCKS,
128 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
129 }, {
130 .num_blocks = 1,
131 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
79 } } 132 } }
80}; 133};
81 134
@@ -105,6 +158,19 @@ static const struct common_glue_ctx serpent_dec_cbc = {
105 } } 158 } }
106}; 159};
107 160
161static const struct common_glue_ctx serpent_dec_xts = {
162 .num_funcs = 2,
163 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
164
165 .funcs = { {
166 .num_blocks = SERPENT_PARALLEL_BLOCKS,
167 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
168 }, {
169 .num_blocks = 1,
170 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
171 } }
172};
173
108static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 174static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
109 struct scatterlist *src, unsigned int nbytes) 175 struct scatterlist *src, unsigned int nbytes)
110{ 176{
@@ -187,13 +253,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
187 __serpent_decrypt(ctx->ctx, srcdst, srcdst); 253 __serpent_decrypt(ctx->ctx, srcdst, srcdst);
188} 254}
189 255
190struct serpent_lrw_ctx { 256int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
191 struct lrw_table_ctx lrw_table; 257 unsigned int keylen)
192 struct serpent_ctx serpent_ctx;
193};
194
195static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
196 unsigned int keylen)
197{ 258{
198 struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); 259 struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
199 int err; 260 int err;
@@ -206,6 +267,7 @@ static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
206 return lrw_init_table(&ctx->lrw_table, key + keylen - 267 return lrw_init_table(&ctx->lrw_table, key + keylen -
207 SERPENT_BLOCK_SIZE); 268 SERPENT_BLOCK_SIZE);
208} 269}
270EXPORT_SYMBOL_GPL(lrw_serpent_setkey);
209 271
210static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 272static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
211 struct scatterlist *src, unsigned int nbytes) 273 struct scatterlist *src, unsigned int nbytes)
@@ -259,20 +321,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
259 return ret; 321 return ret;
260} 322}
261 323
262static void lrw_exit_tfm(struct crypto_tfm *tfm) 324void lrw_serpent_exit_tfm(struct crypto_tfm *tfm)
263{ 325{
264 struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); 326 struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
265 327
266 lrw_free_table(&ctx->lrw_table); 328 lrw_free_table(&ctx->lrw_table);
267} 329}
330EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm);
268 331
269struct serpent_xts_ctx { 332int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
270 struct serpent_ctx tweak_ctx; 333 unsigned int keylen)
271 struct serpent_ctx crypt_ctx;
272};
273
274static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
275 unsigned int keylen)
276{ 334{
277 struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); 335 struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
278 u32 *flags = &tfm->crt_flags; 336 u32 *flags = &tfm->crt_flags;
@@ -294,59 +352,26 @@ static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
294 /* second half of xts-key is for tweak */ 352 /* second half of xts-key is for tweak */
295 return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); 353 return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
296} 354}
355EXPORT_SYMBOL_GPL(xts_serpent_setkey);
297 356
298static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 357static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
299 struct scatterlist *src, unsigned int nbytes) 358 struct scatterlist *src, unsigned int nbytes)
300{ 359{
301 struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 360 struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
302 be128 buf[SERPENT_PARALLEL_BLOCKS];
303 struct crypt_priv crypt_ctx = {
304 .ctx = &ctx->crypt_ctx,
305 .fpu_enabled = false,
306 };
307 struct xts_crypt_req req = {
308 .tbuf = buf,
309 .tbuflen = sizeof(buf),
310
311 .tweak_ctx = &ctx->tweak_ctx,
312 .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
313 .crypt_ctx = &crypt_ctx,
314 .crypt_fn = encrypt_callback,
315 };
316 int ret;
317
318 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
319 ret = xts_crypt(desc, dst, src, nbytes, &req);
320 serpent_fpu_end(crypt_ctx.fpu_enabled);
321 361
322 return ret; 362 return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
363 XTS_TWEAK_CAST(__serpent_encrypt),
364 &ctx->tweak_ctx, &ctx->crypt_ctx);
323} 365}
324 366
325static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 367static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
326 struct scatterlist *src, unsigned int nbytes) 368 struct scatterlist *src, unsigned int nbytes)
327{ 369{
328 struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 370 struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
329 be128 buf[SERPENT_PARALLEL_BLOCKS];
330 struct crypt_priv crypt_ctx = {
331 .ctx = &ctx->crypt_ctx,
332 .fpu_enabled = false,
333 };
334 struct xts_crypt_req req = {
335 .tbuf = buf,
336 .tbuflen = sizeof(buf),
337
338 .tweak_ctx = &ctx->tweak_ctx,
339 .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
340 .crypt_ctx = &crypt_ctx,
341 .crypt_fn = decrypt_callback,
342 };
343 int ret;
344 371
345 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 372 return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
346 ret = xts_crypt(desc, dst, src, nbytes, &req); 373 XTS_TWEAK_CAST(__serpent_encrypt),
347 serpent_fpu_end(crypt_ctx.fpu_enabled); 374 &ctx->tweak_ctx, &ctx->crypt_ctx);
348
349 return ret;
350} 375}
351 376
352static struct crypto_alg serpent_algs[10] = { { 377static struct crypto_alg serpent_algs[10] = { {
@@ -417,7 +442,7 @@ static struct crypto_alg serpent_algs[10] = { {
417 .cra_alignmask = 0, 442 .cra_alignmask = 0,
418 .cra_type = &crypto_blkcipher_type, 443 .cra_type = &crypto_blkcipher_type,
419 .cra_module = THIS_MODULE, 444 .cra_module = THIS_MODULE,
420 .cra_exit = lrw_exit_tfm, 445 .cra_exit = lrw_serpent_exit_tfm,
421 .cra_u = { 446 .cra_u = {
422 .blkcipher = { 447 .blkcipher = {
423 .min_keysize = SERPENT_MIN_KEY_SIZE + 448 .min_keysize = SERPENT_MIN_KEY_SIZE +
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
new file mode 100644
index 000000000000..56610c4bf31b
--- /dev/null
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -0,0 +1,496 @@
1########################################################################
2# Implement fast SHA-256 with AVX1 instructions. (x86_64)
3#
4# Copyright (C) 2013 Intel Corporation.
5#
6# Authors:
7# James Guilford <james.guilford@intel.com>
8# Kirk Yap <kirk.s.yap@intel.com>
9# Tim Chen <tim.c.chen@linux.intel.com>
10#
11# This software is available to you under a choice of one of two
12# licenses. You may choose to be licensed under the terms of the GNU
13# General Public License (GPL) Version 2, available from the file
14# COPYING in the main directory of this source tree, or the
15# OpenIB.org BSD license below:
16#
17# Redistribution and use in source and binary forms, with or
18# without modification, are permitted provided that the following
19# conditions are met:
20#
21# - Redistributions of source code must retain the above
22# copyright notice, this list of conditions and the following
23# disclaimer.
24#
25# - Redistributions in binary form must reproduce the above
26# copyright notice, this list of conditions and the following
27# disclaimer in the documentation and/or other materials
28# provided with the distribution.
29#
30# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
34# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
35# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
36# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37# SOFTWARE.
38########################################################################
39#
40# This code is described in an Intel White-Paper:
41# "Fast SHA-256 Implementations on Intel Architecture Processors"
42#
43# To find it, surf to http://www.intel.com/p/en_US/embedded
44# and search for that title.
45#
46########################################################################
47# This code schedules 1 block at a time, with 4 lanes per block
48########################################################################
49
50#ifdef CONFIG_AS_AVX
51#include <linux/linkage.h>
52
53## assume buffers not aligned
54#define VMOVDQ vmovdqu
55
56################################ Define Macros
57
58# addm [mem], reg
59# Add reg to mem using reg-mem add and store
60.macro addm p1 p2
61 add \p1, \p2
62 mov \p2, \p1
63.endm
64
65
66.macro MY_ROR p1 p2
67 shld $(32-(\p1)), \p2, \p2
68.endm
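MY_ROR is a right rotate built from shld: shld of a register with itself rotates left, and rotating left by (32 - n) is the same as rotating right by n (the rorx instruction used by the AVX2 version below is BMI2-only, so it is avoided here). C equivalent of the effect, valid for n in 1..31 (sketch):

static inline u32 my_ror_sketch(u32 x, unsigned int n)
{
	return (x >> n) | (x << (32 - n));
}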
69
70################################
71
72# COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask
73# Load xmm with mem and byte swap each dword
74.macro COPY_XMM_AND_BSWAP p1 p2 p3
75 VMOVDQ \p2, \p1
76 vpshufb \p3, \p1, \p1
77.endm
78
79################################
80
81X0 = %xmm4
82X1 = %xmm5
83X2 = %xmm6
84X3 = %xmm7
85
86XTMP0 = %xmm0
87XTMP1 = %xmm1
88XTMP2 = %xmm2
89XTMP3 = %xmm3
90XTMP4 = %xmm8
91XFER = %xmm9
92XTMP5 = %xmm11
93
94SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
95SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
96BYTE_FLIP_MASK = %xmm13
97
98NUM_BLKS = %rdx # 3rd arg
99CTX = %rsi # 2nd arg
100INP = %rdi # 1st arg
101
102SRND = %rdi # clobbers INP
103c = %ecx
104d = %r8d
105e = %edx
106TBL = %rbp
107a = %eax
108b = %ebx
109
110f = %r9d
111g = %r10d
112h = %r11d
113
114y0 = %r13d
115y1 = %r14d
116y2 = %r15d
117
118
119_INP_END_SIZE = 8
120_INP_SIZE = 8
121_XFER_SIZE = 8
122_XMM_SAVE_SIZE = 0
123
124_INP_END = 0
125_INP = _INP_END + _INP_END_SIZE
126_XFER = _INP + _INP_SIZE
127_XMM_SAVE = _XFER + _XFER_SIZE
128STACK_SIZE = _XMM_SAVE + _XMM_SAVE_SIZE
129
130# rotate_Xs
131# Rotate values of symbols X0...X3
132.macro rotate_Xs
133X_ = X0
134X0 = X1
135X1 = X2
136X2 = X3
137X3 = X_
138.endm
139
140# ROTATE_ARGS
141# Rotate values of symbols a...h
142.macro ROTATE_ARGS
143TMP_ = h
144h = g
145g = f
146f = e
147e = d
148d = c
149c = b
150b = a
151a = TMP_
152.endm
153
154.macro FOUR_ROUNDS_AND_SCHED
155 ## compute s0 four at a time and s1 two at a time
156 ## compute W[-16] + W[-7] 4 at a time
157
158 mov e, y0 # y0 = e
159 MY_ROR (25-11), y0 # y0 = e >> (25-11)
160 mov a, y1 # y1 = a
161 vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7]
162 MY_ROR (22-13), y1 # y1 = a >> (22-13)
163 xor e, y0 # y0 = e ^ (e >> (25-11))
164 mov f, y2 # y2 = f
165 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
166	xor a, y1 # y1 = a ^ (a >> (22-13))
167 xor g, y2 # y2 = f^g
168 vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16]
169 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
170 and e, y2 # y2 = (f^g)&e
171 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
172 ## compute s0
173 vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15]
174 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
175	MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
176 xor g, y2 # y2 = CH = ((f^g)&e)^g
177 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
178 add y0, y2 # y2 = S1 + CH
179 add _XFER(%rsp), y2 # y2 = k + w + S1 + CH
180 mov a, y0 # y0 = a
181 add y2, h # h = h + S1 + CH + k + w
182 mov a, y2 # y2 = a
183 vpsrld $7, XTMP1, XTMP2
184 or c, y0 # y0 = a|c
185 add h, d # d = d + h + S1 + CH + k + w
186 and c, y2 # y2 = a&c
187 vpslld $(32-7), XTMP1, XTMP3
188 and b, y0 # y0 = (a|c)&b
189 add y1, h # h = h + S1 + CH + k + w + S0
190	vpor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] MY_ROR 7
191	or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
192 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
193 ROTATE_ARGS
194 mov e, y0 # y0 = e
195 mov a, y1 # y1 = a
196 MY_ROR (25-11), y0 # y0 = e >> (25-11)
197 xor e, y0 # y0 = e ^ (e >> (25-11))
198 mov f, y2 # y2 = f
199 MY_ROR (22-13), y1 # y1 = a >> (22-13)
200 vpsrld $18, XTMP1, XTMP2 #
201	xor a, y1 # y1 = a ^ (a >> (22-13))
202 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
203 xor g, y2 # y2 = f^g
204 vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3
205 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
206 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
207 and e, y2 # y2 = (f^g)&e
208	MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
209 vpslld $(32-18), XTMP1, XTMP1
210 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
211 xor g, y2 # y2 = CH = ((f^g)&e)^g
212 vpxor XTMP1, XTMP3, XTMP3 #
213 add y0, y2 # y2 = S1 + CH
214 add (1*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
215 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
216	vpxor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18
217 mov a, y0 # y0 = a
218 add y2, h # h = h + S1 + CH + k + w
219 mov a, y2 # y2 = a
220 vpxor XTMP4, XTMP3, XTMP1 # XTMP1 = s0
221 or c, y0 # y0 = a|c
222 add h, d # d = d + h + S1 + CH + k + w
223 and c, y2 # y2 = a&c
224 ## compute low s1
225 vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
226 and b, y0 # y0 = (a|c)&b
227 add y1, h # h = h + S1 + CH + k + w + S0
228 vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
229	or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
230 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
231 ROTATE_ARGS
232 mov e, y0 # y0 = e
233 mov a, y1 # y1 = a
234 MY_ROR (25-11), y0 # y0 = e >> (25-11)
235 xor e, y0 # y0 = e ^ (e >> (25-11))
236 MY_ROR (22-13), y1 # y1 = a >> (22-13)
237 mov f, y2 # y2 = f
238	xor a, y1 # y1 = a ^ (a >> (22-13))
239 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
240 vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
241 xor g, y2 # y2 = f^g
242 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xBxA}
243 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
244 and e, y2 # y2 = (f^g)&e
245 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xBxA}
246 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
247 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
248 xor g, y2 # y2 = CH = ((f^g)&e)^g
249	MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
250 vpxor XTMP3, XTMP2, XTMP2 #
251 add y0, y2 # y2 = S1 + CH
252 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
253 add (2*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
254 vpxor XTMP2, XTMP4, XTMP4 # XTMP4 = s1 {xBxA}
255 mov a, y0 # y0 = a
256 add y2, h # h = h + S1 + CH + k + w
257 mov a, y2 # y2 = a
258 vpshufb SHUF_00BA, XTMP4, XTMP4 # XTMP4 = s1 {00BA}
259 or c, y0 # y0 = a|c
260 add h, d # d = d + h + S1 + CH + k + w
261 and c, y2 # y2 = a&c
262 vpaddd XTMP4, XTMP0, XTMP0 # XTMP0 = {..., ..., W[1], W[0]}
263 and b, y0 # y0 = (a|c)&b
264 add y1, h # h = h + S1 + CH + k + w + S0
265 ## compute high s1
266 vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
267	or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
268 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
269 ROTATE_ARGS
270 mov e, y0 # y0 = e
271 MY_ROR (25-11), y0 # y0 = e >> (25-11)
272 mov a, y1 # y1 = a
273 MY_ROR (22-13), y1 # y1 = a >> (22-13)
274 xor e, y0 # y0 = e ^ (e >> (25-11))
275 mov f, y2 # y2 = f
276 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
277 vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC}
278	xor a, y1 # y1 = a ^ (a >> (22-13))
279 xor g, y2 # y2 = f^g
280 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xDxC}
281 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
282 and e, y2 # y2 = (f^g)&e
283 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
284 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xDxC}
285 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
286	MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
287 xor g, y2 # y2 = CH = ((f^g)&e)^g
288 vpxor XTMP3, XTMP2, XTMP2
289 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
290 add y0, y2 # y2 = S1 + CH
291 add (3*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
292 vpxor XTMP2, XTMP5, XTMP5 # XTMP5 = s1 {xDxC}
293 mov a, y0 # y0 = a
294 add y2, h # h = h + S1 + CH + k + w
295 mov a, y2 # y2 = a
296 vpshufb SHUF_DC00, XTMP5, XTMP5 # XTMP5 = s1 {DC00}
297 or c, y0 # y0 = a|c
298 add h, d # d = d + h + S1 + CH + k + w
299 and c, y2 # y2 = a&c
300 vpaddd XTMP0, XTMP5, X0 # X0 = {W[3], W[2], W[1], W[0]}
301 and b, y0 # y0 = (a|c)&b
302 add y1, h # h = h + S1 + CH + k + w + S0
303	or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
304 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
305 ROTATE_ARGS
306 rotate_Xs
307.endm
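FOUR_ROUNDS_AND_SCHED interleaves four rounds with the computation of four new schedule words: W[i] = W[i-16] + s0(W[i-15]) + W[i-7] + s1(W[i-2]), with s0(x) = ror7(x) ^ ror18(x) ^ (x >> 3) and s1(x) = ror17(x) ^ ror19(x) ^ (x >> 10). A scalar sketch of the same FIPS 180-4 schedule for one block (helper names are illustrative):

#define ROR32(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))

static void sha256_schedule_sketch(u32 W[64])
{
	int i;

	for (i = 16; i < 64; i++) {
		u32 s0 = ROR32(W[i - 15], 7) ^ ROR32(W[i - 15], 18) ^ (W[i - 15] >> 3);
		u32 s1 = ROR32(W[i - 2], 17) ^ ROR32(W[i - 2], 19) ^ (W[i - 2] >> 10);

		W[i] = W[i - 16] + s0 + W[i - 7] + s1;
	}
}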
308
309## input is [rsp + _XFER + %1 * 4]
310.macro DO_ROUND round
311 mov e, y0 # y0 = e
312 MY_ROR (25-11), y0 # y0 = e >> (25-11)
313 mov a, y1 # y1 = a
314 xor e, y0 # y0 = e ^ (e >> (25-11))
315 MY_ROR (22-13), y1 # y1 = a >> (22-13)
316 mov f, y2 # y2 = f
317	xor a, y1 # y1 = a ^ (a >> (22-13))
318 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
319 xor g, y2 # y2 = f^g
320 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
321 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
322 and e, y2 # y2 = (f^g)&e
323 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
324	MY_ROR 6, y0 # y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
325 xor g, y2 # y2 = CH = ((f^g)&e)^g
326 add y0, y2 # y2 = S1 + CH
327 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
328 offset = \round * 4 + _XFER #
329 add offset(%rsp), y2 # y2 = k + w + S1 + CH
330 mov a, y0 # y0 = a
331 add y2, h # h = h + S1 + CH + k + w
332 mov a, y2 # y2 = a
333 or c, y0 # y0 = a|c
334 add h, d # d = d + h + S1 + CH + k + w
335 and c, y2 # y2 = a&c
336 and b, y0 # y0 = (a|c)&b
337 add y1, h # h = h + S1 + CH + k + w + S0
338	or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
339 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
340 ROTATE_ARGS
341.endm
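DO_ROUND is the plain SHA-256 round in scalar registers, using the same CH and MAJ formulations as the comments: CH = ((f^g)&e)^g and MAJ = ((a|c)&b)|(a&c). A scalar reference sketch of one round (reusing ROR32 from the schedule sketch above; k_plus_w is the precomputed K[i] + W[i] that the code keeps at _XFER(%rsp)):

static void sha256_round_sketch(u32 s[8], u32 k_plus_w)
{
	u32 a = s[0], b = s[1], c = s[2], d = s[3];
	u32 e = s[4], f = s[5], g = s[6], h = s[7];
	u32 S1 = ROR32(e, 6) ^ ROR32(e, 11) ^ ROR32(e, 25);
	u32 S0 = ROR32(a, 2) ^ ROR32(a, 13) ^ ROR32(a, 22);
	u32 ch = ((f ^ g) & e) ^ g;
	u32 maj = ((a | c) & b) | (a & c);
	u32 t1 = h + S1 + ch + k_plus_w;

	s[7] = g; s[6] = f; s[5] = e; s[4] = d + t1;
	s[3] = c; s[2] = b; s[1] = a; s[0] = t1 + S0 + maj;
}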
342
343########################################################################
344## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks)
345## arg 1 : pointer to input data
346## arg 2 : pointer to digest
347## arg 3 : Num blocks
348########################################################################
349.text
350ENTRY(sha256_transform_avx)
351.align 32
352 pushq %rbx
353 pushq %rbp
354 pushq %r13
355 pushq %r14
356 pushq %r15
357 pushq %r12
358
359 mov %rsp, %r12
360 subq $STACK_SIZE, %rsp # allocate stack space
361 and $~15, %rsp # align stack pointer
362
363 shl $6, NUM_BLKS # convert to bytes
364 jz done_hash
365 add INP, NUM_BLKS # pointer to end of data
366 mov NUM_BLKS, _INP_END(%rsp)
367
368 ## load initial digest
369 mov 4*0(CTX), a
370 mov 4*1(CTX), b
371 mov 4*2(CTX), c
372 mov 4*3(CTX), d
373 mov 4*4(CTX), e
374 mov 4*5(CTX), f
375 mov 4*6(CTX), g
376 mov 4*7(CTX), h
377
378 vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
379 vmovdqa _SHUF_00BA(%rip), SHUF_00BA
380 vmovdqa _SHUF_DC00(%rip), SHUF_DC00
381loop0:
382 lea K256(%rip), TBL
383
384 ## byte swap first 16 dwords
385 COPY_XMM_AND_BSWAP X0, 0*16(INP), BYTE_FLIP_MASK
386 COPY_XMM_AND_BSWAP X1, 1*16(INP), BYTE_FLIP_MASK
387 COPY_XMM_AND_BSWAP X2, 2*16(INP), BYTE_FLIP_MASK
388 COPY_XMM_AND_BSWAP X3, 3*16(INP), BYTE_FLIP_MASK
389
390 mov INP, _INP(%rsp)
391
392 ## schedule 48 input dwords, by doing 3 rounds of 16 each
393 mov $3, SRND
394.align 16
395loop1:
396 vpaddd (TBL), X0, XFER
397 vmovdqa XFER, _XFER(%rsp)
398 FOUR_ROUNDS_AND_SCHED
399
400 vpaddd 1*16(TBL), X0, XFER
401 vmovdqa XFER, _XFER(%rsp)
402 FOUR_ROUNDS_AND_SCHED
403
404 vpaddd 2*16(TBL), X0, XFER
405 vmovdqa XFER, _XFER(%rsp)
406 FOUR_ROUNDS_AND_SCHED
407
408 vpaddd 3*16(TBL), X0, XFER
409 vmovdqa XFER, _XFER(%rsp)
410 add $4*16, TBL
411 FOUR_ROUNDS_AND_SCHED
412
413 sub $1, SRND
414 jne loop1
415
416 mov $2, SRND
417loop2:
418 vpaddd (TBL), X0, XFER
419 vmovdqa XFER, _XFER(%rsp)
420 DO_ROUND 0
421 DO_ROUND 1
422 DO_ROUND 2
423 DO_ROUND 3
424
425 vpaddd 1*16(TBL), X1, XFER
426 vmovdqa XFER, _XFER(%rsp)
427 add $2*16, TBL
428 DO_ROUND 0
429 DO_ROUND 1
430 DO_ROUND 2
431 DO_ROUND 3
432
433 vmovdqa X2, X0
434 vmovdqa X3, X1
435
436 sub $1, SRND
437 jne loop2
438
439 addm (4*0)(CTX),a
440 addm (4*1)(CTX),b
441 addm (4*2)(CTX),c
442 addm (4*3)(CTX),d
443 addm (4*4)(CTX),e
444 addm (4*5)(CTX),f
445 addm (4*6)(CTX),g
446 addm (4*7)(CTX),h
447
448 mov _INP(%rsp), INP
449 add $64, INP
450 cmp _INP_END(%rsp), INP
451 jne loop0
452
453done_hash:
454
455 mov %r12, %rsp
456
457 popq %r12
458 popq %r15
459 popq %r14
460 popq %r13
461 popq %rbp
462 popq %rbx
463 ret
464ENDPROC(sha256_transform_avx)
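The register assignments above give the calling convention: %rdi = input data, %rsi = digest (8 × u32, updated in place), %rdx = number of 64-byte blocks. A sketch of how glue code would drive it; the prototype below is inferred from the argument comments above, and sha256_avx_update_sketch is a hypothetical name with the buffering logic simplified:

asmlinkage void sha256_transform_avx(const char *data, u32 *digest, u64 rounds);

static void sha256_avx_update_sketch(u32 state[8], const u8 *data, unsigned int len)
{
	u64 blocks = len / SHA256_BLOCK_SIZE;

	if (blocks) {
		kernel_fpu_begin();
		sha256_transform_avx((const char *)data, state, blocks);
		kernel_fpu_end();
	}
	/* the caller buffers the remaining len % SHA256_BLOCK_SIZE bytes */
}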
465
466.data
467.align 64
468K256:
469 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
470 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
471 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
472 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
473 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
474 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
475 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
476 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
477 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
478 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
479 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
480 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
481 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
482 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
483 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
484 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
485
486PSHUFFLE_BYTE_FLIP_MASK:
487 .octa 0x0c0d0e0f08090a0b0405060700010203
488
489# shuffle xBxA -> 00BA
490_SHUF_00BA:
491 .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
492
493# shuffle xDxC -> DC00
494_SHUF_DC00:
495 .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
496#endif
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
new file mode 100644
index 000000000000..9e86944c539d
--- /dev/null
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -0,0 +1,772 @@
1########################################################################
2# Implement fast SHA-256 with AVX2 instructions. (x86_64)
3#
4# Copyright (C) 2013 Intel Corporation.
5#
6# Authors:
7# James Guilford <james.guilford@intel.com>
8# Kirk Yap <kirk.s.yap@intel.com>
9# Tim Chen <tim.c.chen@linux.intel.com>
10#
11# This software is available to you under a choice of one of two
12# licenses. You may choose to be licensed under the terms of the GNU
13# General Public License (GPL) Version 2, available from the file
14# COPYING in the main directory of this source tree, or the
15# OpenIB.org BSD license below:
16#
17# Redistribution and use in source and binary forms, with or
18# without modification, are permitted provided that the following
19# conditions are met:
20#
21# - Redistributions of source code must retain the above
22# copyright notice, this list of conditions and the following
23# disclaimer.
24#
25# - Redistributions in binary form must reproduce the above
26# copyright notice, this list of conditions and the following
27# disclaimer in the documentation and/or other materials
28# provided with the distribution.
29#
30# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
34# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
35# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
36# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37# SOFTWARE.
38#
39########################################################################
40#
41# This code is described in an Intel White-Paper:
42# "Fast SHA-256 Implementations on Intel Architecture Processors"
43#
44# To find it, surf to http://www.intel.com/p/en_US/embedded
45# and search for that title.
46#
47########################################################################
48# This code schedules 2 blocks at a time, with 4 lanes per block
49########################################################################
50
51#ifdef CONFIG_AS_AVX2
52#include <linux/linkage.h>
53
54## assume buffers not aligned
55#define VMOVDQ vmovdqu
56
57################################ Define Macros
58
59# addm [mem], reg
60# Add reg to mem using reg-mem add and store
61.macro addm p1 p2
62 add \p1, \p2
63 mov \p2, \p1
64.endm
65
66################################
67
68X0 = %ymm4
69X1 = %ymm5
70X2 = %ymm6
71X3 = %ymm7
72
73# XMM versions of above
74XWORD0 = %xmm4
75XWORD1 = %xmm5
76XWORD2 = %xmm6
77XWORD3 = %xmm7
78
79XTMP0 = %ymm0
80XTMP1 = %ymm1
81XTMP2 = %ymm2
82XTMP3 = %ymm3
83XTMP4 = %ymm8
84XFER = %ymm9
85XTMP5 = %ymm11
86
87SHUF_00BA = %ymm10 # shuffle xBxA -> 00BA
88SHUF_DC00 = %ymm12 # shuffle xDxC -> DC00
89BYTE_FLIP_MASK = %ymm13
90
91X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK
92
93NUM_BLKS = %rdx # 3rd arg
94CTX = %rsi # 2nd arg
95INP = %rdi # 1st arg
96c = %ecx
97d = %r8d
98e = %edx # clobbers NUM_BLKS
99y3 = %edi # clobbers INP
100
101
102TBL = %rbp
103SRND = CTX # SRND is same register as CTX
104
105a = %eax
106b = %ebx
107f = %r9d
108g = %r10d
109h = %r11d
110old_h = %r11d
111
112T1 = %r12d
113y0 = %r13d
114y1 = %r14d
115y2 = %r15d
116
117
118_XFER_SIZE = 2*64*4 # 2 blocks, 64 rounds, 4 bytes/round
119_XMM_SAVE_SIZE = 0
120_INP_END_SIZE = 8
121_INP_SIZE = 8
122_CTX_SIZE = 8
123_RSP_SIZE = 8
124
125_XFER = 0
126_XMM_SAVE = _XFER + _XFER_SIZE
127_INP_END = _XMM_SAVE + _XMM_SAVE_SIZE
128_INP = _INP_END + _INP_END_SIZE
129_CTX = _INP + _INP_SIZE
130_RSP = _CTX + _CTX_SIZE
131STACK_SIZE = _RSP + _RSP_SIZE
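The offsets above build the stack frame additively; viewed as a struct (hypothetical, purely to visualise the layout) it is two interleaved blocks' worth of K+W round inputs followed by the saved input-end pointer, input pointer, digest pointer and original stack pointer:

struct sha256_avx2_frame_sketch {
	u32 xfer[2 * 64];	/* _XFER: 2 blocks x 64 rounds x 4 bytes */
	u64 inp_end;		/* _INP_END */
	u64 inp;		/* _INP */
	u64 ctx;		/* _CTX */
	u64 rsp;		/* _RSP: caller %rsp saved across alignment */
};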
132
133# rotate_Xs
134# Rotate values of symbols X0...X3
135.macro rotate_Xs
136 X_ = X0
137 X0 = X1
138 X1 = X2
139 X2 = X3
140 X3 = X_
141.endm
142
143# ROTATE_ARGS
144# Rotate values of symbols a...h
145.macro ROTATE_ARGS
146 old_h = h
147 TMP_ = h
148 h = g
149 g = f
150 f = e
151 e = d
152 d = c
153 c = b
154 b = a
155 a = TMP_
156.endm
157
158.macro FOUR_ROUNDS_AND_SCHED disp
159################################### RND N + 0 ############################
160
161 mov a, y3 # y3 = a # MAJA
162 rorx $25, e, y0 # y0 = e >> 25 # S1A
163 rorx $11, e, y1 # y1 = e >> 11 # S1B
164
165 addl \disp(%rsp, SRND), h # h = k + w + h # --
166 or c, y3 # y3 = a|c # MAJA
167 vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7]
168 mov f, y2 # y2 = f # CH
169 rorx $13, a, T1 # T1 = a >> 13 # S0B
170
171 xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1
172 xor g, y2 # y2 = f^g # CH
173	vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16]
174 rorx $6, e, y1 # y1 = (e >> 6) # S1
175
176 and e, y2 # y2 = (f^g)&e # CH
177 xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1
178 rorx $22, a, y1 # y1 = a >> 22 # S0A
179 add h, d # d = k + w + h + d # --
180
181 and b, y3 # y3 = (a|c)&b # MAJA
182 vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15]
183 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
184 rorx $2, a, T1 # T1 = (a >> 2) # S0
185
186 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
187 vpsrld $7, XTMP1, XTMP2
188 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
189 mov a, T1 # T1 = a # MAJB
190 and c, T1 # T1 = a&c # MAJB
191
192 add y0, y2 # y2 = S1 + CH # --
193 vpslld $(32-7), XTMP1, XTMP3
194	or T1, y3 # y3 = MAJ = ((a|c)&b)|(a&c) # MAJ
195 add y1, h # h = k + w + h + S0 # --
196
197 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
198 vpor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] ror 7
199
200 vpsrld $18, XTMP1, XTMP2
201 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
202 add y3, h # h = t1 + S0 + MAJ # --
203
204
205 ROTATE_ARGS
206
207################################### RND N + 1 ############################
208
209 mov a, y3 # y3 = a # MAJA
210 rorx $25, e, y0 # y0 = e >> 25 # S1A
211 rorx $11, e, y1 # y1 = e >> 11 # S1B
212 offset = \disp + 1*4
213 addl offset(%rsp, SRND), h # h = k + w + h # --
214 or c, y3 # y3 = a|c # MAJA
215
216
217 vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3
218 mov f, y2 # y2 = f # CH
219 rorx $13, a, T1 # T1 = a >> 13 # S0B
220 xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1
221 xor g, y2 # y2 = f^g # CH
222
223
224 rorx $6, e, y1 # y1 = (e >> 6) # S1
225 xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1
226 rorx $22, a, y1 # y1 = a >> 22 # S0A
227 and e, y2 # y2 = (f^g)&e # CH
228 add h, d # d = k + w + h + d # --
229
230 vpslld $(32-18), XTMP1, XTMP1
231 and b, y3 # y3 = (a|c)&b # MAJA
232 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
233
234 vpxor XTMP1, XTMP3, XTMP3
235 rorx $2, a, T1 # T1 = (a >> 2) # S0
236 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
237
238 vpxor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] ror 7 ^ W[-15] ror 18
239 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
240 mov a, T1 # T1 = a # MAJB
241 and c, T1 # T1 = a&c # MAJB
242 add y0, y2 # y2 = S1 + CH # --
243
244 vpxor XTMP4, XTMP3, XTMP1 # XTMP1 = s0
245 vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
246 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
247 add y1, h # h = k + w + h + S0 # --
248
249 vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
250 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
251 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
252 add y3, h # h = t1 + S0 + MAJ # --
253
254 vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
255
256
257 ROTATE_ARGS
258
259################################### RND N + 2 ############################
260
261 mov a, y3 # y3 = a # MAJA
262 rorx $25, e, y0 # y0 = e >> 25 # S1A
263 offset = \disp + 2*4
264 addl offset(%rsp, SRND), h # h = k + w + h # --
265
266 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA}
267 rorx $11, e, y1 # y1 = e >> 11 # S1B
268 or c, y3 # y3 = a|c # MAJA
269 mov f, y2 # y2 = f # CH
270 xor g, y2 # y2 = f^g # CH
271
272 rorx $13, a, T1 # T1 = a >> 13 # S0B
273 xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1
274 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA}
275 and e, y2 # y2 = (f^g)&e # CH
276
277 rorx $6, e, y1 # y1 = (e >> 6) # S1
278 vpxor XTMP3, XTMP2, XTMP2
279 add h, d # d = k + w + h + d # --
280 and b, y3 # y3 = (a|c)&b # MAJA
281
282 xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1
283 rorx $22, a, y1 # y1 = a >> 22 # S0A
284 vpxor XTMP2, XTMP4, XTMP4 # XTMP4 = s1 {xBxA}
285 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
286
287 vpshufb SHUF_00BA, XTMP4, XTMP4 # XTMP4 = s1 {00BA}
288 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
289 rorx $2, a ,T1 # T1 = (a >> 2) # S0
290 vpaddd XTMP4, XTMP0, XTMP0 # XTMP0 = {..., ..., W[1], W[0]}
291
292 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
293 mov a, T1 # T1 = a # MAJB
294 and c, T1 # T1 = a&c # MAJB
295 add y0, y2 # y2 = S1 + CH # --
296 vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
297
298 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
299 add y1,h # h = k + w + h + S0 # --
300 add y2,d # d = k + w + h + d + S1 + CH = d + t1 # --
301 add y2,h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
302
303 add y3,h # h = t1 + S0 + MAJ # --
304
305
306 ROTATE_ARGS
307
308################################### RND N + 3 ############################
309
310 mov a, y3 # y3 = a # MAJA
311 rorx $25, e, y0 # y0 = e >> 25 # S1A
312 rorx $11, e, y1 # y1 = e >> 11 # S1B
313 offset = \disp + 3*4
314 addl offset(%rsp, SRND), h # h = k + w + h # --
315 or c, y3 # y3 = a|c # MAJA
316
317
318 vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC}
319 mov f, y2 # y2 = f # CH
320 rorx $13, a, T1 # T1 = a >> 13 # S0B
321 xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1
322 xor g, y2 # y2 = f^g # CH
323
324
325 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] ror 19 {xDxC}
326 rorx $6, e, y1 # y1 = (e >> 6) # S1
327 and e, y2 # y2 = (f^g)&e # CH
328 add h, d # d = k + w + h + d # --
329 and b, y3 # y3 = (a|c)&b # MAJA
330
331 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC}
332 xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1
333 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
334
335 vpxor XTMP3, XTMP2, XTMP2
336 rorx $22, a, y1 # y1 = a >> 22 # S0A
337 add y0, y2 # y2 = S1 + CH # --
338
339 vpxor XTMP2, XTMP5, XTMP5 # XTMP5 = s1 {xDxC}
340 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
341 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
342
343 rorx $2, a, T1 # T1 = (a >> 2) # S0
344 vpshufb SHUF_DC00, XTMP5, XTMP5 # XTMP5 = s1 {DC00}
345
346 vpaddd XTMP0, XTMP5, X0 # X0 = {W[3], W[2], W[1], W[0]}
347 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
348 mov a, T1 # T1 = a # MAJB
349 and c, T1 # T1 = a&c # MAJB
350 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
351
352 add y1, h # h = k + w + h + S0 # --
353 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
354 add y3, h # h = t1 + S0 + MAJ # --
355
356 ROTATE_ARGS
357 rotate_Xs
358.endm
359
360.macro DO_4ROUNDS disp
361################################### RND N + 0 ###########################
362
363 mov f, y2 # y2 = f # CH
364 rorx $25, e, y0 # y0 = e >> 25 # S1A
365 rorx $11, e, y1 # y1 = e >> 11 # S1B
366 xor g, y2 # y2 = f^g # CH
367
368 xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1
369 rorx $6, e, y1 # y1 = (e >> 6) # S1
370 and e, y2 # y2 = (f^g)&e # CH
371
372 xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1
373 rorx $13, a, T1 # T1 = a >> 13 # S0B
374 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
375 rorx $22, a, y1 # y1 = a >> 22 # S0A
376 mov a, y3 # y3 = a # MAJA
377
378 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
379 rorx $2, a, T1 # T1 = (a >> 2) # S0
380 addl \disp(%rsp, SRND), h # h = k + w + h # --
381 or c, y3 # y3 = a|c # MAJA
382
383 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
384 mov a, T1 # T1 = a # MAJB
385 and b, y3 # y3 = (a|c)&b # MAJA
386 and c, T1 # T1 = a&c # MAJB
387 add y0, y2 # y2 = S1 + CH # --
388
389
390 add h, d # d = k + w + h + d # --
391 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
392 add y1, h # h = k + w + h + S0 # --
393 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
394
395 ROTATE_ARGS
396
397################################### RND N + 1 ###########################
398
399 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
400 mov f, y2 # y2 = f # CH
401 rorx $25, e, y0 # y0 = e >> 25 # S1A
402 rorx $11, e, y1 # y1 = e >> 11 # S1B
403 xor g, y2 # y2 = f^g # CH
404
405 xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1
406 rorx $6, e, y1 # y1 = (e >> 6) # S1
407 and e, y2 # y2 = (f^g)&e # CH
408 add y3, old_h # h = t1 + S0 + MAJ # --
409
410 xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1
411 rorx $13, a, T1 # T1 = a >> 13 # S0B
412 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
413 rorx $22, a, y1 # y1 = a >> 22 # S0A
414 mov a, y3 # y3 = a # MAJA
415
416 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
417 rorx $2, a, T1 # T1 = (a >> 2) # S0
418 offset = 4*1 + \disp
419 addl offset(%rsp, SRND), h # h = k + w + h # --
420 or c, y3 # y3 = a|c # MAJA
421
422 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
423 mov a, T1 # T1 = a # MAJB
424 and b, y3 # y3 = (a|c)&b # MAJA
425 and c, T1 # T1 = a&c # MAJB
426 add y0, y2 # y2 = S1 + CH # --
427
428
429 add h, d # d = k + w + h + d # --
430 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
431 add y1, h # h = k + w + h + S0 # --
432
433 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
434
435 ROTATE_ARGS
436
437################################### RND N + 2 ##############################
438
439 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
440 mov f, y2 # y2 = f # CH
441 rorx $25, e, y0 # y0 = e >> 25 # S1A
442 rorx $11, e, y1 # y1 = e >> 11 # S1B
443 xor g, y2 # y2 = f^g # CH
444
445 xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1
446 rorx $6, e, y1 # y1 = (e >> 6) # S1
447 and e, y2 # y2 = (f^g)&e # CH
448 add y3, old_h # h = t1 + S0 + MAJ # --
449
450 xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1
451 rorx $13, a, T1 # T1 = a >> 13 # S0B
452 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
453 rorx $22, a, y1 # y1 = a >> 22 # S0A
454 mov a, y3 # y3 = a # MAJA
455
456 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
457 rorx $2, a, T1 # T1 = (a >> 2) # S0
458 offset = 4*2 + \disp
459 addl offset(%rsp, SRND), h # h = k + w + h # --
460 or c, y3 # y3 = a|c # MAJA
461
462 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
463 mov a, T1 # T1 = a # MAJB
464 and b, y3 # y3 = (a|c)&b # MAJA
465 and c, T1 # T1 = a&c # MAJB
466 add y0, y2 # y2 = S1 + CH # --
467
468
469 add h, d # d = k + w + h + d # --
470 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
471 add y1, h # h = k + w + h + S0 # --
472
473 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
474
475 ROTATE_ARGS
476
477################################### RND N + 3 ###########################
478
479 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
480 mov f, y2 # y2 = f # CH
481 rorx $25, e, y0 # y0 = e >> 25 # S1A
482 rorx $11, e, y1 # y1 = e >> 11 # S1B
483 xor g, y2 # y2 = f^g # CH
484
485 xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1
486 rorx $6, e, y1 # y1 = (e >> 6) # S1
487 and e, y2 # y2 = (f^g)&e # CH
488 add y3, old_h # h = t1 + S0 + MAJ # --
489
490 xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1
491 rorx $13, a, T1 # T1 = a >> 13 # S0B
492 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
493 rorx $22, a, y1 # y1 = a >> 22 # S0A
494 mov a, y3 # y3 = a # MAJA
495
496 xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0
497 rorx $2, a, T1 # T1 = (a >> 2) # S0
498 offset = 4*3 + \disp
499 addl offset(%rsp, SRND), h # h = k + w + h # --
500 or c, y3 # y3 = a|c # MAJA
501
502 xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0
503 mov a, T1 # T1 = a # MAJB
504 and b, y3 # y3 = (a|c)&b # MAJA
505 and c, T1 # T1 = a&c # MAJB
506 add y0, y2 # y2 = S1 + CH # --
507
508
509 add h, d # d = k + w + h + d # --
510 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
511 add y1, h # h = k + w + h + S0 # --
512
513 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
514
515
516 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
517
518 add y3, h # h = t1 + S0 + MAJ # --
519
520 ROTATE_ARGS
521
522.endm
523
524########################################################################
525## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
526## arg 1 : pointer to input data
527## arg 2 : pointer to digest
528## arg 3 : Num blocks
529########################################################################
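#
# Note: the C glue code added later in this patch declares this routine as
#	asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
#					      u64 rounds);
# where "rounds" counts whole 64-byte input blocks (see the shl $6 below).
# The S0/S1/CH/MAJ annotations in the round macros above are equivalent
# boolean forms of the FIPS 180-4 SHA-256 functions:
#	Sigma1(e) = (e ror 6) ^ (e ror 11) ^ (e ror 25)	# S1
#	Sigma0(a) = (a ror 2) ^ (a ror 13) ^ (a ror 22)	# S0
#	Ch(e,f,g)  = ((f ^ g) & e) ^ g			# CH
#	Maj(a,b,c) = ((a | c) & b) | (a & c)		# MAJ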
530.text
531ENTRY(sha256_transform_rorx)
532.align 32
533 pushq %rbx
534 pushq %rbp
535 pushq %r12
536 pushq %r13
537 pushq %r14
538 pushq %r15
539
540 mov %rsp, %rax
541 subq $STACK_SIZE, %rsp
542 and $-32, %rsp # align rsp to 32 byte boundary
543 mov %rax, _RSP(%rsp)
544
545
546 shl $6, NUM_BLKS # convert to bytes
547 jz done_hash
548 lea -64(INP, NUM_BLKS), NUM_BLKS # pointer to last block
549 mov NUM_BLKS, _INP_END(%rsp)
550
551 cmp NUM_BLKS, INP
552 je only_one_block
553
554 ## load initial digest
555 mov (CTX), a
556 mov 4*1(CTX), b
557 mov 4*2(CTX), c
558 mov 4*3(CTX), d
559 mov 4*4(CTX), e
560 mov 4*5(CTX), f
561 mov 4*6(CTX), g
562 mov 4*7(CTX), h
563
564 vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
565 vmovdqa _SHUF_00BA(%rip), SHUF_00BA
566 vmovdqa _SHUF_DC00(%rip), SHUF_DC00
567
568 mov CTX, _CTX(%rsp)
569
570loop0:
571 lea K256(%rip), TBL
572
573 ## Load first 16 dwords from two blocks
574 VMOVDQ 0*32(INP),XTMP0
575 VMOVDQ 1*32(INP),XTMP1
576 VMOVDQ 2*32(INP),XTMP2
577 VMOVDQ 3*32(INP),XTMP3
578
579 ## byte swap data
580 vpshufb BYTE_FLIP_MASK, XTMP0, XTMP0
581 vpshufb BYTE_FLIP_MASK, XTMP1, XTMP1
582 vpshufb BYTE_FLIP_MASK, XTMP2, XTMP2
583 vpshufb BYTE_FLIP_MASK, XTMP3, XTMP3
584
585 ## transpose data into high/low halves
586 vperm2i128 $0x20, XTMP2, XTMP0, X0
587 vperm2i128 $0x31, XTMP2, XTMP0, X1
588 vperm2i128 $0x20, XTMP3, XTMP1, X2
589 vperm2i128 $0x31, XTMP3, XTMP1, X3
590
591last_block_enter:
592 add $64, INP
593 mov INP, _INP(%rsp)
594
595	## schedule 48 input dwords, by doing 3 iterations of 16 rounds each
596 xor SRND, SRND
597
598.align 16
599loop1:
600 vpaddd 0*32(TBL, SRND), X0, XFER
601 vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
602 FOUR_ROUNDS_AND_SCHED _XFER + 0*32
603
604 vpaddd 1*32(TBL, SRND), X0, XFER
605 vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
606 FOUR_ROUNDS_AND_SCHED _XFER + 1*32
607
608 vpaddd 2*32(TBL, SRND), X0, XFER
609 vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
610 FOUR_ROUNDS_AND_SCHED _XFER + 2*32
611
612 vpaddd 3*32(TBL, SRND), X0, XFER
613 vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
614 FOUR_ROUNDS_AND_SCHED _XFER + 3*32
615
616 add $4*32, SRND
617 cmp $3*4*32, SRND
618 jb loop1
619
620loop2:
621 ## Do last 16 rounds with no scheduling
622 vpaddd 0*32(TBL, SRND), X0, XFER
623 vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
624 DO_4ROUNDS _XFER + 0*32
625 vpaddd 1*32(TBL, SRND), X1, XFER
626 vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
627 DO_4ROUNDS _XFER + 1*32
628 add $2*32, SRND
629
630 vmovdqa X2, X0
631 vmovdqa X3, X1
632
633 cmp $4*4*32, SRND
634 jb loop2
635
636 mov _CTX(%rsp), CTX
637 mov _INP(%rsp), INP
638
639 addm (4*0)(CTX),a
640 addm (4*1)(CTX),b
641 addm (4*2)(CTX),c
642 addm (4*3)(CTX),d
643 addm (4*4)(CTX),e
644 addm (4*5)(CTX),f
645 addm (4*6)(CTX),g
646 addm (4*7)(CTX),h
647
648 cmp _INP_END(%rsp), INP
649 ja done_hash
650
651 #### Do second block using previously scheduled results
652 xor SRND, SRND
653.align 16
654loop3:
655 DO_4ROUNDS _XFER + 0*32 + 16
656 DO_4ROUNDS _XFER + 1*32 + 16
657 add $2*32, SRND
658 cmp $4*4*32, SRND
659 jb loop3
660
661 mov _CTX(%rsp), CTX
662 mov _INP(%rsp), INP
663 add $64, INP
664
665 addm (4*0)(CTX),a
666 addm (4*1)(CTX),b
667 addm (4*2)(CTX),c
668 addm (4*3)(CTX),d
669 addm (4*4)(CTX),e
670 addm (4*5)(CTX),f
671 addm (4*6)(CTX),g
672 addm (4*7)(CTX),h
673
674 cmp _INP_END(%rsp), INP
675 jb loop0
676 ja done_hash
677
678do_last_block:
679 #### do last block
680 lea K256(%rip), TBL
681
682 VMOVDQ 0*16(INP),XWORD0
683 VMOVDQ 1*16(INP),XWORD1
684 VMOVDQ 2*16(INP),XWORD2
685 VMOVDQ 3*16(INP),XWORD3
686
687 vpshufb X_BYTE_FLIP_MASK, XWORD0, XWORD0
688 vpshufb X_BYTE_FLIP_MASK, XWORD1, XWORD1
689 vpshufb X_BYTE_FLIP_MASK, XWORD2, XWORD2
690 vpshufb X_BYTE_FLIP_MASK, XWORD3, XWORD3
691
692 jmp last_block_enter
693
694only_one_block:
695
696 ## load initial digest
697 mov (4*0)(CTX),a
698 mov (4*1)(CTX),b
699 mov (4*2)(CTX),c
700 mov (4*3)(CTX),d
701 mov (4*4)(CTX),e
702 mov (4*5)(CTX),f
703 mov (4*6)(CTX),g
704 mov (4*7)(CTX),h
705
706 vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
707 vmovdqa _SHUF_00BA(%rip), SHUF_00BA
708 vmovdqa _SHUF_DC00(%rip), SHUF_DC00
709
710 mov CTX, _CTX(%rsp)
711 jmp do_last_block
712
713done_hash:
714
715 mov _RSP(%rsp), %rsp
716
717 popq %r15
718 popq %r14
719 popq %r13
720 popq %r12
721 popq %rbp
722 popq %rbx
723 ret
724ENDPROC(sha256_transform_rorx)
725
726.data
727.align 64
728K256:
729 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
730 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
731 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
732 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
733 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
734 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
735 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
736 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
737 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
738 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
739 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
740 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
741 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
742 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
743 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
744 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
745 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
746 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
747 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
748 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
749 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
750 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
751 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
752 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
753 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
754 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
755 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
756 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
757 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
758 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
759 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
760 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
761
762PSHUFFLE_BYTE_FLIP_MASK:
763 .octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203
764
765# shuffle xBxA -> 00BA
766_SHUF_00BA:
767 .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100
768
769# shuffle xDxC -> DC00
770_SHUF_DC00:
771 .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF
772#endif
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
new file mode 100644
index 000000000000..98d3c391da81
--- /dev/null
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -0,0 +1,506 @@
1########################################################################
2# Implement fast SHA-256 with SSSE3 instructions. (x86_64)
3#
4# Copyright (C) 2013 Intel Corporation.
5#
6# Authors:
7# James Guilford <james.guilford@intel.com>
8# Kirk Yap <kirk.s.yap@intel.com>
9# Tim Chen <tim.c.chen@linux.intel.com>
10#
11# This software is available to you under a choice of one of two
12# licenses. You may choose to be licensed under the terms of the GNU
13# General Public License (GPL) Version 2, available from the file
14# COPYING in the main directory of this source tree, or the
15# OpenIB.org BSD license below:
16#
17# Redistribution and use in source and binary forms, with or
18# without modification, are permitted provided that the following
19# conditions are met:
20#
21# - Redistributions of source code must retain the above
22# copyright notice, this list of conditions and the following
23# disclaimer.
24#
25# - Redistributions in binary form must reproduce the above
26# copyright notice, this list of conditions and the following
27# disclaimer in the documentation and/or other materials
28# provided with the distribution.
29#
30# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
34# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
35# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
36# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37# SOFTWARE.
38#
39########################################################################
40#
41# This code is described in an Intel White-Paper:
42# "Fast SHA-256 Implementations on Intel Architecture Processors"
43#
44# To find it, surf to http://www.intel.com/p/en_US/embedded
45# and search for that title.
46#
47########################################################################
48
49#include <linux/linkage.h>
50
51## assume buffers not aligned
52#define MOVDQ movdqu
53
54################################ Define Macros
55
56# addm [mem], reg
57# Add reg to mem using reg-mem add and store
58.macro addm p1 p2
59 add \p1, \p2
60 mov \p2, \p1
61.endm
62
63################################
64
65# COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask
66# Load xmm with mem and byte swap each dword
67.macro COPY_XMM_AND_BSWAP p1 p2 p3
68 MOVDQ \p2, \p1
69 pshufb \p3, \p1
70.endm
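# Roughly the SSSE3 intrinsics equivalent (illustration only, not used here):
#	dst = _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)mem), mask);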
71
72################################
73
74X0 = %xmm4
75X1 = %xmm5
76X2 = %xmm6
77X3 = %xmm7
78
79XTMP0 = %xmm0
80XTMP1 = %xmm1
81XTMP2 = %xmm2
82XTMP3 = %xmm3
83XTMP4 = %xmm8
84XFER = %xmm9
85
86SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
87SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00
88BYTE_FLIP_MASK = %xmm12
89
90NUM_BLKS = %rdx # 3rd arg
91CTX = %rsi # 2nd arg
92INP = %rdi # 1st arg
93
94SRND = %rdi # clobbers INP
95c = %ecx
96d = %r8d
97e = %edx
98TBL = %rbp
99a = %eax
100b = %ebx
101
102f = %r9d
103g = %r10d
104h = %r11d
105
106y0 = %r13d
107y1 = %r14d
108y2 = %r15d
109
110
111
112_INP_END_SIZE = 8
113_INP_SIZE = 8
114_XFER_SIZE = 8
115_XMM_SAVE_SIZE = 0
116
117_INP_END = 0
118_INP = _INP_END + _INP_END_SIZE
119_XFER = _INP + _INP_SIZE
120_XMM_SAVE = _XFER + _XFER_SIZE
121STACK_SIZE = _XMM_SAVE + _XMM_SAVE_SIZE
122
123# rotate_Xs
124# Rotate values of symbols X0...X3
125.macro rotate_Xs
126X_ = X0
127X0 = X1
128X1 = X2
129X2 = X3
130X3 = X_
131.endm
132
133# ROTATE_ARGS
134# Rotate values of symbols a...h
135.macro ROTATE_ARGS
136TMP_ = h
137h = g
138g = f
139f = e
140e = d
141d = c
142c = b
143b = a
144a = TMP_
145.endm
146
147.macro FOUR_ROUNDS_AND_SCHED
148 ## compute s0 four at a time and s1 two at a time
149 ## compute W[-16] + W[-7] 4 at a time
150 movdqa X3, XTMP0
151 mov e, y0 # y0 = e
152 ror $(25-11), y0 # y0 = e >> (25-11)
153 mov a, y1 # y1 = a
154 palignr $4, X2, XTMP0 # XTMP0 = W[-7]
155 ror $(22-13), y1 # y1 = a >> (22-13)
156 xor e, y0 # y0 = e ^ (e >> (25-11))
157 mov f, y2 # y2 = f
158 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
159 movdqa X1, XTMP1
160	xor	a, y1			# y1 = a ^ (a >> (22-13))
161 xor g, y2 # y2 = f^g
162 paddd X0, XTMP0 # XTMP0 = W[-7] + W[-16]
163 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
164 and e, y2 # y2 = (f^g)&e
165 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
166 ## compute s0
167 palignr $4, X0, XTMP1 # XTMP1 = W[-15]
168 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
169	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
170 xor g, y2 # y2 = CH = ((f^g)&e)^g
171 movdqa XTMP1, XTMP2 # XTMP2 = W[-15]
172 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
173 add y0, y2 # y2 = S1 + CH
174 add _XFER(%rsp) , y2 # y2 = k + w + S1 + CH
175 movdqa XTMP1, XTMP3 # XTMP3 = W[-15]
176 mov a, y0 # y0 = a
177 add y2, h # h = h + S1 + CH + k + w
178 mov a, y2 # y2 = a
179 pslld $(32-7), XTMP1 #
180 or c, y0 # y0 = a|c
181 add h, d # d = d + h + S1 + CH + k + w
182 and c, y2 # y2 = a&c
183 psrld $7, XTMP2 #
184 and b, y0 # y0 = (a|c)&b
185 add y1, h # h = h + S1 + CH + k + w + S0
186 por XTMP2, XTMP1 # XTMP1 = W[-15] ror 7
187 or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c)
188 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
189 #
190 ROTATE_ARGS #
191 movdqa XTMP3, XTMP2 # XTMP2 = W[-15]
192 mov e, y0 # y0 = e
193 mov a, y1 # y1 = a
194 movdqa XTMP3, XTMP4 # XTMP4 = W[-15]
195 ror $(25-11), y0 # y0 = e >> (25-11)
196 xor e, y0 # y0 = e ^ (e >> (25-11))
197 mov f, y2 # y2 = f
198 ror $(22-13), y1 # y1 = a >> (22-13)
199 pslld $(32-18), XTMP3 #
200	xor	a, y1			# y1 = a ^ (a >> (22-13))
201 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
202 xor g, y2 # y2 = f^g
203 psrld $18, XTMP2 #
204 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
205 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
206 and e, y2 # y2 = (f^g)&e
207	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
208 pxor XTMP3, XTMP1
209 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
210 xor g, y2 # y2 = CH = ((f^g)&e)^g
211 psrld $3, XTMP4 # XTMP4 = W[-15] >> 3
212 add y0, y2 # y2 = S1 + CH
213 add (1*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
214 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
215 pxor XTMP2, XTMP1 # XTMP1 = W[-15] ror 7 ^ W[-15] ror 18
216 mov a, y0 # y0 = a
217 add y2, h # h = h + S1 + CH + k + w
218 mov a, y2 # y2 = a
219 pxor XTMP4, XTMP1 # XTMP1 = s0
220 or c, y0 # y0 = a|c
221 add h, d # d = d + h + S1 + CH + k + w
222 and c, y2 # y2 = a&c
223 ## compute low s1
224 pshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
225 and b, y0 # y0 = (a|c)&b
226 add y1, h # h = h + S1 + CH + k + w + S0
227 paddd XTMP1, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
228 or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c)
229 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
230
231 ROTATE_ARGS
232 movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {BBAA}
233 mov e, y0 # y0 = e
234 mov a, y1 # y1 = a
235 ror $(25-11), y0 # y0 = e >> (25-11)
236 movdqa XTMP2, XTMP4 # XTMP4 = W[-2] {BBAA}
237 xor e, y0 # y0 = e ^ (e >> (25-11))
238 ror $(22-13), y1 # y1 = a >> (22-13)
239 mov f, y2 # y2 = f
240	xor	a, y1			# y1 = a ^ (a >> (22-13))
241 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
242 psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA}
243 xor g, y2 # y2 = f^g
244 psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA}
245 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
246 and e, y2 # y2 = (f^g)&e
247 psrld $10, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
248 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
249 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
250 xor g, y2 # y2 = CH = ((f^g)&e)^g
251	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
252 pxor XTMP3, XTMP2
253 add y0, y2 # y2 = S1 + CH
254 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
255 add (2*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
256 pxor XTMP2, XTMP4 # XTMP4 = s1 {xBxA}
257 mov a, y0 # y0 = a
258 add y2, h # h = h + S1 + CH + k + w
259 mov a, y2 # y2 = a
260 pshufb SHUF_00BA, XTMP4 # XTMP4 = s1 {00BA}
261 or c, y0 # y0 = a|c
262 add h, d # d = d + h + S1 + CH + k + w
263 and c, y2 # y2 = a&c
264 paddd XTMP4, XTMP0 # XTMP0 = {..., ..., W[1], W[0]}
265 and b, y0 # y0 = (a|c)&b
266 add y1, h # h = h + S1 + CH + k + w + S0
267 ## compute high s1
268	pshufd	$0b01010000, XTMP0, XTMP2	# XTMP2 = W[-2] {DDCC}
269 or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c)
270 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
271 #
272 ROTATE_ARGS #
273 movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {DDCC}
274 mov e, y0 # y0 = e
275 ror $(25-11), y0 # y0 = e >> (25-11)
276 mov a, y1 # y1 = a
277 movdqa XTMP2, X0 # X0 = W[-2] {DDCC}
278 ror $(22-13), y1 # y1 = a >> (22-13)
279 xor e, y0 # y0 = e ^ (e >> (25-11))
280 mov f, y2 # y2 = f
281 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
282 psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC}
283	xor	a, y1			# y1 = a ^ (a >> (22-13))
284	xor	g, y2			# y2 = f^g
285	psrlq	$19, XTMP3		# XTMP3 = W[-2] ror 19 {xDxC}
286	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
287	and	e, y2			# y2 = (f^g)&e
288	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
289	psrld	$10, X0			# X0 = W[-2] >> 10 {DDCC}
290	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
291	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
292	xor	g, y2			# y2 = CH = ((f^g)&e)^g
293	pxor	XTMP3, XTMP2		#
294	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
295 add y0, y2 # y2 = S1 + CH
296 add (3*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH
297 pxor XTMP2, X0 # X0 = s1 {xDxC}
298 mov a, y0 # y0 = a
299 add y2, h # h = h + S1 + CH + k + w
300 mov a, y2 # y2 = a
301 pshufb SHUF_DC00, X0 # X0 = s1 {DC00}
302 or c, y0 # y0 = a|c
303 add h, d # d = d + h + S1 + CH + k + w
304 and c, y2 # y2 = a&c
305 paddd XTMP0, X0 # X0 = {W[3], W[2], W[1], W[0]}
306 and b, y0 # y0 = (a|c)&b
307 add y1, h # h = h + S1 + CH + k + w + S0
308 or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c)
309 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
310
311 ROTATE_ARGS
312 rotate_Xs
313.endm
314
315## input is [rsp + _XFER + \round * 4]
316.macro DO_ROUND round
317 mov e, y0 # y0 = e
318 ror $(25-11), y0 # y0 = e >> (25-11)
319 mov a, y1 # y1 = a
320 xor e, y0 # y0 = e ^ (e >> (25-11))
321 ror $(22-13), y1 # y1 = a >> (22-13)
322 mov f, y2 # y2 = f
323	xor	a, y1			# y1 = a ^ (a >> (22-13))
324 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
325 xor g, y2 # y2 = f^g
326 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
327 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
328 and e, y2 # y2 = (f^g)&e
329 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
330	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
331 xor g, y2 # y2 = CH = ((f^g)&e)^g
332 add y0, y2 # y2 = S1 + CH
333 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
334 offset = \round * 4 + _XFER
335 add offset(%rsp), y2 # y2 = k + w + S1 + CH
336 mov a, y0 # y0 = a
337 add y2, h # h = h + S1 + CH + k + w
338 mov a, y2 # y2 = a
339 or c, y0 # y0 = a|c
340 add h, d # d = d + h + S1 + CH + k + w
341 and c, y2 # y2 = a&c
342 and b, y0 # y0 = (a|c)&b
343 add y1, h # h = h + S1 + CH + k + w + S0
344 or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c)
345 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
346 ROTATE_ARGS
347.endm
348
349########################################################################
350## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks)
351## arg 1 : pointer to input data
352## arg 2 : pointer to digest
353## arg 3 : Num blocks
354########################################################################
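# Note: the glue code added by this patch calls this routine through the
# sha256_transform_asm function pointer, e.g.
#	sha256_transform_asm(sctx->buf, sctx->state, 1);
# so the third argument ("Num blocks") is a count of whole 64-byte blocks,
# not a byte length (it is converted to bytes with the shl $6 below).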
355.text
356ENTRY(sha256_transform_ssse3)
357.align 32
358 pushq %rbx
359 pushq %rbp
360 pushq %r13
361 pushq %r14
362 pushq %r15
363 pushq %r12
364
365 mov %rsp, %r12
366 subq $STACK_SIZE, %rsp
367 and $~15, %rsp
368
369 shl $6, NUM_BLKS # convert to bytes
370 jz done_hash
371 add INP, NUM_BLKS
372 mov NUM_BLKS, _INP_END(%rsp) # pointer to end of data
373
374 ## load initial digest
375 mov 4*0(CTX), a
376 mov 4*1(CTX), b
377 mov 4*2(CTX), c
378 mov 4*3(CTX), d
379 mov 4*4(CTX), e
380 mov 4*5(CTX), f
381 mov 4*6(CTX), g
382 mov 4*7(CTX), h
383
384 movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
385 movdqa _SHUF_00BA(%rip), SHUF_00BA
386 movdqa _SHUF_DC00(%rip), SHUF_DC00
387
388loop0:
389 lea K256(%rip), TBL
390
391 ## byte swap first 16 dwords
392 COPY_XMM_AND_BSWAP X0, 0*16(INP), BYTE_FLIP_MASK
393 COPY_XMM_AND_BSWAP X1, 1*16(INP), BYTE_FLIP_MASK
394 COPY_XMM_AND_BSWAP X2, 2*16(INP), BYTE_FLIP_MASK
395 COPY_XMM_AND_BSWAP X3, 3*16(INP), BYTE_FLIP_MASK
396
397 mov INP, _INP(%rsp)
398
399	## schedule 48 input dwords, by doing 3 iterations of 16 rounds each
400 mov $3, SRND
401.align 16
402loop1:
403 movdqa (TBL), XFER
404 paddd X0, XFER
405 movdqa XFER, _XFER(%rsp)
406 FOUR_ROUNDS_AND_SCHED
407
408 movdqa 1*16(TBL), XFER
409 paddd X0, XFER
410 movdqa XFER, _XFER(%rsp)
411 FOUR_ROUNDS_AND_SCHED
412
413 movdqa 2*16(TBL), XFER
414 paddd X0, XFER
415 movdqa XFER, _XFER(%rsp)
416 FOUR_ROUNDS_AND_SCHED
417
418 movdqa 3*16(TBL), XFER
419 paddd X0, XFER
420 movdqa XFER, _XFER(%rsp)
421 add $4*16, TBL
422 FOUR_ROUNDS_AND_SCHED
423
424 sub $1, SRND
425 jne loop1
426
427 mov $2, SRND
428loop2:
429 paddd (TBL), X0
430 movdqa X0, _XFER(%rsp)
431 DO_ROUND 0
432 DO_ROUND 1
433 DO_ROUND 2
434 DO_ROUND 3
435 paddd 1*16(TBL), X1
436 movdqa X1, _XFER(%rsp)
437 add $2*16, TBL
438 DO_ROUND 0
439 DO_ROUND 1
440 DO_ROUND 2
441 DO_ROUND 3
442
443 movdqa X2, X0
444 movdqa X3, X1
445
446 sub $1, SRND
447 jne loop2
448
449 addm (4*0)(CTX),a
450 addm (4*1)(CTX),b
451 addm (4*2)(CTX),c
452 addm (4*3)(CTX),d
453 addm (4*4)(CTX),e
454 addm (4*5)(CTX),f
455 addm (4*6)(CTX),g
456 addm (4*7)(CTX),h
457
458 mov _INP(%rsp), INP
459 add $64, INP
460 cmp _INP_END(%rsp), INP
461 jne loop0
462
463done_hash:
464
465 mov %r12, %rsp
466
467 popq %r12
468 popq %r15
469 popq %r14
470 popq %r13
471 popq %rbp
472 popq %rbx
473
474 ret
475ENDPROC(sha256_transform_ssse3)
476
477.data
478.align 64
479K256:
480 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
481 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
482 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
483 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
484 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
485 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
486 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
487 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
488 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
489 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
490 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
491 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
492 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
493 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
494 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
495 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
496
497PSHUFFLE_BYTE_FLIP_MASK:
498 .octa 0x0c0d0e0f08090a0b0405060700010203
499
500# shuffle xBxA -> 00BA
501_SHUF_00BA:
502 .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
503
504# shuffle xDxC -> DC00
505_SHUF_DC00:
506 .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
new file mode 100644
index 000000000000..597d4da69656
--- /dev/null
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -0,0 +1,275 @@
1/*
2 * Cryptographic API.
3 *
4 * Glue code for the SHA256 Secure Hash Algorithm assembler
5 * implementation using supplemental SSE3 / AVX / AVX2 instructions.
6 *
7 * This file is based on sha256_generic.c
8 *
9 * Copyright (C) 2013 Intel Corporation.
10 *
11 * Author:
12 * Tim Chen <tim.c.chen@linux.intel.com>
13 *
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the Free
16 * Software Foundation; either version 2 of the License, or (at your option)
17 * any later version.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
23 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
24 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * SOFTWARE.
27 */
28
29
30#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
31
32#include <crypto/internal/hash.h>
33#include <linux/init.h>
34#include <linux/module.h>
35#include <linux/mm.h>
36#include <linux/cryptohash.h>
37#include <linux/types.h>
38#include <crypto/sha.h>
39#include <asm/byteorder.h>
40#include <asm/i387.h>
41#include <asm/xcr.h>
42#include <asm/xsave.h>
43#include <linux/string.h>
44
45asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
46 u64 rounds);
47#ifdef CONFIG_AS_AVX
48asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
49 u64 rounds);
50#endif
51#ifdef CONFIG_AS_AVX2
52asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
53 u64 rounds);
54#endif
55
56static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
57
58
59static int sha256_ssse3_init(struct shash_desc *desc)
60{
61 struct sha256_state *sctx = shash_desc_ctx(desc);
62
63 sctx->state[0] = SHA256_H0;
64 sctx->state[1] = SHA256_H1;
65 sctx->state[2] = SHA256_H2;
66 sctx->state[3] = SHA256_H3;
67 sctx->state[4] = SHA256_H4;
68 sctx->state[5] = SHA256_H5;
69 sctx->state[6] = SHA256_H6;
70 sctx->state[7] = SHA256_H7;
71 sctx->count = 0;
72
73 return 0;
74}
75
76static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
77 unsigned int len, unsigned int partial)
78{
79 struct sha256_state *sctx = shash_desc_ctx(desc);
80 unsigned int done = 0;
81
82 sctx->count += len;
83
84 if (partial) {
85 done = SHA256_BLOCK_SIZE - partial;
86 memcpy(sctx->buf + partial, data, done);
87 sha256_transform_asm(sctx->buf, sctx->state, 1);
88 }
89
90 if (len - done >= SHA256_BLOCK_SIZE) {
91 const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
92
93 sha256_transform_asm(data + done, sctx->state, (u64) rounds);
94
95 done += rounds * SHA256_BLOCK_SIZE;
96 }
97
98 memcpy(sctx->buf, data + done, len - done);
99
100 return 0;
101}
102
103static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
104 unsigned int len)
105{
106 struct sha256_state *sctx = shash_desc_ctx(desc);
107 unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
108 int res;
109
110 /* Handle the fast case right here */
111 if (partial + len < SHA256_BLOCK_SIZE) {
112 sctx->count += len;
113 memcpy(sctx->buf + partial, data, len);
114
115 return 0;
116 }
117
118 if (!irq_fpu_usable()) {
119 res = crypto_sha256_update(desc, data, len);
120 } else {
121 kernel_fpu_begin();
122 res = __sha256_ssse3_update(desc, data, len, partial);
123 kernel_fpu_end();
124 }
125
126 return res;
127}
128
129
130/* Add padding and return the message digest. */
131static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
132{
133 struct sha256_state *sctx = shash_desc_ctx(desc);
134 unsigned int i, index, padlen;
135 __be32 *dst = (__be32 *)out;
136 __be64 bits;
137 static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
138
139 bits = cpu_to_be64(sctx->count << 3);
140
141 /* Pad out to 56 mod 64 and append length */
142 index = sctx->count % SHA256_BLOCK_SIZE;
143 padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
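	/*
	 * Worked example: index = 3  -> padlen = 56 - 3 = 53;
	 *                 index = 60 -> padlen = (64 + 56) - 60 = 60,
	 * so index + padlen + 8 (the bit-count qword) is always a
	 * multiple of SHA256_BLOCK_SIZE.
	 */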
144
145 if (!irq_fpu_usable()) {
146 crypto_sha256_update(desc, padding, padlen);
147 crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
148 } else {
149 kernel_fpu_begin();
150 /* We need to fill a whole block for __sha256_ssse3_update() */
151 if (padlen <= 56) {
152 sctx->count += padlen;
153 memcpy(sctx->buf + index, padding, padlen);
154 } else {
155 __sha256_ssse3_update(desc, padding, padlen, index);
156 }
157 __sha256_ssse3_update(desc, (const u8 *)&bits,
158 sizeof(bits), 56);
159 kernel_fpu_end();
160 }
161
162 /* Store state in digest */
163 for (i = 0; i < 8; i++)
164 dst[i] = cpu_to_be32(sctx->state[i]);
165
166 /* Wipe context */
167 memset(sctx, 0, sizeof(*sctx));
168
169 return 0;
170}
171
172static int sha256_ssse3_export(struct shash_desc *desc, void *out)
173{
174 struct sha256_state *sctx = shash_desc_ctx(desc);
175
176 memcpy(out, sctx, sizeof(*sctx));
177
178 return 0;
179}
180
181static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
182{
183 struct sha256_state *sctx = shash_desc_ctx(desc);
184
185 memcpy(sctx, in, sizeof(*sctx));
186
187 return 0;
188}
189
190static struct shash_alg alg = {
191 .digestsize = SHA256_DIGEST_SIZE,
192 .init = sha256_ssse3_init,
193 .update = sha256_ssse3_update,
194 .final = sha256_ssse3_final,
195 .export = sha256_ssse3_export,
196 .import = sha256_ssse3_import,
197 .descsize = sizeof(struct sha256_state),
198 .statesize = sizeof(struct sha256_state),
199 .base = {
200 .cra_name = "sha256",
201 .cra_driver_name = "sha256-ssse3",
202 .cra_priority = 150,
203 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
204 .cra_blocksize = SHA256_BLOCK_SIZE,
205 .cra_module = THIS_MODULE,
206 }
207};
208
209#ifdef CONFIG_AS_AVX
210static bool __init avx_usable(void)
211{
212 u64 xcr0;
213
214 if (!cpu_has_avx || !cpu_has_osxsave)
215 return false;
216
217 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
218 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
219 pr_info("AVX detected but unusable.\n");
220
221 return false;
222 }
223
224 return true;
225}
226#endif
227
228static int __init sha256_ssse3_mod_init(void)
229{
230	/* test for SSSE3 first */
231 if (cpu_has_ssse3)
232 sha256_transform_asm = sha256_transform_ssse3;
233
234#ifdef CONFIG_AS_AVX
235 /* allow AVX to override SSSE3, it's a little faster */
236 if (avx_usable()) {
237#ifdef CONFIG_AS_AVX2
238 if (boot_cpu_has(X86_FEATURE_AVX2))
239 sha256_transform_asm = sha256_transform_rorx;
240 else
241#endif
242 sha256_transform_asm = sha256_transform_avx;
243 }
244#endif
245
246 if (sha256_transform_asm) {
247#ifdef CONFIG_AS_AVX
248 if (sha256_transform_asm == sha256_transform_avx)
249 pr_info("Using AVX optimized SHA-256 implementation\n");
250#ifdef CONFIG_AS_AVX2
251 else if (sha256_transform_asm == sha256_transform_rorx)
252 pr_info("Using AVX2 optimized SHA-256 implementation\n");
253#endif
254 else
255#endif
256 pr_info("Using SSSE3 optimized SHA-256 implementation\n");
257 return crypto_register_shash(&alg);
258 }
259 pr_info("Neither AVX nor SSSE3 is available/usable.\n");
260
261 return -ENODEV;
262}
263
264static void __exit sha256_ssse3_mod_fini(void)
265{
266 crypto_unregister_shash(&alg);
267}
268
269module_init(sha256_ssse3_mod_init);
270module_exit(sha256_ssse3_mod_fini);
271
272MODULE_LICENSE("GPL");
273MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
274
275MODULE_ALIAS("sha256");
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
new file mode 100644
index 000000000000..974dde9bc6cd
--- /dev/null
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -0,0 +1,423 @@
1########################################################################
2# Implement fast SHA-512 with AVX instructions. (x86_64)
3#
4# Copyright (C) 2013 Intel Corporation.
5#
6# Authors:
7# James Guilford <james.guilford@intel.com>
8# Kirk Yap <kirk.s.yap@intel.com>
9# David Cote <david.m.cote@intel.com>
10# Tim Chen <tim.c.chen@linux.intel.com>
11#
12# This software is available to you under a choice of one of two
13# licenses. You may choose to be licensed under the terms of the GNU
14# General Public License (GPL) Version 2, available from the file
15# COPYING in the main directory of this source tree, or the
16# OpenIB.org BSD license below:
17#
18# Redistribution and use in source and binary forms, with or
19# without modification, are permitted provided that the following
20# conditions are met:
21#
22# - Redistributions of source code must retain the above
23# copyright notice, this list of conditions and the following
24# disclaimer.
25#
26# - Redistributions in binary form must reproduce the above
27# copyright notice, this list of conditions and the following
28# disclaimer in the documentation and/or other materials
29# provided with the distribution.
30#
31# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
35# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
36# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
37# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38# SOFTWARE.
39#
40########################################################################
41#
42# This code is described in an Intel White-Paper:
43# "Fast SHA-512 Implementations on Intel Architecture Processors"
44#
45# To find it, surf to http://www.intel.com/p/en_US/embedded
46# and search for that title.
47#
48########################################################################
49
50#ifdef CONFIG_AS_AVX
51#include <linux/linkage.h>
52
53.text
54
55# Virtual Registers
56# ARG1
57msg = %rdi
58# ARG2
59digest = %rsi
60# ARG3
61msglen = %rdx
62T1 = %rcx
63T2 = %r8
64a_64 = %r9
65b_64 = %r10
66c_64 = %r11
67d_64 = %r12
68e_64 = %r13
69f_64 = %r14
70g_64 = %r15
71h_64 = %rbx
72tmp0 = %rax
73
74# Local variables (stack frame)
75
76# Message Schedule
77W_SIZE = 80*8
78# W[t] + K[t] | W[t+1] + K[t+1]
79WK_SIZE = 2*8
80RSPSAVE_SIZE = 1*8
81GPRSAVE_SIZE = 5*8
82
83frame_W = 0
84frame_WK = frame_W + W_SIZE
85frame_RSPSAVE = frame_WK + WK_SIZE
86frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
87frame_size = frame_GPRSAVE + GPRSAVE_SIZE
88
89# Useful QWORD "arrays" for simpler memory references
90# MSG, DIGEST, K_t, W_t are arrays
91# WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even
92
93# Input message (arg1)
94#define MSG(i) 8*i(msg)
95
96# Output Digest (arg2)
97#define DIGEST(i) 8*i(digest)
98
99# SHA Constants (static mem)
100#define K_t(i) 8*i+K512(%rip)
101
102# Message Schedule (stack frame)
103#define W_t(i) 8*i+frame_W(%rsp)
104
105# W[t]+K[t] (stack frame)
106#define WK_2(i) 8*((i%2))+frame_WK(%rsp)
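# For example, WK_2(t) resolves to frame_WK(%rsp) for even t and to
# frame_WK+8(%rsp) for odd t, so consecutive rounds alternate between the
# two qwords of the WK pair.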
107
108.macro RotateState
109 # Rotate symbols a..h right
110 TMP = h_64
111 h_64 = g_64
112 g_64 = f_64
113 f_64 = e_64
114 e_64 = d_64
115 d_64 = c_64
116 c_64 = b_64
117 b_64 = a_64
118 a_64 = TMP
119.endm
120
121.macro RORQ p1 p2
122 # shld is faster than ror on Sandybridge
123 shld $(64-\p2), \p1, \p1
124.endm
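# (With identical source and destination, shld is a left rotate, and a
#  64-bit rotate left by (64 - n) equals a rotate right by n, so
#  "RORQ reg, n" rotates reg right by n bits without using ror.)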
125
126.macro SHA512_Round rnd
127 # Compute Round %%t
128 mov f_64, T1 # T1 = f
129 mov e_64, tmp0 # tmp = e
130 xor g_64, T1 # T1 = f ^ g
131 RORQ tmp0, 23 # 41 # tmp = e ror 23
132 and e_64, T1 # T1 = (f ^ g) & e
133 xor e_64, tmp0 # tmp = (e ror 23) ^ e
134 xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g)
135 idx = \rnd
136 add WK_2(idx), T1 # W[t] + K[t] from message scheduler
137 RORQ tmp0, 4 # 18 # tmp = ((e ror 23) ^ e) ror 4
138 xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e
139 mov a_64, T2 # T2 = a
140 add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h
141 RORQ tmp0, 14 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e)
142 add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + S1(e)
143 mov a_64, tmp0 # tmp = a
144 xor c_64, T2 # T2 = a ^ c
145 and c_64, tmp0 # tmp = a & c
146 and b_64, T2 # T2 = (a ^ c) & b
147 xor tmp0, T2 # T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c)
148 mov a_64, tmp0 # tmp = a
149 RORQ tmp0, 5 # 39 # tmp = a ror 5
150 xor a_64, tmp0 # tmp = (a ror 5) ^ a
151 add T1, d_64 # e(next_state) = d + T1
152 RORQ tmp0, 6 # 34 # tmp = ((a ror 5) ^ a) ror 6
153 xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a
154 lea (T1, T2), h_64 # a(next_state) = T1 + Maj(a,b,c)
155 RORQ tmp0, 28 # 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a)
156 add tmp0, h_64 # a(next_state) = T1 + Maj(a,b,c) S0(a)
157 RotateState
158.endm
159
160.macro SHA512_2Sched_2Round_avx rnd
161 # Compute rounds t-2 and t-1
162 # Compute message schedule QWORDS t and t+1
163
164 # Two rounds are computed based on the values for K[t-2]+W[t-2] and
165 # K[t-1]+W[t-1] which were previously stored at WK_2 by the message
166 # scheduler.
167 # The two new schedule QWORDS are stored at [W_t(t)] and [W_t(t+1)].
168 # They are then added to their respective SHA512 constants at
169 # [K_t(t)] and [K_t(t+1)] and stored at dqword [WK_2(t)]
	# For brevity, the comments following vectored instructions only refer to
171 # the first of a pair of QWORDS.
172 # Eg. XMM4=W[t-2] really means XMM4={W[t-2]|W[t-1]}
173 # The computation of the message schedule and the rounds are tightly
174 # stitched to take advantage of instruction-level parallelism.
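	# In FIPS 180-4 terms, the schedule computed here is
	#	W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16]
	# with	s0(x) = (x ror 1)  ^ (x ror 8)  ^ (x >> 7)
	#	s1(x) = (x ror 19) ^ (x ror 61) ^ (x >> 6)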
175
176 idx = \rnd - 2
177 vmovdqa W_t(idx), %xmm4 # XMM4 = W[t-2]
178 idx = \rnd - 15
179 vmovdqu W_t(idx), %xmm5 # XMM5 = W[t-15]
180 mov f_64, T1
181 vpsrlq $61, %xmm4, %xmm0 # XMM0 = W[t-2]>>61
182 mov e_64, tmp0
183 vpsrlq $1, %xmm5, %xmm6 # XMM6 = W[t-15]>>1
184 xor g_64, T1
185 RORQ tmp0, 23 # 41
186 vpsrlq $19, %xmm4, %xmm1 # XMM1 = W[t-2]>>19
187 and e_64, T1
188 xor e_64, tmp0
189 vpxor %xmm1, %xmm0, %xmm0 # XMM0 = W[t-2]>>61 ^ W[t-2]>>19
190 xor g_64, T1
191 idx = \rnd
192 add WK_2(idx), T1#
193 vpsrlq $8, %xmm5, %xmm7 # XMM7 = W[t-15]>>8
194 RORQ tmp0, 4 # 18
195 vpsrlq $6, %xmm4, %xmm2 # XMM2 = W[t-2]>>6
196 xor e_64, tmp0
197 mov a_64, T2
198 add h_64, T1
199 vpxor %xmm7, %xmm6, %xmm6 # XMM6 = W[t-15]>>1 ^ W[t-15]>>8
200 RORQ tmp0, 14 # 14
201 add tmp0, T1
202 vpsrlq $7, %xmm5, %xmm8 # XMM8 = W[t-15]>>7
203 mov a_64, tmp0
204 xor c_64, T2
205 vpsllq $(64-61), %xmm4, %xmm3 # XMM3 = W[t-2]<<3
206 and c_64, tmp0
207 and b_64, T2
208 vpxor %xmm3, %xmm2, %xmm2 # XMM2 = W[t-2]>>6 ^ W[t-2]<<3
209 xor tmp0, T2
210 mov a_64, tmp0
211 vpsllq $(64-1), %xmm5, %xmm9 # XMM9 = W[t-15]<<63
212 RORQ tmp0, 5 # 39
213 vpxor %xmm9, %xmm8, %xmm8 # XMM8 = W[t-15]>>7 ^ W[t-15]<<63
214 xor a_64, tmp0
215 add T1, d_64
216 RORQ tmp0, 6 # 34
217 xor a_64, tmp0
218 vpxor %xmm8, %xmm6, %xmm6 # XMM6 = W[t-15]>>1 ^ W[t-15]>>8 ^
219 # W[t-15]>>7 ^ W[t-15]<<63
220 lea (T1, T2), h_64
221 RORQ tmp0, 28 # 28
222 vpsllq $(64-19), %xmm4, %xmm4 # XMM4 = W[t-2]<<25
223 add tmp0, h_64
224 RotateState
225 vpxor %xmm4, %xmm0, %xmm0 # XMM0 = W[t-2]>>61 ^ W[t-2]>>19 ^
226 # W[t-2]<<25
227 mov f_64, T1
228 vpxor %xmm2, %xmm0, %xmm0 # XMM0 = s1(W[t-2])
229 mov e_64, tmp0
230 xor g_64, T1
231 idx = \rnd - 16
232 vpaddq W_t(idx), %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + W[t-16]
233 idx = \rnd - 7
234 vmovdqu W_t(idx), %xmm1 # XMM1 = W[t-7]
235 RORQ tmp0, 23 # 41
236 and e_64, T1
237 xor e_64, tmp0
238 xor g_64, T1
239 vpsllq $(64-8), %xmm5, %xmm5 # XMM5 = W[t-15]<<56
240 idx = \rnd + 1
241 add WK_2(idx), T1
242 vpxor %xmm5, %xmm6, %xmm6 # XMM6 = s0(W[t-15])
243 RORQ tmp0, 4 # 18
244 vpaddq %xmm6, %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + W[t-16] + s0(W[t-15])
245 xor e_64, tmp0
246 vpaddq %xmm1, %xmm0, %xmm0 # XMM0 = W[t] = s1(W[t-2]) + W[t-7] +
247 # s0(W[t-15]) + W[t-16]
248 mov a_64, T2
249 add h_64, T1
250 RORQ tmp0, 14 # 14
251 add tmp0, T1
252 idx = \rnd
253 vmovdqa %xmm0, W_t(idx) # Store W[t]
254 vpaddq K_t(idx), %xmm0, %xmm0 # Compute W[t]+K[t]
255 vmovdqa %xmm0, WK_2(idx) # Store W[t]+K[t] for next rounds
256 mov a_64, tmp0
257 xor c_64, T2
258 and c_64, tmp0
259 and b_64, T2
260 xor tmp0, T2
261 mov a_64, tmp0
262 RORQ tmp0, 5 # 39
263 xor a_64, tmp0
264 add T1, d_64
265 RORQ tmp0, 6 # 34
266 xor a_64, tmp0
267 lea (T1, T2), h_64
268 RORQ tmp0, 28 # 28
269 add tmp0, h_64
270 RotateState
271.endm
272
273########################################################################
274# void sha512_transform_avx(const void* M, void* D, u64 L)
275# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
276# The size of the message pointed to by M must be an integer multiple of SHA512
277# message blocks.
278# L is the message length in SHA512 blocks
279########################################################################
280ENTRY(sha512_transform_avx)
281 cmp $0, msglen
282 je nowork
283
284 # Allocate Stack Space
285 mov %rsp, %rax
286 sub $frame_size, %rsp
287 and $~(0x20 - 1), %rsp
288 mov %rax, frame_RSPSAVE(%rsp)
289
290 # Save GPRs
291 mov %rbx, frame_GPRSAVE(%rsp)
292 mov %r12, frame_GPRSAVE +8*1(%rsp)
293 mov %r13, frame_GPRSAVE +8*2(%rsp)
294 mov %r14, frame_GPRSAVE +8*3(%rsp)
295 mov %r15, frame_GPRSAVE +8*4(%rsp)
296
297updateblock:
298
299 # Load state variables
300 mov DIGEST(0), a_64
301 mov DIGEST(1), b_64
302 mov DIGEST(2), c_64
303 mov DIGEST(3), d_64
304 mov DIGEST(4), e_64
305 mov DIGEST(5), f_64
306 mov DIGEST(6), g_64
307 mov DIGEST(7), h_64
308
309 t = 0
310 .rept 80/2 + 1
311 # (80 rounds) / (2 rounds/iteration) + (1 iteration)
312 # +1 iteration because the scheduler leads hashing by 1 iteration
313 .if t < 2
314 # BSWAP 2 QWORDS
315 vmovdqa XMM_QWORD_BSWAP(%rip), %xmm1
316 vmovdqu MSG(t), %xmm0
317 vpshufb %xmm1, %xmm0, %xmm0 # BSWAP
318 vmovdqa %xmm0, W_t(t) # Store Scheduled Pair
319 vpaddq K_t(t), %xmm0, %xmm0 # Compute W[t]+K[t]
320 vmovdqa %xmm0, WK_2(t) # Store into WK for rounds
321 .elseif t < 16
322		# BSWAP 2 QWORDS # Compute 2 Rounds
323 vmovdqu MSG(t), %xmm0
324 vpshufb %xmm1, %xmm0, %xmm0 # BSWAP
325 SHA512_Round t-2 # Round t-2
326 vmovdqa %xmm0, W_t(t) # Store Scheduled Pair
327 vpaddq K_t(t), %xmm0, %xmm0 # Compute W[t]+K[t]
328 SHA512_Round t-1 # Round t-1
329 vmovdqa %xmm0, WK_2(t)# Store W[t]+K[t] into WK
330 .elseif t < 79
331		# Schedule 2 QWORDS # Compute 2 Rounds
332 SHA512_2Sched_2Round_avx t
333 .else
334 # Compute 2 Rounds
335 SHA512_Round t-2
336 SHA512_Round t-1
337 .endif
338 t = t+2
339 .endr
340
341 # Update digest
342 add a_64, DIGEST(0)
343 add b_64, DIGEST(1)
344 add c_64, DIGEST(2)
345 add d_64, DIGEST(3)
346 add e_64, DIGEST(4)
347 add f_64, DIGEST(5)
348 add g_64, DIGEST(6)
349 add h_64, DIGEST(7)
350
351 # Advance to next message block
352 add $16*8, msg
353 dec msglen
354 jnz updateblock
355
356 # Restore GPRs
357 mov frame_GPRSAVE(%rsp), %rbx
358 mov frame_GPRSAVE +8*1(%rsp), %r12
359 mov frame_GPRSAVE +8*2(%rsp), %r13
360 mov frame_GPRSAVE +8*3(%rsp), %r14
361 mov frame_GPRSAVE +8*4(%rsp), %r15
362
363 # Restore Stack Pointer
364 mov frame_RSPSAVE(%rsp), %rsp
365
366nowork:
367 ret
368ENDPROC(sha512_transform_avx)
369
370########################################################################
371### Binary Data
372
373.data
374
375.align 16
376
377# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
378XMM_QWORD_BSWAP:
379 .octa 0x08090a0b0c0d0e0f0001020304050607
380
381# K[t] used in SHA512 hashing
382K512:
383 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
384 .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
385 .quad 0x3956c25bf348b538,0x59f111f1b605d019
386 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
387 .quad 0xd807aa98a3030242,0x12835b0145706fbe
388 .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
389 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
390 .quad 0x9bdc06a725c71235,0xc19bf174cf692694
391 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
392 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
393 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
394 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
395 .quad 0x983e5152ee66dfab,0xa831c66d2db43210
396 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
397 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
398 .quad 0x06ca6351e003826f,0x142929670a0e6e70
399 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
400 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
401 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
402 .quad 0x81c2c92e47edaee6,0x92722c851482353b
403 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
404 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
405 .quad 0xd192e819d6ef5218,0xd69906245565a910
406 .quad 0xf40e35855771202a,0x106aa07032bbd1b8
407 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
408 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
409 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
410 .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
411 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
412 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
413 .quad 0x90befffa23631e28,0xa4506cebde82bde9
414 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
415 .quad 0xca273eceea26619c,0xd186b8c721c0c207
416 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
417 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
418 .quad 0x113f9804bef90dae,0x1b710b35131c471b
419 .quad 0x28db77f523047d84,0x32caab7b40c72493
420 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
421 .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
422 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
423#endif
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
new file mode 100644
index 000000000000..568b96105f5c
--- /dev/null
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -0,0 +1,743 @@
1########################################################################
2# Implement fast SHA-512 with AVX2 instructions. (x86_64)
3#
4# Copyright (C) 2013 Intel Corporation.
5#
6# Authors:
7# James Guilford <james.guilford@intel.com>
8# Kirk Yap <kirk.s.yap@intel.com>
9# David Cote <david.m.cote@intel.com>
10# Tim Chen <tim.c.chen@linux.intel.com>
11#
12# This software is available to you under a choice of one of two
13# licenses. You may choose to be licensed under the terms of the GNU
14# General Public License (GPL) Version 2, available from the file
15# COPYING in the main directory of this source tree, or the
16# OpenIB.org BSD license below:
17#
18# Redistribution and use in source and binary forms, with or
19# without modification, are permitted provided that the following
20# conditions are met:
21#
22# - Redistributions of source code must retain the above
23# copyright notice, this list of conditions and the following
24# disclaimer.
25#
26# - Redistributions in binary form must reproduce the above
27# copyright notice, this list of conditions and the following
28# disclaimer in the documentation and/or other materials
29# provided with the distribution.
30#
31# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
35# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
36# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
37# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38# SOFTWARE.
39#
40########################################################################
41#
42# This code is described in an Intel White-Paper:
43# "Fast SHA-512 Implementations on Intel Architecture Processors"
44#
45# To find it, surf to http://www.intel.com/p/en_US/embedded
46# and search for that title.
47#
48########################################################################
49# This code schedules one block at a time, with 4 lanes per block
50########################################################################
51
52#ifdef CONFIG_AS_AVX2
53#include <linux/linkage.h>
54
55.text
56
57# Virtual Registers
58Y_0 = %ymm4
59Y_1 = %ymm5
60Y_2 = %ymm6
61Y_3 = %ymm7
62
63YTMP0 = %ymm0
64YTMP1 = %ymm1
65YTMP2 = %ymm2
66YTMP3 = %ymm3
67YTMP4 = %ymm8
68XFER = YTMP0
69
70BYTE_FLIP_MASK = %ymm9
71
72# 1st arg
73INP = %rdi
74# 2nd arg
75CTX = %rsi
76# 3rd arg
77NUM_BLKS = %rdx
78
79c = %rcx
80d = %r8
81e = %rdx
82y3 = %rdi
83
84TBL = %rbp
85
86a = %rax
87b = %rbx
88
89f = %r9
90g = %r10
91h = %r11
92old_h = %r11
93
94T1 = %r12
95y0 = %r13
96y1 = %r14
97y2 = %r15
98
99y4 = %r12
100
101# Local variables (stack frame)
102XFER_SIZE = 4*8
103SRND_SIZE = 1*8
104INP_SIZE = 1*8
105INPEND_SIZE = 1*8
106RSPSAVE_SIZE = 1*8
107GPRSAVE_SIZE = 6*8
108
109frame_XFER = 0
110frame_SRND = frame_XFER + XFER_SIZE
111frame_INP = frame_SRND + SRND_SIZE
112frame_INPEND = frame_INP + INP_SIZE
113frame_RSPSAVE = frame_INPEND + INPEND_SIZE
114frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
115frame_size = frame_GPRSAVE + GPRSAVE_SIZE
116
117## assume buffers not aligned
118#define VMOVDQ vmovdqu
119
120# addm [mem], reg
121# Add reg to mem using reg-mem add and store
122.macro addm p1 p2
123 add \p1, \p2
124 mov \p2, \p1
125.endm
126
127
128# COPY_YMM_AND_BSWAP ymm, [mem], byte_flip_mask
129# Load ymm with mem and byte swap each qword
130.macro COPY_YMM_AND_BSWAP p1 p2 p3
131 VMOVDQ \p2, \p1
132 vpshufb \p3, \p1, \p1
133.endm
134# rotate_Ys
135# Rotate values of symbols Y0...Y3
136.macro rotate_Ys
137 Y_ = Y_0
138 Y_0 = Y_1
139 Y_1 = Y_2
140 Y_2 = Y_3
141 Y_3 = Y_
142.endm
143
144# RotateState
145.macro RotateState
146 # Rotate symbols a..h right
147 old_h = h
148 TMP_ = h
149 h = g
150 g = f
151 f = e
152 e = d
153 d = c
154 c = b
155 b = a
156 a = TMP_
157.endm
158
159# macro MY_VPALIGNR YDST, YSRC1, YSRC2, RVAL
160# YDST = {YSRC1, YSRC2} >> RVAL*8
161.macro MY_VPALIGNR YDST YSRC1 YSRC2 RVAL
162 vperm2f128 $0x3, \YSRC2, \YSRC1, \YDST # YDST = {YS1_LO, YS2_HI}
163	vpalignr	$\RVAL, \YSRC2, \YDST, \YDST	# YDST = {YS1, YS2} >> RVAL*8
164.endm
165
166.macro FOUR_ROUNDS_AND_SCHED
167################################### RND N + 0 #########################################
168
169 # Extract w[t-7]
170 MY_VPALIGNR YTMP0, Y_3, Y_2, 8 # YTMP0 = W[-7]
171 # Calculate w[t-16] + w[t-7]
172 vpaddq Y_0, YTMP0, YTMP0 # YTMP0 = W[-7] + W[-16]
173 # Extract w[t-15]
174 MY_VPALIGNR YTMP1, Y_1, Y_0, 8 # YTMP1 = W[-15]
175
176 # Calculate sigma0
177
178 # Calculate w[t-15] ror 1
179 vpsrlq $1, YTMP1, YTMP2
180 vpsllq $(64-1), YTMP1, YTMP3
181 vpor YTMP2, YTMP3, YTMP3 # YTMP3 = W[-15] ror 1
182 # Calculate w[t-15] shr 7
183 vpsrlq $7, YTMP1, YTMP4 # YTMP4 = W[-15] >> 7
184
185 mov a, y3 # y3 = a # MAJA
186 rorx $41, e, y0 # y0 = e >> 41 # S1A
187 rorx $18, e, y1 # y1 = e >> 18 # S1B
188 add frame_XFER(%rsp),h # h = k + w + h # --
189 or c, y3 # y3 = a|c # MAJA
190 mov f, y2 # y2 = f # CH
191 rorx $34, a, T1 # T1 = a >> 34 # S0B
192
193 xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1
194 xor g, y2 # y2 = f^g # CH
195 rorx $14, e, y1 # y1 = (e >> 14) # S1
196
197 and e, y2 # y2 = (f^g)&e # CH
198 xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1
199 rorx $39, a, y1 # y1 = a >> 39 # S0A
200 add h, d # d = k + w + h + d # --
201
202 and b, y3 # y3 = (a|c)&b # MAJA
203 xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0
204 rorx $28, a, T1 # T1 = (a >> 28) # S0
205
206 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
207 xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0
208 mov a, T1 # T1 = a # MAJB
209 and c, T1 # T1 = a&c # MAJB
210
211 add y0, y2 # y2 = S1 + CH # --
212 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
213 add y1, h # h = k + w + h + S0 # --
214
215 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
216
217 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
218 add y3, h # h = t1 + S0 + MAJ # --
219
220 RotateState
221
222################################### RND N + 1 #########################################
223
224 # Calculate w[t-15] ror 8
225 vpsrlq $8, YTMP1, YTMP2
226 vpsllq $(64-8), YTMP1, YTMP1
227 vpor YTMP2, YTMP1, YTMP1 # YTMP1 = W[-15] ror 8
228 # XOR the three components
229 vpxor YTMP4, YTMP3, YTMP3 # YTMP3 = W[-15] ror 1 ^ W[-15] >> 7
230 vpxor YTMP1, YTMP3, YTMP1 # YTMP1 = s0
231
232
233 # Add three components, w[t-16], w[t-7] and sigma0
234 vpaddq YTMP1, YTMP0, YTMP0 # YTMP0 = W[-16] + W[-7] + s0
235 # Move to appropriate lanes for calculating w[16] and w[17]
236 vperm2f128 $0x0, YTMP0, YTMP0, Y_0 # Y_0 = W[-16] + W[-7] + s0 {BABA}
237 # Move to appropriate lanes for calculating w[18] and w[19]
238 vpand MASK_YMM_LO(%rip), YTMP0, YTMP0 # YTMP0 = W[-16] + W[-7] + s0 {DC00}
239
240 # Calculate w[16] and w[17] in both 128 bit lanes
241
242 # Calculate sigma1 for w[16] and w[17] on both 128 bit lanes
243 vperm2f128 $0x11, Y_3, Y_3, YTMP2 # YTMP2 = W[-2] {BABA}
244 vpsrlq $6, YTMP2, YTMP4 # YTMP4 = W[-2] >> 6 {BABA}
245
246
247 mov a, y3 # y3 = a # MAJA
248 rorx $41, e, y0 # y0 = e >> 41 # S1A
249 rorx $18, e, y1 # y1 = e >> 18 # S1B
250 add 1*8+frame_XFER(%rsp), h # h = k + w + h # --
251 or c, y3 # y3 = a|c # MAJA
252
253
254 mov f, y2 # y2 = f # CH
255 rorx $34, a, T1 # T1 = a >> 34 # S0B
256 xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1
257 xor g, y2 # y2 = f^g # CH
258
259
260 rorx $14, e, y1 # y1 = (e >> 14) # S1
261 xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1
262 rorx $39, a, y1 # y1 = a >> 39 # S0A
263 and e, y2 # y2 = (f^g)&e # CH
264 add h, d # d = k + w + h + d # --
265
266 and b, y3 # y3 = (a|c)&b # MAJA
267 xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0
268
269 rorx $28, a, T1 # T1 = (a >> 28) # S0
270 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
271
272 xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0
273 mov a, T1 # T1 = a # MAJB
274 and c, T1 # T1 = a&c # MAJB
275 add y0, y2 # y2 = S1 + CH # --
276
277 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
278 add y1, h # h = k + w + h + S0 # --
279
280 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
281 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
282 add y3, h # h = t1 + S0 + MAJ # --
283
284 RotateState
285
286
287################################### RND N + 2 #########################################
288
289 vpsrlq $19, YTMP2, YTMP3 # YTMP3 = W[-2] >> 19 {BABA}
290 vpsllq $(64-19), YTMP2, YTMP1 # YTMP1 = W[-2] << 19 {BABA}
291 vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 19 {BABA}
292 vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {BABA}
293 vpsrlq $61, YTMP2, YTMP3 # YTMP3 = W[-2] >> 61 {BABA}
294 vpsllq $(64-61), YTMP2, YTMP1 # YTMP1 = W[-2] << 61 {BABA}
295 vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 61 {BABA}
296 vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = s1 = (W[-2] ror 19) ^
297 # (W[-2] ror 61) ^ (W[-2] >> 6) {BABA}
298
299	# Add sigma1 to the other components to get w[16] and w[17]
300 vpaddq YTMP4, Y_0, Y_0 # Y_0 = {W[1], W[0], W[1], W[0]}
301
302 # Calculate sigma1 for w[18] and w[19] for upper 128 bit lane
303 vpsrlq $6, Y_0, YTMP4 # YTMP4 = W[-2] >> 6 {DC--}
304
305 mov a, y3 # y3 = a # MAJA
306 rorx $41, e, y0 # y0 = e >> 41 # S1A
307 add 2*8+frame_XFER(%rsp), h # h = k + w + h # --
308
309 rorx $18, e, y1 # y1 = e >> 18 # S1B
310 or c, y3 # y3 = a|c # MAJA
311 mov f, y2 # y2 = f # CH
312 xor g, y2 # y2 = f^g # CH
313
314 rorx $34, a, T1 # T1 = a >> 34 # S0B
315 xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1
316 and e, y2 # y2 = (f^g)&e # CH
317
318 rorx $14, e, y1 # y1 = (e >> 14) # S1
319 add h, d # d = k + w + h + d # --
320 and b, y3 # y3 = (a|c)&b # MAJA
321
322 xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1
323 rorx $39, a, y1 # y1 = a >> 39 # S0A
324 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
325
326 xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0
327 rorx $28, a, T1 # T1 = (a >> 28) # S0
328
329 xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0
330 mov a, T1 # T1 = a # MAJB
331 and c, T1 # T1 = a&c # MAJB
332 add y0, y2 # y2 = S1 + CH # --
333
334 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
335 add y1, h # h = k + w + h + S0 # --
336 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
337 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
338
339 add y3, h # h = t1 + S0 + MAJ # --
340
341 RotateState
342
343################################### RND N + 3 #########################################
344
345 vpsrlq $19, Y_0, YTMP3 # YTMP3 = W[-2] >> 19 {DC--}
346 vpsllq $(64-19), Y_0, YTMP1 # YTMP1 = W[-2] << 19 {DC--}
347 vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 19 {DC--}
348 vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {DC--}
349 vpsrlq $61, Y_0, YTMP3 # YTMP3 = W[-2] >> 61 {DC--}
350 vpsllq $(64-61), Y_0, YTMP1 # YTMP1 = W[-2] << 61 {DC--}
351 vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 61 {DC--}
352 vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = s1 = (W[-2] ror 19) ^
353 # (W[-2] ror 61) ^ (W[-2] >> 6) {DC--}
354
355 # Add the sigma0 + w[t-7] + w[t-16] for w[18] and w[19]
356 # to newly calculated sigma1 to get w[18] and w[19]
357 vpaddq YTMP4, YTMP0, YTMP2 # YTMP2 = {W[3], W[2], --, --}
358
359	# Form w[19], w[18], w[17], w[16]
360 vpblendd $0xF0, YTMP2, Y_0, Y_0 # Y_0 = {W[3], W[2], W[1], W[0]}
361
362 mov a, y3 # y3 = a # MAJA
363 rorx $41, e, y0 # y0 = e >> 41 # S1A
364 rorx $18, e, y1 # y1 = e >> 18 # S1B
365 add 3*8+frame_XFER(%rsp), h # h = k + w + h # --
366 or c, y3 # y3 = a|c # MAJA
367
368
369 mov f, y2 # y2 = f # CH
370 rorx $34, a, T1 # T1 = a >> 34 # S0B
371 xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1
372 xor g, y2 # y2 = f^g # CH
373
374
375 rorx $14, e, y1 # y1 = (e >> 14) # S1
376 and e, y2 # y2 = (f^g)&e # CH
377 add h, d # d = k + w + h + d # --
378 and b, y3 # y3 = (a|c)&b # MAJA
379
380 xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1
381 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
382
383 rorx $39, a, y1 # y1 = a >> 39 # S0A
384 add y0, y2 # y2 = S1 + CH # --
385
386 xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0
387 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
388
389 rorx $28, a, T1 # T1 = (a >> 28) # S0
390
391 xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0
392 mov a, T1 # T1 = a # MAJB
393 and c, T1 # T1 = a&c # MAJB
394 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
395
396 add y1, h # h = k + w + h + S0 # --
397 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
398 add y3, h # h = t1 + S0 + MAJ # --
399
400 RotateState
401
402 rotate_Ys
403.endm
404
405.macro DO_4ROUNDS
406
407################################### RND N + 0 #########################################
408
409 mov f, y2 # y2 = f # CH
410 rorx $41, e, y0 # y0 = e >> 41 # S1A
411 rorx $18, e, y1 # y1 = e >> 18 # S1B
412 xor g, y2 # y2 = f^g # CH
413
414 xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1
415 rorx $14, e, y1 # y1 = (e >> 14) # S1
416 and e, y2 # y2 = (f^g)&e # CH
417
418 xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1
419 rorx $34, a, T1 # T1 = a >> 34 # S0B
420 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
421 rorx $39, a, y1 # y1 = a >> 39 # S0A
422 mov a, y3 # y3 = a # MAJA
423
424 xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0
425 rorx $28, a, T1 # T1 = (a >> 28) # S0
426 add frame_XFER(%rsp), h # h = k + w + h # --
427 or c, y3 # y3 = a|c # MAJA
428
429 xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0
430 mov a, T1 # T1 = a # MAJB
431 and b, y3 # y3 = (a|c)&b # MAJA
432 and c, T1 # T1 = a&c # MAJB
433 add y0, y2 # y2 = S1 + CH # --
434
435 add h, d # d = k + w + h + d # --
436 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
437 add y1, h # h = k + w + h + S0 # --
438
439 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
440
441 RotateState
442
443################################### RND N + 1 #########################################
444
445 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
446 mov f, y2 # y2 = f # CH
447 rorx $41, e, y0 # y0 = e >> 41 # S1A
448 rorx $18, e, y1 # y1 = e >> 18 # S1B
449 xor g, y2 # y2 = f^g # CH
450
451 xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1
452 rorx $14, e, y1 # y1 = (e >> 14) # S1
453 and e, y2 # y2 = (f^g)&e # CH
454 add y3, old_h # h = t1 + S0 + MAJ # --
455
456 xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1
457 rorx $34, a, T1 # T1 = a >> 34 # S0B
458 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
459 rorx $39, a, y1 # y1 = a >> 39 # S0A
460 mov a, y3 # y3 = a # MAJA
461
462 xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0
463 rorx $28, a, T1 # T1 = (a >> 28) # S0
464 add 8*1+frame_XFER(%rsp), h # h = k + w + h # --
465 or c, y3 # y3 = a|c # MAJA
466
467 xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0
468 mov a, T1 # T1 = a # MAJB
469 and b, y3 # y3 = (a|c)&b # MAJA
470 and c, T1 # T1 = a&c # MAJB
471 add y0, y2 # y2 = S1 + CH # --
472
473 add h, d # d = k + w + h + d # --
474 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
475 add y1, h # h = k + w + h + S0 # --
476
477 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
478
479 RotateState
480
481################################### RND N + 2 #########################################
482
483 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
484 mov f, y2 # y2 = f # CH
485 rorx $41, e, y0 # y0 = e >> 41 # S1A
486 rorx $18, e, y1 # y1 = e >> 18 # S1B
487 xor g, y2 # y2 = f^g # CH
488
489 xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1
490 rorx $14, e, y1 # y1 = (e >> 14) # S1
491 and e, y2 # y2 = (f^g)&e # CH
492 add y3, old_h # h = t1 + S0 + MAJ # --
493
494 xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1
495 rorx $34, a, T1 # T1 = a >> 34 # S0B
496 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
497 rorx $39, a, y1 # y1 = a >> 39 # S0A
498 mov a, y3 # y3 = a # MAJA
499
500 xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0
501 rorx $28, a, T1 # T1 = (a >> 28) # S0
502 add 8*2+frame_XFER(%rsp), h # h = k + w + h # --
503 or c, y3 # y3 = a|c # MAJA
504
505 xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0
506 mov a, T1 # T1 = a # MAJB
507 and b, y3 # y3 = (a|c)&b # MAJA
508 and c, T1 # T1 = a&c # MAJB
509 add y0, y2 # y2 = S1 + CH # --
510
511 add h, d # d = k + w + h + d # --
512 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
513 add y1, h # h = k + w + h + S0 # --
514
515 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
516
517 RotateState
518
519################################### RND N + 3 #########################################
520
521 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
522 mov f, y2 # y2 = f # CH
523 rorx $41, e, y0 # y0 = e >> 41 # S1A
524 rorx $18, e, y1 # y1 = e >> 18 # S1B
525 xor g, y2 # y2 = f^g # CH
526
527 xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1
528 rorx $14, e, y1 # y1 = (e >> 14) # S1
529 and e, y2 # y2 = (f^g)&e # CH
530 add y3, old_h # h = t1 + S0 + MAJ # --
531
532 xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1
533 rorx $34, a, T1 # T1 = a >> 34 # S0B
534 xor g, y2 # y2 = CH = ((f^g)&e)^g # CH
535 rorx $39, a, y1 # y1 = a >> 39 # S0A
536 mov a, y3 # y3 = a # MAJA
537
538 xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0
539 rorx $28, a, T1 # T1 = (a >> 28) # S0
540 add 8*3+frame_XFER(%rsp), h # h = k + w + h # --
541 or c, y3 # y3 = a|c # MAJA
542
543 xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0
544 mov a, T1 # T1 = a # MAJB
545 and b, y3 # y3 = (a|c)&b # MAJA
546 and c, T1 # T1 = a&c # MAJB
547 add y0, y2 # y2 = S1 + CH # --
548
549
550 add h, d # d = k + w + h + d # --
551 or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ
552 add y1, h # h = k + w + h + S0 # --
553
554 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
555
556 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# --
557
558 add y3, h # h = t1 + S0 + MAJ # --
559
560 RotateState
561
562.endm
563
564########################################################################
565# void sha512_transform_rorx(const void* M, void* D, uint64_t L)
566# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
567# The size of the message pointed to by M must be an integer multiple of the
568# SHA512 block size.
569# L is the message length in SHA512 blocks
570########################################################################
571ENTRY(sha512_transform_rorx)
572 # Allocate Stack Space
573 mov %rsp, %rax
574 sub $frame_size, %rsp
575 and $~(0x20 - 1), %rsp
576 mov %rax, frame_RSPSAVE(%rsp)
577
578 # Save GPRs
579 mov %rbp, frame_GPRSAVE(%rsp)
580 mov %rbx, 8*1+frame_GPRSAVE(%rsp)
581 mov %r12, 8*2+frame_GPRSAVE(%rsp)
582 mov %r13, 8*3+frame_GPRSAVE(%rsp)
583 mov %r14, 8*4+frame_GPRSAVE(%rsp)
584 mov %r15, 8*5+frame_GPRSAVE(%rsp)
585
586 shl $7, NUM_BLKS # convert to bytes
587 jz done_hash
588 add INP, NUM_BLKS # pointer to end of data
589 mov NUM_BLKS, frame_INPEND(%rsp)
590
591 ## load initial digest
592 mov 8*0(CTX),a
593 mov 8*1(CTX),b
594 mov 8*2(CTX),c
595 mov 8*3(CTX),d
596 mov 8*4(CTX),e
597 mov 8*5(CTX),f
598 mov 8*6(CTX),g
599 mov 8*7(CTX),h
600
601 vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
602
603loop0:
604 lea K512(%rip), TBL
605
606	## byte swap first 16 qwords
607 COPY_YMM_AND_BSWAP Y_0, (INP), BYTE_FLIP_MASK
608 COPY_YMM_AND_BSWAP Y_1, 1*32(INP), BYTE_FLIP_MASK
609 COPY_YMM_AND_BSWAP Y_2, 2*32(INP), BYTE_FLIP_MASK
610 COPY_YMM_AND_BSWAP Y_3, 3*32(INP), BYTE_FLIP_MASK
611
612 mov INP, frame_INP(%rsp)
613
614	## schedule 64 input qwords, by doing 4 iterations of 16 rounds each
615 movq $4, frame_SRND(%rsp)
616
617.align 16
618loop1:
619 vpaddq (TBL), Y_0, XFER
620 vmovdqa XFER, frame_XFER(%rsp)
621 FOUR_ROUNDS_AND_SCHED
622
623 vpaddq 1*32(TBL), Y_0, XFER
624 vmovdqa XFER, frame_XFER(%rsp)
625 FOUR_ROUNDS_AND_SCHED
626
627 vpaddq 2*32(TBL), Y_0, XFER
628 vmovdqa XFER, frame_XFER(%rsp)
629 FOUR_ROUNDS_AND_SCHED
630
631 vpaddq 3*32(TBL), Y_0, XFER
632 vmovdqa XFER, frame_XFER(%rsp)
633 add $(4*32), TBL
634 FOUR_ROUNDS_AND_SCHED
635
636 subq $1, frame_SRND(%rsp)
637 jne loop1
638
639 movq $2, frame_SRND(%rsp)
640loop2:
641 vpaddq (TBL), Y_0, XFER
642 vmovdqa XFER, frame_XFER(%rsp)
643 DO_4ROUNDS
644 vpaddq 1*32(TBL), Y_1, XFER
645 vmovdqa XFER, frame_XFER(%rsp)
646 add $(2*32), TBL
647 DO_4ROUNDS
648
649 vmovdqa Y_2, Y_0
650 vmovdqa Y_3, Y_1
651
652 subq $1, frame_SRND(%rsp)
653 jne loop2
654
655 addm 8*0(CTX),a
656 addm 8*1(CTX),b
657 addm 8*2(CTX),c
658 addm 8*3(CTX),d
659 addm 8*4(CTX),e
660 addm 8*5(CTX),f
661 addm 8*6(CTX),g
662 addm 8*7(CTX),h
663
664 mov frame_INP(%rsp), INP
665 add $128, INP
666 cmp frame_INPEND(%rsp), INP
667 jne loop0
668
669done_hash:
670
671# Restore GPRs
672 mov frame_GPRSAVE(%rsp) ,%rbp
673 mov 8*1+frame_GPRSAVE(%rsp) ,%rbx
674 mov 8*2+frame_GPRSAVE(%rsp) ,%r12
675 mov 8*3+frame_GPRSAVE(%rsp) ,%r13
676 mov 8*4+frame_GPRSAVE(%rsp) ,%r14
677 mov 8*5+frame_GPRSAVE(%rsp) ,%r15
678
679 # Restore Stack Pointer
680 mov frame_RSPSAVE(%rsp), %rsp
681 ret
682ENDPROC(sha512_transform_rorx)
683
684########################################################################
685### Binary Data
686
687.data
688
689.align 64
690# K[t] used in SHA512 hashing
691K512:
692 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
693 .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
694 .quad 0x3956c25bf348b538,0x59f111f1b605d019
695 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
696 .quad 0xd807aa98a3030242,0x12835b0145706fbe
697 .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
698 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
699 .quad 0x9bdc06a725c71235,0xc19bf174cf692694
700 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
701 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
702 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
703 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
704 .quad 0x983e5152ee66dfab,0xa831c66d2db43210
705 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
706 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
707 .quad 0x06ca6351e003826f,0x142929670a0e6e70
708 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
709 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
710 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
711 .quad 0x81c2c92e47edaee6,0x92722c851482353b
712 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
713 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
714 .quad 0xd192e819d6ef5218,0xd69906245565a910
715 .quad 0xf40e35855771202a,0x106aa07032bbd1b8
716 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
717 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
718 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
719 .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
720 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
721 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
722 .quad 0x90befffa23631e28,0xa4506cebde82bde9
723 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
724 .quad 0xca273eceea26619c,0xd186b8c721c0c207
725 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
726 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
727 .quad 0x113f9804bef90dae,0x1b710b35131c471b
728 .quad 0x28db77f523047d84,0x32caab7b40c72493
729 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
730 .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
731 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
732
733.align 32
734
735# Mask for byte-swapping the qwords in a YMM register using vpshufb.
736PSHUFFLE_BYTE_FLIP_MASK:
737 .octa 0x08090a0b0c0d0e0f0001020304050607
738 .octa 0x18191a1b1c1d1e1f1011121314151617
739
740MASK_YMM_LO:
741 .octa 0x00000000000000000000000000000000
742 .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
743#endif
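
The S0/S1/CH/MAJ comments running through the round macros above refer to the standard FIPS 180-4 SHA-512 round, which the rorx-based code interleaves with the message schedule. A minimal scalar C sketch of one round, assuming nothing beyond the formulas the comments already spell out (names here are illustrative only):

    /*
     * One SHA-512 compression round.  The rorx $41/$18/$14 sequences above
     * compute the three rotations of Sigma1(e), rorx $39/$34/$28 the three
     * rotations of Sigma0(a); CH and MAJ match the commented expressions.
     */
    #include <stdint.h>

    static inline uint64_t ror64(uint64_t x, unsigned int n)
    {
    	return (x >> n) | (x << (64 - n));
    }

    static void sha512_round(uint64_t s[8], uint64_t kt, uint64_t wt)
    {
    	uint64_t a = s[0], b = s[1], c = s[2], d = s[3];
    	uint64_t e = s[4], f = s[5], g = s[6], h = s[7];

    	uint64_t S1  = ror64(e, 14) ^ ror64(e, 18) ^ ror64(e, 41);
    	uint64_t S0  = ror64(a, 28) ^ ror64(a, 34) ^ ror64(a, 39);
    	uint64_t ch  = ((f ^ g) & e) ^ g;          /* CH(e,f,g)  */
    	uint64_t maj = ((a | c) & b) | (a & c);    /* MAJ(a,b,c) */
    	uint64_t t1  = h + S1 + ch + kt + wt;      /* k + w + h + S1 + CH */
    	uint64_t t2  = S0 + maj;

    	/* rotate the a..h symbols right, as RotateState does above */
    	s[7] = g; s[6] = f; s[5] = e; s[4] = d + t1;
    	s[3] = c; s[2] = b; s[1] = a; s[0] = t1 + t2;
    }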
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
new file mode 100644
index 000000000000..fb56855d51f5
--- /dev/null
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -0,0 +1,421 @@
1########################################################################
2# Implement fast SHA-512 with SSSE3 instructions. (x86_64)
3#
4# Copyright (C) 2013 Intel Corporation.
5#
6# Authors:
7# James Guilford <james.guilford@intel.com>
8# Kirk Yap <kirk.s.yap@intel.com>
9# David Cote <david.m.cote@intel.com>
10# Tim Chen <tim.c.chen@linux.intel.com>
11#
12# This software is available to you under a choice of one of two
13# licenses. You may choose to be licensed under the terms of the GNU
14# General Public License (GPL) Version 2, available from the file
15# COPYING in the main directory of this source tree, or the
16# OpenIB.org BSD license below:
17#
18# Redistribution and use in source and binary forms, with or
19# without modification, are permitted provided that the following
20# conditions are met:
21#
22# - Redistributions of source code must retain the above
23# copyright notice, this list of conditions and the following
24# disclaimer.
25#
26# - Redistributions in binary form must reproduce the above
27# copyright notice, this list of conditions and the following
28# disclaimer in the documentation and/or other materials
29# provided with the distribution.
30#
31# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
35# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
36# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
37# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38# SOFTWARE.
39#
40########################################################################
41#
42# This code is described in an Intel White-Paper:
43# "Fast SHA-512 Implementations on Intel Architecture Processors"
44#
45# To find it, surf to http://www.intel.com/p/en_US/embedded
46# and search for that title.
47#
48########################################################################
49
50#include <linux/linkage.h>
51
52.text
53
54# Virtual Registers
55# ARG1
56msg = %rdi
57# ARG2
58digest = %rsi
59# ARG3
60msglen = %rdx
61T1 = %rcx
62T2 = %r8
63a_64 = %r9
64b_64 = %r10
65c_64 = %r11
66d_64 = %r12
67e_64 = %r13
68f_64 = %r14
69g_64 = %r15
70h_64 = %rbx
71tmp0 = %rax
72
73# Local variables (stack frame)
74
75W_SIZE = 80*8
76WK_SIZE = 2*8
77RSPSAVE_SIZE = 1*8
78GPRSAVE_SIZE = 5*8
79
80frame_W = 0
81frame_WK = frame_W + W_SIZE
82frame_RSPSAVE = frame_WK + WK_SIZE
83frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
84frame_size = frame_GPRSAVE + GPRSAVE_SIZE
85
86# Useful QWORD "arrays" for simpler memory references
87# MSG, DIGEST, K_t, W_t are arrays
88# WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even
89
90# Input message (arg1)
91#define MSG(i) 8*i(msg)
92
93# Output Digest (arg2)
94#define DIGEST(i) 8*i(digest)
95
96# SHA Constants (static mem)
97#define K_t(i) 8*i+K512(%rip)
98
99# Message Schedule (stack frame)
100#define W_t(i) 8*i+frame_W(%rsp)
101
102# W[t]+K[t] (stack frame)
103#define WK_2(i) 8*((i%2))+frame_WK(%rsp)
104
105.macro RotateState
106 # Rotate symbols a..h right
107 TMP = h_64
108 h_64 = g_64
109 g_64 = f_64
110 f_64 = e_64
111 e_64 = d_64
112 d_64 = c_64
113 c_64 = b_64
114 b_64 = a_64
115 a_64 = TMP
116.endm
117
118.macro SHA512_Round rnd
119
120 # Compute Round %%t
121 mov f_64, T1 # T1 = f
122 mov e_64, tmp0 # tmp = e
123 xor g_64, T1 # T1 = f ^ g
124 ror $23, tmp0 # 41 # tmp = e ror 23
125 and e_64, T1 # T1 = (f ^ g) & e
126 xor e_64, tmp0 # tmp = (e ror 23) ^ e
127 xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g)
128 idx = \rnd
129 add WK_2(idx), T1 # W[t] + K[t] from message scheduler
130 ror $4, tmp0 # 18 # tmp = ((e ror 23) ^ e) ror 4
131 xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e
132 mov a_64, T2 # T2 = a
133 add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h
134 ror $14, tmp0 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e)
135 add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + S1(e)
136 mov a_64, tmp0 # tmp = a
137 xor c_64, T2 # T2 = a ^ c
138 and c_64, tmp0 # tmp = a & c
139 and b_64, T2 # T2 = (a ^ c) & b
140 xor tmp0, T2 # T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c)
141 mov a_64, tmp0 # tmp = a
142 ror $5, tmp0 # 39 # tmp = a ror 5
143 xor a_64, tmp0 # tmp = (a ror 5) ^ a
144 add T1, d_64 # e(next_state) = d + T1
145 ror $6, tmp0 # 34 # tmp = ((a ror 5) ^ a) ror 6
146 xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a
147 lea (T1, T2), h_64 # a(next_state) = T1 + Maj(a,b,c)
148 ror $28, tmp0 # 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a)
149	ror	$28, tmp0	# 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a)
149	add	tmp0, h_64	# a(next_state) = T1 + Maj(a,b,c) + S0(a)
150 RotateState
151.endm
152
153.macro SHA512_2Sched_2Round_sse rnd
154
155 # Compute rounds t-2 and t-1
156 # Compute message schedule QWORDS t and t+1
157
158 # Two rounds are computed based on the values for K[t-2]+W[t-2] and
159 # K[t-1]+W[t-1] which were previously stored at WK_2 by the message
160 # scheduler.
161 # The two new schedule QWORDS are stored at [W_t(%%t)] and [W_t(%%t+1)].
162 # They are then added to their respective SHA512 constants at
163 # [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)]
164	# For brevity, the comments following vectored instructions only refer to
165 # the first of a pair of QWORDS.
166 # Eg. XMM2=W[t-2] really means XMM2={W[t-2]|W[t-1]}
167 # The computation of the message schedule and the rounds are tightly
168 # stitched to take advantage of instruction-level parallelism.
169 # For clarity, integer instructions (for the rounds calculation) are indented
170 # by one tab. Vectored instructions (for the message scheduler) are indented
171 # by two tabs.
172
173 mov f_64, T1
174 idx = \rnd -2
175 movdqa W_t(idx), %xmm2 # XMM2 = W[t-2]
176 xor g_64, T1
177 and e_64, T1
178 movdqa %xmm2, %xmm0 # XMM0 = W[t-2]
179 xor g_64, T1
180 idx = \rnd
181 add WK_2(idx), T1
182 idx = \rnd - 15
183 movdqu W_t(idx), %xmm5 # XMM5 = W[t-15]
184 mov e_64, tmp0
185 ror $23, tmp0 # 41
186 movdqa %xmm5, %xmm3 # XMM3 = W[t-15]
187 xor e_64, tmp0
188 ror $4, tmp0 # 18
189 psrlq $61-19, %xmm0 # XMM0 = W[t-2] >> 42
190 xor e_64, tmp0
191 ror $14, tmp0 # 14
192 psrlq $(8-7), %xmm3 # XMM3 = W[t-15] >> 1
193 add tmp0, T1
194 add h_64, T1
195 pxor %xmm2, %xmm0 # XMM0 = (W[t-2] >> 42) ^ W[t-2]
196 mov a_64, T2
197 xor c_64, T2
198 pxor %xmm5, %xmm3 # XMM3 = (W[t-15] >> 1) ^ W[t-15]
199 and b_64, T2
200 mov a_64, tmp0
201 psrlq $(19-6), %xmm0 # XMM0 = ((W[t-2]>>42)^W[t-2])>>13
202 and c_64, tmp0
203 xor tmp0, T2
204 psrlq $(7-1), %xmm3 # XMM3 = ((W[t-15]>>1)^W[t-15])>>6
205 mov a_64, tmp0
206 ror $5, tmp0 # 39
207 pxor %xmm2, %xmm0 # XMM0 = (((W[t-2]>>42)^W[t-2])>>13)^W[t-2]
208 xor a_64, tmp0
209 ror $6, tmp0 # 34
210 pxor %xmm5, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15]
211 xor a_64, tmp0
212 ror $28, tmp0 # 28
213 psrlq $6, %xmm0 # XMM0 = ((((W[t-2]>>42)^W[t-2])>>13)^W[t-2])>>6
214 add tmp0, T2
215 add T1, d_64
216 psrlq $1, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15]>>1
217 lea (T1, T2), h_64
218 RotateState
219 movdqa %xmm2, %xmm1 # XMM1 = W[t-2]
220 mov f_64, T1
221 xor g_64, T1
222 movdqa %xmm5, %xmm4 # XMM4 = W[t-15]
223 and e_64, T1
224 xor g_64, T1
225 psllq $(64-19)-(64-61) , %xmm1 # XMM1 = W[t-2] << 42
226 idx = \rnd + 1
227 add WK_2(idx), T1
228 mov e_64, tmp0
229 psllq $(64-1)-(64-8), %xmm4 # XMM4 = W[t-15] << 7
230 ror $23, tmp0 # 41
231 xor e_64, tmp0
232 pxor %xmm2, %xmm1 # XMM1 = (W[t-2] << 42)^W[t-2]
233 ror $4, tmp0 # 18
234 xor e_64, tmp0
235 pxor %xmm5, %xmm4 # XMM4 = (W[t-15]<<7)^W[t-15]
236 ror $14, tmp0 # 14
237 add tmp0, T1
238 psllq $(64-61), %xmm1 # XMM1 = ((W[t-2] << 42)^W[t-2])<<3
239 add h_64, T1
240 mov a_64, T2
241 psllq $(64-8), %xmm4 # XMM4 = ((W[t-15]<<7)^W[t-15])<<56
242 xor c_64, T2
243 and b_64, T2
244 pxor %xmm1, %xmm0 # XMM0 = s1(W[t-2])
245 mov a_64, tmp0
246 and c_64, tmp0
247 idx = \rnd - 7
248 movdqu W_t(idx), %xmm1 # XMM1 = W[t-7]
249 xor tmp0, T2
250 pxor %xmm4, %xmm3 # XMM3 = s0(W[t-15])
251 mov a_64, tmp0
252 paddq %xmm3, %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15])
253 ror $5, tmp0 # 39
254 idx =\rnd-16
255 paddq W_t(idx), %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) + W[t-16]
256 xor a_64, tmp0
257 paddq %xmm1, %xmm0 # XMM0 = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16]
258 ror $6, tmp0 # 34
259 movdqa %xmm0, W_t(\rnd) # Store scheduled qwords
260 xor a_64, tmp0
261 paddq K_t(\rnd), %xmm0 # Compute W[t]+K[t]
262 ror $28, tmp0 # 28
263 idx = \rnd
264 movdqa %xmm0, WK_2(idx) # Store W[t]+K[t] for next rounds
265 add tmp0, T2
266 add T1, d_64
267 lea (T1, T2), h_64
268 RotateState
269.endm
270
271########################################################################
272# void sha512_transform_ssse3(const void* M, void* D, u64 L)
273# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
274# The size of the message pointed to by M must be an integer multiple of the
275# SHA512 block size.
276# L is the message length in SHA512 blocks.
277########################################################################
278ENTRY(sha512_transform_ssse3)
279
280 cmp $0, msglen
281 je nowork
282
283 # Allocate Stack Space
284 mov %rsp, %rax
285 sub $frame_size, %rsp
286 and $~(0x20 - 1), %rsp
287 mov %rax, frame_RSPSAVE(%rsp)
288
289 # Save GPRs
290 mov %rbx, frame_GPRSAVE(%rsp)
291 mov %r12, frame_GPRSAVE +8*1(%rsp)
292 mov %r13, frame_GPRSAVE +8*2(%rsp)
293 mov %r14, frame_GPRSAVE +8*3(%rsp)
294 mov %r15, frame_GPRSAVE +8*4(%rsp)
295
296updateblock:
297
298# Load state variables
299 mov DIGEST(0), a_64
300 mov DIGEST(1), b_64
301 mov DIGEST(2), c_64
302 mov DIGEST(3), d_64
303 mov DIGEST(4), e_64
304 mov DIGEST(5), f_64
305 mov DIGEST(6), g_64
306 mov DIGEST(7), h_64
307
308 t = 0
309 .rept 80/2 + 1
310 # (80 rounds) / (2 rounds/iteration) + (1 iteration)
311 # +1 iteration because the scheduler leads hashing by 1 iteration
312 .if t < 2
313 # BSWAP 2 QWORDS
314 movdqa XMM_QWORD_BSWAP(%rip), %xmm1
315 movdqu MSG(t), %xmm0
316 pshufb %xmm1, %xmm0 # BSWAP
317 movdqa %xmm0, W_t(t) # Store Scheduled Pair
318 paddq K_t(t), %xmm0 # Compute W[t]+K[t]
319 movdqa %xmm0, WK_2(t) # Store into WK for rounds
320 .elseif t < 16
321	# BSWAP 2 QWORDS # Compute 2 Rounds
322 movdqu MSG(t), %xmm0
323 pshufb %xmm1, %xmm0 # BSWAP
324 SHA512_Round t-2 # Round t-2
325 movdqa %xmm0, W_t(t) # Store Scheduled Pair
326 paddq K_t(t), %xmm0 # Compute W[t]+K[t]
327 SHA512_Round t-1 # Round t-1
328 movdqa %xmm0, WK_2(t) # Store W[t]+K[t] into WK
329 .elseif t < 79
330	# Schedule 2 QWORDS # Compute 2 Rounds
331 SHA512_2Sched_2Round_sse t
332 .else
333 # Compute 2 Rounds
334 SHA512_Round t-2
335 SHA512_Round t-1
336 .endif
337 t = t+2
338 .endr
339
340 # Update digest
341 add a_64, DIGEST(0)
342 add b_64, DIGEST(1)
343 add c_64, DIGEST(2)
344 add d_64, DIGEST(3)
345 add e_64, DIGEST(4)
346 add f_64, DIGEST(5)
347 add g_64, DIGEST(6)
348 add h_64, DIGEST(7)
349
350 # Advance to next message block
351 add $16*8, msg
352 dec msglen
353 jnz updateblock
354
355 # Restore GPRs
356 mov frame_GPRSAVE(%rsp), %rbx
357 mov frame_GPRSAVE +8*1(%rsp), %r12
358 mov frame_GPRSAVE +8*2(%rsp), %r13
359 mov frame_GPRSAVE +8*3(%rsp), %r14
360 mov frame_GPRSAVE +8*4(%rsp), %r15
361
362 # Restore Stack Pointer
363 mov frame_RSPSAVE(%rsp), %rsp
364
365nowork:
366 ret
367ENDPROC(sha512_transform_ssse3)
368
369########################################################################
370### Binary Data
371
372.data
373
374.align 16
375
376# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
377XMM_QWORD_BSWAP:
378 .octa 0x08090a0b0c0d0e0f0001020304050607
379
380# K[t] used in SHA512 hashing
381K512:
382 .quad 0x428a2f98d728ae22,0x7137449123ef65cd
383 .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
384 .quad 0x3956c25bf348b538,0x59f111f1b605d019
385 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
386 .quad 0xd807aa98a3030242,0x12835b0145706fbe
387 .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
388 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
389 .quad 0x9bdc06a725c71235,0xc19bf174cf692694
390 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
391 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
392 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
393 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
394 .quad 0x983e5152ee66dfab,0xa831c66d2db43210
395 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
396 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
397 .quad 0x06ca6351e003826f,0x142929670a0e6e70
398 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
399 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
400 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
401 .quad 0x81c2c92e47edaee6,0x92722c851482353b
402 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
403 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
404 .quad 0xd192e819d6ef5218,0xd69906245565a910
405 .quad 0xf40e35855771202a,0x106aa07032bbd1b8
406 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
407 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
408 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
409 .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
410 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
411 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
412 .quad 0x90befffa23631e28,0xa4506cebde82bde9
413 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
414 .quad 0xca273eceea26619c,0xd186b8c721c0c207
415 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
416 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
417 .quad 0x113f9804bef90dae,0x1b710b35131c471b
418 .quad 0x28db77f523047d84,0x32caab7b40c72493
419 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
420 .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
421 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
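
The scheduler comments above (s0(W[t-15]), s1(W[t-2]), W[t-7], W[t-16]) describe the standard SHA-512 message expansion, which the SSE code computes two qwords at a time and the AVX2 code four at a time. A scalar C sketch of the same recurrence, for reference only:

    /*
     * SHA-512 message schedule: W[0..15] are the byte-swapped message words,
     * W[16..79] follow the recurrence below.  s0 uses ror 1 / ror 8 / shr 7,
     * s1 uses ror 19 / ror 61 / shr 6, matching the shift amounts above.
     */
    #include <stdint.h>

    static inline uint64_t ror64(uint64_t x, unsigned int n)
    {
    	return (x >> n) | (x << (64 - n));
    }

    static void sha512_schedule(uint64_t W[80])
    {
    	int t;

    	for (t = 16; t < 80; t++) {
    		uint64_t s0 = ror64(W[t - 15], 1) ^ ror64(W[t - 15], 8) ^
    			      (W[t - 15] >> 7);
    		uint64_t s1 = ror64(W[t - 2], 19) ^ ror64(W[t - 2], 61) ^
    			      (W[t - 2] >> 6);

    		W[t] = W[t - 16] + s0 + W[t - 7] + s1;
    	}
    }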
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
new file mode 100644
index 000000000000..6cbd8df348d2
--- /dev/null
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -0,0 +1,282 @@
1/*
2 * Cryptographic API.
3 *
4 * Glue code for the SHA512 Secure Hash Algorithm assembler
5 * implementation using supplemental SSE3 / AVX / AVX2 instructions.
6 *
7 * This file is based on sha512_generic.c
8 *
9 * Copyright (C) 2013 Intel Corporation
10 * Author: Tim Chen <tim.c.chen@linux.intel.com>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 2 of the License, or (at your option)
15 * any later version.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *
26 */
27
28#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30#include <crypto/internal/hash.h>
31#include <linux/init.h>
32#include <linux/module.h>
33#include <linux/mm.h>
34#include <linux/cryptohash.h>
35#include <linux/types.h>
36#include <crypto/sha.h>
37#include <asm/byteorder.h>
38#include <asm/i387.h>
39#include <asm/xcr.h>
40#include <asm/xsave.h>
41
42#include <linux/string.h>
43
44asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest,
45 u64 rounds);
46#ifdef CONFIG_AS_AVX
47asmlinkage void sha512_transform_avx(const char *data, u64 *digest,
48 u64 rounds);
49#endif
50#ifdef CONFIG_AS_AVX2
51asmlinkage void sha512_transform_rorx(const char *data, u64 *digest,
52 u64 rounds);
53#endif
54
55static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);
56
57
58static int sha512_ssse3_init(struct shash_desc *desc)
59{
60 struct sha512_state *sctx = shash_desc_ctx(desc);
61
62 sctx->state[0] = SHA512_H0;
63 sctx->state[1] = SHA512_H1;
64 sctx->state[2] = SHA512_H2;
65 sctx->state[3] = SHA512_H3;
66 sctx->state[4] = SHA512_H4;
67 sctx->state[5] = SHA512_H5;
68 sctx->state[6] = SHA512_H6;
69 sctx->state[7] = SHA512_H7;
70 sctx->count[0] = sctx->count[1] = 0;
71
72 return 0;
73}
74
75static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
76 unsigned int len, unsigned int partial)
77{
78 struct sha512_state *sctx = shash_desc_ctx(desc);
79 unsigned int done = 0;
80
81 sctx->count[0] += len;
82 if (sctx->count[0] < len)
83 sctx->count[1]++;
84
85 if (partial) {
86 done = SHA512_BLOCK_SIZE - partial;
87 memcpy(sctx->buf + partial, data, done);
88 sha512_transform_asm(sctx->buf, sctx->state, 1);
89 }
90
91 if (len - done >= SHA512_BLOCK_SIZE) {
92 const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
93
94 sha512_transform_asm(data + done, sctx->state, (u64) rounds);
95
96 done += rounds * SHA512_BLOCK_SIZE;
97 }
98
99 memcpy(sctx->buf, data + done, len - done);
100
101 return 0;
102}
103
104static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
105 unsigned int len)
106{
107 struct sha512_state *sctx = shash_desc_ctx(desc);
108 unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
109 int res;
110
111 /* Handle the fast case right here */
112 if (partial + len < SHA512_BLOCK_SIZE) {
113 sctx->count[0] += len;
114 if (sctx->count[0] < len)
115 sctx->count[1]++;
116 memcpy(sctx->buf + partial, data, len);
117
118 return 0;
119 }
120
121 if (!irq_fpu_usable()) {
122 res = crypto_sha512_update(desc, data, len);
123 } else {
124 kernel_fpu_begin();
125 res = __sha512_ssse3_update(desc, data, len, partial);
126 kernel_fpu_end();
127 }
128
129 return res;
130}
131
132
133/* Add padding and return the message digest. */
134static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
135{
136 struct sha512_state *sctx = shash_desc_ctx(desc);
137 unsigned int i, index, padlen;
138 __be64 *dst = (__be64 *)out;
139 __be64 bits[2];
140 static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
141
142 /* save number of bits */
143 bits[1] = cpu_to_be64(sctx->count[0] << 3);
144 bits[0] = cpu_to_be64(sctx->count[1] << 3) | sctx->count[0] >> 61;
145
146 /* Pad out to 112 mod 128 and append length */
147 index = sctx->count[0] & 0x7f;
148 padlen = (index < 112) ? (112 - index) : ((128+112) - index);
149
150 if (!irq_fpu_usable()) {
151 crypto_sha512_update(desc, padding, padlen);
152 crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
153 } else {
154 kernel_fpu_begin();
155 /* We need to fill a whole block for __sha512_ssse3_update() */
156 if (padlen <= 112) {
157 sctx->count[0] += padlen;
158 if (sctx->count[0] < padlen)
159 sctx->count[1]++;
160 memcpy(sctx->buf + index, padding, padlen);
161 } else {
162 __sha512_ssse3_update(desc, padding, padlen, index);
163 }
164 __sha512_ssse3_update(desc, (const u8 *)&bits,
165 sizeof(bits), 112);
166 kernel_fpu_end();
167 }
168
169 /* Store state in digest */
170 for (i = 0; i < 8; i++)
171 dst[i] = cpu_to_be64(sctx->state[i]);
172
173 /* Wipe context */
174 memset(sctx, 0, sizeof(*sctx));
175
176 return 0;
177}
178
179static int sha512_ssse3_export(struct shash_desc *desc, void *out)
180{
181 struct sha512_state *sctx = shash_desc_ctx(desc);
182
183 memcpy(out, sctx, sizeof(*sctx));
184
185 return 0;
186}
187
188static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
189{
190 struct sha512_state *sctx = shash_desc_ctx(desc);
191
192 memcpy(sctx, in, sizeof(*sctx));
193
194 return 0;
195}
196
197static struct shash_alg alg = {
198 .digestsize = SHA512_DIGEST_SIZE,
199 .init = sha512_ssse3_init,
200 .update = sha512_ssse3_update,
201 .final = sha512_ssse3_final,
202 .export = sha512_ssse3_export,
203 .import = sha512_ssse3_import,
204 .descsize = sizeof(struct sha512_state),
205 .statesize = sizeof(struct sha512_state),
206 .base = {
207 .cra_name = "sha512",
208 .cra_driver_name = "sha512-ssse3",
209 .cra_priority = 150,
210 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
211 .cra_blocksize = SHA512_BLOCK_SIZE,
212 .cra_module = THIS_MODULE,
213 }
214};
215
216#ifdef CONFIG_AS_AVX
217static bool __init avx_usable(void)
218{
219 u64 xcr0;
220
221 if (!cpu_has_avx || !cpu_has_osxsave)
222 return false;
223
224 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
225 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
226 pr_info("AVX detected but unusable.\n");
227
228 return false;
229 }
230
231 return true;
232}
233#endif
234
235static int __init sha512_ssse3_mod_init(void)
236{
237	/* test for SSSE3 first */
238 if (cpu_has_ssse3)
239 sha512_transform_asm = sha512_transform_ssse3;
240
241#ifdef CONFIG_AS_AVX
242 /* allow AVX to override SSSE3, it's a little faster */
243 if (avx_usable()) {
244#ifdef CONFIG_AS_AVX2
245 if (boot_cpu_has(X86_FEATURE_AVX2))
246 sha512_transform_asm = sha512_transform_rorx;
247 else
248#endif
249 sha512_transform_asm = sha512_transform_avx;
250 }
251#endif
252
253 if (sha512_transform_asm) {
254#ifdef CONFIG_AS_AVX
255 if (sha512_transform_asm == sha512_transform_avx)
256 pr_info("Using AVX optimized SHA-512 implementation\n");
257#ifdef CONFIG_AS_AVX2
258 else if (sha512_transform_asm == sha512_transform_rorx)
259 pr_info("Using AVX2 optimized SHA-512 implementation\n");
260#endif
261 else
262#endif
263 pr_info("Using SSSE3 optimized SHA-512 implementation\n");
264 return crypto_register_shash(&alg);
265 }
266 pr_info("Neither AVX nor SSSE3 is available/usable.\n");
267
268 return -ENODEV;
269}
270
271static void __exit sha512_ssse3_mod_fini(void)
272{
273 crypto_unregister_shash(&alg);
274}
275
276module_init(sha512_ssse3_mod_init);
277module_exit(sha512_ssse3_mod_fini);
278
279MODULE_LICENSE("GPL");
280MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
281
282MODULE_ALIAS("sha512");
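
For context, the algorithm registered above is reached through the normal synchronous-hash API; allocation by name picks the highest-priority "sha512" provider, so the sha512-ssse3 driver (priority 150) is preferred over the generic implementation. A minimal usage sketch, with error handling trimmed and the descriptor setup hedged as it can differ slightly across kernel versions:

    #include <crypto/hash.h>
    #include <linux/err.h>
    #include <linux/slab.h>

    static int sha512_digest_example(const u8 *data, unsigned int len, u8 *out)
    {
    	struct crypto_shash *tfm;
    	struct shash_desc *desc;
    	int ret;

    	tfm = crypto_alloc_shash("sha512", 0, 0);
    	if (IS_ERR(tfm))
    		return PTR_ERR(tfm);

    	/* desc carries per-request state; size depends on the driver */
    	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
    	if (!desc) {
    		crypto_free_shash(tfm);
    		return -ENOMEM;
    	}
    	desc->tfm = tfm;

    	ret = crypto_shash_digest(desc, data, len, out);

    	kfree(desc);
    	crypto_free_shash(tfm);
    	return ret;
    }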
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 8d3e113b2c95..05058134c443 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -4,7 +4,7 @@
4 * Copyright (C) 2012 Johannes Goetzfried 4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> 5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 * 6 *
7 * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 7 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -33,6 +33,8 @@
33 33
34.Lbswap128_mask: 34.Lbswap128_mask:
35 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 35 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
36.Lxts_gf128mul_and_shl1_mask:
37 .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
36 38
37.text 39.text
38 40
@@ -408,3 +410,47 @@ ENTRY(twofish_ctr_8way)
408 410
409 ret; 411 ret;
410ENDPROC(twofish_ctr_8way) 412ENDPROC(twofish_ctr_8way)
413
414ENTRY(twofish_xts_enc_8way)
415 /* input:
416 * %rdi: ctx, CTX
417 * %rsi: dst
418 * %rdx: src
419 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
420 */
421
422 movq %rsi, %r11;
423
424 /* regs <= src, dst <= IVs, regs <= regs xor IVs */
425 load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
426 RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);
427
428 call __twofish_enc_blk8;
429
430 /* dst <= regs xor IVs(in dst) */
431 store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
432
433 ret;
434ENDPROC(twofish_xts_enc_8way)
435
436ENTRY(twofish_xts_dec_8way)
437 /* input:
438 * %rdi: ctx, CTX
439 * %rsi: dst
440 * %rdx: src
441 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
442 */
443
444 movq %rsi, %r11;
445
446 /* regs <= src, dst <= IVs, regs <= regs xor IVs */
447 load_xts_8way(%rcx, %rdx, %rsi, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2,
448 RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);
449
450 call __twofish_dec_blk8;
451
452 /* dst <= regs xor IVs(in dst) */
453 store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
454
455 ret;
456ENDPROC(twofish_xts_dec_8way)
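
The .Lxts_gf128mul_and_shl1_mask constant added above lets the SIMD code advance the XTS tweak in-register; the scalar equivalent is a multiplication by α in GF(2¹²⁸): shift the 128-bit tweak left by one bit and, if a carry falls out of bit 127, fold it back in with the reduction polynomial 0x87. A small C sketch, with the low/high qword layout assumed purely for illustration:

    /* Advance the XTS tweak: t <- t * alpha in GF(2^128), little endian. */
    #include <stdint.h>

    static void xts_next_tweak(uint64_t t[2])	/* t[0] = low, t[1] = high */
    {
    	uint64_t carry = t[1] >> 63;		/* bit 127 of the tweak */

    	t[1] = (t[1] << 1) | (t[0] >> 63);
    	t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
    }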
diff --git a/arch/x86/crypto/twofish-avx2-asm_64.S b/arch/x86/crypto/twofish-avx2-asm_64.S
new file mode 100644
index 000000000000..e1a83b9cd389
--- /dev/null
+++ b/arch/x86/crypto/twofish-avx2-asm_64.S
@@ -0,0 +1,600 @@
1/*
2 * x86_64/AVX2 assembler optimized version of Twofish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/linkage.h>
14#include "glue_helper-asm-avx2.S"
15
16.file "twofish-avx2-asm_64.S"
17
18.data
19.align 16
20
21.Lvpshufb_mask0:
22.long 0x80808000
23.long 0x80808004
24.long 0x80808008
25.long 0x8080800c
26
27.Lbswap128_mask:
28 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
29.Lxts_gf128mul_and_shl1_mask_0:
30 .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
31.Lxts_gf128mul_and_shl1_mask_1:
32 .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
33
34.text
35
36/* structure of crypto context */
37#define s0 0
38#define s1 1024
39#define s2 2048
40#define s3 3072
41#define w 4096
42#define k 4128
43
44/* register macros */
45#define CTX %rdi
46
47#define RS0 CTX
48#define RS1 %r8
49#define RS2 %r9
50#define RS3 %r10
51#define RK %r11
52#define RW %rax
53#define RROUND %r12
54#define RROUNDd %r12d
55
56#define RA0 %ymm8
57#define RB0 %ymm9
58#define RC0 %ymm10
59#define RD0 %ymm11
60#define RA1 %ymm12
61#define RB1 %ymm13
62#define RC1 %ymm14
63#define RD1 %ymm15
64
65/* temp regs */
66#define RX0 %ymm0
67#define RY0 %ymm1
68#define RX1 %ymm2
69#define RY1 %ymm3
70#define RT0 %ymm4
71#define RIDX %ymm5
72
73#define RX0x %xmm0
74#define RY0x %xmm1
75#define RX1x %xmm2
76#define RY1x %xmm3
77#define RT0x %xmm4
78
79/* vpgatherdd mask and '-1' */
80#define RNOT %ymm6
81
82/* byte mask, (-1 >> 24) */
83#define RBYTE %ymm7
84
85/**********************************************************************
86 16-way AVX2 twofish
87 **********************************************************************/
88#define init_round_constants() \
89 vpcmpeqd RNOT, RNOT, RNOT; \
90 vpsrld $24, RNOT, RBYTE; \
91 leaq k(CTX), RK; \
92 leaq w(CTX), RW; \
93 leaq s1(CTX), RS1; \
94 leaq s2(CTX), RS2; \
95 leaq s3(CTX), RS3; \
96
97#define g16(ab, rs0, rs1, rs2, rs3, xy) \
98 vpand RBYTE, ab ## 0, RIDX; \
99 vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \
100 vpcmpeqd RNOT, RNOT, RNOT; \
101 \
102 vpand RBYTE, ab ## 1, RIDX; \
103 vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \
104 vpcmpeqd RNOT, RNOT, RNOT; \
105 \
106 vpsrld $8, ab ## 0, RIDX; \
107 vpand RBYTE, RIDX, RIDX; \
108 vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
109 vpcmpeqd RNOT, RNOT, RNOT; \
110 vpxor RT0, xy ## 0, xy ## 0; \
111 \
112 vpsrld $8, ab ## 1, RIDX; \
113 vpand RBYTE, RIDX, RIDX; \
114 vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
115 vpcmpeqd RNOT, RNOT, RNOT; \
116 vpxor RT0, xy ## 1, xy ## 1; \
117 \
118 vpsrld $16, ab ## 0, RIDX; \
119 vpand RBYTE, RIDX, RIDX; \
120 vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
121 vpcmpeqd RNOT, RNOT, RNOT; \
122 vpxor RT0, xy ## 0, xy ## 0; \
123 \
124 vpsrld $16, ab ## 1, RIDX; \
125 vpand RBYTE, RIDX, RIDX; \
126 vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
127 vpcmpeqd RNOT, RNOT, RNOT; \
128 vpxor RT0, xy ## 1, xy ## 1; \
129 \
130 vpsrld $24, ab ## 0, RIDX; \
131 vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
132 vpcmpeqd RNOT, RNOT, RNOT; \
133 vpxor RT0, xy ## 0, xy ## 0; \
134 \
135 vpsrld $24, ab ## 1, RIDX; \
136 vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
137 vpcmpeqd RNOT, RNOT, RNOT; \
138 vpxor RT0, xy ## 1, xy ## 1;
139
140#define g1_16(a, x) \
141 g16(a, RS0, RS1, RS2, RS3, x);
142
143#define g2_16(b, y) \
144 g16(b, RS1, RS2, RS3, RS0, y);
145
146#define encrypt_round_end16(a, b, c, d, nk) \
147 vpaddd RY0, RX0, RX0; \
148 vpaddd RX0, RY0, RY0; \
149 vpbroadcastd nk(RK,RROUND,8), RT0; \
150 vpaddd RT0, RX0, RX0; \
151 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
152 vpaddd RT0, RY0, RY0; \
153 \
154 vpxor RY0, d ## 0, d ## 0; \
155 \
156 vpxor RX0, c ## 0, c ## 0; \
157 vpsrld $1, c ## 0, RT0; \
158 vpslld $31, c ## 0, c ## 0; \
159 vpor RT0, c ## 0, c ## 0; \
160 \
161 vpaddd RY1, RX1, RX1; \
162 vpaddd RX1, RY1, RY1; \
163 vpbroadcastd nk(RK,RROUND,8), RT0; \
164 vpaddd RT0, RX1, RX1; \
165 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
166 vpaddd RT0, RY1, RY1; \
167 \
168 vpxor RY1, d ## 1, d ## 1; \
169 \
170 vpxor RX1, c ## 1, c ## 1; \
171 vpsrld $1, c ## 1, RT0; \
172 vpslld $31, c ## 1, c ## 1; \
173 vpor RT0, c ## 1, c ## 1; \
174
175#define encrypt_round16(a, b, c, d, nk) \
176 g2_16(b, RY); \
177 \
178 vpslld $1, b ## 0, RT0; \
179 vpsrld $31, b ## 0, b ## 0; \
180 vpor RT0, b ## 0, b ## 0; \
181 \
182 vpslld $1, b ## 1, RT0; \
183 vpsrld $31, b ## 1, b ## 1; \
184 vpor RT0, b ## 1, b ## 1; \
185 \
186 g1_16(a, RX); \
187 \
188 encrypt_round_end16(a, b, c, d, nk);
189
190#define encrypt_round_first16(a, b, c, d, nk) \
191 vpslld $1, d ## 0, RT0; \
192 vpsrld $31, d ## 0, d ## 0; \
193 vpor RT0, d ## 0, d ## 0; \
194 \
195 vpslld $1, d ## 1, RT0; \
196 vpsrld $31, d ## 1, d ## 1; \
197 vpor RT0, d ## 1, d ## 1; \
198 \
199 encrypt_round16(a, b, c, d, nk);
200
201#define encrypt_round_last16(a, b, c, d, nk) \
202 g2_16(b, RY); \
203 \
204 g1_16(a, RX); \
205 \
206 encrypt_round_end16(a, b, c, d, nk);
207
208#define decrypt_round_end16(a, b, c, d, nk) \
209 vpaddd RY0, RX0, RX0; \
210 vpaddd RX0, RY0, RY0; \
211 vpbroadcastd nk(RK,RROUND,8), RT0; \
212 vpaddd RT0, RX0, RX0; \
213 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
214 vpaddd RT0, RY0, RY0; \
215 \
216 vpxor RX0, c ## 0, c ## 0; \
217 \
218 vpxor RY0, d ## 0, d ## 0; \
219 vpsrld $1, d ## 0, RT0; \
220 vpslld $31, d ## 0, d ## 0; \
221 vpor RT0, d ## 0, d ## 0; \
222 \
223 vpaddd RY1, RX1, RX1; \
224 vpaddd RX1, RY1, RY1; \
225 vpbroadcastd nk(RK,RROUND,8), RT0; \
226 vpaddd RT0, RX1, RX1; \
227 vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
228 vpaddd RT0, RY1, RY1; \
229 \
230 vpxor RX1, c ## 1, c ## 1; \
231 \
232 vpxor RY1, d ## 1, d ## 1; \
233 vpsrld $1, d ## 1, RT0; \
234 vpslld $31, d ## 1, d ## 1; \
235 vpor RT0, d ## 1, d ## 1;
236
237#define decrypt_round16(a, b, c, d, nk) \
238 g1_16(a, RX); \
239 \
240 vpslld $1, a ## 0, RT0; \
241 vpsrld $31, a ## 0, a ## 0; \
242 vpor RT0, a ## 0, a ## 0; \
243 \
244 vpslld $1, a ## 1, RT0; \
245 vpsrld $31, a ## 1, a ## 1; \
246 vpor RT0, a ## 1, a ## 1; \
247 \
248 g2_16(b, RY); \
249 \
250 decrypt_round_end16(a, b, c, d, nk);
251
252#define decrypt_round_first16(a, b, c, d, nk) \
253 vpslld $1, c ## 0, RT0; \
254 vpsrld $31, c ## 0, c ## 0; \
255 vpor RT0, c ## 0, c ## 0; \
256 \
257 vpslld $1, c ## 1, RT0; \
258 vpsrld $31, c ## 1, c ## 1; \
259 vpor RT0, c ## 1, c ## 1; \
260 \
261 decrypt_round16(a, b, c, d, nk)
262
263#define decrypt_round_last16(a, b, c, d, nk) \
264 g1_16(a, RX); \
265 \
266 g2_16(b, RY); \
267 \
268 decrypt_round_end16(a, b, c, d, nk);
269
270#define encrypt_cycle16() \
271 encrypt_round16(RA, RB, RC, RD, 0); \
272 encrypt_round16(RC, RD, RA, RB, 8);
273
274#define encrypt_cycle_first16() \
275 encrypt_round_first16(RA, RB, RC, RD, 0); \
276 encrypt_round16(RC, RD, RA, RB, 8);
277
278#define encrypt_cycle_last16() \
279 encrypt_round16(RA, RB, RC, RD, 0); \
280 encrypt_round_last16(RC, RD, RA, RB, 8);
281
282#define decrypt_cycle16(n) \
283 decrypt_round16(RC, RD, RA, RB, 8); \
284 decrypt_round16(RA, RB, RC, RD, 0);
285
286#define decrypt_cycle_first16(n) \
287 decrypt_round_first16(RC, RD, RA, RB, 8); \
288 decrypt_round16(RA, RB, RC, RD, 0);
289
290#define decrypt_cycle_last16(n) \
291 decrypt_round16(RC, RD, RA, RB, 8); \
292 decrypt_round_last16(RA, RB, RC, RD, 0);
293
294#define transpose_4x4(x0,x1,x2,x3,t1,t2) \
295 vpunpckhdq x1, x0, t2; \
296 vpunpckldq x1, x0, x0; \
297 \
298 vpunpckldq x3, x2, t1; \
299 vpunpckhdq x3, x2, x2; \
300 \
301 vpunpckhqdq t1, x0, x1; \
302 vpunpcklqdq t1, x0, x0; \
303 \
304 vpunpckhqdq x2, t2, x3; \
305 vpunpcklqdq x2, t2, x2;
306
307#define read_blocks8(offs,a,b,c,d) \
308 transpose_4x4(a, b, c, d, RX0, RY0);
309
310#define write_blocks8(offs,a,b,c,d) \
311 transpose_4x4(a, b, c, d, RX0, RY0);
312
313#define inpack_enc8(a,b,c,d) \
314 vpbroadcastd 4*0(RW), RT0; \
315 vpxor RT0, a, a; \
316 \
317 vpbroadcastd 4*1(RW), RT0; \
318 vpxor RT0, b, b; \
319 \
320 vpbroadcastd 4*2(RW), RT0; \
321 vpxor RT0, c, c; \
322 \
323 vpbroadcastd 4*3(RW), RT0; \
324 vpxor RT0, d, d;
325
326#define outunpack_enc8(a,b,c,d) \
327 vpbroadcastd 4*4(RW), RX0; \
328 vpbroadcastd 4*5(RW), RY0; \
329 vpxor RX0, c, RX0; \
330 vpxor RY0, d, RY0; \
331 \
332 vpbroadcastd 4*6(RW), RT0; \
333 vpxor RT0, a, c; \
334 vpbroadcastd 4*7(RW), RT0; \
335 vpxor RT0, b, d; \
336 \
337 vmovdqa RX0, a; \
338 vmovdqa RY0, b;
339
340#define inpack_dec8(a,b,c,d) \
341 vpbroadcastd 4*4(RW), RX0; \
342 vpbroadcastd 4*5(RW), RY0; \
343 vpxor RX0, a, RX0; \
344 vpxor RY0, b, RY0; \
345 \
346 vpbroadcastd 4*6(RW), RT0; \
347 vpxor RT0, c, a; \
348 vpbroadcastd 4*7(RW), RT0; \
349 vpxor RT0, d, b; \
350 \
351 vmovdqa RX0, c; \
352 vmovdqa RY0, d;
353
354#define outunpack_dec8(a,b,c,d) \
355 vpbroadcastd 4*0(RW), RT0; \
356 vpxor RT0, a, a; \
357 \
358 vpbroadcastd 4*1(RW), RT0; \
359 vpxor RT0, b, b; \
360 \
361 vpbroadcastd 4*2(RW), RT0; \
362 vpxor RT0, c, c; \
363 \
364 vpbroadcastd 4*3(RW), RT0; \
365 vpxor RT0, d, d;
366
367#define read_blocks16(a,b,c,d) \
368 read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
369 read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
370
371#define write_blocks16(a,b,c,d) \
372 write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
373 write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
374
375#define xor_blocks16(a,b,c,d) \
376 xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
377 xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
378
379#define inpack_enc16(a,b,c,d) \
380 inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
381 inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
382
383#define outunpack_enc16(a,b,c,d) \
384 outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
385 outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
386
387#define inpack_dec16(a,b,c,d) \
388 inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
389 inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
390
391#define outunpack_dec16(a,b,c,d) \
392 outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
393 outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
394
395.align 8
396__twofish_enc_blk16:
397 /* input:
398 * %rdi: ctx, CTX
399 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
400 * output:
401 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
402 */
403 init_round_constants();
404
405 read_blocks16(RA, RB, RC, RD);
406 inpack_enc16(RA, RB, RC, RD);
407
408 xorl RROUNDd, RROUNDd;
409 encrypt_cycle_first16();
410 movl $2, RROUNDd;
411
412.align 4
413.L__enc_loop:
414 encrypt_cycle16();
415
416 addl $2, RROUNDd;
417 cmpl $14, RROUNDd;
418 jne .L__enc_loop;
419
420 encrypt_cycle_last16();
421
422 outunpack_enc16(RA, RB, RC, RD);
423 write_blocks16(RA, RB, RC, RD);
424
425 ret;
426ENDPROC(__twofish_enc_blk16)
427
428.align 8
429__twofish_dec_blk16:
430 /* input:
431 * %rdi: ctx, CTX
432 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
433 * output:
434 * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
435 */
436 init_round_constants();
437
438 read_blocks16(RA, RB, RC, RD);
439 inpack_dec16(RA, RB, RC, RD);
440
441 movl $14, RROUNDd;
442 decrypt_cycle_first16();
443 movl $12, RROUNDd;
444
445.align 4
446.L__dec_loop:
447 decrypt_cycle16();
448
449 addl $-2, RROUNDd;
450 jnz .L__dec_loop;
451
452 decrypt_cycle_last16();
453
454 outunpack_dec16(RA, RB, RC, RD);
455 write_blocks16(RA, RB, RC, RD);
456
457 ret;
458ENDPROC(__twofish_dec_blk16)
459
460ENTRY(twofish_ecb_enc_16way)
461 /* input:
462 * %rdi: ctx, CTX
463 * %rsi: dst
464 * %rdx: src
465 */
466
467 vzeroupper;
468 pushq %r12;
469
470 load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
471
472 call __twofish_enc_blk16;
473
474 store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
475
476 popq %r12;
477 vzeroupper;
478
479 ret;
480ENDPROC(twofish_ecb_enc_16way)
481
482ENTRY(twofish_ecb_dec_16way)
483 /* input:
484 * %rdi: ctx, CTX
485 * %rsi: dst
486 * %rdx: src
487 */
488
489 vzeroupper;
490 pushq %r12;
491
492 load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
493
494 call __twofish_dec_blk16;
495
496 store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
497
498 popq %r12;
499 vzeroupper;
500
501 ret;
502ENDPROC(twofish_ecb_dec_16way)
503
504ENTRY(twofish_cbc_dec_16way)
505 /* input:
506 * %rdi: ctx, CTX
507 * %rsi: dst
508 * %rdx: src
509 */
510
511 vzeroupper;
512 pushq %r12;
513
514 load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
515
516 call __twofish_dec_blk16;
517
518 store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1,
519 RX0);
520
521 popq %r12;
522 vzeroupper;
523
524 ret;
525ENDPROC(twofish_cbc_dec_16way)
526
527ENTRY(twofish_ctr_16way)
528 /* input:
529 * %rdi: ctx, CTX
530 * %rsi: dst (16 blocks)
531 * %rdx: src (16 blocks)
532 * %rcx: iv (little endian, 128bit)
533 */
534
535 vzeroupper;
536 pushq %r12;
537
538 load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
539 RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
540 RBYTE);
541
542 call __twofish_enc_blk16;
543
544 store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
545
546 popq %r12;
547 vzeroupper;
548
549 ret;
550ENDPROC(twofish_ctr_16way)
551
552.align 8
553twofish_xts_crypt_16way:
554 /* input:
555 * %rdi: ctx, CTX
556 * %rsi: dst (16 blocks)
557 * %rdx: src (16 blocks)
558 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
559 * %r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16
560 */
561
562 vzeroupper;
563 pushq %r12;
564
565 load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
566 RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
567 .Lxts_gf128mul_and_shl1_mask_0,
568 .Lxts_gf128mul_and_shl1_mask_1);
569
570 call *%r8;
571
572 store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
573
574 popq %r12;
575 vzeroupper;
576
577 ret;
578ENDPROC(twofish_xts_crypt_16way)
579
580ENTRY(twofish_xts_enc_16way)
581 /* input:
582 * %rdi: ctx, CTX
583 * %rsi: dst (16 blocks)
584 * %rdx: src (16 blocks)
585 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
586 */
587 leaq __twofish_enc_blk16, %r8;
588 jmp twofish_xts_crypt_16way;
589ENDPROC(twofish_xts_enc_16way)
590
591ENTRY(twofish_xts_dec_16way)
592 /* input:
593 * %rdi: ctx, CTX
594 * %rsi: dst (16 blocks)
595 * %rdx: src (16 blocks)
596 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
597 */
598 leaq __twofish_dec_blk16, %r8;
599 jmp twofish_xts_crypt_16way;
600ENDPROC(twofish_xts_dec_16way)
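
Note: the XTS entry points above take the running tweak in %rcx; block n of the stream is masked with T_n = E_K2(IV) * alpha^n, the multiplication being in GF(2^128) reduced by x^128 + x^7 + x^2 + x + 1. load_xts_16way precomputes 16 consecutive tweaks per call using the .Lxts_gf128mul_and_shl1_mask constants; the snippet below is only a scalar, byte-wise sketch of that doubling step, not the vectorized code.

/* Multiply a 128-bit XTS tweak (little-endian byte order) by x in
 * GF(2^128), reducing with x^128 + x^7 + x^2 + x + 1 (0x87).
 * Scalar illustration only; the assembler above does 16 of these
 * per call with vector shifts and the shl1 masks. */
#include <stdint.h>

static void xts_gf128mul_x(uint8_t t[16])
{
	unsigned int carry = 0;
	int i;

	for (i = 0; i < 16; i++) {
		unsigned int msb = t[i] >> 7;

		t[i] = (uint8_t)((t[i] << 1) | carry);
		carry = msb;
	}
	if (carry)
		t[0] ^= 0x87;
}

Applying this repeatedly to E_K2(IV) yields the tweaks T_1, T_2, ... used for the following blocks.
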
diff --git a/arch/x86/crypto/twofish_avx2_glue.c b/arch/x86/crypto/twofish_avx2_glue.c
new file mode 100644
index 000000000000..ce33b5be64ee
--- /dev/null
+++ b/arch/x86/crypto/twofish_avx2_glue.c
@@ -0,0 +1,584 @@
1/*
2 * Glue Code for x86_64/AVX2 assembler optimized version of Twofish
3 *
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/crypto.h>
16#include <linux/err.h>
17#include <crypto/algapi.h>
18#include <crypto/ctr.h>
19#include <crypto/twofish.h>
20#include <crypto/lrw.h>
21#include <crypto/xts.h>
22#include <asm/xcr.h>
23#include <asm/xsave.h>
24#include <asm/crypto/twofish.h>
25#include <asm/crypto/ablk_helper.h>
26#include <asm/crypto/glue_helper.h>
27#include <crypto/scatterwalk.h>
28
29#define TF_AVX2_PARALLEL_BLOCKS 16
30
31/* 16-way AVX2 parallel cipher functions */
32asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst,
33 const u8 *src);
34asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst,
35 const u8 *src);
36asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
37
38asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src,
39 le128 *iv);
40
41asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst,
42 const u8 *src, le128 *iv);
43asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst,
44 const u8 *src, le128 *iv);
45
46static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
47 const u8 *src)
48{
49 __twofish_enc_blk_3way(ctx, dst, src, false);
50}
51
52static const struct common_glue_ctx twofish_enc = {
53 .num_funcs = 4,
54 .fpu_blocks_limit = 8,
55
56 .funcs = { {
57 .num_blocks = 16,
58 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) }
59 }, {
60 .num_blocks = 8,
61 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
62 }, {
63 .num_blocks = 3,
64 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
65 }, {
66 .num_blocks = 1,
67 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
68 } }
69};
70
71static const struct common_glue_ctx twofish_ctr = {
72 .num_funcs = 4,
73 .fpu_blocks_limit = 8,
74
75 .funcs = { {
76 .num_blocks = 16,
77 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) }
78 }, {
79 .num_blocks = 8,
80 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
81 }, {
82 .num_blocks = 3,
83 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
84 }, {
85 .num_blocks = 1,
86 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
87 } }
88};
89
90static const struct common_glue_ctx twofish_enc_xts = {
91 .num_funcs = 3,
92 .fpu_blocks_limit = 8,
93
94 .funcs = { {
95 .num_blocks = 16,
96 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) }
97 }, {
98 .num_blocks = 8,
99 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
100 }, {
101 .num_blocks = 1,
102 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
103 } }
104};
105
106static const struct common_glue_ctx twofish_dec = {
107 .num_funcs = 4,
108 .fpu_blocks_limit = 8,
109
110 .funcs = { {
111 .num_blocks = 16,
112 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) }
113 }, {
114 .num_blocks = 8,
115 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
116 }, {
117 .num_blocks = 3,
118 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
119 }, {
120 .num_blocks = 1,
121 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
122 } }
123};
124
125static const struct common_glue_ctx twofish_dec_cbc = {
126 .num_funcs = 4,
127 .fpu_blocks_limit = 8,
128
129 .funcs = { {
130 .num_blocks = 16,
131 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) }
132 }, {
133 .num_blocks = 8,
134 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
135 }, {
136 .num_blocks = 3,
137 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
138 }, {
139 .num_blocks = 1,
140 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
141 } }
142};
143
144static const struct common_glue_ctx twofish_dec_xts = {
145 .num_funcs = 3,
146 .fpu_blocks_limit = 8,
147
148 .funcs = { {
149 .num_blocks = 16,
150 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) }
151 }, {
152 .num_blocks = 8,
153 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
154 }, {
155 .num_blocks = 1,
156 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
157 } }
158};
159
160static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
161 struct scatterlist *src, unsigned int nbytes)
162{
163 return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
164}
165
166static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
167 struct scatterlist *src, unsigned int nbytes)
168{
169 return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
170}
171
172static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
173 struct scatterlist *src, unsigned int nbytes)
174{
175 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
176 dst, src, nbytes);
177}
178
179static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
180 struct scatterlist *src, unsigned int nbytes)
181{
182 return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
183 nbytes);
184}
185
186static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
187 struct scatterlist *src, unsigned int nbytes)
188{
189 return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
190}
191
192static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
193{
194 /* since reusing AVX functions, starts using FPU at 8 parallel blocks */
195 return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
196}
197
198static inline void twofish_fpu_end(bool fpu_enabled)
199{
200 glue_fpu_end(fpu_enabled);
201}
202
203struct crypt_priv {
204 struct twofish_ctx *ctx;
205 bool fpu_enabled;
206};
207
208static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
209{
210 const unsigned int bsize = TF_BLOCK_SIZE;
211 struct crypt_priv *ctx = priv;
212 int i;
213
214 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
215
216 while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
217 twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
218 srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
219 nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
220 }
221
222 while (nbytes >= 8 * bsize) {
223 twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
224 srcdst += bsize * 8;
225 nbytes -= bsize * 8;
226 }
227
228 while (nbytes >= 3 * bsize) {
229 twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
230 srcdst += bsize * 3;
231 nbytes -= bsize * 3;
232 }
233
234 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
235 twofish_enc_blk(ctx->ctx, srcdst, srcdst);
236}
237
238static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
239{
240 const unsigned int bsize = TF_BLOCK_SIZE;
241 struct crypt_priv *ctx = priv;
242 int i;
243
244 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
245
246 while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
247 twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
248 srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
249 nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
250 }
251
252 while (nbytes >= 8 * bsize) {
253 twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
254 srcdst += bsize * 8;
255 nbytes -= bsize * 8;
256 }
257
258 while (nbytes >= 3 * bsize) {
259 twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
260 srcdst += bsize * 3;
261 nbytes -= bsize * 3;
262 }
263
264 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
265 twofish_dec_blk(ctx->ctx, srcdst, srcdst);
266}
267
268static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
269 struct scatterlist *src, unsigned int nbytes)
270{
271 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
272 be128 buf[TF_AVX2_PARALLEL_BLOCKS];
273 struct crypt_priv crypt_ctx = {
274 .ctx = &ctx->twofish_ctx,
275 .fpu_enabled = false,
276 };
277 struct lrw_crypt_req req = {
278 .tbuf = buf,
279 .tbuflen = sizeof(buf),
280
281 .table_ctx = &ctx->lrw_table,
282 .crypt_ctx = &crypt_ctx,
283 .crypt_fn = encrypt_callback,
284 };
285 int ret;
286
287 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
288 ret = lrw_crypt(desc, dst, src, nbytes, &req);
289 twofish_fpu_end(crypt_ctx.fpu_enabled);
290
291 return ret;
292}
293
294static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
295 struct scatterlist *src, unsigned int nbytes)
296{
297 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
298 be128 buf[TF_AVX2_PARALLEL_BLOCKS];
299 struct crypt_priv crypt_ctx = {
300 .ctx = &ctx->twofish_ctx,
301 .fpu_enabled = false,
302 };
303 struct lrw_crypt_req req = {
304 .tbuf = buf,
305 .tbuflen = sizeof(buf),
306
307 .table_ctx = &ctx->lrw_table,
308 .crypt_ctx = &crypt_ctx,
309 .crypt_fn = decrypt_callback,
310 };
311 int ret;
312
313 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
314 ret = lrw_crypt(desc, dst, src, nbytes, &req);
315 twofish_fpu_end(crypt_ctx.fpu_enabled);
316
317 return ret;
318}
319
320static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
321 struct scatterlist *src, unsigned int nbytes)
322{
323 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
324
325 return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
326 XTS_TWEAK_CAST(twofish_enc_blk),
327 &ctx->tweak_ctx, &ctx->crypt_ctx);
328}
329
330static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
331 struct scatterlist *src, unsigned int nbytes)
332{
333 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
334
335 return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
336 XTS_TWEAK_CAST(twofish_enc_blk),
337 &ctx->tweak_ctx, &ctx->crypt_ctx);
338}
339
340static struct crypto_alg tf_algs[10] = { {
341 .cra_name = "__ecb-twofish-avx2",
342 .cra_driver_name = "__driver-ecb-twofish-avx2",
343 .cra_priority = 0,
344 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
345 .cra_blocksize = TF_BLOCK_SIZE,
346 .cra_ctxsize = sizeof(struct twofish_ctx),
347 .cra_alignmask = 0,
348 .cra_type = &crypto_blkcipher_type,
349 .cra_module = THIS_MODULE,
350 .cra_u = {
351 .blkcipher = {
352 .min_keysize = TF_MIN_KEY_SIZE,
353 .max_keysize = TF_MAX_KEY_SIZE,
354 .setkey = twofish_setkey,
355 .encrypt = ecb_encrypt,
356 .decrypt = ecb_decrypt,
357 },
358 },
359}, {
360 .cra_name = "__cbc-twofish-avx2",
361 .cra_driver_name = "__driver-cbc-twofish-avx2",
362 .cra_priority = 0,
363 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
364 .cra_blocksize = TF_BLOCK_SIZE,
365 .cra_ctxsize = sizeof(struct twofish_ctx),
366 .cra_alignmask = 0,
367 .cra_type = &crypto_blkcipher_type,
368 .cra_module = THIS_MODULE,
369 .cra_u = {
370 .blkcipher = {
371 .min_keysize = TF_MIN_KEY_SIZE,
372 .max_keysize = TF_MAX_KEY_SIZE,
373 .setkey = twofish_setkey,
374 .encrypt = cbc_encrypt,
375 .decrypt = cbc_decrypt,
376 },
377 },
378}, {
379 .cra_name = "__ctr-twofish-avx2",
380 .cra_driver_name = "__driver-ctr-twofish-avx2",
381 .cra_priority = 0,
382 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
383 .cra_blocksize = 1,
384 .cra_ctxsize = sizeof(struct twofish_ctx),
385 .cra_alignmask = 0,
386 .cra_type = &crypto_blkcipher_type,
387 .cra_module = THIS_MODULE,
388 .cra_u = {
389 .blkcipher = {
390 .min_keysize = TF_MIN_KEY_SIZE,
391 .max_keysize = TF_MAX_KEY_SIZE,
392 .ivsize = TF_BLOCK_SIZE,
393 .setkey = twofish_setkey,
394 .encrypt = ctr_crypt,
395 .decrypt = ctr_crypt,
396 },
397 },
398}, {
399 .cra_name = "__lrw-twofish-avx2",
400 .cra_driver_name = "__driver-lrw-twofish-avx2",
401 .cra_priority = 0,
402 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
403 .cra_blocksize = TF_BLOCK_SIZE,
404 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
405 .cra_alignmask = 0,
406 .cra_type = &crypto_blkcipher_type,
407 .cra_module = THIS_MODULE,
408 .cra_exit = lrw_twofish_exit_tfm,
409 .cra_u = {
410 .blkcipher = {
411 .min_keysize = TF_MIN_KEY_SIZE +
412 TF_BLOCK_SIZE,
413 .max_keysize = TF_MAX_KEY_SIZE +
414 TF_BLOCK_SIZE,
415 .ivsize = TF_BLOCK_SIZE,
416 .setkey = lrw_twofish_setkey,
417 .encrypt = lrw_encrypt,
418 .decrypt = lrw_decrypt,
419 },
420 },
421}, {
422 .cra_name = "__xts-twofish-avx2",
423 .cra_driver_name = "__driver-xts-twofish-avx2",
424 .cra_priority = 0,
425 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
426 .cra_blocksize = TF_BLOCK_SIZE,
427 .cra_ctxsize = sizeof(struct twofish_xts_ctx),
428 .cra_alignmask = 0,
429 .cra_type = &crypto_blkcipher_type,
430 .cra_module = THIS_MODULE,
431 .cra_u = {
432 .blkcipher = {
433 .min_keysize = TF_MIN_KEY_SIZE * 2,
434 .max_keysize = TF_MAX_KEY_SIZE * 2,
435 .ivsize = TF_BLOCK_SIZE,
436 .setkey = xts_twofish_setkey,
437 .encrypt = xts_encrypt,
438 .decrypt = xts_decrypt,
439 },
440 },
441}, {
442 .cra_name = "ecb(twofish)",
443 .cra_driver_name = "ecb-twofish-avx2",
444 .cra_priority = 500,
445 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
446 .cra_blocksize = TF_BLOCK_SIZE,
447 .cra_ctxsize = sizeof(struct async_helper_ctx),
448 .cra_alignmask = 0,
449 .cra_type = &crypto_ablkcipher_type,
450 .cra_module = THIS_MODULE,
451 .cra_init = ablk_init,
452 .cra_exit = ablk_exit,
453 .cra_u = {
454 .ablkcipher = {
455 .min_keysize = TF_MIN_KEY_SIZE,
456 .max_keysize = TF_MAX_KEY_SIZE,
457 .setkey = ablk_set_key,
458 .encrypt = ablk_encrypt,
459 .decrypt = ablk_decrypt,
460 },
461 },
462}, {
463 .cra_name = "cbc(twofish)",
464 .cra_driver_name = "cbc-twofish-avx2",
465 .cra_priority = 500,
466 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
467 .cra_blocksize = TF_BLOCK_SIZE,
468 .cra_ctxsize = sizeof(struct async_helper_ctx),
469 .cra_alignmask = 0,
470 .cra_type = &crypto_ablkcipher_type,
471 .cra_module = THIS_MODULE,
472 .cra_init = ablk_init,
473 .cra_exit = ablk_exit,
474 .cra_u = {
475 .ablkcipher = {
476 .min_keysize = TF_MIN_KEY_SIZE,
477 .max_keysize = TF_MAX_KEY_SIZE,
478 .ivsize = TF_BLOCK_SIZE,
479 .setkey = ablk_set_key,
480 .encrypt = __ablk_encrypt,
481 .decrypt = ablk_decrypt,
482 },
483 },
484}, {
485 .cra_name = "ctr(twofish)",
486 .cra_driver_name = "ctr-twofish-avx2",
487 .cra_priority = 500,
488 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
489 .cra_blocksize = 1,
490 .cra_ctxsize = sizeof(struct async_helper_ctx),
491 .cra_alignmask = 0,
492 .cra_type = &crypto_ablkcipher_type,
493 .cra_module = THIS_MODULE,
494 .cra_init = ablk_init,
495 .cra_exit = ablk_exit,
496 .cra_u = {
497 .ablkcipher = {
498 .min_keysize = TF_MIN_KEY_SIZE,
499 .max_keysize = TF_MAX_KEY_SIZE,
500 .ivsize = TF_BLOCK_SIZE,
501 .setkey = ablk_set_key,
502 .encrypt = ablk_encrypt,
503 .decrypt = ablk_encrypt,
504 .geniv = "chainiv",
505 },
506 },
507}, {
508 .cra_name = "lrw(twofish)",
509 .cra_driver_name = "lrw-twofish-avx2",
510 .cra_priority = 500,
511 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
512 .cra_blocksize = TF_BLOCK_SIZE,
513 .cra_ctxsize = sizeof(struct async_helper_ctx),
514 .cra_alignmask = 0,
515 .cra_type = &crypto_ablkcipher_type,
516 .cra_module = THIS_MODULE,
517 .cra_init = ablk_init,
518 .cra_exit = ablk_exit,
519 .cra_u = {
520 .ablkcipher = {
521 .min_keysize = TF_MIN_KEY_SIZE +
522 TF_BLOCK_SIZE,
523 .max_keysize = TF_MAX_KEY_SIZE +
524 TF_BLOCK_SIZE,
525 .ivsize = TF_BLOCK_SIZE,
526 .setkey = ablk_set_key,
527 .encrypt = ablk_encrypt,
528 .decrypt = ablk_decrypt,
529 },
530 },
531}, {
532 .cra_name = "xts(twofish)",
533 .cra_driver_name = "xts-twofish-avx2",
534 .cra_priority = 500,
535 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
536 .cra_blocksize = TF_BLOCK_SIZE,
537 .cra_ctxsize = sizeof(struct async_helper_ctx),
538 .cra_alignmask = 0,
539 .cra_type = &crypto_ablkcipher_type,
540 .cra_module = THIS_MODULE,
541 .cra_init = ablk_init,
542 .cra_exit = ablk_exit,
543 .cra_u = {
544 .ablkcipher = {
545 .min_keysize = TF_MIN_KEY_SIZE * 2,
546 .max_keysize = TF_MAX_KEY_SIZE * 2,
547 .ivsize = TF_BLOCK_SIZE,
548 .setkey = ablk_set_key,
549 .encrypt = ablk_encrypt,
550 .decrypt = ablk_decrypt,
551 },
552 },
553} };
554
555static int __init init(void)
556{
557 u64 xcr0;
558
559 if (!cpu_has_avx2 || !cpu_has_osxsave) {
560 pr_info("AVX2 instructions are not detected.\n");
561 return -ENODEV;
562 }
563
564 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
565 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
566 pr_info("AVX2 detected but unusable.\n");
567 return -ENODEV;
568 }
569
570 return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
571}
572
573static void __exit fini(void)
574{
575 crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
576}
577
578module_init(init);
579module_exit(fini);
580
581MODULE_LICENSE("GPL");
582MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized");
583MODULE_ALIAS("twofish");
584MODULE_ALIAS("twofish-asm");
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 94ac91d26e47..2047a562f6b3 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -4,6 +4,8 @@
4 * Copyright (C) 2012 Johannes Goetzfried 4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> 5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 * 6 *
7 * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
8 *
7 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or 11 * the Free Software Foundation; either version 2 of the License, or
@@ -48,13 +50,26 @@
48/* 8-way parallel cipher functions */ 50/* 8-way parallel cipher functions */
49asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, 51asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
50 const u8 *src); 52 const u8 *src);
53EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way);
54
51asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, 55asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
52 const u8 *src); 56 const u8 *src);
57EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way);
53 58
54asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, 59asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
55 const u8 *src); 60 const u8 *src);
61EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way);
62
56asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, 63asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
57 const u8 *src, le128 *iv); 64 const u8 *src, le128 *iv);
65EXPORT_SYMBOL_GPL(twofish_ctr_8way);
66
67asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
68 const u8 *src, le128 *iv);
69EXPORT_SYMBOL_GPL(twofish_xts_enc_8way);
70asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
71 const u8 *src, le128 *iv);
72EXPORT_SYMBOL_GPL(twofish_xts_dec_8way);
58 73
59static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, 74static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
60 const u8 *src) 75 const u8 *src)
@@ -62,6 +77,20 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
62 __twofish_enc_blk_3way(ctx, dst, src, false); 77 __twofish_enc_blk_3way(ctx, dst, src, false);
63} 78}
64 79
80void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
81{
82 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
83 GLUE_FUNC_CAST(twofish_enc_blk));
84}
85EXPORT_SYMBOL_GPL(twofish_xts_enc);
86
87void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
88{
89 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
90 GLUE_FUNC_CAST(twofish_dec_blk));
91}
92EXPORT_SYMBOL_GPL(twofish_xts_dec);
93
65 94
66static const struct common_glue_ctx twofish_enc = { 95static const struct common_glue_ctx twofish_enc = {
67 .num_funcs = 3, 96 .num_funcs = 3,
@@ -95,6 +124,19 @@ static const struct common_glue_ctx twofish_ctr = {
95 } } 124 } }
96}; 125};
97 126
127static const struct common_glue_ctx twofish_enc_xts = {
128 .num_funcs = 2,
129 .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
130
131 .funcs = { {
132 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
133 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
134 }, {
135 .num_blocks = 1,
136 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
137 } }
138};
139
98static const struct common_glue_ctx twofish_dec = { 140static const struct common_glue_ctx twofish_dec = {
99 .num_funcs = 3, 141 .num_funcs = 3,
100 .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, 142 .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
@@ -127,6 +169,19 @@ static const struct common_glue_ctx twofish_dec_cbc = {
127 } } 169 } }
128}; 170};
129 171
172static const struct common_glue_ctx twofish_dec_xts = {
173 .num_funcs = 2,
174 .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
175
176 .funcs = { {
177 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
178 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
179 }, {
180 .num_blocks = 1,
181 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
182 } }
183};
184
130static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 185static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
131 struct scatterlist *src, unsigned int nbytes) 186 struct scatterlist *src, unsigned int nbytes)
132{ 187{
@@ -275,54 +330,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
275 struct scatterlist *src, unsigned int nbytes) 330 struct scatterlist *src, unsigned int nbytes)
276{ 331{
277 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 332 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
278 be128 buf[TWOFISH_PARALLEL_BLOCKS];
279 struct crypt_priv crypt_ctx = {
280 .ctx = &ctx->crypt_ctx,
281 .fpu_enabled = false,
282 };
283 struct xts_crypt_req req = {
284 .tbuf = buf,
285 .tbuflen = sizeof(buf),
286 333
287 .tweak_ctx = &ctx->tweak_ctx, 334 return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
288 .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), 335 XTS_TWEAK_CAST(twofish_enc_blk),
289 .crypt_ctx = &crypt_ctx, 336 &ctx->tweak_ctx, &ctx->crypt_ctx);
290 .crypt_fn = encrypt_callback,
291 };
292 int ret;
293
294 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
295 ret = xts_crypt(desc, dst, src, nbytes, &req);
296 twofish_fpu_end(crypt_ctx.fpu_enabled);
297
298 return ret;
299} 337}
300 338
301static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 339static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
302 struct scatterlist *src, unsigned int nbytes) 340 struct scatterlist *src, unsigned int nbytes)
303{ 341{
304 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 342 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
305 be128 buf[TWOFISH_PARALLEL_BLOCKS];
306 struct crypt_priv crypt_ctx = {
307 .ctx = &ctx->crypt_ctx,
308 .fpu_enabled = false,
309 };
310 struct xts_crypt_req req = {
311 .tbuf = buf,
312 .tbuflen = sizeof(buf),
313
314 .tweak_ctx = &ctx->tweak_ctx,
315 .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
316 .crypt_ctx = &crypt_ctx,
317 .crypt_fn = decrypt_callback,
318 };
319 int ret;
320 343
321 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 344 return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
322 ret = xts_crypt(desc, dst, src, nbytes, &req); 345 XTS_TWEAK_CAST(twofish_enc_blk),
323 twofish_fpu_end(crypt_ctx.fpu_enabled); 346 &ctx->tweak_ctx, &ctx->crypt_ctx);
324
325 return ret;
326} 347}
327 348
328static struct crypto_alg twofish_algs[10] = { { 349static struct crypto_alg twofish_algs[10] = { {
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 8010ebc5705f..e99ac27f95b2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -293,6 +293,7 @@ extern const char * const x86_power_flags[32];
293#define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3) 293#define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3)
294#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) 294#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES)
295#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) 295#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX)
296#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2)
296#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) 297#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
297#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) 298#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
298#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) 299#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
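
Note: cpu_has_avx2 on its own is not sufficient for the new modules; their init() routines (see twofish_avx2_glue.c above) also require cpu_has_osxsave and check XCR0 for the SSE and YMM bits, because the 256-bit registers are only usable once the OS saves and restores YMM state. Below is a hypothetical user-space analogue of the same test using raw CPUID and XGETBV; the kernel itself uses the cpu_has_* macros and xgetbv().

#include <stdint.h>

static void cpuid_count(uint32_t leaf, uint32_t sub, uint32_t *a,
			uint32_t *b, uint32_t *c, uint32_t *d)
{
	__asm__ volatile("cpuid"
			 : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
			 : "a"(leaf), "c"(sub));
}

static int avx2_usable(void)
{
	uint32_t a, b, c, d, lo, hi;

	cpuid_count(1, 0, &a, &b, &c, &d);
	if (!(c & (1u << 27)))		/* OSXSAVE */
		return 0;

	cpuid_count(7, 0, &a, &b, &c, &d);
	if (!(b & (1u << 5)))		/* AVX2 */
		return 0;

	/* XGETBV(0) reads XCR0; bit 1 = SSE state, bit 2 = YMM state */
	__asm__ volatile("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0));
	return ((((uint64_t)hi << 32) | lo) & 0x6) == 0x6;
}
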
diff --git a/arch/x86/include/asm/crypto/blowfish.h b/arch/x86/include/asm/crypto/blowfish.h
new file mode 100644
index 000000000000..f097b2face10
--- /dev/null
+++ b/arch/x86/include/asm/crypto/blowfish.h
@@ -0,0 +1,43 @@
1#ifndef ASM_X86_BLOWFISH_H
2#define ASM_X86_BLOWFISH_H
3
4#include <linux/crypto.h>
5#include <crypto/blowfish.h>
6
7#define BF_PARALLEL_BLOCKS 4
8
9/* regular block cipher functions */
10asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
11 bool xor);
12asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
13
14/* 4-way parallel cipher functions */
15asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
16 const u8 *src, bool xor);
17asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
18 const u8 *src);
19
20static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
21{
22 __blowfish_enc_blk(ctx, dst, src, false);
23}
24
25static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
26 const u8 *src)
27{
28 __blowfish_enc_blk(ctx, dst, src, true);
29}
30
31static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
32 const u8 *src)
33{
34 __blowfish_enc_blk_4way(ctx, dst, src, false);
35}
36
37static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
38 const u8 *src)
39{
40 __blowfish_enc_blk_4way(ctx, dst, src, true);
41}
42
43#endif
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h
index 98038add801e..bb93333d9200 100644
--- a/arch/x86/include/asm/crypto/camellia.h
+++ b/arch/x86/include/asm/crypto/camellia.h
@@ -48,6 +48,22 @@ asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
48asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, 48asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
49 const u8 *src); 49 const u8 *src);
50 50
51/* 16-way parallel cipher functions (avx/aes-ni) */
52asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
53 const u8 *src);
54asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
55 const u8 *src);
56
57asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
58 const u8 *src);
59asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
60 const u8 *src, le128 *iv);
61
62asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
63 const u8 *src, le128 *iv);
64asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
65 const u8 *src, le128 *iv);
66
51static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, 67static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
52 const u8 *src) 68 const u8 *src)
53{ 69{
@@ -79,4 +95,7 @@ extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
79extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, 95extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
80 le128 *iv); 96 le128 *iv);
81 97
98extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
99extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
100
82#endif /* ASM_X86_CAMELLIA_H */ 101#endif /* ASM_X86_CAMELLIA_H */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index e2d65b061d27..1eef55596e82 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -14,10 +14,13 @@ typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
14typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); 14typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
15typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, 15typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
16 le128 *iv); 16 le128 *iv);
17typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src,
18 le128 *iv);
17 19
18#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) 20#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
19#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) 21#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
20#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) 22#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
23#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn))
21 24
22struct common_glue_func_entry { 25struct common_glue_func_entry {
23 unsigned int num_blocks; /* number of blocks that @fn will process */ 26 unsigned int num_blocks; /* number of blocks that @fn will process */
@@ -25,6 +28,7 @@ struct common_glue_func_entry {
25 common_glue_func_t ecb; 28 common_glue_func_t ecb;
26 common_glue_cbc_func_t cbc; 29 common_glue_cbc_func_t cbc;
27 common_glue_ctr_func_t ctr; 30 common_glue_ctr_func_t ctr;
31 common_glue_xts_func_t xts;
28 } fn_u; 32 } fn_u;
29}; 33};
30 34
@@ -96,6 +100,16 @@ static inline void le128_inc(le128 *i)
96 i->b = cpu_to_le64(b); 100 i->b = cpu_to_le64(b);
97} 101}
98 102
103static inline void le128_gf128mul_x_ble(le128 *dst, const le128 *src)
104{
105 u64 a = le64_to_cpu(src->a);
106 u64 b = le64_to_cpu(src->b);
107 u64 _tt = ((s64)a >> 63) & 0x87;
108
109 dst->a = cpu_to_le64((a << 1) ^ (b >> 63));
110 dst->b = cpu_to_le64((b << 1) ^ _tt);
111}
112
99extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, 113extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
100 struct blkcipher_desc *desc, 114 struct blkcipher_desc *desc,
101 struct scatterlist *dst, 115 struct scatterlist *dst,
@@ -118,4 +132,14 @@ extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
118 struct scatterlist *dst, 132 struct scatterlist *dst,
119 struct scatterlist *src, unsigned int nbytes); 133 struct scatterlist *src, unsigned int nbytes);
120 134
135extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
136 struct blkcipher_desc *desc,
137 struct scatterlist *dst,
138 struct scatterlist *src, unsigned int nbytes,
139 common_glue_func_t tweak_fn, void *tweak_ctx,
140 void *crypt_ctx);
141
142extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
143 le128 *iv, common_glue_func_t fn);
144
121#endif /* _CRYPTO_GLUE_HELPER_H */ 145#endif /* _CRYPTO_GLUE_HELPER_H */
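
Note: glue_xts_crypt_128bit() takes a tweak function plus its own key context; the IV is encrypted once with the tweak key to form T_0, and le128_gf128mul_x_ble() advances the tweak for every following 16-byte block. Below is a minimal sketch of that flow under the declarations above, with a local xor128() helper and ecb_one() standing in for a one-block fn_u.ecb routine; the real helper additionally walks scatterlists, manages the FPU and batches the wide fn_u.xts calls.

static void xor128(u128 *dst, const u128 *s1, const u128 *s2)
{
	dst->a = s1->a ^ s2->a;
	dst->b = s1->b ^ s2->b;
}

static void xts_flow_sketch(common_glue_func_t ecb_one, void *crypt_ctx,
			    common_glue_func_t tweak_fn, void *tweak_ctx,
			    u128 *dst, const u128 *src,
			    unsigned int nblocks, le128 *iv)
{
	le128 t = *iv;
	unsigned int i;

	/* T_0 = E_Ktweak(IV) */
	tweak_fn(tweak_ctx, (u8 *)&t, (const u8 *)&t);

	for (i = 0; i < nblocks; i++) {
		/* C_i = E_K(P_i ^ T_i) ^ T_i */
		xor128(&dst[i], &src[i], (const u128 *)&t);
		ecb_one(crypt_ctx, (u8 *)&dst[i], (const u8 *)&dst[i]);
		xor128(&dst[i], &dst[i], (const u128 *)&t);

		/* T_{i+1} = T_i * x mod x^128 + x^7 + x^2 + x + 1 */
		le128_gf128mul_x_ble(&t, &t);
	}
}
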
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
index 0da1d3e2a55c..33c2b8a435da 100644
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -6,6 +6,16 @@
6 6
7#define SERPENT_PARALLEL_BLOCKS 8 7#define SERPENT_PARALLEL_BLOCKS 8
8 8
9struct serpent_lrw_ctx {
10 struct lrw_table_ctx lrw_table;
11 struct serpent_ctx serpent_ctx;
12};
13
14struct serpent_xts_ctx {
15 struct serpent_ctx tweak_ctx;
16 struct serpent_ctx crypt_ctx;
17};
18
9asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, 19asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
10 const u8 *src); 20 const u8 *src);
11asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, 21asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
@@ -16,4 +26,23 @@ asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
16asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, 26asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
17 const u8 *src, le128 *iv); 27 const u8 *src, le128 *iv);
18 28
29asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
30 const u8 *src, le128 *iv);
31asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
32 const u8 *src, le128 *iv);
33
34extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
35 le128 *iv);
36
37extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
38extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
39
40extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
41 unsigned int keylen);
42
43extern void lrw_serpent_exit_tfm(struct crypto_tfm *tfm);
44
45extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
46 unsigned int keylen);
47
19#endif 48#endif
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index 878c51ceebb5..e655c6029b45 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -28,6 +28,20 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
28asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, 28asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
29 const u8 *src); 29 const u8 *src);
30 30
31/* 8-way parallel cipher functions */
32asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
33 const u8 *src);
34asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
35 const u8 *src);
36asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
37 const u8 *src);
38asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
39 const u8 *src, le128 *iv);
40asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
41 const u8 *src, le128 *iv);
42asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
43 const u8 *src, le128 *iv);
44
31/* helpers from twofish_x86_64-3way module */ 45/* helpers from twofish_x86_64-3way module */
32extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); 46extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
33extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, 47extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
@@ -43,4 +57,8 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
43extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, 57extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
44 unsigned int keylen); 58 unsigned int keylen);
45 59
60/* helpers from twofish-avx module */
61extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
62extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
63
46#endif /* ASM_X86_TWOFISH_H */ 64#endif /* ASM_X86_TWOFISH_H */
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 05c0ce52f96d..622d8a48cbe9 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -198,6 +198,7 @@ config CRYPTO_GCM
198 select CRYPTO_CTR 198 select CRYPTO_CTR
199 select CRYPTO_AEAD 199 select CRYPTO_AEAD
200 select CRYPTO_GHASH 200 select CRYPTO_GHASH
201 select CRYPTO_NULL
201 help 202 help
202 Support for Galois/Counter Mode (GCM) and Galois Message 203 Support for Galois/Counter Mode (GCM) and Galois Message
203 Authentication Code (GMAC). Required for IPSec. 204 Authentication Code (GMAC). Required for IPSec.
@@ -282,6 +283,17 @@ config CRYPTO_XTS
282 283
283comment "Hash modes" 284comment "Hash modes"
284 285
286config CRYPTO_CMAC
287 tristate "CMAC support"
288 select CRYPTO_HASH
289 select CRYPTO_MANAGER
290 help
291 Cipher-based Message Authentication Code (CMAC) specified by
292 The National Institute of Standards and Technology (NIST).
293
294 https://tools.ietf.org/html/rfc4493
295 http://csrc.nist.gov/publications/nistpubs/800-38B/SP_800-38B.pdf
296
285config CRYPTO_HMAC 297config CRYPTO_HMAC
286 tristate "HMAC support" 298 tristate "HMAC support"
287 select CRYPTO_HASH 299 select CRYPTO_HASH
@@ -322,19 +334,9 @@ config CRYPTO_CRC32C
322 by iSCSI for header and data digests and by others. 334 by iSCSI for header and data digests and by others.
323 See Castagnoli93. Module will be crc32c. 335 See Castagnoli93. Module will be crc32c.
324 336
325config CRYPTO_CRC32C_X86_64
326 bool
327 depends on X86 && 64BIT
328 select CRYPTO_HASH
329 help
330 In Intel processor with SSE4.2 supported, the processor will
331 support CRC32C calculation using hardware accelerated CRC32
332 instruction optimized with PCLMULQDQ instruction when available.
333
334config CRYPTO_CRC32C_INTEL 337config CRYPTO_CRC32C_INTEL
335 tristate "CRC32c INTEL hardware acceleration" 338 tristate "CRC32c INTEL hardware acceleration"
336 depends on X86 339 depends on X86
337 select CRYPTO_CRC32C_X86_64 if 64BIT
338 select CRYPTO_HASH 340 select CRYPTO_HASH
339 help 341 help
340 In Intel processor with SSE4.2 supported, the processor will 342 In Intel processor with SSE4.2 supported, the processor will
@@ -480,6 +482,28 @@ config CRYPTO_SHA1_SSSE3
480 using Supplemental SSE3 (SSSE3) instructions or Advanced Vector 482 using Supplemental SSE3 (SSSE3) instructions or Advanced Vector
481 Extensions (AVX), when available. 483 Extensions (AVX), when available.
482 484
485config CRYPTO_SHA256_SSSE3
486 tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2)"
487 depends on X86 && 64BIT
488 select CRYPTO_SHA256
489 select CRYPTO_HASH
490 help
491 SHA-256 secure hash standard (DFIPS 180-2) implemented
492 using Supplemental SSE3 (SSSE3) instructions, or Advanced Vector
493 Extensions version 1 (AVX1), or Advanced Vector Extensions
494 version 2 (AVX2) instructions, when available.
495
496config CRYPTO_SHA512_SSSE3
497 tristate "SHA512 digest algorithm (SSSE3/AVX/AVX2)"
498 depends on X86 && 64BIT
499 select CRYPTO_SHA512
500 select CRYPTO_HASH
501 help
502 SHA-512 secure hash standard (DFIPS 180-2) implemented
503 using Supplemental SSE3 (SSSE3) instructions, or Advanced Vector
504 Extensions version 1 (AVX1), or Advanced Vector Extensions
505 version 2 (AVX2) instructions, when available.
506
483config CRYPTO_SHA1_SPARC64 507config CRYPTO_SHA1_SPARC64
484 tristate "SHA1 digest algorithm (SPARC64)" 508 tristate "SHA1 digest algorithm (SPARC64)"
485 depends on SPARC64 509 depends on SPARC64
@@ -654,6 +678,7 @@ config CRYPTO_AES_NI_INTEL
654 select CRYPTO_CRYPTD 678 select CRYPTO_CRYPTD
655 select CRYPTO_ABLK_HELPER_X86 679 select CRYPTO_ABLK_HELPER_X86
656 select CRYPTO_ALGAPI 680 select CRYPTO_ALGAPI
681 select CRYPTO_GLUE_HELPER_X86 if 64BIT
657 select CRYPTO_LRW 682 select CRYPTO_LRW
658 select CRYPTO_XTS 683 select CRYPTO_XTS
659 help 684 help
@@ -795,6 +820,24 @@ config CRYPTO_BLOWFISH_X86_64
795 See also: 820 See also:
796 <http://www.schneier.com/blowfish.html> 821 <http://www.schneier.com/blowfish.html>
797 822
823config CRYPTO_BLOWFISH_AVX2_X86_64
824 tristate "Blowfish cipher algorithm (x86_64/AVX2)"
825 depends on X86 && 64BIT
826 select CRYPTO_ALGAPI
827 select CRYPTO_CRYPTD
828 select CRYPTO_ABLK_HELPER_X86
829 select CRYPTO_BLOWFISH_COMMON
830 select CRYPTO_BLOWFISH_X86_64
831 help
832 Blowfish cipher algorithm (x86_64/AVX2), by Bruce Schneier.
833
834 This is a variable key length cipher which can use keys from 32
835 bits to 448 bits in length. It's fast, simple and specifically
836 designed for use on "large microprocessors".
837
838 See also:
839 <http://www.schneier.com/blowfish.html>
840
798config CRYPTO_CAMELLIA 841config CRYPTO_CAMELLIA
799 tristate "Camellia cipher algorithms" 842 tristate "Camellia cipher algorithms"
800 depends on CRYPTO 843 depends on CRYPTO
@@ -851,6 +894,29 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
851 See also: 894 See also:
852 <https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html> 895 <https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html>
853 896
897config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64
898 tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX2)"
899 depends on X86 && 64BIT
900 depends on CRYPTO
901 select CRYPTO_ALGAPI
902 select CRYPTO_CRYPTD
903 select CRYPTO_ABLK_HELPER_X86
904 select CRYPTO_GLUE_HELPER_X86
905 select CRYPTO_CAMELLIA_X86_64
906 select CRYPTO_CAMELLIA_AESNI_AVX_X86_64
907 select CRYPTO_LRW
908 select CRYPTO_XTS
909 help
910 Camellia cipher algorithm module (x86_64/AES-NI/AVX2).
911
912 Camellia is a symmetric key block cipher developed jointly
913 at NTT and Mitsubishi Electric Corporation.
914
915 The Camellia specifies three key sizes: 128, 192 and 256 bits.
916
917 See also:
918 <https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html>
919
854config CRYPTO_CAMELLIA_SPARC64 920config CRYPTO_CAMELLIA_SPARC64
855 tristate "Camellia cipher algorithm (SPARC64)" 921 tristate "Camellia cipher algorithm (SPARC64)"
856 depends on SPARC64 922 depends on SPARC64
@@ -1088,6 +1154,29 @@ config CRYPTO_SERPENT_AVX_X86_64
1088 See also: 1154 See also:
1089 <http://www.cl.cam.ac.uk/~rja14/serpent.html> 1155 <http://www.cl.cam.ac.uk/~rja14/serpent.html>
1090 1156
1157config CRYPTO_SERPENT_AVX2_X86_64
1158 tristate "Serpent cipher algorithm (x86_64/AVX2)"
1159 depends on X86 && 64BIT
1160 select CRYPTO_ALGAPI
1161 select CRYPTO_CRYPTD
1162 select CRYPTO_ABLK_HELPER_X86
1163 select CRYPTO_GLUE_HELPER_X86
1164 select CRYPTO_SERPENT
1165 select CRYPTO_SERPENT_AVX_X86_64
1166 select CRYPTO_LRW
1167 select CRYPTO_XTS
1168 help
1169 Serpent cipher algorithm, by Anderson, Biham & Knudsen.
1170
1171 Keys are allowed to be from 0 to 256 bits in length, in steps
1172 of 8 bits.
1173
1174 This module provides Serpent cipher algorithm that processes 16
1175 blocks parallel using AVX2 instruction set.
1176
1177 See also:
1178 <http://www.cl.cam.ac.uk/~rja14/serpent.html>
1179
1091config CRYPTO_TEA 1180config CRYPTO_TEA
1092 tristate "TEA, XTEA and XETA cipher algorithms" 1181 tristate "TEA, XTEA and XETA cipher algorithms"
1093 select CRYPTO_ALGAPI 1182 select CRYPTO_ALGAPI
@@ -1207,6 +1296,30 @@ config CRYPTO_TWOFISH_AVX_X86_64
1207 See also: 1296 See also:
1208 <http://www.schneier.com/twofish.html> 1297 <http://www.schneier.com/twofish.html>
1209 1298
1299config CRYPTO_TWOFISH_AVX2_X86_64
1300 tristate "Twofish cipher algorithm (x86_64/AVX2)"
1301 depends on X86 && 64BIT
1302 select CRYPTO_ALGAPI
1303 select CRYPTO_CRYPTD
1304 select CRYPTO_ABLK_HELPER_X86
1305 select CRYPTO_GLUE_HELPER_X86
1306 select CRYPTO_TWOFISH_COMMON
1307 select CRYPTO_TWOFISH_X86_64
1308 select CRYPTO_TWOFISH_X86_64_3WAY
1309 select CRYPTO_TWOFISH_AVX_X86_64
1310 select CRYPTO_LRW
1311 select CRYPTO_XTS
1312 help
1313 Twofish cipher algorithm (x86_64/AVX2).
1314
1315 Twofish was submitted as an AES (Advanced Encryption Standard)
1316 candidate cipher by researchers at CounterPane Systems. It is a
1317 16 round block cipher supporting key sizes of 128, 192, and 256
1318 bits.
1319
1320 See also:
1321 <http://www.schneier.com/twofish.html>
1322
1210comment "Compression" 1323comment "Compression"
1211 1324
1212config CRYPTO_DEFLATE 1325config CRYPTO_DEFLATE
diff --git a/crypto/Makefile b/crypto/Makefile
index be1a1bebbb86..a8e9b0fefbe9 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -32,6 +32,7 @@ cryptomgr-y := algboss.o testmgr.o
32 32
33obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o 33obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
34obj-$(CONFIG_CRYPTO_USER) += crypto_user.o 34obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
35obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
35obj-$(CONFIG_CRYPTO_HMAC) += hmac.o 36obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
36obj-$(CONFIG_CRYPTO_VMAC) += vmac.o 37obj-$(CONFIG_CRYPTO_VMAC) += vmac.o
37obj-$(CONFIG_CRYPTO_XCBC) += xcbc.o 38obj-$(CONFIG_CRYPTO_XCBC) += xcbc.o
diff --git a/crypto/cmac.c b/crypto/cmac.c
new file mode 100644
index 000000000000..50880cf17fad
--- /dev/null
+++ b/crypto/cmac.c
@@ -0,0 +1,315 @@
1/*
2 * CMAC: Cipher Block Mode for Authentication
3 *
4 * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * Based on work by:
7 * Copyright © 2013 Tom St Denis <tstdenis@elliptictech.com>
8 * Based on crypto/xcbc.c:
9 * Copyright © 2006 USAGI/WIDE Project,
10 * Author: Kazunori Miyazawa <miyazawa@linux-ipv6.org>
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 */
18
19#include <crypto/internal/hash.h>
20#include <linux/err.h>
21#include <linux/kernel.h>
22#include <linux/module.h>
23
24/*
25 * +------------------------
26 * | <parent tfm>
27 * +------------------------
28 * | cmac_tfm_ctx
29 * +------------------------
30 * | consts (block size * 2)
31 * +------------------------
32 */
33struct cmac_tfm_ctx {
34 struct crypto_cipher *child;
35 u8 ctx[];
36};
37
38/*
39 * +------------------------
40 * | <shash desc>
41 * +------------------------
42 * | cmac_desc_ctx
43 * +------------------------
44 * | odds (block size)
45 * +------------------------
46 * | prev (block size)
47 * +------------------------
48 */
49struct cmac_desc_ctx {
50 unsigned int len;
51 u8 ctx[];
52};
53
54static int crypto_cmac_digest_setkey(struct crypto_shash *parent,
55 const u8 *inkey, unsigned int keylen)
56{
57 unsigned long alignmask = crypto_shash_alignmask(parent);
58 struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent);
59 unsigned int bs = crypto_shash_blocksize(parent);
60 __be64 *consts = PTR_ALIGN((void *)ctx->ctx, alignmask + 1);
61 u64 _const[2];
62 int i, err = 0;
63 u8 msb_mask, gfmask;
64
65 err = crypto_cipher_setkey(ctx->child, inkey, keylen);
66 if (err)
67 return err;
68
69 /* encrypt the zero block */
70 memset(consts, 0, bs);
71 crypto_cipher_encrypt_one(ctx->child, (u8 *)consts, (u8 *)consts);
72
73 switch (bs) {
74 case 16:
75 gfmask = 0x87;
76 _const[0] = be64_to_cpu(consts[1]);
77 _const[1] = be64_to_cpu(consts[0]);
78
79 /* gf(2^128) multiply zero-ciphertext with u and u^2 */
80 for (i = 0; i < 4; i += 2) {
81 msb_mask = ((s64)_const[1] >> 63) & gfmask;
82 _const[1] = (_const[1] << 1) | (_const[0] >> 63);
83 _const[0] = (_const[0] << 1) ^ msb_mask;
84
85 consts[i + 0] = cpu_to_be64(_const[1]);
86 consts[i + 1] = cpu_to_be64(_const[0]);
87 }
88
89 break;
90 case 8:
91 gfmask = 0x1B;
92 _const[0] = be64_to_cpu(consts[0]);
93
94 /* gf(2^64) multiply zero-ciphertext with u and u^2 */
95 for (i = 0; i < 2; i++) {
96 msb_mask = ((s64)_const[0] >> 63) & gfmask;
97 _const[0] = (_const[0] << 1) ^ msb_mask;
98
99 consts[i] = cpu_to_be64(_const[0]);
100 }
101
102 break;
103 }
104
105 return 0;
106}
107
108static int crypto_cmac_digest_init(struct shash_desc *pdesc)
109{
110 unsigned long alignmask = crypto_shash_alignmask(pdesc->tfm);
111 struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
112 int bs = crypto_shash_blocksize(pdesc->tfm);
113 u8 *prev = PTR_ALIGN((void *)ctx->ctx, alignmask + 1) + bs;
114
115 ctx->len = 0;
116 memset(prev, 0, bs);
117
118 return 0;
119}
120
121static int crypto_cmac_digest_update(struct shash_desc *pdesc, const u8 *p,
122 unsigned int len)
123{
124 struct crypto_shash *parent = pdesc->tfm;
125 unsigned long alignmask = crypto_shash_alignmask(parent);
126 struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
127 struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
128 struct crypto_cipher *tfm = tctx->child;
129 int bs = crypto_shash_blocksize(parent);
130 u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1);
131 u8 *prev = odds + bs;
132
133 /* checking the data can fill the block */
134 if ((ctx->len + len) <= bs) {
135 memcpy(odds + ctx->len, p, len);
136 ctx->len += len;
137 return 0;
138 }
139
140 /* filling odds with new data and encrypting it */
141 memcpy(odds + ctx->len, p, bs - ctx->len);
142 len -= bs - ctx->len;
143 p += bs - ctx->len;
144
145 crypto_xor(prev, odds, bs);
146 crypto_cipher_encrypt_one(tfm, prev, prev);
147
148 /* clearing the length */
149 ctx->len = 0;
150
151 /* encrypting the rest of data */
152 while (len > bs) {
153 crypto_xor(prev, p, bs);
154 crypto_cipher_encrypt_one(tfm, prev, prev);
155 p += bs;
156 len -= bs;
157 }
158
159 /* keeping the surplus of blocksize */
160 if (len) {
161 memcpy(odds, p, len);
162 ctx->len = len;
163 }
164
165 return 0;
166}
167
168static int crypto_cmac_digest_final(struct shash_desc *pdesc, u8 *out)
169{
170 struct crypto_shash *parent = pdesc->tfm;
171 unsigned long alignmask = crypto_shash_alignmask(parent);
172 struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
173 struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
174 struct crypto_cipher *tfm = tctx->child;
175 int bs = crypto_shash_blocksize(parent);
176 u8 *consts = PTR_ALIGN((void *)tctx->ctx, alignmask + 1);
177 u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1);
178 u8 *prev = odds + bs;
179 unsigned int offset = 0;
180
181 if (ctx->len != bs) {
182 unsigned int rlen;
183 u8 *p = odds + ctx->len;
184
185 *p = 0x80;
186 p++;
187
188 rlen = bs - ctx->len - 1;
189 if (rlen)
190 memset(p, 0, rlen);
191
192 offset += bs;
193 }
194
195 crypto_xor(prev, odds, bs);
196 crypto_xor(prev, consts + offset, bs);
197
198 crypto_cipher_encrypt_one(tfm, out, prev);
199
200 return 0;
201}
202
203static int cmac_init_tfm(struct crypto_tfm *tfm)
204{
205 struct crypto_cipher *cipher;
206 struct crypto_instance *inst = (void *)tfm->__crt_alg;
207 struct crypto_spawn *spawn = crypto_instance_ctx(inst);
208 struct cmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
209
210 cipher = crypto_spawn_cipher(spawn);
211 if (IS_ERR(cipher))
212 return PTR_ERR(cipher);
213
214 ctx->child = cipher;
215
216 return 0;
217};
218
219static void cmac_exit_tfm(struct crypto_tfm *tfm)
220{
221 struct cmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
222 crypto_free_cipher(ctx->child);
223}
224
225static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
226{
227 struct shash_instance *inst;
228 struct crypto_alg *alg;
229 unsigned long alignmask;
230 int err;
231
232 err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
233 if (err)
234 return err;
235
236 alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
237 CRYPTO_ALG_TYPE_MASK);
238 if (IS_ERR(alg))
239 return PTR_ERR(alg);
240
241 switch (alg->cra_blocksize) {
242 case 16:
243 case 8:
244 break;
245 default:
246 goto out_put_alg;
247 }
248
249 inst = shash_alloc_instance("cmac", alg);
250 err = PTR_ERR(inst);
251 if (IS_ERR(inst))
252 goto out_put_alg;
253
254 err = crypto_init_spawn(shash_instance_ctx(inst), alg,
255 shash_crypto_instance(inst),
256 CRYPTO_ALG_TYPE_MASK);
257 if (err)
258 goto out_free_inst;
259
260 alignmask = alg->cra_alignmask | (sizeof(long) - 1);
261 inst->alg.base.cra_alignmask = alignmask;
262 inst->alg.base.cra_priority = alg->cra_priority;
263 inst->alg.base.cra_blocksize = alg->cra_blocksize;
264
265 inst->alg.digestsize = alg->cra_blocksize;
266 inst->alg.descsize =
267 ALIGN(sizeof(struct cmac_desc_ctx), crypto_tfm_ctx_alignment())
268 + (alignmask & ~(crypto_tfm_ctx_alignment() - 1))
269 + alg->cra_blocksize * 2;
270
271 inst->alg.base.cra_ctxsize =
272 ALIGN(sizeof(struct cmac_tfm_ctx), alignmask + 1)
273 + alg->cra_blocksize * 2;
274
275 inst->alg.base.cra_init = cmac_init_tfm;
276 inst->alg.base.cra_exit = cmac_exit_tfm;
277
278 inst->alg.init = crypto_cmac_digest_init;
279 inst->alg.update = crypto_cmac_digest_update;
280 inst->alg.final = crypto_cmac_digest_final;
281 inst->alg.setkey = crypto_cmac_digest_setkey;
282
283 err = shash_register_instance(tmpl, inst);
284 if (err) {
285out_free_inst:
286 shash_free_instance(shash_crypto_instance(inst));
287 }
288
289out_put_alg:
290 crypto_mod_put(alg);
291 return err;
292}
293
294static struct crypto_template crypto_cmac_tmpl = {
295 .name = "cmac",
296 .create = cmac_create,
297 .free = shash_free_instance,
298 .module = THIS_MODULE,
299};
300
301static int __init crypto_cmac_module_init(void)
302{
303 return crypto_register_template(&crypto_cmac_tmpl);
304}
305
306static void __exit crypto_cmac_module_exit(void)
307{
308 crypto_unregister_template(&crypto_cmac_tmpl);
309}
310
311module_init(crypto_cmac_module_init);
312module_exit(crypto_cmac_module_exit);
313
314MODULE_LICENSE("GPL");
315MODULE_DESCRIPTION("CMAC keyed hash algorithm");
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index dfd511fb39ee..1512e41cd93d 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -440,7 +440,7 @@ static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = {
440 440
441#undef MSGSIZE 441#undef MSGSIZE
442 442
443static struct crypto_link { 443static const struct crypto_link {
444 int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); 444 int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
445 int (*dump)(struct sk_buff *, struct netlink_callback *); 445 int (*dump)(struct sk_buff *, struct netlink_callback *);
446 int (*done)(struct netlink_callback *); 446 int (*done)(struct netlink_callback *);
@@ -456,7 +456,7 @@ static struct crypto_link {
456static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 456static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
457{ 457{
458 struct nlattr *attrs[CRYPTOCFGA_MAX+1]; 458 struct nlattr *attrs[CRYPTOCFGA_MAX+1];
459 struct crypto_link *link; 459 const struct crypto_link *link;
460 int type, err; 460 int type, err;
461 461
462 type = nlh->nlmsg_type; 462 type = nlh->nlmsg_type;
diff --git a/crypto/gcm.c b/crypto/gcm.c
index 13ccbda34ff9..43e1fb05ea54 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -37,8 +37,14 @@ struct crypto_rfc4106_ctx {
37 u8 nonce[4]; 37 u8 nonce[4];
38}; 38};
39 39
40struct crypto_rfc4543_instance_ctx {
41 struct crypto_aead_spawn aead;
42 struct crypto_skcipher_spawn null;
43};
44
40struct crypto_rfc4543_ctx { 45struct crypto_rfc4543_ctx {
41 struct crypto_aead *child; 46 struct crypto_aead *child;
47 struct crypto_blkcipher *null;
42 u8 nonce[4]; 48 u8 nonce[4];
43}; 49};
44 50
@@ -1094,21 +1100,36 @@ static int crypto_rfc4543_setauthsize(struct crypto_aead *parent,
1094 return crypto_aead_setauthsize(ctx->child, authsize); 1100 return crypto_aead_setauthsize(ctx->child, authsize);
1095} 1101}
1096 1102
1103static void crypto_rfc4543_done(struct crypto_async_request *areq, int err)
1104{
1105 struct aead_request *req = areq->data;
1106 struct crypto_aead *aead = crypto_aead_reqtfm(req);
1107 struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req);
1108
1109 if (!err) {
1110 scatterwalk_map_and_copy(rctx->auth_tag, req->dst,
1111 req->cryptlen,
1112 crypto_aead_authsize(aead), 1);
1113 }
1114
1115 aead_request_complete(req, err);
1116}
1117
1097static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, 1118static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req,
1098 int enc) 1119 bool enc)
1099{ 1120{
1100 struct crypto_aead *aead = crypto_aead_reqtfm(req); 1121 struct crypto_aead *aead = crypto_aead_reqtfm(req);
1101 struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead); 1122 struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead);
1102 struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req); 1123 struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req);
1103 struct aead_request *subreq = &rctx->subreq; 1124 struct aead_request *subreq = &rctx->subreq;
1104 struct scatterlist *dst = req->dst; 1125 struct scatterlist *src = req->src;
1105 struct scatterlist *cipher = rctx->cipher; 1126 struct scatterlist *cipher = rctx->cipher;
1106 struct scatterlist *payload = rctx->payload; 1127 struct scatterlist *payload = rctx->payload;
1107 struct scatterlist *assoc = rctx->assoc; 1128 struct scatterlist *assoc = rctx->assoc;
1108 unsigned int authsize = crypto_aead_authsize(aead); 1129 unsigned int authsize = crypto_aead_authsize(aead);
1109 unsigned int assoclen = req->assoclen; 1130 unsigned int assoclen = req->assoclen;
1110 struct page *dstp; 1131 struct page *srcp;
1111 u8 *vdst; 1132 u8 *vsrc;
1112 u8 *iv = PTR_ALIGN((u8 *)(rctx + 1) + crypto_aead_reqsize(ctx->child), 1133 u8 *iv = PTR_ALIGN((u8 *)(rctx + 1) + crypto_aead_reqsize(ctx->child),
1113 crypto_aead_alignmask(ctx->child) + 1); 1134 crypto_aead_alignmask(ctx->child) + 1);
1114 1135
@@ -1119,19 +1140,19 @@ static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req,
1119 if (enc) 1140 if (enc)
1120 memset(rctx->auth_tag, 0, authsize); 1141 memset(rctx->auth_tag, 0, authsize);
1121 else 1142 else
1122 scatterwalk_map_and_copy(rctx->auth_tag, dst, 1143 scatterwalk_map_and_copy(rctx->auth_tag, src,
1123 req->cryptlen - authsize, 1144 req->cryptlen - authsize,
1124 authsize, 0); 1145 authsize, 0);
1125 1146
1126 sg_init_one(cipher, rctx->auth_tag, authsize); 1147 sg_init_one(cipher, rctx->auth_tag, authsize);
1127 1148
1128 /* construct the aad */ 1149 /* construct the aad */
1129 dstp = sg_page(dst); 1150 srcp = sg_page(src);
1130 vdst = PageHighMem(dstp) ? NULL : page_address(dstp) + dst->offset; 1151 vsrc = PageHighMem(srcp) ? NULL : page_address(srcp) + src->offset;
1131 1152
1132 sg_init_table(payload, 2); 1153 sg_init_table(payload, 2);
1133 sg_set_buf(payload, req->iv, 8); 1154 sg_set_buf(payload, req->iv, 8);
1134 scatterwalk_crypto_chain(payload, dst, vdst == req->iv + 8, 2); 1155 scatterwalk_crypto_chain(payload, src, vsrc == req->iv + 8, 2);
1135 assoclen += 8 + req->cryptlen - (enc ? 0 : authsize); 1156 assoclen += 8 + req->cryptlen - (enc ? 0 : authsize);
1136 1157
1137 if (req->assoc->length == req->assoclen) { 1158 if (req->assoc->length == req->assoclen) {
@@ -1150,14 +1171,27 @@ static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req,
1150 scatterwalk_crypto_chain(assoc, payload, 0, 2); 1171 scatterwalk_crypto_chain(assoc, payload, 0, 2);
1151 1172
1152 aead_request_set_tfm(subreq, ctx->child); 1173 aead_request_set_tfm(subreq, ctx->child);
1153 aead_request_set_callback(subreq, req->base.flags, req->base.complete, 1174 aead_request_set_callback(subreq, req->base.flags, crypto_rfc4543_done,
1154 req->base.data); 1175 req);
1155 aead_request_set_crypt(subreq, cipher, cipher, enc ? 0 : authsize, iv); 1176 aead_request_set_crypt(subreq, cipher, cipher, enc ? 0 : authsize, iv);
1156 aead_request_set_assoc(subreq, assoc, assoclen); 1177 aead_request_set_assoc(subreq, assoc, assoclen);
1157 1178
1158 return subreq; 1179 return subreq;
1159} 1180}
1160 1181
1182static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc)
1183{
1184 struct crypto_aead *aead = crypto_aead_reqtfm(req);
1185 struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead);
1186 unsigned int authsize = crypto_aead_authsize(aead);
1187 unsigned int nbytes = req->cryptlen - (enc ? 0 : authsize);
1188 struct blkcipher_desc desc = {
1189 .tfm = ctx->null,
1190 };
1191
1192 return crypto_blkcipher_encrypt(&desc, req->dst, req->src, nbytes);
1193}
1194
1161static int crypto_rfc4543_encrypt(struct aead_request *req) 1195static int crypto_rfc4543_encrypt(struct aead_request *req)
1162{ 1196{
1163 struct crypto_aead *aead = crypto_aead_reqtfm(req); 1197 struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -1165,7 +1199,13 @@ static int crypto_rfc4543_encrypt(struct aead_request *req)
1165 struct aead_request *subreq; 1199 struct aead_request *subreq;
1166 int err; 1200 int err;
1167 1201
1168 subreq = crypto_rfc4543_crypt(req, 1); 1202 if (req->src != req->dst) {
1203 err = crypto_rfc4543_copy_src_to_dst(req, true);
1204 if (err)
1205 return err;
1206 }
1207
1208 subreq = crypto_rfc4543_crypt(req, true);
1169 err = crypto_aead_encrypt(subreq); 1209 err = crypto_aead_encrypt(subreq);
1170 if (err) 1210 if (err)
1171 return err; 1211 return err;
@@ -1178,7 +1218,15 @@ static int crypto_rfc4543_encrypt(struct aead_request *req)
1178 1218
1179static int crypto_rfc4543_decrypt(struct aead_request *req) 1219static int crypto_rfc4543_decrypt(struct aead_request *req)
1180{ 1220{
1181 req = crypto_rfc4543_crypt(req, 0); 1221 int err;
1222
1223 if (req->src != req->dst) {
1224 err = crypto_rfc4543_copy_src_to_dst(req, false);
1225 if (err)
1226 return err;
1227 }
1228
1229 req = crypto_rfc4543_crypt(req, false);
1182 1230
1183 return crypto_aead_decrypt(req); 1231 return crypto_aead_decrypt(req);
1184} 1232}
@@ -1186,16 +1234,25 @@ static int crypto_rfc4543_decrypt(struct aead_request *req)
1186static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm) 1234static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm)
1187{ 1235{
1188 struct crypto_instance *inst = (void *)tfm->__crt_alg; 1236 struct crypto_instance *inst = (void *)tfm->__crt_alg;
1189 struct crypto_aead_spawn *spawn = crypto_instance_ctx(inst); 1237 struct crypto_rfc4543_instance_ctx *ictx = crypto_instance_ctx(inst);
1238 struct crypto_aead_spawn *spawn = &ictx->aead;
1190 struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm); 1239 struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm);
1191 struct crypto_aead *aead; 1240 struct crypto_aead *aead;
1241 struct crypto_blkcipher *null;
1192 unsigned long align; 1242 unsigned long align;
1243 int err = 0;
1193 1244
1194 aead = crypto_spawn_aead(spawn); 1245 aead = crypto_spawn_aead(spawn);
1195 if (IS_ERR(aead)) 1246 if (IS_ERR(aead))
1196 return PTR_ERR(aead); 1247 return PTR_ERR(aead);
1197 1248
1249 null = crypto_spawn_blkcipher(&ictx->null.base);
1250 err = PTR_ERR(null);
1251 if (IS_ERR(null))
1252 goto err_free_aead;
1253
1198 ctx->child = aead; 1254 ctx->child = aead;
1255 ctx->null = null;
1199 1256
1200 align = crypto_aead_alignmask(aead); 1257 align = crypto_aead_alignmask(aead);
1201 align &= ~(crypto_tfm_ctx_alignment() - 1); 1258 align &= ~(crypto_tfm_ctx_alignment() - 1);
@@ -1205,6 +1262,10 @@ static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm)
1205 align + 16; 1262 align + 16;
1206 1263
1207 return 0; 1264 return 0;
1265
1266err_free_aead:
1267 crypto_free_aead(aead);
1268 return err;
1208} 1269}
1209 1270
1210static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm) 1271static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm)
@@ -1212,6 +1273,7 @@ static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm)
1212 struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm); 1273 struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm);
1213 1274
1214 crypto_free_aead(ctx->child); 1275 crypto_free_aead(ctx->child);
1276 crypto_free_blkcipher(ctx->null);
1215} 1277}
1216 1278
1217static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) 1279static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
@@ -1220,6 +1282,7 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
1220 struct crypto_instance *inst; 1282 struct crypto_instance *inst;
1221 struct crypto_aead_spawn *spawn; 1283 struct crypto_aead_spawn *spawn;
1222 struct crypto_alg *alg; 1284 struct crypto_alg *alg;
1285 struct crypto_rfc4543_instance_ctx *ctx;
1223 const char *ccm_name; 1286 const char *ccm_name;
1224 int err; 1287 int err;
1225 1288
@@ -1234,11 +1297,12 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
1234 if (IS_ERR(ccm_name)) 1297 if (IS_ERR(ccm_name))
1235 return ERR_CAST(ccm_name); 1298 return ERR_CAST(ccm_name);
1236 1299
1237 inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); 1300 inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
1238 if (!inst) 1301 if (!inst)
1239 return ERR_PTR(-ENOMEM); 1302 return ERR_PTR(-ENOMEM);
1240 1303
1241 spawn = crypto_instance_ctx(inst); 1304 ctx = crypto_instance_ctx(inst);
1305 spawn = &ctx->aead;
1242 crypto_set_aead_spawn(spawn, inst); 1306 crypto_set_aead_spawn(spawn, inst);
1243 err = crypto_grab_aead(spawn, ccm_name, 0, 1307 err = crypto_grab_aead(spawn, ccm_name, 0,
1244 crypto_requires_sync(algt->type, algt->mask)); 1308 crypto_requires_sync(algt->type, algt->mask));
@@ -1247,15 +1311,23 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
1247 1311
1248 alg = crypto_aead_spawn_alg(spawn); 1312 alg = crypto_aead_spawn_alg(spawn);
1249 1313
1314 crypto_set_skcipher_spawn(&ctx->null, inst);
1315 err = crypto_grab_skcipher(&ctx->null, "ecb(cipher_null)", 0,
1316 CRYPTO_ALG_ASYNC);
1317 if (err)
1318 goto out_drop_alg;
1319
1320 crypto_skcipher_spawn_alg(&ctx->null);
1321
1250 err = -EINVAL; 1322 err = -EINVAL;
1251 1323
1252 /* We only support 16-byte blocks. */ 1324 /* We only support 16-byte blocks. */
1253 if (alg->cra_aead.ivsize != 16) 1325 if (alg->cra_aead.ivsize != 16)
1254 goto out_drop_alg; 1326 goto out_drop_ecbnull;
1255 1327
1256 /* Not a stream cipher? */ 1328 /* Not a stream cipher? */
1257 if (alg->cra_blocksize != 1) 1329 if (alg->cra_blocksize != 1)
1258 goto out_drop_alg; 1330 goto out_drop_ecbnull;
1259 1331
1260 err = -ENAMETOOLONG; 1332 err = -ENAMETOOLONG;
1261 if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, 1333 if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
@@ -1263,7 +1335,7 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
1263 snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, 1335 snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
1264 "rfc4543(%s)", alg->cra_driver_name) >= 1336 "rfc4543(%s)", alg->cra_driver_name) >=
1265 CRYPTO_MAX_ALG_NAME) 1337 CRYPTO_MAX_ALG_NAME)
1266 goto out_drop_alg; 1338 goto out_drop_ecbnull;
1267 1339
1268 inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD; 1340 inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
1269 inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC; 1341 inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
@@ -1290,6 +1362,8 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
1290out: 1362out:
1291 return inst; 1363 return inst;
1292 1364
1365out_drop_ecbnull:
1366 crypto_drop_skcipher(&ctx->null);
1293out_drop_alg: 1367out_drop_alg:
1294 crypto_drop_aead(spawn); 1368 crypto_drop_aead(spawn);
1295out_free_inst: 1369out_free_inst:
@@ -1300,7 +1374,11 @@ out_free_inst:
1300 1374
1301static void crypto_rfc4543_free(struct crypto_instance *inst) 1375static void crypto_rfc4543_free(struct crypto_instance *inst)
1302{ 1376{
1303 crypto_drop_spawn(crypto_instance_ctx(inst)); 1377 struct crypto_rfc4543_instance_ctx *ctx = crypto_instance_ctx(inst);
1378
1379 crypto_drop_aead(&ctx->aead);
1380 crypto_drop_skcipher(&ctx->null);
1381
1304 kfree(inst); 1382 kfree(inst);
1305} 1383}
1306 1384
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
index c3ed4ec924e1..543366779524 100644
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
@@ -246,7 +246,7 @@ static int sha256_init(struct shash_desc *desc)
246 return 0; 246 return 0;
247} 247}
248 248
249static int sha256_update(struct shash_desc *desc, const u8 *data, 249int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
250 unsigned int len) 250 unsigned int len)
251{ 251{
252 struct sha256_state *sctx = shash_desc_ctx(desc); 252 struct sha256_state *sctx = shash_desc_ctx(desc);
@@ -277,6 +277,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data,
277 277
278 return 0; 278 return 0;
279} 279}
280EXPORT_SYMBOL(crypto_sha256_update);
280 281
281static int sha256_final(struct shash_desc *desc, u8 *out) 282static int sha256_final(struct shash_desc *desc, u8 *out)
282{ 283{
@@ -293,10 +294,10 @@ static int sha256_final(struct shash_desc *desc, u8 *out)
293 /* Pad out to 56 mod 64. */ 294 /* Pad out to 56 mod 64. */
294 index = sctx->count & 0x3f; 295 index = sctx->count & 0x3f;
295 pad_len = (index < 56) ? (56 - index) : ((64+56) - index); 296 pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
296 sha256_update(desc, padding, pad_len); 297 crypto_sha256_update(desc, padding, pad_len);
297 298
298 /* Append length (before padding) */ 299 /* Append length (before padding) */
299 sha256_update(desc, (const u8 *)&bits, sizeof(bits)); 300 crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
300 301
301 /* Store state in digest */ 302 /* Store state in digest */
302 for (i = 0; i < 8; i++) 303 for (i = 0; i < 8; i++)
@@ -339,7 +340,7 @@ static int sha256_import(struct shash_desc *desc, const void *in)
339static struct shash_alg sha256_algs[2] = { { 340static struct shash_alg sha256_algs[2] = { {
340 .digestsize = SHA256_DIGEST_SIZE, 341 .digestsize = SHA256_DIGEST_SIZE,
341 .init = sha256_init, 342 .init = sha256_init,
342 .update = sha256_update, 343 .update = crypto_sha256_update,
343 .final = sha256_final, 344 .final = sha256_final,
344 .export = sha256_export, 345 .export = sha256_export,
345 .import = sha256_import, 346 .import = sha256_import,
@@ -355,7 +356,7 @@ static struct shash_alg sha256_algs[2] = { {
355}, { 356}, {
356 .digestsize = SHA224_DIGEST_SIZE, 357 .digestsize = SHA224_DIGEST_SIZE,
357 .init = sha224_init, 358 .init = sha224_init,
358 .update = sha256_update, 359 .update = crypto_sha256_update,
359 .final = sha224_final, 360 .final = sha224_final,
360 .descsize = sizeof(struct sha256_state), 361 .descsize = sizeof(struct sha256_state),
361 .base = { 362 .base = {
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 71fcf361102d..4c5862095679 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -163,8 +163,8 @@ sha384_init(struct shash_desc *desc)
163 return 0; 163 return 0;
164} 164}
165 165
166static int 166int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
167sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) 167 unsigned int len)
168{ 168{
169 struct sha512_state *sctx = shash_desc_ctx(desc); 169 struct sha512_state *sctx = shash_desc_ctx(desc);
170 170
@@ -197,6 +197,7 @@ sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len)
197 197
198 return 0; 198 return 0;
199} 199}
200EXPORT_SYMBOL(crypto_sha512_update);
200 201
201static int 202static int
202sha512_final(struct shash_desc *desc, u8 *hash) 203sha512_final(struct shash_desc *desc, u8 *hash)
@@ -215,10 +216,10 @@ sha512_final(struct shash_desc *desc, u8 *hash)
215 /* Pad out to 112 mod 128. */ 216 /* Pad out to 112 mod 128. */
216 index = sctx->count[0] & 0x7f; 217 index = sctx->count[0] & 0x7f;
217 pad_len = (index < 112) ? (112 - index) : ((128+112) - index); 218 pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
218 sha512_update(desc, padding, pad_len); 219 crypto_sha512_update(desc, padding, pad_len);
219 220
220 /* Append length (before padding) */ 221 /* Append length (before padding) */
221 sha512_update(desc, (const u8 *)bits, sizeof(bits)); 222 crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits));
222 223
223 /* Store state in digest */ 224 /* Store state in digest */
224 for (i = 0; i < 8; i++) 225 for (i = 0; i < 8; i++)
@@ -245,7 +246,7 @@ static int sha384_final(struct shash_desc *desc, u8 *hash)
245static struct shash_alg sha512_algs[2] = { { 246static struct shash_alg sha512_algs[2] = { {
246 .digestsize = SHA512_DIGEST_SIZE, 247 .digestsize = SHA512_DIGEST_SIZE,
247 .init = sha512_init, 248 .init = sha512_init,
248 .update = sha512_update, 249 .update = crypto_sha512_update,
249 .final = sha512_final, 250 .final = sha512_final,
250 .descsize = sizeof(struct sha512_state), 251 .descsize = sizeof(struct sha512_state),
251 .base = { 252 .base = {
@@ -257,7 +258,7 @@ static struct shash_alg sha512_algs[2] = { {
257}, { 258}, {
258 .digestsize = SHA384_DIGEST_SIZE, 259 .digestsize = SHA384_DIGEST_SIZE,
259 .init = sha384_init, 260 .init = sha384_init,
260 .update = sha512_update, 261 .update = crypto_sha512_update,
261 .final = sha384_final, 262 .final = sha384_final,
262 .descsize = sizeof(struct sha512_state), 263 .descsize = sizeof(struct sha512_state),
263 .base = { 264 .base = {
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 87ef7d66bc20..66d254ce0d11 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1095,7 +1095,6 @@ static int do_test(int m)
1095 break; 1095 break;
1096 1096
1097 case 28: 1097 case 28:
1098
1099 ret += tcrypt_test("tgr160"); 1098 ret += tcrypt_test("tgr160");
1100 break; 1099 break;
1101 1100
@@ -1118,6 +1117,7 @@ static int do_test(int m)
1118 ret += tcrypt_test("lrw(camellia)"); 1117 ret += tcrypt_test("lrw(camellia)");
1119 ret += tcrypt_test("xts(camellia)"); 1118 ret += tcrypt_test("xts(camellia)");
1120 break; 1119 break;
1120
1121 case 33: 1121 case 33:
1122 ret += tcrypt_test("sha224"); 1122 ret += tcrypt_test("sha224");
1123 break; 1123 break;
@@ -1213,6 +1213,7 @@ static int do_test(int m)
1213 case 109: 1213 case 109:
1214 ret += tcrypt_test("vmac(aes)"); 1214 ret += tcrypt_test("vmac(aes)");
1215 break; 1215 break;
1216
1216 case 110: 1217 case 110:
1217 ret += tcrypt_test("hmac(crc32)"); 1218 ret += tcrypt_test("hmac(crc32)");
1218 break; 1219 break;
@@ -1225,6 +1226,18 @@ static int do_test(int m)
1225 ret += tcrypt_test("rfc4106(gcm(aes))"); 1226 ret += tcrypt_test("rfc4106(gcm(aes))");
1226 break; 1227 break;
1227 1228
1229 case 152:
1230 ret += tcrypt_test("rfc4543(gcm(aes))");
1231 break;
1232
1233 case 153:
1234 ret += tcrypt_test("cmac(aes)");
1235 break;
1236
1237 case 154:
1238 ret += tcrypt_test("cmac(des3_ede)");
1239 break;
1240
1228 case 200: 1241 case 200:
1229 test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, 1242 test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
1230 speed_template_16_24_32); 1243 speed_template_16_24_32);
@@ -1755,6 +1768,21 @@ static int do_test(int m)
1755 speed_template_32_64); 1768 speed_template_32_64);
1756 break; 1769 break;
1757 1770
1771 case 509:
1772 test_acipher_speed("ecb(blowfish)", ENCRYPT, sec, NULL, 0,
1773 speed_template_8_32);
1774 test_acipher_speed("ecb(blowfish)", DECRYPT, sec, NULL, 0,
1775 speed_template_8_32);
1776 test_acipher_speed("cbc(blowfish)", ENCRYPT, sec, NULL, 0,
1777 speed_template_8_32);
1778 test_acipher_speed("cbc(blowfish)", DECRYPT, sec, NULL, 0,
1779 speed_template_8_32);
1780 test_acipher_speed("ctr(blowfish)", ENCRYPT, sec, NULL, 0,
1781 speed_template_8_32);
1782 test_acipher_speed("ctr(blowfish)", DECRYPT, sec, NULL, 0,
1783 speed_template_8_32);
1784 break;
1785
1758 case 1000: 1786 case 1000:
1759 test_available(); 1787 test_available();
1760 break; 1788 break;
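
As a usage note (not part of the patch): the new tcrypt cases follow the existing convention of being selected through the tcrypt module's mode parameter, so for example loading tcrypt with mode=152, 153 or 154 runs the rfc4543(gcm(aes)) and CMAC self-tests, and mode=509 together with the sec parameter runs the new asynchronous blowfish speed loop.
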
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index efd8b20e13dc..5823735cf381 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1645,19 +1645,31 @@ static const struct alg_test_desc alg_test_descs[] = {
1645 .alg = "__cbc-serpent-avx", 1645 .alg = "__cbc-serpent-avx",
1646 .test = alg_test_null, 1646 .test = alg_test_null,
1647 }, { 1647 }, {
1648 .alg = "__cbc-serpent-avx2",
1649 .test = alg_test_null,
1650 }, {
1648 .alg = "__cbc-serpent-sse2", 1651 .alg = "__cbc-serpent-sse2",
1649 .test = alg_test_null, 1652 .test = alg_test_null,
1650 }, { 1653 }, {
1651 .alg = "__cbc-twofish-avx", 1654 .alg = "__cbc-twofish-avx",
1652 .test = alg_test_null, 1655 .test = alg_test_null,
1653 }, { 1656 }, {
1657 .alg = "__cbc-twofish-avx2",
1658 .test = alg_test_null,
1659 }, {
1654 .alg = "__driver-cbc-aes-aesni", 1660 .alg = "__driver-cbc-aes-aesni",
1655 .test = alg_test_null, 1661 .test = alg_test_null,
1656 .fips_allowed = 1, 1662 .fips_allowed = 1,
1657 }, { 1663 }, {
1664 .alg = "__driver-cbc-blowfish-avx2",
1665 .test = alg_test_null,
1666 }, {
1658 .alg = "__driver-cbc-camellia-aesni", 1667 .alg = "__driver-cbc-camellia-aesni",
1659 .test = alg_test_null, 1668 .test = alg_test_null,
1660 }, { 1669 }, {
1670 .alg = "__driver-cbc-camellia-aesni-avx2",
1671 .test = alg_test_null,
1672 }, {
1661 .alg = "__driver-cbc-cast5-avx", 1673 .alg = "__driver-cbc-cast5-avx",
1662 .test = alg_test_null, 1674 .test = alg_test_null,
1663 }, { 1675 }, {
@@ -1667,19 +1679,31 @@ static const struct alg_test_desc alg_test_descs[] = {
1667 .alg = "__driver-cbc-serpent-avx", 1679 .alg = "__driver-cbc-serpent-avx",
1668 .test = alg_test_null, 1680 .test = alg_test_null,
1669 }, { 1681 }, {
1682 .alg = "__driver-cbc-serpent-avx2",
1683 .test = alg_test_null,
1684 }, {
1670 .alg = "__driver-cbc-serpent-sse2", 1685 .alg = "__driver-cbc-serpent-sse2",
1671 .test = alg_test_null, 1686 .test = alg_test_null,
1672 }, { 1687 }, {
1673 .alg = "__driver-cbc-twofish-avx", 1688 .alg = "__driver-cbc-twofish-avx",
1674 .test = alg_test_null, 1689 .test = alg_test_null,
1675 }, { 1690 }, {
1691 .alg = "__driver-cbc-twofish-avx2",
1692 .test = alg_test_null,
1693 }, {
1676 .alg = "__driver-ecb-aes-aesni", 1694 .alg = "__driver-ecb-aes-aesni",
1677 .test = alg_test_null, 1695 .test = alg_test_null,
1678 .fips_allowed = 1, 1696 .fips_allowed = 1,
1679 }, { 1697 }, {
1698 .alg = "__driver-ecb-blowfish-avx2",
1699 .test = alg_test_null,
1700 }, {
1680 .alg = "__driver-ecb-camellia-aesni", 1701 .alg = "__driver-ecb-camellia-aesni",
1681 .test = alg_test_null, 1702 .test = alg_test_null,
1682 }, { 1703 }, {
1704 .alg = "__driver-ecb-camellia-aesni-avx2",
1705 .test = alg_test_null,
1706 }, {
1683 .alg = "__driver-ecb-cast5-avx", 1707 .alg = "__driver-ecb-cast5-avx",
1684 .test = alg_test_null, 1708 .test = alg_test_null,
1685 }, { 1709 }, {
@@ -1689,12 +1713,18 @@ static const struct alg_test_desc alg_test_descs[] = {
1689 .alg = "__driver-ecb-serpent-avx", 1713 .alg = "__driver-ecb-serpent-avx",
1690 .test = alg_test_null, 1714 .test = alg_test_null,
1691 }, { 1715 }, {
1716 .alg = "__driver-ecb-serpent-avx2",
1717 .test = alg_test_null,
1718 }, {
1692 .alg = "__driver-ecb-serpent-sse2", 1719 .alg = "__driver-ecb-serpent-sse2",
1693 .test = alg_test_null, 1720 .test = alg_test_null,
1694 }, { 1721 }, {
1695 .alg = "__driver-ecb-twofish-avx", 1722 .alg = "__driver-ecb-twofish-avx",
1696 .test = alg_test_null, 1723 .test = alg_test_null,
1697 }, { 1724 }, {
1725 .alg = "__driver-ecb-twofish-avx2",
1726 .test = alg_test_null,
1727 }, {
1698 .alg = "__ghash-pclmulqdqni", 1728 .alg = "__ghash-pclmulqdqni",
1699 .test = alg_test_null, 1729 .test = alg_test_null,
1700 .fips_allowed = 1, 1730 .fips_allowed = 1,
@@ -1913,6 +1943,27 @@ static const struct alg_test_desc alg_test_descs[] = {
1913 } 1943 }
1914 } 1944 }
1915 }, { 1945 }, {
1946 .alg = "cmac(aes)",
1947 .test = alg_test_hash,
1948 .suite = {
1949 .hash = {
1950 .vecs = aes_cmac128_tv_template,
1951 .count = CMAC_AES_TEST_VECTORS
1952 }
1953 }
1954 }, {
1955 .alg = "cmac(des3_ede)",
1956 .test = alg_test_hash,
1957 .suite = {
1958 .hash = {
1959 .vecs = des3_ede_cmac64_tv_template,
1960 .count = CMAC_DES3_EDE_TEST_VECTORS
1961 }
1962 }
1963 }, {
1964 .alg = "compress_null",
1965 .test = alg_test_null,
1966 }, {
1916 .alg = "crc32c", 1967 .alg = "crc32c",
1917 .test = alg_test_crc32c, 1968 .test = alg_test_crc32c,
1918 .fips_allowed = 1, 1969 .fips_allowed = 1,
@@ -1927,16 +1978,31 @@ static const struct alg_test_desc alg_test_descs[] = {
1927 .test = alg_test_null, 1978 .test = alg_test_null,
1928 .fips_allowed = 1, 1979 .fips_allowed = 1,
1929 }, { 1980 }, {
1981 .alg = "cryptd(__driver-cbc-blowfish-avx2)",
1982 .test = alg_test_null,
1983 }, {
1930 .alg = "cryptd(__driver-cbc-camellia-aesni)", 1984 .alg = "cryptd(__driver-cbc-camellia-aesni)",
1931 .test = alg_test_null, 1985 .test = alg_test_null,
1932 }, { 1986 }, {
1987 .alg = "cryptd(__driver-cbc-camellia-aesni-avx2)",
1988 .test = alg_test_null,
1989 }, {
1990 .alg = "cryptd(__driver-cbc-serpent-avx2)",
1991 .test = alg_test_null,
1992 }, {
1933 .alg = "cryptd(__driver-ecb-aes-aesni)", 1993 .alg = "cryptd(__driver-ecb-aes-aesni)",
1934 .test = alg_test_null, 1994 .test = alg_test_null,
1935 .fips_allowed = 1, 1995 .fips_allowed = 1,
1936 }, { 1996 }, {
1997 .alg = "cryptd(__driver-ecb-blowfish-avx2)",
1998 .test = alg_test_null,
1999 }, {
1937 .alg = "cryptd(__driver-ecb-camellia-aesni)", 2000 .alg = "cryptd(__driver-ecb-camellia-aesni)",
1938 .test = alg_test_null, 2001 .test = alg_test_null,
1939 }, { 2002 }, {
2003 .alg = "cryptd(__driver-ecb-camellia-aesni-avx2)",
2004 .test = alg_test_null,
2005 }, {
1940 .alg = "cryptd(__driver-ecb-cast5-avx)", 2006 .alg = "cryptd(__driver-ecb-cast5-avx)",
1941 .test = alg_test_null, 2007 .test = alg_test_null,
1942 }, { 2008 }, {
@@ -1946,12 +2012,18 @@ static const struct alg_test_desc alg_test_descs[] = {
1946 .alg = "cryptd(__driver-ecb-serpent-avx)", 2012 .alg = "cryptd(__driver-ecb-serpent-avx)",
1947 .test = alg_test_null, 2013 .test = alg_test_null,
1948 }, { 2014 }, {
2015 .alg = "cryptd(__driver-ecb-serpent-avx2)",
2016 .test = alg_test_null,
2017 }, {
1949 .alg = "cryptd(__driver-ecb-serpent-sse2)", 2018 .alg = "cryptd(__driver-ecb-serpent-sse2)",
1950 .test = alg_test_null, 2019 .test = alg_test_null,
1951 }, { 2020 }, {
1952 .alg = "cryptd(__driver-ecb-twofish-avx)", 2021 .alg = "cryptd(__driver-ecb-twofish-avx)",
1953 .test = alg_test_null, 2022 .test = alg_test_null,
1954 }, { 2023 }, {
2024 .alg = "cryptd(__driver-ecb-twofish-avx2)",
2025 .test = alg_test_null,
2026 }, {
1955 .alg = "cryptd(__driver-gcm-aes-aesni)", 2027 .alg = "cryptd(__driver-gcm-aes-aesni)",
1956 .test = alg_test_null, 2028 .test = alg_test_null,
1957 .fips_allowed = 1, 2029 .fips_allowed = 1,
@@ -2127,6 +2199,9 @@ static const struct alg_test_desc alg_test_descs[] = {
2127 } 2199 }
2128 } 2200 }
2129 }, { 2201 }, {
2202 .alg = "digest_null",
2203 .test = alg_test_null,
2204 }, {
2130 .alg = "ecb(__aes-aesni)", 2205 .alg = "ecb(__aes-aesni)",
2131 .test = alg_test_null, 2206 .test = alg_test_null,
2132 .fips_allowed = 1, 2207 .fips_allowed = 1,
@@ -2237,6 +2312,9 @@ static const struct alg_test_desc alg_test_descs[] = {
2237 } 2312 }
2238 } 2313 }
2239 }, { 2314 }, {
2315 .alg = "ecb(cipher_null)",
2316 .test = alg_test_null,
2317 }, {
2240 .alg = "ecb(des)", 2318 .alg = "ecb(des)",
2241 .test = alg_test_skcipher, 2319 .test = alg_test_skcipher,
2242 .fips_allowed = 1, 2320 .fips_allowed = 1,
@@ -2696,8 +2774,6 @@ static const struct alg_test_desc alg_test_descs[] = {
2696 } 2774 }
2697 } 2775 }
2698 }, { 2776 }, {
2699
2700
2701 .alg = "rfc4309(ccm(aes))", 2777 .alg = "rfc4309(ccm(aes))",
2702 .test = alg_test_aead, 2778 .test = alg_test_aead,
2703 .fips_allowed = 1, 2779 .fips_allowed = 1,
@@ -2714,6 +2790,21 @@ static const struct alg_test_desc alg_test_descs[] = {
2714 } 2790 }
2715 } 2791 }
2716 }, { 2792 }, {
2793 .alg = "rfc4543(gcm(aes))",
2794 .test = alg_test_aead,
2795 .suite = {
2796 .aead = {
2797 .enc = {
2798 .vecs = aes_gcm_rfc4543_enc_tv_template,
2799 .count = AES_GCM_4543_ENC_TEST_VECTORS
2800 },
2801 .dec = {
2802 .vecs = aes_gcm_rfc4543_dec_tv_template,
2803 .count = AES_GCM_4543_DEC_TEST_VECTORS
2804 },
2805 }
2806 }
2807 }, {
2717 .alg = "rmd128", 2808 .alg = "rmd128",
2718 .test = alg_test_hash, 2809 .test = alg_test_hash,
2719 .suite = { 2810 .suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 3db1b7591559..1e701bc075b9 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -1639,6 +1639,131 @@ static struct hash_testvec hmac_sha256_tv_template[] = {
1639 }, 1639 },
1640}; 1640};
1641 1641
1642#define CMAC_AES_TEST_VECTORS 6
1643
1644static struct hash_testvec aes_cmac128_tv_template[] = {
1645 { /* From NIST Special Publication 800-38B, AES-128 */
1646 .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
1647 "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
1648 .plaintext = zeroed_string,
1649 .digest = "\xbb\x1d\x69\x29\xe9\x59\x37\x28"
1650 "\x7f\xa3\x7d\x12\x9b\x75\x67\x46",
1651 .psize = 0,
1652 .ksize = 16,
1653 }, {
1654 .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
1655 "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
1656 .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
1657 "\xe9\x3d\x7e\x11\x73\x93\x17\x2a",
1658 .digest = "\x07\x0a\x16\xb4\x6b\x4d\x41\x44"
1659 "\xf7\x9b\xdd\x9d\xd0\x4a\x28\x7c",
1660 .psize = 16,
1661 .ksize = 16,
1662 }, {
1663 .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
1664 "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
1665 .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
1666 "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
1667 "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
1668 "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
1669 "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11",
1670 .digest = "\xdf\xa6\x67\x47\xde\x9a\xe6\x30"
1671 "\x30\xca\x32\x61\x14\x97\xc8\x27",
1672 .psize = 40,
1673 .ksize = 16,
1674 }, {
1675 .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
1676 "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
1677 .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
1678 "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
1679 "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
1680 "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
1681 "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
1682 "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
1683 "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
1684 "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
1685 .digest = "\x51\xf0\xbe\xbf\x7e\x3b\x9d\x92"
1686 "\xfc\x49\x74\x17\x79\x36\x3c\xfe",
1687 .psize = 64,
1688 .ksize = 16,
1689 }, { /* From NIST Special Publication 800-38B, AES-256 */
1690 .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
1691 "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
1692 "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
1693 "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
1694 .plaintext = zeroed_string,
1695 .digest = "\x02\x89\x62\xf6\x1b\x7b\xf8\x9e"
1696 "\xfc\x6b\x55\x1f\x46\x67\xd9\x83",
1697 .psize = 0,
1698 .ksize = 32,
1699 }, {
1700 .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
1701 "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
1702 "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
1703 "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
1704 .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
1705 "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
1706 "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
1707 "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
1708 "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
1709 "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
1710 "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
1711 "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
1712 .digest = "\xe1\x99\x21\x90\x54\x9f\x6e\xd5"
1713 "\x69\x6a\x2c\x05\x6c\x31\x54\x10",
1714 .psize = 64,
1715 .ksize = 32,
1716 }
1717};
1718
1719#define CMAC_DES3_EDE_TEST_VECTORS 4
1720
1721static struct hash_testvec des3_ede_cmac64_tv_template[] = {
1722/*
1723 * From NIST Special Publication 800-38B, Three Key TDEA
1724 * Corrected test vectors from:
1725 * http://csrc.nist.gov/publications/nistpubs/800-38B/Updated_CMAC_Examples.pdf
1726 */
1727 {
1728 .key = "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62"
1729 "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
1730 "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
1731 .plaintext = zeroed_string,
1732 .digest = "\xb7\xa6\x88\xe1\x22\xff\xaf\x95",
1733 .psize = 0,
1734 .ksize = 24,
1735 }, {
1736 .key = "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62"
1737 "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
1738 "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
1739 .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96",
1740 .digest = "\x8e\x8f\x29\x31\x36\x28\x37\x97",
1741 .psize = 8,
1742 .ksize = 24,
1743 }, {
1744 .key = "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62"
1745 "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
1746 "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
1747 .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
1748 "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
1749 "\xae\x2d\x8a\x57",
1750 .digest = "\x74\x3d\xdb\xe0\xce\x2d\xc2\xed",
1751 .psize = 20,
1752 .ksize = 24,
1753 }, {
1754 .key = "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62"
1755 "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
1756 "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
1757 .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
1758 "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
1759 "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
1760 "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51",
1761 .digest = "\x33\xe6\xb1\x09\x24\x00\xea\xe5",
1762 .psize = 32,
1763 .ksize = 24,
1764 }
1765};
1766
1642#define XCBC_AES_TEST_VECTORS 6 1767#define XCBC_AES_TEST_VECTORS 6
1643 1768
1644static struct hash_testvec aes_xcbc128_tv_template[] = { 1769static struct hash_testvec aes_xcbc128_tv_template[] = {
@@ -12680,6 +12805,8 @@ static struct cipher_testvec cast6_xts_dec_tv_template[] = {
12680#define AES_GCM_DEC_TEST_VECTORS 8 12805#define AES_GCM_DEC_TEST_VECTORS 8
12681#define AES_GCM_4106_ENC_TEST_VECTORS 7 12806#define AES_GCM_4106_ENC_TEST_VECTORS 7
12682#define AES_GCM_4106_DEC_TEST_VECTORS 7 12807#define AES_GCM_4106_DEC_TEST_VECTORS 7
12808#define AES_GCM_4543_ENC_TEST_VECTORS 1
12809#define AES_GCM_4543_DEC_TEST_VECTORS 2
12683#define AES_CCM_ENC_TEST_VECTORS 7 12810#define AES_CCM_ENC_TEST_VECTORS 7
12684#define AES_CCM_DEC_TEST_VECTORS 7 12811#define AES_CCM_DEC_TEST_VECTORS 7
12685#define AES_CCM_4309_ENC_TEST_VECTORS 7 12812#define AES_CCM_4309_ENC_TEST_VECTORS 7
@@ -18193,6 +18320,93 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
18193 } 18320 }
18194}; 18321};
18195 18322
18323static struct aead_testvec aes_gcm_rfc4543_enc_tv_template[] = {
18324 { /* From draft-mcgrew-gcm-test-01 */
18325 .key = "\x4c\x80\xcd\xef\xbb\x5d\x10\xda"
18326 "\x90\x6a\xc7\x3c\x36\x13\xa6\x34"
18327 "\x22\x43\x3c\x64",
18328 .klen = 20,
18329 .iv = zeroed_string,
18330 .assoc = "\x00\x00\x43\x21\x00\x00\x00\x07",
18331 .alen = 8,
18332 .input = "\x45\x00\x00\x30\xda\x3a\x00\x00"
18333 "\x80\x01\xdf\x3b\xc0\xa8\x00\x05"
18334 "\xc0\xa8\x00\x01\x08\x00\xc6\xcd"
18335 "\x02\x00\x07\x00\x61\x62\x63\x64"
18336 "\x65\x66\x67\x68\x69\x6a\x6b\x6c"
18337 "\x6d\x6e\x6f\x70\x71\x72\x73\x74"
18338 "\x01\x02\x02\x01",
18339 .ilen = 52,
18340 .result = "\x45\x00\x00\x30\xda\x3a\x00\x00"
18341 "\x80\x01\xdf\x3b\xc0\xa8\x00\x05"
18342 "\xc0\xa8\x00\x01\x08\x00\xc6\xcd"
18343 "\x02\x00\x07\x00\x61\x62\x63\x64"
18344 "\x65\x66\x67\x68\x69\x6a\x6b\x6c"
18345 "\x6d\x6e\x6f\x70\x71\x72\x73\x74"
18346 "\x01\x02\x02\x01\xf2\xa9\xa8\x36"
18347 "\xe1\x55\x10\x6a\xa8\xdc\xd6\x18"
18348 "\xe4\x09\x9a\xaa",
18349 .rlen = 68,
18350 }
18351};
18352
18353static struct aead_testvec aes_gcm_rfc4543_dec_tv_template[] = {
18354 { /* From draft-mcgrew-gcm-test-01 */
18355 .key = "\x4c\x80\xcd\xef\xbb\x5d\x10\xda"
18356 "\x90\x6a\xc7\x3c\x36\x13\xa6\x34"
18357 "\x22\x43\x3c\x64",
18358 .klen = 20,
18359 .iv = zeroed_string,
18360 .assoc = "\x00\x00\x43\x21\x00\x00\x00\x07",
18361 .alen = 8,
18362 .input = "\x45\x00\x00\x30\xda\x3a\x00\x00"
18363 "\x80\x01\xdf\x3b\xc0\xa8\x00\x05"
18364 "\xc0\xa8\x00\x01\x08\x00\xc6\xcd"
18365 "\x02\x00\x07\x00\x61\x62\x63\x64"
18366 "\x65\x66\x67\x68\x69\x6a\x6b\x6c"
18367 "\x6d\x6e\x6f\x70\x71\x72\x73\x74"
18368 "\x01\x02\x02\x01\xf2\xa9\xa8\x36"
18369 "\xe1\x55\x10\x6a\xa8\xdc\xd6\x18"
18370 "\xe4\x09\x9a\xaa",
18371 .ilen = 68,
18372 .result = "\x45\x00\x00\x30\xda\x3a\x00\x00"
18373 "\x80\x01\xdf\x3b\xc0\xa8\x00\x05"
18374 "\xc0\xa8\x00\x01\x08\x00\xc6\xcd"
18375 "\x02\x00\x07\x00\x61\x62\x63\x64"
18376 "\x65\x66\x67\x68\x69\x6a\x6b\x6c"
18377 "\x6d\x6e\x6f\x70\x71\x72\x73\x74"
18378 "\x01\x02\x02\x01",
18379 .rlen = 52,
18380 }, { /* nearly same as previous, but should fail */
18381 .key = "\x4c\x80\xcd\xef\xbb\x5d\x10\xda"
18382 "\x90\x6a\xc7\x3c\x36\x13\xa6\x34"
18383 "\x22\x43\x3c\x64",
18384 .klen = 20,
18385 .iv = zeroed_string,
18386 .assoc = "\x00\x00\x43\x21\x00\x00\x00\x07",
18387 .alen = 8,
18388 .input = "\x45\x00\x00\x30\xda\x3a\x00\x00"
18389 "\x80\x01\xdf\x3b\xc0\xa8\x00\x05"
18390 "\xc0\xa8\x00\x01\x08\x00\xc6\xcd"
18391 "\x02\x00\x07\x00\x61\x62\x63\x64"
18392 "\x65\x66\x67\x68\x69\x6a\x6b\x6c"
18393 "\x6d\x6e\x6f\x70\x71\x72\x73\x74"
18394 "\x01\x02\x02\x01\xf2\xa9\xa8\x36"
18395 "\xe1\x55\x10\x6a\xa8\xdc\xd6\x18"
18396 "\x00\x00\x00\x00",
18397 .ilen = 68,
18398 .novrfy = 1,
18399 .result = "\x45\x00\x00\x30\xda\x3a\x00\x00"
18400 "\x80\x01\xdf\x3b\xc0\xa8\x00\x05"
18401 "\xc0\xa8\x00\x01\x08\x00\xc6\xcd"
18402 "\x02\x00\x07\x00\x61\x62\x63\x64"
18403 "\x65\x66\x67\x68\x69\x6a\x6b\x6c"
18404 "\x6d\x6e\x6f\x70\x71\x72\x73\x74"
18405 "\x01\x02\x02\x01",
18406 .rlen = 52,
18407 },
18408};
18409
18196static struct aead_testvec aes_ccm_enc_tv_template[] = { 18410static struct aead_testvec aes_ccm_enc_tv_template[] = {
18197 { /* From RFC 3610 */ 18411 { /* From RFC 3610 */
18198 .key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" 18412 .key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
@@ -20783,8 +20997,72 @@ static struct cipher_testvec camellia_enc_tv_template[] = {
20783 "\x86\x1D\xB4\x28\xBF\x56\xED\x61" 20997 "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
20784 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" 20998 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
20785 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" 20999 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
20786 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", 21000 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
20787 .ilen = 496, 21001 "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
21002 "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
21003 "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
21004 "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
21005 "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
21006 "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
21007 "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
21008 "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
21009 "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
21010 "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
21011 "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
21012 "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
21013 "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
21014 "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
21015 "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
21016 "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
21017 "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
21018 "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
21019 "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
21020 "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
21021 "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
21022 "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
21023 "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
21024 "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
21025 "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
21026 "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
21027 "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
21028 "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
21029 "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
21030 "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
21031 "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
21032 "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
21033 "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
21034 "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
21035 "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
21036 "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
21037 "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
21038 "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
21039 "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
21040 "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
21041 "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
21042 "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
21043 "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
21044 "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
21045 "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
21046 "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
21047 "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
21048 "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
21049 "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
21050 "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
21051 "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
21052 "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
21053 "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
21054 "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
21055 "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
21056 "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
21057 "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
21058 "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
21059 "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
21060 "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
21061 "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
21062 "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
21063 "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
21064 "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
21065 .ilen = 1008,
20788 .result = "\xED\xCD\xDB\xB8\x68\xCE\xBD\xEA" 21066 .result = "\xED\xCD\xDB\xB8\x68\xCE\xBD\xEA"
20789 "\x9D\x9D\xCD\x9F\x4F\xFC\x4D\xB7" 21067 "\x9D\x9D\xCD\x9F\x4F\xFC\x4D\xB7"
20790 "\xA5\xFF\x6F\x43\x0F\xBA\x32\x04" 21068 "\xA5\xFF\x6F\x43\x0F\xBA\x32\x04"
@@ -20846,11 +21124,75 @@ static struct cipher_testvec camellia_enc_tv_template[] = {
20846 "\x2C\x35\x1B\x38\x85\x7D\xE8\xF3" 21124 "\x2C\x35\x1B\x38\x85\x7D\xE8\xF3"
20847 "\x87\x4F\xDA\xD8\x5F\xFC\xB6\x44" 21125 "\x87\x4F\xDA\xD8\x5F\xFC\xB6\x44"
20848 "\xD0\xE3\x9B\x8B\xBF\xD6\xB8\xC4" 21126 "\xD0\xE3\x9B\x8B\xBF\xD6\xB8\xC4"
20849 "\x73\xAE\x1D\x8B\x5B\x74\x8B\xCB", 21127 "\x73\xAE\x1D\x8B\x5B\x74\x8B\xCB"
20850 .rlen = 496, 21128 "\xA4\xAD\xCF\x5D\xD4\x58\xC9\xCD"
21129 "\xF7\x90\x68\xCF\xC9\x11\x52\x3E"
21130 "\xE8\xA1\xA3\x78\x8B\xD0\xAC\x0A"
21131 "\xD4\xC9\xA3\xA5\x55\x30\xC8\x3E"
21132 "\xED\x28\x39\xE9\x63\xED\x41\x70"
21133 "\x51\xE3\xC4\xA0\xFC\xD5\x43\xCB"
21134 "\x4D\x65\xC8\xFD\x3A\x91\x8F\x60"
21135 "\x8A\xA6\x6D\x9D\x3E\x01\x23\x4B"
21136 "\x50\x47\xC9\xDC\x9B\xDE\x37\xC5"
21137 "\xBF\x67\xB1\x6B\x78\x38\xD5\x7E"
21138 "\xB6\xFF\x67\x83\x3B\x6E\xBE\x23"
21139 "\x45\xFA\x1D\x69\x44\xFD\xC6\xB9"
21140 "\xD0\x4A\x92\xD1\xBE\xF6\x4A\xB7"
21141 "\xCA\xA8\xA2\x9E\x13\x87\x57\x92"
21142 "\x64\x7C\x85\x0B\xB3\x29\x37\xD8"
21143 "\xE6\xAA\xAF\xC4\x03\x67\xA3\xBF"
21144 "\x2E\x45\x83\xB6\xD8\x54\x00\x89"
21145 "\xF6\xBC\x3A\x7A\x88\x58\x51\xED"
21146 "\xF4\x4E\x01\xA5\xC3\x2E\xD9\x42"
21147 "\xBD\x6E\x0D\x0B\x21\xB0\x1A\xCC"
21148 "\xA4\xD3\x3F\xDC\x9B\x81\xD8\xF1"
21149 "\xEA\x7A\x6A\xB7\x07\xC9\x6D\x91"
21150 "\x6D\x3A\xF5\x5F\xA6\xFF\x87\x1E"
21151 "\x3F\xDD\xC0\x72\xEA\xAC\x08\x15"
21152 "\x21\xE6\xC6\xB6\x0D\xD8\x51\x86"
21153 "\x2A\x03\x73\xF7\x29\xD4\xC4\xE4"
21154 "\x7F\x95\x10\xF7\xAB\x3F\x92\x23"
21155 "\xD3\xCE\x9C\x2E\x46\x3B\x63\x43"
21156 "\xBB\xC2\x82\x7A\x83\xD5\x55\xE2"
21157 "\xE7\x9B\x2F\x92\xAF\xFD\x81\x56"
21158 "\x79\xFD\x3E\xF9\x46\xE0\x25\xD4"
21159 "\x38\xDE\xBC\x2C\xC4\x7A\x2A\x8F"
21160 "\x94\x4F\xD0\xAD\x9B\x37\x18\xD4"
21161 "\x0E\x4D\x0F\x02\x3A\xDC\x5A\xA2"
21162 "\x39\x25\x55\x20\x5A\xA6\x02\x9F"
21163 "\xE6\x77\x21\x77\xE5\x4B\x7B\x0B"
21164 "\x30\xF8\x5F\x33\x0F\x49\xCD\xFF"
21165 "\xF2\xE4\x35\xF9\xF0\x63\xC3\x7E"
21166 "\xF1\xA6\x73\xB4\xDF\xE7\xBB\x78"
21167 "\xFF\x21\xA9\xF3\xF3\xCF\x5D\xBA"
21168 "\xED\x87\x98\xAC\xFE\x48\x97\x6D"
21169 "\xA6\x7F\x69\x31\xB1\xC4\xFF\x14"
21170 "\xC6\x76\xD4\x10\xDD\xF6\x49\x2C"
21171 "\x9C\xC8\x6D\x76\xC0\x8F\x5F\x55"
21172 "\x2F\x3C\x8A\x30\xAA\xC3\x16\x55"
21173 "\xC6\xFC\x8D\x8B\xB9\xE5\x80\x6C"
21174 "\xC8\x7E\xBD\x65\x58\x36\xD5\xBC"
21175 "\xF0\x33\x52\x29\x70\xF9\x5C\xE9"
21176 "\xAC\x1F\xB5\x73\x56\x66\x54\xAF"
21177 "\x1B\x8F\x7D\xED\xAB\x03\xCE\xE3"
21178 "\xAE\x47\xB6\x69\x86\xE9\x01\x31"
21179 "\x83\x18\x3D\xF4\x74\x7B\xF9\x42"
21180 "\x4C\xFD\x75\x4A\x6D\xF0\x03\xA6"
21181 "\x2B\x20\x63\xDA\x49\x65\x5E\x8B"
21182 "\xC0\x19\xE3\x8D\xD9\xF3\xB0\x34"
21183 "\xD3\x52\xFC\x68\x00\x43\x1B\x37"
21184 "\x31\x93\x51\x1C\x63\x97\x70\xB0"
21185 "\x99\x78\x83\x13\xFD\xCF\x53\x81"
21186 "\x36\x46\xB5\x42\x52\x2F\x32\xEB"
21187 "\x4A\x3D\xF1\x8F\x1C\x54\x2E\xFC"
21188 "\x41\x75\x5A\x8C\x8E\x6F\xE7\x1A"
21189 "\xAE\xEF\x3E\x82\x12\x0B\x74\x72"
21190 "\xF8\xB2\xAA\x7A\xD6\xFF\xFA\x55"
21191 "\x33\x1A\xBB\xD3\xA2\x7E\x97\x66",
21192 .rlen = 1008,
20851 .also_non_np = 1, 21193 .also_non_np = 1,
20852 .np = 2, 21194 .np = 2,
20853 .tap = { 496 - 16, 16 }, 21195 .tap = { 1008 - 16, 16 },
20854 }, 21196 },
20855}; 21197};
20856 21198
@@ -20955,8 +21297,72 @@ static struct cipher_testvec camellia_dec_tv_template[] = {
20955 "\x2C\x35\x1B\x38\x85\x7D\xE8\xF3" 21297 "\x2C\x35\x1B\x38\x85\x7D\xE8\xF3"
20956 "\x87\x4F\xDA\xD8\x5F\xFC\xB6\x44" 21298 "\x87\x4F\xDA\xD8\x5F\xFC\xB6\x44"
20957 "\xD0\xE3\x9B\x8B\xBF\xD6\xB8\xC4" 21299 "\xD0\xE3\x9B\x8B\xBF\xD6\xB8\xC4"
20958 "\x73\xAE\x1D\x8B\x5B\x74\x8B\xCB", 21300 "\x73\xAE\x1D\x8B\x5B\x74\x8B\xCB"
20959 .ilen = 496, 21301 "\xA4\xAD\xCF\x5D\xD4\x58\xC9\xCD"
21302 "\xF7\x90\x68\xCF\xC9\x11\x52\x3E"
21303 "\xE8\xA1\xA3\x78\x8B\xD0\xAC\x0A"
21304 "\xD4\xC9\xA3\xA5\x55\x30\xC8\x3E"
21305 "\xED\x28\x39\xE9\x63\xED\x41\x70"
21306 "\x51\xE3\xC4\xA0\xFC\xD5\x43\xCB"
21307 "\x4D\x65\xC8\xFD\x3A\x91\x8F\x60"
21308 "\x8A\xA6\x6D\x9D\x3E\x01\x23\x4B"
21309 "\x50\x47\xC9\xDC\x9B\xDE\x37\xC5"
21310 "\xBF\x67\xB1\x6B\x78\x38\xD5\x7E"
21311 "\xB6\xFF\x67\x83\x3B\x6E\xBE\x23"
21312 "\x45\xFA\x1D\x69\x44\xFD\xC6\xB9"
21313 "\xD0\x4A\x92\xD1\xBE\xF6\x4A\xB7"
21314 "\xCA\xA8\xA2\x9E\x13\x87\x57\x92"
21315 "\x64\x7C\x85\x0B\xB3\x29\x37\xD8"
21316 "\xE6\xAA\xAF\xC4\x03\x67\xA3\xBF"
21317 "\x2E\x45\x83\xB6\xD8\x54\x00\x89"
21318 "\xF6\xBC\x3A\x7A\x88\x58\x51\xED"
21319 "\xF4\x4E\x01\xA5\xC3\x2E\xD9\x42"
21320 "\xBD\x6E\x0D\x0B\x21\xB0\x1A\xCC"
21321 "\xA4\xD3\x3F\xDC\x9B\x81\xD8\xF1"
21322 "\xEA\x7A\x6A\xB7\x07\xC9\x6D\x91"
21323 "\x6D\x3A\xF5\x5F\xA6\xFF\x87\x1E"
21324 "\x3F\xDD\xC0\x72\xEA\xAC\x08\x15"
21325 "\x21\xE6\xC6\xB6\x0D\xD8\x51\x86"
21326 "\x2A\x03\x73\xF7\x29\xD4\xC4\xE4"
21327 "\x7F\x95\x10\xF7\xAB\x3F\x92\x23"
21328 "\xD3\xCE\x9C\x2E\x46\x3B\x63\x43"
21329 "\xBB\xC2\x82\x7A\x83\xD5\x55\xE2"
21330 "\xE7\x9B\x2F\x92\xAF\xFD\x81\x56"
21331 "\x79\xFD\x3E\xF9\x46\xE0\x25\xD4"
21332 "\x38\xDE\xBC\x2C\xC4\x7A\x2A\x8F"
21333 "\x94\x4F\xD0\xAD\x9B\x37\x18\xD4"
21334 "\x0E\x4D\x0F\x02\x3A\xDC\x5A\xA2"
21335 "\x39\x25\x55\x20\x5A\xA6\x02\x9F"
21336 "\xE6\x77\x21\x77\xE5\x4B\x7B\x0B"
21337 "\x30\xF8\x5F\x33\x0F\x49\xCD\xFF"
21338 "\xF2\xE4\x35\xF9\xF0\x63\xC3\x7E"
21339 "\xF1\xA6\x73\xB4\xDF\xE7\xBB\x78"
21340 "\xFF\x21\xA9\xF3\xF3\xCF\x5D\xBA"
21341 "\xED\x87\x98\xAC\xFE\x48\x97\x6D"
21342 "\xA6\x7F\x69\x31\xB1\xC4\xFF\x14"
21343 "\xC6\x76\xD4\x10\xDD\xF6\x49\x2C"
21344 "\x9C\xC8\x6D\x76\xC0\x8F\x5F\x55"
21345 "\x2F\x3C\x8A\x30\xAA\xC3\x16\x55"
21346 "\xC6\xFC\x8D\x8B\xB9\xE5\x80\x6C"
21347 "\xC8\x7E\xBD\x65\x58\x36\xD5\xBC"
21348 "\xF0\x33\x52\x29\x70\xF9\x5C\xE9"
21349 "\xAC\x1F\xB5\x73\x56\x66\x54\xAF"
21350 "\x1B\x8F\x7D\xED\xAB\x03\xCE\xE3"
21351 "\xAE\x47\xB6\x69\x86\xE9\x01\x31"
21352 "\x83\x18\x3D\xF4\x74\x7B\xF9\x42"
21353 "\x4C\xFD\x75\x4A\x6D\xF0\x03\xA6"
21354 "\x2B\x20\x63\xDA\x49\x65\x5E\x8B"
21355 "\xC0\x19\xE3\x8D\xD9\xF3\xB0\x34"
21356 "\xD3\x52\xFC\x68\x00\x43\x1B\x37"
21357 "\x31\x93\x51\x1C\x63\x97\x70\xB0"
21358 "\x99\x78\x83\x13\xFD\xCF\x53\x81"
21359 "\x36\x46\xB5\x42\x52\x2F\x32\xEB"
21360 "\x4A\x3D\xF1\x8F\x1C\x54\x2E\xFC"
21361 "\x41\x75\x5A\x8C\x8E\x6F\xE7\x1A"
21362 "\xAE\xEF\x3E\x82\x12\x0B\x74\x72"
21363 "\xF8\xB2\xAA\x7A\xD6\xFF\xFA\x55"
21364 "\x33\x1A\xBB\xD3\xA2\x7E\x97\x66",
21365 .ilen = 1008,
20960 .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" 21366 .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
20961 "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" 21367 "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
20962 "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" 21368 "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
@@ -21018,11 +21424,75 @@ static struct cipher_testvec camellia_dec_tv_template[] = {
21018 "\x86\x1D\xB4\x28\xBF\x56\xED\x61" 21424 "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
21019 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" 21425 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
21020 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" 21426 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
21021 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", 21427 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
21022 .rlen = 496, 21428 "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
21429 "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
21430 "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
21431 "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
21432 "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
21433 "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
21434 "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
21435 "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
21436 "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
21437 "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
21438 "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
21439 "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
21440 "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
21441 "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
21442 "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
21443 "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
21444 "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
21445 "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
21446 "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
21447 "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
21448 "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
21449 "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
21450 "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
21451 "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
21452 "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
21453 "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
21454 "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
21455 "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
21456 "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
21457 "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
21458 "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
21459 "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
21460 "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
21461 "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
21462 "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
21463 "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
21464 "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
21465 "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
21466 "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
21467 "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
21468 "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
21469 "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
21470 "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
21471 "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
21472 "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
21473 "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
21474 "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
21475 "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
21476 "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
21477 "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
21478 "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
21479 "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
21480 "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
21481 "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
21482 "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
21483 "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
21484 "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
21485 "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
21486 "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
21487 "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
21488 "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
21489 "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
21490 "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
21491 "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
21492 .rlen = 1008,
21023 .also_non_np = 1, 21493 .also_non_np = 1,
21024 .np = 2, 21494 .np = 2,
21025 .tap = { 496 - 16, 16 }, 21495 .tap = { 1008 - 16, 16 },
21026 }, 21496 },
21027}; 21497};
21028 21498
@@ -21123,8 +21593,72 @@ static struct cipher_testvec camellia_cbc_enc_tv_template[] = {
21123 "\x86\x1D\xB4\x28\xBF\x56\xED\x61" 21593 "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
21124 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" 21594 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
21125 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" 21595 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
21126 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", 21596 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
21127 .ilen = 496, 21597 "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
21598 "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
21599 "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
21600 "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
21601 "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
21602 "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
21603 "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
21604 "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
21605 "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
21606 "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
21607 "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
21608 "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
21609 "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
21610 "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
21611 "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
21612 "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
21613 "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
21614 "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
21615 "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
21616 "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
21617 "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
21618 "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
21619 "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
21620 "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
21621 "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
21622 "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
21623 "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
21624 "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
21625 "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
21626 "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
21627 "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
21628 "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
21629 "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
21630 "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
21631 "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
21632 "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
21633 "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
21634 "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
21635 "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
21636 "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
21637 "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
21638 "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
21639 "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
21640 "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
21641 "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
21642 "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
21643 "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
21644 "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
21645 "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
21646 "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
21647 "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
21648 "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
21649 "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
21650 "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
21651 "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
21652 "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
21653 "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
21654 "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
21655 "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
21656 "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
21657 "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
21658 "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
21659 "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
21660 "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
21661 .ilen = 1008,
21128 .result = "\xCD\x3E\x2A\x3B\x3E\x94\xC5\x77" 21662 .result = "\xCD\x3E\x2A\x3B\x3E\x94\xC5\x77"
21129 "\xBA\xBB\x5B\xB1\xDE\x7B\xA4\x40" 21663 "\xBA\xBB\x5B\xB1\xDE\x7B\xA4\x40"
21130 "\x88\x39\xE3\xFD\x94\x4B\x25\x58" 21664 "\x88\x39\xE3\xFD\x94\x4B\x25\x58"
@@ -21186,11 +21720,75 @@ static struct cipher_testvec camellia_cbc_enc_tv_template[] = {
21186 "\x2D\x1A\x68\xFE\xEC\x92\x94\xDA" 21720 "\x2D\x1A\x68\xFE\xEC\x92\x94\xDA"
21187 "\x94\x2A\x6F\xD6\xFE\xE5\x76\x97" 21721 "\x94\x2A\x6F\xD6\xFE\xE5\x76\x97"
21188 "\xF4\x6E\xEE\xCB\x2B\x95\x4E\x36" 21722 "\xF4\x6E\xEE\xCB\x2B\x95\x4E\x36"
21189 "\x5F\x74\x8C\x86\x5B\x71\xD0\x20", 21723 "\x5F\x74\x8C\x86\x5B\x71\xD0\x20"
21190 .rlen = 496, 21724 "\x78\x1A\x7F\x18\x8C\xD9\xCD\xF5"
21725 "\x21\x41\x56\x72\x13\xE1\x86\x07"
21726 "\x07\x26\xF3\x4F\x7B\xEA\xB5\x18"
21727 "\xFE\x94\x2D\x9F\xE0\x72\x18\x65"
21728 "\xB2\xA5\x63\x48\xB4\x13\x22\xF7"
21729 "\x25\xF1\x80\xA8\x7F\x54\x86\x7B"
21730 "\x39\xAE\x95\x0C\x09\x32\x22\x2D"
21731 "\x4D\x73\x39\x0C\x09\x2C\x7C\x10"
21732 "\xD0\x4B\x53\xF6\x90\xC5\x99\x2F"
21733 "\x15\xE1\x7F\xC6\xC5\x7A\x52\x14"
21734 "\x65\xEE\x93\x54\xD0\x66\x15\x3C"
21735 "\x4C\x68\xFD\x64\x0F\xF9\x10\x39"
21736 "\x46\x7A\xDD\x97\x20\xEE\xC7\xD2"
21737 "\x98\x4A\xB6\xE6\xF5\xA8\x1F\x4F"
21738 "\xDB\xAB\x6D\xD5\x9B\x34\x16\x97"
21739 "\x2F\x64\xE5\x37\xEF\x0E\xA1\xE9"
21740 "\xBE\x31\x31\x96\x8B\x40\x18\x75"
21741 "\x11\x75\x14\x32\xA5\x2D\x1B\x6B"
21742 "\xDB\x59\xEB\xFA\x3D\x8E\x7C\xC4"
21743 "\xDE\x68\xC8\x9F\xC9\x99\xE3\xC6"
21744 "\x71\xB0\x12\x57\x89\x0D\xC0\x2B"
21745 "\x9F\x12\x6A\x04\x67\xF1\x95\x31"
21746 "\x59\xFD\x84\x95\x2C\x9C\x5B\xEC"
21747 "\x09\xB0\x43\x96\x4A\x64\x80\x40"
21748 "\xB9\x72\x19\xDD\x70\x42\xFA\xB1"
21749 "\x4A\x2C\x0C\x0A\x60\x6E\xE3\x7C"
21750 "\x37\x5A\xBE\xA4\x62\xCF\x29\xAB"
21751 "\x7F\x4D\xA6\xB3\xE2\xB6\x64\xC6"
21752 "\x33\x0B\xF3\xD5\x01\x38\x74\xA4"
21753 "\x67\x1E\x75\x68\xC3\xAD\x76\xE9"
21754 "\xE9\xBC\xF0\xEB\xD8\xFD\x31\x8A"
21755 "\x5F\xC9\x18\x94\x4B\x86\x66\xFC"
21756 "\xBD\x0B\x3D\xB3\x9F\xFA\x1F\xD9"
21757 "\x78\xC4\xE3\x24\x1C\x67\xA2\xF8"
21758 "\x43\xBC\x76\x75\xBF\x6C\x05\xB3"
21759 "\x32\xE8\x7C\x80\xDB\xC7\xB6\x61"
21760 "\x1A\x3E\x2B\xA7\x25\xED\x8F\xA0"
21761 "\x00\x4B\xF8\x90\xCA\xD8\xFB\x12"
21762 "\xAC\x1F\x18\xE9\xD2\x5E\xA2\x8E"
21763 "\xE4\x84\x6B\x9D\xEB\x1E\x6B\xA3"
21764 "\x7B\xDC\xCE\x15\x97\x27\xB2\x65"
21765 "\xBC\x0E\x47\xAB\x55\x13\x53\xAB"
21766 "\x0E\x34\x55\x02\x5F\x27\xC5\x89"
21767 "\xDF\xC5\x70\xC4\xDD\x76\x82\xEE"
21768 "\x68\xA6\x09\xB0\xE5\x5E\xF1\x0C"
21769 "\xE3\xF3\x09\x9B\xFE\x65\x4B\xB8"
21770 "\x30\xEC\xD5\x7C\x6A\xEC\x1D\xD2"
21771 "\x93\xB7\xA1\x1A\x02\xD4\xC0\xD6"
21772 "\x8D\x4D\x83\x9A\xED\x29\x4E\x14"
21773 "\x86\xD5\x3C\x1A\xD5\xB9\x0A\x6A"
21774 "\x72\x22\xD5\x92\x38\xF1\xA1\x86"
21775 "\xB2\x41\x51\xCA\x4E\xAB\x8F\xD3"
21776 "\x80\x56\xC3\xD7\x65\xE1\xB3\x86"
21777 "\xCB\xCE\x98\xA1\xD4\x59\x1C\x06"
21778 "\x01\xED\xF8\x29\x91\x19\x5C\x9A"
21779 "\xEE\x28\x1B\x48\xD7\x32\xEF\x9F"
21780 "\x6C\x2B\x66\x4E\x78\xD5\x8B\x72"
21781 "\x80\xE7\x29\xDC\x23\x55\x98\x54"
21782 "\xB1\xFF\x3E\x95\x56\xA8\x78\x78"
21783 "\xEF\xC4\xA5\x11\x2D\x2B\xD8\x93"
21784 "\x30\x6E\x7E\x51\xBB\x42\x5F\x03"
21785 "\x43\x94\x23\x7E\xEE\xF0\xA5\x79"
21786 "\x55\x01\xD4\x58\xB2\xF2\x85\x49"
21787 "\x70\xC5\xB9\x0B\x3B\x7A\x6E\x6C",
21788 .rlen = 1008,
21191 .also_non_np = 1, 21789 .also_non_np = 1,
21192 .np = 2, 21790 .np = 2,
21193 .tap = { 496 - 16, 16 }, 21791 .tap = { 1008 - 16, 16 },
21194 }, 21792 },
21195}; 21793};
21196 21794
@@ -21291,8 +21889,72 @@ static struct cipher_testvec camellia_cbc_dec_tv_template[] = {
21291 "\x2D\x1A\x68\xFE\xEC\x92\x94\xDA" 21889 "\x2D\x1A\x68\xFE\xEC\x92\x94\xDA"
21292 "\x94\x2A\x6F\xD6\xFE\xE5\x76\x97" 21890 "\x94\x2A\x6F\xD6\xFE\xE5\x76\x97"
21293 "\xF4\x6E\xEE\xCB\x2B\x95\x4E\x36" 21891 "\xF4\x6E\xEE\xCB\x2B\x95\x4E\x36"
21294 "\x5F\x74\x8C\x86\x5B\x71\xD0\x20", 21892 "\x5F\x74\x8C\x86\x5B\x71\xD0\x20"
21295 .ilen = 496, 21893 "\x78\x1A\x7F\x18\x8C\xD9\xCD\xF5"
21894 "\x21\x41\x56\x72\x13\xE1\x86\x07"
21895 "\x07\x26\xF3\x4F\x7B\xEA\xB5\x18"
21896 "\xFE\x94\x2D\x9F\xE0\x72\x18\x65"
21897 "\xB2\xA5\x63\x48\xB4\x13\x22\xF7"
21898 "\x25\xF1\x80\xA8\x7F\x54\x86\x7B"
21899 "\x39\xAE\x95\x0C\x09\x32\x22\x2D"
21900 "\x4D\x73\x39\x0C\x09\x2C\x7C\x10"
21901 "\xD0\x4B\x53\xF6\x90\xC5\x99\x2F"
21902 "\x15\xE1\x7F\xC6\xC5\x7A\x52\x14"
21903 "\x65\xEE\x93\x54\xD0\x66\x15\x3C"
21904 "\x4C\x68\xFD\x64\x0F\xF9\x10\x39"
21905 "\x46\x7A\xDD\x97\x20\xEE\xC7\xD2"
21906 "\x98\x4A\xB6\xE6\xF5\xA8\x1F\x4F"
21907 "\xDB\xAB\x6D\xD5\x9B\x34\x16\x97"
21908 "\x2F\x64\xE5\x37\xEF\x0E\xA1\xE9"
21909 "\xBE\x31\x31\x96\x8B\x40\x18\x75"
21910 "\x11\x75\x14\x32\xA5\x2D\x1B\x6B"
21911 "\xDB\x59\xEB\xFA\x3D\x8E\x7C\xC4"
21912 "\xDE\x68\xC8\x9F\xC9\x99\xE3\xC6"
21913 "\x71\xB0\x12\x57\x89\x0D\xC0\x2B"
21914 "\x9F\x12\x6A\x04\x67\xF1\x95\x31"
21915 "\x59\xFD\x84\x95\x2C\x9C\x5B\xEC"
21916 "\x09\xB0\x43\x96\x4A\x64\x80\x40"
21917 "\xB9\x72\x19\xDD\x70\x42\xFA\xB1"
21918 "\x4A\x2C\x0C\x0A\x60\x6E\xE3\x7C"
21919 "\x37\x5A\xBE\xA4\x62\xCF\x29\xAB"
21920 "\x7F\x4D\xA6\xB3\xE2\xB6\x64\xC6"
21921 "\x33\x0B\xF3\xD5\x01\x38\x74\xA4"
21922 "\x67\x1E\x75\x68\xC3\xAD\x76\xE9"
21923 "\xE9\xBC\xF0\xEB\xD8\xFD\x31\x8A"
21924 "\x5F\xC9\x18\x94\x4B\x86\x66\xFC"
21925 "\xBD\x0B\x3D\xB3\x9F\xFA\x1F\xD9"
21926 "\x78\xC4\xE3\x24\x1C\x67\xA2\xF8"
21927 "\x43\xBC\x76\x75\xBF\x6C\x05\xB3"
21928 "\x32\xE8\x7C\x80\xDB\xC7\xB6\x61"
21929 "\x1A\x3E\x2B\xA7\x25\xED\x8F\xA0"
21930 "\x00\x4B\xF8\x90\xCA\xD8\xFB\x12"
21931 "\xAC\x1F\x18\xE9\xD2\x5E\xA2\x8E"
21932 "\xE4\x84\x6B\x9D\xEB\x1E\x6B\xA3"
21933 "\x7B\xDC\xCE\x15\x97\x27\xB2\x65"
21934 "\xBC\x0E\x47\xAB\x55\x13\x53\xAB"
21935 "\x0E\x34\x55\x02\x5F\x27\xC5\x89"
21936 "\xDF\xC5\x70\xC4\xDD\x76\x82\xEE"
21937 "\x68\xA6\x09\xB0\xE5\x5E\xF1\x0C"
21938 "\xE3\xF3\x09\x9B\xFE\x65\x4B\xB8"
21939 "\x30\xEC\xD5\x7C\x6A\xEC\x1D\xD2"
21940 "\x93\xB7\xA1\x1A\x02\xD4\xC0\xD6"
21941 "\x8D\x4D\x83\x9A\xED\x29\x4E\x14"
21942 "\x86\xD5\x3C\x1A\xD5\xB9\x0A\x6A"
21943 "\x72\x22\xD5\x92\x38\xF1\xA1\x86"
21944 "\xB2\x41\x51\xCA\x4E\xAB\x8F\xD3"
21945 "\x80\x56\xC3\xD7\x65\xE1\xB3\x86"
21946 "\xCB\xCE\x98\xA1\xD4\x59\x1C\x06"
21947 "\x01\xED\xF8\x29\x91\x19\x5C\x9A"
21948 "\xEE\x28\x1B\x48\xD7\x32\xEF\x9F"
21949 "\x6C\x2B\x66\x4E\x78\xD5\x8B\x72"
21950 "\x80\xE7\x29\xDC\x23\x55\x98\x54"
21951 "\xB1\xFF\x3E\x95\x56\xA8\x78\x78"
21952 "\xEF\xC4\xA5\x11\x2D\x2B\xD8\x93"
21953 "\x30\x6E\x7E\x51\xBB\x42\x5F\x03"
21954 "\x43\x94\x23\x7E\xEE\xF0\xA5\x79"
21955 "\x55\x01\xD4\x58\xB2\xF2\x85\x49"
21956 "\x70\xC5\xB9\x0B\x3B\x7A\x6E\x6C",
21957 .ilen = 1008,
21296 .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" 21958 .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
21297 "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" 21959 "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
21298 "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" 21960 "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
@@ -21354,11 +22016,75 @@ static struct cipher_testvec camellia_cbc_dec_tv_template[] = {
21354 "\x86\x1D\xB4\x28\xBF\x56\xED\x61" 22016 "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
21355 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" 22017 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
21356 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" 22018 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
21357 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", 22019 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
21358 .rlen = 496, 22020 "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
22021 "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
22022 "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
22023 "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
22024 "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
22025 "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
22026 "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
22027 "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
22028 "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
22029 "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
22030 "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
22031 "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
22032 "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
22033 "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
22034 "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
22035 "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
22036 "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
22037 "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
22038 "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
22039 "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
22040 "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
22041 "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
22042 "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
22043 "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
22044 "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
22045 "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
22046 "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
22047 "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
22048 "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
22049 "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
22050 "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
22051 "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
22052 "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
22053 "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
22054 "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
22055 "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
22056 "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
22057 "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
22058 "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
22059 "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
22060 "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
22061 "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
22062 "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
22063 "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
22064 "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
22065 "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
22066 "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
22067 "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
22068 "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
22069 "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
22070 "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
22071 "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
22072 "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
22073 "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
22074 "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
22075 "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
22076 "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
22077 "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
22078 "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
22079 "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
22080 "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
22081 "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
22082 "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
22083 "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
22084 .rlen = 1008,
21359 .also_non_np = 1, 22085 .also_non_np = 1,
21360 .np = 2, 22086 .np = 2,
21361 .tap = { 496 - 16, 16 }, 22087 .tap = { 1008 - 16, 16 },
21362 }, 22088 },
21363}; 22089};
21364 22090
@@ -21567,8 +22293,72 @@ static struct cipher_testvec camellia_ctr_enc_tv_template[] = {
21567 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" 22293 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
21568 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" 22294 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
21569 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" 22295 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
21570 "\x2B\xC2\x59", 22296 "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
21571 .ilen = 499, 22297 "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
22298 "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
22299 "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
22300 "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
22301 "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
22302 "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
22303 "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
22304 "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
22305 "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
22306 "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
22307 "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
22308 "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
22309 "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
22310 "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
22311 "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
22312 "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
22313 "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
22314 "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
22315 "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
22316 "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
22317 "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
22318 "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
22319 "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
22320 "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
22321 "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
22322 "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
22323 "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
22324 "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
22325 "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
22326 "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
22327 "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
22328 "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
22329 "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
22330 "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
22331 "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
22332 "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
22333 "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
22334 "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
22335 "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
22336 "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
22337 "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
22338 "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
22339 "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
22340 "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
22341 "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
22342 "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
22343 "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
22344 "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
22345 "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
22346 "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
22347 "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
22348 "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
22349 "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
22350 "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
22351 "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
22352 "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
22353 "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
22354 "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
22355 "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
22356 "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
22357 "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
22358 "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
22359 "\x72\x09\xA0\x14\xAB\x42\xD9\x4D"
22360 "\xE4\x7B\x12",
22361 .ilen = 1011,
21572 .result = "\xF3\x06\x3A\x84\xCD\xBA\x8E\x11" 22362 .result = "\xF3\x06\x3A\x84\xCD\xBA\x8E\x11"
21573 "\xB7\x74\x6F\x5C\x97\xFB\x36\xFE" 22363 "\xB7\x74\x6F\x5C\x97\xFB\x36\xFE"
21574 "\xDE\x71\x58\xD4\x15\xD1\xC1\xA4" 22364 "\xDE\x71\x58\xD4\x15\xD1\xC1\xA4"
@@ -21631,11 +22421,75 @@ static struct cipher_testvec camellia_ctr_enc_tv_template[] = {
21631 "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48" 22421 "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48"
21632 "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0" 22422 "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0"
21633 "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D" 22423 "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D"
21634 "\x93\x11\x1C", 22424 "\x93\x11\x1C\xE9\xD2\x9F\x6E\x90"
21635 .rlen = 499, 22425 "\xE5\x41\x4A\xE2\x3C\x45\x29\x35"
22426 "\xEC\xD6\x47\x50\xCB\x7B\xA2\x32"
22427 "\xF7\x8B\x62\xF1\xE3\x9A\xFE\xC7"
22428 "\x1D\x8C\x02\x72\x68\x09\xE9\xB6"
22429 "\x4A\x80\xE6\xB1\x56\xDF\x90\xD4"
22430 "\x93\x74\xA4\xCE\x20\x23\xBF\x48"
22431 "\xA5\xDE\x1B\xFA\x40\x69\x31\x98"
22432 "\x62\x6E\xA5\xC7\xBF\x0C\x62\xE5"
22433 "\x6D\xE1\x93\xF1\x83\x10\x1C\xCA"
22434 "\xF6\x5C\x19\xF8\x90\x78\xCB\xE4"
22435 "\x0B\x3A\xB5\xF8\x43\x86\xD3\x3F"
22436 "\xBA\x83\x34\x3C\x42\xCC\x7D\x28"
22437 "\x29\x63\x4F\xD8\x02\x17\xC5\x07"
22438 "\x2C\xA4\xAC\x79\xCB\xC3\xA9\x09"
22439 "\x81\x45\x18\xED\xE4\xCB\x42\x3B"
22440 "\x87\x2D\x23\xDC\xC5\xBA\x45\xBD"
22441 "\x92\xE5\x02\x97\x96\xCE\xAD\xEC"
22442 "\xBA\xD8\x76\xF8\xCA\xC1\x31\xEC"
22443 "\x1E\x4F\x3F\x83\xF8\x33\xE8\x6E"
22444 "\xCC\xF8\x5F\xDD\x65\x50\x99\x69"
22445 "\xAF\x48\xCE\xA5\xBA\xB6\x14\x9F"
22446 "\x05\x93\xB2\xE6\x59\xC8\x28\xFE"
22447 "\x8F\x37\xF9\x64\xB9\xA5\x56\x8F"
22448 "\xF1\x1B\x90\xEF\xAE\xEB\xFC\x09"
22449 "\x11\x7A\xF2\x19\x0A\x0A\x9A\x3C"
22450 "\xE2\x5E\x29\xFA\x31\x9B\xC1\x74"
22451 "\x1E\x10\x3E\x07\xA9\x31\x6D\xF8"
22452 "\x81\xF5\xD5\x8A\x04\x23\x51\xAC"
22453 "\xA2\xE2\x63\xFD\x27\x1F\x79\x5B"
22454 "\x1F\xE8\xDA\x11\x49\x4D\x1C\xBA"
22455 "\x54\xCC\x0F\xBA\x92\x69\xE5\xCB"
22456 "\x41\x1A\x67\xA6\x40\x82\x70\x8C"
22457 "\x19\x79\x08\xA4\x51\x20\x7D\xC9"
22458 "\x12\x27\xAE\x20\x0D\x2C\xA1\x6D"
22459 "\xF4\x55\xD4\xE7\xE6\xD4\x28\x08"
22460 "\x00\x70\x12\x56\x56\x50\xAD\x14"
22461 "\x5C\x3E\xA2\xD1\x36\x3F\x36\x48"
22462 "\xED\xB1\x57\x3E\x5D\x15\xF6\x1E"
22463 "\x53\xE9\xA4\x3E\xED\x7D\xCF\x7D"
22464 "\x29\xAF\xF3\x1E\x51\xA8\x9F\x85"
22465 "\x8B\xF0\xBB\xCE\xCC\x39\xC3\x64"
22466 "\x4B\xF2\xAD\x70\x19\xD4\x44\x8F"
22467 "\x91\x76\xE8\x15\x66\x34\x9F\xF6"
22468 "\x0F\x15\xA4\xA8\x24\xF8\x58\xB1"
22469 "\x38\x46\x47\xC7\x9B\xCA\xE9\x42"
22470 "\x44\xAA\xE6\xB5\x9C\x91\xA4\xD3"
22471 "\x16\xA0\xED\x42\xBE\xB5\x06\x19"
22472 "\xBE\x67\xE8\xBC\x22\x32\xA4\x1E"
22473 "\x93\xEB\xBE\xE9\xE1\x93\xE5\x31"
22474 "\x3A\xA2\x75\xDF\xE3\x6B\xE7\xCC"
22475 "\xB4\x70\x20\xE0\x6D\x82\x7C\xC8"
22476 "\x94\x5C\x5E\x37\x18\xAD\xED\x8B"
22477 "\x44\x86\xCA\x5E\x07\xB7\x70\x8D"
22478 "\x40\x48\x19\x73\x7C\x78\x64\x0B"
22479 "\xDB\x01\xCA\xAE\x63\x19\xE9\xD1"
22480 "\x6B\x2C\x84\x10\x45\x42\x2E\xC3"
22481 "\xDF\x7F\xAA\xE8\x87\x1B\x63\x46"
22482 "\x74\x28\x9D\x05\x30\x20\x62\x41"
22483 "\xC0\x9F\x2C\x36\x2B\x78\xD7\x26"
22484 "\xDF\x58\x51\xED\xFA\xDC\x87\x79"
22485 "\xBF\x8C\xBF\xC4\x0F\xE5\x05\xDA"
22486 "\x45\xE3\x35\x0D\x69\x91\x54\x1C"
22487 "\xE7\x2C\x49\x08\x8B\x72\xFA\x5C"
22488 "\xF1\x6B\xD9",
22489 .rlen = 1011,
21636 .also_non_np = 1, 22490 .also_non_np = 1,
21637 .np = 2, 22491 .np = 2,
21638 .tap = { 499 - 16, 16 }, 22492 .tap = { 1011 - 16, 16 },
21639 }, { /* Generated with Crypto++ */ 22493 }, { /* Generated with Crypto++ */
21640 .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9" 22494 .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
21641 "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A" 22495 "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -21705,8 +22559,72 @@ static struct cipher_testvec camellia_ctr_enc_tv_template[] = {
21705 "\x86\x1D\xB4\x28\xBF\x56\xED\x61" 22559 "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
21706 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" 22560 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
21707 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" 22561 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
21708 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", 22562 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
21709 .ilen = 496, 22563 "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
22564 "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
22565 "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
22566 "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
22567 "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
22568 "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
22569 "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
22570 "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
22571 "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
22572 "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
22573 "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
22574 "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
22575 "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
22576 "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
22577 "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
22578 "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
22579 "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
22580 "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
22581 "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
22582 "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
22583 "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
22584 "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
22585 "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
22586 "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
22587 "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
22588 "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
22589 "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
22590 "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
22591 "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
22592 "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
22593 "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
22594 "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
22595 "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
22596 "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
22597 "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
22598 "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
22599 "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
22600 "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
22601 "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
22602 "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
22603 "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
22604 "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
22605 "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
22606 "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
22607 "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
22608 "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
22609 "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
22610 "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
22611 "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
22612 "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
22613 "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
22614 "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
22615 "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
22616 "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
22617 "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
22618 "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
22619 "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
22620 "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
22621 "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
22622 "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
22623 "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
22624 "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
22625 "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
22626 "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
22627 .ilen = 1008,
21710 .result = "\x85\x79\x6C\x8B\x2B\x6D\x14\xF9" 22628 .result = "\x85\x79\x6C\x8B\x2B\x6D\x14\xF9"
21711 "\xA6\x83\xB6\x80\x5B\x3A\xF3\x7E" 22629 "\xA6\x83\xB6\x80\x5B\x3A\xF3\x7E"
21712 "\x30\x29\xEB\x1F\xDC\x19\x5F\xEB" 22630 "\x30\x29\xEB\x1F\xDC\x19\x5F\xEB"
@@ -21768,8 +22686,72 @@ static struct cipher_testvec camellia_ctr_enc_tv_template[] = {
21768 "\xB4\x3A\x5F\x19\xCF\x42\x1B\x22" 22686 "\xB4\x3A\x5F\x19\xCF\x42\x1B\x22"
21769 "\x0B\x2D\x7B\xF1\xC5\x43\xF7\x5E" 22687 "\x0B\x2D\x7B\xF1\xC5\x43\xF7\x5E"
21770 "\x12\xA8\x01\x64\x16\x0B\x26\x5A" 22688 "\x12\xA8\x01\x64\x16\x0B\x26\x5A"
21771 "\x0C\x95\x0F\x40\xC5\x5A\x06\x7C", 22689 "\x0C\x95\x0F\x40\xC5\x5A\x06\x7C"
21772 .rlen = 496, 22690 "\xCF\xF5\xD5\xB7\x7A\x34\x23\xB6"
22691 "\xAA\x9E\xA8\x98\xA2\xF8\x3D\xD3"
22692 "\x3F\x23\x69\x63\x56\x96\x45\xD6"
22693 "\x74\x23\x1D\x5C\x63\xCC\xD8\x78"
22694 "\x16\xE2\x9C\xD2\x80\x02\xF2\x28"
22695 "\x69\x2F\xC4\xA8\x15\x15\x24\x3B"
22696 "\xCB\xF0\x14\xE4\x62\xC8\xF3\xD1"
22697 "\x03\x58\x1B\x33\x77\x74\x1F\xB4"
22698 "\x07\x86\xF2\x21\xB7\x41\xAE\xBF"
22699 "\x25\xC2\xFF\x51\xEF\xEA\xCE\xC4"
22700 "\x5F\xD9\xB8\x18\x6A\xF0\x0F\x0D"
22701 "\xF8\x04\xBB\x6D\x62\x33\x87\x26"
22702 "\x4F\x2F\x14\x6E\xDC\xDB\x66\x09"
22703 "\x2A\xEF\x7D\x84\x10\xAC\x82\x5E"
22704 "\xD2\xE4\xAD\x74\x7A\x6D\xCC\x3A"
22705 "\x7B\x62\xD8\xD6\x07\x2D\xF7\xDF"
22706 "\x9B\xB3\x82\xCF\x9C\x1D\x76\x5C"
22707 "\xAC\x7B\xD4\x9B\x45\xA1\x64\x11"
22708 "\x66\xF1\xA7\x0B\xF9\xDD\x00\xDD"
22709 "\xA4\x45\x3D\x3E\x03\xC9\x2E\xCB"
22710 "\xC3\x14\x84\x72\xFD\x41\xDC\xBD"
22711 "\x75\xBE\xA8\xE5\x16\x48\x64\x39"
22712 "\xCA\xF3\xE6\xDC\x25\x24\xF1\x6D"
22713 "\xB2\x8D\xC5\x38\x54\xD3\x5D\x6D"
22714 "\x0B\x29\x10\x15\x0E\x13\x3B\xAC"
22715 "\x7E\xCC\x9E\x3E\x18\x48\xA6\x02"
22716 "\xEF\x03\xB2\x2E\xE3\xD2\x70\x21"
22717 "\xB4\x19\x26\xBE\x3A\x3D\x05\xE0"
22718 "\xF8\x09\xAF\xE4\x31\x26\x92\x2F"
22719 "\x8F\x55\xAC\xED\x0B\xB2\xA5\x34"
22720 "\xBE\x50\xB1\x02\x22\x96\xE3\x40"
22721 "\x7B\x70\x50\x6E\x3B\xD5\xE5\xA0"
22722 "\x8E\xA2\xAD\x14\x60\x5C\x7A\x2B"
22723 "\x3D\x1B\x7F\xC1\xC0\x2C\x56\x36"
22724 "\xD2\x0A\x32\x06\x97\x34\xB9\xF4"
22725 "\x6F\x9F\x7E\x80\xD0\x9D\xF7\x6A"
22726 "\x21\xC1\xA2\x6A\xB1\x96\x5B\x4D"
22727 "\x7A\x15\x6C\xC4\x4E\xB8\xE0\x9E"
22728 "\x6C\x50\xF3\x9C\xC9\xB5\x23\xB7"
22729 "\xF1\xD4\x29\x4A\x23\xC4\xAD\x1E"
22730 "\x2C\x07\xD2\x43\x5F\x57\x93\xCA"
22731 "\x85\xF9\x9F\xAD\x4C\xF1\xE4\xB1"
22732 "\x1A\x8E\x28\xA4\xB6\x52\x77\x7E"
22733 "\x68\xC6\x47\xB9\x76\xCC\x65\x5F"
22734 "\x0B\xF9\x67\x93\xD8\x0E\x9A\x37"
22735 "\x5F\x41\xED\x64\x6C\xAD\x5F\xED"
22736 "\x3F\x8D\xFB\x8E\x1E\xA0\xE4\x1F"
22737 "\xC2\xC7\xED\x18\x43\xE1\x20\x86"
22738 "\x5D\xBC\x30\x70\x22\xA1\xDC\x53"
22739 "\x10\x3A\x8D\x47\x82\xCD\x7F\x59"
22740 "\x03\x2D\x6D\xF5\xE7\x79\xD4\x07"
22741 "\x68\x2A\xA5\x42\x19\x4D\xAF\xF5"
22742 "\xED\x47\x83\xBC\x5F\x62\x84\xDA"
22743 "\xDA\x41\xFF\xB0\x1D\x64\xA3\xC8"
22744 "\xBD\x4E\xE0\xB8\x7F\xEE\x55\x0A"
22745 "\x4E\x61\xB2\x51\xF6\x9C\x95\xF6"
22746 "\x92\xBB\xF6\xC5\xF0\x09\x86\xDE"
22747 "\x37\x9E\x29\xF9\x2A\x18\x73\x0D"
22748 "\xDC\x7E\x6B\x7B\x1B\x43\x8C\xEA"
22749 "\x13\xC8\x1A\x47\x0A\x2D\x6D\x56"
22750 "\xCD\xD2\xE7\x53\x1A\xAB\x1C\x3C"
22751 "\xC5\x9B\x03\x70\x29\x2A\x49\x09"
22752 "\x67\xA1\xEA\xD6\x3A\x5B\xBF\x71"
22753 "\x1D\x48\x64\x6C\xFB\xC0\x9E\x36",
22754 .rlen = 1008,
21773 }, 22755 },
21774}; 22756};
21775 22757
@@ -21978,8 +22960,72 @@ static struct cipher_testvec camellia_ctr_dec_tv_template[] = {
21978 "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48" 22960 "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48"
21979 "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0" 22961 "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0"
21980 "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D" 22962 "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D"
21981 "\x93\x11\x1C", 22963 "\x93\x11\x1C\xE9\xD2\x9F\x6E\x90"
21982 .ilen = 499, 22964 "\xE5\x41\x4A\xE2\x3C\x45\x29\x35"
22965 "\xEC\xD6\x47\x50\xCB\x7B\xA2\x32"
22966 "\xF7\x8B\x62\xF1\xE3\x9A\xFE\xC7"
22967 "\x1D\x8C\x02\x72\x68\x09\xE9\xB6"
22968 "\x4A\x80\xE6\xB1\x56\xDF\x90\xD4"
22969 "\x93\x74\xA4\xCE\x20\x23\xBF\x48"
22970 "\xA5\xDE\x1B\xFA\x40\x69\x31\x98"
22971 "\x62\x6E\xA5\xC7\xBF\x0C\x62\xE5"
22972 "\x6D\xE1\x93\xF1\x83\x10\x1C\xCA"
22973 "\xF6\x5C\x19\xF8\x90\x78\xCB\xE4"
22974 "\x0B\x3A\xB5\xF8\x43\x86\xD3\x3F"
22975 "\xBA\x83\x34\x3C\x42\xCC\x7D\x28"
22976 "\x29\x63\x4F\xD8\x02\x17\xC5\x07"
22977 "\x2C\xA4\xAC\x79\xCB\xC3\xA9\x09"
22978 "\x81\x45\x18\xED\xE4\xCB\x42\x3B"
22979 "\x87\x2D\x23\xDC\xC5\xBA\x45\xBD"
22980 "\x92\xE5\x02\x97\x96\xCE\xAD\xEC"
22981 "\xBA\xD8\x76\xF8\xCA\xC1\x31\xEC"
22982 "\x1E\x4F\x3F\x83\xF8\x33\xE8\x6E"
22983 "\xCC\xF8\x5F\xDD\x65\x50\x99\x69"
22984 "\xAF\x48\xCE\xA5\xBA\xB6\x14\x9F"
22985 "\x05\x93\xB2\xE6\x59\xC8\x28\xFE"
22986 "\x8F\x37\xF9\x64\xB9\xA5\x56\x8F"
22987 "\xF1\x1B\x90\xEF\xAE\xEB\xFC\x09"
22988 "\x11\x7A\xF2\x19\x0A\x0A\x9A\x3C"
22989 "\xE2\x5E\x29\xFA\x31\x9B\xC1\x74"
22990 "\x1E\x10\x3E\x07\xA9\x31\x6D\xF8"
22991 "\x81\xF5\xD5\x8A\x04\x23\x51\xAC"
22992 "\xA2\xE2\x63\xFD\x27\x1F\x79\x5B"
22993 "\x1F\xE8\xDA\x11\x49\x4D\x1C\xBA"
22994 "\x54\xCC\x0F\xBA\x92\x69\xE5\xCB"
22995 "\x41\x1A\x67\xA6\x40\x82\x70\x8C"
22996 "\x19\x79\x08\xA4\x51\x20\x7D\xC9"
22997 "\x12\x27\xAE\x20\x0D\x2C\xA1\x6D"
22998 "\xF4\x55\xD4\xE7\xE6\xD4\x28\x08"
22999 "\x00\x70\x12\x56\x56\x50\xAD\x14"
23000 "\x5C\x3E\xA2\xD1\x36\x3F\x36\x48"
23001 "\xED\xB1\x57\x3E\x5D\x15\xF6\x1E"
23002 "\x53\xE9\xA4\x3E\xED\x7D\xCF\x7D"
23003 "\x29\xAF\xF3\x1E\x51\xA8\x9F\x85"
23004 "\x8B\xF0\xBB\xCE\xCC\x39\xC3\x64"
23005 "\x4B\xF2\xAD\x70\x19\xD4\x44\x8F"
23006 "\x91\x76\xE8\x15\x66\x34\x9F\xF6"
23007 "\x0F\x15\xA4\xA8\x24\xF8\x58\xB1"
23008 "\x38\x46\x47\xC7\x9B\xCA\xE9\x42"
23009 "\x44\xAA\xE6\xB5\x9C\x91\xA4\xD3"
23010 "\x16\xA0\xED\x42\xBE\xB5\x06\x19"
23011 "\xBE\x67\xE8\xBC\x22\x32\xA4\x1E"
23012 "\x93\xEB\xBE\xE9\xE1\x93\xE5\x31"
23013 "\x3A\xA2\x75\xDF\xE3\x6B\xE7\xCC"
23014 "\xB4\x70\x20\xE0\x6D\x82\x7C\xC8"
23015 "\x94\x5C\x5E\x37\x18\xAD\xED\x8B"
23016 "\x44\x86\xCA\x5E\x07\xB7\x70\x8D"
23017 "\x40\x48\x19\x73\x7C\x78\x64\x0B"
23018 "\xDB\x01\xCA\xAE\x63\x19\xE9\xD1"
23019 "\x6B\x2C\x84\x10\x45\x42\x2E\xC3"
23020 "\xDF\x7F\xAA\xE8\x87\x1B\x63\x46"
23021 "\x74\x28\x9D\x05\x30\x20\x62\x41"
23022 "\xC0\x9F\x2C\x36\x2B\x78\xD7\x26"
23023 "\xDF\x58\x51\xED\xFA\xDC\x87\x79"
23024 "\xBF\x8C\xBF\xC4\x0F\xE5\x05\xDA"
23025 "\x45\xE3\x35\x0D\x69\x91\x54\x1C"
23026 "\xE7\x2C\x49\x08\x8B\x72\xFA\x5C"
23027 "\xF1\x6B\xD9",
23028 .ilen = 1011,
21983 .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" 23029 .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
21984 "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" 23030 "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
21985 "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" 23031 "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
@@ -22042,11 +23088,75 @@ static struct cipher_testvec camellia_ctr_dec_tv_template[] = {
22042 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" 23088 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
22043 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" 23089 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
22044 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" 23090 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
22045 "\x2B\xC2\x59", 23091 "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
22046 .rlen = 499, 23092 "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
23093 "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
23094 "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
23095 "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
23096 "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
23097 "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
23098 "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
23099 "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
23100 "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
23101 "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
23102 "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
23103 "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
23104 "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
23105 "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
23106 "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
23107 "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
23108 "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
23109 "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
23110 "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
23111 "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
23112 "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
23113 "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
23114 "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
23115 "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
23116 "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
23117 "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
23118 "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
23119 "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
23120 "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
23121 "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
23122 "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
23123 "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
23124 "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
23125 "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
23126 "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
23127 "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
23128 "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
23129 "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
23130 "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
23131 "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
23132 "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
23133 "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
23134 "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
23135 "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
23136 "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
23137 "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
23138 "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
23139 "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
23140 "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
23141 "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
23142 "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
23143 "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
23144 "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
23145 "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
23146 "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
23147 "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
23148 "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
23149 "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
23150 "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
23151 "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
23152 "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
23153 "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
23154 "\x72\x09\xA0\x14\xAB\x42\xD9\x4D"
23155 "\xE4\x7B\x12",
23156 .rlen = 1011,
22047 .also_non_np = 1, 23157 .also_non_np = 1,
22048 .np = 2, 23158 .np = 2,
22049 .tap = { 499 - 16, 16 }, 23159 .tap = { 1011 - 16, 16 },
22050 }, { /* Generated with Crypto++ */ 23160 }, { /* Generated with Crypto++ */
22051 .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9" 23161 .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
22052 "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A" 23162 "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -22116,8 +23226,72 @@ static struct cipher_testvec camellia_ctr_dec_tv_template[] = {
22116 "\xB4\x3A\x5F\x19\xCF\x42\x1B\x22" 23226 "\xB4\x3A\x5F\x19\xCF\x42\x1B\x22"
22117 "\x0B\x2D\x7B\xF1\xC5\x43\xF7\x5E" 23227 "\x0B\x2D\x7B\xF1\xC5\x43\xF7\x5E"
22118 "\x12\xA8\x01\x64\x16\x0B\x26\x5A" 23228 "\x12\xA8\x01\x64\x16\x0B\x26\x5A"
22119 "\x0C\x95\x0F\x40\xC5\x5A\x06\x7C", 23229 "\x0C\x95\x0F\x40\xC5\x5A\x06\x7C"
22120 .ilen = 496, 23230 "\xCF\xF5\xD5\xB7\x7A\x34\x23\xB6"
23231 "\xAA\x9E\xA8\x98\xA2\xF8\x3D\xD3"
23232 "\x3F\x23\x69\x63\x56\x96\x45\xD6"
23233 "\x74\x23\x1D\x5C\x63\xCC\xD8\x78"
23234 "\x16\xE2\x9C\xD2\x80\x02\xF2\x28"
23235 "\x69\x2F\xC4\xA8\x15\x15\x24\x3B"
23236 "\xCB\xF0\x14\xE4\x62\xC8\xF3\xD1"
23237 "\x03\x58\x1B\x33\x77\x74\x1F\xB4"
23238 "\x07\x86\xF2\x21\xB7\x41\xAE\xBF"
23239 "\x25\xC2\xFF\x51\xEF\xEA\xCE\xC4"
23240 "\x5F\xD9\xB8\x18\x6A\xF0\x0F\x0D"
23241 "\xF8\x04\xBB\x6D\x62\x33\x87\x26"
23242 "\x4F\x2F\x14\x6E\xDC\xDB\x66\x09"
23243 "\x2A\xEF\x7D\x84\x10\xAC\x82\x5E"
23244 "\xD2\xE4\xAD\x74\x7A\x6D\xCC\x3A"
23245 "\x7B\x62\xD8\xD6\x07\x2D\xF7\xDF"
23246 "\x9B\xB3\x82\xCF\x9C\x1D\x76\x5C"
23247 "\xAC\x7B\xD4\x9B\x45\xA1\x64\x11"
23248 "\x66\xF1\xA7\x0B\xF9\xDD\x00\xDD"
23249 "\xA4\x45\x3D\x3E\x03\xC9\x2E\xCB"
23250 "\xC3\x14\x84\x72\xFD\x41\xDC\xBD"
23251 "\x75\xBE\xA8\xE5\x16\x48\x64\x39"
23252 "\xCA\xF3\xE6\xDC\x25\x24\xF1\x6D"
23253 "\xB2\x8D\xC5\x38\x54\xD3\x5D\x6D"
23254 "\x0B\x29\x10\x15\x0E\x13\x3B\xAC"
23255 "\x7E\xCC\x9E\x3E\x18\x48\xA6\x02"
23256 "\xEF\x03\xB2\x2E\xE3\xD2\x70\x21"
23257 "\xB4\x19\x26\xBE\x3A\x3D\x05\xE0"
23258 "\xF8\x09\xAF\xE4\x31\x26\x92\x2F"
23259 "\x8F\x55\xAC\xED\x0B\xB2\xA5\x34"
23260 "\xBE\x50\xB1\x02\x22\x96\xE3\x40"
23261 "\x7B\x70\x50\x6E\x3B\xD5\xE5\xA0"
23262 "\x8E\xA2\xAD\x14\x60\x5C\x7A\x2B"
23263 "\x3D\x1B\x7F\xC1\xC0\x2C\x56\x36"
23264 "\xD2\x0A\x32\x06\x97\x34\xB9\xF4"
23265 "\x6F\x9F\x7E\x80\xD0\x9D\xF7\x6A"
23266 "\x21\xC1\xA2\x6A\xB1\x96\x5B\x4D"
23267 "\x7A\x15\x6C\xC4\x4E\xB8\xE0\x9E"
23268 "\x6C\x50\xF3\x9C\xC9\xB5\x23\xB7"
23269 "\xF1\xD4\x29\x4A\x23\xC4\xAD\x1E"
23270 "\x2C\x07\xD2\x43\x5F\x57\x93\xCA"
23271 "\x85\xF9\x9F\xAD\x4C\xF1\xE4\xB1"
23272 "\x1A\x8E\x28\xA4\xB6\x52\x77\x7E"
23273 "\x68\xC6\x47\xB9\x76\xCC\x65\x5F"
23274 "\x0B\xF9\x67\x93\xD8\x0E\x9A\x37"
23275 "\x5F\x41\xED\x64\x6C\xAD\x5F\xED"
23276 "\x3F\x8D\xFB\x8E\x1E\xA0\xE4\x1F"
23277 "\xC2\xC7\xED\x18\x43\xE1\x20\x86"
23278 "\x5D\xBC\x30\x70\x22\xA1\xDC\x53"
23279 "\x10\x3A\x8D\x47\x82\xCD\x7F\x59"
23280 "\x03\x2D\x6D\xF5\xE7\x79\xD4\x07"
23281 "\x68\x2A\xA5\x42\x19\x4D\xAF\xF5"
23282 "\xED\x47\x83\xBC\x5F\x62\x84\xDA"
23283 "\xDA\x41\xFF\xB0\x1D\x64\xA3\xC8"
23284 "\xBD\x4E\xE0\xB8\x7F\xEE\x55\x0A"
23285 "\x4E\x61\xB2\x51\xF6\x9C\x95\xF6"
23286 "\x92\xBB\xF6\xC5\xF0\x09\x86\xDE"
23287 "\x37\x9E\x29\xF9\x2A\x18\x73\x0D"
23288 "\xDC\x7E\x6B\x7B\x1B\x43\x8C\xEA"
23289 "\x13\xC8\x1A\x47\x0A\x2D\x6D\x56"
23290 "\xCD\xD2\xE7\x53\x1A\xAB\x1C\x3C"
23291 "\xC5\x9B\x03\x70\x29\x2A\x49\x09"
23292 "\x67\xA1\xEA\xD6\x3A\x5B\xBF\x71"
23293 "\x1D\x48\x64\x6C\xFB\xC0\x9E\x36",
23294 .ilen = 1008,
22121 .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" 23295 .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
22122 "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" 23296 "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
22123 "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" 23297 "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
@@ -22179,8 +23353,72 @@ static struct cipher_testvec camellia_ctr_dec_tv_template[] = {
22179 "\x86\x1D\xB4\x28\xBF\x56\xED\x61" 23353 "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
22180 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" 23354 "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
22181 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" 23355 "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
22182 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", 23356 "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
22183 .rlen = 496, 23357 "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
23358 "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
23359 "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
23360 "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
23361 "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
23362 "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
23363 "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
23364 "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
23365 "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
23366 "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
23367 "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
23368 "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
23369 "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
23370 "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
23371 "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
23372 "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
23373 "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
23374 "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
23375 "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
23376 "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
23377 "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
23378 "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
23379 "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
23380 "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
23381 "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
23382 "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
23383 "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
23384 "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
23385 "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
23386 "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
23387 "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
23388 "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
23389 "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
23390 "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
23391 "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
23392 "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
23393 "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
23394 "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
23395 "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
23396 "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
23397 "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
23398 "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
23399 "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
23400 "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
23401 "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
23402 "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
23403 "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
23404 "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
23405 "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
23406 "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
23407 "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
23408 "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
23409 "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
23410 "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
23411 "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
23412 "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
23413 "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
23414 "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
23415 "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
23416 "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
23417 "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
23418 "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
23419 "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
23420 "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
23421 .rlen = 1008,
22184 }, 23422 },
22185}; 23423};
22186 23424
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index c5a0262251bc..2f9dbf7568fb 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -86,6 +86,18 @@ config HW_RANDOM_BCM63XX
86 86
 87	  If unsure, say Y.	 87	  If unsure, say Y.
88 88
89config HW_RANDOM_BCM2835
90 tristate "Broadcom BCM2835 Random Number Generator support"
91 depends on HW_RANDOM && ARCH_BCM2835
92 default HW_RANDOM
93 ---help---
94 This driver provides kernel-side support for the Random Number
95 Generator hardware found on the Broadcom BCM2835 SoCs.
96
97 To compile this driver as a module, choose M here: the
98 module will be called bcm2835-rng
99
100 If unsure, say Y.
89 101
90config HW_RANDOM_GEODE 102config HW_RANDOM_GEODE
91 tristate "AMD Geode HW Random Number Generator support" 103 tristate "AMD Geode HW Random Number Generator support"
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 1fd7eec9fbf6..bed467c9300e 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_HW_RANDOM_PPC4XX) += ppc4xx-rng.o
26obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o 26obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o
27obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o 27obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o
28obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o 28obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o
29obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o
diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c
new file mode 100644
index 000000000000..eb7f14725ebd
--- /dev/null
+++ b/drivers/char/hw_random/bcm2835-rng.c
@@ -0,0 +1,113 @@
1/**
2 * Copyright (c) 2010-2012 Broadcom. All rights reserved.
3 * Copyright (c) 2013 Lubomir Rintel
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License ("GPL")
7 * version 2, as published by the Free Software Foundation.
8 */
9
10#include <linux/hw_random.h>
11#include <linux/init.h>
12#include <linux/io.h>
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/of_address.h>
16#include <linux/of_platform.h>
17#include <linux/platform_device.h>
18#include <linux/printk.h>
19
20#define RNG_CTRL 0x0
21#define RNG_STATUS 0x4
22#define RNG_DATA 0x8
23
24/* enable rng */
25#define RNG_RBGEN 0x1
26
27/* the initial numbers generated are "less random" so will be discarded */
28#define RNG_WARMUP_COUNT 0x40000
29
30static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max,
31 bool wait)
32{
33 void __iomem *rng_base = (void __iomem *)rng->priv;
34
35 while ((__raw_readl(rng_base + RNG_STATUS) >> 24) == 0) {
36 if (!wait)
37 return 0;
38 cpu_relax();
39 }
40
41 *(u32 *)buf = __raw_readl(rng_base + RNG_DATA);
42 return sizeof(u32);
43}
44
45static struct hwrng bcm2835_rng_ops = {
46 .name = "bcm2835",
47 .read = bcm2835_rng_read,
48};
49
50static int bcm2835_rng_probe(struct platform_device *pdev)
51{
52 struct device *dev = &pdev->dev;
53 struct device_node *np = dev->of_node;
54 void __iomem *rng_base;
55 int err;
56
57 /* map peripheral */
58 rng_base = of_iomap(np, 0);
59 if (!rng_base) {
60 dev_err(dev, "failed to remap rng regs");
61 return -ENODEV;
62 }
63 bcm2835_rng_ops.priv = (unsigned long)rng_base;
64
65 /* register driver */
66 err = hwrng_register(&bcm2835_rng_ops);
67 if (err) {
68 dev_err(dev, "hwrng registration failed\n");
69 iounmap(rng_base);
70 } else {
71 dev_info(dev, "hwrng registered\n");
72
73 /* set warm-up count & enable */
74 __raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS);
75 __raw_writel(RNG_RBGEN, rng_base + RNG_CTRL);
76 }
77 return err;
78}
79
80static int bcm2835_rng_remove(struct platform_device *pdev)
81{
82 void __iomem *rng_base = (void __iomem *)bcm2835_rng_ops.priv;
83
84 /* disable rng hardware */
85 __raw_writel(0, rng_base + RNG_CTRL);
86
87 /* unregister driver */
88 hwrng_unregister(&bcm2835_rng_ops);
89 iounmap(rng_base);
90
91 return 0;
92}
93
94static const struct of_device_id bcm2835_rng_of_match[] = {
95 { .compatible = "brcm,bcm2835-rng", },
96 {},
97};
98MODULE_DEVICE_TABLE(of, bcm2835_rng_of_match);
99
100static struct platform_driver bcm2835_rng_driver = {
101 .driver = {
102 .name = "bcm2835-rng",
103 .owner = THIS_MODULE,
104 .of_match_table = bcm2835_rng_of_match,
105 },
106 .probe = bcm2835_rng_probe,
107 .remove = bcm2835_rng_remove,
108};
109module_platform_driver(bcm2835_rng_driver);
110
111MODULE_AUTHOR("Lubomir Rintel <lkundrak@v3.sk>");
112MODULE_DESCRIPTION("BCM2835 Random Number Generator (RNG) driver");
113MODULE_LICENSE("GPLv2");
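As context for how the new driver is consumed (a minimal sketch, not taken from this series): once bcm2835-rng is registered via hwrng_register() and becomes the current hardware RNG, the rng-core layer exposes it through the /dev/hwrng character device, so a trivial userspace reader looks roughly like this; error handling is deliberately minimal and illustrative only.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint32_t word;
	int fd = open("/dev/hwrng", O_RDONLY);	/* char device provided by rng-core */

	if (fd < 0 || read(fd, &word, sizeof(word)) != sizeof(word)) {
		perror("hwrng");
		return 1;
	}
	printf("0x%08x\n", word);		/* one 32-bit sample from the hardware RNG */
	close(fd);
	return 0;
}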
diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c
index ac47631ab34f..402ccfb625c5 100644
--- a/drivers/char/hw_random/exynos-rng.c
+++ b/drivers/char/hw_random/exynos-rng.c
@@ -144,6 +144,7 @@ static int exynos_rng_remove(struct platform_device *pdev)
144 return 0; 144 return 0;
145} 145}
146 146
147#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
147static int exynos_rng_runtime_suspend(struct device *dev) 148static int exynos_rng_runtime_suspend(struct device *dev)
148{ 149{
149 struct platform_device *pdev = to_platform_device(dev); 150 struct platform_device *pdev = to_platform_device(dev);
@@ -161,7 +162,7 @@ static int exynos_rng_runtime_resume(struct device *dev)
161 162
162 return clk_prepare_enable(exynos_rng->clk); 163 return clk_prepare_enable(exynos_rng->clk);
163} 164}
164 165#endif
165 166
166static UNIVERSAL_DEV_PM_OPS(exynos_rng_pm_ops, exynos_rng_runtime_suspend, 167static UNIVERSAL_DEV_PM_OPS(exynos_rng_pm_ops, exynos_rng_runtime_suspend,
167 exynos_rng_runtime_resume, NULL); 168 exynos_rng_runtime_resume, NULL);
diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c
index 895d0b8fb9ab..4ca35e8a5d8c 100644
--- a/drivers/char/hw_random/mxc-rnga.c
+++ b/drivers/char/hw_random/mxc-rnga.c
@@ -142,7 +142,7 @@ static void mxc_rnga_cleanup(struct hwrng *rng)
142static int __init mxc_rnga_probe(struct platform_device *pdev) 142static int __init mxc_rnga_probe(struct platform_device *pdev)
143{ 143{
144 int err = -ENODEV; 144 int err = -ENODEV;
145 struct resource *res, *mem; 145 struct resource *res;
146 struct mxc_rng *mxc_rng; 146 struct mxc_rng *mxc_rng;
147 147
148 mxc_rng = devm_kzalloc(&pdev->dev, sizeof(struct mxc_rng), 148 mxc_rng = devm_kzalloc(&pdev->dev, sizeof(struct mxc_rng),
@@ -172,15 +172,9 @@ static int __init mxc_rnga_probe(struct platform_device *pdev)
172 goto err_region; 172 goto err_region;
173 } 173 }
174 174
175 mem = request_mem_region(res->start, resource_size(res), pdev->name); 175 mxc_rng->mem = devm_ioremap_resource(&pdev->dev, res);
176 if (mem == NULL) { 176 if (IS_ERR(mxc_rng->mem)) {
177 err = -EBUSY; 177 err = PTR_ERR(mxc_rng->mem);
178 goto err_region;
179 }
180
181 mxc_rng->mem = ioremap(res->start, resource_size(res));
182 if (!mxc_rng->mem) {
183 err = -ENOMEM;
184 goto err_ioremap; 178 goto err_ioremap;
185 } 179 }
186 180
@@ -195,8 +189,6 @@ static int __init mxc_rnga_probe(struct platform_device *pdev)
195 return 0; 189 return 0;
196 190
197err_ioremap: 191err_ioremap:
198 release_mem_region(res->start, resource_size(res));
199
200err_region: 192err_region:
201 clk_disable_unprepare(mxc_rng->clk); 193 clk_disable_unprepare(mxc_rng->clk);
202 194
@@ -206,15 +198,10 @@ out:
206 198
207static int __exit mxc_rnga_remove(struct platform_device *pdev) 199static int __exit mxc_rnga_remove(struct platform_device *pdev)
208{ 200{
209 struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
210 struct mxc_rng *mxc_rng = platform_get_drvdata(pdev); 201 struct mxc_rng *mxc_rng = platform_get_drvdata(pdev);
211 202
212 hwrng_unregister(&mxc_rng->rng); 203 hwrng_unregister(&mxc_rng->rng);
213 204
214 iounmap(mxc_rng->mem);
215
216 release_mem_region(res->start, resource_size(res));
217
218 clk_disable_unprepare(mxc_rng->clk); 205 clk_disable_unprepare(mxc_rng->clk);
219 206
220 return 0; 207 return 0;
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 849db199c02c..3e75737f5fe1 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -23,127 +23,209 @@
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/platform_device.h> 25#include <linux/platform_device.h>
26#include <linux/of.h>
26#include <linux/hw_random.h> 27#include <linux/hw_random.h>
27#include <linux/io.h> 28#include <linux/io.h>
29#include <linux/slab.h>
28#include <linux/timeriomem-rng.h> 30#include <linux/timeriomem-rng.h>
29#include <linux/jiffies.h> 31#include <linux/jiffies.h>
30#include <linux/sched.h> 32#include <linux/sched.h>
31#include <linux/timer.h> 33#include <linux/timer.h>
32#include <linux/completion.h> 34#include <linux/completion.h>
33 35
34static struct timeriomem_rng_data *timeriomem_rng_data; 36struct timeriomem_rng_private_data {
37 void __iomem *io_base;
38 unsigned int expires;
39 unsigned int period;
40 unsigned int present:1;
35 41
36static void timeriomem_rng_trigger(unsigned long); 42 struct timer_list timer;
37static DEFINE_TIMER(timeriomem_rng_timer, timeriomem_rng_trigger, 0, 0); 43 struct completion completion;
44
45 struct hwrng timeriomem_rng_ops;
46};
47
48#define to_rng_priv(rng) \
49 ((struct timeriomem_rng_private_data *)rng->priv)
38 50
39/* 51/*
40 * have data return 1, however return 0 if we have nothing 52 * have data return 1, however return 0 if we have nothing
41 */ 53 */
42static int timeriomem_rng_data_present(struct hwrng *rng, int wait) 54static int timeriomem_rng_data_present(struct hwrng *rng, int wait)
43{ 55{
44 if (rng->priv == 0) 56 struct timeriomem_rng_private_data *priv = to_rng_priv(rng);
45 return 1;
46 57
47 if (!wait || timeriomem_rng_data->present) 58 if (!wait || priv->present)
48 return timeriomem_rng_data->present; 59 return priv->present;
49 60
50 wait_for_completion(&timeriomem_rng_data->completion); 61 wait_for_completion(&priv->completion);
51 62
52 return 1; 63 return 1;
53} 64}
54 65
55static int timeriomem_rng_data_read(struct hwrng *rng, u32 *data) 66static int timeriomem_rng_data_read(struct hwrng *rng, u32 *data)
56{ 67{
68 struct timeriomem_rng_private_data *priv = to_rng_priv(rng);
57 unsigned long cur; 69 unsigned long cur;
58 s32 delay; 70 s32 delay;
59 71
60 *data = readl(timeriomem_rng_data->address); 72 *data = readl(priv->io_base);
61 73
62 if (rng->priv != 0) { 74 cur = jiffies;
63 cur = jiffies;
64 75
65 delay = cur - timeriomem_rng_timer.expires; 76 delay = cur - priv->expires;
66 delay = rng->priv - (delay % rng->priv); 77 delay = priv->period - (delay % priv->period);
67 78
68 timeriomem_rng_timer.expires = cur + delay; 79 priv->expires = cur + delay;
69 timeriomem_rng_data->present = 0; 80 priv->present = 0;
70 81
71 init_completion(&timeriomem_rng_data->completion); 82 INIT_COMPLETION(priv->completion);
72 add_timer(&timeriomem_rng_timer); 83 mod_timer(&priv->timer, priv->expires);
73 }
74 84
75 return 4; 85 return 4;
76} 86}
77 87
78static void timeriomem_rng_trigger(unsigned long dummy) 88static void timeriomem_rng_trigger(unsigned long data)
79{ 89{
80 timeriomem_rng_data->present = 1; 90 struct timeriomem_rng_private_data *priv
81 complete(&timeriomem_rng_data->completion); 91 = (struct timeriomem_rng_private_data *)data;
82}
83 92
84static struct hwrng timeriomem_rng_ops = { 93 priv->present = 1;
85 .name = "timeriomem", 94 complete(&priv->completion);
86 .data_present = timeriomem_rng_data_present, 95}
87 .data_read = timeriomem_rng_data_read,
88 .priv = 0,
89};
90 96
91static int timeriomem_rng_probe(struct platform_device *pdev) 97static int timeriomem_rng_probe(struct platform_device *pdev)
92{ 98{
99 struct timeriomem_rng_data *pdata = pdev->dev.platform_data;
100 struct timeriomem_rng_private_data *priv;
93 struct resource *res; 101 struct resource *res;
94 int ret; 102 int err = 0;
103 int period;
95 104
96 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 105 if (!pdev->dev.of_node && !pdata) {
106 dev_err(&pdev->dev, "timeriomem_rng_data is missing\n");
107 return -EINVAL;
108 }
97 109
110 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
98 if (!res) 111 if (!res)
99 return -ENOENT; 112 return -ENXIO;
100 113
101 timeriomem_rng_data = pdev->dev.platform_data; 114 if (res->start % 4 != 0 || resource_size(res) != 4) {
115 dev_err(&pdev->dev,
116 "address must be four bytes wide and aligned\n");
117 return -EINVAL;
118 }
102 119
103 timeriomem_rng_data->address = ioremap(res->start, resource_size(res)); 120 /* Allocate memory for the device structure (and zero it) */
104 if (!timeriomem_rng_data->address) 121 priv = kzalloc(sizeof(struct timeriomem_rng_private_data), GFP_KERNEL);
105 return -EIO; 122 if (!priv) {
123 dev_err(&pdev->dev, "failed to allocate device structure.\n");
124 return -ENOMEM;
125 }
126
127 platform_set_drvdata(pdev, priv);
128
129 if (pdev->dev.of_node) {
130 int i;
131
132 if (!of_property_read_u32(pdev->dev.of_node,
133 "period", &i))
134 period = i;
135 else {
136 dev_err(&pdev->dev, "missing period\n");
137 err = -EINVAL;
138 goto out_free;
139 }
140 } else
141 period = pdata->period;
142
143 priv->period = usecs_to_jiffies(period);
144 if (priv->period < 1) {
145 dev_err(&pdev->dev, "period is less than one jiffy\n");
146 err = -EINVAL;
147 goto out_free;
148 }
106 149
107 if (timeriomem_rng_data->period != 0 150 priv->expires = jiffies;
108 && usecs_to_jiffies(timeriomem_rng_data->period) > 0) { 151 priv->present = 1;
109 timeriomem_rng_timer.expires = jiffies;
110 152
111 timeriomem_rng_ops.priv = usecs_to_jiffies( 153 init_completion(&priv->completion);
112 timeriomem_rng_data->period); 154 complete(&priv->completion);
155
156 setup_timer(&priv->timer, timeriomem_rng_trigger, (unsigned long)priv);
157
158 priv->timeriomem_rng_ops.name = dev_name(&pdev->dev);
159 priv->timeriomem_rng_ops.data_present = timeriomem_rng_data_present;
160 priv->timeriomem_rng_ops.data_read = timeriomem_rng_data_read;
161 priv->timeriomem_rng_ops.priv = (unsigned long)priv;
162
163 if (!request_mem_region(res->start, resource_size(res),
164 dev_name(&pdev->dev))) {
165 dev_err(&pdev->dev, "request_mem_region failed\n");
166 err = -EBUSY;
167 goto out_timer;
113 } 168 }
114 timeriomem_rng_data->present = 1;
115 169
116 ret = hwrng_register(&timeriomem_rng_ops); 170 priv->io_base = ioremap(res->start, resource_size(res));
117 if (ret) 171 if (priv->io_base == NULL) {
118 goto failed; 172 dev_err(&pdev->dev, "ioremap failed\n");
173 err = -EIO;
174 goto out_release_io;
175 }
176
177 err = hwrng_register(&priv->timeriomem_rng_ops);
178 if (err) {
179 dev_err(&pdev->dev, "problem registering\n");
180 goto out;
181 }
119 182
120 dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n", 183 dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n",
121 timeriomem_rng_data->address, 184 priv->io_base, period);
122 timeriomem_rng_data->period);
123 185
124 return 0; 186 return 0;
125 187
126failed: 188out:
127 dev_err(&pdev->dev, "problem registering\n"); 189 iounmap(priv->io_base);
128 iounmap(timeriomem_rng_data->address); 190out_release_io:
129 191 release_mem_region(res->start, resource_size(res));
130 return ret; 192out_timer:
193 del_timer_sync(&priv->timer);
194out_free:
195 platform_set_drvdata(pdev, NULL);
196 kfree(priv);
197 return err;
131} 198}
132 199
133static int timeriomem_rng_remove(struct platform_device *pdev) 200static int timeriomem_rng_remove(struct platform_device *pdev)
134{ 201{
135 del_timer_sync(&timeriomem_rng_timer); 202 struct timeriomem_rng_private_data *priv = platform_get_drvdata(pdev);
136 hwrng_unregister(&timeriomem_rng_ops); 203 struct resource *res;
204
205 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
137 206
138 iounmap(timeriomem_rng_data->address); 207 hwrng_unregister(&priv->timeriomem_rng_ops);
208
209 del_timer_sync(&priv->timer);
210 iounmap(priv->io_base);
211 release_mem_region(res->start, resource_size(res));
212 platform_set_drvdata(pdev, NULL);
213 kfree(priv);
139 214
140 return 0; 215 return 0;
141} 216}
142 217
218static const struct of_device_id timeriomem_rng_match[] = {
219 { .compatible = "timeriomem_rng" },
220 {},
221};
222MODULE_DEVICE_TABLE(of, timeriomem_rng_match);
223
143static struct platform_driver timeriomem_rng_driver = { 224static struct platform_driver timeriomem_rng_driver = {
144 .driver = { 225 .driver = {
145 .name = "timeriomem_rng", 226 .name = "timeriomem_rng",
146 .owner = THIS_MODULE, 227 .owner = THIS_MODULE,
228 .of_match_table = timeriomem_rng_match,
147 }, 229 },
148 .probe = timeriomem_rng_probe, 230 .probe = timeriomem_rng_probe,
149 .remove = timeriomem_rng_remove, 231 .remove = timeriomem_rng_remove,
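For non-DT platforms the converted driver still accepts platform data, but the probe above only consumes the period field; the register address now arrives through a memory resource that must be exactly four bytes wide and word aligned. A minimal board-file sketch under those assumptions (the 0x10004000 base address and the "example" names are hypothetical, not taken from this patch):

#include <linux/ioport.h>
#include <linux/platform_device.h>
#include <linux/timeriomem-rng.h>

static struct resource example_rng_res = {
	.start	= 0x10004000,		/* hypothetical 32-bit RNG data register */
	.end	= 0x10004003,		/* exactly 4 bytes, word aligned, as the probe requires */
	.flags	= IORESOURCE_MEM,
};

static struct timeriomem_rng_data example_rng_data = {
	.period	= 1000,			/* microseconds between fresh samples */
};

static struct platform_device example_rng_dev = {
	.name		= "timeriomem_rng",
	.id		= -1,
	.resource	= &example_rng_res,
	.num_resources	= 1,
	.dev		= {
		.platform_data = &example_rng_data,
	},
};

The device would be registered from board init with platform_device_register(&example_rng_dev). DT platforms instead describe the same register with a node compatible with "timeriomem_rng" and a "period" property, which is what the new of_match table and the of_property_read_u32() call above bind to.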
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 87ec4d027c25..dffb85525368 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -276,6 +276,16 @@ config CRYPTO_DEV_PICOXCELL
276 276
 277	  Saying m here will build a module named picoxcell_crypto.	 277	  Saying m here will build a module named picoxcell_crypto.
278 278
279config CRYPTO_DEV_SAHARA
280 tristate "Support for SAHARA crypto accelerator"
281 depends on ARCH_MXC && EXPERIMENTAL && OF
282 select CRYPTO_BLKCIPHER
283 select CRYPTO_AES
284 select CRYPTO_ECB
285 help
286 This option enables support for the SAHARA HW crypto accelerator
287 found in some Freescale i.MX chips.
288
279config CRYPTO_DEV_S5P 289config CRYPTO_DEV_S5P
280 tristate "Support for Samsung S5PV210 crypto accelerator" 290 tristate "Support for Samsung S5PV210 crypto accelerator"
281 depends on ARCH_S5PV210 291 depends on ARCH_S5PV210
@@ -361,15 +371,17 @@ config CRYPTO_DEV_ATMEL_TDES
361 will be called atmel-tdes. 371 will be called atmel-tdes.
362 372
363config CRYPTO_DEV_ATMEL_SHA 373config CRYPTO_DEV_ATMEL_SHA
364 tristate "Support for Atmel SHA1/SHA256 hw accelerator" 374 tristate "Support for Atmel SHA hw accelerator"
365 depends on ARCH_AT91 375 depends on ARCH_AT91
366 select CRYPTO_SHA1 376 select CRYPTO_SHA1
367 select CRYPTO_SHA256 377 select CRYPTO_SHA256
378 select CRYPTO_SHA512
368 select CRYPTO_ALGAPI 379 select CRYPTO_ALGAPI
369 help 380 help
370 Some Atmel processors have SHA1/SHA256 hw accelerator. 381 Some Atmel processors have SHA1/SHA224/SHA256/SHA384/SHA512
382 hw accelerator.
371 Select this if you want to use the Atmel module for 383 Select this if you want to use the Atmel module for
372 SHA1/SHA256 algorithms. 384 SHA1/SHA224/SHA256/SHA384/SHA512 algorithms.
373 385
374 To compile this driver as a module, choose M here: the module 386 To compile this driver as a module, choose M here: the module
375 will be called atmel-sha. 387 will be called atmel-sha.
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 880a47b0b023..38ce13d3b79b 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
12obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o 12obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
13obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o 13obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
14obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o 14obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
15obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
15obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o 16obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
16obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o 17obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
17obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ 18obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 6f22ba51f969..c1efd910d97b 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -38,7 +38,7 @@
38#include <crypto/aes.h> 38#include <crypto/aes.h>
39#include <crypto/hash.h> 39#include <crypto/hash.h>
40#include <crypto/internal/hash.h> 40#include <crypto/internal/hash.h>
41#include <linux/platform_data/atmel-aes.h> 41#include <linux/platform_data/crypto-atmel.h>
42#include "atmel-aes-regs.h" 42#include "atmel-aes-regs.h"
43 43
44#define CFB8_BLOCK_SIZE 1 44#define CFB8_BLOCK_SIZE 1
@@ -47,7 +47,7 @@
47#define CFB64_BLOCK_SIZE 8 47#define CFB64_BLOCK_SIZE 8
48 48
49/* AES flags */ 49/* AES flags */
50#define AES_FLAGS_MODE_MASK 0x01ff 50#define AES_FLAGS_MODE_MASK 0x03ff
51#define AES_FLAGS_ENCRYPT BIT(0) 51#define AES_FLAGS_ENCRYPT BIT(0)
52#define AES_FLAGS_CBC BIT(1) 52#define AES_FLAGS_CBC BIT(1)
53#define AES_FLAGS_CFB BIT(2) 53#define AES_FLAGS_CFB BIT(2)
@@ -55,21 +55,26 @@
55#define AES_FLAGS_CFB16 BIT(4) 55#define AES_FLAGS_CFB16 BIT(4)
56#define AES_FLAGS_CFB32 BIT(5) 56#define AES_FLAGS_CFB32 BIT(5)
57#define AES_FLAGS_CFB64 BIT(6) 57#define AES_FLAGS_CFB64 BIT(6)
58#define AES_FLAGS_OFB BIT(7) 58#define AES_FLAGS_CFB128 BIT(7)
59#define AES_FLAGS_CTR BIT(8) 59#define AES_FLAGS_OFB BIT(8)
60#define AES_FLAGS_CTR BIT(9)
60 61
61#define AES_FLAGS_INIT BIT(16) 62#define AES_FLAGS_INIT BIT(16)
62#define AES_FLAGS_DMA BIT(17) 63#define AES_FLAGS_DMA BIT(17)
63#define AES_FLAGS_BUSY BIT(18) 64#define AES_FLAGS_BUSY BIT(18)
65#define AES_FLAGS_FAST BIT(19)
64 66
65#define AES_FLAGS_DUALBUFF BIT(24) 67#define ATMEL_AES_QUEUE_LENGTH 50
66
67#define ATMEL_AES_QUEUE_LENGTH 1
68#define ATMEL_AES_CACHE_SIZE 0
69 68
70#define ATMEL_AES_DMA_THRESHOLD 16 69#define ATMEL_AES_DMA_THRESHOLD 16
71 70
72 71
72struct atmel_aes_caps {
73 bool has_dualbuff;
74 bool has_cfb64;
75 u32 max_burst_size;
76};
77
73struct atmel_aes_dev; 78struct atmel_aes_dev;
74 79
75struct atmel_aes_ctx { 80struct atmel_aes_ctx {
@@ -77,6 +82,8 @@ struct atmel_aes_ctx {
77 82
78 int keylen; 83 int keylen;
79 u32 key[AES_KEYSIZE_256 / sizeof(u32)]; 84 u32 key[AES_KEYSIZE_256 / sizeof(u32)];
85
86 u16 block_size;
80}; 87};
81 88
82struct atmel_aes_reqctx { 89struct atmel_aes_reqctx {
@@ -112,20 +119,27 @@ struct atmel_aes_dev {
112 119
113 struct scatterlist *in_sg; 120 struct scatterlist *in_sg;
114 unsigned int nb_in_sg; 121 unsigned int nb_in_sg;
115 122 size_t in_offset;
116 struct scatterlist *out_sg; 123 struct scatterlist *out_sg;
117 unsigned int nb_out_sg; 124 unsigned int nb_out_sg;
125 size_t out_offset;
118 126
119 size_t bufcnt; 127 size_t bufcnt;
128 size_t buflen;
129 size_t dma_size;
120 130
121 u8 buf_in[ATMEL_AES_DMA_THRESHOLD] __aligned(sizeof(u32)); 131 void *buf_in;
122 int dma_in; 132 int dma_in;
133 dma_addr_t dma_addr_in;
123 struct atmel_aes_dma dma_lch_in; 134 struct atmel_aes_dma dma_lch_in;
124 135
125 u8 buf_out[ATMEL_AES_DMA_THRESHOLD] __aligned(sizeof(u32)); 136 void *buf_out;
126 int dma_out; 137 int dma_out;
138 dma_addr_t dma_addr_out;
127 struct atmel_aes_dma dma_lch_out; 139 struct atmel_aes_dma dma_lch_out;
128 140
141 struct atmel_aes_caps caps;
142
129 u32 hw_version; 143 u32 hw_version;
130}; 144};
131 145
@@ -165,6 +179,37 @@ static int atmel_aes_sg_length(struct ablkcipher_request *req,
165 return sg_nb; 179 return sg_nb;
166} 180}
167 181
182static int atmel_aes_sg_copy(struct scatterlist **sg, size_t *offset,
183 void *buf, size_t buflen, size_t total, int out)
184{
185 unsigned int count, off = 0;
186
187 while (buflen && total) {
188 count = min((*sg)->length - *offset, total);
189 count = min(count, buflen);
190
191 if (!count)
192 return off;
193
194 scatterwalk_map_and_copy(buf + off, *sg, *offset, count, out);
195
196 off += count;
197 buflen -= count;
198 *offset += count;
199 total -= count;
200
201 if (*offset == (*sg)->length) {
202 *sg = sg_next(*sg);
203 if (*sg)
204 *offset = 0;
205 else
206 total = 0;
207 }
208 }
209
210 return off;
211}
212
168static inline u32 atmel_aes_read(struct atmel_aes_dev *dd, u32 offset) 213static inline u32 atmel_aes_read(struct atmel_aes_dev *dd, u32 offset)
169{ 214{
170 return readl_relaxed(dd->io_base + offset); 215 return readl_relaxed(dd->io_base + offset);
@@ -190,14 +235,6 @@ static void atmel_aes_write_n(struct atmel_aes_dev *dd, u32 offset,
190 atmel_aes_write(dd, offset, *value); 235 atmel_aes_write(dd, offset, *value);
191} 236}
192 237
193static void atmel_aes_dualbuff_test(struct atmel_aes_dev *dd)
194{
195 atmel_aes_write(dd, AES_MR, AES_MR_DUALBUFF);
196
197 if (atmel_aes_read(dd, AES_MR) & AES_MR_DUALBUFF)
198 dd->flags |= AES_FLAGS_DUALBUFF;
199}
200
201static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_ctx *ctx) 238static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_ctx *ctx)
202{ 239{
203 struct atmel_aes_dev *aes_dd = NULL; 240 struct atmel_aes_dev *aes_dd = NULL;
@@ -225,7 +262,7 @@ static int atmel_aes_hw_init(struct atmel_aes_dev *dd)
225 262
226 if (!(dd->flags & AES_FLAGS_INIT)) { 263 if (!(dd->flags & AES_FLAGS_INIT)) {
227 atmel_aes_write(dd, AES_CR, AES_CR_SWRST); 264 atmel_aes_write(dd, AES_CR, AES_CR_SWRST);
228 atmel_aes_dualbuff_test(dd); 265 atmel_aes_write(dd, AES_MR, 0xE << AES_MR_CKEY_OFFSET);
229 dd->flags |= AES_FLAGS_INIT; 266 dd->flags |= AES_FLAGS_INIT;
230 dd->err = 0; 267 dd->err = 0;
231 } 268 }
@@ -233,11 +270,19 @@ static int atmel_aes_hw_init(struct atmel_aes_dev *dd)
233 return 0; 270 return 0;
234} 271}
235 272
273static inline unsigned int atmel_aes_get_version(struct atmel_aes_dev *dd)
274{
275 return atmel_aes_read(dd, AES_HW_VERSION) & 0x00000fff;
276}
277
236static void atmel_aes_hw_version_init(struct atmel_aes_dev *dd) 278static void atmel_aes_hw_version_init(struct atmel_aes_dev *dd)
237{ 279{
238 atmel_aes_hw_init(dd); 280 atmel_aes_hw_init(dd);
239 281
240 dd->hw_version = atmel_aes_read(dd, AES_HW_VERSION); 282 dd->hw_version = atmel_aes_get_version(dd);
283
284 dev_info(dd->dev,
285 "version: 0x%x\n", dd->hw_version);
241 286
242 clk_disable_unprepare(dd->iclk); 287 clk_disable_unprepare(dd->iclk);
243} 288}
@@ -260,50 +305,77 @@ static void atmel_aes_dma_callback(void *data)
260 tasklet_schedule(&dd->done_task); 305 tasklet_schedule(&dd->done_task);
261} 306}
262 307
263static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd) 308static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd,
309 dma_addr_t dma_addr_in, dma_addr_t dma_addr_out, int length)
264{ 310{
311 struct scatterlist sg[2];
265 struct dma_async_tx_descriptor *in_desc, *out_desc; 312 struct dma_async_tx_descriptor *in_desc, *out_desc;
266 int nb_dma_sg_in, nb_dma_sg_out;
267 313
268 dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg); 314 dd->dma_size = length;
269 if (!dd->nb_in_sg)
270 goto exit_err;
271 315
272 nb_dma_sg_in = dma_map_sg(dd->dev, dd->in_sg, dd->nb_in_sg, 316 if (!(dd->flags & AES_FLAGS_FAST)) {
273 DMA_TO_DEVICE); 317 dma_sync_single_for_device(dd->dev, dma_addr_in, length,
274 if (!nb_dma_sg_in) 318 DMA_TO_DEVICE);
275 goto exit_err; 319 }
276 320
277 in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, dd->in_sg, 321 if (dd->flags & AES_FLAGS_CFB8) {
278 nb_dma_sg_in, DMA_MEM_TO_DEV, 322 dd->dma_lch_in.dma_conf.dst_addr_width =
279 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 323 DMA_SLAVE_BUSWIDTH_1_BYTE;
324 dd->dma_lch_out.dma_conf.src_addr_width =
325 DMA_SLAVE_BUSWIDTH_1_BYTE;
326 } else if (dd->flags & AES_FLAGS_CFB16) {
327 dd->dma_lch_in.dma_conf.dst_addr_width =
328 DMA_SLAVE_BUSWIDTH_2_BYTES;
329 dd->dma_lch_out.dma_conf.src_addr_width =
330 DMA_SLAVE_BUSWIDTH_2_BYTES;
331 } else {
332 dd->dma_lch_in.dma_conf.dst_addr_width =
333 DMA_SLAVE_BUSWIDTH_4_BYTES;
334 dd->dma_lch_out.dma_conf.src_addr_width =
335 DMA_SLAVE_BUSWIDTH_4_BYTES;
336 }
280 337
281 if (!in_desc) 338 if (dd->flags & (AES_FLAGS_CFB8 | AES_FLAGS_CFB16 |
282 goto unmap_in; 339 AES_FLAGS_CFB32 | AES_FLAGS_CFB64)) {
340 dd->dma_lch_in.dma_conf.src_maxburst = 1;
341 dd->dma_lch_in.dma_conf.dst_maxburst = 1;
342 dd->dma_lch_out.dma_conf.src_maxburst = 1;
343 dd->dma_lch_out.dma_conf.dst_maxburst = 1;
344 } else {
345 dd->dma_lch_in.dma_conf.src_maxburst = dd->caps.max_burst_size;
346 dd->dma_lch_in.dma_conf.dst_maxburst = dd->caps.max_burst_size;
347 dd->dma_lch_out.dma_conf.src_maxburst = dd->caps.max_burst_size;
348 dd->dma_lch_out.dma_conf.dst_maxburst = dd->caps.max_burst_size;
349 }
283 350
284 /* callback not needed */ 351 dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf);
352 dmaengine_slave_config(dd->dma_lch_out.chan, &dd->dma_lch_out.dma_conf);
285 353
286 dd->nb_out_sg = atmel_aes_sg_length(dd->req, dd->out_sg); 354 dd->flags |= AES_FLAGS_DMA;
287 if (!dd->nb_out_sg)
288 goto unmap_in;
289 355
290 nb_dma_sg_out = dma_map_sg(dd->dev, dd->out_sg, dd->nb_out_sg, 356 sg_init_table(&sg[0], 1);
291 DMA_FROM_DEVICE); 357 sg_dma_address(&sg[0]) = dma_addr_in;
292 if (!nb_dma_sg_out) 358 sg_dma_len(&sg[0]) = length;
293 goto unmap_out;
294 359
295 out_desc = dmaengine_prep_slave_sg(dd->dma_lch_out.chan, dd->out_sg, 360 sg_init_table(&sg[1], 1);
296 nb_dma_sg_out, DMA_DEV_TO_MEM, 361 sg_dma_address(&sg[1]) = dma_addr_out;
297 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 362 sg_dma_len(&sg[1]) = length;
363
364 in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, &sg[0],
365 1, DMA_MEM_TO_DEV,
366 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
367 if (!in_desc)
368 return -EINVAL;
298 369
370 out_desc = dmaengine_prep_slave_sg(dd->dma_lch_out.chan, &sg[1],
371 1, DMA_DEV_TO_MEM,
372 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
299 if (!out_desc) 373 if (!out_desc)
300 goto unmap_out; 374 return -EINVAL;
301 375
302 out_desc->callback = atmel_aes_dma_callback; 376 out_desc->callback = atmel_aes_dma_callback;
303 out_desc->callback_param = dd; 377 out_desc->callback_param = dd;
304 378
305 dd->total -= dd->req->nbytes;
306
307 dmaengine_submit(out_desc); 379 dmaengine_submit(out_desc);
308 dma_async_issue_pending(dd->dma_lch_out.chan); 380 dma_async_issue_pending(dd->dma_lch_out.chan);
309 381
@@ -311,15 +383,6 @@ static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd)
311 dma_async_issue_pending(dd->dma_lch_in.chan); 383 dma_async_issue_pending(dd->dma_lch_in.chan);
312 384
313 return 0; 385 return 0;
314
315unmap_out:
316 dma_unmap_sg(dd->dev, dd->out_sg, dd->nb_out_sg,
317 DMA_FROM_DEVICE);
318unmap_in:
319 dma_unmap_sg(dd->dev, dd->in_sg, dd->nb_in_sg,
320 DMA_TO_DEVICE);
321exit_err:
322 return -EINVAL;
323} 386}
324 387
325static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd) 388static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd)
@@ -352,30 +415,66 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd)
352 415
353static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd) 416static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd)
354{ 417{
355 int err; 418 int err, fast = 0, in, out;
419 size_t count;
420 dma_addr_t addr_in, addr_out;
421
422 if ((!dd->in_offset) && (!dd->out_offset)) {
423 /* check for alignment */
424 in = IS_ALIGNED((u32)dd->in_sg->offset, sizeof(u32)) &&
425 IS_ALIGNED(dd->in_sg->length, dd->ctx->block_size);
426 out = IS_ALIGNED((u32)dd->out_sg->offset, sizeof(u32)) &&
427 IS_ALIGNED(dd->out_sg->length, dd->ctx->block_size);
428 fast = in && out;
429
430 if (sg_dma_len(dd->in_sg) != sg_dma_len(dd->out_sg))
431 fast = 0;
432 }
433
434
435 if (fast) {
436 count = min(dd->total, sg_dma_len(dd->in_sg));
437 count = min(count, sg_dma_len(dd->out_sg));
438
439 err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
440 if (!err) {
441 dev_err(dd->dev, "dma_map_sg() error\n");
442 return -EINVAL;
443 }
444
445 err = dma_map_sg(dd->dev, dd->out_sg, 1,
446 DMA_FROM_DEVICE);
447 if (!err) {
448 dev_err(dd->dev, "dma_map_sg() error\n");
449 dma_unmap_sg(dd->dev, dd->in_sg, 1,
450 DMA_TO_DEVICE);
451 return -EINVAL;
452 }
453
454 addr_in = sg_dma_address(dd->in_sg);
455 addr_out = sg_dma_address(dd->out_sg);
456
457 dd->flags |= AES_FLAGS_FAST;
356 458
357 if (dd->flags & AES_FLAGS_CFB8) {
358 dd->dma_lch_in.dma_conf.dst_addr_width =
359 DMA_SLAVE_BUSWIDTH_1_BYTE;
360 dd->dma_lch_out.dma_conf.src_addr_width =
361 DMA_SLAVE_BUSWIDTH_1_BYTE;
362 } else if (dd->flags & AES_FLAGS_CFB16) {
363 dd->dma_lch_in.dma_conf.dst_addr_width =
364 DMA_SLAVE_BUSWIDTH_2_BYTES;
365 dd->dma_lch_out.dma_conf.src_addr_width =
366 DMA_SLAVE_BUSWIDTH_2_BYTES;
367 } else { 459 } else {
368 dd->dma_lch_in.dma_conf.dst_addr_width = 460 /* use cache buffers */
369 DMA_SLAVE_BUSWIDTH_4_BYTES; 461 count = atmel_aes_sg_copy(&dd->in_sg, &dd->in_offset,
370 dd->dma_lch_out.dma_conf.src_addr_width = 462 dd->buf_in, dd->buflen, dd->total, 0);
371 DMA_SLAVE_BUSWIDTH_4_BYTES; 463
464 addr_in = dd->dma_addr_in;
465 addr_out = dd->dma_addr_out;
466
467 dd->flags &= ~AES_FLAGS_FAST;
372 } 468 }
373 469
374 dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf); 470 dd->total -= count;
375 dmaengine_slave_config(dd->dma_lch_out.chan, &dd->dma_lch_out.dma_conf);
376 471
377 dd->flags |= AES_FLAGS_DMA; 472 err = atmel_aes_crypt_dma(dd, addr_in, addr_out, count);
378 err = atmel_aes_crypt_dma(dd); 473
474 if (err && (dd->flags & AES_FLAGS_FAST)) {
475 dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
476 dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE);
477 }
379 478
380 return err; 479 return err;
381} 480}
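
atmel_aes_crypt_dma_start() above only maps the caller's buffers directly (the AES_FLAGS_FAST path) when this is the first chunk (both offsets zero), both scatterlist entries start word-aligned, both lengths are block-size multiples, and the two entries have equal length; anything else is bounced through the pre-mapped page buffers. A minimal sketch of that predicate, with buf_desc standing in for a scatterlist entry:

/* Sketch of the zero-copy ("fast") eligibility test; illustrative only. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct buf_desc { uintptr_t offset; size_t length; };

static bool can_use_fast_path(const struct buf_desc *in,
			      const struct buf_desc *out,
			      size_t block_size)
{
	bool in_ok  = (in->offset % sizeof(uint32_t) == 0) &&
		      (in->length % block_size == 0);
	bool out_ok = (out->offset % sizeof(uint32_t) == 0) &&
		      (out->length % block_size == 0);

	/* input and output are consumed in lock-step, so sizes must match */
	return in_ok && out_ok && in->length == out->length;
}

int main(void)
{
	struct buf_desc in  = { 0, 64 };
	struct buf_desc out = { 4, 64 };	/* misaligned start: bounce */

	printf("fast path: %s\n",
	       can_use_fast_path(&in, &out, 16) ? "yes" : "no");
	return 0;
}
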
@@ -410,6 +509,8 @@ static int atmel_aes_write_ctrl(struct atmel_aes_dev *dd)
410 valmr |= AES_MR_CFBS_32b; 509 valmr |= AES_MR_CFBS_32b;
411 else if (dd->flags & AES_FLAGS_CFB64) 510 else if (dd->flags & AES_FLAGS_CFB64)
412 valmr |= AES_MR_CFBS_64b; 511 valmr |= AES_MR_CFBS_64b;
512 else if (dd->flags & AES_FLAGS_CFB128)
513 valmr |= AES_MR_CFBS_128b;
413 } else if (dd->flags & AES_FLAGS_OFB) { 514 } else if (dd->flags & AES_FLAGS_OFB) {
414 valmr |= AES_MR_OPMOD_OFB; 515 valmr |= AES_MR_OPMOD_OFB;
415 } else if (dd->flags & AES_FLAGS_CTR) { 516 } else if (dd->flags & AES_FLAGS_CTR) {
@@ -423,7 +524,7 @@ static int atmel_aes_write_ctrl(struct atmel_aes_dev *dd)
423 524
424 if (dd->total > ATMEL_AES_DMA_THRESHOLD) { 525 if (dd->total > ATMEL_AES_DMA_THRESHOLD) {
425 valmr |= AES_MR_SMOD_IDATAR0; 526 valmr |= AES_MR_SMOD_IDATAR0;
426 if (dd->flags & AES_FLAGS_DUALBUFF) 527 if (dd->caps.has_dualbuff)
427 valmr |= AES_MR_DUALBUFF; 528 valmr |= AES_MR_DUALBUFF;
428 } else { 529 } else {
429 valmr |= AES_MR_SMOD_AUTO; 530 valmr |= AES_MR_SMOD_AUTO;
@@ -477,7 +578,9 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd,
477 /* assign new request to device */ 578 /* assign new request to device */
478 dd->req = req; 579 dd->req = req;
479 dd->total = req->nbytes; 580 dd->total = req->nbytes;
581 dd->in_offset = 0;
480 dd->in_sg = req->src; 582 dd->in_sg = req->src;
583 dd->out_offset = 0;
481 dd->out_sg = req->dst; 584 dd->out_sg = req->dst;
482 585
483 rctx = ablkcipher_request_ctx(req); 586 rctx = ablkcipher_request_ctx(req);
@@ -506,18 +609,86 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd,
506static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd) 609static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd)
507{ 610{
508 int err = -EINVAL; 611 int err = -EINVAL;
612 size_t count;
509 613
510 if (dd->flags & AES_FLAGS_DMA) { 614 if (dd->flags & AES_FLAGS_DMA) {
511 dma_unmap_sg(dd->dev, dd->out_sg,
512 dd->nb_out_sg, DMA_FROM_DEVICE);
513 dma_unmap_sg(dd->dev, dd->in_sg,
514 dd->nb_in_sg, DMA_TO_DEVICE);
515 err = 0; 615 err = 0;
616 if (dd->flags & AES_FLAGS_FAST) {
617 dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
618 dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
619 } else {
620 dma_sync_single_for_device(dd->dev, dd->dma_addr_out,
621 dd->dma_size, DMA_FROM_DEVICE);
622
623 /* copy data */
624 count = atmel_aes_sg_copy(&dd->out_sg, &dd->out_offset,
625 dd->buf_out, dd->buflen, dd->dma_size, 1);
626 if (count != dd->dma_size) {
627 err = -EINVAL;
628 pr_err("not all data converted: %u\n", count);
629 }
630 }
516 } 631 }
517 632
518 return err; 633 return err;
519} 634}
520 635
636
637static int atmel_aes_buff_init(struct atmel_aes_dev *dd)
638{
639 int err = -ENOMEM;
640
641 dd->buf_in = (void *)__get_free_pages(GFP_KERNEL, 0);
642 dd->buf_out = (void *)__get_free_pages(GFP_KERNEL, 0);
643 dd->buflen = PAGE_SIZE;
644 dd->buflen &= ~(AES_BLOCK_SIZE - 1);
645
646 if (!dd->buf_in || !dd->buf_out) {
647 dev_err(dd->dev, "unable to alloc pages.\n");
648 goto err_alloc;
649 }
650
651 /* MAP here */
652 dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in,
653 dd->buflen, DMA_TO_DEVICE);
654 if (dma_mapping_error(dd->dev, dd->dma_addr_in)) {
655 dev_err(dd->dev, "dma %d bytes error\n", dd->buflen);
656 err = -EINVAL;
657 goto err_map_in;
658 }
659
660 dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out,
661 dd->buflen, DMA_FROM_DEVICE);
662 if (dma_mapping_error(dd->dev, dd->dma_addr_out)) {
663 dev_err(dd->dev, "dma %d bytes error\n", dd->buflen);
664 err = -EINVAL;
665 goto err_map_out;
666 }
667
668 return 0;
669
670err_map_out:
671 dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen,
672 DMA_TO_DEVICE);
673err_map_in:
674 free_page((unsigned long)dd->buf_out);
675 free_page((unsigned long)dd->buf_in);
676err_alloc:
677 if (err)
678 pr_err("error: %d\n", err);
679 return err;
680}
681
682static void atmel_aes_buff_cleanup(struct atmel_aes_dev *dd)
683{
684 dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen,
685 DMA_FROM_DEVICE);
686 dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen,
687 DMA_TO_DEVICE);
688 free_page((unsigned long)dd->buf_out);
689 free_page((unsigned long)dd->buf_in);
690}
691
521static int atmel_aes_crypt(struct ablkcipher_request *req, unsigned long mode) 692static int atmel_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
522{ 693{
523 struct atmel_aes_ctx *ctx = crypto_ablkcipher_ctx( 694 struct atmel_aes_ctx *ctx = crypto_ablkcipher_ctx(
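
atmel_aes_buff_init() above allocates one page per direction, rounds the usable length down to a whole number of AES blocks (a no-op for 4 KiB pages, kept for robustness) and unwinds in reverse order when the DMA mapping fails. Below is a userspace illustration of the same round-down and goto-unwind pattern; fake_map()/fake_unmap() merely stand in for dma_map_single()/dma_unmap_single().

/* Illustration of the allocate / round-down / reverse-unwind pattern. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE	4096u
#define AES_BLOCK_SIZE	16u

static int fake_map(void *buf) { return buf ? 0 : -1; }
static void fake_unmap(void *buf) { (void)buf; }

static int buffers_init(void **in, void **out, size_t *buflen)
{
	int err = -1;

	*in  = malloc(PAGE_SIZE);
	*out = malloc(PAGE_SIZE);
	/* usable length is a whole number of cipher blocks */
	*buflen = PAGE_SIZE & ~(AES_BLOCK_SIZE - 1);

	if (!*in || !*out)
		goto err_alloc;
	if (fake_map(*in))
		goto err_map_in;
	if (fake_map(*out))
		goto err_map_out;

	return 0;

err_map_out:
	fake_unmap(*in);
err_map_in:
err_alloc:
	free(*out);
	free(*in);
	return err;
}

int main(void)
{
	void *in, *out;
	size_t len;

	if (!buffers_init(&in, &out, &len))
		printf("buflen = %zu\n", len);
	return 0;
}
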
@@ -525,9 +696,30 @@ static int atmel_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
525 struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req); 696 struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req);
526 struct atmel_aes_dev *dd; 697 struct atmel_aes_dev *dd;
527 698
528 if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { 699 if (mode & AES_FLAGS_CFB8) {
529 pr_err("request size is not exact amount of AES blocks\n"); 700 if (!IS_ALIGNED(req->nbytes, CFB8_BLOCK_SIZE)) {
530 return -EINVAL; 701 pr_err("request size is not exact amount of CFB8 blocks\n");
702 return -EINVAL;
703 }
704 ctx->block_size = CFB8_BLOCK_SIZE;
705 } else if (mode & AES_FLAGS_CFB16) {
706 if (!IS_ALIGNED(req->nbytes, CFB16_BLOCK_SIZE)) {
707 pr_err("request size is not exact amount of CFB16 blocks\n");
708 return -EINVAL;
709 }
710 ctx->block_size = CFB16_BLOCK_SIZE;
711 } else if (mode & AES_FLAGS_CFB32) {
712 if (!IS_ALIGNED(req->nbytes, CFB32_BLOCK_SIZE)) {
713 pr_err("request size is not exact amount of CFB32 blocks\n");
714 return -EINVAL;
715 }
716 ctx->block_size = CFB32_BLOCK_SIZE;
717 } else {
718 if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) {
719 pr_err("request size is not exact amount of AES blocks\n");
720 return -EINVAL;
721 }
722 ctx->block_size = AES_BLOCK_SIZE;
531 } 723 }
532 724
533 dd = atmel_aes_find_dev(ctx); 725 dd = atmel_aes_find_dev(ctx);
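
atmel_aes_crypt() above now derives the required request granularity from the mode (1, 2 or 4 bytes for CFB8/16/32, the 16-byte AES block otherwise) before accepting a request. A small sketch of the same check, with made-up mode constants:

/* Per-mode request-length validation, illustrative constants only. */
#include <stdio.h>

enum mode { MODE_ECB, MODE_CFB8, MODE_CFB16, MODE_CFB32 };

static int check_request_len(enum mode mode, unsigned int nbytes)
{
	unsigned int block;

	switch (mode) {
	case MODE_CFB8:	 block = 1;  break;	/* CFB8_BLOCK_SIZE  */
	case MODE_CFB16: block = 2;  break;	/* CFB16_BLOCK_SIZE */
	case MODE_CFB32: block = 4;  break;	/* CFB32_BLOCK_SIZE */
	default:	 block = 16; break;	/* AES_BLOCK_SIZE   */
	}

	return (nbytes % block) ? -1 : 0;
}

int main(void)
{
	printf("33 bytes in CFB16 mode: %s\n",
	       check_request_len(MODE_CFB16, 33) ? "rejected" : "accepted");
	return 0;
}
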
@@ -551,14 +743,12 @@ static bool atmel_aes_filter(struct dma_chan *chan, void *slave)
551 } 743 }
552} 744}
553 745
554static int atmel_aes_dma_init(struct atmel_aes_dev *dd) 746static int atmel_aes_dma_init(struct atmel_aes_dev *dd,
747 struct crypto_platform_data *pdata)
555{ 748{
556 int err = -ENOMEM; 749 int err = -ENOMEM;
557 struct aes_platform_data *pdata;
558 dma_cap_mask_t mask_in, mask_out; 750 dma_cap_mask_t mask_in, mask_out;
559 751
560 pdata = dd->dev->platform_data;
561
562 if (pdata && pdata->dma_slave->txdata.dma_dev && 752 if (pdata && pdata->dma_slave->txdata.dma_dev &&
563 pdata->dma_slave->rxdata.dma_dev) { 753 pdata->dma_slave->rxdata.dma_dev) {
564 754
@@ -568,28 +758,38 @@ static int atmel_aes_dma_init(struct atmel_aes_dev *dd)
568 758
569 dd->dma_lch_in.chan = dma_request_channel(mask_in, 759 dd->dma_lch_in.chan = dma_request_channel(mask_in,
570 atmel_aes_filter, &pdata->dma_slave->rxdata); 760 atmel_aes_filter, &pdata->dma_slave->rxdata);
761
571 if (!dd->dma_lch_in.chan) 762 if (!dd->dma_lch_in.chan)
572 goto err_dma_in; 763 goto err_dma_in;
573 764
574 dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV; 765 dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV;
575 dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base + 766 dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base +
576 AES_IDATAR(0); 767 AES_IDATAR(0);
577 dd->dma_lch_in.dma_conf.src_maxburst = 1; 768 dd->dma_lch_in.dma_conf.src_maxburst = dd->caps.max_burst_size;
578 dd->dma_lch_in.dma_conf.dst_maxburst = 1; 769 dd->dma_lch_in.dma_conf.src_addr_width =
770 DMA_SLAVE_BUSWIDTH_4_BYTES;
771 dd->dma_lch_in.dma_conf.dst_maxburst = dd->caps.max_burst_size;
772 dd->dma_lch_in.dma_conf.dst_addr_width =
773 DMA_SLAVE_BUSWIDTH_4_BYTES;
579 dd->dma_lch_in.dma_conf.device_fc = false; 774 dd->dma_lch_in.dma_conf.device_fc = false;
580 775
581 dma_cap_zero(mask_out); 776 dma_cap_zero(mask_out);
582 dma_cap_set(DMA_SLAVE, mask_out); 777 dma_cap_set(DMA_SLAVE, mask_out);
583 dd->dma_lch_out.chan = dma_request_channel(mask_out, 778 dd->dma_lch_out.chan = dma_request_channel(mask_out,
584 atmel_aes_filter, &pdata->dma_slave->txdata); 779 atmel_aes_filter, &pdata->dma_slave->txdata);
780
585 if (!dd->dma_lch_out.chan) 781 if (!dd->dma_lch_out.chan)
586 goto err_dma_out; 782 goto err_dma_out;
587 783
588 dd->dma_lch_out.dma_conf.direction = DMA_DEV_TO_MEM; 784 dd->dma_lch_out.dma_conf.direction = DMA_DEV_TO_MEM;
589 dd->dma_lch_out.dma_conf.src_addr = dd->phys_base + 785 dd->dma_lch_out.dma_conf.src_addr = dd->phys_base +
590 AES_ODATAR(0); 786 AES_ODATAR(0);
591 dd->dma_lch_out.dma_conf.src_maxburst = 1; 787 dd->dma_lch_out.dma_conf.src_maxburst = dd->caps.max_burst_size;
592 dd->dma_lch_out.dma_conf.dst_maxburst = 1; 788 dd->dma_lch_out.dma_conf.src_addr_width =
789 DMA_SLAVE_BUSWIDTH_4_BYTES;
790 dd->dma_lch_out.dma_conf.dst_maxburst = dd->caps.max_burst_size;
791 dd->dma_lch_out.dma_conf.dst_addr_width =
792 DMA_SLAVE_BUSWIDTH_4_BYTES;
593 dd->dma_lch_out.dma_conf.device_fc = false; 793 dd->dma_lch_out.dma_conf.device_fc = false;
594 794
595 return 0; 795 return 0;
@@ -665,13 +865,13 @@ static int atmel_aes_ofb_decrypt(struct ablkcipher_request *req)
665static int atmel_aes_cfb_encrypt(struct ablkcipher_request *req) 865static int atmel_aes_cfb_encrypt(struct ablkcipher_request *req)
666{ 866{
667 return atmel_aes_crypt(req, 867 return atmel_aes_crypt(req,
668 AES_FLAGS_ENCRYPT | AES_FLAGS_CFB); 868 AES_FLAGS_ENCRYPT | AES_FLAGS_CFB | AES_FLAGS_CFB128);
669} 869}
670 870
671static int atmel_aes_cfb_decrypt(struct ablkcipher_request *req) 871static int atmel_aes_cfb_decrypt(struct ablkcipher_request *req)
672{ 872{
673 return atmel_aes_crypt(req, 873 return atmel_aes_crypt(req,
674 AES_FLAGS_CFB); 874 AES_FLAGS_CFB | AES_FLAGS_CFB128);
675} 875}
676 876
677static int atmel_aes_cfb64_encrypt(struct ablkcipher_request *req) 877static int atmel_aes_cfb64_encrypt(struct ablkcipher_request *req)
@@ -753,7 +953,7 @@ static struct crypto_alg aes_algs[] = {
753 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 953 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
754 .cra_blocksize = AES_BLOCK_SIZE, 954 .cra_blocksize = AES_BLOCK_SIZE,
755 .cra_ctxsize = sizeof(struct atmel_aes_ctx), 955 .cra_ctxsize = sizeof(struct atmel_aes_ctx),
756 .cra_alignmask = 0x0, 956 .cra_alignmask = 0xf,
757 .cra_type = &crypto_ablkcipher_type, 957 .cra_type = &crypto_ablkcipher_type,
758 .cra_module = THIS_MODULE, 958 .cra_module = THIS_MODULE,
759 .cra_init = atmel_aes_cra_init, 959 .cra_init = atmel_aes_cra_init,
@@ -773,7 +973,7 @@ static struct crypto_alg aes_algs[] = {
773 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 973 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
774 .cra_blocksize = AES_BLOCK_SIZE, 974 .cra_blocksize = AES_BLOCK_SIZE,
775 .cra_ctxsize = sizeof(struct atmel_aes_ctx), 975 .cra_ctxsize = sizeof(struct atmel_aes_ctx),
776 .cra_alignmask = 0x0, 976 .cra_alignmask = 0xf,
777 .cra_type = &crypto_ablkcipher_type, 977 .cra_type = &crypto_ablkcipher_type,
778 .cra_module = THIS_MODULE, 978 .cra_module = THIS_MODULE,
779 .cra_init = atmel_aes_cra_init, 979 .cra_init = atmel_aes_cra_init,
@@ -794,7 +994,7 @@ static struct crypto_alg aes_algs[] = {
794 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 994 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
795 .cra_blocksize = AES_BLOCK_SIZE, 995 .cra_blocksize = AES_BLOCK_SIZE,
796 .cra_ctxsize = sizeof(struct atmel_aes_ctx), 996 .cra_ctxsize = sizeof(struct atmel_aes_ctx),
797 .cra_alignmask = 0x0, 997 .cra_alignmask = 0xf,
798 .cra_type = &crypto_ablkcipher_type, 998 .cra_type = &crypto_ablkcipher_type,
799 .cra_module = THIS_MODULE, 999 .cra_module = THIS_MODULE,
800 .cra_init = atmel_aes_cra_init, 1000 .cra_init = atmel_aes_cra_init,
@@ -815,7 +1015,7 @@ static struct crypto_alg aes_algs[] = {
815 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1015 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
816 .cra_blocksize = AES_BLOCK_SIZE, 1016 .cra_blocksize = AES_BLOCK_SIZE,
817 .cra_ctxsize = sizeof(struct atmel_aes_ctx), 1017 .cra_ctxsize = sizeof(struct atmel_aes_ctx),
818 .cra_alignmask = 0x0, 1018 .cra_alignmask = 0xf,
819 .cra_type = &crypto_ablkcipher_type, 1019 .cra_type = &crypto_ablkcipher_type,
820 .cra_module = THIS_MODULE, 1020 .cra_module = THIS_MODULE,
821 .cra_init = atmel_aes_cra_init, 1021 .cra_init = atmel_aes_cra_init,
@@ -836,7 +1036,7 @@ static struct crypto_alg aes_algs[] = {
836 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1036 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
837 .cra_blocksize = CFB32_BLOCK_SIZE, 1037 .cra_blocksize = CFB32_BLOCK_SIZE,
838 .cra_ctxsize = sizeof(struct atmel_aes_ctx), 1038 .cra_ctxsize = sizeof(struct atmel_aes_ctx),
839 .cra_alignmask = 0x0, 1039 .cra_alignmask = 0x3,
840 .cra_type = &crypto_ablkcipher_type, 1040 .cra_type = &crypto_ablkcipher_type,
841 .cra_module = THIS_MODULE, 1041 .cra_module = THIS_MODULE,
842 .cra_init = atmel_aes_cra_init, 1042 .cra_init = atmel_aes_cra_init,
@@ -857,7 +1057,7 @@ static struct crypto_alg aes_algs[] = {
857 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1057 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
858 .cra_blocksize = CFB16_BLOCK_SIZE, 1058 .cra_blocksize = CFB16_BLOCK_SIZE,
859 .cra_ctxsize = sizeof(struct atmel_aes_ctx), 1059 .cra_ctxsize = sizeof(struct atmel_aes_ctx),
860 .cra_alignmask = 0x0, 1060 .cra_alignmask = 0x1,
861 .cra_type = &crypto_ablkcipher_type, 1061 .cra_type = &crypto_ablkcipher_type,
862 .cra_module = THIS_MODULE, 1062 .cra_module = THIS_MODULE,
863 .cra_init = atmel_aes_cra_init, 1063 .cra_init = atmel_aes_cra_init,
@@ -899,7 +1099,7 @@ static struct crypto_alg aes_algs[] = {
899 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1099 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
900 .cra_blocksize = AES_BLOCK_SIZE, 1100 .cra_blocksize = AES_BLOCK_SIZE,
901 .cra_ctxsize = sizeof(struct atmel_aes_ctx), 1101 .cra_ctxsize = sizeof(struct atmel_aes_ctx),
902 .cra_alignmask = 0x0, 1102 .cra_alignmask = 0xf,
903 .cra_type = &crypto_ablkcipher_type, 1103 .cra_type = &crypto_ablkcipher_type,
904 .cra_module = THIS_MODULE, 1104 .cra_module = THIS_MODULE,
905 .cra_init = atmel_aes_cra_init, 1105 .cra_init = atmel_aes_cra_init,
@@ -915,15 +1115,14 @@ static struct crypto_alg aes_algs[] = {
915}, 1115},
916}; 1116};
917 1117
918static struct crypto_alg aes_cfb64_alg[] = { 1118static struct crypto_alg aes_cfb64_alg = {
919{
920 .cra_name = "cfb64(aes)", 1119 .cra_name = "cfb64(aes)",
921 .cra_driver_name = "atmel-cfb64-aes", 1120 .cra_driver_name = "atmel-cfb64-aes",
922 .cra_priority = 100, 1121 .cra_priority = 100,
923 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1122 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
924 .cra_blocksize = CFB64_BLOCK_SIZE, 1123 .cra_blocksize = CFB64_BLOCK_SIZE,
925 .cra_ctxsize = sizeof(struct atmel_aes_ctx), 1124 .cra_ctxsize = sizeof(struct atmel_aes_ctx),
926 .cra_alignmask = 0x0, 1125 .cra_alignmask = 0x7,
927 .cra_type = &crypto_ablkcipher_type, 1126 .cra_type = &crypto_ablkcipher_type,
928 .cra_module = THIS_MODULE, 1127 .cra_module = THIS_MODULE,
929 .cra_init = atmel_aes_cra_init, 1128 .cra_init = atmel_aes_cra_init,
@@ -936,7 +1135,6 @@ static struct crypto_alg aes_cfb64_alg[] = {
936 .encrypt = atmel_aes_cfb64_encrypt, 1135 .encrypt = atmel_aes_cfb64_encrypt,
937 .decrypt = atmel_aes_cfb64_decrypt, 1136 .decrypt = atmel_aes_cfb64_decrypt,
938 } 1137 }
939},
940}; 1138};
941 1139
942static void atmel_aes_queue_task(unsigned long data) 1140static void atmel_aes_queue_task(unsigned long data)
@@ -969,7 +1167,14 @@ static void atmel_aes_done_task(unsigned long data)
969 err = dd->err ? : err; 1167 err = dd->err ? : err;
970 1168
971 if (dd->total && !err) { 1169 if (dd->total && !err) {
972 err = atmel_aes_crypt_dma_start(dd); 1170 if (dd->flags & AES_FLAGS_FAST) {
1171 dd->in_sg = sg_next(dd->in_sg);
1172 dd->out_sg = sg_next(dd->out_sg);
1173 if (!dd->in_sg || !dd->out_sg)
1174 err = -EINVAL;
1175 }
1176 if (!err)
1177 err = atmel_aes_crypt_dma_start(dd);
973 if (!err) 1178 if (!err)
974 return; /* DMA started. Not fininishing. */ 1179 return; /* DMA started. Not fininishing. */
975 } 1180 }
@@ -1003,8 +1208,8 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
1003 1208
1004 for (i = 0; i < ARRAY_SIZE(aes_algs); i++) 1209 for (i = 0; i < ARRAY_SIZE(aes_algs); i++)
1005 crypto_unregister_alg(&aes_algs[i]); 1210 crypto_unregister_alg(&aes_algs[i]);
1006 if (dd->hw_version >= 0x130) 1211 if (dd->caps.has_cfb64)
1007 crypto_unregister_alg(&aes_cfb64_alg[0]); 1212 crypto_unregister_alg(&aes_cfb64_alg);
1008} 1213}
1009 1214
1010static int atmel_aes_register_algs(struct atmel_aes_dev *dd) 1215static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
@@ -1017,10 +1222,8 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
1017 goto err_aes_algs; 1222 goto err_aes_algs;
1018 } 1223 }
1019 1224
1020 atmel_aes_hw_version_init(dd); 1225 if (dd->caps.has_cfb64) {
1021 1226 err = crypto_register_alg(&aes_cfb64_alg);
1022 if (dd->hw_version >= 0x130) {
1023 err = crypto_register_alg(&aes_cfb64_alg[0]);
1024 if (err) 1227 if (err)
1025 goto err_aes_cfb64_alg; 1228 goto err_aes_cfb64_alg;
1026 } 1229 }
@@ -1036,10 +1239,32 @@ err_aes_algs:
1036 return err; 1239 return err;
1037} 1240}
1038 1241
1242static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
1243{
1244 dd->caps.has_dualbuff = 0;
1245 dd->caps.has_cfb64 = 0;
1246 dd->caps.max_burst_size = 1;
1247
1248 /* keep only major version number */
1249 switch (dd->hw_version & 0xff0) {
1250 case 0x130:
1251 dd->caps.has_dualbuff = 1;
1252 dd->caps.has_cfb64 = 1;
1253 dd->caps.max_burst_size = 4;
1254 break;
1255 case 0x120:
1256 break;
1257 default:
1258 dev_warn(dd->dev,
1259 "Unmanaged aes version, set minimum capabilities\n");
1260 break;
1261 }
1262}
1263
1039static int atmel_aes_probe(struct platform_device *pdev) 1264static int atmel_aes_probe(struct platform_device *pdev)
1040{ 1265{
1041 struct atmel_aes_dev *aes_dd; 1266 struct atmel_aes_dev *aes_dd;
1042 struct aes_platform_data *pdata; 1267 struct crypto_platform_data *pdata;
1043 struct device *dev = &pdev->dev; 1268 struct device *dev = &pdev->dev;
1044 struct resource *aes_res; 1269 struct resource *aes_res;
1045 unsigned long aes_phys_size; 1270 unsigned long aes_phys_size;
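
atmel_aes_get_cap() above keeps only the major revision of the IP (hw_version & 0xff0) and derives the feature set from it, falling back to minimal capabilities for unknown revisions. An illustrative decoding, with caps standing in for struct atmel_aes_caps:

/* Version-to-capabilities decoding, mirroring atmel_aes_get_cap() above. */
#include <stdbool.h>
#include <stdio.h>

struct caps { bool has_dualbuff; bool has_cfb64; unsigned int max_burst_size; };

static struct caps decode_caps(unsigned int hw_version)
{
	struct caps c = { false, false, 1 };	/* conservative defaults */

	switch (hw_version & 0xff0) {	/* keep only the major revision */
	case 0x130:
		c.has_dualbuff = true;
		c.has_cfb64 = true;
		c.max_burst_size = 4;
		break;
	case 0x120:
		break;			/* baseline IP, defaults are right */
	default:
		fprintf(stderr, "unknown revision, using minimum capabilities\n");
		break;
	}
	return c;
}

int main(void)
{
	struct caps c = decode_caps(0x135);

	printf("dualbuff=%d cfb64=%d burst=%u\n",
	       c.has_dualbuff, c.has_cfb64, c.max_burst_size);
	return 0;
}
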
@@ -1099,7 +1324,7 @@ static int atmel_aes_probe(struct platform_device *pdev)
1099 } 1324 }
1100 1325
1101 /* Initializing the clock */ 1326 /* Initializing the clock */
1102 aes_dd->iclk = clk_get(&pdev->dev, NULL); 1327 aes_dd->iclk = clk_get(&pdev->dev, "aes_clk");
1103 if (IS_ERR(aes_dd->iclk)) { 1328 if (IS_ERR(aes_dd->iclk)) {
1104 dev_err(dev, "clock intialization failed.\n"); 1329 dev_err(dev, "clock intialization failed.\n");
1105 err = PTR_ERR(aes_dd->iclk); 1330 err = PTR_ERR(aes_dd->iclk);
@@ -1113,7 +1338,15 @@ static int atmel_aes_probe(struct platform_device *pdev)
1113 goto aes_io_err; 1338 goto aes_io_err;
1114 } 1339 }
1115 1340
1116 err = atmel_aes_dma_init(aes_dd); 1341 atmel_aes_hw_version_init(aes_dd);
1342
1343 atmel_aes_get_cap(aes_dd);
1344
1345 err = atmel_aes_buff_init(aes_dd);
1346 if (err)
1347 goto err_aes_buff;
1348
1349 err = atmel_aes_dma_init(aes_dd, pdata);
1117 if (err) 1350 if (err)
1118 goto err_aes_dma; 1351 goto err_aes_dma;
1119 1352
@@ -1135,6 +1368,8 @@ err_algs:
1135 spin_unlock(&atmel_aes.lock); 1368 spin_unlock(&atmel_aes.lock);
1136 atmel_aes_dma_cleanup(aes_dd); 1369 atmel_aes_dma_cleanup(aes_dd);
1137err_aes_dma: 1370err_aes_dma:
1371 atmel_aes_buff_cleanup(aes_dd);
1372err_aes_buff:
1138 iounmap(aes_dd->io_base); 1373 iounmap(aes_dd->io_base);
1139aes_io_err: 1374aes_io_err:
1140 clk_put(aes_dd->iclk); 1375 clk_put(aes_dd->iclk);
diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h
index dc53a20d7da1..83b2d7425666 100644
--- a/drivers/crypto/atmel-sha-regs.h
+++ b/drivers/crypto/atmel-sha-regs.h
@@ -14,10 +14,13 @@
14#define SHA_MR_MODE_MANUAL 0x0 14#define SHA_MR_MODE_MANUAL 0x0
15#define SHA_MR_MODE_AUTO 0x1 15#define SHA_MR_MODE_AUTO 0x1
16#define SHA_MR_MODE_PDC 0x2 16#define SHA_MR_MODE_PDC 0x2
17#define SHA_MR_DUALBUFF (1 << 3)
18#define SHA_MR_PROCDLY (1 << 4) 17#define SHA_MR_PROCDLY (1 << 4)
19#define SHA_MR_ALGO_SHA1 (0 << 8) 18#define SHA_MR_ALGO_SHA1 (0 << 8)
20#define SHA_MR_ALGO_SHA256 (1 << 8) 19#define SHA_MR_ALGO_SHA256 (1 << 8)
20#define SHA_MR_ALGO_SHA384 (2 << 8)
21#define SHA_MR_ALGO_SHA512 (3 << 8)
22#define SHA_MR_ALGO_SHA224 (4 << 8)
23#define SHA_MR_DUALBUFF (1 << 16)
21 24
22#define SHA_IER 0x10 25#define SHA_IER 0x10
23#define SHA_IDR 0x14 26#define SHA_IDR 0x14
@@ -33,6 +36,8 @@
33#define SHA_ISR_URAT_MR (0x2 << 12) 36#define SHA_ISR_URAT_MR (0x2 << 12)
34#define SHA_ISR_URAT_WO (0x5 << 12) 37#define SHA_ISR_URAT_WO (0x5 << 12)
35 38
39#define SHA_HW_VERSION 0xFC
40
36#define SHA_TPR 0x108 41#define SHA_TPR 0x108
37#define SHA_TCR 0x10C 42#define SHA_TCR 0x10C
38#define SHA_TNPR 0x118 43#define SHA_TNPR 0x118
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 4918e9424d31..eaed8bf183bc 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -38,6 +38,7 @@
38#include <crypto/sha.h> 38#include <crypto/sha.h>
39#include <crypto/hash.h> 39#include <crypto/hash.h>
40#include <crypto/internal/hash.h> 40#include <crypto/internal/hash.h>
41#include <linux/platform_data/crypto-atmel.h>
41#include "atmel-sha-regs.h" 42#include "atmel-sha-regs.h"
42 43
43/* SHA flags */ 44/* SHA flags */
@@ -52,11 +53,12 @@
52#define SHA_FLAGS_FINUP BIT(16) 53#define SHA_FLAGS_FINUP BIT(16)
53#define SHA_FLAGS_SG BIT(17) 54#define SHA_FLAGS_SG BIT(17)
54#define SHA_FLAGS_SHA1 BIT(18) 55#define SHA_FLAGS_SHA1 BIT(18)
55#define SHA_FLAGS_SHA256 BIT(19) 56#define SHA_FLAGS_SHA224 BIT(19)
56#define SHA_FLAGS_ERROR BIT(20) 57#define SHA_FLAGS_SHA256 BIT(20)
57#define SHA_FLAGS_PAD BIT(21) 58#define SHA_FLAGS_SHA384 BIT(21)
58 59#define SHA_FLAGS_SHA512 BIT(22)
59#define SHA_FLAGS_DUALBUFF BIT(24) 60#define SHA_FLAGS_ERROR BIT(23)
61#define SHA_FLAGS_PAD BIT(24)
60 62
61#define SHA_OP_UPDATE 1 63#define SHA_OP_UPDATE 1
62#define SHA_OP_FINAL 2 64#define SHA_OP_FINAL 2
@@ -65,6 +67,12 @@
65 67
66#define ATMEL_SHA_DMA_THRESHOLD 56 68#define ATMEL_SHA_DMA_THRESHOLD 56
67 69
70struct atmel_sha_caps {
71 bool has_dma;
72 bool has_dualbuff;
73 bool has_sha224;
74 bool has_sha_384_512;
75};
68 76
69struct atmel_sha_dev; 77struct atmel_sha_dev;
70 78
@@ -73,8 +81,8 @@ struct atmel_sha_reqctx {
73 unsigned long flags; 81 unsigned long flags;
74 unsigned long op; 82 unsigned long op;
75 83
76 u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32)); 84 u8 digest[SHA512_DIGEST_SIZE] __aligned(sizeof(u32));
77 size_t digcnt; 85 u64 digcnt[2];
78 size_t bufcnt; 86 size_t bufcnt;
79 size_t buflen; 87 size_t buflen;
80 dma_addr_t dma_addr; 88 dma_addr_t dma_addr;
@@ -84,6 +92,8 @@ struct atmel_sha_reqctx {
84 unsigned int offset; /* offset in current sg */ 92 unsigned int offset; /* offset in current sg */
85 unsigned int total; /* total request */ 93 unsigned int total; /* total request */
86 94
95 size_t block_size;
96
87 u8 buffer[0] __aligned(sizeof(u32)); 97 u8 buffer[0] __aligned(sizeof(u32));
88}; 98};
89 99
@@ -97,7 +107,12 @@ struct atmel_sha_ctx {
97 107
98}; 108};
99 109
100#define ATMEL_SHA_QUEUE_LENGTH 1 110#define ATMEL_SHA_QUEUE_LENGTH 50
111
112struct atmel_sha_dma {
113 struct dma_chan *chan;
114 struct dma_slave_config dma_conf;
115};
101 116
102struct atmel_sha_dev { 117struct atmel_sha_dev {
103 struct list_head list; 118 struct list_head list;
@@ -114,6 +129,12 @@ struct atmel_sha_dev {
114 unsigned long flags; 129 unsigned long flags;
115 struct crypto_queue queue; 130 struct crypto_queue queue;
116 struct ahash_request *req; 131 struct ahash_request *req;
132
133 struct atmel_sha_dma dma_lch_in;
134
135 struct atmel_sha_caps caps;
136
137 u32 hw_version;
117}; 138};
118 139
119struct atmel_sha_drv { 140struct atmel_sha_drv {
@@ -137,14 +158,6 @@ static inline void atmel_sha_write(struct atmel_sha_dev *dd,
137 writel_relaxed(value, dd->io_base + offset); 158 writel_relaxed(value, dd->io_base + offset);
138} 159}
139 160
140static void atmel_sha_dualbuff_test(struct atmel_sha_dev *dd)
141{
142 atmel_sha_write(dd, SHA_MR, SHA_MR_DUALBUFF);
143
144 if (atmel_sha_read(dd, SHA_MR) & SHA_MR_DUALBUFF)
145 dd->flags |= SHA_FLAGS_DUALBUFF;
146}
147
148static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx) 161static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
149{ 162{
150 size_t count; 163 size_t count;
@@ -176,31 +189,58 @@ static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
176} 189}
177 190
178/* 191/*
179 * The purpose of this padding is to ensure that the padded message 192 * The purpose of this padding is to ensure that the padded message is a
180 * is a multiple of 512 bits. The bit "1" is appended at the end of 193 * multiple of 512 bits (SHA1/SHA224/SHA256) or 1024 bits (SHA384/SHA512).
181 * the message followed by "padlen-1" zero bits. Then a 64 bits block 194 * The bit "1" is appended at the end of the message followed by
182 * equals to the message length in bits is appended. 195 * "padlen-1" zero bits. Then a 64 bits block (SHA1/SHA224/SHA256) or
196 * 128 bits block (SHA384/SHA512) equals to the message length in bits
197 * is appended.
183 * 198 *
184 * padlen is calculated as follows: 199 * For SHA1/SHA224/SHA256, padlen is calculated as follows:
185 * - if message length < 56 bytes then padlen = 56 - message length 200 * - if message length < 56 bytes then padlen = 56 - message length
186 * - else padlen = 64 + 56 - message length 201 * - else padlen = 64 + 56 - message length
202 *
203 * For SHA384/SHA512, padlen is calculated as follows:
204 * - if message length < 112 bytes then padlen = 112 - message length
205 * - else padlen = 128 + 112 - message length
187 */ 206 */
188static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) 207static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length)
189{ 208{
190 unsigned int index, padlen; 209 unsigned int index, padlen;
191 u64 bits; 210 u64 bits[2];
192 u64 size; 211 u64 size[2];
193 212
194 bits = (ctx->bufcnt + ctx->digcnt + length) << 3; 213 size[0] = ctx->digcnt[0];
195 size = cpu_to_be64(bits); 214 size[1] = ctx->digcnt[1];
196 215
197 index = ctx->bufcnt & 0x3f; 216 size[0] += ctx->bufcnt;
198 padlen = (index < 56) ? (56 - index) : ((64+56) - index); 217 if (size[0] < ctx->bufcnt)
199 *(ctx->buffer + ctx->bufcnt) = 0x80; 218 size[1]++;
200 memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1); 219
201 memcpy(ctx->buffer + ctx->bufcnt + padlen, &size, 8); 220 size[0] += length;
202 ctx->bufcnt += padlen + 8; 221 if (size[0] < length)
203 ctx->flags |= SHA_FLAGS_PAD; 222 size[1]++;
223
224 bits[1] = cpu_to_be64(size[0] << 3);
225 bits[0] = cpu_to_be64(size[1] << 3 | size[0] >> 61);
226
227 if (ctx->flags & (SHA_FLAGS_SHA384 | SHA_FLAGS_SHA512)) {
228 index = ctx->bufcnt & 0x7f;
229 padlen = (index < 112) ? (112 - index) : ((128+112) - index);
230 *(ctx->buffer + ctx->bufcnt) = 0x80;
231 memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1);
232 memcpy(ctx->buffer + ctx->bufcnt + padlen, bits, 16);
233 ctx->bufcnt += padlen + 16;
234 ctx->flags |= SHA_FLAGS_PAD;
235 } else {
236 index = ctx->bufcnt & 0x3f;
237 padlen = (index < 56) ? (56 - index) : ((64+56) - index);
238 *(ctx->buffer + ctx->bufcnt) = 0x80;
239 memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1);
240 memcpy(ctx->buffer + ctx->bufcnt + padlen, &bits[1], 8);
241 ctx->bufcnt += padlen + 8;
242 ctx->flags |= SHA_FLAGS_PAD;
243 }
204} 244}
205 245
206static int atmel_sha_init(struct ahash_request *req) 246static int atmel_sha_init(struct ahash_request *req)
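
The reworked padding above keeps the message length as a 128-bit byte count split across two u64 words, converts it to a bit count with an explicit carry (bits_hi = size_hi << 3 | size_lo >> 61), and pads SHA-384/512 to 128-byte blocks with a 16-byte length field instead of the 64-byte/8-byte layout used for SHA-1/224/256. A worked example of that arithmetic in host byte order (the driver additionally converts the two words with cpu_to_be64()):

/* Worked example of the padding-length and 128-bit bit-count arithmetic. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* a 128-bit byte count split over two 64-bit words (ctx->digcnt[]) */
	uint64_t digcnt_lo = 128;	/* bytes already pushed to the hardware */
	uint64_t digcnt_hi = 0;
	unsigned int bufcnt = 72;	/* bytes still sitting in ctx->buffer */

	/* total size in bytes, with carry into the high word */
	uint64_t size_lo = digcnt_lo + bufcnt;
	uint64_t size_hi = digcnt_hi + (size_lo < digcnt_lo);

	/* message length in bits = size << 3, again as a 128-bit value */
	uint64_t bits_lo = size_lo << 3;
	uint64_t bits_hi = (size_hi << 3) | (size_lo >> 61);

	/* SHA-384/512: blocks are 128 bytes, the length field takes 16 of
	 * them, so 0x80 plus zero padding must stop at offset 112 */
	unsigned int index = bufcnt & 0x7f;
	unsigned int padlen = (index < 112) ? (112 - index)
					    : ((128 + 112) - index);

	printf("length: 0x%016llx%016llx bits\n",
	       (unsigned long long)bits_hi, (unsigned long long)bits_lo);
	printf("index %u -> padlen %u -> %u bytes submitted (multiple of 128)\n",
	       index, padlen, bufcnt + padlen + 16);
	return 0;
}
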
@@ -231,13 +271,35 @@ static int atmel_sha_init(struct ahash_request *req)
231 dev_dbg(dd->dev, "init: digest size: %d\n", 271 dev_dbg(dd->dev, "init: digest size: %d\n",
232 crypto_ahash_digestsize(tfm)); 272 crypto_ahash_digestsize(tfm));
233 273
234 if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE) 274 switch (crypto_ahash_digestsize(tfm)) {
275 case SHA1_DIGEST_SIZE:
235 ctx->flags |= SHA_FLAGS_SHA1; 276 ctx->flags |= SHA_FLAGS_SHA1;
236 else if (crypto_ahash_digestsize(tfm) == SHA256_DIGEST_SIZE) 277 ctx->block_size = SHA1_BLOCK_SIZE;
278 break;
279 case SHA224_DIGEST_SIZE:
280 ctx->flags |= SHA_FLAGS_SHA224;
281 ctx->block_size = SHA224_BLOCK_SIZE;
282 break;
283 case SHA256_DIGEST_SIZE:
237 ctx->flags |= SHA_FLAGS_SHA256; 284 ctx->flags |= SHA_FLAGS_SHA256;
285 ctx->block_size = SHA256_BLOCK_SIZE;
286 break;
287 case SHA384_DIGEST_SIZE:
288 ctx->flags |= SHA_FLAGS_SHA384;
289 ctx->block_size = SHA384_BLOCK_SIZE;
290 break;
291 case SHA512_DIGEST_SIZE:
292 ctx->flags |= SHA_FLAGS_SHA512;
293 ctx->block_size = SHA512_BLOCK_SIZE;
294 break;
295 default:
296 return -EINVAL;
297 break;
298 }
238 299
239 ctx->bufcnt = 0; 300 ctx->bufcnt = 0;
240 ctx->digcnt = 0; 301 ctx->digcnt[0] = 0;
302 ctx->digcnt[1] = 0;
241 ctx->buflen = SHA_BUFFER_LEN; 303 ctx->buflen = SHA_BUFFER_LEN;
242 304
243 return 0; 305 return 0;
@@ -249,19 +311,28 @@ static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma)
249 u32 valcr = 0, valmr = SHA_MR_MODE_AUTO; 311 u32 valcr = 0, valmr = SHA_MR_MODE_AUTO;
250 312
251 if (likely(dma)) { 313 if (likely(dma)) {
252 atmel_sha_write(dd, SHA_IER, SHA_INT_TXBUFE); 314 if (!dd->caps.has_dma)
315 atmel_sha_write(dd, SHA_IER, SHA_INT_TXBUFE);
253 valmr = SHA_MR_MODE_PDC; 316 valmr = SHA_MR_MODE_PDC;
254 if (dd->flags & SHA_FLAGS_DUALBUFF) 317 if (dd->caps.has_dualbuff)
255 valmr = SHA_MR_DUALBUFF; 318 valmr |= SHA_MR_DUALBUFF;
256 } else { 319 } else {
257 atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); 320 atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY);
258 } 321 }
259 322
260 if (ctx->flags & SHA_FLAGS_SHA256) 323 if (ctx->flags & SHA_FLAGS_SHA1)
324 valmr |= SHA_MR_ALGO_SHA1;
325 else if (ctx->flags & SHA_FLAGS_SHA224)
326 valmr |= SHA_MR_ALGO_SHA224;
327 else if (ctx->flags & SHA_FLAGS_SHA256)
261 valmr |= SHA_MR_ALGO_SHA256; 328 valmr |= SHA_MR_ALGO_SHA256;
329 else if (ctx->flags & SHA_FLAGS_SHA384)
330 valmr |= SHA_MR_ALGO_SHA384;
331 else if (ctx->flags & SHA_FLAGS_SHA512)
332 valmr |= SHA_MR_ALGO_SHA512;
262 333
263 /* Setting CR_FIRST only for the first iteration */ 334 /* Setting CR_FIRST only for the first iteration */
264 if (!ctx->digcnt) 335 if (!(ctx->digcnt[0] || ctx->digcnt[1]))
265 valcr = SHA_CR_FIRST; 336 valcr = SHA_CR_FIRST;
266 337
267 atmel_sha_write(dd, SHA_CR, valcr); 338 atmel_sha_write(dd, SHA_CR, valcr);
@@ -275,13 +346,15 @@ static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf,
275 int count, len32; 346 int count, len32;
276 const u32 *buffer = (const u32 *)buf; 347 const u32 *buffer = (const u32 *)buf;
277 348
278 dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n", 349 dev_dbg(dd->dev, "xmit_cpu: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n",
279 ctx->digcnt, length, final); 350 ctx->digcnt[1], ctx->digcnt[0], length, final);
280 351
281 atmel_sha_write_ctrl(dd, 0); 352 atmel_sha_write_ctrl(dd, 0);
282 353
283 /* should be non-zero before next lines to disable clocks later */ 354 /* should be non-zero before next lines to disable clocks later */
284 ctx->digcnt += length; 355 ctx->digcnt[0] += length;
356 if (ctx->digcnt[0] < length)
357 ctx->digcnt[1]++;
285 358
286 if (final) 359 if (final)
287 dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */ 360 dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
@@ -302,8 +375,8 @@ static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
302 struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req); 375 struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
303 int len32; 376 int len32;
304 377
305 dev_dbg(dd->dev, "xmit_pdc: digcnt: %d, length: %d, final: %d\n", 378 dev_dbg(dd->dev, "xmit_pdc: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n",
306 ctx->digcnt, length1, final); 379 ctx->digcnt[1], ctx->digcnt[0], length1, final);
307 380
308 len32 = DIV_ROUND_UP(length1, sizeof(u32)); 381 len32 = DIV_ROUND_UP(length1, sizeof(u32));
309 atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTDIS); 382 atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTDIS);
@@ -317,7 +390,9 @@ static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
317 atmel_sha_write_ctrl(dd, 1); 390 atmel_sha_write_ctrl(dd, 1);
318 391
319 /* should be non-zero before next lines to disable clocks later */ 392 /* should be non-zero before next lines to disable clocks later */
320 ctx->digcnt += length1; 393 ctx->digcnt[0] += length1;
394 if (ctx->digcnt[0] < length1)
395 ctx->digcnt[1]++;
321 396
322 if (final) 397 if (final)
323 dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */ 398 dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
@@ -330,6 +405,86 @@ static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
330 return -EINPROGRESS; 405 return -EINPROGRESS;
331} 406}
332 407
408static void atmel_sha_dma_callback(void *data)
409{
410 struct atmel_sha_dev *dd = data;
411
412 /* dma_lch_in - completed - wait DATRDY */
413 atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY);
414}
415
416static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
417 size_t length1, dma_addr_t dma_addr2, size_t length2, int final)
418{
419 struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
420 struct dma_async_tx_descriptor *in_desc;
421 struct scatterlist sg[2];
422
423 dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n",
424 ctx->digcnt[1], ctx->digcnt[0], length1, final);
425
426 if (ctx->flags & (SHA_FLAGS_SHA1 | SHA_FLAGS_SHA224 |
427 SHA_FLAGS_SHA256)) {
428 dd->dma_lch_in.dma_conf.src_maxburst = 16;
429 dd->dma_lch_in.dma_conf.dst_maxburst = 16;
430 } else {
431 dd->dma_lch_in.dma_conf.src_maxburst = 32;
432 dd->dma_lch_in.dma_conf.dst_maxburst = 32;
433 }
434
435 dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf);
436
437 if (length2) {
438 sg_init_table(sg, 2);
439 sg_dma_address(&sg[0]) = dma_addr1;
440 sg_dma_len(&sg[0]) = length1;
441 sg_dma_address(&sg[1]) = dma_addr2;
442 sg_dma_len(&sg[1]) = length2;
443 in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, sg, 2,
444 DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
445 } else {
446 sg_init_table(sg, 1);
447 sg_dma_address(&sg[0]) = dma_addr1;
448 sg_dma_len(&sg[0]) = length1;
449 in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, sg, 1,
450 DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
451 }
452 if (!in_desc)
453 return -EINVAL;
454
455 in_desc->callback = atmel_sha_dma_callback;
456 in_desc->callback_param = dd;
457
458 atmel_sha_write_ctrl(dd, 1);
459
460 /* should be non-zero before next lines to disable clocks later */
461 ctx->digcnt[0] += length1;
462 if (ctx->digcnt[0] < length1)
463 ctx->digcnt[1]++;
464
465 if (final)
466 dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
467
468 dd->flags |= SHA_FLAGS_DMA_ACTIVE;
469
470 /* Start DMA transfer */
471 dmaengine_submit(in_desc);
472 dma_async_issue_pending(dd->dma_lch_in.chan);
473
474 return -EINPROGRESS;
475}
476
477static int atmel_sha_xmit_start(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
478 size_t length1, dma_addr_t dma_addr2, size_t length2, int final)
479{
480 if (dd->caps.has_dma)
481 return atmel_sha_xmit_dma(dd, dma_addr1, length1,
482 dma_addr2, length2, final);
483 else
484 return atmel_sha_xmit_pdc(dd, dma_addr1, length1,
485 dma_addr2, length2, final);
486}
487
333static int atmel_sha_update_cpu(struct atmel_sha_dev *dd) 488static int atmel_sha_update_cpu(struct atmel_sha_dev *dd)
334{ 489{
335 struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req); 490 struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
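
atmel_sha_xmit_dma() above submits either one scatterlist entry (the aligned bulk data) or two (bulk data plus the buffered remainder or padding) in a single transaction, and raises the burst length from 16 to 32 words for SHA-384/512; with 4-byte accesses that corresponds to one 64- or 128-byte hash block per burst. A rough sketch with stand-in types; struct xfer plays the role of the scatterlist handed to the dmaengine API:

/* Illustrative one- or two-segment transfer set-up. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct xfer { uintptr_t addr; size_t len; };

static unsigned int build_xfer(struct xfer seg[2],
			       uintptr_t addr1, size_t len1,
			       uintptr_t addr2, size_t len2)
{
	seg[0].addr = addr1;
	seg[0].len = len1;
	if (!len2)
		return 1;		/* aligned bulk data only */
	seg[1].addr = addr2;		/* buffered tail / padding */
	seg[1].len = len2;
	return 2;
}

/* 16 words of 4 bytes = 64 B (one SHA-1/224/256 block) per burst,
 * 32 words = 128 B (one SHA-384/512 block) per burst */
static unsigned int pick_maxburst(int is_sha384_512)
{
	return is_sha384_512 ? 32 : 16;
}

int main(void)
{
	struct xfer seg[2];
	unsigned int n = build_xfer(seg, 0x1000, 4096, 0x2000, 72);

	printf("%u segment(s), maxburst %u\n", n, pick_maxburst(1));
	return 0;
}
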
@@ -337,7 +492,6 @@ static int atmel_sha_update_cpu(struct atmel_sha_dev *dd)
337 492
338 atmel_sha_append_sg(ctx); 493 atmel_sha_append_sg(ctx);
339 atmel_sha_fill_padding(ctx, 0); 494 atmel_sha_fill_padding(ctx, 0);
340
341 bufcnt = ctx->bufcnt; 495 bufcnt = ctx->bufcnt;
342 ctx->bufcnt = 0; 496 ctx->bufcnt = 0;
343 497
@@ -349,17 +503,17 @@ static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd,
349 size_t length, int final) 503 size_t length, int final)
350{ 504{
351 ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, 505 ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
352 ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE); 506 ctx->buflen + ctx->block_size, DMA_TO_DEVICE);
353 if (dma_mapping_error(dd->dev, ctx->dma_addr)) { 507 if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
354 dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen + 508 dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen +
355 SHA1_BLOCK_SIZE); 509 ctx->block_size);
356 return -EINVAL; 510 return -EINVAL;
357 } 511 }
358 512
359 ctx->flags &= ~SHA_FLAGS_SG; 513 ctx->flags &= ~SHA_FLAGS_SG;
360 514
361 /* next call does not fail... so no unmap in the case of error */ 515 /* next call does not fail... so no unmap in the case of error */
362 return atmel_sha_xmit_pdc(dd, ctx->dma_addr, length, 0, 0, final); 516 return atmel_sha_xmit_start(dd, ctx->dma_addr, length, 0, 0, final);
363} 517}
364 518
365static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd) 519static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
@@ -372,8 +526,8 @@ static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
372 526
373 final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; 527 final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
374 528
375 dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n", 529 dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: 0x%llx 0x%llx, final: %d\n",
376 ctx->bufcnt, ctx->digcnt, final); 530 ctx->bufcnt, ctx->digcnt[1], ctx->digcnt[0], final);
377 531
378 if (final) 532 if (final)
379 atmel_sha_fill_padding(ctx, 0); 533 atmel_sha_fill_padding(ctx, 0);
@@ -400,30 +554,25 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
400 if (ctx->bufcnt || ctx->offset) 554 if (ctx->bufcnt || ctx->offset)
401 return atmel_sha_update_dma_slow(dd); 555 return atmel_sha_update_dma_slow(dd);
402 556
403 dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n", 557 dev_dbg(dd->dev, "fast: digcnt: 0x%llx 0x%llx, bufcnt: %u, total: %u\n",
404 ctx->digcnt, ctx->bufcnt, ctx->total); 558 ctx->digcnt[1], ctx->digcnt[0], ctx->bufcnt, ctx->total);
405 559
406 sg = ctx->sg; 560 sg = ctx->sg;
407 561
408 if (!IS_ALIGNED(sg->offset, sizeof(u32))) 562 if (!IS_ALIGNED(sg->offset, sizeof(u32)))
409 return atmel_sha_update_dma_slow(dd); 563 return atmel_sha_update_dma_slow(dd);
410 564
411 if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, SHA1_BLOCK_SIZE)) 565 if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, ctx->block_size))
412 /* size is not SHA1_BLOCK_SIZE aligned */ 566 /* size is not ctx->block_size aligned */
413 return atmel_sha_update_dma_slow(dd); 567 return atmel_sha_update_dma_slow(dd);
414 568
415 length = min(ctx->total, sg->length); 569 length = min(ctx->total, sg->length);
416 570
417 if (sg_is_last(sg)) { 571 if (sg_is_last(sg)) {
418 if (!(ctx->flags & SHA_FLAGS_FINUP)) { 572 if (!(ctx->flags & SHA_FLAGS_FINUP)) {
419 /* not last sg must be SHA1_BLOCK_SIZE aligned */ 573 /* not last sg must be ctx->block_size aligned */
420 tail = length & (SHA1_BLOCK_SIZE - 1); 574 tail = length & (ctx->block_size - 1);
421 length -= tail; 575 length -= tail;
422 if (length == 0) {
423 /* offset where to start slow */
424 ctx->offset = length;
425 return atmel_sha_update_dma_slow(dd);
426 }
427 } 576 }
428 } 577 }
429 578
@@ -434,7 +583,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
434 583
435 /* Add padding */ 584 /* Add padding */
436 if (final) { 585 if (final) {
437 tail = length & (SHA1_BLOCK_SIZE - 1); 586 tail = length & (ctx->block_size - 1);
438 length -= tail; 587 length -= tail;
439 ctx->total += tail; 588 ctx->total += tail;
440 ctx->offset = length; /* offset where to start slow */ 589 ctx->offset = length; /* offset where to start slow */
@@ -445,10 +594,10 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
445 atmel_sha_fill_padding(ctx, length); 594 atmel_sha_fill_padding(ctx, length);
446 595
447 ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, 596 ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
448 ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE); 597 ctx->buflen + ctx->block_size, DMA_TO_DEVICE);
449 if (dma_mapping_error(dd->dev, ctx->dma_addr)) { 598 if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
450 dev_err(dd->dev, "dma %u bytes error\n", 599 dev_err(dd->dev, "dma %u bytes error\n",
451 ctx->buflen + SHA1_BLOCK_SIZE); 600 ctx->buflen + ctx->block_size);
452 return -EINVAL; 601 return -EINVAL;
453 } 602 }
454 603
@@ -456,7 +605,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
456 ctx->flags &= ~SHA_FLAGS_SG; 605 ctx->flags &= ~SHA_FLAGS_SG;
457 count = ctx->bufcnt; 606 count = ctx->bufcnt;
458 ctx->bufcnt = 0; 607 ctx->bufcnt = 0;
459 return atmel_sha_xmit_pdc(dd, ctx->dma_addr, count, 0, 608 return atmel_sha_xmit_start(dd, ctx->dma_addr, count, 0,
460 0, final); 609 0, final);
461 } else { 610 } else {
462 ctx->sg = sg; 611 ctx->sg = sg;
@@ -470,7 +619,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
470 619
471 count = ctx->bufcnt; 620 count = ctx->bufcnt;
472 ctx->bufcnt = 0; 621 ctx->bufcnt = 0;
473 return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg), 622 return atmel_sha_xmit_start(dd, sg_dma_address(ctx->sg),
474 length, ctx->dma_addr, count, final); 623 length, ctx->dma_addr, count, final);
475 } 624 }
476 } 625 }
@@ -483,7 +632,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
483 ctx->flags |= SHA_FLAGS_SG; 632 ctx->flags |= SHA_FLAGS_SG;
484 633
485 /* next call does not fail... so no unmap in the case of error */ 634 /* next call does not fail... so no unmap in the case of error */
486 return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg), length, 0, 635 return atmel_sha_xmit_start(dd, sg_dma_address(ctx->sg), length, 0,
487 0, final); 636 0, final);
488} 637}
489 638
@@ -498,12 +647,13 @@ static int atmel_sha_update_dma_stop(struct atmel_sha_dev *dd)
498 if (ctx->sg) 647 if (ctx->sg)
499 ctx->offset = 0; 648 ctx->offset = 0;
500 } 649 }
501 if (ctx->flags & SHA_FLAGS_PAD) 650 if (ctx->flags & SHA_FLAGS_PAD) {
502 dma_unmap_single(dd->dev, ctx->dma_addr, 651 dma_unmap_single(dd->dev, ctx->dma_addr,
503 ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE); 652 ctx->buflen + ctx->block_size, DMA_TO_DEVICE);
653 }
504 } else { 654 } else {
505 dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen + 655 dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen +
506 SHA1_BLOCK_SIZE, DMA_TO_DEVICE); 656 ctx->block_size, DMA_TO_DEVICE);
507 } 657 }
508 658
509 return 0; 659 return 0;
@@ -515,8 +665,8 @@ static int atmel_sha_update_req(struct atmel_sha_dev *dd)
515 struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); 665 struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
516 int err; 666 int err;
517 667
518 dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n", 668 dev_dbg(dd->dev, "update_req: total: %u, digcnt: 0x%llx 0x%llx\n",
519 ctx->total, ctx->digcnt, (ctx->flags & SHA_FLAGS_FINUP) != 0); 669 ctx->total, ctx->digcnt[1], ctx->digcnt[0]);
520 670
521 if (ctx->flags & SHA_FLAGS_CPU) 671 if (ctx->flags & SHA_FLAGS_CPU)
522 err = atmel_sha_update_cpu(dd); 672 err = atmel_sha_update_cpu(dd);
@@ -524,8 +674,8 @@ static int atmel_sha_update_req(struct atmel_sha_dev *dd)
524 err = atmel_sha_update_dma_start(dd); 674 err = atmel_sha_update_dma_start(dd);
525 675
526 /* wait for dma completion before can take more data */ 676 /* wait for dma completion before can take more data */
527 dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", 677 dev_dbg(dd->dev, "update: err: %d, digcnt: 0x%llx 0x%llx\n",
528 err, ctx->digcnt); 678 err, ctx->digcnt[1], ctx->digcnt[0]);
529 679
530 return err; 680 return err;
531} 681}
@@ -562,12 +712,21 @@ static void atmel_sha_copy_hash(struct ahash_request *req)
562 u32 *hash = (u32 *)ctx->digest; 712 u32 *hash = (u32 *)ctx->digest;
563 int i; 713 int i;
564 714
565 if (likely(ctx->flags & SHA_FLAGS_SHA1)) 715 if (ctx->flags & SHA_FLAGS_SHA1)
566 for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) 716 for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
567 hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i)); 717 hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
568 else 718 else if (ctx->flags & SHA_FLAGS_SHA224)
719 for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(u32); i++)
720 hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
721 else if (ctx->flags & SHA_FLAGS_SHA256)
569 for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(u32); i++) 722 for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(u32); i++)
570 hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i)); 723 hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
724 else if (ctx->flags & SHA_FLAGS_SHA384)
725 for (i = 0; i < SHA384_DIGEST_SIZE / sizeof(u32); i++)
726 hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
727 else
728 for (i = 0; i < SHA512_DIGEST_SIZE / sizeof(u32); i++)
729 hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
571} 730}
572 731
573static void atmel_sha_copy_ready_hash(struct ahash_request *req) 732static void atmel_sha_copy_ready_hash(struct ahash_request *req)
@@ -577,10 +736,16 @@ static void atmel_sha_copy_ready_hash(struct ahash_request *req)
577 if (!req->result) 736 if (!req->result)
578 return; 737 return;
579 738
580 if (likely(ctx->flags & SHA_FLAGS_SHA1)) 739 if (ctx->flags & SHA_FLAGS_SHA1)
581 memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE); 740 memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE);
582 else 741 else if (ctx->flags & SHA_FLAGS_SHA224)
742 memcpy(req->result, ctx->digest, SHA224_DIGEST_SIZE);
743 else if (ctx->flags & SHA_FLAGS_SHA256)
583 memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE); 744 memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE);
745 else if (ctx->flags & SHA_FLAGS_SHA384)
746 memcpy(req->result, ctx->digest, SHA384_DIGEST_SIZE);
747 else
748 memcpy(req->result, ctx->digest, SHA512_DIGEST_SIZE);
584} 749}
585 750
586static int atmel_sha_finish(struct ahash_request *req) 751static int atmel_sha_finish(struct ahash_request *req)
@@ -589,11 +754,11 @@ static int atmel_sha_finish(struct ahash_request *req)
589 struct atmel_sha_dev *dd = ctx->dd; 754 struct atmel_sha_dev *dd = ctx->dd;
590 int err = 0; 755 int err = 0;
591 756
592 if (ctx->digcnt) 757 if (ctx->digcnt[0] || ctx->digcnt[1])
593 atmel_sha_copy_ready_hash(req); 758 atmel_sha_copy_ready_hash(req);
594 759
595 dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, 760 dev_dbg(dd->dev, "digcnt: 0x%llx 0x%llx, bufcnt: %d\n", ctx->digcnt[1],
596 ctx->bufcnt); 761 ctx->digcnt[0], ctx->bufcnt);
597 762
598 return err; 763 return err;
599} 764}
@@ -628,9 +793,8 @@ static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
628{ 793{
629 clk_prepare_enable(dd->iclk); 794 clk_prepare_enable(dd->iclk);
630 795
631 if (SHA_FLAGS_INIT & dd->flags) { 796 if (!(SHA_FLAGS_INIT & dd->flags)) {
632 atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST); 797 atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST);
633 atmel_sha_dualbuff_test(dd);
634 dd->flags |= SHA_FLAGS_INIT; 798 dd->flags |= SHA_FLAGS_INIT;
635 dd->err = 0; 799 dd->err = 0;
636 } 800 }
@@ -638,6 +802,23 @@ static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
638 return 0; 802 return 0;
639} 803}
640 804
805static inline unsigned int atmel_sha_get_version(struct atmel_sha_dev *dd)
806{
807 return atmel_sha_read(dd, SHA_HW_VERSION) & 0x00000fff;
808}
809
810static void atmel_sha_hw_version_init(struct atmel_sha_dev *dd)
811{
812 atmel_sha_hw_init(dd);
813
814 dd->hw_version = atmel_sha_get_version(dd);
815
816 dev_info(dd->dev,
817 "version: 0x%x\n", dd->hw_version);
818
819 clk_disable_unprepare(dd->iclk);
820}
821
641static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, 822static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
642 struct ahash_request *req) 823 struct ahash_request *req)
643{ 824{
@@ -682,10 +863,9 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
682 863
683 if (ctx->op == SHA_OP_UPDATE) { 864 if (ctx->op == SHA_OP_UPDATE) {
684 err = atmel_sha_update_req(dd); 865 err = atmel_sha_update_req(dd);
685 if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) { 866 if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP))
686 /* no final() after finup() */ 867 /* no final() after finup() */
687 err = atmel_sha_final_req(dd); 868 err = atmel_sha_final_req(dd);
688 }
689 } else if (ctx->op == SHA_OP_FINAL) { 869 } else if (ctx->op == SHA_OP_FINAL) {
690 err = atmel_sha_final_req(dd); 870 err = atmel_sha_final_req(dd);
691 } 871 }
@@ -808,7 +988,7 @@ static int atmel_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
808 } 988 }
809 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), 989 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
810 sizeof(struct atmel_sha_reqctx) + 990 sizeof(struct atmel_sha_reqctx) +
811 SHA_BUFFER_LEN + SHA256_BLOCK_SIZE); 991 SHA_BUFFER_LEN + SHA512_BLOCK_SIZE);
812 992
813 return 0; 993 return 0;
814} 994}
@@ -826,7 +1006,7 @@ static void atmel_sha_cra_exit(struct crypto_tfm *tfm)
826 tctx->fallback = NULL; 1006 tctx->fallback = NULL;
827} 1007}
828 1008
829static struct ahash_alg sha_algs[] = { 1009static struct ahash_alg sha_1_256_algs[] = {
830{ 1010{
831 .init = atmel_sha_init, 1011 .init = atmel_sha_init,
832 .update = atmel_sha_update, 1012 .update = atmel_sha_update,
@@ -875,6 +1055,79 @@ static struct ahash_alg sha_algs[] = {
875}, 1055},
876}; 1056};
877 1057
1058static struct ahash_alg sha_224_alg = {
1059 .init = atmel_sha_init,
1060 .update = atmel_sha_update,
1061 .final = atmel_sha_final,
1062 .finup = atmel_sha_finup,
1063 .digest = atmel_sha_digest,
1064 .halg = {
1065 .digestsize = SHA224_DIGEST_SIZE,
1066 .base = {
1067 .cra_name = "sha224",
1068 .cra_driver_name = "atmel-sha224",
1069 .cra_priority = 100,
1070 .cra_flags = CRYPTO_ALG_ASYNC |
1071 CRYPTO_ALG_NEED_FALLBACK,
1072 .cra_blocksize = SHA224_BLOCK_SIZE,
1073 .cra_ctxsize = sizeof(struct atmel_sha_ctx),
1074 .cra_alignmask = 0,
1075 .cra_module = THIS_MODULE,
1076 .cra_init = atmel_sha_cra_init,
1077 .cra_exit = atmel_sha_cra_exit,
1078 }
1079 }
1080};
1081
1082static struct ahash_alg sha_384_512_algs[] = {
1083{
1084 .init = atmel_sha_init,
1085 .update = atmel_sha_update,
1086 .final = atmel_sha_final,
1087 .finup = atmel_sha_finup,
1088 .digest = atmel_sha_digest,
1089 .halg = {
1090 .digestsize = SHA384_DIGEST_SIZE,
1091 .base = {
1092 .cra_name = "sha384",
1093 .cra_driver_name = "atmel-sha384",
1094 .cra_priority = 100,
1095 .cra_flags = CRYPTO_ALG_ASYNC |
1096 CRYPTO_ALG_NEED_FALLBACK,
1097 .cra_blocksize = SHA384_BLOCK_SIZE,
1098 .cra_ctxsize = sizeof(struct atmel_sha_ctx),
1099 .cra_alignmask = 0x3,
1100 .cra_module = THIS_MODULE,
1101 .cra_init = atmel_sha_cra_init,
1102 .cra_exit = atmel_sha_cra_exit,
1103 }
1104 }
1105},
1106{
1107 .init = atmel_sha_init,
1108 .update = atmel_sha_update,
1109 .final = atmel_sha_final,
1110 .finup = atmel_sha_finup,
1111 .digest = atmel_sha_digest,
1112 .halg = {
1113 .digestsize = SHA512_DIGEST_SIZE,
1114 .base = {
1115 .cra_name = "sha512",
1116 .cra_driver_name = "atmel-sha512",
1117 .cra_priority = 100,
1118 .cra_flags = CRYPTO_ALG_ASYNC |
1119 CRYPTO_ALG_NEED_FALLBACK,
1120 .cra_blocksize = SHA512_BLOCK_SIZE,
1121 .cra_ctxsize = sizeof(struct atmel_sha_ctx),
1122 .cra_alignmask = 0x3,
1123 .cra_module = THIS_MODULE,
1124 .cra_init = atmel_sha_cra_init,
1125 .cra_exit = atmel_sha_cra_exit,
1126 }
1127 }
1128},
1129};
1130
878static void atmel_sha_done_task(unsigned long data) 1131static void atmel_sha_done_task(unsigned long data)
879{ 1132{
880 struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; 1133 struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data;
@@ -941,32 +1194,142 @@ static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd)
941{ 1194{
942 int i; 1195 int i;
943 1196
944 for (i = 0; i < ARRAY_SIZE(sha_algs); i++) 1197 for (i = 0; i < ARRAY_SIZE(sha_1_256_algs); i++)
945 crypto_unregister_ahash(&sha_algs[i]); 1198 crypto_unregister_ahash(&sha_1_256_algs[i]);
1199
1200 if (dd->caps.has_sha224)
1201 crypto_unregister_ahash(&sha_224_alg);
1202
1203 if (dd->caps.has_sha_384_512) {
1204 for (i = 0; i < ARRAY_SIZE(sha_384_512_algs); i++)
1205 crypto_unregister_ahash(&sha_384_512_algs[i]);
1206 }
946} 1207}
947 1208
948static int atmel_sha_register_algs(struct atmel_sha_dev *dd) 1209static int atmel_sha_register_algs(struct atmel_sha_dev *dd)
949{ 1210{
950 int err, i, j; 1211 int err, i, j;
951 1212
952 for (i = 0; i < ARRAY_SIZE(sha_algs); i++) { 1213 for (i = 0; i < ARRAY_SIZE(sha_1_256_algs); i++) {
953 err = crypto_register_ahash(&sha_algs[i]); 1214 err = crypto_register_ahash(&sha_1_256_algs[i]);
954 if (err) 1215 if (err)
955 goto err_sha_algs; 1216 goto err_sha_1_256_algs;
1217 }
1218
1219 if (dd->caps.has_sha224) {
1220 err = crypto_register_ahash(&sha_224_alg);
1221 if (err)
1222 goto err_sha_224_algs;
1223 }
1224
1225 if (dd->caps.has_sha_384_512) {
1226 for (i = 0; i < ARRAY_SIZE(sha_384_512_algs); i++) {
1227 err = crypto_register_ahash(&sha_384_512_algs[i]);
1228 if (err)
1229 goto err_sha_384_512_algs;
1230 }
956 } 1231 }
957 1232
958 return 0; 1233 return 0;
959 1234
960err_sha_algs: 1235err_sha_384_512_algs:
1236 for (j = 0; j < i; j++)
1237 crypto_unregister_ahash(&sha_384_512_algs[j]);
1238 crypto_unregister_ahash(&sha_224_alg);
1239err_sha_224_algs:
1240 i = ARRAY_SIZE(sha_1_256_algs);
1241err_sha_1_256_algs:
961 for (j = 0; j < i; j++) 1242 for (j = 0; j < i; j++)
962 crypto_unregister_ahash(&sha_algs[j]); 1243 crypto_unregister_ahash(&sha_1_256_algs[j]);
963 1244
964 return err; 1245 return err;
965} 1246}
966 1247
1248static bool atmel_sha_filter(struct dma_chan *chan, void *slave)
1249{
1250 struct at_dma_slave *sl = slave;
1251
1252 if (sl && sl->dma_dev == chan->device->dev) {
1253 chan->private = sl;
1254 return true;
1255 } else {
1256 return false;
1257 }
1258}
1259
1260static int atmel_sha_dma_init(struct atmel_sha_dev *dd,
1261 struct crypto_platform_data *pdata)
1262{
1263 int err = -ENOMEM;
1264 dma_cap_mask_t mask_in;
1265
1266 if (pdata && pdata->dma_slave->rxdata.dma_dev) {
1267 /* Try to grab DMA channel */
1268 dma_cap_zero(mask_in);
1269 dma_cap_set(DMA_SLAVE, mask_in);
1270
1271 dd->dma_lch_in.chan = dma_request_channel(mask_in,
1272 atmel_sha_filter, &pdata->dma_slave->rxdata);
1273
1274 if (!dd->dma_lch_in.chan)
1275 return err;
1276
1277 dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV;
1278 dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base +
1279 SHA_REG_DIN(0);
1280 dd->dma_lch_in.dma_conf.src_maxburst = 1;
1281 dd->dma_lch_in.dma_conf.src_addr_width =
1282 DMA_SLAVE_BUSWIDTH_4_BYTES;
1283 dd->dma_lch_in.dma_conf.dst_maxburst = 1;
1284 dd->dma_lch_in.dma_conf.dst_addr_width =
1285 DMA_SLAVE_BUSWIDTH_4_BYTES;
1286 dd->dma_lch_in.dma_conf.device_fc = false;
1287
1288 return 0;
1289 }
1290
1291 return -ENODEV;
1292}
1293
1294static void atmel_sha_dma_cleanup(struct atmel_sha_dev *dd)
1295{
1296 dma_release_channel(dd->dma_lch_in.chan);
1297}
1298
1299static void atmel_sha_get_cap(struct atmel_sha_dev *dd)
1300{
1301
1302 dd->caps.has_dma = 0;
1303 dd->caps.has_dualbuff = 0;
1304 dd->caps.has_sha224 = 0;
1305 dd->caps.has_sha_384_512 = 0;
1306
1307 /* keep only major version number */
1308 switch (dd->hw_version & 0xff0) {
1309 case 0x410:
1310 dd->caps.has_dma = 1;
1311 dd->caps.has_dualbuff = 1;
1312 dd->caps.has_sha224 = 1;
1313 dd->caps.has_sha_384_512 = 1;
1314 break;
1315 case 0x400:
1316 dd->caps.has_dma = 1;
1317 dd->caps.has_dualbuff = 1;
1318 dd->caps.has_sha224 = 1;
1319 break;
1320 case 0x320:
1321 break;
1322 default:
1323 dev_warn(dd->dev,
1324 "Unmanaged sha version, set minimum capabilities\n");
1325 break;
1326 }
1327}
1328
967static int atmel_sha_probe(struct platform_device *pdev) 1329static int atmel_sha_probe(struct platform_device *pdev)
968{ 1330{
969 struct atmel_sha_dev *sha_dd; 1331 struct atmel_sha_dev *sha_dd;
1332 struct crypto_platform_data *pdata;
970 struct device *dev = &pdev->dev; 1333 struct device *dev = &pdev->dev;
971 struct resource *sha_res; 1334 struct resource *sha_res;
972 unsigned long sha_phys_size; 1335 unsigned long sha_phys_size;
@@ -1018,7 +1381,7 @@ static int atmel_sha_probe(struct platform_device *pdev)
1018 } 1381 }
1019 1382
1020 /* Initializing the clock */ 1383 /* Initializing the clock */
1021 sha_dd->iclk = clk_get(&pdev->dev, NULL); 1384 sha_dd->iclk = clk_get(&pdev->dev, "sha_clk");
1022 if (IS_ERR(sha_dd->iclk)) { 1385 if (IS_ERR(sha_dd->iclk)) {
1023 dev_err(dev, "clock intialization failed.\n"); 1386 dev_err(dev, "clock intialization failed.\n");
1024 err = PTR_ERR(sha_dd->iclk); 1387 err = PTR_ERR(sha_dd->iclk);
@@ -1032,6 +1395,22 @@ static int atmel_sha_probe(struct platform_device *pdev)
1032 goto sha_io_err; 1395 goto sha_io_err;
1033 } 1396 }
1034 1397
1398 atmel_sha_hw_version_init(sha_dd);
1399
1400 atmel_sha_get_cap(sha_dd);
1401
1402 if (sha_dd->caps.has_dma) {
1403 pdata = pdev->dev.platform_data;
1404 if (!pdata) {
1405 dev_err(&pdev->dev, "platform data not available\n");
1406 err = -ENXIO;
1407 goto err_pdata;
1408 }
1409 err = atmel_sha_dma_init(sha_dd, pdata);
1410 if (err)
1411 goto err_sha_dma;
1412 }
1413
1035 spin_lock(&atmel_sha.lock); 1414 spin_lock(&atmel_sha.lock);
1036 list_add_tail(&sha_dd->list, &atmel_sha.dev_list); 1415 list_add_tail(&sha_dd->list, &atmel_sha.dev_list);
1037 spin_unlock(&atmel_sha.lock); 1416 spin_unlock(&atmel_sha.lock);
@@ -1048,6 +1427,10 @@ err_algs:
1048 spin_lock(&atmel_sha.lock); 1427 spin_lock(&atmel_sha.lock);
1049 list_del(&sha_dd->list); 1428 list_del(&sha_dd->list);
1050 spin_unlock(&atmel_sha.lock); 1429 spin_unlock(&atmel_sha.lock);
1430 if (sha_dd->caps.has_dma)
1431 atmel_sha_dma_cleanup(sha_dd);
1432err_sha_dma:
1433err_pdata:
1051 iounmap(sha_dd->io_base); 1434 iounmap(sha_dd->io_base);
1052sha_io_err: 1435sha_io_err:
1053 clk_put(sha_dd->iclk); 1436 clk_put(sha_dd->iclk);
@@ -1078,6 +1461,9 @@ static int atmel_sha_remove(struct platform_device *pdev)
1078 1461
1079 tasklet_kill(&sha_dd->done_task); 1462 tasklet_kill(&sha_dd->done_task);
1080 1463
1464 if (sha_dd->caps.has_dma)
1465 atmel_sha_dma_cleanup(sha_dd);
1466
1081 iounmap(sha_dd->io_base); 1467 iounmap(sha_dd->io_base);
1082 1468
1083 clk_put(sha_dd->iclk); 1469 clk_put(sha_dd->iclk);
@@ -1102,6 +1488,6 @@ static struct platform_driver atmel_sha_driver = {
1102 1488
1103module_platform_driver(atmel_sha_driver); 1489module_platform_driver(atmel_sha_driver);
1104 1490
1105MODULE_DESCRIPTION("Atmel SHA1/SHA256 hw acceleration support."); 1491MODULE_DESCRIPTION("Atmel SHA (1/256/224/384/512) hw acceleration support.");
1106MODULE_LICENSE("GPL v2"); 1492MODULE_LICENSE("GPL v2");
1107MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique"); 1493MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique");
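
The new atmel_sha_get_cap() above gates every optional feature (DMA, dual buffer, SHA-224, SHA-384/512 and the registration of the matching ahash algorithms) on the major revision read from SHA_HW_VERSION. A minimal stand-alone sketch of that pattern follows; the struct and function names are illustrative, only the mask and revision values come from the hunk.

/*
 * Stand-alone sketch of the version-to-capabilities mapping used by
 * atmel_sha_get_cap(); names are illustrative, not the driver's API.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct sha_caps {
        bool has_dma;
        bool has_dualbuff;
        bool has_sha224;
        bool has_sha_384_512;
};

static struct sha_caps sha_caps_from_version(uint32_t hw_version)
{
        struct sha_caps caps = { false, false, false, false };

        /* keep only the major version number */
        switch (hw_version & 0xff0) {
        case 0x410:
                caps.has_dma = true;
                caps.has_dualbuff = true;
                caps.has_sha224 = true;
                caps.has_sha_384_512 = true;
                break;
        case 0x400:
                caps.has_dma = true;
                caps.has_dualbuff = true;
                caps.has_sha224 = true;
                break;
        case 0x320:
                break;
        default:
                /* unknown IP revision: fall back to SHA-1/SHA-256 only */
                break;
        }
        return caps;
}

int main(void)
{
        struct sha_caps caps = sha_caps_from_version(0x410);

        printf("dma=%d sha384/512=%d\n", caps.has_dma, caps.has_sha_384_512);
        return 0;
}

Registering sha_224_alg and sha_384_512_algs only when the corresponding flag is set is what keeps the driver usable on the older 0x320 revision of the IP.
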
diff --git a/drivers/crypto/atmel-tdes-regs.h b/drivers/crypto/atmel-tdes-regs.h
index 5ac2a900d80c..f86734d0fda4 100644
--- a/drivers/crypto/atmel-tdes-regs.h
+++ b/drivers/crypto/atmel-tdes-regs.h
@@ -69,6 +69,8 @@
69#define TDES_XTEARNDR_XTEA_RNDS_MASK (0x3F << 0) 69#define TDES_XTEARNDR_XTEA_RNDS_MASK (0x3F << 0)
70#define TDES_XTEARNDR_XTEA_RNDS_OFFSET 0 70#define TDES_XTEARNDR_XTEA_RNDS_OFFSET 0
71 71
72#define TDES_HW_VERSION 0xFC
73
72#define TDES_RPR 0x100 74#define TDES_RPR 0x100
73#define TDES_RCR 0x104 75#define TDES_RCR 0x104
74#define TDES_TPR 0x108 76#define TDES_TPR 0x108
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 7c73fbb17538..4a99564a08e6 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -38,29 +38,35 @@
38#include <crypto/des.h> 38#include <crypto/des.h>
39#include <crypto/hash.h> 39#include <crypto/hash.h>
40#include <crypto/internal/hash.h> 40#include <crypto/internal/hash.h>
41#include <linux/platform_data/crypto-atmel.h>
41#include "atmel-tdes-regs.h" 42#include "atmel-tdes-regs.h"
42 43
43/* TDES flags */ 44/* TDES flags */
44#define TDES_FLAGS_MODE_MASK 0x007f 45#define TDES_FLAGS_MODE_MASK 0x00ff
45#define TDES_FLAGS_ENCRYPT BIT(0) 46#define TDES_FLAGS_ENCRYPT BIT(0)
46#define TDES_FLAGS_CBC BIT(1) 47#define TDES_FLAGS_CBC BIT(1)
47#define TDES_FLAGS_CFB BIT(2) 48#define TDES_FLAGS_CFB BIT(2)
48#define TDES_FLAGS_CFB8 BIT(3) 49#define TDES_FLAGS_CFB8 BIT(3)
49#define TDES_FLAGS_CFB16 BIT(4) 50#define TDES_FLAGS_CFB16 BIT(4)
50#define TDES_FLAGS_CFB32 BIT(5) 51#define TDES_FLAGS_CFB32 BIT(5)
51#define TDES_FLAGS_OFB BIT(6) 52#define TDES_FLAGS_CFB64 BIT(6)
53#define TDES_FLAGS_OFB BIT(7)
52 54
53#define TDES_FLAGS_INIT BIT(16) 55#define TDES_FLAGS_INIT BIT(16)
54#define TDES_FLAGS_FAST BIT(17) 56#define TDES_FLAGS_FAST BIT(17)
55#define TDES_FLAGS_BUSY BIT(18) 57#define TDES_FLAGS_BUSY BIT(18)
58#define TDES_FLAGS_DMA BIT(19)
56 59
57#define ATMEL_TDES_QUEUE_LENGTH 1 60#define ATMEL_TDES_QUEUE_LENGTH 50
58 61
59#define CFB8_BLOCK_SIZE 1 62#define CFB8_BLOCK_SIZE 1
60#define CFB16_BLOCK_SIZE 2 63#define CFB16_BLOCK_SIZE 2
61#define CFB32_BLOCK_SIZE 4 64#define CFB32_BLOCK_SIZE 4
62#define CFB64_BLOCK_SIZE 8
63 65
66struct atmel_tdes_caps {
67 bool has_dma;
68 u32 has_cfb_3keys;
69};
64 70
65struct atmel_tdes_dev; 71struct atmel_tdes_dev;
66 72
@@ -70,12 +76,19 @@ struct atmel_tdes_ctx {
70 int keylen; 76 int keylen;
71 u32 key[3*DES_KEY_SIZE / sizeof(u32)]; 77 u32 key[3*DES_KEY_SIZE / sizeof(u32)];
72 unsigned long flags; 78 unsigned long flags;
79
80 u16 block_size;
73}; 81};
74 82
75struct atmel_tdes_reqctx { 83struct atmel_tdes_reqctx {
76 unsigned long mode; 84 unsigned long mode;
77}; 85};
78 86
87struct atmel_tdes_dma {
88 struct dma_chan *chan;
89 struct dma_slave_config dma_conf;
90};
91
79struct atmel_tdes_dev { 92struct atmel_tdes_dev {
80 struct list_head list; 93 struct list_head list;
81 unsigned long phys_base; 94 unsigned long phys_base;
@@ -99,8 +112,10 @@ struct atmel_tdes_dev {
99 size_t total; 112 size_t total;
100 113
101 struct scatterlist *in_sg; 114 struct scatterlist *in_sg;
115 unsigned int nb_in_sg;
102 size_t in_offset; 116 size_t in_offset;
103 struct scatterlist *out_sg; 117 struct scatterlist *out_sg;
118 unsigned int nb_out_sg;
104 size_t out_offset; 119 size_t out_offset;
105 120
106 size_t buflen; 121 size_t buflen;
@@ -109,10 +124,16 @@ struct atmel_tdes_dev {
109 void *buf_in; 124 void *buf_in;
110 int dma_in; 125 int dma_in;
111 dma_addr_t dma_addr_in; 126 dma_addr_t dma_addr_in;
127 struct atmel_tdes_dma dma_lch_in;
112 128
113 void *buf_out; 129 void *buf_out;
114 int dma_out; 130 int dma_out;
115 dma_addr_t dma_addr_out; 131 dma_addr_t dma_addr_out;
132 struct atmel_tdes_dma dma_lch_out;
133
134 struct atmel_tdes_caps caps;
135
136 u32 hw_version;
116}; 137};
117 138
118struct atmel_tdes_drv { 139struct atmel_tdes_drv {
@@ -207,6 +228,31 @@ static int atmel_tdes_hw_init(struct atmel_tdes_dev *dd)
207 return 0; 228 return 0;
208} 229}
209 230
231static inline unsigned int atmel_tdes_get_version(struct atmel_tdes_dev *dd)
232{
233 return atmel_tdes_read(dd, TDES_HW_VERSION) & 0x00000fff;
234}
235
236static void atmel_tdes_hw_version_init(struct atmel_tdes_dev *dd)
237{
238 atmel_tdes_hw_init(dd);
239
240 dd->hw_version = atmel_tdes_get_version(dd);
241
242 dev_info(dd->dev,
243 "version: 0x%x\n", dd->hw_version);
244
245 clk_disable_unprepare(dd->iclk);
246}
247
248static void atmel_tdes_dma_callback(void *data)
249{
250 struct atmel_tdes_dev *dd = data;
251
252 /* dma_lch_out - completed */
253 tasklet_schedule(&dd->done_task);
254}
255
210static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd) 256static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd)
211{ 257{
212 int err; 258 int err;
@@ -217,7 +263,9 @@ static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd)
217 if (err) 263 if (err)
218 return err; 264 return err;
219 265
220 atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTDIS|TDES_PTCR_RXTDIS); 266 if (!dd->caps.has_dma)
267 atmel_tdes_write(dd, TDES_PTCR,
268 TDES_PTCR_TXTDIS | TDES_PTCR_RXTDIS);
221 269
222 /* MR register must be set before IV registers */ 270 /* MR register must be set before IV registers */
223 if (dd->ctx->keylen > (DES_KEY_SIZE << 1)) { 271 if (dd->ctx->keylen > (DES_KEY_SIZE << 1)) {
@@ -241,6 +289,8 @@ static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd)
241 valmr |= TDES_MR_CFBS_16b; 289 valmr |= TDES_MR_CFBS_16b;
242 else if (dd->flags & TDES_FLAGS_CFB32) 290 else if (dd->flags & TDES_FLAGS_CFB32)
243 valmr |= TDES_MR_CFBS_32b; 291 valmr |= TDES_MR_CFBS_32b;
292 else if (dd->flags & TDES_FLAGS_CFB64)
293 valmr |= TDES_MR_CFBS_64b;
244 } else if (dd->flags & TDES_FLAGS_OFB) { 294 } else if (dd->flags & TDES_FLAGS_OFB) {
245 valmr |= TDES_MR_OPMOD_OFB; 295 valmr |= TDES_MR_OPMOD_OFB;
246 } 296 }
@@ -262,7 +312,7 @@ static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd)
262 return 0; 312 return 0;
263} 313}
264 314
265static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd) 315static int atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd)
266{ 316{
267 int err = 0; 317 int err = 0;
268 size_t count; 318 size_t count;
@@ -288,7 +338,7 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd)
288 return err; 338 return err;
289} 339}
290 340
291static int atmel_tdes_dma_init(struct atmel_tdes_dev *dd) 341static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd)
292{ 342{
293 int err = -ENOMEM; 343 int err = -ENOMEM;
294 344
@@ -333,7 +383,7 @@ err_alloc:
333 return err; 383 return err;
334} 384}
335 385
336static void atmel_tdes_dma_cleanup(struct atmel_tdes_dev *dd) 386static void atmel_tdes_buff_cleanup(struct atmel_tdes_dev *dd)
337{ 387{
338 dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen, 388 dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen,
339 DMA_FROM_DEVICE); 389 DMA_FROM_DEVICE);
@@ -343,7 +393,7 @@ static void atmel_tdes_dma_cleanup(struct atmel_tdes_dev *dd)
343 free_page((unsigned long)dd->buf_in); 393 free_page((unsigned long)dd->buf_in);
344} 394}
345 395
346static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, 396static int atmel_tdes_crypt_pdc(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
347 dma_addr_t dma_addr_out, int length) 397 dma_addr_t dma_addr_out, int length)
348{ 398{
349 struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm); 399 struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -379,7 +429,76 @@ static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
379 return 0; 429 return 0;
380} 430}
381 431
382static int atmel_tdes_crypt_dma_start(struct atmel_tdes_dev *dd) 432static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
433 dma_addr_t dma_addr_out, int length)
434{
435 struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm);
436 struct atmel_tdes_dev *dd = ctx->dd;
437 struct scatterlist sg[2];
438 struct dma_async_tx_descriptor *in_desc, *out_desc;
439
440 dd->dma_size = length;
441
442 if (!(dd->flags & TDES_FLAGS_FAST)) {
443 dma_sync_single_for_device(dd->dev, dma_addr_in, length,
444 DMA_TO_DEVICE);
445 }
446
447 if (dd->flags & TDES_FLAGS_CFB8) {
448 dd->dma_lch_in.dma_conf.dst_addr_width =
449 DMA_SLAVE_BUSWIDTH_1_BYTE;
450 dd->dma_lch_out.dma_conf.src_addr_width =
451 DMA_SLAVE_BUSWIDTH_1_BYTE;
452 } else if (dd->flags & TDES_FLAGS_CFB16) {
453 dd->dma_lch_in.dma_conf.dst_addr_width =
454 DMA_SLAVE_BUSWIDTH_2_BYTES;
455 dd->dma_lch_out.dma_conf.src_addr_width =
456 DMA_SLAVE_BUSWIDTH_2_BYTES;
457 } else {
458 dd->dma_lch_in.dma_conf.dst_addr_width =
459 DMA_SLAVE_BUSWIDTH_4_BYTES;
460 dd->dma_lch_out.dma_conf.src_addr_width =
461 DMA_SLAVE_BUSWIDTH_4_BYTES;
462 }
463
464 dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf);
465 dmaengine_slave_config(dd->dma_lch_out.chan, &dd->dma_lch_out.dma_conf);
466
467 dd->flags |= TDES_FLAGS_DMA;
468
469 sg_init_table(&sg[0], 1);
470 sg_dma_address(&sg[0]) = dma_addr_in;
471 sg_dma_len(&sg[0]) = length;
472
473 sg_init_table(&sg[1], 1);
474 sg_dma_address(&sg[1]) = dma_addr_out;
475 sg_dma_len(&sg[1]) = length;
476
477 in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, &sg[0],
478 1, DMA_MEM_TO_DEV,
479 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
480 if (!in_desc)
481 return -EINVAL;
482
483 out_desc = dmaengine_prep_slave_sg(dd->dma_lch_out.chan, &sg[1],
484 1, DMA_DEV_TO_MEM,
485 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
486 if (!out_desc)
487 return -EINVAL;
488
489 out_desc->callback = atmel_tdes_dma_callback;
490 out_desc->callback_param = dd;
491
492 dmaengine_submit(out_desc);
493 dma_async_issue_pending(dd->dma_lch_out.chan);
494
495 dmaengine_submit(in_desc);
496 dma_async_issue_pending(dd->dma_lch_in.chan);
497
498 return 0;
499}
500
501static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd)
383{ 502{
384 struct crypto_tfm *tfm = crypto_ablkcipher_tfm( 503 struct crypto_tfm *tfm = crypto_ablkcipher_tfm(
385 crypto_ablkcipher_reqtfm(dd->req)); 504 crypto_ablkcipher_reqtfm(dd->req));
@@ -387,23 +506,23 @@ static int atmel_tdes_crypt_dma_start(struct atmel_tdes_dev *dd)
387 size_t count; 506 size_t count;
388 dma_addr_t addr_in, addr_out; 507 dma_addr_t addr_in, addr_out;
389 508
390 if (sg_is_last(dd->in_sg) && sg_is_last(dd->out_sg)) { 509 if ((!dd->in_offset) && (!dd->out_offset)) {
391 /* check for alignment */ 510 /* check for alignment */
392 in = IS_ALIGNED((u32)dd->in_sg->offset, sizeof(u32)); 511 in = IS_ALIGNED((u32)dd->in_sg->offset, sizeof(u32)) &&
393 out = IS_ALIGNED((u32)dd->out_sg->offset, sizeof(u32)); 512 IS_ALIGNED(dd->in_sg->length, dd->ctx->block_size);
394 513 out = IS_ALIGNED((u32)dd->out_sg->offset, sizeof(u32)) &&
514 IS_ALIGNED(dd->out_sg->length, dd->ctx->block_size);
395 fast = in && out; 515 fast = in && out;
516
517 if (sg_dma_len(dd->in_sg) != sg_dma_len(dd->out_sg))
518 fast = 0;
396 } 519 }
397 520
521
398 if (fast) { 522 if (fast) {
399 count = min(dd->total, sg_dma_len(dd->in_sg)); 523 count = min(dd->total, sg_dma_len(dd->in_sg));
400 count = min(count, sg_dma_len(dd->out_sg)); 524 count = min(count, sg_dma_len(dd->out_sg));
401 525
402 if (count != dd->total) {
403 pr_err("request length != buffer length\n");
404 return -EINVAL;
405 }
406
407 err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); 526 err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
408 if (!err) { 527 if (!err) {
409 dev_err(dd->dev, "dma_map_sg() error\n"); 528 dev_err(dd->dev, "dma_map_sg() error\n");
@@ -433,13 +552,16 @@ static int atmel_tdes_crypt_dma_start(struct atmel_tdes_dev *dd)
433 addr_out = dd->dma_addr_out; 552 addr_out = dd->dma_addr_out;
434 553
435 dd->flags &= ~TDES_FLAGS_FAST; 554 dd->flags &= ~TDES_FLAGS_FAST;
436
437 } 555 }
438 556
439 dd->total -= count; 557 dd->total -= count;
440 558
441 err = atmel_tdes_crypt_dma(tfm, addr_in, addr_out, count); 559 if (dd->caps.has_dma)
442 if (err) { 560 err = atmel_tdes_crypt_dma(tfm, addr_in, addr_out, count);
561 else
562 err = atmel_tdes_crypt_pdc(tfm, addr_in, addr_out, count);
563
564 if (err && (dd->flags & TDES_FLAGS_FAST)) {
443 dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); 565 dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
444 dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE); 566 dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE);
445 } 567 }
@@ -447,7 +569,6 @@ static int atmel_tdes_crypt_dma_start(struct atmel_tdes_dev *dd)
447 return err; 569 return err;
448} 570}
449 571
450
451static void atmel_tdes_finish_req(struct atmel_tdes_dev *dd, int err) 572static void atmel_tdes_finish_req(struct atmel_tdes_dev *dd, int err)
452{ 573{
453 struct ablkcipher_request *req = dd->req; 574 struct ablkcipher_request *req = dd->req;
@@ -506,7 +627,7 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
506 627
507 err = atmel_tdes_write_ctrl(dd); 628 err = atmel_tdes_write_ctrl(dd);
508 if (!err) 629 if (!err)
509 err = atmel_tdes_crypt_dma_start(dd); 630 err = atmel_tdes_crypt_start(dd);
510 if (err) { 631 if (err) {
511 /* des_task will not finish it, so do it here */ 632 /* des_task will not finish it, so do it here */
512 atmel_tdes_finish_req(dd, err); 633 atmel_tdes_finish_req(dd, err);
@@ -516,41 +637,145 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd,
516 return ret; 637 return ret;
517} 638}
518 639
640static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd)
641{
642 int err = -EINVAL;
643 size_t count;
644
645 if (dd->flags & TDES_FLAGS_DMA) {
646 err = 0;
647 if (dd->flags & TDES_FLAGS_FAST) {
648 dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
649 dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
650 } else {
651 dma_sync_single_for_device(dd->dev, dd->dma_addr_out,
652 dd->dma_size, DMA_FROM_DEVICE);
653
654 /* copy data */
655 count = atmel_tdes_sg_copy(&dd->out_sg, &dd->out_offset,
656 dd->buf_out, dd->buflen, dd->dma_size, 1);
657 if (count != dd->dma_size) {
658 err = -EINVAL;
659 pr_err("not all data converted: %u\n", count);
660 }
661 }
662 }
663 return err;
664}
519 665
520static int atmel_tdes_crypt(struct ablkcipher_request *req, unsigned long mode) 666static int atmel_tdes_crypt(struct ablkcipher_request *req, unsigned long mode)
521{ 667{
522 struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx( 668 struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(
523 crypto_ablkcipher_reqtfm(req)); 669 crypto_ablkcipher_reqtfm(req));
524 struct atmel_tdes_reqctx *rctx = ablkcipher_request_ctx(req); 670 struct atmel_tdes_reqctx *rctx = ablkcipher_request_ctx(req);
525 struct atmel_tdes_dev *dd;
526 671
527 if (mode & TDES_FLAGS_CFB8) { 672 if (mode & TDES_FLAGS_CFB8) {
528 if (!IS_ALIGNED(req->nbytes, CFB8_BLOCK_SIZE)) { 673 if (!IS_ALIGNED(req->nbytes, CFB8_BLOCK_SIZE)) {
529 pr_err("request size is not exact amount of CFB8 blocks\n"); 674 pr_err("request size is not exact amount of CFB8 blocks\n");
530 return -EINVAL; 675 return -EINVAL;
531 } 676 }
677 ctx->block_size = CFB8_BLOCK_SIZE;
532 } else if (mode & TDES_FLAGS_CFB16) { 678 } else if (mode & TDES_FLAGS_CFB16) {
533 if (!IS_ALIGNED(req->nbytes, CFB16_BLOCK_SIZE)) { 679 if (!IS_ALIGNED(req->nbytes, CFB16_BLOCK_SIZE)) {
534 pr_err("request size is not exact amount of CFB16 blocks\n"); 680 pr_err("request size is not exact amount of CFB16 blocks\n");
535 return -EINVAL; 681 return -EINVAL;
536 } 682 }
683 ctx->block_size = CFB16_BLOCK_SIZE;
537 } else if (mode & TDES_FLAGS_CFB32) { 684 } else if (mode & TDES_FLAGS_CFB32) {
538 if (!IS_ALIGNED(req->nbytes, CFB32_BLOCK_SIZE)) { 685 if (!IS_ALIGNED(req->nbytes, CFB32_BLOCK_SIZE)) {
539 pr_err("request size is not exact amount of CFB32 blocks\n"); 686 pr_err("request size is not exact amount of CFB32 blocks\n");
540 return -EINVAL; 687 return -EINVAL;
541 } 688 }
542 } else if (!IS_ALIGNED(req->nbytes, DES_BLOCK_SIZE)) { 689 ctx->block_size = CFB32_BLOCK_SIZE;
543 pr_err("request size is not exact amount of DES blocks\n"); 690 } else {
544 return -EINVAL; 691 if (!IS_ALIGNED(req->nbytes, DES_BLOCK_SIZE)) {
692 pr_err("request size is not exact amount of DES blocks\n");
693 return -EINVAL;
694 }
695 ctx->block_size = DES_BLOCK_SIZE;
545 } 696 }
546 697
547 dd = atmel_tdes_find_dev(ctx); 698 rctx->mode = mode;
548 if (!dd) 699
700 return atmel_tdes_handle_queue(ctx->dd, req);
701}
702
703static bool atmel_tdes_filter(struct dma_chan *chan, void *slave)
704{
705 struct at_dma_slave *sl = slave;
706
707 if (sl && sl->dma_dev == chan->device->dev) {
708 chan->private = sl;
709 return true;
710 } else {
711 return false;
712 }
713}
714
715static int atmel_tdes_dma_init(struct atmel_tdes_dev *dd,
716 struct crypto_platform_data *pdata)
717{
718 int err = -ENOMEM;
719 dma_cap_mask_t mask_in, mask_out;
720
721 if (pdata && pdata->dma_slave->txdata.dma_dev &&
722 pdata->dma_slave->rxdata.dma_dev) {
723
724 /* Try to grab 2 DMA channels */
725 dma_cap_zero(mask_in);
726 dma_cap_set(DMA_SLAVE, mask_in);
727
728 dd->dma_lch_in.chan = dma_request_channel(mask_in,
729 atmel_tdes_filter, &pdata->dma_slave->rxdata);
730
731 if (!dd->dma_lch_in.chan)
732 goto err_dma_in;
733
734 dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV;
735 dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base +
736 TDES_IDATA1R;
737 dd->dma_lch_in.dma_conf.src_maxburst = 1;
738 dd->dma_lch_in.dma_conf.src_addr_width =
739 DMA_SLAVE_BUSWIDTH_4_BYTES;
740 dd->dma_lch_in.dma_conf.dst_maxburst = 1;
741 dd->dma_lch_in.dma_conf.dst_addr_width =
742 DMA_SLAVE_BUSWIDTH_4_BYTES;
743 dd->dma_lch_in.dma_conf.device_fc = false;
744
745 dma_cap_zero(mask_out);
746 dma_cap_set(DMA_SLAVE, mask_out);
747 dd->dma_lch_out.chan = dma_request_channel(mask_out,
748 atmel_tdes_filter, &pdata->dma_slave->txdata);
749
750 if (!dd->dma_lch_out.chan)
751 goto err_dma_out;
752
753 dd->dma_lch_out.dma_conf.direction = DMA_DEV_TO_MEM;
754 dd->dma_lch_out.dma_conf.src_addr = dd->phys_base +
755 TDES_ODATA1R;
756 dd->dma_lch_out.dma_conf.src_maxburst = 1;
757 dd->dma_lch_out.dma_conf.src_addr_width =
758 DMA_SLAVE_BUSWIDTH_4_BYTES;
759 dd->dma_lch_out.dma_conf.dst_maxburst = 1;
760 dd->dma_lch_out.dma_conf.dst_addr_width =
761 DMA_SLAVE_BUSWIDTH_4_BYTES;
762 dd->dma_lch_out.dma_conf.device_fc = false;
763
764 return 0;
765 } else {
549 return -ENODEV; 766 return -ENODEV;
767 }
550 768
551 rctx->mode = mode; 769err_dma_out:
770 dma_release_channel(dd->dma_lch_in.chan);
771err_dma_in:
772 return err;
773}
552 774
553 return atmel_tdes_handle_queue(dd, req); 775static void atmel_tdes_dma_cleanup(struct atmel_tdes_dev *dd)
776{
777 dma_release_channel(dd->dma_lch_in.chan);
778 dma_release_channel(dd->dma_lch_out.chan);
554} 779}
555 780
556static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key, 781static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
@@ -590,7 +815,8 @@ static int atmel_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
590 /* 815 /*
591 * HW bug in cfb 3-keys mode. 816 * HW bug in cfb 3-keys mode.
592 */ 817 */
593 if (strstr(alg_name, "cfb") && (keylen != 2*DES_KEY_SIZE)) { 818 if (!ctx->dd->caps.has_cfb_3keys && strstr(alg_name, "cfb")
819 && (keylen != 2*DES_KEY_SIZE)) {
594 crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); 820 crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
595 return -EINVAL; 821 return -EINVAL;
596 } else if ((keylen != 2*DES_KEY_SIZE) && (keylen != 3*DES_KEY_SIZE)) { 822 } else if ((keylen != 2*DES_KEY_SIZE) && (keylen != 3*DES_KEY_SIZE)) {
@@ -678,8 +904,15 @@ static int atmel_tdes_ofb_decrypt(struct ablkcipher_request *req)
678 904
679static int atmel_tdes_cra_init(struct crypto_tfm *tfm) 905static int atmel_tdes_cra_init(struct crypto_tfm *tfm)
680{ 906{
907 struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm);
908 struct atmel_tdes_dev *dd;
909
681 tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_tdes_reqctx); 910 tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_tdes_reqctx);
682 911
912 dd = atmel_tdes_find_dev(ctx);
913 if (!dd)
914 return -ENODEV;
915
683 return 0; 916 return 0;
684} 917}
685 918
@@ -695,7 +928,7 @@ static struct crypto_alg tdes_algs[] = {
695 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 928 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
696 .cra_blocksize = DES_BLOCK_SIZE, 929 .cra_blocksize = DES_BLOCK_SIZE,
697 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 930 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
698 .cra_alignmask = 0, 931 .cra_alignmask = 0x7,
699 .cra_type = &crypto_ablkcipher_type, 932 .cra_type = &crypto_ablkcipher_type,
700 .cra_module = THIS_MODULE, 933 .cra_module = THIS_MODULE,
701 .cra_init = atmel_tdes_cra_init, 934 .cra_init = atmel_tdes_cra_init,
@@ -715,7 +948,7 @@ static struct crypto_alg tdes_algs[] = {
715 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 948 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
716 .cra_blocksize = DES_BLOCK_SIZE, 949 .cra_blocksize = DES_BLOCK_SIZE,
717 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 950 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
718 .cra_alignmask = 0, 951 .cra_alignmask = 0x7,
719 .cra_type = &crypto_ablkcipher_type, 952 .cra_type = &crypto_ablkcipher_type,
720 .cra_module = THIS_MODULE, 953 .cra_module = THIS_MODULE,
721 .cra_init = atmel_tdes_cra_init, 954 .cra_init = atmel_tdes_cra_init,
@@ -736,7 +969,7 @@ static struct crypto_alg tdes_algs[] = {
736 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 969 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
737 .cra_blocksize = DES_BLOCK_SIZE, 970 .cra_blocksize = DES_BLOCK_SIZE,
738 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 971 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
739 .cra_alignmask = 0, 972 .cra_alignmask = 0x7,
740 .cra_type = &crypto_ablkcipher_type, 973 .cra_type = &crypto_ablkcipher_type,
741 .cra_module = THIS_MODULE, 974 .cra_module = THIS_MODULE,
742 .cra_init = atmel_tdes_cra_init, 975 .cra_init = atmel_tdes_cra_init,
@@ -778,7 +1011,7 @@ static struct crypto_alg tdes_algs[] = {
778 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1011 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
779 .cra_blocksize = CFB16_BLOCK_SIZE, 1012 .cra_blocksize = CFB16_BLOCK_SIZE,
780 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 1013 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
781 .cra_alignmask = 0, 1014 .cra_alignmask = 0x1,
782 .cra_type = &crypto_ablkcipher_type, 1015 .cra_type = &crypto_ablkcipher_type,
783 .cra_module = THIS_MODULE, 1016 .cra_module = THIS_MODULE,
784 .cra_init = atmel_tdes_cra_init, 1017 .cra_init = atmel_tdes_cra_init,
@@ -799,7 +1032,7 @@ static struct crypto_alg tdes_algs[] = {
799 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1032 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
800 .cra_blocksize = CFB32_BLOCK_SIZE, 1033 .cra_blocksize = CFB32_BLOCK_SIZE,
801 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 1034 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
802 .cra_alignmask = 0, 1035 .cra_alignmask = 0x3,
803 .cra_type = &crypto_ablkcipher_type, 1036 .cra_type = &crypto_ablkcipher_type,
804 .cra_module = THIS_MODULE, 1037 .cra_module = THIS_MODULE,
805 .cra_init = atmel_tdes_cra_init, 1038 .cra_init = atmel_tdes_cra_init,
@@ -820,7 +1053,7 @@ static struct crypto_alg tdes_algs[] = {
820 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1053 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
821 .cra_blocksize = DES_BLOCK_SIZE, 1054 .cra_blocksize = DES_BLOCK_SIZE,
822 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 1055 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
823 .cra_alignmask = 0, 1056 .cra_alignmask = 0x7,
824 .cra_type = &crypto_ablkcipher_type, 1057 .cra_type = &crypto_ablkcipher_type,
825 .cra_module = THIS_MODULE, 1058 .cra_module = THIS_MODULE,
826 .cra_init = atmel_tdes_cra_init, 1059 .cra_init = atmel_tdes_cra_init,
@@ -841,7 +1074,7 @@ static struct crypto_alg tdes_algs[] = {
841 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1074 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
842 .cra_blocksize = DES_BLOCK_SIZE, 1075 .cra_blocksize = DES_BLOCK_SIZE,
843 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 1076 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
844 .cra_alignmask = 0, 1077 .cra_alignmask = 0x7,
845 .cra_type = &crypto_ablkcipher_type, 1078 .cra_type = &crypto_ablkcipher_type,
846 .cra_module = THIS_MODULE, 1079 .cra_module = THIS_MODULE,
847 .cra_init = atmel_tdes_cra_init, 1080 .cra_init = atmel_tdes_cra_init,
@@ -861,7 +1094,7 @@ static struct crypto_alg tdes_algs[] = {
861 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1094 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
862 .cra_blocksize = DES_BLOCK_SIZE, 1095 .cra_blocksize = DES_BLOCK_SIZE,
863 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 1096 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
864 .cra_alignmask = 0, 1097 .cra_alignmask = 0x7,
865 .cra_type = &crypto_ablkcipher_type, 1098 .cra_type = &crypto_ablkcipher_type,
866 .cra_module = THIS_MODULE, 1099 .cra_module = THIS_MODULE,
867 .cra_init = atmel_tdes_cra_init, 1100 .cra_init = atmel_tdes_cra_init,
@@ -882,7 +1115,7 @@ static struct crypto_alg tdes_algs[] = {
882 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1115 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
883 .cra_blocksize = DES_BLOCK_SIZE, 1116 .cra_blocksize = DES_BLOCK_SIZE,
884 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 1117 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
885 .cra_alignmask = 0, 1118 .cra_alignmask = 0x7,
886 .cra_type = &crypto_ablkcipher_type, 1119 .cra_type = &crypto_ablkcipher_type,
887 .cra_module = THIS_MODULE, 1120 .cra_module = THIS_MODULE,
888 .cra_init = atmel_tdes_cra_init, 1121 .cra_init = atmel_tdes_cra_init,
@@ -924,7 +1157,7 @@ static struct crypto_alg tdes_algs[] = {
924 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1157 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
925 .cra_blocksize = CFB16_BLOCK_SIZE, 1158 .cra_blocksize = CFB16_BLOCK_SIZE,
926 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 1159 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
927 .cra_alignmask = 0, 1160 .cra_alignmask = 0x1,
928 .cra_type = &crypto_ablkcipher_type, 1161 .cra_type = &crypto_ablkcipher_type,
929 .cra_module = THIS_MODULE, 1162 .cra_module = THIS_MODULE,
930 .cra_init = atmel_tdes_cra_init, 1163 .cra_init = atmel_tdes_cra_init,
@@ -945,7 +1178,7 @@ static struct crypto_alg tdes_algs[] = {
945 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1178 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
946 .cra_blocksize = CFB32_BLOCK_SIZE, 1179 .cra_blocksize = CFB32_BLOCK_SIZE,
947 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 1180 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
948 .cra_alignmask = 0, 1181 .cra_alignmask = 0x3,
949 .cra_type = &crypto_ablkcipher_type, 1182 .cra_type = &crypto_ablkcipher_type,
950 .cra_module = THIS_MODULE, 1183 .cra_module = THIS_MODULE,
951 .cra_init = atmel_tdes_cra_init, 1184 .cra_init = atmel_tdes_cra_init,
@@ -966,7 +1199,7 @@ static struct crypto_alg tdes_algs[] = {
966 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 1199 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
967 .cra_blocksize = DES_BLOCK_SIZE, 1200 .cra_blocksize = DES_BLOCK_SIZE,
968 .cra_ctxsize = sizeof(struct atmel_tdes_ctx), 1201 .cra_ctxsize = sizeof(struct atmel_tdes_ctx),
969 .cra_alignmask = 0, 1202 .cra_alignmask = 0x7,
970 .cra_type = &crypto_ablkcipher_type, 1203 .cra_type = &crypto_ablkcipher_type,
971 .cra_module = THIS_MODULE, 1204 .cra_module = THIS_MODULE,
972 .cra_init = atmel_tdes_cra_init, 1205 .cra_init = atmel_tdes_cra_init,
@@ -994,14 +1227,24 @@ static void atmel_tdes_done_task(unsigned long data)
994 struct atmel_tdes_dev *dd = (struct atmel_tdes_dev *) data; 1227 struct atmel_tdes_dev *dd = (struct atmel_tdes_dev *) data;
995 int err; 1228 int err;
996 1229
997 err = atmel_tdes_crypt_dma_stop(dd); 1230 if (!(dd->flags & TDES_FLAGS_DMA))
1231 err = atmel_tdes_crypt_pdc_stop(dd);
1232 else
1233 err = atmel_tdes_crypt_dma_stop(dd);
998 1234
999 err = dd->err ? : err; 1235 err = dd->err ? : err;
1000 1236
1001 if (dd->total && !err) { 1237 if (dd->total && !err) {
1002 err = atmel_tdes_crypt_dma_start(dd); 1238 if (dd->flags & TDES_FLAGS_FAST) {
1239 dd->in_sg = sg_next(dd->in_sg);
1240 dd->out_sg = sg_next(dd->out_sg);
1241 if (!dd->in_sg || !dd->out_sg)
1242 err = -EINVAL;
1243 }
1003 if (!err) 1244 if (!err)
1004 return; 1245 err = atmel_tdes_crypt_start(dd);
1246 if (!err)
1247 return; /* DMA started. Not fininishing. */
1005 } 1248 }
1006 1249
1007 atmel_tdes_finish_req(dd, err); 1250 atmel_tdes_finish_req(dd, err);
@@ -1053,9 +1296,31 @@ err_tdes_algs:
1053 return err; 1296 return err;
1054} 1297}
1055 1298
1299static void atmel_tdes_get_cap(struct atmel_tdes_dev *dd)
1300{
1301
1302 dd->caps.has_dma = 0;
1303 dd->caps.has_cfb_3keys = 0;
1304
1305 /* keep only major version number */
1306 switch (dd->hw_version & 0xf00) {
1307 case 0x700:
1308 dd->caps.has_dma = 1;
1309 dd->caps.has_cfb_3keys = 1;
1310 break;
1311 case 0x600:
1312 break;
1313 default:
1314 dev_warn(dd->dev,
1315 "Unmanaged tdes version, set minimum capabilities\n");
1316 break;
1317 }
1318}
1319
1056static int atmel_tdes_probe(struct platform_device *pdev) 1320static int atmel_tdes_probe(struct platform_device *pdev)
1057{ 1321{
1058 struct atmel_tdes_dev *tdes_dd; 1322 struct atmel_tdes_dev *tdes_dd;
1323 struct crypto_platform_data *pdata;
1059 struct device *dev = &pdev->dev; 1324 struct device *dev = &pdev->dev;
1060 struct resource *tdes_res; 1325 struct resource *tdes_res;
1061 unsigned long tdes_phys_size; 1326 unsigned long tdes_phys_size;
@@ -1109,7 +1374,7 @@ static int atmel_tdes_probe(struct platform_device *pdev)
1109 } 1374 }
1110 1375
1111 /* Initializing the clock */ 1376 /* Initializing the clock */
1112 tdes_dd->iclk = clk_get(&pdev->dev, NULL); 1377 tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk");
1113 if (IS_ERR(tdes_dd->iclk)) { 1378 if (IS_ERR(tdes_dd->iclk)) {
1114 dev_err(dev, "clock intialization failed.\n"); 1379 dev_err(dev, "clock intialization failed.\n");
1115 err = PTR_ERR(tdes_dd->iclk); 1380 err = PTR_ERR(tdes_dd->iclk);
@@ -1123,9 +1388,25 @@ static int atmel_tdes_probe(struct platform_device *pdev)
1123 goto tdes_io_err; 1388 goto tdes_io_err;
1124 } 1389 }
1125 1390
1126 err = atmel_tdes_dma_init(tdes_dd); 1391 atmel_tdes_hw_version_init(tdes_dd);
1392
1393 atmel_tdes_get_cap(tdes_dd);
1394
1395 err = atmel_tdes_buff_init(tdes_dd);
1127 if (err) 1396 if (err)
1128 goto err_tdes_dma; 1397 goto err_tdes_buff;
1398
1399 if (tdes_dd->caps.has_dma) {
1400 pdata = pdev->dev.platform_data;
1401 if (!pdata) {
1402 dev_err(&pdev->dev, "platform data not available\n");
1403 err = -ENXIO;
1404 goto err_pdata;
1405 }
1406 err = atmel_tdes_dma_init(tdes_dd, pdata);
1407 if (err)
1408 goto err_tdes_dma;
1409 }
1129 1410
1130 spin_lock(&atmel_tdes.lock); 1411 spin_lock(&atmel_tdes.lock);
1131 list_add_tail(&tdes_dd->list, &atmel_tdes.dev_list); 1412 list_add_tail(&tdes_dd->list, &atmel_tdes.dev_list);
@@ -1143,8 +1424,12 @@ err_algs:
1143 spin_lock(&atmel_tdes.lock); 1424 spin_lock(&atmel_tdes.lock);
1144 list_del(&tdes_dd->list); 1425 list_del(&tdes_dd->list);
1145 spin_unlock(&atmel_tdes.lock); 1426 spin_unlock(&atmel_tdes.lock);
1146 atmel_tdes_dma_cleanup(tdes_dd); 1427 if (tdes_dd->caps.has_dma)
1428 atmel_tdes_dma_cleanup(tdes_dd);
1147err_tdes_dma: 1429err_tdes_dma:
1430err_pdata:
1431 atmel_tdes_buff_cleanup(tdes_dd);
1432err_tdes_buff:
1148 iounmap(tdes_dd->io_base); 1433 iounmap(tdes_dd->io_base);
1149tdes_io_err: 1434tdes_io_err:
1150 clk_put(tdes_dd->iclk); 1435 clk_put(tdes_dd->iclk);
@@ -1178,7 +1463,10 @@ static int atmel_tdes_remove(struct platform_device *pdev)
1178 tasklet_kill(&tdes_dd->done_task); 1463 tasklet_kill(&tdes_dd->done_task);
1179 tasklet_kill(&tdes_dd->queue_task); 1464 tasklet_kill(&tdes_dd->queue_task);
1180 1465
1181 atmel_tdes_dma_cleanup(tdes_dd); 1466 if (tdes_dd->caps.has_dma)
1467 atmel_tdes_dma_cleanup(tdes_dd);
1468
1469 atmel_tdes_buff_cleanup(tdes_dd);
1182 1470
1183 iounmap(tdes_dd->io_base); 1471 iounmap(tdes_dd->io_base);
1184 1472
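
atmel_tdes_crypt_dma() above follows the usual dmaengine slave sequence: configure both channels for the current bus width, prepare one descriptor per direction, attach the completion callback to the output descriptor (the transfer is only finished once the last result word has been written back), then submit and issue-pending on both channels. A condensed, kernel-style sketch of just that submit sequence; error paths, the slave configuration, and the FIFO addresses are in the driver above, and the helper name here is illustrative.

/*
 * Kernel-style sketch of the submit sequence in atmel_tdes_crypt_dma();
 * condensed and illustrative, not a drop-in replacement.
 */
#include <linux/dmaengine.h>
#include <linux/errno.h>
#include <linux/scatterlist.h>

static int sketch_submit_pair(struct dma_chan *in_ch, struct dma_chan *out_ch,
                              struct scatterlist *sg_in,
                              struct scatterlist *sg_out,
                              dma_async_tx_callback done, void *arg)
{
        struct dma_async_tx_descriptor *in_d, *out_d;

        in_d = dmaengine_prep_slave_sg(in_ch, sg_in, 1, DMA_MEM_TO_DEV,
                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
        if (!in_d)
                return -EINVAL;

        out_d = dmaengine_prep_slave_sg(out_ch, sg_out, 1, DMA_DEV_TO_MEM,
                                        DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
        if (!out_d)
                return -EINVAL;

        /* completion fires when the last output data has been written */
        out_d->callback = done;
        out_d->callback_param = arg;

        dmaengine_submit(out_d);
        dma_async_issue_pending(out_ch);

        dmaengine_submit(in_d);
        dma_async_issue_pending(in_ch);

        return 0;
}

On controllers without a dmaengine channel (caps.has_dma clear) the driver keeps the legacy PDC path, which is why atmel_tdes_crypt_start() dispatches between atmel_tdes_crypt_dma() and atmel_tdes_crypt_pdc().
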
diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c
index 827913d7d33a..d797f31f5d85 100644
--- a/drivers/crypto/bfin_crc.c
+++ b/drivers/crypto/bfin_crc.c
@@ -151,7 +151,7 @@ static int bfin_crypto_crc_init(struct ahash_request *req)
151 struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req); 151 struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
152 struct bfin_crypto_crc *crc; 152 struct bfin_crypto_crc *crc;
153 153
154 dev_dbg(crc->dev, "crc_init\n"); 154 dev_dbg(ctx->crc->dev, "crc_init\n");
155 spin_lock_bh(&crc_list.lock); 155 spin_lock_bh(&crc_list.lock);
156 list_for_each_entry(crc, &crc_list.dev_list, list) { 156 list_for_each_entry(crc, &crc_list.dev_list, list) {
157 crc_ctx->crc = crc; 157 crc_ctx->crc = crc;
@@ -160,7 +160,7 @@ static int bfin_crypto_crc_init(struct ahash_request *req)
160 spin_unlock_bh(&crc_list.lock); 160 spin_unlock_bh(&crc_list.lock);
161 161
162 if (sg_count(req->src) > CRC_MAX_DMA_DESC) { 162 if (sg_count(req->src) > CRC_MAX_DMA_DESC) {
163 dev_dbg(crc->dev, "init: requested sg list is too big > %d\n", 163 dev_dbg(ctx->crc->dev, "init: requested sg list is too big > %d\n",
164 CRC_MAX_DMA_DESC); 164 CRC_MAX_DMA_DESC);
165 return -EINVAL; 165 return -EINVAL;
166 } 166 }
@@ -175,7 +175,7 @@ static int bfin_crypto_crc_init(struct ahash_request *req)
175 /* init crc results */ 175 /* init crc results */
176 put_unaligned_le32(crc_ctx->key, req->result); 176 put_unaligned_le32(crc_ctx->key, req->result);
177 177
178 dev_dbg(crc->dev, "init: digest size: %d\n", 178 dev_dbg(ctx->crc->dev, "init: digest size: %d\n",
179 crypto_ahash_digestsize(tfm)); 179 crypto_ahash_digestsize(tfm));
180 180
181 return bfin_crypto_crc_init_hw(crc, crc_ctx->key); 181 return bfin_crypto_crc_init_hw(crc, crc_ctx->key);
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index 65c7668614ab..b44091c47f75 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -78,7 +78,7 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API
78 tristate "Register hash algorithm implementations with Crypto API" 78 tristate "Register hash algorithm implementations with Crypto API"
79 depends on CRYPTO_DEV_FSL_CAAM 79 depends on CRYPTO_DEV_FSL_CAAM
80 default y 80 default y
81 select CRYPTO_AHASH 81 select CRYPTO_HASH
82 help 82 help
83 Selecting this will offload ahash for users of the 83 Selecting this will offload ahash for users of the
84 scatterlist crypto API to the SEC4 via job ring. 84 scatterlist crypto API to the SEC4 via job ring.
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index cf268b14ae9a..765fdf5ce579 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -1693,6 +1693,7 @@ static struct caam_alg_template driver_algs[] = {
1693 .name = "authenc(hmac(sha224),cbc(aes))", 1693 .name = "authenc(hmac(sha224),cbc(aes))",
1694 .driver_name = "authenc-hmac-sha224-cbc-aes-caam", 1694 .driver_name = "authenc-hmac-sha224-cbc-aes-caam",
1695 .blocksize = AES_BLOCK_SIZE, 1695 .blocksize = AES_BLOCK_SIZE,
1696 .type = CRYPTO_ALG_TYPE_AEAD,
1696 .template_aead = { 1697 .template_aead = {
1697 .setkey = aead_setkey, 1698 .setkey = aead_setkey,
1698 .setauthsize = aead_setauthsize, 1699 .setauthsize = aead_setauthsize,
@@ -1732,6 +1733,7 @@ static struct caam_alg_template driver_algs[] = {
1732 .name = "authenc(hmac(sha384),cbc(aes))", 1733 .name = "authenc(hmac(sha384),cbc(aes))",
1733 .driver_name = "authenc-hmac-sha384-cbc-aes-caam", 1734 .driver_name = "authenc-hmac-sha384-cbc-aes-caam",
1734 .blocksize = AES_BLOCK_SIZE, 1735 .blocksize = AES_BLOCK_SIZE,
1736 .type = CRYPTO_ALG_TYPE_AEAD,
1735 .template_aead = { 1737 .template_aead = {
1736 .setkey = aead_setkey, 1738 .setkey = aead_setkey,
1737 .setauthsize = aead_setauthsize, 1739 .setauthsize = aead_setauthsize,
@@ -1810,6 +1812,7 @@ static struct caam_alg_template driver_algs[] = {
1810 .name = "authenc(hmac(sha224),cbc(des3_ede))", 1812 .name = "authenc(hmac(sha224),cbc(des3_ede))",
1811 .driver_name = "authenc-hmac-sha224-cbc-des3_ede-caam", 1813 .driver_name = "authenc-hmac-sha224-cbc-des3_ede-caam",
1812 .blocksize = DES3_EDE_BLOCK_SIZE, 1814 .blocksize = DES3_EDE_BLOCK_SIZE,
1815 .type = CRYPTO_ALG_TYPE_AEAD,
1813 .template_aead = { 1816 .template_aead = {
1814 .setkey = aead_setkey, 1817 .setkey = aead_setkey,
1815 .setauthsize = aead_setauthsize, 1818 .setauthsize = aead_setauthsize,
@@ -1849,6 +1852,7 @@ static struct caam_alg_template driver_algs[] = {
1849 .name = "authenc(hmac(sha384),cbc(des3_ede))", 1852 .name = "authenc(hmac(sha384),cbc(des3_ede))",
1850 .driver_name = "authenc-hmac-sha384-cbc-des3_ede-caam", 1853 .driver_name = "authenc-hmac-sha384-cbc-des3_ede-caam",
1851 .blocksize = DES3_EDE_BLOCK_SIZE, 1854 .blocksize = DES3_EDE_BLOCK_SIZE,
1855 .type = CRYPTO_ALG_TYPE_AEAD,
1852 .template_aead = { 1856 .template_aead = {
1853 .setkey = aead_setkey, 1857 .setkey = aead_setkey,
1854 .setauthsize = aead_setauthsize, 1858 .setauthsize = aead_setauthsize,
@@ -1926,6 +1930,7 @@ static struct caam_alg_template driver_algs[] = {
1926 .name = "authenc(hmac(sha224),cbc(des))", 1930 .name = "authenc(hmac(sha224),cbc(des))",
1927 .driver_name = "authenc-hmac-sha224-cbc-des-caam", 1931 .driver_name = "authenc-hmac-sha224-cbc-des-caam",
1928 .blocksize = DES_BLOCK_SIZE, 1932 .blocksize = DES_BLOCK_SIZE,
1933 .type = CRYPTO_ALG_TYPE_AEAD,
1929 .template_aead = { 1934 .template_aead = {
1930 .setkey = aead_setkey, 1935 .setkey = aead_setkey,
1931 .setauthsize = aead_setauthsize, 1936 .setauthsize = aead_setauthsize,
@@ -1965,6 +1970,7 @@ static struct caam_alg_template driver_algs[] = {
1965 .name = "authenc(hmac(sha384),cbc(des))", 1970 .name = "authenc(hmac(sha384),cbc(des))",
1966 .driver_name = "authenc-hmac-sha384-cbc-des-caam", 1971 .driver_name = "authenc-hmac-sha384-cbc-des-caam",
1967 .blocksize = DES_BLOCK_SIZE, 1972 .blocksize = DES_BLOCK_SIZE,
1973 .type = CRYPTO_ALG_TYPE_AEAD,
1968 .template_aead = { 1974 .template_aead = {
1969 .setkey = aead_setkey, 1975 .setkey = aead_setkey,
1970 .setauthsize = aead_setauthsize, 1976 .setauthsize = aead_setauthsize,
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 32aba7a61503..5996521a1caf 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -411,7 +411,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
411 return 0; 411 return 0;
412} 412}
413 413
414static u32 gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in, 414static int gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in,
415 u32 keylen) 415 u32 keylen)
416{ 416{
417 return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len, 417 return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len,
@@ -420,7 +420,7 @@ static u32 gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in,
420} 420}
421 421
422/* Digest hash size if it is too large */ 422/* Digest hash size if it is too large */
423static u32 hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, 423static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
424 u32 *keylen, u8 *key_out, u32 digestsize) 424 u32 *keylen, u8 *key_out, u32 digestsize)
425{ 425{
426 struct device *jrdev = ctx->jrdev; 426 struct device *jrdev = ctx->jrdev;
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 8acf00490fd5..6e94bcd94678 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -304,6 +304,9 @@ static int caam_probe(struct platform_device *pdev)
304 caam_remove(pdev); 304 caam_remove(pdev);
305 return ret; 305 return ret;
306 } 306 }
307
308 /* Enable RDB bit so that RNG works faster */
309 setbits32(&topregs->ctrl.scfgr, SCFGR_RDBENABLE);
307 } 310 }
308 311
309 /* NOTE: RTIC detection ought to go here, around Si time */ 312 /* NOTE: RTIC detection ought to go here, around Si time */
diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c
index 30b8f74833d4..9f25f5296029 100644
--- a/drivers/crypto/caam/error.c
+++ b/drivers/crypto/caam/error.c
@@ -36,7 +36,7 @@ static void report_jump_idx(u32 status, char *outstr)
36 36
37static void report_ccb_status(u32 status, char *outstr) 37static void report_ccb_status(u32 status, char *outstr)
38{ 38{
39 char *cha_id_list[] = { 39 static const char * const cha_id_list[] = {
40 "", 40 "",
41 "AES", 41 "AES",
42 "DES", 42 "DES",
@@ -51,7 +51,7 @@ static void report_ccb_status(u32 status, char *outstr)
51 "ZUCE", 51 "ZUCE",
52 "ZUCA", 52 "ZUCA",
53 }; 53 };
54 char *err_id_list[] = { 54 static const char * const err_id_list[] = {
55 "No error.", 55 "No error.",
56 "Mode error.", 56 "Mode error.",
57 "Data size error.", 57 "Data size error.",
@@ -69,7 +69,7 @@ static void report_ccb_status(u32 status, char *outstr)
69 "Invalid CHA combination was selected", 69 "Invalid CHA combination was selected",
70 "Invalid CHA selected.", 70 "Invalid CHA selected.",
71 }; 71 };
72 char *rng_err_id_list[] = { 72 static const char * const rng_err_id_list[] = {
73 "", 73 "",
74 "", 74 "",
75 "", 75 "",
@@ -117,7 +117,7 @@ static void report_jump_status(u32 status, char *outstr)
117 117
118static void report_deco_status(u32 status, char *outstr) 118static void report_deco_status(u32 status, char *outstr)
119{ 119{
120 const struct { 120 static const struct {
121 u8 value; 121 u8 value;
122 char *error_text; 122 char *error_text;
123 } desc_error_list[] = { 123 } desc_error_list[] = {
@@ -245,7 +245,7 @@ static void report_cond_code_status(u32 status, char *outstr)
245 245
246char *caam_jr_strstatus(char *outstr, u32 status) 246char *caam_jr_strstatus(char *outstr, u32 status)
247{ 247{
248 struct stat_src { 248 static const struct stat_src {
249 void (*report_ssed)(u32 status, char *outstr); 249 void (*report_ssed)(u32 status, char *outstr);
250 char *error; 250 char *error;
251 } status_src[] = { 251 } status_src[] = {
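
The error.c hunks above only add storage qualifiers, but they matter: a function-local "char *table[] = { ... }" is an automatic array of pointers that is re-initialised on every call, whereas "static const char * const" places a single read-only copy in .rodata and forbids modifying either the pointers or the strings. A small stand-alone illustration, with the table abbreviated to the entries visible in the hunk:

/* Stand-alone illustration of the "static const char * const" change;
 * the table is abbreviated from the cha_id_list hunk above. */
#include <stdio.h>

static const char * const cha_id_list[] = {
        "",
        "AES",
        "DES",
};

static const char *cha_name(unsigned int idx)
{
        if (idx >= sizeof(cha_id_list) / sizeof(cha_id_list[0]))
                return "unknown CHA";
        return cha_id_list[idx];
}

int main(void)
{
        printf("%s\n", cha_name(1));    /* prints "AES" */
        return 0;
}
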
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 5cd4c1b268a1..e4a16b741371 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -41,6 +41,7 @@ struct caam_jrentry_info {
41/* Private sub-storage for a single JobR */ 41/* Private sub-storage for a single JobR */
42struct caam_drv_private_jr { 42struct caam_drv_private_jr {
43 struct device *parentdev; /* points back to controller dev */ 43 struct device *parentdev; /* points back to controller dev */
44 struct platform_device *jr_pdev;/* points to platform device for JR */
44 int ridx; 45 int ridx;
45 struct caam_job_ring __iomem *rregs; /* JobR's register space */ 46 struct caam_job_ring __iomem *rregs; /* JobR's register space */
46 struct tasklet_struct irqtask; 47 struct tasklet_struct irqtask;
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 93d14070141a..b4aa773ecbc8 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -407,6 +407,7 @@ int caam_jr_shutdown(struct device *dev)
407 dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH, 407 dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH,
408 jrp->outring, outbusaddr); 408 jrp->outring, outbusaddr);
409 kfree(jrp->entinfo); 409 kfree(jrp->entinfo);
410 of_device_unregister(jrp->jr_pdev);
410 411
411 return ret; 412 return ret;
412} 413}
@@ -454,6 +455,8 @@ int caam_jr_probe(struct platform_device *pdev, struct device_node *np,
454 kfree(jrpriv); 455 kfree(jrpriv);
455 return -EINVAL; 456 return -EINVAL;
456 } 457 }
458
459 jrpriv->jr_pdev = jr_pdev;
457 jrdev = &jr_pdev->dev; 460 jrdev = &jr_pdev->dev;
458 dev_set_drvdata(jrdev, jrpriv); 461 dev_set_drvdata(jrdev, jrpriv);
459 ctrlpriv->jrdev[ring] = jrdev; 462 ctrlpriv->jrdev[ring] = jrdev;
@@ -472,6 +475,7 @@ int caam_jr_probe(struct platform_device *pdev, struct device_node *np,
472 /* Now do the platform independent part */ 475 /* Now do the platform independent part */
473 error = caam_jr_init(jrdev); /* now turn on hardware */ 476 error = caam_jr_init(jrdev); /* now turn on hardware */
474 if (error) { 477 if (error) {
478 of_device_unregister(jr_pdev);
475 kfree(jrpriv); 479 kfree(jrpriv);
476 return error; 480 return error;
477 } 481 }
diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c
index f6dba10246c3..87138d2adb5f 100644
--- a/drivers/crypto/caam/key_gen.c
+++ b/drivers/crypto/caam/key_gen.c
@@ -44,7 +44,7 @@ Split key generation-----------------------------------------------
44[06] 0x64260028 fifostr: class2 mdsplit-jdk len=40 44[06] 0x64260028 fifostr: class2 mdsplit-jdk len=40
45 @0xffe04000 45 @0xffe04000
46*/ 46*/
47u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, 47int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
48 int split_key_pad_len, const u8 *key_in, u32 keylen, 48 int split_key_pad_len, const u8 *key_in, u32 keylen,
49 u32 alg_op) 49 u32 alg_op)
50{ 50{
diff --git a/drivers/crypto/caam/key_gen.h b/drivers/crypto/caam/key_gen.h
index d95d290c6e8b..c5588f6d8109 100644
--- a/drivers/crypto/caam/key_gen.h
+++ b/drivers/crypto/caam/key_gen.h
@@ -12,6 +12,6 @@ struct split_key_result {
12 12
13void split_key_done(struct device *dev, u32 *desc, u32 err, void *context); 13void split_key_done(struct device *dev, u32 *desc, u32 err, void *context);
14 14
15u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, 15int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
16 int split_key_pad_len, const u8 *key_in, u32 keylen, 16 int split_key_pad_len, const u8 *key_in, u32 keylen,
17 u32 alg_op); 17 u32 alg_op);
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 3223fc6d647c..cd6fedad9935 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -252,7 +252,8 @@ struct caam_ctrl {
     /* Read/Writable */
     u32 rsvd1;
     u32 mcr;    /* MCFG Master Config Register */
-    u32 rsvd2[2];
+    u32 rsvd2;
+    u32 scfgr;  /* SCFGR, Security Config Register */
 
     /* Bus Access Configuration Section 010-11f */
     /* Read/Writable */
@@ -299,6 +300,7 @@ struct caam_ctrl {
 #define MCFGR_WDFAIL        0x20000000 /* DECO watchdog force-fail */
 #define MCFGR_DMA_RESET     0x10000000
 #define MCFGR_LONG_PTR      0x00010000 /* Use >32-bit desc addressing */
+#define SCFGR_RDBENABLE     0x00000400
 
 /* AXI read cache control */
 #define MCFGR_ARCACHE_SHIFT 12
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 6aa425fe0ed5..ee15b0f7849a 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -636,7 +636,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 
     pr_debug("err: %d\n", err);
 
-    pm_runtime_put_sync(dd->dev);
+    pm_runtime_put(dd->dev);
     dd->flags &= ~FLAGS_BUSY;
 
     req->base.complete(&req->base, err);
@@ -1248,18 +1248,7 @@ static struct platform_driver omap_aes_driver = {
     },
 };
 
-static int __init omap_aes_mod_init(void)
-{
-    return platform_driver_register(&omap_aes_driver);
-}
-
-static void __exit omap_aes_mod_exit(void)
-{
-    platform_driver_unregister(&omap_aes_driver);
-}
-
-module_init(omap_aes_mod_init);
-module_exit(omap_aes_mod_exit);
+module_platform_driver(omap_aes_driver);
 
 MODULE_DESCRIPTION("OMAP AES hw acceleration support.");
 MODULE_LICENSE("GPL v2");
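
module_platform_driver() removes the init/exit boilerplate deleted above; for reference, it expands to roughly the following equivalent registration (the generated function names differ in detail):

    /* Reference sketch of what module_platform_driver(omap_aes_driver) generates. */
    static int __init omap_aes_driver_init(void)
    {
            return platform_driver_register(&omap_aes_driver);
    }
    module_init(omap_aes_driver_init);

    static void __exit omap_aes_driver_exit(void)
    {
            platform_driver_unregister(&omap_aes_driver);
    }
    module_exit(omap_aes_driver_exit);
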
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 3d1611f5aecf..a1e1b4756ee5 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -923,7 +923,7 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
     dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) |
             BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY));
 
-    pm_runtime_put_sync(dd->dev);
+    pm_runtime_put(dd->dev);
 
     if (req->base.complete)
         req->base.complete(&req->base, err);
@@ -1813,18 +1813,7 @@ static struct platform_driver omap_sham_driver = {
     },
 };
 
-static int __init omap_sham_mod_init(void)
-{
-    return platform_driver_register(&omap_sham_driver);
-}
-
-static void __exit omap_sham_mod_exit(void)
-{
-    platform_driver_unregister(&omap_sham_driver);
-}
-
-module_init(omap_sham_mod_init);
-module_exit(omap_sham_mod_exit);
+module_platform_driver(omap_sham_driver);
 
 MODULE_DESCRIPTION("OMAP SHA1/MD5 hw acceleration support.");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index 2096d4685a9e..ac30724d923d 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -1688,8 +1688,6 @@ static const struct of_device_id spacc_of_id_table[] = {
     { .compatible = "picochip,spacc-l2" },
     {}
 };
-#else /* CONFIG_OF */
-#define spacc_of_id_table NULL
 #endif /* CONFIG_OF */
 
 static bool spacc_is_compatible(struct platform_device *pdev,
@@ -1874,7 +1872,7 @@ static struct platform_driver spacc_driver = {
 #ifdef CONFIG_PM
         .pm = &spacc_pm_ops,
 #endif /* CONFIG_PM */
-        .of_match_table = spacc_of_id_table,
+        .of_match_table = of_match_ptr(spacc_of_id_table),
     },
     .id_table = spacc_id_table,
 };
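
of_match_ptr() makes the "#define spacc_of_id_table NULL" fallback unnecessary, because the macro itself degrades to NULL when CONFIG_OF is not set. Its definition in <linux/of.h> is essentially:

    #ifdef CONFIG_OF
    #define of_match_ptr(_ptr)  (_ptr)
    #else
    #define of_match_ptr(_ptr)  NULL
    #endif
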
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
new file mode 100644
index 000000000000..a97bb6c1596c
--- /dev/null
+++ b/drivers/crypto/sahara.c
@@ -0,0 +1,1070 @@
1/*
2 * Cryptographic API.
3 *
4 * Support for SAHARA cryptographic accelerator.
5 *
6 * Copyright (c) 2013 Vista Silicon S.L.
7 * Author: Javier Martin <javier.martin@vista-silicon.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as published
11 * by the Free Software Foundation.
12 *
13 * Based on omap-aes.c and tegra-aes.c
14 */
15
16#include <crypto/algapi.h>
17#include <crypto/aes.h>
18
19#include <linux/clk.h>
20#include <linux/crypto.h>
21#include <linux/interrupt.h>
22#include <linux/io.h>
23#include <linux/irq.h>
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/of.h>
27#include <linux/platform_device.h>
28
29#define SAHARA_NAME "sahara"
30#define SAHARA_VERSION_3 3
31#define SAHARA_TIMEOUT_MS 1000
32#define SAHARA_MAX_HW_DESC 2
33#define SAHARA_MAX_HW_LINK 20
34
35#define FLAGS_MODE_MASK 0x000f
36#define FLAGS_ENCRYPT BIT(0)
37#define FLAGS_CBC BIT(1)
38#define FLAGS_NEW_KEY BIT(3)
39#define FLAGS_BUSY 4
40
41#define SAHARA_HDR_BASE 0x00800000
42#define SAHARA_HDR_SKHA_ALG_AES 0
43#define SAHARA_HDR_SKHA_OP_ENC (1 << 2)
44#define SAHARA_HDR_SKHA_MODE_ECB (0 << 3)
45#define SAHARA_HDR_SKHA_MODE_CBC (1 << 3)
46#define SAHARA_HDR_FORM_DATA (5 << 16)
47#define SAHARA_HDR_FORM_KEY (8 << 16)
48#define SAHARA_HDR_LLO (1 << 24)
49#define SAHARA_HDR_CHA_SKHA (1 << 28)
50#define SAHARA_HDR_CHA_MDHA (2 << 28)
51#define SAHARA_HDR_PARITY_BIT (1 << 31)
52
53/* SAHARA can only process one request at a time */
54#define SAHARA_QUEUE_LENGTH 1
55
56#define SAHARA_REG_VERSION 0x00
57#define SAHARA_REG_DAR 0x04
58#define SAHARA_REG_CONTROL 0x08
59#define SAHARA_CONTROL_SET_THROTTLE(x) (((x) & 0xff) << 24)
60#define SAHARA_CONTROL_SET_MAXBURST(x) (((x) & 0xff) << 16)
61#define SAHARA_CONTROL_RNG_AUTORSD (1 << 7)
62#define SAHARA_CONTROL_ENABLE_INT (1 << 4)
63#define SAHARA_REG_CMD 0x0C
64#define SAHARA_CMD_RESET (1 << 0)
65#define SAHARA_CMD_CLEAR_INT (1 << 8)
66#define SAHARA_CMD_CLEAR_ERR (1 << 9)
67#define SAHARA_CMD_SINGLE_STEP (1 << 10)
68#define SAHARA_CMD_MODE_BATCH (1 << 16)
69#define SAHARA_CMD_MODE_DEBUG (1 << 18)
70#define SAHARA_REG_STATUS 0x10
71#define SAHARA_STATUS_GET_STATE(x) ((x) & 0x7)
72#define SAHARA_STATE_IDLE 0
73#define SAHARA_STATE_BUSY 1
74#define SAHARA_STATE_ERR 2
75#define SAHARA_STATE_FAULT 3
76#define SAHARA_STATE_COMPLETE 4
77#define SAHARA_STATE_COMP_FLAG (1 << 2)
78#define SAHARA_STATUS_DAR_FULL (1 << 3)
79#define SAHARA_STATUS_ERROR (1 << 4)
80#define SAHARA_STATUS_SECURE (1 << 5)
81#define SAHARA_STATUS_FAIL (1 << 6)
82#define SAHARA_STATUS_INIT (1 << 7)
83#define SAHARA_STATUS_RNG_RESEED (1 << 8)
84#define SAHARA_STATUS_ACTIVE_RNG (1 << 9)
85#define SAHARA_STATUS_ACTIVE_MDHA (1 << 10)
86#define SAHARA_STATUS_ACTIVE_SKHA (1 << 11)
87#define SAHARA_STATUS_MODE_BATCH (1 << 16)
88#define SAHARA_STATUS_MODE_DEDICATED (1 << 17)
89#define SAHARA_STATUS_MODE_DEBUG (1 << 18)
90#define SAHARA_STATUS_GET_ISTATE(x) (((x) >> 24) & 0xff)
91#define SAHARA_REG_ERRSTATUS 0x14
92#define SAHARA_ERRSTATUS_GET_SOURCE(x) ((x) & 0xf)
93#define SAHARA_ERRSOURCE_CHA 14
94#define SAHARA_ERRSOURCE_DMA 15
95#define SAHARA_ERRSTATUS_DMA_DIR (1 << 8)
96#define SAHARA_ERRSTATUS_GET_DMASZ(x)(((x) >> 9) & 0x3)
97#define SAHARA_ERRSTATUS_GET_DMASRC(x) (((x) >> 13) & 0x7)
98#define SAHARA_ERRSTATUS_GET_CHASRC(x) (((x) >> 16) & 0xfff)
99#define SAHARA_ERRSTATUS_GET_CHAERR(x) (((x) >> 28) & 0x3)
100#define SAHARA_REG_FADDR 0x18
101#define SAHARA_REG_CDAR 0x1C
102#define SAHARA_REG_IDAR 0x20
103
104struct sahara_hw_desc {
105 u32 hdr;
106 u32 len1;
107 dma_addr_t p1;
108 u32 len2;
109 dma_addr_t p2;
110 dma_addr_t next;
111};
112
113struct sahara_hw_link {
114 u32 len;
115 dma_addr_t p;
116 dma_addr_t next;
117};
118
119struct sahara_ctx {
120 struct sahara_dev *dev;
121 unsigned long flags;
122 int keylen;
123 u8 key[AES_KEYSIZE_128];
124 struct crypto_ablkcipher *fallback;
125};
126
127struct sahara_aes_reqctx {
128 unsigned long mode;
129};
130
131struct sahara_dev {
132 struct device *device;
133 void __iomem *regs_base;
134 struct clk *clk_ipg;
135 struct clk *clk_ahb;
136
137 struct sahara_ctx *ctx;
138 spinlock_t lock;
139 struct crypto_queue queue;
140 unsigned long flags;
141
142 struct tasklet_struct done_task;
143 struct tasklet_struct queue_task;
144
145 struct sahara_hw_desc *hw_desc[SAHARA_MAX_HW_DESC];
146 dma_addr_t hw_phys_desc[SAHARA_MAX_HW_DESC];
147
148 u8 *key_base;
149 dma_addr_t key_phys_base;
150
151 u8 *iv_base;
152 dma_addr_t iv_phys_base;
153
154 struct sahara_hw_link *hw_link[SAHARA_MAX_HW_LINK];
155 dma_addr_t hw_phys_link[SAHARA_MAX_HW_LINK];
156
157 struct ablkcipher_request *req;
158 size_t total;
159 struct scatterlist *in_sg;
160 unsigned int nb_in_sg;
161 struct scatterlist *out_sg;
162 unsigned int nb_out_sg;
163
164 u32 error;
165 struct timer_list watchdog;
166};
167
168static struct sahara_dev *dev_ptr;
169
170static inline void sahara_write(struct sahara_dev *dev, u32 data, u32 reg)
171{
172 writel(data, dev->regs_base + reg);
173}
174
175static inline unsigned int sahara_read(struct sahara_dev *dev, u32 reg)
176{
177 return readl(dev->regs_base + reg);
178}
179
180static u32 sahara_aes_key_hdr(struct sahara_dev *dev)
181{
182 u32 hdr = SAHARA_HDR_BASE | SAHARA_HDR_SKHA_ALG_AES |
183 SAHARA_HDR_FORM_KEY | SAHARA_HDR_LLO |
184 SAHARA_HDR_CHA_SKHA | SAHARA_HDR_PARITY_BIT;
185
186 if (dev->flags & FLAGS_CBC) {
187 hdr |= SAHARA_HDR_SKHA_MODE_CBC;
188 hdr ^= SAHARA_HDR_PARITY_BIT;
189 }
190
191 if (dev->flags & FLAGS_ENCRYPT) {
192 hdr |= SAHARA_HDR_SKHA_OP_ENC;
193 hdr ^= SAHARA_HDR_PARITY_BIT;
194 }
195
196 return hdr;
197}
198
199static u32 sahara_aes_data_link_hdr(struct sahara_dev *dev)
200{
201 return SAHARA_HDR_BASE | SAHARA_HDR_FORM_DATA |
202 SAHARA_HDR_CHA_SKHA | SAHARA_HDR_PARITY_BIT;
203}
204
205static int sahara_sg_length(struct scatterlist *sg,
206 unsigned int total)
207{
208 int sg_nb;
209 unsigned int len;
210 struct scatterlist *sg_list;
211
212 sg_nb = 0;
213 sg_list = sg;
214
215 while (total) {
216 len = min(sg_list->length, total);
217
218 sg_nb++;
219 total -= len;
220
221 sg_list = sg_next(sg_list);
222 if (!sg_list)
223 total = 0;
224 }
225
226 return sg_nb;
227}
228
229static char *sahara_err_src[16] = {
230 "No error",
231 "Header error",
232 "Descriptor length error",
233 "Descriptor length or pointer error",
234 "Link length error",
235 "Link pointer error",
236 "Input buffer error",
237 "Output buffer error",
238 "Output buffer starvation",
239 "Internal state fault",
240 "General descriptor problem",
241 "Reserved",
242 "Descriptor address error",
243 "Link address error",
244 "CHA error",
245 "DMA error"
246};
247
248static char *sahara_err_dmasize[4] = {
249 "Byte transfer",
250 "Half-word transfer",
251 "Word transfer",
252 "Reserved"
253};
254
255static char *sahara_err_dmasrc[8] = {
256 "No error",
257 "AHB bus error",
258 "Internal IP bus error",
259 "Parity error",
260 "DMA crosses 256 byte boundary",
261 "DMA is busy",
262 "Reserved",
263 "DMA HW error"
264};
265
266static char *sahara_cha_errsrc[12] = {
267 "Input buffer non-empty",
268 "Illegal address",
269 "Illegal mode",
270 "Illegal data size",
271 "Illegal key size",
272 "Write during processing",
273 "CTX read during processing",
274 "HW error",
275 "Input buffer disabled/underflow",
276 "Output buffer disabled/overflow",
277 "DES key parity error",
278 "Reserved"
279};
280
281static char *sahara_cha_err[4] = { "No error", "SKHA", "MDHA", "RNG" };
282
283static void sahara_decode_error(struct sahara_dev *dev, unsigned int error)
284{
285 u8 source = SAHARA_ERRSTATUS_GET_SOURCE(error);
286 u16 chasrc = ffs(SAHARA_ERRSTATUS_GET_CHASRC(error));
287
288 dev_err(dev->device, "%s: Error Register = 0x%08x\n", __func__, error);
289
290 dev_err(dev->device, " - %s.\n", sahara_err_src[source]);
291
292 if (source == SAHARA_ERRSOURCE_DMA) {
293 if (error & SAHARA_ERRSTATUS_DMA_DIR)
294 dev_err(dev->device, " * DMA read.\n");
295 else
296 dev_err(dev->device, " * DMA write.\n");
297
298 dev_err(dev->device, " * %s.\n",
299 sahara_err_dmasize[SAHARA_ERRSTATUS_GET_DMASZ(error)]);
300 dev_err(dev->device, " * %s.\n",
301 sahara_err_dmasrc[SAHARA_ERRSTATUS_GET_DMASRC(error)]);
302 } else if (source == SAHARA_ERRSOURCE_CHA) {
303 dev_err(dev->device, " * %s.\n",
304 sahara_cha_errsrc[chasrc]);
305 dev_err(dev->device, " * %s.\n",
306 sahara_cha_err[SAHARA_ERRSTATUS_GET_CHAERR(error)]);
307 }
308 dev_err(dev->device, "\n");
309}
310
311static char *sahara_state[4] = { "Idle", "Busy", "Error", "HW Fault" };
312
313static void sahara_decode_status(struct sahara_dev *dev, unsigned int status)
314{
315 u8 state;
316
317 if (!IS_ENABLED(DEBUG))
318 return;
319
320 state = SAHARA_STATUS_GET_STATE(status);
321
322 dev_dbg(dev->device, "%s: Status Register = 0x%08x\n",
323 __func__, status);
324
325 dev_dbg(dev->device, " - State = %d:\n", state);
326 if (state & SAHARA_STATE_COMP_FLAG)
327 dev_dbg(dev->device, " * Descriptor completed. IRQ pending.\n");
328
329 dev_dbg(dev->device, " * %s.\n",
330 sahara_state[state & ~SAHARA_STATE_COMP_FLAG]);
331
332 if (status & SAHARA_STATUS_DAR_FULL)
333 dev_dbg(dev->device, " - DAR Full.\n");
334 if (status & SAHARA_STATUS_ERROR)
335 dev_dbg(dev->device, " - Error.\n");
336 if (status & SAHARA_STATUS_SECURE)
337 dev_dbg(dev->device, " - Secure.\n");
338 if (status & SAHARA_STATUS_FAIL)
339 dev_dbg(dev->device, " - Fail.\n");
340 if (status & SAHARA_STATUS_RNG_RESEED)
341 dev_dbg(dev->device, " - RNG Reseed Request.\n");
342 if (status & SAHARA_STATUS_ACTIVE_RNG)
343 dev_dbg(dev->device, " - RNG Active.\n");
344 if (status & SAHARA_STATUS_ACTIVE_MDHA)
345 dev_dbg(dev->device, " - MDHA Active.\n");
346 if (status & SAHARA_STATUS_ACTIVE_SKHA)
347 dev_dbg(dev->device, " - SKHA Active.\n");
348
349 if (status & SAHARA_STATUS_MODE_BATCH)
350 dev_dbg(dev->device, " - Batch Mode.\n");
351 else if (status & SAHARA_STATUS_MODE_DEDICATED)
352 dev_dbg(dev->device, " - Dedicated Mode.\n");
353 else if (status & SAHARA_STATUS_MODE_DEBUG)
354 dev_dbg(dev->device, " - Debug Mode.\n");
355
356 dev_dbg(dev->device, " - Internal state = 0x%02x\n",
357 SAHARA_STATUS_GET_ISTATE(status));
358
359 dev_dbg(dev->device, "Current DAR: 0x%08x\n",
360 sahara_read(dev, SAHARA_REG_CDAR));
361 dev_dbg(dev->device, "Initial DAR: 0x%08x\n\n",
362 sahara_read(dev, SAHARA_REG_IDAR));
363}
364
365static void sahara_dump_descriptors(struct sahara_dev *dev)
366{
367 int i;
368
369 if (!IS_ENABLED(DEBUG))
370 return;
371
372 for (i = 0; i < SAHARA_MAX_HW_DESC; i++) {
373 dev_dbg(dev->device, "Descriptor (%d) (0x%08x):\n",
374 i, dev->hw_phys_desc[i]);
375 dev_dbg(dev->device, "\thdr = 0x%08x\n", dev->hw_desc[i]->hdr);
376 dev_dbg(dev->device, "\tlen1 = %u\n", dev->hw_desc[i]->len1);
377 dev_dbg(dev->device, "\tp1 = 0x%08x\n", dev->hw_desc[i]->p1);
378 dev_dbg(dev->device, "\tlen2 = %u\n", dev->hw_desc[i]->len2);
379 dev_dbg(dev->device, "\tp2 = 0x%08x\n", dev->hw_desc[i]->p2);
380 dev_dbg(dev->device, "\tnext = 0x%08x\n",
381 dev->hw_desc[i]->next);
382 }
383 dev_dbg(dev->device, "\n");
384}
385
386static void sahara_dump_links(struct sahara_dev *dev)
387{
388 int i;
389
390 if (!IS_ENABLED(DEBUG))
391 return;
392
393 for (i = 0; i < SAHARA_MAX_HW_LINK; i++) {
394 dev_dbg(dev->device, "Link (%d) (0x%08x):\n",
395 i, dev->hw_phys_link[i]);
396 dev_dbg(dev->device, "\tlen = %u\n", dev->hw_link[i]->len);
397 dev_dbg(dev->device, "\tp = 0x%08x\n", dev->hw_link[i]->p);
398 dev_dbg(dev->device, "\tnext = 0x%08x\n",
399 dev->hw_link[i]->next);
400 }
401 dev_dbg(dev->device, "\n");
402}
403
404static void sahara_aes_done_task(unsigned long data)
405{
406 struct sahara_dev *dev = (struct sahara_dev *)data;
407
408 dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg,
409 DMA_FROM_DEVICE);
410 dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg,
411 DMA_TO_DEVICE);
412
413 spin_lock(&dev->lock);
414 clear_bit(FLAGS_BUSY, &dev->flags);
415 spin_unlock(&dev->lock);
416
417 dev->req->base.complete(&dev->req->base, dev->error);
418}
419
420void sahara_watchdog(unsigned long data)
421{
422 struct sahara_dev *dev = (struct sahara_dev *)data;
423 unsigned int err = sahara_read(dev, SAHARA_REG_ERRSTATUS);
424 unsigned int stat = sahara_read(dev, SAHARA_REG_STATUS);
425
426 sahara_decode_status(dev, stat);
427 sahara_decode_error(dev, err);
428 dev->error = -ETIMEDOUT;
429 sahara_aes_done_task(data);
430}
431
432static int sahara_hw_descriptor_create(struct sahara_dev *dev)
433{
434 struct sahara_ctx *ctx = dev->ctx;
435 struct scatterlist *sg;
436 int ret;
437 int i, j;
438
439 /* Copy new key if necessary */
440 if (ctx->flags & FLAGS_NEW_KEY) {
441 memcpy(dev->key_base, ctx->key, ctx->keylen);
442 ctx->flags &= ~FLAGS_NEW_KEY;
443
444 if (dev->flags & FLAGS_CBC) {
445 dev->hw_desc[0]->len1 = AES_BLOCK_SIZE;
446 dev->hw_desc[0]->p1 = dev->iv_phys_base;
447 } else {
448 dev->hw_desc[0]->len1 = 0;
449 dev->hw_desc[0]->p1 = 0;
450 }
451 dev->hw_desc[0]->len2 = ctx->keylen;
452 dev->hw_desc[0]->p2 = dev->key_phys_base;
453 dev->hw_desc[0]->next = dev->hw_phys_desc[1];
454 }
455 dev->hw_desc[0]->hdr = sahara_aes_key_hdr(dev);
456
457 dev->nb_in_sg = sahara_sg_length(dev->in_sg, dev->total);
458 dev->nb_out_sg = sahara_sg_length(dev->out_sg, dev->total);
459 if ((dev->nb_in_sg + dev->nb_out_sg) > SAHARA_MAX_HW_LINK) {
460 dev_err(dev->device, "not enough hw links (%d)\n",
461 dev->nb_in_sg + dev->nb_out_sg);
462 return -EINVAL;
463 }
464
465 ret = dma_map_sg(dev->device, dev->in_sg, dev->nb_in_sg,
466 DMA_TO_DEVICE);
467 if (ret != dev->nb_in_sg) {
468 dev_err(dev->device, "couldn't map in sg\n");
469 goto unmap_in;
470 }
471 ret = dma_map_sg(dev->device, dev->out_sg, dev->nb_out_sg,
472 DMA_FROM_DEVICE);
473 if (ret != dev->nb_out_sg) {
474 dev_err(dev->device, "couldn't map out sg\n");
475 goto unmap_out;
476 }
477
478 /* Create input links */
479 dev->hw_desc[1]->p1 = dev->hw_phys_link[0];
480 sg = dev->in_sg;
481 for (i = 0; i < dev->nb_in_sg; i++) {
482 dev->hw_link[i]->len = sg->length;
483 dev->hw_link[i]->p = sg->dma_address;
484 if (i == (dev->nb_in_sg - 1)) {
485 dev->hw_link[i]->next = 0;
486 } else {
487 dev->hw_link[i]->next = dev->hw_phys_link[i + 1];
488 sg = sg_next(sg);
489 }
490 }
491
492 /* Create output links */
493 dev->hw_desc[1]->p2 = dev->hw_phys_link[i];
494 sg = dev->out_sg;
495 for (j = i; j < dev->nb_out_sg + i; j++) {
496 dev->hw_link[j]->len = sg->length;
497 dev->hw_link[j]->p = sg->dma_address;
498 if (j == (dev->nb_out_sg + i - 1)) {
499 dev->hw_link[j]->next = 0;
500 } else {
501 dev->hw_link[j]->next = dev->hw_phys_link[j + 1];
502 sg = sg_next(sg);
503 }
504 }
505
506 /* Fill remaining fields of hw_desc[1] */
507 dev->hw_desc[1]->hdr = sahara_aes_data_link_hdr(dev);
508 dev->hw_desc[1]->len1 = dev->total;
509 dev->hw_desc[1]->len2 = dev->total;
510 dev->hw_desc[1]->next = 0;
511
512 sahara_dump_descriptors(dev);
513 sahara_dump_links(dev);
514
515 /* Start processing descriptor chain. */
516 mod_timer(&dev->watchdog,
517 jiffies + msecs_to_jiffies(SAHARA_TIMEOUT_MS));
518 sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR);
519
520 return 0;
521
522unmap_out:
523 dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg,
524 DMA_FROM_DEVICE);
525unmap_in:
526 dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg,
527 DMA_TO_DEVICE);
528
529 return -EINVAL;
530}
531
532static void sahara_aes_queue_task(unsigned long data)
533{
534 struct sahara_dev *dev = (struct sahara_dev *)data;
535 struct crypto_async_request *async_req, *backlog;
536 struct sahara_ctx *ctx;
537 struct sahara_aes_reqctx *rctx;
538 struct ablkcipher_request *req;
539 int ret;
540
541 spin_lock(&dev->lock);
542 backlog = crypto_get_backlog(&dev->queue);
543 async_req = crypto_dequeue_request(&dev->queue);
544 if (!async_req)
545 clear_bit(FLAGS_BUSY, &dev->flags);
546 spin_unlock(&dev->lock);
547
548 if (!async_req)
549 return;
550
551 if (backlog)
552 backlog->complete(backlog, -EINPROGRESS);
553
554 req = ablkcipher_request_cast(async_req);
555
556 /* Request is ready to be dispatched by the device */
557 dev_dbg(dev->device,
558 "dispatch request (nbytes=%d, src=%p, dst=%p)\n",
559 req->nbytes, req->src, req->dst);
560
561 /* assign new request to device */
562 dev->req = req;
563 dev->total = req->nbytes;
564 dev->in_sg = req->src;
565 dev->out_sg = req->dst;
566
567 rctx = ablkcipher_request_ctx(req);
568 ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
569 rctx->mode &= FLAGS_MODE_MASK;
570 dev->flags = (dev->flags & ~FLAGS_MODE_MASK) | rctx->mode;
571
572 if ((dev->flags & FLAGS_CBC) && req->info)
573 memcpy(dev->iv_base, req->info, AES_KEYSIZE_128);
574
575 /* assign new context to device */
576 ctx->dev = dev;
577 dev->ctx = ctx;
578
579 ret = sahara_hw_descriptor_create(dev);
580 if (ret < 0) {
581 spin_lock(&dev->lock);
582 clear_bit(FLAGS_BUSY, &dev->flags);
583 spin_unlock(&dev->lock);
584 dev->req->base.complete(&dev->req->base, ret);
585 }
586}
587
588static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
589 unsigned int keylen)
590{
591 struct sahara_ctx *ctx = crypto_ablkcipher_ctx(tfm);
592 int ret;
593
594 ctx->keylen = keylen;
595
596 /* SAHARA only supports 128bit keys */
597 if (keylen == AES_KEYSIZE_128) {
598 memcpy(ctx->key, key, keylen);
599 ctx->flags |= FLAGS_NEW_KEY;
600 return 0;
601 }
602
603 if (keylen != AES_KEYSIZE_128 &&
604 keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256)
605 return -EINVAL;
606
607 /*
608 * The requested key size is not supported by HW, do a fallback.
609 */
610 ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
611 ctx->fallback->base.crt_flags |=
612 (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
613
614 ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen);
615 if (ret) {
616 struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm);
617
618 tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK;
619 tfm_aux->crt_flags |=
620 (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK);
621 }
622 return ret;
623}
624
625static int sahara_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
626{
627 struct sahara_ctx *ctx = crypto_ablkcipher_ctx(
628 crypto_ablkcipher_reqtfm(req));
629 struct sahara_aes_reqctx *rctx = ablkcipher_request_ctx(req);
630 struct sahara_dev *dev = dev_ptr;
631 int err = 0;
632 int busy;
633
634 dev_dbg(dev->device, "nbytes: %d, enc: %d, cbc: %d\n",
635 req->nbytes, !!(mode & FLAGS_ENCRYPT), !!(mode & FLAGS_CBC));
636
637 if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) {
638 dev_err(dev->device,
639 "request size is not exact amount of AES blocks\n");
640 return -EINVAL;
641 }
642
643 ctx->dev = dev;
644
645 rctx->mode = mode;
646 spin_lock_bh(&dev->lock);
647 err = ablkcipher_enqueue_request(&dev->queue, req);
648 busy = test_and_set_bit(FLAGS_BUSY, &dev->flags);
649 spin_unlock_bh(&dev->lock);
650
651 if (!busy)
652 tasklet_schedule(&dev->queue_task);
653
654 return err;
655}
656
657static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req)
658{
659 struct crypto_tfm *tfm =
660 crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
661 struct sahara_ctx *ctx = crypto_ablkcipher_ctx(
662 crypto_ablkcipher_reqtfm(req));
663 int err;
664
665 if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
666 ablkcipher_request_set_tfm(req, ctx->fallback);
667 err = crypto_ablkcipher_encrypt(req);
668 ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
669 return err;
670 }
671
672 return sahara_aes_crypt(req, FLAGS_ENCRYPT);
673}
674
675static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req)
676{
677 struct crypto_tfm *tfm =
678 crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
679 struct sahara_ctx *ctx = crypto_ablkcipher_ctx(
680 crypto_ablkcipher_reqtfm(req));
681 int err;
682
683 if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
684 ablkcipher_request_set_tfm(req, ctx->fallback);
685 err = crypto_ablkcipher_decrypt(req);
686 ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
687 return err;
688 }
689
690 return sahara_aes_crypt(req, 0);
691}
692
693static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req)
694{
695 struct crypto_tfm *tfm =
696 crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
697 struct sahara_ctx *ctx = crypto_ablkcipher_ctx(
698 crypto_ablkcipher_reqtfm(req));
699 int err;
700
701 if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
702 ablkcipher_request_set_tfm(req, ctx->fallback);
703 err = crypto_ablkcipher_encrypt(req);
704 ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
705 return err;
706 }
707
708 return sahara_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC);
709}
710
711static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req)
712{
713 struct crypto_tfm *tfm =
714 crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
715 struct sahara_ctx *ctx = crypto_ablkcipher_ctx(
716 crypto_ablkcipher_reqtfm(req));
717 int err;
718
719 if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
720 ablkcipher_request_set_tfm(req, ctx->fallback);
721 err = crypto_ablkcipher_decrypt(req);
722 ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
723 return err;
724 }
725
726 return sahara_aes_crypt(req, FLAGS_CBC);
727}
728
729static int sahara_aes_cra_init(struct crypto_tfm *tfm)
730{
731 const char *name = tfm->__crt_alg->cra_name;
732 struct sahara_ctx *ctx = crypto_tfm_ctx(tfm);
733
734 ctx->fallback = crypto_alloc_ablkcipher(name, 0,
735 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
736 if (IS_ERR(ctx->fallback)) {
737 pr_err("Error allocating fallback algo %s\n", name);
738 return PTR_ERR(ctx->fallback);
739 }
740
741 tfm->crt_ablkcipher.reqsize = sizeof(struct sahara_aes_reqctx);
742
743 return 0;
744}
745
746static void sahara_aes_cra_exit(struct crypto_tfm *tfm)
747{
748 struct sahara_ctx *ctx = crypto_tfm_ctx(tfm);
749
750 if (ctx->fallback)
751 crypto_free_ablkcipher(ctx->fallback);
752 ctx->fallback = NULL;
753}
754
755static struct crypto_alg aes_algs[] = {
756{
757 .cra_name = "ecb(aes)",
758 .cra_driver_name = "sahara-ecb-aes",
759 .cra_priority = 300,
760 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
761 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
762 .cra_blocksize = AES_BLOCK_SIZE,
763 .cra_ctxsize = sizeof(struct sahara_ctx),
764 .cra_alignmask = 0x0,
765 .cra_type = &crypto_ablkcipher_type,
766 .cra_module = THIS_MODULE,
767 .cra_init = sahara_aes_cra_init,
768 .cra_exit = sahara_aes_cra_exit,
769 .cra_u.ablkcipher = {
770 .min_keysize = AES_MIN_KEY_SIZE ,
771 .max_keysize = AES_MAX_KEY_SIZE,
772 .setkey = sahara_aes_setkey,
773 .encrypt = sahara_aes_ecb_encrypt,
774 .decrypt = sahara_aes_ecb_decrypt,
775 }
776}, {
777 .cra_name = "cbc(aes)",
778 .cra_driver_name = "sahara-cbc-aes",
779 .cra_priority = 300,
780 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
781 CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
782 .cra_blocksize = AES_BLOCK_SIZE,
783 .cra_ctxsize = sizeof(struct sahara_ctx),
784 .cra_alignmask = 0x0,
785 .cra_type = &crypto_ablkcipher_type,
786 .cra_module = THIS_MODULE,
787 .cra_init = sahara_aes_cra_init,
788 .cra_exit = sahara_aes_cra_exit,
789 .cra_u.ablkcipher = {
790 .min_keysize = AES_MIN_KEY_SIZE ,
791 .max_keysize = AES_MAX_KEY_SIZE,
792 .ivsize = AES_BLOCK_SIZE,
793 .setkey = sahara_aes_setkey,
794 .encrypt = sahara_aes_cbc_encrypt,
795 .decrypt = sahara_aes_cbc_decrypt,
796 }
797}
798};
799
800static irqreturn_t sahara_irq_handler(int irq, void *data)
801{
802 struct sahara_dev *dev = (struct sahara_dev *)data;
803 unsigned int stat = sahara_read(dev, SAHARA_REG_STATUS);
804 unsigned int err = sahara_read(dev, SAHARA_REG_ERRSTATUS);
805
806 del_timer(&dev->watchdog);
807
808 sahara_write(dev, SAHARA_CMD_CLEAR_INT | SAHARA_CMD_CLEAR_ERR,
809 SAHARA_REG_CMD);
810
811 sahara_decode_status(dev, stat);
812
813 if (SAHARA_STATUS_GET_STATE(stat) == SAHARA_STATE_BUSY) {
814 return IRQ_NONE;
815 } else if (SAHARA_STATUS_GET_STATE(stat) == SAHARA_STATE_COMPLETE) {
816 dev->error = 0;
817 } else {
818 sahara_decode_error(dev, err);
819 dev->error = -EINVAL;
820 }
821
822 tasklet_schedule(&dev->done_task);
823
824 return IRQ_HANDLED;
825}
826
827
828static int sahara_register_algs(struct sahara_dev *dev)
829{
830 int err, i, j;
831
832 for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
833 INIT_LIST_HEAD(&aes_algs[i].cra_list);
834 err = crypto_register_alg(&aes_algs[i]);
835 if (err)
836 goto err_aes_algs;
837 }
838
839 return 0;
840
841err_aes_algs:
842 for (j = 0; j < i; j++)
843 crypto_unregister_alg(&aes_algs[j]);
844
845 return err;
846}
847
848static void sahara_unregister_algs(struct sahara_dev *dev)
849{
850 int i;
851
852 for (i = 0; i < ARRAY_SIZE(aes_algs); i++)
853 crypto_unregister_alg(&aes_algs[i]);
854}
855
856static struct platform_device_id sahara_platform_ids[] = {
857 { .name = "sahara-imx27" },
858 { /* sentinel */ }
859};
860MODULE_DEVICE_TABLE(platform, sahara_platform_ids);
861
862static struct of_device_id sahara_dt_ids[] = {
863 { .compatible = "fsl,imx27-sahara" },
864 { /* sentinel */ }
865};
866MODULE_DEVICE_TABLE(of, sahara_dt_ids);
867
868static int sahara_probe(struct platform_device *pdev)
869{
870 struct sahara_dev *dev;
871 struct resource *res;
872 u32 version;
873 int irq;
874 int err;
875 int i;
876
877 dev = devm_kzalloc(&pdev->dev, sizeof(struct sahara_dev), GFP_KERNEL);
878 if (dev == NULL) {
879 dev_err(&pdev->dev, "unable to alloc data struct.\n");
880 return -ENOMEM;
881 }
882
883 dev->device = &pdev->dev;
884 platform_set_drvdata(pdev, dev);
885
886 /* Get the base address */
887 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
888 if (!res) {
889 dev_err(&pdev->dev, "failed to get memory region resource\n");
890 return -ENODEV;
891 }
892
893 if (devm_request_mem_region(&pdev->dev, res->start,
894 resource_size(res), SAHARA_NAME) == NULL) {
895 dev_err(&pdev->dev, "failed to request memory region\n");
896 return -ENOENT;
897 }
898 dev->regs_base = devm_ioremap(&pdev->dev, res->start,
899 resource_size(res));
900 if (!dev->regs_base) {
901 dev_err(&pdev->dev, "failed to ioremap address region\n");
902 return -ENOENT;
903 }
904
905 /* Get the IRQ */
906 irq = platform_get_irq(pdev, 0);
907 if (irq < 0) {
908 dev_err(&pdev->dev, "failed to get irq resource\n");
909 return irq;
910 }
911
912 if (devm_request_irq(&pdev->dev, irq, sahara_irq_handler,
913 0, SAHARA_NAME, dev) < 0) {
914 dev_err(&pdev->dev, "failed to request irq\n");
915 return -ENOENT;
916 }
917
918 /* clocks */
919 dev->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
920 if (IS_ERR(dev->clk_ipg)) {
921 dev_err(&pdev->dev, "Could not get ipg clock\n");
922 return PTR_ERR(dev->clk_ipg);
923 }
924
925 dev->clk_ahb = devm_clk_get(&pdev->dev, "ahb");
926 if (IS_ERR(dev->clk_ahb)) {
927 dev_err(&pdev->dev, "Could not get ahb clock\n");
928 return PTR_ERR(dev->clk_ahb);
929 }
930
931 /* Allocate HW descriptors */
932 dev->hw_desc[0] = dma_alloc_coherent(&pdev->dev,
933 SAHARA_MAX_HW_DESC * sizeof(struct sahara_hw_desc),
934 &dev->hw_phys_desc[0], GFP_KERNEL);
935 if (!dev->hw_desc[0]) {
936 dev_err(&pdev->dev, "Could not allocate hw descriptors\n");
937 return -ENOMEM;
938 }
939 dev->hw_desc[1] = dev->hw_desc[0] + 1;
940 dev->hw_phys_desc[1] = dev->hw_phys_desc[0] +
941 sizeof(struct sahara_hw_desc);
942
943 /* Allocate space for iv and key */
944 dev->key_base = dma_alloc_coherent(&pdev->dev, 2 * AES_KEYSIZE_128,
945 &dev->key_phys_base, GFP_KERNEL);
946 if (!dev->key_base) {
947 dev_err(&pdev->dev, "Could not allocate memory for key\n");
948 err = -ENOMEM;
949 goto err_key;
950 }
951 dev->iv_base = dev->key_base + AES_KEYSIZE_128;
952 dev->iv_phys_base = dev->key_phys_base + AES_KEYSIZE_128;
953
954 /* Allocate space for HW links */
955 dev->hw_link[0] = dma_alloc_coherent(&pdev->dev,
956 SAHARA_MAX_HW_LINK * sizeof(struct sahara_hw_link),
957 &dev->hw_phys_link[0], GFP_KERNEL);
958 if (!dev->hw_link[0]) {
959 dev_err(&pdev->dev, "Could not allocate hw links\n");
960 err = -ENOMEM;
961 goto err_link;
962 }
963 for (i = 1; i < SAHARA_MAX_HW_LINK; i++) {
964 dev->hw_phys_link[i] = dev->hw_phys_link[i - 1] +
965 sizeof(struct sahara_hw_link);
966 dev->hw_link[i] = dev->hw_link[i - 1] + 1;
967 }
968
969 crypto_init_queue(&dev->queue, SAHARA_QUEUE_LENGTH);
970
971 dev_ptr = dev;
972
973 tasklet_init(&dev->queue_task, sahara_aes_queue_task,
974 (unsigned long)dev);
975 tasklet_init(&dev->done_task, sahara_aes_done_task,
976 (unsigned long)dev);
977
978 init_timer(&dev->watchdog);
979 dev->watchdog.function = &sahara_watchdog;
980 dev->watchdog.data = (unsigned long)dev;
981
982 clk_prepare_enable(dev->clk_ipg);
983 clk_prepare_enable(dev->clk_ahb);
984
985 version = sahara_read(dev, SAHARA_REG_VERSION);
986 if (version != SAHARA_VERSION_3) {
987 dev_err(&pdev->dev, "SAHARA version %d not supported\n",
988 version);
989 err = -ENODEV;
990 goto err_algs;
991 }
992
993 sahara_write(dev, SAHARA_CMD_RESET | SAHARA_CMD_MODE_BATCH,
994 SAHARA_REG_CMD);
995 sahara_write(dev, SAHARA_CONTROL_SET_THROTTLE(0) |
996 SAHARA_CONTROL_SET_MAXBURST(8) |
997 SAHARA_CONTROL_RNG_AUTORSD |
998 SAHARA_CONTROL_ENABLE_INT,
999 SAHARA_REG_CONTROL);
1000
1001 err = sahara_register_algs(dev);
1002 if (err)
1003 goto err_algs;
1004
1005 dev_info(&pdev->dev, "SAHARA version %d initialized\n", version);
1006
1007 return 0;
1008
1009err_algs:
1010 dma_free_coherent(&pdev->dev,
1011 SAHARA_MAX_HW_LINK * sizeof(struct sahara_hw_link),
1012 dev->hw_link[0], dev->hw_phys_link[0]);
1013 clk_disable_unprepare(dev->clk_ipg);
1014 clk_disable_unprepare(dev->clk_ahb);
1015 dev_ptr = NULL;
1016err_link:
1017 dma_free_coherent(&pdev->dev,
1018 2 * AES_KEYSIZE_128,
1019 dev->key_base, dev->key_phys_base);
1020err_key:
1021 dma_free_coherent(&pdev->dev,
1022 SAHARA_MAX_HW_DESC * sizeof(struct sahara_hw_desc),
1023 dev->hw_desc[0], dev->hw_phys_desc[0]);
1024
1025 return err;
1026}
1027
1028static int sahara_remove(struct platform_device *pdev)
1029{
1030 struct sahara_dev *dev = platform_get_drvdata(pdev);
1031
1032 dma_free_coherent(&pdev->dev,
1033 SAHARA_MAX_HW_LINK * sizeof(struct sahara_hw_link),
1034 dev->hw_link[0], dev->hw_phys_link[0]);
1035 dma_free_coherent(&pdev->dev,
1036 2 * AES_KEYSIZE_128,
1037 dev->key_base, dev->key_phys_base);
1038 dma_free_coherent(&pdev->dev,
1039 SAHARA_MAX_HW_DESC * sizeof(struct sahara_hw_desc),
1040 dev->hw_desc[0], dev->hw_phys_desc[0]);
1041
1042 tasklet_kill(&dev->done_task);
1043 tasklet_kill(&dev->queue_task);
1044
1045 sahara_unregister_algs(dev);
1046
1047 clk_disable_unprepare(dev->clk_ipg);
1048 clk_disable_unprepare(dev->clk_ahb);
1049
1050 dev_ptr = NULL;
1051
1052 return 0;
1053}
1054
1055static struct platform_driver sahara_driver = {
1056 .probe = sahara_probe,
1057 .remove = sahara_remove,
1058 .driver = {
1059 .name = SAHARA_NAME,
1060 .owner = THIS_MODULE,
1061 .of_match_table = of_match_ptr(sahara_dt_ids),
1062 },
1063 .id_table = sahara_platform_ids,
1064};
1065
1066module_platform_driver(sahara_driver);
1067
1068MODULE_LICENSE("GPL");
1069MODULE_AUTHOR("Javier Martin <javier.martin@vista-silicon.com>");
1070MODULE_DESCRIPTION("SAHARA2 HW crypto accelerator");
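
The driver above exposes "sahara-ecb-aes" and "sahara-cbc-aes" through the generic ablkcipher interface. A minimal sketch of a kernel-side caller of that interface follows; the function is illustrative only, and a real user must set a completion callback and wait before freeing the request when -EINPROGRESS is returned:

    /* Sketch only, not part of the patch: using cbc(aes) via the ablkcipher API. */
    static int example_cbc_aes_encrypt(struct scatterlist *src,
                                       struct scatterlist *dst,
                                       unsigned int nbytes, u8 *key, u8 *iv)
    {
            struct crypto_ablkcipher *tfm;
            struct ablkcipher_request *req;
            int ret;

            tfm = crypto_alloc_ablkcipher("cbc(aes)", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            ret = crypto_ablkcipher_setkey(tfm, key, AES_KEYSIZE_128);
            if (ret)
                    goto out_free_tfm;

            req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
            if (!req) {
                    ret = -ENOMEM;
                    goto out_free_tfm;
            }

            ablkcipher_request_set_crypt(req, src, dst, nbytes, iv);
            ret = crypto_ablkcipher_encrypt(req); /* may return -EINPROGRESS */

            ablkcipher_request_free(req);
    out_free_tfm:
            crypto_free_ablkcipher(tfm);
            return ret;
    }
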
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 1827e9f1f873..cf5508967539 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -938,6 +938,7 @@ static int hash_dma_final(struct ahash_request *req)
     if (!ctx->device->dma.nents) {
         dev_err(device_data->dev, "[%s] "
                 "ctx->device->dma.nents = 0", __func__);
+        ret = ctx->device->dma.nents;
         goto out;
     }
 
@@ -945,6 +946,7 @@ static int hash_dma_final(struct ahash_request *req)
     if (bytes_written != req->nbytes) {
         dev_err(device_data->dev, "[%s] "
                 "hash_dma_write() failed!", __func__);
+        ret = bytes_written;
         goto out;
     }
 
@@ -1367,14 +1369,12 @@ static int hash_setkey(struct crypto_ahash *tfm,
         /**
          * Freed in final.
          */
-        ctx->key = kmalloc(keylen, GFP_KERNEL);
+        ctx->key = kmemdup(key, keylen, GFP_KERNEL);
         if (!ctx->key) {
             pr_err(DEV_DBG_NAME " [%s] Failed to allocate ctx->key "
                    "for %d\n", __func__, alg);
             return -ENOMEM;
         }
-
-        memcpy(ctx->key, key, keylen);
         ctx->keylen = keylen;
 
     return ret;
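
The kmalloc()+memcpy() pair is replaced by kmemdup(), which is functionally equivalent to the following:

    /* Reference sketch: what kmemdup(src, len, gfp) boils down to. */
    void *kmemdup_equivalent(const void *src, size_t len, gfp_t gfp)
    {
            void *p = kmalloc(len, gfp);

            if (p)
                    memcpy(p, src, len);
            return p;
    }
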
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
index c6c9c1fe460c..190f8a0e0242 100644
--- a/include/crypto/sha.h
+++ b/include/crypto/sha.h
@@ -87,4 +87,9 @@ struct shash_desc;
 extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
                   unsigned int len);
 
+extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
+                  unsigned int len);
+
+extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
+                  unsigned int len);
 #endif
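
These declarations let arch-optimized SHA-256/SHA-512 drivers, such as the new SSSE3/AVX glue, fall back to the generic C update when the FPU cannot be used. A sketch of that pattern, with assumed names rather than the actual glue code:

    /* Sketch only: expected fallback pattern in an x86 SIMD sha256 driver. */
    static int example_sha256_simd_update(struct shash_desc *desc, const u8 *data,
                                          unsigned int len)
    {
            if (!irq_fpu_usable())
                    return crypto_sha256_update(desc, data, len);

            kernel_fpu_begin();
            /* ... process data with the SSSE3/AVX routine ... */
            kernel_fpu_end();

            return 0;
    }
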
diff --git a/include/linux/platform_data/atmel-aes.h b/include/linux/platform_data/atmel-aes.h
deleted file mode 100644
index ab68082fbcb0..000000000000
--- a/include/linux/platform_data/atmel-aes.h
+++ /dev/null
@@ -1,22 +0,0 @@
1#ifndef __LINUX_ATMEL_AES_H
2#define __LINUX_ATMEL_AES_H
3
4#include <linux/platform_data/dma-atmel.h>
5
6/**
7 * struct aes_dma_data - DMA data for AES
8 */
9struct aes_dma_data {
10 struct at_dma_slave txdata;
11 struct at_dma_slave rxdata;
12};
13
14/**
15 * struct aes_platform_data - board-specific AES configuration
16 * @dma_slave: DMA slave interface to use in data transfers.
17 */
18struct aes_platform_data {
19 struct aes_dma_data *dma_slave;
20};
21
22#endif /* __LINUX_ATMEL_AES_H */
diff --git a/include/linux/platform_data/crypto-atmel.h b/include/linux/platform_data/crypto-atmel.h
new file mode 100644
index 000000000000..b46e0d9062a0
--- /dev/null
+++ b/include/linux/platform_data/crypto-atmel.h
@@ -0,0 +1,22 @@
1#ifndef __LINUX_CRYPTO_ATMEL_H
2#define __LINUX_CRYPTO_ATMEL_H
3
4#include <linux/platform_data/dma-atmel.h>
5
6/**
7 * struct crypto_dma_data - DMA data for AES/TDES/SHA
8 */
9struct crypto_dma_data {
10 struct at_dma_slave txdata;
11 struct at_dma_slave rxdata;
12};
13
14/**
15 * struct crypto_platform_data - board-specific AES/TDES/SHA configuration
16 * @dma_slave: DMA slave interface to use in data transfers.
17 */
18struct crypto_platform_data {
19 struct crypto_dma_data *dma_slave;
20};
21
22#endif /* __LINUX_CRYPTO_ATMEL_H */
diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h
index 3e08a1c86830..46eb27ddbfab 100644
--- a/include/linux/timeriomem-rng.h
+++ b/include/linux/timeriomem-rng.h
@@ -8,12 +8,7 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/completion.h>
-
 struct timeriomem_rng_data {
-    struct completion   completion;
-    unsigned int        present:1;
-
     void __iomem        *address;
 
     /* measures in usecs */
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 6fb9d00a75dc..ab4ef72f0b1d 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -311,6 +311,19 @@ static struct xfrm_algo_desc aalg_list[] = {
         .sadb_alg_maxbits = 128
     }
 },
+{
+    /* rfc4494 */
+    .name = "cmac(aes)",
+
+    .uinfo = {
+        .auth = {
+            .icv_truncbits = 96,
+            .icv_fullbits = 128,
+        }
+    },
+
+    .pfkey_supported = 0,
+},
 };
 
 static struct xfrm_algo_desc ealg_list[] = {
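
With "cmac(aes)" registered in aalg_list, IPsec state can negotiate AES-CMAC-96 per RFC 4494, and the same template is reachable from other kernel code through the regular shash API. A hedged sketch (names illustrative, error handling abbreviated):

    /* Sketch only: computing an AES-CMAC tag via the new "cmac(aes)" template. */
    static int example_cmac_digest(const u8 *key, unsigned int keylen,
                                   const u8 *msg, unsigned int len, u8 *mac)
    {
            struct crypto_shash *tfm;
            struct shash_desc *desc;
            int ret;

            tfm = crypto_alloc_shash("cmac(aes)", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            ret = crypto_shash_setkey(tfm, key, keylen);
            if (ret)
                    goto out;

            desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
            if (!desc) {
                    ret = -ENOMEM;
                    goto out;
            }
            desc->tfm = tfm;
            desc->flags = 0;

            ret = crypto_shash_digest(desc, msg, len, mac); /* mac is 16 bytes */
            kfree(desc);
    out:
            crypto_free_shash(tfm);
            return ret;
    }
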