diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-02 17:53:12 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-02 17:53:12 -0400 |
commit | 797994f81a8b2bdca2eecffa415c1e7a89a4f961 (patch) | |
tree | 1383dc469c26ad37fdf960f682d9a48c782935c5 | |
parent | c8d8566952fda026966784a62f324c8352f77430 (diff) | |
parent | 3862de1f6c442d53bd828d39f86d07d933a70605 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
- XTS mode optimisation for twofish/cast6/camellia/aes on x86
- AVX2/x86_64 implementation for blowfish/twofish/serpent/camellia
- SSSE3/AVX/AVX2 optimisations for sha256/sha512
- Added driver for SAHARA2 crypto accelerator
- Fix for GMAC when used in non-IPsec scenarios
- Added generic CMAC implementation (including IPsec glue)
- IP update for crypto/atmel
- Support for more than one device in hwrng/timeriomem
- Added Broadcom BCM2835 RNG driver
- Misc fixes
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (59 commits)
crypto: caam - fix job ring cleanup code
crypto: camellia - add AVX2/AES-NI/x86_64 assembler implementation of camellia cipher
crypto: serpent - add AVX2/x86_64 assembler implementation of serpent cipher
crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher
crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher
crypto: tcrypt - add async cipher speed tests for blowfish
crypto: testmgr - extend camellia test-vectors for camellia-aesni/avx2
crypto: aesni_intel - fix Kconfig problem with CRYPTO_GLUE_HELPER_X86
crypto: aesni_intel - add more optimized XTS mode for x86-64
crypto: x86/camellia-aesni-avx - add more optimized XTS code
crypto: cast6-avx: use new optimized XTS code
crypto: x86/twofish-avx - use optimized XTS code
crypto: x86 - add more optimized XTS-mode for serpent-avx
xfrm: add rfc4494 AES-CMAC-96 support
crypto: add CMAC support to CryptoAPI
crypto: testmgr - add empty test vectors for null ciphers
crypto: testmgr - add AES GMAC test vectors
crypto: gcm - fix rfc4543 to handle async crypto correctly
crypto: gcm - make GMAC work when dst and src are different
hwrng: timeriomem - added devicetree hooks
...
88 files changed, 15378 insertions, 744 deletions
diff --git a/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt new file mode 100644 index 000000000000..5c65eccd0e56 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt | |||
@@ -0,0 +1,15 @@ | |||
1 | Freescale SAHARA Cryptographic Accelerator included in some i.MX chips. | ||
2 | Currently only i.MX27 is supported. | ||
3 | |||
4 | Required properties: | ||
5 | - compatible : Should be "fsl,<soc>-sahara" | ||
6 | - reg : Should contain SAHARA registers location and length | ||
7 | - interrupts : Should contain SAHARA interrupt number | ||
8 | |||
9 | Example: | ||
10 | |||
11 | sah@10025000 { | ||
12 | compatible = "fsl,imx27-sahara"; | ||
13 | reg = < 0x10025000 0x800>; | ||
14 | interrupts = <75>; | ||
15 | }; | ||
diff --git a/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt b/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt new file mode 100644 index 000000000000..6616d15866a3 --- /dev/null +++ b/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt | |||
@@ -0,0 +1,18 @@ | |||
1 | HWRNG support for the timeriomem_rng driver | ||
2 | |||
3 | Required properties: | ||
4 | - compatible : "timeriomem_rng" | ||
5 | - reg : base address to sample from | ||
6 | - period : wait time in microseconds to use between samples | ||
7 | |||
8 | N.B. currently 'reg' must be four bytes wide and aligned | ||
9 | |||
10 | Example: | ||
11 | |||
12 | hwrng@44 { | ||
13 | #address-cells = <1>; | ||
14 | #size-cells = <1>; | ||
15 | compatible = "timeriomem_rng"; | ||
16 | reg = <0x44 0x04>; | ||
17 | period = <1000000>; | ||
18 | }; | ||
diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt new file mode 100644 index 000000000000..07ccdaa68324 --- /dev/null +++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt | |||
@@ -0,0 +1,13 @@ | |||
1 | BCM2835 Random number generator | ||
2 | |||
3 | Required properties: | ||
4 | |||
5 | - compatible : should be "brcm,bcm2835-rng" | ||
6 | - reg : Specifies base physical address and size of the registers. | ||
7 | |||
8 | Example: | ||
9 | |||
10 | rng { | ||
11 | compatible = "brcm,bcm2835-rng"; | ||
12 | reg = <0x7e104000 0x10>; | ||
13 | }; | ||
diff --git a/Documentation/hw_random.txt b/Documentation/hw_random.txt index 690f52550c80..026e237bbc87 100644 --- a/Documentation/hw_random.txt +++ b/Documentation/hw_random.txt | |||
@@ -63,7 +63,7 @@ Intel RNG Driver notes: | |||
63 | 63 | ||
64 | * FIXME: support poll(2) | 64 | * FIXME: support poll(2) |
65 | 65 | ||
66 | NOTE: request_mem_region was removed, for two reasons: | 66 | NOTE: request_mem_region was removed, for three reasons: |
67 | 1) Only one RNG is supported by this driver, 2) The location | 67 | 1) Only one RNG is supported by this driver, 2) The location |
68 | used by the RNG is a fixed location in MMIO-addressable memory, | 68 | used by the RNG is a fixed location in MMIO-addressable memory, |
69 | 3) users with properly working BIOS e820 handling will always | 69 | 3) users with properly working BIOS e820 handling will always |
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c index 827c9f2a70fb..f0bf68268ca2 100644 --- a/arch/arm/mach-at91/at91sam9g45_devices.c +++ b/arch/arm/mach-at91/at91sam9g45_devices.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <linux/platform_device.h> | 18 | #include <linux/platform_device.h> |
19 | #include <linux/i2c-gpio.h> | 19 | #include <linux/i2c-gpio.h> |
20 | #include <linux/atmel-mci.h> | 20 | #include <linux/atmel-mci.h> |
21 | #include <linux/platform_data/atmel-aes.h> | 21 | #include <linux/platform_data/crypto-atmel.h> |
22 | 22 | ||
23 | #include <linux/platform_data/at91_adc.h> | 23 | #include <linux/platform_data/at91_adc.h> |
24 | 24 | ||
@@ -1900,7 +1900,8 @@ static void __init at91_add_device_tdes(void) {} | |||
1900 | * -------------------------------------------------------------------- */ | 1900 | * -------------------------------------------------------------------- */ |
1901 | 1901 | ||
1902 | #if defined(CONFIG_CRYPTO_DEV_ATMEL_AES) || defined(CONFIG_CRYPTO_DEV_ATMEL_AES_MODULE) | 1902 | #if defined(CONFIG_CRYPTO_DEV_ATMEL_AES) || defined(CONFIG_CRYPTO_DEV_ATMEL_AES_MODULE) |
1903 | static struct aes_platform_data aes_data; | 1903 | static struct crypto_platform_data aes_data; |
1904 | static struct crypto_dma_data alt_atslave; | ||
1904 | static u64 aes_dmamask = DMA_BIT_MASK(32); | 1905 | static u64 aes_dmamask = DMA_BIT_MASK(32); |
1905 | 1906 | ||
1906 | static struct resource aes_resources[] = { | 1907 | static struct resource aes_resources[] = { |
@@ -1931,23 +1932,20 @@ static struct platform_device at91sam9g45_aes_device = { | |||
1931 | static void __init at91_add_device_aes(void) | 1932 | static void __init at91_add_device_aes(void) |
1932 | { | 1933 | { |
1933 | struct at_dma_slave *atslave; | 1934 | struct at_dma_slave *atslave; |
1934 | struct aes_dma_data *alt_atslave; | ||
1935 | |||
1936 | alt_atslave = kzalloc(sizeof(struct aes_dma_data), GFP_KERNEL); | ||
1937 | 1935 | ||
1938 | /* DMA TX slave channel configuration */ | 1936 | /* DMA TX slave channel configuration */ |
1939 | atslave = &alt_atslave->txdata; | 1937 | atslave = &alt_atslave.txdata; |
1940 | atslave->dma_dev = &at_hdmac_device.dev; | 1938 | atslave->dma_dev = &at_hdmac_device.dev; |
1941 | atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_SRC_H2SEL_HW | | 1939 | atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_SRC_H2SEL_HW | |
1942 | ATC_SRC_PER(AT_DMA_ID_AES_RX); | 1940 | ATC_SRC_PER(AT_DMA_ID_AES_RX); |
1943 | 1941 | ||
1944 | /* DMA RX slave channel configuration */ | 1942 | /* DMA RX slave channel configuration */ |
1945 | atslave = &alt_atslave->rxdata; | 1943 | atslave = &alt_atslave.rxdata; |
1946 | atslave->dma_dev = &at_hdmac_device.dev; | 1944 | atslave->dma_dev = &at_hdmac_device.dev; |
1947 | atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_DST_H2SEL_HW | | 1945 | atslave->cfg = ATC_FIFOCFG_ENOUGHSPACE | ATC_DST_H2SEL_HW | |
1948 | ATC_DST_PER(AT_DMA_ID_AES_TX); | 1946 | ATC_DST_PER(AT_DMA_ID_AES_TX); |
1949 | 1947 | ||
1950 | aes_data.dma_slave = alt_atslave; | 1948 | aes_data.dma_slave = &alt_atslave; |
1951 | platform_device_register(&at91sam9g45_aes_device); | 1949 | platform_device_register(&at91sam9g45_aes_device); |
1952 | } | 1950 | } |
1953 | #else | 1951 | #else |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 63947a8f9f0f..a3a0ed80f17c 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -2,6 +2,10 @@ | |||
2 | # Arch-specific CryptoAPI modules. | 2 | # Arch-specific CryptoAPI modules. |
3 | # | 3 | # |
4 | 4 | ||
5 | avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) | ||
6 | avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ | ||
7 | $(comma)4)$(comma)%ymm2,yes,no) | ||
8 | |||
5 | obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o | 9 | obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o |
6 | obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o | 10 | obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o |
7 | 11 | ||
@@ -12,22 +16,37 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o | |||
12 | 16 | ||
13 | obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o | 17 | obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o |
14 | obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o | 18 | obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o |
15 | obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o | ||
16 | obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o | ||
17 | obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o | ||
18 | obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o | 19 | obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o |
19 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o | 20 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o |
20 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o | 21 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o |
21 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o | ||
22 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o | 22 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o |
23 | obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o | 23 | obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o |
24 | obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o | ||
25 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o | 24 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o |
26 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o | 25 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o |
27 | 26 | ||
28 | obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o | 27 | obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o |
29 | obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o | 28 | obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o |
30 | obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o | 29 | obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o |
30 | obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o | ||
31 | obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o | ||
32 | |||
33 | # These modules require assembler to support AVX. | ||
34 | ifeq ($(avx_supported),yes) | ||
35 | obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \ | ||
36 | camellia-aesni-avx-x86_64.o | ||
37 | obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o | ||
38 | obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o | ||
39 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o | ||
40 | obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o | ||
41 | endif | ||
42 | |||
43 | # These modules require assembler to support AVX2. | ||
44 | ifeq ($(avx2_supported),yes) | ||
45 | obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o | ||
46 | obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o | ||
47 | obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o | ||
48 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o | ||
49 | endif | ||
31 | 50 | ||
32 | aes-i586-y := aes-i586-asm_32.o aes_glue.o | 51 | aes-i586-y := aes-i586-asm_32.o aes_glue.o |
33 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o | 52 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o |
@@ -36,21 +55,35 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o | |||
36 | 55 | ||
37 | aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o | 56 | aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o |
38 | camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o | 57 | camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o |
39 | camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ | ||
40 | camellia_aesni_avx_glue.o | ||
41 | cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o | ||
42 | cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o | ||
43 | blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o | 58 | blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o |
44 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o | 59 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o |
45 | twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o | 60 | twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o |
46 | twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o | ||
47 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o | 61 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o |
48 | serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o | 62 | serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o |
49 | serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o | 63 | |
64 | ifeq ($(avx_supported),yes) | ||
65 | camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ | ||
66 | camellia_aesni_avx_glue.o | ||
67 | cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o | ||
68 | cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o | ||
69 | twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o \ | ||
70 | twofish_avx_glue.o | ||
71 | serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o \ | ||
72 | serpent_avx_glue.o | ||
73 | endif | ||
74 | |||
75 | ifeq ($(avx2_supported),yes) | ||
76 | blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o | ||
77 | camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o | ||
78 | serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o | ||
79 | twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o | ||
80 | endif | ||
50 | 81 | ||
51 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 82 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
52 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | 83 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o |
53 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o | 84 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o |
54 | crc32c-intel-y := crc32c-intel_glue.o | 85 | crc32c-intel-y := crc32c-intel_glue.o |
55 | crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o | 86 | crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o |
56 | crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o | 87 | crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o |
88 | sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o | ||
89 | sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o | ||
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 04b797767b9e..62fe22cd4cba 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -34,6 +34,10 @@ | |||
34 | 34 | ||
35 | #ifdef __x86_64__ | 35 | #ifdef __x86_64__ |
36 | .data | 36 | .data |
37 | .align 16 | ||
38 | .Lgf128mul_x_ble_mask: | ||
39 | .octa 0x00000000000000010000000000000087 | ||
40 | |||
37 | POLY: .octa 0xC2000000000000000000000000000001 | 41 | POLY: .octa 0xC2000000000000000000000000000001 |
38 | TWOONE: .octa 0x00000001000000000000000000000001 | 42 | TWOONE: .octa 0x00000001000000000000000000000001 |
39 | 43 | ||
@@ -105,6 +109,8 @@ enc: .octa 0x2 | |||
105 | #define CTR %xmm11 | 109 | #define CTR %xmm11 |
106 | #define INC %xmm12 | 110 | #define INC %xmm12 |
107 | 111 | ||
112 | #define GF128MUL_MASK %xmm10 | ||
113 | |||
108 | #ifdef __x86_64__ | 114 | #ifdef __x86_64__ |
109 | #define AREG %rax | 115 | #define AREG %rax |
110 | #define KEYP %rdi | 116 | #define KEYP %rdi |
@@ -2636,4 +2642,115 @@ ENTRY(aesni_ctr_enc) | |||
2636 | .Lctr_enc_just_ret: | 2642 | .Lctr_enc_just_ret: |
2637 | ret | 2643 | ret |
2638 | ENDPROC(aesni_ctr_enc) | 2644 | ENDPROC(aesni_ctr_enc) |
2645 | |||
2646 | /* | ||
2647 | * _aesni_gf128mul_x_ble: internal ABI | ||
2648 | * Multiply in GF(2^128) for XTS IVs | ||
2649 | * input: | ||
2650 | * IV: current IV | ||
2651 | * GF128MUL_MASK == mask with 0x87 and 0x01 | ||
2652 | * output: | ||
2653 | * IV: next IV | ||
2654 | * changed: | ||
2655 | * CTR: == temporary value | ||
2656 | */ | ||
2657 | #define _aesni_gf128mul_x_ble() \ | ||
2658 | pshufd $0x13, IV, CTR; \ | ||
2659 | paddq IV, IV; \ | ||
2660 | psrad $31, CTR; \ | ||
2661 | pand GF128MUL_MASK, CTR; \ | ||
2662 | pxor CTR, IV; | ||
2663 | |||
2664 | /* | ||
2665 | * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, | ||
2666 | * bool enc, u8 *iv) | ||
2667 | */ | ||
2668 | ENTRY(aesni_xts_crypt8) | ||
2669 | cmpb $0, %cl | ||
2670 | movl $0, %ecx | ||
2671 | movl $240, %r10d | ||
2672 | leaq _aesni_enc4, %r11 | ||
2673 | leaq _aesni_dec4, %rax | ||
2674 | cmovel %r10d, %ecx | ||
2675 | cmoveq %rax, %r11 | ||
2676 | |||
2677 | movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK | ||
2678 | movups (IVP), IV | ||
2679 | |||
2680 | mov 480(KEYP), KLEN | ||
2681 | addq %rcx, KEYP | ||
2682 | |||
2683 | movdqa IV, STATE1 | ||
2684 | pxor 0x00(INP), STATE1 | ||
2685 | movdqu IV, 0x00(OUTP) | ||
2686 | |||
2687 | _aesni_gf128mul_x_ble() | ||
2688 | movdqa IV, STATE2 | ||
2689 | pxor 0x10(INP), STATE2 | ||
2690 | movdqu IV, 0x10(OUTP) | ||
2691 | |||
2692 | _aesni_gf128mul_x_ble() | ||
2693 | movdqa IV, STATE3 | ||
2694 | pxor 0x20(INP), STATE3 | ||
2695 | movdqu IV, 0x20(OUTP) | ||
2696 | |||
2697 | _aesni_gf128mul_x_ble() | ||
2698 | movdqa IV, STATE4 | ||
2699 | pxor 0x30(INP), STATE4 | ||
2700 | movdqu IV, 0x30(OUTP) | ||
2701 | |||
2702 | call *%r11 | ||
2703 | |||
2704 | pxor 0x00(OUTP), STATE1 | ||
2705 | movdqu STATE1, 0x00(OUTP) | ||
2706 | |||
2707 | _aesni_gf128mul_x_ble() | ||
2708 | movdqa IV, STATE1 | ||
2709 | pxor 0x40(INP), STATE1 | ||
2710 | movdqu IV, 0x40(OUTP) | ||
2711 | |||
2712 | pxor 0x10(OUTP), STATE2 | ||
2713 | movdqu STATE2, 0x10(OUTP) | ||
2714 | |||
2715 | _aesni_gf128mul_x_ble() | ||
2716 | movdqa IV, STATE2 | ||
2717 | pxor 0x50(INP), STATE2 | ||
2718 | movdqu IV, 0x50(OUTP) | ||
2719 | |||
2720 | pxor 0x20(OUTP), STATE3 | ||
2721 | movdqu STATE3, 0x20(OUTP) | ||
2722 | |||
2723 | _aesni_gf128mul_x_ble() | ||
2724 | movdqa IV, STATE3 | ||
2725 | pxor 0x60(INP), STATE3 | ||
2726 | movdqu IV, 0x60(OUTP) | ||
2727 | |||
2728 | pxor 0x30(OUTP), STATE4 | ||
2729 | movdqu STATE4, 0x30(OUTP) | ||
2730 | |||
2731 | _aesni_gf128mul_x_ble() | ||
2732 | movdqa IV, STATE4 | ||
2733 | pxor 0x70(INP), STATE4 | ||
2734 | movdqu IV, 0x70(OUTP) | ||
2735 | |||
2736 | _aesni_gf128mul_x_ble() | ||
2737 | movups IV, (IVP) | ||
2738 | |||
2739 | call *%r11 | ||
2740 | |||
2741 | pxor 0x40(OUTP), STATE1 | ||
2742 | movdqu STATE1, 0x40(OUTP) | ||
2743 | |||
2744 | pxor 0x50(OUTP), STATE2 | ||
2745 | movdqu STATE2, 0x50(OUTP) | ||
2746 | |||
2747 | pxor 0x60(OUTP), STATE3 | ||
2748 | movdqu STATE3, 0x60(OUTP) | ||
2749 | |||
2750 | pxor 0x70(OUTP), STATE4 | ||
2751 | movdqu STATE4, 0x70(OUTP) | ||
2752 | |||
2753 | ret | ||
2754 | ENDPROC(aesni_xts_crypt8) | ||
2755 | |||
2639 | #endif | 2756 | #endif |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index a0795da22c02..f80e668785c0 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -39,6 +39,9 @@ | |||
39 | #include <crypto/internal/aead.h> | 39 | #include <crypto/internal/aead.h> |
40 | #include <linux/workqueue.h> | 40 | #include <linux/workqueue.h> |
41 | #include <linux/spinlock.h> | 41 | #include <linux/spinlock.h> |
42 | #ifdef CONFIG_X86_64 | ||
43 | #include <asm/crypto/glue_helper.h> | ||
44 | #endif | ||
42 | 45 | ||
43 | #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) | 46 | #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) |
44 | #define HAS_PCBC | 47 | #define HAS_PCBC |
@@ -102,6 +105,9 @@ void crypto_fpu_exit(void); | |||
102 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | 105 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, |
103 | const u8 *in, unsigned int len, u8 *iv); | 106 | const u8 *in, unsigned int len, u8 *iv); |
104 | 107 | ||
108 | asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, | ||
109 | const u8 *in, bool enc, u8 *iv); | ||
110 | |||
105 | /* asmlinkage void aesni_gcm_enc() | 111 | /* asmlinkage void aesni_gcm_enc() |
106 | * void *ctx, AES Key schedule. Starts on a 16 byte boundary. | 112 | * void *ctx, AES Key schedule. Starts on a 16 byte boundary. |
107 | * u8 *out, Ciphertext output. Encrypt in-place is allowed. | 113 | * u8 *out, Ciphertext output. Encrypt in-place is allowed. |
@@ -510,6 +516,78 @@ static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in) | |||
510 | aesni_enc(ctx, out, in); | 516 | aesni_enc(ctx, out, in); |
511 | } | 517 | } |
512 | 518 | ||
519 | #ifdef CONFIG_X86_64 | ||
520 | |||
521 | static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
522 | { | ||
523 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc)); | ||
524 | } | ||
525 | |||
526 | static void aesni_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
527 | { | ||
528 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec)); | ||
529 | } | ||
530 | |||
531 | static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
532 | { | ||
533 | aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv); | ||
534 | } | ||
535 | |||
536 | static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
537 | { | ||
538 | aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv); | ||
539 | } | ||
540 | |||
541 | static const struct common_glue_ctx aesni_enc_xts = { | ||
542 | .num_funcs = 2, | ||
543 | .fpu_blocks_limit = 1, | ||
544 | |||
545 | .funcs = { { | ||
546 | .num_blocks = 8, | ||
547 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) } | ||
548 | }, { | ||
549 | .num_blocks = 1, | ||
550 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) } | ||
551 | } } | ||
552 | }; | ||
553 | |||
554 | static const struct common_glue_ctx aesni_dec_xts = { | ||
555 | .num_funcs = 2, | ||
556 | .fpu_blocks_limit = 1, | ||
557 | |||
558 | .funcs = { { | ||
559 | .num_blocks = 8, | ||
560 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) } | ||
561 | }, { | ||
562 | .num_blocks = 1, | ||
563 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) } | ||
564 | } } | ||
565 | }; | ||
566 | |||
567 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
568 | struct scatterlist *src, unsigned int nbytes) | ||
569 | { | ||
570 | struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
571 | |||
572 | return glue_xts_crypt_128bit(&aesni_enc_xts, desc, dst, src, nbytes, | ||
573 | XTS_TWEAK_CAST(aesni_xts_tweak), | ||
574 | aes_ctx(ctx->raw_tweak_ctx), | ||
575 | aes_ctx(ctx->raw_crypt_ctx)); | ||
576 | } | ||
577 | |||
578 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
579 | struct scatterlist *src, unsigned int nbytes) | ||
580 | { | ||
581 | struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
582 | |||
583 | return glue_xts_crypt_128bit(&aesni_dec_xts, desc, dst, src, nbytes, | ||
584 | XTS_TWEAK_CAST(aesni_xts_tweak), | ||
585 | aes_ctx(ctx->raw_tweak_ctx), | ||
586 | aes_ctx(ctx->raw_crypt_ctx)); | ||
587 | } | ||
588 | |||
589 | #else | ||
590 | |||
513 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 591 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
514 | struct scatterlist *src, unsigned int nbytes) | 592 | struct scatterlist *src, unsigned int nbytes) |
515 | { | 593 | { |
@@ -560,6 +638,8 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
560 | return ret; | 638 | return ret; |
561 | } | 639 | } |
562 | 640 | ||
641 | #endif | ||
642 | |||
563 | #ifdef CONFIG_X86_64 | 643 | #ifdef CONFIG_X86_64 |
564 | static int rfc4106_init(struct crypto_tfm *tfm) | 644 | static int rfc4106_init(struct crypto_tfm *tfm) |
565 | { | 645 | { |
diff --git a/arch/x86/crypto/blowfish-avx2-asm_64.S b/arch/x86/crypto/blowfish-avx2-asm_64.S new file mode 100644 index 000000000000..784452e0d05d --- /dev/null +++ b/arch/x86/crypto/blowfish-avx2-asm_64.S | |||
@@ -0,0 +1,449 @@ | |||
1 | /* | ||
2 | * x86_64/AVX2 assembler optimized version of Blowfish | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/linkage.h> | ||
14 | |||
15 | .file "blowfish-avx2-asm_64.S" | ||
16 | |||
17 | .data | ||
18 | .align 32 | ||
19 | |||
20 | .Lprefetch_mask: | ||
21 | .long 0*64 | ||
22 | .long 1*64 | ||
23 | .long 2*64 | ||
24 | .long 3*64 | ||
25 | .long 4*64 | ||
26 | .long 5*64 | ||
27 | .long 6*64 | ||
28 | .long 7*64 | ||
29 | |||
30 | .Lbswap32_mask: | ||
31 | .long 0x00010203 | ||
32 | .long 0x04050607 | ||
33 | .long 0x08090a0b | ||
34 | .long 0x0c0d0e0f | ||
35 | |||
36 | .Lbswap128_mask: | ||
37 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
38 | .Lbswap_iv_mask: | ||
39 | .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 | ||
40 | |||
41 | .text | ||
42 | /* structure of crypto context */ | ||
43 | #define p 0 | ||
44 | #define s0 ((16 + 2) * 4) | ||
45 | #define s1 ((16 + 2 + (1 * 256)) * 4) | ||
46 | #define s2 ((16 + 2 + (2 * 256)) * 4) | ||
47 | #define s3 ((16 + 2 + (3 * 256)) * 4) | ||
48 | |||
49 | /* register macros */ | ||
50 | #define CTX %rdi | ||
51 | #define RIO %rdx | ||
52 | |||
53 | #define RS0 %rax | ||
54 | #define RS1 %r8 | ||
55 | #define RS2 %r9 | ||
56 | #define RS3 %r10 | ||
57 | |||
58 | #define RLOOP %r11 | ||
59 | #define RLOOPd %r11d | ||
60 | |||
61 | #define RXr0 %ymm8 | ||
62 | #define RXr1 %ymm9 | ||
63 | #define RXr2 %ymm10 | ||
64 | #define RXr3 %ymm11 | ||
65 | #define RXl0 %ymm12 | ||
66 | #define RXl1 %ymm13 | ||
67 | #define RXl2 %ymm14 | ||
68 | #define RXl3 %ymm15 | ||
69 | |||
70 | /* temp regs */ | ||
71 | #define RT0 %ymm0 | ||
72 | #define RT0x %xmm0 | ||
73 | #define RT1 %ymm1 | ||
74 | #define RT1x %xmm1 | ||
75 | #define RIDX0 %ymm2 | ||
76 | #define RIDX1 %ymm3 | ||
77 | #define RIDX1x %xmm3 | ||
78 | #define RIDX2 %ymm4 | ||
79 | #define RIDX3 %ymm5 | ||
80 | |||
81 | /* vpgatherdd mask and '-1' */ | ||
82 | #define RNOT %ymm6 | ||
83 | |||
84 | /* byte mask, (-1 >> 24) */ | ||
85 | #define RBYTE %ymm7 | ||
86 | |||
87 | /*********************************************************************** | ||
88 | * 32-way AVX2 blowfish | ||
89 | ***********************************************************************/ | ||
90 | #define F(xl, xr) \ | ||
91 | vpsrld $24, xl, RIDX0; \ | ||
92 | vpsrld $16, xl, RIDX1; \ | ||
93 | vpsrld $8, xl, RIDX2; \ | ||
94 | vpand RBYTE, RIDX1, RIDX1; \ | ||
95 | vpand RBYTE, RIDX2, RIDX2; \ | ||
96 | vpand RBYTE, xl, RIDX3; \ | ||
97 | \ | ||
98 | vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \ | ||
99 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
100 | vpcmpeqd RIDX0, RIDX0, RIDX0; \ | ||
101 | \ | ||
102 | vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \ | ||
103 | vpcmpeqd RIDX1, RIDX1, RIDX1; \ | ||
104 | vpaddd RT0, RT1, RT0; \ | ||
105 | \ | ||
106 | vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \ | ||
107 | vpxor RT0, RT1, RT0; \ | ||
108 | \ | ||
109 | vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \ | ||
110 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
111 | vpaddd RT0, RT1, RT0; \ | ||
112 | \ | ||
113 | vpxor RT0, xr, xr; | ||
114 | |||
115 | #define add_roundkey(xl, nmem) \ | ||
116 | vpbroadcastd nmem, RT0; \ | ||
117 | vpxor RT0, xl ## 0, xl ## 0; \ | ||
118 | vpxor RT0, xl ## 1, xl ## 1; \ | ||
119 | vpxor RT0, xl ## 2, xl ## 2; \ | ||
120 | vpxor RT0, xl ## 3, xl ## 3; | ||
121 | |||
122 | #define round_enc() \ | ||
123 | add_roundkey(RXr, p(CTX,RLOOP,4)); \ | ||
124 | F(RXl0, RXr0); \ | ||
125 | F(RXl1, RXr1); \ | ||
126 | F(RXl2, RXr2); \ | ||
127 | F(RXl3, RXr3); \ | ||
128 | \ | ||
129 | add_roundkey(RXl, p+4(CTX,RLOOP,4)); \ | ||
130 | F(RXr0, RXl0); \ | ||
131 | F(RXr1, RXl1); \ | ||
132 | F(RXr2, RXl2); \ | ||
133 | F(RXr3, RXl3); | ||
134 | |||
135 | #define round_dec() \ | ||
136 | add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \ | ||
137 | F(RXl0, RXr0); \ | ||
138 | F(RXl1, RXr1); \ | ||
139 | F(RXl2, RXr2); \ | ||
140 | F(RXl3, RXr3); \ | ||
141 | \ | ||
142 | add_roundkey(RXl, p+4(CTX,RLOOP,4)); \ | ||
143 | F(RXr0, RXl0); \ | ||
144 | F(RXr1, RXl1); \ | ||
145 | F(RXr2, RXl2); \ | ||
146 | F(RXr3, RXl3); | ||
147 | |||
148 | #define init_round_constants() \ | ||
149 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
150 | leaq s0(CTX), RS0; \ | ||
151 | leaq s1(CTX), RS1; \ | ||
152 | leaq s2(CTX), RS2; \ | ||
153 | leaq s3(CTX), RS3; \ | ||
154 | vpsrld $24, RNOT, RBYTE; | ||
155 | |||
156 | #define transpose_2x2(x0, x1, t0) \ | ||
157 | vpunpckldq x0, x1, t0; \ | ||
158 | vpunpckhdq x0, x1, x1; \ | ||
159 | \ | ||
160 | vpunpcklqdq t0, x1, x0; \ | ||
161 | vpunpckhqdq t0, x1, x1; | ||
162 | |||
163 | #define read_block(xl, xr) \ | ||
164 | vbroadcasti128 .Lbswap32_mask, RT1; \ | ||
165 | \ | ||
166 | vpshufb RT1, xl ## 0, xl ## 0; \ | ||
167 | vpshufb RT1, xr ## 0, xr ## 0; \ | ||
168 | vpshufb RT1, xl ## 1, xl ## 1; \ | ||
169 | vpshufb RT1, xr ## 1, xr ## 1; \ | ||
170 | vpshufb RT1, xl ## 2, xl ## 2; \ | ||
171 | vpshufb RT1, xr ## 2, xr ## 2; \ | ||
172 | vpshufb RT1, xl ## 3, xl ## 3; \ | ||
173 | vpshufb RT1, xr ## 3, xr ## 3; \ | ||
174 | \ | ||
175 | transpose_2x2(xl ## 0, xr ## 0, RT0); \ | ||
176 | transpose_2x2(xl ## 1, xr ## 1, RT0); \ | ||
177 | transpose_2x2(xl ## 2, xr ## 2, RT0); \ | ||
178 | transpose_2x2(xl ## 3, xr ## 3, RT0); | ||
179 | |||
180 | #define write_block(xl, xr) \ | ||
181 | vbroadcasti128 .Lbswap32_mask, RT1; \ | ||
182 | \ | ||
183 | transpose_2x2(xl ## 0, xr ## 0, RT0); \ | ||
184 | transpose_2x2(xl ## 1, xr ## 1, RT0); \ | ||
185 | transpose_2x2(xl ## 2, xr ## 2, RT0); \ | ||
186 | transpose_2x2(xl ## 3, xr ## 3, RT0); \ | ||
187 | \ | ||
188 | vpshufb RT1, xl ## 0, xl ## 0; \ | ||
189 | vpshufb RT1, xr ## 0, xr ## 0; \ | ||
190 | vpshufb RT1, xl ## 1, xl ## 1; \ | ||
191 | vpshufb RT1, xr ## 1, xr ## 1; \ | ||
192 | vpshufb RT1, xl ## 2, xl ## 2; \ | ||
193 | vpshufb RT1, xr ## 2, xr ## 2; \ | ||
194 | vpshufb RT1, xl ## 3, xl ## 3; \ | ||
195 | vpshufb RT1, xr ## 3, xr ## 3; | ||
196 | |||
197 | .align 8 | ||
198 | __blowfish_enc_blk32: | ||
199 | /* input: | ||
200 | * %rdi: ctx, CTX | ||
201 | * RXl0..4, RXr0..4: plaintext | ||
202 | * output: | ||
203 | * RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped) | ||
204 | */ | ||
205 | init_round_constants(); | ||
206 | |||
207 | read_block(RXl, RXr); | ||
208 | |||
209 | movl $1, RLOOPd; | ||
210 | add_roundkey(RXl, p+4*(0)(CTX)); | ||
211 | |||
212 | .align 4 | ||
213 | .L__enc_loop: | ||
214 | round_enc(); | ||
215 | |||
216 | leal 2(RLOOPd), RLOOPd; | ||
217 | cmpl $17, RLOOPd; | ||
218 | jne .L__enc_loop; | ||
219 | |||
220 | add_roundkey(RXr, p+4*(17)(CTX)); | ||
221 | |||
222 | write_block(RXl, RXr); | ||
223 | |||
224 | ret; | ||
225 | ENDPROC(__blowfish_enc_blk32) | ||
226 | |||
227 | .align 8 | ||
228 | __blowfish_dec_blk32: | ||
229 | /* input: | ||
230 | * %rdi: ctx, CTX | ||
231 | * RXl0..4, RXr0..4: ciphertext | ||
232 | * output: | ||
233 | * RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped) | ||
234 | */ | ||
235 | init_round_constants(); | ||
236 | |||
237 | read_block(RXl, RXr); | ||
238 | |||
239 | movl $14, RLOOPd; | ||
240 | add_roundkey(RXl, p+4*(17)(CTX)); | ||
241 | |||
242 | .align 4 | ||
243 | .L__dec_loop: | ||
244 | round_dec(); | ||
245 | |||
246 | addl $-2, RLOOPd; | ||
247 | jns .L__dec_loop; | ||
248 | |||
249 | add_roundkey(RXr, p+4*(0)(CTX)); | ||
250 | |||
251 | write_block(RXl, RXr); | ||
252 | |||
253 | ret; | ||
254 | ENDPROC(__blowfish_dec_blk32) | ||
255 | |||
256 | ENTRY(blowfish_ecb_enc_32way) | ||
257 | /* input: | ||
258 | * %rdi: ctx, CTX | ||
259 | * %rsi: dst | ||
260 | * %rdx: src | ||
261 | */ | ||
262 | |||
263 | vzeroupper; | ||
264 | |||
265 | vmovdqu 0*32(%rdx), RXl0; | ||
266 | vmovdqu 1*32(%rdx), RXr0; | ||
267 | vmovdqu 2*32(%rdx), RXl1; | ||
268 | vmovdqu 3*32(%rdx), RXr1; | ||
269 | vmovdqu 4*32(%rdx), RXl2; | ||
270 | vmovdqu 5*32(%rdx), RXr2; | ||
271 | vmovdqu 6*32(%rdx), RXl3; | ||
272 | vmovdqu 7*32(%rdx), RXr3; | ||
273 | |||
274 | call __blowfish_enc_blk32; | ||
275 | |||
276 | vmovdqu RXr0, 0*32(%rsi); | ||
277 | vmovdqu RXl0, 1*32(%rsi); | ||
278 | vmovdqu RXr1, 2*32(%rsi); | ||
279 | vmovdqu RXl1, 3*32(%rsi); | ||
280 | vmovdqu RXr2, 4*32(%rsi); | ||
281 | vmovdqu RXl2, 5*32(%rsi); | ||
282 | vmovdqu RXr3, 6*32(%rsi); | ||
283 | vmovdqu RXl3, 7*32(%rsi); | ||
284 | |||
285 | vzeroupper; | ||
286 | |||
287 | ret; | ||
288 | ENDPROC(blowfish_ecb_enc_32way) | ||
289 | |||
290 | ENTRY(blowfish_ecb_dec_32way) | ||
291 | /* input: | ||
292 | * %rdi: ctx, CTX | ||
293 | * %rsi: dst | ||
294 | * %rdx: src | ||
295 | */ | ||
296 | |||
297 | vzeroupper; | ||
298 | |||
299 | vmovdqu 0*32(%rdx), RXl0; | ||
300 | vmovdqu 1*32(%rdx), RXr0; | ||
301 | vmovdqu 2*32(%rdx), RXl1; | ||
302 | vmovdqu 3*32(%rdx), RXr1; | ||
303 | vmovdqu 4*32(%rdx), RXl2; | ||
304 | vmovdqu 5*32(%rdx), RXr2; | ||
305 | vmovdqu 6*32(%rdx), RXl3; | ||
306 | vmovdqu 7*32(%rdx), RXr3; | ||
307 | |||
308 | call __blowfish_dec_blk32; | ||
309 | |||
310 | vmovdqu RXr0, 0*32(%rsi); | ||
311 | vmovdqu RXl0, 1*32(%rsi); | ||
312 | vmovdqu RXr1, 2*32(%rsi); | ||
313 | vmovdqu RXl1, 3*32(%rsi); | ||
314 | vmovdqu RXr2, 4*32(%rsi); | ||
315 | vmovdqu RXl2, 5*32(%rsi); | ||
316 | vmovdqu RXr3, 6*32(%rsi); | ||
317 | vmovdqu RXl3, 7*32(%rsi); | ||
318 | |||
319 | vzeroupper; | ||
320 | |||
321 | ret; | ||
322 | ENDPROC(blowfish_ecb_dec_32way) | ||
323 | |||
324 | ENTRY(blowfish_cbc_dec_32way) | ||
325 | /* input: | ||
326 | * %rdi: ctx, CTX | ||
327 | * %rsi: dst | ||
328 | * %rdx: src | ||
329 | */ | ||
330 | |||
331 | vzeroupper; | ||
332 | |||
333 | vmovdqu 0*32(%rdx), RXl0; | ||
334 | vmovdqu 1*32(%rdx), RXr0; | ||
335 | vmovdqu 2*32(%rdx), RXl1; | ||
336 | vmovdqu 3*32(%rdx), RXr1; | ||
337 | vmovdqu 4*32(%rdx), RXl2; | ||
338 | vmovdqu 5*32(%rdx), RXr2; | ||
339 | vmovdqu 6*32(%rdx), RXl3; | ||
340 | vmovdqu 7*32(%rdx), RXr3; | ||
341 | |||
342 | call __blowfish_dec_blk32; | ||
343 | |||
344 | /* xor with src */ | ||
345 | vmovq (%rdx), RT0x; | ||
346 | vpshufd $0x4f, RT0x, RT0x; | ||
347 | vinserti128 $1, 8(%rdx), RT0, RT0; | ||
348 | vpxor RT0, RXr0, RXr0; | ||
349 | vpxor 0*32+24(%rdx), RXl0, RXl0; | ||
350 | vpxor 1*32+24(%rdx), RXr1, RXr1; | ||
351 | vpxor 2*32+24(%rdx), RXl1, RXl1; | ||
352 | vpxor 3*32+24(%rdx), RXr2, RXr2; | ||
353 | vpxor 4*32+24(%rdx), RXl2, RXl2; | ||
354 | vpxor 5*32+24(%rdx), RXr3, RXr3; | ||
355 | vpxor 6*32+24(%rdx), RXl3, RXl3; | ||
356 | |||
357 | vmovdqu RXr0, (0*32)(%rsi); | ||
358 | vmovdqu RXl0, (1*32)(%rsi); | ||
359 | vmovdqu RXr1, (2*32)(%rsi); | ||
360 | vmovdqu RXl1, (3*32)(%rsi); | ||
361 | vmovdqu RXr2, (4*32)(%rsi); | ||
362 | vmovdqu RXl2, (5*32)(%rsi); | ||
363 | vmovdqu RXr3, (6*32)(%rsi); | ||
364 | vmovdqu RXl3, (7*32)(%rsi); | ||
365 | |||
366 | vzeroupper; | ||
367 | |||
368 | ret; | ||
369 | ENDPROC(blowfish_cbc_dec_32way) | ||
370 | |||
371 | ENTRY(blowfish_ctr_32way) | ||
372 | /* input: | ||
373 | * %rdi: ctx, CTX | ||
374 | * %rsi: dst | ||
375 | * %rdx: src | ||
376 | * %rcx: iv (big endian, 64bit) | ||
377 | */ | ||
378 | |||
379 | vzeroupper; | ||
380 | |||
381 | vpcmpeqd RT0, RT0, RT0; | ||
382 | vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */ | ||
383 | |||
384 | vpcmpeqd RT1x, RT1x, RT1x; | ||
385 | vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */ | ||
386 | vpxor RIDX0, RIDX0, RIDX0; | ||
387 | vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */ | ||
388 | |||
389 | vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */ | ||
390 | |||
391 | vpcmpeqd RT1, RT1, RT1; | ||
392 | vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */ | ||
393 | vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */ | ||
394 | |||
395 | vbroadcasti128 .Lbswap_iv_mask, RIDX0; | ||
396 | vbroadcasti128 .Lbswap128_mask, RIDX1; | ||
397 | |||
398 | /* load IV and byteswap */ | ||
399 | vmovq (%rcx), RT1x; | ||
400 | vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */ | ||
401 | vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */ | ||
402 | |||
403 | /* construct IVs */ | ||
404 | vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */ | ||
405 | vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */ | ||
406 | vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */ | ||
407 | vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */ | ||
408 | vpsubq RIDX2, RT1, RT1; | ||
409 | vpshufb RIDX1, RT1, RXl1; | ||
410 | vpsubq RIDX2, RT1, RT1; | ||
411 | vpshufb RIDX1, RT1, RXr1; | ||
412 | vpsubq RIDX2, RT1, RT1; | ||
413 | vpshufb RIDX1, RT1, RXl2; | ||
414 | vpsubq RIDX2, RT1, RT1; | ||
415 | vpshufb RIDX1, RT1, RXr2; | ||
416 | vpsubq RIDX2, RT1, RT1; | ||
417 | vpshufb RIDX1, RT1, RXl3; | ||
418 | vpsubq RIDX2, RT1, RT1; | ||
419 | vpshufb RIDX1, RT1, RXr3; | ||
420 | |||
421 | /* store last IV */ | ||
422 | vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */ | ||
423 | vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */ | ||
424 | vmovq RT1x, (%rcx); | ||
425 | |||
426 | call __blowfish_enc_blk32; | ||
427 | |||
428 | /* dst = src ^ iv */ | ||
429 | vpxor 0*32(%rdx), RXr0, RXr0; | ||
430 | vpxor 1*32(%rdx), RXl0, RXl0; | ||
431 | vpxor 2*32(%rdx), RXr1, RXr1; | ||
432 | vpxor 3*32(%rdx), RXl1, RXl1; | ||
433 | vpxor 4*32(%rdx), RXr2, RXr2; | ||
434 | vpxor 5*32(%rdx), RXl2, RXl2; | ||
435 | vpxor 6*32(%rdx), RXr3, RXr3; | ||
436 | vpxor 7*32(%rdx), RXl3, RXl3; | ||
437 | vmovdqu RXr0, (0*32)(%rsi); | ||
438 | vmovdqu RXl0, (1*32)(%rsi); | ||
439 | vmovdqu RXr1, (2*32)(%rsi); | ||
440 | vmovdqu RXl1, (3*32)(%rsi); | ||
441 | vmovdqu RXr2, (4*32)(%rsi); | ||
442 | vmovdqu RXl2, (5*32)(%rsi); | ||
443 | vmovdqu RXr3, (6*32)(%rsi); | ||
444 | vmovdqu RXl3, (7*32)(%rsi); | ||
445 | |||
446 | vzeroupper; | ||
447 | |||
448 | ret; | ||
449 | ENDPROC(blowfish_ctr_32way) | ||
diff --git a/arch/x86/crypto/blowfish_avx2_glue.c b/arch/x86/crypto/blowfish_avx2_glue.c new file mode 100644 index 000000000000..4417e9aea78d --- /dev/null +++ b/arch/x86/crypto/blowfish_avx2_glue.c | |||
@@ -0,0 +1,585 @@ | |||
1 | /* | ||
2 | * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/module.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/crypto.h> | ||
26 | #include <linux/err.h> | ||
27 | #include <crypto/algapi.h> | ||
28 | #include <crypto/blowfish.h> | ||
29 | #include <crypto/cryptd.h> | ||
30 | #include <crypto/ctr.h> | ||
31 | #include <asm/i387.h> | ||
32 | #include <asm/xcr.h> | ||
33 | #include <asm/xsave.h> | ||
34 | #include <asm/crypto/blowfish.h> | ||
35 | #include <asm/crypto/ablk_helper.h> | ||
36 | #include <crypto/scatterwalk.h> | ||
37 | |||
38 | #define BF_AVX2_PARALLEL_BLOCKS 32 | ||
39 | |||
40 | /* 32-way AVX2 parallel cipher functions */ | ||
41 | asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst, | ||
42 | const u8 *src); | ||
43 | asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst, | ||
44 | const u8 *src); | ||
45 | asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst, | ||
46 | const u8 *src); | ||
47 | asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src, | ||
48 | __be64 *iv); | ||
49 | |||
50 | static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
51 | { | ||
52 | if (fpu_enabled) | ||
53 | return true; | ||
54 | |||
55 | /* FPU is only used when chunk to be processed is large enough, so | ||
56 | * do not enable FPU until it is necessary. | ||
57 | */ | ||
58 | if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS) | ||
59 | return false; | ||
60 | |||
61 | kernel_fpu_begin(); | ||
62 | return true; | ||
63 | } | ||
64 | |||
65 | static inline void bf_fpu_end(bool fpu_enabled) | ||
66 | { | ||
67 | if (fpu_enabled) | ||
68 | kernel_fpu_end(); | ||
69 | } | ||
70 | |||
71 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
72 | bool enc) | ||
73 | { | ||
74 | bool fpu_enabled = false; | ||
75 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
76 | const unsigned int bsize = BF_BLOCK_SIZE; | ||
77 | unsigned int nbytes; | ||
78 | int err; | ||
79 | |||
80 | err = blkcipher_walk_virt(desc, walk); | ||
81 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
82 | |||
83 | while ((nbytes = walk->nbytes)) { | ||
84 | u8 *wsrc = walk->src.virt.addr; | ||
85 | u8 *wdst = walk->dst.virt.addr; | ||
86 | |||
87 | fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); | ||
88 | |||
89 | /* Process multi-block AVX2 batch */ | ||
90 | if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { | ||
91 | do { | ||
92 | if (enc) | ||
93 | blowfish_ecb_enc_32way(ctx, wdst, wsrc); | ||
94 | else | ||
95 | blowfish_ecb_dec_32way(ctx, wdst, wsrc); | ||
96 | |||
97 | wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
98 | wdst += bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
99 | nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
100 | } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); | ||
101 | |||
102 | if (nbytes < bsize) | ||
103 | goto done; | ||
104 | } | ||
105 | |||
106 | /* Process multi-block batch */ | ||
107 | if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { | ||
108 | do { | ||
109 | if (enc) | ||
110 | blowfish_enc_blk_4way(ctx, wdst, wsrc); | ||
111 | else | ||
112 | blowfish_dec_blk_4way(ctx, wdst, wsrc); | ||
113 | |||
114 | wsrc += bsize * BF_PARALLEL_BLOCKS; | ||
115 | wdst += bsize * BF_PARALLEL_BLOCKS; | ||
116 | nbytes -= bsize * BF_PARALLEL_BLOCKS; | ||
117 | } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); | ||
118 | |||
119 | if (nbytes < bsize) | ||
120 | goto done; | ||
121 | } | ||
122 | |||
123 | /* Handle leftovers */ | ||
124 | do { | ||
125 | if (enc) | ||
126 | blowfish_enc_blk(ctx, wdst, wsrc); | ||
127 | else | ||
128 | blowfish_dec_blk(ctx, wdst, wsrc); | ||
129 | |||
130 | wsrc += bsize; | ||
131 | wdst += bsize; | ||
132 | nbytes -= bsize; | ||
133 | } while (nbytes >= bsize); | ||
134 | |||
135 | done: | ||
136 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
137 | } | ||
138 | |||
139 | bf_fpu_end(fpu_enabled); | ||
140 | return err; | ||
141 | } | ||
142 | |||
143 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
144 | struct scatterlist *src, unsigned int nbytes) | ||
145 | { | ||
146 | struct blkcipher_walk walk; | ||
147 | |||
148 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
149 | return ecb_crypt(desc, &walk, true); | ||
150 | } | ||
151 | |||
152 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
153 | struct scatterlist *src, unsigned int nbytes) | ||
154 | { | ||
155 | struct blkcipher_walk walk; | ||
156 | |||
157 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
158 | return ecb_crypt(desc, &walk, false); | ||
159 | } | ||
160 | |||
161 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | ||
162 | struct blkcipher_walk *walk) | ||
163 | { | ||
164 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
165 | unsigned int bsize = BF_BLOCK_SIZE; | ||
166 | unsigned int nbytes = walk->nbytes; | ||
167 | u64 *src = (u64 *)walk->src.virt.addr; | ||
168 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
169 | u64 *iv = (u64 *)walk->iv; | ||
170 | |||
171 | do { | ||
172 | *dst = *src ^ *iv; | ||
173 | blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
174 | iv = dst; | ||
175 | |||
176 | src += 1; | ||
177 | dst += 1; | ||
178 | nbytes -= bsize; | ||
179 | } while (nbytes >= bsize); | ||
180 | |||
181 | *(u64 *)walk->iv = *iv; | ||
182 | return nbytes; | ||
183 | } | ||
184 | |||
185 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
186 | struct scatterlist *src, unsigned int nbytes) | ||
187 | { | ||
188 | struct blkcipher_walk walk; | ||
189 | int err; | ||
190 | |||
191 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
192 | err = blkcipher_walk_virt(desc, &walk); | ||
193 | |||
194 | while ((nbytes = walk.nbytes)) { | ||
195 | nbytes = __cbc_encrypt(desc, &walk); | ||
196 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
197 | } | ||
198 | |||
199 | return err; | ||
200 | } | ||
201 | |||
202 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | ||
203 | struct blkcipher_walk *walk) | ||
204 | { | ||
205 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
206 | const unsigned int bsize = BF_BLOCK_SIZE; | ||
207 | unsigned int nbytes = walk->nbytes; | ||
208 | u64 *src = (u64 *)walk->src.virt.addr; | ||
209 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
210 | u64 last_iv; | ||
211 | int i; | ||
212 | |||
213 | /* Start of the last block. */ | ||
214 | src += nbytes / bsize - 1; | ||
215 | dst += nbytes / bsize - 1; | ||
216 | |||
217 | last_iv = *src; | ||
218 | |||
219 | /* Process multi-block AVX2 batch */ | ||
220 | if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { | ||
221 | do { | ||
222 | nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1); | ||
223 | src -= BF_AVX2_PARALLEL_BLOCKS - 1; | ||
224 | dst -= BF_AVX2_PARALLEL_BLOCKS - 1; | ||
225 | |||
226 | blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src); | ||
227 | |||
228 | nbytes -= bsize; | ||
229 | if (nbytes < bsize) | ||
230 | goto done; | ||
231 | |||
232 | *dst ^= *(src - 1); | ||
233 | src -= 1; | ||
234 | dst -= 1; | ||
235 | } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); | ||
236 | |||
237 | if (nbytes < bsize) | ||
238 | goto done; | ||
239 | } | ||
240 | |||
241 | /* Process multi-block batch */ | ||
242 | if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { | ||
243 | u64 ivs[BF_PARALLEL_BLOCKS - 1]; | ||
244 | |||
245 | do { | ||
246 | nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1); | ||
247 | src -= BF_PARALLEL_BLOCKS - 1; | ||
248 | dst -= BF_PARALLEL_BLOCKS - 1; | ||
249 | |||
250 | for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++) | ||
251 | ivs[i] = src[i]; | ||
252 | |||
253 | blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); | ||
254 | |||
255 | for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++) | ||
256 | dst[i + 1] ^= ivs[i]; | ||
257 | |||
258 | nbytes -= bsize; | ||
259 | if (nbytes < bsize) | ||
260 | goto done; | ||
261 | |||
262 | *dst ^= *(src - 1); | ||
263 | src -= 1; | ||
264 | dst -= 1; | ||
265 | } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); | ||
266 | |||
267 | if (nbytes < bsize) | ||
268 | goto done; | ||
269 | } | ||
270 | |||
271 | /* Handle leftovers */ | ||
272 | for (;;) { | ||
273 | blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); | ||
274 | |||
275 | nbytes -= bsize; | ||
276 | if (nbytes < bsize) | ||
277 | break; | ||
278 | |||
279 | *dst ^= *(src - 1); | ||
280 | src -= 1; | ||
281 | dst -= 1; | ||
282 | } | ||
283 | |||
284 | done: | ||
285 | *dst ^= *(u64 *)walk->iv; | ||
286 | *(u64 *)walk->iv = last_iv; | ||
287 | |||
288 | return nbytes; | ||
289 | } | ||
290 | |||
291 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
292 | struct scatterlist *src, unsigned int nbytes) | ||
293 | { | ||
294 | bool fpu_enabled = false; | ||
295 | struct blkcipher_walk walk; | ||
296 | int err; | ||
297 | |||
298 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
299 | err = blkcipher_walk_virt(desc, &walk); | ||
300 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
301 | |||
302 | while ((nbytes = walk.nbytes)) { | ||
303 | fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); | ||
304 | nbytes = __cbc_decrypt(desc, &walk); | ||
305 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
306 | } | ||
307 | |||
308 | bf_fpu_end(fpu_enabled); | ||
309 | return err; | ||
310 | } | ||
311 | |||
312 | static void ctr_crypt_final(struct blkcipher_desc *desc, | ||
313 | struct blkcipher_walk *walk) | ||
314 | { | ||
315 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
316 | u8 *ctrblk = walk->iv; | ||
317 | u8 keystream[BF_BLOCK_SIZE]; | ||
318 | u8 *src = walk->src.virt.addr; | ||
319 | u8 *dst = walk->dst.virt.addr; | ||
320 | unsigned int nbytes = walk->nbytes; | ||
321 | |||
322 | blowfish_enc_blk(ctx, keystream, ctrblk); | ||
323 | crypto_xor(keystream, src, nbytes); | ||
324 | memcpy(dst, keystream, nbytes); | ||
325 | |||
326 | crypto_inc(ctrblk, BF_BLOCK_SIZE); | ||
327 | } | ||
328 | |||
329 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
330 | struct blkcipher_walk *walk) | ||
331 | { | ||
332 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
333 | unsigned int bsize = BF_BLOCK_SIZE; | ||
334 | unsigned int nbytes = walk->nbytes; | ||
335 | u64 *src = (u64 *)walk->src.virt.addr; | ||
336 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
337 | int i; | ||
338 | |||
339 | /* Process multi-block AVX2 batch */ | ||
340 | if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) { | ||
341 | do { | ||
342 | blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src, | ||
343 | (__be64 *)walk->iv); | ||
344 | |||
345 | src += BF_AVX2_PARALLEL_BLOCKS; | ||
346 | dst += BF_AVX2_PARALLEL_BLOCKS; | ||
347 | nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS; | ||
348 | } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS); | ||
349 | |||
350 | if (nbytes < bsize) | ||
351 | goto done; | ||
352 | } | ||
353 | |||
354 | /* Process four block batch */ | ||
355 | if (nbytes >= bsize * BF_PARALLEL_BLOCKS) { | ||
356 | __be64 ctrblocks[BF_PARALLEL_BLOCKS]; | ||
357 | u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); | ||
358 | |||
359 | do { | ||
360 | /* create ctrblks for parallel encrypt */ | ||
361 | for (i = 0; i < BF_PARALLEL_BLOCKS; i++) { | ||
362 | if (dst != src) | ||
363 | dst[i] = src[i]; | ||
364 | |||
365 | ctrblocks[i] = cpu_to_be64(ctrblk++); | ||
366 | } | ||
367 | |||
368 | blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, | ||
369 | (u8 *)ctrblocks); | ||
370 | |||
371 | src += BF_PARALLEL_BLOCKS; | ||
372 | dst += BF_PARALLEL_BLOCKS; | ||
373 | nbytes -= bsize * BF_PARALLEL_BLOCKS; | ||
374 | } while (nbytes >= bsize * BF_PARALLEL_BLOCKS); | ||
375 | |||
376 | *(__be64 *)walk->iv = cpu_to_be64(ctrblk); | ||
377 | |||
378 | if (nbytes < bsize) | ||
379 | goto done; | ||
380 | } | ||
381 | |||
382 | /* Handle leftovers */ | ||
383 | do { | ||
384 | u64 ctrblk; | ||
385 | |||
386 | if (dst != src) | ||
387 | *dst = *src; | ||
388 | |||
389 | ctrblk = *(u64 *)walk->iv; | ||
390 | be64_add_cpu((__be64 *)walk->iv, 1); | ||
391 | |||
392 | blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); | ||
393 | |||
394 | src += 1; | ||
395 | dst += 1; | ||
396 | } while ((nbytes -= bsize) >= bsize); | ||
397 | |||
398 | done: | ||
399 | return nbytes; | ||
400 | } | ||
401 | |||
402 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
403 | struct scatterlist *src, unsigned int nbytes) | ||
404 | { | ||
405 | bool fpu_enabled = false; | ||
406 | struct blkcipher_walk walk; | ||
407 | int err; | ||
408 | |||
409 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
410 | err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); | ||
411 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
412 | |||
413 | while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { | ||
414 | fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes); | ||
415 | nbytes = __ctr_crypt(desc, &walk); | ||
416 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
417 | } | ||
418 | |||
419 | bf_fpu_end(fpu_enabled); | ||
420 | |||
421 | if (walk.nbytes) { | ||
422 | ctr_crypt_final(desc, &walk); | ||
423 | err = blkcipher_walk_done(desc, &walk, 0); | ||
424 | } | ||
425 | |||
426 | return err; | ||
427 | } | ||
428 | |||
429 | static struct crypto_alg bf_algs[6] = { { | ||
430 | .cra_name = "__ecb-blowfish-avx2", | ||
431 | .cra_driver_name = "__driver-ecb-blowfish-avx2", | ||
432 | .cra_priority = 0, | ||
433 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
434 | .cra_blocksize = BF_BLOCK_SIZE, | ||
435 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
436 | .cra_alignmask = 0, | ||
437 | .cra_type = &crypto_blkcipher_type, | ||
438 | .cra_module = THIS_MODULE, | ||
439 | .cra_u = { | ||
440 | .blkcipher = { | ||
441 | .min_keysize = BF_MIN_KEY_SIZE, | ||
442 | .max_keysize = BF_MAX_KEY_SIZE, | ||
443 | .setkey = blowfish_setkey, | ||
444 | .encrypt = ecb_encrypt, | ||
445 | .decrypt = ecb_decrypt, | ||
446 | }, | ||
447 | }, | ||
448 | }, { | ||
449 | .cra_name = "__cbc-blowfish-avx2", | ||
450 | .cra_driver_name = "__driver-cbc-blowfish-avx2", | ||
451 | .cra_priority = 0, | ||
452 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
453 | .cra_blocksize = BF_BLOCK_SIZE, | ||
454 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
455 | .cra_alignmask = 0, | ||
456 | .cra_type = &crypto_blkcipher_type, | ||
457 | .cra_module = THIS_MODULE, | ||
458 | .cra_u = { | ||
459 | .blkcipher = { | ||
460 | .min_keysize = BF_MIN_KEY_SIZE, | ||
461 | .max_keysize = BF_MAX_KEY_SIZE, | ||
462 | .setkey = blowfish_setkey, | ||
463 | .encrypt = cbc_encrypt, | ||
464 | .decrypt = cbc_decrypt, | ||
465 | }, | ||
466 | }, | ||
467 | }, { | ||
468 | .cra_name = "__ctr-blowfish-avx2", | ||
469 | .cra_driver_name = "__driver-ctr-blowfish-avx2", | ||
470 | .cra_priority = 0, | ||
471 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
472 | .cra_blocksize = 1, | ||
473 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
474 | .cra_alignmask = 0, | ||
475 | .cra_type = &crypto_blkcipher_type, | ||
476 | .cra_module = THIS_MODULE, | ||
477 | .cra_u = { | ||
478 | .blkcipher = { | ||
479 | .min_keysize = BF_MIN_KEY_SIZE, | ||
480 | .max_keysize = BF_MAX_KEY_SIZE, | ||
481 | .ivsize = BF_BLOCK_SIZE, | ||
482 | .setkey = blowfish_setkey, | ||
483 | .encrypt = ctr_crypt, | ||
484 | .decrypt = ctr_crypt, | ||
485 | }, | ||
486 | }, | ||
487 | }, { | ||
488 | .cra_name = "ecb(blowfish)", | ||
489 | .cra_driver_name = "ecb-blowfish-avx2", | ||
490 | .cra_priority = 400, | ||
491 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
492 | .cra_blocksize = BF_BLOCK_SIZE, | ||
493 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
494 | .cra_alignmask = 0, | ||
495 | .cra_type = &crypto_ablkcipher_type, | ||
496 | .cra_module = THIS_MODULE, | ||
497 | .cra_init = ablk_init, | ||
498 | .cra_exit = ablk_exit, | ||
499 | .cra_u = { | ||
500 | .ablkcipher = { | ||
501 | .min_keysize = BF_MIN_KEY_SIZE, | ||
502 | .max_keysize = BF_MAX_KEY_SIZE, | ||
503 | .setkey = ablk_set_key, | ||
504 | .encrypt = ablk_encrypt, | ||
505 | .decrypt = ablk_decrypt, | ||
506 | }, | ||
507 | }, | ||
508 | }, { | ||
509 | .cra_name = "cbc(blowfish)", | ||
510 | .cra_driver_name = "cbc-blowfish-avx2", | ||
511 | .cra_priority = 400, | ||
512 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
513 | .cra_blocksize = BF_BLOCK_SIZE, | ||
514 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
515 | .cra_alignmask = 0, | ||
516 | .cra_type = &crypto_ablkcipher_type, | ||
517 | .cra_module = THIS_MODULE, | ||
518 | .cra_init = ablk_init, | ||
519 | .cra_exit = ablk_exit, | ||
520 | .cra_u = { | ||
521 | .ablkcipher = { | ||
522 | .min_keysize = BF_MIN_KEY_SIZE, | ||
523 | .max_keysize = BF_MAX_KEY_SIZE, | ||
524 | .ivsize = BF_BLOCK_SIZE, | ||
525 | .setkey = ablk_set_key, | ||
526 | .encrypt = __ablk_encrypt, | ||
527 | .decrypt = ablk_decrypt, | ||
528 | }, | ||
529 | }, | ||
530 | }, { | ||
531 | .cra_name = "ctr(blowfish)", | ||
532 | .cra_driver_name = "ctr-blowfish-avx2", | ||
533 | .cra_priority = 400, | ||
534 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
535 | .cra_blocksize = 1, | ||
536 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
537 | .cra_alignmask = 0, | ||
538 | .cra_type = &crypto_ablkcipher_type, | ||
539 | .cra_module = THIS_MODULE, | ||
540 | .cra_init = ablk_init, | ||
541 | .cra_exit = ablk_exit, | ||
542 | .cra_u = { | ||
543 | .ablkcipher = { | ||
544 | .min_keysize = BF_MIN_KEY_SIZE, | ||
545 | .max_keysize = BF_MAX_KEY_SIZE, | ||
546 | .ivsize = BF_BLOCK_SIZE, | ||
547 | .setkey = ablk_set_key, | ||
548 | .encrypt = ablk_encrypt, | ||
549 | .decrypt = ablk_encrypt, | ||
550 | .geniv = "chainiv", | ||
551 | }, | ||
552 | }, | ||
553 | } }; | ||
554 | |||
555 | |||
556 | static int __init init(void) | ||
557 | { | ||
558 | u64 xcr0; | ||
559 | |||
560 | if (!cpu_has_avx2 || !cpu_has_osxsave) { | ||
561 | pr_info("AVX2 instructions are not detected.\n"); | ||
562 | return -ENODEV; | ||
563 | } | ||
564 | |||
565 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
566 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
567 | pr_info("AVX detected but unusable.\n"); | ||
568 | return -ENODEV; | ||
569 | } | ||
570 | |||
571 | return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); | ||
572 | } | ||
573 | |||
574 | static void __exit fini(void) | ||
575 | { | ||
576 | crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); | ||
577 | } | ||
578 | |||
579 | module_init(init); | ||
580 | module_exit(fini); | ||
581 | |||
582 | MODULE_LICENSE("GPL"); | ||
583 | MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized"); | ||
584 | MODULE_ALIAS("blowfish"); | ||
585 | MODULE_ALIAS("blowfish-asm"); | ||
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 50ec333b70e6..3548d76dbaa9 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Glue Code for assembler optimized version of Blowfish | 2 | * Glue Code for assembler optimized version of Blowfish |
3 | * | 3 | * |
4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 4 | * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
5 | * | 5 | * |
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | 6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: |
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | 7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> |
@@ -32,40 +32,24 @@ | |||
32 | #include <linux/module.h> | 32 | #include <linux/module.h> |
33 | #include <linux/types.h> | 33 | #include <linux/types.h> |
34 | #include <crypto/algapi.h> | 34 | #include <crypto/algapi.h> |
35 | #include <asm/crypto/blowfish.h> | ||
35 | 36 | ||
36 | /* regular block cipher functions */ | 37 | /* regular block cipher functions */ |
37 | asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, | 38 | asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, |
38 | bool xor); | 39 | bool xor); |
40 | EXPORT_SYMBOL_GPL(__blowfish_enc_blk); | ||
41 | |||
39 | asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); | 42 | asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); |
43 | EXPORT_SYMBOL_GPL(blowfish_dec_blk); | ||
40 | 44 | ||
41 | /* 4-way parallel cipher functions */ | 45 | /* 4-way parallel cipher functions */ |
42 | asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | 46 | asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, |
43 | const u8 *src, bool xor); | 47 | const u8 *src, bool xor); |
48 | EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way); | ||
49 | |||
44 | asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, | 50 | asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, |
45 | const u8 *src); | 51 | const u8 *src); |
46 | 52 | EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way); | |
47 | static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) | ||
48 | { | ||
49 | __blowfish_enc_blk(ctx, dst, src, false); | ||
50 | } | ||
51 | |||
52 | static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, | ||
53 | const u8 *src) | ||
54 | { | ||
55 | __blowfish_enc_blk(ctx, dst, src, true); | ||
56 | } | ||
57 | |||
58 | static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
59 | const u8 *src) | ||
60 | { | ||
61 | __blowfish_enc_blk_4way(ctx, dst, src, false); | ||
62 | } | ||
63 | |||
64 | static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, | ||
65 | const u8 *src) | ||
66 | { | ||
67 | __blowfish_enc_blk_4way(ctx, dst, src, true); | ||
68 | } | ||
69 | 53 | ||
70 | static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | 54 | static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) |
71 | { | 55 | { |
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index cfc163469c71..ce71f9212409 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * x86_64/AVX/AES-NI assembler implementation of Camellia | 2 | * x86_64/AVX/AES-NI assembler implementation of Camellia |
3 | * | 3 | * |
4 | * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
@@ -589,6 +589,10 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | |||
589 | .Lbswap128_mask: | 589 | .Lbswap128_mask: |
590 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | 590 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 |
591 | 591 | ||
592 | /* For XTS mode IV generation */ | ||
593 | .Lxts_gf128mul_and_shl1_mask: | ||
594 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
595 | |||
592 | /* | 596 | /* |
593 | * pre-SubByte transform | 597 | * pre-SubByte transform |
594 | * | 598 | * |
@@ -1090,3 +1094,177 @@ ENTRY(camellia_ctr_16way) | |||
1090 | 1094 | ||
1091 | ret; | 1095 | ret; |
1092 | ENDPROC(camellia_ctr_16way) | 1096 | ENDPROC(camellia_ctr_16way) |
1097 | |||
1098 | #define gf128mul_x_ble(iv, mask, tmp) \ | ||
1099 | vpsrad $31, iv, tmp; \ | ||
1100 | vpaddq iv, iv, iv; \ | ||
1101 | vpshufd $0x13, tmp, tmp; \ | ||
1102 | vpand mask, tmp, tmp; \ | ||
1103 | vpxor tmp, iv, iv; | ||
1104 | |||
1105 | .align 8 | ||
1106 | camellia_xts_crypt_16way: | ||
1107 | /* input: | ||
1108 | * %rdi: ctx, CTX | ||
1109 | * %rsi: dst (16 blocks) | ||
1110 | * %rdx: src (16 blocks) | ||
1111 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
1112 | * %r8: index for input whitening key | ||
1113 | * %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16 | ||
1114 | */ | ||
1115 | |||
1116 | subq $(16 * 16), %rsp; | ||
1117 | movq %rsp, %rax; | ||
1118 | |||
1119 | vmovdqa .Lxts_gf128mul_and_shl1_mask, %xmm14; | ||
1120 | |||
1121 | /* load IV */ | ||
1122 | vmovdqu (%rcx), %xmm0; | ||
1123 | vpxor 0 * 16(%rdx), %xmm0, %xmm15; | ||
1124 | vmovdqu %xmm15, 15 * 16(%rax); | ||
1125 | vmovdqu %xmm0, 0 * 16(%rsi); | ||
1126 | |||
1127 | /* construct IVs */ | ||
1128 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1129 | vpxor 1 * 16(%rdx), %xmm0, %xmm15; | ||
1130 | vmovdqu %xmm15, 14 * 16(%rax); | ||
1131 | vmovdqu %xmm0, 1 * 16(%rsi); | ||
1132 | |||
1133 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1134 | vpxor 2 * 16(%rdx), %xmm0, %xmm13; | ||
1135 | vmovdqu %xmm0, 2 * 16(%rsi); | ||
1136 | |||
1137 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1138 | vpxor 3 * 16(%rdx), %xmm0, %xmm12; | ||
1139 | vmovdqu %xmm0, 3 * 16(%rsi); | ||
1140 | |||
1141 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1142 | vpxor 4 * 16(%rdx), %xmm0, %xmm11; | ||
1143 | vmovdqu %xmm0, 4 * 16(%rsi); | ||
1144 | |||
1145 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1146 | vpxor 5 * 16(%rdx), %xmm0, %xmm10; | ||
1147 | vmovdqu %xmm0, 5 * 16(%rsi); | ||
1148 | |||
1149 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1150 | vpxor 6 * 16(%rdx), %xmm0, %xmm9; | ||
1151 | vmovdqu %xmm0, 6 * 16(%rsi); | ||
1152 | |||
1153 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1154 | vpxor 7 * 16(%rdx), %xmm0, %xmm8; | ||
1155 | vmovdqu %xmm0, 7 * 16(%rsi); | ||
1156 | |||
1157 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1158 | vpxor 8 * 16(%rdx), %xmm0, %xmm7; | ||
1159 | vmovdqu %xmm0, 8 * 16(%rsi); | ||
1160 | |||
1161 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1162 | vpxor 9 * 16(%rdx), %xmm0, %xmm6; | ||
1163 | vmovdqu %xmm0, 9 * 16(%rsi); | ||
1164 | |||
1165 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1166 | vpxor 10 * 16(%rdx), %xmm0, %xmm5; | ||
1167 | vmovdqu %xmm0, 10 * 16(%rsi); | ||
1168 | |||
1169 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1170 | vpxor 11 * 16(%rdx), %xmm0, %xmm4; | ||
1171 | vmovdqu %xmm0, 11 * 16(%rsi); | ||
1172 | |||
1173 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1174 | vpxor 12 * 16(%rdx), %xmm0, %xmm3; | ||
1175 | vmovdqu %xmm0, 12 * 16(%rsi); | ||
1176 | |||
1177 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1178 | vpxor 13 * 16(%rdx), %xmm0, %xmm2; | ||
1179 | vmovdqu %xmm0, 13 * 16(%rsi); | ||
1180 | |||
1181 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1182 | vpxor 14 * 16(%rdx), %xmm0, %xmm1; | ||
1183 | vmovdqu %xmm0, 14 * 16(%rsi); | ||
1184 | |||
1185 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1186 | vpxor 15 * 16(%rdx), %xmm0, %xmm15; | ||
1187 | vmovdqu %xmm15, 0 * 16(%rax); | ||
1188 | vmovdqu %xmm0, 15 * 16(%rsi); | ||
1189 | |||
1190 | gf128mul_x_ble(%xmm0, %xmm14, %xmm15); | ||
1191 | vmovdqu %xmm0, (%rcx); | ||
1192 | |||
1193 | /* inpack16_pre: */ | ||
1194 | vmovq (key_table)(CTX, %r8, 8), %xmm15; | ||
1195 | vpshufb .Lpack_bswap, %xmm15, %xmm15; | ||
1196 | vpxor 0 * 16(%rax), %xmm15, %xmm0; | ||
1197 | vpxor %xmm1, %xmm15, %xmm1; | ||
1198 | vpxor %xmm2, %xmm15, %xmm2; | ||
1199 | vpxor %xmm3, %xmm15, %xmm3; | ||
1200 | vpxor %xmm4, %xmm15, %xmm4; | ||
1201 | vpxor %xmm5, %xmm15, %xmm5; | ||
1202 | vpxor %xmm6, %xmm15, %xmm6; | ||
1203 | vpxor %xmm7, %xmm15, %xmm7; | ||
1204 | vpxor %xmm8, %xmm15, %xmm8; | ||
1205 | vpxor %xmm9, %xmm15, %xmm9; | ||
1206 | vpxor %xmm10, %xmm15, %xmm10; | ||
1207 | vpxor %xmm11, %xmm15, %xmm11; | ||
1208 | vpxor %xmm12, %xmm15, %xmm12; | ||
1209 | vpxor %xmm13, %xmm15, %xmm13; | ||
1210 | vpxor 14 * 16(%rax), %xmm15, %xmm14; | ||
1211 | vpxor 15 * 16(%rax), %xmm15, %xmm15; | ||
1212 | |||
1213 | call *%r9; | ||
1214 | |||
1215 | addq $(16 * 16), %rsp; | ||
1216 | |||
1217 | vpxor 0 * 16(%rsi), %xmm7, %xmm7; | ||
1218 | vpxor 1 * 16(%rsi), %xmm6, %xmm6; | ||
1219 | vpxor 2 * 16(%rsi), %xmm5, %xmm5; | ||
1220 | vpxor 3 * 16(%rsi), %xmm4, %xmm4; | ||
1221 | vpxor 4 * 16(%rsi), %xmm3, %xmm3; | ||
1222 | vpxor 5 * 16(%rsi), %xmm2, %xmm2; | ||
1223 | vpxor 6 * 16(%rsi), %xmm1, %xmm1; | ||
1224 | vpxor 7 * 16(%rsi), %xmm0, %xmm0; | ||
1225 | vpxor 8 * 16(%rsi), %xmm15, %xmm15; | ||
1226 | vpxor 9 * 16(%rsi), %xmm14, %xmm14; | ||
1227 | vpxor 10 * 16(%rsi), %xmm13, %xmm13; | ||
1228 | vpxor 11 * 16(%rsi), %xmm12, %xmm12; | ||
1229 | vpxor 12 * 16(%rsi), %xmm11, %xmm11; | ||
1230 | vpxor 13 * 16(%rsi), %xmm10, %xmm10; | ||
1231 | vpxor 14 * 16(%rsi), %xmm9, %xmm9; | ||
1232 | vpxor 15 * 16(%rsi), %xmm8, %xmm8; | ||
1233 | write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, | ||
1234 | %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, | ||
1235 | %xmm8, %rsi); | ||
1236 | |||
1237 | ret; | ||
1238 | ENDPROC(camellia_xts_crypt_16way) | ||
1239 | |||
1240 | ENTRY(camellia_xts_enc_16way) | ||
1241 | /* input: | ||
1242 | * %rdi: ctx, CTX | ||
1243 | * %rsi: dst (16 blocks) | ||
1244 | * %rdx: src (16 blocks) | ||
1245 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
1246 | */ | ||
1247 | xorl %r8d, %r8d; /* input whitening key, 0 for enc */ | ||
1248 | |||
1249 | leaq __camellia_enc_blk16, %r9; | ||
1250 | |||
1251 | jmp camellia_xts_crypt_16way; | ||
1252 | ENDPROC(camellia_xts_enc_16way) | ||
1253 | |||
1254 | ENTRY(camellia_xts_dec_16way) | ||
1255 | /* input: | ||
1256 | * %rdi: ctx, CTX | ||
1257 | * %rsi: dst (16 blocks) | ||
1258 | * %rdx: src (16 blocks) | ||
1259 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
1260 | */ | ||
1261 | |||
1262 | cmpl $16, key_length(CTX); | ||
1263 | movl $32, %r8d; | ||
1264 | movl $24, %eax; | ||
1265 | cmovel %eax, %r8d; /* input whitening key, last for dec */ | ||
1266 | |||
1267 | leaq __camellia_dec_blk16, %r9; | ||
1268 | |||
1269 | jmp camellia_xts_crypt_16way; | ||
1270 | ENDPROC(camellia_xts_dec_16way) | ||
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S new file mode 100644 index 000000000000..91a1878fcc3e --- /dev/null +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S | |||
@@ -0,0 +1,1368 @@ | |||
1 | /* | ||
2 | * x86_64/AVX2/AES-NI assembler implementation of Camellia | ||
3 | * | ||
4 | * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/linkage.h> | ||
14 | |||
15 | #define CAMELLIA_TABLE_BYTE_LEN 272 | ||
16 | |||
17 | /* struct camellia_ctx: */ | ||
18 | #define key_table 0 | ||
19 | #define key_length CAMELLIA_TABLE_BYTE_LEN | ||
20 | |||
21 | /* register macros */ | ||
22 | #define CTX %rdi | ||
23 | #define RIO %r8 | ||
24 | |||
25 | /********************************************************************** | ||
26 | helper macros | ||
27 | **********************************************************************/ | ||
28 | #define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ | ||
29 | vpand x, mask4bit, tmp0; \ | ||
30 | vpandn x, mask4bit, x; \ | ||
31 | vpsrld $4, x, x; \ | ||
32 | \ | ||
33 | vpshufb tmp0, lo_t, tmp0; \ | ||
34 | vpshufb x, hi_t, x; \ | ||
35 | vpxor tmp0, x, x; | ||
36 | |||
37 | #define ymm0_x xmm0 | ||
38 | #define ymm1_x xmm1 | ||
39 | #define ymm2_x xmm2 | ||
40 | #define ymm3_x xmm3 | ||
41 | #define ymm4_x xmm4 | ||
42 | #define ymm5_x xmm5 | ||
43 | #define ymm6_x xmm6 | ||
44 | #define ymm7_x xmm7 | ||
45 | #define ymm8_x xmm8 | ||
46 | #define ymm9_x xmm9 | ||
47 | #define ymm10_x xmm10 | ||
48 | #define ymm11_x xmm11 | ||
49 | #define ymm12_x xmm12 | ||
50 | #define ymm13_x xmm13 | ||
51 | #define ymm14_x xmm14 | ||
52 | #define ymm15_x xmm15 | ||
53 | |||
54 | /* | ||
55 | * AES-NI instructions do not support ymmX registers, so we need splitting and | ||
56 | * merging. | ||
57 | */ | ||
58 | #define vaesenclast256(zero, yreg, tmp) \ | ||
59 | vextracti128 $1, yreg, tmp##_x; \ | ||
60 | vaesenclast zero##_x, yreg##_x, yreg##_x; \ | ||
61 | vaesenclast zero##_x, tmp##_x, tmp##_x; \ | ||
62 | vinserti128 $1, tmp##_x, yreg, yreg; | ||
63 | |||
64 | /********************************************************************** | ||
65 | 32-way camellia | ||
66 | **********************************************************************/ | ||
67 | |||
68 | /* | ||
69 | * IN: | ||
70 | * x0..x7: byte-sliced AB state | ||
71 | * mem_cd: register pointer storing CD state | ||
72 | * key: index for key material | ||
73 | * OUT: | ||
74 | * x0..x7: new byte-sliced CD state | ||
75 | */ | ||
76 | #define roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \ | ||
77 | t7, mem_cd, key) \ | ||
78 | /* \ | ||
79 | * S-function with AES subbytes \ | ||
80 | */ \ | ||
81 | vbroadcasti128 .Linv_shift_row, t4; \ | ||
82 | vpbroadcastb .L0f0f0f0f, t7; \ | ||
83 | vbroadcasti128 .Lpre_tf_lo_s1, t0; \ | ||
84 | vbroadcasti128 .Lpre_tf_hi_s1, t1; \ | ||
85 | \ | ||
86 | /* AES inverse shift rows */ \ | ||
87 | vpshufb t4, x0, x0; \ | ||
88 | vpshufb t4, x7, x7; \ | ||
89 | vpshufb t4, x1, x1; \ | ||
90 | vpshufb t4, x4, x4; \ | ||
91 | vpshufb t4, x2, x2; \ | ||
92 | vpshufb t4, x5, x5; \ | ||
93 | vpshufb t4, x3, x3; \ | ||
94 | vpshufb t4, x6, x6; \ | ||
95 | \ | ||
96 | /* prefilter sboxes 1, 2 and 3 */ \ | ||
97 | vbroadcasti128 .Lpre_tf_lo_s4, t2; \ | ||
98 | vbroadcasti128 .Lpre_tf_hi_s4, t3; \ | ||
99 | filter_8bit(x0, t0, t1, t7, t6); \ | ||
100 | filter_8bit(x7, t0, t1, t7, t6); \ | ||
101 | filter_8bit(x1, t0, t1, t7, t6); \ | ||
102 | filter_8bit(x4, t0, t1, t7, t6); \ | ||
103 | filter_8bit(x2, t0, t1, t7, t6); \ | ||
104 | filter_8bit(x5, t0, t1, t7, t6); \ | ||
105 | \ | ||
106 | /* prefilter sbox 4 */ \ | ||
107 | vpxor t4##_x, t4##_x, t4##_x; \ | ||
108 | filter_8bit(x3, t2, t3, t7, t6); \ | ||
109 | filter_8bit(x6, t2, t3, t7, t6); \ | ||
110 | \ | ||
111 | /* AES subbytes + AES shift rows */ \ | ||
112 | vbroadcasti128 .Lpost_tf_lo_s1, t0; \ | ||
113 | vbroadcasti128 .Lpost_tf_hi_s1, t1; \ | ||
114 | vaesenclast256(t4, x0, t5); \ | ||
115 | vaesenclast256(t4, x7, t5); \ | ||
116 | vaesenclast256(t4, x1, t5); \ | ||
117 | vaesenclast256(t4, x4, t5); \ | ||
118 | vaesenclast256(t4, x2, t5); \ | ||
119 | vaesenclast256(t4, x5, t5); \ | ||
120 | vaesenclast256(t4, x3, t5); \ | ||
121 | vaesenclast256(t4, x6, t5); \ | ||
122 | \ | ||
123 | /* postfilter sboxes 1 and 4 */ \ | ||
124 | vbroadcasti128 .Lpost_tf_lo_s3, t2; \ | ||
125 | vbroadcasti128 .Lpost_tf_hi_s3, t3; \ | ||
126 | filter_8bit(x0, t0, t1, t7, t6); \ | ||
127 | filter_8bit(x7, t0, t1, t7, t6); \ | ||
128 | filter_8bit(x3, t0, t1, t7, t6); \ | ||
129 | filter_8bit(x6, t0, t1, t7, t6); \ | ||
130 | \ | ||
131 | /* postfilter sbox 3 */ \ | ||
132 | vbroadcasti128 .Lpost_tf_lo_s2, t4; \ | ||
133 | vbroadcasti128 .Lpost_tf_hi_s2, t5; \ | ||
134 | filter_8bit(x2, t2, t3, t7, t6); \ | ||
135 | filter_8bit(x5, t2, t3, t7, t6); \ | ||
136 | \ | ||
137 | vpbroadcastq key, t0; /* higher 64-bit duplicate ignored */ \ | ||
138 | \ | ||
139 | /* postfilter sbox 2 */ \ | ||
140 | filter_8bit(x1, t4, t5, t7, t2); \ | ||
141 | filter_8bit(x4, t4, t5, t7, t2); \ | ||
142 | \ | ||
143 | vpsrldq $1, t0, t1; \ | ||
144 | vpsrldq $2, t0, t2; \ | ||
145 | vpsrldq $3, t0, t3; \ | ||
146 | vpsrldq $4, t0, t4; \ | ||
147 | vpsrldq $5, t0, t5; \ | ||
148 | vpsrldq $6, t0, t6; \ | ||
149 | vpsrldq $7, t0, t7; \ | ||
150 | vpbroadcastb t0##_x, t0; \ | ||
151 | vpbroadcastb t1##_x, t1; \ | ||
152 | vpbroadcastb t2##_x, t2; \ | ||
153 | vpbroadcastb t3##_x, t3; \ | ||
154 | vpbroadcastb t4##_x, t4; \ | ||
155 | vpbroadcastb t6##_x, t6; \ | ||
156 | vpbroadcastb t5##_x, t5; \ | ||
157 | vpbroadcastb t7##_x, t7; \ | ||
158 | \ | ||
159 | /* P-function */ \ | ||
160 | vpxor x5, x0, x0; \ | ||
161 | vpxor x6, x1, x1; \ | ||
162 | vpxor x7, x2, x2; \ | ||
163 | vpxor x4, x3, x3; \ | ||
164 | \ | ||
165 | vpxor x2, x4, x4; \ | ||
166 | vpxor x3, x5, x5; \ | ||
167 | vpxor x0, x6, x6; \ | ||
168 | vpxor x1, x7, x7; \ | ||
169 | \ | ||
170 | vpxor x7, x0, x0; \ | ||
171 | vpxor x4, x1, x1; \ | ||
172 | vpxor x5, x2, x2; \ | ||
173 | vpxor x6, x3, x3; \ | ||
174 | \ | ||
175 | vpxor x3, x4, x4; \ | ||
176 | vpxor x0, x5, x5; \ | ||
177 | vpxor x1, x6, x6; \ | ||
178 | vpxor x2, x7, x7; /* note: high and low parts swapped */ \ | ||
179 | \ | ||
180 | /* Add key material and result to CD (x becomes new CD) */ \ | ||
181 | \ | ||
182 | vpxor t7, x0, x0; \ | ||
183 | vpxor 4 * 32(mem_cd), x0, x0; \ | ||
184 | \ | ||
185 | vpxor t6, x1, x1; \ | ||
186 | vpxor 5 * 32(mem_cd), x1, x1; \ | ||
187 | \ | ||
188 | vpxor t5, x2, x2; \ | ||
189 | vpxor 6 * 32(mem_cd), x2, x2; \ | ||
190 | \ | ||
191 | vpxor t4, x3, x3; \ | ||
192 | vpxor 7 * 32(mem_cd), x3, x3; \ | ||
193 | \ | ||
194 | vpxor t3, x4, x4; \ | ||
195 | vpxor 0 * 32(mem_cd), x4, x4; \ | ||
196 | \ | ||
197 | vpxor t2, x5, x5; \ | ||
198 | vpxor 1 * 32(mem_cd), x5, x5; \ | ||
199 | \ | ||
200 | vpxor t1, x6, x6; \ | ||
201 | vpxor 2 * 32(mem_cd), x6, x6; \ | ||
202 | \ | ||
203 | vpxor t0, x7, x7; \ | ||
204 | vpxor 3 * 32(mem_cd), x7, x7; | ||
205 | |||
206 | /* | ||
207 | * Size optimization... with inlined roundsm16 binary would be over 5 times | ||
208 | * larger and would only marginally faster. | ||
209 | */ | ||
210 | .align 8 | ||
211 | roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: | ||
212 | roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, | ||
213 | %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, | ||
214 | %rcx, (%r9)); | ||
215 | ret; | ||
216 | ENDPROC(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) | ||
217 | |||
218 | .align 8 | ||
219 | roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: | ||
220 | roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, | ||
221 | %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, | ||
222 | %rax, (%r9)); | ||
223 | ret; | ||
224 | ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | ||
225 | |||
226 | /* | ||
227 | * IN/OUT: | ||
228 | * x0..x7: byte-sliced AB state preloaded | ||
229 | * mem_ab: byte-sliced AB state in memory | ||
230 | * mem_cb: byte-sliced CD state in memory | ||
231 | */ | ||
232 | #define two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
233 | y6, y7, mem_ab, mem_cd, i, dir, store_ab) \ | ||
234 | leaq (key_table + (i) * 8)(CTX), %r9; \ | ||
235 | call roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd; \ | ||
236 | \ | ||
237 | vmovdqu x0, 4 * 32(mem_cd); \ | ||
238 | vmovdqu x1, 5 * 32(mem_cd); \ | ||
239 | vmovdqu x2, 6 * 32(mem_cd); \ | ||
240 | vmovdqu x3, 7 * 32(mem_cd); \ | ||
241 | vmovdqu x4, 0 * 32(mem_cd); \ | ||
242 | vmovdqu x5, 1 * 32(mem_cd); \ | ||
243 | vmovdqu x6, 2 * 32(mem_cd); \ | ||
244 | vmovdqu x7, 3 * 32(mem_cd); \ | ||
245 | \ | ||
246 | leaq (key_table + ((i) + (dir)) * 8)(CTX), %r9; \ | ||
247 | call roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab; \ | ||
248 | \ | ||
249 | store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab); | ||
250 | |||
251 | #define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */ | ||
252 | |||
253 | #define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \ | ||
254 | /* Store new AB state */ \ | ||
255 | vmovdqu x4, 4 * 32(mem_ab); \ | ||
256 | vmovdqu x5, 5 * 32(mem_ab); \ | ||
257 | vmovdqu x6, 6 * 32(mem_ab); \ | ||
258 | vmovdqu x7, 7 * 32(mem_ab); \ | ||
259 | vmovdqu x0, 0 * 32(mem_ab); \ | ||
260 | vmovdqu x1, 1 * 32(mem_ab); \ | ||
261 | vmovdqu x2, 2 * 32(mem_ab); \ | ||
262 | vmovdqu x3, 3 * 32(mem_ab); | ||
263 | |||
264 | #define enc_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
265 | y6, y7, mem_ab, mem_cd, i) \ | ||
266 | two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
267 | y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \ | ||
268 | two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
269 | y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \ | ||
270 | two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
271 | y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store); | ||
272 | |||
273 | #define dec_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
274 | y6, y7, mem_ab, mem_cd, i) \ | ||
275 | two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
276 | y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \ | ||
277 | two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
278 | y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \ | ||
279 | two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
280 | y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store); | ||
281 | |||
282 | /* | ||
283 | * IN: | ||
284 | * v0..3: byte-sliced 32-bit integers | ||
285 | * OUT: | ||
286 | * v0..3: (IN <<< 1) | ||
287 | */ | ||
288 | #define rol32_1_32(v0, v1, v2, v3, t0, t1, t2, zero) \ | ||
289 | vpcmpgtb v0, zero, t0; \ | ||
290 | vpaddb v0, v0, v0; \ | ||
291 | vpabsb t0, t0; \ | ||
292 | \ | ||
293 | vpcmpgtb v1, zero, t1; \ | ||
294 | vpaddb v1, v1, v1; \ | ||
295 | vpabsb t1, t1; \ | ||
296 | \ | ||
297 | vpcmpgtb v2, zero, t2; \ | ||
298 | vpaddb v2, v2, v2; \ | ||
299 | vpabsb t2, t2; \ | ||
300 | \ | ||
301 | vpor t0, v1, v1; \ | ||
302 | \ | ||
303 | vpcmpgtb v3, zero, t0; \ | ||
304 | vpaddb v3, v3, v3; \ | ||
305 | vpabsb t0, t0; \ | ||
306 | \ | ||
307 | vpor t1, v2, v2; \ | ||
308 | vpor t2, v3, v3; \ | ||
309 | vpor t0, v0, v0; | ||
310 | |||
311 | /* | ||
312 | * IN: | ||
313 | * r: byte-sliced AB state in memory | ||
314 | * l: byte-sliced CD state in memory | ||
315 | * OUT: | ||
316 | * x0..x7: new byte-sliced CD state | ||
317 | */ | ||
318 | #define fls32(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \ | ||
319 | tt1, tt2, tt3, kll, klr, krl, krr) \ | ||
320 | /* \ | ||
321 | * t0 = kll; \ | ||
322 | * t0 &= ll; \ | ||
323 | * lr ^= rol32(t0, 1); \ | ||
324 | */ \ | ||
325 | vpbroadcastd kll, t0; /* only lowest 32-bit used */ \ | ||
326 | vpxor tt0, tt0, tt0; \ | ||
327 | vpbroadcastb t0##_x, t3; \ | ||
328 | vpsrldq $1, t0, t0; \ | ||
329 | vpbroadcastb t0##_x, t2; \ | ||
330 | vpsrldq $1, t0, t0; \ | ||
331 | vpbroadcastb t0##_x, t1; \ | ||
332 | vpsrldq $1, t0, t0; \ | ||
333 | vpbroadcastb t0##_x, t0; \ | ||
334 | \ | ||
335 | vpand l0, t0, t0; \ | ||
336 | vpand l1, t1, t1; \ | ||
337 | vpand l2, t2, t2; \ | ||
338 | vpand l3, t3, t3; \ | ||
339 | \ | ||
340 | rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ | ||
341 | \ | ||
342 | vpxor l4, t0, l4; \ | ||
343 | vmovdqu l4, 4 * 32(l); \ | ||
344 | vpxor l5, t1, l5; \ | ||
345 | vmovdqu l5, 5 * 32(l); \ | ||
346 | vpxor l6, t2, l6; \ | ||
347 | vmovdqu l6, 6 * 32(l); \ | ||
348 | vpxor l7, t3, l7; \ | ||
349 | vmovdqu l7, 7 * 32(l); \ | ||
350 | \ | ||
351 | /* \ | ||
352 | * t2 = krr; \ | ||
353 | * t2 |= rr; \ | ||
354 | * rl ^= t2; \ | ||
355 | */ \ | ||
356 | \ | ||
357 | vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ | ||
358 | vpbroadcastb t0##_x, t3; \ | ||
359 | vpsrldq $1, t0, t0; \ | ||
360 | vpbroadcastb t0##_x, t2; \ | ||
361 | vpsrldq $1, t0, t0; \ | ||
362 | vpbroadcastb t0##_x, t1; \ | ||
363 | vpsrldq $1, t0, t0; \ | ||
364 | vpbroadcastb t0##_x, t0; \ | ||
365 | \ | ||
366 | vpor 4 * 32(r), t0, t0; \ | ||
367 | vpor 5 * 32(r), t1, t1; \ | ||
368 | vpor 6 * 32(r), t2, t2; \ | ||
369 | vpor 7 * 32(r), t3, t3; \ | ||
370 | \ | ||
371 | vpxor 0 * 32(r), t0, t0; \ | ||
372 | vpxor 1 * 32(r), t1, t1; \ | ||
373 | vpxor 2 * 32(r), t2, t2; \ | ||
374 | vpxor 3 * 32(r), t3, t3; \ | ||
375 | vmovdqu t0, 0 * 32(r); \ | ||
376 | vmovdqu t1, 1 * 32(r); \ | ||
377 | vmovdqu t2, 2 * 32(r); \ | ||
378 | vmovdqu t3, 3 * 32(r); \ | ||
379 | \ | ||
380 | /* \ | ||
381 | * t2 = krl; \ | ||
382 | * t2 &= rl; \ | ||
383 | * rr ^= rol32(t2, 1); \ | ||
384 | */ \ | ||
385 | vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ | ||
386 | vpbroadcastb t0##_x, t3; \ | ||
387 | vpsrldq $1, t0, t0; \ | ||
388 | vpbroadcastb t0##_x, t2; \ | ||
389 | vpsrldq $1, t0, t0; \ | ||
390 | vpbroadcastb t0##_x, t1; \ | ||
391 | vpsrldq $1, t0, t0; \ | ||
392 | vpbroadcastb t0##_x, t0; \ | ||
393 | \ | ||
394 | vpand 0 * 32(r), t0, t0; \ | ||
395 | vpand 1 * 32(r), t1, t1; \ | ||
396 | vpand 2 * 32(r), t2, t2; \ | ||
397 | vpand 3 * 32(r), t3, t3; \ | ||
398 | \ | ||
399 | rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ | ||
400 | \ | ||
401 | vpxor 4 * 32(r), t0, t0; \ | ||
402 | vpxor 5 * 32(r), t1, t1; \ | ||
403 | vpxor 6 * 32(r), t2, t2; \ | ||
404 | vpxor 7 * 32(r), t3, t3; \ | ||
405 | vmovdqu t0, 4 * 32(r); \ | ||
406 | vmovdqu t1, 5 * 32(r); \ | ||
407 | vmovdqu t2, 6 * 32(r); \ | ||
408 | vmovdqu t3, 7 * 32(r); \ | ||
409 | \ | ||
410 | /* \ | ||
411 | * t0 = klr; \ | ||
412 | * t0 |= lr; \ | ||
413 | * ll ^= t0; \ | ||
414 | */ \ | ||
415 | \ | ||
416 | vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ | ||
417 | vpbroadcastb t0##_x, t3; \ | ||
418 | vpsrldq $1, t0, t0; \ | ||
419 | vpbroadcastb t0##_x, t2; \ | ||
420 | vpsrldq $1, t0, t0; \ | ||
421 | vpbroadcastb t0##_x, t1; \ | ||
422 | vpsrldq $1, t0, t0; \ | ||
423 | vpbroadcastb t0##_x, t0; \ | ||
424 | \ | ||
425 | vpor l4, t0, t0; \ | ||
426 | vpor l5, t1, t1; \ | ||
427 | vpor l6, t2, t2; \ | ||
428 | vpor l7, t3, t3; \ | ||
429 | \ | ||
430 | vpxor l0, t0, l0; \ | ||
431 | vmovdqu l0, 0 * 32(l); \ | ||
432 | vpxor l1, t1, l1; \ | ||
433 | vmovdqu l1, 1 * 32(l); \ | ||
434 | vpxor l2, t2, l2; \ | ||
435 | vmovdqu l2, 2 * 32(l); \ | ||
436 | vpxor l3, t3, l3; \ | ||
437 | vmovdqu l3, 3 * 32(l); | ||
438 | |||
439 | #define transpose_4x4(x0, x1, x2, x3, t1, t2) \ | ||
440 | vpunpckhdq x1, x0, t2; \ | ||
441 | vpunpckldq x1, x0, x0; \ | ||
442 | \ | ||
443 | vpunpckldq x3, x2, t1; \ | ||
444 | vpunpckhdq x3, x2, x2; \ | ||
445 | \ | ||
446 | vpunpckhqdq t1, x0, x1; \ | ||
447 | vpunpcklqdq t1, x0, x0; \ | ||
448 | \ | ||
449 | vpunpckhqdq x2, t2, x3; \ | ||
450 | vpunpcklqdq x2, t2, x2; | ||
451 | |||
452 | #define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \ | ||
453 | a3, b3, c3, d3, st0, st1) \ | ||
454 | vmovdqu d2, st0; \ | ||
455 | vmovdqu d3, st1; \ | ||
456 | transpose_4x4(a0, a1, a2, a3, d2, d3); \ | ||
457 | transpose_4x4(b0, b1, b2, b3, d2, d3); \ | ||
458 | vmovdqu st0, d2; \ | ||
459 | vmovdqu st1, d3; \ | ||
460 | \ | ||
461 | vmovdqu a0, st0; \ | ||
462 | vmovdqu a1, st1; \ | ||
463 | transpose_4x4(c0, c1, c2, c3, a0, a1); \ | ||
464 | transpose_4x4(d0, d1, d2, d3, a0, a1); \ | ||
465 | \ | ||
466 | vbroadcasti128 .Lshufb_16x16b, a0; \ | ||
467 | vmovdqu st1, a1; \ | ||
468 | vpshufb a0, a2, a2; \ | ||
469 | vpshufb a0, a3, a3; \ | ||
470 | vpshufb a0, b0, b0; \ | ||
471 | vpshufb a0, b1, b1; \ | ||
472 | vpshufb a0, b2, b2; \ | ||
473 | vpshufb a0, b3, b3; \ | ||
474 | vpshufb a0, a1, a1; \ | ||
475 | vpshufb a0, c0, c0; \ | ||
476 | vpshufb a0, c1, c1; \ | ||
477 | vpshufb a0, c2, c2; \ | ||
478 | vpshufb a0, c3, c3; \ | ||
479 | vpshufb a0, d0, d0; \ | ||
480 | vpshufb a0, d1, d1; \ | ||
481 | vpshufb a0, d2, d2; \ | ||
482 | vpshufb a0, d3, d3; \ | ||
483 | vmovdqu d3, st1; \ | ||
484 | vmovdqu st0, d3; \ | ||
485 | vpshufb a0, d3, a0; \ | ||
486 | vmovdqu d2, st0; \ | ||
487 | \ | ||
488 | transpose_4x4(a0, b0, c0, d0, d2, d3); \ | ||
489 | transpose_4x4(a1, b1, c1, d1, d2, d3); \ | ||
490 | vmovdqu st0, d2; \ | ||
491 | vmovdqu st1, d3; \ | ||
492 | \ | ||
493 | vmovdqu b0, st0; \ | ||
494 | vmovdqu b1, st1; \ | ||
495 | transpose_4x4(a2, b2, c2, d2, b0, b1); \ | ||
496 | transpose_4x4(a3, b3, c3, d3, b0, b1); \ | ||
497 | vmovdqu st0, b0; \ | ||
498 | vmovdqu st1, b1; \ | ||
499 | /* does not adjust output bytes inside vectors */ | ||
500 | |||
501 | /* load blocks to registers and apply pre-whitening */ | ||
502 | #define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
503 | y6, y7, rio, key) \ | ||
504 | vpbroadcastq key, x0; \ | ||
505 | vpshufb .Lpack_bswap, x0, x0; \ | ||
506 | \ | ||
507 | vpxor 0 * 32(rio), x0, y7; \ | ||
508 | vpxor 1 * 32(rio), x0, y6; \ | ||
509 | vpxor 2 * 32(rio), x0, y5; \ | ||
510 | vpxor 3 * 32(rio), x0, y4; \ | ||
511 | vpxor 4 * 32(rio), x0, y3; \ | ||
512 | vpxor 5 * 32(rio), x0, y2; \ | ||
513 | vpxor 6 * 32(rio), x0, y1; \ | ||
514 | vpxor 7 * 32(rio), x0, y0; \ | ||
515 | vpxor 8 * 32(rio), x0, x7; \ | ||
516 | vpxor 9 * 32(rio), x0, x6; \ | ||
517 | vpxor 10 * 32(rio), x0, x5; \ | ||
518 | vpxor 11 * 32(rio), x0, x4; \ | ||
519 | vpxor 12 * 32(rio), x0, x3; \ | ||
520 | vpxor 13 * 32(rio), x0, x2; \ | ||
521 | vpxor 14 * 32(rio), x0, x1; \ | ||
522 | vpxor 15 * 32(rio), x0, x0; | ||
523 | |||
524 | /* byteslice pre-whitened blocks and store to temporary memory */ | ||
525 | #define inpack32_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
526 | y6, y7, mem_ab, mem_cd) \ | ||
527 | byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \ | ||
528 | y4, y5, y6, y7, (mem_ab), (mem_cd)); \ | ||
529 | \ | ||
530 | vmovdqu x0, 0 * 32(mem_ab); \ | ||
531 | vmovdqu x1, 1 * 32(mem_ab); \ | ||
532 | vmovdqu x2, 2 * 32(mem_ab); \ | ||
533 | vmovdqu x3, 3 * 32(mem_ab); \ | ||
534 | vmovdqu x4, 4 * 32(mem_ab); \ | ||
535 | vmovdqu x5, 5 * 32(mem_ab); \ | ||
536 | vmovdqu x6, 6 * 32(mem_ab); \ | ||
537 | vmovdqu x7, 7 * 32(mem_ab); \ | ||
538 | vmovdqu y0, 0 * 32(mem_cd); \ | ||
539 | vmovdqu y1, 1 * 32(mem_cd); \ | ||
540 | vmovdqu y2, 2 * 32(mem_cd); \ | ||
541 | vmovdqu y3, 3 * 32(mem_cd); \ | ||
542 | vmovdqu y4, 4 * 32(mem_cd); \ | ||
543 | vmovdqu y5, 5 * 32(mem_cd); \ | ||
544 | vmovdqu y6, 6 * 32(mem_cd); \ | ||
545 | vmovdqu y7, 7 * 32(mem_cd); | ||
546 | |||
547 | /* de-byteslice, apply post-whitening and store blocks */ | ||
548 | #define outunpack32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \ | ||
549 | y5, y6, y7, key, stack_tmp0, stack_tmp1) \ | ||
550 | byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \ | ||
551 | y3, y7, x3, x7, stack_tmp0, stack_tmp1); \ | ||
552 | \ | ||
553 | vmovdqu x0, stack_tmp0; \ | ||
554 | \ | ||
555 | vpbroadcastq key, x0; \ | ||
556 | vpshufb .Lpack_bswap, x0, x0; \ | ||
557 | \ | ||
558 | vpxor x0, y7, y7; \ | ||
559 | vpxor x0, y6, y6; \ | ||
560 | vpxor x0, y5, y5; \ | ||
561 | vpxor x0, y4, y4; \ | ||
562 | vpxor x0, y3, y3; \ | ||
563 | vpxor x0, y2, y2; \ | ||
564 | vpxor x0, y1, y1; \ | ||
565 | vpxor x0, y0, y0; \ | ||
566 | vpxor x0, x7, x7; \ | ||
567 | vpxor x0, x6, x6; \ | ||
568 | vpxor x0, x5, x5; \ | ||
569 | vpxor x0, x4, x4; \ | ||
570 | vpxor x0, x3, x3; \ | ||
571 | vpxor x0, x2, x2; \ | ||
572 | vpxor x0, x1, x1; \ | ||
573 | vpxor stack_tmp0, x0, x0; | ||
574 | |||
575 | #define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
576 | y6, y7, rio) \ | ||
577 | vmovdqu x0, 0 * 32(rio); \ | ||
578 | vmovdqu x1, 1 * 32(rio); \ | ||
579 | vmovdqu x2, 2 * 32(rio); \ | ||
580 | vmovdqu x3, 3 * 32(rio); \ | ||
581 | vmovdqu x4, 4 * 32(rio); \ | ||
582 | vmovdqu x5, 5 * 32(rio); \ | ||
583 | vmovdqu x6, 6 * 32(rio); \ | ||
584 | vmovdqu x7, 7 * 32(rio); \ | ||
585 | vmovdqu y0, 8 * 32(rio); \ | ||
586 | vmovdqu y1, 9 * 32(rio); \ | ||
587 | vmovdqu y2, 10 * 32(rio); \ | ||
588 | vmovdqu y3, 11 * 32(rio); \ | ||
589 | vmovdqu y4, 12 * 32(rio); \ | ||
590 | vmovdqu y5, 13 * 32(rio); \ | ||
591 | vmovdqu y6, 14 * 32(rio); \ | ||
592 | vmovdqu y7, 15 * 32(rio); | ||
593 | |||
594 | .data | ||
595 | .align 32 | ||
596 | |||
597 | #define SHUFB_BYTES(idx) \ | ||
598 | 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) | ||
599 | |||
600 | .Lshufb_16x16b: | ||
601 | .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) | ||
602 | .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) | ||
603 | |||
604 | .Lpack_bswap: | ||
605 | .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 | ||
606 | .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 | ||
607 | |||
608 | /* For CTR-mode IV byteswap */ | ||
609 | .Lbswap128_mask: | ||
610 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
611 | |||
612 | /* For XTS mode */ | ||
613 | .Lxts_gf128mul_and_shl1_mask_0: | ||
614 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
615 | .Lxts_gf128mul_and_shl1_mask_1: | ||
616 | .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 | ||
617 | |||
618 | /* | ||
619 | * pre-SubByte transform | ||
620 | * | ||
621 | * pre-lookup for sbox1, sbox2, sbox3: | ||
622 | * swap_bitendianness( | ||
623 | * isom_map_camellia_to_aes( | ||
624 | * camellia_f( | ||
625 | * swap_bitendianess(in) | ||
626 | * ) | ||
627 | * ) | ||
628 | * ) | ||
629 | * | ||
630 | * (note: '⊕ 0xc5' inside camellia_f()) | ||
631 | */ | ||
/* low/high nibble lookup halves for the sbox1 pre-transform described
 * above; applied with vpshufb (4-bit index per byte) */
.Lpre_tf_lo_s1:
	.byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86
	.byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88
.Lpre_tf_hi_s1:
	.byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a
	.byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23

/*
 * pre-SubByte transform
 *
 * pre-lookup for sbox4:
 *   swap_bitendianness(
 *     isom_map_camellia_to_aes(
 *       camellia_f(
 *         swap_bitendianness(in <<< 1)
 *       )
 *     )
 *   )
 *
 * (note: '⊕ 0xc5' inside camellia_f())
 */
.Lpre_tf_lo_s4:
	.byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25
	.byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74
.Lpre_tf_hi_s4:
	.byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72
	.byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf

/*
 * post-SubByte transform
 *
 * post-lookup for sbox1, sbox4:
 *   swap_bitendianness(
 *     camellia_h(
 *       isom_map_aes_to_camellia(
 *         swap_bitendianness(
 *           aes_inverse_affine_transform(in)
 *         )
 *       )
 *     )
 *   )
 *
 * (note: '⊕ 0x6e' inside camellia_h())
 */
.Lpost_tf_lo_s1:
	.byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31
	.byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1
.Lpost_tf_hi_s1:
	.byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8
	.byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c

/*
 * post-SubByte transform
 *
 * post-lookup for sbox2:
 *   swap_bitendianness(
 *     camellia_h(
 *       isom_map_aes_to_camellia(
 *         swap_bitendianness(
 *           aes_inverse_affine_transform(in)
 *         )
 *       )
 *     )
 *   ) <<< 1
 *
 * (note: '⊕ 0x6e' inside camellia_h())
 */
.Lpost_tf_lo_s2:
	.byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62
	.byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3
.Lpost_tf_hi_s2:
	.byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51
	.byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18

/*
 * post-SubByte transform
 *
 * post-lookup for sbox3:
 *   swap_bitendianness(
 *     camellia_h(
 *       isom_map_aes_to_camellia(
 *         swap_bitendianness(
 *           aes_inverse_affine_transform(in)
 *         )
 *       )
 *     )
 *   ) >>> 1
 *
 * (note: '⊕ 0x6e' inside camellia_h())
 */
.Lpost_tf_lo_s3:
	.byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98
	.byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8
.Lpost_tf_hi_s3:
	.byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54
	.byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06

/* For isolating SubBytes from AESENCLAST, inverse shift row */
.Linv_shift_row:
	.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
	.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03

.align 4
/* 4-bit mask */
.L0f0f0f0f:
	.long 0x0f0f0f0f
738 | |||
739 | .text | ||
740 | |||
.align 8
__camellia_enc_blk32:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rax: temporary storage, 512 bytes
	 *	%ymm0..%ymm15: 32 plaintext blocks
	 * output:
	 *	%ymm0..%ymm15: 32 encrypted blocks, order swapped:
	 *	  7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
	 */

	/* %rax holds the first half of the scratch area (AB), %rcx the
	 * second half (CD) */
	leaq 8 * 32(%rax), %rcx;

	inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		      %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		      %ymm15, %rax, %rcx);

	/* Feistel rounds, subkeys from key_table slot 0 */
	enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rax, %rcx, 0);

	/* FL/FL⁻¹ layer, subkeys at key_table slot 8 */
	fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
	      %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
	      %ymm15,
	      ((key_table + (8) * 8) + 0)(CTX),
	      ((key_table + (8) * 8) + 4)(CTX),
	      ((key_table + (8) * 8) + 8)(CTX),
	      ((key_table + (8) * 8) + 12)(CTX));

	/* Feistel rounds, subkeys from key_table slot 8 */
	enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rax, %rcx, 8);

	/* FL/FL⁻¹ layer, subkeys at key_table slot 16 */
	fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
	      %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
	      %ymm15,
	      ((key_table + (16) * 8) + 0)(CTX),
	      ((key_table + (16) * 8) + 4)(CTX),
	      ((key_table + (16) * 8) + 8)(CTX),
	      ((key_table + (16) * 8) + 12)(CTX));

	/* Feistel rounds, subkeys from key_table slot 16 */
	enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rax, %rcx, 16);

	/* 128-bit keys finish here (output whitening at slot 24);
	 * 192/256-bit keys take the extra FL layer + rounds below */
	movl $24, %r8d;
	cmpl $16, key_length(CTX);
	jne .Lenc_max32;

.Lenc_done:
	/* load CD for output */
	vmovdqu 0 * 32(%rcx), %ymm8;
	vmovdqu 1 * 32(%rcx), %ymm9;
	vmovdqu 2 * 32(%rcx), %ymm10;
	vmovdqu 3 * 32(%rcx), %ymm11;
	vmovdqu 4 * 32(%rcx), %ymm12;
	vmovdqu 5 * 32(%rcx), %ymm13;
	vmovdqu 6 * 32(%rcx), %ymm14;
	vmovdqu 7 * 32(%rcx), %ymm15;

	/* %r8 (24 or 32) selects the output whitening subkey */
	outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		    %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		    %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax));

	ret;

.align 8
.Lenc_max32:
	/* 192/256-bit key path: whitening moves to slot 32 */
	movl $32, %r8d;

	/* FL/FL⁻¹ layer, subkeys at key_table slot 24 */
	fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
	      %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
	      %ymm15,
	      ((key_table + (24) * 8) + 0)(CTX),
	      ((key_table + (24) * 8) + 4)(CTX),
	      ((key_table + (24) * 8) + 8)(CTX),
	      ((key_table + (24) * 8) + 12)(CTX));

	/* Feistel rounds, subkeys from key_table slot 24 */
	enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rax, %rcx, 24);

	jmp .Lenc_done;
ENDPROC(__camellia_enc_blk32)
825 | |||
.align 8
__camellia_dec_blk32:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rax: temporary storage, 512 bytes
	 *	%r8d: 24 for 16 byte key, 32 for larger
	 *	%ymm0..%ymm15: 32 encrypted blocks
	 * output:
	 *	%ymm0..%ymm15: 32 plaintext blocks, order swapped:
	 *	  7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
	 */

	/* %rax holds the first half of the scratch area (AB), %rcx the
	 * second half (CD) */
	leaq 8 * 32(%rax), %rcx;

	inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		      %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		      %ymm15, %rax, %rcx);

	/* 192/256-bit keys need the extra round/FL chunk first */
	cmpl $32, %r8d;
	je .Ldec_max32;

.Ldec_max24:
	/* decryption walks the schedule backwards: rounds from slot 16 */
	dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rax, %rcx, 16);

	/* FL/FL⁻¹ layer, subkeys at slot 16 (halves swapped vs. enc) */
	fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
	      %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
	      %ymm15,
	      ((key_table + (16) * 8) + 8)(CTX),
	      ((key_table + (16) * 8) + 12)(CTX),
	      ((key_table + (16) * 8) + 0)(CTX),
	      ((key_table + (16) * 8) + 4)(CTX));

	dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rax, %rcx, 8);

	/* FL/FL⁻¹ layer, subkeys at slot 8 (halves swapped vs. enc) */
	fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
	      %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
	      %ymm15,
	      ((key_table + (8) * 8) + 8)(CTX),
	      ((key_table + (8) * 8) + 12)(CTX),
	      ((key_table + (8) * 8) + 0)(CTX),
	      ((key_table + (8) * 8) + 4)(CTX));

	dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rax, %rcx, 0);

	/* load CD for output */
	vmovdqu 0 * 32(%rcx), %ymm8;
	vmovdqu 1 * 32(%rcx), %ymm9;
	vmovdqu 2 * 32(%rcx), %ymm10;
	vmovdqu 3 * 32(%rcx), %ymm11;
	vmovdqu 4 * 32(%rcx), %ymm12;
	vmovdqu 5 * 32(%rcx), %ymm13;
	vmovdqu 6 * 32(%rcx), %ymm14;
	vmovdqu 7 * 32(%rcx), %ymm15;

	/* output whitening is always at slot 0 for decryption */
	outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		    %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		    %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));

	ret;

.align 8
.Ldec_max32:
	/* 192/256-bit key prologue: rounds from slot 24 + FL layer,
	 * then fall through to the common 24-slot path */
	dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rax, %rcx, 24);

	fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
	      %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
	      %ymm15,
	      ((key_table + (24) * 8) + 8)(CTX),
	      ((key_table + (24) * 8) + 12)(CTX),
	      ((key_table + (24) * 8) + 0)(CTX),
	      ((key_table + (24) * 8) + 4)(CTX));

	jmp .Ldec_max24;
ENDPROC(__camellia_dec_blk32)
908 | |||
ENTRY(camellia_ecb_enc_32way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (32 blocks)
	 *	%rdx: src (32 blocks)
	 */

	vzeroupper;

	/* load src and apply input whitening (key_table slot 0) */
	inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rdx, (key_table)(CTX));

	/* now dst can be used as temporary buffer (even in src == dst case) */
	movq %rsi, %rax;

	call __camellia_enc_blk32;

	/* registers come back order-swapped; write_output undoes that */
	write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
		     %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
		     %ymm8, %rsi);

	vzeroupper;

	ret;
ENDPROC(camellia_ecb_enc_32way)
935 | |||
ENTRY(camellia_ecb_dec_32way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (32 blocks)
	 *	%rdx: src (32 blocks)
	 */

	vzeroupper;

	/* %r8d = input whitening slot: 24 for 128-bit keys, else 32 */
	cmpl $16, key_length(CTX);
	movl $32, %r8d;
	movl $24, %eax;
	cmovel %eax, %r8d; /* max */

	/* load src and apply input whitening from the schedule's end */
	inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rdx, (key_table)(CTX, %r8, 8));

	/* now dst can be used as temporary buffer (even in src == dst case) */
	movq %rsi, %rax;

	call __camellia_dec_blk32;

	/* registers come back order-swapped; write_output undoes that */
	write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
		     %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
		     %ymm8, %rsi);

	vzeroupper;

	ret;
ENDPROC(camellia_ecb_dec_32way)
967 | |||
ENTRY(camellia_cbc_dec_32way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (32 blocks)
	 *	%rdx: src (32 blocks)
	 */

	vzeroupper;

	/* %r8d = input whitening slot: 24 for 128-bit keys, else 32 */
	cmpl $16, key_length(CTX);
	movl $32, %r8d;
	movl $24, %eax;
	cmovel %eax, %r8d; /* max */

	inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
		     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
		     %ymm15, %rdx, (key_table)(CTX, %r8, 8));

	/* save %rsp so the stack-scratch path below can be unwound */
	movq %rsp, %r10;
	cmpq %rsi, %rdx;
	je .Lcbc_dec_use_stack;

	/* dst can be used as temporary storage, src is not overwritten. */
	movq %rsi, %rax;
	jmp .Lcbc_dec_continue;

.Lcbc_dec_use_stack:
	/*
	 * dst still in-use (because dst == src), so use stack for temporary
	 * storage.
	 */
	subq $(16 * 32), %rsp;
	movq %rsp, %rax;

.Lcbc_dec_continue:
	call __camellia_dec_blk32;

	/*
	 * CBC chaining.  %ymm7 holds decrypted blocks 0-1: its low lane
	 * (block 0) is left as-is — the caller xors it with the IV — while
	 * the high lane (block 1) must be xored with ciphertext block 0,
	 * inserted via vinserti128 into a zeroed register.
	 */
	vmovdqu %ymm7, (%rax);
	vpxor %ymm7, %ymm7, %ymm7;
	vinserti128 $1, (%rdx), %ymm7, %ymm7;
	vpxor (%rax), %ymm7, %ymm7;
	movq %r10, %rsp;
	/* remaining blocks xor with src offset by one block (16 bytes) */
	vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6;
	vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5;
	vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4;
	vpxor (3 * 32 + 16)(%rdx), %ymm3, %ymm3;
	vpxor (4 * 32 + 16)(%rdx), %ymm2, %ymm2;
	vpxor (5 * 32 + 16)(%rdx), %ymm1, %ymm1;
	vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm0;
	vpxor (7 * 32 + 16)(%rdx), %ymm15, %ymm15;
	vpxor (8 * 32 + 16)(%rdx), %ymm14, %ymm14;
	vpxor (9 * 32 + 16)(%rdx), %ymm13, %ymm13;
	vpxor (10 * 32 + 16)(%rdx), %ymm12, %ymm12;
	vpxor (11 * 32 + 16)(%rdx), %ymm11, %ymm11;
	vpxor (12 * 32 + 16)(%rdx), %ymm10, %ymm10;
	vpxor (13 * 32 + 16)(%rdx), %ymm9, %ymm9;
	vpxor (14 * 32 + 16)(%rdx), %ymm8, %ymm8;
	write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
		     %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
		     %ymm8, %rsi);

	vzeroupper;

	ret;
ENDPROC(camellia_cbc_dec_32way)
1033 | |||
/*
 * Add one to each 128-bit lane of 'x', treated as a little-endian
 * 128-bit integer.  'minus_one' must hold (hi:lo) = 0:-1 per lane, so
 * the vpsubq increments only the low qword; the vpcmpeqq detects a low
 * qword that is about to wrap (== -1) and the vpslldq/vpsubq pair
 * propagates that carry into the high qword.
 */
#define inc_le128(x, minus_one, tmp) \
	vpcmpeqq minus_one, x, tmp; \
	vpsubq minus_one, x, x; \
	vpslldq $8, tmp, tmp; \
	vpsubq tmp, x, x;

/*
 * As inc_le128 but adds two per lane: 'minus_two' holds 0:-2, and a
 * carry into the high qword occurs when the low qword was -1 or -2,
 * hence the two compares or'ed together.
 */
#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \
	vpcmpeqq minus_one, x, tmp1; \
	vpcmpeqq minus_two, x, tmp2; \
	vpsubq minus_two, x, x; \
	vpor tmp2, tmp1, tmp1; \
	vpslldq $8, tmp1, tmp1; \
	vpsubq tmp1, x, x;
1047 | |||
ENTRY(camellia_ctr_32way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (32 blocks)
	 *	%rdx: src (32 blocks)
	 *	%rcx: iv (little endian, 128bit)
	 */

	vzeroupper;

	/* save %rsp so the stack-scratch path can be unwound */
	movq %rsp, %r10;
	cmpq %rsi, %rdx;
	je .Lctr_use_stack;

	/* dst can be used as temporary storage, src is not overwritten. */
	movq %rsi, %rax;
	jmp .Lctr_continue;

.Lctr_use_stack:
	subq $(16 * 32), %rsp;
	movq %rsp, %rax;

.Lctr_continue:
	/* constants for inc_le128/add2_le128 (0:-1 and 0:-2 per lane) */
	vpcmpeqd %ymm15, %ymm15, %ymm15;
	vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */
	vpaddq %ymm15, %ymm15, %ymm12; /* ab: -2:0 ; cd: -2:0 */

	/* load IV and byteswap; first pair (le0:le1) goes to scratch */
	vmovdqu (%rcx), %xmm0;
	vmovdqa %xmm0, %xmm1;
	inc_le128(%xmm0, %xmm15, %xmm14);
	vbroadcasti128 .Lbswap128_mask, %ymm14;
	vinserti128 $1, %xmm0, %ymm1, %ymm0;
	vpshufb %ymm14, %ymm0, %ymm13;
	vmovdqu %ymm13, 15 * 32(%rax);

	/* construct IVs: two counters per ymm; the first five pairs are
	 * spilled to the scratch area, the rest stay in registers */
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); /* ab:le2 ; cd:le3 */
	vpshufb %ymm14, %ymm0, %ymm13;
	vmovdqu %ymm13, 14 * 32(%rax);
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm13;
	vmovdqu %ymm13, 13 * 32(%rax);
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm13;
	vmovdqu %ymm13, 12 * 32(%rax);
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm13;
	vmovdqu %ymm13, 11 * 32(%rax);
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm10;
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm9;
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm8;
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm7;
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm6;
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm5;
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm4;
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm3;
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm2;
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	vpshufb %ymm14, %ymm0, %ymm1;
	add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
	/* store the next IV (counter + 32) back for the caller */
	vextracti128 $1, %ymm0, %xmm13;
	vpshufb %ymm14, %ymm0, %ymm0;
	inc_le128(%xmm13, %xmm15, %xmm14);
	vmovdqu %xmm13, (%rcx);

	/* inpack32_pre: xor all blocks with the input whitening key */
	vpbroadcastq (key_table)(CTX), %ymm15;
	vpshufb .Lpack_bswap, %ymm15, %ymm15;
	vpxor %ymm0, %ymm15, %ymm0;
	vpxor %ymm1, %ymm15, %ymm1;
	vpxor %ymm2, %ymm15, %ymm2;
	vpxor %ymm3, %ymm15, %ymm3;
	vpxor %ymm4, %ymm15, %ymm4;
	vpxor %ymm5, %ymm15, %ymm5;
	vpxor %ymm6, %ymm15, %ymm6;
	vpxor %ymm7, %ymm15, %ymm7;
	vpxor %ymm8, %ymm15, %ymm8;
	vpxor %ymm9, %ymm15, %ymm9;
	vpxor %ymm10, %ymm15, %ymm10;
	vpxor 11 * 32(%rax), %ymm15, %ymm11;
	vpxor 12 * 32(%rax), %ymm15, %ymm12;
	vpxor 13 * 32(%rax), %ymm15, %ymm13;
	vpxor 14 * 32(%rax), %ymm15, %ymm14;
	vpxor 15 * 32(%rax), %ymm15, %ymm15;

	call __camellia_enc_blk32;

	movq %r10, %rsp;

	/* CTR: xor the encrypted counter stream with src */
	vpxor 0 * 32(%rdx), %ymm7, %ymm7;
	vpxor 1 * 32(%rdx), %ymm6, %ymm6;
	vpxor 2 * 32(%rdx), %ymm5, %ymm5;
	vpxor 3 * 32(%rdx), %ymm4, %ymm4;
	vpxor 4 * 32(%rdx), %ymm3, %ymm3;
	vpxor 5 * 32(%rdx), %ymm2, %ymm2;
	vpxor 6 * 32(%rdx), %ymm1, %ymm1;
	vpxor 7 * 32(%rdx), %ymm0, %ymm0;
	vpxor 8 * 32(%rdx), %ymm15, %ymm15;
	vpxor 9 * 32(%rdx), %ymm14, %ymm14;
	vpxor 10 * 32(%rdx), %ymm13, %ymm13;
	vpxor 11 * 32(%rdx), %ymm12, %ymm12;
	vpxor 12 * 32(%rdx), %ymm11, %ymm11;
	vpxor 13 * 32(%rdx), %ymm10, %ymm10;
	vpxor 14 * 32(%rdx), %ymm9, %ymm9;
	vpxor 15 * 32(%rdx), %ymm8, %ymm8;
	write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
		     %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
		     %ymm8, %rsi);

	vzeroupper;

	ret;
ENDPROC(camellia_ctr_32way)
1171 | |||
/*
 * Multiply each 128-bit lane of 'iv' (an XTS tweak in little-endian
 * block representation) by x in GF(2¹²⁸): vpaddq doubles each qword
 * (shift left by one bit), vpsrad $31 + vpshufd $0x13 replicate the
 * shifted-out top bits, and 'mask' selects what gets xored back in
 * (the reduction constant and the low→high qword carry).
 */
#define gf128mul_x_ble(iv, mask, tmp) \
	vpsrad $31, iv, tmp; \
	vpaddq iv, iv, iv; \
	vpshufd $0x13, tmp, tmp; \
	vpand mask, tmp, tmp; \
	vpxor tmp, iv, iv;

/*
 * As above but multiplies by x² in one step (vpsllq $2 shifts by two
 * bits), with separate masks handling the two carried-out bits.
 */
#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \
	vpsrad $31, iv, tmp0; \
	vpaddq iv, iv, tmp1; \
	vpsllq $2, iv, iv; \
	vpshufd $0x13, tmp0, tmp0; \
	vpsrad $31, tmp1, tmp1; \
	vpand mask2, tmp0, tmp0; \
	vpshufd $0x13, tmp1, tmp1; \
	vpxor tmp0, iv, iv; \
	vpand mask1, tmp1, tmp1; \
	vpxor tmp1, iv, iv;
1190 | |||
.align 8
camellia_xts_crypt_32way:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (32 blocks)
	 *	%rdx: src (32 blocks)
	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
	 *	%r8: index for input whitening key
	 *	%r9: pointer to __camellia_enc_blk32 or __camellia_dec_blk32
	 *
	 * The 32 tweaks are stashed in dst (read back after the cipher
	 * call for the output xor); tweak ⊕ src lives in registers plus
	 * a 512-byte stack scratch area.
	 */

	vzeroupper;

	subq $(16 * 32), %rsp;
	movq %rsp, %rax;

	vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_0, %ymm12;

	/* load IV and construct second IV */
	vmovdqu (%rcx), %xmm0;
	vmovdqa %xmm0, %xmm15;
	gf128mul_x_ble(%xmm0, %xmm12, %xmm13);
	vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_1, %ymm13;
	vinserti128 $1, %xmm0, %ymm15, %ymm0;
	vpxor 0 * 32(%rdx), %ymm0, %ymm15;
	vmovdqu %ymm15, 15 * 32(%rax);
	vmovdqu %ymm0, 0 * 32(%rsi);

	/* construct IVs: advance by x² per step (two tweaks per ymm);
	 * first four tweak⊕src pairs go to scratch, rest to registers */
	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 1 * 32(%rdx), %ymm0, %ymm15;
	vmovdqu %ymm15, 14 * 32(%rax);
	vmovdqu %ymm0, 1 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 2 * 32(%rdx), %ymm0, %ymm15;
	vmovdqu %ymm15, 13 * 32(%rax);
	vmovdqu %ymm0, 2 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 3 * 32(%rdx), %ymm0, %ymm15;
	vmovdqu %ymm15, 12 * 32(%rax);
	vmovdqu %ymm0, 3 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 4 * 32(%rdx), %ymm0, %ymm11;
	vmovdqu %ymm0, 4 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 5 * 32(%rdx), %ymm0, %ymm10;
	vmovdqu %ymm0, 5 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 6 * 32(%rdx), %ymm0, %ymm9;
	vmovdqu %ymm0, 6 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 7 * 32(%rdx), %ymm0, %ymm8;
	vmovdqu %ymm0, 7 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 8 * 32(%rdx), %ymm0, %ymm7;
	vmovdqu %ymm0, 8 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 9 * 32(%rdx), %ymm0, %ymm6;
	vmovdqu %ymm0, 9 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 10 * 32(%rdx), %ymm0, %ymm5;
	vmovdqu %ymm0, 10 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 11 * 32(%rdx), %ymm0, %ymm4;
	vmovdqu %ymm0, 11 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 12 * 32(%rdx), %ymm0, %ymm3;
	vmovdqu %ymm0, 12 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 13 * 32(%rdx), %ymm0, %ymm2;
	vmovdqu %ymm0, 13 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 14 * 32(%rdx), %ymm0, %ymm1;
	vmovdqu %ymm0, 14 * 32(%rsi);

	gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
	vpxor 15 * 32(%rdx), %ymm0, %ymm15;
	vmovdqu %ymm15, 0 * 32(%rax);
	vmovdqu %ymm0, 15 * 32(%rsi);

	/* store the tweak for the next call back to (%rcx) */
	vextracti128 $1, %ymm0, %xmm0;
	gf128mul_x_ble(%xmm0, %xmm12, %xmm15);
	vmovdqu %xmm0, (%rcx);

	/* inpack32_pre: xor all blocks with the input whitening key
	 * selected by %r8 */
	vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15;
	vpshufb .Lpack_bswap, %ymm15, %ymm15;
	vpxor 0 * 32(%rax), %ymm15, %ymm0;
	vpxor %ymm1, %ymm15, %ymm1;
	vpxor %ymm2, %ymm15, %ymm2;
	vpxor %ymm3, %ymm15, %ymm3;
	vpxor %ymm4, %ymm15, %ymm4;
	vpxor %ymm5, %ymm15, %ymm5;
	vpxor %ymm6, %ymm15, %ymm6;
	vpxor %ymm7, %ymm15, %ymm7;
	vpxor %ymm8, %ymm15, %ymm8;
	vpxor %ymm9, %ymm15, %ymm9;
	vpxor %ymm10, %ymm15, %ymm10;
	vpxor %ymm11, %ymm15, %ymm11;
	vpxor 12 * 32(%rax), %ymm15, %ymm12;
	vpxor 13 * 32(%rax), %ymm15, %ymm13;
	vpxor 14 * 32(%rax), %ymm15, %ymm14;
	vpxor 15 * 32(%rax), %ymm15, %ymm15;

	/* encrypt or decrypt, as selected by the caller */
	call *%r9;

	addq $(16 * 32), %rsp;

	/* second tweak xor, reading the tweaks stashed in dst */
	vpxor 0 * 32(%rsi), %ymm7, %ymm7;
	vpxor 1 * 32(%rsi), %ymm6, %ymm6;
	vpxor 2 * 32(%rsi), %ymm5, %ymm5;
	vpxor 3 * 32(%rsi), %ymm4, %ymm4;
	vpxor 4 * 32(%rsi), %ymm3, %ymm3;
	vpxor 5 * 32(%rsi), %ymm2, %ymm2;
	vpxor 6 * 32(%rsi), %ymm1, %ymm1;
	vpxor 7 * 32(%rsi), %ymm0, %ymm0;
	vpxor 8 * 32(%rsi), %ymm15, %ymm15;
	vpxor 9 * 32(%rsi), %ymm14, %ymm14;
	vpxor 10 * 32(%rsi), %ymm13, %ymm13;
	vpxor 11 * 32(%rsi), %ymm12, %ymm12;
	vpxor 12 * 32(%rsi), %ymm11, %ymm11;
	vpxor 13 * 32(%rsi), %ymm10, %ymm10;
	vpxor 14 * 32(%rsi), %ymm9, %ymm9;
	vpxor 15 * 32(%rsi), %ymm8, %ymm8;
	write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
		     %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
		     %ymm8, %rsi);

	vzeroupper;

	ret;
ENDPROC(camellia_xts_crypt_32way)
1336 | |||
ENTRY(camellia_xts_enc_32way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (32 blocks)
	 *	%rdx: src (32 blocks)
	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
	 *
	 * Thin trampoline: set up the common routine's extra arguments
	 * and tail-jump into camellia_xts_crypt_32way.
	 */

	xorl %r8d, %r8d; /* input whitening key, 0 for enc */

	leaq __camellia_enc_blk32, %r9;

	jmp camellia_xts_crypt_32way;
ENDPROC(camellia_xts_enc_32way)
1351 | |||
ENTRY(camellia_xts_dec_32way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (32 blocks)
	 *	%rdx: src (32 blocks)
	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
	 *
	 * Thin trampoline: decryption whitens with the last subkey
	 * (slot 24 for 128-bit keys, 32 otherwise), then tail-jumps
	 * into camellia_xts_crypt_32way.
	 */

	cmpl $16, key_length(CTX);
	movl $32, %r8d;
	movl $24, %eax;
	cmovel %eax, %r8d; /* input whitening key, last for dec */

	leaq __camellia_dec_blk32, %r9;

	jmp camellia_xts_crypt_32way;
ENDPROC(camellia_xts_dec_32way)
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c new file mode 100644 index 000000000000..414fe5d7946b --- /dev/null +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c | |||
@@ -0,0 +1,586 @@ | |||
1 | /* | ||
2 | * Glue Code for x86_64/AVX2/AES-NI assembler optimized version of Camellia | ||
3 | * | ||
4 | * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/crypto.h> | ||
16 | #include <linux/err.h> | ||
17 | #include <crypto/algapi.h> | ||
18 | #include <crypto/ctr.h> | ||
19 | #include <crypto/lrw.h> | ||
20 | #include <crypto/xts.h> | ||
21 | #include <asm/xcr.h> | ||
22 | #include <asm/xsave.h> | ||
23 | #include <asm/crypto/camellia.h> | ||
24 | #include <asm/crypto/ablk_helper.h> | ||
25 | #include <asm/crypto/glue_helper.h> | ||
26 | |||
27 | #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 | ||
28 | #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32 | ||
29 | |||
30 | /* 32-way AVX2/AES-NI parallel cipher functions */ | ||
31 | asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst, | ||
32 | const u8 *src); | ||
33 | asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst, | ||
34 | const u8 *src); | ||
35 | |||
36 | asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst, | ||
37 | const u8 *src); | ||
38 | asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst, | ||
39 | const u8 *src, le128 *iv); | ||
40 | |||
41 | asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst, | ||
42 | const u8 *src, le128 *iv); | ||
43 | asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst, | ||
44 | const u8 *src, le128 *iv); | ||
45 | |||
46 | static const struct common_glue_ctx camellia_enc = { | ||
47 | .num_funcs = 4, | ||
48 | .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
49 | |||
50 | .funcs = { { | ||
51 | .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, | ||
52 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) } | ||
53 | }, { | ||
54 | .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
55 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) } | ||
56 | }, { | ||
57 | .num_blocks = 2, | ||
58 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) } | ||
59 | }, { | ||
60 | .num_blocks = 1, | ||
61 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) } | ||
62 | } } | ||
63 | }; | ||
64 | |||
65 | static const struct common_glue_ctx camellia_ctr = { | ||
66 | .num_funcs = 4, | ||
67 | .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
68 | |||
69 | .funcs = { { | ||
70 | .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, | ||
71 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) } | ||
72 | }, { | ||
73 | .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
74 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) } | ||
75 | }, { | ||
76 | .num_blocks = 2, | ||
77 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) } | ||
78 | }, { | ||
79 | .num_blocks = 1, | ||
80 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) } | ||
81 | } } | ||
82 | }; | ||
83 | |||
84 | static const struct common_glue_ctx camellia_enc_xts = { | ||
85 | .num_funcs = 3, | ||
86 | .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
87 | |||
88 | .funcs = { { | ||
89 | .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, | ||
90 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) } | ||
91 | }, { | ||
92 | .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
93 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) } | ||
94 | }, { | ||
95 | .num_blocks = 1, | ||
96 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) } | ||
97 | } } | ||
98 | }; | ||
99 | |||
100 | static const struct common_glue_ctx camellia_dec = { | ||
101 | .num_funcs = 4, | ||
102 | .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
103 | |||
104 | .funcs = { { | ||
105 | .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, | ||
106 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) } | ||
107 | }, { | ||
108 | .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
109 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) } | ||
110 | }, { | ||
111 | .num_blocks = 2, | ||
112 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) } | ||
113 | }, { | ||
114 | .num_blocks = 1, | ||
115 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) } | ||
116 | } } | ||
117 | }; | ||
118 | |||
119 | static const struct common_glue_ctx camellia_dec_cbc = { | ||
120 | .num_funcs = 4, | ||
121 | .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
122 | |||
123 | .funcs = { { | ||
124 | .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, | ||
125 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) } | ||
126 | }, { | ||
127 | .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
128 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) } | ||
129 | }, { | ||
130 | .num_blocks = 2, | ||
131 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) } | ||
132 | }, { | ||
133 | .num_blocks = 1, | ||
134 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) } | ||
135 | } } | ||
136 | }; | ||
137 | |||
138 | static const struct common_glue_ctx camellia_dec_xts = { | ||
139 | .num_funcs = 3, | ||
140 | .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
141 | |||
142 | .funcs = { { | ||
143 | .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, | ||
144 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) } | ||
145 | }, { | ||
146 | .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
147 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) } | ||
148 | }, { | ||
149 | .num_blocks = 1, | ||
150 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) } | ||
151 | } } | ||
152 | }; | ||
153 | |||
154 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
155 | struct scatterlist *src, unsigned int nbytes) | ||
156 | { | ||
157 | return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); | ||
158 | } | ||
159 | |||
160 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
161 | struct scatterlist *src, unsigned int nbytes) | ||
162 | { | ||
163 | return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); | ||
164 | } | ||
165 | |||
166 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
167 | struct scatterlist *src, unsigned int nbytes) | ||
168 | { | ||
169 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, | ||
170 | dst, src, nbytes); | ||
171 | } | ||
172 | |||
173 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
174 | struct scatterlist *src, unsigned int nbytes) | ||
175 | { | ||
176 | return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, | ||
177 | nbytes); | ||
178 | } | ||
179 | |||
180 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
181 | struct scatterlist *src, unsigned int nbytes) | ||
182 | { | ||
183 | return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); | ||
184 | } | ||
185 | |||
186 | static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
187 | { | ||
188 | return glue_fpu_begin(CAMELLIA_BLOCK_SIZE, | ||
189 | CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled, | ||
190 | nbytes); | ||
191 | } | ||
192 | |||
193 | static inline void camellia_fpu_end(bool fpu_enabled) | ||
194 | { | ||
195 | glue_fpu_end(fpu_enabled); | ||
196 | } | ||
197 | |||
198 | static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, | ||
199 | unsigned int key_len) | ||
200 | { | ||
201 | return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, | ||
202 | &tfm->crt_flags); | ||
203 | } | ||
204 | |||
205 | struct crypt_priv { | ||
206 | struct camellia_ctx *ctx; | ||
207 | bool fpu_enabled; | ||
208 | }; | ||
209 | |||
210 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
211 | { | ||
212 | const unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
213 | struct crypt_priv *ctx = priv; | ||
214 | int i; | ||
215 | |||
216 | ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); | ||
217 | |||
218 | if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) { | ||
219 | camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst); | ||
220 | srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; | ||
221 | nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; | ||
222 | } | ||
223 | |||
224 | if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { | ||
225 | camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst); | ||
226 | srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | ||
227 | nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | ||
228 | } | ||
229 | |||
230 | while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { | ||
231 | camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst); | ||
232 | srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; | ||
233 | nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; | ||
234 | } | ||
235 | |||
236 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
237 | camellia_enc_blk(ctx->ctx, srcdst, srcdst); | ||
238 | } | ||
239 | |||
240 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
241 | { | ||
242 | const unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
243 | struct crypt_priv *ctx = priv; | ||
244 | int i; | ||
245 | |||
246 | ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); | ||
247 | |||
248 | if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) { | ||
249 | camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst); | ||
250 | srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; | ||
251 | nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; | ||
252 | } | ||
253 | |||
254 | if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { | ||
255 | camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst); | ||
256 | srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | ||
257 | nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | ||
258 | } | ||
259 | |||
260 | while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { | ||
261 | camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst); | ||
262 | srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; | ||
263 | nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; | ||
264 | } | ||
265 | |||
266 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
267 | camellia_dec_blk(ctx->ctx, srcdst, srcdst); | ||
268 | } | ||
269 | |||
270 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
271 | struct scatterlist *src, unsigned int nbytes) | ||
272 | { | ||
273 | struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
274 | be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS]; | ||
275 | struct crypt_priv crypt_ctx = { | ||
276 | .ctx = &ctx->camellia_ctx, | ||
277 | .fpu_enabled = false, | ||
278 | }; | ||
279 | struct lrw_crypt_req req = { | ||
280 | .tbuf = buf, | ||
281 | .tbuflen = sizeof(buf), | ||
282 | |||
283 | .table_ctx = &ctx->lrw_table, | ||
284 | .crypt_ctx = &crypt_ctx, | ||
285 | .crypt_fn = encrypt_callback, | ||
286 | }; | ||
287 | int ret; | ||
288 | |||
289 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
290 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
291 | camellia_fpu_end(crypt_ctx.fpu_enabled); | ||
292 | |||
293 | return ret; | ||
294 | } | ||
295 | |||
296 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
297 | struct scatterlist *src, unsigned int nbytes) | ||
298 | { | ||
299 | struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
300 | be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS]; | ||
301 | struct crypt_priv crypt_ctx = { | ||
302 | .ctx = &ctx->camellia_ctx, | ||
303 | .fpu_enabled = false, | ||
304 | }; | ||
305 | struct lrw_crypt_req req = { | ||
306 | .tbuf = buf, | ||
307 | .tbuflen = sizeof(buf), | ||
308 | |||
309 | .table_ctx = &ctx->lrw_table, | ||
310 | .crypt_ctx = &crypt_ctx, | ||
311 | .crypt_fn = decrypt_callback, | ||
312 | }; | ||
313 | int ret; | ||
314 | |||
315 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
316 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
317 | camellia_fpu_end(crypt_ctx.fpu_enabled); | ||
318 | |||
319 | return ret; | ||
320 | } | ||
321 | |||
322 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
323 | struct scatterlist *src, unsigned int nbytes) | ||
324 | { | ||
325 | struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
326 | |||
327 | return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes, | ||
328 | XTS_TWEAK_CAST(camellia_enc_blk), | ||
329 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
330 | } | ||
331 | |||
332 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
333 | struct scatterlist *src, unsigned int nbytes) | ||
334 | { | ||
335 | struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
336 | |||
337 | return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes, | ||
338 | XTS_TWEAK_CAST(camellia_enc_blk), | ||
339 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
340 | } | ||
341 | |||
342 | static struct crypto_alg cmll_algs[10] = { { | ||
343 | .cra_name = "__ecb-camellia-aesni-avx2", | ||
344 | .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", | ||
345 | .cra_priority = 0, | ||
346 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
347 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
348 | .cra_ctxsize = sizeof(struct camellia_ctx), | ||
349 | .cra_alignmask = 0, | ||
350 | .cra_type = &crypto_blkcipher_type, | ||
351 | .cra_module = THIS_MODULE, | ||
352 | .cra_u = { | ||
353 | .blkcipher = { | ||
354 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
355 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
356 | .setkey = camellia_setkey, | ||
357 | .encrypt = ecb_encrypt, | ||
358 | .decrypt = ecb_decrypt, | ||
359 | }, | ||
360 | }, | ||
361 | }, { | ||
362 | .cra_name = "__cbc-camellia-aesni-avx2", | ||
363 | .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", | ||
364 | .cra_priority = 0, | ||
365 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
366 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
367 | .cra_ctxsize = sizeof(struct camellia_ctx), | ||
368 | .cra_alignmask = 0, | ||
369 | .cra_type = &crypto_blkcipher_type, | ||
370 | .cra_module = THIS_MODULE, | ||
371 | .cra_u = { | ||
372 | .blkcipher = { | ||
373 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
374 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
375 | .setkey = camellia_setkey, | ||
376 | .encrypt = cbc_encrypt, | ||
377 | .decrypt = cbc_decrypt, | ||
378 | }, | ||
379 | }, | ||
380 | }, { | ||
381 | .cra_name = "__ctr-camellia-aesni-avx2", | ||
382 | .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", | ||
383 | .cra_priority = 0, | ||
384 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
385 | .cra_blocksize = 1, | ||
386 | .cra_ctxsize = sizeof(struct camellia_ctx), | ||
387 | .cra_alignmask = 0, | ||
388 | .cra_type = &crypto_blkcipher_type, | ||
389 | .cra_module = THIS_MODULE, | ||
390 | .cra_u = { | ||
391 | .blkcipher = { | ||
392 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
393 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
394 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
395 | .setkey = camellia_setkey, | ||
396 | .encrypt = ctr_crypt, | ||
397 | .decrypt = ctr_crypt, | ||
398 | }, | ||
399 | }, | ||
400 | }, { | ||
401 | .cra_name = "__lrw-camellia-aesni-avx2", | ||
402 | .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", | ||
403 | .cra_priority = 0, | ||
404 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
405 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
406 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), | ||
407 | .cra_alignmask = 0, | ||
408 | .cra_type = &crypto_blkcipher_type, | ||
409 | .cra_module = THIS_MODULE, | ||
410 | .cra_exit = lrw_camellia_exit_tfm, | ||
411 | .cra_u = { | ||
412 | .blkcipher = { | ||
413 | .min_keysize = CAMELLIA_MIN_KEY_SIZE + | ||
414 | CAMELLIA_BLOCK_SIZE, | ||
415 | .max_keysize = CAMELLIA_MAX_KEY_SIZE + | ||
416 | CAMELLIA_BLOCK_SIZE, | ||
417 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
418 | .setkey = lrw_camellia_setkey, | ||
419 | .encrypt = lrw_encrypt, | ||
420 | .decrypt = lrw_decrypt, | ||
421 | }, | ||
422 | }, | ||
423 | }, { | ||
424 | .cra_name = "__xts-camellia-aesni-avx2", | ||
425 | .cra_driver_name = "__driver-xts-camellia-aesni-avx2", | ||
426 | .cra_priority = 0, | ||
427 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
428 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
429 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), | ||
430 | .cra_alignmask = 0, | ||
431 | .cra_type = &crypto_blkcipher_type, | ||
432 | .cra_module = THIS_MODULE, | ||
433 | .cra_u = { | ||
434 | .blkcipher = { | ||
435 | .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, | ||
436 | .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, | ||
437 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
438 | .setkey = xts_camellia_setkey, | ||
439 | .encrypt = xts_encrypt, | ||
440 | .decrypt = xts_decrypt, | ||
441 | }, | ||
442 | }, | ||
443 | }, { | ||
444 | .cra_name = "ecb(camellia)", | ||
445 | .cra_driver_name = "ecb-camellia-aesni-avx2", | ||
446 | .cra_priority = 500, | ||
447 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
448 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
449 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
450 | .cra_alignmask = 0, | ||
451 | .cra_type = &crypto_ablkcipher_type, | ||
452 | .cra_module = THIS_MODULE, | ||
453 | .cra_init = ablk_init, | ||
454 | .cra_exit = ablk_exit, | ||
455 | .cra_u = { | ||
456 | .ablkcipher = { | ||
457 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
458 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
459 | .setkey = ablk_set_key, | ||
460 | .encrypt = ablk_encrypt, | ||
461 | .decrypt = ablk_decrypt, | ||
462 | }, | ||
463 | }, | ||
464 | }, { | ||
465 | .cra_name = "cbc(camellia)", | ||
466 | .cra_driver_name = "cbc-camellia-aesni-avx2", | ||
467 | .cra_priority = 500, | ||
468 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
469 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
470 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
471 | .cra_alignmask = 0, | ||
472 | .cra_type = &crypto_ablkcipher_type, | ||
473 | .cra_module = THIS_MODULE, | ||
474 | .cra_init = ablk_init, | ||
475 | .cra_exit = ablk_exit, | ||
476 | .cra_u = { | ||
477 | .ablkcipher = { | ||
478 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
479 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
480 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
481 | .setkey = ablk_set_key, | ||
482 | .encrypt = __ablk_encrypt, | ||
483 | .decrypt = ablk_decrypt, | ||
484 | }, | ||
485 | }, | ||
486 | }, { | ||
487 | .cra_name = "ctr(camellia)", | ||
488 | .cra_driver_name = "ctr-camellia-aesni-avx2", | ||
489 | .cra_priority = 500, | ||
490 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
491 | .cra_blocksize = 1, | ||
492 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
493 | .cra_alignmask = 0, | ||
494 | .cra_type = &crypto_ablkcipher_type, | ||
495 | .cra_module = THIS_MODULE, | ||
496 | .cra_init = ablk_init, | ||
497 | .cra_exit = ablk_exit, | ||
498 | .cra_u = { | ||
499 | .ablkcipher = { | ||
500 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
501 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
502 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
503 | .setkey = ablk_set_key, | ||
504 | .encrypt = ablk_encrypt, | ||
505 | .decrypt = ablk_encrypt, | ||
506 | .geniv = "chainiv", | ||
507 | }, | ||
508 | }, | ||
509 | }, { | ||
510 | .cra_name = "lrw(camellia)", | ||
511 | .cra_driver_name = "lrw-camellia-aesni-avx2", | ||
512 | .cra_priority = 500, | ||
513 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
514 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
515 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
516 | .cra_alignmask = 0, | ||
517 | .cra_type = &crypto_ablkcipher_type, | ||
518 | .cra_module = THIS_MODULE, | ||
519 | .cra_init = ablk_init, | ||
520 | .cra_exit = ablk_exit, | ||
521 | .cra_u = { | ||
522 | .ablkcipher = { | ||
523 | .min_keysize = CAMELLIA_MIN_KEY_SIZE + | ||
524 | CAMELLIA_BLOCK_SIZE, | ||
525 | .max_keysize = CAMELLIA_MAX_KEY_SIZE + | ||
526 | CAMELLIA_BLOCK_SIZE, | ||
527 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
528 | .setkey = ablk_set_key, | ||
529 | .encrypt = ablk_encrypt, | ||
530 | .decrypt = ablk_decrypt, | ||
531 | }, | ||
532 | }, | ||
533 | }, { | ||
534 | .cra_name = "xts(camellia)", | ||
535 | .cra_driver_name = "xts-camellia-aesni-avx2", | ||
536 | .cra_priority = 500, | ||
537 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
538 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
539 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
540 | .cra_alignmask = 0, | ||
541 | .cra_type = &crypto_ablkcipher_type, | ||
542 | .cra_module = THIS_MODULE, | ||
543 | .cra_init = ablk_init, | ||
544 | .cra_exit = ablk_exit, | ||
545 | .cra_u = { | ||
546 | .ablkcipher = { | ||
547 | .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, | ||
548 | .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, | ||
549 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
550 | .setkey = ablk_set_key, | ||
551 | .encrypt = ablk_encrypt, | ||
552 | .decrypt = ablk_decrypt, | ||
553 | }, | ||
554 | }, | ||
555 | } }; | ||
556 | |||
557 | static int __init camellia_aesni_init(void) | ||
558 | { | ||
559 | u64 xcr0; | ||
560 | |||
561 | if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { | ||
562 | pr_info("AVX2 or AES-NI instructions are not detected.\n"); | ||
563 | return -ENODEV; | ||
564 | } | ||
565 | |||
566 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
567 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
568 | pr_info("AVX2 detected but unusable.\n"); | ||
569 | return -ENODEV; | ||
570 | } | ||
571 | |||
572 | return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); | ||
573 | } | ||
574 | |||
575 | static void __exit camellia_aesni_fini(void) | ||
576 | { | ||
577 | crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); | ||
578 | } | ||
579 | |||
580 | module_init(camellia_aesni_init); | ||
581 | module_exit(camellia_aesni_fini); | ||
582 | |||
583 | MODULE_LICENSE("GPL"); | ||
584 | MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized"); | ||
585 | MODULE_ALIAS("camellia"); | ||
586 | MODULE_ALIAS("camellia-asm"); | ||
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 96cbb6068fce..37fd0c0a81ea 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia | 2 | * Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia |
3 | * | 3 | * |
4 | * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
@@ -26,16 +26,44 @@ | |||
26 | 26 | ||
27 | #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 | 27 | #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 |
28 | 28 | ||
29 | /* 16-way AES-NI parallel cipher functions */ | 29 | /* 16-way parallel cipher functions (avx/aes-ni) */ |
30 | asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, | 30 | asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, |
31 | const u8 *src); | 31 | const u8 *src); |
32 | EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way); | ||
33 | |||
32 | asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, | 34 | asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, |
33 | const u8 *src); | 35 | const u8 *src); |
36 | EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way); | ||
34 | 37 | ||
35 | asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, | 38 | asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, |
36 | const u8 *src); | 39 | const u8 *src); |
40 | EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way); | ||
41 | |||
37 | asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, | 42 | asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, |
38 | const u8 *src, le128 *iv); | 43 | const u8 *src, le128 *iv); |
44 | EXPORT_SYMBOL_GPL(camellia_ctr_16way); | ||
45 | |||
46 | asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst, | ||
47 | const u8 *src, le128 *iv); | ||
48 | EXPORT_SYMBOL_GPL(camellia_xts_enc_16way); | ||
49 | |||
50 | asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst, | ||
51 | const u8 *src, le128 *iv); | ||
52 | EXPORT_SYMBOL_GPL(camellia_xts_dec_16way); | ||
53 | |||
54 | void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
55 | { | ||
56 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | ||
57 | GLUE_FUNC_CAST(camellia_enc_blk)); | ||
58 | } | ||
59 | EXPORT_SYMBOL_GPL(camellia_xts_enc); | ||
60 | |||
61 | void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
62 | { | ||
63 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | ||
64 | GLUE_FUNC_CAST(camellia_dec_blk)); | ||
65 | } | ||
66 | EXPORT_SYMBOL_GPL(camellia_xts_dec); | ||
39 | 67 | ||
40 | static const struct common_glue_ctx camellia_enc = { | 68 | static const struct common_glue_ctx camellia_enc = { |
41 | .num_funcs = 3, | 69 | .num_funcs = 3, |
@@ -69,6 +97,19 @@ static const struct common_glue_ctx camellia_ctr = { | |||
69 | } } | 97 | } } |
70 | }; | 98 | }; |
71 | 99 | ||
100 | static const struct common_glue_ctx camellia_enc_xts = { | ||
101 | .num_funcs = 2, | ||
102 | .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
103 | |||
104 | .funcs = { { | ||
105 | .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
106 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) } | ||
107 | }, { | ||
108 | .num_blocks = 1, | ||
109 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) } | ||
110 | } } | ||
111 | }; | ||
112 | |||
72 | static const struct common_glue_ctx camellia_dec = { | 113 | static const struct common_glue_ctx camellia_dec = { |
73 | .num_funcs = 3, | 114 | .num_funcs = 3, |
74 | .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, | 115 | .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, |
@@ -101,6 +142,19 @@ static const struct common_glue_ctx camellia_dec_cbc = { | |||
101 | } } | 142 | } } |
102 | }; | 143 | }; |
103 | 144 | ||
145 | static const struct common_glue_ctx camellia_dec_xts = { | ||
146 | .num_funcs = 2, | ||
147 | .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
148 | |||
149 | .funcs = { { | ||
150 | .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, | ||
151 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) } | ||
152 | }, { | ||
153 | .num_blocks = 1, | ||
154 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) } | ||
155 | } } | ||
156 | }; | ||
157 | |||
104 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 158 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
105 | struct scatterlist *src, unsigned int nbytes) | 159 | struct scatterlist *src, unsigned int nbytes) |
106 | { | 160 | { |
@@ -261,54 +315,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
261 | struct scatterlist *src, unsigned int nbytes) | 315 | struct scatterlist *src, unsigned int nbytes) |
262 | { | 316 | { |
263 | struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 317 | struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); |
264 | be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; | ||
265 | struct crypt_priv crypt_ctx = { | ||
266 | .ctx = &ctx->crypt_ctx, | ||
267 | .fpu_enabled = false, | ||
268 | }; | ||
269 | struct xts_crypt_req req = { | ||
270 | .tbuf = buf, | ||
271 | .tbuflen = sizeof(buf), | ||
272 | 318 | ||
273 | .tweak_ctx = &ctx->tweak_ctx, | 319 | return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes, |
274 | .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), | 320 | XTS_TWEAK_CAST(camellia_enc_blk), |
275 | .crypt_ctx = &crypt_ctx, | 321 | &ctx->tweak_ctx, &ctx->crypt_ctx); |
276 | .crypt_fn = encrypt_callback, | ||
277 | }; | ||
278 | int ret; | ||
279 | |||
280 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
281 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
282 | camellia_fpu_end(crypt_ctx.fpu_enabled); | ||
283 | |||
284 | return ret; | ||
285 | } | 322 | } |
286 | 323 | ||
287 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 324 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
288 | struct scatterlist *src, unsigned int nbytes) | 325 | struct scatterlist *src, unsigned int nbytes) |
289 | { | 326 | { |
290 | struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 327 | struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); |
291 | be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; | ||
292 | struct crypt_priv crypt_ctx = { | ||
293 | .ctx = &ctx->crypt_ctx, | ||
294 | .fpu_enabled = false, | ||
295 | }; | ||
296 | struct xts_crypt_req req = { | ||
297 | .tbuf = buf, | ||
298 | .tbuflen = sizeof(buf), | ||
299 | |||
300 | .tweak_ctx = &ctx->tweak_ctx, | ||
301 | .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), | ||
302 | .crypt_ctx = &crypt_ctx, | ||
303 | .crypt_fn = decrypt_callback, | ||
304 | }; | ||
305 | int ret; | ||
306 | 328 | ||
307 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | 329 | return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes, |
308 | ret = xts_crypt(desc, dst, src, nbytes, &req); | 330 | XTS_TWEAK_CAST(camellia_enc_blk), |
309 | camellia_fpu_end(crypt_ctx.fpu_enabled); | 331 | &ctx->tweak_ctx, &ctx->crypt_ctx); |
310 | |||
311 | return ret; | ||
312 | } | 332 | } |
313 | 333 | ||
314 | static struct crypto_alg cmll_algs[10] = { { | 334 | static struct crypto_alg cmll_algs[10] = { { |
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index f93b6105a0ce..e3531f833951 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S | |||
@@ -4,7 +4,7 @@ | |||
4 | * Copyright (C) 2012 Johannes Goetzfried | 4 | * Copyright (C) 2012 Johannes Goetzfried |
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | 5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> |
6 | * | 6 | * |
7 | * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 7 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
@@ -227,6 +227,8 @@ | |||
227 | .data | 227 | .data |
228 | 228 | ||
229 | .align 16 | 229 | .align 16 |
230 | .Lxts_gf128mul_and_shl1_mask: | ||
231 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
230 | .Lbswap_mask: | 232 | .Lbswap_mask: |
231 | .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 | 233 | .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 |
232 | .Lbswap128_mask: | 234 | .Lbswap128_mask: |
@@ -424,3 +426,47 @@ ENTRY(cast6_ctr_8way) | |||
424 | 426 | ||
425 | ret; | 427 | ret; |
426 | ENDPROC(cast6_ctr_8way) | 428 | ENDPROC(cast6_ctr_8way) |
429 | |||
430 | ENTRY(cast6_xts_enc_8way) | ||
431 | /* input: | ||
432 | * %rdi: ctx, CTX | ||
433 | * %rsi: dst | ||
434 | * %rdx: src | ||
435 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
436 | */ | ||
437 | |||
438 | movq %rsi, %r11; | ||
439 | |||
440 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ | ||
441 | load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, | ||
442 | RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask); | ||
443 | |||
444 | call __cast6_enc_blk8; | ||
445 | |||
446 | /* dst <= regs xor IVs(in dst) */ | ||
447 | store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
448 | |||
449 | ret; | ||
450 | ENDPROC(cast6_xts_enc_8way) | ||
451 | |||
452 | ENTRY(cast6_xts_dec_8way) | ||
453 | /* input: | ||
454 | * %rdi: ctx, CTX | ||
455 | * %rsi: dst | ||
456 | * %rdx: src | ||
457 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
458 | */ | ||
459 | |||
460 | movq %rsi, %r11; | ||
461 | |||
462 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ | ||
463 | load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, | ||
464 | RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask); | ||
465 | |||
466 | call __cast6_dec_blk8; | ||
467 | |||
468 | /* dst <= regs xor IVs(in dst) */ | ||
469 | store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
470 | |||
471 | ret; | ||
472 | ENDPROC(cast6_xts_dec_8way) | ||
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 92f7ca24790a..8d0dfb86a559 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c | |||
@@ -4,6 +4,8 @@ | |||
4 | * Copyright (C) 2012 Johannes Goetzfried | 4 | * Copyright (C) 2012 Johannes Goetzfried |
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | 5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> |
6 | * | 6 | * |
7 | * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
8 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or | 11 | * the Free Software Foundation; either version 2 of the License, or |
@@ -50,6 +52,23 @@ asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst, | |||
50 | asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, | 52 | asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, |
51 | le128 *iv); | 53 | le128 *iv); |
52 | 54 | ||
55 | asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst, | ||
56 | const u8 *src, le128 *iv); | ||
57 | asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst, | ||
58 | const u8 *src, le128 *iv); | ||
59 | |||
60 | static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
61 | { | ||
62 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | ||
63 | GLUE_FUNC_CAST(__cast6_encrypt)); | ||
64 | } | ||
65 | |||
66 | static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
67 | { | ||
68 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | ||
69 | GLUE_FUNC_CAST(__cast6_decrypt)); | ||
70 | } | ||
71 | |||
53 | static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | 72 | static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) |
54 | { | 73 | { |
55 | be128 ctrblk; | 74 | be128 ctrblk; |
@@ -87,6 +106,19 @@ static const struct common_glue_ctx cast6_ctr = { | |||
87 | } } | 106 | } } |
88 | }; | 107 | }; |
89 | 108 | ||
109 | static const struct common_glue_ctx cast6_enc_xts = { | ||
110 | .num_funcs = 2, | ||
111 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, | ||
112 | |||
113 | .funcs = { { | ||
114 | .num_blocks = CAST6_PARALLEL_BLOCKS, | ||
115 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) } | ||
116 | }, { | ||
117 | .num_blocks = 1, | ||
118 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) } | ||
119 | } } | ||
120 | }; | ||
121 | |||
90 | static const struct common_glue_ctx cast6_dec = { | 122 | static const struct common_glue_ctx cast6_dec = { |
91 | .num_funcs = 2, | 123 | .num_funcs = 2, |
92 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, | 124 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, |
@@ -113,6 +145,19 @@ static const struct common_glue_ctx cast6_dec_cbc = { | |||
113 | } } | 145 | } } |
114 | }; | 146 | }; |
115 | 147 | ||
148 | static const struct common_glue_ctx cast6_dec_xts = { | ||
149 | .num_funcs = 2, | ||
150 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, | ||
151 | |||
152 | .funcs = { { | ||
153 | .num_blocks = CAST6_PARALLEL_BLOCKS, | ||
154 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) } | ||
155 | }, { | ||
156 | .num_blocks = 1, | ||
157 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) } | ||
158 | } } | ||
159 | }; | ||
160 | |||
116 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 161 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
117 | struct scatterlist *src, unsigned int nbytes) | 162 | struct scatterlist *src, unsigned int nbytes) |
118 | { | 163 | { |
@@ -307,54 +352,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
307 | struct scatterlist *src, unsigned int nbytes) | 352 | struct scatterlist *src, unsigned int nbytes) |
308 | { | 353 | { |
309 | struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 354 | struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); |
310 | be128 buf[CAST6_PARALLEL_BLOCKS]; | ||
311 | struct crypt_priv crypt_ctx = { | ||
312 | .ctx = &ctx->crypt_ctx, | ||
313 | .fpu_enabled = false, | ||
314 | }; | ||
315 | struct xts_crypt_req req = { | ||
316 | .tbuf = buf, | ||
317 | .tbuflen = sizeof(buf), | ||
318 | 355 | ||
319 | .tweak_ctx = &ctx->tweak_ctx, | 356 | return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes, |
320 | .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), | 357 | XTS_TWEAK_CAST(__cast6_encrypt), |
321 | .crypt_ctx = &crypt_ctx, | 358 | &ctx->tweak_ctx, &ctx->crypt_ctx); |
322 | .crypt_fn = encrypt_callback, | ||
323 | }; | ||
324 | int ret; | ||
325 | |||
326 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
327 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
328 | cast6_fpu_end(crypt_ctx.fpu_enabled); | ||
329 | |||
330 | return ret; | ||
331 | } | 359 | } |
332 | 360 | ||
333 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 361 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
334 | struct scatterlist *src, unsigned int nbytes) | 362 | struct scatterlist *src, unsigned int nbytes) |
335 | { | 363 | { |
336 | struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 364 | struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); |
337 | be128 buf[CAST6_PARALLEL_BLOCKS]; | ||
338 | struct crypt_priv crypt_ctx = { | ||
339 | .ctx = &ctx->crypt_ctx, | ||
340 | .fpu_enabled = false, | ||
341 | }; | ||
342 | struct xts_crypt_req req = { | ||
343 | .tbuf = buf, | ||
344 | .tbuflen = sizeof(buf), | ||
345 | |||
346 | .tweak_ctx = &ctx->tweak_ctx, | ||
347 | .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), | ||
348 | .crypt_ctx = &crypt_ctx, | ||
349 | .crypt_fn = decrypt_callback, | ||
350 | }; | ||
351 | int ret; | ||
352 | 365 | ||
353 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | 366 | return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes, |
354 | ret = xts_crypt(desc, dst, src, nbytes, &req); | 367 | XTS_TWEAK_CAST(__cast6_encrypt), |
355 | cast6_fpu_end(crypt_ctx.fpu_enabled); | 368 | &ctx->tweak_ctx, &ctx->crypt_ctx); |
356 | |||
357 | return ret; | ||
358 | } | 369 | } |
359 | 370 | ||
360 | static struct crypto_alg cast6_algs[10] = { { | 371 | static struct crypto_alg cast6_algs[10] = { { |
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S index c8335014a044..94c27df8a549 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/crypto/crc32-pclmul_asm.S | |||
@@ -101,9 +101,8 @@ | |||
101 | * uint crc32_pclmul_le_16(unsigned char const *buffer, | 101 | * uint crc32_pclmul_le_16(unsigned char const *buffer, |
102 | * size_t len, uint crc32) | 102 | * size_t len, uint crc32) |
103 | */ | 103 | */ |
104 | .globl crc32_pclmul_le_16 | 104 | |
105 | .align 4, 0x90 | 105 | ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */ |
106 | crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */ | ||
107 | movdqa (BUF), %xmm1 | 106 | movdqa (BUF), %xmm1 |
108 | movdqa 0x10(BUF), %xmm2 | 107 | movdqa 0x10(BUF), %xmm2 |
109 | movdqa 0x20(BUF), %xmm3 | 108 | movdqa 0x20(BUF), %xmm3 |
@@ -244,3 +243,4 @@ fold_64: | |||
244 | pextrd $0x01, %xmm1, %eax | 243 | pextrd $0x01, %xmm1, %eax |
245 | 244 | ||
246 | ret | 245 | ret |
246 | ENDPROC(crc32_pclmul_le_16) | ||
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index cf1a7ec4cc3a..dbc4339b5417 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S | |||
@@ -1,9 +1,10 @@ | |||
1 | /* | 1 | /* |
2 | * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64) | 2 | * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64) |
3 | * | 3 | * |
4 | * The white paper on CRC32C calculations with PCLMULQDQ instruction can be | 4 | * The white papers on CRC32C calculations with PCLMULQDQ instruction can be |
5 | * downloaded from: | 5 | * downloaded from: |
6 | * http://download.intel.com/design/intarch/papers/323405.pdf | 6 | * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf |
7 | * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf | ||
7 | * | 8 | * |
8 | * Copyright (C) 2012 Intel Corporation. | 9 | * Copyright (C) 2012 Intel Corporation. |
9 | * | 10 | * |
@@ -42,6 +43,7 @@ | |||
42 | * SOFTWARE. | 43 | * SOFTWARE. |
43 | */ | 44 | */ |
44 | 45 | ||
46 | #include <asm/inst.h> | ||
45 | #include <linux/linkage.h> | 47 | #include <linux/linkage.h> |
46 | 48 | ||
47 | ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction | 49 | ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction |
@@ -225,10 +227,10 @@ LABEL crc_ %i | |||
225 | movdqa (bufp), %xmm0 # 2 consts: K1:K2 | 227 | movdqa (bufp), %xmm0 # 2 consts: K1:K2 |
226 | 228 | ||
227 | movq crc_init, %xmm1 # CRC for block 1 | 229 | movq crc_init, %xmm1 # CRC for block 1 |
228 | pclmulqdq $0x00,%xmm0,%xmm1 # Multiply by K2 | 230 | PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2 |
229 | 231 | ||
230 | movq crc1, %xmm2 # CRC for block 2 | 232 | movq crc1, %xmm2 # CRC for block 2 |
231 | pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1 | 233 | PCLMULQDQ 0x10, %xmm0, %xmm2 # Multiply by K1 |
232 | 234 | ||
233 | pxor %xmm2,%xmm1 | 235 | pxor %xmm2,%xmm1 |
234 | movq %xmm1, %rax | 236 | movq %xmm1, %rax |
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S index f7b6ea2ddfdb..02ee2308fb38 100644 --- a/arch/x86/crypto/glue_helper-asm-avx.S +++ b/arch/x86/crypto/glue_helper-asm-avx.S | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Shared glue code for 128bit block ciphers, AVX assembler macros | 2 | * Shared glue code for 128bit block ciphers, AVX assembler macros |
3 | * | 3 | * |
4 | * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
@@ -89,3 +89,62 @@ | |||
89 | vpxor (6*16)(src), x6, x6; \ | 89 | vpxor (6*16)(src), x6, x6; \ |
90 | vpxor (7*16)(src), x7, x7; \ | 90 | vpxor (7*16)(src), x7, x7; \ |
91 | store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); | 91 | store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); |
92 | |||
93 | #define gf128mul_x_ble(iv, mask, tmp) \ | ||
94 | vpsrad $31, iv, tmp; \ | ||
95 | vpaddq iv, iv, iv; \ | ||
96 | vpshufd $0x13, tmp, tmp; \ | ||
97 | vpand mask, tmp, tmp; \ | ||
98 | vpxor tmp, iv, iv; | ||
99 | |||
100 | #define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \ | ||
101 | t1, xts_gf128mul_and_shl1_mask) \ | ||
102 | vmovdqa xts_gf128mul_and_shl1_mask, t0; \ | ||
103 | \ | ||
104 | /* load IV */ \ | ||
105 | vmovdqu (iv), tiv; \ | ||
106 | vpxor (0*16)(src), tiv, x0; \ | ||
107 | vmovdqu tiv, (0*16)(dst); \ | ||
108 | \ | ||
109 | /* construct and store IVs, also xor with source */ \ | ||
110 | gf128mul_x_ble(tiv, t0, t1); \ | ||
111 | vpxor (1*16)(src), tiv, x1; \ | ||
112 | vmovdqu tiv, (1*16)(dst); \ | ||
113 | \ | ||
114 | gf128mul_x_ble(tiv, t0, t1); \ | ||
115 | vpxor (2*16)(src), tiv, x2; \ | ||
116 | vmovdqu tiv, (2*16)(dst); \ | ||
117 | \ | ||
118 | gf128mul_x_ble(tiv, t0, t1); \ | ||
119 | vpxor (3*16)(src), tiv, x3; \ | ||
120 | vmovdqu tiv, (3*16)(dst); \ | ||
121 | \ | ||
122 | gf128mul_x_ble(tiv, t0, t1); \ | ||
123 | vpxor (4*16)(src), tiv, x4; \ | ||
124 | vmovdqu tiv, (4*16)(dst); \ | ||
125 | \ | ||
126 | gf128mul_x_ble(tiv, t0, t1); \ | ||
127 | vpxor (5*16)(src), tiv, x5; \ | ||
128 | vmovdqu tiv, (5*16)(dst); \ | ||
129 | \ | ||
130 | gf128mul_x_ble(tiv, t0, t1); \ | ||
131 | vpxor (6*16)(src), tiv, x6; \ | ||
132 | vmovdqu tiv, (6*16)(dst); \ | ||
133 | \ | ||
134 | gf128mul_x_ble(tiv, t0, t1); \ | ||
135 | vpxor (7*16)(src), tiv, x7; \ | ||
136 | vmovdqu tiv, (7*16)(dst); \ | ||
137 | \ | ||
138 | gf128mul_x_ble(tiv, t0, t1); \ | ||
139 | vmovdqu tiv, (iv); | ||
140 | |||
141 | #define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \ | ||
142 | vpxor (0*16)(dst), x0, x0; \ | ||
143 | vpxor (1*16)(dst), x1, x1; \ | ||
144 | vpxor (2*16)(dst), x2, x2; \ | ||
145 | vpxor (3*16)(dst), x3, x3; \ | ||
146 | vpxor (4*16)(dst), x4, x4; \ | ||
147 | vpxor (5*16)(dst), x5, x5; \ | ||
148 | vpxor (6*16)(dst), x6, x6; \ | ||
149 | vpxor (7*16)(dst), x7, x7; \ | ||
150 | store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); | ||
diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S new file mode 100644 index 000000000000..a53ac11dd385 --- /dev/null +++ b/arch/x86/crypto/glue_helper-asm-avx2.S | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * Shared glue code for 128bit block ciphers, AVX2 assembler macros | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #define load_16way(src, x0, x1, x2, x3, x4, x5, x6, x7) \ | ||
14 | vmovdqu (0*32)(src), x0; \ | ||
15 | vmovdqu (1*32)(src), x1; \ | ||
16 | vmovdqu (2*32)(src), x2; \ | ||
17 | vmovdqu (3*32)(src), x3; \ | ||
18 | vmovdqu (4*32)(src), x4; \ | ||
19 | vmovdqu (5*32)(src), x5; \ | ||
20 | vmovdqu (6*32)(src), x6; \ | ||
21 | vmovdqu (7*32)(src), x7; | ||
22 | |||
23 | #define store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \ | ||
24 | vmovdqu x0, (0*32)(dst); \ | ||
25 | vmovdqu x1, (1*32)(dst); \ | ||
26 | vmovdqu x2, (2*32)(dst); \ | ||
27 | vmovdqu x3, (3*32)(dst); \ | ||
28 | vmovdqu x4, (4*32)(dst); \ | ||
29 | vmovdqu x5, (5*32)(dst); \ | ||
30 | vmovdqu x6, (6*32)(dst); \ | ||
31 | vmovdqu x7, (7*32)(dst); | ||
32 | |||
33 | #define store_cbc_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7, t0) \ | ||
34 | vpxor t0, t0, t0; \ | ||
35 | vinserti128 $1, (src), t0, t0; \ | ||
36 | vpxor t0, x0, x0; \ | ||
37 | vpxor (0*32+16)(src), x1, x1; \ | ||
38 | vpxor (1*32+16)(src), x2, x2; \ | ||
39 | vpxor (2*32+16)(src), x3, x3; \ | ||
40 | vpxor (3*32+16)(src), x4, x4; \ | ||
41 | vpxor (4*32+16)(src), x5, x5; \ | ||
42 | vpxor (5*32+16)(src), x6, x6; \ | ||
43 | vpxor (6*32+16)(src), x7, x7; \ | ||
44 | store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); | ||
45 | |||
46 | #define inc_le128(x, minus_one, tmp) \ | ||
47 | vpcmpeqq minus_one, x, tmp; \ | ||
48 | vpsubq minus_one, x, x; \ | ||
49 | vpslldq $8, tmp, tmp; \ | ||
50 | vpsubq tmp, x, x; | ||
51 | |||
52 | #define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \ | ||
53 | vpcmpeqq minus_one, x, tmp1; \ | ||
54 | vpcmpeqq minus_two, x, tmp2; \ | ||
55 | vpsubq minus_two, x, x; \ | ||
56 | vpor tmp2, tmp1, tmp1; \ | ||
57 | vpslldq $8, tmp1, tmp1; \ | ||
58 | vpsubq tmp1, x, x; | ||
59 | |||
60 | #define load_ctr_16way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t0x, t1, \ | ||
61 | t1x, t2, t2x, t3, t3x, t4, t5) \ | ||
62 | vpcmpeqd t0, t0, t0; \ | ||
63 | vpsrldq $8, t0, t0; /* ab: -1:0 ; cd: -1:0 */ \ | ||
64 | vpaddq t0, t0, t4; /* ab: -2:0 ; cd: -2:0 */\ | ||
65 | \ | ||
66 | /* load IV and byteswap */ \ | ||
67 | vmovdqu (iv), t2x; \ | ||
68 | vmovdqa t2x, t3x; \ | ||
69 | inc_le128(t2x, t0x, t1x); \ | ||
70 | vbroadcasti128 bswap, t1; \ | ||
71 | vinserti128 $1, t2x, t3, t2; /* ab: le0 ; cd: le1 */ \ | ||
72 | vpshufb t1, t2, x0; \ | ||
73 | \ | ||
74 | /* construct IVs */ \ | ||
75 | add2_le128(t2, t0, t4, t3, t5); /* ab: le2 ; cd: le3 */ \ | ||
76 | vpshufb t1, t2, x1; \ | ||
77 | add2_le128(t2, t0, t4, t3, t5); \ | ||
78 | vpshufb t1, t2, x2; \ | ||
79 | add2_le128(t2, t0, t4, t3, t5); \ | ||
80 | vpshufb t1, t2, x3; \ | ||
81 | add2_le128(t2, t0, t4, t3, t5); \ | ||
82 | vpshufb t1, t2, x4; \ | ||
83 | add2_le128(t2, t0, t4, t3, t5); \ | ||
84 | vpshufb t1, t2, x5; \ | ||
85 | add2_le128(t2, t0, t4, t3, t5); \ | ||
86 | vpshufb t1, t2, x6; \ | ||
87 | add2_le128(t2, t0, t4, t3, t5); \ | ||
88 | vpshufb t1, t2, x7; \ | ||
89 | vextracti128 $1, t2, t2x; \ | ||
90 | inc_le128(t2x, t0x, t3x); \ | ||
91 | vmovdqu t2x, (iv); | ||
92 | |||
93 | #define store_ctr_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \ | ||
94 | vpxor (0*32)(src), x0, x0; \ | ||
95 | vpxor (1*32)(src), x1, x1; \ | ||
96 | vpxor (2*32)(src), x2, x2; \ | ||
97 | vpxor (3*32)(src), x3, x3; \ | ||
98 | vpxor (4*32)(src), x4, x4; \ | ||
99 | vpxor (5*32)(src), x5, x5; \ | ||
100 | vpxor (6*32)(src), x6, x6; \ | ||
101 | vpxor (7*32)(src), x7, x7; \ | ||
102 | store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); | ||
103 | |||
104 | #define gf128mul_x_ble(iv, mask, tmp) \ | ||
105 | vpsrad $31, iv, tmp; \ | ||
106 | vpaddq iv, iv, iv; \ | ||
107 | vpshufd $0x13, tmp, tmp; \ | ||
108 | vpand mask, tmp, tmp; \ | ||
109 | vpxor tmp, iv, iv; | ||
110 | |||
111 | #define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \ | ||
112 | vpsrad $31, iv, tmp0; \ | ||
113 | vpaddq iv, iv, tmp1; \ | ||
114 | vpsllq $2, iv, iv; \ | ||
115 | vpshufd $0x13, tmp0, tmp0; \ | ||
116 | vpsrad $31, tmp1, tmp1; \ | ||
117 | vpand mask2, tmp0, tmp0; \ | ||
118 | vpshufd $0x13, tmp1, tmp1; \ | ||
119 | vpxor tmp0, iv, iv; \ | ||
120 | vpand mask1, tmp1, tmp1; \ | ||
121 | vpxor tmp1, iv, iv; | ||
122 | |||
123 | #define load_xts_16way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, \ | ||
124 | tivx, t0, t0x, t1, t1x, t2, t2x, t3, \ | ||
125 | xts_gf128mul_and_shl1_mask_0, \ | ||
126 | xts_gf128mul_and_shl1_mask_1) \ | ||
127 | vbroadcasti128 xts_gf128mul_and_shl1_mask_0, t1; \ | ||
128 | \ | ||
129 | /* load IV and construct second IV */ \ | ||
130 | vmovdqu (iv), tivx; \ | ||
131 | vmovdqa tivx, t0x; \ | ||
132 | gf128mul_x_ble(tivx, t1x, t2x); \ | ||
133 | vbroadcasti128 xts_gf128mul_and_shl1_mask_1, t2; \ | ||
134 | vinserti128 $1, tivx, t0, tiv; \ | ||
135 | vpxor (0*32)(src), tiv, x0; \ | ||
136 | vmovdqu tiv, (0*32)(dst); \ | ||
137 | \ | ||
138 | /* construct and store IVs, also xor with source */ \ | ||
139 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
140 | vpxor (1*32)(src), tiv, x1; \ | ||
141 | vmovdqu tiv, (1*32)(dst); \ | ||
142 | \ | ||
143 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
144 | vpxor (2*32)(src), tiv, x2; \ | ||
145 | vmovdqu tiv, (2*32)(dst); \ | ||
146 | \ | ||
147 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
148 | vpxor (3*32)(src), tiv, x3; \ | ||
149 | vmovdqu tiv, (3*32)(dst); \ | ||
150 | \ | ||
151 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
152 | vpxor (4*32)(src), tiv, x4; \ | ||
153 | vmovdqu tiv, (4*32)(dst); \ | ||
154 | \ | ||
155 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
156 | vpxor (5*32)(src), tiv, x5; \ | ||
157 | vmovdqu tiv, (5*32)(dst); \ | ||
158 | \ | ||
159 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
160 | vpxor (6*32)(src), tiv, x6; \ | ||
161 | vmovdqu tiv, (6*32)(dst); \ | ||
162 | \ | ||
163 | gf128mul_x2_ble(tiv, t1, t2, t0, t3); \ | ||
164 | vpxor (7*32)(src), tiv, x7; \ | ||
165 | vmovdqu tiv, (7*32)(dst); \ | ||
166 | \ | ||
167 | vextracti128 $1, tiv, tivx; \ | ||
168 | gf128mul_x_ble(tivx, t1x, t2x); \ | ||
169 | vmovdqu tivx, (iv); | ||
170 | |||
171 | #define store_xts_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \ | ||
172 | vpxor (0*32)(dst), x0, x0; \ | ||
173 | vpxor (1*32)(dst), x1, x1; \ | ||
174 | vpxor (2*32)(dst), x2, x2; \ | ||
175 | vpxor (3*32)(dst), x3, x3; \ | ||
176 | vpxor (4*32)(dst), x4, x4; \ | ||
177 | vpxor (5*32)(dst), x5, x5; \ | ||
178 | vpxor (6*32)(dst), x6, x6; \ | ||
179 | vpxor (7*32)(dst), x7, x7; \ | ||
180 | store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7); | ||
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 22ce4f683e55..432f1d76ceb8 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Shared glue code for 128bit block ciphers | 2 | * Shared glue code for 128bit block ciphers |
3 | * | 3 | * |
4 | * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
5 | * | 5 | * |
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | 6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: |
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | 7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> |
@@ -304,4 +304,99 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |||
304 | } | 304 | } |
305 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); | 305 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); |
306 | 306 | ||
307 | static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | ||
308 | void *ctx, | ||
309 | struct blkcipher_desc *desc, | ||
310 | struct blkcipher_walk *walk) | ||
311 | { | ||
312 | const unsigned int bsize = 128 / 8; | ||
313 | unsigned int nbytes = walk->nbytes; | ||
314 | u128 *src = (u128 *)walk->src.virt.addr; | ||
315 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
316 | unsigned int num_blocks, func_bytes; | ||
317 | unsigned int i; | ||
318 | |||
319 | /* Process multi-block batch */ | ||
320 | for (i = 0; i < gctx->num_funcs; i++) { | ||
321 | num_blocks = gctx->funcs[i].num_blocks; | ||
322 | func_bytes = bsize * num_blocks; | ||
323 | |||
324 | if (nbytes >= func_bytes) { | ||
325 | do { | ||
326 | gctx->funcs[i].fn_u.xts(ctx, dst, src, | ||
327 | (le128 *)walk->iv); | ||
328 | |||
329 | src += num_blocks; | ||
330 | dst += num_blocks; | ||
331 | nbytes -= func_bytes; | ||
332 | } while (nbytes >= func_bytes); | ||
333 | |||
334 | if (nbytes < bsize) | ||
335 | goto done; | ||
336 | } | ||
337 | } | ||
338 | |||
339 | done: | ||
340 | return nbytes; | ||
341 | } | ||
342 | |||
343 | /* for implementations implementing faster XTS IV generator */ | ||
344 | int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | ||
345 | struct blkcipher_desc *desc, struct scatterlist *dst, | ||
346 | struct scatterlist *src, unsigned int nbytes, | ||
347 | void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src), | ||
348 | void *tweak_ctx, void *crypt_ctx) | ||
349 | { | ||
350 | const unsigned int bsize = 128 / 8; | ||
351 | bool fpu_enabled = false; | ||
352 | struct blkcipher_walk walk; | ||
353 | int err; | ||
354 | |||
355 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
356 | |||
357 | err = blkcipher_walk_virt(desc, &walk); | ||
358 | nbytes = walk.nbytes; | ||
359 | if (!nbytes) | ||
360 | return err; | ||
361 | |||
362 | /* set minimum length to bsize, for tweak_fn */ | ||
363 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
364 | desc, fpu_enabled, | ||
365 | nbytes < bsize ? bsize : nbytes); | ||
366 | |||
367 | /* calculate first value of T */ | ||
368 | tweak_fn(tweak_ctx, walk.iv, walk.iv); | ||
369 | |||
370 | while (nbytes) { | ||
371 | nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); | ||
372 | |||
373 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
374 | nbytes = walk.nbytes; | ||
375 | } | ||
376 | |||
377 | glue_fpu_end(fpu_enabled); | ||
378 | |||
379 | return err; | ||
380 | } | ||
381 | EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); | ||
382 | |||
383 | void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, | ||
384 | common_glue_func_t fn) | ||
385 | { | ||
386 | le128 ivblk = *iv; | ||
387 | |||
388 | /* generate next IV */ | ||
389 | le128_gf128mul_x_ble(iv, &ivblk); | ||
390 | |||
391 | /* CC <- T xor C */ | ||
392 | u128_xor(dst, src, (u128 *)&ivblk); | ||
393 | |||
394 | /* PP <- D(Key2,CC) */ | ||
395 | fn(ctx, (u8 *)dst, (u8 *)dst); | ||
396 | |||
397 | /* P <- T xor PP */ | ||
398 | u128_xor(dst, dst, (u128 *)&ivblk); | ||
399 | } | ||
400 | EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one); | ||
401 | |||
307 | MODULE_LICENSE("GPL"); | 402 | MODULE_LICENSE("GPL"); |
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 43c938612b74..2f202f49872b 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S | |||
@@ -4,8 +4,7 @@ | |||
4 | * Copyright (C) 2012 Johannes Goetzfried | 4 | * Copyright (C) 2012 Johannes Goetzfried |
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | 5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> |
6 | * | 6 | * |
7 | * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by | 7 | * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | 8 | * |
10 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
@@ -34,6 +33,8 @@ | |||
34 | 33 | ||
35 | .Lbswap128_mask: | 34 | .Lbswap128_mask: |
36 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | 35 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 |
36 | .Lxts_gf128mul_and_shl1_mask: | ||
37 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
37 | 38 | ||
38 | .text | 39 | .text |
39 | 40 | ||
@@ -739,3 +740,43 @@ ENTRY(serpent_ctr_8way_avx) | |||
739 | 740 | ||
740 | ret; | 741 | ret; |
741 | ENDPROC(serpent_ctr_8way_avx) | 742 | ENDPROC(serpent_ctr_8way_avx) |
743 | |||
744 | ENTRY(serpent_xts_enc_8way_avx) | ||
745 | /* input: | ||
746 | * %rdi: ctx, CTX | ||
747 | * %rsi: dst | ||
748 | * %rdx: src | ||
749 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
750 | */ | ||
751 | |||
752 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ | ||
753 | load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, | ||
754 | RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask); | ||
755 | |||
756 | call __serpent_enc_blk8_avx; | ||
757 | |||
758 | /* dst <= regs xor IVs(in dst) */ | ||
759 | store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
760 | |||
761 | ret; | ||
762 | ENDPROC(serpent_xts_enc_8way_avx) | ||
763 | |||
764 | ENTRY(serpent_xts_dec_8way_avx) | ||
765 | /* input: | ||
766 | * %rdi: ctx, CTX | ||
767 | * %rsi: dst | ||
768 | * %rdx: src | ||
769 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
770 | */ | ||
771 | |||
772 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ | ||
773 | load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, | ||
774 | RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask); | ||
775 | |||
776 | call __serpent_dec_blk8_avx; | ||
777 | |||
778 | /* dst <= regs xor IVs(in dst) */ | ||
779 | store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); | ||
780 | |||
781 | ret; | ||
782 | ENDPROC(serpent_xts_dec_8way_avx) | ||
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S new file mode 100644 index 000000000000..b222085cccac --- /dev/null +++ b/arch/x86/crypto/serpent-avx2-asm_64.S | |||
@@ -0,0 +1,800 @@ | |||
1 | /* | ||
2 | * x86_64/AVX2 assembler optimized version of Serpent | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Based on AVX assembler implementation of Serpent by: | ||
7 | * Copyright © 2012 Johannes Goetzfried | ||
8 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | #include <linux/linkage.h> | ||
18 | #include "glue_helper-asm-avx2.S" | ||
19 | |||
20 | .file "serpent-avx2-asm_64.S" | ||
21 | |||
22 | .data | ||
23 | .align 16 | ||
24 | |||
25 | .Lbswap128_mask: | ||
26 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
27 | .Lxts_gf128mul_and_shl1_mask_0: | ||
28 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
29 | .Lxts_gf128mul_and_shl1_mask_1: | ||
30 | .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 | ||
31 | |||
32 | .text | ||
33 | |||
34 | #define CTX %rdi | ||
35 | |||
36 | #define RNOT %ymm0 | ||
37 | #define tp %ymm1 | ||
38 | |||
39 | #define RA1 %ymm2 | ||
40 | #define RA2 %ymm3 | ||
41 | #define RB1 %ymm4 | ||
42 | #define RB2 %ymm5 | ||
43 | #define RC1 %ymm6 | ||
44 | #define RC2 %ymm7 | ||
45 | #define RD1 %ymm8 | ||
46 | #define RD2 %ymm9 | ||
47 | #define RE1 %ymm10 | ||
48 | #define RE2 %ymm11 | ||
49 | |||
50 | #define RK0 %ymm12 | ||
51 | #define RK1 %ymm13 | ||
52 | #define RK2 %ymm14 | ||
53 | #define RK3 %ymm15 | ||
54 | |||
55 | #define RK0x %xmm12 | ||
56 | #define RK1x %xmm13 | ||
57 | #define RK2x %xmm14 | ||
58 | #define RK3x %xmm15 | ||
59 | |||
60 | #define S0_1(x0, x1, x2, x3, x4) \ | ||
61 | vpor x0, x3, tp; \ | ||
62 | vpxor x3, x0, x0; \ | ||
63 | vpxor x2, x3, x4; \ | ||
64 | vpxor RNOT, x4, x4; \ | ||
65 | vpxor x1, tp, x3; \ | ||
66 | vpand x0, x1, x1; \ | ||
67 | vpxor x4, x1, x1; \ | ||
68 | vpxor x0, x2, x2; | ||
69 | #define S0_2(x0, x1, x2, x3, x4) \ | ||
70 | vpxor x3, x0, x0; \ | ||
71 | vpor x0, x4, x4; \ | ||
72 | vpxor x2, x0, x0; \ | ||
73 | vpand x1, x2, x2; \ | ||
74 | vpxor x2, x3, x3; \ | ||
75 | vpxor RNOT, x1, x1; \ | ||
76 | vpxor x4, x2, x2; \ | ||
77 | vpxor x2, x1, x1; | ||
78 | |||
79 | #define S1_1(x0, x1, x2, x3, x4) \ | ||
80 | vpxor x0, x1, tp; \ | ||
81 | vpxor x3, x0, x0; \ | ||
82 | vpxor RNOT, x3, x3; \ | ||
83 | vpand tp, x1, x4; \ | ||
84 | vpor tp, x0, x0; \ | ||
85 | vpxor x2, x3, x3; \ | ||
86 | vpxor x3, x0, x0; \ | ||
87 | vpxor x3, tp, x1; | ||
88 | #define S1_2(x0, x1, x2, x3, x4) \ | ||
89 | vpxor x4, x3, x3; \ | ||
90 | vpor x4, x1, x1; \ | ||
91 | vpxor x2, x4, x4; \ | ||
92 | vpand x0, x2, x2; \ | ||
93 | vpxor x1, x2, x2; \ | ||
94 | vpor x0, x1, x1; \ | ||
95 | vpxor RNOT, x0, x0; \ | ||
96 | vpxor x2, x0, x0; \ | ||
97 | vpxor x1, x4, x4; | ||
98 | |||
99 | #define S2_1(x0, x1, x2, x3, x4) \ | ||
100 | vpxor RNOT, x3, x3; \ | ||
101 | vpxor x0, x1, x1; \ | ||
102 | vpand x2, x0, tp; \ | ||
103 | vpxor x3, tp, tp; \ | ||
104 | vpor x0, x3, x3; \ | ||
105 | vpxor x1, x2, x2; \ | ||
106 | vpxor x1, x3, x3; \ | ||
107 | vpand tp, x1, x1; | ||
108 | #define S2_2(x0, x1, x2, x3, x4) \ | ||
109 | vpxor x2, tp, tp; \ | ||
110 | vpand x3, x2, x2; \ | ||
111 | vpor x1, x3, x3; \ | ||
112 | vpxor RNOT, tp, tp; \ | ||
113 | vpxor tp, x3, x3; \ | ||
114 | vpxor tp, x0, x4; \ | ||
115 | vpxor x2, tp, x0; \ | ||
116 | vpor x2, x1, x1; | ||
117 | |||
118 | #define S3_1(x0, x1, x2, x3, x4) \ | ||
119 | vpxor x3, x1, tp; \ | ||
120 | vpor x0, x3, x3; \ | ||
121 | vpand x0, x1, x4; \ | ||
122 | vpxor x2, x0, x0; \ | ||
123 | vpxor tp, x2, x2; \ | ||
124 | vpand x3, tp, x1; \ | ||
125 | vpxor x3, x2, x2; \ | ||
126 | vpor x4, x0, x0; \ | ||
127 | vpxor x3, x4, x4; | ||
128 | #define S3_2(x0, x1, x2, x3, x4) \ | ||
129 | vpxor x0, x1, x1; \ | ||
130 | vpand x3, x0, x0; \ | ||
131 | vpand x4, x3, x3; \ | ||
132 | vpxor x2, x3, x3; \ | ||
133 | vpor x1, x4, x4; \ | ||
134 | vpand x1, x2, x2; \ | ||
135 | vpxor x3, x4, x4; \ | ||
136 | vpxor x3, x0, x0; \ | ||
137 | vpxor x2, x3, x3; | ||
138 | |||
139 | #define S4_1(x0, x1, x2, x3, x4) \ | ||
140 | vpand x0, x3, tp; \ | ||
141 | vpxor x3, x0, x0; \ | ||
142 | vpxor x2, tp, tp; \ | ||
143 | vpor x3, x2, x2; \ | ||
144 | vpxor x1, x0, x0; \ | ||
145 | vpxor tp, x3, x4; \ | ||
146 | vpor x0, x2, x2; \ | ||
147 | vpxor x1, x2, x2; | ||
148 | #define S4_2(x0, x1, x2, x3, x4) \ | ||
149 | vpand x0, x1, x1; \ | ||
150 | vpxor x4, x1, x1; \ | ||
151 | vpand x2, x4, x4; \ | ||
152 | vpxor tp, x2, x2; \ | ||
153 | vpxor x0, x4, x4; \ | ||
154 | vpor x1, tp, x3; \ | ||
155 | vpxor RNOT, x1, x1; \ | ||
156 | vpxor x0, x3, x3; | ||
157 | |||
158 | #define S5_1(x0, x1, x2, x3, x4) \ | ||
159 | vpor x0, x1, tp; \ | ||
160 | vpxor tp, x2, x2; \ | ||
161 | vpxor RNOT, x3, x3; \ | ||
162 | vpxor x0, x1, x4; \ | ||
163 | vpxor x2, x0, x0; \ | ||
164 | vpand x4, tp, x1; \ | ||
165 | vpor x3, x4, x4; \ | ||
166 | vpxor x0, x4, x4; | ||
167 | #define S5_2(x0, x1, x2, x3, x4) \ | ||
168 | vpand x3, x0, x0; \ | ||
169 | vpxor x3, x1, x1; \ | ||
170 | vpxor x2, x3, x3; \ | ||
171 | vpxor x1, x0, x0; \ | ||
172 | vpand x4, x2, x2; \ | ||
173 | vpxor x2, x1, x1; \ | ||
174 | vpand x0, x2, x2; \ | ||
175 | vpxor x2, x3, x3; | ||
176 | |||
177 | #define S6_1(x0, x1, x2, x3, x4) \ | ||
178 | vpxor x0, x3, x3; \ | ||
179 | vpxor x2, x1, tp; \ | ||
180 | vpxor x0, x2, x2; \ | ||
181 | vpand x3, x0, x0; \ | ||
182 | vpor x3, tp, tp; \ | ||
183 | vpxor RNOT, x1, x4; \ | ||
184 | vpxor tp, x0, x0; \ | ||
185 | vpxor x2, tp, x1; | ||
186 | #define S6_2(x0, x1, x2, x3, x4) \ | ||
187 | vpxor x4, x3, x3; \ | ||
188 | vpxor x0, x4, x4; \ | ||
189 | vpand x0, x2, x2; \ | ||
190 | vpxor x1, x4, x4; \ | ||
191 | vpxor x3, x2, x2; \ | ||
192 | vpand x1, x3, x3; \ | ||
193 | vpxor x0, x3, x3; \ | ||
194 | vpxor x2, x1, x1; | ||
195 | |||
196 | #define S7_1(x0, x1, x2, x3, x4) \ | ||
197 | vpxor RNOT, x1, tp; \ | ||
198 | vpxor RNOT, x0, x0; \ | ||
199 | vpand x2, tp, x1; \ | ||
200 | vpxor x3, x1, x1; \ | ||
201 | vpor tp, x3, x3; \ | ||
202 | vpxor x2, tp, x4; \ | ||
203 | vpxor x3, x2, x2; \ | ||
204 | vpxor x0, x3, x3; \ | ||
205 | vpor x1, x0, x0; | ||
206 | #define S7_2(x0, x1, x2, x3, x4) \ | ||
207 | vpand x0, x2, x2; \ | ||
208 | vpxor x4, x0, x0; \ | ||
209 | vpxor x3, x4, x4; \ | ||
210 | vpand x0, x3, x3; \ | ||
211 | vpxor x1, x4, x4; \ | ||
212 | vpxor x4, x2, x2; \ | ||
213 | vpxor x1, x3, x3; \ | ||
214 | vpor x0, x4, x4; \ | ||
215 | vpxor x1, x4, x4; | ||
216 | |||
217 | #define SI0_1(x0, x1, x2, x3, x4) \ | ||
218 | vpxor x0, x1, x1; \ | ||
219 | vpor x1, x3, tp; \ | ||
220 | vpxor x1, x3, x4; \ | ||
221 | vpxor RNOT, x0, x0; \ | ||
222 | vpxor tp, x2, x2; \ | ||
223 | vpxor x0, tp, x3; \ | ||
224 | vpand x1, x0, x0; \ | ||
225 | vpxor x2, x0, x0; | ||
226 | #define SI0_2(x0, x1, x2, x3, x4) \ | ||
227 | vpand x3, x2, x2; \ | ||
228 | vpxor x4, x3, x3; \ | ||
229 | vpxor x3, x2, x2; \ | ||
230 | vpxor x3, x1, x1; \ | ||
231 | vpand x0, x3, x3; \ | ||
232 | vpxor x0, x1, x1; \ | ||
233 | vpxor x2, x0, x0; \ | ||
234 | vpxor x3, x4, x4; | ||
235 | |||
236 | #define SI1_1(x0, x1, x2, x3, x4) \ | ||
237 | vpxor x3, x1, x1; \ | ||
238 | vpxor x2, x0, tp; \ | ||
239 | vpxor RNOT, x2, x2; \ | ||
240 | vpor x1, x0, x4; \ | ||
241 | vpxor x3, x4, x4; \ | ||
242 | vpand x1, x3, x3; \ | ||
243 | vpxor x2, x1, x1; \ | ||
244 | vpand x4, x2, x2; | ||
245 | #define SI1_2(x0, x1, x2, x3, x4) \ | ||
246 | vpxor x1, x4, x4; \ | ||
247 | vpor x3, x1, x1; \ | ||
248 | vpxor tp, x3, x3; \ | ||
249 | vpxor tp, x2, x2; \ | ||
250 | vpor x4, tp, x0; \ | ||
251 | vpxor x4, x2, x2; \ | ||
252 | vpxor x0, x1, x1; \ | ||
253 | vpxor x1, x4, x4; | ||
254 | |||
255 | #define SI2_1(x0, x1, x2, x3, x4) \ | ||
256 | vpxor x1, x2, x2; \ | ||
257 | vpxor RNOT, x3, tp; \ | ||
258 | vpor x2, tp, tp; \ | ||
259 | vpxor x3, x2, x2; \ | ||
260 | vpxor x0, x3, x4; \ | ||
261 | vpxor x1, tp, x3; \ | ||
262 | vpor x2, x1, x1; \ | ||
263 | vpxor x0, x2, x2; | ||
264 | #define SI2_2(x0, x1, x2, x3, x4) \ | ||
265 | vpxor x4, x1, x1; \ | ||
266 | vpor x3, x4, x4; \ | ||
267 | vpxor x3, x2, x2; \ | ||
268 | vpxor x2, x4, x4; \ | ||
269 | vpand x1, x2, x2; \ | ||
270 | vpxor x3, x2, x2; \ | ||
271 | vpxor x4, x3, x3; \ | ||
272 | vpxor x0, x4, x4; | ||
273 | |||
274 | #define SI3_1(x0, x1, x2, x3, x4) \ | ||
275 | vpxor x1, x2, x2; \ | ||
276 | vpand x2, x1, tp; \ | ||
277 | vpxor x0, tp, tp; \ | ||
278 | vpor x1, x0, x0; \ | ||
279 | vpxor x3, x1, x4; \ | ||
280 | vpxor x3, x0, x0; \ | ||
281 | vpor tp, x3, x3; \ | ||
282 | vpxor x2, tp, x1; | ||
283 | #define SI3_2(x0, x1, x2, x3, x4) \ | ||
284 | vpxor x3, x1, x1; \ | ||
285 | vpxor x2, x0, x0; \ | ||
286 | vpxor x3, x2, x2; \ | ||
287 | vpand x1, x3, x3; \ | ||
288 | vpxor x0, x1, x1; \ | ||
289 | vpand x2, x0, x0; \ | ||
290 | vpxor x3, x4, x4; \ | ||
291 | vpxor x0, x3, x3; \ | ||
292 | vpxor x1, x0, x0; | ||
293 | |||
294 | #define SI4_1(x0, x1, x2, x3, x4) \ | ||
295 | vpxor x3, x2, x2; \ | ||
296 | vpand x1, x0, tp; \ | ||
297 | vpxor x2, tp, tp; \ | ||
298 | vpor x3, x2, x2; \ | ||
299 | vpxor RNOT, x0, x4; \ | ||
300 | vpxor tp, x1, x1; \ | ||
301 | vpxor x2, tp, x0; \ | ||
302 | vpand x4, x2, x2; | ||
303 | #define SI4_2(x0, x1, x2, x3, x4) \ | ||
304 | vpxor x0, x2, x2; \ | ||
305 | vpor x4, x0, x0; \ | ||
306 | vpxor x3, x0, x0; \ | ||
307 | vpand x2, x3, x3; \ | ||
308 | vpxor x3, x4, x4; \ | ||
309 | vpxor x1, x3, x3; \ | ||
310 | vpand x0, x1, x1; \ | ||
311 | vpxor x1, x4, x4; \ | ||
312 | vpxor x3, x0, x0; | ||
313 | |||
314 | #define SI5_1(x0, x1, x2, x3, x4) \ | ||
315 | vpor x2, x1, tp; \ | ||
316 | vpxor x1, x2, x2; \ | ||
317 | vpxor x3, tp, tp; \ | ||
318 | vpand x1, x3, x3; \ | ||
319 | vpxor x3, x2, x2; \ | ||
320 | vpor x0, x3, x3; \ | ||
321 | vpxor RNOT, x0, x0; \ | ||
322 | vpxor x2, x3, x3; \ | ||
323 | vpor x0, x2, x2; | ||
324 | #define SI5_2(x0, x1, x2, x3, x4) \ | ||
325 | vpxor tp, x1, x4; \ | ||
326 | vpxor x4, x2, x2; \ | ||
327 | vpand x0, x4, x4; \ | ||
328 | vpxor tp, x0, x0; \ | ||
329 | vpxor x3, tp, x1; \ | ||
330 | vpand x2, x0, x0; \ | ||
331 | vpxor x3, x2, x2; \ | ||
332 | vpxor x2, x0, x0; \ | ||
333 | vpxor x4, x2, x2; \ | ||
334 | vpxor x3, x4, x4; | ||
335 | |||
336 | #define SI6_1(x0, x1, x2, x3, x4) \ | ||
337 | vpxor x2, x0, x0; \ | ||
338 | vpand x3, x0, tp; \ | ||
339 | vpxor x3, x2, x2; \ | ||
340 | vpxor x2, tp, tp; \ | ||
341 | vpxor x1, x3, x3; \ | ||
342 | vpor x0, x2, x2; \ | ||
343 | vpxor x3, x2, x2; \ | ||
344 | vpand tp, x3, x3; | ||
345 | #define SI6_2(x0, x1, x2, x3, x4) \ | ||
346 | vpxor RNOT, tp, tp; \ | ||
347 | vpxor x1, x3, x3; \ | ||
348 | vpand x2, x1, x1; \ | ||
349 | vpxor tp, x0, x4; \ | ||
350 | vpxor x4, x3, x3; \ | ||
351 | vpxor x2, x4, x4; \ | ||
352 | vpxor x1, tp, x0; \ | ||
353 | vpxor x0, x2, x2; | ||
354 | |||
355 | #define SI7_1(x0, x1, x2, x3, x4) \ | ||
356 | vpand x0, x3, tp; \ | ||
357 | vpxor x2, x0, x0; \ | ||
358 | vpor x3, x2, x2; \ | ||
359 | vpxor x1, x3, x4; \ | ||
360 | vpxor RNOT, x0, x0; \ | ||
361 | vpor tp, x1, x1; \ | ||
362 | vpxor x0, x4, x4; \ | ||
363 | vpand x2, x0, x0; \ | ||
364 | vpxor x1, x0, x0; | ||
365 | #define SI7_2(x0, x1, x2, x3, x4) \ | ||
366 | vpand x2, x1, x1; \ | ||
367 | vpxor x2, tp, x3; \ | ||
368 | vpxor x3, x4, x4; \ | ||
369 | vpand x3, x2, x2; \ | ||
370 | vpor x0, x3, x3; \ | ||
371 | vpxor x4, x1, x1; \ | ||
372 | vpxor x4, x3, x3; \ | ||
373 | vpand x0, x4, x4; \ | ||
374 | vpxor x2, x4, x4; | ||
375 | |||
376 | #define get_key(i,j,t) \ | ||
377 | vpbroadcastd (4*(i)+(j))*4(CTX), t; | ||
378 | |||
379 | #define K2(x0, x1, x2, x3, x4, i) \ | ||
380 | get_key(i, 0, RK0); \ | ||
381 | get_key(i, 1, RK1); \ | ||
382 | get_key(i, 2, RK2); \ | ||
383 | get_key(i, 3, RK3); \ | ||
384 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
385 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
386 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
387 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
388 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
389 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
390 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
391 | vpxor RK3, x3 ## 2, x3 ## 2; | ||
392 | |||
393 | #define LK2(x0, x1, x2, x3, x4, i) \ | ||
394 | vpslld $13, x0 ## 1, x4 ## 1; \ | ||
395 | vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
396 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
397 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
398 | vpslld $3, x2 ## 1, x4 ## 1; \ | ||
399 | vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
400 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
401 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
402 | vpslld $13, x0 ## 2, x4 ## 2; \ | ||
403 | vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
404 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
405 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
406 | vpslld $3, x2 ## 2, x4 ## 2; \ | ||
407 | vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
408 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
409 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
410 | vpslld $1, x1 ## 1, x4 ## 1; \ | ||
411 | vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
412 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
413 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
414 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
415 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
416 | get_key(i, 1, RK1); \ | ||
417 | vpslld $1, x1 ## 2, x4 ## 2; \ | ||
418 | vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
419 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
420 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
421 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
422 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
423 | get_key(i, 3, RK3); \ | ||
424 | vpslld $7, x3 ## 1, x4 ## 1; \ | ||
425 | vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
426 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
427 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
428 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
429 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
430 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
431 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
432 | get_key(i, 0, RK0); \ | ||
433 | vpslld $7, x3 ## 2, x4 ## 2; \ | ||
434 | vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
435 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
436 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
437 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
438 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
439 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
440 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
441 | get_key(i, 2, RK2); \ | ||
442 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
443 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
444 | vpslld $5, x0 ## 1, x4 ## 1; \ | ||
445 | vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
446 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
447 | vpslld $22, x2 ## 1, x4 ## 1; \ | ||
448 | vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
449 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
450 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
451 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
452 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
453 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
454 | vpslld $5, x0 ## 2, x4 ## 2; \ | ||
455 | vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
456 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
457 | vpslld $22, x2 ## 2, x4 ## 2; \ | ||
458 | vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
459 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
460 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
461 | vpxor RK2, x2 ## 2, x2 ## 2; | ||
462 | |||
463 | #define KL2(x0, x1, x2, x3, x4, i) \ | ||
464 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
465 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
466 | vpsrld $5, x0 ## 1, x4 ## 1; \ | ||
467 | vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
468 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
469 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
470 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
471 | vpsrld $22, x2 ## 1, x4 ## 1; \ | ||
472 | vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
473 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
474 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
475 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
476 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
477 | vpsrld $5, x0 ## 2, x4 ## 2; \ | ||
478 | vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
479 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
480 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
481 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
482 | vpsrld $22, x2 ## 2, x4 ## 2; \ | ||
483 | vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
484 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
485 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
486 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
487 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
488 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
489 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
490 | vpsrld $1, x1 ## 1, x4 ## 1; \ | ||
491 | vpslld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
492 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
493 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
494 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
495 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
496 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
497 | vpsrld $1, x1 ## 2, x4 ## 2; \ | ||
498 | vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
499 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
500 | vpsrld $7, x3 ## 1, x4 ## 1; \ | ||
501 | vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
502 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
503 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
504 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
505 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
506 | vpsrld $7, x3 ## 2, x4 ## 2; \ | ||
507 | vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
508 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
509 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
510 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
511 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
512 | vpsrld $13, x0 ## 1, x4 ## 1; \ | ||
513 | vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
514 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
515 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
516 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
517 | vpsrld $3, x2 ## 1, x4 ## 1; \ | ||
518 | vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
519 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
520 | vpsrld $13, x0 ## 2, x4 ## 2; \ | ||
521 | vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
522 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
523 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
524 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
525 | vpsrld $3, x2 ## 2, x4 ## 2; \ | ||
526 | vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
527 | vpor x4 ## 2, x2 ## 2, x2 ## 2; | ||
528 | |||
529 | #define S(SBOX, x0, x1, x2, x3, x4) \ | ||
530 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
531 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
532 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
533 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
534 | |||
535 | #define SP(SBOX, x0, x1, x2, x3, x4, i) \ | ||
536 | get_key(i, 0, RK0); \ | ||
537 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
538 | get_key(i, 2, RK2); \ | ||
539 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
540 | get_key(i, 3, RK3); \ | ||
541 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
542 | get_key(i, 1, RK1); \ | ||
543 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
544 | |||
545 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
546 | vpunpckldq x1, x0, t0; \ | ||
547 | vpunpckhdq x1, x0, t2; \ | ||
548 | vpunpckldq x3, x2, t1; \ | ||
549 | vpunpckhdq x3, x2, x3; \ | ||
550 | \ | ||
551 | vpunpcklqdq t1, t0, x0; \ | ||
552 | vpunpckhqdq t1, t0, x1; \ | ||
553 | vpunpcklqdq x3, t2, x2; \ | ||
554 | vpunpckhqdq x3, t2, x3; | ||
555 | |||
556 | #define read_blocks(x0, x1, x2, x3, t0, t1, t2) \ | ||
557 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
558 | |||
559 | #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ | ||
560 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
561 | |||
562 | .align 8 | ||
563 | __serpent_enc_blk16: | ||
564 | /* input: | ||
565 | * %rdi: ctx, CTX | ||
566 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext | ||
567 | * output: | ||
568 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext | ||
569 | */ | ||
570 | |||
571 | vpcmpeqd RNOT, RNOT, RNOT; | ||
572 | |||
573 | read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
574 | read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
575 | |||
576 | K2(RA, RB, RC, RD, RE, 0); | ||
577 | S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); | ||
578 | S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); | ||
579 | S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); | ||
580 | S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); | ||
581 | S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); | ||
582 | S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); | ||
583 | S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); | ||
584 | S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); | ||
585 | S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); | ||
586 | S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); | ||
587 | S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); | ||
588 | S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); | ||
589 | S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); | ||
590 | S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); | ||
591 | S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); | ||
592 | S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); | ||
593 | S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); | ||
594 | S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); | ||
595 | S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); | ||
596 | S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); | ||
597 | S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); | ||
598 | S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); | ||
599 | S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); | ||
600 | S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); | ||
601 | S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); | ||
602 | S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); | ||
603 | S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); | ||
604 | S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); | ||
605 | S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); | ||
606 | S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); | ||
607 | S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); | ||
608 | S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); | ||
609 | |||
610 | write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
611 | write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
612 | |||
613 | ret; | ||
614 | ENDPROC(__serpent_enc_blk16) | ||
615 | |||
616 | .align 8 | ||
617 | __serpent_dec_blk16: | ||
618 | /* input: | ||
619 | * %rdi: ctx, CTX | ||
620 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext | ||
621 | * output: | ||
622 | * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: plaintext | ||
623 | */ | ||
624 | |||
625 | vpcmpeqd RNOT, RNOT, RNOT; | ||
626 | |||
627 | read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
628 | read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
629 | |||
630 | K2(RA, RB, RC, RD, RE, 32); | ||
631 | SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); | ||
632 | SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); | ||
633 | SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); | ||
634 | SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); | ||
635 | SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); | ||
636 | SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); | ||
637 | SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); | ||
638 | SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); | ||
639 | SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); | ||
640 | SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); | ||
641 | SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); | ||
642 | SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); | ||
643 | SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); | ||
644 | SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); | ||
645 | SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); | ||
646 | SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); | ||
647 | SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); | ||
648 | SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); | ||
649 | SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); | ||
650 | SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); | ||
651 | SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); | ||
652 | SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); | ||
653 | SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); | ||
654 | SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); | ||
655 | SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); | ||
656 | SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); | ||
657 | SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); | ||
658 | SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); | ||
659 | SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); | ||
660 | SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); | ||
661 | SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); | ||
662 | S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); | ||
663 | |||
664 | write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); | ||
665 | write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); | ||
666 | |||
667 | ret; | ||
668 | ENDPROC(__serpent_dec_blk16) | ||
669 | |||
670 | ENTRY(serpent_ecb_enc_16way) | ||
671 | /* input: | ||
672 | * %rdi: ctx, CTX | ||
673 | * %rsi: dst | ||
674 | * %rdx: src | ||
675 | */ | ||
676 | |||
677 | vzeroupper; | ||
678 | |||
679 | load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
680 | |||
681 | call __serpent_enc_blk16; | ||
682 | |||
683 | store_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
684 | |||
685 | vzeroupper; | ||
686 | |||
687 | ret; | ||
688 | ENDPROC(serpent_ecb_enc_16way) | ||
689 | |||
690 | ENTRY(serpent_ecb_dec_16way) | ||
691 | /* input: | ||
692 | * %rdi: ctx, CTX | ||
693 | * %rsi: dst | ||
694 | * %rdx: src | ||
695 | */ | ||
696 | |||
697 | vzeroupper; | ||
698 | |||
699 | load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
700 | |||
701 | call __serpent_dec_blk16; | ||
702 | |||
703 | store_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); | ||
704 | |||
705 | vzeroupper; | ||
706 | |||
707 | ret; | ||
708 | ENDPROC(serpent_ecb_dec_16way) | ||
709 | |||
710 | ENTRY(serpent_cbc_dec_16way) | ||
711 | /* input: | ||
712 | * %rdi: ctx, CTX | ||
713 | * %rsi: dst | ||
714 | * %rdx: src | ||
715 | */ | ||
716 | |||
717 | vzeroupper; | ||
718 | |||
719 | load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
720 | |||
721 | call __serpent_dec_blk16; | ||
722 | |||
723 | store_cbc_16way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2, | ||
724 | RK0); | ||
725 | |||
726 | vzeroupper; | ||
727 | |||
728 | ret; | ||
729 | ENDPROC(serpent_cbc_dec_16way) | ||
730 | |||
731 | ENTRY(serpent_ctr_16way) | ||
732 | /* input: | ||
733 | * %rdi: ctx, CTX | ||
734 | * %rsi: dst (16 blocks) | ||
735 | * %rdx: src (16 blocks) | ||
736 | * %rcx: iv (little endian, 128bit) | ||
737 | */ | ||
738 | |||
739 | vzeroupper; | ||
740 | |||
741 | load_ctr_16way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, | ||
742 | RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, | ||
743 | tp); | ||
744 | |||
745 | call __serpent_enc_blk16; | ||
746 | |||
747 | store_ctr_16way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
748 | |||
749 | vzeroupper; | ||
750 | |||
751 | ret; | ||
752 | ENDPROC(serpent_ctr_16way) | ||
753 | |||
754 | ENTRY(serpent_xts_enc_16way) | ||
755 | /* input: | ||
756 | * %rdi: ctx, CTX | ||
757 | * %rsi: dst (16 blocks) | ||
758 | * %rdx: src (16 blocks) | ||
759 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
760 | */ | ||
761 | |||
762 | vzeroupper; | ||
763 | |||
764 | load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, | ||
765 | RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, | ||
766 | .Lxts_gf128mul_and_shl1_mask_0, | ||
767 | .Lxts_gf128mul_and_shl1_mask_1); | ||
768 | |||
769 | call __serpent_enc_blk16; | ||
770 | |||
771 | store_xts_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
772 | |||
773 | vzeroupper; | ||
774 | |||
775 | ret; | ||
776 | ENDPROC(serpent_xts_enc_16way) | ||
777 | |||
778 | ENTRY(serpent_xts_dec_16way) | ||
779 | /* input: | ||
780 | * %rdi: ctx, CTX | ||
781 | * %rsi: dst (16 blocks) | ||
782 | * %rdx: src (16 blocks) | ||
783 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
784 | */ | ||
785 | |||
786 | vzeroupper; | ||
787 | |||
788 | load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, | ||
789 | RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, | ||
790 | .Lxts_gf128mul_and_shl1_mask_0, | ||
791 | .Lxts_gf128mul_and_shl1_mask_1); | ||
792 | |||
793 | call __serpent_dec_blk16; | ||
794 | |||
795 | store_xts_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); | ||
796 | |||
797 | vzeroupper; | ||
798 | |||
799 | ret; | ||
800 | ENDPROC(serpent_xts_dec_16way) | ||
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c new file mode 100644 index 000000000000..23aabc6c20a5 --- /dev/null +++ b/arch/x86/crypto/serpent_avx2_glue.c | |||
@@ -0,0 +1,562 @@ | |||
1 | /* | ||
2 | * Glue Code for x86_64/AVX2 assembler optimized version of Serpent | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/crypto.h> | ||
16 | #include <linux/err.h> | ||
17 | #include <crypto/algapi.h> | ||
18 | #include <crypto/ctr.h> | ||
19 | #include <crypto/lrw.h> | ||
20 | #include <crypto/xts.h> | ||
21 | #include <crypto/serpent.h> | ||
22 | #include <asm/xcr.h> | ||
23 | #include <asm/xsave.h> | ||
24 | #include <asm/crypto/serpent-avx.h> | ||
25 | #include <asm/crypto/ablk_helper.h> | ||
26 | #include <asm/crypto/glue_helper.h> | ||
27 | |||
28 | #define SERPENT_AVX2_PARALLEL_BLOCKS 16 | ||
29 | |||
30 | /* 16-way AVX2 parallel cipher functions */ | ||
31 | asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst, | ||
32 | const u8 *src); | ||
33 | asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst, | ||
34 | const u8 *src); | ||
35 | asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src); | ||
36 | |||
37 | asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src, | ||
38 | le128 *iv); | ||
39 | asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst, | ||
40 | const u8 *src, le128 *iv); | ||
41 | asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst, | ||
42 | const u8 *src, le128 *iv); | ||
43 | |||
/*
 * ECB encryption dispatch table: the glue layer picks the widest
 * implementation the remaining data allows — 16-way AVX2, then 8-way
 * AVX, then the scalar one-block fallback.  FPU use starts at 8 blocks
 * because the 8-way AVX functions are reused here.
 */
static const struct common_glue_ctx serpent_enc = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
	} }
};
59 | |||
/* CTR dispatch table: 16-way AVX2 / 8-way AVX / one-block fallback. */
static const struct common_glue_ctx serpent_ctr = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
	} }
};
75 | |||
/* XTS encryption dispatch table: 16-way AVX2 / 8-way AVX / one block. */
static const struct common_glue_ctx serpent_enc_xts = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
	} }
};
91 | |||
/* ECB decryption dispatch table: 16-way AVX2 / 8-way AVX / one block. */
static const struct common_glue_ctx serpent_dec = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
	} }
};
107 | |||
/*
 * CBC decryption dispatch table.  Only decryption parallelizes in CBC
 * (each plaintext block depends only on two ciphertext blocks); CBC
 * encryption stays sequential and uses __serpent_encrypt directly.
 */
static const struct common_glue_ctx serpent_dec_cbc = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
	} }
};
123 | |||
/* XTS decryption dispatch table: 16-way AVX2 / 8-way AVX / one block. */
static const struct common_glue_ctx serpent_dec_xts = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
	} }
};
139 | |||
140 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
141 | struct scatterlist *src, unsigned int nbytes) | ||
142 | { | ||
143 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); | ||
144 | } | ||
145 | |||
146 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
147 | struct scatterlist *src, unsigned int nbytes) | ||
148 | { | ||
149 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); | ||
150 | } | ||
151 | |||
152 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
153 | struct scatterlist *src, unsigned int nbytes) | ||
154 | { | ||
155 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, | ||
156 | dst, src, nbytes); | ||
157 | } | ||
158 | |||
159 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
160 | struct scatterlist *src, unsigned int nbytes) | ||
161 | { | ||
162 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, | ||
163 | nbytes); | ||
164 | } | ||
165 | |||
166 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
167 | struct scatterlist *src, unsigned int nbytes) | ||
168 | { | ||
169 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); | ||
170 | } | ||
171 | |||
172 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
173 | { | ||
174 | /* since reusing AVX functions, starts using FPU at 8 parallel blocks */ | ||
175 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes); | ||
176 | } | ||
177 | |||
178 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
179 | { | ||
180 | glue_fpu_end(fpu_enabled); | ||
181 | } | ||
182 | |||
/*
 * Per-request private state threaded through the LRW bulk callbacks:
 * the expanded key plus a flag tracking whether the FPU is currently
 * held (so it is acquired lazily and released once at the end).
 */
struct crypt_priv {
	struct serpent_ctx *ctx;
	bool fpu_enabled;
};
187 | |||
188 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
189 | { | ||
190 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
191 | struct crypt_priv *ctx = priv; | ||
192 | int i; | ||
193 | |||
194 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
195 | |||
196 | if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) { | ||
197 | serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst); | ||
198 | srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS; | ||
199 | nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS; | ||
200 | } | ||
201 | |||
202 | while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { | ||
203 | serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); | ||
204 | srcdst += bsize * SERPENT_PARALLEL_BLOCKS; | ||
205 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
206 | } | ||
207 | |||
208 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
209 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | ||
210 | } | ||
211 | |||
212 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
213 | { | ||
214 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
215 | struct crypt_priv *ctx = priv; | ||
216 | int i; | ||
217 | |||
218 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
219 | |||
220 | if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) { | ||
221 | serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst); | ||
222 | srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS; | ||
223 | nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS; | ||
224 | } | ||
225 | |||
226 | while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { | ||
227 | serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); | ||
228 | srcdst += bsize * SERPENT_PARALLEL_BLOCKS; | ||
229 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
230 | } | ||
231 | |||
232 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
233 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | ||
234 | } | ||
235 | |||
236 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
237 | struct scatterlist *src, unsigned int nbytes) | ||
238 | { | ||
239 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
240 | be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS]; | ||
241 | struct crypt_priv crypt_ctx = { | ||
242 | .ctx = &ctx->serpent_ctx, | ||
243 | .fpu_enabled = false, | ||
244 | }; | ||
245 | struct lrw_crypt_req req = { | ||
246 | .tbuf = buf, | ||
247 | .tbuflen = sizeof(buf), | ||
248 | |||
249 | .table_ctx = &ctx->lrw_table, | ||
250 | .crypt_ctx = &crypt_ctx, | ||
251 | .crypt_fn = encrypt_callback, | ||
252 | }; | ||
253 | int ret; | ||
254 | |||
255 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
256 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
257 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
258 | |||
259 | return ret; | ||
260 | } | ||
261 | |||
262 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
263 | struct scatterlist *src, unsigned int nbytes) | ||
264 | { | ||
265 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
266 | be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS]; | ||
267 | struct crypt_priv crypt_ctx = { | ||
268 | .ctx = &ctx->serpent_ctx, | ||
269 | .fpu_enabled = false, | ||
270 | }; | ||
271 | struct lrw_crypt_req req = { | ||
272 | .tbuf = buf, | ||
273 | .tbuflen = sizeof(buf), | ||
274 | |||
275 | .table_ctx = &ctx->lrw_table, | ||
276 | .crypt_ctx = &crypt_ctx, | ||
277 | .crypt_fn = decrypt_callback, | ||
278 | }; | ||
279 | int ret; | ||
280 | |||
281 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
282 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
283 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
284 | |||
285 | return ret; | ||
286 | } | ||
287 | |||
288 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
289 | struct scatterlist *src, unsigned int nbytes) | ||
290 | { | ||
291 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
292 | |||
293 | return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes, | ||
294 | XTS_TWEAK_CAST(__serpent_encrypt), | ||
295 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
296 | } | ||
297 | |||
298 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
299 | struct scatterlist *src, unsigned int nbytes) | ||
300 | { | ||
301 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
302 | |||
303 | return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes, | ||
304 | XTS_TWEAK_CAST(__serpent_encrypt), | ||
305 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
306 | } | ||
307 | |||
/*
 * Algorithm registrations.
 *
 * Entries 0-4 are the internal synchronous "__driver-*" blkciphers
 * (priority 0, never selected by name lookup directly); entries 5-9
 * are the async "*-serpent-avx2" front-ends (priority 600) that route
 * through the ablk helper so the FPU-using code runs in a safe context.
 */
static struct crypto_alg srp_algs[10] = { {
	/* [0] internal synchronous ECB */
	.cra_name		= "__ecb-serpent-avx2",
	.cra_driver_name	= "__driver-ecb-serpent-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[0].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.setkey		= serpent_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	/* [1] internal synchronous CBC */
	.cra_name		= "__cbc-serpent-avx2",
	.cra_driver_name	= "__driver-cbc-serpent-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[1].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.setkey		= serpent_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	/* [2] internal synchronous CTR (stream mode: blocksize 1, has IV) */
	.cra_name		= "__ctr-serpent-avx2",
	.cra_driver_name	= "__driver-ctr-serpent-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[2].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= serpent_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,	/* CTR: enc == dec */
		},
	},
}, {
	/* [3] internal synchronous LRW (key carries an extra tweak block) */
	.cra_name		= "__lrw-serpent-avx2",
	.cra_driver_name	= "__driver-lrw-serpent-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[3].cra_list),
	.cra_exit		= lrw_serpent_exit_tfm,	/* frees lrw_table */
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= lrw_serpent_setkey,
			.encrypt	= lrw_encrypt,
			.decrypt	= lrw_decrypt,
		},
	},
}, {
	/* [4] internal synchronous XTS (double-length key: data + tweak) */
	.cra_name		= "__xts-serpent-avx2",
	.cra_driver_name	= "__driver-xts-serpent-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[4].cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= xts_serpent_setkey,
			.encrypt	= xts_encrypt,
			.decrypt	= xts_decrypt,
		},
	},
}, {
	/* [5] async ECB front-end */
	.cra_name		= "ecb(serpent)",
	.cra_driver_name	= "ecb-serpent-avx2",
	.cra_priority		= 600,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[5].cra_list),
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	/* [6] async CBC front-end */
	.cra_name		= "cbc(serpent)",
	.cra_driver_name	= "cbc-serpent-avx2",
	.cra_priority		= 600,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[6].cra_list),
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	/* [7] async CTR front-end */
	.cra_name		= "ctr(serpent)",
	.cra_driver_name	= "ctr-serpent-avx2",
	.cra_priority		= 600,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[7].cra_list),
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_encrypt,	/* CTR: enc == dec */
			.geniv		= "chainiv",
		},
	},
}, {
	/* [8] async LRW front-end */
	.cra_name		= "lrw(serpent)",
	.cra_driver_name	= "lrw-serpent-avx2",
	.cra_priority		= 600,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[8].cra_list),
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	/* [9] async XTS front-end */
	.cra_name		= "xts(serpent)",
	.cra_driver_name	= "xts-serpent-avx2",
	.cra_priority		= 600,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(srp_algs[9].cra_list),
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
} };
532 | |||
533 | static int __init init(void) | ||
534 | { | ||
535 | u64 xcr0; | ||
536 | |||
537 | if (!cpu_has_avx2 || !cpu_has_osxsave) { | ||
538 | pr_info("AVX2 instructions are not detected.\n"); | ||
539 | return -ENODEV; | ||
540 | } | ||
541 | |||
542 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
543 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
544 | pr_info("AVX detected but unusable.\n"); | ||
545 | return -ENODEV; | ||
546 | } | ||
547 | |||
548 | return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs)); | ||
549 | } | ||
550 | |||
/* Module unload: drop all ten algorithm registrations. */
static void __exit fini(void)
{
	crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs));
}
555 | |||
556 | module_init(init); | ||
557 | module_exit(fini); | ||
558 | |||
559 | MODULE_LICENSE("GPL"); | ||
560 | MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized"); | ||
561 | MODULE_ALIAS("serpent"); | ||
562 | MODULE_ALIAS("serpent-asm"); | ||
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 52abaaf28e7f..9ae83cf8d21e 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c | |||
@@ -4,8 +4,7 @@ | |||
4 | * Copyright (C) 2012 Johannes Goetzfried | 4 | * Copyright (C) 2012 Johannes Goetzfried |
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | 5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> |
6 | * | 6 | * |
7 | * Glue code based on serpent_sse2_glue.c by: | 7 | * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | 8 | * |
10 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
11 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
@@ -42,7 +41,32 @@ | |||
42 | #include <asm/crypto/ablk_helper.h> | 41 | #include <asm/crypto/ablk_helper.h> |
43 | #include <asm/crypto/glue_helper.h> | 42 | #include <asm/crypto/glue_helper.h> |
44 | 43 | ||
45 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | 44 | /* 8-way parallel cipher functions */ |
45 | asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
46 | const u8 *src); | ||
47 | EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx); | ||
48 | |||
49 | asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
50 | const u8 *src); | ||
51 | EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx); | ||
52 | |||
53 | asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
54 | const u8 *src); | ||
55 | EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx); | ||
56 | |||
57 | asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
58 | const u8 *src, le128 *iv); | ||
59 | EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx); | ||
60 | |||
61 | asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
62 | const u8 *src, le128 *iv); | ||
63 | EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx); | ||
64 | |||
65 | asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
66 | const u8 *src, le128 *iv); | ||
67 | EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx); | ||
68 | |||
69 | void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
46 | { | 70 | { |
47 | be128 ctrblk; | 71 | be128 ctrblk; |
48 | 72 | ||
@@ -52,6 +76,22 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | |||
52 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | 76 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
53 | u128_xor(dst, src, (u128 *)&ctrblk); | 77 | u128_xor(dst, src, (u128 *)&ctrblk); |
54 | } | 78 | } |
79 | EXPORT_SYMBOL_GPL(__serpent_crypt_ctr); | ||
80 | |||
81 | void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
82 | { | ||
83 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | ||
84 | GLUE_FUNC_CAST(__serpent_encrypt)); | ||
85 | } | ||
86 | EXPORT_SYMBOL_GPL(serpent_xts_enc); | ||
87 | |||
88 | void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
89 | { | ||
90 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | ||
91 | GLUE_FUNC_CAST(__serpent_decrypt)); | ||
92 | } | ||
93 | EXPORT_SYMBOL_GPL(serpent_xts_dec); | ||
94 | |||
55 | 95 | ||
56 | static const struct common_glue_ctx serpent_enc = { | 96 | static const struct common_glue_ctx serpent_enc = { |
57 | .num_funcs = 2, | 97 | .num_funcs = 2, |
@@ -75,7 +115,20 @@ static const struct common_glue_ctx serpent_ctr = { | |||
75 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } | 115 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } |
76 | }, { | 116 | }, { |
77 | .num_blocks = 1, | 117 | .num_blocks = 1, |
78 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | 118 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) } |
119 | } } | ||
120 | }; | ||
121 | |||
122 | static const struct common_glue_ctx serpent_enc_xts = { | ||
123 | .num_funcs = 2, | ||
124 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
125 | |||
126 | .funcs = { { | ||
127 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
128 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) } | ||
129 | }, { | ||
130 | .num_blocks = 1, | ||
131 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) } | ||
79 | } } | 132 | } } |
80 | }; | 133 | }; |
81 | 134 | ||
@@ -105,6 +158,19 @@ static const struct common_glue_ctx serpent_dec_cbc = { | |||
105 | } } | 158 | } } |
106 | }; | 159 | }; |
107 | 160 | ||
161 | static const struct common_glue_ctx serpent_dec_xts = { | ||
162 | .num_funcs = 2, | ||
163 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
164 | |||
165 | .funcs = { { | ||
166 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
167 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) } | ||
168 | }, { | ||
169 | .num_blocks = 1, | ||
170 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) } | ||
171 | } } | ||
172 | }; | ||
173 | |||
108 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 174 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
109 | struct scatterlist *src, unsigned int nbytes) | 175 | struct scatterlist *src, unsigned int nbytes) |
110 | { | 176 | { |
@@ -187,13 +253,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |||
187 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | 253 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); |
188 | } | 254 | } |
189 | 255 | ||
190 | struct serpent_lrw_ctx { | 256 | int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, |
191 | struct lrw_table_ctx lrw_table; | 257 | unsigned int keylen) |
192 | struct serpent_ctx serpent_ctx; | ||
193 | }; | ||
194 | |||
195 | static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
196 | unsigned int keylen) | ||
197 | { | 258 | { |
198 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 259 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
199 | int err; | 260 | int err; |
@@ -206,6 +267,7 @@ static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
206 | return lrw_init_table(&ctx->lrw_table, key + keylen - | 267 | return lrw_init_table(&ctx->lrw_table, key + keylen - |
207 | SERPENT_BLOCK_SIZE); | 268 | SERPENT_BLOCK_SIZE); |
208 | } | 269 | } |
270 | EXPORT_SYMBOL_GPL(lrw_serpent_setkey); | ||
209 | 271 | ||
210 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 272 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
211 | struct scatterlist *src, unsigned int nbytes) | 273 | struct scatterlist *src, unsigned int nbytes) |
@@ -259,20 +321,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
259 | return ret; | 321 | return ret; |
260 | } | 322 | } |
261 | 323 | ||
262 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | 324 | void lrw_serpent_exit_tfm(struct crypto_tfm *tfm) |
263 | { | 325 | { |
264 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 326 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
265 | 327 | ||
266 | lrw_free_table(&ctx->lrw_table); | 328 | lrw_free_table(&ctx->lrw_table); |
267 | } | 329 | } |
330 | EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm); | ||
268 | 331 | ||
269 | struct serpent_xts_ctx { | 332 | int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, |
270 | struct serpent_ctx tweak_ctx; | 333 | unsigned int keylen) |
271 | struct serpent_ctx crypt_ctx; | ||
272 | }; | ||
273 | |||
274 | static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
275 | unsigned int keylen) | ||
276 | { | 334 | { |
277 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); | 335 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); |
278 | u32 *flags = &tfm->crt_flags; | 336 | u32 *flags = &tfm->crt_flags; |
@@ -294,59 +352,26 @@ static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
294 | /* second half of xts-key is for tweak */ | 352 | /* second half of xts-key is for tweak */ |
295 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); | 353 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); |
296 | } | 354 | } |
355 | EXPORT_SYMBOL_GPL(xts_serpent_setkey); | ||
297 | 356 | ||
298 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 357 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
299 | struct scatterlist *src, unsigned int nbytes) | 358 | struct scatterlist *src, unsigned int nbytes) |
300 | { | 359 | { |
301 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 360 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); |
302 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
303 | struct crypt_priv crypt_ctx = { | ||
304 | .ctx = &ctx->crypt_ctx, | ||
305 | .fpu_enabled = false, | ||
306 | }; | ||
307 | struct xts_crypt_req req = { | ||
308 | .tbuf = buf, | ||
309 | .tbuflen = sizeof(buf), | ||
310 | |||
311 | .tweak_ctx = &ctx->tweak_ctx, | ||
312 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
313 | .crypt_ctx = &crypt_ctx, | ||
314 | .crypt_fn = encrypt_callback, | ||
315 | }; | ||
316 | int ret; | ||
317 | |||
318 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
319 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
320 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
321 | 361 | ||
322 | return ret; | 362 | return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes, |
363 | XTS_TWEAK_CAST(__serpent_encrypt), | ||
364 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
323 | } | 365 | } |
324 | 366 | ||
325 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 367 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
326 | struct scatterlist *src, unsigned int nbytes) | 368 | struct scatterlist *src, unsigned int nbytes) |
327 | { | 369 | { |
328 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 370 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); |
329 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
330 | struct crypt_priv crypt_ctx = { | ||
331 | .ctx = &ctx->crypt_ctx, | ||
332 | .fpu_enabled = false, | ||
333 | }; | ||
334 | struct xts_crypt_req req = { | ||
335 | .tbuf = buf, | ||
336 | .tbuflen = sizeof(buf), | ||
337 | |||
338 | .tweak_ctx = &ctx->tweak_ctx, | ||
339 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
340 | .crypt_ctx = &crypt_ctx, | ||
341 | .crypt_fn = decrypt_callback, | ||
342 | }; | ||
343 | int ret; | ||
344 | 371 | ||
345 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | 372 | return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes, |
346 | ret = xts_crypt(desc, dst, src, nbytes, &req); | 373 | XTS_TWEAK_CAST(__serpent_encrypt), |
347 | serpent_fpu_end(crypt_ctx.fpu_enabled); | 374 | &ctx->tweak_ctx, &ctx->crypt_ctx); |
348 | |||
349 | return ret; | ||
350 | } | 375 | } |
351 | 376 | ||
352 | static struct crypto_alg serpent_algs[10] = { { | 377 | static struct crypto_alg serpent_algs[10] = { { |
@@ -417,7 +442,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
417 | .cra_alignmask = 0, | 442 | .cra_alignmask = 0, |
418 | .cra_type = &crypto_blkcipher_type, | 443 | .cra_type = &crypto_blkcipher_type, |
419 | .cra_module = THIS_MODULE, | 444 | .cra_module = THIS_MODULE, |
420 | .cra_exit = lrw_exit_tfm, | 445 | .cra_exit = lrw_serpent_exit_tfm, |
421 | .cra_u = { | 446 | .cra_u = { |
422 | .blkcipher = { | 447 | .blkcipher = { |
423 | .min_keysize = SERPENT_MIN_KEY_SIZE + | 448 | .min_keysize = SERPENT_MIN_KEY_SIZE + |
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S new file mode 100644 index 000000000000..56610c4bf31b --- /dev/null +++ b/arch/x86/crypto/sha256-avx-asm.S | |||
@@ -0,0 +1,496 @@ | |||
1 | ######################################################################## | ||
2 | # Implement fast SHA-256 with AVX1 instructions. (x86_64) | ||
3 | # | ||
4 | # Copyright (C) 2013 Intel Corporation. | ||
5 | # | ||
6 | # Authors: | ||
7 | # James Guilford <james.guilford@intel.com> | ||
8 | # Kirk Yap <kirk.s.yap@intel.com> | ||
9 | # Tim Chen <tim.c.chen@linux.intel.com> | ||
10 | # | ||
11 | # This software is available to you under a choice of one of two | ||
12 | # licenses. You may choose to be licensed under the terms of the GNU | ||
13 | # General Public License (GPL) Version 2, available from the file | ||
14 | # COPYING in the main directory of this source tree, or the | ||
15 | # OpenIB.org BSD license below: | ||
16 | # | ||
17 | # Redistribution and use in source and binary forms, with or | ||
18 | # without modification, are permitted provided that the following | ||
19 | # conditions are met: | ||
20 | # | ||
21 | # - Redistributions of source code must retain the above | ||
22 | # copyright notice, this list of conditions and the following | ||
23 | # disclaimer. | ||
24 | # | ||
25 | # - Redistributions in binary form must reproduce the above | ||
26 | # copyright notice, this list of conditions and the following | ||
27 | # disclaimer in the documentation and/or other materials | ||
28 | # provided with the distribution. | ||
29 | # | ||
30 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
31 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
32 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
33 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
34 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
35 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
36 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
37 | # SOFTWARE. | ||
38 | ######################################################################## | ||
39 | # | ||
40 | # This code is described in an Intel White-Paper: | ||
41 | # "Fast SHA-256 Implementations on Intel Architecture Processors" | ||
42 | # | ||
43 | # To find it, surf to http://www.intel.com/p/en_US/embedded | ||
44 | # and search for that title. | ||
45 | # | ||
46 | ######################################################################## | ||
47 | # This code schedules 1 block at a time, with 4 lanes per block | ||
48 | ######################################################################## | ||
49 | |||
50 | #ifdef CONFIG_AS_AVX | ||
51 | #include <linux/linkage.h> | ||
52 | |||
53 | ## assume buffers not aligned | ||
54 | #define VMOVDQ vmovdqu | ||
55 | |||
56 | ################################ Define Macros | ||
57 | |||
58 | # addm [mem], reg | ||
59 | # Add reg to mem using reg-mem add and store | ||
60 | .macro addm p1 p2 | ||
61 | add \p1, \p2 | ||
62 | mov \p2, \p1 | ||
63 | .endm | ||
64 | |||
65 | |||
66 | .macro MY_ROR p1 p2 | ||
67 | shld $(32-(\p1)), \p2, \p2 | ||
68 | .endm | ||
69 | |||
70 | ################################ | ||
71 | |||
72 | # COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask | ||
73 | # Load xmm with mem and byte swap each dword | ||
74 | .macro COPY_XMM_AND_BSWAP p1 p2 p3 | ||
75 | VMOVDQ \p2, \p1 | ||
76 | vpshufb \p3, \p1, \p1 | ||
77 | .endm | ||
78 | |||
79 | ################################ | ||
80 | |||
81 | X0 = %xmm4 | ||
82 | X1 = %xmm5 | ||
83 | X2 = %xmm6 | ||
84 | X3 = %xmm7 | ||
85 | |||
86 | XTMP0 = %xmm0 | ||
87 | XTMP1 = %xmm1 | ||
88 | XTMP2 = %xmm2 | ||
89 | XTMP3 = %xmm3 | ||
90 | XTMP4 = %xmm8 | ||
91 | XFER = %xmm9 | ||
92 | XTMP5 = %xmm11 | ||
93 | |||
94 | SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA | ||
95 | SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00 | ||
96 | BYTE_FLIP_MASK = %xmm13 | ||
97 | |||
98 | NUM_BLKS = %rdx # 3rd arg | ||
99 | CTX = %rsi # 2nd arg | ||
100 | INP = %rdi # 1st arg | ||
101 | |||
102 | SRND = %rdi # clobbers INP | ||
103 | c = %ecx | ||
104 | d = %r8d | ||
105 | e = %edx | ||
106 | TBL = %rbp | ||
107 | a = %eax | ||
108 | b = %ebx | ||
109 | |||
110 | f = %r9d | ||
111 | g = %r10d | ||
112 | h = %r11d | ||
113 | |||
114 | y0 = %r13d | ||
115 | y1 = %r14d | ||
116 | y2 = %r15d | ||
117 | |||
118 | |||
119 | _INP_END_SIZE = 8 | ||
120 | _INP_SIZE = 8 | ||
121 | _XFER_SIZE = 8 | ||
122 | _XMM_SAVE_SIZE = 0 | ||
123 | |||
124 | _INP_END = 0 | ||
125 | _INP = _INP_END + _INP_END_SIZE | ||
126 | _XFER = _INP + _INP_SIZE | ||
127 | _XMM_SAVE = _XFER + _XFER_SIZE | ||
128 | STACK_SIZE = _XMM_SAVE + _XMM_SAVE_SIZE | ||
129 | |||
130 | # rotate_Xs | ||
131 | # Rotate values of symbols X0...X3 | ||
132 | .macro rotate_Xs | ||
133 | X_ = X0 | ||
134 | X0 = X1 | ||
135 | X1 = X2 | ||
136 | X2 = X3 | ||
137 | X3 = X_ | ||
138 | .endm | ||
139 | |||
140 | # ROTATE_ARGS | ||
141 | # Rotate values of symbols a...h | ||
142 | .macro ROTATE_ARGS | ||
143 | TMP_ = h | ||
144 | h = g | ||
145 | g = f | ||
146 | f = e | ||
147 | e = d | ||
148 | d = c | ||
149 | c = b | ||
150 | b = a | ||
151 | a = TMP_ | ||
152 | .endm | ||
153 | |||
154 | .macro FOUR_ROUNDS_AND_SCHED | ||
155 | ## compute s0 four at a time and s1 two at a time | ||
156 | ## compute W[-16] + W[-7] 4 at a time | ||
157 | |||
158 | mov e, y0 # y0 = e | ||
159 | MY_ROR (25-11), y0 # y0 = e >> (25-11) | ||
160 | mov a, y1 # y1 = a | ||
161 | vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7] | ||
162 | MY_ROR (22-13), y1 # y1 = a >> (22-13) | ||
163 | xor e, y0 # y0 = e ^ (e >> (25-11)) | ||
164 | mov f, y2 # y2 = f | ||
165 | MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) | ||
166 | xor a, y1 # y1 = a ^ (a >> (22-13) | ||
167 | xor g, y2 # y2 = f^g | ||
168 | vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16] | ||
169 | xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | ||
170 | and e, y2 # y2 = (f^g)&e | ||
171 | MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) | ||
172 | ## compute s0 | ||
173 | vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15] | ||
174 | xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | ||
175 | MY_ROR 6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) | ||
176 | xor g, y2 # y2 = CH = ((f^g)&e)^g | ||
177 | MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | ||
178 | add y0, y2 # y2 = S1 + CH | ||
179 | add _XFER(%rsp), y2 # y2 = k + w + S1 + CH | ||
180 | mov a, y0 # y0 = a | ||
181 | add y2, h # h = h + S1 + CH + k + w | ||
182 | mov a, y2 # y2 = a | ||
183 | vpsrld $7, XTMP1, XTMP2 | ||
184 | or c, y0 # y0 = a|c | ||
185 | add h, d # d = d + h + S1 + CH + k + w | ||
186 | and c, y2 # y2 = a&c | ||
187 | vpslld $(32-7), XTMP1, XTMP3 | ||
188 | and b, y0 # y0 = (a|c)&b | ||
189 | add y1, h # h = h + S1 + CH + k + w + S0 | ||
190 | vpor XTMP2, XTMP3, XTMP3 # XTMP1 = W[-15] MY_ROR 7 | ||
191 | or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) | ||
192 | add y0, h # h = h + S1 + CH + k + w + S0 + MAJ | ||
193 | ROTATE_ARGS | ||
194 | mov e, y0 # y0 = e | ||
195 | mov a, y1 # y1 = a | ||
196 | MY_ROR (25-11), y0 # y0 = e >> (25-11) | ||
197 | xor e, y0 # y0 = e ^ (e >> (25-11)) | ||
198 | mov f, y2 # y2 = f | ||
199 | MY_ROR (22-13), y1 # y1 = a >> (22-13) | ||
200 | vpsrld $18, XTMP1, XTMP2 # | ||
201 | xor a, y1 # y1 = a ^ (a >> (22-13) | ||
202 | MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) | ||
203 | xor g, y2 # y2 = f^g | ||
204 | vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3 | ||
205 | MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) | ||
206 | xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | ||
207 | and e, y2 # y2 = (f^g)&e | ||
208 | MY_ROR 6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) | ||
209 | vpslld $(32-18), XTMP1, XTMP1 | ||
210 | xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | ||
211 | xor g, y2 # y2 = CH = ((f^g)&e)^g | ||
212 | vpxor XTMP1, XTMP3, XTMP3 # | ||
213 | add y0, y2 # y2 = S1 + CH | ||
214 | add (1*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH | ||
215 | MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | ||
216 | vpxor XTMP2, XTMP3, XTMP3 # XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR | ||
217 | mov a, y0 # y0 = a | ||
218 | add y2, h # h = h + S1 + CH + k + w | ||
219 | mov a, y2 # y2 = a | ||
220 | vpxor XTMP4, XTMP3, XTMP1 # XTMP1 = s0 | ||
221 | or c, y0 # y0 = a|c | ||
222 | add h, d # d = d + h + S1 + CH + k + w | ||
223 | and c, y2 # y2 = a&c | ||
224 | ## compute low s1 | ||
225 | vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA} | ||
226 | and b, y0 # y0 = (a|c)&b | ||
227 | add y1, h # h = h + S1 + CH + k + w + S0 | ||
228 | vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0 | ||
229 | or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) | ||
230 | add y0, h # h = h + S1 + CH + k + w + S0 + MAJ | ||
231 | ROTATE_ARGS | ||
232 | mov e, y0 # y0 = e | ||
233 | mov a, y1 # y1 = a | ||
234 | MY_ROR (25-11), y0 # y0 = e >> (25-11) | ||
235 | xor e, y0 # y0 = e ^ (e >> (25-11)) | ||
236 | MY_ROR (22-13), y1 # y1 = a >> (22-13) | ||
237 | mov f, y2 # y2 = f | ||
238 | xor a, y1 # y1 = a ^ (a >> (22-13) | ||
239 | MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) | ||
240 | vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA} | ||
241 | xor g, y2 # y2 = f^g | ||
242 | vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xBxA} | ||
243 | xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | ||
244 | and e, y2 # y2 = (f^g)&e | ||
245 | vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xBxA} | ||
246 | MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) | ||
247 | xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | ||
248 | xor g, y2 # y2 = CH = ((f^g)&e)^g | ||
249 | MY_ROR 6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) | ||
250 | vpxor XTMP3, XTMP2, XTMP2 # | ||
251 | add y0, y2 # y2 = S1 + CH | ||
252 | MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | ||
253 | add (2*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH | ||
254 | vpxor XTMP2, XTMP4, XTMP4 # XTMP4 = s1 {xBxA} | ||
255 | mov a, y0 # y0 = a | ||
256 | add y2, h # h = h + S1 + CH + k + w | ||
257 | mov a, y2 # y2 = a | ||
258 | vpshufb SHUF_00BA, XTMP4, XTMP4 # XTMP4 = s1 {00BA} | ||
259 | or c, y0 # y0 = a|c | ||
260 | add h, d # d = d + h + S1 + CH + k + w | ||
261 | and c, y2 # y2 = a&c | ||
262 | vpaddd XTMP4, XTMP0, XTMP0 # XTMP0 = {..., ..., W[1], W[0]} | ||
263 | and b, y0 # y0 = (a|c)&b | ||
264 | add y1, h # h = h + S1 + CH + k + w + S0 | ||
265 | ## compute high s1 | ||
266 | vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC} | ||
267 | or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) | ||
268 | add y0, h # h = h + S1 + CH + k + w + S0 + MAJ | ||
269 | ROTATE_ARGS | ||
270 | mov e, y0 # y0 = e | ||
271 | MY_ROR (25-11), y0 # y0 = e >> (25-11) | ||
272 | mov a, y1 # y1 = a | ||
273 | MY_ROR (22-13), y1 # y1 = a >> (22-13) | ||
274 | xor e, y0 # y0 = e ^ (e >> (25-11)) | ||
275 | mov f, y2 # y2 = f | ||
276 | MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) | ||
277 | vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC} | ||
278 | xor a, y1 # y1 = a ^ (a >> (22-13) | ||
279 | xor g, y2 # y2 = f^g | ||
280 | vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xDxC} | ||
281 | xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | ||
282 | and e, y2 # y2 = (f^g)&e | ||
283 | MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) | ||
284 | vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xDxC} | ||
285 | xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | ||
286 | MY_ROR 6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) | ||
287 | xor g, y2 # y2 = CH = ((f^g)&e)^g | ||
288 | vpxor XTMP3, XTMP2, XTMP2 | ||
289 | MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | ||
290 | add y0, y2 # y2 = S1 + CH | ||
291 | add (3*4 + _XFER)(%rsp), y2 # y2 = k + w + S1 + CH | ||
292 | vpxor XTMP2, XTMP5, XTMP5 # XTMP5 = s1 {xDxC} | ||
293 | mov a, y0 # y0 = a | ||
294 | add y2, h # h = h + S1 + CH + k + w | ||
295 | mov a, y2 # y2 = a | ||
296 | vpshufb SHUF_DC00, XTMP5, XTMP5 # XTMP5 = s1 {DC00} | ||
297 | or c, y0 # y0 = a|c | ||
298 | add h, d # d = d + h + S1 + CH + k + w | ||
299 | and c, y2 # y2 = a&c | ||
300 | vpaddd XTMP0, XTMP5, X0 # X0 = {W[3], W[2], W[1], W[0]} | ||
301 | and b, y0 # y0 = (a|c)&b | ||
302 | add y1, h # h = h + S1 + CH + k + w + S0 | ||
303 | or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) | ||
304 | add y0, h # h = h + S1 + CH + k + w + S0 + MAJ | ||
305 | ROTATE_ARGS | ||
306 | rotate_Xs | ||
307 | .endm | ||
308 | |||
309 | ## input is [rsp + _XFER + %1 * 4] | ||
310 | .macro DO_ROUND round | ||
311 | mov e, y0 # y0 = e | ||
312 | MY_ROR (25-11), y0 # y0 = e >> (25-11) | ||
313 | mov a, y1 # y1 = a | ||
314 | xor e, y0 # y0 = e ^ (e >> (25-11)) | ||
315 | MY_ROR (22-13), y1 # y1 = a >> (22-13) | ||
316 | mov f, y2 # y2 = f | ||
317 | xor a, y1 # y1 = a ^ (a >> (22-13) | ||
318 | MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6)) | ||
319 | xor g, y2 # y2 = f^g | ||
320 | xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) | ||
321 | MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2)) | ||
322 | and e, y2 # y2 = (f^g)&e | ||
323 | xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) | ||
324 | MY_ROR 6, y0 # y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) | ||
325 | xor g, y2 # y2 = CH = ((f^g)&e)^g | ||
326 | add y0, y2 # y2 = S1 + CH | ||
327 | MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) | ||
328 | offset = \round * 4 + _XFER # | ||
329 | add offset(%rsp), y2 # y2 = k + w + S1 + CH | ||
330 | mov a, y0 # y0 = a | ||
331 | add y2, h # h = h + S1 + CH + k + w | ||
332 | mov a, y2 # y2 = a | ||
333 | or c, y0 # y0 = a|c | ||
334 | add h, d # d = d + h + S1 + CH + k + w | ||
335 | and c, y2 # y2 = a&c | ||
336 | and b, y0 # y0 = (a|c)&b | ||
337 | add y1, h # h = h + S1 + CH + k + w + S0 | ||
338 | or y2, y0 # y0 = MAJ = (a|c)&b)|(a&c) | ||
339 | add y0, h # h = h + S1 + CH + k + w + S0 + MAJ | ||
340 | ROTATE_ARGS | ||
341 | .endm | ||
342 | |||
343 | ######################################################################## | ||
344 | ## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) | ||
345 | ## arg 1 : pointer to input data | ||
346 | ## arg 2 : pointer to digest | ||
347 | ## arg 3 : Num blocks | ||
348 | ######################################################################## | ||
349 | .text | ||
350 | ENTRY(sha256_transform_avx) | ||
351 | .align 32 | ||
352 | pushq %rbx | ||
353 | pushq %rbp | ||
354 | pushq %r13 | ||
355 | pushq %r14 | ||
356 | pushq %r15 | ||
357 | pushq %r12 | ||
358 | |||
359 | mov %rsp, %r12 | ||
360 | subq $STACK_SIZE, %rsp # allocate stack space | ||
361 | and $~15, %rsp # align stack pointer | ||
362 | |||
363 | shl $6, NUM_BLKS # convert to bytes | ||
364 | jz done_hash | ||
365 | add INP, NUM_BLKS # pointer to end of data | ||
366 | mov NUM_BLKS, _INP_END(%rsp) | ||
367 | |||
368 | ## load initial digest | ||
369 | mov 4*0(CTX), a | ||
370 | mov 4*1(CTX), b | ||
371 | mov 4*2(CTX), c | ||
372 | mov 4*3(CTX), d | ||
373 | mov 4*4(CTX), e | ||
374 | mov 4*5(CTX), f | ||
375 | mov 4*6(CTX), g | ||
376 | mov 4*7(CTX), h | ||
377 | |||
378 | vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK | ||
379 | vmovdqa _SHUF_00BA(%rip), SHUF_00BA | ||
380 | vmovdqa _SHUF_DC00(%rip), SHUF_DC00 | ||
381 | loop0: | ||
382 | lea K256(%rip), TBL | ||
383 | |||
384 | ## byte swap first 16 dwords | ||
385 | COPY_XMM_AND_BSWAP X0, 0*16(INP), BYTE_FLIP_MASK | ||
386 | COPY_XMM_AND_BSWAP X1, 1*16(INP), BYTE_FLIP_MASK | ||
387 | COPY_XMM_AND_BSWAP X2, 2*16(INP), BYTE_FLIP_MASK | ||
388 | COPY_XMM_AND_BSWAP X3, 3*16(INP), BYTE_FLIP_MASK | ||
389 | |||
390 | mov INP, _INP(%rsp) | ||
391 | |||
392 | ## schedule 48 input dwords, by doing 3 rounds of 16 each | ||
393 | mov $3, SRND | ||
394 | .align 16 | ||
395 | loop1: | ||
396 | vpaddd (TBL), X0, XFER | ||
397 | vmovdqa XFER, _XFER(%rsp) | ||
398 | FOUR_ROUNDS_AND_SCHED | ||
399 | |||
400 | vpaddd 1*16(TBL), X0, XFER | ||
401 | vmovdqa XFER, _XFER(%rsp) | ||
402 | FOUR_ROUNDS_AND_SCHED | ||
403 | |||
404 | vpaddd 2*16(TBL), X0, XFER | ||
405 | vmovdqa XFER, _XFER(%rsp) | ||
406 | FOUR_ROUNDS_AND_SCHED | ||
407 | |||
408 | vpaddd 3*16(TBL), X0, XFER | ||
409 | vmovdqa XFER, _XFER(%rsp) | ||
410 | add $4*16, TBL | ||
411 | FOUR_ROUNDS_AND_SCHED | ||
412 | |||
413 | sub $1, SRND | ||
414 | jne loop1 | ||
415 | |||
416 | mov $2, SRND | ||
417 | loop2: | ||
418 | vpaddd (TBL), X0, XFER | ||
419 | vmovdqa XFER, _XFER(%rsp) | ||
420 | DO_ROUND 0 | ||
421 | DO_ROUND 1 | ||
422 | DO_ROUND 2 | ||
423 | DO_ROUND 3 | ||
424 | |||
425 | vpaddd 1*16(TBL), X1, XFER | ||
426 | vmovdqa XFER, _XFER(%rsp) | ||
427 | add $2*16, TBL | ||
428 | DO_ROUND 0 | ||
429 | DO_ROUND 1 | ||
430 | DO_ROUND 2 | ||
431 | DO_ROUND 3 | ||
432 | |||
433 | vmovdqa X2, X0 | ||
434 | vmovdqa X3, X1 | ||
435 | |||
436 | sub $1, SRND | ||
437 | jne loop2 | ||
438 | |||
439 | addm (4*0)(CTX),a | ||
440 | addm (4*1)(CTX),b | ||
441 | addm (4*2)(CTX),c | ||
442 | addm (4*3)(CTX),d | ||
443 | addm (4*4)(CTX),e | ||
444 | addm (4*5)(CTX),f | ||
445 | addm (4*6)(CTX),g | ||
446 | addm (4*7)(CTX),h | ||
447 | |||
448 | mov _INP(%rsp), INP | ||
449 | add $64, INP | ||
450 | cmp _INP_END(%rsp), INP | ||
451 | jne loop0 | ||
452 | |||
453 | done_hash: | ||
454 | |||
455 | mov %r12, %rsp | ||
456 | |||
457 | popq %r12 | ||
458 | popq %r15 | ||
459 | popq %r14 | ||
460 | popq %r13 | ||
461 | popq %rbp | ||
462 | popq %rbx | ||
463 | ret | ||
464 | ENDPROC(sha256_transform_avx) | ||
465 | |||
466 | .data | ||
467 | .align 64 | ||
468 | K256: | ||
469 | .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | ||
470 | .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | ||
471 | .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | ||
472 | .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | ||
473 | .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | ||
474 | .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | ||
475 | .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | ||
476 | .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | ||
477 | .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | ||
478 | .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | ||
479 | .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | ||
480 | .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | ||
481 | .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | ||
482 | .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | ||
483 | .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | ||
484 | .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | ||
485 | |||
486 | PSHUFFLE_BYTE_FLIP_MASK: | ||
487 | .octa 0x0c0d0e0f08090a0b0405060700010203 | ||
488 | |||
489 | # shuffle xBxA -> 00BA | ||
490 | _SHUF_00BA: | ||
491 | .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 | ||
492 | |||
493 | # shuffle xDxC -> DC00 | ||
494 | _SHUF_DC00: | ||
495 | .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF | ||
496 | #endif | ||
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S new file mode 100644 index 000000000000..9e86944c539d --- /dev/null +++ b/arch/x86/crypto/sha256-avx2-asm.S | |||
@@ -0,0 +1,772 @@ | |||
1 | ######################################################################## | ||
2 | # Implement fast SHA-256 with AVX2 instructions. (x86_64) | ||
3 | # | ||
4 | # Copyright (C) 2013 Intel Corporation. | ||
5 | # | ||
6 | # Authors: | ||
7 | # James Guilford <james.guilford@intel.com> | ||
8 | # Kirk Yap <kirk.s.yap@intel.com> | ||
9 | # Tim Chen <tim.c.chen@linux.intel.com> | ||
10 | # | ||
11 | # This software is available to you under a choice of one of two | ||
12 | # licenses. You may choose to be licensed under the terms of the GNU | ||
13 | # General Public License (GPL) Version 2, available from the file | ||
14 | # COPYING in the main directory of this source tree, or the | ||
15 | # OpenIB.org BSD license below: | ||
16 | # | ||
17 | # Redistribution and use in source and binary forms, with or | ||
18 | # without modification, are permitted provided that the following | ||
19 | # conditions are met: | ||
20 | # | ||
21 | # - Redistributions of source code must retain the above | ||
22 | # copyright notice, this list of conditions and the following | ||
23 | # disclaimer. | ||
24 | # | ||
25 | # - Redistributions in binary form must reproduce the above | ||
26 | # copyright notice, this list of conditions and the following | ||
27 | # disclaimer in the documentation and/or other materials | ||
28 | # provided with the distribution. | ||
29 | # | ||
30 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
31 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
32 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
33 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
34 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
35 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
36 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
37 | # SOFTWARE. | ||
38 | # | ||
39 | ######################################################################## | ||
40 | # | ||
41 | # This code is described in an Intel White-Paper: | ||
42 | # "Fast SHA-256 Implementations on Intel Architecture Processors" | ||
43 | # | ||
44 | # To find it, surf to http://www.intel.com/p/en_US/embedded | ||
45 | # and search for that title. | ||
46 | # | ||
47 | ######################################################################## | ||
48 | # This code schedules 2 blocks at a time, with 4 lanes per block | ||
49 | ######################################################################## | ||
50 | |||
51 | #ifdef CONFIG_AS_AVX2 | ||
52 | #include <linux/linkage.h> | ||
53 | |||
54 | ## assume buffers not aligned | ||
55 | #define VMOVDQ vmovdqu | ||
56 | |||
57 | ################################ Define Macros | ||
58 | |||
59 | # addm [mem], reg | ||
60 | # Add reg to mem using reg-mem add and store | ||
61 | .macro addm p1 p2 | ||
62 | add \p1, \p2 | ||
63 | mov \p2, \p1 | ||
64 | .endm | ||
65 | |||
66 | ################################ | ||
67 | |||
68 | X0 = %ymm4 | ||
69 | X1 = %ymm5 | ||
70 | X2 = %ymm6 | ||
71 | X3 = %ymm7 | ||
72 | |||
73 | # XMM versions of above | ||
74 | XWORD0 = %xmm4 | ||
75 | XWORD1 = %xmm5 | ||
76 | XWORD2 = %xmm6 | ||
77 | XWORD3 = %xmm7 | ||
78 | |||
79 | XTMP0 = %ymm0 | ||
80 | XTMP1 = %ymm1 | ||
81 | XTMP2 = %ymm2 | ||
82 | XTMP3 = %ymm3 | ||
83 | XTMP4 = %ymm8 | ||
84 | XFER = %ymm9 | ||
85 | XTMP5 = %ymm11 | ||
86 | |||
87 | SHUF_00BA = %ymm10 # shuffle xBxA -> 00BA | ||
88 | SHUF_DC00 = %ymm12 # shuffle xDxC -> DC00 | ||
89 | BYTE_FLIP_MASK = %ymm13 | ||
90 | |||
91 | X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK | ||
92 | |||
93 | NUM_BLKS = %rdx # 3rd arg | ||
94 | CTX = %rsi # 2nd arg | ||
95 | INP = %rdi # 1st arg | ||
96 | c = %ecx | ||
97 | d = %r8d | ||
98 | e = %edx # clobbers NUM_BLKS | ||
99 | y3 = %edi # clobbers INP | ||
100 | |||
101 | |||
102 | TBL = %rbp | ||
103 | SRND = CTX # SRND is same register as CTX | ||
104 | |||
105 | a = %eax | ||
106 | b = %ebx | ||
107 | f = %r9d | ||
108 | g = %r10d | ||
109 | h = %r11d | ||
110 | old_h = %r11d | ||
111 | |||
112 | T1 = %r12d | ||
113 | y0 = %r13d | ||
114 | y1 = %r14d | ||
115 | y2 = %r15d | ||
116 | |||
117 | |||
118 | _XFER_SIZE = 2*64*4 # 2 blocks, 64 rounds, 4 bytes/round | ||
119 | _XMM_SAVE_SIZE = 0 | ||
120 | _INP_END_SIZE = 8 | ||
121 | _INP_SIZE = 8 | ||
122 | _CTX_SIZE = 8 | ||
123 | _RSP_SIZE = 8 | ||
124 | |||
125 | _XFER = 0 | ||
126 | _XMM_SAVE = _XFER + _XFER_SIZE | ||
127 | _INP_END = _XMM_SAVE + _XMM_SAVE_SIZE | ||
128 | _INP = _INP_END + _INP_END_SIZE | ||
129 | _CTX = _INP + _INP_SIZE | ||
130 | _RSP = _CTX + _CTX_SIZE | ||
131 | STACK_SIZE = _RSP + _RSP_SIZE | ||
132 | |||
133 | # rotate_Xs | ||
134 | # Rotate values of symbols X0...X3 | ||
135 | .macro rotate_Xs | ||
136 | X_ = X0 | ||
137 | X0 = X1 | ||
138 | X1 = X2 | ||
139 | X2 = X3 | ||
140 | X3 = X_ | ||
141 | .endm | ||
142 | |||
143 | # ROTATE_ARGS | ||
144 | # Rotate values of symbols a...h | ||
145 | .macro ROTATE_ARGS | ||
146 | old_h = h | ||
147 | TMP_ = h | ||
148 | h = g | ||
149 | g = f | ||
150 | f = e | ||
151 | e = d | ||
152 | d = c | ||
153 | c = b | ||
154 | b = a | ||
155 | a = TMP_ | ||
156 | .endm | ||
157 | |||
158 | .macro FOUR_ROUNDS_AND_SCHED disp | ||
159 | ################################### RND N + 0 ############################ | ||
160 | |||
161 | mov a, y3 # y3 = a # MAJA | ||
162 | rorx $25, e, y0 # y0 = e >> 25 # S1A | ||
163 | rorx $11, e, y1 # y1 = e >> 11 # S1B | ||
164 | |||
165 | addl \disp(%rsp, SRND), h # h = k + w + h # -- | ||
166 | or c, y3 # y3 = a|c # MAJA | ||
167 | vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7] | ||
168 | mov f, y2 # y2 = f # CH | ||
169 | rorx $13, a, T1 # T1 = a >> 13 # S0B | ||
170 | |||
171 | xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 | ||
172 | xor g, y2 # y2 = f^g # CH | ||
173 | vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16]# y1 = (e >> 6)# S1 | ||
174 | rorx $6, e, y1 # y1 = (e >> 6) # S1 | ||
175 | |||
176 | and e, y2 # y2 = (f^g)&e # CH | ||
177 | xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 | ||
178 | rorx $22, a, y1 # y1 = a >> 22 # S0A | ||
179 | add h, d # d = k + w + h + d # -- | ||
180 | |||
181 | and b, y3 # y3 = (a|c)&b # MAJA | ||
182 | vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15] | ||
183 | xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 | ||
184 | rorx $2, a, T1 # T1 = (a >> 2) # S0 | ||
185 | |||
186 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
187 | vpsrld $7, XTMP1, XTMP2 | ||
188 | xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 | ||
189 | mov a, T1 # T1 = a # MAJB | ||
190 | and c, T1 # T1 = a&c # MAJB | ||
191 | |||
192 | add y0, y2 # y2 = S1 + CH # -- | ||
193 | vpslld $(32-7), XTMP1, XTMP3 | ||
194 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
195 | add y1, h # h = k + w + h + S0 # -- | ||
196 | |||
197 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
198 | vpor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] ror 7 | ||
199 | |||
200 | vpsrld $18, XTMP1, XTMP2 | ||
201 | add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
202 | add y3, h # h = t1 + S0 + MAJ # -- | ||
203 | |||
204 | |||
205 | ROTATE_ARGS | ||
206 | |||
207 | ################################### RND N + 1 ############################ | ||
208 | |||
209 | mov a, y3 # y3 = a # MAJA | ||
210 | rorx $25, e, y0 # y0 = e >> 25 # S1A | ||
211 | rorx $11, e, y1 # y1 = e >> 11 # S1B | ||
212 | offset = \disp + 1*4 | ||
213 | addl offset(%rsp, SRND), h # h = k + w + h # -- | ||
214 | or c, y3 # y3 = a|c # MAJA | ||
215 | |||
216 | |||
217 | vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3 | ||
218 | mov f, y2 # y2 = f # CH | ||
219 | rorx $13, a, T1 # T1 = a >> 13 # S0B | ||
220 | xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 | ||
221 | xor g, y2 # y2 = f^g # CH | ||
222 | |||
223 | |||
224 | rorx $6, e, y1 # y1 = (e >> 6) # S1 | ||
225 | xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 | ||
226 | rorx $22, a, y1 # y1 = a >> 22 # S0A | ||
227 | and e, y2 # y2 = (f^g)&e # CH | ||
228 | add h, d # d = k + w + h + d # -- | ||
229 | |||
230 | vpslld $(32-18), XTMP1, XTMP1 | ||
231 | and b, y3 # y3 = (a|c)&b # MAJA | ||
232 | xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 | ||
233 | |||
234 | vpxor XTMP1, XTMP3, XTMP3 | ||
235 | rorx $2, a, T1 # T1 = (a >> 2) # S0 | ||
236 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
237 | |||
238 | vpxor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] ror 7 ^ W[-15] ror 18 | ||
239 | xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 | ||
240 | mov a, T1 # T1 = a # MAJB | ||
241 | and c, T1 # T1 = a&c # MAJB | ||
242 | add y0, y2 # y2 = S1 + CH # -- | ||
243 | |||
244 | vpxor XTMP4, XTMP3, XTMP1 # XTMP1 = s0 | ||
245 | vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA} | ||
246 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
247 | add y1, h # h = k + w + h + S0 # -- | ||
248 | |||
249 | vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0 | ||
250 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
251 | add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
252 | add y3, h # h = t1 + S0 + MAJ # -- | ||
253 | |||
254 | vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA} | ||
255 | |||
256 | |||
257 | ROTATE_ARGS | ||
258 | |||
259 | ################################### RND N + 2 ############################ | ||
260 | |||
261 | mov a, y3 # y3 = a # MAJA | ||
262 | rorx $25, e, y0 # y0 = e >> 25 # S1A | ||
263 | offset = \disp + 2*4 | ||
264 | addl offset(%rsp, SRND), h # h = k + w + h # -- | ||
265 | |||
266 | vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA} | ||
267 | rorx $11, e, y1 # y1 = e >> 11 # S1B | ||
268 | or c, y3 # y3 = a|c # MAJA | ||
269 | mov f, y2 # y2 = f # CH | ||
270 | xor g, y2 # y2 = f^g # CH | ||
271 | |||
272 | rorx $13, a, T1 # T1 = a >> 13 # S0B | ||
273 | xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 | ||
274 | vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA} | ||
275 | and e, y2 # y2 = (f^g)&e # CH | ||
276 | |||
277 | rorx $6, e, y1 # y1 = (e >> 6) # S1 | ||
278 | vpxor XTMP3, XTMP2, XTMP2 | ||
279 | add h, d # d = k + w + h + d # -- | ||
280 | and b, y3 # y3 = (a|c)&b # MAJA | ||
281 | |||
282 | xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 | ||
283 | rorx $22, a, y1 # y1 = a >> 22 # S0A | ||
284 | vpxor XTMP2, XTMP4, XTMP4 # XTMP4 = s1 {xBxA} | ||
285 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
286 | |||
287 | vpshufb SHUF_00BA, XTMP4, XTMP4 # XTMP4 = s1 {00BA} | ||
288 | xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 | ||
289 | rorx $2, a ,T1 # T1 = (a >> 2) # S0 | ||
290 | vpaddd XTMP4, XTMP0, XTMP0 # XTMP0 = {..., ..., W[1], W[0]} | ||
291 | |||
292 | xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 | ||
293 | mov a, T1 # T1 = a # MAJB | ||
294 | and c, T1 # T1 = a&c # MAJB | ||
295 | add y0, y2 # y2 = S1 + CH # -- | ||
296 | vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC} | ||
297 | |||
298 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
299 | add y1,h # h = k + w + h + S0 # -- | ||
300 | add y2,d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
301 | add y2,h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
302 | |||
303 | add y3,h # h = t1 + S0 + MAJ # -- | ||
304 | |||
305 | |||
306 | ROTATE_ARGS | ||
307 | |||
308 | ################################### RND N + 3 ############################ | ||
309 | |||
310 | mov a, y3 # y3 = a # MAJA | ||
311 | rorx $25, e, y0 # y0 = e >> 25 # S1A | ||
312 | rorx $11, e, y1 # y1 = e >> 11 # S1B | ||
313 | offset = \disp + 3*4 | ||
314 | addl offset(%rsp, SRND), h # h = k + w + h # -- | ||
315 | or c, y3 # y3 = a|c # MAJA | ||
316 | |||
317 | |||
318 | vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC} | ||
319 | mov f, y2 # y2 = f # CH | ||
320 | rorx $13, a, T1 # T1 = a >> 13 # S0B | ||
321 | xor y1, y0 # y0 = (e>>25) ^ (e>>11) # S1 | ||
322 | xor g, y2 # y2 = f^g # CH | ||
323 | |||
324 | |||
325 | vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] ror 19 {xDxC} | ||
326 | rorx $6, e, y1 # y1 = (e >> 6) # S1 | ||
327 | and e, y2 # y2 = (f^g)&e # CH | ||
328 | add h, d # d = k + w + h + d # -- | ||
329 | and b, y3 # y3 = (a|c)&b # MAJA | ||
330 | |||
331 | vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC} | ||
332 | xor y1, y0 # y0 = (e>>25) ^ (e>>11) ^ (e>>6) # S1 | ||
333 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
334 | |||
335 | vpxor XTMP3, XTMP2, XTMP2 | ||
336 | rorx $22, a, y1 # y1 = a >> 22 # S0A | ||
337 | add y0, y2 # y2 = S1 + CH # -- | ||
338 | |||
339 | vpxor XTMP2, XTMP5, XTMP5 # XTMP5 = s1 {xDxC} | ||
340 | xor T1, y1 # y1 = (a>>22) ^ (a>>13) # S0 | ||
341 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
342 | |||
343 | rorx $2, a, T1 # T1 = (a >> 2) # S0 | ||
344 | vpshufb SHUF_DC00, XTMP5, XTMP5 # XTMP5 = s1 {DC00} | ||
345 | |||
346 | vpaddd XTMP0, XTMP5, X0 # X0 = {W[3], W[2], W[1], W[0]} | ||
347 | xor T1, y1 # y1 = (a>>22) ^ (a>>13) ^ (a>>2) # S0 | ||
348 | mov a, T1 # T1 = a # MAJB | ||
349 | and c, T1 # T1 = a&c # MAJB | ||
350 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
351 | |||
352 | add y1, h # h = k + w + h + S0 # -- | ||
353 | add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
354 | add y3, h # h = t1 + S0 + MAJ # -- | ||
355 | |||
356 | ROTATE_ARGS | ||
357 | rotate_Xs | ||
358 | .endm | ||
359 | |||
# DO_4ROUNDS disp
# Perform four SHA-256 rounds using pre-computed W[t]+K[t] values; no
# message scheduling is done here.  \disp is the byte offset (indexed by
# SRND) of the 32-byte XFER slot on the stack holding the four round
# inputs.  Used for the last 16 rounds and (with the "+16" offsets) to
# replay the second block's already-scheduled words.
# Note: rounds N+1..N+3 begin by finishing the PREVIOUS round's h via
# old_h (defined earlier in this file), hiding the add latency behind
# the current round's independent work.
.macro DO_4ROUNDS disp
################################### RND N + 0 ###########################

	mov	f, y2		# y2 = f				# CH
	rorx	$25, e, y0	# y0 = e >> 25				# S1A
	rorx	$11, e, y1	# y1 = e >> 11				# S1B
	xor	g, y2		# y2 = f^g				# CH

	xor	y1, y0		# y0 = (e>>25) ^ (e>>11)		# S1
	rorx	$6, e, y1	# y1 = (e >> 6)				# S1
	and	e, y2		# y2 = (f^g)&e				# CH

	xor	y1, y0		# y0 = (e>>25) ^ (e>>11) ^ (e>>6)	# S1
	rorx	$13, a, T1	# T1 = a >> 13				# S0B
	xor	g, y2		# y2 = CH = ((f^g)&e)^g			# CH
	rorx	$22, a, y1	# y1 = a >> 22				# S0A
	mov	a, y3		# y3 = a				# MAJA

	xor	T1, y1		# y1 = (a>>22) ^ (a>>13)		# S0
	rorx	$2, a, T1	# T1 = (a >> 2)				# S0
	addl	\disp(%rsp, SRND), h	# h = k + w + h			# --
	or	c, y3		# y3 = a|c				# MAJA

	xor	T1, y1		# y1 = (a>>22) ^ (a>>13) ^ (a>>2)	# S0
	mov	a, T1		# T1 = a				# MAJB
	and	b, y3		# y3 = (a|c)&b				# MAJA
	and	c, T1		# T1 = a&c				# MAJB
	add	y0, y2		# y2 = S1 + CH				# --


	add	h, d		# d = k + w + h + d			# --
	or	T1, y3		# y3 = MAJ = ((a|c)&b)|(a&c)		# MAJ
	add	y1, h		# h = k + w + h + S0			# --
	add	y2, d		# d = k + w + h + d + S1 + CH = d + t1	# --

	ROTATE_ARGS

################################### RND N + 1 ###########################

	add	y2, old_h	# h = k + w + h + S0 + S1 + CH = t1 + S0# --
	mov	f, y2		# y2 = f				# CH
	rorx	$25, e, y0	# y0 = e >> 25				# S1A
	rorx	$11, e, y1	# y1 = e >> 11				# S1B
	xor	g, y2		# y2 = f^g				# CH

	xor	y1, y0		# y0 = (e>>25) ^ (e>>11)		# S1
	rorx	$6, e, y1	# y1 = (e >> 6)				# S1
	and	e, y2		# y2 = (f^g)&e				# CH
	add	y3, old_h	# h = t1 + S0 + MAJ			# --

	xor	y1, y0		# y0 = (e>>25) ^ (e>>11) ^ (e>>6)	# S1
	rorx	$13, a, T1	# T1 = a >> 13				# S0B
	xor	g, y2		# y2 = CH = ((f^g)&e)^g			# CH
	rorx	$22, a, y1	# y1 = a >> 22				# S0A
	mov	a, y3		# y3 = a				# MAJA

	xor	T1, y1		# y1 = (a>>22) ^ (a>>13)		# S0
	rorx	$2, a, T1	# T1 = (a >> 2)				# S0
	offset = 4*1 + \disp
	addl	offset(%rsp, SRND), h	# h = k + w + h			# --
	or	c, y3		# y3 = a|c				# MAJA

	xor	T1, y1		# y1 = (a>>22) ^ (a>>13) ^ (a>>2)	# S0
	mov	a, T1		# T1 = a				# MAJB
	and	b, y3		# y3 = (a|c)&b				# MAJA
	and	c, T1		# T1 = a&c				# MAJB
	add	y0, y2		# y2 = S1 + CH				# --


	add	h, d		# d = k + w + h + d			# --
	or	T1, y3		# y3 = MAJ = ((a|c)&b)|(a&c)		# MAJ
	add	y1, h		# h = k + w + h + S0			# --

	add	y2, d		# d = k + w + h + d + S1 + CH = d + t1	# --

	ROTATE_ARGS

################################### RND N + 2 ##############################

	add	y2, old_h	# h = k + w + h + S0 + S1 + CH = t1 + S0# --
	mov	f, y2		# y2 = f				# CH
	rorx	$25, e, y0	# y0 = e >> 25				# S1A
	rorx	$11, e, y1	# y1 = e >> 11				# S1B
	xor	g, y2		# y2 = f^g				# CH

	xor	y1, y0		# y0 = (e>>25) ^ (e>>11)		# S1
	rorx	$6, e, y1	# y1 = (e >> 6)				# S1
	and	e, y2		# y2 = (f^g)&e				# CH
	add	y3, old_h	# h = t1 + S0 + MAJ			# --

	xor	y1, y0		# y0 = (e>>25) ^ (e>>11) ^ (e>>6)	# S1
	rorx	$13, a, T1	# T1 = a >> 13				# S0B
	xor	g, y2		# y2 = CH = ((f^g)&e)^g			# CH
	rorx	$22, a, y1	# y1 = a >> 22				# S0A
	mov	a, y3		# y3 = a				# MAJA

	xor	T1, y1		# y1 = (a>>22) ^ (a>>13)		# S0
	rorx	$2, a, T1	# T1 = (a >> 2)				# S0
	offset = 4*2 + \disp
	addl	offset(%rsp, SRND), h	# h = k + w + h			# --
	or	c, y3		# y3 = a|c				# MAJA

	xor	T1, y1		# y1 = (a>>22) ^ (a>>13) ^ (a>>2)	# S0
	mov	a, T1		# T1 = a				# MAJB
	and	b, y3		# y3 = (a|c)&b				# MAJA
	and	c, T1		# T1 = a&c				# MAJB
	add	y0, y2		# y2 = S1 + CH				# --


	add	h, d		# d = k + w + h + d			# --
	or	T1, y3		# y3 = MAJ = ((a|c)&b)|(a&c)		# MAJ
	add	y1, h		# h = k + w + h + S0			# --

	add	y2, d		# d = k + w + h + d + S1 + CH = d + t1	# --

	ROTATE_ARGS

################################### RND N + 3 ###########################

	add	y2, old_h	# h = k + w + h + S0 + S1 + CH = t1 + S0# --
	mov	f, y2		# y2 = f				# CH
	rorx	$25, e, y0	# y0 = e >> 25				# S1A
	rorx	$11, e, y1	# y1 = e >> 11				# S1B
	xor	g, y2		# y2 = f^g				# CH

	xor	y1, y0		# y0 = (e>>25) ^ (e>>11)		# S1
	rorx	$6, e, y1	# y1 = (e >> 6)				# S1
	and	e, y2		# y2 = (f^g)&e				# CH
	add	y3, old_h	# h = t1 + S0 + MAJ			# --

	xor	y1, y0		# y0 = (e>>25) ^ (e>>11) ^ (e>>6)	# S1
	rorx	$13, a, T1	# T1 = a >> 13				# S0B
	xor	g, y2		# y2 = CH = ((f^g)&e)^g			# CH
	rorx	$22, a, y1	# y1 = a >> 22				# S0A
	mov	a, y3		# y3 = a				# MAJA

	xor	T1, y1		# y1 = (a>>22) ^ (a>>13)		# S0
	rorx	$2, a, T1	# T1 = (a >> 2)				# S0
	offset = 4*3 + \disp
	addl	offset(%rsp, SRND), h	# h = k + w + h			# --
	or	c, y3		# y3 = a|c				# MAJA

	xor	T1, y1		# y1 = (a>>22) ^ (a>>13) ^ (a>>2)	# S0
	mov	a, T1		# T1 = a				# MAJB
	and	b, y3		# y3 = (a|c)&b				# MAJA
	and	c, T1		# T1 = a&c				# MAJB
	add	y0, y2		# y2 = S1 + CH				# --


	add	h, d		# d = k + w + h + d			# --
	or	T1, y3		# y3 = MAJ = ((a|c)&b)|(a&c)		# MAJ
	add	y1, h		# h = k + w + h + S0			# --

	add	y2, d		# d = k + w + h + d + S1 + CH = d + t1	# --

	# Last round of the group: no following round to fold old_h into,
	# so finish h here.
	add	y2, h		# h = k + w + h + S0 + S1 + CH = t1 + S0# --

	add	y3, h		# h = t1 + S0 + MAJ			# --

	ROTATE_ARGS

.endm
523 | |||
########################################################################
## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
## arg 1 : pointer to input data
## arg 2 : pointer to digest
## arg 3 : Num blocks
##
## AVX2/BMI2 (rorx) SHA-256 block function.  Processes two 64-byte
## blocks per main-loop iteration: the message schedule for both blocks
## is computed at once in the two 128-bit lanes of the ymm registers,
## then the rounds are run for block 1 (loop1/loop2) and replayed for
## block 2 from the saved schedule (loop3, "+16" XFER offsets).  An odd
## trailing block goes through do_last_block using the xmm-sized views
## (XWORD0..3 / X_BYTE_FLIP_MASK, defined earlier in this file).
########################################################################
.text
ENTRY(sha256_transform_rorx)
.align 32
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	# Keep the unaligned entry %rsp in _RSP so it can be restored in
	# done_hash, then carve out a 32-byte-aligned frame for the
	# vmovdqa XFER spills.
	mov	%rsp, %rax
	subq	$STACK_SIZE, %rsp
	and	$-32, %rsp	# align rsp to 32 byte boundary
	mov	%rax, _RSP(%rsp)


	shl	$6, NUM_BLKS	# convert to bytes
	jz	done_hash
	lea	-64(INP, NUM_BLKS), NUM_BLKS	# pointer to last block
	mov	NUM_BLKS, _INP_END(%rsp)

	cmp	NUM_BLKS, INP
	je	only_one_block

	## load initial digest
	mov	(CTX), a
	mov	4*1(CTX), b
	mov	4*2(CTX), c
	mov	4*3(CTX), d
	mov	4*4(CTX), e
	mov	4*5(CTX), f
	mov	4*6(CTX), g
	mov	4*7(CTX), h

	vmovdqa	PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
	vmovdqa	_SHUF_00BA(%rip), SHUF_00BA
	vmovdqa	_SHUF_DC00(%rip), SHUF_DC00

	mov	CTX, _CTX(%rsp)

# Two-block main loop entry.
loop0:
	lea	K256(%rip), TBL

	## Load first 16 dwords from two blocks
	VMOVDQ	0*32(INP),XTMP0
	VMOVDQ	1*32(INP),XTMP1
	VMOVDQ	2*32(INP),XTMP2
	VMOVDQ	3*32(INP),XTMP3

	## byte swap data
	vpshufb	BYTE_FLIP_MASK, XTMP0, XTMP0
	vpshufb	BYTE_FLIP_MASK, XTMP1, XTMP1
	vpshufb	BYTE_FLIP_MASK, XTMP2, XTMP2
	vpshufb	BYTE_FLIP_MASK, XTMP3, XTMP3

	## transpose data into high/low halves
	## (block 1 words in the low lanes, block 2 words in the high lanes)
	vperm2i128	$0x20, XTMP2, XTMP0, X0
	vperm2i128	$0x31, XTMP2, XTMP0, X1
	vperm2i128	$0x20, XTMP3, XTMP1, X2
	vperm2i128	$0x31, XTMP3, XTMP1, X3

last_block_enter:
	add	$64, INP
	mov	INP, _INP(%rsp)

	## schedule 48 input dwords, by doing 3 rounds of 12 each
	xor	SRND, SRND

.align 16
loop1:
	# For each 4-round group: add the round constants, spill W+K for
	# both blocks to the stack, then run the rounds while scheduling
	# the next message words.
	vpaddd	0*32(TBL, SRND), X0, XFER
	vmovdqa	XFER, 0*32+_XFER(%rsp, SRND)
	FOUR_ROUNDS_AND_SCHED	_XFER + 0*32

	vpaddd	1*32(TBL, SRND), X0, XFER
	vmovdqa	XFER, 1*32+_XFER(%rsp, SRND)
	FOUR_ROUNDS_AND_SCHED	_XFER + 1*32

	vpaddd	2*32(TBL, SRND), X0, XFER
	vmovdqa	XFER, 2*32+_XFER(%rsp, SRND)
	FOUR_ROUNDS_AND_SCHED	_XFER + 2*32

	vpaddd	3*32(TBL, SRND), X0, XFER
	vmovdqa	XFER, 3*32+_XFER(%rsp, SRND)
	FOUR_ROUNDS_AND_SCHED	_XFER + 3*32

	add	$4*32, SRND
	cmp	$3*4*32, SRND
	jb	loop1

loop2:
	## Do last 16 rounds with no scheduling
	vpaddd	0*32(TBL, SRND), X0, XFER
	vmovdqa	XFER, 0*32+_XFER(%rsp, SRND)
	DO_4ROUNDS	_XFER + 0*32
	vpaddd	1*32(TBL, SRND), X1, XFER
	vmovdqa	XFER, 1*32+_XFER(%rsp, SRND)
	DO_4ROUNDS	_XFER + 1*32
	add	$2*32, SRND

	vmovdqa	X2, X0
	vmovdqa	X3, X1

	cmp	$4*4*32, SRND
	jb	loop2

	mov	_CTX(%rsp), CTX
	mov	_INP(%rsp), INP

	# Fold block 1's result into the digest (addm: mem += reg, reg = mem).
	addm	(4*0)(CTX),a
	addm	(4*1)(CTX),b
	addm	(4*2)(CTX),c
	addm	(4*3)(CTX),d
	addm	(4*4)(CTX),e
	addm	(4*5)(CTX),f
	addm	(4*6)(CTX),g
	addm	(4*7)(CTX),h

	cmp	_INP_END(%rsp), INP
	ja	done_hash

	#### Do second block using previously scheduled results
	# The "+16" offsets select the high-lane halves of the XFER slots,
	# i.e. the W+K values computed for block 2 during loop1/loop2.
	xor	SRND, SRND
.align 16
loop3:
	DO_4ROUNDS	 _XFER + 0*32 + 16
	DO_4ROUNDS	 _XFER + 1*32 + 16
	add	$2*32, SRND
	cmp	$4*4*32, SRND
	jb	loop3

	mov	_CTX(%rsp), CTX
	mov	_INP(%rsp), INP
	add	$64, INP

	addm	(4*0)(CTX),a
	addm	(4*1)(CTX),b
	addm	(4*2)(CTX),c
	addm	(4*3)(CTX),d
	addm	(4*4)(CTX),e
	addm	(4*5)(CTX),f
	addm	(4*6)(CTX),g
	addm	(4*7)(CTX),h

	cmp	_INP_END(%rsp), INP
	jb	loop0		# more than one block left: go two-at-a-time
	ja	done_hash	# past the end: all blocks consumed

do_last_block:
	#### do last block (single 64-byte block, xmm registers only)
	lea	K256(%rip), TBL

	VMOVDQ	0*16(INP),XWORD0
	VMOVDQ	1*16(INP),XWORD1
	VMOVDQ	2*16(INP),XWORD2
	VMOVDQ	3*16(INP),XWORD3

	vpshufb	X_BYTE_FLIP_MASK, XWORD0, XWORD0
	vpshufb	X_BYTE_FLIP_MASK, XWORD1, XWORD1
	vpshufb	X_BYTE_FLIP_MASK, XWORD2, XWORD2
	vpshufb	X_BYTE_FLIP_MASK, XWORD3, XWORD3

	jmp	last_block_enter

only_one_block:

	## load initial digest
	mov	(4*0)(CTX),a
	mov	(4*1)(CTX),b
	mov	(4*2)(CTX),c
	mov	(4*3)(CTX),d
	mov	(4*4)(CTX),e
	mov	(4*5)(CTX),f
	mov	(4*6)(CTX),g
	mov	(4*7)(CTX),h

	vmovdqa	PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
	vmovdqa	_SHUF_00BA(%rip), SHUF_00BA
	vmovdqa	_SHUF_DC00(%rip), SHUF_DC00

	mov	CTX, _CTX(%rsp)
	jmp	do_last_block

done_hash:

	# Restore the caller's (unaligned) stack pointer saved above.
	mov	_RSP(%rsp), %rsp

	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ret
ENDPROC(sha256_transform_rorx)
725 | |||
# SHA-256 round constants and shuffle masks for the AVX2 path.
# These tables are only ever read (rip-relative loads in the code above),
# so place them in .rodata rather than the writable .data section; a
# stray write can then no longer silently corrupt the round constants.
# Each K256 row is duplicated so a single 256-bit load broadcasts the
# same four constants into both 128-bit lanes (two blocks in flight).
.section .rodata
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

# dword-wise big-endian -> little-endian byte swap mask
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203

# shuffle xBxA -> 00BA
_SHUF_00BA:
	.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100

# shuffle xDxC -> DC00
_SHUF_DC00:
	.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF
772 | #endif | ||
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S new file mode 100644 index 000000000000..98d3c391da81 --- /dev/null +++ b/arch/x86/crypto/sha256-ssse3-asm.S | |||
@@ -0,0 +1,506 @@ | |||
1 | ######################################################################## | ||
2 | # Implement fast SHA-256 with SSSE3 instructions. (x86_64) | ||
3 | # | ||
4 | # Copyright (C) 2013 Intel Corporation. | ||
5 | # | ||
6 | # Authors: | ||
7 | # James Guilford <james.guilford@intel.com> | ||
8 | # Kirk Yap <kirk.s.yap@intel.com> | ||
9 | # Tim Chen <tim.c.chen@linux.intel.com> | ||
10 | # | ||
11 | # This software is available to you under a choice of one of two | ||
12 | # licenses. You may choose to be licensed under the terms of the GNU | ||
13 | # General Public License (GPL) Version 2, available from the file | ||
14 | # COPYING in the main directory of this source tree, or the | ||
15 | # OpenIB.org BSD license below: | ||
16 | # | ||
17 | # Redistribution and use in source and binary forms, with or | ||
18 | # without modification, are permitted provided that the following | ||
19 | # conditions are met: | ||
20 | # | ||
21 | # - Redistributions of source code must retain the above | ||
22 | # copyright notice, this list of conditions and the following | ||
23 | # disclaimer. | ||
24 | # | ||
25 | # - Redistributions in binary form must reproduce the above | ||
26 | # copyright notice, this list of conditions and the following | ||
27 | # disclaimer in the documentation and/or other materials | ||
28 | # provided with the distribution. | ||
29 | # | ||
30 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
31 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
32 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
33 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
34 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
35 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
36 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
37 | # SOFTWARE. | ||
38 | # | ||
39 | ######################################################################## | ||
40 | # | ||
41 | # This code is described in an Intel White-Paper: | ||
42 | # "Fast SHA-256 Implementations on Intel Architecture Processors" | ||
43 | # | ||
44 | # To find it, surf to http://www.intel.com/p/en_US/embedded | ||
45 | # and search for that title. | ||
46 | # | ||
47 | ######################################################################## | ||
48 | |||
49 | #include <linux/linkage.h> | ||
50 | |||
51 | ## assume buffers not aligned | ||
52 | #define MOVDQ movdqu | ||
53 | |||
54 | ################################ Define Macros | ||
55 | |||
# addm [mem], reg
# reg += [mem]; [mem] = reg
# Used to fold the working variables a..h back into the digest words.
.macro addm p1 p2
	add	\p1, \p2
	mov	\p2, \p1
.endm
62 | |||
63 | ################################ | ||
64 | |||
# COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask
# Load \p1 (xmm) from possibly-unaligned [mem] (\p2) and byte-swap each
# dword with \p3, converting the big-endian message words to host order.
.macro COPY_XMM_AND_BSWAP p1 p2 p3
	MOVDQ	\p2, \p1
	pshufb	\p3, \p1
.endm
71 | |||
72 | ################################ | ||
73 | |||
# X0..X3: sliding 16-dword window of the message schedule; the names
# are rotated symbolically by rotate_Xs, so no data movement is needed.
X0 = %xmm4
X1 = %xmm5
X2 = %xmm6
X3 = %xmm7

# Scratch vectors for the schedule computation.
XTMP0 = %xmm0
XTMP1 = %xmm1
XTMP2 = %xmm2
XTMP3 = %xmm3
XTMP4 = %xmm8
XFER = %xmm9		# W[t]+K[t] staging, spilled 16 bytes at a time to _XFER

SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00
BYTE_FLIP_MASK = %xmm12

NUM_BLKS = %rdx	# 3rd arg
CTX = %rsi	# 2nd arg (survives the rounds: %rsi is not a working var)
INP = %rdi	# 1st arg

SRND = %rdi	# clobbers INP (INP is spilled to _INP first)
c = %ecx
d = %r8d
e = %edx	# aliases NUM_BLKS; NUM_BLKS is consumed before e is loaded
TBL = %rbp
a = %eax
b = %ebx

f = %r9d
g = %r10d
h = %r11d

y0 = %r13d
y1 = %r14d
y2 = %r15d



# Stack frame layout (offsets from the 16-byte-aligned %rsp).
_INP_END_SIZE = 8
_INP_SIZE = 8
# The XFER slot receives full 16-byte movdqa stores (W[t]+K[t] for four
# rounds).  Sizing it at 8 bytes would make those stores overflow the
# frame and, depending on the stack alignment on entry, clobber the
# saved registers just above it — reserve the full 16 bytes.
_XFER_SIZE = 16
_XMM_SAVE_SIZE = 0

_INP_END = 0
_INP = _INP_END + _INP_END_SIZE
_XFER = _INP + _INP_SIZE
_XMM_SAVE = _XFER + _XFER_SIZE
STACK_SIZE = _XMM_SAVE + _XMM_SAVE_SIZE
122 | |||
# rotate_Xs
# Rotate the symbolic names X0...X3 one step (X0 <- X1 <- X2 <- X3 <-
# old X0) so the schedule macro always sees W[-16..-1] under the same
# names.  Pure assembly-time renaming; emits no instructions.
.macro rotate_Xs
X_ = X0
X0 = X1
X1 = X2
X2 = X3
X3 = X_
.endm
132 | |||
# ROTATE_ARGS
# Rotate the symbolic names of the SHA-256 working variables a...h
# (h <- g <- ... <- a <- old h), implementing the per-round variable
# shuffle with zero register moves.  Assembly-time only.
.macro ROTATE_ARGS
TMP_ = h
h = g
g = f
f = e
e = d
d = c
c = b
b = a
a = TMP_
.endm
146 | |||
# FOUR_ROUNDS_AND_SCHED
# Perform four SHA-256 rounds on the W+K values already spilled at
# _XFER(%rsp), while simultaneously computing the next four message
# schedule words W[t..t+3] into X0 (vector code is interleaved with the
# scalar round code to hide latencies).  Ends with ROTATE_ARGS and
# rotate_Xs so the caller can invoke it back to back.
.macro FOUR_ROUNDS_AND_SCHED
	## compute s0 four at a time and s1 two at a time
	## compute W[-16] + W[-7] 4 at a time
	movdqa	X3, XTMP0
	mov	e, y0			# y0 = e
	ror	$(25-11), y0		# y0 = e >> (25-11)
	mov	a, y1			# y1 = a
	palignr	$4, X2, XTMP0		# XTMP0 = W[-7]
	ror	$(22-13), y1		# y1 = a >> (22-13)
	xor	e, y0			# y0 = e ^ (e >> (25-11))
	mov	f, y2			# y2 = f
	ror	$(11-6), y0		# y0 = (e >> (11-6)) ^ (e >> (25-6))
	movdqa	X1, XTMP1
	xor	a, y1			# y1 = a ^ (a >> (22-13))
	xor	g, y2			# y2 = f^g
	paddd	X0, XTMP0		# XTMP0 = W[-7] + W[-16]
	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
	and	e, y2			# y2 = (f^g)&e
	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
	## compute s0
	palignr	$4, X0, XTMP1		# XTMP1 = W[-15]
	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
	xor	g, y2			# y2 = CH = ((f^g)&e)^g
	movdqa	XTMP1, XTMP2		# XTMP2 = W[-15]
	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
	add	y0, y2			# y2 = S1 + CH
	add	_XFER(%rsp) , y2	# y2 = k + w + S1 + CH
	movdqa	XTMP1, XTMP3		# XTMP3 = W[-15]
	mov	a, y0			# y0 = a
	add	y2, h			# h = h + S1 + CH + k + w
	mov	a, y2			# y2 = a
	pslld	$(32-7), XTMP1		# XTMP1 = W[-15] << 25
	or	c, y0			# y0 = a|c
	add	h, d			# d = d + h + S1 + CH + k + w
	and	c, y2			# y2 = a&c
	psrld	$7, XTMP2		# XTMP2 = W[-15] >> 7
	and	b, y0			# y0 = (a|c)&b
	add	y1, h			# h = h + S1 + CH + k + w + S0
	por	XTMP2, XTMP1		# XTMP1 = W[-15] ror 7
	or	y2, y0			# y0 = MAJ = ((a|c)&b)|(a&c)
	add	y0, h			# h = h + S1 + CH + k + w + S0 + MAJ
					#
	ROTATE_ARGS			#
	movdqa	XTMP3, XTMP2		# XTMP2 = W[-15]
	mov	e, y0			# y0 = e
	mov	a, y1			# y1 = a
	movdqa	XTMP3, XTMP4		# XTMP4 = W[-15]
	ror	$(25-11), y0		# y0 = e >> (25-11)
	xor	e, y0			# y0 = e ^ (e >> (25-11))
	mov	f, y2			# y2 = f
	ror	$(22-13), y1		# y1 = a >> (22-13)
	pslld	$(32-18), XTMP3		# XTMP3 = W[-15] << 14
	xor	a, y1			# y1 = a ^ (a >> (22-13))
	ror	$(11-6), y0		# y0 = (e >> (11-6)) ^ (e >> (25-6))
	xor	g, y2			# y2 = f^g
	psrld	$18, XTMP2		# XTMP2 = W[-15] >> 18
	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
	and	e, y2			# y2 = (f^g)&e
	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
	pxor	XTMP3, XTMP1
	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
	xor	g, y2			# y2 = CH = ((f^g)&e)^g
	psrld	$3, XTMP4		# XTMP4 = W[-15] >> 3
	add	y0, y2			# y2 = S1 + CH
	add	(1*4 + _XFER)(%rsp), y2	# y2 = k + w + S1 + CH
	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
	pxor	XTMP2, XTMP1		# XTMP1 = W[-15] ror 7 ^ W[-15] ror 18
	mov	a, y0			# y0 = a
	add	y2, h			# h = h + S1 + CH + k + w
	mov	a, y2			# y2 = a
	pxor	XTMP4, XTMP1		# XTMP1 = s0
	or	c, y0			# y0 = a|c
	add	h, d			# d = d + h + S1 + CH + k + w
	and	c, y2			# y2 = a&c
	## compute low s1
	pshufd	$0b11111010, X3, XTMP2	# XTMP2 = W[-2] {BBAA}
	and	b, y0			# y0 = (a|c)&b
	add	y1, h			# h = h + S1 + CH + k + w + S0
	paddd	XTMP1, XTMP0		# XTMP0 = W[-16] + W[-7] + s0
	or	y2, y0			# y0 = MAJ = ((a|c)&b)|(a&c)
	add	y0, h			# h = h + S1 + CH + k + w + S0 + MAJ

	ROTATE_ARGS
	movdqa	XTMP2, XTMP3		# XTMP3 = W[-2] {BBAA}
	mov	e, y0			# y0 = e
	mov	a, y1			# y1 = a
	ror	$(25-11), y0		# y0 = e >> (25-11)
	movdqa	XTMP2, XTMP4		# XTMP4 = W[-2] {BBAA}
	xor	e, y0			# y0 = e ^ (e >> (25-11))
	ror	$(22-13), y1		# y1 = a >> (22-13)
	mov	f, y2			# y2 = f
	xor	a, y1			# y1 = a ^ (a >> (22-13))
	ror	$(11-6), y0		# y0 = (e >> (11-6)) ^ (e >> (25-6))
	psrlq	$17, XTMP2		# XTMP2 = W[-2] ror 17 {xBxA}
	xor	g, y2			# y2 = f^g
	psrlq	$19, XTMP3		# XTMP3 = W[-2] ror 19 {xBxA}
	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
	and	e, y2			# y2 = (f^g)&e
	psrld	$10, XTMP4		# XTMP4 = W[-2] >> 10 {BBAA}
	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
	xor	g, y2			# y2 = CH = ((f^g)&e)^g
	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
	pxor	XTMP3, XTMP2
	add	y0, y2			# y2 = S1 + CH
	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
	add	(2*4 + _XFER)(%rsp), y2	# y2 = k + w + S1 + CH
	pxor	XTMP2, XTMP4		# XTMP4 = s1 {xBxA}
	mov	a, y0			# y0 = a
	add	y2, h			# h = h + S1 + CH + k + w
	mov	a, y2			# y2 = a
	pshufb	SHUF_00BA, XTMP4	# XTMP4 = s1 {00BA}
	or	c, y0			# y0 = a|c
	add	h, d			# d = d + h + S1 + CH + k + w
	and	c, y2			# y2 = a&c
	paddd	XTMP4, XTMP0		# XTMP0 = {..., ..., W[1], W[0]}
	and	b, y0			# y0 = (a|c)&b
	add	y1, h			# h = h + S1 + CH + k + w + S0
	## compute high s1
	pshufd	$0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
	or	y2, y0			# y0 = MAJ = ((a|c)&b)|(a&c)
	add	y0, h			# h = h + S1 + CH + k + w + S0 + MAJ
					#
	ROTATE_ARGS			#
	movdqa	XTMP2, XTMP3		# XTMP3 = W[-2] {DDCC}
	mov	e, y0			# y0 = e
	ror	$(25-11), y0		# y0 = e >> (25-11)
	mov	a, y1			# y1 = a
	movdqa	XTMP2, X0		# X0 = W[-2] {DDCC}
	ror	$(22-13), y1		# y1 = a >> (22-13)
	xor	e, y0			# y0 = e ^ (e >> (25-11))
	mov	f, y2			# y2 = f
	ror	$(11-6), y0		# y0 = (e >> (11-6)) ^ (e >> (25-6))
	psrlq	$17, XTMP2		# XTMP2 = W[-2] ror 17 {xDxC}
	xor	a, y1			# y1 = a ^ (a >> (22-13))
	xor	g, y2			# y2 = f^g
	psrlq	$19, XTMP3		# XTMP3 = W[-2] ror 19 {xDxC}
	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
	and	e, y2			# y2 = (f^g)&e
	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
	psrld	$10, X0			# X0 = W[-2] >> 10 {DDCC}
	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
	xor	g, y2			# y2 = CH = ((f^g)&e)^g
	pxor	XTMP3, XTMP2		#
	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
	add	y0, y2			# y2 = S1 + CH
	add	(3*4 + _XFER)(%rsp), y2	# y2 = k + w + S1 + CH
	pxor	XTMP2, X0		# X0 = s1 {xDxC}
	mov	a, y0			# y0 = a
	add	y2, h			# h = h + S1 + CH + k + w
	mov	a, y2			# y2 = a
	pshufb	SHUF_DC00, X0		# X0 = s1 {DC00}
	or	c, y0			# y0 = a|c
	add	h, d			# d = d + h + S1 + CH + k + w
	and	c, y2			# y2 = a&c
	paddd	XTMP0, X0		# X0 = {W[3], W[2], W[1], W[0]}
	and	b, y0			# y0 = (a|c)&b
	add	y1, h			# h = h + S1 + CH + k + w + S0
	or	y2, y0			# y0 = MAJ = ((a|c)&b)|(a&c)
	add	y0, h			# h = h + S1 + CH + k + w + S0 + MAJ

	ROTATE_ARGS
	rotate_Xs
.endm
314 | |||
## DO_ROUND round
## One SHA-256 round with no message scheduling; the round input
## W[t]+K[t] is read from [rsp + _XFER + \round * 4].
.macro DO_ROUND round
	mov	e, y0			# y0 = e
	ror	$(25-11), y0		# y0 = e >> (25-11)
	mov	a, y1			# y1 = a
	xor	e, y0			# y0 = e ^ (e >> (25-11))
	ror	$(22-13), y1		# y1 = a >> (22-13)
	mov	f, y2			# y2 = f
	xor	a, y1			# y1 = a ^ (a >> (22-13))
	ror	$(11-6), y0		# y0 = (e >> (11-6)) ^ (e >> (25-6))
	xor	g, y2			# y2 = f^g
	xor	e, y0			# y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
	ror	$(13-2), y1		# y1 = (a >> (13-2)) ^ (a >> (22-2))
	and	e, y2			# y2 = (f^g)&e
	xor	a, y1			# y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
	ror	$6, y0			# y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25)
	xor	g, y2			# y2 = CH = ((f^g)&e)^g
	add	y0, y2			# y2 = S1 + CH
	ror	$2, y1			# y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
	offset = \round * 4 + _XFER
	add	offset(%rsp), y2	# y2 = k + w + S1 + CH
	mov	a, y0			# y0 = a
	add	y2, h			# h = h + S1 + CH + k + w
	mov	a, y2			# y2 = a
	or	c, y0			# y0 = a|c
	add	h, d			# d = d + h + S1 + CH + k + w
	and	c, y2			# y2 = a&c
	and	b, y0			# y0 = (a|c)&b
	add	y1, h			# h = h + S1 + CH + k + w + S0
	or	y2, y0			# y0 = MAJ = ((a|c)&b)|(a&c)
	add	y0, h			# h = h + S1 + CH + k + w + S0 + MAJ
	ROTATE_ARGS
.endm
348 | |||
########################################################################
## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks)
## arg 1 : pointer to input data
## arg 2 : pointer to digest
## arg 3 : Num blocks
##
## SSSE3 SHA-256 block function: one 64-byte block per loop0 iteration.
## The vectorized message schedule runs interleaved with the scalar
## rounds (FOUR_ROUNDS_AND_SCHED), then the last 16 rounds run from the
## pre-scheduled words (DO_ROUND).
########################################################################
.text
ENTRY(sha256_transform_ssse3)
.align 32
	pushq	%rbx
	pushq	%rbp
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushq	%r12

	# Save the unaligned entry %rsp in %r12 (callee-saved), then carve
	# out a 16-byte-aligned frame for the movdqa XFER spills.
	mov	%rsp, %r12
	subq	$STACK_SIZE, %rsp
	and	$~15, %rsp

	shl	$6, NUM_BLKS		# convert to bytes
	jz	done_hash
	add	INP, NUM_BLKS
	mov	NUM_BLKS, _INP_END(%rsp) # pointer to end of data

	## load initial digest
	mov	4*0(CTX), a
	mov	4*1(CTX), b
	mov	4*2(CTX), c
	mov	4*3(CTX), d
	mov	4*4(CTX), e
	mov	4*5(CTX), f
	mov	4*6(CTX), g
	mov	4*7(CTX), h

	movdqa	PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
	movdqa	_SHUF_00BA(%rip), SHUF_00BA
	movdqa	_SHUF_DC00(%rip), SHUF_DC00

	# Note: CTX (%rsi) is not used by the round code, so it needs no
	# spill; INP (%rdi) is reused as SRND and is spilled below.
loop0:
	lea	K256(%rip), TBL

	## byte swap first 16 dwords
	COPY_XMM_AND_BSWAP	X0, 0*16(INP), BYTE_FLIP_MASK
	COPY_XMM_AND_BSWAP	X1, 1*16(INP), BYTE_FLIP_MASK
	COPY_XMM_AND_BSWAP	X2, 2*16(INP), BYTE_FLIP_MASK
	COPY_XMM_AND_BSWAP	X3, 3*16(INP), BYTE_FLIP_MASK

	mov	INP, _INP(%rsp)

	## schedule 48 input dwords, by doing 3 rounds of 16 each
	mov	$3, SRND
.align 16
loop1:
	# For each 4-round group: add the round constants to the current
	# schedule words, spill W+K to the stack, run the rounds while
	# scheduling the next words.
	movdqa	(TBL), XFER
	paddd	X0, XFER
	movdqa	XFER, _XFER(%rsp)
	FOUR_ROUNDS_AND_SCHED

	movdqa	1*16(TBL), XFER
	paddd	X0, XFER
	movdqa	XFER, _XFER(%rsp)
	FOUR_ROUNDS_AND_SCHED

	movdqa	2*16(TBL), XFER
	paddd	X0, XFER
	movdqa	XFER, _XFER(%rsp)
	FOUR_ROUNDS_AND_SCHED

	movdqa	3*16(TBL), XFER
	paddd	X0, XFER
	movdqa	XFER, _XFER(%rsp)
	add	$4*16, TBL
	FOUR_ROUNDS_AND_SCHED

	sub	$1, SRND
	jne	loop1

	# Last 16 rounds: words are already scheduled in X0..X3 (renamed
	# down by rotate_Xs); no further scheduling needed.
	mov	$2, SRND
loop2:
	paddd	(TBL), X0
	movdqa	X0, _XFER(%rsp)
	DO_ROUND	0
	DO_ROUND	1
	DO_ROUND	2
	DO_ROUND	3
	paddd	1*16(TBL), X1
	movdqa	X1, _XFER(%rsp)
	add	$2*16, TBL
	DO_ROUND	0
	DO_ROUND	1
	DO_ROUND	2
	DO_ROUND	3

	movdqa	X2, X0
	movdqa	X3, X1

	sub	$1, SRND
	jne	loop2

	# Fold this block's result into the digest (addm: mem += reg, reg = mem).
	addm	(4*0)(CTX),a
	addm	(4*1)(CTX),b
	addm	(4*2)(CTX),c
	addm	(4*3)(CTX),d
	addm	(4*4)(CTX),e
	addm	(4*5)(CTX),f
	addm	(4*6)(CTX),g
	addm	(4*7)(CTX),h

	mov	_INP(%rsp), INP
	add	$64, INP
	cmp	_INP_END(%rsp), INP
	jne	loop0

done_hash:

	# Restore the caller's (unaligned) stack pointer saved in %r12.
	mov	%r12, %rsp

	popq	%r12
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%rbp
	popq	%rbx

	ret
ENDPROC(sha256_transform_ssse3)
476 | |||
# SHA-256 round constants and shuffle masks for the SSSE3 path.
# These tables are only ever read (rip-relative loads in the code above),
# so place them in .rodata rather than the writable .data section; a
# stray write can then no longer silently corrupt the round constants.
.section .rodata
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

# dword-wise big-endian -> little-endian byte swap mask
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203

# shuffle xBxA -> 00BA
_SHUF_00BA:
	.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100

# shuffle xDxC -> DC00
_SHUF_DC00:
	.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c new file mode 100644 index 000000000000..597d4da69656 --- /dev/null +++ b/arch/x86/crypto/sha256_ssse3_glue.c | |||
@@ -0,0 +1,275 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * Glue code for the SHA256 Secure Hash Algorithm assembler | ||
5 | * implementation using supplemental SSE3 / AVX / AVX2 instructions. | ||
6 | * | ||
7 | * This file is based on sha256_generic.c | ||
8 | * | ||
9 | * Copyright (C) 2013 Intel Corporation. | ||
10 | * | ||
11 | * Author: | ||
12 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License as published by the Free | ||
16 | * Software Foundation; either version 2 of the License, or (at your option) | ||
17 | * any later version. | ||
18 | * | ||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
20 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
22 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
23 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
24 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
25 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
26 | * SOFTWARE. | ||
27 | */ | ||
28 | |||
29 | |||
30 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
31 | |||
32 | #include <crypto/internal/hash.h> | ||
33 | #include <linux/init.h> | ||
34 | #include <linux/module.h> | ||
35 | #include <linux/mm.h> | ||
36 | #include <linux/cryptohash.h> | ||
37 | #include <linux/types.h> | ||
38 | #include <crypto/sha.h> | ||
39 | #include <asm/byteorder.h> | ||
40 | #include <asm/i387.h> | ||
41 | #include <asm/xcr.h> | ||
42 | #include <asm/xsave.h> | ||
43 | #include <linux/string.h> | ||
44 | |||
45 | asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest, | ||
46 | u64 rounds); | ||
47 | #ifdef CONFIG_AS_AVX | ||
48 | asmlinkage void sha256_transform_avx(const char *data, u32 *digest, | ||
49 | u64 rounds); | ||
50 | #endif | ||
51 | #ifdef CONFIG_AS_AVX2 | ||
52 | asmlinkage void sha256_transform_rorx(const char *data, u32 *digest, | ||
53 | u64 rounds); | ||
54 | #endif | ||
55 | |||
56 | static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64); | ||
57 | |||
58 | |||
59 | static int sha256_ssse3_init(struct shash_desc *desc) | ||
60 | { | ||
61 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
62 | |||
63 | sctx->state[0] = SHA256_H0; | ||
64 | sctx->state[1] = SHA256_H1; | ||
65 | sctx->state[2] = SHA256_H2; | ||
66 | sctx->state[3] = SHA256_H3; | ||
67 | sctx->state[4] = SHA256_H4; | ||
68 | sctx->state[5] = SHA256_H5; | ||
69 | sctx->state[6] = SHA256_H6; | ||
70 | sctx->state[7] = SHA256_H7; | ||
71 | sctx->count = 0; | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
77 | unsigned int len, unsigned int partial) | ||
78 | { | ||
79 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
80 | unsigned int done = 0; | ||
81 | |||
82 | sctx->count += len; | ||
83 | |||
84 | if (partial) { | ||
85 | done = SHA256_BLOCK_SIZE - partial; | ||
86 | memcpy(sctx->buf + partial, data, done); | ||
87 | sha256_transform_asm(sctx->buf, sctx->state, 1); | ||
88 | } | ||
89 | |||
90 | if (len - done >= SHA256_BLOCK_SIZE) { | ||
91 | const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; | ||
92 | |||
93 | sha256_transform_asm(data + done, sctx->state, (u64) rounds); | ||
94 | |||
95 | done += rounds * SHA256_BLOCK_SIZE; | ||
96 | } | ||
97 | |||
98 | memcpy(sctx->buf, data + done, len - done); | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
104 | unsigned int len) | ||
105 | { | ||
106 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
107 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
108 | int res; | ||
109 | |||
110 | /* Handle the fast case right here */ | ||
111 | if (partial + len < SHA256_BLOCK_SIZE) { | ||
112 | sctx->count += len; | ||
113 | memcpy(sctx->buf + partial, data, len); | ||
114 | |||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | if (!irq_fpu_usable()) { | ||
119 | res = crypto_sha256_update(desc, data, len); | ||
120 | } else { | ||
121 | kernel_fpu_begin(); | ||
122 | res = __sha256_ssse3_update(desc, data, len, partial); | ||
123 | kernel_fpu_end(); | ||
124 | } | ||
125 | |||
126 | return res; | ||
127 | } | ||
128 | |||
129 | |||
130 | /* Add padding and return the message digest. */ | ||
131 | static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) | ||
132 | { | ||
133 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
134 | unsigned int i, index, padlen; | ||
135 | __be32 *dst = (__be32 *)out; | ||
136 | __be64 bits; | ||
137 | static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; | ||
138 | |||
139 | bits = cpu_to_be64(sctx->count << 3); | ||
140 | |||
141 | /* Pad out to 56 mod 64 and append length */ | ||
142 | index = sctx->count % SHA256_BLOCK_SIZE; | ||
143 | padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); | ||
144 | |||
145 | if (!irq_fpu_usable()) { | ||
146 | crypto_sha256_update(desc, padding, padlen); | ||
147 | crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
148 | } else { | ||
149 | kernel_fpu_begin(); | ||
150 | /* We need to fill a whole block for __sha256_ssse3_update() */ | ||
151 | if (padlen <= 56) { | ||
152 | sctx->count += padlen; | ||
153 | memcpy(sctx->buf + index, padding, padlen); | ||
154 | } else { | ||
155 | __sha256_ssse3_update(desc, padding, padlen, index); | ||
156 | } | ||
157 | __sha256_ssse3_update(desc, (const u8 *)&bits, | ||
158 | sizeof(bits), 56); | ||
159 | kernel_fpu_end(); | ||
160 | } | ||
161 | |||
162 | /* Store state in digest */ | ||
163 | for (i = 0; i < 8; i++) | ||
164 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
165 | |||
166 | /* Wipe context */ | ||
167 | memset(sctx, 0, sizeof(*sctx)); | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | static int sha256_ssse3_export(struct shash_desc *desc, void *out) | ||
173 | { | ||
174 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
175 | |||
176 | memcpy(out, sctx, sizeof(*sctx)); | ||
177 | |||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static int sha256_ssse3_import(struct shash_desc *desc, const void *in) | ||
182 | { | ||
183 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
184 | |||
185 | memcpy(sctx, in, sizeof(*sctx)); | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static struct shash_alg alg = { | ||
191 | .digestsize = SHA256_DIGEST_SIZE, | ||
192 | .init = sha256_ssse3_init, | ||
193 | .update = sha256_ssse3_update, | ||
194 | .final = sha256_ssse3_final, | ||
195 | .export = sha256_ssse3_export, | ||
196 | .import = sha256_ssse3_import, | ||
197 | .descsize = sizeof(struct sha256_state), | ||
198 | .statesize = sizeof(struct sha256_state), | ||
199 | .base = { | ||
200 | .cra_name = "sha256", | ||
201 | .cra_driver_name = "sha256-ssse3", | ||
202 | .cra_priority = 150, | ||
203 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
204 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
205 | .cra_module = THIS_MODULE, | ||
206 | } | ||
207 | }; | ||
208 | |||
209 | #ifdef CONFIG_AS_AVX | ||
210 | static bool __init avx_usable(void) | ||
211 | { | ||
212 | u64 xcr0; | ||
213 | |||
214 | if (!cpu_has_avx || !cpu_has_osxsave) | ||
215 | return false; | ||
216 | |||
217 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
218 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
219 | pr_info("AVX detected but unusable.\n"); | ||
220 | |||
221 | return false; | ||
222 | } | ||
223 | |||
224 | return true; | ||
225 | } | ||
226 | #endif | ||
227 | |||
228 | static int __init sha256_ssse3_mod_init(void) | ||
229 | { | ||
230 | /* test for SSE3 first */ | ||
231 | if (cpu_has_ssse3) | ||
232 | sha256_transform_asm = sha256_transform_ssse3; | ||
233 | |||
234 | #ifdef CONFIG_AS_AVX | ||
235 | /* allow AVX to override SSSE3, it's a little faster */ | ||
236 | if (avx_usable()) { | ||
237 | #ifdef CONFIG_AS_AVX2 | ||
238 | if (boot_cpu_has(X86_FEATURE_AVX2)) | ||
239 | sha256_transform_asm = sha256_transform_rorx; | ||
240 | else | ||
241 | #endif | ||
242 | sha256_transform_asm = sha256_transform_avx; | ||
243 | } | ||
244 | #endif | ||
245 | |||
246 | if (sha256_transform_asm) { | ||
247 | #ifdef CONFIG_AS_AVX | ||
248 | if (sha256_transform_asm == sha256_transform_avx) | ||
249 | pr_info("Using AVX optimized SHA-256 implementation\n"); | ||
250 | #ifdef CONFIG_AS_AVX2 | ||
251 | else if (sha256_transform_asm == sha256_transform_rorx) | ||
252 | pr_info("Using AVX2 optimized SHA-256 implementation\n"); | ||
253 | #endif | ||
254 | else | ||
255 | #endif | ||
256 | pr_info("Using SSSE3 optimized SHA-256 implementation\n"); | ||
257 | return crypto_register_shash(&alg); | ||
258 | } | ||
259 | pr_info("Neither AVX nor SSSE3 is available/usable.\n"); | ||
260 | |||
261 | return -ENODEV; | ||
262 | } | ||
263 | |||
264 | static void __exit sha256_ssse3_mod_fini(void) | ||
265 | { | ||
266 | crypto_unregister_shash(&alg); | ||
267 | } | ||
268 | |||
269 | module_init(sha256_ssse3_mod_init); | ||
270 | module_exit(sha256_ssse3_mod_fini); | ||
271 | |||
272 | MODULE_LICENSE("GPL"); | ||
273 | MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); | ||
274 | |||
275 | MODULE_ALIAS("sha256"); | ||
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S new file mode 100644 index 000000000000..974dde9bc6cd --- /dev/null +++ b/arch/x86/crypto/sha512-avx-asm.S | |||
@@ -0,0 +1,423 @@ | |||
1 | ######################################################################## | ||
2 | # Implement fast SHA-512 with AVX instructions. (x86_64) | ||
3 | # | ||
4 | # Copyright (C) 2013 Intel Corporation. | ||
5 | # | ||
6 | # Authors: | ||
7 | # James Guilford <james.guilford@intel.com> | ||
8 | # Kirk Yap <kirk.s.yap@intel.com> | ||
9 | # David Cote <david.m.cote@intel.com> | ||
10 | # Tim Chen <tim.c.chen@linux.intel.com> | ||
11 | # | ||
12 | # This software is available to you under a choice of one of two | ||
13 | # licenses. You may choose to be licensed under the terms of the GNU | ||
14 | # General Public License (GPL) Version 2, available from the file | ||
15 | # COPYING in the main directory of this source tree, or the | ||
16 | # OpenIB.org BSD license below: | ||
17 | # | ||
18 | # Redistribution and use in source and binary forms, with or | ||
19 | # without modification, are permitted provided that the following | ||
20 | # conditions are met: | ||
21 | # | ||
22 | # - Redistributions of source code must retain the above | ||
23 | # copyright notice, this list of conditions and the following | ||
24 | # disclaimer. | ||
25 | # | ||
26 | # - Redistributions in binary form must reproduce the above | ||
27 | # copyright notice, this list of conditions and the following | ||
28 | # disclaimer in the documentation and/or other materials | ||
29 | # provided with the distribution. | ||
30 | # | ||
31 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
32 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
33 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
34 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
35 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
36 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
37 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
38 | # SOFTWARE. | ||
39 | # | ||
40 | ######################################################################## | ||
41 | # | ||
42 | # This code is described in an Intel White-Paper: | ||
43 | # "Fast SHA-512 Implementations on Intel Architecture Processors" | ||
44 | # | ||
45 | # To find it, surf to http://www.intel.com/p/en_US/embedded | ||
46 | # and search for that title. | ||
47 | # | ||
48 | ######################################################################## | ||
49 | |||
50 | #ifdef CONFIG_AS_AVX | ||
51 | #include <linux/linkage.h> | ||
52 | |||
53 | .text | ||
54 | |||
55 | # Virtual Registers | ||
56 | # ARG1 | ||
57 | msg = %rdi | ||
58 | # ARG2 | ||
59 | digest = %rsi | ||
60 | # ARG3 | ||
61 | msglen = %rdx | ||
62 | T1 = %rcx | ||
63 | T2 = %r8 | ||
64 | a_64 = %r9 | ||
65 | b_64 = %r10 | ||
66 | c_64 = %r11 | ||
67 | d_64 = %r12 | ||
68 | e_64 = %r13 | ||
69 | f_64 = %r14 | ||
70 | g_64 = %r15 | ||
71 | h_64 = %rbx | ||
72 | tmp0 = %rax | ||
73 | |||
74 | # Local variables (stack frame) | ||
75 | |||
76 | # Message Schedule | ||
77 | W_SIZE = 80*8 | ||
78 | # W[t] + K[t] | W[t+1] + K[t+1] | ||
79 | WK_SIZE = 2*8 | ||
80 | RSPSAVE_SIZE = 1*8 | ||
81 | GPRSAVE_SIZE = 5*8 | ||
82 | |||
83 | frame_W = 0 | ||
84 | frame_WK = frame_W + W_SIZE | ||
85 | frame_RSPSAVE = frame_WK + WK_SIZE | ||
86 | frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE | ||
87 | frame_size = frame_GPRSAVE + GPRSAVE_SIZE | ||
88 | |||
89 | # Useful QWORD "arrays" for simpler memory references | ||
90 | # MSG, DIGEST, K_t, W_t are arrays | ||
91 | # WK_2(t) points to 1 of 2 qwords at frame.WK depdending on t being odd/even | ||
92 | |||
93 | # Input message (arg1) | ||
94 | #define MSG(i) 8*i(msg) | ||
95 | |||
96 | # Output Digest (arg2) | ||
97 | #define DIGEST(i) 8*i(digest) | ||
98 | |||
99 | # SHA Constants (static mem) | ||
100 | #define K_t(i) 8*i+K512(%rip) | ||
101 | |||
102 | # Message Schedule (stack frame) | ||
103 | #define W_t(i) 8*i+frame_W(%rsp) | ||
104 | |||
105 | # W[t]+K[t] (stack frame) | ||
106 | #define WK_2(i) 8*((i%2))+frame_WK(%rsp) | ||
107 | |||
108 | .macro RotateState | ||
109 | # Rotate symbols a..h right | ||
110 | TMP = h_64 | ||
111 | h_64 = g_64 | ||
112 | g_64 = f_64 | ||
113 | f_64 = e_64 | ||
114 | e_64 = d_64 | ||
115 | d_64 = c_64 | ||
116 | c_64 = b_64 | ||
117 | b_64 = a_64 | ||
118 | a_64 = TMP | ||
119 | .endm | ||
120 | |||
121 | .macro RORQ p1 p2 | ||
122 | # shld is faster than ror on Sandybridge | ||
123 | shld $(64-\p2), \p1, \p1 | ||
124 | .endm | ||
125 | |||
126 | .macro SHA512_Round rnd | ||
127 | # Compute Round %%t | ||
128 | mov f_64, T1 # T1 = f | ||
129 | mov e_64, tmp0 # tmp = e | ||
130 | xor g_64, T1 # T1 = f ^ g | ||
131 | RORQ tmp0, 23 # 41 # tmp = e ror 23 | ||
132 | and e_64, T1 # T1 = (f ^ g) & e | ||
133 | xor e_64, tmp0 # tmp = (e ror 23) ^ e | ||
134 | xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g) | ||
135 | idx = \rnd | ||
136 | add WK_2(idx), T1 # W[t] + K[t] from message scheduler | ||
137 | RORQ tmp0, 4 # 18 # tmp = ((e ror 23) ^ e) ror 4 | ||
138 | xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e | ||
139 | mov a_64, T2 # T2 = a | ||
140 | add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h | ||
141 | RORQ tmp0, 14 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) | ||
142 | add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + S1(e) | ||
143 | mov a_64, tmp0 # tmp = a | ||
144 | xor c_64, T2 # T2 = a ^ c | ||
145 | and c_64, tmp0 # tmp = a & c | ||
146 | and b_64, T2 # T2 = (a ^ c) & b | ||
147 | xor tmp0, T2 # T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) | ||
148 | mov a_64, tmp0 # tmp = a | ||
149 | RORQ tmp0, 5 # 39 # tmp = a ror 5 | ||
150 | xor a_64, tmp0 # tmp = (a ror 5) ^ a | ||
151 | add T1, d_64 # e(next_state) = d + T1 | ||
152 | RORQ tmp0, 6 # 34 # tmp = ((a ror 5) ^ a) ror 6 | ||
153 | xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a | ||
154 | lea (T1, T2), h_64 # a(next_state) = T1 + Maj(a,b,c) | ||
155 | RORQ tmp0, 28 # 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) | ||
156 | add tmp0, h_64 # a(next_state) = T1 + Maj(a,b,c) S0(a) | ||
157 | RotateState | ||
158 | .endm | ||
159 | |||
160 | .macro SHA512_2Sched_2Round_avx rnd | ||
161 | # Compute rounds t-2 and t-1 | ||
162 | # Compute message schedule QWORDS t and t+1 | ||
163 | |||
164 | # Two rounds are computed based on the values for K[t-2]+W[t-2] and | ||
165 | # K[t-1]+W[t-1] which were previously stored at WK_2 by the message | ||
166 | # scheduler. | ||
167 | # The two new schedule QWORDS are stored at [W_t(t)] and [W_t(t+1)]. | ||
168 | # They are then added to their respective SHA512 constants at | ||
169 | # [K_t(t)] and [K_t(t+1)] and stored at dqword [WK_2(t)] | ||
170 | # For brievity, the comments following vectored instructions only refer to | ||
171 | # the first of a pair of QWORDS. | ||
172 | # Eg. XMM4=W[t-2] really means XMM4={W[t-2]|W[t-1]} | ||
173 | # The computation of the message schedule and the rounds are tightly | ||
174 | # stitched to take advantage of instruction-level parallelism. | ||
175 | |||
176 | idx = \rnd - 2 | ||
177 | vmovdqa W_t(idx), %xmm4 # XMM4 = W[t-2] | ||
178 | idx = \rnd - 15 | ||
179 | vmovdqu W_t(idx), %xmm5 # XMM5 = W[t-15] | ||
180 | mov f_64, T1 | ||
181 | vpsrlq $61, %xmm4, %xmm0 # XMM0 = W[t-2]>>61 | ||
182 | mov e_64, tmp0 | ||
183 | vpsrlq $1, %xmm5, %xmm6 # XMM6 = W[t-15]>>1 | ||
184 | xor g_64, T1 | ||
185 | RORQ tmp0, 23 # 41 | ||
186 | vpsrlq $19, %xmm4, %xmm1 # XMM1 = W[t-2]>>19 | ||
187 | and e_64, T1 | ||
188 | xor e_64, tmp0 | ||
189 | vpxor %xmm1, %xmm0, %xmm0 # XMM0 = W[t-2]>>61 ^ W[t-2]>>19 | ||
190 | xor g_64, T1 | ||
191 | idx = \rnd | ||
192 | add WK_2(idx), T1# | ||
193 | vpsrlq $8, %xmm5, %xmm7 # XMM7 = W[t-15]>>8 | ||
194 | RORQ tmp0, 4 # 18 | ||
195 | vpsrlq $6, %xmm4, %xmm2 # XMM2 = W[t-2]>>6 | ||
196 | xor e_64, tmp0 | ||
197 | mov a_64, T2 | ||
198 | add h_64, T1 | ||
199 | vpxor %xmm7, %xmm6, %xmm6 # XMM6 = W[t-15]>>1 ^ W[t-15]>>8 | ||
200 | RORQ tmp0, 14 # 14 | ||
201 | add tmp0, T1 | ||
202 | vpsrlq $7, %xmm5, %xmm8 # XMM8 = W[t-15]>>7 | ||
203 | mov a_64, tmp0 | ||
204 | xor c_64, T2 | ||
205 | vpsllq $(64-61), %xmm4, %xmm3 # XMM3 = W[t-2]<<3 | ||
206 | and c_64, tmp0 | ||
207 | and b_64, T2 | ||
208 | vpxor %xmm3, %xmm2, %xmm2 # XMM2 = W[t-2]>>6 ^ W[t-2]<<3 | ||
209 | xor tmp0, T2 | ||
210 | mov a_64, tmp0 | ||
211 | vpsllq $(64-1), %xmm5, %xmm9 # XMM9 = W[t-15]<<63 | ||
212 | RORQ tmp0, 5 # 39 | ||
213 | vpxor %xmm9, %xmm8, %xmm8 # XMM8 = W[t-15]>>7 ^ W[t-15]<<63 | ||
214 | xor a_64, tmp0 | ||
215 | add T1, d_64 | ||
216 | RORQ tmp0, 6 # 34 | ||
217 | xor a_64, tmp0 | ||
218 | vpxor %xmm8, %xmm6, %xmm6 # XMM6 = W[t-15]>>1 ^ W[t-15]>>8 ^ | ||
219 | # W[t-15]>>7 ^ W[t-15]<<63 | ||
220 | lea (T1, T2), h_64 | ||
221 | RORQ tmp0, 28 # 28 | ||
222 | vpsllq $(64-19), %xmm4, %xmm4 # XMM4 = W[t-2]<<25 | ||
223 | add tmp0, h_64 | ||
224 | RotateState | ||
225 | vpxor %xmm4, %xmm0, %xmm0 # XMM0 = W[t-2]>>61 ^ W[t-2]>>19 ^ | ||
226 | # W[t-2]<<25 | ||
227 | mov f_64, T1 | ||
228 | vpxor %xmm2, %xmm0, %xmm0 # XMM0 = s1(W[t-2]) | ||
229 | mov e_64, tmp0 | ||
230 | xor g_64, T1 | ||
231 | idx = \rnd - 16 | ||
232 | vpaddq W_t(idx), %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + W[t-16] | ||
233 | idx = \rnd - 7 | ||
234 | vmovdqu W_t(idx), %xmm1 # XMM1 = W[t-7] | ||
235 | RORQ tmp0, 23 # 41 | ||
236 | and e_64, T1 | ||
237 | xor e_64, tmp0 | ||
238 | xor g_64, T1 | ||
239 | vpsllq $(64-8), %xmm5, %xmm5 # XMM5 = W[t-15]<<56 | ||
240 | idx = \rnd + 1 | ||
241 | add WK_2(idx), T1 | ||
242 | vpxor %xmm5, %xmm6, %xmm6 # XMM6 = s0(W[t-15]) | ||
243 | RORQ tmp0, 4 # 18 | ||
244 | vpaddq %xmm6, %xmm0, %xmm0 # XMM0 = s1(W[t-2]) + W[t-16] + s0(W[t-15]) | ||
245 | xor e_64, tmp0 | ||
246 | vpaddq %xmm1, %xmm0, %xmm0 # XMM0 = W[t] = s1(W[t-2]) + W[t-7] + | ||
247 | # s0(W[t-15]) + W[t-16] | ||
248 | mov a_64, T2 | ||
249 | add h_64, T1 | ||
250 | RORQ tmp0, 14 # 14 | ||
251 | add tmp0, T1 | ||
252 | idx = \rnd | ||
253 | vmovdqa %xmm0, W_t(idx) # Store W[t] | ||
254 | vpaddq K_t(idx), %xmm0, %xmm0 # Compute W[t]+K[t] | ||
255 | vmovdqa %xmm0, WK_2(idx) # Store W[t]+K[t] for next rounds | ||
256 | mov a_64, tmp0 | ||
257 | xor c_64, T2 | ||
258 | and c_64, tmp0 | ||
259 | and b_64, T2 | ||
260 | xor tmp0, T2 | ||
261 | mov a_64, tmp0 | ||
262 | RORQ tmp0, 5 # 39 | ||
263 | xor a_64, tmp0 | ||
264 | add T1, d_64 | ||
265 | RORQ tmp0, 6 # 34 | ||
266 | xor a_64, tmp0 | ||
267 | lea (T1, T2), h_64 | ||
268 | RORQ tmp0, 28 # 28 | ||
269 | add tmp0, h_64 | ||
270 | RotateState | ||
271 | .endm | ||
272 | |||
273 | ######################################################################## | ||
274 | # void sha512_transform_avx(const void* M, void* D, u64 L) | ||
275 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | ||
276 | # The size of the message pointed to by M must be an integer multiple of SHA512 | ||
277 | # message blocks. | ||
278 | # L is the message length in SHA512 blocks | ||
279 | ######################################################################## | ||
280 | ENTRY(sha512_transform_avx) | ||
281 | cmp $0, msglen | ||
282 | je nowork | ||
283 | |||
284 | # Allocate Stack Space | ||
285 | mov %rsp, %rax | ||
286 | sub $frame_size, %rsp | ||
287 | and $~(0x20 - 1), %rsp | ||
288 | mov %rax, frame_RSPSAVE(%rsp) | ||
289 | |||
290 | # Save GPRs | ||
291 | mov %rbx, frame_GPRSAVE(%rsp) | ||
292 | mov %r12, frame_GPRSAVE +8*1(%rsp) | ||
293 | mov %r13, frame_GPRSAVE +8*2(%rsp) | ||
294 | mov %r14, frame_GPRSAVE +8*3(%rsp) | ||
295 | mov %r15, frame_GPRSAVE +8*4(%rsp) | ||
296 | |||
297 | updateblock: | ||
298 | |||
299 | # Load state variables | ||
300 | mov DIGEST(0), a_64 | ||
301 | mov DIGEST(1), b_64 | ||
302 | mov DIGEST(2), c_64 | ||
303 | mov DIGEST(3), d_64 | ||
304 | mov DIGEST(4), e_64 | ||
305 | mov DIGEST(5), f_64 | ||
306 | mov DIGEST(6), g_64 | ||
307 | mov DIGEST(7), h_64 | ||
308 | |||
309 | t = 0 | ||
310 | .rept 80/2 + 1 | ||
311 | # (80 rounds) / (2 rounds/iteration) + (1 iteration) | ||
312 | # +1 iteration because the scheduler leads hashing by 1 iteration | ||
313 | .if t < 2 | ||
314 | # BSWAP 2 QWORDS | ||
315 | vmovdqa XMM_QWORD_BSWAP(%rip), %xmm1 | ||
316 | vmovdqu MSG(t), %xmm0 | ||
317 | vpshufb %xmm1, %xmm0, %xmm0 # BSWAP | ||
318 | vmovdqa %xmm0, W_t(t) # Store Scheduled Pair | ||
319 | vpaddq K_t(t), %xmm0, %xmm0 # Compute W[t]+K[t] | ||
320 | vmovdqa %xmm0, WK_2(t) # Store into WK for rounds | ||
321 | .elseif t < 16 | ||
322 | # BSWAP 2 QWORDS# Compute 2 Rounds | ||
323 | vmovdqu MSG(t), %xmm0 | ||
324 | vpshufb %xmm1, %xmm0, %xmm0 # BSWAP | ||
325 | SHA512_Round t-2 # Round t-2 | ||
326 | vmovdqa %xmm0, W_t(t) # Store Scheduled Pair | ||
327 | vpaddq K_t(t), %xmm0, %xmm0 # Compute W[t]+K[t] | ||
328 | SHA512_Round t-1 # Round t-1 | ||
329 | vmovdqa %xmm0, WK_2(t)# Store W[t]+K[t] into WK | ||
330 | .elseif t < 79 | ||
331 | # Schedule 2 QWORDS# Compute 2 Rounds | ||
332 | SHA512_2Sched_2Round_avx t | ||
333 | .else | ||
334 | # Compute 2 Rounds | ||
335 | SHA512_Round t-2 | ||
336 | SHA512_Round t-1 | ||
337 | .endif | ||
338 | t = t+2 | ||
339 | .endr | ||
340 | |||
341 | # Update digest | ||
342 | add a_64, DIGEST(0) | ||
343 | add b_64, DIGEST(1) | ||
344 | add c_64, DIGEST(2) | ||
345 | add d_64, DIGEST(3) | ||
346 | add e_64, DIGEST(4) | ||
347 | add f_64, DIGEST(5) | ||
348 | add g_64, DIGEST(6) | ||
349 | add h_64, DIGEST(7) | ||
350 | |||
351 | # Advance to next message block | ||
352 | add $16*8, msg | ||
353 | dec msglen | ||
354 | jnz updateblock | ||
355 | |||
356 | # Restore GPRs | ||
357 | mov frame_GPRSAVE(%rsp), %rbx | ||
358 | mov frame_GPRSAVE +8*1(%rsp), %r12 | ||
359 | mov frame_GPRSAVE +8*2(%rsp), %r13 | ||
360 | mov frame_GPRSAVE +8*3(%rsp), %r14 | ||
361 | mov frame_GPRSAVE +8*4(%rsp), %r15 | ||
362 | |||
363 | # Restore Stack Pointer | ||
364 | mov frame_RSPSAVE(%rsp), %rsp | ||
365 | |||
366 | nowork: | ||
367 | ret | ||
368 | ENDPROC(sha512_transform_avx) | ||
369 | |||
370 | ######################################################################## | ||
371 | ### Binary Data | ||
372 | |||
373 | .data | ||
374 | |||
375 | .align 16 | ||
376 | |||
377 | # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. | ||
378 | XMM_QWORD_BSWAP: | ||
379 | .octa 0x08090a0b0c0d0e0f0001020304050607 | ||
380 | |||
381 | # K[t] used in SHA512 hashing | ||
382 | K512: | ||
383 | .quad 0x428a2f98d728ae22,0x7137449123ef65cd | ||
384 | .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc | ||
385 | .quad 0x3956c25bf348b538,0x59f111f1b605d019 | ||
386 | .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 | ||
387 | .quad 0xd807aa98a3030242,0x12835b0145706fbe | ||
388 | .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 | ||
389 | .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 | ||
390 | .quad 0x9bdc06a725c71235,0xc19bf174cf692694 | ||
391 | .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 | ||
392 | .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 | ||
393 | .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 | ||
394 | .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 | ||
395 | .quad 0x983e5152ee66dfab,0xa831c66d2db43210 | ||
396 | .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 | ||
397 | .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 | ||
398 | .quad 0x06ca6351e003826f,0x142929670a0e6e70 | ||
399 | .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 | ||
400 | .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df | ||
401 | .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 | ||
402 | .quad 0x81c2c92e47edaee6,0x92722c851482353b | ||
403 | .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 | ||
404 | .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 | ||
405 | .quad 0xd192e819d6ef5218,0xd69906245565a910 | ||
406 | .quad 0xf40e35855771202a,0x106aa07032bbd1b8 | ||
407 | .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 | ||
408 | .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 | ||
409 | .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb | ||
410 | .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 | ||
411 | .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 | ||
412 | .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec | ||
413 | .quad 0x90befffa23631e28,0xa4506cebde82bde9 | ||
414 | .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b | ||
415 | .quad 0xca273eceea26619c,0xd186b8c721c0c207 | ||
416 | .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 | ||
417 | .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 | ||
418 | .quad 0x113f9804bef90dae,0x1b710b35131c471b | ||
419 | .quad 0x28db77f523047d84,0x32caab7b40c72493 | ||
420 | .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c | ||
421 | .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a | ||
422 | .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 | ||
423 | #endif | ||
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S new file mode 100644 index 000000000000..568b96105f5c --- /dev/null +++ b/arch/x86/crypto/sha512-avx2-asm.S | |||
@@ -0,0 +1,743 @@ | |||
1 | ######################################################################## | ||
2 | # Implement fast SHA-512 with AVX2 instructions. (x86_64) | ||
3 | # | ||
4 | # Copyright (C) 2013 Intel Corporation. | ||
5 | # | ||
6 | # Authors: | ||
7 | # James Guilford <james.guilford@intel.com> | ||
8 | # Kirk Yap <kirk.s.yap@intel.com> | ||
9 | # David Cote <david.m.cote@intel.com> | ||
10 | # Tim Chen <tim.c.chen@linux.intel.com> | ||
11 | # | ||
12 | # This software is available to you under a choice of one of two | ||
13 | # licenses. You may choose to be licensed under the terms of the GNU | ||
14 | # General Public License (GPL) Version 2, available from the file | ||
15 | # COPYING in the main directory of this source tree, or the | ||
16 | # OpenIB.org BSD license below: | ||
17 | # | ||
18 | # Redistribution and use in source and binary forms, with or | ||
19 | # without modification, are permitted provided that the following | ||
20 | # conditions are met: | ||
21 | # | ||
22 | # - Redistributions of source code must retain the above | ||
23 | # copyright notice, this list of conditions and the following | ||
24 | # disclaimer. | ||
25 | # | ||
26 | # - Redistributions in binary form must reproduce the above | ||
27 | # copyright notice, this list of conditions and the following | ||
28 | # disclaimer in the documentation and/or other materials | ||
29 | # provided with the distribution. | ||
30 | # | ||
31 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
32 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
33 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
34 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
35 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
36 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
37 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
38 | # SOFTWARE. | ||
39 | # | ||
40 | ######################################################################## | ||
41 | # | ||
42 | # This code is described in an Intel White-Paper: | ||
43 | # "Fast SHA-512 Implementations on Intel Architecture Processors" | ||
44 | # | ||
45 | # To find it, surf to http://www.intel.com/p/en_US/embedded | ||
46 | # and search for that title. | ||
47 | # | ||
48 | ######################################################################## | ||
49 | # This code schedules 1 blocks at a time, with 4 lanes per block | ||
50 | ######################################################################## | ||
51 | |||
52 | #ifdef CONFIG_AS_AVX2 | ||
53 | #include <linux/linkage.h> | ||
54 | |||
55 | .text | ||
56 | |||
57 | # Virtual Registers | ||
58 | Y_0 = %ymm4 | ||
59 | Y_1 = %ymm5 | ||
60 | Y_2 = %ymm6 | ||
61 | Y_3 = %ymm7 | ||
62 | |||
63 | YTMP0 = %ymm0 | ||
64 | YTMP1 = %ymm1 | ||
65 | YTMP2 = %ymm2 | ||
66 | YTMP3 = %ymm3 | ||
67 | YTMP4 = %ymm8 | ||
68 | XFER = YTMP0 | ||
69 | |||
70 | BYTE_FLIP_MASK = %ymm9 | ||
71 | |||
72 | # 1st arg | ||
73 | INP = %rdi | ||
74 | # 2nd arg | ||
75 | CTX = %rsi | ||
76 | # 3rd arg | ||
77 | NUM_BLKS = %rdx | ||
78 | |||
79 | c = %rcx | ||
80 | d = %r8 | ||
81 | e = %rdx | ||
82 | y3 = %rdi | ||
83 | |||
84 | TBL = %rbp | ||
85 | |||
86 | a = %rax | ||
87 | b = %rbx | ||
88 | |||
89 | f = %r9 | ||
90 | g = %r10 | ||
91 | h = %r11 | ||
92 | old_h = %r11 | ||
93 | |||
94 | T1 = %r12 | ||
95 | y0 = %r13 | ||
96 | y1 = %r14 | ||
97 | y2 = %r15 | ||
98 | |||
99 | y4 = %r12 | ||
100 | |||
101 | # Local variables (stack frame) | ||
102 | XFER_SIZE = 4*8 | ||
103 | SRND_SIZE = 1*8 | ||
104 | INP_SIZE = 1*8 | ||
105 | INPEND_SIZE = 1*8 | ||
106 | RSPSAVE_SIZE = 1*8 | ||
107 | GPRSAVE_SIZE = 6*8 | ||
108 | |||
109 | frame_XFER = 0 | ||
110 | frame_SRND = frame_XFER + XFER_SIZE | ||
111 | frame_INP = frame_SRND + SRND_SIZE | ||
112 | frame_INPEND = frame_INP + INP_SIZE | ||
113 | frame_RSPSAVE = frame_INPEND + INPEND_SIZE | ||
114 | frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE | ||
115 | frame_size = frame_GPRSAVE + GPRSAVE_SIZE | ||
116 | |||
117 | ## assume buffers not aligned | ||
118 | #define VMOVDQ vmovdqu | ||
119 | |||
120 | # addm [mem], reg | ||
121 | # Add reg to mem using reg-mem add and store | ||
122 | .macro addm p1 p2 | ||
123 | add \p1, \p2 | ||
124 | mov \p2, \p1 | ||
125 | .endm | ||
126 | |||
127 | |||
128 | # COPY_YMM_AND_BSWAP ymm, [mem], byte_flip_mask | ||
129 | # Load ymm with mem and byte swap each dword | ||
130 | .macro COPY_YMM_AND_BSWAP p1 p2 p3 | ||
131 | VMOVDQ \p2, \p1 | ||
132 | vpshufb \p3, \p1, \p1 | ||
133 | .endm | ||
134 | # rotate_Ys | ||
135 | # Rotate values of symbols Y0...Y3 | ||
136 | .macro rotate_Ys | ||
137 | Y_ = Y_0 | ||
138 | Y_0 = Y_1 | ||
139 | Y_1 = Y_2 | ||
140 | Y_2 = Y_3 | ||
141 | Y_3 = Y_ | ||
142 | .endm | ||
143 | |||
144 | # RotateState | ||
145 | .macro RotateState | ||
146 | # Rotate symbols a..h right | ||
147 | old_h = h | ||
148 | TMP_ = h | ||
149 | h = g | ||
150 | g = f | ||
151 | f = e | ||
152 | e = d | ||
153 | d = c | ||
154 | c = b | ||
155 | b = a | ||
156 | a = TMP_ | ||
157 | .endm | ||
158 | |||
159 | # macro MY_VPALIGNR YDST, YSRC1, YSRC2, RVAL | ||
160 | # YDST = {YSRC1, YSRC2} >> RVAL*8 | ||
161 | .macro MY_VPALIGNR YDST YSRC1 YSRC2 RVAL | ||
162 | vperm2f128 $0x3, \YSRC2, \YSRC1, \YDST # YDST = {YS1_LO, YS2_HI} | ||
163 | vpalignr $\RVAL, \YSRC2, \YDST, \YDST # YDST = {YDS1, YS2} >> RVAL*8 | ||
164 | .endm | ||
165 | |||
166 | .macro FOUR_ROUNDS_AND_SCHED | ||
167 | ################################### RND N + 0 ######################################### | ||
168 | |||
169 | # Extract w[t-7] | ||
170 | MY_VPALIGNR YTMP0, Y_3, Y_2, 8 # YTMP0 = W[-7] | ||
171 | # Calculate w[t-16] + w[t-7] | ||
172 | vpaddq Y_0, YTMP0, YTMP0 # YTMP0 = W[-7] + W[-16] | ||
173 | # Extract w[t-15] | ||
174 | MY_VPALIGNR YTMP1, Y_1, Y_0, 8 # YTMP1 = W[-15] | ||
175 | |||
176 | # Calculate sigma0 | ||
177 | |||
178 | # Calculate w[t-15] ror 1 | ||
179 | vpsrlq $1, YTMP1, YTMP2 | ||
180 | vpsllq $(64-1), YTMP1, YTMP3 | ||
181 | vpor YTMP2, YTMP3, YTMP3 # YTMP3 = W[-15] ror 1 | ||
182 | # Calculate w[t-15] shr 7 | ||
183 | vpsrlq $7, YTMP1, YTMP4 # YTMP4 = W[-15] >> 7 | ||
184 | |||
185 | mov a, y3 # y3 = a # MAJA | ||
186 | rorx $41, e, y0 # y0 = e >> 41 # S1A | ||
187 | rorx $18, e, y1 # y1 = e >> 18 # S1B | ||
188 | add frame_XFER(%rsp),h # h = k + w + h # -- | ||
189 | or c, y3 # y3 = a|c # MAJA | ||
190 | mov f, y2 # y2 = f # CH | ||
191 | rorx $34, a, T1 # T1 = a >> 34 # S0B | ||
192 | |||
193 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 | ||
194 | xor g, y2 # y2 = f^g # CH | ||
195 | rorx $14, e, y1 # y1 = (e >> 14) # S1 | ||
196 | |||
197 | and e, y2 # y2 = (f^g)&e # CH | ||
198 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 | ||
199 | rorx $39, a, y1 # y1 = a >> 39 # S0A | ||
200 | add h, d # d = k + w + h + d # -- | ||
201 | |||
202 | and b, y3 # y3 = (a|c)&b # MAJA | ||
203 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 | ||
204 | rorx $28, a, T1 # T1 = (a >> 28) # S0 | ||
205 | |||
206 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
207 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 | ||
208 | mov a, T1 # T1 = a # MAJB | ||
209 | and c, T1 # T1 = a&c # MAJB | ||
210 | |||
211 | add y0, y2 # y2 = S1 + CH # -- | ||
212 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
213 | add y1, h # h = k + w + h + S0 # -- | ||
214 | |||
215 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
216 | |||
217 | add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
218 | add y3, h # h = t1 + S0 + MAJ # -- | ||
219 | |||
220 | RotateState | ||
221 | |||
222 | ################################### RND N + 1 ######################################### | ||
223 | |||
224 | # Calculate w[t-15] ror 8 | ||
225 | vpsrlq $8, YTMP1, YTMP2 | ||
226 | vpsllq $(64-8), YTMP1, YTMP1 | ||
227 | vpor YTMP2, YTMP1, YTMP1 # YTMP1 = W[-15] ror 8 | ||
228 | # XOR the three components | ||
229 | vpxor YTMP4, YTMP3, YTMP3 # YTMP3 = W[-15] ror 1 ^ W[-15] >> 7 | ||
230 | vpxor YTMP1, YTMP3, YTMP1 # YTMP1 = s0 | ||
231 | |||
232 | |||
233 | # Add three components, w[t-16], w[t-7] and sigma0 | ||
234 | vpaddq YTMP1, YTMP0, YTMP0 # YTMP0 = W[-16] + W[-7] + s0 | ||
235 | # Move to appropriate lanes for calculating w[16] and w[17] | ||
236 | vperm2f128 $0x0, YTMP0, YTMP0, Y_0 # Y_0 = W[-16] + W[-7] + s0 {BABA} | ||
237 | # Move to appropriate lanes for calculating w[18] and w[19] | ||
238 | vpand MASK_YMM_LO(%rip), YTMP0, YTMP0 # YTMP0 = W[-16] + W[-7] + s0 {DC00} | ||
239 | |||
240 | # Calculate w[16] and w[17] in both 128 bit lanes | ||
241 | |||
242 | # Calculate sigma1 for w[16] and w[17] on both 128 bit lanes | ||
243 | vperm2f128 $0x11, Y_3, Y_3, YTMP2 # YTMP2 = W[-2] {BABA} | ||
244 | vpsrlq $6, YTMP2, YTMP4 # YTMP4 = W[-2] >> 6 {BABA} | ||
245 | |||
246 | |||
247 | mov a, y3 # y3 = a # MAJA | ||
248 | rorx $41, e, y0 # y0 = e >> 41 # S1A | ||
249 | rorx $18, e, y1 # y1 = e >> 18 # S1B | ||
250 | add 1*8+frame_XFER(%rsp), h # h = k + w + h # -- | ||
251 | or c, y3 # y3 = a|c # MAJA | ||
252 | |||
253 | |||
254 | mov f, y2 # y2 = f # CH | ||
255 | rorx $34, a, T1 # T1 = a >> 34 # S0B | ||
256 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 | ||
257 | xor g, y2 # y2 = f^g # CH | ||
258 | |||
259 | |||
260 | rorx $14, e, y1 # y1 = (e >> 14) # S1 | ||
261 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 | ||
262 | rorx $39, a, y1 # y1 = a >> 39 # S0A | ||
263 | and e, y2 # y2 = (f^g)&e # CH | ||
264 | add h, d # d = k + w + h + d # -- | ||
265 | |||
266 | and b, y3 # y3 = (a|c)&b # MAJA | ||
267 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 | ||
268 | |||
269 | rorx $28, a, T1 # T1 = (a >> 28) # S0 | ||
270 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
271 | |||
272 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 | ||
273 | mov a, T1 # T1 = a # MAJB | ||
274 | and c, T1 # T1 = a&c # MAJB | ||
275 | add y0, y2 # y2 = S1 + CH # -- | ||
276 | |||
277 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
278 | add y1, h # h = k + w + h + S0 # -- | ||
279 | |||
280 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
281 | add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
282 | add y3, h # h = t1 + S0 + MAJ # -- | ||
283 | |||
284 | RotateState | ||
285 | |||
286 | |||
287 | ################################### RND N + 2 ######################################### | ||
288 | |||
289 | vpsrlq $19, YTMP2, YTMP3 # YTMP3 = W[-2] >> 19 {BABA} | ||
290 | vpsllq $(64-19), YTMP2, YTMP1 # YTMP1 = W[-2] << 19 {BABA} | ||
291 | vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 19 {BABA} | ||
292 | vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {BABA} | ||
293 | vpsrlq $61, YTMP2, YTMP3 # YTMP3 = W[-2] >> 61 {BABA} | ||
294 | vpsllq $(64-61), YTMP2, YTMP1 # YTMP1 = W[-2] << 61 {BABA} | ||
295 | vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 61 {BABA} | ||
296 | vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = s1 = (W[-2] ror 19) ^ | ||
297 | # (W[-2] ror 61) ^ (W[-2] >> 6) {BABA} | ||
298 | |||
299 | # Add sigma1 to the other components to get w[16] and w[17] | ||
300 | vpaddq YTMP4, Y_0, Y_0 # Y_0 = {W[1], W[0], W[1], W[0]} | ||
301 | |||
302 | # Calculate sigma1 for w[18] and w[19] for upper 128 bit lane | ||
303 | vpsrlq $6, Y_0, YTMP4 # YTMP4 = W[-2] >> 6 {DC--} | ||
304 | |||
305 | mov a, y3 # y3 = a # MAJA | ||
306 | rorx $41, e, y0 # y0 = e >> 41 # S1A | ||
307 | add 2*8+frame_XFER(%rsp), h # h = k + w + h # -- | ||
308 | |||
309 | rorx $18, e, y1 # y1 = e >> 18 # S1B | ||
310 | or c, y3 # y3 = a|c # MAJA | ||
311 | mov f, y2 # y2 = f # CH | ||
312 | xor g, y2 # y2 = f^g # CH | ||
313 | |||
314 | rorx $34, a, T1 # T1 = a >> 34 # S0B | ||
315 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 | ||
316 | and e, y2 # y2 = (f^g)&e # CH | ||
317 | |||
318 | rorx $14, e, y1 # y1 = (e >> 14) # S1 | ||
319 | add h, d # d = k + w + h + d # -- | ||
320 | and b, y3 # y3 = (a|c)&b # MAJA | ||
321 | |||
322 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 | ||
323 | rorx $39, a, y1 # y1 = a >> 39 # S0A | ||
324 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
325 | |||
326 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 | ||
327 | rorx $28, a, T1 # T1 = (a >> 28) # S0 | ||
328 | |||
329 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 | ||
330 | mov a, T1 # T1 = a # MAJB | ||
331 | and c, T1 # T1 = a&c # MAJB | ||
332 | add y0, y2 # y2 = S1 + CH # -- | ||
333 | |||
334 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
335 | add y1, h # h = k + w + h + S0 # -- | ||
336 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
337 | add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
338 | |||
339 | add y3, h # h = t1 + S0 + MAJ # -- | ||
340 | |||
341 | RotateState | ||
342 | |||
343 | ################################### RND N + 3 ######################################### | ||
344 | |||
345 | vpsrlq $19, Y_0, YTMP3 # YTMP3 = W[-2] >> 19 {DC--} | ||
346 | vpsllq $(64-19), Y_0, YTMP1 # YTMP1 = W[-2] << 19 {DC--} | ||
347 | vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 19 {DC--} | ||
348 | vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = W[-2] ror 19 ^ W[-2] >> 6 {DC--} | ||
349 | vpsrlq $61, Y_0, YTMP3 # YTMP3 = W[-2] >> 61 {DC--} | ||
350 | vpsllq $(64-61), Y_0, YTMP1 # YTMP1 = W[-2] << 61 {DC--} | ||
351 | vpor YTMP1, YTMP3, YTMP3 # YTMP3 = W[-2] ror 61 {DC--} | ||
352 | vpxor YTMP3, YTMP4, YTMP4 # YTMP4 = s1 = (W[-2] ror 19) ^ | ||
353 | # (W[-2] ror 61) ^ (W[-2] >> 6) {DC--} | ||
354 | |||
355 | # Add the sigma0 + w[t-7] + w[t-16] for w[18] and w[19] | ||
356 | # to newly calculated sigma1 to get w[18] and w[19] | ||
357 | vpaddq YTMP4, YTMP0, YTMP2 # YTMP2 = {W[3], W[2], --, --} | ||
358 | |||
359 | # Form w[19], w[18], w[17], w[16] | ||
360 | vpblendd $0xF0, YTMP2, Y_0, Y_0 # Y_0 = {W[3], W[2], W[1], W[0]} | ||
361 | |||
362 | mov a, y3 # y3 = a # MAJA | ||
363 | rorx $41, e, y0 # y0 = e >> 41 # S1A | ||
364 | rorx $18, e, y1 # y1 = e >> 18 # S1B | ||
365 | add 3*8+frame_XFER(%rsp), h # h = k + w + h # -- | ||
366 | or c, y3 # y3 = a|c # MAJA | ||
367 | |||
368 | |||
369 | mov f, y2 # y2 = f # CH | ||
370 | rorx $34, a, T1 # T1 = a >> 34 # S0B | ||
371 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 | ||
372 | xor g, y2 # y2 = f^g # CH | ||
373 | |||
374 | |||
375 | rorx $14, e, y1 # y1 = (e >> 14) # S1 | ||
376 | and e, y2 # y2 = (f^g)&e # CH | ||
377 | add h, d # d = k + w + h + d # -- | ||
378 | and b, y3 # y3 = (a|c)&b # MAJA | ||
379 | |||
380 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 | ||
381 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
382 | |||
383 | rorx $39, a, y1 # y1 = a >> 39 # S0A | ||
384 | add y0, y2 # y2 = S1 + CH # -- | ||
385 | |||
386 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 | ||
387 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
388 | |||
389 | rorx $28, a, T1 # T1 = (a >> 28) # S0 | ||
390 | |||
391 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 | ||
392 | mov a, T1 # T1 = a # MAJB | ||
393 | and c, T1 # T1 = a&c # MAJB | ||
394 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
395 | |||
396 | add y1, h # h = k + w + h + S0 # -- | ||
397 | add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
398 | add y3, h # h = t1 + S0 + MAJ # -- | ||
399 | |||
400 | RotateState | ||
401 | |||
402 | rotate_Ys | ||
403 | .endm | ||
404 | |||
405 | .macro DO_4ROUNDS | ||
406 | |||
407 | ################################### RND N + 0 ######################################### | ||
408 | |||
409 | mov f, y2 # y2 = f # CH | ||
410 | rorx $41, e, y0 # y0 = e >> 41 # S1A | ||
411 | rorx $18, e, y1 # y1 = e >> 18 # S1B | ||
412 | xor g, y2 # y2 = f^g # CH | ||
413 | |||
414 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 | ||
415 | rorx $14, e, y1 # y1 = (e >> 14) # S1 | ||
416 | and e, y2 # y2 = (f^g)&e # CH | ||
417 | |||
418 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 | ||
419 | rorx $34, a, T1 # T1 = a >> 34 # S0B | ||
420 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
421 | rorx $39, a, y1 # y1 = a >> 39 # S0A | ||
422 | mov a, y3 # y3 = a # MAJA | ||
423 | |||
424 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 | ||
425 | rorx $28, a, T1 # T1 = (a >> 28) # S0 | ||
426 | add frame_XFER(%rsp), h # h = k + w + h # -- | ||
427 | or c, y3 # y3 = a|c # MAJA | ||
428 | |||
429 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 | ||
430 | mov a, T1 # T1 = a # MAJB | ||
431 | and b, y3 # y3 = (a|c)&b # MAJA | ||
432 | and c, T1 # T1 = a&c # MAJB | ||
433 | add y0, y2 # y2 = S1 + CH # -- | ||
434 | |||
435 | add h, d # d = k + w + h + d # -- | ||
436 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
437 | add y1, h # h = k + w + h + S0 # -- | ||
438 | |||
439 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
440 | |||
441 | RotateState | ||
442 | |||
443 | ################################### RND N + 1 ######################################### | ||
444 | |||
445 | add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
446 | mov f, y2 # y2 = f # CH | ||
447 | rorx $41, e, y0 # y0 = e >> 41 # S1A | ||
448 | rorx $18, e, y1 # y1 = e >> 18 # S1B | ||
449 | xor g, y2 # y2 = f^g # CH | ||
450 | |||
451 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 | ||
452 | rorx $14, e, y1 # y1 = (e >> 14) # S1 | ||
453 | and e, y2 # y2 = (f^g)&e # CH | ||
454 | add y3, old_h # h = t1 + S0 + MAJ # -- | ||
455 | |||
456 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 | ||
457 | rorx $34, a, T1 # T1 = a >> 34 # S0B | ||
458 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
459 | rorx $39, a, y1 # y1 = a >> 39 # S0A | ||
460 | mov a, y3 # y3 = a # MAJA | ||
461 | |||
462 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 | ||
463 | rorx $28, a, T1 # T1 = (a >> 28) # S0 | ||
464 | add 8*1+frame_XFER(%rsp), h # h = k + w + h # -- | ||
465 | or c, y3 # y3 = a|c # MAJA | ||
466 | |||
467 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 | ||
468 | mov a, T1 # T1 = a # MAJB | ||
469 | and b, y3 # y3 = (a|c)&b # MAJA | ||
470 | and c, T1 # T1 = a&c # MAJB | ||
471 | add y0, y2 # y2 = S1 + CH # -- | ||
472 | |||
473 | add h, d # d = k + w + h + d # -- | ||
474 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
475 | add y1, h # h = k + w + h + S0 # -- | ||
476 | |||
477 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
478 | |||
479 | RotateState | ||
480 | |||
481 | ################################### RND N + 2 ######################################### | ||
482 | |||
483 | add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
484 | mov f, y2 # y2 = f # CH | ||
485 | rorx $41, e, y0 # y0 = e >> 41 # S1A | ||
486 | rorx $18, e, y1 # y1 = e >> 18 # S1B | ||
487 | xor g, y2 # y2 = f^g # CH | ||
488 | |||
489 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 | ||
490 | rorx $14, e, y1 # y1 = (e >> 14) # S1 | ||
491 | and e, y2 # y2 = (f^g)&e # CH | ||
492 | add y3, old_h # h = t1 + S0 + MAJ # -- | ||
493 | |||
494 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 | ||
495 | rorx $34, a, T1 # T1 = a >> 34 # S0B | ||
496 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
497 | rorx $39, a, y1 # y1 = a >> 39 # S0A | ||
498 | mov a, y3 # y3 = a # MAJA | ||
499 | |||
500 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 | ||
501 | rorx $28, a, T1 # T1 = (a >> 28) # S0 | ||
502 | add 8*2+frame_XFER(%rsp), h # h = k + w + h # -- | ||
503 | or c, y3 # y3 = a|c # MAJA | ||
504 | |||
505 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 | ||
506 | mov a, T1 # T1 = a # MAJB | ||
507 | and b, y3 # y3 = (a|c)&b # MAJA | ||
508 | and c, T1 # T1 = a&c # MAJB | ||
509 | add y0, y2 # y2 = S1 + CH # -- | ||
510 | |||
511 | add h, d # d = k + w + h + d # -- | ||
512 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
513 | add y1, h # h = k + w + h + S0 # -- | ||
514 | |||
515 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
516 | |||
517 | RotateState | ||
518 | |||
519 | ################################### RND N + 3 ######################################### | ||
520 | |||
521 | add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
522 | mov f, y2 # y2 = f # CH | ||
523 | rorx $41, e, y0 # y0 = e >> 41 # S1A | ||
524 | rorx $18, e, y1 # y1 = e >> 18 # S1B | ||
525 | xor g, y2 # y2 = f^g # CH | ||
526 | |||
527 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) # S1 | ||
528 | rorx $14, e, y1 # y1 = (e >> 14) # S1 | ||
529 | and e, y2 # y2 = (f^g)&e # CH | ||
530 | add y3, old_h # h = t1 + S0 + MAJ # -- | ||
531 | |||
532 | xor y1, y0 # y0 = (e>>41) ^ (e>>18) ^ (e>>14) # S1 | ||
533 | rorx $34, a, T1 # T1 = a >> 34 # S0B | ||
534 | xor g, y2 # y2 = CH = ((f^g)&e)^g # CH | ||
535 | rorx $39, a, y1 # y1 = a >> 39 # S0A | ||
536 | mov a, y3 # y3 = a # MAJA | ||
537 | |||
538 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) # S0 | ||
539 | rorx $28, a, T1 # T1 = (a >> 28) # S0 | ||
540 | add 8*3+frame_XFER(%rsp), h # h = k + w + h # -- | ||
541 | or c, y3 # y3 = a|c # MAJA | ||
542 | |||
543 | xor T1, y1 # y1 = (a>>39) ^ (a>>34) ^ (a>>28) # S0 | ||
544 | mov a, T1 # T1 = a # MAJB | ||
545 | and b, y3 # y3 = (a|c)&b # MAJA | ||
546 | and c, T1 # T1 = a&c # MAJB | ||
547 | add y0, y2 # y2 = S1 + CH # -- | ||
548 | |||
549 | |||
550 | add h, d # d = k + w + h + d # -- | ||
551 | or T1, y3 # y3 = MAJ = (a|c)&b)|(a&c) # MAJ | ||
552 | add y1, h # h = k + w + h + S0 # -- | ||
553 | |||
554 | add y2, d # d = k + w + h + d + S1 + CH = d + t1 # -- | ||
555 | |||
556 | add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0# -- | ||
557 | |||
558 | add y3, h # h = t1 + S0 + MAJ # -- | ||
559 | |||
560 | RotateState | ||
561 | |||
562 | .endm | ||
563 | |||
564 | ######################################################################## | ||
565 | # void sha512_transform_rorx(const void* M, void* D, uint64_t L)# | ||
566 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | ||
567 | # The size of the message pointed to by M must be an integer multiple of SHA512 | ||
568 | # message blocks. | ||
569 | # L is the message length in SHA512 blocks | ||
570 | ######################################################################## | ||
571 | ENTRY(sha512_transform_rorx) | ||
572 | # Allocate Stack Space | ||
573 | mov %rsp, %rax | ||
574 | sub $frame_size, %rsp | ||
575 | and $~(0x20 - 1), %rsp | ||
576 | mov %rax, frame_RSPSAVE(%rsp) | ||
577 | |||
578 | # Save GPRs | ||
579 | mov %rbp, frame_GPRSAVE(%rsp) | ||
580 | mov %rbx, 8*1+frame_GPRSAVE(%rsp) | ||
581 | mov %r12, 8*2+frame_GPRSAVE(%rsp) | ||
582 | mov %r13, 8*3+frame_GPRSAVE(%rsp) | ||
583 | mov %r14, 8*4+frame_GPRSAVE(%rsp) | ||
584 | mov %r15, 8*5+frame_GPRSAVE(%rsp) | ||
585 | |||
586 | shl $7, NUM_BLKS # convert to bytes | ||
587 | jz done_hash | ||
588 | add INP, NUM_BLKS # pointer to end of data | ||
589 | mov NUM_BLKS, frame_INPEND(%rsp) | ||
590 | |||
591 | ## load initial digest | ||
592 | mov 8*0(CTX),a | ||
593 | mov 8*1(CTX),b | ||
594 | mov 8*2(CTX),c | ||
595 | mov 8*3(CTX),d | ||
596 | mov 8*4(CTX),e | ||
597 | mov 8*5(CTX),f | ||
598 | mov 8*6(CTX),g | ||
599 | mov 8*7(CTX),h | ||
600 | |||
601 | vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK | ||
602 | |||
603 | loop0: | ||
604 | lea K512(%rip), TBL | ||
605 | |||
606 | ## byte swap first 16 qwords | ||
607 | COPY_YMM_AND_BSWAP Y_0, (INP), BYTE_FLIP_MASK | ||
608 | COPY_YMM_AND_BSWAP Y_1, 1*32(INP), BYTE_FLIP_MASK | ||
609 | COPY_YMM_AND_BSWAP Y_2, 2*32(INP), BYTE_FLIP_MASK | ||
610 | COPY_YMM_AND_BSWAP Y_3, 3*32(INP), BYTE_FLIP_MASK | ||
611 | |||
612 | mov INP, frame_INP(%rsp) | ||
613 | |||
614 | ## schedule 64 message qwords, by doing 4 iterations of 16 rounds each | ||
615 | movq $4, frame_SRND(%rsp) | ||
616 | |||
617 | .align 16 | ||
618 | loop1: | ||
619 | vpaddq (TBL), Y_0, XFER | ||
620 | vmovdqa XFER, frame_XFER(%rsp) | ||
621 | FOUR_ROUNDS_AND_SCHED | ||
622 | |||
623 | vpaddq 1*32(TBL), Y_0, XFER | ||
624 | vmovdqa XFER, frame_XFER(%rsp) | ||
625 | FOUR_ROUNDS_AND_SCHED | ||
626 | |||
627 | vpaddq 2*32(TBL), Y_0, XFER | ||
628 | vmovdqa XFER, frame_XFER(%rsp) | ||
629 | FOUR_ROUNDS_AND_SCHED | ||
630 | |||
631 | vpaddq 3*32(TBL), Y_0, XFER | ||
632 | vmovdqa XFER, frame_XFER(%rsp) | ||
633 | add $(4*32), TBL | ||
634 | FOUR_ROUNDS_AND_SCHED | ||
635 | |||
636 | subq $1, frame_SRND(%rsp) | ||
637 | jne loop1 | ||
638 | |||
639 | movq $2, frame_SRND(%rsp) | ||
640 | loop2: | ||
641 | vpaddq (TBL), Y_0, XFER | ||
642 | vmovdqa XFER, frame_XFER(%rsp) | ||
643 | DO_4ROUNDS | ||
644 | vpaddq 1*32(TBL), Y_1, XFER | ||
645 | vmovdqa XFER, frame_XFER(%rsp) | ||
646 | add $(2*32), TBL | ||
647 | DO_4ROUNDS | ||
648 | |||
649 | vmovdqa Y_2, Y_0 | ||
650 | vmovdqa Y_3, Y_1 | ||
651 | |||
652 | subq $1, frame_SRND(%rsp) | ||
653 | jne loop2 | ||
654 | |||
655 | addm 8*0(CTX),a | ||
656 | addm 8*1(CTX),b | ||
657 | addm 8*2(CTX),c | ||
658 | addm 8*3(CTX),d | ||
659 | addm 8*4(CTX),e | ||
660 | addm 8*5(CTX),f | ||
661 | addm 8*6(CTX),g | ||
662 | addm 8*7(CTX),h | ||
663 | |||
664 | mov frame_INP(%rsp), INP | ||
665 | add $128, INP | ||
666 | cmp frame_INPEND(%rsp), INP | ||
667 | jne loop0 | ||
668 | |||
669 | done_hash: | ||
670 | |||
671 | # Restore GPRs | ||
672 | mov frame_GPRSAVE(%rsp) ,%rbp | ||
673 | mov 8*1+frame_GPRSAVE(%rsp) ,%rbx | ||
674 | mov 8*2+frame_GPRSAVE(%rsp) ,%r12 | ||
675 | mov 8*3+frame_GPRSAVE(%rsp) ,%r13 | ||
676 | mov 8*4+frame_GPRSAVE(%rsp) ,%r14 | ||
677 | mov 8*5+frame_GPRSAVE(%rsp) ,%r15 | ||
678 | |||
679 | # Restore Stack Pointer | ||
680 | mov frame_RSPSAVE(%rsp), %rsp | ||
681 | ret | ||
682 | ENDPROC(sha512_transform_rorx) | ||
683 | |||
684 | ######################################################################## | ||
685 | ### Binary Data | ||
686 | |||
687 | .data | ||
688 | |||
689 | .align 64 | ||
690 | # K[t] used in SHA512 hashing | ||
691 | K512: | ||
692 | .quad 0x428a2f98d728ae22,0x7137449123ef65cd | ||
693 | .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc | ||
694 | .quad 0x3956c25bf348b538,0x59f111f1b605d019 | ||
695 | .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 | ||
696 | .quad 0xd807aa98a3030242,0x12835b0145706fbe | ||
697 | .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 | ||
698 | .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 | ||
699 | .quad 0x9bdc06a725c71235,0xc19bf174cf692694 | ||
700 | .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 | ||
701 | .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 | ||
702 | .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 | ||
703 | .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 | ||
704 | .quad 0x983e5152ee66dfab,0xa831c66d2db43210 | ||
705 | .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 | ||
706 | .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 | ||
707 | .quad 0x06ca6351e003826f,0x142929670a0e6e70 | ||
708 | .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 | ||
709 | .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df | ||
710 | .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 | ||
711 | .quad 0x81c2c92e47edaee6,0x92722c851482353b | ||
712 | .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 | ||
713 | .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 | ||
714 | .quad 0xd192e819d6ef5218,0xd69906245565a910 | ||
715 | .quad 0xf40e35855771202a,0x106aa07032bbd1b8 | ||
716 | .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 | ||
717 | .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 | ||
718 | .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb | ||
719 | .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 | ||
720 | .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 | ||
721 | .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec | ||
722 | .quad 0x90befffa23631e28,0xa4506cebde82bde9 | ||
723 | .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b | ||
724 | .quad 0xca273eceea26619c,0xd186b8c721c0c207 | ||
725 | .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 | ||
726 | .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 | ||
727 | .quad 0x113f9804bef90dae,0x1b710b35131c471b | ||
728 | .quad 0x28db77f523047d84,0x32caab7b40c72493 | ||
729 | .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c | ||
730 | .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a | ||
731 | .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 | ||
732 | |||
733 | .align 32 | ||
734 | |||
735 | # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. | ||
736 | PSHUFFLE_BYTE_FLIP_MASK: | ||
737 | .octa 0x08090a0b0c0d0e0f0001020304050607 | ||
738 | .octa 0x18191a1b1c1d1e1f1011121314151617 | ||
739 | |||
740 | MASK_YMM_LO: | ||
741 | .octa 0x00000000000000000000000000000000 | ||
742 | .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF | ||
743 | #endif | ||
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S new file mode 100644 index 000000000000..fb56855d51f5 --- /dev/null +++ b/arch/x86/crypto/sha512-ssse3-asm.S | |||
@@ -0,0 +1,421 @@ | |||
1 | ######################################################################## | ||
2 | # Implement fast SHA-512 with SSSE3 instructions. (x86_64) | ||
3 | # | ||
4 | # Copyright (C) 2013 Intel Corporation. | ||
5 | # | ||
6 | # Authors: | ||
7 | # James Guilford <james.guilford@intel.com> | ||
8 | # Kirk Yap <kirk.s.yap@intel.com> | ||
9 | # David Cote <david.m.cote@intel.com> | ||
10 | # Tim Chen <tim.c.chen@linux.intel.com> | ||
11 | # | ||
12 | # This software is available to you under a choice of one of two | ||
13 | # licenses. You may choose to be licensed under the terms of the GNU | ||
14 | # General Public License (GPL) Version 2, available from the file | ||
15 | # COPYING in the main directory of this source tree, or the | ||
16 | # OpenIB.org BSD license below: | ||
17 | # | ||
18 | # Redistribution and use in source and binary forms, with or | ||
19 | # without modification, are permitted provided that the following | ||
20 | # conditions are met: | ||
21 | # | ||
22 | # - Redistributions of source code must retain the above | ||
23 | # copyright notice, this list of conditions and the following | ||
24 | # disclaimer. | ||
25 | # | ||
26 | # - Redistributions in binary form must reproduce the above | ||
27 | # copyright notice, this list of conditions and the following | ||
28 | # disclaimer in the documentation and/or other materials | ||
29 | # provided with the distribution. | ||
30 | # | ||
31 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
32 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
33 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
34 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
35 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
36 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
37 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
38 | # SOFTWARE. | ||
39 | # | ||
40 | ######################################################################## | ||
41 | # | ||
42 | # This code is described in an Intel White-Paper: | ||
43 | # "Fast SHA-512 Implementations on Intel Architecture Processors" | ||
44 | # | ||
45 | # To find it, surf to http://www.intel.com/p/en_US/embedded | ||
46 | # and search for that title. | ||
47 | # | ||
48 | ######################################################################## | ||
49 | |||
50 | #include <linux/linkage.h> | ||
51 | |||
52 | .text | ||
53 | |||
54 | # Virtual Registers | ||
55 | # ARG1 | ||
56 | msg = %rdi | ||
57 | # ARG2 | ||
58 | digest = %rsi | ||
59 | # ARG3 | ||
60 | msglen = %rdx | ||
61 | T1 = %rcx | ||
62 | T2 = %r8 | ||
63 | a_64 = %r9 | ||
64 | b_64 = %r10 | ||
65 | c_64 = %r11 | ||
66 | d_64 = %r12 | ||
67 | e_64 = %r13 | ||
68 | f_64 = %r14 | ||
69 | g_64 = %r15 | ||
70 | h_64 = %rbx | ||
71 | tmp0 = %rax | ||
72 | |||
73 | # Local variables (stack frame) | ||
74 | |||
75 | W_SIZE = 80*8 | ||
76 | WK_SIZE = 2*8 | ||
77 | RSPSAVE_SIZE = 1*8 | ||
78 | GPRSAVE_SIZE = 5*8 | ||
79 | |||
80 | frame_W = 0 | ||
81 | frame_WK = frame_W + W_SIZE | ||
82 | frame_RSPSAVE = frame_WK + WK_SIZE | ||
83 | frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE | ||
84 | frame_size = frame_GPRSAVE + GPRSAVE_SIZE | ||
85 | |||
86 | # Useful QWORD "arrays" for simpler memory references | ||
87 | # MSG, DIGEST, K_t, W_t are arrays | ||
88 | # WK_2(t) points to 1 of 2 qwords at frame.WK depending on t being odd/even | ||
89 | |||
90 | # Input message (arg1) | ||
91 | #define MSG(i) 8*i(msg) | ||
92 | |||
93 | # Output Digest (arg2) | ||
94 | #define DIGEST(i) 8*i(digest) | ||
95 | |||
96 | # SHA Constants (static mem) | ||
97 | #define K_t(i) 8*i+K512(%rip) | ||
98 | |||
99 | # Message Schedule (stack frame) | ||
100 | #define W_t(i) 8*i+frame_W(%rsp) | ||
101 | |||
102 | # W[t]+K[t] (stack frame) | ||
103 | #define WK_2(i) 8*((i%2))+frame_WK(%rsp) | ||
104 | |||
105 | .macro RotateState | ||
106 | # Rotate symbols a..h right | ||
107 | TMP = h_64 | ||
108 | h_64 = g_64 | ||
109 | g_64 = f_64 | ||
110 | f_64 = e_64 | ||
111 | e_64 = d_64 | ||
112 | d_64 = c_64 | ||
113 | c_64 = b_64 | ||
114 | b_64 = a_64 | ||
115 | a_64 = TMP | ||
116 | .endm | ||
117 | |||
118 | .macro SHA512_Round rnd | ||
119 | |||
120 | # Compute Round %%t | ||
121 | mov f_64, T1 # T1 = f | ||
122 | mov e_64, tmp0 # tmp = e | ||
123 | xor g_64, T1 # T1 = f ^ g | ||
124 | ror $23, tmp0 # 41 # tmp = e ror 23 | ||
125 | and e_64, T1 # T1 = (f ^ g) & e | ||
126 | xor e_64, tmp0 # tmp = (e ror 23) ^ e | ||
127 | xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g) | ||
128 | idx = \rnd | ||
129 | add WK_2(idx), T1 # W[t] + K[t] from message scheduler | ||
130 | ror $4, tmp0 # 18 # tmp = ((e ror 23) ^ e) ror 4 | ||
131 | xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e | ||
132 | mov a_64, T2 # T2 = a | ||
133 | add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h | ||
134 | ror $14, tmp0 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) | ||
135 | add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + S1(e) | ||
136 | mov a_64, tmp0 # tmp = a | ||
137 | xor c_64, T2 # T2 = a ^ c | ||
138 | and c_64, tmp0 # tmp = a & c | ||
139 | and b_64, T2 # T2 = (a ^ c) & b | ||
140 | xor tmp0, T2 # T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) | ||
141 | mov a_64, tmp0 # tmp = a | ||
142 | ror $5, tmp0 # 39 # tmp = a ror 5 | ||
143 | xor a_64, tmp0 # tmp = (a ror 5) ^ a | ||
144 | add T1, d_64 # e(next_state) = d + T1 | ||
145 | ror $6, tmp0 # 34 # tmp = ((a ror 5) ^ a) ror 6 | ||
146 | xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a | ||
147 | lea (T1, T2), h_64 # a(next_state) = T1 + Maj(a,b,c) | ||
148 | ror $28, tmp0 # 28 # tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) | ||
149 | add tmp0, h_64 # a(next_state) = T1 + Maj(a,b,c) S0(a) | ||
150 | RotateState | ||
151 | .endm | ||
152 | |||
153 | .macro SHA512_2Sched_2Round_sse rnd | ||
154 | |||
155 | # Compute rounds t-2 and t-1 | ||
156 | # Compute message schedule QWORDS t and t+1 | ||
157 | |||
158 | # Two rounds are computed based on the values for K[t-2]+W[t-2] and | ||
159 | # K[t-1]+W[t-1] which were previously stored at WK_2 by the message | ||
160 | # scheduler. | ||
161 | # The two new schedule QWORDS are stored at [W_t(%%t)] and [W_t(%%t+1)]. | ||
162 | # They are then added to their respective SHA512 constants at | ||
163 | # [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)] | ||
164 | # For brevity, the comments following vectored instructions only refer to | ||
165 | # the first of a pair of QWORDS. | ||
166 | # Eg. XMM2=W[t-2] really means XMM2={W[t-2]|W[t-1]} | ||
167 | # The computation of the message schedule and the rounds are tightly | ||
168 | # stitched to take advantage of instruction-level parallelism. | ||
169 | # For clarity, integer instructions (for the rounds calculation) are indented | ||
170 | # by one tab. Vectored instructions (for the message scheduler) are indented | ||
171 | # by two tabs. | ||
172 | |||
173 | mov f_64, T1 | ||
174 | idx = \rnd -2 | ||
175 | movdqa W_t(idx), %xmm2 # XMM2 = W[t-2] | ||
176 | xor g_64, T1 | ||
177 | and e_64, T1 | ||
178 | movdqa %xmm2, %xmm0 # XMM0 = W[t-2] | ||
179 | xor g_64, T1 | ||
180 | idx = \rnd | ||
181 | add WK_2(idx), T1 | ||
182 | idx = \rnd - 15 | ||
183 | movdqu W_t(idx), %xmm5 # XMM5 = W[t-15] | ||
184 | mov e_64, tmp0 | ||
185 | ror $23, tmp0 # 41 | ||
186 | movdqa %xmm5, %xmm3 # XMM3 = W[t-15] | ||
187 | xor e_64, tmp0 | ||
188 | ror $4, tmp0 # 18 | ||
189 | psrlq $61-19, %xmm0 # XMM0 = W[t-2] >> 42 | ||
190 | xor e_64, tmp0 | ||
191 | ror $14, tmp0 # 14 | ||
192 | psrlq $(8-7), %xmm3 # XMM3 = W[t-15] >> 1 | ||
193 | add tmp0, T1 | ||
194 | add h_64, T1 | ||
195 | pxor %xmm2, %xmm0 # XMM0 = (W[t-2] >> 42) ^ W[t-2] | ||
196 | mov a_64, T2 | ||
197 | xor c_64, T2 | ||
198 | pxor %xmm5, %xmm3 # XMM3 = (W[t-15] >> 1) ^ W[t-15] | ||
199 | and b_64, T2 | ||
200 | mov a_64, tmp0 | ||
201 | psrlq $(19-6), %xmm0 # XMM0 = ((W[t-2]>>42)^W[t-2])>>13 | ||
202 | and c_64, tmp0 | ||
203 | xor tmp0, T2 | ||
204 | psrlq $(7-1), %xmm3 # XMM3 = ((W[t-15]>>1)^W[t-15])>>6 | ||
205 | mov a_64, tmp0 | ||
206 | ror $5, tmp0 # 39 | ||
207 | pxor %xmm2, %xmm0 # XMM0 = (((W[t-2]>>42)^W[t-2])>>13)^W[t-2] | ||
208 | xor a_64, tmp0 | ||
209 | ror $6, tmp0 # 34 | ||
210 | pxor %xmm5, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15] | ||
211 | xor a_64, tmp0 | ||
212 | ror $28, tmp0 # 28 | ||
213 | psrlq $6, %xmm0 # XMM0 = ((((W[t-2]>>42)^W[t-2])>>13)^W[t-2])>>6 | ||
214 | add tmp0, T2 | ||
215 | add T1, d_64 | ||
216 | psrlq $1, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15]>>1 | ||
217 | lea (T1, T2), h_64 | ||
218 | RotateState | ||
219 | movdqa %xmm2, %xmm1 # XMM1 = W[t-2] | ||
220 | mov f_64, T1 | ||
221 | xor g_64, T1 | ||
222 | movdqa %xmm5, %xmm4 # XMM4 = W[t-15] | ||
223 | and e_64, T1 | ||
224 | xor g_64, T1 | ||
225 | psllq $(64-19)-(64-61) , %xmm1 # XMM1 = W[t-2] << 42 | ||
226 | idx = \rnd + 1 | ||
227 | add WK_2(idx), T1 | ||
228 | mov e_64, tmp0 | ||
229 | psllq $(64-1)-(64-8), %xmm4 # XMM4 = W[t-15] << 7 | ||
230 | ror $23, tmp0 # 41 | ||
231 | xor e_64, tmp0 | ||
232 | pxor %xmm2, %xmm1 # XMM1 = (W[t-2] << 42)^W[t-2] | ||
233 | ror $4, tmp0 # 18 | ||
234 | xor e_64, tmp0 | ||
235 | pxor %xmm5, %xmm4 # XMM4 = (W[t-15]<<7)^W[t-15] | ||
236 | ror $14, tmp0 # 14 | ||
237 | add tmp0, T1 | ||
238 | psllq $(64-61), %xmm1 # XMM1 = ((W[t-2] << 42)^W[t-2])<<3 | ||
239 | add h_64, T1 | ||
240 | mov a_64, T2 | ||
241 | psllq $(64-8), %xmm4 # XMM4 = ((W[t-15]<<7)^W[t-15])<<56 | ||
242 | xor c_64, T2 | ||
243 | and b_64, T2 | ||
244 | pxor %xmm1, %xmm0 # XMM0 = s1(W[t-2]) | ||
245 | mov a_64, tmp0 | ||
246 | and c_64, tmp0 | ||
247 | idx = \rnd - 7 | ||
248 | movdqu W_t(idx), %xmm1 # XMM1 = W[t-7] | ||
249 | xor tmp0, T2 | ||
250 | pxor %xmm4, %xmm3 # XMM3 = s0(W[t-15]) | ||
251 | mov a_64, tmp0 | ||
252 | paddq %xmm3, %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) | ||
253 | ror $5, tmp0 # 39 | ||
254 | idx =\rnd-16 | ||
255 | paddq W_t(idx), %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) + W[t-16] | ||
256 | xor a_64, tmp0 | ||
257 | paddq %xmm1, %xmm0 # XMM0 = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16] | ||
258 | ror $6, tmp0 # 34 | ||
259 | movdqa %xmm0, W_t(\rnd) # Store scheduled qwords | ||
260 | xor a_64, tmp0 | ||
261 | paddq K_t(\rnd), %xmm0 # Compute W[t]+K[t] | ||
262 | ror $28, tmp0 # 28 | ||
263 | idx = \rnd | ||
264 | movdqa %xmm0, WK_2(idx) # Store W[t]+K[t] for next rounds | ||
265 | add tmp0, T2 | ||
266 | add T1, d_64 | ||
267 | lea (T1, T2), h_64 | ||
268 | RotateState | ||
269 | .endm | ||
270 | |||
271 | ######################################################################## | ||
272 | # void sha512_transform_ssse3(const void* M, void* D, u64 L)# | ||
273 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | ||
274 | # The size of the message pointed to by M must be an integer multiple of SHA512 | ||
275 | # message blocks. | ||
276 | # L is the message length in SHA512 blocks. | ||
277 | ######################################################################## | ||
ENTRY(sha512_transform_ssse3)

	# A block count of zero means there is nothing to hash.
	cmp	$0, msglen
	je	nowork

	# Allocate stack space, aligning %rsp down to a 32-byte boundary
	# for the aligned movdqa accesses into the frame; the caller's
	# stack pointer is saved so it can be restored on exit.
	mov	%rsp, %rax
	sub	$frame_size, %rsp
	and	$~(0x20 - 1), %rsp
	mov	%rax, frame_RSPSAVE(%rsp)

	# Save the callee-saved GPRs that are used as working registers.
	mov	%rbx, frame_GPRSAVE(%rsp)
	mov	%r12, frame_GPRSAVE +8*1(%rsp)
	mov	%r13, frame_GPRSAVE +8*2(%rsp)
	mov	%r14, frame_GPRSAVE +8*3(%rsp)
	mov	%r15, frame_GPRSAVE +8*4(%rsp)

updateblock:

	# Load the eight 64-bit state words a..h from the digest.
	mov	DIGEST(0), a_64
	mov	DIGEST(1), b_64
	mov	DIGEST(2), c_64
	mov	DIGEST(3), d_64
	mov	DIGEST(4), e_64
	mov	DIGEST(5), f_64
	mov	DIGEST(6), g_64
	mov	DIGEST(7), h_64

	t = 0
	.rept 80/2 + 1
	# (80 rounds) / (2 rounds/iteration) + (1 iteration)
	# +1 iteration because the scheduler leads hashing by 1 iteration
	.if t < 2
		# First pair: byte-swap 2 message qwords and precompute W[t]+K[t];
		# no rounds are computed yet (the schedule runs one pair ahead).
		movdqa	XMM_QWORD_BSWAP(%rip), %xmm1
		movdqu	MSG(t), %xmm0
		pshufb	%xmm1, %xmm0		# BSWAP
		movdqa	%xmm0, W_t(t)		# Store Scheduled Pair
		paddq	K_t(t), %xmm0		# Compute W[t]+K[t]
		movdqa	%xmm0, WK_2(t)		# Store into WK for rounds
	.elseif t < 16
		# Message-load phase: byte-swap 2 qwords while computing the
		# 2 rounds for the pair scheduled on the previous iteration.
		movdqu	MSG(t), %xmm0
		pshufb	%xmm1, %xmm0		# BSWAP
		SHA512_Round t-2		# Round t-2
		movdqa	%xmm0, W_t(t)		# Store Scheduled Pair
		paddq	K_t(t), %xmm0		# Compute W[t]+K[t]
		SHA512_Round t-1		# Round t-1
		movdqa	%xmm0, WK_2(t)		# Store W[t]+K[t] into WK
	.elseif t < 79
		# Steady state: schedule 2 qwords interleaved with 2 rounds.
		SHA512_2Sched_2Round_sse t
	.else
		# Tail: only the final 2 rounds remain; nothing left to schedule.
		SHA512_Round t-2
		SHA512_Round t-1
	.endif
	t = t+2
	.endr

	# Add the working variables back into the digest (Davies-Meyer).
	add	a_64, DIGEST(0)
	add	b_64, DIGEST(1)
	add	c_64, DIGEST(2)
	add	d_64, DIGEST(3)
	add	e_64, DIGEST(4)
	add	f_64, DIGEST(5)
	add	g_64, DIGEST(6)
	add	h_64, DIGEST(7)

	# Advance to the next 128-byte message block.
	add	$16*8, msg
	dec	msglen
	jnz	updateblock

	# Restore callee-saved GPRs.
	mov	frame_GPRSAVE(%rsp),      %rbx
	mov	frame_GPRSAVE +8*1(%rsp), %r12
	mov	frame_GPRSAVE +8*2(%rsp), %r13
	mov	frame_GPRSAVE +8*3(%rsp), %r14
	mov	frame_GPRSAVE +8*4(%rsp), %r15

	# Restore the caller's (pre-alignment) stack pointer.
	mov	frame_RSPSAVE(%rsp), %rsp

nowork:
	ret
ENDPROC(sha512_transform_ssse3)
368 | |||
369 | ######################################################################## | ||
370 | ### Binary Data | ||
371 | |||
372 | .data | ||
373 | |||
374 | .align 16 | ||
375 | |||
376 | # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. | ||
377 | XMM_QWORD_BSWAP: | ||
378 | .octa 0x08090a0b0c0d0e0f0001020304050607 | ||
379 | |||
380 | # K[t] used in SHA512 hashing | ||
381 | K512: | ||
382 | .quad 0x428a2f98d728ae22,0x7137449123ef65cd | ||
383 | .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc | ||
384 | .quad 0x3956c25bf348b538,0x59f111f1b605d019 | ||
385 | .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 | ||
386 | .quad 0xd807aa98a3030242,0x12835b0145706fbe | ||
387 | .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 | ||
388 | .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 | ||
389 | .quad 0x9bdc06a725c71235,0xc19bf174cf692694 | ||
390 | .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 | ||
391 | .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 | ||
392 | .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 | ||
393 | .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 | ||
394 | .quad 0x983e5152ee66dfab,0xa831c66d2db43210 | ||
395 | .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 | ||
396 | .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 | ||
397 | .quad 0x06ca6351e003826f,0x142929670a0e6e70 | ||
398 | .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 | ||
399 | .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df | ||
400 | .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 | ||
401 | .quad 0x81c2c92e47edaee6,0x92722c851482353b | ||
402 | .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 | ||
403 | .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 | ||
404 | .quad 0xd192e819d6ef5218,0xd69906245565a910 | ||
405 | .quad 0xf40e35855771202a,0x106aa07032bbd1b8 | ||
406 | .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 | ||
407 | .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 | ||
408 | .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb | ||
409 | .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 | ||
410 | .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 | ||
411 | .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec | ||
412 | .quad 0x90befffa23631e28,0xa4506cebde82bde9 | ||
413 | .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b | ||
414 | .quad 0xca273eceea26619c,0xd186b8c721c0c207 | ||
415 | .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 | ||
416 | .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 | ||
417 | .quad 0x113f9804bef90dae,0x1b710b35131c471b | ||
418 | .quad 0x28db77f523047d84,0x32caab7b40c72493 | ||
419 | .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c | ||
420 | .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a | ||
421 | .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 | ||
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c new file mode 100644 index 000000000000..6cbd8df348d2 --- /dev/null +++ b/arch/x86/crypto/sha512_ssse3_glue.c | |||
@@ -0,0 +1,282 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * Glue code for the SHA512 Secure Hash Algorithm assembler | ||
5 | * implementation using supplemental SSE3 / AVX / AVX2 instructions. | ||
6 | * | ||
7 | * This file is based on sha512_generic.c | ||
8 | * | ||
9 | * Copyright (C) 2013 Intel Corporation | ||
10 | * Author: Tim Chen <tim.c.chen@linux.intel.com> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify it | ||
13 | * under the terms of the GNU General Public License as published by the Free | ||
14 | * Software Foundation; either version 2 of the License, or (at your option) | ||
15 | * any later version. | ||
16 | * | ||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
21 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
22 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
23 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
24 | * SOFTWARE. | ||
25 | * | ||
26 | */ | ||
27 | |||
28 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
29 | |||
30 | #include <crypto/internal/hash.h> | ||
31 | #include <linux/init.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/mm.h> | ||
34 | #include <linux/cryptohash.h> | ||
35 | #include <linux/types.h> | ||
36 | #include <crypto/sha.h> | ||
37 | #include <asm/byteorder.h> | ||
38 | #include <asm/i387.h> | ||
39 | #include <asm/xcr.h> | ||
40 | #include <asm/xsave.h> | ||
41 | |||
42 | #include <linux/string.h> | ||
43 | |||
44 | asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest, | ||
45 | u64 rounds); | ||
46 | #ifdef CONFIG_AS_AVX | ||
47 | asmlinkage void sha512_transform_avx(const char *data, u64 *digest, | ||
48 | u64 rounds); | ||
49 | #endif | ||
50 | #ifdef CONFIG_AS_AVX2 | ||
51 | asmlinkage void sha512_transform_rorx(const char *data, u64 *digest, | ||
52 | u64 rounds); | ||
53 | #endif | ||
54 | |||
55 | static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64); | ||
56 | |||
57 | |||
58 | static int sha512_ssse3_init(struct shash_desc *desc) | ||
59 | { | ||
60 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
61 | |||
62 | sctx->state[0] = SHA512_H0; | ||
63 | sctx->state[1] = SHA512_H1; | ||
64 | sctx->state[2] = SHA512_H2; | ||
65 | sctx->state[3] = SHA512_H3; | ||
66 | sctx->state[4] = SHA512_H4; | ||
67 | sctx->state[5] = SHA512_H5; | ||
68 | sctx->state[6] = SHA512_H6; | ||
69 | sctx->state[7] = SHA512_H7; | ||
70 | sctx->count[0] = sctx->count[1] = 0; | ||
71 | |||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
76 | unsigned int len, unsigned int partial) | ||
77 | { | ||
78 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
79 | unsigned int done = 0; | ||
80 | |||
81 | sctx->count[0] += len; | ||
82 | if (sctx->count[0] < len) | ||
83 | sctx->count[1]++; | ||
84 | |||
85 | if (partial) { | ||
86 | done = SHA512_BLOCK_SIZE - partial; | ||
87 | memcpy(sctx->buf + partial, data, done); | ||
88 | sha512_transform_asm(sctx->buf, sctx->state, 1); | ||
89 | } | ||
90 | |||
91 | if (len - done >= SHA512_BLOCK_SIZE) { | ||
92 | const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; | ||
93 | |||
94 | sha512_transform_asm(data + done, sctx->state, (u64) rounds); | ||
95 | |||
96 | done += rounds * SHA512_BLOCK_SIZE; | ||
97 | } | ||
98 | |||
99 | memcpy(sctx->buf, data + done, len - done); | ||
100 | |||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
105 | unsigned int len) | ||
106 | { | ||
107 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
108 | unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; | ||
109 | int res; | ||
110 | |||
111 | /* Handle the fast case right here */ | ||
112 | if (partial + len < SHA512_BLOCK_SIZE) { | ||
113 | sctx->count[0] += len; | ||
114 | if (sctx->count[0] < len) | ||
115 | sctx->count[1]++; | ||
116 | memcpy(sctx->buf + partial, data, len); | ||
117 | |||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | if (!irq_fpu_usable()) { | ||
122 | res = crypto_sha512_update(desc, data, len); | ||
123 | } else { | ||
124 | kernel_fpu_begin(); | ||
125 | res = __sha512_ssse3_update(desc, data, len, partial); | ||
126 | kernel_fpu_end(); | ||
127 | } | ||
128 | |||
129 | return res; | ||
130 | } | ||
131 | |||
132 | |||
133 | /* Add padding and return the message digest. */ | ||
134 | static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) | ||
135 | { | ||
136 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
137 | unsigned int i, index, padlen; | ||
138 | __be64 *dst = (__be64 *)out; | ||
139 | __be64 bits[2]; | ||
140 | static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; | ||
141 | |||
142 | /* save number of bits */ | ||
143 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
144 | bits[0] = cpu_to_be64(sctx->count[1] << 3) | sctx->count[0] >> 61; | ||
145 | |||
146 | /* Pad out to 112 mod 128 and append length */ | ||
147 | index = sctx->count[0] & 0x7f; | ||
148 | padlen = (index < 112) ? (112 - index) : ((128+112) - index); | ||
149 | |||
150 | if (!irq_fpu_usable()) { | ||
151 | crypto_sha512_update(desc, padding, padlen); | ||
152 | crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
153 | } else { | ||
154 | kernel_fpu_begin(); | ||
155 | /* We need to fill a whole block for __sha512_ssse3_update() */ | ||
156 | if (padlen <= 112) { | ||
157 | sctx->count[0] += padlen; | ||
158 | if (sctx->count[0] < padlen) | ||
159 | sctx->count[1]++; | ||
160 | memcpy(sctx->buf + index, padding, padlen); | ||
161 | } else { | ||
162 | __sha512_ssse3_update(desc, padding, padlen, index); | ||
163 | } | ||
164 | __sha512_ssse3_update(desc, (const u8 *)&bits, | ||
165 | sizeof(bits), 112); | ||
166 | kernel_fpu_end(); | ||
167 | } | ||
168 | |||
169 | /* Store state in digest */ | ||
170 | for (i = 0; i < 8; i++) | ||
171 | dst[i] = cpu_to_be64(sctx->state[i]); | ||
172 | |||
173 | /* Wipe context */ | ||
174 | memset(sctx, 0, sizeof(*sctx)); | ||
175 | |||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | static int sha512_ssse3_export(struct shash_desc *desc, void *out) | ||
180 | { | ||
181 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
182 | |||
183 | memcpy(out, sctx, sizeof(*sctx)); | ||
184 | |||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | static int sha512_ssse3_import(struct shash_desc *desc, const void *in) | ||
189 | { | ||
190 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
191 | |||
192 | memcpy(sctx, in, sizeof(*sctx)); | ||
193 | |||
194 | return 0; | ||
195 | } | ||
196 | |||
/*
 * shash descriptor for the accelerated SHA-512.  Priority 150 ranks it
 * above the generic C implementation (priority 100) so it is preferred
 * whenever registered; export/import state is the raw sha512_state.
 */
static struct shash_alg alg = {
	.digestsize	=	SHA512_DIGEST_SIZE,
	.init		=	sha512_ssse3_init,
	.update		=	sha512_ssse3_update,
	.final		=	sha512_ssse3_final,
	.export		=	sha512_ssse3_export,
	.import		=	sha512_ssse3_import,
	.descsize	=	sizeof(struct sha512_state),
	.statesize	=	sizeof(struct sha512_state),
	.base		=	{
		.cra_name	=	"sha512",
		.cra_driver_name =	"sha512-ssse3",
		.cra_priority	=	150,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA512_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
215 | |||
#ifdef CONFIG_AS_AVX
/*
 * AVX is usable only when the CPU advertises it AND the OS has enabled
 * XSAVE with both SSE and YMM state saved across context switches.
 */
static bool __init avx_usable(void)
{
	u64 xcr0;
	const u64 needed = XSTATE_SSE | XSTATE_YMM;

	if (!cpu_has_avx || !cpu_has_osxsave)
		return false;

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & needed) != needed) {
		pr_info("AVX detected but unusable.\n");
		return false;
	}

	return true;
}
#endif
234 | |||
235 | static int __init sha512_ssse3_mod_init(void) | ||
236 | { | ||
237 | /* test for SSE3 first */ | ||
238 | if (cpu_has_ssse3) | ||
239 | sha512_transform_asm = sha512_transform_ssse3; | ||
240 | |||
241 | #ifdef CONFIG_AS_AVX | ||
242 | /* allow AVX to override SSSE3, it's a little faster */ | ||
243 | if (avx_usable()) { | ||
244 | #ifdef CONFIG_AS_AVX2 | ||
245 | if (boot_cpu_has(X86_FEATURE_AVX2)) | ||
246 | sha512_transform_asm = sha512_transform_rorx; | ||
247 | else | ||
248 | #endif | ||
249 | sha512_transform_asm = sha512_transform_avx; | ||
250 | } | ||
251 | #endif | ||
252 | |||
253 | if (sha512_transform_asm) { | ||
254 | #ifdef CONFIG_AS_AVX | ||
255 | if (sha512_transform_asm == sha512_transform_avx) | ||
256 | pr_info("Using AVX optimized SHA-512 implementation\n"); | ||
257 | #ifdef CONFIG_AS_AVX2 | ||
258 | else if (sha512_transform_asm == sha512_transform_rorx) | ||
259 | pr_info("Using AVX2 optimized SHA-512 implementation\n"); | ||
260 | #endif | ||
261 | else | ||
262 | #endif | ||
263 | pr_info("Using SSSE3 optimized SHA-512 implementation\n"); | ||
264 | return crypto_register_shash(&alg); | ||
265 | } | ||
266 | pr_info("Neither AVX nor SSSE3 is available/usable.\n"); | ||
267 | |||
268 | return -ENODEV; | ||
269 | } | ||
270 | |||
271 | static void __exit sha512_ssse3_mod_fini(void) | ||
272 | { | ||
273 | crypto_unregister_shash(&alg); | ||
274 | } | ||
275 | |||
276 | module_init(sha512_ssse3_mod_init); | ||
277 | module_exit(sha512_ssse3_mod_fini); | ||
278 | |||
279 | MODULE_LICENSE("GPL"); | ||
280 | MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); | ||
281 | |||
282 | MODULE_ALIAS("sha512"); | ||
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 8d3e113b2c95..05058134c443 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S | |||
@@ -4,7 +4,7 @@ | |||
4 | * Copyright (C) 2012 Johannes Goetzfried | 4 | * Copyright (C) 2012 Johannes Goetzfried |
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | 5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> |
6 | * | 6 | * |
7 | * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 7 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
@@ -33,6 +33,8 @@ | |||
33 | 33 | ||
34 | .Lbswap128_mask: | 34 | .Lbswap128_mask: |
35 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | 35 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 |
36 | .Lxts_gf128mul_and_shl1_mask: | ||
37 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
36 | 38 | ||
37 | .text | 39 | .text |
38 | 40 | ||
@@ -408,3 +410,47 @@ ENTRY(twofish_ctr_8way) | |||
408 | 410 | ||
409 | ret; | 411 | ret; |
410 | ENDPROC(twofish_ctr_8way) | 412 | ENDPROC(twofish_ctr_8way) |
413 | |||
ENTRY(twofish_xts_enc_8way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
	 */

	/* %rsi is consumed by load_xts_8way; keep dst for the final store. */
	movq %rsi, %r11;

	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
	load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
		      RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);

	call __twofish_enc_blk8;

	/* dst <= regs xor IVs(in dst) */
	store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);

	ret;
ENDPROC(twofish_xts_enc_8way)
435 | |||
ENTRY(twofish_xts_dec_8way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
	 */

	/* %rsi is consumed by load_xts_8way; keep dst for the final store. */
	movq %rsi, %r11;

	/* regs <= src, dst <= IVs, regs <= regs xor IVs
	 * (register order differs from the encrypt path to match the
	 * input ordering expected by __twofish_dec_blk8) */
	load_xts_8way(%rcx, %rdx, %rsi, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2,
		      RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);

	call __twofish_dec_blk8;

	/* dst <= regs xor IVs(in dst) */
	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	ret;
ENDPROC(twofish_xts_dec_8way)
diff --git a/arch/x86/crypto/twofish-avx2-asm_64.S b/arch/x86/crypto/twofish-avx2-asm_64.S new file mode 100644 index 000000000000..e1a83b9cd389 --- /dev/null +++ b/arch/x86/crypto/twofish-avx2-asm_64.S | |||
@@ -0,0 +1,600 @@ | |||
1 | /* | ||
2 | * x86_64/AVX2 assembler optimized version of Twofish | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/linkage.h> | ||
14 | #include "glue_helper-asm-avx2.S" | ||
15 | |||
16 | .file "twofish-avx2-asm_64.S" | ||
17 | |||
18 | .data | ||
19 | .align 16 | ||
20 | |||
21 | .Lvpshufb_mask0: | ||
22 | .long 0x80808000 | ||
23 | .long 0x80808004 | ||
24 | .long 0x80808008 | ||
25 | .long 0x8080800c | ||
26 | |||
27 | .Lbswap128_mask: | ||
28 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
29 | .Lxts_gf128mul_and_shl1_mask_0: | ||
30 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
31 | .Lxts_gf128mul_and_shl1_mask_1: | ||
32 | .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 | ||
33 | |||
34 | .text | ||
35 | |||
36 | /* structure of crypto context */ | ||
37 | #define s0 0 | ||
38 | #define s1 1024 | ||
39 | #define s2 2048 | ||
40 | #define s3 3072 | ||
41 | #define w 4096 | ||
42 | #define k 4128 | ||
43 | |||
44 | /* register macros */ | ||
45 | #define CTX %rdi | ||
46 | |||
47 | #define RS0 CTX | ||
48 | #define RS1 %r8 | ||
49 | #define RS2 %r9 | ||
50 | #define RS3 %r10 | ||
51 | #define RK %r11 | ||
52 | #define RW %rax | ||
53 | #define RROUND %r12 | ||
54 | #define RROUNDd %r12d | ||
55 | |||
56 | #define RA0 %ymm8 | ||
57 | #define RB0 %ymm9 | ||
58 | #define RC0 %ymm10 | ||
59 | #define RD0 %ymm11 | ||
60 | #define RA1 %ymm12 | ||
61 | #define RB1 %ymm13 | ||
62 | #define RC1 %ymm14 | ||
63 | #define RD1 %ymm15 | ||
64 | |||
65 | /* temp regs */ | ||
66 | #define RX0 %ymm0 | ||
67 | #define RY0 %ymm1 | ||
68 | #define RX1 %ymm2 | ||
69 | #define RY1 %ymm3 | ||
70 | #define RT0 %ymm4 | ||
71 | #define RIDX %ymm5 | ||
72 | |||
73 | #define RX0x %xmm0 | ||
74 | #define RY0x %xmm1 | ||
75 | #define RX1x %xmm2 | ||
76 | #define RY1x %xmm3 | ||
77 | #define RT0x %xmm4 | ||
78 | |||
79 | /* vpgatherdd mask and '-1' */ | ||
80 | #define RNOT %ymm6 | ||
81 | |||
82 | /* byte mask, (-1 >> 24) */ | ||
83 | #define RBYTE %ymm7 | ||
84 | |||
85 | /********************************************************************** | ||
86 | 16-way AVX2 twofish | ||
87 | **********************************************************************/ | ||
88 | #define init_round_constants() \ | ||
89 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
90 | vpsrld $24, RNOT, RBYTE; \ | ||
91 | leaq k(CTX), RK; \ | ||
92 | leaq w(CTX), RW; \ | ||
93 | leaq s1(CTX), RS1; \ | ||
94 | leaq s2(CTX), RS2; \ | ||
95 | leaq s3(CTX), RS3; \ | ||
96 | |||
97 | #define g16(ab, rs0, rs1, rs2, rs3, xy) \ | ||
98 | vpand RBYTE, ab ## 0, RIDX; \ | ||
99 | vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \ | ||
100 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
101 | \ | ||
102 | vpand RBYTE, ab ## 1, RIDX; \ | ||
103 | vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \ | ||
104 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
105 | \ | ||
106 | vpsrld $8, ab ## 0, RIDX; \ | ||
107 | vpand RBYTE, RIDX, RIDX; \ | ||
108 | vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ | ||
109 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
110 | vpxor RT0, xy ## 0, xy ## 0; \ | ||
111 | \ | ||
112 | vpsrld $8, ab ## 1, RIDX; \ | ||
113 | vpand RBYTE, RIDX, RIDX; \ | ||
114 | vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \ | ||
115 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
116 | vpxor RT0, xy ## 1, xy ## 1; \ | ||
117 | \ | ||
118 | vpsrld $16, ab ## 0, RIDX; \ | ||
119 | vpand RBYTE, RIDX, RIDX; \ | ||
120 | vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ | ||
121 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
122 | vpxor RT0, xy ## 0, xy ## 0; \ | ||
123 | \ | ||
124 | vpsrld $16, ab ## 1, RIDX; \ | ||
125 | vpand RBYTE, RIDX, RIDX; \ | ||
126 | vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \ | ||
127 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
128 | vpxor RT0, xy ## 1, xy ## 1; \ | ||
129 | \ | ||
130 | vpsrld $24, ab ## 0, RIDX; \ | ||
131 | vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ | ||
132 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
133 | vpxor RT0, xy ## 0, xy ## 0; \ | ||
134 | \ | ||
135 | vpsrld $24, ab ## 1, RIDX; \ | ||
136 | vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \ | ||
137 | vpcmpeqd RNOT, RNOT, RNOT; \ | ||
138 | vpxor RT0, xy ## 1, xy ## 1; | ||
139 | |||
140 | #define g1_16(a, x) \ | ||
141 | g16(a, RS0, RS1, RS2, RS3, x); | ||
142 | |||
143 | #define g2_16(b, y) \ | ||
144 | g16(b, RS1, RS2, RS3, RS0, y); | ||
145 | |||
146 | #define encrypt_round_end16(a, b, c, d, nk) \ | ||
147 | vpaddd RY0, RX0, RX0; \ | ||
148 | vpaddd RX0, RY0, RY0; \ | ||
149 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
150 | vpaddd RT0, RX0, RX0; \ | ||
151 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
152 | vpaddd RT0, RY0, RY0; \ | ||
153 | \ | ||
154 | vpxor RY0, d ## 0, d ## 0; \ | ||
155 | \ | ||
156 | vpxor RX0, c ## 0, c ## 0; \ | ||
157 | vpsrld $1, c ## 0, RT0; \ | ||
158 | vpslld $31, c ## 0, c ## 0; \ | ||
159 | vpor RT0, c ## 0, c ## 0; \ | ||
160 | \ | ||
161 | vpaddd RY1, RX1, RX1; \ | ||
162 | vpaddd RX1, RY1, RY1; \ | ||
163 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
164 | vpaddd RT0, RX1, RX1; \ | ||
165 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
166 | vpaddd RT0, RY1, RY1; \ | ||
167 | \ | ||
168 | vpxor RY1, d ## 1, d ## 1; \ | ||
169 | \ | ||
170 | vpxor RX1, c ## 1, c ## 1; \ | ||
171 | vpsrld $1, c ## 1, RT0; \ | ||
172 | vpslld $31, c ## 1, c ## 1; \ | ||
173 | vpor RT0, c ## 1, c ## 1; \ | ||
174 | |||
175 | #define encrypt_round16(a, b, c, d, nk) \ | ||
176 | g2_16(b, RY); \ | ||
177 | \ | ||
178 | vpslld $1, b ## 0, RT0; \ | ||
179 | vpsrld $31, b ## 0, b ## 0; \ | ||
180 | vpor RT0, b ## 0, b ## 0; \ | ||
181 | \ | ||
182 | vpslld $1, b ## 1, RT0; \ | ||
183 | vpsrld $31, b ## 1, b ## 1; \ | ||
184 | vpor RT0, b ## 1, b ## 1; \ | ||
185 | \ | ||
186 | g1_16(a, RX); \ | ||
187 | \ | ||
188 | encrypt_round_end16(a, b, c, d, nk); | ||
189 | |||
190 | #define encrypt_round_first16(a, b, c, d, nk) \ | ||
191 | vpslld $1, d ## 0, RT0; \ | ||
192 | vpsrld $31, d ## 0, d ## 0; \ | ||
193 | vpor RT0, d ## 0, d ## 0; \ | ||
194 | \ | ||
195 | vpslld $1, d ## 1, RT0; \ | ||
196 | vpsrld $31, d ## 1, d ## 1; \ | ||
197 | vpor RT0, d ## 1, d ## 1; \ | ||
198 | \ | ||
199 | encrypt_round16(a, b, c, d, nk); | ||
200 | |||
201 | #define encrypt_round_last16(a, b, c, d, nk) \ | ||
202 | g2_16(b, RY); \ | ||
203 | \ | ||
204 | g1_16(a, RX); \ | ||
205 | \ | ||
206 | encrypt_round_end16(a, b, c, d, nk); | ||
207 | |||
208 | #define decrypt_round_end16(a, b, c, d, nk) \ | ||
209 | vpaddd RY0, RX0, RX0; \ | ||
210 | vpaddd RX0, RY0, RY0; \ | ||
211 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
212 | vpaddd RT0, RX0, RX0; \ | ||
213 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
214 | vpaddd RT0, RY0, RY0; \ | ||
215 | \ | ||
216 | vpxor RX0, c ## 0, c ## 0; \ | ||
217 | \ | ||
218 | vpxor RY0, d ## 0, d ## 0; \ | ||
219 | vpsrld $1, d ## 0, RT0; \ | ||
220 | vpslld $31, d ## 0, d ## 0; \ | ||
221 | vpor RT0, d ## 0, d ## 0; \ | ||
222 | \ | ||
223 | vpaddd RY1, RX1, RX1; \ | ||
224 | vpaddd RX1, RY1, RY1; \ | ||
225 | vpbroadcastd nk(RK,RROUND,8), RT0; \ | ||
226 | vpaddd RT0, RX1, RX1; \ | ||
227 | vpbroadcastd 4+nk(RK,RROUND,8), RT0; \ | ||
228 | vpaddd RT0, RY1, RY1; \ | ||
229 | \ | ||
230 | vpxor RX1, c ## 1, c ## 1; \ | ||
231 | \ | ||
232 | vpxor RY1, d ## 1, d ## 1; \ | ||
233 | vpsrld $1, d ## 1, RT0; \ | ||
234 | vpslld $31, d ## 1, d ## 1; \ | ||
235 | vpor RT0, d ## 1, d ## 1; | ||
236 | |||
237 | #define decrypt_round16(a, b, c, d, nk) \ | ||
238 | g1_16(a, RX); \ | ||
239 | \ | ||
240 | vpslld $1, a ## 0, RT0; \ | ||
241 | vpsrld $31, a ## 0, a ## 0; \ | ||
242 | vpor RT0, a ## 0, a ## 0; \ | ||
243 | \ | ||
244 | vpslld $1, a ## 1, RT0; \ | ||
245 | vpsrld $31, a ## 1, a ## 1; \ | ||
246 | vpor RT0, a ## 1, a ## 1; \ | ||
247 | \ | ||
248 | g2_16(b, RY); \ | ||
249 | \ | ||
250 | decrypt_round_end16(a, b, c, d, nk); | ||
251 | |||
252 | #define decrypt_round_first16(a, b, c, d, nk) \ | ||
253 | vpslld $1, c ## 0, RT0; \ | ||
254 | vpsrld $31, c ## 0, c ## 0; \ | ||
255 | vpor RT0, c ## 0, c ## 0; \ | ||
256 | \ | ||
257 | vpslld $1, c ## 1, RT0; \ | ||
258 | vpsrld $31, c ## 1, c ## 1; \ | ||
259 | vpor RT0, c ## 1, c ## 1; \ | ||
260 | \ | ||
261 | decrypt_round16(a, b, c, d, nk) | ||
262 | |||
263 | #define decrypt_round_last16(a, b, c, d, nk) \ | ||
264 | g1_16(a, RX); \ | ||
265 | \ | ||
266 | g2_16(b, RY); \ | ||
267 | \ | ||
268 | decrypt_round_end16(a, b, c, d, nk); | ||
269 | |||
270 | #define encrypt_cycle16() \ | ||
271 | encrypt_round16(RA, RB, RC, RD, 0); \ | ||
272 | encrypt_round16(RC, RD, RA, RB, 8); | ||
273 | |||
274 | #define encrypt_cycle_first16() \ | ||
275 | encrypt_round_first16(RA, RB, RC, RD, 0); \ | ||
276 | encrypt_round16(RC, RD, RA, RB, 8); | ||
277 | |||
278 | #define encrypt_cycle_last16() \ | ||
279 | encrypt_round16(RA, RB, RC, RD, 0); \ | ||
280 | encrypt_round_last16(RC, RD, RA, RB, 8); | ||
281 | |||
282 | #define decrypt_cycle16(n) \ | ||
283 | decrypt_round16(RC, RD, RA, RB, 8); \ | ||
284 | decrypt_round16(RA, RB, RC, RD, 0); | ||
285 | |||
286 | #define decrypt_cycle_first16(n) \ | ||
287 | decrypt_round_first16(RC, RD, RA, RB, 8); \ | ||
288 | decrypt_round16(RA, RB, RC, RD, 0); | ||
289 | |||
290 | #define decrypt_cycle_last16(n) \ | ||
291 | decrypt_round16(RC, RD, RA, RB, 8); \ | ||
292 | decrypt_round_last16(RA, RB, RC, RD, 0); | ||
293 | |||
294 | #define transpose_4x4(x0,x1,x2,x3,t1,t2) \ | ||
295 | vpunpckhdq x1, x0, t2; \ | ||
296 | vpunpckldq x1, x0, x0; \ | ||
297 | \ | ||
298 | vpunpckldq x3, x2, t1; \ | ||
299 | vpunpckhdq x3, x2, x2; \ | ||
300 | \ | ||
301 | vpunpckhqdq t1, x0, x1; \ | ||
302 | vpunpcklqdq t1, x0, x0; \ | ||
303 | \ | ||
304 | vpunpckhqdq x2, t2, x3; \ | ||
305 | vpunpcklqdq x2, t2, x2; | ||
306 | |||
307 | #define read_blocks8(offs,a,b,c,d) \ | ||
308 | transpose_4x4(a, b, c, d, RX0, RY0); | ||
309 | |||
310 | #define write_blocks8(offs,a,b,c,d) \ | ||
311 | transpose_4x4(a, b, c, d, RX0, RY0); | ||
312 | |||
/*
 * Input whitening for encryption: XOR key-schedule words w[0]..w[3]
 * (each broadcast from the subkey array at RW) into a, b, c, d.
 */
#define inpack_enc8(a,b,c,d) \
	vpbroadcastd 4*0(RW), RT0; \
	vpxor RT0, a, a; \
	\
	vpbroadcastd 4*1(RW), RT0; \
	vpxor RT0, b, b; \
	\
	vpbroadcastd 4*2(RW), RT0; \
	vpxor RT0, c, c; \
	\
	vpbroadcastd 4*3(RW), RT0; \
	vpxor RT0, d, d;

/*
 * Output whitening for encryption, fused with the final half-swap:
 * (c,d) is whitened with w[4],w[5] and moved into (a,b), while the old
 * (a,b) is whitened with w[6],w[7] and moved into (c,d).
 */
#define outunpack_enc8(a,b,c,d) \
	vpbroadcastd 4*4(RW), RX0; \
	vpbroadcastd 4*5(RW), RY0; \
	vpxor RX0, c, RX0; \
	vpxor RY0, d, RY0; \
	\
	vpbroadcastd 4*6(RW), RT0; \
	vpxor RT0, a, c; \
	vpbroadcastd 4*7(RW), RT0; \
	vpxor RT0, b, d; \
	\
	vmovdqa RX0, a; \
	vmovdqa RY0, b;

/*
 * Input whitening for decryption: exact mirror of outunpack_enc8 —
 * undoes the w[4..7] whitening and the half-swap.
 */
#define inpack_dec8(a,b,c,d) \
	vpbroadcastd 4*4(RW), RX0; \
	vpbroadcastd 4*5(RW), RY0; \
	vpxor RX0, a, RX0; \
	vpxor RY0, b, RY0; \
	\
	vpbroadcastd 4*6(RW), RT0; \
	vpxor RT0, c, a; \
	vpbroadcastd 4*7(RW), RT0; \
	vpxor RT0, d, b; \
	\
	vmovdqa RX0, c; \
	vmovdqa RY0, d;

/* Output whitening for decryption: XOR w[0]..w[3] back out of a, b, c, d. */
#define outunpack_dec8(a,b,c,d) \
	vpbroadcastd 4*0(RW), RT0; \
	vpxor RT0, a, a; \
	\
	vpbroadcastd 4*1(RW), RT0; \
	vpxor RT0, b, b; \
	\
	vpbroadcastd 4*2(RW), RT0; \
	vpxor RT0, c, c; \
	\
	vpbroadcastd 4*3(RW), RT0; \
	vpxor RT0, d, d;
366 | |||
/*
 * 16-block variants: apply the corresponding 8-way macro to both
 * register banks (suffix 0 = blocks 0-7, suffix 1 = blocks 8-15).
 */
#define read_blocks16(a,b,c,d) \
	read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
	read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);

#define write_blocks16(a,b,c,d) \
	write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
	write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);

#define xor_blocks16(a,b,c,d) \
	xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
	xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);

#define inpack_enc16(a,b,c,d) \
	inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
	inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);

#define outunpack_enc16(a,b,c,d) \
	outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
	outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);

#define inpack_dec16(a,b,c,d) \
	inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
	inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);

#define outunpack_dec16(a,b,c,d) \
	outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
	outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
394 | |||
.align 8
__twofish_enc_blk16:
	/* input:
	 *	%rdi: ctx, CTX
	 *	RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
	 * output:
	 *	RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
	 */
	init_round_constants();

	/* column-transpose the blocks and apply input whitening w[0..3] */
	read_blocks16(RA, RB, RC, RD);
	inpack_enc16(RA, RB, RC, RD);

	/*
	 * 8 cycles x 2 rounds = the full 16 Twofish rounds:
	 * first cycle at RROUND=0, six looped cycles at RROUND=2,4,...,12,
	 * then the last cycle after the loop.
	 */
	xorl RROUNDd, RROUNDd;
	encrypt_cycle_first16();
	movl $2, RROUNDd;

	.align 4
.L__enc_loop:
	encrypt_cycle16();

	addl $2, RROUNDd;
	cmpl $14, RROUNDd;
	jne .L__enc_loop;

	encrypt_cycle_last16();

	/* output whitening w[4..7] (with half-swap) and transpose back */
	outunpack_enc16(RA, RB, RC, RD);
	write_blocks16(RA, RB, RC, RD);

	ret;
ENDPROC(__twofish_enc_blk16)
427 | |||
.align 8
__twofish_dec_blk16:
	/* input:
	 *	%rdi: ctx, CTX
	 *	RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
	 * output:
	 *	RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
	 */
	init_round_constants();

	/* column-transpose the blocks and undo output whitening (w[4..7]) */
	read_blocks16(RA, RB, RC, RD);
	inpack_dec16(RA, RB, RC, RD);

	/*
	 * Rounds run in reverse: first cycle at RROUND=14, six looped
	 * cycles counting down 12,10,...,2, last cycle once the counter
	 * hits zero — 8 cycles / 16 rounds, mirroring __twofish_enc_blk16.
	 */
	movl $14, RROUNDd;
	decrypt_cycle_first16();
	movl $12, RROUNDd;

	.align 4
.L__dec_loop:
	decrypt_cycle16();

	addl $-2, RROUNDd;
	jnz .L__dec_loop;

	decrypt_cycle_last16();

	/* strip input whitening w[0..3] and transpose back to block order */
	outunpack_dec16(RA, RB, RC, RD);
	write_blocks16(RA, RB, RC, RD);

	ret;
ENDPROC(__twofish_dec_blk16)
459 | |||
ENTRY(twofish_ecb_enc_16way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	/* avoid AVX<->SSE transition penalties around the SIMD section */
	vzeroupper;
	/* %r12 is callee-saved; presumably clobbered by the round code
	 * (init_round_constants is defined above this chunk) — confirm */
	pushq %r12;

	load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);

	call __twofish_enc_blk16;

	store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);

	popq %r12;
	vzeroupper;

	ret;
ENDPROC(twofish_ecb_enc_16way)
481 | |||
ENTRY(twofish_ecb_dec_16way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	/* same prologue/epilogue pattern as twofish_ecb_enc_16way */
	vzeroupper;
	pushq %r12;

	load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);

	call __twofish_dec_blk16;

	store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);

	popq %r12;
	vzeroupper;

	ret;
ENDPROC(twofish_ecb_dec_16way)
503 | |||
ENTRY(twofish_cbc_dec_16way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	vzeroupper;
	pushq %r12;

	load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);

	call __twofish_dec_blk16;

	/* store_cbc_16way presumably XORs the preceding ciphertext blocks
	 * (read from src) into the decrypted output to undo CBC chaining —
	 * macro defined in a shared header, confirm there */
	store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1,
			RX0);

	popq %r12;
	vzeroupper;

	ret;
ENDPROC(twofish_cbc_dec_16way)
526 | |||
ENTRY(twofish_ctr_16way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (16 blocks)
	 *	%rdx: src (16 blocks)
	 *	%rcx: iv (little endian, 128bit)
	 */

	vzeroupper;
	pushq %r12;

	/* materialize 16 successive counter blocks from the IV;
	 * .Lbswap128_mask presumably converts between the little-endian
	 * counter and the big-endian on-the-wire block — confirm in the
	 * constants section */
	load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
		       RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
		       RBYTE);

	call __twofish_enc_blk16;

	/* XOR the encrypted counter stream with src and write to dst */
	store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);

	popq %r12;
	vzeroupper;

	ret;
ENDPROC(twofish_ctr_16way)
551 | |||
.align 8
twofish_xts_crypt_16way:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (16 blocks)
	 *	%rdx: src (16 blocks)
	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
	 *	%r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16
	 */

	vzeroupper;
	pushq %r12;

	/* load src, XOR in the 16 successive tweaks (GF(2^128) doubling
	 * via the gf128mul shift/mask constants), and stash the tweaked
	 * values for the post-cipher XOR */
	load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
		       RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
		       .Lxts_gf128mul_and_shl1_mask_0,
		       .Lxts_gf128mul_and_shl1_mask_1);

	/* NOTE(review): plain indirect call; on post-Spectre kernels this
	 * would need CALL_NOSPEC/retpoline — era-appropriate here */
	call *%r8;

	store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);

	popq %r12;
	vzeroupper;

	ret;
ENDPROC(twofish_xts_crypt_16way)
579 | |||
ENTRY(twofish_xts_enc_16way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (16 blocks)
	 *	%rdx: src (16 blocks)
	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
	 */
	/* tail-call the common XTS path with the encrypt core selected */
	leaq __twofish_enc_blk16, %r8;
	jmp twofish_xts_crypt_16way;
ENDPROC(twofish_xts_enc_16way)
590 | |||
ENTRY(twofish_xts_dec_16way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (16 blocks)
	 *	%rdx: src (16 blocks)
	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
	 */
	/* tail-call the common XTS path with the decrypt core selected */
	leaq __twofish_dec_blk16, %r8;
	jmp twofish_xts_crypt_16way;
ENDPROC(twofish_xts_dec_16way)
diff --git a/arch/x86/crypto/twofish_avx2_glue.c b/arch/x86/crypto/twofish_avx2_glue.c new file mode 100644 index 000000000000..ce33b5be64ee --- /dev/null +++ b/arch/x86/crypto/twofish_avx2_glue.c | |||
@@ -0,0 +1,584 @@ | |||
1 | /* | ||
2 | * Glue Code for x86_64/AVX2 assembler optimized version of Twofish | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/crypto.h> | ||
16 | #include <linux/err.h> | ||
17 | #include <crypto/algapi.h> | ||
18 | #include <crypto/ctr.h> | ||
19 | #include <crypto/twofish.h> | ||
20 | #include <crypto/lrw.h> | ||
21 | #include <crypto/xts.h> | ||
22 | #include <asm/xcr.h> | ||
23 | #include <asm/xsave.h> | ||
24 | #include <asm/crypto/twofish.h> | ||
25 | #include <asm/crypto/ablk_helper.h> | ||
26 | #include <asm/crypto/glue_helper.h> | ||
27 | #include <crypto/scatterwalk.h> | ||
28 | |||
/* Widest parallel path provided by the AVX2 assembler (blocks per call). */
#define TF_AVX2_PARALLEL_BLOCKS 16

/* 16-way AVX2 parallel cipher functions (assembler implementations).
 * The cbc/ctr prototypes use void * and u128/le128 to match the
 * glue_helper fn_u union directly; the others are cast via the
 * GLUE_*_FUNC_CAST macros in the dispatch tables below. */
asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst,
				      const u8 *src);
asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst,
				      const u8 *src);
asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);

asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src,
				  le128 *iv);

asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst,
				      const u8 *src, le128 *iv);
asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst,
				      const u8 *src, le128 *iv);
45 | |||
/* 3-block encryption helper; the final 'false' presumably disables the
 * extra XOR step of __twofish_enc_blk_3way (used for its CTR variant) —
 * confirm against twofish_glue_3way. */
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
					const u8 *src)
{
	__twofish_enc_blk_3way(ctx, dst, src, false);
}
51 | |||
/*
 * ECB encryption dispatch table, widest path first (16-way AVX2, 8-way
 * AVX, 3-way, single block).  fpu_blocks_limit is 8 because the FPU is
 * first needed at the 8-way AVX path (see twofish_fpu_begin below).
 */
static const struct common_glue_ctx twofish_enc = {
	.num_funcs = 4,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
	}, {
		.num_blocks = 3,
		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
	} }
};
70 | |||
/* CTR dispatch table: same 16/8/3/1 tiering as twofish_enc. */
static const struct common_glue_ctx twofish_ctr = {
	.num_funcs = 4,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
	}, {
		.num_blocks = 3,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
	} }
};
89 | |||
/* XTS encryption dispatch table (no 3-way XTS helper exists, hence 3 tiers). */
static const struct common_glue_ctx twofish_enc_xts = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
	} }
};
105 | |||
/* ECB decryption dispatch table, mirroring twofish_enc. */
static const struct common_glue_ctx twofish_dec = {
	.num_funcs = 4,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
	}, {
		.num_blocks = 3,
		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
	} }
};
124 | |||
/* CBC decryption dispatch table (CBC decryption parallelizes; encryption
 * does not and goes through glue_cbc_encrypt_128bit instead). */
static const struct common_glue_ctx twofish_dec_cbc = {
	.num_funcs = 4,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
	}, {
		.num_blocks = 3,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
	} }
};
143 | |||
/* XTS decryption dispatch table, mirroring twofish_enc_xts. */
static const struct common_glue_ctx twofish_dec_xts = {
	.num_funcs = 3,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 16,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) }
	}, {
		.num_blocks = 8,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
	} }
};
159 | |||
/* blkcipher .encrypt hook for ECB: dispatch through the tiered table. */
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
}
165 | |||
/* blkcipher .decrypt hook for ECB. */
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
}
171 | |||
/* CBC encryption is inherently serial, so it always uses the 1-block
 * scalar cipher via the generic CBC-encrypt walker. */
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
				       dst, src, nbytes);
}
178 | |||
/* blkcipher .decrypt hook for CBC: parallel dispatch table. */
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
				       nbytes);
}
185 | |||
/* CTR is symmetric: the same routine serves .encrypt and .decrypt. */
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
}
191 | |||
/* Enable kernel FPU usage for the LRW callbacks when at least 8 blocks
 * remain; returns the (possibly updated) fpu_enabled state. */
static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	/* since reusing AVX functions, starts using FPU at 8 parallel blocks */
	return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
}
197 | |||
/* Release kernel FPU usage if twofish_fpu_begin enabled it. */
static inline void twofish_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
202 | |||
/*
 * Per-request state threaded through lrw_crypt() into the ECB callbacks:
 * the Twofish key schedule plus whether the kernel FPU is currently held.
 */
struct crypt_priv {
	struct twofish_ctx *ctx;
	bool fpu_enabled;
};
207 | |||
208 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
209 | { | ||
210 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
211 | struct crypt_priv *ctx = priv; | ||
212 | int i; | ||
213 | |||
214 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
215 | |||
216 | while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) { | ||
217 | twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst); | ||
218 | srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
219 | nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
220 | } | ||
221 | |||
222 | while (nbytes >= 8 * bsize) { | ||
223 | twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); | ||
224 | srcdst += bsize * 8; | ||
225 | nbytes -= bsize * 8; | ||
226 | } | ||
227 | |||
228 | while (nbytes >= 3 * bsize) { | ||
229 | twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); | ||
230 | srcdst += bsize * 3; | ||
231 | nbytes -= bsize * 3; | ||
232 | } | ||
233 | |||
234 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
235 | twofish_enc_blk(ctx->ctx, srcdst, srcdst); | ||
236 | } | ||
237 | |||
238 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
239 | { | ||
240 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
241 | struct crypt_priv *ctx = priv; | ||
242 | int i; | ||
243 | |||
244 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
245 | |||
246 | while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) { | ||
247 | twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst); | ||
248 | srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
249 | nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS; | ||
250 | } | ||
251 | |||
252 | while (nbytes >= 8 * bsize) { | ||
253 | twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); | ||
254 | srcdst += bsize * 8; | ||
255 | nbytes -= bsize * 8; | ||
256 | } | ||
257 | |||
258 | while (nbytes >= 3 * bsize) { | ||
259 | twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); | ||
260 | srcdst += bsize * 3; | ||
261 | nbytes -= bsize * 3; | ||
262 | } | ||
263 | |||
264 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
265 | twofish_dec_blk(ctx->ctx, srcdst, srcdst); | ||
266 | } | ||
267 | |||
/* LRW encryption: set up a tweak buffer sized for the widest (16-block)
 * path and hand the walk to the generic lrw_crypt() with our ECB callback. */
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[TF_AVX2_PARALLEL_BLOCKS];
	struct crypt_priv crypt_ctx = {
		.ctx = &ctx->twofish_ctx,
		.fpu_enabled = false,
	};
	struct lrw_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),

		.table_ctx = &ctx->lrw_table,
		.crypt_ctx = &crypt_ctx,
		.crypt_fn = encrypt_callback,
	};
	int ret;

	/* presumably cleared because the FPU is held across the walk and
	 * sleeping would be invalid then — confirm against glue_helper */
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	ret = lrw_crypt(desc, dst, src, nbytes, &req);
	/* release the FPU if any callback acquired it */
	twofish_fpu_end(crypt_ctx.fpu_enabled);

	return ret;
}
293 | |||
/* LRW decryption: identical to lrw_encrypt but with the decrypt callback. */
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[TF_AVX2_PARALLEL_BLOCKS];
	struct crypt_priv crypt_ctx = {
		.ctx = &ctx->twofish_ctx,
		.fpu_enabled = false,
	};
	struct lrw_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),

		.table_ctx = &ctx->lrw_table,
		.crypt_ctx = &crypt_ctx,
		.crypt_fn = decrypt_callback,
	};
	int ret;

	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	ret = lrw_crypt(desc, dst, src, nbytes, &req);
	/* release the FPU if any callback acquired it */
	twofish_fpu_end(crypt_ctx.fpu_enabled);

	return ret;
}
319 | |||
/* XTS encryption: tweak_ctx computes the per-sector tweak, crypt_ctx
 * does the data encryption. */
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);

	return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
				     XTS_TWEAK_CAST(twofish_enc_blk),
				     &ctx->tweak_ctx, &ctx->crypt_ctx);
}
329 | |||
/* XTS decryption.  Note: the tweak is ALWAYS computed with the cipher's
 * encryption direction per the XTS (IEEE P1619) construction, so
 * twofish_enc_blk here is intentional, not a copy-paste error. */
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);

	return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
				     XTS_TWEAK_CAST(twofish_enc_blk),
				     &ctx->tweak_ctx, &ctx->crypt_ctx);
}
339 | |||
/*
 * Algorithm registrations.  The first five "__"-prefixed entries are the
 * internal synchronous blkcipher implementations (priority 0, never
 * selected by name); the last five are the async ablk_helper wrappers
 * users actually get, at priority 500.
 */
static struct crypto_alg tf_algs[10] = { {
	/* internal synchronous ECB */
	.cra_name		= "__ecb-twofish-avx2",
	.cra_driver_name	= "__driver-ecb-twofish-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= TF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct twofish_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= TF_MIN_KEY_SIZE,
			.max_keysize	= TF_MAX_KEY_SIZE,
			.setkey		= twofish_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	/* internal synchronous CBC */
	.cra_name		= "__cbc-twofish-avx2",
	.cra_driver_name	= "__driver-cbc-twofish-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= TF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct twofish_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= TF_MIN_KEY_SIZE,
			.max_keysize	= TF_MAX_KEY_SIZE,
			.setkey		= twofish_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	/* internal synchronous CTR (stream mode: blocksize 1) */
	.cra_name		= "__ctr-twofish-avx2",
	.cra_driver_name	= "__driver-ctr-twofish-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct twofish_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= TF_MIN_KEY_SIZE,
			.max_keysize	= TF_MAX_KEY_SIZE,
			.ivsize		= TF_BLOCK_SIZE,
			.setkey		= twofish_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	/* internal synchronous LRW: key carries an extra block-size tweak
	 * key; cra_exit frees the LRW multiplication table */
	.cra_name		= "__lrw-twofish-avx2",
	.cra_driver_name	= "__driver-lrw-twofish-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= TF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_exit		= lrw_twofish_exit_tfm,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= TF_MIN_KEY_SIZE +
					  TF_BLOCK_SIZE,
			.max_keysize	= TF_MAX_KEY_SIZE +
					  TF_BLOCK_SIZE,
			.ivsize		= TF_BLOCK_SIZE,
			.setkey		= lrw_twofish_setkey,
			.encrypt	= lrw_encrypt,
			.decrypt	= lrw_decrypt,
		},
	},
}, {
	/* internal synchronous XTS: double-length key (data key + tweak key) */
	.cra_name		= "__xts-twofish-avx2",
	.cra_driver_name	= "__driver-xts-twofish-avx2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= TF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct twofish_xts_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= TF_MIN_KEY_SIZE * 2,
			.max_keysize	= TF_MAX_KEY_SIZE * 2,
			.ivsize		= TF_BLOCK_SIZE,
			.setkey		= xts_twofish_setkey,
			.encrypt	= xts_encrypt,
			.decrypt	= xts_decrypt,
		},
	},
}, {
	/* public async wrapper around __ecb-twofish-avx2 */
	.cra_name		= "ecb(twofish)",
	.cra_driver_name	= "ecb-twofish-avx2",
	.cra_priority		= 500,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= TF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= TF_MIN_KEY_SIZE,
			.max_keysize	= TF_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	/* public async CBC; encrypt uses __ablk_encrypt like the other
	 * twofish glue drivers — presumably because CBC encryption is
	 * serial anyway; confirm against ablk_helper */
	.cra_name		= "cbc(twofish)",
	.cra_driver_name	= "cbc-twofish-avx2",
	.cra_priority		= 500,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= TF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= TF_MIN_KEY_SIZE,
			.max_keysize	= TF_MAX_KEY_SIZE,
			.ivsize		= TF_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	/* public async CTR; decrypt == encrypt since CTR is symmetric */
	.cra_name		= "ctr(twofish)",
	.cra_driver_name	= "ctr-twofish-avx2",
	.cra_priority		= 500,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= TF_MIN_KEY_SIZE,
			.max_keysize	= TF_MAX_KEY_SIZE,
			.ivsize		= TF_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
}, {
	/* public async LRW */
	.cra_name		= "lrw(twofish)",
	.cra_driver_name	= "lrw-twofish-avx2",
	.cra_priority		= 500,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= TF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= TF_MIN_KEY_SIZE +
					  TF_BLOCK_SIZE,
			.max_keysize	= TF_MAX_KEY_SIZE +
					  TF_BLOCK_SIZE,
			.ivsize		= TF_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	/* public async XTS */
	.cra_name		= "xts(twofish)",
	.cra_driver_name	= "xts-twofish-avx2",
	.cra_priority		= 500,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= TF_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= TF_MIN_KEY_SIZE * 2,
			.max_keysize	= TF_MAX_KEY_SIZE * 2,
			.ivsize		= TF_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
} };
554 | |||
555 | static int __init init(void) | ||
556 | { | ||
557 | u64 xcr0; | ||
558 | |||
559 | if (!cpu_has_avx2 || !cpu_has_osxsave) { | ||
560 | pr_info("AVX2 instructions are not detected.\n"); | ||
561 | return -ENODEV; | ||
562 | } | ||
563 | |||
564 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
565 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
566 | pr_info("AVX2 detected but unusable.\n"); | ||
567 | return -ENODEV; | ||
568 | } | ||
569 | |||
570 | return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); | ||
571 | } | ||
572 | |||
/* Module exit: unregister everything init() registered. */
static void __exit fini(void)
{
	crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
577 | |||
578 | module_init(init); | ||
579 | module_exit(fini); | ||
580 | |||
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized");
/* aliases let this module satisfy generic "twofish" requests */
MODULE_ALIAS("twofish");
MODULE_ALIAS("twofish-asm");
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 94ac91d26e47..2047a562f6b3 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -4,6 +4,8 @@ | |||
4 | * Copyright (C) 2012 Johannes Goetzfried | 4 | * Copyright (C) 2012 Johannes Goetzfried |
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | 5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> |
6 | * | 6 | * |
7 | * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
8 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or | 11 | * the Free Software Foundation; either version 2 of the License, or |
@@ -48,13 +50,26 @@ | |||
48 | /* 8-way parallel cipher functions */ | 50 | /* 8-way parallel cipher functions */ |
49 | asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, | 51 | asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, |
50 | const u8 *src); | 52 | const u8 *src); |
53 | EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way); | ||
54 | |||
51 | asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, | 55 | asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, |
52 | const u8 *src); | 56 | const u8 *src); |
57 | EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way); | ||
53 | 58 | ||
54 | asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, | 59 | asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, |
55 | const u8 *src); | 60 | const u8 *src); |
61 | EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way); | ||
62 | |||
56 | asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, | 63 | asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, |
57 | const u8 *src, le128 *iv); | 64 | const u8 *src, le128 *iv); |
65 | EXPORT_SYMBOL_GPL(twofish_ctr_8way); | ||
66 | |||
67 | asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, | ||
68 | const u8 *src, le128 *iv); | ||
69 | EXPORT_SYMBOL_GPL(twofish_xts_enc_8way); | ||
70 | asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
71 | const u8 *src, le128 *iv); | ||
72 | EXPORT_SYMBOL_GPL(twofish_xts_dec_8way); | ||
58 | 73 | ||
59 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | 74 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
60 | const u8 *src) | 75 | const u8 *src) |
@@ -62,6 +77,20 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | |||
62 | __twofish_enc_blk_3way(ctx, dst, src, false); | 77 | __twofish_enc_blk_3way(ctx, dst, src, false); |
63 | } | 78 | } |
64 | 79 | ||
80 | void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
81 | { | ||
82 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | ||
83 | GLUE_FUNC_CAST(twofish_enc_blk)); | ||
84 | } | ||
85 | EXPORT_SYMBOL_GPL(twofish_xts_enc); | ||
86 | |||
87 | void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
88 | { | ||
89 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | ||
90 | GLUE_FUNC_CAST(twofish_dec_blk)); | ||
91 | } | ||
92 | EXPORT_SYMBOL_GPL(twofish_xts_dec); | ||
93 | |||
65 | 94 | ||
66 | static const struct common_glue_ctx twofish_enc = { | 95 | static const struct common_glue_ctx twofish_enc = { |
67 | .num_funcs = 3, | 96 | .num_funcs = 3, |
@@ -95,6 +124,19 @@ static const struct common_glue_ctx twofish_ctr = { | |||
95 | } } | 124 | } } |
96 | }; | 125 | }; |
97 | 126 | ||
127 | static const struct common_glue_ctx twofish_enc_xts = { | ||
128 | .num_funcs = 2, | ||
129 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
130 | |||
131 | .funcs = { { | ||
132 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
133 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) } | ||
134 | }, { | ||
135 | .num_blocks = 1, | ||
136 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) } | ||
137 | } } | ||
138 | }; | ||
139 | |||
98 | static const struct common_glue_ctx twofish_dec = { | 140 | static const struct common_glue_ctx twofish_dec = { |
99 | .num_funcs = 3, | 141 | .num_funcs = 3, |
100 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | 142 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, |
@@ -127,6 +169,19 @@ static const struct common_glue_ctx twofish_dec_cbc = { | |||
127 | } } | 169 | } } |
128 | }; | 170 | }; |
129 | 171 | ||
172 | static const struct common_glue_ctx twofish_dec_xts = { | ||
173 | .num_funcs = 2, | ||
174 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
175 | |||
176 | .funcs = { { | ||
177 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
178 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) } | ||
179 | }, { | ||
180 | .num_blocks = 1, | ||
181 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) } | ||
182 | } } | ||
183 | }; | ||
184 | |||
130 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 185 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
131 | struct scatterlist *src, unsigned int nbytes) | 186 | struct scatterlist *src, unsigned int nbytes) |
132 | { | 187 | { |
@@ -275,54 +330,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
275 | struct scatterlist *src, unsigned int nbytes) | 330 | struct scatterlist *src, unsigned int nbytes) |
276 | { | 331 | { |
277 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 332 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); |
278 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
279 | struct crypt_priv crypt_ctx = { | ||
280 | .ctx = &ctx->crypt_ctx, | ||
281 | .fpu_enabled = false, | ||
282 | }; | ||
283 | struct xts_crypt_req req = { | ||
284 | .tbuf = buf, | ||
285 | .tbuflen = sizeof(buf), | ||
286 | 333 | ||
287 | .tweak_ctx = &ctx->tweak_ctx, | 334 | return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes, |
288 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | 335 | XTS_TWEAK_CAST(twofish_enc_blk), |
289 | .crypt_ctx = &crypt_ctx, | 336 | &ctx->tweak_ctx, &ctx->crypt_ctx); |
290 | .crypt_fn = encrypt_callback, | ||
291 | }; | ||
292 | int ret; | ||
293 | |||
294 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
295 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
296 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
297 | |||
298 | return ret; | ||
299 | } | 337 | } |
300 | 338 | ||
301 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 339 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
302 | struct scatterlist *src, unsigned int nbytes) | 340 | struct scatterlist *src, unsigned int nbytes) |
303 | { | 341 | { |
304 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 342 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); |
305 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
306 | struct crypt_priv crypt_ctx = { | ||
307 | .ctx = &ctx->crypt_ctx, | ||
308 | .fpu_enabled = false, | ||
309 | }; | ||
310 | struct xts_crypt_req req = { | ||
311 | .tbuf = buf, | ||
312 | .tbuflen = sizeof(buf), | ||
313 | |||
314 | .tweak_ctx = &ctx->tweak_ctx, | ||
315 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
316 | .crypt_ctx = &crypt_ctx, | ||
317 | .crypt_fn = decrypt_callback, | ||
318 | }; | ||
319 | int ret; | ||
320 | 343 | ||
321 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | 344 | return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes, |
322 | ret = xts_crypt(desc, dst, src, nbytes, &req); | 345 | XTS_TWEAK_CAST(twofish_enc_blk), |
323 | twofish_fpu_end(crypt_ctx.fpu_enabled); | 346 | &ctx->tweak_ctx, &ctx->crypt_ctx); |
324 | |||
325 | return ret; | ||
326 | } | 347 | } |
327 | 348 | ||
328 | static struct crypto_alg twofish_algs[10] = { { | 349 | static struct crypto_alg twofish_algs[10] = { { |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 8010ebc5705f..e99ac27f95b2 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -293,6 +293,7 @@ extern const char * const x86_power_flags[32]; | |||
293 | #define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3) | 293 | #define cpu_has_ssse3 boot_cpu_has(X86_FEATURE_SSSE3) |
294 | #define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) | 294 | #define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) |
295 | #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) | 295 | #define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) |
296 | #define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2) | ||
296 | #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) | 297 | #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) |
297 | #define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) | 298 | #define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) |
298 | #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) | 299 | #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) |
diff --git a/arch/x86/include/asm/crypto/blowfish.h b/arch/x86/include/asm/crypto/blowfish.h new file mode 100644 index 000000000000..f097b2face10 --- /dev/null +++ b/arch/x86/include/asm/crypto/blowfish.h | |||
@@ -0,0 +1,43 @@ | |||
1 | #ifndef ASM_X86_BLOWFISH_H | ||
2 | #define ASM_X86_BLOWFISH_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | #include <crypto/blowfish.h> | ||
6 | |||
7 | #define BF_PARALLEL_BLOCKS 4 | ||
8 | |||
9 | /* regular block cipher functions */ | ||
10 | asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, | ||
11 | bool xor); | ||
12 | asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); | ||
13 | |||
14 | /* 4-way parallel cipher functions */ | ||
15 | asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
16 | const u8 *src, bool xor); | ||
17 | asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
18 | const u8 *src); | ||
19 | |||
20 | static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) | ||
21 | { | ||
22 | __blowfish_enc_blk(ctx, dst, src, false); | ||
23 | } | ||
24 | |||
25 | static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, | ||
26 | const u8 *src) | ||
27 | { | ||
28 | __blowfish_enc_blk(ctx, dst, src, true); | ||
29 | } | ||
30 | |||
31 | static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
32 | const u8 *src) | ||
33 | { | ||
34 | __blowfish_enc_blk_4way(ctx, dst, src, false); | ||
35 | } | ||
36 | |||
37 | static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, | ||
38 | const u8 *src) | ||
39 | { | ||
40 | __blowfish_enc_blk_4way(ctx, dst, src, true); | ||
41 | } | ||
42 | |||
43 | #endif | ||
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index 98038add801e..bb93333d9200 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h | |||
@@ -48,6 +48,22 @@ asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, | |||
48 | asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, | 48 | asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, |
49 | const u8 *src); | 49 | const u8 *src); |
50 | 50 | ||
51 | /* 16-way parallel cipher functions (avx/aes-ni) */ | ||
52 | asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, | ||
53 | const u8 *src); | ||
54 | asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, | ||
55 | const u8 *src); | ||
56 | |||
57 | asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, | ||
58 | const u8 *src); | ||
59 | asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, | ||
60 | const u8 *src, le128 *iv); | ||
61 | |||
62 | asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst, | ||
63 | const u8 *src, le128 *iv); | ||
64 | asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst, | ||
65 | const u8 *src, le128 *iv); | ||
66 | |||
51 | static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, | 67 | static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, |
52 | const u8 *src) | 68 | const u8 *src) |
53 | { | 69 | { |
@@ -79,4 +95,7 @@ extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, | |||
79 | extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, | 95 | extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, |
80 | le128 *iv); | 96 | le128 *iv); |
81 | 97 | ||
98 | extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
99 | extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
100 | |||
82 | #endif /* ASM_X86_CAMELLIA_H */ | 101 | #endif /* ASM_X86_CAMELLIA_H */ |
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index e2d65b061d27..1eef55596e82 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h | |||
@@ -14,10 +14,13 @@ typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); | |||
14 | typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); | 14 | typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); |
15 | typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, | 15 | typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, |
16 | le128 *iv); | 16 | le128 *iv); |
17 | typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src, | ||
18 | le128 *iv); | ||
17 | 19 | ||
18 | #define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) | 20 | #define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) |
19 | #define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) | 21 | #define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) |
20 | #define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) | 22 | #define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) |
23 | #define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn)) | ||
21 | 24 | ||
22 | struct common_glue_func_entry { | 25 | struct common_glue_func_entry { |
23 | unsigned int num_blocks; /* number of blocks that @fn will process */ | 26 | unsigned int num_blocks; /* number of blocks that @fn will process */ |
@@ -25,6 +28,7 @@ struct common_glue_func_entry { | |||
25 | common_glue_func_t ecb; | 28 | common_glue_func_t ecb; |
26 | common_glue_cbc_func_t cbc; | 29 | common_glue_cbc_func_t cbc; |
27 | common_glue_ctr_func_t ctr; | 30 | common_glue_ctr_func_t ctr; |
31 | common_glue_xts_func_t xts; | ||
28 | } fn_u; | 32 | } fn_u; |
29 | }; | 33 | }; |
30 | 34 | ||
@@ -96,6 +100,16 @@ static inline void le128_inc(le128 *i) | |||
96 | i->b = cpu_to_le64(b); | 100 | i->b = cpu_to_le64(b); |
97 | } | 101 | } |
98 | 102 | ||
103 | static inline void le128_gf128mul_x_ble(le128 *dst, const le128 *src) | ||
104 | { | ||
105 | u64 a = le64_to_cpu(src->a); | ||
106 | u64 b = le64_to_cpu(src->b); | ||
107 | u64 _tt = ((s64)a >> 63) & 0x87; | ||
108 | |||
109 | dst->a = cpu_to_le64((a << 1) ^ (b >> 63)); | ||
110 | dst->b = cpu_to_le64((b << 1) ^ _tt); | ||
111 | } | ||
112 | |||
99 | extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | 113 | extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, |
100 | struct blkcipher_desc *desc, | 114 | struct blkcipher_desc *desc, |
101 | struct scatterlist *dst, | 115 | struct scatterlist *dst, |
@@ -118,4 +132,14 @@ extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |||
118 | struct scatterlist *dst, | 132 | struct scatterlist *dst, |
119 | struct scatterlist *src, unsigned int nbytes); | 133 | struct scatterlist *src, unsigned int nbytes); |
120 | 134 | ||
135 | extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | ||
136 | struct blkcipher_desc *desc, | ||
137 | struct scatterlist *dst, | ||
138 | struct scatterlist *src, unsigned int nbytes, | ||
139 | common_glue_func_t tweak_fn, void *tweak_ctx, | ||
140 | void *crypt_ctx); | ||
141 | |||
142 | extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, | ||
143 | le128 *iv, common_glue_func_t fn); | ||
144 | |||
121 | #endif /* _CRYPTO_GLUE_HELPER_H */ | 145 | #endif /* _CRYPTO_GLUE_HELPER_H */ |
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index 0da1d3e2a55c..33c2b8a435da 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h | |||
@@ -6,6 +6,16 @@ | |||
6 | 6 | ||
7 | #define SERPENT_PARALLEL_BLOCKS 8 | 7 | #define SERPENT_PARALLEL_BLOCKS 8 |
8 | 8 | ||
9 | struct serpent_lrw_ctx { | ||
10 | struct lrw_table_ctx lrw_table; | ||
11 | struct serpent_ctx serpent_ctx; | ||
12 | }; | ||
13 | |||
14 | struct serpent_xts_ctx { | ||
15 | struct serpent_ctx tweak_ctx; | ||
16 | struct serpent_ctx crypt_ctx; | ||
17 | }; | ||
18 | |||
9 | asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, | 19 | asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, |
10 | const u8 *src); | 20 | const u8 *src); |
11 | asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | 21 | asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, |
@@ -16,4 +26,23 @@ asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | |||
16 | asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, | 26 | asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, |
17 | const u8 *src, le128 *iv); | 27 | const u8 *src, le128 *iv); |
18 | 28 | ||
29 | asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
30 | const u8 *src, le128 *iv); | ||
31 | asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
32 | const u8 *src, le128 *iv); | ||
33 | |||
34 | extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, | ||
35 | le128 *iv); | ||
36 | |||
37 | extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
38 | extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
39 | |||
40 | extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
41 | unsigned int keylen); | ||
42 | |||
43 | extern void lrw_serpent_exit_tfm(struct crypto_tfm *tfm); | ||
44 | |||
45 | extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
46 | unsigned int keylen); | ||
47 | |||
19 | #endif | 48 | #endif |
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index 878c51ceebb5..e655c6029b45 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h | |||
@@ -28,6 +28,20 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | |||
28 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, | 28 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
29 | const u8 *src); | 29 | const u8 *src); |
30 | 30 | ||
31 | /* 8-way parallel cipher functions */ | ||
32 | asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, | ||
33 | const u8 *src); | ||
34 | asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
35 | const u8 *src); | ||
36 | asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
37 | const u8 *src); | ||
38 | asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, | ||
39 | const u8 *src, le128 *iv); | ||
40 | asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, | ||
41 | const u8 *src, le128 *iv); | ||
42 | asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
43 | const u8 *src, le128 *iv); | ||
44 | |||
31 | /* helpers from twofish_x86_64-3way module */ | 45 | /* helpers from twofish_x86_64-3way module */ |
32 | extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); | 46 | extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); |
33 | extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, | 47 | extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, |
@@ -43,4 +57,8 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); | |||
43 | extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | 57 | extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
44 | unsigned int keylen); | 58 | unsigned int keylen); |
45 | 59 | ||
60 | /* helpers from twofish-avx module */ | ||
61 | extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
62 | extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
63 | |||
46 | #endif /* ASM_X86_TWOFISH_H */ | 64 | #endif /* ASM_X86_TWOFISH_H */ |
diff --git a/crypto/Kconfig b/crypto/Kconfig index 05c0ce52f96d..622d8a48cbe9 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig | |||
@@ -198,6 +198,7 @@ config CRYPTO_GCM | |||
198 | select CRYPTO_CTR | 198 | select CRYPTO_CTR |
199 | select CRYPTO_AEAD | 199 | select CRYPTO_AEAD |
200 | select CRYPTO_GHASH | 200 | select CRYPTO_GHASH |
201 | select CRYPTO_NULL | ||
201 | help | 202 | help |
202 | Support for Galois/Counter Mode (GCM) and Galois Message | 203 | Support for Galois/Counter Mode (GCM) and Galois Message |
203 | Authentication Code (GMAC). Required for IPSec. | 204 | Authentication Code (GMAC). Required for IPSec. |
@@ -282,6 +283,17 @@ config CRYPTO_XTS | |||
282 | 283 | ||
283 | comment "Hash modes" | 284 | comment "Hash modes" |
284 | 285 | ||
286 | config CRYPTO_CMAC | ||
287 | tristate "CMAC support" | ||
288 | select CRYPTO_HASH | ||
289 | select CRYPTO_MANAGER | ||
290 | help | ||
291 | Cipher-based Message Authentication Code (CMAC) specified by | ||
292 | The National Institute of Standards and Technology (NIST). | ||
293 | |||
294 | https://tools.ietf.org/html/rfc4493 | ||
295 | http://csrc.nist.gov/publications/nistpubs/800-38B/SP_800-38B.pdf | ||
296 | |||
285 | config CRYPTO_HMAC | 297 | config CRYPTO_HMAC |
286 | tristate "HMAC support" | 298 | tristate "HMAC support" |
287 | select CRYPTO_HASH | 299 | select CRYPTO_HASH |
@@ -322,19 +334,9 @@ config CRYPTO_CRC32C | |||
322 | by iSCSI for header and data digests and by others. | 334 | by iSCSI for header and data digests and by others. |
323 | See Castagnoli93. Module will be crc32c. | 335 | See Castagnoli93. Module will be crc32c. |
324 | 336 | ||
325 | config CRYPTO_CRC32C_X86_64 | ||
326 | bool | ||
327 | depends on X86 && 64BIT | ||
328 | select CRYPTO_HASH | ||
329 | help | ||
330 | In Intel processor with SSE4.2 supported, the processor will | ||
331 | support CRC32C calculation using hardware accelerated CRC32 | ||
332 | instruction optimized with PCLMULQDQ instruction when available. | ||
333 | |||
334 | config CRYPTO_CRC32C_INTEL | 337 | config CRYPTO_CRC32C_INTEL |
335 | tristate "CRC32c INTEL hardware acceleration" | 338 | tristate "CRC32c INTEL hardware acceleration" |
336 | depends on X86 | 339 | depends on X86 |
337 | select CRYPTO_CRC32C_X86_64 if 64BIT | ||
338 | select CRYPTO_HASH | 340 | select CRYPTO_HASH |
339 | help | 341 | help |
340 | In Intel processor with SSE4.2 supported, the processor will | 342 | In Intel processor with SSE4.2 supported, the processor will |
@@ -480,6 +482,28 @@ config CRYPTO_SHA1_SSSE3 | |||
480 | using Supplemental SSE3 (SSSE3) instructions or Advanced Vector | 482 | using Supplemental SSE3 (SSSE3) instructions or Advanced Vector |
481 | Extensions (AVX), when available. | 483 | Extensions (AVX), when available. |
482 | 484 | ||
485 | config CRYPTO_SHA256_SSSE3 | ||
486 | tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2)" | ||
487 | depends on X86 && 64BIT | ||
488 | select CRYPTO_SHA256 | ||
489 | select CRYPTO_HASH | ||
490 | help | ||
491 | SHA-256 secure hash standard (DFIPS 180-2) implemented | ||
492 | using Supplemental SSE3 (SSSE3) instructions, or Advanced Vector | ||
493 | Extensions version 1 (AVX1), or Advanced Vector Extensions | ||
494 | version 2 (AVX2) instructions, when available. | ||
495 | |||
496 | config CRYPTO_SHA512_SSSE3 | ||
497 | tristate "SHA512 digest algorithm (SSSE3/AVX/AVX2)" | ||
498 | depends on X86 && 64BIT | ||
499 | select CRYPTO_SHA512 | ||
500 | select CRYPTO_HASH | ||
501 | help | ||
502 | SHA-512 secure hash standard (DFIPS 180-2) implemented | ||
503 | using Supplemental SSE3 (SSSE3) instructions, or Advanced Vector | ||
504 | Extensions version 1 (AVX1), or Advanced Vector Extensions | ||
505 | version 2 (AVX2) instructions, when available. | ||
506 | |||
483 | config CRYPTO_SHA1_SPARC64 | 507 | config CRYPTO_SHA1_SPARC64 |
484 | tristate "SHA1 digest algorithm (SPARC64)" | 508 | tristate "SHA1 digest algorithm (SPARC64)" |
485 | depends on SPARC64 | 509 | depends on SPARC64 |
@@ -654,6 +678,7 @@ config CRYPTO_AES_NI_INTEL | |||
654 | select CRYPTO_CRYPTD | 678 | select CRYPTO_CRYPTD |
655 | select CRYPTO_ABLK_HELPER_X86 | 679 | select CRYPTO_ABLK_HELPER_X86 |
656 | select CRYPTO_ALGAPI | 680 | select CRYPTO_ALGAPI |
681 | select CRYPTO_GLUE_HELPER_X86 if 64BIT | ||
657 | select CRYPTO_LRW | 682 | select CRYPTO_LRW |
658 | select CRYPTO_XTS | 683 | select CRYPTO_XTS |
659 | help | 684 | help |
@@ -795,6 +820,24 @@ config CRYPTO_BLOWFISH_X86_64 | |||
795 | See also: | 820 | See also: |
796 | <http://www.schneier.com/blowfish.html> | 821 | <http://www.schneier.com/blowfish.html> |
797 | 822 | ||
823 | config CRYPTO_BLOWFISH_AVX2_X86_64 | ||
824 | tristate "Blowfish cipher algorithm (x86_64/AVX2)" | ||
825 | depends on X86 && 64BIT | ||
826 | select CRYPTO_ALGAPI | ||
827 | select CRYPTO_CRYPTD | ||
828 | select CRYPTO_ABLK_HELPER_X86 | ||
829 | select CRYPTO_BLOWFISH_COMMON | ||
830 | select CRYPTO_BLOWFISH_X86_64 | ||
831 | help | ||
832 | Blowfish cipher algorithm (x86_64/AVX2), by Bruce Schneier. | ||
833 | |||
834 | This is a variable key length cipher which can use keys from 32 | ||
835 | bits to 448 bits in length. It's fast, simple and specifically | ||
836 | designed for use on "large microprocessors". | ||
837 | |||
838 | See also: | ||
839 | <http://www.schneier.com/blowfish.html> | ||
840 | |||
798 | config CRYPTO_CAMELLIA | 841 | config CRYPTO_CAMELLIA |
799 | tristate "Camellia cipher algorithms" | 842 | tristate "Camellia cipher algorithms" |
800 | depends on CRYPTO | 843 | depends on CRYPTO |
@@ -851,6 +894,29 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64 | |||
851 | See also: | 894 | See also: |
852 | <https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html> | 895 | <https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html> |
853 | 896 | ||
897 | config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 | ||
898 | tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX2)" | ||
899 | depends on X86 && 64BIT | ||
900 | depends on CRYPTO | ||
901 | select CRYPTO_ALGAPI | ||
902 | select CRYPTO_CRYPTD | ||
903 | select CRYPTO_ABLK_HELPER_X86 | ||
904 | select CRYPTO_GLUE_HELPER_X86 | ||
905 | select CRYPTO_CAMELLIA_X86_64 | ||
906 | select CRYPTO_CAMELLIA_AESNI_AVX_X86_64 | ||
907 | select CRYPTO_LRW | ||
908 | select CRYPTO_XTS | ||
909 | help | ||
910 | Camellia cipher algorithm module (x86_64/AES-NI/AVX2). | ||
911 | |||
912 | Camellia is a symmetric key block cipher developed jointly | ||
913 | at NTT and Mitsubishi Electric Corporation. | ||
914 | |||
915 | The Camellia specifies three key sizes: 128, 192 and 256 bits. | ||
916 | |||
917 | See also: | ||
918 | <https://info.isl.ntt.co.jp/crypt/eng/camellia/index_s.html> | ||
919 | |||
854 | config CRYPTO_CAMELLIA_SPARC64 | 920 | config CRYPTO_CAMELLIA_SPARC64 |
855 | tristate "Camellia cipher algorithm (SPARC64)" | 921 | tristate "Camellia cipher algorithm (SPARC64)" |
856 | depends on SPARC64 | 922 | depends on SPARC64 |
@@ -1088,6 +1154,29 @@ config CRYPTO_SERPENT_AVX_X86_64 | |||
1088 | See also: | 1154 | See also: |
1089 | <http://www.cl.cam.ac.uk/~rja14/serpent.html> | 1155 | <http://www.cl.cam.ac.uk/~rja14/serpent.html> |
1090 | 1156 | ||
1157 | config CRYPTO_SERPENT_AVX2_X86_64 | ||
1158 | tristate "Serpent cipher algorithm (x86_64/AVX2)" | ||
1159 | depends on X86 && 64BIT | ||
1160 | select CRYPTO_ALGAPI | ||
1161 | select CRYPTO_CRYPTD | ||
1162 | select CRYPTO_ABLK_HELPER_X86 | ||
1163 | select CRYPTO_GLUE_HELPER_X86 | ||
1164 | select CRYPTO_SERPENT | ||
1165 | select CRYPTO_SERPENT_AVX_X86_64 | ||
1166 | select CRYPTO_LRW | ||
1167 | select CRYPTO_XTS | ||
1168 | help | ||
1169 | Serpent cipher algorithm, by Anderson, Biham & Knudsen. | ||
1170 | |||
1171 | Keys are allowed to be from 0 to 256 bits in length, in steps | ||
1172 | of 8 bits. | ||
1173 | |||
1174 | This module provides Serpent cipher algorithm that processes 16 | ||
1175 | blocks parallel using AVX2 instruction set. | ||
1176 | |||
1177 | See also: | ||
1178 | <http://www.cl.cam.ac.uk/~rja14/serpent.html> | ||
1179 | |||
1091 | config CRYPTO_TEA | 1180 | config CRYPTO_TEA |
1092 | tristate "TEA, XTEA and XETA cipher algorithms" | 1181 | tristate "TEA, XTEA and XETA cipher algorithms" |
1093 | select CRYPTO_ALGAPI | 1182 | select CRYPTO_ALGAPI |
@@ -1207,6 +1296,30 @@ config CRYPTO_TWOFISH_AVX_X86_64 | |||
1207 | See also: | 1296 | See also: |
1208 | <http://www.schneier.com/twofish.html> | 1297 | <http://www.schneier.com/twofish.html> |
1209 | 1298 | ||
1299 | config CRYPTO_TWOFISH_AVX2_X86_64 | ||
1300 | tristate "Twofish cipher algorithm (x86_64/AVX2)" | ||
1301 | depends on X86 && 64BIT | ||
1302 | select CRYPTO_ALGAPI | ||
1303 | select CRYPTO_CRYPTD | ||
1304 | select CRYPTO_ABLK_HELPER_X86 | ||
1305 | select CRYPTO_GLUE_HELPER_X86 | ||
1306 | select CRYPTO_TWOFISH_COMMON | ||
1307 | select CRYPTO_TWOFISH_X86_64 | ||
1308 | select CRYPTO_TWOFISH_X86_64_3WAY | ||
1309 | select CRYPTO_TWOFISH_AVX_X86_64 | ||
1310 | select CRYPTO_LRW | ||
1311 | select CRYPTO_XTS | ||
1312 | help | ||
1313 | Twofish cipher algorithm (x86_64/AVX2). | ||
1314 | |||
1315 | Twofish was submitted as an AES (Advanced Encryption Standard) | ||
1316 | candidate cipher by researchers at CounterPane Systems. It is a | ||
1317 | 16 round block cipher supporting key sizes of 128, 192, and 256 | ||
1318 | bits. | ||
1319 | |||
1320 | See also: | ||
1321 | <http://www.schneier.com/twofish.html> | ||
1322 | |||
1210 | comment "Compression" | 1323 | comment "Compression" |
1211 | 1324 | ||
1212 | config CRYPTO_DEFLATE | 1325 | config CRYPTO_DEFLATE |
diff --git a/crypto/Makefile b/crypto/Makefile index be1a1bebbb86..a8e9b0fefbe9 100644 --- a/crypto/Makefile +++ b/crypto/Makefile | |||
@@ -32,6 +32,7 @@ cryptomgr-y := algboss.o testmgr.o | |||
32 | 32 | ||
33 | obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o | 33 | obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o |
34 | obj-$(CONFIG_CRYPTO_USER) += crypto_user.o | 34 | obj-$(CONFIG_CRYPTO_USER) += crypto_user.o |
35 | obj-$(CONFIG_CRYPTO_CMAC) += cmac.o | ||
35 | obj-$(CONFIG_CRYPTO_HMAC) += hmac.o | 36 | obj-$(CONFIG_CRYPTO_HMAC) += hmac.o |
36 | obj-$(CONFIG_CRYPTO_VMAC) += vmac.o | 37 | obj-$(CONFIG_CRYPTO_VMAC) += vmac.o |
37 | obj-$(CONFIG_CRYPTO_XCBC) += xcbc.o | 38 | obj-$(CONFIG_CRYPTO_XCBC) += xcbc.o |
diff --git a/crypto/cmac.c b/crypto/cmac.c new file mode 100644 index 000000000000..50880cf17fad --- /dev/null +++ b/crypto/cmac.c | |||
@@ -0,0 +1,315 @@ | |||
1 | /* | ||
2 | * CMAC: Cipher Block Mode for Authentication | ||
3 | * | ||
4 | * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
5 | * | ||
6 | * Based on work by: | ||
7 | * Copyright © 2013 Tom St Denis <tstdenis@elliptictech.com> | ||
8 | * Based on crypto/xcbc.c: | ||
9 | * Copyright © 2006 USAGI/WIDE Project, | ||
10 | * Author: Kazunori Miyazawa <miyazawa@linux-ipv6.org> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify | ||
13 | * it under the terms of the GNU General Public License as published by | ||
14 | * the Free Software Foundation; either version 2 of the License, or | ||
15 | * (at your option) any later version. | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <crypto/internal/hash.h> | ||
20 | #include <linux/err.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/module.h> | ||
23 | |||
24 | /* | ||
25 | * +------------------------ | ||
26 | * | <parent tfm> | ||
27 | * +------------------------ | ||
28 | * | cmac_tfm_ctx | ||
29 | * +------------------------ | ||
30 | * | consts (block size * 2) | ||
31 | * +------------------------ | ||
32 | */ | ||
33 | struct cmac_tfm_ctx { | ||
34 | struct crypto_cipher *child; | ||
35 | u8 ctx[]; | ||
36 | }; | ||
37 | |||
38 | /* | ||
39 | * +------------------------ | ||
40 | * | <shash desc> | ||
41 | * +------------------------ | ||
42 | * | cmac_desc_ctx | ||
43 | * +------------------------ | ||
44 | * | odds (block size) | ||
45 | * +------------------------ | ||
46 | * | prev (block size) | ||
47 | * +------------------------ | ||
48 | */ | ||
49 | struct cmac_desc_ctx { | ||
50 | unsigned int len; | ||
51 | u8 ctx[]; | ||
52 | }; | ||
53 | |||
54 | static int crypto_cmac_digest_setkey(struct crypto_shash *parent, | ||
55 | const u8 *inkey, unsigned int keylen) | ||
56 | { | ||
57 | unsigned long alignmask = crypto_shash_alignmask(parent); | ||
58 | struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent); | ||
59 | unsigned int bs = crypto_shash_blocksize(parent); | ||
60 | __be64 *consts = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); | ||
61 | u64 _const[2]; | ||
62 | int i, err = 0; | ||
63 | u8 msb_mask, gfmask; | ||
64 | |||
65 | err = crypto_cipher_setkey(ctx->child, inkey, keylen); | ||
66 | if (err) | ||
67 | return err; | ||
68 | |||
69 | /* encrypt the zero block */ | ||
70 | memset(consts, 0, bs); | ||
71 | crypto_cipher_encrypt_one(ctx->child, (u8 *)consts, (u8 *)consts); | ||
72 | |||
73 | switch (bs) { | ||
74 | case 16: | ||
75 | gfmask = 0x87; | ||
76 | _const[0] = be64_to_cpu(consts[1]); | ||
77 | _const[1] = be64_to_cpu(consts[0]); | ||
78 | |||
79 | /* gf(2^128) multiply zero-ciphertext with u and u^2 */ | ||
80 | for (i = 0; i < 4; i += 2) { | ||
81 | msb_mask = ((s64)_const[1] >> 63) & gfmask; | ||
82 | _const[1] = (_const[1] << 1) | (_const[0] >> 63); | ||
83 | _const[0] = (_const[0] << 1) ^ msb_mask; | ||
84 | |||
85 | consts[i + 0] = cpu_to_be64(_const[1]); | ||
86 | consts[i + 1] = cpu_to_be64(_const[0]); | ||
87 | } | ||
88 | |||
89 | break; | ||
90 | case 8: | ||
91 | gfmask = 0x1B; | ||
92 | _const[0] = be64_to_cpu(consts[0]); | ||
93 | |||
94 | /* gf(2^64) multiply zero-ciphertext with u and u^2 */ | ||
95 | for (i = 0; i < 2; i++) { | ||
96 | msb_mask = ((s64)_const[0] >> 63) & gfmask; | ||
97 | _const[0] = (_const[0] << 1) ^ msb_mask; | ||
98 | |||
99 | consts[i] = cpu_to_be64(_const[0]); | ||
100 | } | ||
101 | |||
102 | break; | ||
103 | } | ||
104 | |||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | static int crypto_cmac_digest_init(struct shash_desc *pdesc) | ||
109 | { | ||
110 | unsigned long alignmask = crypto_shash_alignmask(pdesc->tfm); | ||
111 | struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc); | ||
112 | int bs = crypto_shash_blocksize(pdesc->tfm); | ||
113 | u8 *prev = PTR_ALIGN((void *)ctx->ctx, alignmask + 1) + bs; | ||
114 | |||
115 | ctx->len = 0; | ||
116 | memset(prev, 0, bs); | ||
117 | |||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static int crypto_cmac_digest_update(struct shash_desc *pdesc, const u8 *p, | ||
122 | unsigned int len) | ||
123 | { | ||
124 | struct crypto_shash *parent = pdesc->tfm; | ||
125 | unsigned long alignmask = crypto_shash_alignmask(parent); | ||
126 | struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent); | ||
127 | struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc); | ||
128 | struct crypto_cipher *tfm = tctx->child; | ||
129 | int bs = crypto_shash_blocksize(parent); | ||
130 | u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); | ||
131 | u8 *prev = odds + bs; | ||
132 | |||
133 | /* checking the data can fill the block */ | ||
134 | if ((ctx->len + len) <= bs) { | ||
135 | memcpy(odds + ctx->len, p, len); | ||
136 | ctx->len += len; | ||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | /* filling odds with new data and encrypting it */ | ||
141 | memcpy(odds + ctx->len, p, bs - ctx->len); | ||
142 | len -= bs - ctx->len; | ||
143 | p += bs - ctx->len; | ||
144 | |||
145 | crypto_xor(prev, odds, bs); | ||
146 | crypto_cipher_encrypt_one(tfm, prev, prev); | ||
147 | |||
148 | /* clearing the length */ | ||
149 | ctx->len = 0; | ||
150 | |||
151 | /* encrypting the rest of data */ | ||
152 | while (len > bs) { | ||
153 | crypto_xor(prev, p, bs); | ||
154 | crypto_cipher_encrypt_one(tfm, prev, prev); | ||
155 | p += bs; | ||
156 | len -= bs; | ||
157 | } | ||
158 | |||
159 | /* keeping the surplus of blocksize */ | ||
160 | if (len) { | ||
161 | memcpy(odds, p, len); | ||
162 | ctx->len = len; | ||
163 | } | ||
164 | |||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | static int crypto_cmac_digest_final(struct shash_desc *pdesc, u8 *out) | ||
169 | { | ||
170 | struct crypto_shash *parent = pdesc->tfm; | ||
171 | unsigned long alignmask = crypto_shash_alignmask(parent); | ||
172 | struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent); | ||
173 | struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc); | ||
174 | struct crypto_cipher *tfm = tctx->child; | ||
175 | int bs = crypto_shash_blocksize(parent); | ||
176 | u8 *consts = PTR_ALIGN((void *)tctx->ctx, alignmask + 1); | ||
177 | u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); | ||
178 | u8 *prev = odds + bs; | ||
179 | unsigned int offset = 0; | ||
180 | |||
181 | if (ctx->len != bs) { | ||
182 | unsigned int rlen; | ||
183 | u8 *p = odds + ctx->len; | ||
184 | |||
185 | *p = 0x80; | ||
186 | p++; | ||
187 | |||
188 | rlen = bs - ctx->len - 1; | ||
189 | if (rlen) | ||
190 | memset(p, 0, rlen); | ||
191 | |||
192 | offset += bs; | ||
193 | } | ||
194 | |||
195 | crypto_xor(prev, odds, bs); | ||
196 | crypto_xor(prev, consts + offset, bs); | ||
197 | |||
198 | crypto_cipher_encrypt_one(tfm, out, prev); | ||
199 | |||
200 | return 0; | ||
201 | } | ||
202 | |||
203 | static int cmac_init_tfm(struct crypto_tfm *tfm) | ||
204 | { | ||
205 | struct crypto_cipher *cipher; | ||
206 | struct crypto_instance *inst = (void *)tfm->__crt_alg; | ||
207 | struct crypto_spawn *spawn = crypto_instance_ctx(inst); | ||
208 | struct cmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm); | ||
209 | |||
210 | cipher = crypto_spawn_cipher(spawn); | ||
211 | if (IS_ERR(cipher)) | ||
212 | return PTR_ERR(cipher); | ||
213 | |||
214 | ctx->child = cipher; | ||
215 | |||
216 | return 0; | ||
217 | }; | ||
218 | |||
219 | static void cmac_exit_tfm(struct crypto_tfm *tfm) | ||
220 | { | ||
221 | struct cmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm); | ||
222 | crypto_free_cipher(ctx->child); | ||
223 | } | ||
224 | |||
225 | static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) | ||
226 | { | ||
227 | struct shash_instance *inst; | ||
228 | struct crypto_alg *alg; | ||
229 | unsigned long alignmask; | ||
230 | int err; | ||
231 | |||
232 | err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH); | ||
233 | if (err) | ||
234 | return err; | ||
235 | |||
236 | alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, | ||
237 | CRYPTO_ALG_TYPE_MASK); | ||
238 | if (IS_ERR(alg)) | ||
239 | return PTR_ERR(alg); | ||
240 | |||
241 | switch (alg->cra_blocksize) { | ||
242 | case 16: | ||
243 | case 8: | ||
244 | break; | ||
245 | default: | ||
246 | goto out_put_alg; | ||
247 | } | ||
248 | |||
249 | inst = shash_alloc_instance("cmac", alg); | ||
250 | err = PTR_ERR(inst); | ||
251 | if (IS_ERR(inst)) | ||
252 | goto out_put_alg; | ||
253 | |||
254 | err = crypto_init_spawn(shash_instance_ctx(inst), alg, | ||
255 | shash_crypto_instance(inst), | ||
256 | CRYPTO_ALG_TYPE_MASK); | ||
257 | if (err) | ||
258 | goto out_free_inst; | ||
259 | |||
260 | alignmask = alg->cra_alignmask | (sizeof(long) - 1); | ||
261 | inst->alg.base.cra_alignmask = alignmask; | ||
262 | inst->alg.base.cra_priority = alg->cra_priority; | ||
263 | inst->alg.base.cra_blocksize = alg->cra_blocksize; | ||
264 | |||
265 | inst->alg.digestsize = alg->cra_blocksize; | ||
266 | inst->alg.descsize = | ||
267 | ALIGN(sizeof(struct cmac_desc_ctx), crypto_tfm_ctx_alignment()) | ||
268 | + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)) | ||
269 | + alg->cra_blocksize * 2; | ||
270 | |||
271 | inst->alg.base.cra_ctxsize = | ||
272 | ALIGN(sizeof(struct cmac_tfm_ctx), alignmask + 1) | ||
273 | + alg->cra_blocksize * 2; | ||
274 | |||
275 | inst->alg.base.cra_init = cmac_init_tfm; | ||
276 | inst->alg.base.cra_exit = cmac_exit_tfm; | ||
277 | |||
278 | inst->alg.init = crypto_cmac_digest_init; | ||
279 | inst->alg.update = crypto_cmac_digest_update; | ||
280 | inst->alg.final = crypto_cmac_digest_final; | ||
281 | inst->alg.setkey = crypto_cmac_digest_setkey; | ||
282 | |||
283 | err = shash_register_instance(tmpl, inst); | ||
284 | if (err) { | ||
285 | out_free_inst: | ||
286 | shash_free_instance(shash_crypto_instance(inst)); | ||
287 | } | ||
288 | |||
289 | out_put_alg: | ||
290 | crypto_mod_put(alg); | ||
291 | return err; | ||
292 | } | ||
293 | |||
294 | static struct crypto_template crypto_cmac_tmpl = { | ||
295 | .name = "cmac", | ||
296 | .create = cmac_create, | ||
297 | .free = shash_free_instance, | ||
298 | .module = THIS_MODULE, | ||
299 | }; | ||
300 | |||
301 | static int __init crypto_cmac_module_init(void) | ||
302 | { | ||
303 | return crypto_register_template(&crypto_cmac_tmpl); | ||
304 | } | ||
305 | |||
306 | static void __exit crypto_cmac_module_exit(void) | ||
307 | { | ||
308 | crypto_unregister_template(&crypto_cmac_tmpl); | ||
309 | } | ||
310 | |||
311 | module_init(crypto_cmac_module_init); | ||
312 | module_exit(crypto_cmac_module_exit); | ||
313 | |||
314 | MODULE_LICENSE("GPL"); | ||
315 | MODULE_DESCRIPTION("CMAC keyed hash algorithm"); | ||
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index dfd511fb39ee..1512e41cd93d 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c | |||
@@ -440,7 +440,7 @@ static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = { | |||
440 | 440 | ||
441 | #undef MSGSIZE | 441 | #undef MSGSIZE |
442 | 442 | ||
443 | static struct crypto_link { | 443 | static const struct crypto_link { |
444 | int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); | 444 | int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); |
445 | int (*dump)(struct sk_buff *, struct netlink_callback *); | 445 | int (*dump)(struct sk_buff *, struct netlink_callback *); |
446 | int (*done)(struct netlink_callback *); | 446 | int (*done)(struct netlink_callback *); |
@@ -456,7 +456,7 @@ static struct crypto_link { | |||
456 | static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | 456 | static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) |
457 | { | 457 | { |
458 | struct nlattr *attrs[CRYPTOCFGA_MAX+1]; | 458 | struct nlattr *attrs[CRYPTOCFGA_MAX+1]; |
459 | struct crypto_link *link; | 459 | const struct crypto_link *link; |
460 | int type, err; | 460 | int type, err; |
461 | 461 | ||
462 | type = nlh->nlmsg_type; | 462 | type = nlh->nlmsg_type; |
diff --git a/crypto/gcm.c b/crypto/gcm.c index 13ccbda34ff9..43e1fb05ea54 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c | |||
@@ -37,8 +37,14 @@ struct crypto_rfc4106_ctx { | |||
37 | u8 nonce[4]; | 37 | u8 nonce[4]; |
38 | }; | 38 | }; |
39 | 39 | ||
40 | struct crypto_rfc4543_instance_ctx { | ||
41 | struct crypto_aead_spawn aead; | ||
42 | struct crypto_skcipher_spawn null; | ||
43 | }; | ||
44 | |||
40 | struct crypto_rfc4543_ctx { | 45 | struct crypto_rfc4543_ctx { |
41 | struct crypto_aead *child; | 46 | struct crypto_aead *child; |
47 | struct crypto_blkcipher *null; | ||
42 | u8 nonce[4]; | 48 | u8 nonce[4]; |
43 | }; | 49 | }; |
44 | 50 | ||
@@ -1094,21 +1100,36 @@ static int crypto_rfc4543_setauthsize(struct crypto_aead *parent, | |||
1094 | return crypto_aead_setauthsize(ctx->child, authsize); | 1100 | return crypto_aead_setauthsize(ctx->child, authsize); |
1095 | } | 1101 | } |
1096 | 1102 | ||
1103 | static void crypto_rfc4543_done(struct crypto_async_request *areq, int err) | ||
1104 | { | ||
1105 | struct aead_request *req = areq->data; | ||
1106 | struct crypto_aead *aead = crypto_aead_reqtfm(req); | ||
1107 | struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req); | ||
1108 | |||
1109 | if (!err) { | ||
1110 | scatterwalk_map_and_copy(rctx->auth_tag, req->dst, | ||
1111 | req->cryptlen, | ||
1112 | crypto_aead_authsize(aead), 1); | ||
1113 | } | ||
1114 | |||
1115 | aead_request_complete(req, err); | ||
1116 | } | ||
1117 | |||
1097 | static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, | 1118 | static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, |
1098 | int enc) | 1119 | bool enc) |
1099 | { | 1120 | { |
1100 | struct crypto_aead *aead = crypto_aead_reqtfm(req); | 1121 | struct crypto_aead *aead = crypto_aead_reqtfm(req); |
1101 | struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead); | 1122 | struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead); |
1102 | struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req); | 1123 | struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req); |
1103 | struct aead_request *subreq = &rctx->subreq; | 1124 | struct aead_request *subreq = &rctx->subreq; |
1104 | struct scatterlist *dst = req->dst; | 1125 | struct scatterlist *src = req->src; |
1105 | struct scatterlist *cipher = rctx->cipher; | 1126 | struct scatterlist *cipher = rctx->cipher; |
1106 | struct scatterlist *payload = rctx->payload; | 1127 | struct scatterlist *payload = rctx->payload; |
1107 | struct scatterlist *assoc = rctx->assoc; | 1128 | struct scatterlist *assoc = rctx->assoc; |
1108 | unsigned int authsize = crypto_aead_authsize(aead); | 1129 | unsigned int authsize = crypto_aead_authsize(aead); |
1109 | unsigned int assoclen = req->assoclen; | 1130 | unsigned int assoclen = req->assoclen; |
1110 | struct page *dstp; | 1131 | struct page *srcp; |
1111 | u8 *vdst; | 1132 | u8 *vsrc; |
1112 | u8 *iv = PTR_ALIGN((u8 *)(rctx + 1) + crypto_aead_reqsize(ctx->child), | 1133 | u8 *iv = PTR_ALIGN((u8 *)(rctx + 1) + crypto_aead_reqsize(ctx->child), |
1113 | crypto_aead_alignmask(ctx->child) + 1); | 1134 | crypto_aead_alignmask(ctx->child) + 1); |
1114 | 1135 | ||
@@ -1119,19 +1140,19 @@ static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, | |||
1119 | if (enc) | 1140 | if (enc) |
1120 | memset(rctx->auth_tag, 0, authsize); | 1141 | memset(rctx->auth_tag, 0, authsize); |
1121 | else | 1142 | else |
1122 | scatterwalk_map_and_copy(rctx->auth_tag, dst, | 1143 | scatterwalk_map_and_copy(rctx->auth_tag, src, |
1123 | req->cryptlen - authsize, | 1144 | req->cryptlen - authsize, |
1124 | authsize, 0); | 1145 | authsize, 0); |
1125 | 1146 | ||
1126 | sg_init_one(cipher, rctx->auth_tag, authsize); | 1147 | sg_init_one(cipher, rctx->auth_tag, authsize); |
1127 | 1148 | ||
1128 | /* construct the aad */ | 1149 | /* construct the aad */ |
1129 | dstp = sg_page(dst); | 1150 | srcp = sg_page(src); |
1130 | vdst = PageHighMem(dstp) ? NULL : page_address(dstp) + dst->offset; | 1151 | vsrc = PageHighMem(srcp) ? NULL : page_address(srcp) + src->offset; |
1131 | 1152 | ||
1132 | sg_init_table(payload, 2); | 1153 | sg_init_table(payload, 2); |
1133 | sg_set_buf(payload, req->iv, 8); | 1154 | sg_set_buf(payload, req->iv, 8); |
1134 | scatterwalk_crypto_chain(payload, dst, vdst == req->iv + 8, 2); | 1155 | scatterwalk_crypto_chain(payload, src, vsrc == req->iv + 8, 2); |
1135 | assoclen += 8 + req->cryptlen - (enc ? 0 : authsize); | 1156 | assoclen += 8 + req->cryptlen - (enc ? 0 : authsize); |
1136 | 1157 | ||
1137 | if (req->assoc->length == req->assoclen) { | 1158 | if (req->assoc->length == req->assoclen) { |
@@ -1150,14 +1171,27 @@ static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, | |||
1150 | scatterwalk_crypto_chain(assoc, payload, 0, 2); | 1171 | scatterwalk_crypto_chain(assoc, payload, 0, 2); |
1151 | 1172 | ||
1152 | aead_request_set_tfm(subreq, ctx->child); | 1173 | aead_request_set_tfm(subreq, ctx->child); |
1153 | aead_request_set_callback(subreq, req->base.flags, req->base.complete, | 1174 | aead_request_set_callback(subreq, req->base.flags, crypto_rfc4543_done, |
1154 | req->base.data); | 1175 | req); |
1155 | aead_request_set_crypt(subreq, cipher, cipher, enc ? 0 : authsize, iv); | 1176 | aead_request_set_crypt(subreq, cipher, cipher, enc ? 0 : authsize, iv); |
1156 | aead_request_set_assoc(subreq, assoc, assoclen); | 1177 | aead_request_set_assoc(subreq, assoc, assoclen); |
1157 | 1178 | ||
1158 | return subreq; | 1179 | return subreq; |
1159 | } | 1180 | } |
1160 | 1181 | ||
1182 | static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc) | ||
1183 | { | ||
1184 | struct crypto_aead *aead = crypto_aead_reqtfm(req); | ||
1185 | struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead); | ||
1186 | unsigned int authsize = crypto_aead_authsize(aead); | ||
1187 | unsigned int nbytes = req->cryptlen - (enc ? 0 : authsize); | ||
1188 | struct blkcipher_desc desc = { | ||
1189 | .tfm = ctx->null, | ||
1190 | }; | ||
1191 | |||
1192 | return crypto_blkcipher_encrypt(&desc, req->dst, req->src, nbytes); | ||
1193 | } | ||
1194 | |||
1161 | static int crypto_rfc4543_encrypt(struct aead_request *req) | 1195 | static int crypto_rfc4543_encrypt(struct aead_request *req) |
1162 | { | 1196 | { |
1163 | struct crypto_aead *aead = crypto_aead_reqtfm(req); | 1197 | struct crypto_aead *aead = crypto_aead_reqtfm(req); |
@@ -1165,7 +1199,13 @@ static int crypto_rfc4543_encrypt(struct aead_request *req) | |||
1165 | struct aead_request *subreq; | 1199 | struct aead_request *subreq; |
1166 | int err; | 1200 | int err; |
1167 | 1201 | ||
1168 | subreq = crypto_rfc4543_crypt(req, 1); | 1202 | if (req->src != req->dst) { |
1203 | err = crypto_rfc4543_copy_src_to_dst(req, true); | ||
1204 | if (err) | ||
1205 | return err; | ||
1206 | } | ||
1207 | |||
1208 | subreq = crypto_rfc4543_crypt(req, true); | ||
1169 | err = crypto_aead_encrypt(subreq); | 1209 | err = crypto_aead_encrypt(subreq); |
1170 | if (err) | 1210 | if (err) |
1171 | return err; | 1211 | return err; |
@@ -1178,7 +1218,15 @@ static int crypto_rfc4543_encrypt(struct aead_request *req) | |||
1178 | 1218 | ||
1179 | static int crypto_rfc4543_decrypt(struct aead_request *req) | 1219 | static int crypto_rfc4543_decrypt(struct aead_request *req) |
1180 | { | 1220 | { |
1181 | req = crypto_rfc4543_crypt(req, 0); | 1221 | int err; |
1222 | |||
1223 | if (req->src != req->dst) { | ||
1224 | err = crypto_rfc4543_copy_src_to_dst(req, false); | ||
1225 | if (err) | ||
1226 | return err; | ||
1227 | } | ||
1228 | |||
1229 | req = crypto_rfc4543_crypt(req, false); | ||
1182 | 1230 | ||
1183 | return crypto_aead_decrypt(req); | 1231 | return crypto_aead_decrypt(req); |
1184 | } | 1232 | } |
@@ -1186,16 +1234,25 @@ static int crypto_rfc4543_decrypt(struct aead_request *req) | |||
1186 | static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm) | 1234 | static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm) |
1187 | { | 1235 | { |
1188 | struct crypto_instance *inst = (void *)tfm->__crt_alg; | 1236 | struct crypto_instance *inst = (void *)tfm->__crt_alg; |
1189 | struct crypto_aead_spawn *spawn = crypto_instance_ctx(inst); | 1237 | struct crypto_rfc4543_instance_ctx *ictx = crypto_instance_ctx(inst); |
1238 | struct crypto_aead_spawn *spawn = &ictx->aead; | ||
1190 | struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm); | 1239 | struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm); |
1191 | struct crypto_aead *aead; | 1240 | struct crypto_aead *aead; |
1241 | struct crypto_blkcipher *null; | ||
1192 | unsigned long align; | 1242 | unsigned long align; |
1243 | int err = 0; | ||
1193 | 1244 | ||
1194 | aead = crypto_spawn_aead(spawn); | 1245 | aead = crypto_spawn_aead(spawn); |
1195 | if (IS_ERR(aead)) | 1246 | if (IS_ERR(aead)) |
1196 | return PTR_ERR(aead); | 1247 | return PTR_ERR(aead); |
1197 | 1248 | ||
1249 | null = crypto_spawn_blkcipher(&ictx->null.base); | ||
1250 | err = PTR_ERR(null); | ||
1251 | if (IS_ERR(null)) | ||
1252 | goto err_free_aead; | ||
1253 | |||
1198 | ctx->child = aead; | 1254 | ctx->child = aead; |
1255 | ctx->null = null; | ||
1199 | 1256 | ||
1200 | align = crypto_aead_alignmask(aead); | 1257 | align = crypto_aead_alignmask(aead); |
1201 | align &= ~(crypto_tfm_ctx_alignment() - 1); | 1258 | align &= ~(crypto_tfm_ctx_alignment() - 1); |
@@ -1205,6 +1262,10 @@ static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm) | |||
1205 | align + 16; | 1262 | align + 16; |
1206 | 1263 | ||
1207 | return 0; | 1264 | return 0; |
1265 | |||
1266 | err_free_aead: | ||
1267 | crypto_free_aead(aead); | ||
1268 | return err; | ||
1208 | } | 1269 | } |
1209 | 1270 | ||
1210 | static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm) | 1271 | static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm) |
@@ -1212,6 +1273,7 @@ static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm) | |||
1212 | struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm); | 1273 | struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm); |
1213 | 1274 | ||
1214 | crypto_free_aead(ctx->child); | 1275 | crypto_free_aead(ctx->child); |
1276 | crypto_free_blkcipher(ctx->null); | ||
1215 | } | 1277 | } |
1216 | 1278 | ||
1217 | static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) | 1279 | static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) |
@@ -1220,6 +1282,7 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) | |||
1220 | struct crypto_instance *inst; | 1282 | struct crypto_instance *inst; |
1221 | struct crypto_aead_spawn *spawn; | 1283 | struct crypto_aead_spawn *spawn; |
1222 | struct crypto_alg *alg; | 1284 | struct crypto_alg *alg; |
1285 | struct crypto_rfc4543_instance_ctx *ctx; | ||
1223 | const char *ccm_name; | 1286 | const char *ccm_name; |
1224 | int err; | 1287 | int err; |
1225 | 1288 | ||
@@ -1234,11 +1297,12 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) | |||
1234 | if (IS_ERR(ccm_name)) | 1297 | if (IS_ERR(ccm_name)) |
1235 | return ERR_CAST(ccm_name); | 1298 | return ERR_CAST(ccm_name); |
1236 | 1299 | ||
1237 | inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); | 1300 | inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); |
1238 | if (!inst) | 1301 | if (!inst) |
1239 | return ERR_PTR(-ENOMEM); | 1302 | return ERR_PTR(-ENOMEM); |
1240 | 1303 | ||
1241 | spawn = crypto_instance_ctx(inst); | 1304 | ctx = crypto_instance_ctx(inst); |
1305 | spawn = &ctx->aead; | ||
1242 | crypto_set_aead_spawn(spawn, inst); | 1306 | crypto_set_aead_spawn(spawn, inst); |
1243 | err = crypto_grab_aead(spawn, ccm_name, 0, | 1307 | err = crypto_grab_aead(spawn, ccm_name, 0, |
1244 | crypto_requires_sync(algt->type, algt->mask)); | 1308 | crypto_requires_sync(algt->type, algt->mask)); |
@@ -1247,15 +1311,23 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) | |||
1247 | 1311 | ||
1248 | alg = crypto_aead_spawn_alg(spawn); | 1312 | alg = crypto_aead_spawn_alg(spawn); |
1249 | 1313 | ||
1314 | crypto_set_skcipher_spawn(&ctx->null, inst); | ||
1315 | err = crypto_grab_skcipher(&ctx->null, "ecb(cipher_null)", 0, | ||
1316 | CRYPTO_ALG_ASYNC); | ||
1317 | if (err) | ||
1318 | goto out_drop_alg; | ||
1319 | |||
1320 | crypto_skcipher_spawn_alg(&ctx->null); | ||
1321 | |||
1250 | err = -EINVAL; | 1322 | err = -EINVAL; |
1251 | 1323 | ||
1252 | /* We only support 16-byte blocks. */ | 1324 | /* We only support 16-byte blocks. */ |
1253 | if (alg->cra_aead.ivsize != 16) | 1325 | if (alg->cra_aead.ivsize != 16) |
1254 | goto out_drop_alg; | 1326 | goto out_drop_ecbnull; |
1255 | 1327 | ||
1256 | /* Not a stream cipher? */ | 1328 | /* Not a stream cipher? */ |
1257 | if (alg->cra_blocksize != 1) | 1329 | if (alg->cra_blocksize != 1) |
1258 | goto out_drop_alg; | 1330 | goto out_drop_ecbnull; |
1259 | 1331 | ||
1260 | err = -ENAMETOOLONG; | 1332 | err = -ENAMETOOLONG; |
1261 | if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, | 1333 | if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, |
@@ -1263,7 +1335,7 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) | |||
1263 | snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, | 1335 | snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, |
1264 | "rfc4543(%s)", alg->cra_driver_name) >= | 1336 | "rfc4543(%s)", alg->cra_driver_name) >= |
1265 | CRYPTO_MAX_ALG_NAME) | 1337 | CRYPTO_MAX_ALG_NAME) |
1266 | goto out_drop_alg; | 1338 | goto out_drop_ecbnull; |
1267 | 1339 | ||
1268 | inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD; | 1340 | inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD; |
1269 | inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC; | 1341 | inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC; |
@@ -1290,6 +1362,8 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) | |||
1290 | out: | 1362 | out: |
1291 | return inst; | 1363 | return inst; |
1292 | 1364 | ||
1365 | out_drop_ecbnull: | ||
1366 | crypto_drop_skcipher(&ctx->null); | ||
1293 | out_drop_alg: | 1367 | out_drop_alg: |
1294 | crypto_drop_aead(spawn); | 1368 | crypto_drop_aead(spawn); |
1295 | out_free_inst: | 1369 | out_free_inst: |
@@ -1300,7 +1374,11 @@ out_free_inst: | |||
1300 | 1374 | ||
1301 | static void crypto_rfc4543_free(struct crypto_instance *inst) | 1375 | static void crypto_rfc4543_free(struct crypto_instance *inst) |
1302 | { | 1376 | { |
1303 | crypto_drop_spawn(crypto_instance_ctx(inst)); | 1377 | struct crypto_rfc4543_instance_ctx *ctx = crypto_instance_ctx(inst); |
1378 | |||
1379 | crypto_drop_aead(&ctx->aead); | ||
1380 | crypto_drop_skcipher(&ctx->null); | ||
1381 | |||
1304 | kfree(inst); | 1382 | kfree(inst); |
1305 | } | 1383 | } |
1306 | 1384 | ||
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index c3ed4ec924e1..543366779524 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c | |||
@@ -246,7 +246,7 @@ static int sha256_init(struct shash_desc *desc) | |||
246 | return 0; | 246 | return 0; |
247 | } | 247 | } |
248 | 248 | ||
249 | static int sha256_update(struct shash_desc *desc, const u8 *data, | 249 | int crypto_sha256_update(struct shash_desc *desc, const u8 *data, |
250 | unsigned int len) | 250 | unsigned int len) |
251 | { | 251 | { |
252 | struct sha256_state *sctx = shash_desc_ctx(desc); | 252 | struct sha256_state *sctx = shash_desc_ctx(desc); |
@@ -277,6 +277,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data, | |||
277 | 277 | ||
278 | return 0; | 278 | return 0; |
279 | } | 279 | } |
280 | EXPORT_SYMBOL(crypto_sha256_update); | ||
280 | 281 | ||
281 | static int sha256_final(struct shash_desc *desc, u8 *out) | 282 | static int sha256_final(struct shash_desc *desc, u8 *out) |
282 | { | 283 | { |
@@ -293,10 +294,10 @@ static int sha256_final(struct shash_desc *desc, u8 *out) | |||
293 | /* Pad out to 56 mod 64. */ | 294 | /* Pad out to 56 mod 64. */ |
294 | index = sctx->count & 0x3f; | 295 | index = sctx->count & 0x3f; |
295 | pad_len = (index < 56) ? (56 - index) : ((64+56) - index); | 296 | pad_len = (index < 56) ? (56 - index) : ((64+56) - index); |
296 | sha256_update(desc, padding, pad_len); | 297 | crypto_sha256_update(desc, padding, pad_len); |
297 | 298 | ||
298 | /* Append length (before padding) */ | 299 | /* Append length (before padding) */ |
299 | sha256_update(desc, (const u8 *)&bits, sizeof(bits)); | 300 | crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits)); |
300 | 301 | ||
301 | /* Store state in digest */ | 302 | /* Store state in digest */ |
302 | for (i = 0; i < 8; i++) | 303 | for (i = 0; i < 8; i++) |
@@ -339,7 +340,7 @@ static int sha256_import(struct shash_desc *desc, const void *in) | |||
339 | static struct shash_alg sha256_algs[2] = { { | 340 | static struct shash_alg sha256_algs[2] = { { |
340 | .digestsize = SHA256_DIGEST_SIZE, | 341 | .digestsize = SHA256_DIGEST_SIZE, |
341 | .init = sha256_init, | 342 | .init = sha256_init, |
342 | .update = sha256_update, | 343 | .update = crypto_sha256_update, |
343 | .final = sha256_final, | 344 | .final = sha256_final, |
344 | .export = sha256_export, | 345 | .export = sha256_export, |
345 | .import = sha256_import, | 346 | .import = sha256_import, |
@@ -355,7 +356,7 @@ static struct shash_alg sha256_algs[2] = { { | |||
355 | }, { | 356 | }, { |
356 | .digestsize = SHA224_DIGEST_SIZE, | 357 | .digestsize = SHA224_DIGEST_SIZE, |
357 | .init = sha224_init, | 358 | .init = sha224_init, |
358 | .update = sha256_update, | 359 | .update = crypto_sha256_update, |
359 | .final = sha224_final, | 360 | .final = sha224_final, |
360 | .descsize = sizeof(struct sha256_state), | 361 | .descsize = sizeof(struct sha256_state), |
361 | .base = { | 362 | .base = { |
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 71fcf361102d..4c5862095679 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c | |||
@@ -163,8 +163,8 @@ sha384_init(struct shash_desc *desc) | |||
163 | return 0; | 163 | return 0; |
164 | } | 164 | } |
165 | 165 | ||
166 | static int | 166 | int crypto_sha512_update(struct shash_desc *desc, const u8 *data, |
167 | sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) | 167 | unsigned int len) |
168 | { | 168 | { |
169 | struct sha512_state *sctx = shash_desc_ctx(desc); | 169 | struct sha512_state *sctx = shash_desc_ctx(desc); |
170 | 170 | ||
@@ -197,6 +197,7 @@ sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) | |||
197 | 197 | ||
198 | return 0; | 198 | return 0; |
199 | } | 199 | } |
200 | EXPORT_SYMBOL(crypto_sha512_update); | ||
200 | 201 | ||
201 | static int | 202 | static int |
202 | sha512_final(struct shash_desc *desc, u8 *hash) | 203 | sha512_final(struct shash_desc *desc, u8 *hash) |
@@ -215,10 +216,10 @@ sha512_final(struct shash_desc *desc, u8 *hash) | |||
215 | /* Pad out to 112 mod 128. */ | 216 | /* Pad out to 112 mod 128. */ |
216 | index = sctx->count[0] & 0x7f; | 217 | index = sctx->count[0] & 0x7f; |
217 | pad_len = (index < 112) ? (112 - index) : ((128+112) - index); | 218 | pad_len = (index < 112) ? (112 - index) : ((128+112) - index); |
218 | sha512_update(desc, padding, pad_len); | 219 | crypto_sha512_update(desc, padding, pad_len); |
219 | 220 | ||
220 | /* Append length (before padding) */ | 221 | /* Append length (before padding) */ |
221 | sha512_update(desc, (const u8 *)bits, sizeof(bits)); | 222 | crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits)); |
222 | 223 | ||
223 | /* Store state in digest */ | 224 | /* Store state in digest */ |
224 | for (i = 0; i < 8; i++) | 225 | for (i = 0; i < 8; i++) |
@@ -245,7 +246,7 @@ static int sha384_final(struct shash_desc *desc, u8 *hash) | |||
245 | static struct shash_alg sha512_algs[2] = { { | 246 | static struct shash_alg sha512_algs[2] = { { |
246 | .digestsize = SHA512_DIGEST_SIZE, | 247 | .digestsize = SHA512_DIGEST_SIZE, |
247 | .init = sha512_init, | 248 | .init = sha512_init, |
248 | .update = sha512_update, | 249 | .update = crypto_sha512_update, |
249 | .final = sha512_final, | 250 | .final = sha512_final, |
250 | .descsize = sizeof(struct sha512_state), | 251 | .descsize = sizeof(struct sha512_state), |
251 | .base = { | 252 | .base = { |
@@ -257,7 +258,7 @@ static struct shash_alg sha512_algs[2] = { { | |||
257 | }, { | 258 | }, { |
258 | .digestsize = SHA384_DIGEST_SIZE, | 259 | .digestsize = SHA384_DIGEST_SIZE, |
259 | .init = sha384_init, | 260 | .init = sha384_init, |
260 | .update = sha512_update, | 261 | .update = crypto_sha512_update, |
261 | .final = sha384_final, | 262 | .final = sha384_final, |
262 | .descsize = sizeof(struct sha512_state), | 263 | .descsize = sizeof(struct sha512_state), |
263 | .base = { | 264 | .base = { |
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 87ef7d66bc20..66d254ce0d11 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c | |||
@@ -1095,7 +1095,6 @@ static int do_test(int m) | |||
1095 | break; | 1095 | break; |
1096 | 1096 | ||
1097 | case 28: | 1097 | case 28: |
1098 | |||
1099 | ret += tcrypt_test("tgr160"); | 1098 | ret += tcrypt_test("tgr160"); |
1100 | break; | 1099 | break; |
1101 | 1100 | ||
@@ -1118,6 +1117,7 @@ static int do_test(int m) | |||
1118 | ret += tcrypt_test("lrw(camellia)"); | 1117 | ret += tcrypt_test("lrw(camellia)"); |
1119 | ret += tcrypt_test("xts(camellia)"); | 1118 | ret += tcrypt_test("xts(camellia)"); |
1120 | break; | 1119 | break; |
1120 | |||
1121 | case 33: | 1121 | case 33: |
1122 | ret += tcrypt_test("sha224"); | 1122 | ret += tcrypt_test("sha224"); |
1123 | break; | 1123 | break; |
@@ -1213,6 +1213,7 @@ static int do_test(int m) | |||
1213 | case 109: | 1213 | case 109: |
1214 | ret += tcrypt_test("vmac(aes)"); | 1214 | ret += tcrypt_test("vmac(aes)"); |
1215 | break; | 1215 | break; |
1216 | |||
1216 | case 110: | 1217 | case 110: |
1217 | ret += tcrypt_test("hmac(crc32)"); | 1218 | ret += tcrypt_test("hmac(crc32)"); |
1218 | break; | 1219 | break; |
@@ -1225,6 +1226,18 @@ static int do_test(int m) | |||
1225 | ret += tcrypt_test("rfc4106(gcm(aes))"); | 1226 | ret += tcrypt_test("rfc4106(gcm(aes))"); |
1226 | break; | 1227 | break; |
1227 | 1228 | ||
1229 | case 152: | ||
1230 | ret += tcrypt_test("rfc4543(gcm(aes))"); | ||
1231 | break; | ||
1232 | |||
1233 | case 153: | ||
1234 | ret += tcrypt_test("cmac(aes)"); | ||
1235 | break; | ||
1236 | |||
1237 | case 154: | ||
1238 | ret += tcrypt_test("cmac(des3_ede)"); | ||
1239 | break; | ||
1240 | |||
1228 | case 200: | 1241 | case 200: |
1229 | test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, | 1242 | test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, |
1230 | speed_template_16_24_32); | 1243 | speed_template_16_24_32); |
@@ -1755,6 +1768,21 @@ static int do_test(int m) | |||
1755 | speed_template_32_64); | 1768 | speed_template_32_64); |
1756 | break; | 1769 | break; |
1757 | 1770 | ||
1771 | case 509: | ||
1772 | test_acipher_speed("ecb(blowfish)", ENCRYPT, sec, NULL, 0, | ||
1773 | speed_template_8_32); | ||
1774 | test_acipher_speed("ecb(blowfish)", DECRYPT, sec, NULL, 0, | ||
1775 | speed_template_8_32); | ||
1776 | test_acipher_speed("cbc(blowfish)", ENCRYPT, sec, NULL, 0, | ||
1777 | speed_template_8_32); | ||
1778 | test_acipher_speed("cbc(blowfish)", DECRYPT, sec, NULL, 0, | ||
1779 | speed_template_8_32); | ||
1780 | test_acipher_speed("ctr(blowfish)", ENCRYPT, sec, NULL, 0, | ||
1781 | speed_template_8_32); | ||
1782 | test_acipher_speed("ctr(blowfish)", DECRYPT, sec, NULL, 0, | ||
1783 | speed_template_8_32); | ||
1784 | break; | ||
1785 | |||
1758 | case 1000: | 1786 | case 1000: |
1759 | test_available(); | 1787 | test_available(); |
1760 | break; | 1788 | break; |
diff --git a/crypto/testmgr.c b/crypto/testmgr.c index efd8b20e13dc..5823735cf381 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c | |||
@@ -1645,19 +1645,31 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1645 | .alg = "__cbc-serpent-avx", | 1645 | .alg = "__cbc-serpent-avx", |
1646 | .test = alg_test_null, | 1646 | .test = alg_test_null, |
1647 | }, { | 1647 | }, { |
1648 | .alg = "__cbc-serpent-avx2", | ||
1649 | .test = alg_test_null, | ||
1650 | }, { | ||
1648 | .alg = "__cbc-serpent-sse2", | 1651 | .alg = "__cbc-serpent-sse2", |
1649 | .test = alg_test_null, | 1652 | .test = alg_test_null, |
1650 | }, { | 1653 | }, { |
1651 | .alg = "__cbc-twofish-avx", | 1654 | .alg = "__cbc-twofish-avx", |
1652 | .test = alg_test_null, | 1655 | .test = alg_test_null, |
1653 | }, { | 1656 | }, { |
1657 | .alg = "__cbc-twofish-avx2", | ||
1658 | .test = alg_test_null, | ||
1659 | }, { | ||
1654 | .alg = "__driver-cbc-aes-aesni", | 1660 | .alg = "__driver-cbc-aes-aesni", |
1655 | .test = alg_test_null, | 1661 | .test = alg_test_null, |
1656 | .fips_allowed = 1, | 1662 | .fips_allowed = 1, |
1657 | }, { | 1663 | }, { |
1664 | .alg = "__driver-cbc-blowfish-avx2", | ||
1665 | .test = alg_test_null, | ||
1666 | }, { | ||
1658 | .alg = "__driver-cbc-camellia-aesni", | 1667 | .alg = "__driver-cbc-camellia-aesni", |
1659 | .test = alg_test_null, | 1668 | .test = alg_test_null, |
1660 | }, { | 1669 | }, { |
1670 | .alg = "__driver-cbc-camellia-aesni-avx2", | ||
1671 | .test = alg_test_null, | ||
1672 | }, { | ||
1661 | .alg = "__driver-cbc-cast5-avx", | 1673 | .alg = "__driver-cbc-cast5-avx", |
1662 | .test = alg_test_null, | 1674 | .test = alg_test_null, |
1663 | }, { | 1675 | }, { |
@@ -1667,19 +1679,31 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1667 | .alg = "__driver-cbc-serpent-avx", | 1679 | .alg = "__driver-cbc-serpent-avx", |
1668 | .test = alg_test_null, | 1680 | .test = alg_test_null, |
1669 | }, { | 1681 | }, { |
1682 | .alg = "__driver-cbc-serpent-avx2", | ||
1683 | .test = alg_test_null, | ||
1684 | }, { | ||
1670 | .alg = "__driver-cbc-serpent-sse2", | 1685 | .alg = "__driver-cbc-serpent-sse2", |
1671 | .test = alg_test_null, | 1686 | .test = alg_test_null, |
1672 | }, { | 1687 | }, { |
1673 | .alg = "__driver-cbc-twofish-avx", | 1688 | .alg = "__driver-cbc-twofish-avx", |
1674 | .test = alg_test_null, | 1689 | .test = alg_test_null, |
1675 | }, { | 1690 | }, { |
1691 | .alg = "__driver-cbc-twofish-avx2", | ||
1692 | .test = alg_test_null, | ||
1693 | }, { | ||
1676 | .alg = "__driver-ecb-aes-aesni", | 1694 | .alg = "__driver-ecb-aes-aesni", |
1677 | .test = alg_test_null, | 1695 | .test = alg_test_null, |
1678 | .fips_allowed = 1, | 1696 | .fips_allowed = 1, |
1679 | }, { | 1697 | }, { |
1698 | .alg = "__driver-ecb-blowfish-avx2", | ||
1699 | .test = alg_test_null, | ||
1700 | }, { | ||
1680 | .alg = "__driver-ecb-camellia-aesni", | 1701 | .alg = "__driver-ecb-camellia-aesni", |
1681 | .test = alg_test_null, | 1702 | .test = alg_test_null, |
1682 | }, { | 1703 | }, { |
1704 | .alg = "__driver-ecb-camellia-aesni-avx2", | ||
1705 | .test = alg_test_null, | ||
1706 | }, { | ||
1683 | .alg = "__driver-ecb-cast5-avx", | 1707 | .alg = "__driver-ecb-cast5-avx", |
1684 | .test = alg_test_null, | 1708 | .test = alg_test_null, |
1685 | }, { | 1709 | }, { |
@@ -1689,12 +1713,18 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1689 | .alg = "__driver-ecb-serpent-avx", | 1713 | .alg = "__driver-ecb-serpent-avx", |
1690 | .test = alg_test_null, | 1714 | .test = alg_test_null, |
1691 | }, { | 1715 | }, { |
1716 | .alg = "__driver-ecb-serpent-avx2", | ||
1717 | .test = alg_test_null, | ||
1718 | }, { | ||
1692 | .alg = "__driver-ecb-serpent-sse2", | 1719 | .alg = "__driver-ecb-serpent-sse2", |
1693 | .test = alg_test_null, | 1720 | .test = alg_test_null, |
1694 | }, { | 1721 | }, { |
1695 | .alg = "__driver-ecb-twofish-avx", | 1722 | .alg = "__driver-ecb-twofish-avx", |
1696 | .test = alg_test_null, | 1723 | .test = alg_test_null, |
1697 | }, { | 1724 | }, { |
1725 | .alg = "__driver-ecb-twofish-avx2", | ||
1726 | .test = alg_test_null, | ||
1727 | }, { | ||
1698 | .alg = "__ghash-pclmulqdqni", | 1728 | .alg = "__ghash-pclmulqdqni", |
1699 | .test = alg_test_null, | 1729 | .test = alg_test_null, |
1700 | .fips_allowed = 1, | 1730 | .fips_allowed = 1, |
@@ -1913,6 +1943,27 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1913 | } | 1943 | } |
1914 | } | 1944 | } |
1915 | }, { | 1945 | }, { |
1946 | .alg = "cmac(aes)", | ||
1947 | .test = alg_test_hash, | ||
1948 | .suite = { | ||
1949 | .hash = { | ||
1950 | .vecs = aes_cmac128_tv_template, | ||
1951 | .count = CMAC_AES_TEST_VECTORS | ||
1952 | } | ||
1953 | } | ||
1954 | }, { | ||
1955 | .alg = "cmac(des3_ede)", | ||
1956 | .test = alg_test_hash, | ||
1957 | .suite = { | ||
1958 | .hash = { | ||
1959 | .vecs = des3_ede_cmac64_tv_template, | ||
1960 | .count = CMAC_DES3_EDE_TEST_VECTORS | ||
1961 | } | ||
1962 | } | ||
1963 | }, { | ||
1964 | .alg = "compress_null", | ||
1965 | .test = alg_test_null, | ||
1966 | }, { | ||
1916 | .alg = "crc32c", | 1967 | .alg = "crc32c", |
1917 | .test = alg_test_crc32c, | 1968 | .test = alg_test_crc32c, |
1918 | .fips_allowed = 1, | 1969 | .fips_allowed = 1, |
@@ -1927,16 +1978,31 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1927 | .test = alg_test_null, | 1978 | .test = alg_test_null, |
1928 | .fips_allowed = 1, | 1979 | .fips_allowed = 1, |
1929 | }, { | 1980 | }, { |
1981 | .alg = "cryptd(__driver-cbc-blowfish-avx2)", | ||
1982 | .test = alg_test_null, | ||
1983 | }, { | ||
1930 | .alg = "cryptd(__driver-cbc-camellia-aesni)", | 1984 | .alg = "cryptd(__driver-cbc-camellia-aesni)", |
1931 | .test = alg_test_null, | 1985 | .test = alg_test_null, |
1932 | }, { | 1986 | }, { |
1987 | .alg = "cryptd(__driver-cbc-camellia-aesni-avx2)", | ||
1988 | .test = alg_test_null, | ||
1989 | }, { | ||
1990 | .alg = "cryptd(__driver-cbc-serpent-avx2)", | ||
1991 | .test = alg_test_null, | ||
1992 | }, { | ||
1933 | .alg = "cryptd(__driver-ecb-aes-aesni)", | 1993 | .alg = "cryptd(__driver-ecb-aes-aesni)", |
1934 | .test = alg_test_null, | 1994 | .test = alg_test_null, |
1935 | .fips_allowed = 1, | 1995 | .fips_allowed = 1, |
1936 | }, { | 1996 | }, { |
1997 | .alg = "cryptd(__driver-ecb-blowfish-avx2)", | ||
1998 | .test = alg_test_null, | ||
1999 | }, { | ||
1937 | .alg = "cryptd(__driver-ecb-camellia-aesni)", | 2000 | .alg = "cryptd(__driver-ecb-camellia-aesni)", |
1938 | .test = alg_test_null, | 2001 | .test = alg_test_null, |
1939 | }, { | 2002 | }, { |
2003 | .alg = "cryptd(__driver-ecb-camellia-aesni-avx2)", | ||
2004 | .test = alg_test_null, | ||
2005 | }, { | ||
1940 | .alg = "cryptd(__driver-ecb-cast5-avx)", | 2006 | .alg = "cryptd(__driver-ecb-cast5-avx)", |
1941 | .test = alg_test_null, | 2007 | .test = alg_test_null, |
1942 | }, { | 2008 | }, { |
@@ -1946,12 +2012,18 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1946 | .alg = "cryptd(__driver-ecb-serpent-avx)", | 2012 | .alg = "cryptd(__driver-ecb-serpent-avx)", |
1947 | .test = alg_test_null, | 2013 | .test = alg_test_null, |
1948 | }, { | 2014 | }, { |
2015 | .alg = "cryptd(__driver-ecb-serpent-avx2)", | ||
2016 | .test = alg_test_null, | ||
2017 | }, { | ||
1949 | .alg = "cryptd(__driver-ecb-serpent-sse2)", | 2018 | .alg = "cryptd(__driver-ecb-serpent-sse2)", |
1950 | .test = alg_test_null, | 2019 | .test = alg_test_null, |
1951 | }, { | 2020 | }, { |
1952 | .alg = "cryptd(__driver-ecb-twofish-avx)", | 2021 | .alg = "cryptd(__driver-ecb-twofish-avx)", |
1953 | .test = alg_test_null, | 2022 | .test = alg_test_null, |
1954 | }, { | 2023 | }, { |
2024 | .alg = "cryptd(__driver-ecb-twofish-avx2)", | ||
2025 | .test = alg_test_null, | ||
2026 | }, { | ||
1955 | .alg = "cryptd(__driver-gcm-aes-aesni)", | 2027 | .alg = "cryptd(__driver-gcm-aes-aesni)", |
1956 | .test = alg_test_null, | 2028 | .test = alg_test_null, |
1957 | .fips_allowed = 1, | 2029 | .fips_allowed = 1, |
@@ -2127,6 +2199,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
2127 | } | 2199 | } |
2128 | } | 2200 | } |
2129 | }, { | 2201 | }, { |
2202 | .alg = "digest_null", | ||
2203 | .test = alg_test_null, | ||
2204 | }, { | ||
2130 | .alg = "ecb(__aes-aesni)", | 2205 | .alg = "ecb(__aes-aesni)", |
2131 | .test = alg_test_null, | 2206 | .test = alg_test_null, |
2132 | .fips_allowed = 1, | 2207 | .fips_allowed = 1, |
@@ -2237,6 +2312,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
2237 | } | 2312 | } |
2238 | } | 2313 | } |
2239 | }, { | 2314 | }, { |
2315 | .alg = "ecb(cipher_null)", | ||
2316 | .test = alg_test_null, | ||
2317 | }, { | ||
2240 | .alg = "ecb(des)", | 2318 | .alg = "ecb(des)", |
2241 | .test = alg_test_skcipher, | 2319 | .test = alg_test_skcipher, |
2242 | .fips_allowed = 1, | 2320 | .fips_allowed = 1, |
@@ -2696,8 +2774,6 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
2696 | } | 2774 | } |
2697 | } | 2775 | } |
2698 | }, { | 2776 | }, { |
2699 | |||
2700 | |||
2701 | .alg = "rfc4309(ccm(aes))", | 2777 | .alg = "rfc4309(ccm(aes))", |
2702 | .test = alg_test_aead, | 2778 | .test = alg_test_aead, |
2703 | .fips_allowed = 1, | 2779 | .fips_allowed = 1, |
@@ -2714,6 +2790,21 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
2714 | } | 2790 | } |
2715 | } | 2791 | } |
2716 | }, { | 2792 | }, { |
2793 | .alg = "rfc4543(gcm(aes))", | ||
2794 | .test = alg_test_aead, | ||
2795 | .suite = { | ||
2796 | .aead = { | ||
2797 | .enc = { | ||
2798 | .vecs = aes_gcm_rfc4543_enc_tv_template, | ||
2799 | .count = AES_GCM_4543_ENC_TEST_VECTORS | ||
2800 | }, | ||
2801 | .dec = { | ||
2802 | .vecs = aes_gcm_rfc4543_dec_tv_template, | ||
2803 | .count = AES_GCM_4543_DEC_TEST_VECTORS | ||
2804 | }, | ||
2805 | } | ||
2806 | } | ||
2807 | }, { | ||
2717 | .alg = "rmd128", | 2808 | .alg = "rmd128", |
2718 | .test = alg_test_hash, | 2809 | .test = alg_test_hash, |
2719 | .suite = { | 2810 | .suite = { |
diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 3db1b7591559..1e701bc075b9 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h | |||
@@ -1639,6 +1639,131 @@ static struct hash_testvec hmac_sha256_tv_template[] = { | |||
1639 | }, | 1639 | }, |
1640 | }; | 1640 | }; |
1641 | 1641 | ||
1642 | #define CMAC_AES_TEST_VECTORS 6 | ||
1643 | |||
1644 | static struct hash_testvec aes_cmac128_tv_template[] = { | ||
1645 | { /* From NIST Special Publication 800-38B, AES-128 */ | ||
1646 | .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" | ||
1647 | "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", | ||
1648 | .plaintext = zeroed_string, | ||
1649 | .digest = "\xbb\x1d\x69\x29\xe9\x59\x37\x28" | ||
1650 | "\x7f\xa3\x7d\x12\x9b\x75\x67\x46", | ||
1651 | .psize = 0, | ||
1652 | .ksize = 16, | ||
1653 | }, { | ||
1654 | .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" | ||
1655 | "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", | ||
1656 | .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" | ||
1657 | "\xe9\x3d\x7e\x11\x73\x93\x17\x2a", | ||
1658 | .digest = "\x07\x0a\x16\xb4\x6b\x4d\x41\x44" | ||
1659 | "\xf7\x9b\xdd\x9d\xd0\x4a\x28\x7c", | ||
1660 | .psize = 16, | ||
1661 | .ksize = 16, | ||
1662 | }, { | ||
1663 | .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" | ||
1664 | "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", | ||
1665 | .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" | ||
1666 | "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" | ||
1667 | "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" | ||
1668 | "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" | ||
1669 | "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11", | ||
1670 | .digest = "\xdf\xa6\x67\x47\xde\x9a\xe6\x30" | ||
1671 | "\x30\xca\x32\x61\x14\x97\xc8\x27", | ||
1672 | .psize = 40, | ||
1673 | .ksize = 16, | ||
1674 | }, { | ||
1675 | .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6" | ||
1676 | "\xab\xf7\x15\x88\x09\xcf\x4f\x3c", | ||
1677 | .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" | ||
1678 | "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" | ||
1679 | "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" | ||
1680 | "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" | ||
1681 | "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" | ||
1682 | "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" | ||
1683 | "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" | ||
1684 | "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", | ||
1685 | .digest = "\x51\xf0\xbe\xbf\x7e\x3b\x9d\x92" | ||
1686 | "\xfc\x49\x74\x17\x79\x36\x3c\xfe", | ||
1687 | .psize = 64, | ||
1688 | .ksize = 16, | ||
1689 | }, { /* From NIST Special Publication 800-38B, AES-256 */ | ||
1690 | .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe" | ||
1691 | "\x2b\x73\xae\xf0\x85\x7d\x77\x81" | ||
1692 | "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" | ||
1693 | "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", | ||
1694 | .plaintext = zeroed_string, | ||
1695 | .digest = "\x02\x89\x62\xf6\x1b\x7b\xf8\x9e" | ||
1696 | "\xfc\x6b\x55\x1f\x46\x67\xd9\x83", | ||
1697 | .psize = 0, | ||
1698 | .ksize = 32, | ||
1699 | }, { | ||
1700 | .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe" | ||
1701 | "\x2b\x73\xae\xf0\x85\x7d\x77\x81" | ||
1702 | "\x1f\x35\x2c\x07\x3b\x61\x08\xd7" | ||
1703 | "\x2d\x98\x10\xa3\x09\x14\xdf\xf4", | ||
1704 | .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" | ||
1705 | "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" | ||
1706 | "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" | ||
1707 | "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" | ||
1708 | "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11" | ||
1709 | "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" | ||
1710 | "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17" | ||
1711 | "\xad\x2b\x41\x7b\xe6\x6c\x37\x10", | ||
1712 | .digest = "\xe1\x99\x21\x90\x54\x9f\x6e\xd5" | ||
1713 | "\x69\x6a\x2c\x05\x6c\x31\x54\x10", | ||
1714 | .psize = 64, | ||
1715 | .ksize = 32, | ||
1716 | } | ||
1717 | }; | ||
1718 | |||
1719 | #define CMAC_DES3_EDE_TEST_VECTORS 4 | ||
1720 | |||
1721 | static struct hash_testvec des3_ede_cmac64_tv_template[] = { | ||
1722 | /* | ||
1723 | * From NIST Special Publication 800-38B, Three Key TDEA | ||
1724 | * Corrected test vectors from: | ||
1725 | * http://csrc.nist.gov/publications/nistpubs/800-38B/Updated_CMAC_Examples.pdf | ||
1726 | */ | ||
1727 | { | ||
1728 | .key = "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62" | ||
1729 | "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58" | ||
1730 | "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5", | ||
1731 | .plaintext = zeroed_string, | ||
1732 | .digest = "\xb7\xa6\x88\xe1\x22\xff\xaf\x95", | ||
1733 | .psize = 0, | ||
1734 | .ksize = 24, | ||
1735 | }, { | ||
1736 | .key = "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62" | ||
1737 | "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58" | ||
1738 | "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5", | ||
1739 | .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96", | ||
1740 | .digest = "\x8e\x8f\x29\x31\x36\x28\x37\x97", | ||
1741 | .psize = 8, | ||
1742 | .ksize = 24, | ||
1743 | }, { | ||
1744 | .key = "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62" | ||
1745 | "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58" | ||
1746 | "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5", | ||
1747 | .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" | ||
1748 | "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" | ||
1749 | "\xae\x2d\x8a\x57", | ||
1750 | .digest = "\x74\x3d\xdb\xe0\xce\x2d\xc2\xed", | ||
1751 | .psize = 20, | ||
1752 | .ksize = 24, | ||
1753 | }, { | ||
1754 | .key = "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62" | ||
1755 | "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58" | ||
1756 | "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5", | ||
1757 | .plaintext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96" | ||
1758 | "\xe9\x3d\x7e\x11\x73\x93\x17\x2a" | ||
1759 | "\xae\x2d\x8a\x57\x1e\x03\xac\x9c" | ||
1760 | "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51", | ||
1761 | .digest = "\x33\xe6\xb1\x09\x24\x00\xea\xe5", | ||
1762 | .psize = 32, | ||
1763 | .ksize = 24, | ||
1764 | } | ||
1765 | }; | ||
1766 | |||
1642 | #define XCBC_AES_TEST_VECTORS 6 | 1767 | #define XCBC_AES_TEST_VECTORS 6 |
1643 | 1768 | ||
1644 | static struct hash_testvec aes_xcbc128_tv_template[] = { | 1769 | static struct hash_testvec aes_xcbc128_tv_template[] = { |
@@ -12680,6 +12805,8 @@ static struct cipher_testvec cast6_xts_dec_tv_template[] = { | |||
12680 | #define AES_GCM_DEC_TEST_VECTORS 8 | 12805 | #define AES_GCM_DEC_TEST_VECTORS 8 |
12681 | #define AES_GCM_4106_ENC_TEST_VECTORS 7 | 12806 | #define AES_GCM_4106_ENC_TEST_VECTORS 7 |
12682 | #define AES_GCM_4106_DEC_TEST_VECTORS 7 | 12807 | #define AES_GCM_4106_DEC_TEST_VECTORS 7 |
12808 | #define AES_GCM_4543_ENC_TEST_VECTORS 1 | ||
12809 | #define AES_GCM_4543_DEC_TEST_VECTORS 2 | ||
12683 | #define AES_CCM_ENC_TEST_VECTORS 7 | 12810 | #define AES_CCM_ENC_TEST_VECTORS 7 |
12684 | #define AES_CCM_DEC_TEST_VECTORS 7 | 12811 | #define AES_CCM_DEC_TEST_VECTORS 7 |
12685 | #define AES_CCM_4309_ENC_TEST_VECTORS 7 | 12812 | #define AES_CCM_4309_ENC_TEST_VECTORS 7 |
@@ -18193,6 +18320,93 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = { | |||
18193 | } | 18320 | } |
18194 | }; | 18321 | }; |
18195 | 18322 | ||
18323 | static struct aead_testvec aes_gcm_rfc4543_enc_tv_template[] = { | ||
18324 | { /* From draft-mcgrew-gcm-test-01 */ | ||
18325 | .key = "\x4c\x80\xcd\xef\xbb\x5d\x10\xda" | ||
18326 | "\x90\x6a\xc7\x3c\x36\x13\xa6\x34" | ||
18327 | "\x22\x43\x3c\x64", | ||
18328 | .klen = 20, | ||
18329 | .iv = zeroed_string, | ||
18330 | .assoc = "\x00\x00\x43\x21\x00\x00\x00\x07", | ||
18331 | .alen = 8, | ||
18332 | .input = "\x45\x00\x00\x30\xda\x3a\x00\x00" | ||
18333 | "\x80\x01\xdf\x3b\xc0\xa8\x00\x05" | ||
18334 | "\xc0\xa8\x00\x01\x08\x00\xc6\xcd" | ||
18335 | "\x02\x00\x07\x00\x61\x62\x63\x64" | ||
18336 | "\x65\x66\x67\x68\x69\x6a\x6b\x6c" | ||
18337 | "\x6d\x6e\x6f\x70\x71\x72\x73\x74" | ||
18338 | "\x01\x02\x02\x01", | ||
18339 | .ilen = 52, | ||
18340 | .result = "\x45\x00\x00\x30\xda\x3a\x00\x00" | ||
18341 | "\x80\x01\xdf\x3b\xc0\xa8\x00\x05" | ||
18342 | "\xc0\xa8\x00\x01\x08\x00\xc6\xcd" | ||
18343 | "\x02\x00\x07\x00\x61\x62\x63\x64" | ||
18344 | "\x65\x66\x67\x68\x69\x6a\x6b\x6c" | ||
18345 | "\x6d\x6e\x6f\x70\x71\x72\x73\x74" | ||
18346 | "\x01\x02\x02\x01\xf2\xa9\xa8\x36" | ||
18347 | "\xe1\x55\x10\x6a\xa8\xdc\xd6\x18" | ||
18348 | "\xe4\x09\x9a\xaa", | ||
18349 | .rlen = 68, | ||
18350 | } | ||
18351 | }; | ||
18352 | |||
18353 | static struct aead_testvec aes_gcm_rfc4543_dec_tv_template[] = { | ||
18354 | { /* From draft-mcgrew-gcm-test-01 */ | ||
18355 | .key = "\x4c\x80\xcd\xef\xbb\x5d\x10\xda" | ||
18356 | "\x90\x6a\xc7\x3c\x36\x13\xa6\x34" | ||
18357 | "\x22\x43\x3c\x64", | ||
18358 | .klen = 20, | ||
18359 | .iv = zeroed_string, | ||
18360 | .assoc = "\x00\x00\x43\x21\x00\x00\x00\x07", | ||
18361 | .alen = 8, | ||
18362 | .input = "\x45\x00\x00\x30\xda\x3a\x00\x00" | ||
18363 | "\x80\x01\xdf\x3b\xc0\xa8\x00\x05" | ||
18364 | "\xc0\xa8\x00\x01\x08\x00\xc6\xcd" | ||
18365 | "\x02\x00\x07\x00\x61\x62\x63\x64" | ||
18366 | "\x65\x66\x67\x68\x69\x6a\x6b\x6c" | ||
18367 | "\x6d\x6e\x6f\x70\x71\x72\x73\x74" | ||
18368 | "\x01\x02\x02\x01\xf2\xa9\xa8\x36" | ||
18369 | "\xe1\x55\x10\x6a\xa8\xdc\xd6\x18" | ||
18370 | "\xe4\x09\x9a\xaa", | ||
18371 | .ilen = 68, | ||
18372 | .result = "\x45\x00\x00\x30\xda\x3a\x00\x00" | ||
18373 | "\x80\x01\xdf\x3b\xc0\xa8\x00\x05" | ||
18374 | "\xc0\xa8\x00\x01\x08\x00\xc6\xcd" | ||
18375 | "\x02\x00\x07\x00\x61\x62\x63\x64" | ||
18376 | "\x65\x66\x67\x68\x69\x6a\x6b\x6c" | ||
18377 | "\x6d\x6e\x6f\x70\x71\x72\x73\x74" | ||
18378 | "\x01\x02\x02\x01", | ||
18379 | .rlen = 52, | ||
18380 | }, { /* nearly same as previous, but should fail */ | ||
18381 | .key = "\x4c\x80\xcd\xef\xbb\x5d\x10\xda" | ||
18382 | "\x90\x6a\xc7\x3c\x36\x13\xa6\x34" | ||
18383 | "\x22\x43\x3c\x64", | ||
18384 | .klen = 20, | ||
18385 | .iv = zeroed_string, | ||
18386 | .assoc = "\x00\x00\x43\x21\x00\x00\x00\x07", | ||
18387 | .alen = 8, | ||
18388 | .input = "\x45\x00\x00\x30\xda\x3a\x00\x00" | ||
18389 | "\x80\x01\xdf\x3b\xc0\xa8\x00\x05" | ||
18390 | "\xc0\xa8\x00\x01\x08\x00\xc6\xcd" | ||
18391 | "\x02\x00\x07\x00\x61\x62\x63\x64" | ||
18392 | "\x65\x66\x67\x68\x69\x6a\x6b\x6c" | ||
18393 | "\x6d\x6e\x6f\x70\x71\x72\x73\x74" | ||
18394 | "\x01\x02\x02\x01\xf2\xa9\xa8\x36" | ||
18395 | "\xe1\x55\x10\x6a\xa8\xdc\xd6\x18" | ||
18396 | "\x00\x00\x00\x00", | ||
18397 | .ilen = 68, | ||
18398 | .novrfy = 1, | ||
18399 | .result = "\x45\x00\x00\x30\xda\x3a\x00\x00" | ||
18400 | "\x80\x01\xdf\x3b\xc0\xa8\x00\x05" | ||
18401 | "\xc0\xa8\x00\x01\x08\x00\xc6\xcd" | ||
18402 | "\x02\x00\x07\x00\x61\x62\x63\x64" | ||
18403 | "\x65\x66\x67\x68\x69\x6a\x6b\x6c" | ||
18404 | "\x6d\x6e\x6f\x70\x71\x72\x73\x74" | ||
18405 | "\x01\x02\x02\x01", | ||
18406 | .rlen = 52, | ||
18407 | }, | ||
18408 | }; | ||
18409 | |||
18196 | static struct aead_testvec aes_ccm_enc_tv_template[] = { | 18410 | static struct aead_testvec aes_ccm_enc_tv_template[] = { |
18197 | { /* From RFC 3610 */ | 18411 | { /* From RFC 3610 */ |
18198 | .key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" | 18412 | .key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" |
@@ -20783,8 +20997,72 @@ static struct cipher_testvec camellia_enc_tv_template[] = { | |||
20783 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" | 20997 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" |
20784 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" | 20998 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" |
20785 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" | 20999 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" |
20786 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", | 21000 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" |
20787 | .ilen = 496, | 21001 | "\x2B\xC2\x59\xF0\x64\xFB\x92\x06" |
21002 | "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78" | ||
21003 | "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA" | ||
21004 | "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C" | ||
21005 | "\xF3\x67\xFE\x95\x09\xA0\x37\xCE" | ||
21006 | "\x42\xD9\x70\x07\x7B\x12\xA9\x1D" | ||
21007 | "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F" | ||
21008 | "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01" | ||
21009 | "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73" | ||
21010 | "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5" | ||
21011 | "\x59\xF0\x87\x1E\x92\x29\xC0\x34" | ||
21012 | "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6" | ||
21013 | "\x3D\xD4\x48\xDF\x76\x0D\x81\x18" | ||
21014 | "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A" | ||
21015 | "\x21\x95\x2C\xC3\x37\xCE\x65\xFC" | ||
21016 | "\x70\x07\x9E\x12\xA9\x40\xD7\x4B" | ||
21017 | "\xE2\x79\x10\x84\x1B\xB2\x26\xBD" | ||
21018 | "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F" | ||
21019 | "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1" | ||
21020 | "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13" | ||
21021 | "\x87\x1E\xB5\x29\xC0\x57\xEE\x62" | ||
21022 | "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4" | ||
21023 | "\x6B\x02\x76\x0D\xA4\x18\xAF\x46" | ||
21024 | "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8" | ||
21025 | "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07" | ||
21026 | "\x9E\x35\xCC\x40\xD7\x6E\x05\x79" | ||
21027 | "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB" | ||
21028 | "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D" | ||
21029 | "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF" | ||
21030 | "\x43\xDA\x71\x08\x7C\x13\xAA\x1E" | ||
21031 | "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90" | ||
21032 | "\x27\xBE\x32\xC9\x60\xF7\x6B\x02" | ||
21033 | "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74" | ||
21034 | "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6" | ||
21035 | "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35" | ||
21036 | "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7" | ||
21037 | "\x3E\xD5\x49\xE0\x77\x0E\x82\x19" | ||
21038 | "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B" | ||
21039 | "\x22\x96\x2D\xC4\x38\xCF\x66\xFD" | ||
21040 | "\x71\x08\x9F\x13\xAA\x41\xD8\x4C" | ||
21041 | "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE" | ||
21042 | "\x55\xEC\x60\xF7\x8E\x02\x99\x30" | ||
21043 | "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2" | ||
21044 | "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14" | ||
21045 | "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63" | ||
21046 | "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5" | ||
21047 | "\x6C\x03\x77\x0E\xA5\x19\xB0\x47" | ||
21048 | "\xDE\x52\xE9\x80\x17\x8B\x22\xB9" | ||
21049 | "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08" | ||
21050 | "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A" | ||
21051 | "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC" | ||
21052 | "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E" | ||
21053 | "\xF5\x69\x00\x97\x0B\xA2\x39\xD0" | ||
21054 | "\x44\xDB\x72\x09\x7D\x14\xAB\x1F" | ||
21055 | "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91" | ||
21056 | "\x28\xBF\x33\xCA\x61\xF8\x6C\x03" | ||
21057 | "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75" | ||
21058 | "\x0C\x80\x17\xAE\x22\xB9\x50\xE7" | ||
21059 | "\x5B\xF2\x89\x20\x94\x2B\xC2\x36" | ||
21060 | "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8" | ||
21061 | "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A" | ||
21062 | "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C" | ||
21063 | "\x00\x97\x2E\xC5\x39\xD0\x67\xFE" | ||
21064 | "\x72\x09\xA0\x14\xAB\x42\xD9\x4D", | ||
21065 | .ilen = 1008, | ||
20788 | .result = "\xED\xCD\xDB\xB8\x68\xCE\xBD\xEA" | 21066 | .result = "\xED\xCD\xDB\xB8\x68\xCE\xBD\xEA" |
20789 | "\x9D\x9D\xCD\x9F\x4F\xFC\x4D\xB7" | 21067 | "\x9D\x9D\xCD\x9F\x4F\xFC\x4D\xB7" |
20790 | "\xA5\xFF\x6F\x43\x0F\xBA\x32\x04" | 21068 | "\xA5\xFF\x6F\x43\x0F\xBA\x32\x04" |
@@ -20846,11 +21124,75 @@ static struct cipher_testvec camellia_enc_tv_template[] = { | |||
20846 | "\x2C\x35\x1B\x38\x85\x7D\xE8\xF3" | 21124 | "\x2C\x35\x1B\x38\x85\x7D\xE8\xF3" |
20847 | "\x87\x4F\xDA\xD8\x5F\xFC\xB6\x44" | 21125 | "\x87\x4F\xDA\xD8\x5F\xFC\xB6\x44" |
20848 | "\xD0\xE3\x9B\x8B\xBF\xD6\xB8\xC4" | 21126 | "\xD0\xE3\x9B\x8B\xBF\xD6\xB8\xC4" |
20849 | "\x73\xAE\x1D\x8B\x5B\x74\x8B\xCB", | 21127 | "\x73\xAE\x1D\x8B\x5B\x74\x8B\xCB" |
20850 | .rlen = 496, | 21128 | "\xA4\xAD\xCF\x5D\xD4\x58\xC9\xCD" |
21129 | "\xF7\x90\x68\xCF\xC9\x11\x52\x3E" | ||
21130 | "\xE8\xA1\xA3\x78\x8B\xD0\xAC\x0A" | ||
21131 | "\xD4\xC9\xA3\xA5\x55\x30\xC8\x3E" | ||
21132 | "\xED\x28\x39\xE9\x63\xED\x41\x70" | ||
21133 | "\x51\xE3\xC4\xA0\xFC\xD5\x43\xCB" | ||
21134 | "\x4D\x65\xC8\xFD\x3A\x91\x8F\x60" | ||
21135 | "\x8A\xA6\x6D\x9D\x3E\x01\x23\x4B" | ||
21136 | "\x50\x47\xC9\xDC\x9B\xDE\x37\xC5" | ||
21137 | "\xBF\x67\xB1\x6B\x78\x38\xD5\x7E" | ||
21138 | "\xB6\xFF\x67\x83\x3B\x6E\xBE\x23" | ||
21139 | "\x45\xFA\x1D\x69\x44\xFD\xC6\xB9" | ||
21140 | "\xD0\x4A\x92\xD1\xBE\xF6\x4A\xB7" | ||
21141 | "\xCA\xA8\xA2\x9E\x13\x87\x57\x92" | ||
21142 | "\x64\x7C\x85\x0B\xB3\x29\x37\xD8" | ||
21143 | "\xE6\xAA\xAF\xC4\x03\x67\xA3\xBF" | ||
21144 | "\x2E\x45\x83\xB6\xD8\x54\x00\x89" | ||
21145 | "\xF6\xBC\x3A\x7A\x88\x58\x51\xED" | ||
21146 | "\xF4\x4E\x01\xA5\xC3\x2E\xD9\x42" | ||
21147 | "\xBD\x6E\x0D\x0B\x21\xB0\x1A\xCC" | ||
21148 | "\xA4\xD3\x3F\xDC\x9B\x81\xD8\xF1" | ||
21149 | "\xEA\x7A\x6A\xB7\x07\xC9\x6D\x91" | ||
21150 | "\x6D\x3A\xF5\x5F\xA6\xFF\x87\x1E" | ||
21151 | "\x3F\xDD\xC0\x72\xEA\xAC\x08\x15" | ||
21152 | "\x21\xE6\xC6\xB6\x0D\xD8\x51\x86" | ||
21153 | "\x2A\x03\x73\xF7\x29\xD4\xC4\xE4" | ||
21154 | "\x7F\x95\x10\xF7\xAB\x3F\x92\x23" | ||
21155 | "\xD3\xCE\x9C\x2E\x46\x3B\x63\x43" | ||
21156 | "\xBB\xC2\x82\x7A\x83\xD5\x55\xE2" | ||
21157 | "\xE7\x9B\x2F\x92\xAF\xFD\x81\x56" | ||
21158 | "\x79\xFD\x3E\xF9\x46\xE0\x25\xD4" | ||
21159 | "\x38\xDE\xBC\x2C\xC4\x7A\x2A\x8F" | ||
21160 | "\x94\x4F\xD0\xAD\x9B\x37\x18\xD4" | ||
21161 | "\x0E\x4D\x0F\x02\x3A\xDC\x5A\xA2" | ||
21162 | "\x39\x25\x55\x20\x5A\xA6\x02\x9F" | ||
21163 | "\xE6\x77\x21\x77\xE5\x4B\x7B\x0B" | ||
21164 | "\x30\xF8\x5F\x33\x0F\x49\xCD\xFF" | ||
21165 | "\xF2\xE4\x35\xF9\xF0\x63\xC3\x7E" | ||
21166 | "\xF1\xA6\x73\xB4\xDF\xE7\xBB\x78" | ||
21167 | "\xFF\x21\xA9\xF3\xF3\xCF\x5D\xBA" | ||
21168 | "\xED\x87\x98\xAC\xFE\x48\x97\x6D" | ||
21169 | "\xA6\x7F\x69\x31\xB1\xC4\xFF\x14" | ||
21170 | "\xC6\x76\xD4\x10\xDD\xF6\x49\x2C" | ||
21171 | "\x9C\xC8\x6D\x76\xC0\x8F\x5F\x55" | ||
21172 | "\x2F\x3C\x8A\x30\xAA\xC3\x16\x55" | ||
21173 | "\xC6\xFC\x8D\x8B\xB9\xE5\x80\x6C" | ||
21174 | "\xC8\x7E\xBD\x65\x58\x36\xD5\xBC" | ||
21175 | "\xF0\x33\x52\x29\x70\xF9\x5C\xE9" | ||
21176 | "\xAC\x1F\xB5\x73\x56\x66\x54\xAF" | ||
21177 | "\x1B\x8F\x7D\xED\xAB\x03\xCE\xE3" | ||
21178 | "\xAE\x47\xB6\x69\x86\xE9\x01\x31" | ||
21179 | "\x83\x18\x3D\xF4\x74\x7B\xF9\x42" | ||
21180 | "\x4C\xFD\x75\x4A\x6D\xF0\x03\xA6" | ||
21181 | "\x2B\x20\x63\xDA\x49\x65\x5E\x8B" | ||
21182 | "\xC0\x19\xE3\x8D\xD9\xF3\xB0\x34" | ||
21183 | "\xD3\x52\xFC\x68\x00\x43\x1B\x37" | ||
21184 | "\x31\x93\x51\x1C\x63\x97\x70\xB0" | ||
21185 | "\x99\x78\x83\x13\xFD\xCF\x53\x81" | ||
21186 | "\x36\x46\xB5\x42\x52\x2F\x32\xEB" | ||
21187 | "\x4A\x3D\xF1\x8F\x1C\x54\x2E\xFC" | ||
21188 | "\x41\x75\x5A\x8C\x8E\x6F\xE7\x1A" | ||
21189 | "\xAE\xEF\x3E\x82\x12\x0B\x74\x72" | ||
21190 | "\xF8\xB2\xAA\x7A\xD6\xFF\xFA\x55" | ||
21191 | "\x33\x1A\xBB\xD3\xA2\x7E\x97\x66", | ||
21192 | .rlen = 1008, | ||
20851 | .also_non_np = 1, | 21193 | .also_non_np = 1, |
20852 | .np = 2, | 21194 | .np = 2, |
20853 | .tap = { 496 - 16, 16 }, | 21195 | .tap = { 1008 - 16, 16 }, |
20854 | }, | 21196 | }, |
20855 | }; | 21197 | }; |
20856 | 21198 | ||
@@ -20955,8 +21297,72 @@ static struct cipher_testvec camellia_dec_tv_template[] = { | |||
20955 | "\x2C\x35\x1B\x38\x85\x7D\xE8\xF3" | 21297 | "\x2C\x35\x1B\x38\x85\x7D\xE8\xF3" |
20956 | "\x87\x4F\xDA\xD8\x5F\xFC\xB6\x44" | 21298 | "\x87\x4F\xDA\xD8\x5F\xFC\xB6\x44" |
20957 | "\xD0\xE3\x9B\x8B\xBF\xD6\xB8\xC4" | 21299 | "\xD0\xE3\x9B\x8B\xBF\xD6\xB8\xC4" |
20958 | "\x73\xAE\x1D\x8B\x5B\x74\x8B\xCB", | 21300 | "\x73\xAE\x1D\x8B\x5B\x74\x8B\xCB" |
20959 | .ilen = 496, | 21301 | "\xA4\xAD\xCF\x5D\xD4\x58\xC9\xCD" |
21302 | "\xF7\x90\x68\xCF\xC9\x11\x52\x3E" | ||
21303 | "\xE8\xA1\xA3\x78\x8B\xD0\xAC\x0A" | ||
21304 | "\xD4\xC9\xA3\xA5\x55\x30\xC8\x3E" | ||
21305 | "\xED\x28\x39\xE9\x63\xED\x41\x70" | ||
21306 | "\x51\xE3\xC4\xA0\xFC\xD5\x43\xCB" | ||
21307 | "\x4D\x65\xC8\xFD\x3A\x91\x8F\x60" | ||
21308 | "\x8A\xA6\x6D\x9D\x3E\x01\x23\x4B" | ||
21309 | "\x50\x47\xC9\xDC\x9B\xDE\x37\xC5" | ||
21310 | "\xBF\x67\xB1\x6B\x78\x38\xD5\x7E" | ||
21311 | "\xB6\xFF\x67\x83\x3B\x6E\xBE\x23" | ||
21312 | "\x45\xFA\x1D\x69\x44\xFD\xC6\xB9" | ||
21313 | "\xD0\x4A\x92\xD1\xBE\xF6\x4A\xB7" | ||
21314 | "\xCA\xA8\xA2\x9E\x13\x87\x57\x92" | ||
21315 | "\x64\x7C\x85\x0B\xB3\x29\x37\xD8" | ||
21316 | "\xE6\xAA\xAF\xC4\x03\x67\xA3\xBF" | ||
21317 | "\x2E\x45\x83\xB6\xD8\x54\x00\x89" | ||
21318 | "\xF6\xBC\x3A\x7A\x88\x58\x51\xED" | ||
21319 | "\xF4\x4E\x01\xA5\xC3\x2E\xD9\x42" | ||
21320 | "\xBD\x6E\x0D\x0B\x21\xB0\x1A\xCC" | ||
21321 | "\xA4\xD3\x3F\xDC\x9B\x81\xD8\xF1" | ||
21322 | "\xEA\x7A\x6A\xB7\x07\xC9\x6D\x91" | ||
21323 | "\x6D\x3A\xF5\x5F\xA6\xFF\x87\x1E" | ||
21324 | "\x3F\xDD\xC0\x72\xEA\xAC\x08\x15" | ||
21325 | "\x21\xE6\xC6\xB6\x0D\xD8\x51\x86" | ||
21326 | "\x2A\x03\x73\xF7\x29\xD4\xC4\xE4" | ||
21327 | "\x7F\x95\x10\xF7\xAB\x3F\x92\x23" | ||
21328 | "\xD3\xCE\x9C\x2E\x46\x3B\x63\x43" | ||
21329 | "\xBB\xC2\x82\x7A\x83\xD5\x55\xE2" | ||
21330 | "\xE7\x9B\x2F\x92\xAF\xFD\x81\x56" | ||
21331 | "\x79\xFD\x3E\xF9\x46\xE0\x25\xD4" | ||
21332 | "\x38\xDE\xBC\x2C\xC4\x7A\x2A\x8F" | ||
21333 | "\x94\x4F\xD0\xAD\x9B\x37\x18\xD4" | ||
21334 | "\x0E\x4D\x0F\x02\x3A\xDC\x5A\xA2" | ||
21335 | "\x39\x25\x55\x20\x5A\xA6\x02\x9F" | ||
21336 | "\xE6\x77\x21\x77\xE5\x4B\x7B\x0B" | ||
21337 | "\x30\xF8\x5F\x33\x0F\x49\xCD\xFF" | ||
21338 | "\xF2\xE4\x35\xF9\xF0\x63\xC3\x7E" | ||
21339 | "\xF1\xA6\x73\xB4\xDF\xE7\xBB\x78" | ||
21340 | "\xFF\x21\xA9\xF3\xF3\xCF\x5D\xBA" | ||
21341 | "\xED\x87\x98\xAC\xFE\x48\x97\x6D" | ||
21342 | "\xA6\x7F\x69\x31\xB1\xC4\xFF\x14" | ||
21343 | "\xC6\x76\xD4\x10\xDD\xF6\x49\x2C" | ||
21344 | "\x9C\xC8\x6D\x76\xC0\x8F\x5F\x55" | ||
21345 | "\x2F\x3C\x8A\x30\xAA\xC3\x16\x55" | ||
21346 | "\xC6\xFC\x8D\x8B\xB9\xE5\x80\x6C" | ||
21347 | "\xC8\x7E\xBD\x65\x58\x36\xD5\xBC" | ||
21348 | "\xF0\x33\x52\x29\x70\xF9\x5C\xE9" | ||
21349 | "\xAC\x1F\xB5\x73\x56\x66\x54\xAF" | ||
21350 | "\x1B\x8F\x7D\xED\xAB\x03\xCE\xE3" | ||
21351 | "\xAE\x47\xB6\x69\x86\xE9\x01\x31" | ||
21352 | "\x83\x18\x3D\xF4\x74\x7B\xF9\x42" | ||
21353 | "\x4C\xFD\x75\x4A\x6D\xF0\x03\xA6" | ||
21354 | "\x2B\x20\x63\xDA\x49\x65\x5E\x8B" | ||
21355 | "\xC0\x19\xE3\x8D\xD9\xF3\xB0\x34" | ||
21356 | "\xD3\x52\xFC\x68\x00\x43\x1B\x37" | ||
21357 | "\x31\x93\x51\x1C\x63\x97\x70\xB0" | ||
21358 | "\x99\x78\x83\x13\xFD\xCF\x53\x81" | ||
21359 | "\x36\x46\xB5\x42\x52\x2F\x32\xEB" | ||
21360 | "\x4A\x3D\xF1\x8F\x1C\x54\x2E\xFC" | ||
21361 | "\x41\x75\x5A\x8C\x8E\x6F\xE7\x1A" | ||
21362 | "\xAE\xEF\x3E\x82\x12\x0B\x74\x72" | ||
21363 | "\xF8\xB2\xAA\x7A\xD6\xFF\xFA\x55" | ||
21364 | "\x33\x1A\xBB\xD3\xA2\x7E\x97\x66", | ||
21365 | .ilen = 1008, | ||
20960 | .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" | 21366 | .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" |
20961 | "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" | 21367 | "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" |
20962 | "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" | 21368 | "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" |
@@ -21018,11 +21424,75 @@ static struct cipher_testvec camellia_dec_tv_template[] = { | |||
21018 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" | 21424 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" |
21019 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" | 21425 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" |
21020 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" | 21426 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" |
21021 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", | 21427 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" |
21022 | .rlen = 496, | 21428 | "\x2B\xC2\x59\xF0\x64\xFB\x92\x06" |
21429 | "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78" | ||
21430 | "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA" | ||
21431 | "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C" | ||
21432 | "\xF3\x67\xFE\x95\x09\xA0\x37\xCE" | ||
21433 | "\x42\xD9\x70\x07\x7B\x12\xA9\x1D" | ||
21434 | "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F" | ||
21435 | "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01" | ||
21436 | "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73" | ||
21437 | "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5" | ||
21438 | "\x59\xF0\x87\x1E\x92\x29\xC0\x34" | ||
21439 | "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6" | ||
21440 | "\x3D\xD4\x48\xDF\x76\x0D\x81\x18" | ||
21441 | "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A" | ||
21442 | "\x21\x95\x2C\xC3\x37\xCE\x65\xFC" | ||
21443 | "\x70\x07\x9E\x12\xA9\x40\xD7\x4B" | ||
21444 | "\xE2\x79\x10\x84\x1B\xB2\x26\xBD" | ||
21445 | "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F" | ||
21446 | "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1" | ||
21447 | "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13" | ||
21448 | "\x87\x1E\xB5\x29\xC0\x57\xEE\x62" | ||
21449 | "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4" | ||
21450 | "\x6B\x02\x76\x0D\xA4\x18\xAF\x46" | ||
21451 | "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8" | ||
21452 | "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07" | ||
21453 | "\x9E\x35\xCC\x40\xD7\x6E\x05\x79" | ||
21454 | "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB" | ||
21455 | "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D" | ||
21456 | "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF" | ||
21457 | "\x43\xDA\x71\x08\x7C\x13\xAA\x1E" | ||
21458 | "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90" | ||
21459 | "\x27\xBE\x32\xC9\x60\xF7\x6B\x02" | ||
21460 | "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74" | ||
21461 | "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6" | ||
21462 | "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35" | ||
21463 | "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7" | ||
21464 | "\x3E\xD5\x49\xE0\x77\x0E\x82\x19" | ||
21465 | "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B" | ||
21466 | "\x22\x96\x2D\xC4\x38\xCF\x66\xFD" | ||
21467 | "\x71\x08\x9F\x13\xAA\x41\xD8\x4C" | ||
21468 | "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE" | ||
21469 | "\x55\xEC\x60\xF7\x8E\x02\x99\x30" | ||
21470 | "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2" | ||
21471 | "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14" | ||
21472 | "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63" | ||
21473 | "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5" | ||
21474 | "\x6C\x03\x77\x0E\xA5\x19\xB0\x47" | ||
21475 | "\xDE\x52\xE9\x80\x17\x8B\x22\xB9" | ||
21476 | "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08" | ||
21477 | "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A" | ||
21478 | "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC" | ||
21479 | "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E" | ||
21480 | "\xF5\x69\x00\x97\x0B\xA2\x39\xD0" | ||
21481 | "\x44\xDB\x72\x09\x7D\x14\xAB\x1F" | ||
21482 | "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91" | ||
21483 | "\x28\xBF\x33\xCA\x61\xF8\x6C\x03" | ||
21484 | "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75" | ||
21485 | "\x0C\x80\x17\xAE\x22\xB9\x50\xE7" | ||
21486 | "\x5B\xF2\x89\x20\x94\x2B\xC2\x36" | ||
21487 | "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8" | ||
21488 | "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A" | ||
21489 | "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C" | ||
21490 | "\x00\x97\x2E\xC5\x39\xD0\x67\xFE" | ||
21491 | "\x72\x09\xA0\x14\xAB\x42\xD9\x4D", | ||
21492 | .rlen = 1008, | ||
21023 | .also_non_np = 1, | 21493 | .also_non_np = 1, |
21024 | .np = 2, | 21494 | .np = 2, |
21025 | .tap = { 496 - 16, 16 }, | 21495 | .tap = { 1008 - 16, 16 }, |
21026 | }, | 21496 | }, |
21027 | }; | 21497 | }; |
21028 | 21498 | ||
@@ -21123,8 +21593,72 @@ static struct cipher_testvec camellia_cbc_enc_tv_template[] = { | |||
21123 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" | 21593 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" |
21124 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" | 21594 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" |
21125 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" | 21595 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" |
21126 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", | 21596 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" |
21127 | .ilen = 496, | 21597 | "\x2B\xC2\x59\xF0\x64\xFB\x92\x06" |
21598 | "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78" | ||
21599 | "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA" | ||
21600 | "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C" | ||
21601 | "\xF3\x67\xFE\x95\x09\xA0\x37\xCE" | ||
21602 | "\x42\xD9\x70\x07\x7B\x12\xA9\x1D" | ||
21603 | "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F" | ||
21604 | "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01" | ||
21605 | "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73" | ||
21606 | "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5" | ||
21607 | "\x59\xF0\x87\x1E\x92\x29\xC0\x34" | ||
21608 | "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6" | ||
21609 | "\x3D\xD4\x48\xDF\x76\x0D\x81\x18" | ||
21610 | "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A" | ||
21611 | "\x21\x95\x2C\xC3\x37\xCE\x65\xFC" | ||
21612 | "\x70\x07\x9E\x12\xA9\x40\xD7\x4B" | ||
21613 | "\xE2\x79\x10\x84\x1B\xB2\x26\xBD" | ||
21614 | "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F" | ||
21615 | "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1" | ||
21616 | "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13" | ||
21617 | "\x87\x1E\xB5\x29\xC0\x57\xEE\x62" | ||
21618 | "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4" | ||
21619 | "\x6B\x02\x76\x0D\xA4\x18\xAF\x46" | ||
21620 | "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8" | ||
21621 | "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07" | ||
21622 | "\x9E\x35\xCC\x40\xD7\x6E\x05\x79" | ||
21623 | "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB" | ||
21624 | "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D" | ||
21625 | "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF" | ||
21626 | "\x43\xDA\x71\x08\x7C\x13\xAA\x1E" | ||
21627 | "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90" | ||
21628 | "\x27\xBE\x32\xC9\x60\xF7\x6B\x02" | ||
21629 | "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74" | ||
21630 | "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6" | ||
21631 | "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35" | ||
21632 | "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7" | ||
21633 | "\x3E\xD5\x49\xE0\x77\x0E\x82\x19" | ||
21634 | "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B" | ||
21635 | "\x22\x96\x2D\xC4\x38\xCF\x66\xFD" | ||
21636 | "\x71\x08\x9F\x13\xAA\x41\xD8\x4C" | ||
21637 | "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE" | ||
21638 | "\x55\xEC\x60\xF7\x8E\x02\x99\x30" | ||
21639 | "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2" | ||
21640 | "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14" | ||
21641 | "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63" | ||
21642 | "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5" | ||
21643 | "\x6C\x03\x77\x0E\xA5\x19\xB0\x47" | ||
21644 | "\xDE\x52\xE9\x80\x17\x8B\x22\xB9" | ||
21645 | "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08" | ||
21646 | "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A" | ||
21647 | "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC" | ||
21648 | "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E" | ||
21649 | "\xF5\x69\x00\x97\x0B\xA2\x39\xD0" | ||
21650 | "\x44\xDB\x72\x09\x7D\x14\xAB\x1F" | ||
21651 | "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91" | ||
21652 | "\x28\xBF\x33\xCA\x61\xF8\x6C\x03" | ||
21653 | "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75" | ||
21654 | "\x0C\x80\x17\xAE\x22\xB9\x50\xE7" | ||
21655 | "\x5B\xF2\x89\x20\x94\x2B\xC2\x36" | ||
21656 | "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8" | ||
21657 | "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A" | ||
21658 | "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C" | ||
21659 | "\x00\x97\x2E\xC5\x39\xD0\x67\xFE" | ||
21660 | "\x72\x09\xA0\x14\xAB\x42\xD9\x4D", | ||
21661 | .ilen = 1008, | ||
21128 | .result = "\xCD\x3E\x2A\x3B\x3E\x94\xC5\x77" | 21662 | .result = "\xCD\x3E\x2A\x3B\x3E\x94\xC5\x77" |
21129 | "\xBA\xBB\x5B\xB1\xDE\x7B\xA4\x40" | 21663 | "\xBA\xBB\x5B\xB1\xDE\x7B\xA4\x40" |
21130 | "\x88\x39\xE3\xFD\x94\x4B\x25\x58" | 21664 | "\x88\x39\xE3\xFD\x94\x4B\x25\x58" |
@@ -21186,11 +21720,75 @@ static struct cipher_testvec camellia_cbc_enc_tv_template[] = { | |||
21186 | "\x2D\x1A\x68\xFE\xEC\x92\x94\xDA" | 21720 | "\x2D\x1A\x68\xFE\xEC\x92\x94\xDA" |
21187 | "\x94\x2A\x6F\xD6\xFE\xE5\x76\x97" | 21721 | "\x94\x2A\x6F\xD6\xFE\xE5\x76\x97" |
21188 | "\xF4\x6E\xEE\xCB\x2B\x95\x4E\x36" | 21722 | "\xF4\x6E\xEE\xCB\x2B\x95\x4E\x36" |
21189 | "\x5F\x74\x8C\x86\x5B\x71\xD0\x20", | 21723 | "\x5F\x74\x8C\x86\x5B\x71\xD0\x20" |
21190 | .rlen = 496, | 21724 | "\x78\x1A\x7F\x18\x8C\xD9\xCD\xF5" |
21725 | "\x21\x41\x56\x72\x13\xE1\x86\x07" | ||
21726 | "\x07\x26\xF3\x4F\x7B\xEA\xB5\x18" | ||
21727 | "\xFE\x94\x2D\x9F\xE0\x72\x18\x65" | ||
21728 | "\xB2\xA5\x63\x48\xB4\x13\x22\xF7" | ||
21729 | "\x25\xF1\x80\xA8\x7F\x54\x86\x7B" | ||
21730 | "\x39\xAE\x95\x0C\x09\x32\x22\x2D" | ||
21731 | "\x4D\x73\x39\x0C\x09\x2C\x7C\x10" | ||
21732 | "\xD0\x4B\x53\xF6\x90\xC5\x99\x2F" | ||
21733 | "\x15\xE1\x7F\xC6\xC5\x7A\x52\x14" | ||
21734 | "\x65\xEE\x93\x54\xD0\x66\x15\x3C" | ||
21735 | "\x4C\x68\xFD\x64\x0F\xF9\x10\x39" | ||
21736 | "\x46\x7A\xDD\x97\x20\xEE\xC7\xD2" | ||
21737 | "\x98\x4A\xB6\xE6\xF5\xA8\x1F\x4F" | ||
21738 | "\xDB\xAB\x6D\xD5\x9B\x34\x16\x97" | ||
21739 | "\x2F\x64\xE5\x37\xEF\x0E\xA1\xE9" | ||
21740 | "\xBE\x31\x31\x96\x8B\x40\x18\x75" | ||
21741 | "\x11\x75\x14\x32\xA5\x2D\x1B\x6B" | ||
21742 | "\xDB\x59\xEB\xFA\x3D\x8E\x7C\xC4" | ||
21743 | "\xDE\x68\xC8\x9F\xC9\x99\xE3\xC6" | ||
21744 | "\x71\xB0\x12\x57\x89\x0D\xC0\x2B" | ||
21745 | "\x9F\x12\x6A\x04\x67\xF1\x95\x31" | ||
21746 | "\x59\xFD\x84\x95\x2C\x9C\x5B\xEC" | ||
21747 | "\x09\xB0\x43\x96\x4A\x64\x80\x40" | ||
21748 | "\xB9\x72\x19\xDD\x70\x42\xFA\xB1" | ||
21749 | "\x4A\x2C\x0C\x0A\x60\x6E\xE3\x7C" | ||
21750 | "\x37\x5A\xBE\xA4\x62\xCF\x29\xAB" | ||
21751 | "\x7F\x4D\xA6\xB3\xE2\xB6\x64\xC6" | ||
21752 | "\x33\x0B\xF3\xD5\x01\x38\x74\xA4" | ||
21753 | "\x67\x1E\x75\x68\xC3\xAD\x76\xE9" | ||
21754 | "\xE9\xBC\xF0\xEB\xD8\xFD\x31\x8A" | ||
21755 | "\x5F\xC9\x18\x94\x4B\x86\x66\xFC" | ||
21756 | "\xBD\x0B\x3D\xB3\x9F\xFA\x1F\xD9" | ||
21757 | "\x78\xC4\xE3\x24\x1C\x67\xA2\xF8" | ||
21758 | "\x43\xBC\x76\x75\xBF\x6C\x05\xB3" | ||
21759 | "\x32\xE8\x7C\x80\xDB\xC7\xB6\x61" | ||
21760 | "\x1A\x3E\x2B\xA7\x25\xED\x8F\xA0" | ||
21761 | "\x00\x4B\xF8\x90\xCA\xD8\xFB\x12" | ||
21762 | "\xAC\x1F\x18\xE9\xD2\x5E\xA2\x8E" | ||
21763 | "\xE4\x84\x6B\x9D\xEB\x1E\x6B\xA3" | ||
21764 | "\x7B\xDC\xCE\x15\x97\x27\xB2\x65" | ||
21765 | "\xBC\x0E\x47\xAB\x55\x13\x53\xAB" | ||
21766 | "\x0E\x34\x55\x02\x5F\x27\xC5\x89" | ||
21767 | "\xDF\xC5\x70\xC4\xDD\x76\x82\xEE" | ||
21768 | "\x68\xA6\x09\xB0\xE5\x5E\xF1\x0C" | ||
21769 | "\xE3\xF3\x09\x9B\xFE\x65\x4B\xB8" | ||
21770 | "\x30\xEC\xD5\x7C\x6A\xEC\x1D\xD2" | ||
21771 | "\x93\xB7\xA1\x1A\x02\xD4\xC0\xD6" | ||
21772 | "\x8D\x4D\x83\x9A\xED\x29\x4E\x14" | ||
21773 | "\x86\xD5\x3C\x1A\xD5\xB9\x0A\x6A" | ||
21774 | "\x72\x22\xD5\x92\x38\xF1\xA1\x86" | ||
21775 | "\xB2\x41\x51\xCA\x4E\xAB\x8F\xD3" | ||
21776 | "\x80\x56\xC3\xD7\x65\xE1\xB3\x86" | ||
21777 | "\xCB\xCE\x98\xA1\xD4\x59\x1C\x06" | ||
21778 | "\x01\xED\xF8\x29\x91\x19\x5C\x9A" | ||
21779 | "\xEE\x28\x1B\x48\xD7\x32\xEF\x9F" | ||
21780 | "\x6C\x2B\x66\x4E\x78\xD5\x8B\x72" | ||
21781 | "\x80\xE7\x29\xDC\x23\x55\x98\x54" | ||
21782 | "\xB1\xFF\x3E\x95\x56\xA8\x78\x78" | ||
21783 | "\xEF\xC4\xA5\x11\x2D\x2B\xD8\x93" | ||
21784 | "\x30\x6E\x7E\x51\xBB\x42\x5F\x03" | ||
21785 | "\x43\x94\x23\x7E\xEE\xF0\xA5\x79" | ||
21786 | "\x55\x01\xD4\x58\xB2\xF2\x85\x49" | ||
21787 | "\x70\xC5\xB9\x0B\x3B\x7A\x6E\x6C", | ||
21788 | .rlen = 1008, | ||
21191 | .also_non_np = 1, | 21789 | .also_non_np = 1, |
21192 | .np = 2, | 21790 | .np = 2, |
21193 | .tap = { 496 - 16, 16 }, | 21791 | .tap = { 1008 - 16, 16 }, |
21194 | }, | 21792 | }, |
21195 | }; | 21793 | }; |
21196 | 21794 | ||
@@ -21291,8 +21889,72 @@ static struct cipher_testvec camellia_cbc_dec_tv_template[] = { | |||
21291 | "\x2D\x1A\x68\xFE\xEC\x92\x94\xDA" | 21889 | "\x2D\x1A\x68\xFE\xEC\x92\x94\xDA" |
21292 | "\x94\x2A\x6F\xD6\xFE\xE5\x76\x97" | 21890 | "\x94\x2A\x6F\xD6\xFE\xE5\x76\x97" |
21293 | "\xF4\x6E\xEE\xCB\x2B\x95\x4E\x36" | 21891 | "\xF4\x6E\xEE\xCB\x2B\x95\x4E\x36" |
21294 | "\x5F\x74\x8C\x86\x5B\x71\xD0\x20", | 21892 | "\x5F\x74\x8C\x86\x5B\x71\xD0\x20" |
21295 | .ilen = 496, | 21893 | "\x78\x1A\x7F\x18\x8C\xD9\xCD\xF5" |
21894 | "\x21\x41\x56\x72\x13\xE1\x86\x07" | ||
21895 | "\x07\x26\xF3\x4F\x7B\xEA\xB5\x18" | ||
21896 | "\xFE\x94\x2D\x9F\xE0\x72\x18\x65" | ||
21897 | "\xB2\xA5\x63\x48\xB4\x13\x22\xF7" | ||
21898 | "\x25\xF1\x80\xA8\x7F\x54\x86\x7B" | ||
21899 | "\x39\xAE\x95\x0C\x09\x32\x22\x2D" | ||
21900 | "\x4D\x73\x39\x0C\x09\x2C\x7C\x10" | ||
21901 | "\xD0\x4B\x53\xF6\x90\xC5\x99\x2F" | ||
21902 | "\x15\xE1\x7F\xC6\xC5\x7A\x52\x14" | ||
21903 | "\x65\xEE\x93\x54\xD0\x66\x15\x3C" | ||
21904 | "\x4C\x68\xFD\x64\x0F\xF9\x10\x39" | ||
21905 | "\x46\x7A\xDD\x97\x20\xEE\xC7\xD2" | ||
21906 | "\x98\x4A\xB6\xE6\xF5\xA8\x1F\x4F" | ||
21907 | "\xDB\xAB\x6D\xD5\x9B\x34\x16\x97" | ||
21908 | "\x2F\x64\xE5\x37\xEF\x0E\xA1\xE9" | ||
21909 | "\xBE\x31\x31\x96\x8B\x40\x18\x75" | ||
21910 | "\x11\x75\x14\x32\xA5\x2D\x1B\x6B" | ||
21911 | "\xDB\x59\xEB\xFA\x3D\x8E\x7C\xC4" | ||
21912 | "\xDE\x68\xC8\x9F\xC9\x99\xE3\xC6" | ||
21913 | "\x71\xB0\x12\x57\x89\x0D\xC0\x2B" | ||
21914 | "\x9F\x12\x6A\x04\x67\xF1\x95\x31" | ||
21915 | "\x59\xFD\x84\x95\x2C\x9C\x5B\xEC" | ||
21916 | "\x09\xB0\x43\x96\x4A\x64\x80\x40" | ||
21917 | "\xB9\x72\x19\xDD\x70\x42\xFA\xB1" | ||
21918 | "\x4A\x2C\x0C\x0A\x60\x6E\xE3\x7C" | ||
21919 | "\x37\x5A\xBE\xA4\x62\xCF\x29\xAB" | ||
21920 | "\x7F\x4D\xA6\xB3\xE2\xB6\x64\xC6" | ||
21921 | "\x33\x0B\xF3\xD5\x01\x38\x74\xA4" | ||
21922 | "\x67\x1E\x75\x68\xC3\xAD\x76\xE9" | ||
21923 | "\xE9\xBC\xF0\xEB\xD8\xFD\x31\x8A" | ||
21924 | "\x5F\xC9\x18\x94\x4B\x86\x66\xFC" | ||
21925 | "\xBD\x0B\x3D\xB3\x9F\xFA\x1F\xD9" | ||
21926 | "\x78\xC4\xE3\x24\x1C\x67\xA2\xF8" | ||
21927 | "\x43\xBC\x76\x75\xBF\x6C\x05\xB3" | ||
21928 | "\x32\xE8\x7C\x80\xDB\xC7\xB6\x61" | ||
21929 | "\x1A\x3E\x2B\xA7\x25\xED\x8F\xA0" | ||
21930 | "\x00\x4B\xF8\x90\xCA\xD8\xFB\x12" | ||
21931 | "\xAC\x1F\x18\xE9\xD2\x5E\xA2\x8E" | ||
21932 | "\xE4\x84\x6B\x9D\xEB\x1E\x6B\xA3" | ||
21933 | "\x7B\xDC\xCE\x15\x97\x27\xB2\x65" | ||
21934 | "\xBC\x0E\x47\xAB\x55\x13\x53\xAB" | ||
21935 | "\x0E\x34\x55\x02\x5F\x27\xC5\x89" | ||
21936 | "\xDF\xC5\x70\xC4\xDD\x76\x82\xEE" | ||
21937 | "\x68\xA6\x09\xB0\xE5\x5E\xF1\x0C" | ||
21938 | "\xE3\xF3\x09\x9B\xFE\x65\x4B\xB8" | ||
21939 | "\x30\xEC\xD5\x7C\x6A\xEC\x1D\xD2" | ||
21940 | "\x93\xB7\xA1\x1A\x02\xD4\xC0\xD6" | ||
21941 | "\x8D\x4D\x83\x9A\xED\x29\x4E\x14" | ||
21942 | "\x86\xD5\x3C\x1A\xD5\xB9\x0A\x6A" | ||
21943 | "\x72\x22\xD5\x92\x38\xF1\xA1\x86" | ||
21944 | "\xB2\x41\x51\xCA\x4E\xAB\x8F\xD3" | ||
21945 | "\x80\x56\xC3\xD7\x65\xE1\xB3\x86" | ||
21946 | "\xCB\xCE\x98\xA1\xD4\x59\x1C\x06" | ||
21947 | "\x01\xED\xF8\x29\x91\x19\x5C\x9A" | ||
21948 | "\xEE\x28\x1B\x48\xD7\x32\xEF\x9F" | ||
21949 | "\x6C\x2B\x66\x4E\x78\xD5\x8B\x72" | ||
21950 | "\x80\xE7\x29\xDC\x23\x55\x98\x54" | ||
21951 | "\xB1\xFF\x3E\x95\x56\xA8\x78\x78" | ||
21952 | "\xEF\xC4\xA5\x11\x2D\x2B\xD8\x93" | ||
21953 | "\x30\x6E\x7E\x51\xBB\x42\x5F\x03" | ||
21954 | "\x43\x94\x23\x7E\xEE\xF0\xA5\x79" | ||
21955 | "\x55\x01\xD4\x58\xB2\xF2\x85\x49" | ||
21956 | "\x70\xC5\xB9\x0B\x3B\x7A\x6E\x6C", | ||
21957 | .ilen = 1008, | ||
21296 | .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" | 21958 | .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" |
21297 | "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" | 21959 | "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" |
21298 | "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" | 21960 | "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" |
@@ -21354,11 +22016,75 @@ static struct cipher_testvec camellia_cbc_dec_tv_template[] = { | |||
21354 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" | 22016 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" |
21355 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" | 22017 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" |
21356 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" | 22018 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" |
21357 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", | 22019 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" |
21358 | .rlen = 496, | 22020 | "\x2B\xC2\x59\xF0\x64\xFB\x92\x06" |
22021 | "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78" | ||
22022 | "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA" | ||
22023 | "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C" | ||
22024 | "\xF3\x67\xFE\x95\x09\xA0\x37\xCE" | ||
22025 | "\x42\xD9\x70\x07\x7B\x12\xA9\x1D" | ||
22026 | "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F" | ||
22027 | "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01" | ||
22028 | "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73" | ||
22029 | "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5" | ||
22030 | "\x59\xF0\x87\x1E\x92\x29\xC0\x34" | ||
22031 | "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6" | ||
22032 | "\x3D\xD4\x48\xDF\x76\x0D\x81\x18" | ||
22033 | "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A" | ||
22034 | "\x21\x95\x2C\xC3\x37\xCE\x65\xFC" | ||
22035 | "\x70\x07\x9E\x12\xA9\x40\xD7\x4B" | ||
22036 | "\xE2\x79\x10\x84\x1B\xB2\x26\xBD" | ||
22037 | "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F" | ||
22038 | "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1" | ||
22039 | "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13" | ||
22040 | "\x87\x1E\xB5\x29\xC0\x57\xEE\x62" | ||
22041 | "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4" | ||
22042 | "\x6B\x02\x76\x0D\xA4\x18\xAF\x46" | ||
22043 | "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8" | ||
22044 | "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07" | ||
22045 | "\x9E\x35\xCC\x40\xD7\x6E\x05\x79" | ||
22046 | "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB" | ||
22047 | "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D" | ||
22048 | "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF" | ||
22049 | "\x43\xDA\x71\x08\x7C\x13\xAA\x1E" | ||
22050 | "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90" | ||
22051 | "\x27\xBE\x32\xC9\x60\xF7\x6B\x02" | ||
22052 | "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74" | ||
22053 | "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6" | ||
22054 | "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35" | ||
22055 | "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7" | ||
22056 | "\x3E\xD5\x49\xE0\x77\x0E\x82\x19" | ||
22057 | "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B" | ||
22058 | "\x22\x96\x2D\xC4\x38\xCF\x66\xFD" | ||
22059 | "\x71\x08\x9F\x13\xAA\x41\xD8\x4C" | ||
22060 | "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE" | ||
22061 | "\x55\xEC\x60\xF7\x8E\x02\x99\x30" | ||
22062 | "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2" | ||
22063 | "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14" | ||
22064 | "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63" | ||
22065 | "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5" | ||
22066 | "\x6C\x03\x77\x0E\xA5\x19\xB0\x47" | ||
22067 | "\xDE\x52\xE9\x80\x17\x8B\x22\xB9" | ||
22068 | "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08" | ||
22069 | "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A" | ||
22070 | "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC" | ||
22071 | "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E" | ||
22072 | "\xF5\x69\x00\x97\x0B\xA2\x39\xD0" | ||
22073 | "\x44\xDB\x72\x09\x7D\x14\xAB\x1F" | ||
22074 | "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91" | ||
22075 | "\x28\xBF\x33\xCA\x61\xF8\x6C\x03" | ||
22076 | "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75" | ||
22077 | "\x0C\x80\x17\xAE\x22\xB9\x50\xE7" | ||
22078 | "\x5B\xF2\x89\x20\x94\x2B\xC2\x36" | ||
22079 | "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8" | ||
22080 | "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A" | ||
22081 | "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C" | ||
22082 | "\x00\x97\x2E\xC5\x39\xD0\x67\xFE" | ||
22083 | "\x72\x09\xA0\x14\xAB\x42\xD9\x4D", | ||
22084 | .rlen = 1008, | ||
21359 | .also_non_np = 1, | 22085 | .also_non_np = 1, |
21360 | .np = 2, | 22086 | .np = 2, |
21361 | .tap = { 496 - 16, 16 }, | 22087 | .tap = { 1008 - 16, 16 }, |
21362 | }, | 22088 | }, |
21363 | }; | 22089 | }; |
21364 | 22090 | ||
@@ -21567,8 +22293,72 @@ static struct cipher_testvec camellia_ctr_enc_tv_template[] = { | |||
21567 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" | 22293 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" |
21568 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" | 22294 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" |
21569 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" | 22295 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" |
21570 | "\x2B\xC2\x59", | 22296 | "\x2B\xC2\x59\xF0\x64\xFB\x92\x06" |
21571 | .ilen = 499, | 22297 | "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78" |
22298 | "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA" | ||
22299 | "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C" | ||
22300 | "\xF3\x67\xFE\x95\x09\xA0\x37\xCE" | ||
22301 | "\x42\xD9\x70\x07\x7B\x12\xA9\x1D" | ||
22302 | "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F" | ||
22303 | "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01" | ||
22304 | "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73" | ||
22305 | "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5" | ||
22306 | "\x59\xF0\x87\x1E\x92\x29\xC0\x34" | ||
22307 | "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6" | ||
22308 | "\x3D\xD4\x48\xDF\x76\x0D\x81\x18" | ||
22309 | "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A" | ||
22310 | "\x21\x95\x2C\xC3\x37\xCE\x65\xFC" | ||
22311 | "\x70\x07\x9E\x12\xA9\x40\xD7\x4B" | ||
22312 | "\xE2\x79\x10\x84\x1B\xB2\x26\xBD" | ||
22313 | "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F" | ||
22314 | "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1" | ||
22315 | "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13" | ||
22316 | "\x87\x1E\xB5\x29\xC0\x57\xEE\x62" | ||
22317 | "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4" | ||
22318 | "\x6B\x02\x76\x0D\xA4\x18\xAF\x46" | ||
22319 | "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8" | ||
22320 | "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07" | ||
22321 | "\x9E\x35\xCC\x40\xD7\x6E\x05\x79" | ||
22322 | "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB" | ||
22323 | "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D" | ||
22324 | "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF" | ||
22325 | "\x43\xDA\x71\x08\x7C\x13\xAA\x1E" | ||
22326 | "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90" | ||
22327 | "\x27\xBE\x32\xC9\x60\xF7\x6B\x02" | ||
22328 | "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74" | ||
22329 | "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6" | ||
22330 | "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35" | ||
22331 | "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7" | ||
22332 | "\x3E\xD5\x49\xE0\x77\x0E\x82\x19" | ||
22333 | "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B" | ||
22334 | "\x22\x96\x2D\xC4\x38\xCF\x66\xFD" | ||
22335 | "\x71\x08\x9F\x13\xAA\x41\xD8\x4C" | ||
22336 | "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE" | ||
22337 | "\x55\xEC\x60\xF7\x8E\x02\x99\x30" | ||
22338 | "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2" | ||
22339 | "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14" | ||
22340 | "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63" | ||
22341 | "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5" | ||
22342 | "\x6C\x03\x77\x0E\xA5\x19\xB0\x47" | ||
22343 | "\xDE\x52\xE9\x80\x17\x8B\x22\xB9" | ||
22344 | "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08" | ||
22345 | "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A" | ||
22346 | "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC" | ||
22347 | "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E" | ||
22348 | "\xF5\x69\x00\x97\x0B\xA2\x39\xD0" | ||
22349 | "\x44\xDB\x72\x09\x7D\x14\xAB\x1F" | ||
22350 | "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91" | ||
22351 | "\x28\xBF\x33\xCA\x61\xF8\x6C\x03" | ||
22352 | "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75" | ||
22353 | "\x0C\x80\x17\xAE\x22\xB9\x50\xE7" | ||
22354 | "\x5B\xF2\x89\x20\x94\x2B\xC2\x36" | ||
22355 | "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8" | ||
22356 | "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A" | ||
22357 | "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C" | ||
22358 | "\x00\x97\x2E\xC5\x39\xD0\x67\xFE" | ||
22359 | "\x72\x09\xA0\x14\xAB\x42\xD9\x4D" | ||
22360 | "\xE4\x7B\x12", | ||
22361 | .ilen = 1011, | ||
21572 | .result = "\xF3\x06\x3A\x84\xCD\xBA\x8E\x11" | 22362 | .result = "\xF3\x06\x3A\x84\xCD\xBA\x8E\x11" |
21573 | "\xB7\x74\x6F\x5C\x97\xFB\x36\xFE" | 22363 | "\xB7\x74\x6F\x5C\x97\xFB\x36\xFE" |
21574 | "\xDE\x71\x58\xD4\x15\xD1\xC1\xA4" | 22364 | "\xDE\x71\x58\xD4\x15\xD1\xC1\xA4" |
@@ -21631,11 +22421,75 @@ static struct cipher_testvec camellia_ctr_enc_tv_template[] = { | |||
21631 | "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48" | 22421 | "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48" |
21632 | "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0" | 22422 | "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0" |
21633 | "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D" | 22423 | "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D" |
21634 | "\x93\x11\x1C", | 22424 | "\x93\x11\x1C\xE9\xD2\x9F\x6E\x90" |
21635 | .rlen = 499, | 22425 | "\xE5\x41\x4A\xE2\x3C\x45\x29\x35" |
22426 | "\xEC\xD6\x47\x50\xCB\x7B\xA2\x32" | ||
22427 | "\xF7\x8B\x62\xF1\xE3\x9A\xFE\xC7" | ||
22428 | "\x1D\x8C\x02\x72\x68\x09\xE9\xB6" | ||
22429 | "\x4A\x80\xE6\xB1\x56\xDF\x90\xD4" | ||
22430 | "\x93\x74\xA4\xCE\x20\x23\xBF\x48" | ||
22431 | "\xA5\xDE\x1B\xFA\x40\x69\x31\x98" | ||
22432 | "\x62\x6E\xA5\xC7\xBF\x0C\x62\xE5" | ||
22433 | "\x6D\xE1\x93\xF1\x83\x10\x1C\xCA" | ||
22434 | "\xF6\x5C\x19\xF8\x90\x78\xCB\xE4" | ||
22435 | "\x0B\x3A\xB5\xF8\x43\x86\xD3\x3F" | ||
22436 | "\xBA\x83\x34\x3C\x42\xCC\x7D\x28" | ||
22437 | "\x29\x63\x4F\xD8\x02\x17\xC5\x07" | ||
22438 | "\x2C\xA4\xAC\x79\xCB\xC3\xA9\x09" | ||
22439 | "\x81\x45\x18\xED\xE4\xCB\x42\x3B" | ||
22440 | "\x87\x2D\x23\xDC\xC5\xBA\x45\xBD" | ||
22441 | "\x92\xE5\x02\x97\x96\xCE\xAD\xEC" | ||
22442 | "\xBA\xD8\x76\xF8\xCA\xC1\x31\xEC" | ||
22443 | "\x1E\x4F\x3F\x83\xF8\x33\xE8\x6E" | ||
22444 | "\xCC\xF8\x5F\xDD\x65\x50\x99\x69" | ||
22445 | "\xAF\x48\xCE\xA5\xBA\xB6\x14\x9F" | ||
22446 | "\x05\x93\xB2\xE6\x59\xC8\x28\xFE" | ||
22447 | "\x8F\x37\xF9\x64\xB9\xA5\x56\x8F" | ||
22448 | "\xF1\x1B\x90\xEF\xAE\xEB\xFC\x09" | ||
22449 | "\x11\x7A\xF2\x19\x0A\x0A\x9A\x3C" | ||
22450 | "\xE2\x5E\x29\xFA\x31\x9B\xC1\x74" | ||
22451 | "\x1E\x10\x3E\x07\xA9\x31\x6D\xF8" | ||
22452 | "\x81\xF5\xD5\x8A\x04\x23\x51\xAC" | ||
22453 | "\xA2\xE2\x63\xFD\x27\x1F\x79\x5B" | ||
22454 | "\x1F\xE8\xDA\x11\x49\x4D\x1C\xBA" | ||
22455 | "\x54\xCC\x0F\xBA\x92\x69\xE5\xCB" | ||
22456 | "\x41\x1A\x67\xA6\x40\x82\x70\x8C" | ||
22457 | "\x19\x79\x08\xA4\x51\x20\x7D\xC9" | ||
22458 | "\x12\x27\xAE\x20\x0D\x2C\xA1\x6D" | ||
22459 | "\xF4\x55\xD4\xE7\xE6\xD4\x28\x08" | ||
22460 | "\x00\x70\x12\x56\x56\x50\xAD\x14" | ||
22461 | "\x5C\x3E\xA2\xD1\x36\x3F\x36\x48" | ||
22462 | "\xED\xB1\x57\x3E\x5D\x15\xF6\x1E" | ||
22463 | "\x53\xE9\xA4\x3E\xED\x7D\xCF\x7D" | ||
22464 | "\x29\xAF\xF3\x1E\x51\xA8\x9F\x85" | ||
22465 | "\x8B\xF0\xBB\xCE\xCC\x39\xC3\x64" | ||
22466 | "\x4B\xF2\xAD\x70\x19\xD4\x44\x8F" | ||
22467 | "\x91\x76\xE8\x15\x66\x34\x9F\xF6" | ||
22468 | "\x0F\x15\xA4\xA8\x24\xF8\x58\xB1" | ||
22469 | "\x38\x46\x47\xC7\x9B\xCA\xE9\x42" | ||
22470 | "\x44\xAA\xE6\xB5\x9C\x91\xA4\xD3" | ||
22471 | "\x16\xA0\xED\x42\xBE\xB5\x06\x19" | ||
22472 | "\xBE\x67\xE8\xBC\x22\x32\xA4\x1E" | ||
22473 | "\x93\xEB\xBE\xE9\xE1\x93\xE5\x31" | ||
22474 | "\x3A\xA2\x75\xDF\xE3\x6B\xE7\xCC" | ||
22475 | "\xB4\x70\x20\xE0\x6D\x82\x7C\xC8" | ||
22476 | "\x94\x5C\x5E\x37\x18\xAD\xED\x8B" | ||
22477 | "\x44\x86\xCA\x5E\x07\xB7\x70\x8D" | ||
22478 | "\x40\x48\x19\x73\x7C\x78\x64\x0B" | ||
22479 | "\xDB\x01\xCA\xAE\x63\x19\xE9\xD1" | ||
22480 | "\x6B\x2C\x84\x10\x45\x42\x2E\xC3" | ||
22481 | "\xDF\x7F\xAA\xE8\x87\x1B\x63\x46" | ||
22482 | "\x74\x28\x9D\x05\x30\x20\x62\x41" | ||
22483 | "\xC0\x9F\x2C\x36\x2B\x78\xD7\x26" | ||
22484 | "\xDF\x58\x51\xED\xFA\xDC\x87\x79" | ||
22485 | "\xBF\x8C\xBF\xC4\x0F\xE5\x05\xDA" | ||
22486 | "\x45\xE3\x35\x0D\x69\x91\x54\x1C" | ||
22487 | "\xE7\x2C\x49\x08\x8B\x72\xFA\x5C" | ||
22488 | "\xF1\x6B\xD9", | ||
22489 | .rlen = 1011, | ||
21636 | .also_non_np = 1, | 22490 | .also_non_np = 1, |
21637 | .np = 2, | 22491 | .np = 2, |
21638 | .tap = { 499 - 16, 16 }, | 22492 | .tap = { 1011 - 16, 16 }, |
21639 | }, { /* Generated with Crypto++ */ | 22493 | }, { /* Generated with Crypto++ */ |
21640 | .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9" | 22494 | .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9" |
21641 | "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A" | 22495 | "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A" |
@@ -21705,8 +22559,72 @@ static struct cipher_testvec camellia_ctr_enc_tv_template[] = { | |||
21705 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" | 22559 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" |
21706 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" | 22560 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" |
21707 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" | 22561 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" |
21708 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", | 22562 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" |
21709 | .ilen = 496, | 22563 | "\x2B\xC2\x59\xF0\x64\xFB\x92\x06" |
22564 | "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78" | ||
22565 | "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA" | ||
22566 | "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C" | ||
22567 | "\xF3\x67\xFE\x95\x09\xA0\x37\xCE" | ||
22568 | "\x42\xD9\x70\x07\x7B\x12\xA9\x1D" | ||
22569 | "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F" | ||
22570 | "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01" | ||
22571 | "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73" | ||
22572 | "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5" | ||
22573 | "\x59\xF0\x87\x1E\x92\x29\xC0\x34" | ||
22574 | "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6" | ||
22575 | "\x3D\xD4\x48\xDF\x76\x0D\x81\x18" | ||
22576 | "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A" | ||
22577 | "\x21\x95\x2C\xC3\x37\xCE\x65\xFC" | ||
22578 | "\x70\x07\x9E\x12\xA9\x40\xD7\x4B" | ||
22579 | "\xE2\x79\x10\x84\x1B\xB2\x26\xBD" | ||
22580 | "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F" | ||
22581 | "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1" | ||
22582 | "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13" | ||
22583 | "\x87\x1E\xB5\x29\xC0\x57\xEE\x62" | ||
22584 | "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4" | ||
22585 | "\x6B\x02\x76\x0D\xA4\x18\xAF\x46" | ||
22586 | "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8" | ||
22587 | "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07" | ||
22588 | "\x9E\x35\xCC\x40\xD7\x6E\x05\x79" | ||
22589 | "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB" | ||
22590 | "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D" | ||
22591 | "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF" | ||
22592 | "\x43\xDA\x71\x08\x7C\x13\xAA\x1E" | ||
22593 | "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90" | ||
22594 | "\x27\xBE\x32\xC9\x60\xF7\x6B\x02" | ||
22595 | "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74" | ||
22596 | "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6" | ||
22597 | "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35" | ||
22598 | "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7" | ||
22599 | "\x3E\xD5\x49\xE0\x77\x0E\x82\x19" | ||
22600 | "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B" | ||
22601 | "\x22\x96\x2D\xC4\x38\xCF\x66\xFD" | ||
22602 | "\x71\x08\x9F\x13\xAA\x41\xD8\x4C" | ||
22603 | "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE" | ||
22604 | "\x55\xEC\x60\xF7\x8E\x02\x99\x30" | ||
22605 | "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2" | ||
22606 | "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14" | ||
22607 | "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63" | ||
22608 | "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5" | ||
22609 | "\x6C\x03\x77\x0E\xA5\x19\xB0\x47" | ||
22610 | "\xDE\x52\xE9\x80\x17\x8B\x22\xB9" | ||
22611 | "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08" | ||
22612 | "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A" | ||
22613 | "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC" | ||
22614 | "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E" | ||
22615 | "\xF5\x69\x00\x97\x0B\xA2\x39\xD0" | ||
22616 | "\x44\xDB\x72\x09\x7D\x14\xAB\x1F" | ||
22617 | "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91" | ||
22618 | "\x28\xBF\x33\xCA\x61\xF8\x6C\x03" | ||
22619 | "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75" | ||
22620 | "\x0C\x80\x17\xAE\x22\xB9\x50\xE7" | ||
22621 | "\x5B\xF2\x89\x20\x94\x2B\xC2\x36" | ||
22622 | "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8" | ||
22623 | "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A" | ||
22624 | "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C" | ||
22625 | "\x00\x97\x2E\xC5\x39\xD0\x67\xFE" | ||
22626 | "\x72\x09\xA0\x14\xAB\x42\xD9\x4D", | ||
22627 | .ilen = 1008, | ||
21710 | .result = "\x85\x79\x6C\x8B\x2B\x6D\x14\xF9" | 22628 | .result = "\x85\x79\x6C\x8B\x2B\x6D\x14\xF9" |
21711 | "\xA6\x83\xB6\x80\x5B\x3A\xF3\x7E" | 22629 | "\xA6\x83\xB6\x80\x5B\x3A\xF3\x7E" |
21712 | "\x30\x29\xEB\x1F\xDC\x19\x5F\xEB" | 22630 | "\x30\x29\xEB\x1F\xDC\x19\x5F\xEB" |
@@ -21768,8 +22686,72 @@ static struct cipher_testvec camellia_ctr_enc_tv_template[] = { | |||
21768 | "\xB4\x3A\x5F\x19\xCF\x42\x1B\x22" | 22686 | "\xB4\x3A\x5F\x19\xCF\x42\x1B\x22" |
21769 | "\x0B\x2D\x7B\xF1\xC5\x43\xF7\x5E" | 22687 | "\x0B\x2D\x7B\xF1\xC5\x43\xF7\x5E" |
21770 | "\x12\xA8\x01\x64\x16\x0B\x26\x5A" | 22688 | "\x12\xA8\x01\x64\x16\x0B\x26\x5A" |
21771 | "\x0C\x95\x0F\x40\xC5\x5A\x06\x7C", | 22689 | "\x0C\x95\x0F\x40\xC5\x5A\x06\x7C" |
21772 | .rlen = 496, | 22690 | "\xCF\xF5\xD5\xB7\x7A\x34\x23\xB6" |
22691 | "\xAA\x9E\xA8\x98\xA2\xF8\x3D\xD3" | ||
22692 | "\x3F\x23\x69\x63\x56\x96\x45\xD6" | ||
22693 | "\x74\x23\x1D\x5C\x63\xCC\xD8\x78" | ||
22694 | "\x16\xE2\x9C\xD2\x80\x02\xF2\x28" | ||
22695 | "\x69\x2F\xC4\xA8\x15\x15\x24\x3B" | ||
22696 | "\xCB\xF0\x14\xE4\x62\xC8\xF3\xD1" | ||
22697 | "\x03\x58\x1B\x33\x77\x74\x1F\xB4" | ||
22698 | "\x07\x86\xF2\x21\xB7\x41\xAE\xBF" | ||
22699 | "\x25\xC2\xFF\x51\xEF\xEA\xCE\xC4" | ||
22700 | "\x5F\xD9\xB8\x18\x6A\xF0\x0F\x0D" | ||
22701 | "\xF8\x04\xBB\x6D\x62\x33\x87\x26" | ||
22702 | "\x4F\x2F\x14\x6E\xDC\xDB\x66\x09" | ||
22703 | "\x2A\xEF\x7D\x84\x10\xAC\x82\x5E" | ||
22704 | "\xD2\xE4\xAD\x74\x7A\x6D\xCC\x3A" | ||
22705 | "\x7B\x62\xD8\xD6\x07\x2D\xF7\xDF" | ||
22706 | "\x9B\xB3\x82\xCF\x9C\x1D\x76\x5C" | ||
22707 | "\xAC\x7B\xD4\x9B\x45\xA1\x64\x11" | ||
22708 | "\x66\xF1\xA7\x0B\xF9\xDD\x00\xDD" | ||
22709 | "\xA4\x45\x3D\x3E\x03\xC9\x2E\xCB" | ||
22710 | "\xC3\x14\x84\x72\xFD\x41\xDC\xBD" | ||
22711 | "\x75\xBE\xA8\xE5\x16\x48\x64\x39" | ||
22712 | "\xCA\xF3\xE6\xDC\x25\x24\xF1\x6D" | ||
22713 | "\xB2\x8D\xC5\x38\x54\xD3\x5D\x6D" | ||
22714 | "\x0B\x29\x10\x15\x0E\x13\x3B\xAC" | ||
22715 | "\x7E\xCC\x9E\x3E\x18\x48\xA6\x02" | ||
22716 | "\xEF\x03\xB2\x2E\xE3\xD2\x70\x21" | ||
22717 | "\xB4\x19\x26\xBE\x3A\x3D\x05\xE0" | ||
22718 | "\xF8\x09\xAF\xE4\x31\x26\x92\x2F" | ||
22719 | "\x8F\x55\xAC\xED\x0B\xB2\xA5\x34" | ||
22720 | "\xBE\x50\xB1\x02\x22\x96\xE3\x40" | ||
22721 | "\x7B\x70\x50\x6E\x3B\xD5\xE5\xA0" | ||
22722 | "\x8E\xA2\xAD\x14\x60\x5C\x7A\x2B" | ||
22723 | "\x3D\x1B\x7F\xC1\xC0\x2C\x56\x36" | ||
22724 | "\xD2\x0A\x32\x06\x97\x34\xB9\xF4" | ||
22725 | "\x6F\x9F\x7E\x80\xD0\x9D\xF7\x6A" | ||
22726 | "\x21\xC1\xA2\x6A\xB1\x96\x5B\x4D" | ||
22727 | "\x7A\x15\x6C\xC4\x4E\xB8\xE0\x9E" | ||
22728 | "\x6C\x50\xF3\x9C\xC9\xB5\x23\xB7" | ||
22729 | "\xF1\xD4\x29\x4A\x23\xC4\xAD\x1E" | ||
22730 | "\x2C\x07\xD2\x43\x5F\x57\x93\xCA" | ||
22731 | "\x85\xF9\x9F\xAD\x4C\xF1\xE4\xB1" | ||
22732 | "\x1A\x8E\x28\xA4\xB6\x52\x77\x7E" | ||
22733 | "\x68\xC6\x47\xB9\x76\xCC\x65\x5F" | ||
22734 | "\x0B\xF9\x67\x93\xD8\x0E\x9A\x37" | ||
22735 | "\x5F\x41\xED\x64\x6C\xAD\x5F\xED" | ||
22736 | "\x3F\x8D\xFB\x8E\x1E\xA0\xE4\x1F" | ||
22737 | "\xC2\xC7\xED\x18\x43\xE1\x20\x86" | ||
22738 | "\x5D\xBC\x30\x70\x22\xA1\xDC\x53" | ||
22739 | "\x10\x3A\x8D\x47\x82\xCD\x7F\x59" | ||
22740 | "\x03\x2D\x6D\xF5\xE7\x79\xD4\x07" | ||
22741 | "\x68\x2A\xA5\x42\x19\x4D\xAF\xF5" | ||
22742 | "\xED\x47\x83\xBC\x5F\x62\x84\xDA" | ||
22743 | "\xDA\x41\xFF\xB0\x1D\x64\xA3\xC8" | ||
22744 | "\xBD\x4E\xE0\xB8\x7F\xEE\x55\x0A" | ||
22745 | "\x4E\x61\xB2\x51\xF6\x9C\x95\xF6" | ||
22746 | "\x92\xBB\xF6\xC5\xF0\x09\x86\xDE" | ||
22747 | "\x37\x9E\x29\xF9\x2A\x18\x73\x0D" | ||
22748 | "\xDC\x7E\x6B\x7B\x1B\x43\x8C\xEA" | ||
22749 | "\x13\xC8\x1A\x47\x0A\x2D\x6D\x56" | ||
22750 | "\xCD\xD2\xE7\x53\x1A\xAB\x1C\x3C" | ||
22751 | "\xC5\x9B\x03\x70\x29\x2A\x49\x09" | ||
22752 | "\x67\xA1\xEA\xD6\x3A\x5B\xBF\x71" | ||
22753 | "\x1D\x48\x64\x6C\xFB\xC0\x9E\x36", | ||
22754 | .rlen = 1008, | ||
21773 | }, | 22755 | }, |
21774 | }; | 22756 | }; |
21775 | 22757 | ||
@@ -21978,8 +22960,72 @@ static struct cipher_testvec camellia_ctr_dec_tv_template[] = { | |||
21978 | "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48" | 22960 | "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48" |
21979 | "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0" | 22961 | "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0" |
21980 | "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D" | 22962 | "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D" |
21981 | "\x93\x11\x1C", | 22963 | "\x93\x11\x1C\xE9\xD2\x9F\x6E\x90" |
21982 | .ilen = 499, | 22964 | "\xE5\x41\x4A\xE2\x3C\x45\x29\x35" |
22965 | "\xEC\xD6\x47\x50\xCB\x7B\xA2\x32" | ||
22966 | "\xF7\x8B\x62\xF1\xE3\x9A\xFE\xC7" | ||
22967 | "\x1D\x8C\x02\x72\x68\x09\xE9\xB6" | ||
22968 | "\x4A\x80\xE6\xB1\x56\xDF\x90\xD4" | ||
22969 | "\x93\x74\xA4\xCE\x20\x23\xBF\x48" | ||
22970 | "\xA5\xDE\x1B\xFA\x40\x69\x31\x98" | ||
22971 | "\x62\x6E\xA5\xC7\xBF\x0C\x62\xE5" | ||
22972 | "\x6D\xE1\x93\xF1\x83\x10\x1C\xCA" | ||
22973 | "\xF6\x5C\x19\xF8\x90\x78\xCB\xE4" | ||
22974 | "\x0B\x3A\xB5\xF8\x43\x86\xD3\x3F" | ||
22975 | "\xBA\x83\x34\x3C\x42\xCC\x7D\x28" | ||
22976 | "\x29\x63\x4F\xD8\x02\x17\xC5\x07" | ||
22977 | "\x2C\xA4\xAC\x79\xCB\xC3\xA9\x09" | ||
22978 | "\x81\x45\x18\xED\xE4\xCB\x42\x3B" | ||
22979 | "\x87\x2D\x23\xDC\xC5\xBA\x45\xBD" | ||
22980 | "\x92\xE5\x02\x97\x96\xCE\xAD\xEC" | ||
22981 | "\xBA\xD8\x76\xF8\xCA\xC1\x31\xEC" | ||
22982 | "\x1E\x4F\x3F\x83\xF8\x33\xE8\x6E" | ||
22983 | "\xCC\xF8\x5F\xDD\x65\x50\x99\x69" | ||
22984 | "\xAF\x48\xCE\xA5\xBA\xB6\x14\x9F" | ||
22985 | "\x05\x93\xB2\xE6\x59\xC8\x28\xFE" | ||
22986 | "\x8F\x37\xF9\x64\xB9\xA5\x56\x8F" | ||
22987 | "\xF1\x1B\x90\xEF\xAE\xEB\xFC\x09" | ||
22988 | "\x11\x7A\xF2\x19\x0A\x0A\x9A\x3C" | ||
22989 | "\xE2\x5E\x29\xFA\x31\x9B\xC1\x74" | ||
22990 | "\x1E\x10\x3E\x07\xA9\x31\x6D\xF8" | ||
22991 | "\x81\xF5\xD5\x8A\x04\x23\x51\xAC" | ||
22992 | "\xA2\xE2\x63\xFD\x27\x1F\x79\x5B" | ||
22993 | "\x1F\xE8\xDA\x11\x49\x4D\x1C\xBA" | ||
22994 | "\x54\xCC\x0F\xBA\x92\x69\xE5\xCB" | ||
22995 | "\x41\x1A\x67\xA6\x40\x82\x70\x8C" | ||
22996 | "\x19\x79\x08\xA4\x51\x20\x7D\xC9" | ||
22997 | "\x12\x27\xAE\x20\x0D\x2C\xA1\x6D" | ||
22998 | "\xF4\x55\xD4\xE7\xE6\xD4\x28\x08" | ||
22999 | "\x00\x70\x12\x56\x56\x50\xAD\x14" | ||
23000 | "\x5C\x3E\xA2\xD1\x36\x3F\x36\x48" | ||
23001 | "\xED\xB1\x57\x3E\x5D\x15\xF6\x1E" | ||
23002 | "\x53\xE9\xA4\x3E\xED\x7D\xCF\x7D" | ||
23003 | "\x29\xAF\xF3\x1E\x51\xA8\x9F\x85" | ||
23004 | "\x8B\xF0\xBB\xCE\xCC\x39\xC3\x64" | ||
23005 | "\x4B\xF2\xAD\x70\x19\xD4\x44\x8F" | ||
23006 | "\x91\x76\xE8\x15\x66\x34\x9F\xF6" | ||
23007 | "\x0F\x15\xA4\xA8\x24\xF8\x58\xB1" | ||
23008 | "\x38\x46\x47\xC7\x9B\xCA\xE9\x42" | ||
23009 | "\x44\xAA\xE6\xB5\x9C\x91\xA4\xD3" | ||
23010 | "\x16\xA0\xED\x42\xBE\xB5\x06\x19" | ||
23011 | "\xBE\x67\xE8\xBC\x22\x32\xA4\x1E" | ||
23012 | "\x93\xEB\xBE\xE9\xE1\x93\xE5\x31" | ||
23013 | "\x3A\xA2\x75\xDF\xE3\x6B\xE7\xCC" | ||
23014 | "\xB4\x70\x20\xE0\x6D\x82\x7C\xC8" | ||
23015 | "\x94\x5C\x5E\x37\x18\xAD\xED\x8B" | ||
23016 | "\x44\x86\xCA\x5E\x07\xB7\x70\x8D" | ||
23017 | "\x40\x48\x19\x73\x7C\x78\x64\x0B" | ||
23018 | "\xDB\x01\xCA\xAE\x63\x19\xE9\xD1" | ||
23019 | "\x6B\x2C\x84\x10\x45\x42\x2E\xC3" | ||
23020 | "\xDF\x7F\xAA\xE8\x87\x1B\x63\x46" | ||
23021 | "\x74\x28\x9D\x05\x30\x20\x62\x41" | ||
23022 | "\xC0\x9F\x2C\x36\x2B\x78\xD7\x26" | ||
23023 | "\xDF\x58\x51\xED\xFA\xDC\x87\x79" | ||
23024 | "\xBF\x8C\xBF\xC4\x0F\xE5\x05\xDA" | ||
23025 | "\x45\xE3\x35\x0D\x69\x91\x54\x1C" | ||
23026 | "\xE7\x2C\x49\x08\x8B\x72\xFA\x5C" | ||
23027 | "\xF1\x6B\xD9", | ||
23028 | .ilen = 1011, | ||
21983 | .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" | 23029 | .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" |
21984 | "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" | 23030 | "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" |
21985 | "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" | 23031 | "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" |
@@ -22042,11 +23088,75 @@ static struct cipher_testvec camellia_ctr_dec_tv_template[] = { | |||
22042 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" | 23088 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" |
22043 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" | 23089 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" |
22044 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" | 23090 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" |
22045 | "\x2B\xC2\x59", | 23091 | "\x2B\xC2\x59\xF0\x64\xFB\x92\x06" |
22046 | .rlen = 499, | 23092 | "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78" |
23093 | "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA" | ||
23094 | "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C" | ||
23095 | "\xF3\x67\xFE\x95\x09\xA0\x37\xCE" | ||
23096 | "\x42\xD9\x70\x07\x7B\x12\xA9\x1D" | ||
23097 | "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F" | ||
23098 | "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01" | ||
23099 | "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73" | ||
23100 | "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5" | ||
23101 | "\x59\xF0\x87\x1E\x92\x29\xC0\x34" | ||
23102 | "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6" | ||
23103 | "\x3D\xD4\x48\xDF\x76\x0D\x81\x18" | ||
23104 | "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A" | ||
23105 | "\x21\x95\x2C\xC3\x37\xCE\x65\xFC" | ||
23106 | "\x70\x07\x9E\x12\xA9\x40\xD7\x4B" | ||
23107 | "\xE2\x79\x10\x84\x1B\xB2\x26\xBD" | ||
23108 | "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F" | ||
23109 | "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1" | ||
23110 | "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13" | ||
23111 | "\x87\x1E\xB5\x29\xC0\x57\xEE\x62" | ||
23112 | "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4" | ||
23113 | "\x6B\x02\x76\x0D\xA4\x18\xAF\x46" | ||
23114 | "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8" | ||
23115 | "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07" | ||
23116 | "\x9E\x35\xCC\x40\xD7\x6E\x05\x79" | ||
23117 | "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB" | ||
23118 | "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D" | ||
23119 | "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF" | ||
23120 | "\x43\xDA\x71\x08\x7C\x13\xAA\x1E" | ||
23121 | "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90" | ||
23122 | "\x27\xBE\x32\xC9\x60\xF7\x6B\x02" | ||
23123 | "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74" | ||
23124 | "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6" | ||
23125 | "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35" | ||
23126 | "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7" | ||
23127 | "\x3E\xD5\x49\xE0\x77\x0E\x82\x19" | ||
23128 | "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B" | ||
23129 | "\x22\x96\x2D\xC4\x38\xCF\x66\xFD" | ||
23130 | "\x71\x08\x9F\x13\xAA\x41\xD8\x4C" | ||
23131 | "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE" | ||
23132 | "\x55\xEC\x60\xF7\x8E\x02\x99\x30" | ||
23133 | "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2" | ||
23134 | "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14" | ||
23135 | "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63" | ||
23136 | "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5" | ||
23137 | "\x6C\x03\x77\x0E\xA5\x19\xB0\x47" | ||
23138 | "\xDE\x52\xE9\x80\x17\x8B\x22\xB9" | ||
23139 | "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08" | ||
23140 | "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A" | ||
23141 | "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC" | ||
23142 | "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E" | ||
23143 | "\xF5\x69\x00\x97\x0B\xA2\x39\xD0" | ||
23144 | "\x44\xDB\x72\x09\x7D\x14\xAB\x1F" | ||
23145 | "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91" | ||
23146 | "\x28\xBF\x33\xCA\x61\xF8\x6C\x03" | ||
23147 | "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75" | ||
23148 | "\x0C\x80\x17\xAE\x22\xB9\x50\xE7" | ||
23149 | "\x5B\xF2\x89\x20\x94\x2B\xC2\x36" | ||
23150 | "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8" | ||
23151 | "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A" | ||
23152 | "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C" | ||
23153 | "\x00\x97\x2E\xC5\x39\xD0\x67\xFE" | ||
23154 | "\x72\x09\xA0\x14\xAB\x42\xD9\x4D" | ||
23155 | "\xE4\x7B\x12", | ||
23156 | .rlen = 1011, | ||
22047 | .also_non_np = 1, | 23157 | .also_non_np = 1, |
22048 | .np = 2, | 23158 | .np = 2, |
22049 | .tap = { 499 - 16, 16 }, | 23159 | .tap = { 1011 - 16, 16 }, |
22050 | }, { /* Generated with Crypto++ */ | 23160 | }, { /* Generated with Crypto++ */ |
22051 | .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9" | 23161 | .key = "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9" |
22052 | "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A" | 23162 | "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A" |
@@ -22116,8 +23226,72 @@ static struct cipher_testvec camellia_ctr_dec_tv_template[] = { | |||
22116 | "\xB4\x3A\x5F\x19\xCF\x42\x1B\x22" | 23226 | "\xB4\x3A\x5F\x19\xCF\x42\x1B\x22" |
22117 | "\x0B\x2D\x7B\xF1\xC5\x43\xF7\x5E" | 23227 | "\x0B\x2D\x7B\xF1\xC5\x43\xF7\x5E" |
22118 | "\x12\xA8\x01\x64\x16\x0B\x26\x5A" | 23228 | "\x12\xA8\x01\x64\x16\x0B\x26\x5A" |
22119 | "\x0C\x95\x0F\x40\xC5\x5A\x06\x7C", | 23229 | "\x0C\x95\x0F\x40\xC5\x5A\x06\x7C" |
22120 | .ilen = 496, | 23230 | "\xCF\xF5\xD5\xB7\x7A\x34\x23\xB6" |
23231 | "\xAA\x9E\xA8\x98\xA2\xF8\x3D\xD3" | ||
23232 | "\x3F\x23\x69\x63\x56\x96\x45\xD6" | ||
23233 | "\x74\x23\x1D\x5C\x63\xCC\xD8\x78" | ||
23234 | "\x16\xE2\x9C\xD2\x80\x02\xF2\x28" | ||
23235 | "\x69\x2F\xC4\xA8\x15\x15\x24\x3B" | ||
23236 | "\xCB\xF0\x14\xE4\x62\xC8\xF3\xD1" | ||
23237 | "\x03\x58\x1B\x33\x77\x74\x1F\xB4" | ||
23238 | "\x07\x86\xF2\x21\xB7\x41\xAE\xBF" | ||
23239 | "\x25\xC2\xFF\x51\xEF\xEA\xCE\xC4" | ||
23240 | "\x5F\xD9\xB8\x18\x6A\xF0\x0F\x0D" | ||
23241 | "\xF8\x04\xBB\x6D\x62\x33\x87\x26" | ||
23242 | "\x4F\x2F\x14\x6E\xDC\xDB\x66\x09" | ||
23243 | "\x2A\xEF\x7D\x84\x10\xAC\x82\x5E" | ||
23244 | "\xD2\xE4\xAD\x74\x7A\x6D\xCC\x3A" | ||
23245 | "\x7B\x62\xD8\xD6\x07\x2D\xF7\xDF" | ||
23246 | "\x9B\xB3\x82\xCF\x9C\x1D\x76\x5C" | ||
23247 | "\xAC\x7B\xD4\x9B\x45\xA1\x64\x11" | ||
23248 | "\x66\xF1\xA7\x0B\xF9\xDD\x00\xDD" | ||
23249 | "\xA4\x45\x3D\x3E\x03\xC9\x2E\xCB" | ||
23250 | "\xC3\x14\x84\x72\xFD\x41\xDC\xBD" | ||
23251 | "\x75\xBE\xA8\xE5\x16\x48\x64\x39" | ||
23252 | "\xCA\xF3\xE6\xDC\x25\x24\xF1\x6D" | ||
23253 | "\xB2\x8D\xC5\x38\x54\xD3\x5D\x6D" | ||
23254 | "\x0B\x29\x10\x15\x0E\x13\x3B\xAC" | ||
23255 | "\x7E\xCC\x9E\x3E\x18\x48\xA6\x02" | ||
23256 | "\xEF\x03\xB2\x2E\xE3\xD2\x70\x21" | ||
23257 | "\xB4\x19\x26\xBE\x3A\x3D\x05\xE0" | ||
23258 | "\xF8\x09\xAF\xE4\x31\x26\x92\x2F" | ||
23259 | "\x8F\x55\xAC\xED\x0B\xB2\xA5\x34" | ||
23260 | "\xBE\x50\xB1\x02\x22\x96\xE3\x40" | ||
23261 | "\x7B\x70\x50\x6E\x3B\xD5\xE5\xA0" | ||
23262 | "\x8E\xA2\xAD\x14\x60\x5C\x7A\x2B" | ||
23263 | "\x3D\x1B\x7F\xC1\xC0\x2C\x56\x36" | ||
23264 | "\xD2\x0A\x32\x06\x97\x34\xB9\xF4" | ||
23265 | "\x6F\x9F\x7E\x80\xD0\x9D\xF7\x6A" | ||
23266 | "\x21\xC1\xA2\x6A\xB1\x96\x5B\x4D" | ||
23267 | "\x7A\x15\x6C\xC4\x4E\xB8\xE0\x9E" | ||
23268 | "\x6C\x50\xF3\x9C\xC9\xB5\x23\xB7" | ||
23269 | "\xF1\xD4\x29\x4A\x23\xC4\xAD\x1E" | ||
23270 | "\x2C\x07\xD2\x43\x5F\x57\x93\xCA" | ||
23271 | "\x85\xF9\x9F\xAD\x4C\xF1\xE4\xB1" | ||
23272 | "\x1A\x8E\x28\xA4\xB6\x52\x77\x7E" | ||
23273 | "\x68\xC6\x47\xB9\x76\xCC\x65\x5F" | ||
23274 | "\x0B\xF9\x67\x93\xD8\x0E\x9A\x37" | ||
23275 | "\x5F\x41\xED\x64\x6C\xAD\x5F\xED" | ||
23276 | "\x3F\x8D\xFB\x8E\x1E\xA0\xE4\x1F" | ||
23277 | "\xC2\xC7\xED\x18\x43\xE1\x20\x86" | ||
23278 | "\x5D\xBC\x30\x70\x22\xA1\xDC\x53" | ||
23279 | "\x10\x3A\x8D\x47\x82\xCD\x7F\x59" | ||
23280 | "\x03\x2D\x6D\xF5\xE7\x79\xD4\x07" | ||
23281 | "\x68\x2A\xA5\x42\x19\x4D\xAF\xF5" | ||
23282 | "\xED\x47\x83\xBC\x5F\x62\x84\xDA" | ||
23283 | "\xDA\x41\xFF\xB0\x1D\x64\xA3\xC8" | ||
23284 | "\xBD\x4E\xE0\xB8\x7F\xEE\x55\x0A" | ||
23285 | "\x4E\x61\xB2\x51\xF6\x9C\x95\xF6" | ||
23286 | "\x92\xBB\xF6\xC5\xF0\x09\x86\xDE" | ||
23287 | "\x37\x9E\x29\xF9\x2A\x18\x73\x0D" | ||
23288 | "\xDC\x7E\x6B\x7B\x1B\x43\x8C\xEA" | ||
23289 | "\x13\xC8\x1A\x47\x0A\x2D\x6D\x56" | ||
23290 | "\xCD\xD2\xE7\x53\x1A\xAB\x1C\x3C" | ||
23291 | "\xC5\x9B\x03\x70\x29\x2A\x49\x09" | ||
23292 | "\x67\xA1\xEA\xD6\x3A\x5B\xBF\x71" | ||
23293 | "\x1D\x48\x64\x6C\xFB\xC0\x9E\x36", | ||
23294 | .ilen = 1008, | ||
22121 | .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" | 23295 | .result = "\x56\xED\x84\x1B\x8F\x26\xBD\x31" |
22122 | "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" | 23296 | "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3" |
22123 | "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" | 23297 | "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15" |
@@ -22179,8 +23353,72 @@ static struct cipher_testvec camellia_ctr_dec_tv_template[] = { | |||
22179 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" | 23353 | "\x86\x1D\xB4\x28\xBF\x56\xED\x61" |
22180 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" | 23354 | "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3" |
22181 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" | 23355 | "\x6A\x01\x75\x0C\xA3\x17\xAE\x45" |
22182 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7", | 23356 | "\xDC\x50\xE7\x7E\x15\x89\x20\xB7" |
22183 | .rlen = 496, | 23357 | "\x2B\xC2\x59\xF0\x64\xFB\x92\x06" |
23358 | "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78" | ||
23359 | "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA" | ||
23360 | "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C" | ||
23361 | "\xF3\x67\xFE\x95\x09\xA0\x37\xCE" | ||
23362 | "\x42\xD9\x70\x07\x7B\x12\xA9\x1D" | ||
23363 | "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F" | ||
23364 | "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01" | ||
23365 | "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73" | ||
23366 | "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5" | ||
23367 | "\x59\xF0\x87\x1E\x92\x29\xC0\x34" | ||
23368 | "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6" | ||
23369 | "\x3D\xD4\x48\xDF\x76\x0D\x81\x18" | ||
23370 | "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A" | ||
23371 | "\x21\x95\x2C\xC3\x37\xCE\x65\xFC" | ||
23372 | "\x70\x07\x9E\x12\xA9\x40\xD7\x4B" | ||
23373 | "\xE2\x79\x10\x84\x1B\xB2\x26\xBD" | ||
23374 | "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F" | ||
23375 | "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1" | ||
23376 | "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13" | ||
23377 | "\x87\x1E\xB5\x29\xC0\x57\xEE\x62" | ||
23378 | "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4" | ||
23379 | "\x6B\x02\x76\x0D\xA4\x18\xAF\x46" | ||
23380 | "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8" | ||
23381 | "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07" | ||
23382 | "\x9E\x35\xCC\x40\xD7\x6E\x05\x79" | ||
23383 | "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB" | ||
23384 | "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D" | ||
23385 | "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF" | ||
23386 | "\x43\xDA\x71\x08\x7C\x13\xAA\x1E" | ||
23387 | "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90" | ||
23388 | "\x27\xBE\x32\xC9\x60\xF7\x6B\x02" | ||
23389 | "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74" | ||
23390 | "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6" | ||
23391 | "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35" | ||
23392 | "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7" | ||
23393 | "\x3E\xD5\x49\xE0\x77\x0E\x82\x19" | ||
23394 | "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B" | ||
23395 | "\x22\x96\x2D\xC4\x38\xCF\x66\xFD" | ||
23396 | "\x71\x08\x9F\x13\xAA\x41\xD8\x4C" | ||
23397 | "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE" | ||
23398 | "\x55\xEC\x60\xF7\x8E\x02\x99\x30" | ||
23399 | "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2" | ||
23400 | "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14" | ||
23401 | "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63" | ||
23402 | "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5" | ||
23403 | "\x6C\x03\x77\x0E\xA5\x19\xB0\x47" | ||
23404 | "\xDE\x52\xE9\x80\x17\x8B\x22\xB9" | ||
23405 | "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08" | ||
23406 | "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A" | ||
23407 | "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC" | ||
23408 | "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E" | ||
23409 | "\xF5\x69\x00\x97\x0B\xA2\x39\xD0" | ||
23410 | "\x44\xDB\x72\x09\x7D\x14\xAB\x1F" | ||
23411 | "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91" | ||
23412 | "\x28\xBF\x33\xCA\x61\xF8\x6C\x03" | ||
23413 | "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75" | ||
23414 | "\x0C\x80\x17\xAE\x22\xB9\x50\xE7" | ||
23415 | "\x5B\xF2\x89\x20\x94\x2B\xC2\x36" | ||
23416 | "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8" | ||
23417 | "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A" | ||
23418 | "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C" | ||
23419 | "\x00\x97\x2E\xC5\x39\xD0\x67\xFE" | ||
23420 | "\x72\x09\xA0\x14\xAB\x42\xD9\x4D", | ||
23421 | .rlen = 1008, | ||
22184 | }, | 23422 | }, |
22185 | }; | 23423 | }; |
22186 | 23424 | ||
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index c5a0262251bc..2f9dbf7568fb 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig | |||
@@ -86,6 +86,18 @@ config HW_RANDOM_BCM63XX | |||
86 | 86 | ||
87 | If unusure, say Y. | 87 | If unusure, say Y. |
88 | 88 | ||
89 | config HW_RANDOM_BCM2835 | ||
90 | tristate "Broadcom BCM2835 Random Number Generator support" | ||
91 | depends on HW_RANDOM && ARCH_BCM2835 | ||
92 | default HW_RANDOM | ||
93 | ---help--- | ||
94 | This driver provides kernel-side support for the Random Number | ||
95 | Generator hardware found on the Broadcom BCM2835 SoCs. | ||
96 | |||
97 | To compile this driver as a module, choose M here: the | ||
98 | module will be called bcm2835-rng | ||
99 | |||
100 | If unsure, say Y. | ||
89 | 101 | ||
90 | config HW_RANDOM_GEODE | 102 | config HW_RANDOM_GEODE |
91 | tristate "AMD Geode HW Random Number Generator support" | 103 | tristate "AMD Geode HW Random Number Generator support" |
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 1fd7eec9fbf6..bed467c9300e 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile | |||
@@ -26,3 +26,4 @@ obj-$(CONFIG_HW_RANDOM_PPC4XX) += ppc4xx-rng.o | |||
26 | obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o | 26 | obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o |
27 | obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o | 27 | obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o |
28 | obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o | 28 | obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o |
29 | obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o | ||
diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c new file mode 100644 index 000000000000..eb7f14725ebd --- /dev/null +++ b/drivers/char/hw_random/bcm2835-rng.c | |||
@@ -0,0 +1,113 @@ | |||
1 | /** | ||
2 | * Copyright (c) 2010-2012 Broadcom. All rights reserved. | ||
3 | * Copyright (c) 2013 Lubomir Rintel | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License ("GPL") | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/hw_random.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/io.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/of_address.h> | ||
16 | #include <linux/of_platform.h> | ||
17 | #include <linux/platform_device.h> | ||
18 | #include <linux/printk.h> | ||
19 | |||
20 | #define RNG_CTRL 0x0 | ||
21 | #define RNG_STATUS 0x4 | ||
22 | #define RNG_DATA 0x8 | ||
23 | |||
24 | /* enable rng */ | ||
25 | #define RNG_RBGEN 0x1 | ||
26 | |||
27 | /* the initial numbers generated are "less random" so will be discarded */ | ||
28 | #define RNG_WARMUP_COUNT 0x40000 | ||
29 | |||
30 | static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max, | ||
31 | bool wait) | ||
32 | { | ||
33 | void __iomem *rng_base = (void __iomem *)rng->priv; | ||
34 | |||
35 | while ((__raw_readl(rng_base + RNG_STATUS) >> 24) == 0) { | ||
36 | if (!wait) | ||
37 | return 0; | ||
38 | cpu_relax(); | ||
39 | } | ||
40 | |||
41 | *(u32 *)buf = __raw_readl(rng_base + RNG_DATA); | ||
42 | return sizeof(u32); | ||
43 | } | ||
44 | |||
45 | static struct hwrng bcm2835_rng_ops = { | ||
46 | .name = "bcm2835", | ||
47 | .read = bcm2835_rng_read, | ||
48 | }; | ||
49 | |||
50 | static int bcm2835_rng_probe(struct platform_device *pdev) | ||
51 | { | ||
52 | struct device *dev = &pdev->dev; | ||
53 | struct device_node *np = dev->of_node; | ||
54 | void __iomem *rng_base; | ||
55 | int err; | ||
56 | |||
57 | /* map peripheral */ | ||
58 | rng_base = of_iomap(np, 0); | ||
59 | if (!rng_base) { | ||
60 | dev_err(dev, "failed to remap rng regs"); | ||
61 | return -ENODEV; | ||
62 | } | ||
63 | bcm2835_rng_ops.priv = (unsigned long)rng_base; | ||
64 | |||
65 | /* register driver */ | ||
66 | err = hwrng_register(&bcm2835_rng_ops); | ||
67 | if (err) { | ||
68 | dev_err(dev, "hwrng registration failed\n"); | ||
69 | iounmap(rng_base); | ||
70 | } else { | ||
71 | dev_info(dev, "hwrng registered\n"); | ||
72 | |||
73 | /* set warm-up count & enable */ | ||
74 | __raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS); | ||
75 | __raw_writel(RNG_RBGEN, rng_base + RNG_CTRL); | ||
76 | } | ||
77 | return err; | ||
78 | } | ||
79 | |||
80 | static int bcm2835_rng_remove(struct platform_device *pdev) | ||
81 | { | ||
82 | void __iomem *rng_base = (void __iomem *)bcm2835_rng_ops.priv; | ||
83 | |||
84 | /* disable rng hardware */ | ||
85 | __raw_writel(0, rng_base + RNG_CTRL); | ||
86 | |||
87 | /* unregister driver */ | ||
88 | hwrng_unregister(&bcm2835_rng_ops); | ||
89 | iounmap(rng_base); | ||
90 | |||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | static const struct of_device_id bcm2835_rng_of_match[] = { | ||
95 | { .compatible = "brcm,bcm2835-rng", }, | ||
96 | {}, | ||
97 | }; | ||
98 | MODULE_DEVICE_TABLE(of, bcm2835_rng_of_match); | ||
99 | |||
100 | static struct platform_driver bcm2835_rng_driver = { | ||
101 | .driver = { | ||
102 | .name = "bcm2835-rng", | ||
103 | .owner = THIS_MODULE, | ||
104 | .of_match_table = bcm2835_rng_of_match, | ||
105 | }, | ||
106 | .probe = bcm2835_rng_probe, | ||
107 | .remove = bcm2835_rng_remove, | ||
108 | }; | ||
109 | module_platform_driver(bcm2835_rng_driver); | ||
110 | |||
111 | MODULE_AUTHOR("Lubomir Rintel <lkundrak@v3.sk>"); | ||
112 | MODULE_DESCRIPTION("BCM2835 Random Number Generator (RNG) driver"); | ||
113 | MODULE_LICENSE("GPLv2"); | ||
diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c index ac47631ab34f..402ccfb625c5 100644 --- a/drivers/char/hw_random/exynos-rng.c +++ b/drivers/char/hw_random/exynos-rng.c | |||
@@ -144,6 +144,7 @@ static int exynos_rng_remove(struct platform_device *pdev) | |||
144 | return 0; | 144 | return 0; |
145 | } | 145 | } |
146 | 146 | ||
147 | #if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME) | ||
147 | static int exynos_rng_runtime_suspend(struct device *dev) | 148 | static int exynos_rng_runtime_suspend(struct device *dev) |
148 | { | 149 | { |
149 | struct platform_device *pdev = to_platform_device(dev); | 150 | struct platform_device *pdev = to_platform_device(dev); |
@@ -161,7 +162,7 @@ static int exynos_rng_runtime_resume(struct device *dev) | |||
161 | 162 | ||
162 | return clk_prepare_enable(exynos_rng->clk); | 163 | return clk_prepare_enable(exynos_rng->clk); |
163 | } | 164 | } |
164 | 165 | #endif | |
165 | 166 | ||
166 | static UNIVERSAL_DEV_PM_OPS(exynos_rng_pm_ops, exynos_rng_runtime_suspend, | 167 | static UNIVERSAL_DEV_PM_OPS(exynos_rng_pm_ops, exynos_rng_runtime_suspend, |
167 | exynos_rng_runtime_resume, NULL); | 168 | exynos_rng_runtime_resume, NULL); |
diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c index 895d0b8fb9ab..4ca35e8a5d8c 100644 --- a/drivers/char/hw_random/mxc-rnga.c +++ b/drivers/char/hw_random/mxc-rnga.c | |||
@@ -142,7 +142,7 @@ static void mxc_rnga_cleanup(struct hwrng *rng) | |||
142 | static int __init mxc_rnga_probe(struct platform_device *pdev) | 142 | static int __init mxc_rnga_probe(struct platform_device *pdev) |
143 | { | 143 | { |
144 | int err = -ENODEV; | 144 | int err = -ENODEV; |
145 | struct resource *res, *mem; | 145 | struct resource *res; |
146 | struct mxc_rng *mxc_rng; | 146 | struct mxc_rng *mxc_rng; |
147 | 147 | ||
148 | mxc_rng = devm_kzalloc(&pdev->dev, sizeof(struct mxc_rng), | 148 | mxc_rng = devm_kzalloc(&pdev->dev, sizeof(struct mxc_rng), |
@@ -172,15 +172,9 @@ static int __init mxc_rnga_probe(struct platform_device *pdev) | |||
172 | goto err_region; | 172 | goto err_region; |
173 | } | 173 | } |
174 | 174 | ||
175 | mem = request_mem_region(res->start, resource_size(res), pdev->name); | 175 | mxc_rng->mem = devm_ioremap_resource(&pdev->dev, res); |
176 | if (mem == NULL) { | 176 | if (IS_ERR(mxc_rng->mem)) { |
177 | err = -EBUSY; | 177 | err = PTR_ERR(mxc_rng->mem); |
178 | goto err_region; | ||
179 | } | ||
180 | |||
181 | mxc_rng->mem = ioremap(res->start, resource_size(res)); | ||
182 | if (!mxc_rng->mem) { | ||
183 | err = -ENOMEM; | ||
184 | goto err_ioremap; | 178 | goto err_ioremap; |
185 | } | 179 | } |
186 | 180 | ||
@@ -195,8 +189,6 @@ static int __init mxc_rnga_probe(struct platform_device *pdev) | |||
195 | return 0; | 189 | return 0; |
196 | 190 | ||
197 | err_ioremap: | 191 | err_ioremap: |
198 | release_mem_region(res->start, resource_size(res)); | ||
199 | |||
200 | err_region: | 192 | err_region: |
201 | clk_disable_unprepare(mxc_rng->clk); | 193 | clk_disable_unprepare(mxc_rng->clk); |
202 | 194 | ||
@@ -206,15 +198,10 @@ out: | |||
206 | 198 | ||
207 | static int __exit mxc_rnga_remove(struct platform_device *pdev) | 199 | static int __exit mxc_rnga_remove(struct platform_device *pdev) |
208 | { | 200 | { |
209 | struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
210 | struct mxc_rng *mxc_rng = platform_get_drvdata(pdev); | 201 | struct mxc_rng *mxc_rng = platform_get_drvdata(pdev); |
211 | 202 | ||
212 | hwrng_unregister(&mxc_rng->rng); | 203 | hwrng_unregister(&mxc_rng->rng); |
213 | 204 | ||
214 | iounmap(mxc_rng->mem); | ||
215 | |||
216 | release_mem_region(res->start, resource_size(res)); | ||
217 | |||
218 | clk_disable_unprepare(mxc_rng->clk); | 205 | clk_disable_unprepare(mxc_rng->clk); |
219 | 206 | ||
220 | return 0; | 207 | return 0; |
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index 849db199c02c..3e75737f5fe1 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c | |||
@@ -23,127 +23,209 @@ | |||
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
25 | #include <linux/platform_device.h> | 25 | #include <linux/platform_device.h> |
26 | #include <linux/of.h> | ||
26 | #include <linux/hw_random.h> | 27 | #include <linux/hw_random.h> |
27 | #include <linux/io.h> | 28 | #include <linux/io.h> |
29 | #include <linux/slab.h> | ||
28 | #include <linux/timeriomem-rng.h> | 30 | #include <linux/timeriomem-rng.h> |
29 | #include <linux/jiffies.h> | 31 | #include <linux/jiffies.h> |
30 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
31 | #include <linux/timer.h> | 33 | #include <linux/timer.h> |
32 | #include <linux/completion.h> | 34 | #include <linux/completion.h> |
33 | 35 | ||
34 | static struct timeriomem_rng_data *timeriomem_rng_data; | 36 | struct timeriomem_rng_private_data { |
37 | void __iomem *io_base; | ||
38 | unsigned int expires; | ||
39 | unsigned int period; | ||
40 | unsigned int present:1; | ||
35 | 41 | ||
36 | static void timeriomem_rng_trigger(unsigned long); | 42 | struct timer_list timer; |
37 | static DEFINE_TIMER(timeriomem_rng_timer, timeriomem_rng_trigger, 0, 0); | 43 | struct completion completion; |
44 | |||
45 | struct hwrng timeriomem_rng_ops; | ||
46 | }; | ||
47 | |||
48 | #define to_rng_priv(rng) \ | ||
49 | ((struct timeriomem_rng_private_data *)rng->priv) | ||
38 | 50 | ||
39 | /* | 51 | /* |
40 | * have data return 1, however return 0 if we have nothing | 52 | * have data return 1, however return 0 if we have nothing |
41 | */ | 53 | */ |
42 | static int timeriomem_rng_data_present(struct hwrng *rng, int wait) | 54 | static int timeriomem_rng_data_present(struct hwrng *rng, int wait) |
43 | { | 55 | { |
44 | if (rng->priv == 0) | 56 | struct timeriomem_rng_private_data *priv = to_rng_priv(rng); |
45 | return 1; | ||
46 | 57 | ||
47 | if (!wait || timeriomem_rng_data->present) | 58 | if (!wait || priv->present) |
48 | return timeriomem_rng_data->present; | 59 | return priv->present; |
49 | 60 | ||
50 | wait_for_completion(&timeriomem_rng_data->completion); | 61 | wait_for_completion(&priv->completion); |
51 | 62 | ||
52 | return 1; | 63 | return 1; |
53 | } | 64 | } |
54 | 65 | ||
55 | static int timeriomem_rng_data_read(struct hwrng *rng, u32 *data) | 66 | static int timeriomem_rng_data_read(struct hwrng *rng, u32 *data) |
56 | { | 67 | { |
68 | struct timeriomem_rng_private_data *priv = to_rng_priv(rng); | ||
57 | unsigned long cur; | 69 | unsigned long cur; |
58 | s32 delay; | 70 | s32 delay; |
59 | 71 | ||
60 | *data = readl(timeriomem_rng_data->address); | 72 | *data = readl(priv->io_base); |
61 | 73 | ||
62 | if (rng->priv != 0) { | 74 | cur = jiffies; |
63 | cur = jiffies; | ||
64 | 75 | ||
65 | delay = cur - timeriomem_rng_timer.expires; | 76 | delay = cur - priv->expires; |
66 | delay = rng->priv - (delay % rng->priv); | 77 | delay = priv->period - (delay % priv->period); |
67 | 78 | ||
68 | timeriomem_rng_timer.expires = cur + delay; | 79 | priv->expires = cur + delay; |
69 | timeriomem_rng_data->present = 0; | 80 | priv->present = 0; |
70 | 81 | ||
71 | init_completion(&timeriomem_rng_data->completion); | 82 | INIT_COMPLETION(priv->completion); |
72 | add_timer(&timeriomem_rng_timer); | 83 | mod_timer(&priv->timer, priv->expires); |
73 | } | ||
74 | 84 | ||
75 | return 4; | 85 | return 4; |
76 | } | 86 | } |
77 | 87 | ||
78 | static void timeriomem_rng_trigger(unsigned long dummy) | 88 | static void timeriomem_rng_trigger(unsigned long data) |
79 | { | 89 | { |
80 | timeriomem_rng_data->present = 1; | 90 | struct timeriomem_rng_private_data *priv |
81 | complete(&timeriomem_rng_data->completion); | 91 | = (struct timeriomem_rng_private_data *)data; |
82 | } | ||
83 | 92 | ||
84 | static struct hwrng timeriomem_rng_ops = { | 93 | priv->present = 1; |
85 | .name = "timeriomem", | 94 | complete(&priv->completion); |
86 | .data_present = timeriomem_rng_data_present, | 95 | } |
87 | .data_read = timeriomem_rng_data_read, | ||
88 | .priv = 0, | ||
89 | }; | ||
90 | 96 | ||
91 | static int timeriomem_rng_probe(struct platform_device *pdev) | 97 | static int timeriomem_rng_probe(struct platform_device *pdev) |
92 | { | 98 | { |
99 | struct timeriomem_rng_data *pdata = pdev->dev.platform_data; | ||
100 | struct timeriomem_rng_private_data *priv; | ||
93 | struct resource *res; | 101 | struct resource *res; |
94 | int ret; | 102 | int err = 0; |
103 | int period; | ||
95 | 104 | ||
96 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | 105 | if (!pdev->dev.of_node && !pdata) { |
106 | dev_err(&pdev->dev, "timeriomem_rng_data is missing\n"); | ||
107 | return -EINVAL; | ||
108 | } | ||
97 | 109 | ||
110 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
98 | if (!res) | 111 | if (!res) |
99 | return -ENOENT; | 112 | return -ENXIO; |
100 | 113 | ||
101 | timeriomem_rng_data = pdev->dev.platform_data; | 114 | if (res->start % 4 != 0 || resource_size(res) != 4) { |
115 | dev_err(&pdev->dev, | ||
116 | "address must be four bytes wide and aligned\n"); | ||
117 | return -EINVAL; | ||
118 | } | ||
102 | 119 | ||
103 | timeriomem_rng_data->address = ioremap(res->start, resource_size(res)); | 120 | /* Allocate memory for the device structure (and zero it) */ |
104 | if (!timeriomem_rng_data->address) | 121 | priv = kzalloc(sizeof(struct timeriomem_rng_private_data), GFP_KERNEL); |
105 | return -EIO; | 122 | if (!priv) { |
123 | dev_err(&pdev->dev, "failed to allocate device structure.\n"); | ||
124 | return -ENOMEM; | ||
125 | } | ||
126 | |||
127 | platform_set_drvdata(pdev, priv); | ||
128 | |||
129 | if (pdev->dev.of_node) { | ||
130 | int i; | ||
131 | |||
132 | if (!of_property_read_u32(pdev->dev.of_node, | ||
133 | "period", &i)) | ||
134 | period = i; | ||
135 | else { | ||
136 | dev_err(&pdev->dev, "missing period\n"); | ||
137 | err = -EINVAL; | ||
138 | goto out_free; | ||
139 | } | ||
140 | } else | ||
141 | period = pdata->period; | ||
142 | |||
143 | priv->period = usecs_to_jiffies(period); | ||
144 | if (priv->period < 1) { | ||
145 | dev_err(&pdev->dev, "period is less than one jiffy\n"); | ||
146 | err = -EINVAL; | ||
147 | goto out_free; | ||
148 | } | ||
106 | 149 | ||
107 | if (timeriomem_rng_data->period != 0 | 150 | priv->expires = jiffies; |
108 | && usecs_to_jiffies(timeriomem_rng_data->period) > 0) { | 151 | priv->present = 1; |
109 | timeriomem_rng_timer.expires = jiffies; | ||
110 | 152 | ||
111 | timeriomem_rng_ops.priv = usecs_to_jiffies( | 153 | init_completion(&priv->completion); |
112 | timeriomem_rng_data->period); | 154 | complete(&priv->completion); |
155 | |||
156 | setup_timer(&priv->timer, timeriomem_rng_trigger, (unsigned long)priv); | ||
157 | |||
158 | priv->timeriomem_rng_ops.name = dev_name(&pdev->dev); | ||
159 | priv->timeriomem_rng_ops.data_present = timeriomem_rng_data_present; | ||
160 | priv->timeriomem_rng_ops.data_read = timeriomem_rng_data_read; | ||
161 | priv->timeriomem_rng_ops.priv = (unsigned long)priv; | ||
162 | |||
163 | if (!request_mem_region(res->start, resource_size(res), | ||
164 | dev_name(&pdev->dev))) { | ||
165 | dev_err(&pdev->dev, "request_mem_region failed\n"); | ||
166 | err = -EBUSY; | ||
167 | goto out_timer; | ||
113 | } | 168 | } |
114 | timeriomem_rng_data->present = 1; | ||
115 | 169 | ||
116 | ret = hwrng_register(&timeriomem_rng_ops); | 170 | priv->io_base = ioremap(res->start, resource_size(res)); |
117 | if (ret) | 171 | if (priv->io_base == NULL) { |
118 | goto failed; | 172 | dev_err(&pdev->dev, "ioremap failed\n"); |
173 | err = -EIO; | ||
174 | goto out_release_io; | ||
175 | } | ||
176 | |||
177 | err = hwrng_register(&priv->timeriomem_rng_ops); | ||
178 | if (err) { | ||
179 | dev_err(&pdev->dev, "problem registering\n"); | ||
180 | goto out; | ||
181 | } | ||
119 | 182 | ||
120 | dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n", | 183 | dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n", |
121 | timeriomem_rng_data->address, | 184 | priv->io_base, period); |
122 | timeriomem_rng_data->period); | ||
123 | 185 | ||
124 | return 0; | 186 | return 0; |
125 | 187 | ||
126 | failed: | 188 | out: |
127 | dev_err(&pdev->dev, "problem registering\n"); | 189 | iounmap(priv->io_base); |
128 | iounmap(timeriomem_rng_data->address); | 190 | out_release_io: |
129 | 191 | release_mem_region(res->start, resource_size(res)); | |
130 | return ret; | 192 | out_timer: |
193 | del_timer_sync(&priv->timer); | ||
194 | out_free: | ||
195 | platform_set_drvdata(pdev, NULL); | ||
196 | kfree(priv); | ||
197 | return err; | ||
131 | } | 198 | } |
132 | 199 | ||
133 | static int timeriomem_rng_remove(struct platform_device *pdev) | 200 | static int timeriomem_rng_remove(struct platform_device *pdev) |
134 | { | 201 | { |
135 | del_timer_sync(&timeriomem_rng_timer); | 202 | struct timeriomem_rng_private_data *priv = platform_get_drvdata(pdev); |
136 | hwrng_unregister(&timeriomem_rng_ops); | 203 | struct resource *res; |
204 | |||
205 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
137 | 206 | ||
138 | iounmap(timeriomem_rng_data->address); | 207 | hwrng_unregister(&priv->timeriomem_rng_ops); |
208 | |||
209 | del_timer_sync(&priv->timer); | ||
210 | iounmap(priv->io_base); | ||
211 | release_mem_region(res->start, resource_size(res)); | ||
212 | platform_set_drvdata(pdev, NULL); | ||
213 | kfree(priv); | ||
139 | 214 | ||
140 | return 0; | 215 | return 0; |
141 | } | 216 | } |
142 | 217 | ||
218 | static const struct of_device_id timeriomem_rng_match[] = { | ||
219 | { .compatible = "timeriomem_rng" }, | ||
220 | {}, | ||
221 | }; | ||
222 | MODULE_DEVICE_TABLE(of, timeriomem_rng_match); | ||
223 | |||
143 | static struct platform_driver timeriomem_rng_driver = { | 224 | static struct platform_driver timeriomem_rng_driver = { |
144 | .driver = { | 225 | .driver = { |
145 | .name = "timeriomem_rng", | 226 | .name = "timeriomem_rng", |
146 | .owner = THIS_MODULE, | 227 | .owner = THIS_MODULE, |
228 | .of_match_table = timeriomem_rng_match, | ||
147 | }, | 229 | }, |
148 | .probe = timeriomem_rng_probe, | 230 | .probe = timeriomem_rng_probe, |
149 | .remove = timeriomem_rng_remove, | 231 | .remove = timeriomem_rng_remove, |
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 87ec4d027c25..dffb85525368 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig | |||
@@ -276,6 +276,16 @@ config CRYPTO_DEV_PICOXCELL | |||
276 | 276 | ||
277 | Saying m here will build a module named pipcoxcell_crypto. | 277 | Saying m here will build a module named pipcoxcell_crypto. |
278 | 278 | ||
279 | config CRYPTO_DEV_SAHARA | ||
280 | tristate "Support for SAHARA crypto accelerator" | ||
281 | depends on ARCH_MXC && EXPERIMENTAL && OF | ||
282 | select CRYPTO_BLKCIPHER | ||
283 | select CRYPTO_AES | ||
284 | select CRYPTO_ECB | ||
285 | help | ||
286 | This option enables support for the SAHARA HW crypto accelerator | ||
287 | found in some Freescale i.MX chips. | ||
288 | |||
279 | config CRYPTO_DEV_S5P | 289 | config CRYPTO_DEV_S5P |
280 | tristate "Support for Samsung S5PV210 crypto accelerator" | 290 | tristate "Support for Samsung S5PV210 crypto accelerator" |
281 | depends on ARCH_S5PV210 | 291 | depends on ARCH_S5PV210 |
@@ -361,15 +371,17 @@ config CRYPTO_DEV_ATMEL_TDES | |||
361 | will be called atmel-tdes. | 371 | will be called atmel-tdes. |
362 | 372 | ||
363 | config CRYPTO_DEV_ATMEL_SHA | 373 | config CRYPTO_DEV_ATMEL_SHA |
364 | tristate "Support for Atmel SHA1/SHA256 hw accelerator" | 374 | tristate "Support for Atmel SHA hw accelerator" |
365 | depends on ARCH_AT91 | 375 | depends on ARCH_AT91 |
366 | select CRYPTO_SHA1 | 376 | select CRYPTO_SHA1 |
367 | select CRYPTO_SHA256 | 377 | select CRYPTO_SHA256 |
378 | select CRYPTO_SHA512 | ||
368 | select CRYPTO_ALGAPI | 379 | select CRYPTO_ALGAPI |
369 | help | 380 | help |
370 | Some Atmel processors have SHA1/SHA256 hw accelerator. | 381 | Some Atmel processors have SHA1/SHA224/SHA256/SHA384/SHA512 |
382 | hw accelerator. | ||
371 | Select this if you want to use the Atmel module for | 383 | Select this if you want to use the Atmel module for |
372 | SHA1/SHA256 algorithms. | 384 | SHA1/SHA224/SHA256/SHA384/SHA512 algorithms. |
373 | 385 | ||
374 | To compile this driver as a module, choose M here: the module | 386 | To compile this driver as a module, choose M here: the module |
375 | will be called atmel-sha. | 387 | will be called atmel-sha. |
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 880a47b0b023..38ce13d3b79b 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile | |||
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ | |||
12 | obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o | 12 | obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o |
13 | obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o | 13 | obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o |
14 | obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o | 14 | obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o |
15 | obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o | ||
15 | obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o | 16 | obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o |
16 | obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o | 17 | obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o |
17 | obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ | 18 | obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ |
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 6f22ba51f969..c1efd910d97b 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c | |||
@@ -38,7 +38,7 @@ | |||
38 | #include <crypto/aes.h> | 38 | #include <crypto/aes.h> |
39 | #include <crypto/hash.h> | 39 | #include <crypto/hash.h> |
40 | #include <crypto/internal/hash.h> | 40 | #include <crypto/internal/hash.h> |
41 | #include <linux/platform_data/atmel-aes.h> | 41 | #include <linux/platform_data/crypto-atmel.h> |
42 | #include "atmel-aes-regs.h" | 42 | #include "atmel-aes-regs.h" |
43 | 43 | ||
44 | #define CFB8_BLOCK_SIZE 1 | 44 | #define CFB8_BLOCK_SIZE 1 |
@@ -47,7 +47,7 @@ | |||
47 | #define CFB64_BLOCK_SIZE 8 | 47 | #define CFB64_BLOCK_SIZE 8 |
48 | 48 | ||
49 | /* AES flags */ | 49 | /* AES flags */ |
50 | #define AES_FLAGS_MODE_MASK 0x01ff | 50 | #define AES_FLAGS_MODE_MASK 0x03ff |
51 | #define AES_FLAGS_ENCRYPT BIT(0) | 51 | #define AES_FLAGS_ENCRYPT BIT(0) |
52 | #define AES_FLAGS_CBC BIT(1) | 52 | #define AES_FLAGS_CBC BIT(1) |
53 | #define AES_FLAGS_CFB BIT(2) | 53 | #define AES_FLAGS_CFB BIT(2) |
@@ -55,21 +55,26 @@ | |||
55 | #define AES_FLAGS_CFB16 BIT(4) | 55 | #define AES_FLAGS_CFB16 BIT(4) |
56 | #define AES_FLAGS_CFB32 BIT(5) | 56 | #define AES_FLAGS_CFB32 BIT(5) |
57 | #define AES_FLAGS_CFB64 BIT(6) | 57 | #define AES_FLAGS_CFB64 BIT(6) |
58 | #define AES_FLAGS_OFB BIT(7) | 58 | #define AES_FLAGS_CFB128 BIT(7) |
59 | #define AES_FLAGS_CTR BIT(8) | 59 | #define AES_FLAGS_OFB BIT(8) |
60 | #define AES_FLAGS_CTR BIT(9) | ||
60 | 61 | ||
61 | #define AES_FLAGS_INIT BIT(16) | 62 | #define AES_FLAGS_INIT BIT(16) |
62 | #define AES_FLAGS_DMA BIT(17) | 63 | #define AES_FLAGS_DMA BIT(17) |
63 | #define AES_FLAGS_BUSY BIT(18) | 64 | #define AES_FLAGS_BUSY BIT(18) |
65 | #define AES_FLAGS_FAST BIT(19) | ||
64 | 66 | ||
65 | #define AES_FLAGS_DUALBUFF BIT(24) | 67 | #define ATMEL_AES_QUEUE_LENGTH 50 |
66 | |||
67 | #define ATMEL_AES_QUEUE_LENGTH 1 | ||
68 | #define ATMEL_AES_CACHE_SIZE 0 | ||
69 | 68 | ||
70 | #define ATMEL_AES_DMA_THRESHOLD 16 | 69 | #define ATMEL_AES_DMA_THRESHOLD 16 |
71 | 70 | ||
72 | 71 | ||
72 | struct atmel_aes_caps { | ||
73 | bool has_dualbuff; | ||
74 | bool has_cfb64; | ||
75 | u32 max_burst_size; | ||
76 | }; | ||
77 | |||
73 | struct atmel_aes_dev; | 78 | struct atmel_aes_dev; |
74 | 79 | ||
75 | struct atmel_aes_ctx { | 80 | struct atmel_aes_ctx { |
@@ -77,6 +82,8 @@ struct atmel_aes_ctx { | |||
77 | 82 | ||
78 | int keylen; | 83 | int keylen; |
79 | u32 key[AES_KEYSIZE_256 / sizeof(u32)]; | 84 | u32 key[AES_KEYSIZE_256 / sizeof(u32)]; |
85 | |||
86 | u16 block_size; | ||
80 | }; | 87 | }; |
81 | 88 | ||
82 | struct atmel_aes_reqctx { | 89 | struct atmel_aes_reqctx { |
@@ -112,20 +119,27 @@ struct atmel_aes_dev { | |||
112 | 119 | ||
113 | struct scatterlist *in_sg; | 120 | struct scatterlist *in_sg; |
114 | unsigned int nb_in_sg; | 121 | unsigned int nb_in_sg; |
115 | 122 | size_t in_offset; | |
116 | struct scatterlist *out_sg; | 123 | struct scatterlist *out_sg; |
117 | unsigned int nb_out_sg; | 124 | unsigned int nb_out_sg; |
125 | size_t out_offset; | ||
118 | 126 | ||
119 | size_t bufcnt; | 127 | size_t bufcnt; |
128 | size_t buflen; | ||
129 | size_t dma_size; | ||
120 | 130 | ||
121 | u8 buf_in[ATMEL_AES_DMA_THRESHOLD] __aligned(sizeof(u32)); | 131 | void *buf_in; |
122 | int dma_in; | 132 | int dma_in; |
133 | dma_addr_t dma_addr_in; | ||
123 | struct atmel_aes_dma dma_lch_in; | 134 | struct atmel_aes_dma dma_lch_in; |
124 | 135 | ||
125 | u8 buf_out[ATMEL_AES_DMA_THRESHOLD] __aligned(sizeof(u32)); | 136 | void *buf_out; |
126 | int dma_out; | 137 | int dma_out; |
138 | dma_addr_t dma_addr_out; | ||
127 | struct atmel_aes_dma dma_lch_out; | 139 | struct atmel_aes_dma dma_lch_out; |
128 | 140 | ||
141 | struct atmel_aes_caps caps; | ||
142 | |||
129 | u32 hw_version; | 143 | u32 hw_version; |
130 | }; | 144 | }; |
131 | 145 | ||
@@ -165,6 +179,37 @@ static int atmel_aes_sg_length(struct ablkcipher_request *req, | |||
165 | return sg_nb; | 179 | return sg_nb; |
166 | } | 180 | } |
167 | 181 | ||
182 | static int atmel_aes_sg_copy(struct scatterlist **sg, size_t *offset, | ||
183 | void *buf, size_t buflen, size_t total, int out) | ||
184 | { | ||
185 | unsigned int count, off = 0; | ||
186 | |||
187 | while (buflen && total) { | ||
188 | count = min((*sg)->length - *offset, total); | ||
189 | count = min(count, buflen); | ||
190 | |||
191 | if (!count) | ||
192 | return off; | ||
193 | |||
194 | scatterwalk_map_and_copy(buf + off, *sg, *offset, count, out); | ||
195 | |||
196 | off += count; | ||
197 | buflen -= count; | ||
198 | *offset += count; | ||
199 | total -= count; | ||
200 | |||
201 | if (*offset == (*sg)->length) { | ||
202 | *sg = sg_next(*sg); | ||
203 | if (*sg) | ||
204 | *offset = 0; | ||
205 | else | ||
206 | total = 0; | ||
207 | } | ||
208 | } | ||
209 | |||
210 | return off; | ||
211 | } | ||
212 | |||
168 | static inline u32 atmel_aes_read(struct atmel_aes_dev *dd, u32 offset) | 213 | static inline u32 atmel_aes_read(struct atmel_aes_dev *dd, u32 offset) |
169 | { | 214 | { |
170 | return readl_relaxed(dd->io_base + offset); | 215 | return readl_relaxed(dd->io_base + offset); |
@@ -190,14 +235,6 @@ static void atmel_aes_write_n(struct atmel_aes_dev *dd, u32 offset, | |||
190 | atmel_aes_write(dd, offset, *value); | 235 | atmel_aes_write(dd, offset, *value); |
191 | } | 236 | } |
192 | 237 | ||
193 | static void atmel_aes_dualbuff_test(struct atmel_aes_dev *dd) | ||
194 | { | ||
195 | atmel_aes_write(dd, AES_MR, AES_MR_DUALBUFF); | ||
196 | |||
197 | if (atmel_aes_read(dd, AES_MR) & AES_MR_DUALBUFF) | ||
198 | dd->flags |= AES_FLAGS_DUALBUFF; | ||
199 | } | ||
200 | |||
201 | static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_ctx *ctx) | 238 | static struct atmel_aes_dev *atmel_aes_find_dev(struct atmel_aes_ctx *ctx) |
202 | { | 239 | { |
203 | struct atmel_aes_dev *aes_dd = NULL; | 240 | struct atmel_aes_dev *aes_dd = NULL; |
@@ -225,7 +262,7 @@ static int atmel_aes_hw_init(struct atmel_aes_dev *dd) | |||
225 | 262 | ||
226 | if (!(dd->flags & AES_FLAGS_INIT)) { | 263 | if (!(dd->flags & AES_FLAGS_INIT)) { |
227 | atmel_aes_write(dd, AES_CR, AES_CR_SWRST); | 264 | atmel_aes_write(dd, AES_CR, AES_CR_SWRST); |
228 | atmel_aes_dualbuff_test(dd); | 265 | atmel_aes_write(dd, AES_MR, 0xE << AES_MR_CKEY_OFFSET); |
229 | dd->flags |= AES_FLAGS_INIT; | 266 | dd->flags |= AES_FLAGS_INIT; |
230 | dd->err = 0; | 267 | dd->err = 0; |
231 | } | 268 | } |
@@ -233,11 +270,19 @@ static int atmel_aes_hw_init(struct atmel_aes_dev *dd) | |||
233 | return 0; | 270 | return 0; |
234 | } | 271 | } |
235 | 272 | ||
273 | static inline unsigned int atmel_aes_get_version(struct atmel_aes_dev *dd) | ||
274 | { | ||
275 | return atmel_aes_read(dd, AES_HW_VERSION) & 0x00000fff; | ||
276 | } | ||
277 | |||
236 | static void atmel_aes_hw_version_init(struct atmel_aes_dev *dd) | 278 | static void atmel_aes_hw_version_init(struct atmel_aes_dev *dd) |
237 | { | 279 | { |
238 | atmel_aes_hw_init(dd); | 280 | atmel_aes_hw_init(dd); |
239 | 281 | ||
240 | dd->hw_version = atmel_aes_read(dd, AES_HW_VERSION); | 282 | dd->hw_version = atmel_aes_get_version(dd); |
283 | |||
284 | dev_info(dd->dev, | ||
285 | "version: 0x%x\n", dd->hw_version); | ||
241 | 286 | ||
242 | clk_disable_unprepare(dd->iclk); | 287 | clk_disable_unprepare(dd->iclk); |
243 | } | 288 | } |
@@ -260,50 +305,77 @@ static void atmel_aes_dma_callback(void *data) | |||
260 | tasklet_schedule(&dd->done_task); | 305 | tasklet_schedule(&dd->done_task); |
261 | } | 306 | } |
262 | 307 | ||
263 | static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd) | 308 | static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd, |
309 | dma_addr_t dma_addr_in, dma_addr_t dma_addr_out, int length) | ||
264 | { | 310 | { |
311 | struct scatterlist sg[2]; | ||
265 | struct dma_async_tx_descriptor *in_desc, *out_desc; | 312 | struct dma_async_tx_descriptor *in_desc, *out_desc; |
266 | int nb_dma_sg_in, nb_dma_sg_out; | ||
267 | 313 | ||
268 | dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg); | 314 | dd->dma_size = length; |
269 | if (!dd->nb_in_sg) | ||
270 | goto exit_err; | ||
271 | 315 | ||
272 | nb_dma_sg_in = dma_map_sg(dd->dev, dd->in_sg, dd->nb_in_sg, | 316 | if (!(dd->flags & AES_FLAGS_FAST)) { |
273 | DMA_TO_DEVICE); | 317 | dma_sync_single_for_device(dd->dev, dma_addr_in, length, |
274 | if (!nb_dma_sg_in) | 318 | DMA_TO_DEVICE); |
275 | goto exit_err; | 319 | } |
276 | 320 | ||
277 | in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, dd->in_sg, | 321 | if (dd->flags & AES_FLAGS_CFB8) { |
278 | nb_dma_sg_in, DMA_MEM_TO_DEV, | 322 | dd->dma_lch_in.dma_conf.dst_addr_width = |
279 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | 323 | DMA_SLAVE_BUSWIDTH_1_BYTE; |
324 | dd->dma_lch_out.dma_conf.src_addr_width = | ||
325 | DMA_SLAVE_BUSWIDTH_1_BYTE; | ||
326 | } else if (dd->flags & AES_FLAGS_CFB16) { | ||
327 | dd->dma_lch_in.dma_conf.dst_addr_width = | ||
328 | DMA_SLAVE_BUSWIDTH_2_BYTES; | ||
329 | dd->dma_lch_out.dma_conf.src_addr_width = | ||
330 | DMA_SLAVE_BUSWIDTH_2_BYTES; | ||
331 | } else { | ||
332 | dd->dma_lch_in.dma_conf.dst_addr_width = | ||
333 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
334 | dd->dma_lch_out.dma_conf.src_addr_width = | ||
335 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
336 | } | ||
280 | 337 | ||
281 | if (!in_desc) | 338 | if (dd->flags & (AES_FLAGS_CFB8 | AES_FLAGS_CFB16 | |
282 | goto unmap_in; | 339 | AES_FLAGS_CFB32 | AES_FLAGS_CFB64)) { |
340 | dd->dma_lch_in.dma_conf.src_maxburst = 1; | ||
341 | dd->dma_lch_in.dma_conf.dst_maxburst = 1; | ||
342 | dd->dma_lch_out.dma_conf.src_maxburst = 1; | ||
343 | dd->dma_lch_out.dma_conf.dst_maxburst = 1; | ||
344 | } else { | ||
345 | dd->dma_lch_in.dma_conf.src_maxburst = dd->caps.max_burst_size; | ||
346 | dd->dma_lch_in.dma_conf.dst_maxburst = dd->caps.max_burst_size; | ||
347 | dd->dma_lch_out.dma_conf.src_maxburst = dd->caps.max_burst_size; | ||
348 | dd->dma_lch_out.dma_conf.dst_maxburst = dd->caps.max_burst_size; | ||
349 | } | ||
283 | 350 | ||
284 | /* callback not needed */ | 351 | dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf); |
352 | dmaengine_slave_config(dd->dma_lch_out.chan, &dd->dma_lch_out.dma_conf); | ||
285 | 353 | ||
286 | dd->nb_out_sg = atmel_aes_sg_length(dd->req, dd->out_sg); | 354 | dd->flags |= AES_FLAGS_DMA; |
287 | if (!dd->nb_out_sg) | ||
288 | goto unmap_in; | ||
289 | 355 | ||
290 | nb_dma_sg_out = dma_map_sg(dd->dev, dd->out_sg, dd->nb_out_sg, | 356 | sg_init_table(&sg[0], 1); |
291 | DMA_FROM_DEVICE); | 357 | sg_dma_address(&sg[0]) = dma_addr_in; |
292 | if (!nb_dma_sg_out) | 358 | sg_dma_len(&sg[0]) = length; |
293 | goto unmap_out; | ||
294 | 359 | ||
295 | out_desc = dmaengine_prep_slave_sg(dd->dma_lch_out.chan, dd->out_sg, | 360 | sg_init_table(&sg[1], 1); |
296 | nb_dma_sg_out, DMA_DEV_TO_MEM, | 361 | sg_dma_address(&sg[1]) = dma_addr_out; |
297 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | 362 | sg_dma_len(&sg[1]) = length; |
363 | |||
364 | in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, &sg[0], | ||
365 | 1, DMA_MEM_TO_DEV, | ||
366 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | ||
367 | if (!in_desc) | ||
368 | return -EINVAL; | ||
298 | 369 | ||
370 | out_desc = dmaengine_prep_slave_sg(dd->dma_lch_out.chan, &sg[1], | ||
371 | 1, DMA_DEV_TO_MEM, | ||
372 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | ||
299 | if (!out_desc) | 373 | if (!out_desc) |
300 | goto unmap_out; | 374 | return -EINVAL; |
301 | 375 | ||
302 | out_desc->callback = atmel_aes_dma_callback; | 376 | out_desc->callback = atmel_aes_dma_callback; |
303 | out_desc->callback_param = dd; | 377 | out_desc->callback_param = dd; |
304 | 378 | ||
305 | dd->total -= dd->req->nbytes; | ||
306 | |||
307 | dmaengine_submit(out_desc); | 379 | dmaengine_submit(out_desc); |
308 | dma_async_issue_pending(dd->dma_lch_out.chan); | 380 | dma_async_issue_pending(dd->dma_lch_out.chan); |
309 | 381 | ||
@@ -311,15 +383,6 @@ static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd) | |||
311 | dma_async_issue_pending(dd->dma_lch_in.chan); | 383 | dma_async_issue_pending(dd->dma_lch_in.chan); |
312 | 384 | ||
313 | return 0; | 385 | return 0; |
314 | |||
315 | unmap_out: | ||
316 | dma_unmap_sg(dd->dev, dd->out_sg, dd->nb_out_sg, | ||
317 | DMA_FROM_DEVICE); | ||
318 | unmap_in: | ||
319 | dma_unmap_sg(dd->dev, dd->in_sg, dd->nb_in_sg, | ||
320 | DMA_TO_DEVICE); | ||
321 | exit_err: | ||
322 | return -EINVAL; | ||
323 | } | 386 | } |
324 | 387 | ||
325 | static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd) | 388 | static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd) |
@@ -352,30 +415,66 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd) | |||
352 | 415 | ||
353 | static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd) | 416 | static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd) |
354 | { | 417 | { |
355 | int err; | 418 | int err, fast = 0, in, out; |
419 | size_t count; | ||
420 | dma_addr_t addr_in, addr_out; | ||
421 | |||
422 | if ((!dd->in_offset) && (!dd->out_offset)) { | ||
423 | /* check for alignment */ | ||
424 | in = IS_ALIGNED((u32)dd->in_sg->offset, sizeof(u32)) && | ||
425 | IS_ALIGNED(dd->in_sg->length, dd->ctx->block_size); | ||
426 | out = IS_ALIGNED((u32)dd->out_sg->offset, sizeof(u32)) && | ||
427 | IS_ALIGNED(dd->out_sg->length, dd->ctx->block_size); | ||
428 | fast = in && out; | ||
429 | |||
430 | if (sg_dma_len(dd->in_sg) != sg_dma_len(dd->out_sg)) | ||
431 | fast = 0; | ||
432 | } | ||
433 | |||
434 | |||
435 | if (fast) { | ||
436 | count = min(dd->total, sg_dma_len(dd->in_sg)); | ||
437 | count = min(count, sg_dma_len(dd->out_sg)); | ||
438 | |||
439 | err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); | ||
440 | if (!err) { | ||
441 | dev_err(dd->dev, "dma_map_sg() error\n"); | ||
442 | return -EINVAL; | ||
443 | } | ||
444 | |||
445 | err = dma_map_sg(dd->dev, dd->out_sg, 1, | ||
446 | DMA_FROM_DEVICE); | ||
447 | if (!err) { | ||
448 | dev_err(dd->dev, "dma_map_sg() error\n"); | ||
449 | dma_unmap_sg(dd->dev, dd->in_sg, 1, | ||
450 | DMA_TO_DEVICE); | ||
451 | return -EINVAL; | ||
452 | } | ||
453 | |||
454 | addr_in = sg_dma_address(dd->in_sg); | ||
455 | addr_out = sg_dma_address(dd->out_sg); | ||
456 | |||
457 | dd->flags |= AES_FLAGS_FAST; | ||
356 | 458 | ||
357 | if (dd->flags & AES_FLAGS_CFB8) { | ||
358 | dd->dma_lch_in.dma_conf.dst_addr_width = | ||
359 | DMA_SLAVE_BUSWIDTH_1_BYTE; | ||
360 | dd->dma_lch_out.dma_conf.src_addr_width = | ||
361 | DMA_SLAVE_BUSWIDTH_1_BYTE; | ||
362 | } else if (dd->flags & AES_FLAGS_CFB16) { | ||
363 | dd->dma_lch_in.dma_conf.dst_addr_width = | ||
364 | DMA_SLAVE_BUSWIDTH_2_BYTES; | ||
365 | dd->dma_lch_out.dma_conf.src_addr_width = | ||
366 | DMA_SLAVE_BUSWIDTH_2_BYTES; | ||
367 | } else { | 459 | } else { |
368 | dd->dma_lch_in.dma_conf.dst_addr_width = | 460 | /* use cache buffers */ |
369 | DMA_SLAVE_BUSWIDTH_4_BYTES; | 461 | count = atmel_aes_sg_copy(&dd->in_sg, &dd->in_offset, |
370 | dd->dma_lch_out.dma_conf.src_addr_width = | 462 | dd->buf_in, dd->buflen, dd->total, 0); |
371 | DMA_SLAVE_BUSWIDTH_4_BYTES; | 463 | |
464 | addr_in = dd->dma_addr_in; | ||
465 | addr_out = dd->dma_addr_out; | ||
466 | |||
467 | dd->flags &= ~AES_FLAGS_FAST; | ||
372 | } | 468 | } |
373 | 469 | ||
374 | dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf); | 470 | dd->total -= count; |
375 | dmaengine_slave_config(dd->dma_lch_out.chan, &dd->dma_lch_out.dma_conf); | ||
376 | 471 | ||
377 | dd->flags |= AES_FLAGS_DMA; | 472 | err = atmel_aes_crypt_dma(dd, addr_in, addr_out, count); |
378 | err = atmel_aes_crypt_dma(dd); | 473 | |
474 | if (err && (dd->flags & AES_FLAGS_FAST)) { | ||
475 | dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); | ||
476 | dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE); | ||
477 | } | ||
379 | 478 | ||
380 | return err; | 479 | return err; |
381 | } | 480 | } |
@@ -410,6 +509,8 @@ static int atmel_aes_write_ctrl(struct atmel_aes_dev *dd) | |||
410 | valmr |= AES_MR_CFBS_32b; | 509 | valmr |= AES_MR_CFBS_32b; |
411 | else if (dd->flags & AES_FLAGS_CFB64) | 510 | else if (dd->flags & AES_FLAGS_CFB64) |
412 | valmr |= AES_MR_CFBS_64b; | 511 | valmr |= AES_MR_CFBS_64b; |
512 | else if (dd->flags & AES_FLAGS_CFB128) | ||
513 | valmr |= AES_MR_CFBS_128b; | ||
413 | } else if (dd->flags & AES_FLAGS_OFB) { | 514 | } else if (dd->flags & AES_FLAGS_OFB) { |
414 | valmr |= AES_MR_OPMOD_OFB; | 515 | valmr |= AES_MR_OPMOD_OFB; |
415 | } else if (dd->flags & AES_FLAGS_CTR) { | 516 | } else if (dd->flags & AES_FLAGS_CTR) { |
@@ -423,7 +524,7 @@ static int atmel_aes_write_ctrl(struct atmel_aes_dev *dd) | |||
423 | 524 | ||
424 | if (dd->total > ATMEL_AES_DMA_THRESHOLD) { | 525 | if (dd->total > ATMEL_AES_DMA_THRESHOLD) { |
425 | valmr |= AES_MR_SMOD_IDATAR0; | 526 | valmr |= AES_MR_SMOD_IDATAR0; |
426 | if (dd->flags & AES_FLAGS_DUALBUFF) | 527 | if (dd->caps.has_dualbuff) |
427 | valmr |= AES_MR_DUALBUFF; | 528 | valmr |= AES_MR_DUALBUFF; |
428 | } else { | 529 | } else { |
429 | valmr |= AES_MR_SMOD_AUTO; | 530 | valmr |= AES_MR_SMOD_AUTO; |
@@ -477,7 +578,9 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, | |||
477 | /* assign new request to device */ | 578 | /* assign new request to device */ |
478 | dd->req = req; | 579 | dd->req = req; |
479 | dd->total = req->nbytes; | 580 | dd->total = req->nbytes; |
581 | dd->in_offset = 0; | ||
480 | dd->in_sg = req->src; | 582 | dd->in_sg = req->src; |
583 | dd->out_offset = 0; | ||
481 | dd->out_sg = req->dst; | 584 | dd->out_sg = req->dst; |
482 | 585 | ||
483 | rctx = ablkcipher_request_ctx(req); | 586 | rctx = ablkcipher_request_ctx(req); |
@@ -506,18 +609,86 @@ static int atmel_aes_handle_queue(struct atmel_aes_dev *dd, | |||
506 | static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd) | 609 | static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd) |
507 | { | 610 | { |
508 | int err = -EINVAL; | 611 | int err = -EINVAL; |
612 | size_t count; | ||
509 | 613 | ||
510 | if (dd->flags & AES_FLAGS_DMA) { | 614 | if (dd->flags & AES_FLAGS_DMA) { |
511 | dma_unmap_sg(dd->dev, dd->out_sg, | ||
512 | dd->nb_out_sg, DMA_FROM_DEVICE); | ||
513 | dma_unmap_sg(dd->dev, dd->in_sg, | ||
514 | dd->nb_in_sg, DMA_TO_DEVICE); | ||
515 | err = 0; | 615 | err = 0; |
616 | if (dd->flags & AES_FLAGS_FAST) { | ||
617 | dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); | ||
618 | dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); | ||
619 | } else { | ||
620 | dma_sync_single_for_device(dd->dev, dd->dma_addr_out, | ||
621 | dd->dma_size, DMA_FROM_DEVICE); | ||
622 | |||
623 | /* copy data */ | ||
624 | count = atmel_aes_sg_copy(&dd->out_sg, &dd->out_offset, | ||
625 | dd->buf_out, dd->buflen, dd->dma_size, 1); | ||
626 | if (count != dd->dma_size) { | ||
627 | err = -EINVAL; | ||
628 | pr_err("not all data converted: %u\n", count); | ||
629 | } | ||
630 | } | ||
516 | } | 631 | } |
517 | 632 | ||
518 | return err; | 633 | return err; |
519 | } | 634 | } |
520 | 635 | ||
636 | |||
637 | static int atmel_aes_buff_init(struct atmel_aes_dev *dd) | ||
638 | { | ||
639 | int err = -ENOMEM; | ||
640 | |||
641 | dd->buf_in = (void *)__get_free_pages(GFP_KERNEL, 0); | ||
642 | dd->buf_out = (void *)__get_free_pages(GFP_KERNEL, 0); | ||
643 | dd->buflen = PAGE_SIZE; | ||
644 | dd->buflen &= ~(AES_BLOCK_SIZE - 1); | ||
645 | |||
646 | if (!dd->buf_in || !dd->buf_out) { | ||
647 | dev_err(dd->dev, "unable to alloc pages.\n"); | ||
648 | goto err_alloc; | ||
649 | } | ||
650 | |||
651 | /* MAP here */ | ||
652 | dd->dma_addr_in = dma_map_single(dd->dev, dd->buf_in, | ||
653 | dd->buflen, DMA_TO_DEVICE); | ||
654 | if (dma_mapping_error(dd->dev, dd->dma_addr_in)) { | ||
655 | dev_err(dd->dev, "dma %d bytes error\n", dd->buflen); | ||
656 | err = -EINVAL; | ||
657 | goto err_map_in; | ||
658 | } | ||
659 | |||
660 | dd->dma_addr_out = dma_map_single(dd->dev, dd->buf_out, | ||
661 | dd->buflen, DMA_FROM_DEVICE); | ||
662 | if (dma_mapping_error(dd->dev, dd->dma_addr_out)) { | ||
663 | dev_err(dd->dev, "dma %d bytes error\n", dd->buflen); | ||
664 | err = -EINVAL; | ||
665 | goto err_map_out; | ||
666 | } | ||
667 | |||
668 | return 0; | ||
669 | |||
670 | err_map_out: | ||
671 | dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen, | ||
672 | DMA_TO_DEVICE); | ||
673 | err_map_in: | ||
674 | free_page((unsigned long)dd->buf_out); | ||
675 | free_page((unsigned long)dd->buf_in); | ||
676 | err_alloc: | ||
677 | if (err) | ||
678 | pr_err("error: %d\n", err); | ||
679 | return err; | ||
680 | } | ||
681 | |||
682 | static void atmel_aes_buff_cleanup(struct atmel_aes_dev *dd) | ||
683 | { | ||
684 | dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen, | ||
685 | DMA_FROM_DEVICE); | ||
686 | dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen, | ||
687 | DMA_TO_DEVICE); | ||
688 | free_page((unsigned long)dd->buf_out); | ||
689 | free_page((unsigned long)dd->buf_in); | ||
690 | } | ||
691 | |||
521 | static int atmel_aes_crypt(struct ablkcipher_request *req, unsigned long mode) | 692 | static int atmel_aes_crypt(struct ablkcipher_request *req, unsigned long mode) |
522 | { | 693 | { |
523 | struct atmel_aes_ctx *ctx = crypto_ablkcipher_ctx( | 694 | struct atmel_aes_ctx *ctx = crypto_ablkcipher_ctx( |
@@ -525,9 +696,30 @@ static int atmel_aes_crypt(struct ablkcipher_request *req, unsigned long mode) | |||
525 | struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req); | 696 | struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req); |
526 | struct atmel_aes_dev *dd; | 697 | struct atmel_aes_dev *dd; |
527 | 698 | ||
528 | if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { | 699 | if (mode & AES_FLAGS_CFB8) { |
529 | pr_err("request size is not exact amount of AES blocks\n"); | 700 | if (!IS_ALIGNED(req->nbytes, CFB8_BLOCK_SIZE)) { |
530 | return -EINVAL; | 701 | pr_err("request size is not exact amount of CFB8 blocks\n"); |
702 | return -EINVAL; | ||
703 | } | ||
704 | ctx->block_size = CFB8_BLOCK_SIZE; | ||
705 | } else if (mode & AES_FLAGS_CFB16) { | ||
706 | if (!IS_ALIGNED(req->nbytes, CFB16_BLOCK_SIZE)) { | ||
707 | pr_err("request size is not exact amount of CFB16 blocks\n"); | ||
708 | return -EINVAL; | ||
709 | } | ||
710 | ctx->block_size = CFB16_BLOCK_SIZE; | ||
711 | } else if (mode & AES_FLAGS_CFB32) { | ||
712 | if (!IS_ALIGNED(req->nbytes, CFB32_BLOCK_SIZE)) { | ||
713 | pr_err("request size is not exact amount of CFB32 blocks\n"); | ||
714 | return -EINVAL; | ||
715 | } | ||
716 | ctx->block_size = CFB32_BLOCK_SIZE; | ||
717 | } else { | ||
718 | if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { | ||
719 | pr_err("request size is not exact amount of AES blocks\n"); | ||
720 | return -EINVAL; | ||
721 | } | ||
722 | ctx->block_size = AES_BLOCK_SIZE; | ||
531 | } | 723 | } |
532 | 724 | ||
533 | dd = atmel_aes_find_dev(ctx); | 725 | dd = atmel_aes_find_dev(ctx); |
@@ -551,14 +743,12 @@ static bool atmel_aes_filter(struct dma_chan *chan, void *slave) | |||
551 | } | 743 | } |
552 | } | 744 | } |
553 | 745 | ||
554 | static int atmel_aes_dma_init(struct atmel_aes_dev *dd) | 746 | static int atmel_aes_dma_init(struct atmel_aes_dev *dd, |
747 | struct crypto_platform_data *pdata) | ||
555 | { | 748 | { |
556 | int err = -ENOMEM; | 749 | int err = -ENOMEM; |
557 | struct aes_platform_data *pdata; | ||
558 | dma_cap_mask_t mask_in, mask_out; | 750 | dma_cap_mask_t mask_in, mask_out; |
559 | 751 | ||
560 | pdata = dd->dev->platform_data; | ||
561 | |||
562 | if (pdata && pdata->dma_slave->txdata.dma_dev && | 752 | if (pdata && pdata->dma_slave->txdata.dma_dev && |
563 | pdata->dma_slave->rxdata.dma_dev) { | 753 | pdata->dma_slave->rxdata.dma_dev) { |
564 | 754 | ||
@@ -568,28 +758,38 @@ static int atmel_aes_dma_init(struct atmel_aes_dev *dd) | |||
568 | 758 | ||
569 | dd->dma_lch_in.chan = dma_request_channel(mask_in, | 759 | dd->dma_lch_in.chan = dma_request_channel(mask_in, |
570 | atmel_aes_filter, &pdata->dma_slave->rxdata); | 760 | atmel_aes_filter, &pdata->dma_slave->rxdata); |
761 | |||
571 | if (!dd->dma_lch_in.chan) | 762 | if (!dd->dma_lch_in.chan) |
572 | goto err_dma_in; | 763 | goto err_dma_in; |
573 | 764 | ||
574 | dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV; | 765 | dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV; |
575 | dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base + | 766 | dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base + |
576 | AES_IDATAR(0); | 767 | AES_IDATAR(0); |
577 | dd->dma_lch_in.dma_conf.src_maxburst = 1; | 768 | dd->dma_lch_in.dma_conf.src_maxburst = dd->caps.max_burst_size; |
578 | dd->dma_lch_in.dma_conf.dst_maxburst = 1; | 769 | dd->dma_lch_in.dma_conf.src_addr_width = |
770 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
771 | dd->dma_lch_in.dma_conf.dst_maxburst = dd->caps.max_burst_size; | ||
772 | dd->dma_lch_in.dma_conf.dst_addr_width = | ||
773 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
579 | dd->dma_lch_in.dma_conf.device_fc = false; | 774 | dd->dma_lch_in.dma_conf.device_fc = false; |
580 | 775 | ||
581 | dma_cap_zero(mask_out); | 776 | dma_cap_zero(mask_out); |
582 | dma_cap_set(DMA_SLAVE, mask_out); | 777 | dma_cap_set(DMA_SLAVE, mask_out); |
583 | dd->dma_lch_out.chan = dma_request_channel(mask_out, | 778 | dd->dma_lch_out.chan = dma_request_channel(mask_out, |
584 | atmel_aes_filter, &pdata->dma_slave->txdata); | 779 | atmel_aes_filter, &pdata->dma_slave->txdata); |
780 | |||
585 | if (!dd->dma_lch_out.chan) | 781 | if (!dd->dma_lch_out.chan) |
586 | goto err_dma_out; | 782 | goto err_dma_out; |
587 | 783 | ||
588 | dd->dma_lch_out.dma_conf.direction = DMA_DEV_TO_MEM; | 784 | dd->dma_lch_out.dma_conf.direction = DMA_DEV_TO_MEM; |
589 | dd->dma_lch_out.dma_conf.src_addr = dd->phys_base + | 785 | dd->dma_lch_out.dma_conf.src_addr = dd->phys_base + |
590 | AES_ODATAR(0); | 786 | AES_ODATAR(0); |
591 | dd->dma_lch_out.dma_conf.src_maxburst = 1; | 787 | dd->dma_lch_out.dma_conf.src_maxburst = dd->caps.max_burst_size; |
592 | dd->dma_lch_out.dma_conf.dst_maxburst = 1; | 788 | dd->dma_lch_out.dma_conf.src_addr_width = |
789 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
790 | dd->dma_lch_out.dma_conf.dst_maxburst = dd->caps.max_burst_size; | ||
791 | dd->dma_lch_out.dma_conf.dst_addr_width = | ||
792 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
593 | dd->dma_lch_out.dma_conf.device_fc = false; | 793 | dd->dma_lch_out.dma_conf.device_fc = false; |
594 | 794 | ||
595 | return 0; | 795 | return 0; |
@@ -665,13 +865,13 @@ static int atmel_aes_ofb_decrypt(struct ablkcipher_request *req) | |||
665 | static int atmel_aes_cfb_encrypt(struct ablkcipher_request *req) | 865 | static int atmel_aes_cfb_encrypt(struct ablkcipher_request *req) |
666 | { | 866 | { |
667 | return atmel_aes_crypt(req, | 867 | return atmel_aes_crypt(req, |
668 | AES_FLAGS_ENCRYPT | AES_FLAGS_CFB); | 868 | AES_FLAGS_ENCRYPT | AES_FLAGS_CFB | AES_FLAGS_CFB128); |
669 | } | 869 | } |
670 | 870 | ||
671 | static int atmel_aes_cfb_decrypt(struct ablkcipher_request *req) | 871 | static int atmel_aes_cfb_decrypt(struct ablkcipher_request *req) |
672 | { | 872 | { |
673 | return atmel_aes_crypt(req, | 873 | return atmel_aes_crypt(req, |
674 | AES_FLAGS_CFB); | 874 | AES_FLAGS_CFB | AES_FLAGS_CFB128); |
675 | } | 875 | } |
676 | 876 | ||
677 | static int atmel_aes_cfb64_encrypt(struct ablkcipher_request *req) | 877 | static int atmel_aes_cfb64_encrypt(struct ablkcipher_request *req) |
@@ -753,7 +953,7 @@ static struct crypto_alg aes_algs[] = { | |||
753 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 953 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
754 | .cra_blocksize = AES_BLOCK_SIZE, | 954 | .cra_blocksize = AES_BLOCK_SIZE, |
755 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), | 955 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), |
756 | .cra_alignmask = 0x0, | 956 | .cra_alignmask = 0xf, |
757 | .cra_type = &crypto_ablkcipher_type, | 957 | .cra_type = &crypto_ablkcipher_type, |
758 | .cra_module = THIS_MODULE, | 958 | .cra_module = THIS_MODULE, |
759 | .cra_init = atmel_aes_cra_init, | 959 | .cra_init = atmel_aes_cra_init, |
@@ -773,7 +973,7 @@ static struct crypto_alg aes_algs[] = { | |||
773 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 973 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
774 | .cra_blocksize = AES_BLOCK_SIZE, | 974 | .cra_blocksize = AES_BLOCK_SIZE, |
775 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), | 975 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), |
776 | .cra_alignmask = 0x0, | 976 | .cra_alignmask = 0xf, |
777 | .cra_type = &crypto_ablkcipher_type, | 977 | .cra_type = &crypto_ablkcipher_type, |
778 | .cra_module = THIS_MODULE, | 978 | .cra_module = THIS_MODULE, |
779 | .cra_init = atmel_aes_cra_init, | 979 | .cra_init = atmel_aes_cra_init, |
@@ -794,7 +994,7 @@ static struct crypto_alg aes_algs[] = { | |||
794 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 994 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
795 | .cra_blocksize = AES_BLOCK_SIZE, | 995 | .cra_blocksize = AES_BLOCK_SIZE, |
796 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), | 996 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), |
797 | .cra_alignmask = 0x0, | 997 | .cra_alignmask = 0xf, |
798 | .cra_type = &crypto_ablkcipher_type, | 998 | .cra_type = &crypto_ablkcipher_type, |
799 | .cra_module = THIS_MODULE, | 999 | .cra_module = THIS_MODULE, |
800 | .cra_init = atmel_aes_cra_init, | 1000 | .cra_init = atmel_aes_cra_init, |
@@ -815,7 +1015,7 @@ static struct crypto_alg aes_algs[] = { | |||
815 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1015 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
816 | .cra_blocksize = AES_BLOCK_SIZE, | 1016 | .cra_blocksize = AES_BLOCK_SIZE, |
817 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), | 1017 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), |
818 | .cra_alignmask = 0x0, | 1018 | .cra_alignmask = 0xf, |
819 | .cra_type = &crypto_ablkcipher_type, | 1019 | .cra_type = &crypto_ablkcipher_type, |
820 | .cra_module = THIS_MODULE, | 1020 | .cra_module = THIS_MODULE, |
821 | .cra_init = atmel_aes_cra_init, | 1021 | .cra_init = atmel_aes_cra_init, |
@@ -836,7 +1036,7 @@ static struct crypto_alg aes_algs[] = { | |||
836 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1036 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
837 | .cra_blocksize = CFB32_BLOCK_SIZE, | 1037 | .cra_blocksize = CFB32_BLOCK_SIZE, |
838 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), | 1038 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), |
839 | .cra_alignmask = 0x0, | 1039 | .cra_alignmask = 0x3, |
840 | .cra_type = &crypto_ablkcipher_type, | 1040 | .cra_type = &crypto_ablkcipher_type, |
841 | .cra_module = THIS_MODULE, | 1041 | .cra_module = THIS_MODULE, |
842 | .cra_init = atmel_aes_cra_init, | 1042 | .cra_init = atmel_aes_cra_init, |
@@ -857,7 +1057,7 @@ static struct crypto_alg aes_algs[] = { | |||
857 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1057 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
858 | .cra_blocksize = CFB16_BLOCK_SIZE, | 1058 | .cra_blocksize = CFB16_BLOCK_SIZE, |
859 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), | 1059 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), |
860 | .cra_alignmask = 0x0, | 1060 | .cra_alignmask = 0x1, |
861 | .cra_type = &crypto_ablkcipher_type, | 1061 | .cra_type = &crypto_ablkcipher_type, |
862 | .cra_module = THIS_MODULE, | 1062 | .cra_module = THIS_MODULE, |
863 | .cra_init = atmel_aes_cra_init, | 1063 | .cra_init = atmel_aes_cra_init, |
@@ -899,7 +1099,7 @@ static struct crypto_alg aes_algs[] = { | |||
899 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1099 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
900 | .cra_blocksize = AES_BLOCK_SIZE, | 1100 | .cra_blocksize = AES_BLOCK_SIZE, |
901 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), | 1101 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), |
902 | .cra_alignmask = 0x0, | 1102 | .cra_alignmask = 0xf, |
903 | .cra_type = &crypto_ablkcipher_type, | 1103 | .cra_type = &crypto_ablkcipher_type, |
904 | .cra_module = THIS_MODULE, | 1104 | .cra_module = THIS_MODULE, |
905 | .cra_init = atmel_aes_cra_init, | 1105 | .cra_init = atmel_aes_cra_init, |
@@ -915,15 +1115,14 @@ static struct crypto_alg aes_algs[] = { | |||
915 | }, | 1115 | }, |
916 | }; | 1116 | }; |
917 | 1117 | ||
918 | static struct crypto_alg aes_cfb64_alg[] = { | 1118 | static struct crypto_alg aes_cfb64_alg = { |
919 | { | ||
920 | .cra_name = "cfb64(aes)", | 1119 | .cra_name = "cfb64(aes)", |
921 | .cra_driver_name = "atmel-cfb64-aes", | 1120 | .cra_driver_name = "atmel-cfb64-aes", |
922 | .cra_priority = 100, | 1121 | .cra_priority = 100, |
923 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1122 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
924 | .cra_blocksize = CFB64_BLOCK_SIZE, | 1123 | .cra_blocksize = CFB64_BLOCK_SIZE, |
925 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), | 1124 | .cra_ctxsize = sizeof(struct atmel_aes_ctx), |
926 | .cra_alignmask = 0x0, | 1125 | .cra_alignmask = 0x7, |
927 | .cra_type = &crypto_ablkcipher_type, | 1126 | .cra_type = &crypto_ablkcipher_type, |
928 | .cra_module = THIS_MODULE, | 1127 | .cra_module = THIS_MODULE, |
929 | .cra_init = atmel_aes_cra_init, | 1128 | .cra_init = atmel_aes_cra_init, |
@@ -936,7 +1135,6 @@ static struct crypto_alg aes_cfb64_alg[] = { | |||
936 | .encrypt = atmel_aes_cfb64_encrypt, | 1135 | .encrypt = atmel_aes_cfb64_encrypt, |
937 | .decrypt = atmel_aes_cfb64_decrypt, | 1136 | .decrypt = atmel_aes_cfb64_decrypt, |
938 | } | 1137 | } |
939 | }, | ||
940 | }; | 1138 | }; |
941 | 1139 | ||
942 | static void atmel_aes_queue_task(unsigned long data) | 1140 | static void atmel_aes_queue_task(unsigned long data) |
@@ -969,7 +1167,14 @@ static void atmel_aes_done_task(unsigned long data) | |||
969 | err = dd->err ? : err; | 1167 | err = dd->err ? : err; |
970 | 1168 | ||
971 | if (dd->total && !err) { | 1169 | if (dd->total && !err) { |
972 | err = atmel_aes_crypt_dma_start(dd); | 1170 | if (dd->flags & AES_FLAGS_FAST) { |
1171 | dd->in_sg = sg_next(dd->in_sg); | ||
1172 | dd->out_sg = sg_next(dd->out_sg); | ||
1173 | if (!dd->in_sg || !dd->out_sg) | ||
1174 | err = -EINVAL; | ||
1175 | } | ||
1176 | if (!err) | ||
1177 | err = atmel_aes_crypt_dma_start(dd); | ||
973 | if (!err) | 1178 | if (!err) |
974 | return; /* DMA started. Not fininishing. */ | 1179 | return; /* DMA started. Not fininishing. */ |
975 | } | 1180 | } |
@@ -1003,8 +1208,8 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd) | |||
1003 | 1208 | ||
1004 | for (i = 0; i < ARRAY_SIZE(aes_algs); i++) | 1209 | for (i = 0; i < ARRAY_SIZE(aes_algs); i++) |
1005 | crypto_unregister_alg(&aes_algs[i]); | 1210 | crypto_unregister_alg(&aes_algs[i]); |
1006 | if (dd->hw_version >= 0x130) | 1211 | if (dd->caps.has_cfb64) |
1007 | crypto_unregister_alg(&aes_cfb64_alg[0]); | 1212 | crypto_unregister_alg(&aes_cfb64_alg); |
1008 | } | 1213 | } |
1009 | 1214 | ||
1010 | static int atmel_aes_register_algs(struct atmel_aes_dev *dd) | 1215 | static int atmel_aes_register_algs(struct atmel_aes_dev *dd) |
@@ -1017,10 +1222,8 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd) | |||
1017 | goto err_aes_algs; | 1222 | goto err_aes_algs; |
1018 | } | 1223 | } |
1019 | 1224 | ||
1020 | atmel_aes_hw_version_init(dd); | 1225 | if (dd->caps.has_cfb64) { |
1021 | 1226 | err = crypto_register_alg(&aes_cfb64_alg); | |
1022 | if (dd->hw_version >= 0x130) { | ||
1023 | err = crypto_register_alg(&aes_cfb64_alg[0]); | ||
1024 | if (err) | 1227 | if (err) |
1025 | goto err_aes_cfb64_alg; | 1228 | goto err_aes_cfb64_alg; |
1026 | } | 1229 | } |
@@ -1036,10 +1239,32 @@ err_aes_algs: | |||
1036 | return err; | 1239 | return err; |
1037 | } | 1240 | } |
1038 | 1241 | ||
1242 | static void atmel_aes_get_cap(struct atmel_aes_dev *dd) | ||
1243 | { | ||
1244 | dd->caps.has_dualbuff = 0; | ||
1245 | dd->caps.has_cfb64 = 0; | ||
1246 | dd->caps.max_burst_size = 1; | ||
1247 | |||
1248 | /* keep only major version number */ | ||
1249 | switch (dd->hw_version & 0xff0) { | ||
1250 | case 0x130: | ||
1251 | dd->caps.has_dualbuff = 1; | ||
1252 | dd->caps.has_cfb64 = 1; | ||
1253 | dd->caps.max_burst_size = 4; | ||
1254 | break; | ||
1255 | case 0x120: | ||
1256 | break; | ||
1257 | default: | ||
1258 | dev_warn(dd->dev, | ||
1259 | "Unmanaged aes version, set minimum capabilities\n"); | ||
1260 | break; | ||
1261 | } | ||
1262 | } | ||
1263 | |||
1039 | static int atmel_aes_probe(struct platform_device *pdev) | 1264 | static int atmel_aes_probe(struct platform_device *pdev) |
1040 | { | 1265 | { |
1041 | struct atmel_aes_dev *aes_dd; | 1266 | struct atmel_aes_dev *aes_dd; |
1042 | struct aes_platform_data *pdata; | 1267 | struct crypto_platform_data *pdata; |
1043 | struct device *dev = &pdev->dev; | 1268 | struct device *dev = &pdev->dev; |
1044 | struct resource *aes_res; | 1269 | struct resource *aes_res; |
1045 | unsigned long aes_phys_size; | 1270 | unsigned long aes_phys_size; |
@@ -1099,7 +1324,7 @@ static int atmel_aes_probe(struct platform_device *pdev) | |||
1099 | } | 1324 | } |
1100 | 1325 | ||
1101 | /* Initializing the clock */ | 1326 | /* Initializing the clock */ |
1102 | aes_dd->iclk = clk_get(&pdev->dev, NULL); | 1327 | aes_dd->iclk = clk_get(&pdev->dev, "aes_clk"); |
1103 | if (IS_ERR(aes_dd->iclk)) { | 1328 | if (IS_ERR(aes_dd->iclk)) { |
1104 | dev_err(dev, "clock intialization failed.\n"); | 1329 | dev_err(dev, "clock intialization failed.\n"); |
1105 | err = PTR_ERR(aes_dd->iclk); | 1330 | err = PTR_ERR(aes_dd->iclk); |
@@ -1113,7 +1338,15 @@ static int atmel_aes_probe(struct platform_device *pdev) | |||
1113 | goto aes_io_err; | 1338 | goto aes_io_err; |
1114 | } | 1339 | } |
1115 | 1340 | ||
1116 | err = atmel_aes_dma_init(aes_dd); | 1341 | atmel_aes_hw_version_init(aes_dd); |
1342 | |||
1343 | atmel_aes_get_cap(aes_dd); | ||
1344 | |||
1345 | err = atmel_aes_buff_init(aes_dd); | ||
1346 | if (err) | ||
1347 | goto err_aes_buff; | ||
1348 | |||
1349 | err = atmel_aes_dma_init(aes_dd, pdata); | ||
1117 | if (err) | 1350 | if (err) |
1118 | goto err_aes_dma; | 1351 | goto err_aes_dma; |
1119 | 1352 | ||
@@ -1135,6 +1368,8 @@ err_algs: | |||
1135 | spin_unlock(&atmel_aes.lock); | 1368 | spin_unlock(&atmel_aes.lock); |
1136 | atmel_aes_dma_cleanup(aes_dd); | 1369 | atmel_aes_dma_cleanup(aes_dd); |
1137 | err_aes_dma: | 1370 | err_aes_dma: |
1371 | atmel_aes_buff_cleanup(aes_dd); | ||
1372 | err_aes_buff: | ||
1138 | iounmap(aes_dd->io_base); | 1373 | iounmap(aes_dd->io_base); |
1139 | aes_io_err: | 1374 | aes_io_err: |
1140 | clk_put(aes_dd->iclk); | 1375 | clk_put(aes_dd->iclk); |
diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h index dc53a20d7da1..83b2d7425666 100644 --- a/drivers/crypto/atmel-sha-regs.h +++ b/drivers/crypto/atmel-sha-regs.h | |||
@@ -14,10 +14,13 @@ | |||
14 | #define SHA_MR_MODE_MANUAL 0x0 | 14 | #define SHA_MR_MODE_MANUAL 0x0 |
15 | #define SHA_MR_MODE_AUTO 0x1 | 15 | #define SHA_MR_MODE_AUTO 0x1 |
16 | #define SHA_MR_MODE_PDC 0x2 | 16 | #define SHA_MR_MODE_PDC 0x2 |
17 | #define SHA_MR_DUALBUFF (1 << 3) | ||
18 | #define SHA_MR_PROCDLY (1 << 4) | 17 | #define SHA_MR_PROCDLY (1 << 4) |
19 | #define SHA_MR_ALGO_SHA1 (0 << 8) | 18 | #define SHA_MR_ALGO_SHA1 (0 << 8) |
20 | #define SHA_MR_ALGO_SHA256 (1 << 8) | 19 | #define SHA_MR_ALGO_SHA256 (1 << 8) |
20 | #define SHA_MR_ALGO_SHA384 (2 << 8) | ||
21 | #define SHA_MR_ALGO_SHA512 (3 << 8) | ||
22 | #define SHA_MR_ALGO_SHA224 (4 << 8) | ||
23 | #define SHA_MR_DUALBUFF (1 << 16) | ||
21 | 24 | ||
22 | #define SHA_IER 0x10 | 25 | #define SHA_IER 0x10 |
23 | #define SHA_IDR 0x14 | 26 | #define SHA_IDR 0x14 |
@@ -33,6 +36,8 @@ | |||
33 | #define SHA_ISR_URAT_MR (0x2 << 12) | 36 | #define SHA_ISR_URAT_MR (0x2 << 12) |
34 | #define SHA_ISR_URAT_WO (0x5 << 12) | 37 | #define SHA_ISR_URAT_WO (0x5 << 12) |
35 | 38 | ||
39 | #define SHA_HW_VERSION 0xFC | ||
40 | |||
36 | #define SHA_TPR 0x108 | 41 | #define SHA_TPR 0x108 |
37 | #define SHA_TCR 0x10C | 42 | #define SHA_TCR 0x10C |
38 | #define SHA_TNPR 0x118 | 43 | #define SHA_TNPR 0x118 |
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 4918e9424d31..eaed8bf183bc 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <crypto/sha.h> | 38 | #include <crypto/sha.h> |
39 | #include <crypto/hash.h> | 39 | #include <crypto/hash.h> |
40 | #include <crypto/internal/hash.h> | 40 | #include <crypto/internal/hash.h> |
41 | #include <linux/platform_data/crypto-atmel.h> | ||
41 | #include "atmel-sha-regs.h" | 42 | #include "atmel-sha-regs.h" |
42 | 43 | ||
43 | /* SHA flags */ | 44 | /* SHA flags */ |
@@ -52,11 +53,12 @@ | |||
52 | #define SHA_FLAGS_FINUP BIT(16) | 53 | #define SHA_FLAGS_FINUP BIT(16) |
53 | #define SHA_FLAGS_SG BIT(17) | 54 | #define SHA_FLAGS_SG BIT(17) |
54 | #define SHA_FLAGS_SHA1 BIT(18) | 55 | #define SHA_FLAGS_SHA1 BIT(18) |
55 | #define SHA_FLAGS_SHA256 BIT(19) | 56 | #define SHA_FLAGS_SHA224 BIT(19) |
56 | #define SHA_FLAGS_ERROR BIT(20) | 57 | #define SHA_FLAGS_SHA256 BIT(20) |
57 | #define SHA_FLAGS_PAD BIT(21) | 58 | #define SHA_FLAGS_SHA384 BIT(21) |
58 | 59 | #define SHA_FLAGS_SHA512 BIT(22) | |
59 | #define SHA_FLAGS_DUALBUFF BIT(24) | 60 | #define SHA_FLAGS_ERROR BIT(23) |
61 | #define SHA_FLAGS_PAD BIT(24) | ||
60 | 62 | ||
61 | #define SHA_OP_UPDATE 1 | 63 | #define SHA_OP_UPDATE 1 |
62 | #define SHA_OP_FINAL 2 | 64 | #define SHA_OP_FINAL 2 |
@@ -65,6 +67,12 @@ | |||
65 | 67 | ||
66 | #define ATMEL_SHA_DMA_THRESHOLD 56 | 68 | #define ATMEL_SHA_DMA_THRESHOLD 56 |
67 | 69 | ||
70 | struct atmel_sha_caps { | ||
71 | bool has_dma; | ||
72 | bool has_dualbuff; | ||
73 | bool has_sha224; | ||
74 | bool has_sha_384_512; | ||
75 | }; | ||
68 | 76 | ||
69 | struct atmel_sha_dev; | 77 | struct atmel_sha_dev; |
70 | 78 | ||
@@ -73,8 +81,8 @@ struct atmel_sha_reqctx { | |||
73 | unsigned long flags; | 81 | unsigned long flags; |
74 | unsigned long op; | 82 | unsigned long op; |
75 | 83 | ||
76 | u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32)); | 84 | u8 digest[SHA512_DIGEST_SIZE] __aligned(sizeof(u32)); |
77 | size_t digcnt; | 85 | u64 digcnt[2]; |
78 | size_t bufcnt; | 86 | size_t bufcnt; |
79 | size_t buflen; | 87 | size_t buflen; |
80 | dma_addr_t dma_addr; | 88 | dma_addr_t dma_addr; |
@@ -84,6 +92,8 @@ struct atmel_sha_reqctx { | |||
84 | unsigned int offset; /* offset in current sg */ | 92 | unsigned int offset; /* offset in current sg */ |
85 | unsigned int total; /* total request */ | 93 | unsigned int total; /* total request */ |
86 | 94 | ||
95 | size_t block_size; | ||
96 | |||
87 | u8 buffer[0] __aligned(sizeof(u32)); | 97 | u8 buffer[0] __aligned(sizeof(u32)); |
88 | }; | 98 | }; |
89 | 99 | ||
@@ -97,7 +107,12 @@ struct atmel_sha_ctx { | |||
97 | 107 | ||
98 | }; | 108 | }; |
99 | 109 | ||
100 | #define ATMEL_SHA_QUEUE_LENGTH 1 | 110 | #define ATMEL_SHA_QUEUE_LENGTH 50 |
111 | |||
112 | struct atmel_sha_dma { | ||
113 | struct dma_chan *chan; | ||
114 | struct dma_slave_config dma_conf; | ||
115 | }; | ||
101 | 116 | ||
102 | struct atmel_sha_dev { | 117 | struct atmel_sha_dev { |
103 | struct list_head list; | 118 | struct list_head list; |
@@ -114,6 +129,12 @@ struct atmel_sha_dev { | |||
114 | unsigned long flags; | 129 | unsigned long flags; |
115 | struct crypto_queue queue; | 130 | struct crypto_queue queue; |
116 | struct ahash_request *req; | 131 | struct ahash_request *req; |
132 | |||
133 | struct atmel_sha_dma dma_lch_in; | ||
134 | |||
135 | struct atmel_sha_caps caps; | ||
136 | |||
137 | u32 hw_version; | ||
117 | }; | 138 | }; |
118 | 139 | ||
119 | struct atmel_sha_drv { | 140 | struct atmel_sha_drv { |
@@ -137,14 +158,6 @@ static inline void atmel_sha_write(struct atmel_sha_dev *dd, | |||
137 | writel_relaxed(value, dd->io_base + offset); | 158 | writel_relaxed(value, dd->io_base + offset); |
138 | } | 159 | } |
139 | 160 | ||
140 | static void atmel_sha_dualbuff_test(struct atmel_sha_dev *dd) | ||
141 | { | ||
142 | atmel_sha_write(dd, SHA_MR, SHA_MR_DUALBUFF); | ||
143 | |||
144 | if (atmel_sha_read(dd, SHA_MR) & SHA_MR_DUALBUFF) | ||
145 | dd->flags |= SHA_FLAGS_DUALBUFF; | ||
146 | } | ||
147 | |||
148 | static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx) | 161 | static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx) |
149 | { | 162 | { |
150 | size_t count; | 163 | size_t count; |
@@ -176,31 +189,58 @@ static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx) | |||
176 | } | 189 | } |
177 | 190 | ||
178 | /* | 191 | /* |
179 | * The purpose of this padding is to ensure that the padded message | 192 | * The purpose of this padding is to ensure that the padded message is a |
180 | * is a multiple of 512 bits. The bit "1" is appended at the end of | 193 | * multiple of 512 bits (SHA1/SHA224/SHA256) or 1024 bits (SHA384/SHA512). |
181 | * the message followed by "padlen-1" zero bits. Then a 64 bits block | 194 | * The bit "1" is appended at the end of the message followed by |
182 | * equals to the message length in bits is appended. | 195 | * "padlen-1" zero bits. Then a 64 bits block (SHA1/SHA224/SHA256) or |
196 | * 128 bits block (SHA384/SHA512) equals to the message length in bits | ||
197 | * is appended. | ||
183 | * | 198 | * |
184 | * padlen is calculated as followed: | 199 | * For SHA1/SHA224/SHA256, padlen is calculated as followed: |
185 | * - if message length < 56 bytes then padlen = 56 - message length | 200 | * - if message length < 56 bytes then padlen = 56 - message length |
186 | * - else padlen = 64 + 56 - message length | 201 | * - else padlen = 64 + 56 - message length |
202 | * | ||
203 | * For SHA384/SHA512, padlen is calculated as followed: | ||
204 | * - if message length < 112 bytes then padlen = 112 - message length | ||
205 | * - else padlen = 128 + 112 - message length | ||
187 | */ | 206 | */ |
188 | static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) | 207 | static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length) |
189 | { | 208 | { |
190 | unsigned int index, padlen; | 209 | unsigned int index, padlen; |
191 | u64 bits; | 210 | u64 bits[2]; |
192 | u64 size; | 211 | u64 size[2]; |
193 | 212 | ||
194 | bits = (ctx->bufcnt + ctx->digcnt + length) << 3; | 213 | size[0] = ctx->digcnt[0]; |
195 | size = cpu_to_be64(bits); | 214 | size[1] = ctx->digcnt[1]; |
196 | 215 | ||
197 | index = ctx->bufcnt & 0x3f; | 216 | size[0] += ctx->bufcnt; |
198 | padlen = (index < 56) ? (56 - index) : ((64+56) - index); | 217 | if (size[0] < ctx->bufcnt) |
199 | *(ctx->buffer + ctx->bufcnt) = 0x80; | 218 | size[1]++; |
200 | memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1); | 219 | |
201 | memcpy(ctx->buffer + ctx->bufcnt + padlen, &size, 8); | 220 | size[0] += length; |
202 | ctx->bufcnt += padlen + 8; | 221 | if (size[0] < length) |
203 | ctx->flags |= SHA_FLAGS_PAD; | 222 | size[1]++; |
223 | |||
224 | bits[1] = cpu_to_be64(size[0] << 3); | ||
225 | bits[0] = cpu_to_be64(size[1] << 3 | size[0] >> 61); | ||
226 | |||
227 | if (ctx->flags & (SHA_FLAGS_SHA384 | SHA_FLAGS_SHA512)) { | ||
228 | index = ctx->bufcnt & 0x7f; | ||
229 | padlen = (index < 112) ? (112 - index) : ((128+112) - index); | ||
230 | *(ctx->buffer + ctx->bufcnt) = 0x80; | ||
231 | memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1); | ||
232 | memcpy(ctx->buffer + ctx->bufcnt + padlen, bits, 16); | ||
233 | ctx->bufcnt += padlen + 16; | ||
234 | ctx->flags |= SHA_FLAGS_PAD; | ||
235 | } else { | ||
236 | index = ctx->bufcnt & 0x3f; | ||
237 | padlen = (index < 56) ? (56 - index) : ((64+56) - index); | ||
238 | *(ctx->buffer + ctx->bufcnt) = 0x80; | ||
239 | memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1); | ||
240 | memcpy(ctx->buffer + ctx->bufcnt + padlen, &bits[1], 8); | ||
241 | ctx->bufcnt += padlen + 8; | ||
242 | ctx->flags |= SHA_FLAGS_PAD; | ||
243 | } | ||
204 | } | 244 | } |
205 | 245 | ||
206 | static int atmel_sha_init(struct ahash_request *req) | 246 | static int atmel_sha_init(struct ahash_request *req) |
@@ -231,13 +271,35 @@ static int atmel_sha_init(struct ahash_request *req) | |||
231 | dev_dbg(dd->dev, "init: digest size: %d\n", | 271 | dev_dbg(dd->dev, "init: digest size: %d\n", |
232 | crypto_ahash_digestsize(tfm)); | 272 | crypto_ahash_digestsize(tfm)); |
233 | 273 | ||
234 | if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE) | 274 | switch (crypto_ahash_digestsize(tfm)) { |
275 | case SHA1_DIGEST_SIZE: | ||
235 | ctx->flags |= SHA_FLAGS_SHA1; | 276 | ctx->flags |= SHA_FLAGS_SHA1; |
236 | else if (crypto_ahash_digestsize(tfm) == SHA256_DIGEST_SIZE) | 277 | ctx->block_size = SHA1_BLOCK_SIZE; |
278 | break; | ||
279 | case SHA224_DIGEST_SIZE: | ||
280 | ctx->flags |= SHA_FLAGS_SHA224; | ||
281 | ctx->block_size = SHA224_BLOCK_SIZE; | ||
282 | break; | ||
283 | case SHA256_DIGEST_SIZE: | ||
237 | ctx->flags |= SHA_FLAGS_SHA256; | 284 | ctx->flags |= SHA_FLAGS_SHA256; |
285 | ctx->block_size = SHA256_BLOCK_SIZE; | ||
286 | break; | ||
287 | case SHA384_DIGEST_SIZE: | ||
288 | ctx->flags |= SHA_FLAGS_SHA384; | ||
289 | ctx->block_size = SHA384_BLOCK_SIZE; | ||
290 | break; | ||
291 | case SHA512_DIGEST_SIZE: | ||
292 | ctx->flags |= SHA_FLAGS_SHA512; | ||
293 | ctx->block_size = SHA512_BLOCK_SIZE; | ||
294 | break; | ||
295 | default: | ||
296 | return -EINVAL; | ||
297 | break; | ||
298 | } | ||
238 | 299 | ||
239 | ctx->bufcnt = 0; | 300 | ctx->bufcnt = 0; |
240 | ctx->digcnt = 0; | 301 | ctx->digcnt[0] = 0; |
302 | ctx->digcnt[1] = 0; | ||
241 | ctx->buflen = SHA_BUFFER_LEN; | 303 | ctx->buflen = SHA_BUFFER_LEN; |
242 | 304 | ||
243 | return 0; | 305 | return 0; |
@@ -249,19 +311,28 @@ static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma) | |||
249 | u32 valcr = 0, valmr = SHA_MR_MODE_AUTO; | 311 | u32 valcr = 0, valmr = SHA_MR_MODE_AUTO; |
250 | 312 | ||
251 | if (likely(dma)) { | 313 | if (likely(dma)) { |
252 | atmel_sha_write(dd, SHA_IER, SHA_INT_TXBUFE); | 314 | if (!dd->caps.has_dma) |
315 | atmel_sha_write(dd, SHA_IER, SHA_INT_TXBUFE); | ||
253 | valmr = SHA_MR_MODE_PDC; | 316 | valmr = SHA_MR_MODE_PDC; |
254 | if (dd->flags & SHA_FLAGS_DUALBUFF) | 317 | if (dd->caps.has_dualbuff) |
255 | valmr = SHA_MR_DUALBUFF; | 318 | valmr |= SHA_MR_DUALBUFF; |
256 | } else { | 319 | } else { |
257 | atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); | 320 | atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); |
258 | } | 321 | } |
259 | 322 | ||
260 | if (ctx->flags & SHA_FLAGS_SHA256) | 323 | if (ctx->flags & SHA_FLAGS_SHA1) |
324 | valmr |= SHA_MR_ALGO_SHA1; | ||
325 | else if (ctx->flags & SHA_FLAGS_SHA224) | ||
326 | valmr |= SHA_MR_ALGO_SHA224; | ||
327 | else if (ctx->flags & SHA_FLAGS_SHA256) | ||
261 | valmr |= SHA_MR_ALGO_SHA256; | 328 | valmr |= SHA_MR_ALGO_SHA256; |
329 | else if (ctx->flags & SHA_FLAGS_SHA384) | ||
330 | valmr |= SHA_MR_ALGO_SHA384; | ||
331 | else if (ctx->flags & SHA_FLAGS_SHA512) | ||
332 | valmr |= SHA_MR_ALGO_SHA512; | ||
262 | 333 | ||
263 | /* Setting CR_FIRST only for the first iteration */ | 334 | /* Setting CR_FIRST only for the first iteration */ |
264 | if (!ctx->digcnt) | 335 | if (!(ctx->digcnt[0] || ctx->digcnt[1])) |
265 | valcr = SHA_CR_FIRST; | 336 | valcr = SHA_CR_FIRST; |
266 | 337 | ||
267 | atmel_sha_write(dd, SHA_CR, valcr); | 338 | atmel_sha_write(dd, SHA_CR, valcr); |
@@ -275,13 +346,15 @@ static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf, | |||
275 | int count, len32; | 346 | int count, len32; |
276 | const u32 *buffer = (const u32 *)buf; | 347 | const u32 *buffer = (const u32 *)buf; |
277 | 348 | ||
278 | dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n", | 349 | dev_dbg(dd->dev, "xmit_cpu: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", |
279 | ctx->digcnt, length, final); | 350 | ctx->digcnt[1], ctx->digcnt[0], length, final); |
280 | 351 | ||
281 | atmel_sha_write_ctrl(dd, 0); | 352 | atmel_sha_write_ctrl(dd, 0); |
282 | 353 | ||
283 | /* should be non-zero before next lines to disable clocks later */ | 354 | /* should be non-zero before next lines to disable clocks later */ |
284 | ctx->digcnt += length; | 355 | ctx->digcnt[0] += length; |
356 | if (ctx->digcnt[0] < length) | ||
357 | ctx->digcnt[1]++; | ||
285 | 358 | ||
286 | if (final) | 359 | if (final) |
287 | dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */ | 360 | dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */ |
@@ -302,8 +375,8 @@ static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, | |||
302 | struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req); | 375 | struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req); |
303 | int len32; | 376 | int len32; |
304 | 377 | ||
305 | dev_dbg(dd->dev, "xmit_pdc: digcnt: %d, length: %d, final: %d\n", | 378 | dev_dbg(dd->dev, "xmit_pdc: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", |
306 | ctx->digcnt, length1, final); | 379 | ctx->digcnt[1], ctx->digcnt[0], length1, final); |
307 | 380 | ||
308 | len32 = DIV_ROUND_UP(length1, sizeof(u32)); | 381 | len32 = DIV_ROUND_UP(length1, sizeof(u32)); |
309 | atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTDIS); | 382 | atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTDIS); |
@@ -317,7 +390,9 @@ static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, | |||
317 | atmel_sha_write_ctrl(dd, 1); | 390 | atmel_sha_write_ctrl(dd, 1); |
318 | 391 | ||
319 | /* should be non-zero before next lines to disable clocks later */ | 392 | /* should be non-zero before next lines to disable clocks later */ |
320 | ctx->digcnt += length1; | 393 | ctx->digcnt[0] += length1; |
394 | if (ctx->digcnt[0] < length1) | ||
395 | ctx->digcnt[1]++; | ||
321 | 396 | ||
322 | if (final) | 397 | if (final) |
323 | dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */ | 398 | dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */ |
@@ -330,6 +405,86 @@ static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, | |||
330 | return -EINPROGRESS; | 405 | return -EINPROGRESS; |
331 | } | 406 | } |
332 | 407 | ||
408 | static void atmel_sha_dma_callback(void *data) | ||
409 | { | ||
410 | struct atmel_sha_dev *dd = data; | ||
411 | |||
412 | /* dma_lch_in - completed - wait DATRDY */ | ||
413 | atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY); | ||
414 | } | ||
415 | |||
416 | static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, | ||
417 | size_t length1, dma_addr_t dma_addr2, size_t length2, int final) | ||
418 | { | ||
419 | struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req); | ||
420 | struct dma_async_tx_descriptor *in_desc; | ||
421 | struct scatterlist sg[2]; | ||
422 | |||
423 | dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n", | ||
424 | ctx->digcnt[1], ctx->digcnt[0], length1, final); | ||
425 | |||
426 | if (ctx->flags & (SHA_FLAGS_SHA1 | SHA_FLAGS_SHA224 | | ||
427 | SHA_FLAGS_SHA256)) { | ||
428 | dd->dma_lch_in.dma_conf.src_maxburst = 16; | ||
429 | dd->dma_lch_in.dma_conf.dst_maxburst = 16; | ||
430 | } else { | ||
431 | dd->dma_lch_in.dma_conf.src_maxburst = 32; | ||
432 | dd->dma_lch_in.dma_conf.dst_maxburst = 32; | ||
433 | } | ||
434 | |||
435 | dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf); | ||
436 | |||
437 | if (length2) { | ||
438 | sg_init_table(sg, 2); | ||
439 | sg_dma_address(&sg[0]) = dma_addr1; | ||
440 | sg_dma_len(&sg[0]) = length1; | ||
441 | sg_dma_address(&sg[1]) = dma_addr2; | ||
442 | sg_dma_len(&sg[1]) = length2; | ||
443 | in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, sg, 2, | ||
444 | DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | ||
445 | } else { | ||
446 | sg_init_table(sg, 1); | ||
447 | sg_dma_address(&sg[0]) = dma_addr1; | ||
448 | sg_dma_len(&sg[0]) = length1; | ||
449 | in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, sg, 1, | ||
450 | DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | ||
451 | } | ||
452 | if (!in_desc) | ||
453 | return -EINVAL; | ||
454 | |||
455 | in_desc->callback = atmel_sha_dma_callback; | ||
456 | in_desc->callback_param = dd; | ||
457 | |||
458 | atmel_sha_write_ctrl(dd, 1); | ||
459 | |||
460 | /* should be non-zero before next lines to disable clocks later */ | ||
461 | ctx->digcnt[0] += length1; | ||
462 | if (ctx->digcnt[0] < length1) | ||
463 | ctx->digcnt[1]++; | ||
464 | |||
465 | if (final) | ||
466 | dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */ | ||
467 | |||
468 | dd->flags |= SHA_FLAGS_DMA_ACTIVE; | ||
469 | |||
470 | /* Start DMA transfer */ | ||
471 | dmaengine_submit(in_desc); | ||
472 | dma_async_issue_pending(dd->dma_lch_in.chan); | ||
473 | |||
474 | return -EINPROGRESS; | ||
475 | } | ||
476 | |||
477 | static int atmel_sha_xmit_start(struct atmel_sha_dev *dd, dma_addr_t dma_addr1, | ||
478 | size_t length1, dma_addr_t dma_addr2, size_t length2, int final) | ||
479 | { | ||
480 | if (dd->caps.has_dma) | ||
481 | return atmel_sha_xmit_dma(dd, dma_addr1, length1, | ||
482 | dma_addr2, length2, final); | ||
483 | else | ||
484 | return atmel_sha_xmit_pdc(dd, dma_addr1, length1, | ||
485 | dma_addr2, length2, final); | ||
486 | } | ||
487 | |||
333 | static int atmel_sha_update_cpu(struct atmel_sha_dev *dd) | 488 | static int atmel_sha_update_cpu(struct atmel_sha_dev *dd) |
334 | { | 489 | { |
335 | struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req); | 490 | struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req); |
@@ -337,7 +492,6 @@ static int atmel_sha_update_cpu(struct atmel_sha_dev *dd) | |||
337 | 492 | ||
338 | atmel_sha_append_sg(ctx); | 493 | atmel_sha_append_sg(ctx); |
339 | atmel_sha_fill_padding(ctx, 0); | 494 | atmel_sha_fill_padding(ctx, 0); |
340 | |||
341 | bufcnt = ctx->bufcnt; | 495 | bufcnt = ctx->bufcnt; |
342 | ctx->bufcnt = 0; | 496 | ctx->bufcnt = 0; |
343 | 497 | ||
@@ -349,17 +503,17 @@ static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd, | |||
349 | size_t length, int final) | 503 | size_t length, int final) |
350 | { | 504 | { |
351 | ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, | 505 | ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, |
352 | ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE); | 506 | ctx->buflen + ctx->block_size, DMA_TO_DEVICE); |
353 | if (dma_mapping_error(dd->dev, ctx->dma_addr)) { | 507 | if (dma_mapping_error(dd->dev, ctx->dma_addr)) { |
354 | dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen + | 508 | dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen + |
355 | SHA1_BLOCK_SIZE); | 509 | ctx->block_size); |
356 | return -EINVAL; | 510 | return -EINVAL; |
357 | } | 511 | } |
358 | 512 | ||
359 | ctx->flags &= ~SHA_FLAGS_SG; | 513 | ctx->flags &= ~SHA_FLAGS_SG; |
360 | 514 | ||
361 | /* next call does not fail... so no unmap in the case of error */ | 515 | /* next call does not fail... so no unmap in the case of error */ |
362 | return atmel_sha_xmit_pdc(dd, ctx->dma_addr, length, 0, 0, final); | 516 | return atmel_sha_xmit_start(dd, ctx->dma_addr, length, 0, 0, final); |
363 | } | 517 | } |
364 | 518 | ||
365 | static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd) | 519 | static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd) |
@@ -372,8 +526,8 @@ static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd) | |||
372 | 526 | ||
373 | final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; | 527 | final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total; |
374 | 528 | ||
375 | dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n", | 529 | dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: 0x%llx 0x%llx, final: %d\n", |
376 | ctx->bufcnt, ctx->digcnt, final); | 530 | ctx->bufcnt, ctx->digcnt[1], ctx->digcnt[0], final); |
377 | 531 | ||
378 | if (final) | 532 | if (final) |
379 | atmel_sha_fill_padding(ctx, 0); | 533 | atmel_sha_fill_padding(ctx, 0); |
@@ -400,30 +554,25 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) | |||
400 | if (ctx->bufcnt || ctx->offset) | 554 | if (ctx->bufcnt || ctx->offset) |
401 | return atmel_sha_update_dma_slow(dd); | 555 | return atmel_sha_update_dma_slow(dd); |
402 | 556 | ||
403 | dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n", | 557 | dev_dbg(dd->dev, "fast: digcnt: 0x%llx 0x%llx, bufcnt: %u, total: %u\n", |
404 | ctx->digcnt, ctx->bufcnt, ctx->total); | 558 | ctx->digcnt[1], ctx->digcnt[0], ctx->bufcnt, ctx->total); |
405 | 559 | ||
406 | sg = ctx->sg; | 560 | sg = ctx->sg; |
407 | 561 | ||
408 | if (!IS_ALIGNED(sg->offset, sizeof(u32))) | 562 | if (!IS_ALIGNED(sg->offset, sizeof(u32))) |
409 | return atmel_sha_update_dma_slow(dd); | 563 | return atmel_sha_update_dma_slow(dd); |
410 | 564 | ||
411 | if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, SHA1_BLOCK_SIZE)) | 565 | if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, ctx->block_size)) |
412 | /* size is not SHA1_BLOCK_SIZE aligned */ | 566 | /* size is not ctx->block_size aligned */ |
413 | return atmel_sha_update_dma_slow(dd); | 567 | return atmel_sha_update_dma_slow(dd); |
414 | 568 | ||
415 | length = min(ctx->total, sg->length); | 569 | length = min(ctx->total, sg->length); |
416 | 570 | ||
417 | if (sg_is_last(sg)) { | 571 | if (sg_is_last(sg)) { |
418 | if (!(ctx->flags & SHA_FLAGS_FINUP)) { | 572 | if (!(ctx->flags & SHA_FLAGS_FINUP)) { |
419 | /* not last sg must be SHA1_BLOCK_SIZE aligned */ | 573 | /* not last sg must be ctx->block_size aligned */ |
420 | tail = length & (SHA1_BLOCK_SIZE - 1); | 574 | tail = length & (ctx->block_size - 1); |
421 | length -= tail; | 575 | length -= tail; |
422 | if (length == 0) { | ||
423 | /* offset where to start slow */ | ||
424 | ctx->offset = length; | ||
425 | return atmel_sha_update_dma_slow(dd); | ||
426 | } | ||
427 | } | 576 | } |
428 | } | 577 | } |
429 | 578 | ||
@@ -434,7 +583,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) | |||
434 | 583 | ||
435 | /* Add padding */ | 584 | /* Add padding */ |
436 | if (final) { | 585 | if (final) { |
437 | tail = length & (SHA1_BLOCK_SIZE - 1); | 586 | tail = length & (ctx->block_size - 1); |
438 | length -= tail; | 587 | length -= tail; |
439 | ctx->total += tail; | 588 | ctx->total += tail; |
440 | ctx->offset = length; /* offset where to start slow */ | 589 | ctx->offset = length; /* offset where to start slow */ |
@@ -445,10 +594,10 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) | |||
445 | atmel_sha_fill_padding(ctx, length); | 594 | atmel_sha_fill_padding(ctx, length); |
446 | 595 | ||
447 | ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, | 596 | ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, |
448 | ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE); | 597 | ctx->buflen + ctx->block_size, DMA_TO_DEVICE); |
449 | if (dma_mapping_error(dd->dev, ctx->dma_addr)) { | 598 | if (dma_mapping_error(dd->dev, ctx->dma_addr)) { |
450 | dev_err(dd->dev, "dma %u bytes error\n", | 599 | dev_err(dd->dev, "dma %u bytes error\n", |
451 | ctx->buflen + SHA1_BLOCK_SIZE); | 600 | ctx->buflen + ctx->block_size); |
452 | return -EINVAL; | 601 | return -EINVAL; |
453 | } | 602 | } |
454 | 603 | ||
@@ -456,7 +605,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) | |||
456 | ctx->flags &= ~SHA_FLAGS_SG; | 605 | ctx->flags &= ~SHA_FLAGS_SG; |
457 | count = ctx->bufcnt; | 606 | count = ctx->bufcnt; |
458 | ctx->bufcnt = 0; | 607 | ctx->bufcnt = 0; |
459 | return atmel_sha_xmit_pdc(dd, ctx->dma_addr, count, 0, | 608 | return atmel_sha_xmit_start(dd, ctx->dma_addr, count, 0, |
460 | 0, final); | 609 | 0, final); |
461 | } else { | 610 | } else { |
462 | ctx->sg = sg; | 611 | ctx->sg = sg; |
@@ -470,7 +619,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) | |||
470 | 619 | ||
471 | count = ctx->bufcnt; | 620 | count = ctx->bufcnt; |
472 | ctx->bufcnt = 0; | 621 | ctx->bufcnt = 0; |
473 | return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg), | 622 | return atmel_sha_xmit_start(dd, sg_dma_address(ctx->sg), |
474 | length, ctx->dma_addr, count, final); | 623 | length, ctx->dma_addr, count, final); |
475 | } | 624 | } |
476 | } | 625 | } |
@@ -483,7 +632,7 @@ static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd) | |||
483 | ctx->flags |= SHA_FLAGS_SG; | 632 | ctx->flags |= SHA_FLAGS_SG; |
484 | 633 | ||
485 | /* next call does not fail... so no unmap in the case of error */ | 634 | /* next call does not fail... so no unmap in the case of error */ |
486 | return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg), length, 0, | 635 | return atmel_sha_xmit_start(dd, sg_dma_address(ctx->sg), length, 0, |
487 | 0, final); | 636 | 0, final); |
488 | } | 637 | } |
489 | 638 | ||
@@ -498,12 +647,13 @@ static int atmel_sha_update_dma_stop(struct atmel_sha_dev *dd) | |||
498 | if (ctx->sg) | 647 | if (ctx->sg) |
499 | ctx->offset = 0; | 648 | ctx->offset = 0; |
500 | } | 649 | } |
501 | if (ctx->flags & SHA_FLAGS_PAD) | 650 | if (ctx->flags & SHA_FLAGS_PAD) { |
502 | dma_unmap_single(dd->dev, ctx->dma_addr, | 651 | dma_unmap_single(dd->dev, ctx->dma_addr, |
503 | ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE); | 652 | ctx->buflen + ctx->block_size, DMA_TO_DEVICE); |
653 | } | ||
504 | } else { | 654 | } else { |
505 | dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen + | 655 | dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen + |
506 | SHA1_BLOCK_SIZE, DMA_TO_DEVICE); | 656 | ctx->block_size, DMA_TO_DEVICE); |
507 | } | 657 | } |
508 | 658 | ||
509 | return 0; | 659 | return 0; |
@@ -515,8 +665,8 @@ static int atmel_sha_update_req(struct atmel_sha_dev *dd) | |||
515 | struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); | 665 | struct atmel_sha_reqctx *ctx = ahash_request_ctx(req); |
516 | int err; | 666 | int err; |
517 | 667 | ||
518 | dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n", | 668 | dev_dbg(dd->dev, "update_req: total: %u, digcnt: 0x%llx 0x%llx\n", |
519 | ctx->total, ctx->digcnt, (ctx->flags & SHA_FLAGS_FINUP) != 0); | 669 | ctx->total, ctx->digcnt[1], ctx->digcnt[0]); |
520 | 670 | ||
521 | if (ctx->flags & SHA_FLAGS_CPU) | 671 | if (ctx->flags & SHA_FLAGS_CPU) |
522 | err = atmel_sha_update_cpu(dd); | 672 | err = atmel_sha_update_cpu(dd); |
@@ -524,8 +674,8 @@ static int atmel_sha_update_req(struct atmel_sha_dev *dd) | |||
524 | err = atmel_sha_update_dma_start(dd); | 674 | err = atmel_sha_update_dma_start(dd); |
525 | 675 | ||
526 | /* wait for dma completion before can take more data */ | 676 | /* wait for dma completion before can take more data */ |
527 | dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", | 677 | dev_dbg(dd->dev, "update: err: %d, digcnt: 0x%llx 0%llx\n", |
528 | err, ctx->digcnt); | 678 | err, ctx->digcnt[1], ctx->digcnt[0]); |
529 | 679 | ||
530 | return err; | 680 | return err; |
531 | } | 681 | } |
@@ -562,12 +712,21 @@ static void atmel_sha_copy_hash(struct ahash_request *req) | |||
562 | u32 *hash = (u32 *)ctx->digest; | 712 | u32 *hash = (u32 *)ctx->digest; |
563 | int i; | 713 | int i; |
564 | 714 | ||
565 | if (likely(ctx->flags & SHA_FLAGS_SHA1)) | 715 | if (ctx->flags & SHA_FLAGS_SHA1) |
566 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) | 716 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) |
567 | hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i)); | 717 | hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i)); |
568 | else | 718 | else if (ctx->flags & SHA_FLAGS_SHA224) |
719 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(u32); i++) | ||
720 | hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i)); | ||
721 | else if (ctx->flags & SHA_FLAGS_SHA256) | ||
569 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(u32); i++) | 722 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(u32); i++) |
570 | hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i)); | 723 | hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i)); |
724 | else if (ctx->flags & SHA_FLAGS_SHA384) | ||
725 | for (i = 0; i < SHA384_DIGEST_SIZE / sizeof(u32); i++) | ||
726 | hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i)); | ||
727 | else | ||
728 | for (i = 0; i < SHA512_DIGEST_SIZE / sizeof(u32); i++) | ||
729 | hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i)); | ||
571 | } | 730 | } |
572 | 731 | ||
573 | static void atmel_sha_copy_ready_hash(struct ahash_request *req) | 732 | static void atmel_sha_copy_ready_hash(struct ahash_request *req) |
@@ -577,10 +736,16 @@ static void atmel_sha_copy_ready_hash(struct ahash_request *req) | |||
577 | if (!req->result) | 736 | if (!req->result) |
578 | return; | 737 | return; |
579 | 738 | ||
580 | if (likely(ctx->flags & SHA_FLAGS_SHA1)) | 739 | if (ctx->flags & SHA_FLAGS_SHA1) |
581 | memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE); | 740 | memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE); |
582 | else | 741 | else if (ctx->flags & SHA_FLAGS_SHA224) |
742 | memcpy(req->result, ctx->digest, SHA224_DIGEST_SIZE); | ||
743 | else if (ctx->flags & SHA_FLAGS_SHA256) | ||
583 | memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE); | 744 | memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE); |
745 | else if (ctx->flags & SHA_FLAGS_SHA384) | ||
746 | memcpy(req->result, ctx->digest, SHA384_DIGEST_SIZE); | ||
747 | else | ||
748 | memcpy(req->result, ctx->digest, SHA512_DIGEST_SIZE); | ||
584 | } | 749 | } |
585 | 750 | ||
586 | static int atmel_sha_finish(struct ahash_request *req) | 751 | static int atmel_sha_finish(struct ahash_request *req) |
@@ -589,11 +754,11 @@ static int atmel_sha_finish(struct ahash_request *req) | |||
589 | struct atmel_sha_dev *dd = ctx->dd; | 754 | struct atmel_sha_dev *dd = ctx->dd; |
590 | int err = 0; | 755 | int err = 0; |
591 | 756 | ||
592 | if (ctx->digcnt) | 757 | if (ctx->digcnt[0] || ctx->digcnt[1]) |
593 | atmel_sha_copy_ready_hash(req); | 758 | atmel_sha_copy_ready_hash(req); |
594 | 759 | ||
595 | dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, | 760 | dev_dbg(dd->dev, "digcnt: 0x%llx 0x%llx, bufcnt: %d\n", ctx->digcnt[1], |
596 | ctx->bufcnt); | 761 | ctx->digcnt[0], ctx->bufcnt); |
597 | 762 | ||
598 | return err; | 763 | return err; |
599 | } | 764 | } |
@@ -628,9 +793,8 @@ static int atmel_sha_hw_init(struct atmel_sha_dev *dd) | |||
628 | { | 793 | { |
629 | clk_prepare_enable(dd->iclk); | 794 | clk_prepare_enable(dd->iclk); |
630 | 795 | ||
631 | if (SHA_FLAGS_INIT & dd->flags) { | 796 | if (!(SHA_FLAGS_INIT & dd->flags)) { |
632 | atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST); | 797 | atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST); |
633 | atmel_sha_dualbuff_test(dd); | ||
634 | dd->flags |= SHA_FLAGS_INIT; | 798 | dd->flags |= SHA_FLAGS_INIT; |
635 | dd->err = 0; | 799 | dd->err = 0; |
636 | } | 800 | } |
@@ -638,6 +802,23 @@ static int atmel_sha_hw_init(struct atmel_sha_dev *dd) | |||
638 | return 0; | 802 | return 0; |
639 | } | 803 | } |
640 | 804 | ||
805 | static inline unsigned int atmel_sha_get_version(struct atmel_sha_dev *dd) | ||
806 | { | ||
807 | return atmel_sha_read(dd, SHA_HW_VERSION) & 0x00000fff; | ||
808 | } | ||
809 | |||
810 | static void atmel_sha_hw_version_init(struct atmel_sha_dev *dd) | ||
811 | { | ||
812 | atmel_sha_hw_init(dd); | ||
813 | |||
814 | dd->hw_version = atmel_sha_get_version(dd); | ||
815 | |||
816 | dev_info(dd->dev, | ||
817 | "version: 0x%x\n", dd->hw_version); | ||
818 | |||
819 | clk_disable_unprepare(dd->iclk); | ||
820 | } | ||
821 | |||
641 | static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, | 822 | static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, |
642 | struct ahash_request *req) | 823 | struct ahash_request *req) |
643 | { | 824 | { |
@@ -682,10 +863,9 @@ static int atmel_sha_handle_queue(struct atmel_sha_dev *dd, | |||
682 | 863 | ||
683 | if (ctx->op == SHA_OP_UPDATE) { | 864 | if (ctx->op == SHA_OP_UPDATE) { |
684 | err = atmel_sha_update_req(dd); | 865 | err = atmel_sha_update_req(dd); |
685 | if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) { | 866 | if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) |
686 | /* no final() after finup() */ | 867 | /* no final() after finup() */ |
687 | err = atmel_sha_final_req(dd); | 868 | err = atmel_sha_final_req(dd); |
688 | } | ||
689 | } else if (ctx->op == SHA_OP_FINAL) { | 869 | } else if (ctx->op == SHA_OP_FINAL) { |
690 | err = atmel_sha_final_req(dd); | 870 | err = atmel_sha_final_req(dd); |
691 | } | 871 | } |
@@ -808,7 +988,7 @@ static int atmel_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base) | |||
808 | } | 988 | } |
809 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | 989 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), |
810 | sizeof(struct atmel_sha_reqctx) + | 990 | sizeof(struct atmel_sha_reqctx) + |
811 | SHA_BUFFER_LEN + SHA256_BLOCK_SIZE); | 991 | SHA_BUFFER_LEN + SHA512_BLOCK_SIZE); |
812 | 992 | ||
813 | return 0; | 993 | return 0; |
814 | } | 994 | } |
@@ -826,7 +1006,7 @@ static void atmel_sha_cra_exit(struct crypto_tfm *tfm) | |||
826 | tctx->fallback = NULL; | 1006 | tctx->fallback = NULL; |
827 | } | 1007 | } |
828 | 1008 | ||
829 | static struct ahash_alg sha_algs[] = { | 1009 | static struct ahash_alg sha_1_256_algs[] = { |
830 | { | 1010 | { |
831 | .init = atmel_sha_init, | 1011 | .init = atmel_sha_init, |
832 | .update = atmel_sha_update, | 1012 | .update = atmel_sha_update, |
@@ -875,6 +1055,79 @@ static struct ahash_alg sha_algs[] = { | |||
875 | }, | 1055 | }, |
876 | }; | 1056 | }; |
877 | 1057 | ||
1058 | static struct ahash_alg sha_224_alg = { | ||
1059 | .init = atmel_sha_init, | ||
1060 | .update = atmel_sha_update, | ||
1061 | .final = atmel_sha_final, | ||
1062 | .finup = atmel_sha_finup, | ||
1063 | .digest = atmel_sha_digest, | ||
1064 | .halg = { | ||
1065 | .digestsize = SHA224_DIGEST_SIZE, | ||
1066 | .base = { | ||
1067 | .cra_name = "sha224", | ||
1068 | .cra_driver_name = "atmel-sha224", | ||
1069 | .cra_priority = 100, | ||
1070 | .cra_flags = CRYPTO_ALG_ASYNC | | ||
1071 | CRYPTO_ALG_NEED_FALLBACK, | ||
1072 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
1073 | .cra_ctxsize = sizeof(struct atmel_sha_ctx), | ||
1074 | .cra_alignmask = 0, | ||
1075 | .cra_module = THIS_MODULE, | ||
1076 | .cra_init = atmel_sha_cra_init, | ||
1077 | .cra_exit = atmel_sha_cra_exit, | ||
1078 | } | ||
1079 | } | ||
1080 | }; | ||
1081 | |||
1082 | static struct ahash_alg sha_384_512_algs[] = { | ||
1083 | { | ||
1084 | .init = atmel_sha_init, | ||
1085 | .update = atmel_sha_update, | ||
1086 | .final = atmel_sha_final, | ||
1087 | .finup = atmel_sha_finup, | ||
1088 | .digest = atmel_sha_digest, | ||
1089 | .halg = { | ||
1090 | .digestsize = SHA384_DIGEST_SIZE, | ||
1091 | .base = { | ||
1092 | .cra_name = "sha384", | ||
1093 | .cra_driver_name = "atmel-sha384", | ||
1094 | .cra_priority = 100, | ||
1095 | .cra_flags = CRYPTO_ALG_ASYNC | | ||
1096 | CRYPTO_ALG_NEED_FALLBACK, | ||
1097 | .cra_blocksize = SHA384_BLOCK_SIZE, | ||
1098 | .cra_ctxsize = sizeof(struct atmel_sha_ctx), | ||
1099 | .cra_alignmask = 0x3, | ||
1100 | .cra_module = THIS_MODULE, | ||
1101 | .cra_init = atmel_sha_cra_init, | ||
1102 | .cra_exit = atmel_sha_cra_exit, | ||
1103 | } | ||
1104 | } | ||
1105 | }, | ||
1106 | { | ||
1107 | .init = atmel_sha_init, | ||
1108 | .update = atmel_sha_update, | ||
1109 | .final = atmel_sha_final, | ||
1110 | .finup = atmel_sha_finup, | ||
1111 | .digest = atmel_sha_digest, | ||
1112 | .halg = { | ||
1113 | .digestsize = SHA512_DIGEST_SIZE, | ||
1114 | .base = { | ||
1115 | .cra_name = "sha512", | ||
1116 | .cra_driver_name = "atmel-sha512", | ||
1117 | .cra_priority = 100, | ||
1118 | .cra_flags = CRYPTO_ALG_ASYNC | | ||
1119 | CRYPTO_ALG_NEED_FALLBACK, | ||
1120 | .cra_blocksize = SHA512_BLOCK_SIZE, | ||
1121 | .cra_ctxsize = sizeof(struct atmel_sha_ctx), | ||
1122 | .cra_alignmask = 0x3, | ||
1123 | .cra_module = THIS_MODULE, | ||
1124 | .cra_init = atmel_sha_cra_init, | ||
1125 | .cra_exit = atmel_sha_cra_exit, | ||
1126 | } | ||
1127 | } | ||
1128 | }, | ||
1129 | }; | ||
1130 | |||
878 | static void atmel_sha_done_task(unsigned long data) | 1131 | static void atmel_sha_done_task(unsigned long data) |
879 | { | 1132 | { |
880 | struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; | 1133 | struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data; |
@@ -941,32 +1194,142 @@ static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd) | |||
941 | { | 1194 | { |
942 | int i; | 1195 | int i; |
943 | 1196 | ||
944 | for (i = 0; i < ARRAY_SIZE(sha_algs); i++) | 1197 | for (i = 0; i < ARRAY_SIZE(sha_1_256_algs); i++) |
945 | crypto_unregister_ahash(&sha_algs[i]); | 1198 | crypto_unregister_ahash(&sha_1_256_algs[i]); |
1199 | |||
1200 | if (dd->caps.has_sha224) | ||
1201 | crypto_unregister_ahash(&sha_224_alg); | ||
1202 | |||
1203 | if (dd->caps.has_sha_384_512) { | ||
1204 | for (i = 0; i < ARRAY_SIZE(sha_384_512_algs); i++) | ||
1205 | crypto_unregister_ahash(&sha_384_512_algs[i]); | ||
1206 | } | ||
946 | } | 1207 | } |
947 | 1208 | ||
948 | static int atmel_sha_register_algs(struct atmel_sha_dev *dd) | 1209 | static int atmel_sha_register_algs(struct atmel_sha_dev *dd) |
949 | { | 1210 | { |
950 | int err, i, j; | 1211 | int err, i, j; |
951 | 1212 | ||
952 | for (i = 0; i < ARRAY_SIZE(sha_algs); i++) { | 1213 | for (i = 0; i < ARRAY_SIZE(sha_1_256_algs); i++) { |
953 | err = crypto_register_ahash(&sha_algs[i]); | 1214 | err = crypto_register_ahash(&sha_1_256_algs[i]); |
954 | if (err) | 1215 | if (err) |
955 | goto err_sha_algs; | 1216 | goto err_sha_1_256_algs; |
1217 | } | ||
1218 | |||
1219 | if (dd->caps.has_sha224) { | ||
1220 | err = crypto_register_ahash(&sha_224_alg); | ||
1221 | if (err) | ||
1222 | goto err_sha_224_algs; | ||
1223 | } | ||
1224 | |||
1225 | if (dd->caps.has_sha_384_512) { | ||
1226 | for (i = 0; i < ARRAY_SIZE(sha_384_512_algs); i++) { | ||
1227 | err = crypto_register_ahash(&sha_384_512_algs[i]); | ||
1228 | if (err) | ||
1229 | goto err_sha_384_512_algs; | ||
1230 | } | ||
956 | } | 1231 | } |
957 | 1232 | ||
958 | return 0; | 1233 | return 0; |
959 | 1234 | ||
960 | err_sha_algs: | 1235 | err_sha_384_512_algs: |
1236 | for (j = 0; j < i; j++) | ||
1237 | crypto_unregister_ahash(&sha_384_512_algs[j]); | ||
1238 | crypto_unregister_ahash(&sha_224_alg); | ||
1239 | err_sha_224_algs: | ||
1240 | i = ARRAY_SIZE(sha_1_256_algs); | ||
1241 | err_sha_1_256_algs: | ||
961 | for (j = 0; j < i; j++) | 1242 | for (j = 0; j < i; j++) |
962 | crypto_unregister_ahash(&sha_algs[j]); | 1243 | crypto_unregister_ahash(&sha_1_256_algs[j]); |
963 | 1244 | ||
964 | return err; | 1245 | return err; |
965 | } | 1246 | } |
966 | 1247 | ||
1248 | static bool atmel_sha_filter(struct dma_chan *chan, void *slave) | ||
1249 | { | ||
1250 | struct at_dma_slave *sl = slave; | ||
1251 | |||
1252 | if (sl && sl->dma_dev == chan->device->dev) { | ||
1253 | chan->private = sl; | ||
1254 | return true; | ||
1255 | } else { | ||
1256 | return false; | ||
1257 | } | ||
1258 | } | ||
1259 | |||
1260 | static int atmel_sha_dma_init(struct atmel_sha_dev *dd, | ||
1261 | struct crypto_platform_data *pdata) | ||
1262 | { | ||
1263 | int err = -ENOMEM; | ||
1264 | dma_cap_mask_t mask_in; | ||
1265 | |||
1266 | if (pdata && pdata->dma_slave->rxdata.dma_dev) { | ||
1267 | /* Try to grab DMA channel */ | ||
1268 | dma_cap_zero(mask_in); | ||
1269 | dma_cap_set(DMA_SLAVE, mask_in); | ||
1270 | |||
1271 | dd->dma_lch_in.chan = dma_request_channel(mask_in, | ||
1272 | atmel_sha_filter, &pdata->dma_slave->rxdata); | ||
1273 | |||
1274 | if (!dd->dma_lch_in.chan) | ||
1275 | return err; | ||
1276 | |||
1277 | dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV; | ||
1278 | dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base + | ||
1279 | SHA_REG_DIN(0); | ||
1280 | dd->dma_lch_in.dma_conf.src_maxburst = 1; | ||
1281 | dd->dma_lch_in.dma_conf.src_addr_width = | ||
1282 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
1283 | dd->dma_lch_in.dma_conf.dst_maxburst = 1; | ||
1284 | dd->dma_lch_in.dma_conf.dst_addr_width = | ||
1285 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
1286 | dd->dma_lch_in.dma_conf.device_fc = false; | ||
1287 | |||
1288 | return 0; | ||
1289 | } | ||
1290 | |||
1291 | return -ENODEV; | ||
1292 | } | ||
1293 | |||
1294 | static void atmel_sha_dma_cleanup(struct atmel_sha_dev *dd) | ||
1295 | { | ||
1296 | dma_release_channel(dd->dma_lch_in.chan); | ||
1297 | } | ||
1298 | |||
1299 | static void atmel_sha_get_cap(struct atmel_sha_dev *dd) | ||
1300 | { | ||
1301 | |||
1302 | dd->caps.has_dma = 0; | ||
1303 | dd->caps.has_dualbuff = 0; | ||
1304 | dd->caps.has_sha224 = 0; | ||
1305 | dd->caps.has_sha_384_512 = 0; | ||
1306 | |||
1307 | /* keep only major version number */ | ||
1308 | switch (dd->hw_version & 0xff0) { | ||
1309 | case 0x410: | ||
1310 | dd->caps.has_dma = 1; | ||
1311 | dd->caps.has_dualbuff = 1; | ||
1312 | dd->caps.has_sha224 = 1; | ||
1313 | dd->caps.has_sha_384_512 = 1; | ||
1314 | break; | ||
1315 | case 0x400: | ||
1316 | dd->caps.has_dma = 1; | ||
1317 | dd->caps.has_dualbuff = 1; | ||
1318 | dd->caps.has_sha224 = 1; | ||
1319 | break; | ||
1320 | case 0x320: | ||
1321 | break; | ||
1322 | default: | ||
1323 | dev_warn(dd->dev, | ||
1324 | "Unmanaged sha version, set minimum capabilities\n"); | ||
1325 | break; | ||
1326 | } | ||
1327 | } | ||
1328 | |||
967 | static int atmel_sha_probe(struct platform_device *pdev) | 1329 | static int atmel_sha_probe(struct platform_device *pdev) |
968 | { | 1330 | { |
969 | struct atmel_sha_dev *sha_dd; | 1331 | struct atmel_sha_dev *sha_dd; |
1332 | struct crypto_platform_data *pdata; | ||
970 | struct device *dev = &pdev->dev; | 1333 | struct device *dev = &pdev->dev; |
971 | struct resource *sha_res; | 1334 | struct resource *sha_res; |
972 | unsigned long sha_phys_size; | 1335 | unsigned long sha_phys_size; |
@@ -1018,7 +1381,7 @@ static int atmel_sha_probe(struct platform_device *pdev) | |||
1018 | } | 1381 | } |
1019 | 1382 | ||
1020 | /* Initializing the clock */ | 1383 | /* Initializing the clock */ |
1021 | sha_dd->iclk = clk_get(&pdev->dev, NULL); | 1384 | sha_dd->iclk = clk_get(&pdev->dev, "sha_clk"); |
1022 | if (IS_ERR(sha_dd->iclk)) { | 1385 | if (IS_ERR(sha_dd->iclk)) { |
1023 | dev_err(dev, "clock intialization failed.\n"); | 1386 | dev_err(dev, "clock intialization failed.\n"); |
1024 | err = PTR_ERR(sha_dd->iclk); | 1387 | err = PTR_ERR(sha_dd->iclk); |
@@ -1032,6 +1395,22 @@ static int atmel_sha_probe(struct platform_device *pdev) | |||
1032 | goto sha_io_err; | 1395 | goto sha_io_err; |
1033 | } | 1396 | } |
1034 | 1397 | ||
1398 | atmel_sha_hw_version_init(sha_dd); | ||
1399 | |||
1400 | atmel_sha_get_cap(sha_dd); | ||
1401 | |||
1402 | if (sha_dd->caps.has_dma) { | ||
1403 | pdata = pdev->dev.platform_data; | ||
1404 | if (!pdata) { | ||
1405 | dev_err(&pdev->dev, "platform data not available\n"); | ||
1406 | err = -ENXIO; | ||
1407 | goto err_pdata; | ||
1408 | } | ||
1409 | err = atmel_sha_dma_init(sha_dd, pdata); | ||
1410 | if (err) | ||
1411 | goto err_sha_dma; | ||
1412 | } | ||
1413 | |||
1035 | spin_lock(&atmel_sha.lock); | 1414 | spin_lock(&atmel_sha.lock); |
1036 | list_add_tail(&sha_dd->list, &atmel_sha.dev_list); | 1415 | list_add_tail(&sha_dd->list, &atmel_sha.dev_list); |
1037 | spin_unlock(&atmel_sha.lock); | 1416 | spin_unlock(&atmel_sha.lock); |
@@ -1048,6 +1427,10 @@ err_algs: | |||
1048 | spin_lock(&atmel_sha.lock); | 1427 | spin_lock(&atmel_sha.lock); |
1049 | list_del(&sha_dd->list); | 1428 | list_del(&sha_dd->list); |
1050 | spin_unlock(&atmel_sha.lock); | 1429 | spin_unlock(&atmel_sha.lock); |
1430 | if (sha_dd->caps.has_dma) | ||
1431 | atmel_sha_dma_cleanup(sha_dd); | ||
1432 | err_sha_dma: | ||
1433 | err_pdata: | ||
1051 | iounmap(sha_dd->io_base); | 1434 | iounmap(sha_dd->io_base); |
1052 | sha_io_err: | 1435 | sha_io_err: |
1053 | clk_put(sha_dd->iclk); | 1436 | clk_put(sha_dd->iclk); |
@@ -1078,6 +1461,9 @@ static int atmel_sha_remove(struct platform_device *pdev) | |||
1078 | 1461 | ||
1079 | tasklet_kill(&sha_dd->done_task); | 1462 | tasklet_kill(&sha_dd->done_task); |
1080 | 1463 | ||
1464 | if (sha_dd->caps.has_dma) | ||
1465 | atmel_sha_dma_cleanup(sha_dd); | ||
1466 | |||
1081 | iounmap(sha_dd->io_base); | 1467 | iounmap(sha_dd->io_base); |
1082 | 1468 | ||
1083 | clk_put(sha_dd->iclk); | 1469 | clk_put(sha_dd->iclk); |
@@ -1102,6 +1488,6 @@ static struct platform_driver atmel_sha_driver = { | |||
1102 | 1488 | ||
1103 | module_platform_driver(atmel_sha_driver); | 1489 | module_platform_driver(atmel_sha_driver); |
1104 | 1490 | ||
1105 | MODULE_DESCRIPTION("Atmel SHA1/SHA256 hw acceleration support."); | 1491 | MODULE_DESCRIPTION("Atmel SHA (1/256/224/384/512) hw acceleration support."); |
1106 | MODULE_LICENSE("GPL v2"); | 1492 | MODULE_LICENSE("GPL v2"); |
1107 | MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique"); | 1493 | MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique"); |
diff --git a/drivers/crypto/atmel-tdes-regs.h b/drivers/crypto/atmel-tdes-regs.h index 5ac2a900d80c..f86734d0fda4 100644 --- a/drivers/crypto/atmel-tdes-regs.h +++ b/drivers/crypto/atmel-tdes-regs.h | |||
@@ -69,6 +69,8 @@ | |||
69 | #define TDES_XTEARNDR_XTEA_RNDS_MASK (0x3F << 0) | 69 | #define TDES_XTEARNDR_XTEA_RNDS_MASK (0x3F << 0) |
70 | #define TDES_XTEARNDR_XTEA_RNDS_OFFSET 0 | 70 | #define TDES_XTEARNDR_XTEA_RNDS_OFFSET 0 |
71 | 71 | ||
72 | #define TDES_HW_VERSION 0xFC | ||
73 | |||
72 | #define TDES_RPR 0x100 | 74 | #define TDES_RPR 0x100 |
73 | #define TDES_RCR 0x104 | 75 | #define TDES_RCR 0x104 |
74 | #define TDES_TPR 0x108 | 76 | #define TDES_TPR 0x108 |
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 7c73fbb17538..4a99564a08e6 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c | |||
@@ -38,29 +38,35 @@ | |||
38 | #include <crypto/des.h> | 38 | #include <crypto/des.h> |
39 | #include <crypto/hash.h> | 39 | #include <crypto/hash.h> |
40 | #include <crypto/internal/hash.h> | 40 | #include <crypto/internal/hash.h> |
41 | #include <linux/platform_data/crypto-atmel.h> | ||
41 | #include "atmel-tdes-regs.h" | 42 | #include "atmel-tdes-regs.h" |
42 | 43 | ||
43 | /* TDES flags */ | 44 | /* TDES flags */ |
44 | #define TDES_FLAGS_MODE_MASK 0x007f | 45 | #define TDES_FLAGS_MODE_MASK 0x00ff |
45 | #define TDES_FLAGS_ENCRYPT BIT(0) | 46 | #define TDES_FLAGS_ENCRYPT BIT(0) |
46 | #define TDES_FLAGS_CBC BIT(1) | 47 | #define TDES_FLAGS_CBC BIT(1) |
47 | #define TDES_FLAGS_CFB BIT(2) | 48 | #define TDES_FLAGS_CFB BIT(2) |
48 | #define TDES_FLAGS_CFB8 BIT(3) | 49 | #define TDES_FLAGS_CFB8 BIT(3) |
49 | #define TDES_FLAGS_CFB16 BIT(4) | 50 | #define TDES_FLAGS_CFB16 BIT(4) |
50 | #define TDES_FLAGS_CFB32 BIT(5) | 51 | #define TDES_FLAGS_CFB32 BIT(5) |
51 | #define TDES_FLAGS_OFB BIT(6) | 52 | #define TDES_FLAGS_CFB64 BIT(6) |
53 | #define TDES_FLAGS_OFB BIT(7) | ||
52 | 54 | ||
53 | #define TDES_FLAGS_INIT BIT(16) | 55 | #define TDES_FLAGS_INIT BIT(16) |
54 | #define TDES_FLAGS_FAST BIT(17) | 56 | #define TDES_FLAGS_FAST BIT(17) |
55 | #define TDES_FLAGS_BUSY BIT(18) | 57 | #define TDES_FLAGS_BUSY BIT(18) |
58 | #define TDES_FLAGS_DMA BIT(19) | ||
56 | 59 | ||
57 | #define ATMEL_TDES_QUEUE_LENGTH 1 | 60 | #define ATMEL_TDES_QUEUE_LENGTH 50 |
58 | 61 | ||
59 | #define CFB8_BLOCK_SIZE 1 | 62 | #define CFB8_BLOCK_SIZE 1 |
60 | #define CFB16_BLOCK_SIZE 2 | 63 | #define CFB16_BLOCK_SIZE 2 |
61 | #define CFB32_BLOCK_SIZE 4 | 64 | #define CFB32_BLOCK_SIZE 4 |
62 | #define CFB64_BLOCK_SIZE 8 | ||
63 | 65 | ||
66 | struct atmel_tdes_caps { | ||
67 | bool has_dma; | ||
68 | u32 has_cfb_3keys; | ||
69 | }; | ||
64 | 70 | ||
65 | struct atmel_tdes_dev; | 71 | struct atmel_tdes_dev; |
66 | 72 | ||
@@ -70,12 +76,19 @@ struct atmel_tdes_ctx { | |||
70 | int keylen; | 76 | int keylen; |
71 | u32 key[3*DES_KEY_SIZE / sizeof(u32)]; | 77 | u32 key[3*DES_KEY_SIZE / sizeof(u32)]; |
72 | unsigned long flags; | 78 | unsigned long flags; |
79 | |||
80 | u16 block_size; | ||
73 | }; | 81 | }; |
74 | 82 | ||
75 | struct atmel_tdes_reqctx { | 83 | struct atmel_tdes_reqctx { |
76 | unsigned long mode; | 84 | unsigned long mode; |
77 | }; | 85 | }; |
78 | 86 | ||
87 | struct atmel_tdes_dma { | ||
88 | struct dma_chan *chan; | ||
89 | struct dma_slave_config dma_conf; | ||
90 | }; | ||
91 | |||
79 | struct atmel_tdes_dev { | 92 | struct atmel_tdes_dev { |
80 | struct list_head list; | 93 | struct list_head list; |
81 | unsigned long phys_base; | 94 | unsigned long phys_base; |
@@ -99,8 +112,10 @@ struct atmel_tdes_dev { | |||
99 | size_t total; | 112 | size_t total; |
100 | 113 | ||
101 | struct scatterlist *in_sg; | 114 | struct scatterlist *in_sg; |
115 | unsigned int nb_in_sg; | ||
102 | size_t in_offset; | 116 | size_t in_offset; |
103 | struct scatterlist *out_sg; | 117 | struct scatterlist *out_sg; |
118 | unsigned int nb_out_sg; | ||
104 | size_t out_offset; | 119 | size_t out_offset; |
105 | 120 | ||
106 | size_t buflen; | 121 | size_t buflen; |
@@ -109,10 +124,16 @@ struct atmel_tdes_dev { | |||
109 | void *buf_in; | 124 | void *buf_in; |
110 | int dma_in; | 125 | int dma_in; |
111 | dma_addr_t dma_addr_in; | 126 | dma_addr_t dma_addr_in; |
127 | struct atmel_tdes_dma dma_lch_in; | ||
112 | 128 | ||
113 | void *buf_out; | 129 | void *buf_out; |
114 | int dma_out; | 130 | int dma_out; |
115 | dma_addr_t dma_addr_out; | 131 | dma_addr_t dma_addr_out; |
132 | struct atmel_tdes_dma dma_lch_out; | ||
133 | |||
134 | struct atmel_tdes_caps caps; | ||
135 | |||
136 | u32 hw_version; | ||
116 | }; | 137 | }; |
117 | 138 | ||
118 | struct atmel_tdes_drv { | 139 | struct atmel_tdes_drv { |
@@ -207,6 +228,31 @@ static int atmel_tdes_hw_init(struct atmel_tdes_dev *dd) | |||
207 | return 0; | 228 | return 0; |
208 | } | 229 | } |
209 | 230 | ||
231 | static inline unsigned int atmel_tdes_get_version(struct atmel_tdes_dev *dd) | ||
232 | { | ||
233 | return atmel_tdes_read(dd, TDES_HW_VERSION) & 0x00000fff; | ||
234 | } | ||
235 | |||
236 | static void atmel_tdes_hw_version_init(struct atmel_tdes_dev *dd) | ||
237 | { | ||
238 | atmel_tdes_hw_init(dd); | ||
239 | |||
240 | dd->hw_version = atmel_tdes_get_version(dd); | ||
241 | |||
242 | dev_info(dd->dev, | ||
243 | "version: 0x%x\n", dd->hw_version); | ||
244 | |||
245 | clk_disable_unprepare(dd->iclk); | ||
246 | } | ||
247 | |||
248 | static void atmel_tdes_dma_callback(void *data) | ||
249 | { | ||
250 | struct atmel_tdes_dev *dd = data; | ||
251 | |||
252 | /* dma_lch_out - completed */ | ||
253 | tasklet_schedule(&dd->done_task); | ||
254 | } | ||
255 | |||
210 | static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd) | 256 | static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd) |
211 | { | 257 | { |
212 | int err; | 258 | int err; |
@@ -217,7 +263,9 @@ static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd) | |||
217 | if (err) | 263 | if (err) |
218 | return err; | 264 | return err; |
219 | 265 | ||
220 | atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTDIS|TDES_PTCR_RXTDIS); | 266 | if (!dd->caps.has_dma) |
267 | atmel_tdes_write(dd, TDES_PTCR, | ||
268 | TDES_PTCR_TXTDIS | TDES_PTCR_RXTDIS); | ||
221 | 269 | ||
222 | /* MR register must be set before IV registers */ | 270 | /* MR register must be set before IV registers */ |
223 | if (dd->ctx->keylen > (DES_KEY_SIZE << 1)) { | 271 | if (dd->ctx->keylen > (DES_KEY_SIZE << 1)) { |
@@ -241,6 +289,8 @@ static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd) | |||
241 | valmr |= TDES_MR_CFBS_16b; | 289 | valmr |= TDES_MR_CFBS_16b; |
242 | else if (dd->flags & TDES_FLAGS_CFB32) | 290 | else if (dd->flags & TDES_FLAGS_CFB32) |
243 | valmr |= TDES_MR_CFBS_32b; | 291 | valmr |= TDES_MR_CFBS_32b; |
292 | else if (dd->flags & TDES_FLAGS_CFB64) | ||
293 | valmr |= TDES_MR_CFBS_64b; | ||
244 | } else if (dd->flags & TDES_FLAGS_OFB) { | 294 | } else if (dd->flags & TDES_FLAGS_OFB) { |
245 | valmr |= TDES_MR_OPMOD_OFB; | 295 | valmr |= TDES_MR_OPMOD_OFB; |
246 | } | 296 | } |
@@ -262,7 +312,7 @@ static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd) | |||
262 | return 0; | 312 | return 0; |
263 | } | 313 | } |
264 | 314 | ||
265 | static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd) | 315 | static int atmel_tdes_crypt_pdc_stop(struct atmel_tdes_dev *dd) |
266 | { | 316 | { |
267 | int err = 0; | 317 | int err = 0; |
268 | size_t count; | 318 | size_t count; |
@@ -288,7 +338,7 @@ static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd) | |||
288 | return err; | 338 | return err; |
289 | } | 339 | } |
290 | 340 | ||
291 | static int atmel_tdes_dma_init(struct atmel_tdes_dev *dd) | 341 | static int atmel_tdes_buff_init(struct atmel_tdes_dev *dd) |
292 | { | 342 | { |
293 | int err = -ENOMEM; | 343 | int err = -ENOMEM; |
294 | 344 | ||
@@ -333,7 +383,7 @@ err_alloc: | |||
333 | return err; | 383 | return err; |
334 | } | 384 | } |
335 | 385 | ||
336 | static void atmel_tdes_dma_cleanup(struct atmel_tdes_dev *dd) | 386 | static void atmel_tdes_buff_cleanup(struct atmel_tdes_dev *dd) |
337 | { | 387 | { |
338 | dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen, | 388 | dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen, |
339 | DMA_FROM_DEVICE); | 389 | DMA_FROM_DEVICE); |
@@ -343,7 +393,7 @@ static void atmel_tdes_dma_cleanup(struct atmel_tdes_dev *dd) | |||
343 | free_page((unsigned long)dd->buf_in); | 393 | free_page((unsigned long)dd->buf_in); |
344 | } | 394 | } |
345 | 395 | ||
346 | static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, | 396 | static int atmel_tdes_crypt_pdc(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, |
347 | dma_addr_t dma_addr_out, int length) | 397 | dma_addr_t dma_addr_out, int length) |
348 | { | 398 | { |
349 | struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm); | 399 | struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm); |
@@ -379,7 +429,76 @@ static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, | |||
379 | return 0; | 429 | return 0; |
380 | } | 430 | } |
381 | 431 | ||
382 | static int atmel_tdes_crypt_dma_start(struct atmel_tdes_dev *dd) | 432 | static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, |
433 | dma_addr_t dma_addr_out, int length) | ||
434 | { | ||
435 | struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
436 | struct atmel_tdes_dev *dd = ctx->dd; | ||
437 | struct scatterlist sg[2]; | ||
438 | struct dma_async_tx_descriptor *in_desc, *out_desc; | ||
439 | |||
440 | dd->dma_size = length; | ||
441 | |||
442 | if (!(dd->flags & TDES_FLAGS_FAST)) { | ||
443 | dma_sync_single_for_device(dd->dev, dma_addr_in, length, | ||
444 | DMA_TO_DEVICE); | ||
445 | } | ||
446 | |||
447 | if (dd->flags & TDES_FLAGS_CFB8) { | ||
448 | dd->dma_lch_in.dma_conf.dst_addr_width = | ||
449 | DMA_SLAVE_BUSWIDTH_1_BYTE; | ||
450 | dd->dma_lch_out.dma_conf.src_addr_width = | ||
451 | DMA_SLAVE_BUSWIDTH_1_BYTE; | ||
452 | } else if (dd->flags & TDES_FLAGS_CFB16) { | ||
453 | dd->dma_lch_in.dma_conf.dst_addr_width = | ||
454 | DMA_SLAVE_BUSWIDTH_2_BYTES; | ||
455 | dd->dma_lch_out.dma_conf.src_addr_width = | ||
456 | DMA_SLAVE_BUSWIDTH_2_BYTES; | ||
457 | } else { | ||
458 | dd->dma_lch_in.dma_conf.dst_addr_width = | ||
459 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
460 | dd->dma_lch_out.dma_conf.src_addr_width = | ||
461 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
462 | } | ||
463 | |||
464 | dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf); | ||
465 | dmaengine_slave_config(dd->dma_lch_out.chan, &dd->dma_lch_out.dma_conf); | ||
466 | |||
467 | dd->flags |= TDES_FLAGS_DMA; | ||
468 | |||
469 | sg_init_table(&sg[0], 1); | ||
470 | sg_dma_address(&sg[0]) = dma_addr_in; | ||
471 | sg_dma_len(&sg[0]) = length; | ||
472 | |||
473 | sg_init_table(&sg[1], 1); | ||
474 | sg_dma_address(&sg[1]) = dma_addr_out; | ||
475 | sg_dma_len(&sg[1]) = length; | ||
476 | |||
477 | in_desc = dmaengine_prep_slave_sg(dd->dma_lch_in.chan, &sg[0], | ||
478 | 1, DMA_MEM_TO_DEV, | ||
479 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | ||
480 | if (!in_desc) | ||
481 | return -EINVAL; | ||
482 | |||
483 | out_desc = dmaengine_prep_slave_sg(dd->dma_lch_out.chan, &sg[1], | ||
484 | 1, DMA_DEV_TO_MEM, | ||
485 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | ||
486 | if (!out_desc) | ||
487 | return -EINVAL; | ||
488 | |||
489 | out_desc->callback = atmel_tdes_dma_callback; | ||
490 | out_desc->callback_param = dd; | ||
491 | |||
492 | dmaengine_submit(out_desc); | ||
493 | dma_async_issue_pending(dd->dma_lch_out.chan); | ||
494 | |||
495 | dmaengine_submit(in_desc); | ||
496 | dma_async_issue_pending(dd->dma_lch_in.chan); | ||
497 | |||
498 | return 0; | ||
499 | } | ||
500 | |||
501 | static int atmel_tdes_crypt_start(struct atmel_tdes_dev *dd) | ||
383 | { | 502 | { |
384 | struct crypto_tfm *tfm = crypto_ablkcipher_tfm( | 503 | struct crypto_tfm *tfm = crypto_ablkcipher_tfm( |
385 | crypto_ablkcipher_reqtfm(dd->req)); | 504 | crypto_ablkcipher_reqtfm(dd->req)); |
@@ -387,23 +506,23 @@ static int atmel_tdes_crypt_dma_start(struct atmel_tdes_dev *dd) | |||
387 | size_t count; | 506 | size_t count; |
388 | dma_addr_t addr_in, addr_out; | 507 | dma_addr_t addr_in, addr_out; |
389 | 508 | ||
390 | if (sg_is_last(dd->in_sg) && sg_is_last(dd->out_sg)) { | 509 | if ((!dd->in_offset) && (!dd->out_offset)) { |
391 | /* check for alignment */ | 510 | /* check for alignment */ |
392 | in = IS_ALIGNED((u32)dd->in_sg->offset, sizeof(u32)); | 511 | in = IS_ALIGNED((u32)dd->in_sg->offset, sizeof(u32)) && |
393 | out = IS_ALIGNED((u32)dd->out_sg->offset, sizeof(u32)); | 512 | IS_ALIGNED(dd->in_sg->length, dd->ctx->block_size); |
394 | 513 | out = IS_ALIGNED((u32)dd->out_sg->offset, sizeof(u32)) && | |
514 | IS_ALIGNED(dd->out_sg->length, dd->ctx->block_size); | ||
395 | fast = in && out; | 515 | fast = in && out; |
516 | |||
517 | if (sg_dma_len(dd->in_sg) != sg_dma_len(dd->out_sg)) | ||
518 | fast = 0; | ||
396 | } | 519 | } |
397 | 520 | ||
521 | |||
398 | if (fast) { | 522 | if (fast) { |
399 | count = min(dd->total, sg_dma_len(dd->in_sg)); | 523 | count = min(dd->total, sg_dma_len(dd->in_sg)); |
400 | count = min(count, sg_dma_len(dd->out_sg)); | 524 | count = min(count, sg_dma_len(dd->out_sg)); |
401 | 525 | ||
402 | if (count != dd->total) { | ||
403 | pr_err("request length != buffer length\n"); | ||
404 | return -EINVAL; | ||
405 | } | ||
406 | |||
407 | err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); | 526 | err = dma_map_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); |
408 | if (!err) { | 527 | if (!err) { |
409 | dev_err(dd->dev, "dma_map_sg() error\n"); | 528 | dev_err(dd->dev, "dma_map_sg() error\n"); |
@@ -433,13 +552,16 @@ static int atmel_tdes_crypt_dma_start(struct atmel_tdes_dev *dd) | |||
433 | addr_out = dd->dma_addr_out; | 552 | addr_out = dd->dma_addr_out; |
434 | 553 | ||
435 | dd->flags &= ~TDES_FLAGS_FAST; | 554 | dd->flags &= ~TDES_FLAGS_FAST; |
436 | |||
437 | } | 555 | } |
438 | 556 | ||
439 | dd->total -= count; | 557 | dd->total -= count; |
440 | 558 | ||
441 | err = atmel_tdes_crypt_dma(tfm, addr_in, addr_out, count); | 559 | if (dd->caps.has_dma) |
442 | if (err) { | 560 | err = atmel_tdes_crypt_dma(tfm, addr_in, addr_out, count); |
561 | else | ||
562 | err = atmel_tdes_crypt_pdc(tfm, addr_in, addr_out, count); | ||
563 | |||
564 | if (err && (dd->flags & TDES_FLAGS_FAST)) { | ||
443 | dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); | 565 | dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); |
444 | dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE); | 566 | dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE); |
445 | } | 567 | } |
@@ -447,7 +569,6 @@ static int atmel_tdes_crypt_dma_start(struct atmel_tdes_dev *dd) | |||
447 | return err; | 569 | return err; |
448 | } | 570 | } |
449 | 571 | ||
450 | |||
451 | static void atmel_tdes_finish_req(struct atmel_tdes_dev *dd, int err) | 572 | static void atmel_tdes_finish_req(struct atmel_tdes_dev *dd, int err) |
452 | { | 573 | { |
453 | struct ablkcipher_request *req = dd->req; | 574 | struct ablkcipher_request *req = dd->req; |
@@ -506,7 +627,7 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd, | |||
506 | 627 | ||
507 | err = atmel_tdes_write_ctrl(dd); | 628 | err = atmel_tdes_write_ctrl(dd); |
508 | if (!err) | 629 | if (!err) |
509 | err = atmel_tdes_crypt_dma_start(dd); | 630 | err = atmel_tdes_crypt_start(dd); |
510 | if (err) { | 631 | if (err) { |
511 | /* des_task will not finish it, so do it here */ | 632 | /* des_task will not finish it, so do it here */ |
512 | atmel_tdes_finish_req(dd, err); | 633 | atmel_tdes_finish_req(dd, err); |
@@ -516,41 +637,145 @@ static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd, | |||
516 | return ret; | 637 | return ret; |
517 | } | 638 | } |
518 | 639 | ||
640 | static int atmel_tdes_crypt_dma_stop(struct atmel_tdes_dev *dd) | ||
641 | { | ||
642 | int err = -EINVAL; | ||
643 | size_t count; | ||
644 | |||
645 | if (dd->flags & TDES_FLAGS_DMA) { | ||
646 | err = 0; | ||
647 | if (dd->flags & TDES_FLAGS_FAST) { | ||
648 | dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); | ||
649 | dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); | ||
650 | } else { | ||
651 | dma_sync_single_for_device(dd->dev, dd->dma_addr_out, | ||
652 | dd->dma_size, DMA_FROM_DEVICE); | ||
653 | |||
654 | /* copy data */ | ||
655 | count = atmel_tdes_sg_copy(&dd->out_sg, &dd->out_offset, | ||
656 | dd->buf_out, dd->buflen, dd->dma_size, 1); | ||
657 | if (count != dd->dma_size) { | ||
658 | err = -EINVAL; | ||
659 | pr_err("not all data converted: %u\n", count); | ||
660 | } | ||
661 | } | ||
662 | } | ||
663 | return err; | ||
664 | } | ||
519 | 665 | ||
520 | static int atmel_tdes_crypt(struct ablkcipher_request *req, unsigned long mode) | 666 | static int atmel_tdes_crypt(struct ablkcipher_request *req, unsigned long mode) |
521 | { | 667 | { |
522 | struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx( | 668 | struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx( |
523 | crypto_ablkcipher_reqtfm(req)); | 669 | crypto_ablkcipher_reqtfm(req)); |
524 | struct atmel_tdes_reqctx *rctx = ablkcipher_request_ctx(req); | 670 | struct atmel_tdes_reqctx *rctx = ablkcipher_request_ctx(req); |
525 | struct atmel_tdes_dev *dd; | ||
526 | 671 | ||
527 | if (mode & TDES_FLAGS_CFB8) { | 672 | if (mode & TDES_FLAGS_CFB8) { |
528 | if (!IS_ALIGNED(req->nbytes, CFB8_BLOCK_SIZE)) { | 673 | if (!IS_ALIGNED(req->nbytes, CFB8_BLOCK_SIZE)) { |
529 | pr_err("request size is not exact amount of CFB8 blocks\n"); | 674 | pr_err("request size is not exact amount of CFB8 blocks\n"); |
530 | return -EINVAL; | 675 | return -EINVAL; |
531 | } | 676 | } |
677 | ctx->block_size = CFB8_BLOCK_SIZE; | ||
532 | } else if (mode & TDES_FLAGS_CFB16) { | 678 | } else if (mode & TDES_FLAGS_CFB16) { |
533 | if (!IS_ALIGNED(req->nbytes, CFB16_BLOCK_SIZE)) { | 679 | if (!IS_ALIGNED(req->nbytes, CFB16_BLOCK_SIZE)) { |
534 | pr_err("request size is not exact amount of CFB16 blocks\n"); | 680 | pr_err("request size is not exact amount of CFB16 blocks\n"); |
535 | return -EINVAL; | 681 | return -EINVAL; |
536 | } | 682 | } |
683 | ctx->block_size = CFB16_BLOCK_SIZE; | ||
537 | } else if (mode & TDES_FLAGS_CFB32) { | 684 | } else if (mode & TDES_FLAGS_CFB32) { |
538 | if (!IS_ALIGNED(req->nbytes, CFB32_BLOCK_SIZE)) { | 685 | if (!IS_ALIGNED(req->nbytes, CFB32_BLOCK_SIZE)) { |
539 | pr_err("request size is not exact amount of CFB32 blocks\n"); | 686 | pr_err("request size is not exact amount of CFB32 blocks\n"); |
540 | return -EINVAL; | 687 | return -EINVAL; |
541 | } | 688 | } |
542 | } else if (!IS_ALIGNED(req->nbytes, DES_BLOCK_SIZE)) { | 689 | ctx->block_size = CFB32_BLOCK_SIZE; |
543 | pr_err("request size is not exact amount of DES blocks\n"); | 690 | } else { |
544 | return -EINVAL; | 691 | if (!IS_ALIGNED(req->nbytes, DES_BLOCK_SIZE)) { |
692 | pr_err("request size is not exact amount of DES blocks\n"); | ||
693 | return -EINVAL; | ||
694 | } | ||
695 | ctx->block_size = DES_BLOCK_SIZE; | ||
545 | } | 696 | } |
546 | 697 | ||
547 | dd = atmel_tdes_find_dev(ctx); | 698 | rctx->mode = mode; |
548 | if (!dd) | 699 | |
700 | return atmel_tdes_handle_queue(ctx->dd, req); | ||
701 | } | ||
702 | |||
703 | static bool atmel_tdes_filter(struct dma_chan *chan, void *slave) | ||
704 | { | ||
705 | struct at_dma_slave *sl = slave; | ||
706 | |||
707 | if (sl && sl->dma_dev == chan->device->dev) { | ||
708 | chan->private = sl; | ||
709 | return true; | ||
710 | } else { | ||
711 | return false; | ||
712 | } | ||
713 | } | ||
714 | |||
715 | static int atmel_tdes_dma_init(struct atmel_tdes_dev *dd, | ||
716 | struct crypto_platform_data *pdata) | ||
717 | { | ||
718 | int err = -ENOMEM; | ||
719 | dma_cap_mask_t mask_in, mask_out; | ||
720 | |||
721 | if (pdata && pdata->dma_slave->txdata.dma_dev && | ||
722 | pdata->dma_slave->rxdata.dma_dev) { | ||
723 | |||
724 | /* Try to grab 2 DMA channels */ | ||
725 | dma_cap_zero(mask_in); | ||
726 | dma_cap_set(DMA_SLAVE, mask_in); | ||
727 | |||
728 | dd->dma_lch_in.chan = dma_request_channel(mask_in, | ||
729 | atmel_tdes_filter, &pdata->dma_slave->rxdata); | ||
730 | |||
731 | if (!dd->dma_lch_in.chan) | ||
732 | goto err_dma_in; | ||
733 | |||
734 | dd->dma_lch_in.dma_conf.direction = DMA_MEM_TO_DEV; | ||
735 | dd->dma_lch_in.dma_conf.dst_addr = dd->phys_base + | ||
736 | TDES_IDATA1R; | ||
737 | dd->dma_lch_in.dma_conf.src_maxburst = 1; | ||
738 | dd->dma_lch_in.dma_conf.src_addr_width = | ||
739 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
740 | dd->dma_lch_in.dma_conf.dst_maxburst = 1; | ||
741 | dd->dma_lch_in.dma_conf.dst_addr_width = | ||
742 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
743 | dd->dma_lch_in.dma_conf.device_fc = false; | ||
744 | |||
745 | dma_cap_zero(mask_out); | ||
746 | dma_cap_set(DMA_SLAVE, mask_out); | ||
747 | dd->dma_lch_out.chan = dma_request_channel(mask_out, | ||
748 | atmel_tdes_filter, &pdata->dma_slave->txdata); | ||
749 | |||
750 | if (!dd->dma_lch_out.chan) | ||
751 | goto err_dma_out; | ||
752 | |||
753 | dd->dma_lch_out.dma_conf.direction = DMA_DEV_TO_MEM; | ||
754 | dd->dma_lch_out.dma_conf.src_addr = dd->phys_base + | ||
755 | TDES_ODATA1R; | ||
756 | dd->dma_lch_out.dma_conf.src_maxburst = 1; | ||
757 | dd->dma_lch_out.dma_conf.src_addr_width = | ||
758 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
759 | dd->dma_lch_out.dma_conf.dst_maxburst = 1; | ||
760 | dd->dma_lch_out.dma_conf.dst_addr_width = | ||
761 | DMA_SLAVE_BUSWIDTH_4_BYTES; | ||
762 | dd->dma_lch_out.dma_conf.device_fc = false; | ||
763 | |||
764 | return 0; | ||
765 | } else { | ||
549 | return -ENODEV; | 766 | return -ENODEV; |
767 | } | ||
550 | 768 | ||
551 | rctx->mode = mode; | 769 | err_dma_out: |
770 | dma_release_channel(dd->dma_lch_in.chan); | ||
771 | err_dma_in: | ||
772 | return err; | ||
773 | } | ||
552 | 774 | ||
553 | return atmel_tdes_handle_queue(dd, req); | 775 | static void atmel_tdes_dma_cleanup(struct atmel_tdes_dev *dd) |
776 | { | ||
777 | dma_release_channel(dd->dma_lch_in.chan); | ||
778 | dma_release_channel(dd->dma_lch_out.chan); | ||
554 | } | 779 | } |
555 | 780 | ||
556 | static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key, | 781 | static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key, |
@@ -590,7 +815,8 @@ static int atmel_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, | |||
590 | /* | 815 | /* |
591 | * HW bug in cfb 3-keys mode. | 816 | * HW bug in cfb 3-keys mode. |
592 | */ | 817 | */ |
593 | if (strstr(alg_name, "cfb") && (keylen != 2*DES_KEY_SIZE)) { | 818 | if (!ctx->dd->caps.has_cfb_3keys && strstr(alg_name, "cfb") |
819 | && (keylen != 2*DES_KEY_SIZE)) { | ||
594 | crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | 820 | crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); |
595 | return -EINVAL; | 821 | return -EINVAL; |
596 | } else if ((keylen != 2*DES_KEY_SIZE) && (keylen != 3*DES_KEY_SIZE)) { | 822 | } else if ((keylen != 2*DES_KEY_SIZE) && (keylen != 3*DES_KEY_SIZE)) { |
@@ -678,8 +904,15 @@ static int atmel_tdes_ofb_decrypt(struct ablkcipher_request *req) | |||
678 | 904 | ||
679 | static int atmel_tdes_cra_init(struct crypto_tfm *tfm) | 905 | static int atmel_tdes_cra_init(struct crypto_tfm *tfm) |
680 | { | 906 | { |
907 | struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
908 | struct atmel_tdes_dev *dd; | ||
909 | |||
681 | tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_tdes_reqctx); | 910 | tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_tdes_reqctx); |
682 | 911 | ||
912 | dd = atmel_tdes_find_dev(ctx); | ||
913 | if (!dd) | ||
914 | return -ENODEV; | ||
915 | |||
683 | return 0; | 916 | return 0; |
684 | } | 917 | } |
685 | 918 | ||
@@ -695,7 +928,7 @@ static struct crypto_alg tdes_algs[] = { | |||
695 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 928 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
696 | .cra_blocksize = DES_BLOCK_SIZE, | 929 | .cra_blocksize = DES_BLOCK_SIZE, |
697 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 930 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
698 | .cra_alignmask = 0, | 931 | .cra_alignmask = 0x7, |
699 | .cra_type = &crypto_ablkcipher_type, | 932 | .cra_type = &crypto_ablkcipher_type, |
700 | .cra_module = THIS_MODULE, | 933 | .cra_module = THIS_MODULE, |
701 | .cra_init = atmel_tdes_cra_init, | 934 | .cra_init = atmel_tdes_cra_init, |
@@ -715,7 +948,7 @@ static struct crypto_alg tdes_algs[] = { | |||
715 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 948 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
716 | .cra_blocksize = DES_BLOCK_SIZE, | 949 | .cra_blocksize = DES_BLOCK_SIZE, |
717 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 950 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
718 | .cra_alignmask = 0, | 951 | .cra_alignmask = 0x7, |
719 | .cra_type = &crypto_ablkcipher_type, | 952 | .cra_type = &crypto_ablkcipher_type, |
720 | .cra_module = THIS_MODULE, | 953 | .cra_module = THIS_MODULE, |
721 | .cra_init = atmel_tdes_cra_init, | 954 | .cra_init = atmel_tdes_cra_init, |
@@ -736,7 +969,7 @@ static struct crypto_alg tdes_algs[] = { | |||
736 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 969 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
737 | .cra_blocksize = DES_BLOCK_SIZE, | 970 | .cra_blocksize = DES_BLOCK_SIZE, |
738 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 971 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
739 | .cra_alignmask = 0, | 972 | .cra_alignmask = 0x7, |
740 | .cra_type = &crypto_ablkcipher_type, | 973 | .cra_type = &crypto_ablkcipher_type, |
741 | .cra_module = THIS_MODULE, | 974 | .cra_module = THIS_MODULE, |
742 | .cra_init = atmel_tdes_cra_init, | 975 | .cra_init = atmel_tdes_cra_init, |
@@ -778,7 +1011,7 @@ static struct crypto_alg tdes_algs[] = { | |||
778 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1011 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
779 | .cra_blocksize = CFB16_BLOCK_SIZE, | 1012 | .cra_blocksize = CFB16_BLOCK_SIZE, |
780 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 1013 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
781 | .cra_alignmask = 0, | 1014 | .cra_alignmask = 0x1, |
782 | .cra_type = &crypto_ablkcipher_type, | 1015 | .cra_type = &crypto_ablkcipher_type, |
783 | .cra_module = THIS_MODULE, | 1016 | .cra_module = THIS_MODULE, |
784 | .cra_init = atmel_tdes_cra_init, | 1017 | .cra_init = atmel_tdes_cra_init, |
@@ -799,7 +1032,7 @@ static struct crypto_alg tdes_algs[] = { | |||
799 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1032 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
800 | .cra_blocksize = CFB32_BLOCK_SIZE, | 1033 | .cra_blocksize = CFB32_BLOCK_SIZE, |
801 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 1034 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
802 | .cra_alignmask = 0, | 1035 | .cra_alignmask = 0x3, |
803 | .cra_type = &crypto_ablkcipher_type, | 1036 | .cra_type = &crypto_ablkcipher_type, |
804 | .cra_module = THIS_MODULE, | 1037 | .cra_module = THIS_MODULE, |
805 | .cra_init = atmel_tdes_cra_init, | 1038 | .cra_init = atmel_tdes_cra_init, |
@@ -820,7 +1053,7 @@ static struct crypto_alg tdes_algs[] = { | |||
820 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1053 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
821 | .cra_blocksize = DES_BLOCK_SIZE, | 1054 | .cra_blocksize = DES_BLOCK_SIZE, |
822 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 1055 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
823 | .cra_alignmask = 0, | 1056 | .cra_alignmask = 0x7, |
824 | .cra_type = &crypto_ablkcipher_type, | 1057 | .cra_type = &crypto_ablkcipher_type, |
825 | .cra_module = THIS_MODULE, | 1058 | .cra_module = THIS_MODULE, |
826 | .cra_init = atmel_tdes_cra_init, | 1059 | .cra_init = atmel_tdes_cra_init, |
@@ -841,7 +1074,7 @@ static struct crypto_alg tdes_algs[] = { | |||
841 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1074 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
842 | .cra_blocksize = DES_BLOCK_SIZE, | 1075 | .cra_blocksize = DES_BLOCK_SIZE, |
843 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 1076 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
844 | .cra_alignmask = 0, | 1077 | .cra_alignmask = 0x7, |
845 | .cra_type = &crypto_ablkcipher_type, | 1078 | .cra_type = &crypto_ablkcipher_type, |
846 | .cra_module = THIS_MODULE, | 1079 | .cra_module = THIS_MODULE, |
847 | .cra_init = atmel_tdes_cra_init, | 1080 | .cra_init = atmel_tdes_cra_init, |
@@ -861,7 +1094,7 @@ static struct crypto_alg tdes_algs[] = { | |||
861 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1094 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
862 | .cra_blocksize = DES_BLOCK_SIZE, | 1095 | .cra_blocksize = DES_BLOCK_SIZE, |
863 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 1096 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
864 | .cra_alignmask = 0, | 1097 | .cra_alignmask = 0x7, |
865 | .cra_type = &crypto_ablkcipher_type, | 1098 | .cra_type = &crypto_ablkcipher_type, |
866 | .cra_module = THIS_MODULE, | 1099 | .cra_module = THIS_MODULE, |
867 | .cra_init = atmel_tdes_cra_init, | 1100 | .cra_init = atmel_tdes_cra_init, |
@@ -882,7 +1115,7 @@ static struct crypto_alg tdes_algs[] = { | |||
882 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1115 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
883 | .cra_blocksize = DES_BLOCK_SIZE, | 1116 | .cra_blocksize = DES_BLOCK_SIZE, |
884 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 1117 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
885 | .cra_alignmask = 0, | 1118 | .cra_alignmask = 0x7, |
886 | .cra_type = &crypto_ablkcipher_type, | 1119 | .cra_type = &crypto_ablkcipher_type, |
887 | .cra_module = THIS_MODULE, | 1120 | .cra_module = THIS_MODULE, |
888 | .cra_init = atmel_tdes_cra_init, | 1121 | .cra_init = atmel_tdes_cra_init, |
@@ -924,7 +1157,7 @@ static struct crypto_alg tdes_algs[] = { | |||
924 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1157 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
925 | .cra_blocksize = CFB16_BLOCK_SIZE, | 1158 | .cra_blocksize = CFB16_BLOCK_SIZE, |
926 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 1159 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
927 | .cra_alignmask = 0, | 1160 | .cra_alignmask = 0x1, |
928 | .cra_type = &crypto_ablkcipher_type, | 1161 | .cra_type = &crypto_ablkcipher_type, |
929 | .cra_module = THIS_MODULE, | 1162 | .cra_module = THIS_MODULE, |
930 | .cra_init = atmel_tdes_cra_init, | 1163 | .cra_init = atmel_tdes_cra_init, |
@@ -945,7 +1178,7 @@ static struct crypto_alg tdes_algs[] = { | |||
945 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1178 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
946 | .cra_blocksize = CFB32_BLOCK_SIZE, | 1179 | .cra_blocksize = CFB32_BLOCK_SIZE, |
947 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 1180 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
948 | .cra_alignmask = 0, | 1181 | .cra_alignmask = 0x3, |
949 | .cra_type = &crypto_ablkcipher_type, | 1182 | .cra_type = &crypto_ablkcipher_type, |
950 | .cra_module = THIS_MODULE, | 1183 | .cra_module = THIS_MODULE, |
951 | .cra_init = atmel_tdes_cra_init, | 1184 | .cra_init = atmel_tdes_cra_init, |
@@ -966,7 +1199,7 @@ static struct crypto_alg tdes_algs[] = { | |||
966 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1199 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
967 | .cra_blocksize = DES_BLOCK_SIZE, | 1200 | .cra_blocksize = DES_BLOCK_SIZE, |
968 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), | 1201 | .cra_ctxsize = sizeof(struct atmel_tdes_ctx), |
969 | .cra_alignmask = 0, | 1202 | .cra_alignmask = 0x7, |
970 | .cra_type = &crypto_ablkcipher_type, | 1203 | .cra_type = &crypto_ablkcipher_type, |
971 | .cra_module = THIS_MODULE, | 1204 | .cra_module = THIS_MODULE, |
972 | .cra_init = atmel_tdes_cra_init, | 1205 | .cra_init = atmel_tdes_cra_init, |
@@ -994,14 +1227,24 @@ static void atmel_tdes_done_task(unsigned long data) | |||
994 | struct atmel_tdes_dev *dd = (struct atmel_tdes_dev *) data; | 1227 | struct atmel_tdes_dev *dd = (struct atmel_tdes_dev *) data; |
995 | int err; | 1228 | int err; |
996 | 1229 | ||
997 | err = atmel_tdes_crypt_dma_stop(dd); | 1230 | if (!(dd->flags & TDES_FLAGS_DMA)) |
1231 | err = atmel_tdes_crypt_pdc_stop(dd); | ||
1232 | else | ||
1233 | err = atmel_tdes_crypt_dma_stop(dd); | ||
998 | 1234 | ||
999 | err = dd->err ? : err; | 1235 | err = dd->err ? : err; |
1000 | 1236 | ||
1001 | if (dd->total && !err) { | 1237 | if (dd->total && !err) { |
1002 | err = atmel_tdes_crypt_dma_start(dd); | 1238 | if (dd->flags & TDES_FLAGS_FAST) { |
1239 | dd->in_sg = sg_next(dd->in_sg); | ||
1240 | dd->out_sg = sg_next(dd->out_sg); | ||
1241 | if (!dd->in_sg || !dd->out_sg) | ||
1242 | err = -EINVAL; | ||
1243 | } | ||
1003 | if (!err) | 1244 | if (!err) |
1004 | return; | 1245 | err = atmel_tdes_crypt_start(dd); |
1246 | if (!err) | ||
1247 | return; /* DMA started. Not fininishing. */ | ||
1005 | } | 1248 | } |
1006 | 1249 | ||
1007 | atmel_tdes_finish_req(dd, err); | 1250 | atmel_tdes_finish_req(dd, err); |
@@ -1053,9 +1296,31 @@ err_tdes_algs: | |||
1053 | return err; | 1296 | return err; |
1054 | } | 1297 | } |
1055 | 1298 | ||
1299 | static void atmel_tdes_get_cap(struct atmel_tdes_dev *dd) | ||
1300 | { | ||
1301 | |||
1302 | dd->caps.has_dma = 0; | ||
1303 | dd->caps.has_cfb_3keys = 0; | ||
1304 | |||
1305 | /* keep only major version number */ | ||
1306 | switch (dd->hw_version & 0xf00) { | ||
1307 | case 0x700: | ||
1308 | dd->caps.has_dma = 1; | ||
1309 | dd->caps.has_cfb_3keys = 1; | ||
1310 | break; | ||
1311 | case 0x600: | ||
1312 | break; | ||
1313 | default: | ||
1314 | dev_warn(dd->dev, | ||
1315 | "Unmanaged tdes version, set minimum capabilities\n"); | ||
1316 | break; | ||
1317 | } | ||
1318 | } | ||
1319 | |||
1056 | static int atmel_tdes_probe(struct platform_device *pdev) | 1320 | static int atmel_tdes_probe(struct platform_device *pdev) |
1057 | { | 1321 | { |
1058 | struct atmel_tdes_dev *tdes_dd; | 1322 | struct atmel_tdes_dev *tdes_dd; |
1323 | struct crypto_platform_data *pdata; | ||
1059 | struct device *dev = &pdev->dev; | 1324 | struct device *dev = &pdev->dev; |
1060 | struct resource *tdes_res; | 1325 | struct resource *tdes_res; |
1061 | unsigned long tdes_phys_size; | 1326 | unsigned long tdes_phys_size; |
@@ -1109,7 +1374,7 @@ static int atmel_tdes_probe(struct platform_device *pdev) | |||
1109 | } | 1374 | } |
1110 | 1375 | ||
1111 | /* Initializing the clock */ | 1376 | /* Initializing the clock */ |
1112 | tdes_dd->iclk = clk_get(&pdev->dev, NULL); | 1377 | tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk"); |
1113 | if (IS_ERR(tdes_dd->iclk)) { | 1378 | if (IS_ERR(tdes_dd->iclk)) { |
1114 | dev_err(dev, "clock intialization failed.\n"); | 1379 | dev_err(dev, "clock intialization failed.\n"); |
1115 | err = PTR_ERR(tdes_dd->iclk); | 1380 | err = PTR_ERR(tdes_dd->iclk); |
@@ -1123,9 +1388,25 @@ static int atmel_tdes_probe(struct platform_device *pdev) | |||
1123 | goto tdes_io_err; | 1388 | goto tdes_io_err; |
1124 | } | 1389 | } |
1125 | 1390 | ||
1126 | err = atmel_tdes_dma_init(tdes_dd); | 1391 | atmel_tdes_hw_version_init(tdes_dd); |
1392 | |||
1393 | atmel_tdes_get_cap(tdes_dd); | ||
1394 | |||
1395 | err = atmel_tdes_buff_init(tdes_dd); | ||
1127 | if (err) | 1396 | if (err) |
1128 | goto err_tdes_dma; | 1397 | goto err_tdes_buff; |
1398 | |||
1399 | if (tdes_dd->caps.has_dma) { | ||
1400 | pdata = pdev->dev.platform_data; | ||
1401 | if (!pdata) { | ||
1402 | dev_err(&pdev->dev, "platform data not available\n"); | ||
1403 | err = -ENXIO; | ||
1404 | goto err_pdata; | ||
1405 | } | ||
1406 | err = atmel_tdes_dma_init(tdes_dd, pdata); | ||
1407 | if (err) | ||
1408 | goto err_tdes_dma; | ||
1409 | } | ||
1129 | 1410 | ||
1130 | spin_lock(&atmel_tdes.lock); | 1411 | spin_lock(&atmel_tdes.lock); |
1131 | list_add_tail(&tdes_dd->list, &atmel_tdes.dev_list); | 1412 | list_add_tail(&tdes_dd->list, &atmel_tdes.dev_list); |
@@ -1143,8 +1424,12 @@ err_algs: | |||
1143 | spin_lock(&atmel_tdes.lock); | 1424 | spin_lock(&atmel_tdes.lock); |
1144 | list_del(&tdes_dd->list); | 1425 | list_del(&tdes_dd->list); |
1145 | spin_unlock(&atmel_tdes.lock); | 1426 | spin_unlock(&atmel_tdes.lock); |
1146 | atmel_tdes_dma_cleanup(tdes_dd); | 1427 | if (tdes_dd->caps.has_dma) |
1428 | atmel_tdes_dma_cleanup(tdes_dd); | ||
1147 | err_tdes_dma: | 1429 | err_tdes_dma: |
1430 | err_pdata: | ||
1431 | atmel_tdes_buff_cleanup(tdes_dd); | ||
1432 | err_tdes_buff: | ||
1148 | iounmap(tdes_dd->io_base); | 1433 | iounmap(tdes_dd->io_base); |
1149 | tdes_io_err: | 1434 | tdes_io_err: |
1150 | clk_put(tdes_dd->iclk); | 1435 | clk_put(tdes_dd->iclk); |
@@ -1178,7 +1463,10 @@ static int atmel_tdes_remove(struct platform_device *pdev) | |||
1178 | tasklet_kill(&tdes_dd->done_task); | 1463 | tasklet_kill(&tdes_dd->done_task); |
1179 | tasklet_kill(&tdes_dd->queue_task); | 1464 | tasklet_kill(&tdes_dd->queue_task); |
1180 | 1465 | ||
1181 | atmel_tdes_dma_cleanup(tdes_dd); | 1466 | if (tdes_dd->caps.has_dma) |
1467 | atmel_tdes_dma_cleanup(tdes_dd); | ||
1468 | |||
1469 | atmel_tdes_buff_cleanup(tdes_dd); | ||
1182 | 1470 | ||
1183 | iounmap(tdes_dd->io_base); | 1471 | iounmap(tdes_dd->io_base); |
1184 | 1472 | ||
diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 827913d7d33a..d797f31f5d85 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c | |||
@@ -151,7 +151,7 @@ static int bfin_crypto_crc_init(struct ahash_request *req) | |||
151 | struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req); | 151 | struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req); |
152 | struct bfin_crypto_crc *crc; | 152 | struct bfin_crypto_crc *crc; |
153 | 153 | ||
154 | dev_dbg(crc->dev, "crc_init\n"); | 154 | dev_dbg(ctx->crc->dev, "crc_init\n"); |
155 | spin_lock_bh(&crc_list.lock); | 155 | spin_lock_bh(&crc_list.lock); |
156 | list_for_each_entry(crc, &crc_list.dev_list, list) { | 156 | list_for_each_entry(crc, &crc_list.dev_list, list) { |
157 | crc_ctx->crc = crc; | 157 | crc_ctx->crc = crc; |
@@ -160,7 +160,7 @@ static int bfin_crypto_crc_init(struct ahash_request *req) | |||
160 | spin_unlock_bh(&crc_list.lock); | 160 | spin_unlock_bh(&crc_list.lock); |
161 | 161 | ||
162 | if (sg_count(req->src) > CRC_MAX_DMA_DESC) { | 162 | if (sg_count(req->src) > CRC_MAX_DMA_DESC) { |
163 | dev_dbg(crc->dev, "init: requested sg list is too big > %d\n", | 163 | dev_dbg(ctx->crc->dev, "init: requested sg list is too big > %d\n", |
164 | CRC_MAX_DMA_DESC); | 164 | CRC_MAX_DMA_DESC); |
165 | return -EINVAL; | 165 | return -EINVAL; |
166 | } | 166 | } |
@@ -175,7 +175,7 @@ static int bfin_crypto_crc_init(struct ahash_request *req) | |||
175 | /* init crc results */ | 175 | /* init crc results */ |
176 | put_unaligned_le32(crc_ctx->key, req->result); | 176 | put_unaligned_le32(crc_ctx->key, req->result); |
177 | 177 | ||
178 | dev_dbg(crc->dev, "init: digest size: %d\n", | 178 | dev_dbg(ctx->crc->dev, "init: digest size: %d\n", |
179 | crypto_ahash_digestsize(tfm)); | 179 | crypto_ahash_digestsize(tfm)); |
180 | 180 | ||
181 | return bfin_crypto_crc_init_hw(crc, crc_ctx->key); | 181 | return bfin_crypto_crc_init_hw(crc, crc_ctx->key); |
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 65c7668614ab..b44091c47f75 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig | |||
@@ -78,7 +78,7 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API | |||
78 | tristate "Register hash algorithm implementations with Crypto API" | 78 | tristate "Register hash algorithm implementations with Crypto API" |
79 | depends on CRYPTO_DEV_FSL_CAAM | 79 | depends on CRYPTO_DEV_FSL_CAAM |
80 | default y | 80 | default y |
81 | select CRYPTO_AHASH | 81 | select CRYPTO_HASH |
82 | help | 82 | help |
83 | Selecting this will offload ahash for users of the | 83 | Selecting this will offload ahash for users of the |
84 | scatterlist crypto API to the SEC4 via job ring. | 84 | scatterlist crypto API to the SEC4 via job ring. |
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index cf268b14ae9a..765fdf5ce579 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c | |||
@@ -1693,6 +1693,7 @@ static struct caam_alg_template driver_algs[] = { | |||
1693 | .name = "authenc(hmac(sha224),cbc(aes))", | 1693 | .name = "authenc(hmac(sha224),cbc(aes))", |
1694 | .driver_name = "authenc-hmac-sha224-cbc-aes-caam", | 1694 | .driver_name = "authenc-hmac-sha224-cbc-aes-caam", |
1695 | .blocksize = AES_BLOCK_SIZE, | 1695 | .blocksize = AES_BLOCK_SIZE, |
1696 | .type = CRYPTO_ALG_TYPE_AEAD, | ||
1696 | .template_aead = { | 1697 | .template_aead = { |
1697 | .setkey = aead_setkey, | 1698 | .setkey = aead_setkey, |
1698 | .setauthsize = aead_setauthsize, | 1699 | .setauthsize = aead_setauthsize, |
@@ -1732,6 +1733,7 @@ static struct caam_alg_template driver_algs[] = { | |||
1732 | .name = "authenc(hmac(sha384),cbc(aes))", | 1733 | .name = "authenc(hmac(sha384),cbc(aes))", |
1733 | .driver_name = "authenc-hmac-sha384-cbc-aes-caam", | 1734 | .driver_name = "authenc-hmac-sha384-cbc-aes-caam", |
1734 | .blocksize = AES_BLOCK_SIZE, | 1735 | .blocksize = AES_BLOCK_SIZE, |
1736 | .type = CRYPTO_ALG_TYPE_AEAD, | ||
1735 | .template_aead = { | 1737 | .template_aead = { |
1736 | .setkey = aead_setkey, | 1738 | .setkey = aead_setkey, |
1737 | .setauthsize = aead_setauthsize, | 1739 | .setauthsize = aead_setauthsize, |
@@ -1810,6 +1812,7 @@ static struct caam_alg_template driver_algs[] = { | |||
1810 | .name = "authenc(hmac(sha224),cbc(des3_ede))", | 1812 | .name = "authenc(hmac(sha224),cbc(des3_ede))", |
1811 | .driver_name = "authenc-hmac-sha224-cbc-des3_ede-caam", | 1813 | .driver_name = "authenc-hmac-sha224-cbc-des3_ede-caam", |
1812 | .blocksize = DES3_EDE_BLOCK_SIZE, | 1814 | .blocksize = DES3_EDE_BLOCK_SIZE, |
1815 | .type = CRYPTO_ALG_TYPE_AEAD, | ||
1813 | .template_aead = { | 1816 | .template_aead = { |
1814 | .setkey = aead_setkey, | 1817 | .setkey = aead_setkey, |
1815 | .setauthsize = aead_setauthsize, | 1818 | .setauthsize = aead_setauthsize, |
@@ -1849,6 +1852,7 @@ static struct caam_alg_template driver_algs[] = { | |||
1849 | .name = "authenc(hmac(sha384),cbc(des3_ede))", | 1852 | .name = "authenc(hmac(sha384),cbc(des3_ede))", |
1850 | .driver_name = "authenc-hmac-sha384-cbc-des3_ede-caam", | 1853 | .driver_name = "authenc-hmac-sha384-cbc-des3_ede-caam", |
1851 | .blocksize = DES3_EDE_BLOCK_SIZE, | 1854 | .blocksize = DES3_EDE_BLOCK_SIZE, |
1855 | .type = CRYPTO_ALG_TYPE_AEAD, | ||
1852 | .template_aead = { | 1856 | .template_aead = { |
1853 | .setkey = aead_setkey, | 1857 | .setkey = aead_setkey, |
1854 | .setauthsize = aead_setauthsize, | 1858 | .setauthsize = aead_setauthsize, |
@@ -1926,6 +1930,7 @@ static struct caam_alg_template driver_algs[] = { | |||
1926 | .name = "authenc(hmac(sha224),cbc(des))", | 1930 | .name = "authenc(hmac(sha224),cbc(des))", |
1927 | .driver_name = "authenc-hmac-sha224-cbc-des-caam", | 1931 | .driver_name = "authenc-hmac-sha224-cbc-des-caam", |
1928 | .blocksize = DES_BLOCK_SIZE, | 1932 | .blocksize = DES_BLOCK_SIZE, |
1933 | .type = CRYPTO_ALG_TYPE_AEAD, | ||
1929 | .template_aead = { | 1934 | .template_aead = { |
1930 | .setkey = aead_setkey, | 1935 | .setkey = aead_setkey, |
1931 | .setauthsize = aead_setauthsize, | 1936 | .setauthsize = aead_setauthsize, |
@@ -1965,6 +1970,7 @@ static struct caam_alg_template driver_algs[] = { | |||
1965 | .name = "authenc(hmac(sha384),cbc(des))", | 1970 | .name = "authenc(hmac(sha384),cbc(des))", |
1966 | .driver_name = "authenc-hmac-sha384-cbc-des-caam", | 1971 | .driver_name = "authenc-hmac-sha384-cbc-des-caam", |
1967 | .blocksize = DES_BLOCK_SIZE, | 1972 | .blocksize = DES_BLOCK_SIZE, |
1973 | .type = CRYPTO_ALG_TYPE_AEAD, | ||
1968 | .template_aead = { | 1974 | .template_aead = { |
1969 | .setkey = aead_setkey, | 1975 | .setkey = aead_setkey, |
1970 | .setauthsize = aead_setauthsize, | 1976 | .setauthsize = aead_setauthsize, |
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 32aba7a61503..5996521a1caf 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c | |||
@@ -411,7 +411,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) | |||
411 | return 0; | 411 | return 0; |
412 | } | 412 | } |
413 | 413 | ||
414 | static u32 gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in, | 414 | static int gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in, |
415 | u32 keylen) | 415 | u32 keylen) |
416 | { | 416 | { |
417 | return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len, | 417 | return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len, |
@@ -420,7 +420,7 @@ static u32 gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in, | |||
420 | } | 420 | } |
421 | 421 | ||
422 | /* Digest hash size if it is too large */ | 422 | /* Digest hash size if it is too large */ |
423 | static u32 hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, | 423 | static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, |
424 | u32 *keylen, u8 *key_out, u32 digestsize) | 424 | u32 *keylen, u8 *key_out, u32 digestsize) |
425 | { | 425 | { |
426 | struct device *jrdev = ctx->jrdev; | 426 | struct device *jrdev = ctx->jrdev; |
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 8acf00490fd5..6e94bcd94678 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c | |||
@@ -304,6 +304,9 @@ static int caam_probe(struct platform_device *pdev) | |||
304 | caam_remove(pdev); | 304 | caam_remove(pdev); |
305 | return ret; | 305 | return ret; |
306 | } | 306 | } |
307 | |||
308 | /* Enable RDB bit so that RNG works faster */ | ||
309 | setbits32(&topregs->ctrl.scfgr, SCFGR_RDBENABLE); | ||
307 | } | 310 | } |
308 | 311 | ||
309 | /* NOTE: RTIC detection ought to go here, around Si time */ | 312 | /* NOTE: RTIC detection ought to go here, around Si time */ |
diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c index 30b8f74833d4..9f25f5296029 100644 --- a/drivers/crypto/caam/error.c +++ b/drivers/crypto/caam/error.c | |||
@@ -36,7 +36,7 @@ static void report_jump_idx(u32 status, char *outstr) | |||
36 | 36 | ||
37 | static void report_ccb_status(u32 status, char *outstr) | 37 | static void report_ccb_status(u32 status, char *outstr) |
38 | { | 38 | { |
39 | char *cha_id_list[] = { | 39 | static const char * const cha_id_list[] = { |
40 | "", | 40 | "", |
41 | "AES", | 41 | "AES", |
42 | "DES", | 42 | "DES", |
@@ -51,7 +51,7 @@ static void report_ccb_status(u32 status, char *outstr) | |||
51 | "ZUCE", | 51 | "ZUCE", |
52 | "ZUCA", | 52 | "ZUCA", |
53 | }; | 53 | }; |
54 | char *err_id_list[] = { | 54 | static const char * const err_id_list[] = { |
55 | "No error.", | 55 | "No error.", |
56 | "Mode error.", | 56 | "Mode error.", |
57 | "Data size error.", | 57 | "Data size error.", |
@@ -69,7 +69,7 @@ static void report_ccb_status(u32 status, char *outstr) | |||
69 | "Invalid CHA combination was selected", | 69 | "Invalid CHA combination was selected", |
70 | "Invalid CHA selected.", | 70 | "Invalid CHA selected.", |
71 | }; | 71 | }; |
72 | char *rng_err_id_list[] = { | 72 | static const char * const rng_err_id_list[] = { |
73 | "", | 73 | "", |
74 | "", | 74 | "", |
75 | "", | 75 | "", |
@@ -117,7 +117,7 @@ static void report_jump_status(u32 status, char *outstr) | |||
117 | 117 | ||
118 | static void report_deco_status(u32 status, char *outstr) | 118 | static void report_deco_status(u32 status, char *outstr) |
119 | { | 119 | { |
120 | const struct { | 120 | static const struct { |
121 | u8 value; | 121 | u8 value; |
122 | char *error_text; | 122 | char *error_text; |
123 | } desc_error_list[] = { | 123 | } desc_error_list[] = { |
@@ -245,7 +245,7 @@ static void report_cond_code_status(u32 status, char *outstr) | |||
245 | 245 | ||
246 | char *caam_jr_strstatus(char *outstr, u32 status) | 246 | char *caam_jr_strstatus(char *outstr, u32 status) |
247 | { | 247 | { |
248 | struct stat_src { | 248 | static const struct stat_src { |
249 | void (*report_ssed)(u32 status, char *outstr); | 249 | void (*report_ssed)(u32 status, char *outstr); |
250 | char *error; | 250 | char *error; |
251 | } status_src[] = { | 251 | } status_src[] = { |
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 5cd4c1b268a1..e4a16b741371 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h | |||
@@ -41,6 +41,7 @@ struct caam_jrentry_info { | |||
41 | /* Private sub-storage for a single JobR */ | 41 | /* Private sub-storage for a single JobR */ |
42 | struct caam_drv_private_jr { | 42 | struct caam_drv_private_jr { |
43 | struct device *parentdev; /* points back to controller dev */ | 43 | struct device *parentdev; /* points back to controller dev */ |
44 | struct platform_device *jr_pdev;/* points to platform device for JR */ | ||
44 | int ridx; | 45 | int ridx; |
45 | struct caam_job_ring __iomem *rregs; /* JobR's register space */ | 46 | struct caam_job_ring __iomem *rregs; /* JobR's register space */ |
46 | struct tasklet_struct irqtask; | 47 | struct tasklet_struct irqtask; |
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 93d14070141a..b4aa773ecbc8 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c | |||
@@ -407,6 +407,7 @@ int caam_jr_shutdown(struct device *dev) | |||
407 | dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH, | 407 | dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH, |
408 | jrp->outring, outbusaddr); | 408 | jrp->outring, outbusaddr); |
409 | kfree(jrp->entinfo); | 409 | kfree(jrp->entinfo); |
410 | of_device_unregister(jrp->jr_pdev); | ||
410 | 411 | ||
411 | return ret; | 412 | return ret; |
412 | } | 413 | } |
@@ -454,6 +455,8 @@ int caam_jr_probe(struct platform_device *pdev, struct device_node *np, | |||
454 | kfree(jrpriv); | 455 | kfree(jrpriv); |
455 | return -EINVAL; | 456 | return -EINVAL; |
456 | } | 457 | } |
458 | |||
459 | jrpriv->jr_pdev = jr_pdev; | ||
457 | jrdev = &jr_pdev->dev; | 460 | jrdev = &jr_pdev->dev; |
458 | dev_set_drvdata(jrdev, jrpriv); | 461 | dev_set_drvdata(jrdev, jrpriv); |
459 | ctrlpriv->jrdev[ring] = jrdev; | 462 | ctrlpriv->jrdev[ring] = jrdev; |
@@ -472,6 +475,7 @@ int caam_jr_probe(struct platform_device *pdev, struct device_node *np, | |||
472 | /* Now do the platform independent part */ | 475 | /* Now do the platform independent part */ |
473 | error = caam_jr_init(jrdev); /* now turn on hardware */ | 476 | error = caam_jr_init(jrdev); /* now turn on hardware */ |
474 | if (error) { | 477 | if (error) { |
478 | of_device_unregister(jr_pdev); | ||
475 | kfree(jrpriv); | 479 | kfree(jrpriv); |
476 | return error; | 480 | return error; |
477 | } | 481 | } |
diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index f6dba10246c3..87138d2adb5f 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c | |||
@@ -44,7 +44,7 @@ Split key generation----------------------------------------------- | |||
44 | [06] 0x64260028 fifostr: class2 mdsplit-jdk len=40 | 44 | [06] 0x64260028 fifostr: class2 mdsplit-jdk len=40 |
45 | @0xffe04000 | 45 | @0xffe04000 |
46 | */ | 46 | */ |
47 | u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, | 47 | int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, |
48 | int split_key_pad_len, const u8 *key_in, u32 keylen, | 48 | int split_key_pad_len, const u8 *key_in, u32 keylen, |
49 | u32 alg_op) | 49 | u32 alg_op) |
50 | { | 50 | { |
diff --git a/drivers/crypto/caam/key_gen.h b/drivers/crypto/caam/key_gen.h index d95d290c6e8b..c5588f6d8109 100644 --- a/drivers/crypto/caam/key_gen.h +++ b/drivers/crypto/caam/key_gen.h | |||
@@ -12,6 +12,6 @@ struct split_key_result { | |||
12 | 12 | ||
13 | void split_key_done(struct device *dev, u32 *desc, u32 err, void *context); | 13 | void split_key_done(struct device *dev, u32 *desc, u32 err, void *context); |
14 | 14 | ||
15 | u32 gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, | 15 | int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, |
16 | int split_key_pad_len, const u8 *key_in, u32 keylen, | 16 | int split_key_pad_len, const u8 *key_in, u32 keylen, |
17 | u32 alg_op); | 17 | u32 alg_op); |
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 3223fc6d647c..cd6fedad9935 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h | |||
@@ -252,7 +252,8 @@ struct caam_ctrl { | |||
252 | /* Read/Writable */ | 252 | /* Read/Writable */ |
253 | u32 rsvd1; | 253 | u32 rsvd1; |
254 | u32 mcr; /* MCFG Master Config Register */ | 254 | u32 mcr; /* MCFG Master Config Register */ |
255 | u32 rsvd2[2]; | 255 | u32 rsvd2; |
256 | u32 scfgr; /* SCFGR, Security Config Register */ | ||
256 | 257 | ||
257 | /* Bus Access Configuration Section 010-11f */ | 258 | /* Bus Access Configuration Section 010-11f */ |
258 | /* Read/Writable */ | 259 | /* Read/Writable */ |
@@ -299,6 +300,7 @@ struct caam_ctrl { | |||
299 | #define MCFGR_WDFAIL 0x20000000 /* DECO watchdog force-fail */ | 300 | #define MCFGR_WDFAIL 0x20000000 /* DECO watchdog force-fail */ |
300 | #define MCFGR_DMA_RESET 0x10000000 | 301 | #define MCFGR_DMA_RESET 0x10000000 |
301 | #define MCFGR_LONG_PTR 0x00010000 /* Use >32-bit desc addressing */ | 302 | #define MCFGR_LONG_PTR 0x00010000 /* Use >32-bit desc addressing */ |
303 | #define SCFGR_RDBENABLE 0x00000400 | ||
302 | 304 | ||
303 | /* AXI read cache control */ | 305 | /* AXI read cache control */ |
304 | #define MCFGR_ARCACHE_SHIFT 12 | 306 | #define MCFGR_ARCACHE_SHIFT 12 |
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 6aa425fe0ed5..ee15b0f7849a 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c | |||
@@ -636,7 +636,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err) | |||
636 | 636 | ||
637 | pr_debug("err: %d\n", err); | 637 | pr_debug("err: %d\n", err); |
638 | 638 | ||
639 | pm_runtime_put_sync(dd->dev); | 639 | pm_runtime_put(dd->dev); |
640 | dd->flags &= ~FLAGS_BUSY; | 640 | dd->flags &= ~FLAGS_BUSY; |
641 | 641 | ||
642 | req->base.complete(&req->base, err); | 642 | req->base.complete(&req->base, err); |
@@ -1248,18 +1248,7 @@ static struct platform_driver omap_aes_driver = { | |||
1248 | }, | 1248 | }, |
1249 | }; | 1249 | }; |
1250 | 1250 | ||
1251 | static int __init omap_aes_mod_init(void) | 1251 | module_platform_driver(omap_aes_driver); |
1252 | { | ||
1253 | return platform_driver_register(&omap_aes_driver); | ||
1254 | } | ||
1255 | |||
1256 | static void __exit omap_aes_mod_exit(void) | ||
1257 | { | ||
1258 | platform_driver_unregister(&omap_aes_driver); | ||
1259 | } | ||
1260 | |||
1261 | module_init(omap_aes_mod_init); | ||
1262 | module_exit(omap_aes_mod_exit); | ||
1263 | 1252 | ||
1264 | MODULE_DESCRIPTION("OMAP AES hw acceleration support."); | 1253 | MODULE_DESCRIPTION("OMAP AES hw acceleration support."); |
1265 | MODULE_LICENSE("GPL v2"); | 1254 | MODULE_LICENSE("GPL v2"); |
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 3d1611f5aecf..a1e1b4756ee5 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c | |||
@@ -923,7 +923,7 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) | |||
923 | dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) | | 923 | dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) | |
924 | BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY)); | 924 | BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY)); |
925 | 925 | ||
926 | pm_runtime_put_sync(dd->dev); | 926 | pm_runtime_put(dd->dev); |
927 | 927 | ||
928 | if (req->base.complete) | 928 | if (req->base.complete) |
929 | req->base.complete(&req->base, err); | 929 | req->base.complete(&req->base, err); |
@@ -1813,18 +1813,7 @@ static struct platform_driver omap_sham_driver = { | |||
1813 | }, | 1813 | }, |
1814 | }; | 1814 | }; |
1815 | 1815 | ||
1816 | static int __init omap_sham_mod_init(void) | 1816 | module_platform_driver(omap_sham_driver); |
1817 | { | ||
1818 | return platform_driver_register(&omap_sham_driver); | ||
1819 | } | ||
1820 | |||
1821 | static void __exit omap_sham_mod_exit(void) | ||
1822 | { | ||
1823 | platform_driver_unregister(&omap_sham_driver); | ||
1824 | } | ||
1825 | |||
1826 | module_init(omap_sham_mod_init); | ||
1827 | module_exit(omap_sham_mod_exit); | ||
1828 | 1817 | ||
1829 | MODULE_DESCRIPTION("OMAP SHA1/MD5 hw acceleration support."); | 1818 | MODULE_DESCRIPTION("OMAP SHA1/MD5 hw acceleration support."); |
1830 | MODULE_LICENSE("GPL v2"); | 1819 | MODULE_LICENSE("GPL v2"); |
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 2096d4685a9e..ac30724d923d 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c | |||
@@ -1688,8 +1688,6 @@ static const struct of_device_id spacc_of_id_table[] = { | |||
1688 | { .compatible = "picochip,spacc-l2" }, | 1688 | { .compatible = "picochip,spacc-l2" }, |
1689 | {} | 1689 | {} |
1690 | }; | 1690 | }; |
1691 | #else /* CONFIG_OF */ | ||
1692 | #define spacc_of_id_table NULL | ||
1693 | #endif /* CONFIG_OF */ | 1691 | #endif /* CONFIG_OF */ |
1694 | 1692 | ||
1695 | static bool spacc_is_compatible(struct platform_device *pdev, | 1693 | static bool spacc_is_compatible(struct platform_device *pdev, |
@@ -1874,7 +1872,7 @@ static struct platform_driver spacc_driver = { | |||
1874 | #ifdef CONFIG_PM | 1872 | #ifdef CONFIG_PM |
1875 | .pm = &spacc_pm_ops, | 1873 | .pm = &spacc_pm_ops, |
1876 | #endif /* CONFIG_PM */ | 1874 | #endif /* CONFIG_PM */ |
1877 | .of_match_table = spacc_of_id_table, | 1875 | .of_match_table = of_match_ptr(spacc_of_id_table), |
1878 | }, | 1876 | }, |
1879 | .id_table = spacc_id_table, | 1877 | .id_table = spacc_id_table, |
1880 | }; | 1878 | }; |
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c new file mode 100644 index 000000000000..a97bb6c1596c --- /dev/null +++ b/drivers/crypto/sahara.c | |||
@@ -0,0 +1,1070 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * Support for SAHARA cryptographic accelerator. | ||
5 | * | ||
6 | * Copyright (c) 2013 Vista Silicon S.L. | ||
7 | * Author: Javier Martin <javier.martin@vista-silicon.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License version 2 as published | ||
11 | * by the Free Software Foundation. | ||
12 | * | ||
13 | * Based on omap-aes.c and tegra-aes.c | ||
14 | */ | ||
15 | |||
16 | #include <crypto/algapi.h> | ||
17 | #include <crypto/aes.h> | ||
18 | |||
19 | #include <linux/clk.h> | ||
20 | #include <linux/crypto.h> | ||
21 | #include <linux/interrupt.h> | ||
22 | #include <linux/io.h> | ||
23 | #include <linux/irq.h> | ||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/of.h> | ||
27 | #include <linux/platform_device.h> | ||
28 | |||
/* Driver identification and sizing limits. */
#define SAHARA_NAME "sahara"
#define SAHARA_VERSION_3 3
#define SAHARA_TIMEOUT_MS 1000
#define SAHARA_MAX_HW_DESC 2
#define SAHARA_MAX_HW_LINK 20

/*
 * Software state flags kept in sahara_dev->flags / sahara_ctx->flags.
 * FLAGS_BUSY is a bit *number* (used with test_and_set_bit/clear_bit),
 * the others are bit masks.
 */
#define FLAGS_MODE_MASK 0x000f
#define FLAGS_ENCRYPT BIT(0)
#define FLAGS_CBC BIT(1)
#define FLAGS_NEW_KEY BIT(3)
#define FLAGS_BUSY 4

/*
 * Hardware descriptor header fields.  The parity bit keeps the header
 * word at even parity; code that sets extra bits toggles it accordingly.
 */
#define SAHARA_HDR_BASE 0x00800000
#define SAHARA_HDR_SKHA_ALG_AES 0
#define SAHARA_HDR_SKHA_OP_ENC (1 << 2)
#define SAHARA_HDR_SKHA_MODE_ECB (0 << 3)
#define SAHARA_HDR_SKHA_MODE_CBC (1 << 3)
#define SAHARA_HDR_FORM_DATA (5 << 16)
#define SAHARA_HDR_FORM_KEY (8 << 16)
#define SAHARA_HDR_LLO (1 << 24)
#define SAHARA_HDR_CHA_SKHA (1 << 28)
#define SAHARA_HDR_CHA_MDHA (2 << 28)
#define SAHARA_HDR_PARITY_BIT (1 << 31)

/* SAHARA can only process one request at a time */
#define SAHARA_QUEUE_LENGTH 1

/* Register map and per-register bit definitions. */
#define SAHARA_REG_VERSION 0x00
#define SAHARA_REG_DAR 0x04
#define SAHARA_REG_CONTROL 0x08
#define SAHARA_CONTROL_SET_THROTTLE(x) (((x) & 0xff) << 24)
#define SAHARA_CONTROL_SET_MAXBURST(x) (((x) & 0xff) << 16)
#define SAHARA_CONTROL_RNG_AUTORSD (1 << 7)
#define SAHARA_CONTROL_ENABLE_INT (1 << 4)
#define SAHARA_REG_CMD 0x0C
#define SAHARA_CMD_RESET (1 << 0)
#define SAHARA_CMD_CLEAR_INT (1 << 8)
#define SAHARA_CMD_CLEAR_ERR (1 << 9)
#define SAHARA_CMD_SINGLE_STEP (1 << 10)
#define SAHARA_CMD_MODE_BATCH (1 << 16)
#define SAHARA_CMD_MODE_DEBUG (1 << 18)
#define SAHARA_REG_STATUS 0x10
#define SAHARA_STATUS_GET_STATE(x) ((x) & 0x7)
#define SAHARA_STATE_IDLE 0
#define SAHARA_STATE_BUSY 1
#define SAHARA_STATE_ERR 2
#define SAHARA_STATE_FAULT 3
#define SAHARA_STATE_COMPLETE 4
#define SAHARA_STATE_COMP_FLAG (1 << 2)
#define SAHARA_STATUS_DAR_FULL (1 << 3)
#define SAHARA_STATUS_ERROR (1 << 4)
#define SAHARA_STATUS_SECURE (1 << 5)
#define SAHARA_STATUS_FAIL (1 << 6)
#define SAHARA_STATUS_INIT (1 << 7)
#define SAHARA_STATUS_RNG_RESEED (1 << 8)
#define SAHARA_STATUS_ACTIVE_RNG (1 << 9)
#define SAHARA_STATUS_ACTIVE_MDHA (1 << 10)
#define SAHARA_STATUS_ACTIVE_SKHA (1 << 11)
#define SAHARA_STATUS_MODE_BATCH (1 << 16)
#define SAHARA_STATUS_MODE_DEDICATED (1 << 17)
#define SAHARA_STATUS_MODE_DEBUG (1 << 18)
#define SAHARA_STATUS_GET_ISTATE(x) (((x) >> 24) & 0xff)
#define SAHARA_REG_ERRSTATUS 0x14
#define SAHARA_ERRSTATUS_GET_SOURCE(x) ((x) & 0xf)
#define SAHARA_ERRSOURCE_CHA 14
#define SAHARA_ERRSOURCE_DMA 15
#define SAHARA_ERRSTATUS_DMA_DIR (1 << 8)
#define SAHARA_ERRSTATUS_GET_DMASZ(x)(((x) >> 9) & 0x3)
#define SAHARA_ERRSTATUS_GET_DMASRC(x) (((x) >> 13) & 0x7)
#define SAHARA_ERRSTATUS_GET_CHASRC(x) (((x) >> 16) & 0xfff)
#define SAHARA_ERRSTATUS_GET_CHAERR(x) (((x) >> 28) & 0x3)
#define SAHARA_REG_FADDR 0x18
#define SAHARA_REG_CDAR 0x1C
#define SAHARA_REG_IDAR 0x20
103 | |||
/*
 * Hardware descriptor, read by the SAHARA DMA engine.  Layout must
 * match the hardware exactly; do not reorder fields.
 */
struct sahara_hw_desc {
	u32	hdr;	/* operation header word (see SAHARA_HDR_*) */
	u32	len1;	/* length of buffer 1 */
	dma_addr_t	p1;	/* bus address of buffer 1 (or a link chain) */
	u32	len2;	/* length of buffer 2 */
	dma_addr_t	p2;	/* bus address of buffer 2 (or a link chain) */
	dma_addr_t	next;	/* bus address of the next descriptor, 0 = end */
};

/* One entry of a hardware scatter/gather link chain. */
struct sahara_hw_link {
	u32	len;	/* buffer length */
	dma_addr_t	p;	/* bus address of the buffer */
	dma_addr_t	next;	/* bus address of the next link, 0 = end */
};

/* Per-tfm context: the AES key and the software fallback cipher. */
struct sahara_ctx {
	struct sahara_dev *dev;
	unsigned long flags;
	int keylen;			/* key length in bytes */
	u8 key[AES_KEYSIZE_128];	/* HW only handles 128-bit keys */
	struct crypto_ablkcipher *fallback;	/* for 192/256-bit keys */
};

/* Per-request context: requested mode bits (FLAGS_ENCRYPT/FLAGS_CBC). */
struct sahara_aes_reqctx {
	unsigned long mode;
};

/* Per-device state; there is a single SAHARA unit (see dev_ptr). */
struct sahara_dev {
	struct device		*device;
	void __iomem		*regs_base;
	struct clk		*clk_ipg;
	struct clk		*clk_ahb;

	struct sahara_ctx	*ctx;	/* ctx of the request in flight */
	spinlock_t		lock;	/* protects queue and flags */
	struct crypto_queue	queue;
	unsigned long		flags;

	struct tasklet_struct	done_task;
	struct tasklet_struct	queue_task;

	/* DMA-coherent hardware descriptors and their bus addresses */
	struct sahara_hw_desc	*hw_desc[SAHARA_MAX_HW_DESC];
	dma_addr_t		hw_phys_desc[SAHARA_MAX_HW_DESC];

	/* DMA-coherent key buffer */
	u8			*key_base;
	dma_addr_t		key_phys_base;

	/* DMA-coherent IV buffer (CBC mode) */
	u8			*iv_base;
	dma_addr_t		iv_phys_base;

	/* DMA-coherent link chain entries */
	struct sahara_hw_link	*hw_link[SAHARA_MAX_HW_LINK];
	dma_addr_t		hw_phys_link[SAHARA_MAX_HW_LINK];

	/* the request currently being processed */
	struct ablkcipher_request	*req;
	size_t			total;
	struct scatterlist	*in_sg;
	unsigned int		nb_in_sg;
	struct scatterlist	*out_sg;
	unsigned int		nb_out_sg;

	u32			error;		/* result for the completion */
	struct timer_list	watchdog;	/* guards against a hung HW op */
};

/* Single global device instance (only one SAHARA unit per SoC). */
static struct sahara_dev *dev_ptr;
169 | |||
/* MMIO register write helper. */
static inline void sahara_write(struct sahara_dev *dev, u32 data, u32 reg)
{
	writel(data, dev->regs_base + reg);
}

/* MMIO register read helper. */
static inline unsigned int sahara_read(struct sahara_dev *dev, u32 reg)
{
	return readl(dev->regs_base + reg);
}
179 | |||
180 | static u32 sahara_aes_key_hdr(struct sahara_dev *dev) | ||
181 | { | ||
182 | u32 hdr = SAHARA_HDR_BASE | SAHARA_HDR_SKHA_ALG_AES | | ||
183 | SAHARA_HDR_FORM_KEY | SAHARA_HDR_LLO | | ||
184 | SAHARA_HDR_CHA_SKHA | SAHARA_HDR_PARITY_BIT; | ||
185 | |||
186 | if (dev->flags & FLAGS_CBC) { | ||
187 | hdr |= SAHARA_HDR_SKHA_MODE_CBC; | ||
188 | hdr ^= SAHARA_HDR_PARITY_BIT; | ||
189 | } | ||
190 | |||
191 | if (dev->flags & FLAGS_ENCRYPT) { | ||
192 | hdr |= SAHARA_HDR_SKHA_OP_ENC; | ||
193 | hdr ^= SAHARA_HDR_PARITY_BIT; | ||
194 | } | ||
195 | |||
196 | return hdr; | ||
197 | } | ||
198 | |||
199 | static u32 sahara_aes_data_link_hdr(struct sahara_dev *dev) | ||
200 | { | ||
201 | return SAHARA_HDR_BASE | SAHARA_HDR_FORM_DATA | | ||
202 | SAHARA_HDR_CHA_SKHA | SAHARA_HDR_PARITY_BIT; | ||
203 | } | ||
204 | |||
205 | static int sahara_sg_length(struct scatterlist *sg, | ||
206 | unsigned int total) | ||
207 | { | ||
208 | int sg_nb; | ||
209 | unsigned int len; | ||
210 | struct scatterlist *sg_list; | ||
211 | |||
212 | sg_nb = 0; | ||
213 | sg_list = sg; | ||
214 | |||
215 | while (total) { | ||
216 | len = min(sg_list->length, total); | ||
217 | |||
218 | sg_nb++; | ||
219 | total -= len; | ||
220 | |||
221 | sg_list = sg_next(sg_list); | ||
222 | if (!sg_list) | ||
223 | total = 0; | ||
224 | } | ||
225 | |||
226 | return sg_nb; | ||
227 | } | ||
228 | |||
/* Error source decoded from SAHARA_ERRSTATUS_GET_SOURCE() (4 bits). */
static char *sahara_err_src[16] = {
	"No error",
	"Header error",
	"Descriptor length error",
	"Descriptor length or pointer error",
	"Link length error",
	"Link pointer error",
	"Input buffer error",
	"Output buffer error",
	"Output buffer starvation",
	"Internal state fault",
	"General descriptor problem",
	"Reserved",
	"Descriptor address error",
	"Link address error",
	"CHA error",
	"DMA error"
};

/* DMA transfer size decoded from SAHARA_ERRSTATUS_GET_DMASZ() (2 bits). */
static char *sahara_err_dmasize[4] = {
	"Byte transfer",
	"Half-word transfer",
	"Word transfer",
	"Reserved"
};

/* DMA error cause decoded from SAHARA_ERRSTATUS_GET_DMASRC() (3 bits). */
static char *sahara_err_dmasrc[8] = {
	"No error",
	"AHB bus error",
	"Internal IP bus error",
	"Parity error",
	"DMA crosses 256 byte boundary",
	"DMA is busy",
	"Reserved",
	"DMA HW error"
};

/* CHA error cause; indexed via ffs() of the CHASRC bitfield. */
static char *sahara_cha_errsrc[12] = {
	"Input buffer non-empty",
	"Illegal address",
	"Illegal mode",
	"Illegal data size",
	"Illegal key size",
	"Write during processing",
	"CTX read during processing",
	"HW error",
	"Input buffer disabled/underflow",
	"Output buffer disabled/overflow",
	"DES key parity error",
	"Reserved"
};

/* Failing CHA unit decoded from SAHARA_ERRSTATUS_GET_CHAERR() (2 bits). */
static char *sahara_cha_err[4] = { "No error", "SKHA", "MDHA", "RNG" };
282 | |||
/*
 * Pretty-print the contents of the error status register, splitting
 * DMA-sourced and CHA-sourced errors into their sub-fields.
 */
static void sahara_decode_error(struct sahara_dev *dev, unsigned int error)
{
	u8 source = SAHARA_ERRSTATUS_GET_SOURCE(error);
	/*
	 * NOTE(review): ffs() is 1-based while sahara_cha_errsrc[] is
	 * indexed from 0; whether CHASRC bit 0 should map to entry 0 or
	 * entry 1 cannot be confirmed from this file — verify against the
	 * SAHARA reference manual.  Also, chasrc is 0 when no CHASRC bit
	 * is set, and is not range-checked against the 12-entry table.
	 */
	u16 chasrc = ffs(SAHARA_ERRSTATUS_GET_CHASRC(error));

	dev_err(dev->device, "%s: Error Register = 0x%08x\n", __func__, error);

	dev_err(dev->device, " - %s.\n", sahara_err_src[source]);

	if (source == SAHARA_ERRSOURCE_DMA) {
		if (error & SAHARA_ERRSTATUS_DMA_DIR)
			dev_err(dev->device, " * DMA read.\n");
		else
			dev_err(dev->device, " * DMA write.\n");

		dev_err(dev->device, " * %s.\n",
			sahara_err_dmasize[SAHARA_ERRSTATUS_GET_DMASZ(error)]);
		dev_err(dev->device, " * %s.\n",
			sahara_err_dmasrc[SAHARA_ERRSTATUS_GET_DMASRC(error)]);
	} else if (source == SAHARA_ERRSOURCE_CHA) {
		dev_err(dev->device, " * %s.\n",
			sahara_cha_errsrc[chasrc]);
		dev_err(dev->device, " * %s.\n",
		       sahara_cha_err[SAHARA_ERRSTATUS_GET_CHAERR(error)]);
	}
	dev_err(dev->device, "\n");
}
310 | |||
311 | static char *sahara_state[4] = { "Idle", "Busy", "Error", "HW Fault" }; | ||
312 | |||
313 | static void sahara_decode_status(struct sahara_dev *dev, unsigned int status) | ||
314 | { | ||
315 | u8 state; | ||
316 | |||
317 | if (!IS_ENABLED(DEBUG)) | ||
318 | return; | ||
319 | |||
320 | state = SAHARA_STATUS_GET_STATE(status); | ||
321 | |||
322 | dev_dbg(dev->device, "%s: Status Register = 0x%08x\n", | ||
323 | __func__, status); | ||
324 | |||
325 | dev_dbg(dev->device, " - State = %d:\n", state); | ||
326 | if (state & SAHARA_STATE_COMP_FLAG) | ||
327 | dev_dbg(dev->device, " * Descriptor completed. IRQ pending.\n"); | ||
328 | |||
329 | dev_dbg(dev->device, " * %s.\n", | ||
330 | sahara_state[state & ~SAHARA_STATE_COMP_FLAG]); | ||
331 | |||
332 | if (status & SAHARA_STATUS_DAR_FULL) | ||
333 | dev_dbg(dev->device, " - DAR Full.\n"); | ||
334 | if (status & SAHARA_STATUS_ERROR) | ||
335 | dev_dbg(dev->device, " - Error.\n"); | ||
336 | if (status & SAHARA_STATUS_SECURE) | ||
337 | dev_dbg(dev->device, " - Secure.\n"); | ||
338 | if (status & SAHARA_STATUS_FAIL) | ||
339 | dev_dbg(dev->device, " - Fail.\n"); | ||
340 | if (status & SAHARA_STATUS_RNG_RESEED) | ||
341 | dev_dbg(dev->device, " - RNG Reseed Request.\n"); | ||
342 | if (status & SAHARA_STATUS_ACTIVE_RNG) | ||
343 | dev_dbg(dev->device, " - RNG Active.\n"); | ||
344 | if (status & SAHARA_STATUS_ACTIVE_MDHA) | ||
345 | dev_dbg(dev->device, " - MDHA Active.\n"); | ||
346 | if (status & SAHARA_STATUS_ACTIVE_SKHA) | ||
347 | dev_dbg(dev->device, " - SKHA Active.\n"); | ||
348 | |||
349 | if (status & SAHARA_STATUS_MODE_BATCH) | ||
350 | dev_dbg(dev->device, " - Batch Mode.\n"); | ||
351 | else if (status & SAHARA_STATUS_MODE_DEDICATED) | ||
352 | dev_dbg(dev->device, " - Decidated Mode.\n"); | ||
353 | else if (status & SAHARA_STATUS_MODE_DEBUG) | ||
354 | dev_dbg(dev->device, " - Debug Mode.\n"); | ||
355 | |||
356 | dev_dbg(dev->device, " - Internal state = 0x%02x\n", | ||
357 | SAHARA_STATUS_GET_ISTATE(status)); | ||
358 | |||
359 | dev_dbg(dev->device, "Current DAR: 0x%08x\n", | ||
360 | sahara_read(dev, SAHARA_REG_CDAR)); | ||
361 | dev_dbg(dev->device, "Initial DAR: 0x%08x\n\n", | ||
362 | sahara_read(dev, SAHARA_REG_IDAR)); | ||
363 | } | ||
364 | |||
365 | static void sahara_dump_descriptors(struct sahara_dev *dev) | ||
366 | { | ||
367 | int i; | ||
368 | |||
369 | if (!IS_ENABLED(DEBUG)) | ||
370 | return; | ||
371 | |||
372 | for (i = 0; i < SAHARA_MAX_HW_DESC; i++) { | ||
373 | dev_dbg(dev->device, "Descriptor (%d) (0x%08x):\n", | ||
374 | i, dev->hw_phys_desc[i]); | ||
375 | dev_dbg(dev->device, "\thdr = 0x%08x\n", dev->hw_desc[i]->hdr); | ||
376 | dev_dbg(dev->device, "\tlen1 = %u\n", dev->hw_desc[i]->len1); | ||
377 | dev_dbg(dev->device, "\tp1 = 0x%08x\n", dev->hw_desc[i]->p1); | ||
378 | dev_dbg(dev->device, "\tlen2 = %u\n", dev->hw_desc[i]->len2); | ||
379 | dev_dbg(dev->device, "\tp2 = 0x%08x\n", dev->hw_desc[i]->p2); | ||
380 | dev_dbg(dev->device, "\tnext = 0x%08x\n", | ||
381 | dev->hw_desc[i]->next); | ||
382 | } | ||
383 | dev_dbg(dev->device, "\n"); | ||
384 | } | ||
385 | |||
386 | static void sahara_dump_links(struct sahara_dev *dev) | ||
387 | { | ||
388 | int i; | ||
389 | |||
390 | if (!IS_ENABLED(DEBUG)) | ||
391 | return; | ||
392 | |||
393 | for (i = 0; i < SAHARA_MAX_HW_LINK; i++) { | ||
394 | dev_dbg(dev->device, "Link (%d) (0x%08x):\n", | ||
395 | i, dev->hw_phys_link[i]); | ||
396 | dev_dbg(dev->device, "\tlen = %u\n", dev->hw_link[i]->len); | ||
397 | dev_dbg(dev->device, "\tp = 0x%08x\n", dev->hw_link[i]->p); | ||
398 | dev_dbg(dev->device, "\tnext = 0x%08x\n", | ||
399 | dev->hw_link[i]->next); | ||
400 | } | ||
401 | dev_dbg(dev->device, "\n"); | ||
402 | } | ||
403 | |||
404 | static void sahara_aes_done_task(unsigned long data) | ||
405 | { | ||
406 | struct sahara_dev *dev = (struct sahara_dev *)data; | ||
407 | |||
408 | dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg, | ||
409 | DMA_TO_DEVICE); | ||
410 | dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, | ||
411 | DMA_FROM_DEVICE); | ||
412 | |||
413 | spin_lock(&dev->lock); | ||
414 | clear_bit(FLAGS_BUSY, &dev->flags); | ||
415 | spin_unlock(&dev->lock); | ||
416 | |||
417 | dev->req->base.complete(&dev->req->base, dev->error); | ||
418 | } | ||
419 | |||
420 | void sahara_watchdog(unsigned long data) | ||
421 | { | ||
422 | struct sahara_dev *dev = (struct sahara_dev *)data; | ||
423 | unsigned int err = sahara_read(dev, SAHARA_REG_ERRSTATUS); | ||
424 | unsigned int stat = sahara_read(dev, SAHARA_REG_STATUS); | ||
425 | |||
426 | sahara_decode_status(dev, stat); | ||
427 | sahara_decode_error(dev, err); | ||
428 | dev->error = -ETIMEDOUT; | ||
429 | sahara_aes_done_task(data); | ||
430 | } | ||
431 | |||
432 | static int sahara_hw_descriptor_create(struct sahara_dev *dev) | ||
433 | { | ||
434 | struct sahara_ctx *ctx = dev->ctx; | ||
435 | struct scatterlist *sg; | ||
436 | int ret; | ||
437 | int i, j; | ||
438 | |||
439 | /* Copy new key if necessary */ | ||
440 | if (ctx->flags & FLAGS_NEW_KEY) { | ||
441 | memcpy(dev->key_base, ctx->key, ctx->keylen); | ||
442 | ctx->flags &= ~FLAGS_NEW_KEY; | ||
443 | |||
444 | if (dev->flags & FLAGS_CBC) { | ||
445 | dev->hw_desc[0]->len1 = AES_BLOCK_SIZE; | ||
446 | dev->hw_desc[0]->p1 = dev->iv_phys_base; | ||
447 | } else { | ||
448 | dev->hw_desc[0]->len1 = 0; | ||
449 | dev->hw_desc[0]->p1 = 0; | ||
450 | } | ||
451 | dev->hw_desc[0]->len2 = ctx->keylen; | ||
452 | dev->hw_desc[0]->p2 = dev->key_phys_base; | ||
453 | dev->hw_desc[0]->next = dev->hw_phys_desc[1]; | ||
454 | } | ||
455 | dev->hw_desc[0]->hdr = sahara_aes_key_hdr(dev); | ||
456 | |||
457 | dev->nb_in_sg = sahara_sg_length(dev->in_sg, dev->total); | ||
458 | dev->nb_out_sg = sahara_sg_length(dev->out_sg, dev->total); | ||
459 | if ((dev->nb_in_sg + dev->nb_out_sg) > SAHARA_MAX_HW_LINK) { | ||
460 | dev_err(dev->device, "not enough hw links (%d)\n", | ||
461 | dev->nb_in_sg + dev->nb_out_sg); | ||
462 | return -EINVAL; | ||
463 | } | ||
464 | |||
465 | ret = dma_map_sg(dev->device, dev->in_sg, dev->nb_in_sg, | ||
466 | DMA_TO_DEVICE); | ||
467 | if (ret != dev->nb_in_sg) { | ||
468 | dev_err(dev->device, "couldn't map in sg\n"); | ||
469 | goto unmap_in; | ||
470 | } | ||
471 | ret = dma_map_sg(dev->device, dev->out_sg, dev->nb_out_sg, | ||
472 | DMA_FROM_DEVICE); | ||
473 | if (ret != dev->nb_out_sg) { | ||
474 | dev_err(dev->device, "couldn't map out sg\n"); | ||
475 | goto unmap_out; | ||
476 | } | ||
477 | |||
478 | /* Create input links */ | ||
479 | dev->hw_desc[1]->p1 = dev->hw_phys_link[0]; | ||
480 | sg = dev->in_sg; | ||
481 | for (i = 0; i < dev->nb_in_sg; i++) { | ||
482 | dev->hw_link[i]->len = sg->length; | ||
483 | dev->hw_link[i]->p = sg->dma_address; | ||
484 | if (i == (dev->nb_in_sg - 1)) { | ||
485 | dev->hw_link[i]->next = 0; | ||
486 | } else { | ||
487 | dev->hw_link[i]->next = dev->hw_phys_link[i + 1]; | ||
488 | sg = sg_next(sg); | ||
489 | } | ||
490 | } | ||
491 | |||
492 | /* Create output links */ | ||
493 | dev->hw_desc[1]->p2 = dev->hw_phys_link[i]; | ||
494 | sg = dev->out_sg; | ||
495 | for (j = i; j < dev->nb_out_sg + i; j++) { | ||
496 | dev->hw_link[j]->len = sg->length; | ||
497 | dev->hw_link[j]->p = sg->dma_address; | ||
498 | if (j == (dev->nb_out_sg + i - 1)) { | ||
499 | dev->hw_link[j]->next = 0; | ||
500 | } else { | ||
501 | dev->hw_link[j]->next = dev->hw_phys_link[j + 1]; | ||
502 | sg = sg_next(sg); | ||
503 | } | ||
504 | } | ||
505 | |||
506 | /* Fill remaining fields of hw_desc[1] */ | ||
507 | dev->hw_desc[1]->hdr = sahara_aes_data_link_hdr(dev); | ||
508 | dev->hw_desc[1]->len1 = dev->total; | ||
509 | dev->hw_desc[1]->len2 = dev->total; | ||
510 | dev->hw_desc[1]->next = 0; | ||
511 | |||
512 | sahara_dump_descriptors(dev); | ||
513 | sahara_dump_links(dev); | ||
514 | |||
515 | /* Start processing descriptor chain. */ | ||
516 | mod_timer(&dev->watchdog, | ||
517 | jiffies + msecs_to_jiffies(SAHARA_TIMEOUT_MS)); | ||
518 | sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR); | ||
519 | |||
520 | return 0; | ||
521 | |||
522 | unmap_out: | ||
523 | dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg, | ||
524 | DMA_TO_DEVICE); | ||
525 | unmap_in: | ||
526 | dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, | ||
527 | DMA_FROM_DEVICE); | ||
528 | |||
529 | return -EINVAL; | ||
530 | } | ||
531 | |||
/*
 * Tasklet that pulls the next request off the software queue and hands
 * it to the hardware.  Scheduled by sahara_aes_crypt() when the device
 * transitions idle -> busy.
 */
static void sahara_aes_queue_task(unsigned long data)
{
	struct sahara_dev *dev = (struct sahara_dev *)data;
	struct crypto_async_request *async_req, *backlog;
	struct sahara_ctx *ctx;
	struct sahara_aes_reqctx *rctx;
	struct ablkcipher_request *req;
	int ret;

	/* Dequeue under the lock; clear BUSY atomically if queue is empty. */
	spin_lock(&dev->lock);
	backlog = crypto_get_backlog(&dev->queue);
	async_req = crypto_dequeue_request(&dev->queue);
	if (!async_req)
		clear_bit(FLAGS_BUSY, &dev->flags);
	spin_unlock(&dev->lock);

	if (!async_req)
		return;

	/* A backlogged request may now be enqueued again by its caller. */
	if (backlog)
		backlog->complete(backlog, -EINPROGRESS);

	req = ablkcipher_request_cast(async_req);

	/* Request is ready to be dispatched by the device */
	dev_dbg(dev->device,
		"dispatch request (nbytes=%d, src=%p, dst=%p)\n",
		req->nbytes, req->src, req->dst);

	/* assign new request to device */
	dev->req = req;
	dev->total = req->nbytes;
	dev->in_sg = req->src;
	dev->out_sg = req->dst;

	/* Merge the per-request mode bits into the device flags. */
	rctx = ablkcipher_request_ctx(req);
	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
	rctx->mode &= FLAGS_MODE_MASK;
	dev->flags = (dev->flags & ~FLAGS_MODE_MASK) | rctx->mode;

	/* Copy the IV into the DMA-coherent buffer for CBC mode. */
	if ((dev->flags & FLAGS_CBC) && req->info)
		memcpy(dev->iv_base, req->info, AES_KEYSIZE_128);

	/* assign new context to device */
	ctx->dev = dev;
	dev->ctx = ctx;

	/* On dispatch failure, complete the request with the error code. */
	ret = sahara_hw_descriptor_create(dev);
	if (ret < 0) {
		spin_lock(&dev->lock);
		clear_bit(FLAGS_BUSY, &dev->flags);
		spin_unlock(&dev->lock);
		dev->req->base.complete(&dev->req->base, ret);
	}
}
587 | |||
588 | static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, | ||
589 | unsigned int keylen) | ||
590 | { | ||
591 | struct sahara_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
592 | int ret; | ||
593 | |||
594 | ctx->keylen = keylen; | ||
595 | |||
596 | /* SAHARA only supports 128bit keys */ | ||
597 | if (keylen == AES_KEYSIZE_128) { | ||
598 | memcpy(ctx->key, key, keylen); | ||
599 | ctx->flags |= FLAGS_NEW_KEY; | ||
600 | return 0; | ||
601 | } | ||
602 | |||
603 | if (keylen != AES_KEYSIZE_128 && | ||
604 | keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) | ||
605 | return -EINVAL; | ||
606 | |||
607 | /* | ||
608 | * The requested key size is not supported by HW, do a fallback. | ||
609 | */ | ||
610 | ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; | ||
611 | ctx->fallback->base.crt_flags |= | ||
612 | (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); | ||
613 | |||
614 | ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); | ||
615 | if (ret) { | ||
616 | struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm); | ||
617 | |||
618 | tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK; | ||
619 | tfm_aux->crt_flags |= | ||
620 | (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK); | ||
621 | } | ||
622 | return ret; | ||
623 | } | ||
624 | |||
/*
 * Common entry point for all four en/decrypt operations: validate the
 * request length, record the mode, enqueue the request and kick the
 * dispatch tasklet if the device was idle.
 */
static int sahara_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
{
	struct sahara_ctx *ctx = crypto_ablkcipher_ctx(
		crypto_ablkcipher_reqtfm(req));
	struct sahara_aes_reqctx *rctx = ablkcipher_request_ctx(req);
	struct sahara_dev *dev = dev_ptr;
	int err = 0;
	int busy;

	dev_dbg(dev->device, "nbytes: %d, enc: %d, cbc: %d\n",
		req->nbytes, !!(mode & FLAGS_ENCRYPT), !!(mode & FLAGS_CBC));

	/* The hardware only processes whole AES blocks. */
	if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) {
		dev_err(dev->device,
			"request size is not exact amount of AES blocks\n");
		return -EINVAL;
	}

	ctx->dev = dev;

	rctx->mode = mode;
	/*
	 * Enqueue and read-modify the BUSY bit under one lock section so
	 * exactly one caller observes the idle->busy transition and
	 * schedules the queue tasklet.
	 */
	spin_lock_bh(&dev->lock);
	err = ablkcipher_enqueue_request(&dev->queue, req);
	busy = test_and_set_bit(FLAGS_BUSY, &dev->flags);
	spin_unlock_bh(&dev->lock);

	if (!busy)
		tasklet_schedule(&dev->queue_task);

	/* Usually -EINPROGRESS (or -EBUSY for a backlogged request). */
	return err;
}
656 | |||
657 | static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) | ||
658 | { | ||
659 | struct crypto_tfm *tfm = | ||
660 | crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); | ||
661 | struct sahara_ctx *ctx = crypto_ablkcipher_ctx( | ||
662 | crypto_ablkcipher_reqtfm(req)); | ||
663 | int err; | ||
664 | |||
665 | if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { | ||
666 | ablkcipher_request_set_tfm(req, ctx->fallback); | ||
667 | err = crypto_ablkcipher_encrypt(req); | ||
668 | ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); | ||
669 | return err; | ||
670 | } | ||
671 | |||
672 | return sahara_aes_crypt(req, FLAGS_ENCRYPT); | ||
673 | } | ||
674 | |||
675 | static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) | ||
676 | { | ||
677 | struct crypto_tfm *tfm = | ||
678 | crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); | ||
679 | struct sahara_ctx *ctx = crypto_ablkcipher_ctx( | ||
680 | crypto_ablkcipher_reqtfm(req)); | ||
681 | int err; | ||
682 | |||
683 | if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { | ||
684 | ablkcipher_request_set_tfm(req, ctx->fallback); | ||
685 | err = crypto_ablkcipher_decrypt(req); | ||
686 | ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); | ||
687 | return err; | ||
688 | } | ||
689 | |||
690 | return sahara_aes_crypt(req, 0); | ||
691 | } | ||
692 | |||
693 | static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) | ||
694 | { | ||
695 | struct crypto_tfm *tfm = | ||
696 | crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); | ||
697 | struct sahara_ctx *ctx = crypto_ablkcipher_ctx( | ||
698 | crypto_ablkcipher_reqtfm(req)); | ||
699 | int err; | ||
700 | |||
701 | if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { | ||
702 | ablkcipher_request_set_tfm(req, ctx->fallback); | ||
703 | err = crypto_ablkcipher_encrypt(req); | ||
704 | ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); | ||
705 | return err; | ||
706 | } | ||
707 | |||
708 | return sahara_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC); | ||
709 | } | ||
710 | |||
711 | static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req) | ||
712 | { | ||
713 | struct crypto_tfm *tfm = | ||
714 | crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); | ||
715 | struct sahara_ctx *ctx = crypto_ablkcipher_ctx( | ||
716 | crypto_ablkcipher_reqtfm(req)); | ||
717 | int err; | ||
718 | |||
719 | if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { | ||
720 | ablkcipher_request_set_tfm(req, ctx->fallback); | ||
721 | err = crypto_ablkcipher_decrypt(req); | ||
722 | ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); | ||
723 | return err; | ||
724 | } | ||
725 | |||
726 | return sahara_aes_crypt(req, FLAGS_CBC); | ||
727 | } | ||
728 | |||
729 | static int sahara_aes_cra_init(struct crypto_tfm *tfm) | ||
730 | { | ||
731 | const char *name = tfm->__crt_alg->cra_name; | ||
732 | struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); | ||
733 | |||
734 | ctx->fallback = crypto_alloc_ablkcipher(name, 0, | ||
735 | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); | ||
736 | if (IS_ERR(ctx->fallback)) { | ||
737 | pr_err("Error allocating fallback algo %s\n", name); | ||
738 | return PTR_ERR(ctx->fallback); | ||
739 | } | ||
740 | |||
741 | tfm->crt_ablkcipher.reqsize = sizeof(struct sahara_aes_reqctx); | ||
742 | |||
743 | return 0; | ||
744 | } | ||
745 | |||
746 | static void sahara_aes_cra_exit(struct crypto_tfm *tfm) | ||
747 | { | ||
748 | struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); | ||
749 | |||
750 | if (ctx->fallback) | ||
751 | crypto_free_ablkcipher(ctx->fallback); | ||
752 | ctx->fallback = NULL; | ||
753 | } | ||
754 | |||
/*
 * Algorithms exported to the crypto API: async ecb(aes) and cbc(aes),
 * both with a software fallback for unsupported key sizes.
 */
static struct crypto_alg aes_algs[] = {
{
	.cra_name		= "ecb(aes)",
	.cra_driver_name	= "sahara-ecb-aes",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
					CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct sahara_ctx),
	.cra_alignmask		= 0x0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= sahara_aes_cra_init,
	.cra_exit		= sahara_aes_cra_exit,
	.cra_u.ablkcipher = {
		/* full AES range advertised; >128-bit goes to the fallback */
		.min_keysize	= AES_MIN_KEY_SIZE ,
		.max_keysize	= AES_MAX_KEY_SIZE,
		.setkey		= sahara_aes_setkey,
		.encrypt	= sahara_aes_ecb_encrypt,
		.decrypt	= sahara_aes_ecb_decrypt,
	}
}, {
	.cra_name		= "cbc(aes)",
	.cra_driver_name	= "sahara-cbc-aes",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
					CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct sahara_ctx),
	.cra_alignmask		= 0x0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= sahara_aes_cra_init,
	.cra_exit		= sahara_aes_cra_exit,
	.cra_u.ablkcipher = {
		/* full AES range advertised; >128-bit goes to the fallback */
		.min_keysize	= AES_MIN_KEY_SIZE ,
		.max_keysize	= AES_MAX_KEY_SIZE,
		.ivsize		= AES_BLOCK_SIZE,
		.setkey		= sahara_aes_setkey,
		.encrypt	= sahara_aes_cbc_encrypt,
		.decrypt	= sahara_aes_cbc_decrypt,
	}
}
};
799 | |||
800 | static irqreturn_t sahara_irq_handler(int irq, void *data) | ||
801 | { | ||
802 | struct sahara_dev *dev = (struct sahara_dev *)data; | ||
803 | unsigned int stat = sahara_read(dev, SAHARA_REG_STATUS); | ||
804 | unsigned int err = sahara_read(dev, SAHARA_REG_ERRSTATUS); | ||
805 | |||
806 | del_timer(&dev->watchdog); | ||
807 | |||
808 | sahara_write(dev, SAHARA_CMD_CLEAR_INT | SAHARA_CMD_CLEAR_ERR, | ||
809 | SAHARA_REG_CMD); | ||
810 | |||
811 | sahara_decode_status(dev, stat); | ||
812 | |||
813 | if (SAHARA_STATUS_GET_STATE(stat) == SAHARA_STATE_BUSY) { | ||
814 | return IRQ_NONE; | ||
815 | } else if (SAHARA_STATUS_GET_STATE(stat) == SAHARA_STATE_COMPLETE) { | ||
816 | dev->error = 0; | ||
817 | } else { | ||
818 | sahara_decode_error(dev, err); | ||
819 | dev->error = -EINVAL; | ||
820 | } | ||
821 | |||
822 | tasklet_schedule(&dev->done_task); | ||
823 | |||
824 | return IRQ_HANDLED; | ||
825 | } | ||
826 | |||
827 | |||
828 | static int sahara_register_algs(struct sahara_dev *dev) | ||
829 | { | ||
830 | int err, i, j; | ||
831 | |||
832 | for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { | ||
833 | INIT_LIST_HEAD(&aes_algs[i].cra_list); | ||
834 | err = crypto_register_alg(&aes_algs[i]); | ||
835 | if (err) | ||
836 | goto err_aes_algs; | ||
837 | } | ||
838 | |||
839 | return 0; | ||
840 | |||
841 | err_aes_algs: | ||
842 | for (j = 0; j < i; j++) | ||
843 | crypto_unregister_alg(&aes_algs[j]); | ||
844 | |||
845 | return err; | ||
846 | } | ||
847 | |||
848 | static void sahara_unregister_algs(struct sahara_dev *dev) | ||
849 | { | ||
850 | int i; | ||
851 | |||
852 | for (i = 0; i < ARRAY_SIZE(aes_algs); i++) | ||
853 | crypto_unregister_alg(&aes_algs[i]); | ||
854 | } | ||
855 | |||
/* Non-DT platform-bus match table. */
static struct platform_device_id sahara_platform_ids[] = {
	{ .name = "sahara-imx27" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(platform, sahara_platform_ids);
861 | |||
862 | static struct of_device_id sahara_dt_ids[] = { | ||
863 | { .compatible = "fsl,imx27-sahara" }, | ||
864 | { /* sentinel */ } | ||
865 | }; | ||
866 | MODULE_DEVICE_TABLE(platform, sahara_dt_ids); | ||
867 | |||
868 | static int sahara_probe(struct platform_device *pdev) | ||
869 | { | ||
870 | struct sahara_dev *dev; | ||
871 | struct resource *res; | ||
872 | u32 version; | ||
873 | int irq; | ||
874 | int err; | ||
875 | int i; | ||
876 | |||
877 | dev = devm_kzalloc(&pdev->dev, sizeof(struct sahara_dev), GFP_KERNEL); | ||
878 | if (dev == NULL) { | ||
879 | dev_err(&pdev->dev, "unable to alloc data struct.\n"); | ||
880 | return -ENOMEM; | ||
881 | } | ||
882 | |||
883 | dev->device = &pdev->dev; | ||
884 | platform_set_drvdata(pdev, dev); | ||
885 | |||
886 | /* Get the base address */ | ||
887 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | ||
888 | if (!res) { | ||
889 | dev_err(&pdev->dev, "failed to get memory region resource\n"); | ||
890 | return -ENODEV; | ||
891 | } | ||
892 | |||
893 | if (devm_request_mem_region(&pdev->dev, res->start, | ||
894 | resource_size(res), SAHARA_NAME) == NULL) { | ||
895 | dev_err(&pdev->dev, "failed to request memory region\n"); | ||
896 | return -ENOENT; | ||
897 | } | ||
898 | dev->regs_base = devm_ioremap(&pdev->dev, res->start, | ||
899 | resource_size(res)); | ||
900 | if (!dev->regs_base) { | ||
901 | dev_err(&pdev->dev, "failed to ioremap address region\n"); | ||
902 | return -ENOENT; | ||
903 | } | ||
904 | |||
905 | /* Get the IRQ */ | ||
906 | irq = platform_get_irq(pdev, 0); | ||
907 | if (irq < 0) { | ||
908 | dev_err(&pdev->dev, "failed to get irq resource\n"); | ||
909 | return irq; | ||
910 | } | ||
911 | |||
912 | if (devm_request_irq(&pdev->dev, irq, sahara_irq_handler, | ||
913 | 0, SAHARA_NAME, dev) < 0) { | ||
914 | dev_err(&pdev->dev, "failed to request irq\n"); | ||
915 | return -ENOENT; | ||
916 | } | ||
917 | |||
918 | /* clocks */ | ||
919 | dev->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); | ||
920 | if (IS_ERR(dev->clk_ipg)) { | ||
921 | dev_err(&pdev->dev, "Could not get ipg clock\n"); | ||
922 | return PTR_ERR(dev->clk_ipg); | ||
923 | } | ||
924 | |||
925 | dev->clk_ahb = devm_clk_get(&pdev->dev, "ahb"); | ||
926 | if (IS_ERR(dev->clk_ahb)) { | ||
927 | dev_err(&pdev->dev, "Could not get ahb clock\n"); | ||
928 | return PTR_ERR(dev->clk_ahb); | ||
929 | } | ||
930 | |||
931 | /* Allocate HW descriptors */ | ||
932 | dev->hw_desc[0] = dma_alloc_coherent(&pdev->dev, | ||
933 | SAHARA_MAX_HW_DESC * sizeof(struct sahara_hw_desc), | ||
934 | &dev->hw_phys_desc[0], GFP_KERNEL); | ||
935 | if (!dev->hw_desc[0]) { | ||
936 | dev_err(&pdev->dev, "Could not allocate hw descriptors\n"); | ||
937 | return -ENOMEM; | ||
938 | } | ||
939 | dev->hw_desc[1] = dev->hw_desc[0] + 1; | ||
940 | dev->hw_phys_desc[1] = dev->hw_phys_desc[0] + | ||
941 | sizeof(struct sahara_hw_desc); | ||
942 | |||
943 | /* Allocate space for iv and key */ | ||
944 | dev->key_base = dma_alloc_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, | ||
945 | &dev->key_phys_base, GFP_KERNEL); | ||
946 | if (!dev->key_base) { | ||
947 | dev_err(&pdev->dev, "Could not allocate memory for key\n"); | ||
948 | err = -ENOMEM; | ||
949 | goto err_key; | ||
950 | } | ||
951 | dev->iv_base = dev->key_base + AES_KEYSIZE_128; | ||
952 | dev->iv_phys_base = dev->key_phys_base + AES_KEYSIZE_128; | ||
953 | |||
954 | /* Allocate space for HW links */ | ||
955 | dev->hw_link[0] = dma_alloc_coherent(&pdev->dev, | ||
956 | SAHARA_MAX_HW_LINK * sizeof(struct sahara_hw_link), | ||
957 | &dev->hw_phys_link[0], GFP_KERNEL); | ||
958 | if (!dev->hw_link) { | ||
959 | dev_err(&pdev->dev, "Could not allocate hw links\n"); | ||
960 | err = -ENOMEM; | ||
961 | goto err_link; | ||
962 | } | ||
963 | for (i = 1; i < SAHARA_MAX_HW_LINK; i++) { | ||
964 | dev->hw_phys_link[i] = dev->hw_phys_link[i - 1] + | ||
965 | sizeof(struct sahara_hw_link); | ||
966 | dev->hw_link[i] = dev->hw_link[i - 1] + 1; | ||
967 | } | ||
968 | |||
969 | crypto_init_queue(&dev->queue, SAHARA_QUEUE_LENGTH); | ||
970 | |||
971 | dev_ptr = dev; | ||
972 | |||
973 | tasklet_init(&dev->queue_task, sahara_aes_queue_task, | ||
974 | (unsigned long)dev); | ||
975 | tasklet_init(&dev->done_task, sahara_aes_done_task, | ||
976 | (unsigned long)dev); | ||
977 | |||
978 | init_timer(&dev->watchdog); | ||
979 | dev->watchdog.function = &sahara_watchdog; | ||
980 | dev->watchdog.data = (unsigned long)dev; | ||
981 | |||
982 | clk_prepare_enable(dev->clk_ipg); | ||
983 | clk_prepare_enable(dev->clk_ahb); | ||
984 | |||
985 | version = sahara_read(dev, SAHARA_REG_VERSION); | ||
986 | if (version != SAHARA_VERSION_3) { | ||
987 | dev_err(&pdev->dev, "SAHARA version %d not supported\n", | ||
988 | version); | ||
989 | err = -ENODEV; | ||
990 | goto err_algs; | ||
991 | } | ||
992 | |||
993 | sahara_write(dev, SAHARA_CMD_RESET | SAHARA_CMD_MODE_BATCH, | ||
994 | SAHARA_REG_CMD); | ||
995 | sahara_write(dev, SAHARA_CONTROL_SET_THROTTLE(0) | | ||
996 | SAHARA_CONTROL_SET_MAXBURST(8) | | ||
997 | SAHARA_CONTROL_RNG_AUTORSD | | ||
998 | SAHARA_CONTROL_ENABLE_INT, | ||
999 | SAHARA_REG_CONTROL); | ||
1000 | |||
1001 | err = sahara_register_algs(dev); | ||
1002 | if (err) | ||
1003 | goto err_algs; | ||
1004 | |||
1005 | dev_info(&pdev->dev, "SAHARA version %d initialized\n", version); | ||
1006 | |||
1007 | return 0; | ||
1008 | |||
1009 | err_algs: | ||
1010 | dma_free_coherent(&pdev->dev, | ||
1011 | SAHARA_MAX_HW_LINK * sizeof(struct sahara_hw_link), | ||
1012 | dev->hw_link[0], dev->hw_phys_link[0]); | ||
1013 | clk_disable_unprepare(dev->clk_ipg); | ||
1014 | clk_disable_unprepare(dev->clk_ahb); | ||
1015 | dev_ptr = NULL; | ||
1016 | err_link: | ||
1017 | dma_free_coherent(&pdev->dev, | ||
1018 | 2 * AES_KEYSIZE_128, | ||
1019 | dev->key_base, dev->key_phys_base); | ||
1020 | err_key: | ||
1021 | dma_free_coherent(&pdev->dev, | ||
1022 | SAHARA_MAX_HW_DESC * sizeof(struct sahara_hw_desc), | ||
1023 | dev->hw_desc[0], dev->hw_phys_desc[0]); | ||
1024 | |||
1025 | return err; | ||
1026 | } | ||
1027 | |||
1028 | static int sahara_remove(struct platform_device *pdev) | ||
1029 | { | ||
1030 | struct sahara_dev *dev = platform_get_drvdata(pdev); | ||
1031 | |||
1032 | dma_free_coherent(&pdev->dev, | ||
1033 | SAHARA_MAX_HW_LINK * sizeof(struct sahara_hw_link), | ||
1034 | dev->hw_link[0], dev->hw_phys_link[0]); | ||
1035 | dma_free_coherent(&pdev->dev, | ||
1036 | 2 * AES_KEYSIZE_128, | ||
1037 | dev->key_base, dev->key_phys_base); | ||
1038 | dma_free_coherent(&pdev->dev, | ||
1039 | SAHARA_MAX_HW_DESC * sizeof(struct sahara_hw_desc), | ||
1040 | dev->hw_desc[0], dev->hw_phys_desc[0]); | ||
1041 | |||
1042 | tasklet_kill(&dev->done_task); | ||
1043 | tasklet_kill(&dev->queue_task); | ||
1044 | |||
1045 | sahara_unregister_algs(dev); | ||
1046 | |||
1047 | clk_disable_unprepare(dev->clk_ipg); | ||
1048 | clk_disable_unprepare(dev->clk_ahb); | ||
1049 | |||
1050 | dev_ptr = NULL; | ||
1051 | |||
1052 | return 0; | ||
1053 | } | ||
1054 | |||
/*
 * Platform driver glue: binds via either the legacy platform id
 * ("sahara-imx27") or the devicetree compatible ("fsl,imx27-sahara").
 */
static struct platform_driver sahara_driver = {
	.probe		= sahara_probe,
	.remove		= sahara_remove,
	.driver		= {
		.name	= SAHARA_NAME,
		.owner	= THIS_MODULE,
		.of_match_table = of_match_ptr(sahara_dt_ids),
	},
	.id_table = sahara_platform_ids,
};

module_platform_driver(sahara_driver);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Javier Martin <javier.martin@vista-silicon.com>");
MODULE_DESCRIPTION("SAHARA2 HW crypto accelerator");
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 1827e9f1f873..cf5508967539 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c | |||
@@ -938,6 +938,7 @@ static int hash_dma_final(struct ahash_request *req) | |||
938 | if (!ctx->device->dma.nents) { | 938 | if (!ctx->device->dma.nents) { |
939 | dev_err(device_data->dev, "[%s] " | 939 | dev_err(device_data->dev, "[%s] " |
940 | "ctx->device->dma.nents = 0", __func__); | 940 | "ctx->device->dma.nents = 0", __func__); |
941 | ret = ctx->device->dma.nents; | ||
941 | goto out; | 942 | goto out; |
942 | } | 943 | } |
943 | 944 | ||
@@ -945,6 +946,7 @@ static int hash_dma_final(struct ahash_request *req) | |||
945 | if (bytes_written != req->nbytes) { | 946 | if (bytes_written != req->nbytes) { |
946 | dev_err(device_data->dev, "[%s] " | 947 | dev_err(device_data->dev, "[%s] " |
947 | "hash_dma_write() failed!", __func__); | 948 | "hash_dma_write() failed!", __func__); |
949 | ret = bytes_written; | ||
948 | goto out; | 950 | goto out; |
949 | } | 951 | } |
950 | 952 | ||
@@ -1367,14 +1369,12 @@ static int hash_setkey(struct crypto_ahash *tfm, | |||
1367 | /** | 1369 | /** |
1368 | * Freed in final. | 1370 | * Freed in final. |
1369 | */ | 1371 | */ |
1370 | ctx->key = kmalloc(keylen, GFP_KERNEL); | 1372 | ctx->key = kmemdup(key, keylen, GFP_KERNEL); |
1371 | if (!ctx->key) { | 1373 | if (!ctx->key) { |
1372 | pr_err(DEV_DBG_NAME " [%s] Failed to allocate ctx->key " | 1374 | pr_err(DEV_DBG_NAME " [%s] Failed to allocate ctx->key " |
1373 | "for %d\n", __func__, alg); | 1375 | "for %d\n", __func__, alg); |
1374 | return -ENOMEM; | 1376 | return -ENOMEM; |
1375 | } | 1377 | } |
1376 | |||
1377 | memcpy(ctx->key, key, keylen); | ||
1378 | ctx->keylen = keylen; | 1378 | ctx->keylen = keylen; |
1379 | 1379 | ||
1380 | return ret; | 1380 | return ret; |
diff --git a/include/crypto/sha.h b/include/crypto/sha.h index c6c9c1fe460c..190f8a0e0242 100644 --- a/include/crypto/sha.h +++ b/include/crypto/sha.h | |||
@@ -87,4 +87,9 @@ struct shash_desc; | |||
87 | extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, | 87 | extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data, |
88 | unsigned int len); | 88 | unsigned int len); |
89 | 89 | ||
90 | extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data, | ||
91 | unsigned int len); | ||
92 | |||
93 | extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data, | ||
94 | unsigned int len); | ||
90 | #endif | 95 | #endif |
diff --git a/include/linux/platform_data/atmel-aes.h b/include/linux/platform_data/atmel-aes.h deleted file mode 100644 index ab68082fbcb0..000000000000 --- a/include/linux/platform_data/atmel-aes.h +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | #ifndef __LINUX_ATMEL_AES_H | ||
2 | #define __LINUX_ATMEL_AES_H | ||
3 | |||
4 | #include <linux/platform_data/dma-atmel.h> | ||
5 | |||
6 | /** | ||
7 | * struct aes_dma_data - DMA data for AES | ||
8 | */ | ||
9 | struct aes_dma_data { | ||
10 | struct at_dma_slave txdata; | ||
11 | struct at_dma_slave rxdata; | ||
12 | }; | ||
13 | |||
14 | /** | ||
15 | * struct aes_platform_data - board-specific AES configuration | ||
16 | * @dma_slave: DMA slave interface to use in data transfers. | ||
17 | */ | ||
18 | struct aes_platform_data { | ||
19 | struct aes_dma_data *dma_slave; | ||
20 | }; | ||
21 | |||
22 | #endif /* __LINUX_ATMEL_AES_H */ | ||
diff --git a/include/linux/platform_data/crypto-atmel.h b/include/linux/platform_data/crypto-atmel.h new file mode 100644 index 000000000000..b46e0d9062a0 --- /dev/null +++ b/include/linux/platform_data/crypto-atmel.h | |||
@@ -0,0 +1,22 @@ | |||
1 | #ifndef __LINUX_CRYPTO_ATMEL_H | ||
2 | #define __LINUX_CRYPTO_ATMEL_H | ||
3 | |||
4 | #include <linux/platform_data/dma-atmel.h> | ||
5 | |||
6 | /** | ||
7 | * struct crypto_dma_data - DMA data for AES/TDES/SHA | ||
8 | */ | ||
9 | struct crypto_dma_data { | ||
10 | struct at_dma_slave txdata; | ||
11 | struct at_dma_slave rxdata; | ||
12 | }; | ||
13 | |||
14 | /** | ||
15 | * struct crypto_platform_data - board-specific AES/TDES/SHA configuration | ||
16 | * @dma_slave: DMA slave interface to use in data transfers. | ||
17 | */ | ||
18 | struct crypto_platform_data { | ||
19 | struct crypto_dma_data *dma_slave; | ||
20 | }; | ||
21 | |||
22 | #endif /* __LINUX_CRYPTO_ATMEL_H */ | ||
diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h index 3e08a1c86830..46eb27ddbfab 100644 --- a/include/linux/timeriomem-rng.h +++ b/include/linux/timeriomem-rng.h | |||
@@ -8,12 +8,7 @@ | |||
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/completion.h> | ||
12 | |||
13 | struct timeriomem_rng_data { | 11 | struct timeriomem_rng_data { |
14 | struct completion completion; | ||
15 | unsigned int present:1; | ||
16 | |||
17 | void __iomem *address; | 12 | void __iomem *address; |
18 | 13 | ||
19 | /* measures in usecs */ | 14 | /* measures in usecs */ |
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 6fb9d00a75dc..ab4ef72f0b1d 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c | |||
@@ -311,6 +311,19 @@ static struct xfrm_algo_desc aalg_list[] = { | |||
311 | .sadb_alg_maxbits = 128 | 311 | .sadb_alg_maxbits = 128 |
312 | } | 312 | } |
313 | }, | 313 | }, |
314 | { | ||
315 | /* rfc4494 */ | ||
316 | .name = "cmac(aes)", | ||
317 | |||
318 | .uinfo = { | ||
319 | .auth = { | ||
320 | .icv_truncbits = 96, | ||
321 | .icv_fullbits = 128, | ||
322 | } | ||
323 | }, | ||
324 | |||
325 | .pfkey_supported = 0, | ||
326 | }, | ||
314 | }; | 327 | }; |
315 | 328 | ||
316 | static struct xfrm_algo_desc ealg_list[] = { | 329 | static struct xfrm_algo_desc ealg_list[] = { |