diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-25 18:56:15 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-25 18:56:15 -0500 |
commit | 32dc43e40a2707d0cb1ab8768d080c3e9bcfed52 (patch) | |
tree | 415f3a1935fba0db2f0410360983587bf65ee712 /arch | |
parent | d414c104e26fd3b597f855cc29473a8b1527fb4c (diff) | |
parent | 8fd61d34226014fe7886babfca6f45a7eff89d25 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
"Here is the crypto update for 3.9:
- Added accelerated implementation of crc32 using pclmulqdq.
- Added test vector for fcrypt.
- Added support for OMAP4/AM33XX cipher and hash.
- Fixed loose crypto_user input checks.
- Misc fixes"
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (43 commits)
crypto: user - ensure user supplied strings are nul-terminated
crypto: user - fix empty string test in report API
crypto: user - fix info leaks in report API
crypto: caam - Added property fsl,sec-era in SEC4.0 device tree binding.
crypto: use ERR_CAST
crypto: atmel-aes - adjust duplicate test
crypto: crc32-pclmul - Kill warning on x86-32
crypto: x86/twofish - assembler clean-ups: use ENTRY/ENDPROC, localize jump labels
crypto: x86/sha1 - assembler clean-ups: use ENTRY/ENDPROC
crypto: x86/serpent - use ENTRY/ENDPROC for assember functions and localize jump targets
crypto: x86/salsa20 - assembler cleanup, use ENTRY/ENDPROC for assember functions and rename ECRYPT_* to salsa20_*
crypto: x86/ghash - assembler clean-up: use ENDPROC at end of assember functions
crypto: x86/crc32c - assembler clean-up: use ENTRY/ENDPROC
crypto: cast6-avx: use ENTRY()/ENDPROC() for assembler functions
crypto: cast5-avx: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
crypto: blowfish-x86_64: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
crypto: aesni-intel - add ENDPROC statements for assembler functions
crypto: x86/aes - assembler clean-ups: use ENTRY/ENDPROC, localize jump targets
crypto: testmgr - add test vector for fcrypt
...
Diffstat (limited to 'arch')
24 files changed, 668 insertions, 294 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e0ca7c9ac383..63947a8f9f0f 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o | |||
27 | 27 | ||
28 | obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o | 28 | obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o |
29 | obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o | 29 | obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o |
30 | obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o | ||
30 | 31 | ||
31 | aes-i586-y := aes-i586-asm_32.o aes_glue.o | 32 | aes-i586-y := aes-i586-asm_32.o aes_glue.o |
32 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o | 33 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o |
@@ -52,3 +53,4 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | |||
52 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o | 53 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o |
53 | crc32c-intel-y := crc32c-intel_glue.o | 54 | crc32c-intel-y := crc32c-intel_glue.o |
54 | crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o | 55 | crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o |
56 | crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o | ||
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S index b949ec2f9af4..2849dbc59e11 100644 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ b/arch/x86/crypto/aes-i586-asm_32.S | |||
@@ -36,6 +36,7 @@ | |||
36 | .file "aes-i586-asm.S" | 36 | .file "aes-i586-asm.S" |
37 | .text | 37 | .text |
38 | 38 | ||
39 | #include <linux/linkage.h> | ||
39 | #include <asm/asm-offsets.h> | 40 | #include <asm/asm-offsets.h> |
40 | 41 | ||
41 | #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) | 42 | #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) |
@@ -219,14 +220,10 @@ | |||
219 | // AES (Rijndael) Encryption Subroutine | 220 | // AES (Rijndael) Encryption Subroutine |
220 | /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ | 221 | /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ |
221 | 222 | ||
222 | .global aes_enc_blk | ||
223 | |||
224 | .extern crypto_ft_tab | 223 | .extern crypto_ft_tab |
225 | .extern crypto_fl_tab | 224 | .extern crypto_fl_tab |
226 | 225 | ||
227 | .align 4 | 226 | ENTRY(aes_enc_blk) |
228 | |||
229 | aes_enc_blk: | ||
230 | push %ebp | 227 | push %ebp |
231 | mov ctx(%esp),%ebp | 228 | mov ctx(%esp),%ebp |
232 | 229 | ||
@@ -290,18 +287,15 @@ aes_enc_blk: | |||
290 | mov %r0,(%ebp) | 287 | mov %r0,(%ebp) |
291 | pop %ebp | 288 | pop %ebp |
292 | ret | 289 | ret |
290 | ENDPROC(aes_enc_blk) | ||
293 | 291 | ||
294 | // AES (Rijndael) Decryption Subroutine | 292 | // AES (Rijndael) Decryption Subroutine |
295 | /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ | 293 | /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ |
296 | 294 | ||
297 | .global aes_dec_blk | ||
298 | |||
299 | .extern crypto_it_tab | 295 | .extern crypto_it_tab |
300 | .extern crypto_il_tab | 296 | .extern crypto_il_tab |
301 | 297 | ||
302 | .align 4 | 298 | ENTRY(aes_dec_blk) |
303 | |||
304 | aes_dec_blk: | ||
305 | push %ebp | 299 | push %ebp |
306 | mov ctx(%esp),%ebp | 300 | mov ctx(%esp),%ebp |
307 | 301 | ||
@@ -365,3 +359,4 @@ aes_dec_blk: | |||
365 | mov %r0,(%ebp) | 359 | mov %r0,(%ebp) |
366 | pop %ebp | 360 | pop %ebp |
367 | ret | 361 | ret |
362 | ENDPROC(aes_dec_blk) | ||
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 5b577d5a059b..910565547163 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S | |||
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | .text | 16 | .text |
17 | 17 | ||
18 | #include <linux/linkage.h> | ||
18 | #include <asm/asm-offsets.h> | 19 | #include <asm/asm-offsets.h> |
19 | 20 | ||
20 | #define R1 %rax | 21 | #define R1 %rax |
@@ -49,10 +50,8 @@ | |||
49 | #define R11 %r11 | 50 | #define R11 %r11 |
50 | 51 | ||
51 | #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ | 52 | #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ |
52 | .global FUNC; \ | 53 | ENTRY(FUNC); \ |
53 | .type FUNC,@function; \ | 54 | movq r1,r2; \ |
54 | .align 8; \ | ||
55 | FUNC: movq r1,r2; \ | ||
56 | movq r3,r4; \ | 55 | movq r3,r4; \ |
57 | leaq KEY+48(r8),r9; \ | 56 | leaq KEY+48(r8),r9; \ |
58 | movq r10,r11; \ | 57 | movq r10,r11; \ |
@@ -71,14 +70,15 @@ FUNC: movq r1,r2; \ | |||
71 | je B192; \ | 70 | je B192; \ |
72 | leaq 32(r9),r9; | 71 | leaq 32(r9),r9; |
73 | 72 | ||
74 | #define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ | 73 | #define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \ |
75 | movq r1,r2; \ | 74 | movq r1,r2; \ |
76 | movq r3,r4; \ | 75 | movq r3,r4; \ |
77 | movl r5 ## E,(r9); \ | 76 | movl r5 ## E,(r9); \ |
78 | movl r6 ## E,4(r9); \ | 77 | movl r6 ## E,4(r9); \ |
79 | movl r7 ## E,8(r9); \ | 78 | movl r7 ## E,8(r9); \ |
80 | movl r8 ## E,12(r9); \ | 79 | movl r8 ## E,12(r9); \ |
81 | ret; | 80 | ret; \ |
81 | ENDPROC(FUNC); | ||
82 | 82 | ||
83 | #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ | 83 | #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ |
84 | movzbl r2 ## H,r5 ## E; \ | 84 | movzbl r2 ## H,r5 ## E; \ |
@@ -133,7 +133,7 @@ FUNC: movq r1,r2; \ | |||
133 | #define entry(FUNC,KEY,B128,B192) \ | 133 | #define entry(FUNC,KEY,B128,B192) \ |
134 | prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) | 134 | prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) |
135 | 135 | ||
136 | #define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11) | 136 | #define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11) |
137 | 137 | ||
138 | #define encrypt_round(TAB,OFFSET) \ | 138 | #define encrypt_round(TAB,OFFSET) \ |
139 | round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ | 139 | round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ |
@@ -151,12 +151,12 @@ FUNC: movq r1,r2; \ | |||
151 | 151 | ||
152 | /* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ | 152 | /* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ |
153 | 153 | ||
154 | entry(aes_enc_blk,0,enc128,enc192) | 154 | entry(aes_enc_blk,0,.Le128,.Le192) |
155 | encrypt_round(crypto_ft_tab,-96) | 155 | encrypt_round(crypto_ft_tab,-96) |
156 | encrypt_round(crypto_ft_tab,-80) | 156 | encrypt_round(crypto_ft_tab,-80) |
157 | enc192: encrypt_round(crypto_ft_tab,-64) | 157 | .Le192: encrypt_round(crypto_ft_tab,-64) |
158 | encrypt_round(crypto_ft_tab,-48) | 158 | encrypt_round(crypto_ft_tab,-48) |
159 | enc128: encrypt_round(crypto_ft_tab,-32) | 159 | .Le128: encrypt_round(crypto_ft_tab,-32) |
160 | encrypt_round(crypto_ft_tab,-16) | 160 | encrypt_round(crypto_ft_tab,-16) |
161 | encrypt_round(crypto_ft_tab, 0) | 161 | encrypt_round(crypto_ft_tab, 0) |
162 | encrypt_round(crypto_ft_tab, 16) | 162 | encrypt_round(crypto_ft_tab, 16) |
@@ -166,16 +166,16 @@ enc128: encrypt_round(crypto_ft_tab,-32) | |||
166 | encrypt_round(crypto_ft_tab, 80) | 166 | encrypt_round(crypto_ft_tab, 80) |
167 | encrypt_round(crypto_ft_tab, 96) | 167 | encrypt_round(crypto_ft_tab, 96) |
168 | encrypt_final(crypto_fl_tab,112) | 168 | encrypt_final(crypto_fl_tab,112) |
169 | return | 169 | return(aes_enc_blk) |
170 | 170 | ||
171 | /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ | 171 | /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ |
172 | 172 | ||
173 | entry(aes_dec_blk,240,dec128,dec192) | 173 | entry(aes_dec_blk,240,.Ld128,.Ld192) |
174 | decrypt_round(crypto_it_tab,-96) | 174 | decrypt_round(crypto_it_tab,-96) |
175 | decrypt_round(crypto_it_tab,-80) | 175 | decrypt_round(crypto_it_tab,-80) |
176 | dec192: decrypt_round(crypto_it_tab,-64) | 176 | .Ld192: decrypt_round(crypto_it_tab,-64) |
177 | decrypt_round(crypto_it_tab,-48) | 177 | decrypt_round(crypto_it_tab,-48) |
178 | dec128: decrypt_round(crypto_it_tab,-32) | 178 | .Ld128: decrypt_round(crypto_it_tab,-32) |
179 | decrypt_round(crypto_it_tab,-16) | 179 | decrypt_round(crypto_it_tab,-16) |
180 | decrypt_round(crypto_it_tab, 0) | 180 | decrypt_round(crypto_it_tab, 0) |
181 | decrypt_round(crypto_it_tab, 16) | 181 | decrypt_round(crypto_it_tab, 16) |
@@ -185,4 +185,4 @@ dec128: decrypt_round(crypto_it_tab,-32) | |||
185 | decrypt_round(crypto_it_tab, 80) | 185 | decrypt_round(crypto_it_tab, 80) |
186 | decrypt_round(crypto_it_tab, 96) | 186 | decrypt_round(crypto_it_tab, 96) |
187 | decrypt_final(crypto_il_tab,112) | 187 | decrypt_final(crypto_il_tab,112) |
188 | return | 188 | return(aes_dec_blk) |
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 3470624d7835..04b797767b9e 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst | |||
1262 | * poly = x^128 + x^127 + x^126 + x^121 + 1 | 1262 | * poly = x^128 + x^127 + x^126 + x^121 + 1 |
1263 | * | 1263 | * |
1264 | *****************************************************************************/ | 1264 | *****************************************************************************/ |
1265 | |||
1266 | ENTRY(aesni_gcm_dec) | 1265 | ENTRY(aesni_gcm_dec) |
1267 | push %r12 | 1266 | push %r12 |
1268 | push %r13 | 1267 | push %r13 |
@@ -1437,6 +1436,7 @@ _return_T_done_decrypt: | |||
1437 | pop %r13 | 1436 | pop %r13 |
1438 | pop %r12 | 1437 | pop %r12 |
1439 | ret | 1438 | ret |
1439 | ENDPROC(aesni_gcm_dec) | ||
1440 | 1440 | ||
1441 | 1441 | ||
1442 | /***************************************************************************** | 1442 | /***************************************************************************** |
@@ -1700,10 +1700,12 @@ _return_T_done_encrypt: | |||
1700 | pop %r13 | 1700 | pop %r13 |
1701 | pop %r12 | 1701 | pop %r12 |
1702 | ret | 1702 | ret |
1703 | ENDPROC(aesni_gcm_enc) | ||
1703 | 1704 | ||
1704 | #endif | 1705 | #endif |
1705 | 1706 | ||
1706 | 1707 | ||
1708 | .align 4 | ||
1707 | _key_expansion_128: | 1709 | _key_expansion_128: |
1708 | _key_expansion_256a: | 1710 | _key_expansion_256a: |
1709 | pshufd $0b11111111, %xmm1, %xmm1 | 1711 | pshufd $0b11111111, %xmm1, %xmm1 |
@@ -1715,6 +1717,8 @@ _key_expansion_256a: | |||
1715 | movaps %xmm0, (TKEYP) | 1717 | movaps %xmm0, (TKEYP) |
1716 | add $0x10, TKEYP | 1718 | add $0x10, TKEYP |
1717 | ret | 1719 | ret |
1720 | ENDPROC(_key_expansion_128) | ||
1721 | ENDPROC(_key_expansion_256a) | ||
1718 | 1722 | ||
1719 | .align 4 | 1723 | .align 4 |
1720 | _key_expansion_192a: | 1724 | _key_expansion_192a: |
@@ -1739,6 +1743,7 @@ _key_expansion_192a: | |||
1739 | movaps %xmm1, 0x10(TKEYP) | 1743 | movaps %xmm1, 0x10(TKEYP) |
1740 | add $0x20, TKEYP | 1744 | add $0x20, TKEYP |
1741 | ret | 1745 | ret |
1746 | ENDPROC(_key_expansion_192a) | ||
1742 | 1747 | ||
1743 | .align 4 | 1748 | .align 4 |
1744 | _key_expansion_192b: | 1749 | _key_expansion_192b: |
@@ -1758,6 +1763,7 @@ _key_expansion_192b: | |||
1758 | movaps %xmm0, (TKEYP) | 1763 | movaps %xmm0, (TKEYP) |
1759 | add $0x10, TKEYP | 1764 | add $0x10, TKEYP |
1760 | ret | 1765 | ret |
1766 | ENDPROC(_key_expansion_192b) | ||
1761 | 1767 | ||
1762 | .align 4 | 1768 | .align 4 |
1763 | _key_expansion_256b: | 1769 | _key_expansion_256b: |
@@ -1770,6 +1776,7 @@ _key_expansion_256b: | |||
1770 | movaps %xmm2, (TKEYP) | 1776 | movaps %xmm2, (TKEYP) |
1771 | add $0x10, TKEYP | 1777 | add $0x10, TKEYP |
1772 | ret | 1778 | ret |
1779 | ENDPROC(_key_expansion_256b) | ||
1773 | 1780 | ||
1774 | /* | 1781 | /* |
1775 | * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, | 1782 | * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, |
@@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key) | |||
1882 | popl KEYP | 1889 | popl KEYP |
1883 | #endif | 1890 | #endif |
1884 | ret | 1891 | ret |
1892 | ENDPROC(aesni_set_key) | ||
1885 | 1893 | ||
1886 | /* | 1894 | /* |
1887 | * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | 1895 | * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) |
@@ -1903,6 +1911,7 @@ ENTRY(aesni_enc) | |||
1903 | popl KEYP | 1911 | popl KEYP |
1904 | #endif | 1912 | #endif |
1905 | ret | 1913 | ret |
1914 | ENDPROC(aesni_enc) | ||
1906 | 1915 | ||
1907 | /* | 1916 | /* |
1908 | * _aesni_enc1: internal ABI | 1917 | * _aesni_enc1: internal ABI |
@@ -1960,6 +1969,7 @@ _aesni_enc1: | |||
1960 | movaps 0x70(TKEYP), KEY | 1969 | movaps 0x70(TKEYP), KEY |
1961 | AESENCLAST KEY STATE | 1970 | AESENCLAST KEY STATE |
1962 | ret | 1971 | ret |
1972 | ENDPROC(_aesni_enc1) | ||
1963 | 1973 | ||
1964 | /* | 1974 | /* |
1965 | * _aesni_enc4: internal ABI | 1975 | * _aesni_enc4: internal ABI |
@@ -2068,6 +2078,7 @@ _aesni_enc4: | |||
2068 | AESENCLAST KEY STATE3 | 2078 | AESENCLAST KEY STATE3 |
2069 | AESENCLAST KEY STATE4 | 2079 | AESENCLAST KEY STATE4 |
2070 | ret | 2080 | ret |
2081 | ENDPROC(_aesni_enc4) | ||
2071 | 2082 | ||
2072 | /* | 2083 | /* |
2073 | * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | 2084 | * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) |
@@ -2090,6 +2101,7 @@ ENTRY(aesni_dec) | |||
2090 | popl KEYP | 2101 | popl KEYP |
2091 | #endif | 2102 | #endif |
2092 | ret | 2103 | ret |
2104 | ENDPROC(aesni_dec) | ||
2093 | 2105 | ||
2094 | /* | 2106 | /* |
2095 | * _aesni_dec1: internal ABI | 2107 | * _aesni_dec1: internal ABI |
@@ -2147,6 +2159,7 @@ _aesni_dec1: | |||
2147 | movaps 0x70(TKEYP), KEY | 2159 | movaps 0x70(TKEYP), KEY |
2148 | AESDECLAST KEY STATE | 2160 | AESDECLAST KEY STATE |
2149 | ret | 2161 | ret |
2162 | ENDPROC(_aesni_dec1) | ||
2150 | 2163 | ||
2151 | /* | 2164 | /* |
2152 | * _aesni_dec4: internal ABI | 2165 | * _aesni_dec4: internal ABI |
@@ -2255,6 +2268,7 @@ _aesni_dec4: | |||
2255 | AESDECLAST KEY STATE3 | 2268 | AESDECLAST KEY STATE3 |
2256 | AESDECLAST KEY STATE4 | 2269 | AESDECLAST KEY STATE4 |
2257 | ret | 2270 | ret |
2271 | ENDPROC(_aesni_dec4) | ||
2258 | 2272 | ||
2259 | /* | 2273 | /* |
2260 | * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, | 2274 | * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
@@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc) | |||
2312 | popl LEN | 2326 | popl LEN |
2313 | #endif | 2327 | #endif |
2314 | ret | 2328 | ret |
2329 | ENDPROC(aesni_ecb_enc) | ||
2315 | 2330 | ||
2316 | /* | 2331 | /* |
2317 | * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, | 2332 | * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
@@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec) | |||
2370 | popl LEN | 2385 | popl LEN |
2371 | #endif | 2386 | #endif |
2372 | ret | 2387 | ret |
2388 | ENDPROC(aesni_ecb_dec) | ||
2373 | 2389 | ||
2374 | /* | 2390 | /* |
2375 | * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, | 2391 | * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
@@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc) | |||
2411 | popl IVP | 2427 | popl IVP |
2412 | #endif | 2428 | #endif |
2413 | ret | 2429 | ret |
2430 | ENDPROC(aesni_cbc_enc) | ||
2414 | 2431 | ||
2415 | /* | 2432 | /* |
2416 | * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, | 2433 | * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
@@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec) | |||
2501 | popl IVP | 2518 | popl IVP |
2502 | #endif | 2519 | #endif |
2503 | ret | 2520 | ret |
2521 | ENDPROC(aesni_cbc_dec) | ||
2504 | 2522 | ||
2505 | #ifdef __x86_64__ | 2523 | #ifdef __x86_64__ |
2506 | .align 16 | 2524 | .align 16 |
@@ -2527,6 +2545,7 @@ _aesni_inc_init: | |||
2527 | MOVQ_R64_XMM TCTR_LOW INC | 2545 | MOVQ_R64_XMM TCTR_LOW INC |
2528 | MOVQ_R64_XMM CTR TCTR_LOW | 2546 | MOVQ_R64_XMM CTR TCTR_LOW |
2529 | ret | 2547 | ret |
2548 | ENDPROC(_aesni_inc_init) | ||
2530 | 2549 | ||
2531 | /* | 2550 | /* |
2532 | * _aesni_inc: internal ABI | 2551 | * _aesni_inc: internal ABI |
@@ -2555,6 +2574,7 @@ _aesni_inc: | |||
2555 | movaps CTR, IV | 2574 | movaps CTR, IV |
2556 | PSHUFB_XMM BSWAP_MASK IV | 2575 | PSHUFB_XMM BSWAP_MASK IV |
2557 | ret | 2576 | ret |
2577 | ENDPROC(_aesni_inc) | ||
2558 | 2578 | ||
2559 | /* | 2579 | /* |
2560 | * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, | 2580 | * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
@@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc) | |||
2615 | movups IV, (IVP) | 2635 | movups IV, (IVP) |
2616 | .Lctr_enc_just_ret: | 2636 | .Lctr_enc_just_ret: |
2617 | ret | 2637 | ret |
2638 | ENDPROC(aesni_ctr_enc) | ||
2618 | #endif | 2639 | #endif |
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 391d245dc086..246c67006ed0 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S | |||
@@ -20,6 +20,8 @@ | |||
20 | * | 20 | * |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/linkage.h> | ||
24 | |||
23 | .file "blowfish-x86_64-asm.S" | 25 | .file "blowfish-x86_64-asm.S" |
24 | .text | 26 | .text |
25 | 27 | ||
@@ -116,11 +118,7 @@ | |||
116 | bswapq RX0; \ | 118 | bswapq RX0; \ |
117 | xorq RX0, (RIO); | 119 | xorq RX0, (RIO); |
118 | 120 | ||
119 | .align 8 | 121 | ENTRY(__blowfish_enc_blk) |
120 | .global __blowfish_enc_blk | ||
121 | .type __blowfish_enc_blk,@function; | ||
122 | |||
123 | __blowfish_enc_blk: | ||
124 | /* input: | 122 | /* input: |
125 | * %rdi: ctx, CTX | 123 | * %rdi: ctx, CTX |
126 | * %rsi: dst | 124 | * %rsi: dst |
@@ -148,19 +146,16 @@ __blowfish_enc_blk: | |||
148 | 146 | ||
149 | movq %r10, RIO; | 147 | movq %r10, RIO; |
150 | test %cl, %cl; | 148 | test %cl, %cl; |
151 | jnz __enc_xor; | 149 | jnz .L__enc_xor; |
152 | 150 | ||
153 | write_block(); | 151 | write_block(); |
154 | ret; | 152 | ret; |
155 | __enc_xor: | 153 | .L__enc_xor: |
156 | xor_block(); | 154 | xor_block(); |
157 | ret; | 155 | ret; |
156 | ENDPROC(__blowfish_enc_blk) | ||
158 | 157 | ||
159 | .align 8 | 158 | ENTRY(blowfish_dec_blk) |
160 | .global blowfish_dec_blk | ||
161 | .type blowfish_dec_blk,@function; | ||
162 | |||
163 | blowfish_dec_blk: | ||
164 | /* input: | 159 | /* input: |
165 | * %rdi: ctx, CTX | 160 | * %rdi: ctx, CTX |
166 | * %rsi: dst | 161 | * %rsi: dst |
@@ -189,6 +184,7 @@ blowfish_dec_blk: | |||
189 | movq %r11, %rbp; | 184 | movq %r11, %rbp; |
190 | 185 | ||
191 | ret; | 186 | ret; |
187 | ENDPROC(blowfish_dec_blk) | ||
192 | 188 | ||
193 | /********************************************************************** | 189 | /********************************************************************** |
194 | 4-way blowfish, four blocks parallel | 190 | 4-way blowfish, four blocks parallel |
@@ -300,11 +296,7 @@ blowfish_dec_blk: | |||
300 | bswapq RX3; \ | 296 | bswapq RX3; \ |
301 | xorq RX3, 24(RIO); | 297 | xorq RX3, 24(RIO); |
302 | 298 | ||
303 | .align 8 | 299 | ENTRY(__blowfish_enc_blk_4way) |
304 | .global __blowfish_enc_blk_4way | ||
305 | .type __blowfish_enc_blk_4way,@function; | ||
306 | |||
307 | __blowfish_enc_blk_4way: | ||
308 | /* input: | 300 | /* input: |
309 | * %rdi: ctx, CTX | 301 | * %rdi: ctx, CTX |
310 | * %rsi: dst | 302 | * %rsi: dst |
@@ -336,7 +328,7 @@ __blowfish_enc_blk_4way: | |||
336 | movq %r11, RIO; | 328 | movq %r11, RIO; |
337 | 329 | ||
338 | test %bpl, %bpl; | 330 | test %bpl, %bpl; |
339 | jnz __enc_xor4; | 331 | jnz .L__enc_xor4; |
340 | 332 | ||
341 | write_block4(); | 333 | write_block4(); |
342 | 334 | ||
@@ -344,18 +336,15 @@ __blowfish_enc_blk_4way: | |||
344 | popq %rbp; | 336 | popq %rbp; |
345 | ret; | 337 | ret; |
346 | 338 | ||
347 | __enc_xor4: | 339 | .L__enc_xor4: |
348 | xor_block4(); | 340 | xor_block4(); |
349 | 341 | ||
350 | popq %rbx; | 342 | popq %rbx; |
351 | popq %rbp; | 343 | popq %rbp; |
352 | ret; | 344 | ret; |
345 | ENDPROC(__blowfish_enc_blk_4way) | ||
353 | 346 | ||
354 | .align 8 | 347 | ENTRY(blowfish_dec_blk_4way) |
355 | .global blowfish_dec_blk_4way | ||
356 | .type blowfish_dec_blk_4way,@function; | ||
357 | |||
358 | blowfish_dec_blk_4way: | ||
359 | /* input: | 348 | /* input: |
360 | * %rdi: ctx, CTX | 349 | * %rdi: ctx, CTX |
361 | * %rsi: dst | 350 | * %rsi: dst |
@@ -387,4 +376,4 @@ blowfish_dec_blk_4way: | |||
387 | popq %rbp; | 376 | popq %rbp; |
388 | 377 | ||
389 | ret; | 378 | ret; |
390 | 379 | ENDPROC(blowfish_dec_blk_4way) | |
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 2306d2e4816f..cfc163469c71 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S | |||
@@ -15,6 +15,8 @@ | |||
15 | * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz | 15 | * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/linkage.h> | ||
19 | |||
18 | #define CAMELLIA_TABLE_BYTE_LEN 272 | 20 | #define CAMELLIA_TABLE_BYTE_LEN 272 |
19 | 21 | ||
20 | /* struct camellia_ctx: */ | 22 | /* struct camellia_ctx: */ |
@@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: | |||
190 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, | 192 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, |
191 | %rcx, (%r9)); | 193 | %rcx, (%r9)); |
192 | ret; | 194 | ret; |
195 | ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) | ||
193 | 196 | ||
194 | .align 8 | 197 | .align 8 |
195 | roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: | 198 | roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: |
@@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: | |||
197 | %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, | 200 | %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, |
198 | %rax, (%r9)); | 201 | %rax, (%r9)); |
199 | ret; | 202 | ret; |
203 | ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) | ||
200 | 204 | ||
201 | /* | 205 | /* |
202 | * IN/OUT: | 206 | * IN/OUT: |
@@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: | |||
709 | .text | 713 | .text |
710 | 714 | ||
711 | .align 8 | 715 | .align 8 |
712 | .type __camellia_enc_blk16,@function; | ||
713 | |||
714 | __camellia_enc_blk16: | 716 | __camellia_enc_blk16: |
715 | /* input: | 717 | /* input: |
716 | * %rdi: ctx, CTX | 718 | * %rdi: ctx, CTX |
@@ -793,10 +795,9 @@ __camellia_enc_blk16: | |||
793 | %xmm15, %rax, %rcx, 24); | 795 | %xmm15, %rax, %rcx, 24); |
794 | 796 | ||
795 | jmp .Lenc_done; | 797 | jmp .Lenc_done; |
798 | ENDPROC(__camellia_enc_blk16) | ||
796 | 799 | ||
797 | .align 8 | 800 | .align 8 |
798 | .type __camellia_dec_blk16,@function; | ||
799 | |||
800 | __camellia_dec_blk16: | 801 | __camellia_dec_blk16: |
801 | /* input: | 802 | /* input: |
802 | * %rdi: ctx, CTX | 803 | * %rdi: ctx, CTX |
@@ -877,12 +878,9 @@ __camellia_dec_blk16: | |||
877 | ((key_table + (24) * 8) + 4)(CTX)); | 878 | ((key_table + (24) * 8) + 4)(CTX)); |
878 | 879 | ||
879 | jmp .Ldec_max24; | 880 | jmp .Ldec_max24; |
881 | ENDPROC(__camellia_dec_blk16) | ||
880 | 882 | ||
881 | .align 8 | 883 | ENTRY(camellia_ecb_enc_16way) |
882 | .global camellia_ecb_enc_16way | ||
883 | .type camellia_ecb_enc_16way,@function; | ||
884 | |||
885 | camellia_ecb_enc_16way: | ||
886 | /* input: | 884 | /* input: |
887 | * %rdi: ctx, CTX | 885 | * %rdi: ctx, CTX |
888 | * %rsi: dst (16 blocks) | 886 | * %rsi: dst (16 blocks) |
@@ -903,12 +901,9 @@ camellia_ecb_enc_16way: | |||
903 | %xmm8, %rsi); | 901 | %xmm8, %rsi); |
904 | 902 | ||
905 | ret; | 903 | ret; |
904 | ENDPROC(camellia_ecb_enc_16way) | ||
906 | 905 | ||
907 | .align 8 | 906 | ENTRY(camellia_ecb_dec_16way) |
908 | .global camellia_ecb_dec_16way | ||
909 | .type camellia_ecb_dec_16way,@function; | ||
910 | |||
911 | camellia_ecb_dec_16way: | ||
912 | /* input: | 907 | /* input: |
913 | * %rdi: ctx, CTX | 908 | * %rdi: ctx, CTX |
914 | * %rsi: dst (16 blocks) | 909 | * %rsi: dst (16 blocks) |
@@ -934,12 +929,9 @@ camellia_ecb_dec_16way: | |||
934 | %xmm8, %rsi); | 929 | %xmm8, %rsi); |
935 | 930 | ||
936 | ret; | 931 | ret; |
932 | ENDPROC(camellia_ecb_dec_16way) | ||
937 | 933 | ||
938 | .align 8 | 934 | ENTRY(camellia_cbc_dec_16way) |
939 | .global camellia_cbc_dec_16way | ||
940 | .type camellia_cbc_dec_16way,@function; | ||
941 | |||
942 | camellia_cbc_dec_16way: | ||
943 | /* input: | 935 | /* input: |
944 | * %rdi: ctx, CTX | 936 | * %rdi: ctx, CTX |
945 | * %rsi: dst (16 blocks) | 937 | * %rsi: dst (16 blocks) |
@@ -986,6 +978,7 @@ camellia_cbc_dec_16way: | |||
986 | %xmm8, %rsi); | 978 | %xmm8, %rsi); |
987 | 979 | ||
988 | ret; | 980 | ret; |
981 | ENDPROC(camellia_cbc_dec_16way) | ||
989 | 982 | ||
990 | #define inc_le128(x, minus_one, tmp) \ | 983 | #define inc_le128(x, minus_one, tmp) \ |
991 | vpcmpeqq minus_one, x, tmp; \ | 984 | vpcmpeqq minus_one, x, tmp; \ |
@@ -993,11 +986,7 @@ camellia_cbc_dec_16way: | |||
993 | vpslldq $8, tmp, tmp; \ | 986 | vpslldq $8, tmp, tmp; \ |
994 | vpsubq tmp, x, x; | 987 | vpsubq tmp, x, x; |
995 | 988 | ||
996 | .align 8 | 989 | ENTRY(camellia_ctr_16way) |
997 | .global camellia_ctr_16way | ||
998 | .type camellia_ctr_16way,@function; | ||
999 | |||
1000 | camellia_ctr_16way: | ||
1001 | /* input: | 990 | /* input: |
1002 | * %rdi: ctx, CTX | 991 | * %rdi: ctx, CTX |
1003 | * %rsi: dst (16 blocks) | 992 | * %rsi: dst (16 blocks) |
@@ -1100,3 +1089,4 @@ camellia_ctr_16way: | |||
1100 | %xmm8, %rsi); | 1089 | %xmm8, %rsi); |
1101 | 1090 | ||
1102 | ret; | 1091 | ret; |
1092 | ENDPROC(camellia_ctr_16way) | ||
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 0b3374335fdc..310319c601ed 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S | |||
@@ -20,6 +20,8 @@ | |||
20 | * | 20 | * |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/linkage.h> | ||
24 | |||
23 | .file "camellia-x86_64-asm_64.S" | 25 | .file "camellia-x86_64-asm_64.S" |
24 | .text | 26 | .text |
25 | 27 | ||
@@ -188,10 +190,7 @@ | |||
188 | bswapq RAB0; \ | 190 | bswapq RAB0; \ |
189 | movq RAB0, 4*2(RIO); | 191 | movq RAB0, 4*2(RIO); |
190 | 192 | ||
191 | .global __camellia_enc_blk; | 193 | ENTRY(__camellia_enc_blk) |
192 | .type __camellia_enc_blk,@function; | ||
193 | |||
194 | __camellia_enc_blk: | ||
195 | /* input: | 194 | /* input: |
196 | * %rdi: ctx, CTX | 195 | * %rdi: ctx, CTX |
197 | * %rsi: dst | 196 | * %rsi: dst |
@@ -214,33 +213,31 @@ __camellia_enc_blk: | |||
214 | movl $24, RT1d; /* max */ | 213 | movl $24, RT1d; /* max */ |
215 | 214 | ||
216 | cmpb $16, key_length(CTX); | 215 | cmpb $16, key_length(CTX); |
217 | je __enc_done; | 216 | je .L__enc_done; |
218 | 217 | ||
219 | enc_fls(24); | 218 | enc_fls(24); |
220 | enc_rounds(24); | 219 | enc_rounds(24); |
221 | movl $32, RT1d; /* max */ | 220 | movl $32, RT1d; /* max */ |
222 | 221 | ||
223 | __enc_done: | 222 | .L__enc_done: |
224 | testb RXORbl, RXORbl; | 223 | testb RXORbl, RXORbl; |
225 | movq RDST, RIO; | 224 | movq RDST, RIO; |
226 | 225 | ||
227 | jnz __enc_xor; | 226 | jnz .L__enc_xor; |
228 | 227 | ||
229 | enc_outunpack(mov, RT1); | 228 | enc_outunpack(mov, RT1); |
230 | 229 | ||
231 | movq RRBP, %rbp; | 230 | movq RRBP, %rbp; |
232 | ret; | 231 | ret; |
233 | 232 | ||
234 | __enc_xor: | 233 | .L__enc_xor: |
235 | enc_outunpack(xor, RT1); | 234 | enc_outunpack(xor, RT1); |
236 | 235 | ||
237 | movq RRBP, %rbp; | 236 | movq RRBP, %rbp; |
238 | ret; | 237 | ret; |
238 | ENDPROC(__camellia_enc_blk) | ||
239 | 239 | ||
240 | .global camellia_dec_blk; | 240 | ENTRY(camellia_dec_blk) |
241 | .type camellia_dec_blk,@function; | ||
242 | |||
243 | camellia_dec_blk: | ||
244 | /* input: | 241 | /* input: |
245 | * %rdi: ctx, CTX | 242 | * %rdi: ctx, CTX |
246 | * %rsi: dst | 243 | * %rsi: dst |
@@ -258,12 +255,12 @@ camellia_dec_blk: | |||
258 | dec_inpack(RT2); | 255 | dec_inpack(RT2); |
259 | 256 | ||
260 | cmpb $24, RT2bl; | 257 | cmpb $24, RT2bl; |
261 | je __dec_rounds16; | 258 | je .L__dec_rounds16; |
262 | 259 | ||
263 | dec_rounds(24); | 260 | dec_rounds(24); |
264 | dec_fls(24); | 261 | dec_fls(24); |
265 | 262 | ||
266 | __dec_rounds16: | 263 | .L__dec_rounds16: |
267 | dec_rounds(16); | 264 | dec_rounds(16); |
268 | dec_fls(16); | 265 | dec_fls(16); |
269 | dec_rounds(8); | 266 | dec_rounds(8); |
@@ -276,6 +273,7 @@ __dec_rounds16: | |||
276 | 273 | ||
277 | movq RRBP, %rbp; | 274 | movq RRBP, %rbp; |
278 | ret; | 275 | ret; |
276 | ENDPROC(camellia_dec_blk) | ||
279 | 277 | ||
280 | /********************************************************************** | 278 | /********************************************************************** |
281 | 2-way camellia | 279 | 2-way camellia |
@@ -426,10 +424,7 @@ __dec_rounds16: | |||
426 | bswapq RAB1; \ | 424 | bswapq RAB1; \ |
427 | movq RAB1, 12*2(RIO); | 425 | movq RAB1, 12*2(RIO); |
428 | 426 | ||
429 | .global __camellia_enc_blk_2way; | 427 | ENTRY(__camellia_enc_blk_2way) |
430 | .type __camellia_enc_blk_2way,@function; | ||
431 | |||
432 | __camellia_enc_blk_2way: | ||
433 | /* input: | 428 | /* input: |
434 | * %rdi: ctx, CTX | 429 | * %rdi: ctx, CTX |
435 | * %rsi: dst | 430 | * %rsi: dst |
@@ -453,16 +448,16 @@ __camellia_enc_blk_2way: | |||
453 | movl $24, RT2d; /* max */ | 448 | movl $24, RT2d; /* max */ |
454 | 449 | ||
455 | cmpb $16, key_length(CTX); | 450 | cmpb $16, key_length(CTX); |
456 | je __enc2_done; | 451 | je .L__enc2_done; |
457 | 452 | ||
458 | enc_fls2(24); | 453 | enc_fls2(24); |
459 | enc_rounds2(24); | 454 | enc_rounds2(24); |
460 | movl $32, RT2d; /* max */ | 455 | movl $32, RT2d; /* max */ |
461 | 456 | ||
462 | __enc2_done: | 457 | .L__enc2_done: |
463 | test RXORbl, RXORbl; | 458 | test RXORbl, RXORbl; |
464 | movq RDST, RIO; | 459 | movq RDST, RIO; |
465 | jnz __enc2_xor; | 460 | jnz .L__enc2_xor; |
466 | 461 | ||
467 | enc_outunpack2(mov, RT2); | 462 | enc_outunpack2(mov, RT2); |
468 | 463 | ||
@@ -470,17 +465,15 @@ __enc2_done: | |||
470 | popq %rbx; | 465 | popq %rbx; |
471 | ret; | 466 | ret; |
472 | 467 | ||
473 | __enc2_xor: | 468 | .L__enc2_xor: |
474 | enc_outunpack2(xor, RT2); | 469 | enc_outunpack2(xor, RT2); |
475 | 470 | ||
476 | movq RRBP, %rbp; | 471 | movq RRBP, %rbp; |
477 | popq %rbx; | 472 | popq %rbx; |
478 | ret; | 473 | ret; |
474 | ENDPROC(__camellia_enc_blk_2way) | ||
479 | 475 | ||
480 | .global camellia_dec_blk_2way; | 476 | ENTRY(camellia_dec_blk_2way) |
481 | .type camellia_dec_blk_2way,@function; | ||
482 | |||
483 | camellia_dec_blk_2way: | ||
484 | /* input: | 477 | /* input: |
485 | * %rdi: ctx, CTX | 478 | * %rdi: ctx, CTX |
486 | * %rsi: dst | 479 | * %rsi: dst |
@@ -499,12 +492,12 @@ camellia_dec_blk_2way: | |||
499 | dec_inpack2(RT2); | 492 | dec_inpack2(RT2); |
500 | 493 | ||
501 | cmpb $24, RT2bl; | 494 | cmpb $24, RT2bl; |
502 | je __dec2_rounds16; | 495 | je .L__dec2_rounds16; |
503 | 496 | ||
504 | dec_rounds2(24); | 497 | dec_rounds2(24); |
505 | dec_fls2(24); | 498 | dec_fls2(24); |
506 | 499 | ||
507 | __dec2_rounds16: | 500 | .L__dec2_rounds16: |
508 | dec_rounds2(16); | 501 | dec_rounds2(16); |
509 | dec_fls2(16); | 502 | dec_fls2(16); |
510 | dec_rounds2(8); | 503 | dec_rounds2(8); |
@@ -518,3 +511,4 @@ __dec2_rounds16: | |||
518 | movq RRBP, %rbp; | 511 | movq RRBP, %rbp; |
519 | movq RXOR, %rbx; | 512 | movq RXOR, %rbx; |
520 | ret; | 513 | ret; |
514 | ENDPROC(camellia_dec_blk_2way) | ||
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 15b00ac7cbd3..c35fd5d6ecd2 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S | |||
@@ -23,6 +23,8 @@ | |||
23 | * | 23 | * |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/linkage.h> | ||
27 | |||
26 | .file "cast5-avx-x86_64-asm_64.S" | 28 | .file "cast5-avx-x86_64-asm_64.S" |
27 | 29 | ||
28 | .extern cast_s1 | 30 | .extern cast_s1 |
@@ -211,8 +213,6 @@ | |||
211 | .text | 213 | .text |
212 | 214 | ||
213 | .align 16 | 215 | .align 16 |
214 | .type __cast5_enc_blk16,@function; | ||
215 | |||
216 | __cast5_enc_blk16: | 216 | __cast5_enc_blk16: |
217 | /* input: | 217 | /* input: |
218 | * %rdi: ctx, CTX | 218 | * %rdi: ctx, CTX |
@@ -263,14 +263,14 @@ __cast5_enc_blk16: | |||
263 | 263 | ||
264 | movzbl rr(CTX), %eax; | 264 | movzbl rr(CTX), %eax; |
265 | testl %eax, %eax; | 265 | testl %eax, %eax; |
266 | jnz __skip_enc; | 266 | jnz .L__skip_enc; |
267 | 267 | ||
268 | round(RL, RR, 12, 1); | 268 | round(RL, RR, 12, 1); |
269 | round(RR, RL, 13, 2); | 269 | round(RR, RL, 13, 2); |
270 | round(RL, RR, 14, 3); | 270 | round(RL, RR, 14, 3); |
271 | round(RR, RL, 15, 1); | 271 | round(RR, RL, 15, 1); |
272 | 272 | ||
273 | __skip_enc: | 273 | .L__skip_enc: |
274 | popq %rbx; | 274 | popq %rbx; |
275 | popq %rbp; | 275 | popq %rbp; |
276 | 276 | ||
@@ -282,10 +282,9 @@ __skip_enc: | |||
282 | outunpack_blocks(RR4, RL4, RTMP, RX, RKM); | 282 | outunpack_blocks(RR4, RL4, RTMP, RX, RKM); |
283 | 283 | ||
284 | ret; | 284 | ret; |
285 | ENDPROC(__cast5_enc_blk16) | ||
285 | 286 | ||
286 | .align 16 | 287 | .align 16 |
287 | .type __cast5_dec_blk16,@function; | ||
288 | |||
289 | __cast5_dec_blk16: | 288 | __cast5_dec_blk16: |
290 | /* input: | 289 | /* input: |
291 | * %rdi: ctx, CTX | 290 | * %rdi: ctx, CTX |
@@ -323,14 +322,14 @@ __cast5_dec_blk16: | |||
323 | 322 | ||
324 | movzbl rr(CTX), %eax; | 323 | movzbl rr(CTX), %eax; |
325 | testl %eax, %eax; | 324 | testl %eax, %eax; |
326 | jnz __skip_dec; | 325 | jnz .L__skip_dec; |
327 | 326 | ||
328 | round(RL, RR, 15, 1); | 327 | round(RL, RR, 15, 1); |
329 | round(RR, RL, 14, 3); | 328 | round(RR, RL, 14, 3); |
330 | round(RL, RR, 13, 2); | 329 | round(RL, RR, 13, 2); |
331 | round(RR, RL, 12, 1); | 330 | round(RR, RL, 12, 1); |
332 | 331 | ||
333 | __dec_tail: | 332 | .L__dec_tail: |
334 | round(RL, RR, 11, 3); | 333 | round(RL, RR, 11, 3); |
335 | round(RR, RL, 10, 2); | 334 | round(RR, RL, 10, 2); |
336 | round(RL, RR, 9, 1); | 335 | round(RL, RR, 9, 1); |
@@ -355,15 +354,12 @@ __dec_tail: | |||
355 | 354 | ||
356 | ret; | 355 | ret; |
357 | 356 | ||
358 | __skip_dec: | 357 | .L__skip_dec: |
359 | vpsrldq $4, RKR, RKR; | 358 | vpsrldq $4, RKR, RKR; |
360 | jmp __dec_tail; | 359 | jmp .L__dec_tail; |
360 | ENDPROC(__cast5_dec_blk16) | ||
361 | 361 | ||
362 | .align 16 | 362 | ENTRY(cast5_ecb_enc_16way) |
363 | .global cast5_ecb_enc_16way | ||
364 | .type cast5_ecb_enc_16way,@function; | ||
365 | |||
366 | cast5_ecb_enc_16way: | ||
367 | /* input: | 363 | /* input: |
368 | * %rdi: ctx, CTX | 364 | * %rdi: ctx, CTX |
369 | * %rsi: dst | 365 | * %rsi: dst |
@@ -393,12 +389,9 @@ cast5_ecb_enc_16way: | |||
393 | vmovdqu RL4, (7*4*4)(%r11); | 389 | vmovdqu RL4, (7*4*4)(%r11); |
394 | 390 | ||
395 | ret; | 391 | ret; |
392 | ENDPROC(cast5_ecb_enc_16way) | ||
396 | 393 | ||
397 | .align 16 | 394 | ENTRY(cast5_ecb_dec_16way) |
398 | .global cast5_ecb_dec_16way | ||
399 | .type cast5_ecb_dec_16way,@function; | ||
400 | |||
401 | cast5_ecb_dec_16way: | ||
402 | /* input: | 395 | /* input: |
403 | * %rdi: ctx, CTX | 396 | * %rdi: ctx, CTX |
404 | * %rsi: dst | 397 | * %rsi: dst |
@@ -428,12 +421,9 @@ cast5_ecb_dec_16way: | |||
428 | vmovdqu RL4, (7*4*4)(%r11); | 421 | vmovdqu RL4, (7*4*4)(%r11); |
429 | 422 | ||
430 | ret; | 423 | ret; |
424 | ENDPROC(cast5_ecb_dec_16way) | ||
431 | 425 | ||
432 | .align 16 | 426 | ENTRY(cast5_cbc_dec_16way) |
433 | .global cast5_cbc_dec_16way | ||
434 | .type cast5_cbc_dec_16way,@function; | ||
435 | |||
436 | cast5_cbc_dec_16way: | ||
437 | /* input: | 427 | /* input: |
438 | * %rdi: ctx, CTX | 428 | * %rdi: ctx, CTX |
439 | * %rsi: dst | 429 | * %rsi: dst |
@@ -480,12 +470,9 @@ cast5_cbc_dec_16way: | |||
480 | popq %r12; | 470 | popq %r12; |
481 | 471 | ||
482 | ret; | 472 | ret; |
473 | ENDPROC(cast5_cbc_dec_16way) | ||
483 | 474 | ||
484 | .align 16 | 475 | ENTRY(cast5_ctr_16way) |
485 | .global cast5_ctr_16way | ||
486 | .type cast5_ctr_16way,@function; | ||
487 | |||
488 | cast5_ctr_16way: | ||
489 | /* input: | 476 | /* input: |
490 | * %rdi: ctx, CTX | 477 | * %rdi: ctx, CTX |
491 | * %rsi: dst | 478 | * %rsi: dst |
@@ -556,3 +543,4 @@ cast5_ctr_16way: | |||
556 | popq %r12; | 543 | popq %r12; |
557 | 544 | ||
558 | ret; | 545 | ret; |
546 | ENDPROC(cast5_ctr_16way) | ||
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 2569d0da841f..f93b6105a0ce 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S | |||
@@ -23,6 +23,7 @@ | |||
23 | * | 23 | * |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/linkage.h> | ||
26 | #include "glue_helper-asm-avx.S" | 27 | #include "glue_helper-asm-avx.S" |
27 | 28 | ||
28 | .file "cast6-avx-x86_64-asm_64.S" | 29 | .file "cast6-avx-x86_64-asm_64.S" |
@@ -250,8 +251,6 @@ | |||
250 | .text | 251 | .text |
251 | 252 | ||
252 | .align 8 | 253 | .align 8 |
253 | .type __cast6_enc_blk8,@function; | ||
254 | |||
255 | __cast6_enc_blk8: | 254 | __cast6_enc_blk8: |
256 | /* input: | 255 | /* input: |
257 | * %rdi: ctx, CTX | 256 | * %rdi: ctx, CTX |
@@ -295,10 +294,9 @@ __cast6_enc_blk8: | |||
295 | outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); | 294 | outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); |
296 | 295 | ||
297 | ret; | 296 | ret; |
297 | ENDPROC(__cast6_enc_blk8) | ||
298 | 298 | ||
299 | .align 8 | 299 | .align 8 |
300 | .type __cast6_dec_blk8,@function; | ||
301 | |||
302 | __cast6_dec_blk8: | 300 | __cast6_dec_blk8: |
303 | /* input: | 301 | /* input: |
304 | * %rdi: ctx, CTX | 302 | * %rdi: ctx, CTX |
@@ -341,12 +339,9 @@ __cast6_dec_blk8: | |||
341 | outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); | 339 | outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); |
342 | 340 | ||
343 | ret; | 341 | ret; |
342 | ENDPROC(__cast6_dec_blk8) | ||
344 | 343 | ||
345 | .align 8 | 344 | ENTRY(cast6_ecb_enc_8way) |
346 | .global cast6_ecb_enc_8way | ||
347 | .type cast6_ecb_enc_8way,@function; | ||
348 | |||
349 | cast6_ecb_enc_8way: | ||
350 | /* input: | 345 | /* input: |
351 | * %rdi: ctx, CTX | 346 | * %rdi: ctx, CTX |
352 | * %rsi: dst | 347 | * %rsi: dst |
@@ -362,12 +357,9 @@ cast6_ecb_enc_8way: | |||
362 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 357 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
363 | 358 | ||
364 | ret; | 359 | ret; |
360 | ENDPROC(cast6_ecb_enc_8way) | ||
365 | 361 | ||
366 | .align 8 | 362 | ENTRY(cast6_ecb_dec_8way) |
367 | .global cast6_ecb_dec_8way | ||
368 | .type cast6_ecb_dec_8way,@function; | ||
369 | |||
370 | cast6_ecb_dec_8way: | ||
371 | /* input: | 363 | /* input: |
372 | * %rdi: ctx, CTX | 364 | * %rdi: ctx, CTX |
373 | * %rsi: dst | 365 | * %rsi: dst |
@@ -383,12 +375,9 @@ cast6_ecb_dec_8way: | |||
383 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 375 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
384 | 376 | ||
385 | ret; | 377 | ret; |
378 | ENDPROC(cast6_ecb_dec_8way) | ||
386 | 379 | ||
387 | .align 8 | 380 | ENTRY(cast6_cbc_dec_8way) |
388 | .global cast6_cbc_dec_8way | ||
389 | .type cast6_cbc_dec_8way,@function; | ||
390 | |||
391 | cast6_cbc_dec_8way: | ||
392 | /* input: | 381 | /* input: |
393 | * %rdi: ctx, CTX | 382 | * %rdi: ctx, CTX |
394 | * %rsi: dst | 383 | * %rsi: dst |
@@ -409,12 +398,9 @@ cast6_cbc_dec_8way: | |||
409 | popq %r12; | 398 | popq %r12; |
410 | 399 | ||
411 | ret; | 400 | ret; |
401 | ENDPROC(cast6_cbc_dec_8way) | ||
412 | 402 | ||
413 | .align 8 | 403 | ENTRY(cast6_ctr_8way) |
414 | .global cast6_ctr_8way | ||
415 | .type cast6_ctr_8way,@function; | ||
416 | |||
417 | cast6_ctr_8way: | ||
418 | /* input: | 404 | /* input: |
419 | * %rdi: ctx, CTX | 405 | * %rdi: ctx, CTX |
420 | * %rsi: dst | 406 | * %rsi: dst |
@@ -437,3 +423,4 @@ cast6_ctr_8way: | |||
437 | popq %r12; | 423 | popq %r12; |
438 | 424 | ||
439 | ret; | 425 | ret; |
426 | ENDPROC(cast6_ctr_8way) | ||
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S new file mode 100644 index 000000000000..c8335014a044 --- /dev/null +++ b/arch/x86/crypto/crc32-pclmul_asm.S | |||
@@ -0,0 +1,246 @@ | |||
1 | /* GPL HEADER START | ||
2 | * | ||
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 only, | ||
7 | * as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License version 2 for more details (a copy is included | ||
13 | * in the LICENSE file that accompanied this code). | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * version 2 along with this program; If not, see http://www.gnu.org/licenses | ||
17 | * | ||
18 | * Please visit http://www.xyratex.com/contact if you need additional | ||
19 | * information or have any questions. | ||
20 | * | ||
21 | * GPL HEADER END | ||
22 | */ | ||
23 | |||
24 | /* | ||
25 | * Copyright 2012 Xyratex Technology Limited | ||
26 | * | ||
27 | * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 | ||
28 | * calculation. | ||
29 | * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) | ||
30 | * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found | ||
31 | * at: | ||
32 | * http://www.intel.com/products/processor/manuals/ | ||
33 | * Intel(R) 64 and IA-32 Architectures Software Developer's Manual | ||
34 | * Volume 2B: Instruction Set Reference, N-Z | ||
35 | * | ||
36 | * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com> | ||
37 | * Alexander Boyko <Alexander_Boyko@xyratex.com> | ||
38 | */ | ||
39 | |||
40 | #include <linux/linkage.h> | ||
41 | #include <asm/inst.h> | ||
42 | |||
43 | |||
44 | .align 16 | ||
45 | /* | ||
46 | * [(x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 | ||
47 | * #define CONSTANT_R1 0x154442bd4LL | ||
48 | * | ||
49 | * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 | ||
50 | * #define CONSTANT_R2 0x1c6e41596LL | ||
51 | */ | ||
52 | .Lconstant_R2R1: | ||
53 | .octa 0x00000001c6e415960000000154442bd4 | ||
54 | /* | ||
55 | * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 | ||
56 | * #define CONSTANT_R3 0x1751997d0LL | ||
57 | * | ||
58 | * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e | ||
59 | * #define CONSTANT_R4 0x0ccaa009eLL | ||
60 | */ | ||
61 | .Lconstant_R4R3: | ||
62 | .octa 0x00000000ccaa009e00000001751997d0 | ||
63 | /* | ||
64 | * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 | ||
65 | * #define CONSTANT_R5 0x163cd6124LL | ||
66 | */ | ||
67 | .Lconstant_R5: | ||
68 | .octa 0x00000000000000000000000163cd6124 | ||
69 | .Lconstant_mask32: | ||
70 | .octa 0x000000000000000000000000FFFFFFFF | ||
71 | /* | ||
72 | * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL | ||
73 | * | ||
74 | * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL | ||
75 | * #define CONSTANT_RU 0x1F7011641LL | ||
76 | */ | ||
77 | .Lconstant_RUpoly: | ||
78 | .octa 0x00000001F701164100000001DB710641 | ||
79 | |||
80 | #define CONSTANT %xmm0 | ||
81 | |||
82 | #ifdef __x86_64__ | ||
83 | #define BUF %rdi | ||
84 | #define LEN %rsi | ||
85 | #define CRC %edx | ||
86 | #else | ||
87 | #define BUF %eax | ||
88 | #define LEN %edx | ||
89 | #define CRC %ecx | ||
90 | #endif | ||
91 | |||
92 | |||
93 | |||
94 | .text | ||
95 | /** | ||
96 | * Calculate crc32 | ||
97 | * BUF - buffer (16 bytes aligned) | ||
98 | * LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63 | ||
99 | * CRC - initial crc32 | ||
100 | * return %eax crc32 | ||
101 | * uint crc32_pclmul_le_16(unsigned char const *buffer, | ||
102 | * size_t len, uint crc32) | ||
103 | */ | ||
104 | .globl crc32_pclmul_le_16 | ||
105 | .align 4, 0x90 | ||
106 | crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */ | ||
107 | movdqa (BUF), %xmm1 | ||
108 | movdqa 0x10(BUF), %xmm2 | ||
109 | movdqa 0x20(BUF), %xmm3 | ||
110 | movdqa 0x30(BUF), %xmm4 | ||
111 | movd CRC, CONSTANT | ||
112 | pxor CONSTANT, %xmm1 | ||
113 | sub $0x40, LEN | ||
114 | add $0x40, BUF | ||
115 | #ifndef __x86_64__ | ||
116 | /* This is for position independent code(-fPIC) support for 32bit */ | ||
117 | call delta | ||
118 | delta: | ||
119 | pop %ecx | ||
120 | #endif | ||
121 | cmp $0x40, LEN | ||
122 | jb less_64 | ||
123 | |||
124 | #ifdef __x86_64__ | ||
125 | movdqa .Lconstant_R2R1(%rip), CONSTANT | ||
126 | #else | ||
127 | movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT | ||
128 | #endif | ||
129 | |||
130 | loop_64:/* 64 bytes Full cache line folding */ | ||
131 | prefetchnta 0x40(BUF) | ||
132 | movdqa %xmm1, %xmm5 | ||
133 | movdqa %xmm2, %xmm6 | ||
134 | movdqa %xmm3, %xmm7 | ||
135 | #ifdef __x86_64__ | ||
136 | movdqa %xmm4, %xmm8 | ||
137 | #endif | ||
138 | PCLMULQDQ 00, CONSTANT, %xmm1 | ||
139 | PCLMULQDQ 00, CONSTANT, %xmm2 | ||
140 | PCLMULQDQ 00, CONSTANT, %xmm3 | ||
141 | #ifdef __x86_64__ | ||
142 | PCLMULQDQ 00, CONSTANT, %xmm4 | ||
143 | #endif | ||
144 | PCLMULQDQ 0x11, CONSTANT, %xmm5 | ||
145 | PCLMULQDQ 0x11, CONSTANT, %xmm6 | ||
146 | PCLMULQDQ 0x11, CONSTANT, %xmm7 | ||
147 | #ifdef __x86_64__ | ||
148 | PCLMULQDQ 0x11, CONSTANT, %xmm8 | ||
149 | #endif | ||
150 | pxor %xmm5, %xmm1 | ||
151 | pxor %xmm6, %xmm2 | ||
152 | pxor %xmm7, %xmm3 | ||
153 | #ifdef __x86_64__ | ||
154 | pxor %xmm8, %xmm4 | ||
155 | #else | ||
156 | /* xmm8 is not available in 32-bit mode */ | ||
157 | movdqa %xmm4, %xmm5 | ||
158 | PCLMULQDQ 00, CONSTANT, %xmm4 | ||
159 | PCLMULQDQ 0x11, CONSTANT, %xmm5 | ||
160 | pxor %xmm5, %xmm4 | ||
161 | #endif | ||
162 | |||
163 | pxor (BUF), %xmm1 | ||
164 | pxor 0x10(BUF), %xmm2 | ||
165 | pxor 0x20(BUF), %xmm3 | ||
166 | pxor 0x30(BUF), %xmm4 | ||
167 | |||
168 | sub $0x40, LEN | ||
169 | add $0x40, BUF | ||
170 | cmp $0x40, LEN | ||
171 | jge loop_64 | ||
172 | less_64:/* Folding cache line into 128bit */ | ||
173 | #ifdef __x86_64__ | ||
174 | movdqa .Lconstant_R4R3(%rip), CONSTANT | ||
175 | #else | ||
176 | movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT | ||
177 | #endif | ||
178 | prefetchnta (BUF) | ||
179 | |||
180 | movdqa %xmm1, %xmm5 | ||
181 | PCLMULQDQ 0x00, CONSTANT, %xmm1 | ||
182 | PCLMULQDQ 0x11, CONSTANT, %xmm5 | ||
183 | pxor %xmm5, %xmm1 | ||
184 | pxor %xmm2, %xmm1 | ||
185 | |||
186 | movdqa %xmm1, %xmm5 | ||
187 | PCLMULQDQ 0x00, CONSTANT, %xmm1 | ||
188 | PCLMULQDQ 0x11, CONSTANT, %xmm5 | ||
189 | pxor %xmm5, %xmm1 | ||
190 | pxor %xmm3, %xmm1 | ||
191 | |||
192 | movdqa %xmm1, %xmm5 | ||
193 | PCLMULQDQ 0x00, CONSTANT, %xmm1 | ||
194 | PCLMULQDQ 0x11, CONSTANT, %xmm5 | ||
195 | pxor %xmm5, %xmm1 | ||
196 | pxor %xmm4, %xmm1 | ||
197 | |||
198 | cmp $0x10, LEN | ||
199 | jb fold_64 | ||
200 | loop_16:/* Folding rest buffer into 128bit */ | ||
201 | movdqa %xmm1, %xmm5 | ||
202 | PCLMULQDQ 0x00, CONSTANT, %xmm1 | ||
203 | PCLMULQDQ 0x11, CONSTANT, %xmm5 | ||
204 | pxor %xmm5, %xmm1 | ||
205 | pxor (BUF), %xmm1 | ||
206 | sub $0x10, LEN | ||
207 | add $0x10, BUF | ||
208 | cmp $0x10, LEN | ||
209 | jge loop_16 | ||
210 | |||
211 | fold_64: | ||
212 | /* perform the last 64 bit fold, also adds 32 zeroes | ||
213 | * to the input stream */ | ||
214 | PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */ | ||
215 | psrldq $0x08, %xmm1 | ||
216 | pxor CONSTANT, %xmm1 | ||
217 | |||
218 | /* final 32-bit fold */ | ||
219 | movdqa %xmm1, %xmm2 | ||
220 | #ifdef __x86_64__ | ||
221 | movdqa .Lconstant_R5(%rip), CONSTANT | ||
222 | movdqa .Lconstant_mask32(%rip), %xmm3 | ||
223 | #else | ||
224 | movdqa .Lconstant_R5 - delta(%ecx), CONSTANT | ||
225 | movdqa .Lconstant_mask32 - delta(%ecx), %xmm3 | ||
226 | #endif | ||
227 | psrldq $0x04, %xmm2 | ||
228 | pand %xmm3, %xmm1 | ||
229 | PCLMULQDQ 0x00, CONSTANT, %xmm1 | ||
230 | pxor %xmm2, %xmm1 | ||
231 | |||
232 | /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ | ||
233 | #ifdef __x86_64__ | ||
234 | movdqa .Lconstant_RUpoly(%rip), CONSTANT | ||
235 | #else | ||
236 | movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT | ||
237 | #endif | ||
238 | movdqa %xmm1, %xmm2 | ||
239 | pand %xmm3, %xmm1 | ||
240 | PCLMULQDQ 0x10, CONSTANT, %xmm1 | ||
241 | pand %xmm3, %xmm1 | ||
242 | PCLMULQDQ 0x00, CONSTANT, %xmm1 | ||
243 | pxor %xmm2, %xmm1 | ||
244 | pextrd $0x01, %xmm1, %eax | ||
245 | |||
246 | ret | ||
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c new file mode 100644 index 000000000000..9d014a74ef96 --- /dev/null +++ b/arch/x86/crypto/crc32-pclmul_glue.c | |||
@@ -0,0 +1,201 @@ | |||
1 | /* GPL HEADER START | ||
2 | * | ||
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 only, | ||
7 | * as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
12 | * General Public License version 2 for more details (a copy is included | ||
13 | * in the LICENSE file that accompanied this code). | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * version 2 along with this program; If not, see http://www.gnu.org/licenses | ||
17 | * | ||
18 | * Please visit http://www.xyratex.com/contact if you need additional | ||
19 | * information or have any questions. | ||
20 | * | ||
21 | * GPL HEADER END | ||
22 | */ | ||
23 | |||
24 | /* | ||
25 | * Copyright 2012 Xyratex Technology Limited | ||
26 | * | ||
27 | * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation. | ||
28 | */ | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/string.h> | ||
32 | #include <linux/kernel.h> | ||
33 | #include <linux/crc32.h> | ||
34 | #include <crypto/internal/hash.h> | ||
35 | |||
36 | #include <asm/cpufeature.h> | ||
37 | #include <asm/cpu_device_id.h> | ||
38 | #include <asm/i387.h> | ||
39 | |||
40 | #define CHKSUM_BLOCK_SIZE 1 | ||
41 | #define CHKSUM_DIGEST_SIZE 4 | ||
42 | |||
43 | #define PCLMUL_MIN_LEN 64L /* minimum size of buffer | ||
44 | * for crc32_pclmul_le_16 */ | ||
45 | #define SCALE_F 16L /* size of xmm register */ | ||
46 | #define SCALE_F_MASK (SCALE_F - 1) | ||
47 | |||
48 | u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32); | ||
49 | |||
50 | static u32 __attribute__((pure)) | ||
51 | crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) | ||
52 | { | ||
53 | unsigned int iquotient; | ||
54 | unsigned int iremainder; | ||
55 | unsigned int prealign; | ||
56 | |||
57 | if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable()) | ||
58 | return crc32_le(crc, p, len); | ||
59 | |||
60 | if ((long)p & SCALE_F_MASK) { | ||
61 | /* align p to 16 byte */ | ||
62 | prealign = SCALE_F - ((long)p & SCALE_F_MASK); | ||
63 | |||
64 | crc = crc32_le(crc, p, prealign); | ||
65 | len -= prealign; | ||
66 | p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & | ||
67 | ~SCALE_F_MASK); | ||
68 | } | ||
69 | iquotient = len & (~SCALE_F_MASK); | ||
70 | iremainder = len & SCALE_F_MASK; | ||
71 | |||
72 | kernel_fpu_begin(); | ||
73 | crc = crc32_pclmul_le_16(p, iquotient, crc); | ||
74 | kernel_fpu_end(); | ||
75 | |||
76 | if (iremainder) | ||
77 | crc = crc32_le(crc, p + iquotient, iremainder); | ||
78 | |||
79 | return crc; | ||
80 | } | ||
81 | |||
82 | static int crc32_pclmul_cra_init(struct crypto_tfm *tfm) | ||
83 | { | ||
84 | u32 *key = crypto_tfm_ctx(tfm); | ||
85 | |||
86 | *key = 0; | ||
87 | |||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, | ||
92 | unsigned int keylen) | ||
93 | { | ||
94 | u32 *mctx = crypto_shash_ctx(hash); | ||
95 | |||
96 | if (keylen != sizeof(u32)) { | ||
97 | crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
98 | return -EINVAL; | ||
99 | } | ||
100 | *mctx = le32_to_cpup((__le32 *)key); | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static int crc32_pclmul_init(struct shash_desc *desc) | ||
105 | { | ||
106 | u32 *mctx = crypto_shash_ctx(desc->tfm); | ||
107 | u32 *crcp = shash_desc_ctx(desc); | ||
108 | |||
109 | *crcp = *mctx; | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data, | ||
115 | unsigned int len) | ||
116 | { | ||
117 | u32 *crcp = shash_desc_ctx(desc); | ||
118 | |||
119 | *crcp = crc32_pclmul_le(*crcp, data, len); | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | /* No final XOR 0xFFFFFFFF, like crc32_le */ | ||
124 | static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len, | ||
125 | u8 *out) | ||
126 | { | ||
127 | *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len)); | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data, | ||
132 | unsigned int len, u8 *out) | ||
133 | { | ||
134 | return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); | ||
135 | } | ||
136 | |||
137 | static int crc32_pclmul_final(struct shash_desc *desc, u8 *out) | ||
138 | { | ||
139 | u32 *crcp = shash_desc_ctx(desc); | ||
140 | |||
141 | *(__le32 *)out = cpu_to_le32p(crcp); | ||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data, | ||
146 | unsigned int len, u8 *out) | ||
147 | { | ||
148 | return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len, | ||
149 | out); | ||
150 | } | ||
151 | |||
152 | static struct shash_alg alg = { | ||
153 | .setkey = crc32_pclmul_setkey, | ||
154 | .init = crc32_pclmul_init, | ||
155 | .update = crc32_pclmul_update, | ||
156 | .final = crc32_pclmul_final, | ||
157 | .finup = crc32_pclmul_finup, | ||
158 | .digest = crc32_pclmul_digest, | ||
159 | .descsize = sizeof(u32), | ||
160 | .digestsize = CHKSUM_DIGEST_SIZE, | ||
161 | .base = { | ||
162 | .cra_name = "crc32", | ||
163 | .cra_driver_name = "crc32-pclmul", | ||
164 | .cra_priority = 200, | ||
165 | .cra_blocksize = CHKSUM_BLOCK_SIZE, | ||
166 | .cra_ctxsize = sizeof(u32), | ||
167 | .cra_module = THIS_MODULE, | ||
168 | .cra_init = crc32_pclmul_cra_init, | ||
169 | } | ||
170 | }; | ||
171 | |||
172 | static const struct x86_cpu_id crc32pclmul_cpu_id[] = { | ||
173 | X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), | ||
174 | {} | ||
175 | }; | ||
176 | MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id); | ||
177 | |||
178 | |||
179 | static int __init crc32_pclmul_mod_init(void) | ||
180 | { | ||
181 | |||
182 | if (!x86_match_cpu(crc32pclmul_cpu_id)) { | ||
183 | pr_info("PCLMULQDQ-NI instructions are not detected.\n"); | ||
184 | return -ENODEV; | ||
185 | } | ||
186 | return crypto_register_shash(&alg); | ||
187 | } | ||
188 | |||
189 | static void __exit crc32_pclmul_mod_fini(void) | ||
190 | { | ||
191 | crypto_unregister_shash(&alg); | ||
192 | } | ||
193 | |||
194 | module_init(crc32_pclmul_mod_init); | ||
195 | module_exit(crc32_pclmul_mod_fini); | ||
196 | |||
197 | MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>"); | ||
198 | MODULE_LICENSE("GPL"); | ||
199 | |||
200 | MODULE_ALIAS("crc32"); | ||
201 | MODULE_ALIAS("crc32-pclmul"); | ||
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 93c6d39237ac..cf1a7ec4cc3a 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S | |||
@@ -42,6 +42,8 @@ | |||
42 | * SOFTWARE. | 42 | * SOFTWARE. |
43 | */ | 43 | */ |
44 | 44 | ||
45 | #include <linux/linkage.h> | ||
46 | |||
45 | ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction | 47 | ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction |
46 | 48 | ||
47 | .macro LABEL prefix n | 49 | .macro LABEL prefix n |
@@ -68,8 +70,7 @@ | |||
68 | 70 | ||
69 | # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); | 71 | # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); |
70 | 72 | ||
71 | .global crc_pcl | 73 | ENTRY(crc_pcl) |
72 | crc_pcl: | ||
73 | #define bufp %rdi | 74 | #define bufp %rdi |
74 | #define bufp_dw %edi | 75 | #define bufp_dw %edi |
75 | #define bufp_w %di | 76 | #define bufp_w %di |
@@ -323,6 +324,9 @@ JMPTBL_ENTRY %i | |||
323 | .noaltmacro | 324 | .noaltmacro |
324 | i=i+1 | 325 | i=i+1 |
325 | .endr | 326 | .endr |
327 | |||
328 | ENDPROC(crc_pcl) | ||
329 | |||
326 | ################################################################ | 330 | ################################################################ |
327 | ## PCLMULQDQ tables | 331 | ## PCLMULQDQ tables |
328 | ## Table is 128 entries x 2 quad words each | 332 | ## Table is 128 entries x 2 quad words each |
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 1eb7f90cb7b9..586f41aac361 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S | |||
@@ -94,6 +94,7 @@ __clmul_gf128mul_ble: | |||
94 | pxor T2, T1 | 94 | pxor T2, T1 |
95 | pxor T1, DATA | 95 | pxor T1, DATA |
96 | ret | 96 | ret |
97 | ENDPROC(__clmul_gf128mul_ble) | ||
97 | 98 | ||
98 | /* void clmul_ghash_mul(char *dst, const be128 *shash) */ | 99 | /* void clmul_ghash_mul(char *dst, const be128 *shash) */ |
99 | ENTRY(clmul_ghash_mul) | 100 | ENTRY(clmul_ghash_mul) |
@@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul) | |||
105 | PSHUFB_XMM BSWAP DATA | 106 | PSHUFB_XMM BSWAP DATA |
106 | movups DATA, (%rdi) | 107 | movups DATA, (%rdi) |
107 | ret | 108 | ret |
109 | ENDPROC(clmul_ghash_mul) | ||
108 | 110 | ||
109 | /* | 111 | /* |
110 | * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, | 112 | * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, |
@@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update) | |||
131 | movups DATA, (%rdi) | 133 | movups DATA, (%rdi) |
132 | .Lupdate_just_ret: | 134 | .Lupdate_just_ret: |
133 | ret | 135 | ret |
136 | ENDPROC(clmul_ghash_update) | ||
134 | 137 | ||
135 | /* | 138 | /* |
136 | * void clmul_ghash_setkey(be128 *shash, const u8 *key); | 139 | * void clmul_ghash_setkey(be128 *shash, const u8 *key); |
@@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey) | |||
155 | pxor %xmm1, %xmm0 | 158 | pxor %xmm1, %xmm0 |
156 | movups %xmm0, (%rdi) | 159 | movups %xmm0, (%rdi) |
157 | ret | 160 | ret |
161 | ENDPROC(clmul_ghash_setkey) | ||
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S index 72eb306680b2..329452b8f794 100644 --- a/arch/x86/crypto/salsa20-i586-asm_32.S +++ b/arch/x86/crypto/salsa20-i586-asm_32.S | |||
@@ -2,11 +2,12 @@ | |||
2 | # D. J. Bernstein | 2 | # D. J. Bernstein |
3 | # Public domain. | 3 | # Public domain. |
4 | 4 | ||
5 | # enter ECRYPT_encrypt_bytes | 5 | #include <linux/linkage.h> |
6 | |||
6 | .text | 7 | .text |
7 | .p2align 5 | 8 | |
8 | .globl ECRYPT_encrypt_bytes | 9 | # enter salsa20_encrypt_bytes |
9 | ECRYPT_encrypt_bytes: | 10 | ENTRY(salsa20_encrypt_bytes) |
10 | mov %esp,%eax | 11 | mov %esp,%eax |
11 | and $31,%eax | 12 | and $31,%eax |
12 | add $256,%eax | 13 | add $256,%eax |
@@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes: | |||
933 | add $64,%esi | 934 | add $64,%esi |
934 | # goto bytesatleast1 | 935 | # goto bytesatleast1 |
935 | jmp ._bytesatleast1 | 936 | jmp ._bytesatleast1 |
936 | # enter ECRYPT_keysetup | 937 | ENDPROC(salsa20_encrypt_bytes) |
937 | .text | 938 | |
938 | .p2align 5 | 939 | # enter salsa20_keysetup |
939 | .globl ECRYPT_keysetup | 940 | ENTRY(salsa20_keysetup) |
940 | ECRYPT_keysetup: | ||
941 | mov %esp,%eax | 941 | mov %esp,%eax |
942 | and $31,%eax | 942 | and $31,%eax |
943 | add $256,%eax | 943 | add $256,%eax |
@@ -1060,11 +1060,10 @@ ECRYPT_keysetup: | |||
1060 | # leave | 1060 | # leave |
1061 | add %eax,%esp | 1061 | add %eax,%esp |
1062 | ret | 1062 | ret |
1063 | # enter ECRYPT_ivsetup | 1063 | ENDPROC(salsa20_keysetup) |
1064 | .text | 1064 | |
1065 | .p2align 5 | 1065 | # enter salsa20_ivsetup |
1066 | .globl ECRYPT_ivsetup | 1066 | ENTRY(salsa20_ivsetup) |
1067 | ECRYPT_ivsetup: | ||
1068 | mov %esp,%eax | 1067 | mov %esp,%eax |
1069 | and $31,%eax | 1068 | and $31,%eax |
1070 | add $256,%eax | 1069 | add $256,%eax |
@@ -1112,3 +1111,4 @@ ECRYPT_ivsetup: | |||
1112 | # leave | 1111 | # leave |
1113 | add %eax,%esp | 1112 | add %eax,%esp |
1114 | ret | 1113 | ret |
1114 | ENDPROC(salsa20_ivsetup) | ||
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S index 6214a9b09706..9279e0b2d60e 100644 --- a/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S | |||
@@ -1,8 +1,7 @@ | |||
1 | # enter ECRYPT_encrypt_bytes | 1 | #include <linux/linkage.h> |
2 | .text | 2 | |
3 | .p2align 5 | 3 | # enter salsa20_encrypt_bytes |
4 | .globl ECRYPT_encrypt_bytes | 4 | ENTRY(salsa20_encrypt_bytes) |
5 | ECRYPT_encrypt_bytes: | ||
6 | mov %rsp,%r11 | 5 | mov %rsp,%r11 |
7 | and $31,%r11 | 6 | and $31,%r11 |
8 | add $256,%r11 | 7 | add $256,%r11 |
@@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes: | |||
802 | # comment:fp stack unchanged by jump | 801 | # comment:fp stack unchanged by jump |
803 | # goto bytesatleast1 | 802 | # goto bytesatleast1 |
804 | jmp ._bytesatleast1 | 803 | jmp ._bytesatleast1 |
805 | # enter ECRYPT_keysetup | 804 | ENDPROC(salsa20_encrypt_bytes) |
806 | .text | 805 | |
807 | .p2align 5 | 806 | # enter salsa20_keysetup |
808 | .globl ECRYPT_keysetup | 807 | ENTRY(salsa20_keysetup) |
809 | ECRYPT_keysetup: | ||
810 | mov %rsp,%r11 | 808 | mov %rsp,%r11 |
811 | and $31,%r11 | 809 | and $31,%r11 |
812 | add $256,%r11 | 810 | add $256,%r11 |
@@ -892,11 +890,10 @@ ECRYPT_keysetup: | |||
892 | mov %rdi,%rax | 890 | mov %rdi,%rax |
893 | mov %rsi,%rdx | 891 | mov %rsi,%rdx |
894 | ret | 892 | ret |
895 | # enter ECRYPT_ivsetup | 893 | ENDPROC(salsa20_keysetup) |
896 | .text | 894 | |
897 | .p2align 5 | 895 | # enter salsa20_ivsetup |
898 | .globl ECRYPT_ivsetup | 896 | ENTRY(salsa20_ivsetup) |
899 | ECRYPT_ivsetup: | ||
900 | mov %rsp,%r11 | 897 | mov %rsp,%r11 |
901 | and $31,%r11 | 898 | and $31,%r11 |
902 | add $256,%r11 | 899 | add $256,%r11 |
@@ -918,3 +915,4 @@ ECRYPT_ivsetup: | |||
918 | mov %rdi,%rax | 915 | mov %rdi,%rax |
919 | mov %rsi,%rdx | 916 | mov %rsi,%rdx |
920 | ret | 917 | ret |
918 | ENDPROC(salsa20_ivsetup) | ||
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index a3a3c0205c16..5e8e67739bb5 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c | |||
@@ -26,11 +26,6 @@ | |||
26 | #define SALSA20_MIN_KEY_SIZE 16U | 26 | #define SALSA20_MIN_KEY_SIZE 16U |
27 | #define SALSA20_MAX_KEY_SIZE 32U | 27 | #define SALSA20_MAX_KEY_SIZE 32U |
28 | 28 | ||
29 | // use the ECRYPT_* function names | ||
30 | #define salsa20_keysetup ECRYPT_keysetup | ||
31 | #define salsa20_ivsetup ECRYPT_ivsetup | ||
32 | #define salsa20_encrypt_bytes ECRYPT_encrypt_bytes | ||
33 | |||
34 | struct salsa20_ctx | 29 | struct salsa20_ctx |
35 | { | 30 | { |
36 | u32 input[16]; | 31 | u32 input[16]; |
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 02b0e9fe997c..43c938612b74 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S | |||
@@ -24,6 +24,7 @@ | |||
24 | * | 24 | * |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/linkage.h> | ||
27 | #include "glue_helper-asm-avx.S" | 28 | #include "glue_helper-asm-avx.S" |
28 | 29 | ||
29 | .file "serpent-avx-x86_64-asm_64.S" | 30 | .file "serpent-avx-x86_64-asm_64.S" |
@@ -566,8 +567,6 @@ | |||
566 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | 567 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) |
567 | 568 | ||
568 | .align 8 | 569 | .align 8 |
569 | .type __serpent_enc_blk8_avx,@function; | ||
570 | |||
571 | __serpent_enc_blk8_avx: | 570 | __serpent_enc_blk8_avx: |
572 | /* input: | 571 | /* input: |
573 | * %rdi: ctx, CTX | 572 | * %rdi: ctx, CTX |
@@ -619,10 +618,9 @@ __serpent_enc_blk8_avx: | |||
619 | write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | 618 | write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); |
620 | 619 | ||
621 | ret; | 620 | ret; |
621 | ENDPROC(__serpent_enc_blk8_avx) | ||
622 | 622 | ||
623 | .align 8 | 623 | .align 8 |
624 | .type __serpent_dec_blk8_avx,@function; | ||
625 | |||
626 | __serpent_dec_blk8_avx: | 624 | __serpent_dec_blk8_avx: |
627 | /* input: | 625 | /* input: |
628 | * %rdi: ctx, CTX | 626 | * %rdi: ctx, CTX |
@@ -674,12 +672,9 @@ __serpent_dec_blk8_avx: | |||
674 | write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); | 672 | write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); |
675 | 673 | ||
676 | ret; | 674 | ret; |
675 | ENDPROC(__serpent_dec_blk8_avx) | ||
677 | 676 | ||
678 | .align 8 | 677 | ENTRY(serpent_ecb_enc_8way_avx) |
679 | .global serpent_ecb_enc_8way_avx | ||
680 | .type serpent_ecb_enc_8way_avx,@function; | ||
681 | |||
682 | serpent_ecb_enc_8way_avx: | ||
683 | /* input: | 678 | /* input: |
684 | * %rdi: ctx, CTX | 679 | * %rdi: ctx, CTX |
685 | * %rsi: dst | 680 | * %rsi: dst |
@@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx: | |||
693 | store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 688 | store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
694 | 689 | ||
695 | ret; | 690 | ret; |
691 | ENDPROC(serpent_ecb_enc_8way_avx) | ||
696 | 692 | ||
697 | .align 8 | 693 | ENTRY(serpent_ecb_dec_8way_avx) |
698 | .global serpent_ecb_dec_8way_avx | ||
699 | .type serpent_ecb_dec_8way_avx,@function; | ||
700 | |||
701 | serpent_ecb_dec_8way_avx: | ||
702 | /* input: | 694 | /* input: |
703 | * %rdi: ctx, CTX | 695 | * %rdi: ctx, CTX |
704 | * %rsi: dst | 696 | * %rsi: dst |
@@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx: | |||
712 | store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); | 704 | store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); |
713 | 705 | ||
714 | ret; | 706 | ret; |
707 | ENDPROC(serpent_ecb_dec_8way_avx) | ||
715 | 708 | ||
716 | .align 8 | 709 | ENTRY(serpent_cbc_dec_8way_avx) |
717 | .global serpent_cbc_dec_8way_avx | ||
718 | .type serpent_cbc_dec_8way_avx,@function; | ||
719 | |||
720 | serpent_cbc_dec_8way_avx: | ||
721 | /* input: | 710 | /* input: |
722 | * %rdi: ctx, CTX | 711 | * %rdi: ctx, CTX |
723 | * %rsi: dst | 712 | * %rsi: dst |
@@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx: | |||
731 | store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); | 720 | store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); |
732 | 721 | ||
733 | ret; | 722 | ret; |
723 | ENDPROC(serpent_cbc_dec_8way_avx) | ||
734 | 724 | ||
735 | .align 8 | 725 | ENTRY(serpent_ctr_8way_avx) |
736 | .global serpent_ctr_8way_avx | ||
737 | .type serpent_ctr_8way_avx,@function; | ||
738 | |||
739 | serpent_ctr_8way_avx: | ||
740 | /* input: | 726 | /* input: |
741 | * %rdi: ctx, CTX | 727 | * %rdi: ctx, CTX |
742 | * %rsi: dst | 728 | * %rsi: dst |
@@ -752,3 +738,4 @@ serpent_ctr_8way_avx: | |||
752 | store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 738 | store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
753 | 739 | ||
754 | ret; | 740 | ret; |
741 | ENDPROC(serpent_ctr_8way_avx) | ||
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S index c00053d42f99..d348f1553a79 100644 --- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S | |||
@@ -24,6 +24,8 @@ | |||
24 | * | 24 | * |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/linkage.h> | ||
28 | |||
27 | .file "serpent-sse2-i586-asm_32.S" | 29 | .file "serpent-sse2-i586-asm_32.S" |
28 | .text | 30 | .text |
29 | 31 | ||
@@ -510,11 +512,7 @@ | |||
510 | pxor t0, x3; \ | 512 | pxor t0, x3; \ |
511 | movdqu x3, (3*4*4)(out); | 513 | movdqu x3, (3*4*4)(out); |
512 | 514 | ||
513 | .align 8 | 515 | ENTRY(__serpent_enc_blk_4way) |
514 | .global __serpent_enc_blk_4way | ||
515 | .type __serpent_enc_blk_4way,@function; | ||
516 | |||
517 | __serpent_enc_blk_4way: | ||
518 | /* input: | 516 | /* input: |
519 | * arg_ctx(%esp): ctx, CTX | 517 | * arg_ctx(%esp): ctx, CTX |
520 | * arg_dst(%esp): dst | 518 | * arg_dst(%esp): dst |
@@ -566,22 +564,19 @@ __serpent_enc_blk_4way: | |||
566 | movl arg_dst(%esp), %eax; | 564 | movl arg_dst(%esp), %eax; |
567 | 565 | ||
568 | cmpb $0, arg_xor(%esp); | 566 | cmpb $0, arg_xor(%esp); |
569 | jnz __enc_xor4; | 567 | jnz .L__enc_xor4; |
570 | 568 | ||
571 | write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | 569 | write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); |
572 | 570 | ||
573 | ret; | 571 | ret; |
574 | 572 | ||
575 | __enc_xor4: | 573 | .L__enc_xor4: |
576 | xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | 574 | xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); |
577 | 575 | ||
578 | ret; | 576 | ret; |
577 | ENDPROC(__serpent_enc_blk_4way) | ||
579 | 578 | ||
580 | .align 8 | 579 | ENTRY(serpent_dec_blk_4way) |
581 | .global serpent_dec_blk_4way | ||
582 | .type serpent_dec_blk_4way,@function; | ||
583 | |||
584 | serpent_dec_blk_4way: | ||
585 | /* input: | 580 | /* input: |
586 | * arg_ctx(%esp): ctx, CTX | 581 | * arg_ctx(%esp): ctx, CTX |
587 | * arg_dst(%esp): dst | 582 | * arg_dst(%esp): dst |
@@ -633,3 +628,4 @@ serpent_dec_blk_4way: | |||
633 | write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); | 628 | write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); |
634 | 629 | ||
635 | ret; | 630 | ret; |
631 | ENDPROC(serpent_dec_blk_4way) | ||
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S index 3ee1ff04d3e9..acc066c7c6b2 100644 --- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | |||
@@ -24,6 +24,8 @@ | |||
24 | * | 24 | * |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/linkage.h> | ||
28 | |||
27 | .file "serpent-sse2-x86_64-asm_64.S" | 29 | .file "serpent-sse2-x86_64-asm_64.S" |
28 | .text | 30 | .text |
29 | 31 | ||
@@ -632,11 +634,7 @@ | |||
632 | pxor t0, x3; \ | 634 | pxor t0, x3; \ |
633 | movdqu x3, (3*4*4)(out); | 635 | movdqu x3, (3*4*4)(out); |
634 | 636 | ||
635 | .align 8 | 637 | ENTRY(__serpent_enc_blk_8way) |
636 | .global __serpent_enc_blk_8way | ||
637 | .type __serpent_enc_blk_8way,@function; | ||
638 | |||
639 | __serpent_enc_blk_8way: | ||
640 | /* input: | 638 | /* input: |
641 | * %rdi: ctx, CTX | 639 | * %rdi: ctx, CTX |
642 | * %rsi: dst | 640 | * %rsi: dst |
@@ -687,24 +685,21 @@ __serpent_enc_blk_8way: | |||
687 | leaq (4*4*4)(%rsi), %rax; | 685 | leaq (4*4*4)(%rsi), %rax; |
688 | 686 | ||
689 | testb %cl, %cl; | 687 | testb %cl, %cl; |
690 | jnz __enc_xor8; | 688 | jnz .L__enc_xor8; |
691 | 689 | ||
692 | write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | 690 | write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); |
693 | write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | 691 | write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); |
694 | 692 | ||
695 | ret; | 693 | ret; |
696 | 694 | ||
697 | __enc_xor8: | 695 | .L__enc_xor8: |
698 | xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | 696 | xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); |
699 | xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | 697 | xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); |
700 | 698 | ||
701 | ret; | 699 | ret; |
700 | ENDPROC(__serpent_enc_blk_8way) | ||
702 | 701 | ||
703 | .align 8 | 702 | ENTRY(serpent_dec_blk_8way) |
704 | .global serpent_dec_blk_8way | ||
705 | .type serpent_dec_blk_8way,@function; | ||
706 | |||
707 | serpent_dec_blk_8way: | ||
708 | /* input: | 703 | /* input: |
709 | * %rdi: ctx, CTX | 704 | * %rdi: ctx, CTX |
710 | * %rsi: dst | 705 | * %rsi: dst |
@@ -756,3 +751,4 @@ serpent_dec_blk_8way: | |||
756 | write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); | 751 | write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); |
757 | 752 | ||
758 | ret; | 753 | ret; |
754 | ENDPROC(serpent_dec_blk_8way) | ||
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 49d6987a73d9..a4109506a5e8 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S | |||
@@ -28,6 +28,8 @@ | |||
28 | * (at your option) any later version. | 28 | * (at your option) any later version. |
29 | */ | 29 | */ |
30 | 30 | ||
31 | #include <linux/linkage.h> | ||
32 | |||
31 | #define CTX %rdi // arg1 | 33 | #define CTX %rdi // arg1 |
32 | #define BUF %rsi // arg2 | 34 | #define BUF %rsi // arg2 |
33 | #define CNT %rdx // arg3 | 35 | #define CNT %rdx // arg3 |
@@ -69,10 +71,8 @@ | |||
69 | * param: function's name | 71 | * param: function's name |
70 | */ | 72 | */ |
71 | .macro SHA1_VECTOR_ASM name | 73 | .macro SHA1_VECTOR_ASM name |
72 | .global \name | 74 | ENTRY(\name) |
73 | .type \name, @function | 75 | |
74 | .align 32 | ||
75 | \name: | ||
76 | push %rbx | 76 | push %rbx |
77 | push %rbp | 77 | push %rbp |
78 | push %r12 | 78 | push %r12 |
@@ -106,7 +106,7 @@ | |||
106 | pop %rbx | 106 | pop %rbx |
107 | ret | 107 | ret |
108 | 108 | ||
109 | .size \name, .-\name | 109 | ENDPROC(\name) |
110 | .endm | 110 | .endm |
111 | 111 | ||
112 | /* | 112 | /* |
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index ebac16bfa830..8d3e113b2c95 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S | |||
@@ -23,6 +23,7 @@ | |||
23 | * | 23 | * |
24 | */ | 24 | */ |
25 | 25 | ||
26 | #include <linux/linkage.h> | ||
26 | #include "glue_helper-asm-avx.S" | 27 | #include "glue_helper-asm-avx.S" |
27 | 28 | ||
28 | .file "twofish-avx-x86_64-asm_64.S" | 29 | .file "twofish-avx-x86_64-asm_64.S" |
@@ -243,8 +244,6 @@ | |||
243 | vpxor x3, wkey, x3; | 244 | vpxor x3, wkey, x3; |
244 | 245 | ||
245 | .align 8 | 246 | .align 8 |
246 | .type __twofish_enc_blk8,@function; | ||
247 | |||
248 | __twofish_enc_blk8: | 247 | __twofish_enc_blk8: |
249 | /* input: | 248 | /* input: |
250 | * %rdi: ctx, CTX | 249 | * %rdi: ctx, CTX |
@@ -284,10 +283,9 @@ __twofish_enc_blk8: | |||
284 | outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); | 283 | outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); |
285 | 284 | ||
286 | ret; | 285 | ret; |
286 | ENDPROC(__twofish_enc_blk8) | ||
287 | 287 | ||
288 | .align 8 | 288 | .align 8 |
289 | .type __twofish_dec_blk8,@function; | ||
290 | |||
291 | __twofish_dec_blk8: | 289 | __twofish_dec_blk8: |
292 | /* input: | 290 | /* input: |
293 | * %rdi: ctx, CTX | 291 | * %rdi: ctx, CTX |
@@ -325,12 +323,9 @@ __twofish_dec_blk8: | |||
325 | outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); | 323 | outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); |
326 | 324 | ||
327 | ret; | 325 | ret; |
326 | ENDPROC(__twofish_dec_blk8) | ||
328 | 327 | ||
329 | .align 8 | 328 | ENTRY(twofish_ecb_enc_8way) |
330 | .global twofish_ecb_enc_8way | ||
331 | .type twofish_ecb_enc_8way,@function; | ||
332 | |||
333 | twofish_ecb_enc_8way: | ||
334 | /* input: | 329 | /* input: |
335 | * %rdi: ctx, CTX | 330 | * %rdi: ctx, CTX |
336 | * %rsi: dst | 331 | * %rsi: dst |
@@ -346,12 +341,9 @@ twofish_ecb_enc_8way: | |||
346 | store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); | 341 | store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); |
347 | 342 | ||
348 | ret; | 343 | ret; |
344 | ENDPROC(twofish_ecb_enc_8way) | ||
349 | 345 | ||
350 | .align 8 | 346 | ENTRY(twofish_ecb_dec_8way) |
351 | .global twofish_ecb_dec_8way | ||
352 | .type twofish_ecb_dec_8way,@function; | ||
353 | |||
354 | twofish_ecb_dec_8way: | ||
355 | /* input: | 347 | /* input: |
356 | * %rdi: ctx, CTX | 348 | * %rdi: ctx, CTX |
357 | * %rsi: dst | 349 | * %rsi: dst |
@@ -367,12 +359,9 @@ twofish_ecb_dec_8way: | |||
367 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 359 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
368 | 360 | ||
369 | ret; | 361 | ret; |
362 | ENDPROC(twofish_ecb_dec_8way) | ||
370 | 363 | ||
371 | .align 8 | 364 | ENTRY(twofish_cbc_dec_8way) |
372 | .global twofish_cbc_dec_8way | ||
373 | .type twofish_cbc_dec_8way,@function; | ||
374 | |||
375 | twofish_cbc_dec_8way: | ||
376 | /* input: | 365 | /* input: |
377 | * %rdi: ctx, CTX | 366 | * %rdi: ctx, CTX |
378 | * %rsi: dst | 367 | * %rsi: dst |
@@ -393,12 +382,9 @@ twofish_cbc_dec_8way: | |||
393 | popq %r12; | 382 | popq %r12; |
394 | 383 | ||
395 | ret; | 384 | ret; |
385 | ENDPROC(twofish_cbc_dec_8way) | ||
396 | 386 | ||
397 | .align 8 | 387 | ENTRY(twofish_ctr_8way) |
398 | .global twofish_ctr_8way | ||
399 | .type twofish_ctr_8way,@function; | ||
400 | |||
401 | twofish_ctr_8way: | ||
402 | /* input: | 388 | /* input: |
403 | * %rdi: ctx, CTX | 389 | * %rdi: ctx, CTX |
404 | * %rsi: dst | 390 | * %rsi: dst |
@@ -421,3 +407,4 @@ twofish_ctr_8way: | |||
421 | popq %r12; | 407 | popq %r12; |
422 | 408 | ||
423 | ret; | 409 | ret; |
410 | ENDPROC(twofish_ctr_8way) | ||
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S index 658af4bb35c9..694ea4587ba7 100644 --- a/arch/x86/crypto/twofish-i586-asm_32.S +++ b/arch/x86/crypto/twofish-i586-asm_32.S | |||
@@ -20,6 +20,7 @@ | |||
20 | .file "twofish-i586-asm.S" | 20 | .file "twofish-i586-asm.S" |
21 | .text | 21 | .text |
22 | 22 | ||
23 | #include <linux/linkage.h> | ||
23 | #include <asm/asm-offsets.h> | 24 | #include <asm/asm-offsets.h> |
24 | 25 | ||
25 | /* return address at 0 */ | 26 | /* return address at 0 */ |
@@ -219,11 +220,7 @@ | |||
219 | xor %esi, d ## D;\ | 220 | xor %esi, d ## D;\ |
220 | ror $1, d ## D; | 221 | ror $1, d ## D; |
221 | 222 | ||
222 | .align 4 | 223 | ENTRY(twofish_enc_blk) |
223 | .global twofish_enc_blk | ||
224 | .global twofish_dec_blk | ||
225 | |||
226 | twofish_enc_blk: | ||
227 | push %ebp /* save registers according to calling convention*/ | 224 | push %ebp /* save registers according to calling convention*/ |
228 | push %ebx | 225 | push %ebx |
229 | push %esi | 226 | push %esi |
@@ -277,8 +274,9 @@ twofish_enc_blk: | |||
277 | pop %ebp | 274 | pop %ebp |
278 | mov $1, %eax | 275 | mov $1, %eax |
279 | ret | 276 | ret |
277 | ENDPROC(twofish_enc_blk) | ||
280 | 278 | ||
281 | twofish_dec_blk: | 279 | ENTRY(twofish_dec_blk) |
282 | push %ebp /* save registers according to calling convention*/ | 280 | push %ebp /* save registers according to calling convention*/ |
283 | push %ebx | 281 | push %ebx |
284 | push %esi | 282 | push %esi |
@@ -333,3 +331,4 @@ twofish_dec_blk: | |||
333 | pop %ebp | 331 | pop %ebp |
334 | mov $1, %eax | 332 | mov $1, %eax |
335 | ret | 333 | ret |
334 | ENDPROC(twofish_dec_blk) | ||
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index 5b012a2c5119..1c3b7ceb36d2 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S | |||
@@ -20,6 +20,8 @@ | |||
20 | * | 20 | * |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/linkage.h> | ||
24 | |||
23 | .file "twofish-x86_64-asm-3way.S" | 25 | .file "twofish-x86_64-asm-3way.S" |
24 | .text | 26 | .text |
25 | 27 | ||
@@ -214,11 +216,7 @@ | |||
214 | rorq $32, RAB2; \ | 216 | rorq $32, RAB2; \ |
215 | outunpack3(mov, RIO, 2, RAB, 2); | 217 | outunpack3(mov, RIO, 2, RAB, 2); |
216 | 218 | ||
217 | .align 8 | 219 | ENTRY(__twofish_enc_blk_3way) |
218 | .global __twofish_enc_blk_3way | ||
219 | .type __twofish_enc_blk_3way,@function; | ||
220 | |||
221 | __twofish_enc_blk_3way: | ||
222 | /* input: | 220 | /* input: |
223 | * %rdi: ctx, CTX | 221 | * %rdi: ctx, CTX |
224 | * %rsi: dst | 222 | * %rsi: dst |
@@ -250,7 +248,7 @@ __twofish_enc_blk_3way: | |||
250 | popq %rbp; /* bool xor */ | 248 | popq %rbp; /* bool xor */ |
251 | 249 | ||
252 | testb %bpl, %bpl; | 250 | testb %bpl, %bpl; |
253 | jnz __enc_xor3; | 251 | jnz .L__enc_xor3; |
254 | 252 | ||
255 | outunpack_enc3(mov); | 253 | outunpack_enc3(mov); |
256 | 254 | ||
@@ -262,7 +260,7 @@ __twofish_enc_blk_3way: | |||
262 | popq %r15; | 260 | popq %r15; |
263 | ret; | 261 | ret; |
264 | 262 | ||
265 | __enc_xor3: | 263 | .L__enc_xor3: |
266 | outunpack_enc3(xor); | 264 | outunpack_enc3(xor); |
267 | 265 | ||
268 | popq %rbx; | 266 | popq %rbx; |
@@ -272,11 +270,9 @@ __enc_xor3: | |||
272 | popq %r14; | 270 | popq %r14; |
273 | popq %r15; | 271 | popq %r15; |
274 | ret; | 272 | ret; |
273 | ENDPROC(__twofish_enc_blk_3way) | ||
275 | 274 | ||
276 | .global twofish_dec_blk_3way | 275 | ENTRY(twofish_dec_blk_3way) |
277 | .type twofish_dec_blk_3way,@function; | ||
278 | |||
279 | twofish_dec_blk_3way: | ||
280 | /* input: | 276 | /* input: |
281 | * %rdi: ctx, CTX | 277 | * %rdi: ctx, CTX |
282 | * %rsi: dst | 278 | * %rsi: dst |
@@ -313,4 +309,4 @@ twofish_dec_blk_3way: | |||
313 | popq %r14; | 309 | popq %r14; |
314 | popq %r15; | 310 | popq %r15; |
315 | ret; | 311 | ret; |
316 | 312 | ENDPROC(twofish_dec_blk_3way) | |
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index 7bcf3fcc3668..a039d21986a2 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S | |||
@@ -20,6 +20,7 @@ | |||
20 | .file "twofish-x86_64-asm.S" | 20 | .file "twofish-x86_64-asm.S" |
21 | .text | 21 | .text |
22 | 22 | ||
23 | #include <linux/linkage.h> | ||
23 | #include <asm/asm-offsets.h> | 24 | #include <asm/asm-offsets.h> |
24 | 25 | ||
25 | #define a_offset 0 | 26 | #define a_offset 0 |
@@ -214,11 +215,7 @@ | |||
214 | xor %r8d, d ## D;\ | 215 | xor %r8d, d ## D;\ |
215 | ror $1, d ## D; | 216 | ror $1, d ## D; |
216 | 217 | ||
217 | .align 8 | 218 | ENTRY(twofish_enc_blk) |
218 | .global twofish_enc_blk | ||
219 | .global twofish_dec_blk | ||
220 | |||
221 | twofish_enc_blk: | ||
222 | pushq R1 | 219 | pushq R1 |
223 | 220 | ||
224 | /* %rdi contains the ctx address */ | 221 | /* %rdi contains the ctx address */ |
@@ -269,8 +266,9 @@ twofish_enc_blk: | |||
269 | popq R1 | 266 | popq R1 |
270 | movq $1,%rax | 267 | movq $1,%rax |
271 | ret | 268 | ret |
269 | ENDPROC(twofish_enc_blk) | ||
272 | 270 | ||
273 | twofish_dec_blk: | 271 | ENTRY(twofish_dec_blk) |
274 | pushq R1 | 272 | pushq R1 |
275 | 273 | ||
276 | /* %rdi contains the ctx address */ | 274 | /* %rdi contains the ctx address */ |
@@ -320,3 +318,4 @@ twofish_dec_blk: | |||
320 | popq R1 | 318 | popq R1 |
321 | movq $1,%rax | 319 | movq $1,%rax |
322 | ret | 320 | ret |
321 | ENDPROC(twofish_dec_blk) | ||