author		Mathias Krause <minipli@googlemail.com>		2010-11-27 03:34:46 -0500
committer	Herbert Xu <herbert@gondor.apana.org.au>	2010-11-27 03:34:46 -0500
commit		0d258efb6a58fe047197c3b9cff8746bb176d58a (patch)
tree		8576f2af5212ec50509de1071cf7afe1ed9531a8
parent		21ea28abcf825729f9698afd7357dfbf7040d4f8 (diff)
crypto: aesni-intel - Ported implementation to x86-32
The AES-NI instructions are also available in legacy mode, so the 32-bit
architecture can benefit from them as well.
To illustrate the performance gain, here's a short summary of a dm-crypt
speed test on a Core i7 M620 running at 2.67 GHz, comparing both assembler
implementations:
x86:                 i586        aes-ni      delta
ECB, 256 bit:    93.8 MB/s    123.3 MB/s    +31.4%
CBC, 256 bit:    84.8 MB/s    262.3 MB/s   +209.3%
LRW, 256 bit:   108.6 MB/s    222.1 MB/s   +104.5%
XTS, 256 bit:   105.0 MB/s    205.5 MB/s    +95.7%
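(The delta column is the relative throughput gain of the AES-NI code over the
i586 assembler implementation; for the CBC row, for instance,
262.3 / 84.8 - 1 ≈ +209.3%.)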
Additionally, due to some minor optimizations, the 64-bit version also
picked up a small performance gain, as seen below:
x86-64:          old impl.     new impl.    delta
ECB, 256 bit:   121.1 MB/s    123.0 MB/s    +1.5%
CBC, 256 bit:   285.3 MB/s    290.8 MB/s    +1.9%
LRW, 256 bit:   263.7 MB/s    265.3 MB/s    +0.6%
XTS, 256 bit:   251.1 MB/s    255.3 MB/s    +1.7%
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Reviewed-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--	arch/x86/crypto/aesni-intel_asm.S	| 197
-rw-r--r--	arch/x86/crypto/aesni-intel_glue.c	|  22
-rw-r--r--	crypto/Kconfig				|  12
3 files changed, 191 insertions(+), 40 deletions(-)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index aafced54df64..f592e03dc375 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -20,6 +20,9 @@
  * Wajdi Feghali (wajdi.k.feghali@intel.com)
  * Copyright (c) 2010, Intel Corporation.
  *
+ * Ported x86_64 version to x86:
+ * Author: Mathias Krause <minipli@googlemail.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -95,12 +98,16 @@ enc:	.octa 0x2
 #define IN	IN1
 #define KEY	%xmm2
 #define IV	%xmm3
+
 #define BSWAP_MASK %xmm10
 #define CTR	%xmm11
 #define INC	%xmm12
 
+#ifdef __x86_64__
+#define AREG	%rax
 #define KEYP	%rdi
 #define OUTP	%rsi
+#define UKEYP	OUTP
 #define INP	%rdx
 #define LEN	%rcx
 #define IVP	%r8
@@ -109,6 +116,18 @@ enc:	.octa 0x2
 #define TKEYP	T1
 #define T2	%r11
 #define TCTR_LOW T2
+#else
+#define AREG	%eax
+#define KEYP	%edi
+#define OUTP	AREG
+#define UKEYP	OUTP
+#define INP	%edx
+#define LEN	%esi
+#define IVP	%ebp
+#define KLEN	%ebx
+#define T1	%ecx
+#define TKEYP	T1
+#endif
 
 
 /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
@@ -1247,10 +1266,11 @@ _key_expansion_256a:
 	shufps $0b10001100, %xmm0, %xmm4
 	pxor %xmm4, %xmm0
 	pxor %xmm1, %xmm0
-	movaps %xmm0, (%rcx)
-	add $0x10, %rcx
+	movaps %xmm0, (TKEYP)
+	add $0x10, TKEYP
 	ret
 
+.align 4
 _key_expansion_192a:
 	pshufd $0b01010101, %xmm1, %xmm1
 	shufps $0b00010000, %xmm0, %xmm4
@@ -1268,12 +1288,13 @@ _key_expansion_192a:
 
 	movaps %xmm0, %xmm1
 	shufps $0b01000100, %xmm0, %xmm6
-	movaps %xmm6, (%rcx)
+	movaps %xmm6, (TKEYP)
 	shufps $0b01001110, %xmm2, %xmm1
-	movaps %xmm1, 16(%rcx)
-	add $0x20, %rcx
+	movaps %xmm1, 0x10(TKEYP)
+	add $0x20, TKEYP
 	ret
 
+.align 4
 _key_expansion_192b:
 	pshufd $0b01010101, %xmm1, %xmm1
 	shufps $0b00010000, %xmm0, %xmm4
@@ -1288,10 +1309,11 @@ _key_expansion_192b:
 	pxor %xmm3, %xmm2
 	pxor %xmm5, %xmm2
 
-	movaps %xmm0, (%rcx)
-	add $0x10, %rcx
+	movaps %xmm0, (TKEYP)
+	add $0x10, TKEYP
 	ret
 
+.align 4
 _key_expansion_256b:
 	pshufd $0b10101010, %xmm1, %xmm1
 	shufps $0b00010000, %xmm2, %xmm4
@@ -1299,8 +1321,8 @@ _key_expansion_256b:
 	shufps $0b10001100, %xmm2, %xmm4
 	pxor %xmm4, %xmm2
 	pxor %xmm1, %xmm2
-	movaps %xmm2, (%rcx)
-	add $0x10, %rcx
+	movaps %xmm2, (TKEYP)
+	add $0x10, TKEYP
 	ret
 
 /*
@@ -1308,17 +1330,23 @@ _key_expansion_256b:
  *		      unsigned int key_len)
  */
 ENTRY(aesni_set_key)
-	movups (%rsi), %xmm0		# user key (first 16 bytes)
-	movaps %xmm0, (%rdi)
-	lea 0x10(%rdi), %rcx		# key addr
-	movl %edx, 480(%rdi)
+#ifndef __x86_64__
+	pushl KEYP
+	movl 8(%esp), KEYP		# ctx
+	movl 12(%esp), UKEYP		# in_key
+	movl 16(%esp), %edx		# key_len
+#endif
+	movups (UKEYP), %xmm0		# user key (first 16 bytes)
+	movaps %xmm0, (KEYP)
+	lea 0x10(KEYP), TKEYP		# key addr
+	movl %edx, 480(KEYP)
 	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
 	cmp $24, %dl
 	jb .Lenc_key128
 	je .Lenc_key192
-	movups 0x10(%rsi), %xmm2	# other user key
-	movaps %xmm2, (%rcx)
-	add $0x10, %rcx
+	movups 0x10(UKEYP), %xmm2	# other user key
+	movaps %xmm2, (TKEYP)
+	add $0x10, TKEYP
 	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
 	call _key_expansion_256a
 	AESKEYGENASSIST 0x1 %xmm0 %xmm1
@@ -1347,7 +1375,7 @@ ENTRY(aesni_set_key)
 	call _key_expansion_256a
 	jmp .Ldec_key
 .Lenc_key192:
-	movq 0x10(%rsi), %xmm2		# other user key
+	movq 0x10(UKEYP), %xmm2		# other user key
 	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
 	call _key_expansion_192a
 	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
@@ -1387,33 +1415,47 @@ ENTRY(aesni_set_key)
 	AESKEYGENASSIST 0x36 %xmm0 %xmm1	# round 10
 	call _key_expansion_128
 .Ldec_key:
-	sub $0x10, %rcx
-	movaps (%rdi), %xmm0
-	movaps (%rcx), %xmm1
-	movaps %xmm0, 240(%rcx)
-	movaps %xmm1, 240(%rdi)
-	add $0x10, %rdi
-	lea 240-16(%rcx), %rsi
+	sub $0x10, TKEYP
+	movaps (KEYP), %xmm0
+	movaps (TKEYP), %xmm1
+	movaps %xmm0, 240(TKEYP)
+	movaps %xmm1, 240(KEYP)
+	add $0x10, KEYP
+	lea 240-16(TKEYP), UKEYP
 .align 4
 .Ldec_key_loop:
-	movaps (%rdi), %xmm0
+	movaps (KEYP), %xmm0
 	AESIMC %xmm0 %xmm1
-	movaps %xmm1, (%rsi)
-	add $0x10, %rdi
-	sub $0x10, %rsi
-	cmp %rcx, %rdi
+	movaps %xmm1, (UKEYP)
+	add $0x10, KEYP
+	sub $0x10, UKEYP
+	cmp TKEYP, KEYP
 	jb .Ldec_key_loop
-	xor %rax, %rax
+	xor AREG, AREG
+#ifndef __x86_64__
+	popl KEYP
+#endif
 	ret
 
 /*
  * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
  */
 ENTRY(aesni_enc)
+#ifndef __x86_64__
+	pushl KEYP
+	pushl KLEN
+	movl 12(%esp), KEYP
+	movl 16(%esp), OUTP
+	movl 20(%esp), INP
+#endif
 	movl 480(KEYP), KLEN		# key length
 	movups (INP), STATE		# input
 	call _aesni_enc1
 	movups STATE, (OUTP)		# output
+#ifndef __x86_64__
+	popl KLEN
+	popl KEYP
+#endif
 	ret
 
 /*
@@ -1428,6 +1470,7 @@ ENTRY(aesni_enc)
  *	KEY
  *	TKEYP (T1)
  */
+.align 4
 _aesni_enc1:
 	movaps (KEYP), KEY		# key
 	mov KEYP, TKEYP
@@ -1490,6 +1533,7 @@ _aesni_enc1:
  *	KEY
  *	TKEYP (T1)
  */
+.align 4
 _aesni_enc4:
 	movaps (KEYP), KEY		# key
 	mov KEYP, TKEYP
@@ -1583,11 +1627,22 @@ _aesni_enc4:
  * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
  */
 ENTRY(aesni_dec)
+#ifndef __x86_64__
+	pushl KEYP
+	pushl KLEN
+	movl 12(%esp), KEYP
+	movl 16(%esp), OUTP
+	movl 20(%esp), INP
+#endif
 	mov 480(KEYP), KLEN		# key length
 	add $240, KEYP
 	movups (INP), STATE		# input
 	call _aesni_dec1
 	movups STATE, (OUTP)		#output
+#ifndef __x86_64__
+	popl KLEN
+	popl KEYP
+#endif
 	ret
 
 /*
@@ -1602,6 +1657,7 @@ ENTRY(aesni_dec)
  *	KEY
  *	TKEYP (T1)
  */
+.align 4
 _aesni_dec1:
 	movaps (KEYP), KEY		# key
 	mov KEYP, TKEYP
@@ -1664,6 +1720,7 @@ _aesni_dec1:
  *	KEY
  *	TKEYP (T1)
  */
+.align 4
 _aesni_dec4:
 	movaps (KEYP), KEY		# key
 	mov KEYP, TKEYP
@@ -1758,6 +1815,15 @@ _aesni_dec4:
  *		      size_t len)
  */
 ENTRY(aesni_ecb_enc)
+#ifndef __x86_64__
+	pushl LEN
+	pushl KEYP
+	pushl KLEN
+	movl 16(%esp), KEYP
+	movl 20(%esp), OUTP
+	movl 24(%esp), INP
+	movl 28(%esp), LEN
+#endif
 	test LEN, LEN		# check length
 	jz .Lecb_enc_ret
 	mov 480(KEYP), KLEN
@@ -1794,6 +1860,11 @@ ENTRY(aesni_ecb_enc)
 	cmp $16, LEN
 	jge .Lecb_enc_loop1
 .Lecb_enc_ret:
+#ifndef __x86_64__
+	popl KLEN
+	popl KEYP
+	popl LEN
+#endif
 	ret
 
 /*
@@ -1801,6 +1872,15 @@ ENTRY(aesni_ecb_enc)
  *		      size_t len);
  */
 ENTRY(aesni_ecb_dec)
+#ifndef __x86_64__
+	pushl LEN
+	pushl KEYP
+	pushl KLEN
+	movl 16(%esp), KEYP
+	movl 20(%esp), OUTP
+	movl 24(%esp), INP
+	movl 28(%esp), LEN
+#endif
 	test LEN, LEN
 	jz .Lecb_dec_ret
 	mov 480(KEYP), KLEN
@@ -1838,6 +1918,11 @@ ENTRY(aesni_ecb_dec)
 	cmp $16, LEN
 	jge .Lecb_dec_loop1
 .Lecb_dec_ret:
+#ifndef __x86_64__
+	popl KLEN
+	popl KEYP
+	popl LEN
+#endif
 	ret
 
 /*
@@ -1845,6 +1930,17 @@ ENTRY(aesni_ecb_dec)
  *		      size_t len, u8 *iv)
  */
 ENTRY(aesni_cbc_enc)
+#ifndef __x86_64__
+	pushl IVP
+	pushl LEN
+	pushl KEYP
+	pushl KLEN
+	movl 20(%esp), KEYP
+	movl 24(%esp), OUTP
+	movl 28(%esp), INP
+	movl 32(%esp), LEN
+	movl 36(%esp), IVP
+#endif
 	cmp $16, LEN
 	jb .Lcbc_enc_ret
 	mov 480(KEYP), KLEN
@@ -1862,6 +1958,12 @@ ENTRY(aesni_cbc_enc)
 	jge .Lcbc_enc_loop
 	movups STATE, (IVP)
 .Lcbc_enc_ret:
+#ifndef __x86_64__
+	popl KLEN
+	popl KEYP
+	popl LEN
+	popl IVP
+#endif
 	ret
 
 /*
@@ -1869,6 +1971,17 @@ ENTRY(aesni_cbc_enc)
  *		      size_t len, u8 *iv)
  */
 ENTRY(aesni_cbc_dec)
+#ifndef __x86_64__
+	pushl IVP
+	pushl LEN
+	pushl KEYP
+	pushl KLEN
+	movl 20(%esp), KEYP
+	movl 24(%esp), OUTP
+	movl 28(%esp), INP
+	movl 32(%esp), LEN
+	movl 36(%esp), IVP
+#endif
 	cmp $16, LEN
 	jb .Lcbc_dec_just_ret
 	mov 480(KEYP), KLEN
@@ -1882,16 +1995,30 @@ ENTRY(aesni_cbc_dec)
 	movaps IN1, STATE1
 	movups 0x10(INP), IN2
 	movaps IN2, STATE2
+#ifdef __x86_64__
 	movups 0x20(INP), IN3
 	movaps IN3, STATE3
 	movups 0x30(INP), IN4
 	movaps IN4, STATE4
+#else
+	movups 0x20(INP), IN1
+	movaps IN1, STATE3
+	movups 0x30(INP), IN2
+	movaps IN2, STATE4
+#endif
 	call _aesni_dec4
 	pxor IV, STATE1
+#ifdef __x86_64__
 	pxor IN1, STATE2
 	pxor IN2, STATE3
 	pxor IN3, STATE4
 	movaps IN4, IV
+#else
+	pxor (INP), STATE2
+	pxor 0x10(INP), STATE3
+	pxor IN1, STATE4
+	movaps IN2, IV
+#endif
 	movups STATE1, (OUTP)
 	movups STATE2, 0x10(OUTP)
 	movups STATE3, 0x20(OUTP)
@@ -1919,8 +2046,15 @@ ENTRY(aesni_cbc_dec)
 .Lcbc_dec_ret:
 	movups IV, (IVP)
 .Lcbc_dec_just_ret:
+#ifndef __x86_64__
+	popl KLEN
+	popl KEYP
+	popl LEN
+	popl IVP
+#endif
 	ret
 
+#ifdef __x86_64__
 .align 16
 .Lbswap_mask:
 	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
@@ -1936,6 +2070,7 @@ ENTRY(aesni_cbc_dec)
  *	INC:	== 1, in little endian
  *	BSWAP_MASK == endian swapping mask
  */
+.align 4
 _aesni_inc_init:
 	movaps .Lbswap_mask, BSWAP_MASK
 	movaps IV, CTR
@@ -1960,6 +2095,7 @@ _aesni_inc_init:
  *	CTR:	== output IV, in little endian
  *	TCTR_LOW: == lower qword of CTR
  */
+.align 4
 _aesni_inc:
 	paddq INC, CTR
 	add $1, TCTR_LOW
@@ -2031,3 +2167,4 @@ ENTRY(aesni_ctr_enc)
 	movups IV, (IVP)
 .Lctr_enc_just_ret:
 	ret
+#endif
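Note that the CTR helpers (_aesni_inc_init, _aesni_inc and aesni_ctr_enc)
remain inside the new #ifdef __x86_64__ region: they use %xmm10-%xmm12
(BSWAP_MASK, CTR, INC) and %r11 (TCTR_LOW), registers that do not exist in
32-bit mode, so CTR acceleration stays 64-bit only, matching the glue code
and Kconfig changes below.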
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 02d349d64423..8a3b80075216 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -94,8 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
+#ifdef CONFIG_X86_64
 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
+#endif
 
 /* asmlinkage void aesni_gcm_enc()
  * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
@@ -410,6 +412,7 @@ static struct crypto_alg blk_cbc_alg = {
 	},
 };
 
+#ifdef CONFIG_X86_64
 static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
 			    struct blkcipher_walk *walk)
 {
@@ -475,6 +478,7 @@ static struct crypto_alg blk_ctr_alg = {
 		},
 	},
 };
+#endif
 
 static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
 			unsigned int key_len)
@@ -622,6 +626,7 @@ static struct crypto_alg ablk_cbc_alg = {
 	},
 };
 
+#ifdef CONFIG_X86_64
 static int ablk_ctr_init(struct crypto_tfm *tfm)
 {
 	struct cryptd_ablkcipher *cryptd_tfm;
@@ -698,6 +703,7 @@ static struct crypto_alg ablk_rfc3686_ctr_alg = {
 	},
 };
 #endif
+#endif
 
 #ifdef HAS_LRW
 static int ablk_lrw_init(struct crypto_tfm *tfm)
@@ -1249,18 +1255,20 @@ static int __init aesni_init(void)
 		goto blk_ecb_err;
 	if ((err = crypto_register_alg(&blk_cbc_alg)))
 		goto blk_cbc_err;
-	if ((err = crypto_register_alg(&blk_ctr_alg)))
-		goto blk_ctr_err;
 	if ((err = crypto_register_alg(&ablk_ecb_alg)))
 		goto ablk_ecb_err;
 	if ((err = crypto_register_alg(&ablk_cbc_alg)))
 		goto ablk_cbc_err;
+#ifdef CONFIG_X86_64
+	if ((err = crypto_register_alg(&blk_ctr_alg)))
+		goto blk_ctr_err;
 	if ((err = crypto_register_alg(&ablk_ctr_alg)))
 		goto ablk_ctr_err;
 #ifdef HAS_CTR
 	if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg)))
 		goto ablk_rfc3686_ctr_err;
 #endif
+#endif
 #ifdef HAS_LRW
 	if ((err = crypto_register_alg(&ablk_lrw_alg)))
 		goto ablk_lrw_err;
@@ -1296,18 +1304,20 @@ ablk_pcbc_err:
 	crypto_unregister_alg(&ablk_lrw_alg);
 ablk_lrw_err:
 #endif
+#ifdef CONFIG_X86_64
 #ifdef HAS_CTR
 	crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
 ablk_rfc3686_ctr_err:
 #endif
 	crypto_unregister_alg(&ablk_ctr_alg);
 ablk_ctr_err:
+	crypto_unregister_alg(&blk_ctr_alg);
+blk_ctr_err:
+#endif
 	crypto_unregister_alg(&ablk_cbc_alg);
 ablk_cbc_err:
 	crypto_unregister_alg(&ablk_ecb_alg);
 ablk_ecb_err:
-	crypto_unregister_alg(&blk_ctr_alg);
-blk_ctr_err:
 	crypto_unregister_alg(&blk_cbc_alg);
 blk_cbc_err:
 	crypto_unregister_alg(&blk_ecb_alg);
@@ -1332,13 +1342,15 @@ static void __exit aesni_exit(void)
 #ifdef HAS_LRW
 	crypto_unregister_alg(&ablk_lrw_alg);
 #endif
+#ifdef CONFIG_X86_64
 #ifdef HAS_CTR
 	crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
 #endif
 	crypto_unregister_alg(&ablk_ctr_alg);
+	crypto_unregister_alg(&blk_ctr_alg);
+#endif
 	crypto_unregister_alg(&ablk_cbc_alg);
 	crypto_unregister_alg(&ablk_ecb_alg);
-	crypto_unregister_alg(&blk_ctr_alg);
 	crypto_unregister_alg(&blk_cbc_alg);
 	crypto_unregister_alg(&blk_ecb_alg);
 	crypto_unregister_alg(&__aesni_alg);
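For context: the assembler routines keep their existing C prototypes. On
x86-32, asmlinkage means regparm(0), i.e. all arguments are passed on the
stack, which is exactly what the new movl N(%esp) sequences at each ENTRY()
fetch. Below is a minimal sketch, not part of this patch, of how such an
entry point is driven from C; the real wrapper in aesni-intel_glue.c
additionally checks irq_fpu_usable() and falls back to the generic AES code,
and the helper name here is illustrative only.

#include <linux/linkage.h>	/* asmlinkage */
#include <crypto/aes.h>		/* struct crypto_aes_ctx, u8 */
#include <asm/i387.h>		/* kernel_fpu_begin()/kernel_fpu_end() */

/* Prototype as declared in aesni-intel_glue.c; the same declaration now
 * works on x86-64 (register arguments) and x86-32 (stack arguments). */
asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src);

/* Illustrative helper: encrypt one 16-byte block with the AES-NI routine. */
static void aesni_encrypt_block(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
{
	kernel_fpu_begin();	/* XMM state is clobbered by the asm routine */
	aesni_enc(ctx, dst, src);
	kernel_fpu_end();
}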
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 69437e21217f..467491df3e3a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -539,8 +539,9 @@ config CRYPTO_AES_X86_64
 
 config CRYPTO_AES_NI_INTEL
 	tristate "AES cipher algorithms (AES-NI)"
-	depends on (X86 || UML_X86) && 64BIT
-	select CRYPTO_AES_X86_64
+	depends on (X86 || UML_X86)
+	select CRYPTO_AES_X86_64 if 64BIT
+	select CRYPTO_AES_586 if !64BIT
 	select CRYPTO_CRYPTD
 	select CRYPTO_ALGAPI
 	select CRYPTO_FPU
@@ -563,9 +564,10 @@ config CRYPTO_AES_NI_INTEL
 
 	  See <http://csrc.nist.gov/encryption/aes/> for more information.
 
-	  In addition to AES cipher algorithm support, the
-	  acceleration for some popular block cipher mode is supported
-	  too, including ECB, CBC, CTR, LRW, PCBC, XTS.
+	  In addition to AES cipher algorithm support, the acceleration
+	  for some popular block cipher mode is supported too, including
+	  ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
+	  acceleration for CTR.
 
 config CRYPTO_ANUBIS
 	tristate "Anubis cipher algorithm"