aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mathias Krause <minipli@googlemail.com> 2010-11-27 03:34:46 -0500
committer Herbert Xu <herbert@gondor.apana.org.au> 2010-11-27 03:34:46 -0500
commit 0d258efb6a58fe047197c3b9cff8746bb176d58a (patch)
tree 8576f2af5212ec50509de1071cf7afe1ed9531a8
parent 21ea28abcf825729f9698afd7357dfbf7040d4f8 (diff)
crypto: aesni-intel - Ported implementation to x86-32
The AES-NI instructions are also available in legacy mode so the 32-bit
architecture may profit from those, too.

To illustrate the performance gain here's a short summary of a dm-crypt
speed test on a Core i7 M620 running at 2.67GHz comparing both assembler
implementations:

x86:                   i586       aes-ni    delta
ECB, 256 bit:     93.8 MB/s   123.3 MB/s   +31.4%
CBC, 256 bit:     84.8 MB/s   262.3 MB/s  +209.3%
LRW, 256 bit:    108.6 MB/s   222.1 MB/s  +104.5%
XTS, 256 bit:    105.0 MB/s   205.5 MB/s   +95.7%

Additionally, due to some minor optimizations, the 64-bit version also
got a minor performance gain as seen below:

x86-64:           old impl.    new impl.    delta
ECB, 256 bit:    121.1 MB/s   123.0 MB/s    +1.5%
CBC, 256 bit:    285.3 MB/s   290.8 MB/s    +1.9%
LRW, 256 bit:    263.7 MB/s   265.3 MB/s    +0.6%
XTS, 256 bit:    251.1 MB/s   255.3 MB/s    +1.7%

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Reviewed-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- arch/x86/crypto/aesni-intel_asm.S 197
-rw-r--r-- arch/x86/crypto/aesni-intel_glue.c 22
-rw-r--r-- crypto/Kconfig 12
3 files changed, 191 insertions, 40 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index aafced54df64..f592e03dc375 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -20,6 +20,9 @@
20 * Wajdi Feghali (wajdi.k.feghali@intel.com) 20 * Wajdi Feghali (wajdi.k.feghali@intel.com)
21 * Copyright (c) 2010, Intel Corporation. 21 * Copyright (c) 2010, Intel Corporation.
22 * 22 *
23 * Ported x86_64 version to x86:
24 * Author: Mathias Krause <minipli@googlemail.com>
25 *
23 * This program is free software; you can redistribute it and/or modify 26 * This program is free software; you can redistribute it and/or modify
24 * it under the terms of the GNU General Public License as published by 27 * it under the terms of the GNU General Public License as published by
25 * the Free Software Foundation; either version 2 of the License, or 28 * the Free Software Foundation; either version 2 of the License, or
@@ -95,12 +98,16 @@ enc: .octa 0x2
95#define IN IN1 98#define IN IN1
96#define KEY %xmm2 99#define KEY %xmm2
97#define IV %xmm3 100#define IV %xmm3
101
98#define BSWAP_MASK %xmm10 102#define BSWAP_MASK %xmm10
99#define CTR %xmm11 103#define CTR %xmm11
100#define INC %xmm12 104#define INC %xmm12
101 105
106#ifdef __x86_64__
107#define AREG %rax
102#define KEYP %rdi 108#define KEYP %rdi
103#define OUTP %rsi 109#define OUTP %rsi
110#define UKEYP OUTP
104#define INP %rdx 111#define INP %rdx
105#define LEN %rcx 112#define LEN %rcx
106#define IVP %r8 113#define IVP %r8
@@ -109,6 +116,18 @@ enc: .octa 0x2
109#define TKEYP T1 116#define TKEYP T1
110#define T2 %r11 117#define T2 %r11
111#define TCTR_LOW T2 118#define TCTR_LOW T2
119#else
120#define AREG %eax
121#define KEYP %edi
122#define OUTP AREG
123#define UKEYP OUTP
124#define INP %edx
125#define LEN %esi
126#define IVP %ebp
127#define KLEN %ebx
128#define T1 %ecx
129#define TKEYP T1
130#endif
112 131
113 132
114/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) 133/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
@@ -1247,10 +1266,11 @@ _key_expansion_256a:
1247 shufps $0b10001100, %xmm0, %xmm4 1266 shufps $0b10001100, %xmm0, %xmm4
1248 pxor %xmm4, %xmm0 1267 pxor %xmm4, %xmm0
1249 pxor %xmm1, %xmm0 1268 pxor %xmm1, %xmm0
1250 movaps %xmm0, (%rcx) 1269 movaps %xmm0, (TKEYP)
1251 add $0x10, %rcx 1270 add $0x10, TKEYP
1252 ret 1271 ret
1253 1272
1273.align 4
1254_key_expansion_192a: 1274_key_expansion_192a:
1255 pshufd $0b01010101, %xmm1, %xmm1 1275 pshufd $0b01010101, %xmm1, %xmm1
1256 shufps $0b00010000, %xmm0, %xmm4 1276 shufps $0b00010000, %xmm0, %xmm4
@@ -1268,12 +1288,13 @@ _key_expansion_192a:
1268 1288
1269 movaps %xmm0, %xmm1 1289 movaps %xmm0, %xmm1
1270 shufps $0b01000100, %xmm0, %xmm6 1290 shufps $0b01000100, %xmm0, %xmm6
1271 movaps %xmm6, (%rcx) 1291 movaps %xmm6, (TKEYP)
1272 shufps $0b01001110, %xmm2, %xmm1 1292 shufps $0b01001110, %xmm2, %xmm1
1273 movaps %xmm1, 16(%rcx) 1293 movaps %xmm1, 0x10(TKEYP)
1274 add $0x20, %rcx 1294 add $0x20, TKEYP
1275 ret 1295 ret
1276 1296
1297.align 4
1277_key_expansion_192b: 1298_key_expansion_192b:
1278 pshufd $0b01010101, %xmm1, %xmm1 1299 pshufd $0b01010101, %xmm1, %xmm1
1279 shufps $0b00010000, %xmm0, %xmm4 1300 shufps $0b00010000, %xmm0, %xmm4
@@ -1288,10 +1309,11 @@ _key_expansion_192b:
1288 pxor %xmm3, %xmm2 1309 pxor %xmm3, %xmm2
1289 pxor %xmm5, %xmm2 1310 pxor %xmm5, %xmm2
1290 1311
1291 movaps %xmm0, (%rcx) 1312 movaps %xmm0, (TKEYP)
1292 add $0x10, %rcx 1313 add $0x10, TKEYP
1293 ret 1314 ret
1294 1315
1316.align 4
1295_key_expansion_256b: 1317_key_expansion_256b:
1296 pshufd $0b10101010, %xmm1, %xmm1 1318 pshufd $0b10101010, %xmm1, %xmm1
1297 shufps $0b00010000, %xmm2, %xmm4 1319 shufps $0b00010000, %xmm2, %xmm4
@@ -1299,8 +1321,8 @@ _key_expansion_256b:
1299 shufps $0b10001100, %xmm2, %xmm4 1321 shufps $0b10001100, %xmm2, %xmm4
1300 pxor %xmm4, %xmm2 1322 pxor %xmm4, %xmm2
1301 pxor %xmm1, %xmm2 1323 pxor %xmm1, %xmm2
1302 movaps %xmm2, (%rcx) 1324 movaps %xmm2, (TKEYP)
1303 add $0x10, %rcx 1325 add $0x10, TKEYP
1304 ret 1326 ret
1305 1327
1306/* 1328/*
@@ -1308,17 +1330,23 @@ _key_expansion_256b:
1308 * unsigned int key_len) 1330 * unsigned int key_len)
1309 */ 1331 */
1310ENTRY(aesni_set_key) 1332ENTRY(aesni_set_key)
1311 movups (%rsi), %xmm0 # user key (first 16 bytes) 1333#ifndef __x86_64__
1312 movaps %xmm0, (%rdi) 1334 pushl KEYP
1313 lea 0x10(%rdi), %rcx # key addr 1335 movl 8(%esp), KEYP # ctx
1314 movl %edx, 480(%rdi) 1336 movl 12(%esp), UKEYP # in_key
1337 movl 16(%esp), %edx # key_len
1338#endif
1339 movups (UKEYP), %xmm0 # user key (first 16 bytes)
1340 movaps %xmm0, (KEYP)
1341 lea 0x10(KEYP), TKEYP # key addr
1342 movl %edx, 480(KEYP)
1315 pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x 1343 pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
1316 cmp $24, %dl 1344 cmp $24, %dl
1317 jb .Lenc_key128 1345 jb .Lenc_key128
1318 je .Lenc_key192 1346 je .Lenc_key192
1319 movups 0x10(%rsi), %xmm2 # other user key 1347 movups 0x10(UKEYP), %xmm2 # other user key
1320 movaps %xmm2, (%rcx) 1348 movaps %xmm2, (TKEYP)
1321 add $0x10, %rcx 1349 add $0x10, TKEYP
1322 AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 1350 AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
1323 call _key_expansion_256a 1351 call _key_expansion_256a
1324 AESKEYGENASSIST 0x1 %xmm0 %xmm1 1352 AESKEYGENASSIST 0x1 %xmm0 %xmm1
@@ -1347,7 +1375,7 @@ ENTRY(aesni_set_key)
1347 call _key_expansion_256a 1375 call _key_expansion_256a
1348 jmp .Ldec_key 1376 jmp .Ldec_key
1349.Lenc_key192: 1377.Lenc_key192:
1350 movq 0x10(%rsi), %xmm2 # other user key 1378 movq 0x10(UKEYP), %xmm2 # other user key
1351 AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 1379 AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
1352 call _key_expansion_192a 1380 call _key_expansion_192a
1353 AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 1381 AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
@@ -1387,33 +1415,47 @@ ENTRY(aesni_set_key)
1387 AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 1415 AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10
1388 call _key_expansion_128 1416 call _key_expansion_128
1389.Ldec_key: 1417.Ldec_key:
1390 sub $0x10, %rcx 1418 sub $0x10, TKEYP
1391 movaps (%rdi), %xmm0 1419 movaps (KEYP), %xmm0
1392 movaps (%rcx), %xmm1 1420 movaps (TKEYP), %xmm1
1393 movaps %xmm0, 240(%rcx) 1421 movaps %xmm0, 240(TKEYP)
1394 movaps %xmm1, 240(%rdi) 1422 movaps %xmm1, 240(KEYP)
1395 add $0x10, %rdi 1423 add $0x10, KEYP
1396 lea 240-16(%rcx), %rsi 1424 lea 240-16(TKEYP), UKEYP
1397.align 4 1425.align 4
1398.Ldec_key_loop: 1426.Ldec_key_loop:
1399 movaps (%rdi), %xmm0 1427 movaps (KEYP), %xmm0
1400 AESIMC %xmm0 %xmm1 1428 AESIMC %xmm0 %xmm1
1401 movaps %xmm1, (%rsi) 1429 movaps %xmm1, (UKEYP)
1402 add $0x10, %rdi 1430 add $0x10, KEYP
1403 sub $0x10, %rsi 1431 sub $0x10, UKEYP
1404 cmp %rcx, %rdi 1432 cmp TKEYP, KEYP
1405 jb .Ldec_key_loop 1433 jb .Ldec_key_loop
1406 xor %rax, %rax 1434 xor AREG, AREG
1435#ifndef __x86_64__
1436 popl KEYP
1437#endif
1407 ret 1438 ret
1408 1439
1409/* 1440/*
1410 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) 1441 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
1411 */ 1442 */
1412ENTRY(aesni_enc) 1443ENTRY(aesni_enc)
1444#ifndef __x86_64__
1445 pushl KEYP
1446 pushl KLEN
1447 movl 12(%esp), KEYP
1448 movl 16(%esp), OUTP
1449 movl 20(%esp), INP
1450#endif
1413 movl 480(KEYP), KLEN # key length 1451 movl 480(KEYP), KLEN # key length
1414 movups (INP), STATE # input 1452 movups (INP), STATE # input
1415 call _aesni_enc1 1453 call _aesni_enc1
1416 movups STATE, (OUTP) # output 1454 movups STATE, (OUTP) # output
1455#ifndef __x86_64__
1456 popl KLEN
1457 popl KEYP
1458#endif
1417 ret 1459 ret
1418 1460
1419/* 1461/*
@@ -1428,6 +1470,7 @@ ENTRY(aesni_enc)
1428 * KEY 1470 * KEY
1429 * TKEYP (T1) 1471 * TKEYP (T1)
1430 */ 1472 */
1473.align 4
1431_aesni_enc1: 1474_aesni_enc1:
1432 movaps (KEYP), KEY # key 1475 movaps (KEYP), KEY # key
1433 mov KEYP, TKEYP 1476 mov KEYP, TKEYP
@@ -1490,6 +1533,7 @@ _aesni_enc1:
1490 * KEY 1533 * KEY
1491 * TKEYP (T1) 1534 * TKEYP (T1)
1492 */ 1535 */
1536.align 4
1493_aesni_enc4: 1537_aesni_enc4:
1494 movaps (KEYP), KEY # key 1538 movaps (KEYP), KEY # key
1495 mov KEYP, TKEYP 1539 mov KEYP, TKEYP
@@ -1583,11 +1627,22 @@ _aesni_enc4:
1583 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) 1627 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
1584 */ 1628 */
1585ENTRY(aesni_dec) 1629ENTRY(aesni_dec)
1630#ifndef __x86_64__
1631 pushl KEYP
1632 pushl KLEN
1633 movl 12(%esp), KEYP
1634 movl 16(%esp), OUTP
1635 movl 20(%esp), INP
1636#endif
1586 mov 480(KEYP), KLEN # key length 1637 mov 480(KEYP), KLEN # key length
1587 add $240, KEYP 1638 add $240, KEYP
1588 movups (INP), STATE # input 1639 movups (INP), STATE # input
1589 call _aesni_dec1 1640 call _aesni_dec1
1590 movups STATE, (OUTP) #output 1641 movups STATE, (OUTP) #output
1642#ifndef __x86_64__
1643 popl KLEN
1644 popl KEYP
1645#endif
1591 ret 1646 ret
1592 1647
1593/* 1648/*
@@ -1602,6 +1657,7 @@ ENTRY(aesni_dec)
1602 * KEY 1657 * KEY
1603 * TKEYP (T1) 1658 * TKEYP (T1)
1604 */ 1659 */
1660.align 4
1605_aesni_dec1: 1661_aesni_dec1:
1606 movaps (KEYP), KEY # key 1662 movaps (KEYP), KEY # key
1607 mov KEYP, TKEYP 1663 mov KEYP, TKEYP
@@ -1664,6 +1720,7 @@ _aesni_dec1:
1664 * KEY 1720 * KEY
1665 * TKEYP (T1) 1721 * TKEYP (T1)
1666 */ 1722 */
1723.align 4
1667_aesni_dec4: 1724_aesni_dec4:
1668 movaps (KEYP), KEY # key 1725 movaps (KEYP), KEY # key
1669 mov KEYP, TKEYP 1726 mov KEYP, TKEYP
@@ -1758,6 +1815,15 @@ _aesni_dec4:
1758 * size_t len) 1815 * size_t len)
1759 */ 1816 */
1760ENTRY(aesni_ecb_enc) 1817ENTRY(aesni_ecb_enc)
1818#ifndef __x86_64__
1819 pushl LEN
1820 pushl KEYP
1821 pushl KLEN
1822 movl 16(%esp), KEYP
1823 movl 20(%esp), OUTP
1824 movl 24(%esp), INP
1825 movl 28(%esp), LEN
1826#endif
1761 test LEN, LEN # check length 1827 test LEN, LEN # check length
1762 jz .Lecb_enc_ret 1828 jz .Lecb_enc_ret
1763 mov 480(KEYP), KLEN 1829 mov 480(KEYP), KLEN
@@ -1794,6 +1860,11 @@ ENTRY(aesni_ecb_enc)
1794 cmp $16, LEN 1860 cmp $16, LEN
1795 jge .Lecb_enc_loop1 1861 jge .Lecb_enc_loop1
1796.Lecb_enc_ret: 1862.Lecb_enc_ret:
1863#ifndef __x86_64__
1864 popl KLEN
1865 popl KEYP
1866 popl LEN
1867#endif
1797 ret 1868 ret
1798 1869
1799/* 1870/*
@@ -1801,6 +1872,15 @@ ENTRY(aesni_ecb_enc)
1801 * size_t len); 1872 * size_t len);
1802 */ 1873 */
1803ENTRY(aesni_ecb_dec) 1874ENTRY(aesni_ecb_dec)
1875#ifndef __x86_64__
1876 pushl LEN
1877 pushl KEYP
1878 pushl KLEN
1879 movl 16(%esp), KEYP
1880 movl 20(%esp), OUTP
1881 movl 24(%esp), INP
1882 movl 28(%esp), LEN
1883#endif
1804 test LEN, LEN 1884 test LEN, LEN
1805 jz .Lecb_dec_ret 1885 jz .Lecb_dec_ret
1806 mov 480(KEYP), KLEN 1886 mov 480(KEYP), KLEN
@@ -1838,6 +1918,11 @@ ENTRY(aesni_ecb_dec)
1838 cmp $16, LEN 1918 cmp $16, LEN
1839 jge .Lecb_dec_loop1 1919 jge .Lecb_dec_loop1
1840.Lecb_dec_ret: 1920.Lecb_dec_ret:
1921#ifndef __x86_64__
1922 popl KLEN
1923 popl KEYP
1924 popl LEN
1925#endif
1841 ret 1926 ret
1842 1927
1843/* 1928/*
@@ -1845,6 +1930,17 @@ ENTRY(aesni_ecb_dec)
1845 * size_t len, u8 *iv) 1930 * size_t len, u8 *iv)
1846 */ 1931 */
1847ENTRY(aesni_cbc_enc) 1932ENTRY(aesni_cbc_enc)
1933#ifndef __x86_64__
1934 pushl IVP
1935 pushl LEN
1936 pushl KEYP
1937 pushl KLEN
1938 movl 20(%esp), KEYP
1939 movl 24(%esp), OUTP
1940 movl 28(%esp), INP
1941 movl 32(%esp), LEN
1942 movl 36(%esp), IVP
1943#endif
1848 cmp $16, LEN 1944 cmp $16, LEN
1849 jb .Lcbc_enc_ret 1945 jb .Lcbc_enc_ret
1850 mov 480(KEYP), KLEN 1946 mov 480(KEYP), KLEN
@@ -1862,6 +1958,12 @@ ENTRY(aesni_cbc_enc)
1862 jge .Lcbc_enc_loop 1958 jge .Lcbc_enc_loop
1863 movups STATE, (IVP) 1959 movups STATE, (IVP)
1864.Lcbc_enc_ret: 1960.Lcbc_enc_ret:
1961#ifndef __x86_64__
1962 popl KLEN
1963 popl KEYP
1964 popl LEN
1965 popl IVP
1966#endif
1865 ret 1967 ret
1866 1968
1867/* 1969/*
@@ -1869,6 +1971,17 @@ ENTRY(aesni_cbc_enc)
1869 * size_t len, u8 *iv) 1971 * size_t len, u8 *iv)
1870 */ 1972 */
1871ENTRY(aesni_cbc_dec) 1973ENTRY(aesni_cbc_dec)
1974#ifndef __x86_64__
1975 pushl IVP
1976 pushl LEN
1977 pushl KEYP
1978 pushl KLEN
1979 movl 20(%esp), KEYP
1980 movl 24(%esp), OUTP
1981 movl 28(%esp), INP
1982 movl 32(%esp), LEN
1983 movl 36(%esp), IVP
1984#endif
1872 cmp $16, LEN 1985 cmp $16, LEN
1873 jb .Lcbc_dec_just_ret 1986 jb .Lcbc_dec_just_ret
1874 mov 480(KEYP), KLEN 1987 mov 480(KEYP), KLEN
@@ -1882,16 +1995,30 @@ ENTRY(aesni_cbc_dec)
1882 movaps IN1, STATE1 1995 movaps IN1, STATE1
1883 movups 0x10(INP), IN2 1996 movups 0x10(INP), IN2
1884 movaps IN2, STATE2 1997 movaps IN2, STATE2
1998#ifdef __x86_64__
1885 movups 0x20(INP), IN3 1999 movups 0x20(INP), IN3
1886 movaps IN3, STATE3 2000 movaps IN3, STATE3
1887 movups 0x30(INP), IN4 2001 movups 0x30(INP), IN4
1888 movaps IN4, STATE4 2002 movaps IN4, STATE4
2003#else
2004 movups 0x20(INP), IN1
2005 movaps IN1, STATE3
2006 movups 0x30(INP), IN2
2007 movaps IN2, STATE4
2008#endif
1889 call _aesni_dec4 2009 call _aesni_dec4
1890 pxor IV, STATE1 2010 pxor IV, STATE1
2011#ifdef __x86_64__
1891 pxor IN1, STATE2 2012 pxor IN1, STATE2
1892 pxor IN2, STATE3 2013 pxor IN2, STATE3
1893 pxor IN3, STATE4 2014 pxor IN3, STATE4
1894 movaps IN4, IV 2015 movaps IN4, IV
2016#else
2017 pxor (INP), STATE2
2018 pxor 0x10(INP), STATE3
2019 pxor IN1, STATE4
2020 movaps IN2, IV
2021#endif
1895 movups STATE1, (OUTP) 2022 movups STATE1, (OUTP)
1896 movups STATE2, 0x10(OUTP) 2023 movups STATE2, 0x10(OUTP)
1897 movups STATE3, 0x20(OUTP) 2024 movups STATE3, 0x20(OUTP)
@@ -1919,8 +2046,15 @@ ENTRY(aesni_cbc_dec)
1919.Lcbc_dec_ret: 2046.Lcbc_dec_ret:
1920 movups IV, (IVP) 2047 movups IV, (IVP)
1921.Lcbc_dec_just_ret: 2048.Lcbc_dec_just_ret:
2049#ifndef __x86_64__
2050 popl KLEN
2051 popl KEYP
2052 popl LEN
2053 popl IVP
2054#endif
1922 ret 2055 ret
1923 2056
2057#ifdef __x86_64__
1924.align 16 2058.align 16
1925.Lbswap_mask: 2059.Lbswap_mask:
1926 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 2060 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
@@ -1936,6 +2070,7 @@ ENTRY(aesni_cbc_dec)
1936 * INC: == 1, in little endian 2070 * INC: == 1, in little endian
1937 * BSWAP_MASK == endian swapping mask 2071 * BSWAP_MASK == endian swapping mask
1938 */ 2072 */
2073.align 4
1939_aesni_inc_init: 2074_aesni_inc_init:
1940 movaps .Lbswap_mask, BSWAP_MASK 2075 movaps .Lbswap_mask, BSWAP_MASK
1941 movaps IV, CTR 2076 movaps IV, CTR
@@ -1960,6 +2095,7 @@ _aesni_inc_init:
1960 * CTR: == output IV, in little endian 2095 * CTR: == output IV, in little endian
1961 * TCTR_LOW: == lower qword of CTR 2096 * TCTR_LOW: == lower qword of CTR
1962 */ 2097 */
2098.align 4
1963_aesni_inc: 2099_aesni_inc:
1964 paddq INC, CTR 2100 paddq INC, CTR
1965 add $1, TCTR_LOW 2101 add $1, TCTR_LOW
@@ -2031,3 +2167,4 @@ ENTRY(aesni_ctr_enc)
2031 movups IV, (IVP) 2167 movups IV, (IVP)
2032.Lctr_enc_just_ret: 2168.Lctr_enc_just_ret:
2033 ret 2169 ret
2170#endif
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 02d349d64423..8a3b80075216 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -94,8 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
94 const u8 *in, unsigned int len, u8 *iv); 94 const u8 *in, unsigned int len, u8 *iv);
95asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, 95asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
96 const u8 *in, unsigned int len, u8 *iv); 96 const u8 *in, unsigned int len, u8 *iv);
97#ifdef CONFIG_X86_64
97asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, 98asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
98 const u8 *in, unsigned int len, u8 *iv); 99 const u8 *in, unsigned int len, u8 *iv);
100#endif
99 101
100/* asmlinkage void aesni_gcm_enc() 102/* asmlinkage void aesni_gcm_enc()
101 * void *ctx, AES Key schedule. Starts on a 16 byte boundary. 103 * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
@@ -410,6 +412,7 @@ static struct crypto_alg blk_cbc_alg = {
410 }, 412 },
411}; 413};
412 414
415#ifdef CONFIG_X86_64
413static void ctr_crypt_final(struct crypto_aes_ctx *ctx, 416static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
414 struct blkcipher_walk *walk) 417 struct blkcipher_walk *walk)
415{ 418{
@@ -475,6 +478,7 @@ static struct crypto_alg blk_ctr_alg = {
475 }, 478 },
476 }, 479 },
477}; 480};
481#endif
478 482
479static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, 483static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
480 unsigned int key_len) 484 unsigned int key_len)
@@ -622,6 +626,7 @@ static struct crypto_alg ablk_cbc_alg = {
622 }, 626 },
623}; 627};
624 628
629#ifdef CONFIG_X86_64
625static int ablk_ctr_init(struct crypto_tfm *tfm) 630static int ablk_ctr_init(struct crypto_tfm *tfm)
626{ 631{
627 struct cryptd_ablkcipher *cryptd_tfm; 632 struct cryptd_ablkcipher *cryptd_tfm;
@@ -698,6 +703,7 @@ static struct crypto_alg ablk_rfc3686_ctr_alg = {
698 }, 703 },
699}; 704};
700#endif 705#endif
706#endif
701 707
702#ifdef HAS_LRW 708#ifdef HAS_LRW
703static int ablk_lrw_init(struct crypto_tfm *tfm) 709static int ablk_lrw_init(struct crypto_tfm *tfm)
@@ -1249,18 +1255,20 @@ static int __init aesni_init(void)
1249 goto blk_ecb_err; 1255 goto blk_ecb_err;
1250 if ((err = crypto_register_alg(&blk_cbc_alg))) 1256 if ((err = crypto_register_alg(&blk_cbc_alg)))
1251 goto blk_cbc_err; 1257 goto blk_cbc_err;
1252 if ((err = crypto_register_alg(&blk_ctr_alg)))
1253 goto blk_ctr_err;
1254 if ((err = crypto_register_alg(&ablk_ecb_alg))) 1258 if ((err = crypto_register_alg(&ablk_ecb_alg)))
1255 goto ablk_ecb_err; 1259 goto ablk_ecb_err;
1256 if ((err = crypto_register_alg(&ablk_cbc_alg))) 1260 if ((err = crypto_register_alg(&ablk_cbc_alg)))
1257 goto ablk_cbc_err; 1261 goto ablk_cbc_err;
1262#ifdef CONFIG_X86_64
1263 if ((err = crypto_register_alg(&blk_ctr_alg)))
1264 goto blk_ctr_err;
1258 if ((err = crypto_register_alg(&ablk_ctr_alg))) 1265 if ((err = crypto_register_alg(&ablk_ctr_alg)))
1259 goto ablk_ctr_err; 1266 goto ablk_ctr_err;
1260#ifdef HAS_CTR 1267#ifdef HAS_CTR
1261 if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) 1268 if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg)))
1262 goto ablk_rfc3686_ctr_err; 1269 goto ablk_rfc3686_ctr_err;
1263#endif 1270#endif
1271#endif
1264#ifdef HAS_LRW 1272#ifdef HAS_LRW
1265 if ((err = crypto_register_alg(&ablk_lrw_alg))) 1273 if ((err = crypto_register_alg(&ablk_lrw_alg)))
1266 goto ablk_lrw_err; 1274 goto ablk_lrw_err;
@@ -1296,18 +1304,20 @@ ablk_pcbc_err:
1296 crypto_unregister_alg(&ablk_lrw_alg); 1304 crypto_unregister_alg(&ablk_lrw_alg);
1297ablk_lrw_err: 1305ablk_lrw_err:
1298#endif 1306#endif
1307#ifdef CONFIG_X86_64
1299#ifdef HAS_CTR 1308#ifdef HAS_CTR
1300 crypto_unregister_alg(&ablk_rfc3686_ctr_alg); 1309 crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
1301ablk_rfc3686_ctr_err: 1310ablk_rfc3686_ctr_err:
1302#endif 1311#endif
1303 crypto_unregister_alg(&ablk_ctr_alg); 1312 crypto_unregister_alg(&ablk_ctr_alg);
1304ablk_ctr_err: 1313ablk_ctr_err:
1314 crypto_unregister_alg(&blk_ctr_alg);
1315blk_ctr_err:
1316#endif
1305 crypto_unregister_alg(&ablk_cbc_alg); 1317 crypto_unregister_alg(&ablk_cbc_alg);
1306ablk_cbc_err: 1318ablk_cbc_err:
1307 crypto_unregister_alg(&ablk_ecb_alg); 1319 crypto_unregister_alg(&ablk_ecb_alg);
1308ablk_ecb_err: 1320ablk_ecb_err:
1309 crypto_unregister_alg(&blk_ctr_alg);
1310blk_ctr_err:
1311 crypto_unregister_alg(&blk_cbc_alg); 1321 crypto_unregister_alg(&blk_cbc_alg);
1312blk_cbc_err: 1322blk_cbc_err:
1313 crypto_unregister_alg(&blk_ecb_alg); 1323 crypto_unregister_alg(&blk_ecb_alg);
@@ -1332,13 +1342,15 @@ static void __exit aesni_exit(void)
1332#ifdef HAS_LRW 1342#ifdef HAS_LRW
1333 crypto_unregister_alg(&ablk_lrw_alg); 1343 crypto_unregister_alg(&ablk_lrw_alg);
1334#endif 1344#endif
1345#ifdef CONFIG_X86_64
1335#ifdef HAS_CTR 1346#ifdef HAS_CTR
1336 crypto_unregister_alg(&ablk_rfc3686_ctr_alg); 1347 crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
1337#endif 1348#endif
1338 crypto_unregister_alg(&ablk_ctr_alg); 1349 crypto_unregister_alg(&ablk_ctr_alg);
1350 crypto_unregister_alg(&blk_ctr_alg);
1351#endif
1339 crypto_unregister_alg(&ablk_cbc_alg); 1352 crypto_unregister_alg(&ablk_cbc_alg);
1340 crypto_unregister_alg(&ablk_ecb_alg); 1353 crypto_unregister_alg(&ablk_ecb_alg);
1341 crypto_unregister_alg(&blk_ctr_alg);
1342 crypto_unregister_alg(&blk_cbc_alg); 1354 crypto_unregister_alg(&blk_cbc_alg);
1343 crypto_unregister_alg(&blk_ecb_alg); 1355 crypto_unregister_alg(&blk_ecb_alg);
1344 crypto_unregister_alg(&__aesni_alg); 1356 crypto_unregister_alg(&__aesni_alg);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 69437e21217f..467491df3e3a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -539,8 +539,9 @@ config CRYPTO_AES_X86_64
539 539
540config CRYPTO_AES_NI_INTEL 540config CRYPTO_AES_NI_INTEL
541 tristate "AES cipher algorithms (AES-NI)" 541 tristate "AES cipher algorithms (AES-NI)"
542 depends on (X86 || UML_X86) && 64BIT 542 depends on (X86 || UML_X86)
543 select CRYPTO_AES_X86_64 543 select CRYPTO_AES_X86_64 if 64BIT
544 select CRYPTO_AES_586 if !64BIT
544 select CRYPTO_CRYPTD 545 select CRYPTO_CRYPTD
545 select CRYPTO_ALGAPI 546 select CRYPTO_ALGAPI
546 select CRYPTO_FPU 547 select CRYPTO_FPU
@@ -563,9 +564,10 @@ config CRYPTO_AES_NI_INTEL
563 564
564 See <http://csrc.nist.gov/encryption/aes/> for more information. 565 See <http://csrc.nist.gov/encryption/aes/> for more information.
565 566
566 In addition to AES cipher algorithm support, the 567 In addition to AES cipher algorithm support, the acceleration
567 acceleration for some popular block cipher mode is supported 568 for some popular block cipher mode is supported too, including
568 too, including ECB, CBC, CTR, LRW, PCBC, XTS. 569 ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
570 acceleration for CTR.
569 571
570config CRYPTO_ANUBIS 572config CRYPTO_ANUBIS
571 tristate "Anubis cipher algorithm" 573 tristate "Anubis cipher algorithm"