aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/crypto
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2012-08-30 11:40:44 -0400
committer: David S. Miller <davem@davemloft.net> 2012-08-30 11:40:44 -0400
commit: 4e71bb49f256e4efc94a9fdaaa430d906cd88e6b (patch)
tree: 0218bd2ab7a173b138e2959e81b481f6a3418b9d /arch/sparc/crypto
parent: 301013159e4cdce44700418c8fd5eadb270e2d3a (diff)
sparc64: Unroll CTR crypt loops in AES driver.
Before:

testing speed of ctr(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 244 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 360 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 814 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 5021 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 240 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 378 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 939 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 6395 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 209 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 249 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 414 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1073 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 7110 cycles (8192 bytes)

testing speed of ctr(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 225 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 233 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 344 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 810 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 5021 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 240 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 376 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 938 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 6380 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 214 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 251 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 411 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1070 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 7114 cycles (8192 bytes)

After:

testing speed of ctr(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 211 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 246 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 344 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 799 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 4975 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 210 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 236 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 365 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 888 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 6055 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 209 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 255 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 404 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1010 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 6669 cycles (8192 bytes)

testing speed of ctr(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 210 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 233 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 340 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 818 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 4956 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 239 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 361 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 888 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 5996 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 214 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 248 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 395 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1010 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 6664 cycles (8192 bytes)

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/crypto')
-rw-r--r-- arch/sparc/crypto/aes_asm.S 142
1 file changed, 118 insertions, 24 deletions
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
index 0bd3e04ac42d..0fadad0c60ad 100644
--- a/arch/sparc/crypto/aes_asm.S
+++ b/arch/sparc/crypto/aes_asm.S
@@ -48,6 +48,10 @@
48 .word 0x81b0230d; 48 .word 0x81b0230d;
49#define MOVXTOD_O5_F2 \ 49#define MOVXTOD_O5_F2 \
50 .word 0x85b0230d; 50 .word 0x85b0230d;
51#define MOVXTOD_O5_F4 \
52 .word 0x89b0230d;
53#define MOVXTOD_O5_F6 \
54 .word 0x8db0230d;
51#define MOVXTOD_G3_F60 \ 55#define MOVXTOD_G3_F60 \
52 .word 0xbbb02303; 56 .word 0xbbb02303;
53#define MOVXTOD_G7_F62 \ 57#define MOVXTOD_G7_F62 \
@@ -1400,8 +1404,10 @@ ENTRY(aes_sparc64_ctr_crypt_128)
1400 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ 1404 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1401 ldx [%o4 + 0x00], %g3 1405 ldx [%o4 + 0x00], %g3
1402 ldx [%o4 + 0x08], %g7 1406 ldx [%o4 + 0x08], %g7
1407 subcc %o3, 0x10, %o3
1403 ldx [%o0 + 0x00], %g1 1408 ldx [%o0 + 0x00], %g1
1404 ldx [%o0 + 0x08], %g2 1409 be 10f
1410 ldx [%o0 + 0x08], %g2
14051: xor %g1, %g3, %o5 14111: xor %g1, %g3, %o5
1406 MOVXTOD_O5_F0 1412 MOVXTOD_O5_F0
1407 xor %g2, %g7, %o5 1413 xor %g2, %g7, %o5
@@ -1409,6 +1415,39 @@ ENTRY(aes_sparc64_ctr_crypt_128)
1409 add %g7, 1, %g7 1415 add %g7, 1, %g7
1410 add %g3, 1, %o5 1416 add %g3, 1, %o5
1411 movrz %g7, %o5, %g3 1417 movrz %g7, %o5, %g3
1418 xor %g1, %g3, %o5
1419 MOVXTOD_O5_F4
1420 xor %g2, %g7, %o5
1421 MOVXTOD_O5_F6
1422 add %g7, 1, %g7
1423 add %g3, 1, %o5
1424 movrz %g7, %o5, %g3
1425 ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
1426 ldd [%o1 + 0x00], %f56
1427 ldd [%o1 + 0x08], %f58
1428 ldd [%o1 + 0x10], %f60
1429 ldd [%o1 + 0x18], %f62
1430 fxor %f56, %f0, %f56
1431 fxor %f58, %f2, %f58
1432 fxor %f60, %f4, %f60
1433 fxor %f62, %f6, %f62
1434 std %f56, [%o2 + 0x00]
1435 std %f58, [%o2 + 0x08]
1436 std %f60, [%o2 + 0x10]
1437 std %f62, [%o2 + 0x18]
1438 subcc %o3, 0x20, %o3
1439 add %o1, 0x20, %o1
1440 brgz %o3, 1b
1441 add %o2, 0x20, %o2
1442 brlz,pt %o3, 11f
1443 nop
144410: xor %g1, %g3, %o5
1445 MOVXTOD_O5_F0
1446 xor %g2, %g7, %o5
1447 MOVXTOD_O5_F2
1448 add %g7, 1, %g7
1449 add %g3, 1, %o5
1450 movrz %g7, %o5, %g3
1412 ENCRYPT_128(8, 0, 2, 4, 6) 1451 ENCRYPT_128(8, 0, 2, 4, 6)
1413 ldd [%o1 + 0x00], %f4 1452 ldd [%o1 + 0x00], %f4
1414 ldd [%o1 + 0x08], %f6 1453 ldd [%o1 + 0x08], %f6
@@ -1416,14 +1455,9 @@ ENTRY(aes_sparc64_ctr_crypt_128)
1416 fxor %f6, %f2, %f6 1455 fxor %f6, %f2, %f6
1417 std %f4, [%o2 + 0x00] 1456 std %f4, [%o2 + 0x00]
1418 std %f6, [%o2 + 0x08] 1457 std %f6, [%o2 + 0x08]
1419 subcc %o3, 0x10, %o3 145811: stx %g3, [%o4 + 0x00]
1420 add %o1, 0x10, %o1
1421 bne,pt %xcc, 1b
1422 add %o2, 0x10, %o2
1423 stx %g3, [%o4 + 0x00]
1424 stx %g7, [%o4 + 0x08]
1425 retl 1459 retl
1426 nop 1460 stx %g7, [%o4 + 0x08]
1427ENDPROC(aes_sparc64_ctr_crypt_128) 1461ENDPROC(aes_sparc64_ctr_crypt_128)
1428 1462
1429 .align 32 1463 .align 32
@@ -1431,8 +1465,10 @@ ENTRY(aes_sparc64_ctr_crypt_192)
1431 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ 1465 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1432 ldx [%o4 + 0x00], %g3 1466 ldx [%o4 + 0x00], %g3
1433 ldx [%o4 + 0x08], %g7 1467 ldx [%o4 + 0x08], %g7
1468 subcc %o3, 0x10, %o3
1434 ldx [%o0 + 0x00], %g1 1469 ldx [%o0 + 0x00], %g1
1435 ldx [%o0 + 0x08], %g2 1470 be 10f
1471 ldx [%o0 + 0x08], %g2
14361: xor %g1, %g3, %o5 14721: xor %g1, %g3, %o5
1437 MOVXTOD_O5_F0 1473 MOVXTOD_O5_F0
1438 xor %g2, %g7, %o5 1474 xor %g2, %g7, %o5
@@ -1440,6 +1476,39 @@ ENTRY(aes_sparc64_ctr_crypt_192)
1440 add %g7, 1, %g7 1476 add %g7, 1, %g7
1441 add %g3, 1, %o5 1477 add %g3, 1, %o5
1442 movrz %g7, %o5, %g3 1478 movrz %g7, %o5, %g3
1479 xor %g1, %g3, %o5
1480 MOVXTOD_O5_F4
1481 xor %g2, %g7, %o5
1482 MOVXTOD_O5_F6
1483 add %g7, 1, %g7
1484 add %g3, 1, %o5
1485 movrz %g7, %o5, %g3
1486 ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
1487 ldd [%o1 + 0x00], %f56
1488 ldd [%o1 + 0x08], %f58
1489 ldd [%o1 + 0x10], %f60
1490 ldd [%o1 + 0x18], %f62
1491 fxor %f56, %f0, %f56
1492 fxor %f58, %f2, %f58
1493 fxor %f60, %f4, %f60
1494 fxor %f62, %f6, %f62
1495 std %f56, [%o2 + 0x00]
1496 std %f58, [%o2 + 0x08]
1497 std %f60, [%o2 + 0x10]
1498 std %f62, [%o2 + 0x18]
1499 subcc %o3, 0x20, %o3
1500 add %o1, 0x20, %o1
1501 brgz %o3, 1b
1502 add %o2, 0x20, %o2
1503 brlz,pt %o3, 11f
1504 nop
150510: xor %g1, %g3, %o5
1506 MOVXTOD_O5_F0
1507 xor %g2, %g7, %o5
1508 MOVXTOD_O5_F2
1509 add %g7, 1, %g7
1510 add %g3, 1, %o5
1511 movrz %g7, %o5, %g3
1443 ENCRYPT_192(8, 0, 2, 4, 6) 1512 ENCRYPT_192(8, 0, 2, 4, 6)
1444 ldd [%o1 + 0x00], %f4 1513 ldd [%o1 + 0x00], %f4
1445 ldd [%o1 + 0x08], %f6 1514 ldd [%o1 + 0x08], %f6
@@ -1447,14 +1516,9 @@ ENTRY(aes_sparc64_ctr_crypt_192)
1447 fxor %f6, %f2, %f6 1516 fxor %f6, %f2, %f6
1448 std %f4, [%o2 + 0x00] 1517 std %f4, [%o2 + 0x00]
1449 std %f6, [%o2 + 0x08] 1518 std %f6, [%o2 + 0x08]
1450 subcc %o3, 0x10, %o3 151911: stx %g3, [%o4 + 0x00]
1451 add %o1, 0x10, %o1
1452 bne,pt %xcc, 1b
1453 add %o2, 0x10, %o2
1454 stx %g3, [%o4 + 0x00]
1455 stx %g7, [%o4 + 0x08]
1456 retl 1520 retl
1457 nop 1521 stx %g7, [%o4 + 0x08]
1458ENDPROC(aes_sparc64_ctr_crypt_192) 1522ENDPROC(aes_sparc64_ctr_crypt_192)
1459 1523
1460 .align 32 1524 .align 32
@@ -1462,8 +1526,10 @@ ENTRY(aes_sparc64_ctr_crypt_256)
1462 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ 1526 /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
1463 ldx [%o4 + 0x00], %g3 1527 ldx [%o4 + 0x00], %g3
1464 ldx [%o4 + 0x08], %g7 1528 ldx [%o4 + 0x08], %g7
1529 subcc %o3, 0x10, %o3
1465 ldx [%o0 + 0x00], %g1 1530 ldx [%o0 + 0x00], %g1
1466 ldx [%o0 + 0x08], %g2 1531 be 10f
1532 ldx [%o0 + 0x08], %g2
14671: xor %g1, %g3, %o5 15331: xor %g1, %g3, %o5
1468 MOVXTOD_O5_F0 1534 MOVXTOD_O5_F0
1469 xor %g2, %g7, %o5 1535 xor %g2, %g7, %o5
@@ -1471,6 +1537,39 @@ ENTRY(aes_sparc64_ctr_crypt_256)
1471 add %g7, 1, %g7 1537 add %g7, 1, %g7
1472 add %g3, 1, %o5 1538 add %g3, 1, %o5
1473 movrz %g7, %o5, %g3 1539 movrz %g7, %o5, %g3
1540 xor %g1, %g3, %o5
1541 MOVXTOD_O5_F4
1542 xor %g2, %g7, %o5
1543 MOVXTOD_O5_F6
1544 add %g7, 1, %g7
1545 add %g3, 1, %o5
1546 movrz %g7, %o5, %g3
1547 ENCRYPT_256_2(8, 0, 2, 4, 6)
1548 ldd [%o1 + 0x00], %f56
1549 ldd [%o1 + 0x08], %f58
1550 ldd [%o1 + 0x10], %f60
1551 ldd [%o1 + 0x18], %f62
1552 fxor %f56, %f0, %f56
1553 fxor %f58, %f2, %f58
1554 fxor %f60, %f4, %f60
1555 fxor %f62, %f6, %f62
1556 std %f56, [%o2 + 0x00]
1557 std %f58, [%o2 + 0x08]
1558 std %f60, [%o2 + 0x10]
1559 std %f62, [%o2 + 0x18]
1560 subcc %o3, 0x20, %o3
1561 add %o1, 0x20, %o1
1562 brgz %o3, 1b
1563 add %o2, 0x20, %o2
1564 brlz,pt %o3, 11f
1565 nop
156610: xor %g1, %g3, %o5
1567 MOVXTOD_O5_F0
1568 xor %g2, %g7, %o5
1569 MOVXTOD_O5_F2
1570 add %g7, 1, %g7
1571 add %g3, 1, %o5
1572 movrz %g7, %o5, %g3
1474 ENCRYPT_256(8, 0, 2, 4, 6) 1573 ENCRYPT_256(8, 0, 2, 4, 6)
1475 ldd [%o1 + 0x00], %f4 1574 ldd [%o1 + 0x00], %f4
1476 ldd [%o1 + 0x08], %f6 1575 ldd [%o1 + 0x08], %f6
@@ -1478,12 +1577,7 @@ ENTRY(aes_sparc64_ctr_crypt_256)
1478 fxor %f6, %f2, %f6 1577 fxor %f6, %f2, %f6
1479 std %f4, [%o2 + 0x00] 1578 std %f4, [%o2 + 0x00]
1480 std %f6, [%o2 + 0x08] 1579 std %f6, [%o2 + 0x08]
1481 subcc %o3, 0x10, %o3 158011: stx %g3, [%o4 + 0x00]
1482 add %o1, 0x10, %o1
1483 bne,pt %xcc, 1b
1484 add %o2, 0x10, %o2
1485 stx %g3, [%o4 + 0x00]
1486 stx %g7, [%o4 + 0x08]
1487 retl 1581 retl
1488 nop 1582 stx %g7, [%o4 + 0x08]
1489ENDPROC(aes_sparc64_ctr_crypt_256) 1583ENDPROC(aes_sparc64_ctr_crypt_256)