diff options
author | David S. Miller <davem@davemloft.net> | 2012-08-30 11:40:44 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-08-30 11:40:44 -0400 |
commit | 4e71bb49f256e4efc94a9fdaaa430d906cd88e6b (patch) | |
tree | 0218bd2ab7a173b138e2959e81b481f6a3418b9d /arch/sparc/crypto | |
parent | 301013159e4cdce44700418c8fd5eadb270e2d3a (diff) |
sparc64: Unroll CTR crypt loops in AES driver.
Before (loop encrypts one 16-byte block per iteration):
testing speed of ctr(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 244 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 360 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 814 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 5021 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 240 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 378 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 939 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 6395 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 209 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 249 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 414 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1073 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 7110 cycles (8192 bytes)
testing speed of ctr(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 225 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 233 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 344 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 810 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 5021 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 240 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 376 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 938 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 6380 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 214 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 251 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 411 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1070 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 7114 cycles (8192 bytes)
After (loop encrypts two 16-byte blocks per iteration, with a single-block path for a leftover block):
testing speed of ctr(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 211 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 246 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 344 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 799 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 4975 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 210 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 236 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 365 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 888 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 6055 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 209 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 255 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 404 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1010 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 6669 cycles (8192 bytes)
testing speed of ctr(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 210 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 233 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 340 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 818 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 4956 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 206 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 239 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 361 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 888 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 5996 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 214 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 248 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 395 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 1010 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 6664 cycles (8192 bytes)
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/crypto')
-rw-r--r-- | arch/sparc/crypto/aes_asm.S | 142 |
1 file changed, 118 insertions, 24 deletions
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S | |||
index 0bd3e04ac42d..0fadad0c60ad 100644 | |||
--- a/arch/sparc/crypto/aes_asm.S | |||
+++ b/arch/sparc/crypto/aes_asm.S | |||
@@ -48,6 +48,10 @@ | |||
48 | .word 0x81b0230d; | 48 | .word 0x81b0230d; |
49 | #define MOVXTOD_O5_F2 \ | 49 | #define MOVXTOD_O5_F2 \ |
50 | .word 0x85b0230d; | 50 | .word 0x85b0230d; |
51 | #define MOVXTOD_O5_F4 \ | ||
52 | .word 0x89b0230d; | ||
53 | #define MOVXTOD_O5_F6 \ | ||
54 | .word 0x8db0230d; | ||
51 | #define MOVXTOD_G3_F60 \ | 55 | #define MOVXTOD_G3_F60 \ |
52 | .word 0xbbb02303; | 56 | .word 0xbbb02303; |
53 | #define MOVXTOD_G7_F62 \ | 57 | #define MOVXTOD_G7_F62 \ |
@@ -1400,8 +1404,10 @@ ENTRY(aes_sparc64_ctr_crypt_128) | |||
1400 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | 1404 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ |
1401 | ldx [%o4 + 0x00], %g3 | 1405 | ldx [%o4 + 0x00], %g3 |
1402 | ldx [%o4 + 0x08], %g7 | 1406 | ldx [%o4 + 0x08], %g7 |
1407 | subcc %o3, 0x10, %o3 | ||
1403 | ldx [%o0 + 0x00], %g1 | 1408 | ldx [%o0 + 0x00], %g1 |
1404 | ldx [%o0 + 0x08], %g2 | 1409 | be 10f |
1410 | ldx [%o0 + 0x08], %g2 | ||
1405 | 1: xor %g1, %g3, %o5 | 1411 | 1: xor %g1, %g3, %o5 |
1406 | MOVXTOD_O5_F0 | 1412 | MOVXTOD_O5_F0 |
1407 | xor %g2, %g7, %o5 | 1413 | xor %g2, %g7, %o5 |
@@ -1409,6 +1415,39 @@ ENTRY(aes_sparc64_ctr_crypt_128) | |||
1409 | add %g7, 1, %g7 | 1415 | add %g7, 1, %g7 |
1410 | add %g3, 1, %o5 | 1416 | add %g3, 1, %o5 |
1411 | movrz %g7, %o5, %g3 | 1417 | movrz %g7, %o5, %g3 |
1418 | xor %g1, %g3, %o5 | ||
1419 | MOVXTOD_O5_F4 | ||
1420 | xor %g2, %g7, %o5 | ||
1421 | MOVXTOD_O5_F6 | ||
1422 | add %g7, 1, %g7 | ||
1423 | add %g3, 1, %o5 | ||
1424 | movrz %g7, %o5, %g3 | ||
1425 | ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62) | ||
1426 | ldd [%o1 + 0x00], %f56 | ||
1427 | ldd [%o1 + 0x08], %f58 | ||
1428 | ldd [%o1 + 0x10], %f60 | ||
1429 | ldd [%o1 + 0x18], %f62 | ||
1430 | fxor %f56, %f0, %f56 | ||
1431 | fxor %f58, %f2, %f58 | ||
1432 | fxor %f60, %f4, %f60 | ||
1433 | fxor %f62, %f6, %f62 | ||
1434 | std %f56, [%o2 + 0x00] | ||
1435 | std %f58, [%o2 + 0x08] | ||
1436 | std %f60, [%o2 + 0x10] | ||
1437 | std %f62, [%o2 + 0x18] | ||
1438 | subcc %o3, 0x20, %o3 | ||
1439 | add %o1, 0x20, %o1 | ||
1440 | brgz %o3, 1b | ||
1441 | add %o2, 0x20, %o2 | ||
1442 | brlz,pt %o3, 11f | ||
1443 | nop | ||
1444 | 10: xor %g1, %g3, %o5 | ||
1445 | MOVXTOD_O5_F0 | ||
1446 | xor %g2, %g7, %o5 | ||
1447 | MOVXTOD_O5_F2 | ||
1448 | add %g7, 1, %g7 | ||
1449 | add %g3, 1, %o5 | ||
1450 | movrz %g7, %o5, %g3 | ||
1412 | ENCRYPT_128(8, 0, 2, 4, 6) | 1451 | ENCRYPT_128(8, 0, 2, 4, 6) |
1413 | ldd [%o1 + 0x00], %f4 | 1452 | ldd [%o1 + 0x00], %f4 |
1414 | ldd [%o1 + 0x08], %f6 | 1453 | ldd [%o1 + 0x08], %f6 |
@@ -1416,14 +1455,9 @@ ENTRY(aes_sparc64_ctr_crypt_128) | |||
1416 | fxor %f6, %f2, %f6 | 1455 | fxor %f6, %f2, %f6 |
1417 | std %f4, [%o2 + 0x00] | 1456 | std %f4, [%o2 + 0x00] |
1418 | std %f6, [%o2 + 0x08] | 1457 | std %f6, [%o2 + 0x08] |
1419 | subcc %o3, 0x10, %o3 | 1458 | 11: stx %g3, [%o4 + 0x00] |
1420 | add %o1, 0x10, %o1 | ||
1421 | bne,pt %xcc, 1b | ||
1422 | add %o2, 0x10, %o2 | ||
1423 | stx %g3, [%o4 + 0x00] | ||
1424 | stx %g7, [%o4 + 0x08] | ||
1425 | retl | 1459 | retl |
1426 | nop | 1460 | stx %g7, [%o4 + 0x08] |
1427 | ENDPROC(aes_sparc64_ctr_crypt_128) | 1461 | ENDPROC(aes_sparc64_ctr_crypt_128) |
1428 | 1462 | ||
1429 | .align 32 | 1463 | .align 32 |
@@ -1431,8 +1465,10 @@ ENTRY(aes_sparc64_ctr_crypt_192) | |||
1431 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | 1465 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ |
1432 | ldx [%o4 + 0x00], %g3 | 1466 | ldx [%o4 + 0x00], %g3 |
1433 | ldx [%o4 + 0x08], %g7 | 1467 | ldx [%o4 + 0x08], %g7 |
1468 | subcc %o3, 0x10, %o3 | ||
1434 | ldx [%o0 + 0x00], %g1 | 1469 | ldx [%o0 + 0x00], %g1 |
1435 | ldx [%o0 + 0x08], %g2 | 1470 | be 10f |
1471 | ldx [%o0 + 0x08], %g2 | ||
1436 | 1: xor %g1, %g3, %o5 | 1472 | 1: xor %g1, %g3, %o5 |
1437 | MOVXTOD_O5_F0 | 1473 | MOVXTOD_O5_F0 |
1438 | xor %g2, %g7, %o5 | 1474 | xor %g2, %g7, %o5 |
@@ -1440,6 +1476,39 @@ ENTRY(aes_sparc64_ctr_crypt_192) | |||
1440 | add %g7, 1, %g7 | 1476 | add %g7, 1, %g7 |
1441 | add %g3, 1, %o5 | 1477 | add %g3, 1, %o5 |
1442 | movrz %g7, %o5, %g3 | 1478 | movrz %g7, %o5, %g3 |
1479 | xor %g1, %g3, %o5 | ||
1480 | MOVXTOD_O5_F4 | ||
1481 | xor %g2, %g7, %o5 | ||
1482 | MOVXTOD_O5_F6 | ||
1483 | add %g7, 1, %g7 | ||
1484 | add %g3, 1, %o5 | ||
1485 | movrz %g7, %o5, %g3 | ||
1486 | ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62) | ||
1487 | ldd [%o1 + 0x00], %f56 | ||
1488 | ldd [%o1 + 0x08], %f58 | ||
1489 | ldd [%o1 + 0x10], %f60 | ||
1490 | ldd [%o1 + 0x18], %f62 | ||
1491 | fxor %f56, %f0, %f56 | ||
1492 | fxor %f58, %f2, %f58 | ||
1493 | fxor %f60, %f4, %f60 | ||
1494 | fxor %f62, %f6, %f62 | ||
1495 | std %f56, [%o2 + 0x00] | ||
1496 | std %f58, [%o2 + 0x08] | ||
1497 | std %f60, [%o2 + 0x10] | ||
1498 | std %f62, [%o2 + 0x18] | ||
1499 | subcc %o3, 0x20, %o3 | ||
1500 | add %o1, 0x20, %o1 | ||
1501 | brgz %o3, 1b | ||
1502 | add %o2, 0x20, %o2 | ||
1503 | brlz,pt %o3, 11f | ||
1504 | nop | ||
1505 | 10: xor %g1, %g3, %o5 | ||
1506 | MOVXTOD_O5_F0 | ||
1507 | xor %g2, %g7, %o5 | ||
1508 | MOVXTOD_O5_F2 | ||
1509 | add %g7, 1, %g7 | ||
1510 | add %g3, 1, %o5 | ||
1511 | movrz %g7, %o5, %g3 | ||
1443 | ENCRYPT_192(8, 0, 2, 4, 6) | 1512 | ENCRYPT_192(8, 0, 2, 4, 6) |
1444 | ldd [%o1 + 0x00], %f4 | 1513 | ldd [%o1 + 0x00], %f4 |
1445 | ldd [%o1 + 0x08], %f6 | 1514 | ldd [%o1 + 0x08], %f6 |
@@ -1447,14 +1516,9 @@ ENTRY(aes_sparc64_ctr_crypt_192) | |||
1447 | fxor %f6, %f2, %f6 | 1516 | fxor %f6, %f2, %f6 |
1448 | std %f4, [%o2 + 0x00] | 1517 | std %f4, [%o2 + 0x00] |
1449 | std %f6, [%o2 + 0x08] | 1518 | std %f6, [%o2 + 0x08] |
1450 | subcc %o3, 0x10, %o3 | 1519 | 11: stx %g3, [%o4 + 0x00] |
1451 | add %o1, 0x10, %o1 | ||
1452 | bne,pt %xcc, 1b | ||
1453 | add %o2, 0x10, %o2 | ||
1454 | stx %g3, [%o4 + 0x00] | ||
1455 | stx %g7, [%o4 + 0x08] | ||
1456 | retl | 1520 | retl |
1457 | nop | 1521 | stx %g7, [%o4 + 0x08] |
1458 | ENDPROC(aes_sparc64_ctr_crypt_192) | 1522 | ENDPROC(aes_sparc64_ctr_crypt_192) |
1459 | 1523 | ||
1460 | .align 32 | 1524 | .align 32 |
@@ -1462,8 +1526,10 @@ ENTRY(aes_sparc64_ctr_crypt_256) | |||
1462 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | 1526 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ |
1463 | ldx [%o4 + 0x00], %g3 | 1527 | ldx [%o4 + 0x00], %g3 |
1464 | ldx [%o4 + 0x08], %g7 | 1528 | ldx [%o4 + 0x08], %g7 |
1529 | subcc %o3, 0x10, %o3 | ||
1465 | ldx [%o0 + 0x00], %g1 | 1530 | ldx [%o0 + 0x00], %g1 |
1466 | ldx [%o0 + 0x08], %g2 | 1531 | be 10f |
1532 | ldx [%o0 + 0x08], %g2 | ||
1467 | 1: xor %g1, %g3, %o5 | 1533 | 1: xor %g1, %g3, %o5 |
1468 | MOVXTOD_O5_F0 | 1534 | MOVXTOD_O5_F0 |
1469 | xor %g2, %g7, %o5 | 1535 | xor %g2, %g7, %o5 |
@@ -1471,6 +1537,39 @@ ENTRY(aes_sparc64_ctr_crypt_256) | |||
1471 | add %g7, 1, %g7 | 1537 | add %g7, 1, %g7 |
1472 | add %g3, 1, %o5 | 1538 | add %g3, 1, %o5 |
1473 | movrz %g7, %o5, %g3 | 1539 | movrz %g7, %o5, %g3 |
1540 | xor %g1, %g3, %o5 | ||
1541 | MOVXTOD_O5_F4 | ||
1542 | xor %g2, %g7, %o5 | ||
1543 | MOVXTOD_O5_F6 | ||
1544 | add %g7, 1, %g7 | ||
1545 | add %g3, 1, %o5 | ||
1546 | movrz %g7, %o5, %g3 | ||
1547 | ENCRYPT_256_2(8, 0, 2, 4, 6) | ||
1548 | ldd [%o1 + 0x00], %f56 | ||
1549 | ldd [%o1 + 0x08], %f58 | ||
1550 | ldd [%o1 + 0x10], %f60 | ||
1551 | ldd [%o1 + 0x18], %f62 | ||
1552 | fxor %f56, %f0, %f56 | ||
1553 | fxor %f58, %f2, %f58 | ||
1554 | fxor %f60, %f4, %f60 | ||
1555 | fxor %f62, %f6, %f62 | ||
1556 | std %f56, [%o2 + 0x00] | ||
1557 | std %f58, [%o2 + 0x08] | ||
1558 | std %f60, [%o2 + 0x10] | ||
1559 | std %f62, [%o2 + 0x18] | ||
1560 | subcc %o3, 0x20, %o3 | ||
1561 | add %o1, 0x20, %o1 | ||
1562 | brgz %o3, 1b | ||
1563 | add %o2, 0x20, %o2 | ||
1564 | brlz,pt %o3, 11f | ||
1565 | nop | ||
1566 | 10: xor %g1, %g3, %o5 | ||
1567 | MOVXTOD_O5_F0 | ||
1568 | xor %g2, %g7, %o5 | ||
1569 | MOVXTOD_O5_F2 | ||
1570 | add %g7, 1, %g7 | ||
1571 | add %g3, 1, %o5 | ||
1572 | movrz %g7, %o5, %g3 | ||
1474 | ENCRYPT_256(8, 0, 2, 4, 6) | 1573 | ENCRYPT_256(8, 0, 2, 4, 6) |
1475 | ldd [%o1 + 0x00], %f4 | 1574 | ldd [%o1 + 0x00], %f4 |
1476 | ldd [%o1 + 0x08], %f6 | 1575 | ldd [%o1 + 0x08], %f6 |
@@ -1478,12 +1577,7 @@ ENTRY(aes_sparc64_ctr_crypt_256) | |||
1478 | fxor %f6, %f2, %f6 | 1577 | fxor %f6, %f2, %f6 |
1479 | std %f4, [%o2 + 0x00] | 1578 | std %f4, [%o2 + 0x00] |
1480 | std %f6, [%o2 + 0x08] | 1579 | std %f6, [%o2 + 0x08] |
1481 | subcc %o3, 0x10, %o3 | 1580 | 11: stx %g3, [%o4 + 0x00] |
1482 | add %o1, 0x10, %o1 | ||
1483 | bne,pt %xcc, 1b | ||
1484 | add %o2, 0x10, %o2 | ||
1485 | stx %g3, [%o4 + 0x00] | ||
1486 | stx %g7, [%o4 + 0x08] | ||
1487 | retl | 1581 | retl |
1488 | nop | 1582 | stx %g7, [%o4 + 0x08] |
1489 | ENDPROC(aes_sparc64_ctr_crypt_256) | 1583 | ENDPROC(aes_sparc64_ctr_crypt_256) |