diff options
| author | Michael Ellerman <michael@ellerman.id.au> | 2012-09-13 19:00:49 -0400 |
|---|---|---|
| committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2013-01-09 22:43:45 -0500 |
| commit | 323a6bf1d6f4ec7907d9d8aacb4ae9590f755dda (patch) | |
| tree | 90b75e02d5d9eb74b424155ae726eef5e7f16ff3 | |
| parent | 5c49985c21bba4d2f899e3a97121868a5c58a876 (diff) | |
powerpc: Add a powerpc implementation of SHA-1
This patch adds a crypto driver which provides a powerpc accelerated
implementation of SHA-1 ("accelerated" in the sense that it is hand-written
in asm, not that it uses dedicated crypto hardware).
Original patch by Paul, with minor fixups for upstream by me.
Lightly tested on 64-bit with the test program here:
http://michael.ellerman.id.au/files/junkcode/sha1test.c
Seems to work, and is "not slower" than the generic version.
Needs testing on 32-bit.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
| -rw-r--r-- | arch/powerpc/Makefile | 1 | ||||
| -rw-r--r-- | arch/powerpc/crypto/Makefile | 9 | ||||
| -rw-r--r-- | arch/powerpc/crypto/sha1-powerpc-asm.S | 179 | ||||
| -rw-r--r-- | arch/powerpc/crypto/sha1.c | 157 | ||||
| -rw-r--r-- | crypto/Kconfig | 7 |
5 files changed, 353 insertions, 0 deletions
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index b639852116fa..ba45cad088c9 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile | |||
| @@ -143,6 +143,7 @@ core-y += arch/powerpc/kernel/ \ | |||
| 143 | arch/powerpc/sysdev/ \ | 143 | arch/powerpc/sysdev/ \ |
| 144 | arch/powerpc/platforms/ \ | 144 | arch/powerpc/platforms/ \ |
| 145 | arch/powerpc/math-emu/ \ | 145 | arch/powerpc/math-emu/ \ |
| 146 | arch/powerpc/crypto/ \ | ||
| 146 | arch/powerpc/net/ | 147 | arch/powerpc/net/ |
| 147 | core-$(CONFIG_XMON) += arch/powerpc/xmon/ | 148 | core-$(CONFIG_XMON) += arch/powerpc/xmon/ |
| 148 | core-$(CONFIG_KVM) += arch/powerpc/kvm/ | 149 | core-$(CONFIG_KVM) += arch/powerpc/kvm/ |
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile new file mode 100644 index 000000000000..2926fb9c570a --- /dev/null +++ b/arch/powerpc/crypto/Makefile | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | # | ||
| 2 | # powerpc/crypto/Makefile | ||
| 3 | # | ||
| 4 | # Arch-specific CryptoAPI modules. | ||
| 5 | # | ||
| 6 | |||
| 7 | obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o | ||
| 8 | |||
| 9 | sha1-powerpc-y := sha1-powerpc-asm.o sha1.o | ||
diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S new file mode 100644 index 000000000000..a5f8264d2d3c --- /dev/null +++ b/arch/powerpc/crypto/sha1-powerpc-asm.S | |||
| @@ -0,0 +1,179 @@ | |||
| 1 | /* | ||
| 2 | * SHA-1 implementation for PowerPC. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2005 Paul Mackerras <paulus@samba.org> | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <asm/ppc_asm.h> | ||
| 8 | #include <asm/asm-offsets.h> | ||
| 9 | |||
| 10 | /* | ||
| 11 | * We roll the registers for T, A, B, C, D, E around on each | ||
| 12 | * iteration; T on iteration t is A on iteration t+1, and so on. | ||
| 13 | * We use registers 7 - 12 for this. | ||
| 14 | */ | ||
| 15 | #define RT(t) ((((t)+5)%6)+7) /* T for step t; same register as RA((t)+1) */ | ||
| 16 | #define RA(t) ((((t)+4)%6)+7) /* A for step t; same register as RB((t)+1) */ | ||
| 17 | #define RB(t) ((((t)+3)%6)+7) /* B for step t; same register as RC((t)+1) */ | ||
| 18 | #define RC(t) ((((t)+2)%6)+7) /* C for step t; same register as RD((t)+1) */ | ||
| 19 | #define RD(t) ((((t)+1)%6)+7) /* D for step t; same register as RE((t)+1) */ | ||
| 20 | #define RE(t) ((((t)+0)%6)+7) /* E for step t; its register is reused as RT((t)+1) */ | ||
| 21 | |||
| 22 | /* We use registers 16 - 31 for the W values */ | ||
| 23 | #define W(t) (((t)%16)+16) /* register for schedule word t; indices wrap every 16 words */ | ||
| 24 | |||
| 25 | #define LOADW(t) /* load 32-bit word t of the block addressed by r4 into W(t) */ \ | ||
| 26 | lwz W(t),(t)*4(r4) | ||
| 27 | |||
| 28 | #define STEPD0_LOAD(t) /* one step with f = (B and C) or (D and not B); also loads input word (t)+4 */ \ | ||
| 29 | andc r0,RD(t),RB(t); /* D and not B */ \ | ||
| 30 | and r6,RB(t),RC(t); /* B and C */ \ | ||
| 31 | rotlwi RT(t),RA(t),5; /* T = A rotated left by 5 */ \ | ||
| 32 | or r6,r6,r0; /* r6 = f(B,C,D) */ \ | ||
| 33 | add r0,RE(t),r15; /* E + round constant held in r15 */ \ | ||
| 34 | add RT(t),RT(t),r6; \ | ||
| 35 | add r14,r0,W(t); /* E + K + W(t) */ \ | ||
| 36 | lwz W((t)+4),((t)+4)*4(r4); /* fetch input word (t)+4 for a later step */ \ | ||
| 37 | rotlwi RB(t),RB(t),30; /* B rotated left by 30 in place; it becomes C next step */ \ | ||
| 38 | add RT(t),RT(t),r14 | ||
| 39 | |||
| 40 | #define STEPD0_UPDATE(t) /* same f as STEPD0_LOAD, but computes schedule word W((t)+4) instead of loading it */ \ | ||
| 41 | and r6,RB(t),RC(t); \ | ||
| 42 | andc r0,RD(t),RB(t); \ | ||
| 43 | rotlwi RT(t),RA(t),5; \ | ||
| 44 | rotlwi RB(t),RB(t),30; \ | ||
| 45 | or r6,r6,r0; /* r6 = (B and C) or (D and not B) */ \ | ||
| 46 | add r0,RE(t),r15; /* E + round constant held in r15 */ \ | ||
| 47 | xor r5,W((t)+4-3),W((t)+4-8); /* r5 is scratch for the schedule update */ \ | ||
| 48 | add RT(t),RT(t),r6; \ | ||
| 49 | xor W((t)+4),W((t)+4-16),W((t)+4-14); \ | ||
| 50 | add r0,r0,W(t); \ | ||
| 51 | xor W((t)+4),W((t)+4),r5; /* xor of words n-3, n-8, n-14, n-16 with n = (t)+4 */ \ | ||
| 52 | add RT(t),RT(t),r0; \ | ||
| 53 | rotlwi W((t)+4),W((t)+4),1 /* new schedule word = that xor rotated left by 1 */ | ||
| 54 | |||
| 55 | #define STEPD1(t) /* one step with f = B xor C xor D; no schedule word is produced */ \ | ||
| 56 | xor r6,RB(t),RC(t); \ | ||
| 57 | rotlwi RT(t),RA(t),5; /* T = A rotated left by 5 */ \ | ||
| 58 | rotlwi RB(t),RB(t),30; /* done after B was read for f above */ \ | ||
| 59 | xor r6,r6,RD(t); /* r6 = B xor C xor D */ \ | ||
| 60 | add r0,RE(t),r15; /* E + round constant held in r15 */ \ | ||
| 61 | add RT(t),RT(t),r6; \ | ||
| 62 | add r0,r0,W(t); \ | ||
| 63 | add RT(t),RT(t),r0 | ||
| 64 | |||
| 65 | #define STEPD1_UPDATE(t) /* STEPD1 (f = B xor C xor D) plus computation of schedule word W((t)+4) */ \ | ||
| 66 | xor r6,RB(t),RC(t); \ | ||
| 67 | rotlwi RT(t),RA(t),5; \ | ||
| 68 | rotlwi RB(t),RB(t),30; \ | ||
| 69 | xor r6,r6,RD(t); /* r6 = B xor C xor D */ \ | ||
| 70 | add r0,RE(t),r15; /* E + round constant held in r15 */ \ | ||
| 71 | xor r5,W((t)+4-3),W((t)+4-8); /* r5 is scratch for the schedule update */ \ | ||
| 72 | add RT(t),RT(t),r6; \ | ||
| 73 | xor W((t)+4),W((t)+4-16),W((t)+4-14); \ | ||
| 74 | add r0,r0,W(t); \ | ||
| 75 | xor W((t)+4),W((t)+4),r5; \ | ||
| 76 | add RT(t),RT(t),r0; \ | ||
| 77 | rotlwi W((t)+4),W((t)+4),1 /* new schedule word = xor of the four older words, rotated left by 1 */ | ||
| 78 | |||
| 79 | #define STEPD2_UPDATE(t) /* one step with f = (B and C) or (B and D) or (C and D), plus schedule word W((t)+4) */ \ | ||
| 80 | and r6,RB(t),RC(t); \ | ||
| 81 | and r0,RB(t),RD(t); \ | ||
| 82 | rotlwi RT(t),RA(t),5; \ | ||
| 83 | or r6,r6,r0; \ | ||
| 84 | rotlwi RB(t),RB(t),30; /* B is no longer needed unrotated: both terms using it are computed */ \ | ||
| 85 | and r0,RC(t),RD(t); \ | ||
| 86 | xor r5,W((t)+4-3),W((t)+4-8); /* r5 is scratch for the schedule update */ \ | ||
| 87 | or r6,r6,r0; /* r6 = f(B,C,D) */ \ | ||
| 88 | xor W((t)+4),W((t)+4-16),W((t)+4-14); \ | ||
| 89 | add r0,RE(t),r15; /* E + round constant held in r15 */ \ | ||
| 90 | add RT(t),RT(t),r6; \ | ||
| 91 | add r0,r0,W(t); \ | ||
| 92 | xor W((t)+4),W((t)+4),r5; \ | ||
| 93 | add RT(t),RT(t),r0; \ | ||
| 94 | rotlwi W((t)+4),W((t)+4),1 | ||
| 95 | |||
| 96 | #define STEP0LD4(t) /* four consecutive STEPD0_LOAD steps: t .. (t)+3 */ \ | ||
| 97 | STEPD0_LOAD(t); \ | ||
| 98 | STEPD0_LOAD((t)+1); \ | ||
| 99 | STEPD0_LOAD((t)+2); \ | ||
| 100 | STEPD0_LOAD((t)+3) | ||
| 101 | |||
| 102 | #define STEPUP4(t, fn) /* four consecutive STEP<fn>_UPDATE steps; fn is D0, D1 or D2 */ \ | ||
| 103 | STEP##fn##_UPDATE(t); \ | ||
| 104 | STEP##fn##_UPDATE((t)+1); \ | ||
| 105 | STEP##fn##_UPDATE((t)+2); \ | ||
| 106 | STEP##fn##_UPDATE((t)+3) | ||
| 107 | |||
| 108 | #define STEPUP20(t, fn) /* twenty consecutive STEP<fn>_UPDATE steps: t .. (t)+19 */ \ | ||
| 109 | STEPUP4(t, fn); \ | ||
| 110 | STEPUP4((t)+4, fn); \ | ||
| 111 | STEPUP4((t)+8, fn); \ | ||
| 112 | STEPUP4((t)+12, fn); \ | ||
| 113 | STEPUP4((t)+16, fn) | ||
| 114 | |||
| 115 | _GLOBAL(powerpc_sha_transform) /* r3 = five-word state (updated in place), r4 = 16-word input block */ | ||
| 116 | PPC_STLU r1,-INT_FRAME_SIZE(r1) /* SAVE_/REST_ GPR macros use the GPR0-based register slots, which need an INT_FRAME_SIZE frame */ | ||
| 117 | SAVE_8GPRS(14, r1) /* r14-r21 */ | ||
| 118 | SAVE_10GPRS(22, r1) /* r22-r31; r14-r31 are all overwritten below */ | ||
| 119 | |||
| 120 | /* Load up A - E */ | ||
| 121 | lwz RA(0),0(r3) /* A */ | ||
| 122 | lwz RB(0),4(r3) /* B */ | ||
| 123 | lwz RC(0),8(r3) /* C */ | ||
| 124 | lwz RD(0),12(r3) /* D */ | ||
| 125 | lwz RE(0),16(r3) /* E */ | ||
| 126 | |||
| 127 | LOADW(0) /* words 0-3 here; STEPD0_LOAD fetches words 4-15 as it goes */ | ||
| 128 | LOADW(1) | ||
| 129 | LOADW(2) | ||
| 130 | LOADW(3) | ||
| 131 | |||
| 132 | lis r15,0x5a82 /* K0-19 */ | ||
| 133 | ori r15,r15,0x7999 | ||
| 134 | STEP0LD4(0) | ||
| 135 | STEP0LD4(4) | ||
| 136 | STEP0LD4(8) | ||
| 137 | STEPUP4(12, D0) /* from step 12 on, W((t)+4) is computed rather than loaded */ | ||
| 138 | STEPUP4(16, D0) | ||
| 139 | |||
| 140 | lis r15,0x6ed9 /* K20-39 */ | ||
| 141 | ori r15,r15,0xeba1 | ||
| 142 | STEPUP20(20, D1) | ||
| 143 | |||
| 144 | lis r15,0x8f1b /* K40-59 */ | ||
| 145 | ori r15,r15,0xbcdc | ||
| 146 | STEPUP20(40, D2) | ||
| 147 | |||
| 148 | lis r15,0xca62 /* K60-79 */ | ||
| 149 | ori r15,r15,0xc1d6 | ||
| 150 | STEPUP4(60, D1) | ||
| 151 | STEPUP4(64, D1) | ||
| 152 | STEPUP4(68, D1) | ||
| 153 | STEPUP4(72, D1) /* last schedule word produced here is W(79) */ | ||
| 154 | lwz r20,16(r3) /* old E; r16-r20 held schedule words that are no longer needed */ | ||
| 155 | STEPD1(76) /* steps 76-79 need no further schedule words */ | ||
| 156 | lwz r19,12(r3) /* old D */ | ||
| 157 | STEPD1(77) | ||
| 158 | lwz r18,8(r3) /* old C */ | ||
| 159 | STEPD1(78) | ||
| 160 | lwz r17,4(r3) /* old B */ | ||
| 161 | STEPD1(79) | ||
| 162 | |||
| 163 | lwz r16,0(r3) /* old A */ | ||
| 164 | add r20,RE(80),r20 /* new E kept in r20: RE(0) is the register still holding RA(80) */ | ||
| 165 | add RD(0),RD(80),r19 | ||
| 166 | add RC(0),RC(80),r18 | ||
| 167 | add RB(0),RB(80),r17 | ||
| 168 | add RA(0),RA(80),r16 /* last read of RA(80); RE(0) may now be overwritten */ | ||
| 169 | mr RE(0),r20 | ||
| 170 | stw RA(0),0(r3) /* write the updated state back */ | ||
| 171 | stw RB(0),4(r3) | ||
| 172 | stw RC(0),8(r3) | ||
| 173 | stw RD(0),12(r3) | ||
| 174 | stw RE(0),16(r3) | ||
| 175 | |||
| 176 | REST_8GPRS(14, r1) | ||
| 177 | REST_10GPRS(22, r1) | ||
| 178 | addi r1,r1,INT_FRAME_SIZE /* must match the size used by PPC_STLU at entry */ | ||
| 179 | blr | ||
diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c new file mode 100644 index 000000000000..f9e8b9491efc --- /dev/null +++ b/arch/powerpc/crypto/sha1.c | |||
| @@ -0,0 +1,157 @@ | |||
| 1 | /* | ||
| 2 | * Cryptographic API. | ||
| 3 | * | ||
| 4 | * powerpc implementation of the SHA1 Secure Hash Algorithm. | ||
| 5 | * | ||
| 6 | * Derived from cryptoapi implementation, adapted for in-place | ||
| 7 | * scatterlist interface. | ||
| 8 | * | ||
| 9 | * Derived from "crypto/sha1.c" | ||
| 10 | * Copyright (c) Alan Smithee. | ||
| 11 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
| 12 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | ||
| 13 | * | ||
| 14 | * This program is free software; you can redistribute it and/or modify it | ||
| 15 | * under the terms of the GNU General Public License as published by the Free | ||
| 16 | * Software Foundation; either version 2 of the License, or (at your option) | ||
| 17 | * any later version. | ||
| 18 | * | ||
| 19 | */ | ||
| 20 | #include <crypto/internal/hash.h> | ||
| 21 | #include <linux/init.h> | ||
| 22 | #include <linux/module.h> | ||
| 23 | #include <linux/mm.h> | ||
| 24 | #include <linux/cryptohash.h> | ||
| 25 | #include <linux/types.h> | ||
| 26 | #include <crypto/sha.h> | ||
| 27 | #include <asm/byteorder.h> | ||
| 28 | |||
| 29 | extern void powerpc_sha_transform(u32 *state, const u8 *src, u32 *temp); | ||
| 30 | |||
| 31 | static int sha1_init(struct shash_desc *desc) /* .init handler: start a new hash; always returns 0 */ | ||
| 32 | { | ||
| 33 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
| 34 | |||
| 35 | *sctx = (struct sha1_state){ /* members not named here (count, buffer) are zero-initialized */ | ||
| 36 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
| 37 | }; | ||
| 38 | |||
| 39 | return 0; | ||
| 40 | } | ||
| 41 | |||
| 42 | static int sha1_update(struct shash_desc *desc, const u8 *data, | ||
| 43 | unsigned int len) | ||
| 44 | { /* .update handler: absorb len bytes from data, hashing every complete 64-byte block; always returns 0 */ | ||
| 45 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
| 46 | unsigned int partial, done; | ||
| 47 | const u8 *src; | ||
| 48 | |||
| 49 | partial = sctx->count & 0x3f; /* bytes already waiting in sctx->buffer (count mod 64) */ | ||
| 50 | sctx->count += len; | ||
| 51 | done = 0; /* number of bytes of data consumed so far */ | ||
| 52 | src = data; | ||
| 53 | |||
| 54 | if ((partial + len) > 63) { /* at least one full block is available */ | ||
| 55 | u32 temp[SHA_WORKSPACE_WORDS]; | ||
| 56 | |||
| 57 | if (partial) { | ||
| 58 | done = -partial; /* unsigned wrap-around: done + 64 below equals 64 - partial */ | ||
| 59 | memcpy(sctx->buffer + partial, data, done + 64); /* top the buffered bytes up to a full block */ | ||
| 60 | src = sctx->buffer; /* first block is hashed from the buffer */ | ||
| 61 | } | ||
| 62 | |||
| 63 | do { | ||
| 64 | powerpc_sha_transform(sctx->state, src, temp); /* NOTE(review): the asm in this patch only uses r5 (third argument) as scratch, so temp looks unused - confirm */ | ||
| 65 | done += 64; /* after the first pass this is the count of data bytes consumed */ | ||
| 66 | src = data + done; /* later blocks are hashed straight from data */ | ||
| 67 | } while (done + 63 < len); /* loop while a full 64-byte block remains in data */ | ||
| 68 | |||
| 69 | memset(temp, 0, sizeof(temp)); /* clear the workspace before it goes out of scope */ | ||
| 70 | partial = 0; /* the buffer has been consumed */ | ||
| 71 | } | ||
| 72 | memcpy(sctx->buffer + partial, src, len - done); /* keep the tail (fewer than 64 bytes) for the next call */ | ||
| 73 | |||
| 74 | return 0; | ||
| 75 | } | ||
| 76 | |||
| 77 | |||
| 78 | /* Add padding and the message length, write the 20-byte digest to out, then wipe the context. */ | ||
| 79 | static int sha1_final(struct shash_desc *desc, u8 *out) | ||
| 80 | { | ||
| 81 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
| 82 | __be32 *dst = (__be32 *)out; | ||
| 83 | u32 i, index, padlen; | ||
| 84 | __be64 bits; | ||
| 85 | static const u8 padding[64] = { 0x80, }; /* 0x80 followed by 63 zero bytes */ | ||
| 86 | |||
| 87 | bits = cpu_to_be64(sctx->count << 3); /* length in bits, captured before the updates below change count */ | ||
| 88 | |||
| 89 | /* Pad out to 56 mod 64 */ | ||
| 90 | index = sctx->count & 0x3f; | ||
| 91 | padlen = (index < 56) ? (56 - index) : ((64+56) - index); /* 1..64 bytes, leaving room for the 8-byte length */ | ||
| 92 | sha1_update(desc, padding, padlen); | ||
| 93 | |||
| 94 | /* Append length */ | ||
| 95 | sha1_update(desc, (const u8 *)&bits, sizeof(bits)); /* completes the final block, so it is hashed here */ | ||
| 96 | |||
| 97 | /* Store state in digest */ | ||
| 98 | for (i = 0; i < 5; i++) | ||
| 99 | dst[i] = cpu_to_be32(sctx->state[i]); /* digest words are emitted big-endian */ | ||
| 100 | |||
| 101 | /* Wipe context */ | ||
| 102 | memset(sctx, 0, sizeof *sctx); | ||
| 103 | |||
| 104 | return 0; | ||
| 105 | } | ||
| 106 | |||
| 107 | static int sha1_export(struct shash_desc *desc, void *out) /* .export handler: copy the whole sha1_state to out */ | ||
| 108 | { | ||
| 109 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
| 110 | |||
| 111 | memcpy(out, sctx, sizeof(*sctx)); /* out must have room for sizeof(struct sha1_state) bytes */ | ||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | static int sha1_import(struct shash_desc *desc, const void *in) /* .import handler: overwrite the sha1_state from in */ | ||
| 116 | { | ||
| 117 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
| 118 | |||
| 119 | memcpy(sctx, in, sizeof(*sctx)); /* reads sizeof(struct sha1_state) bytes from in; contents are not validated */ | ||
| 120 | return 0; | ||
| 121 | } | ||
| 122 | |||
| 123 | static struct shash_alg alg = { /* algorithm descriptor registered and unregistered by the module init/exit functions */ | ||
| 124 | .digestsize = SHA1_DIGEST_SIZE, | ||
| 125 | .init = sha1_init, | ||
| 126 | .update = sha1_update, | ||
| 127 | .final = sha1_final, | ||
| 128 | .export = sha1_export, | ||
| 129 | .import = sha1_import, | ||
| 130 | .descsize = sizeof(struct sha1_state), /* the handlers treat shash_desc_ctx() as a struct sha1_state */ | ||
| 131 | .statesize = sizeof(struct sha1_state), /* number of bytes copied by sha1_export and sha1_import */ | ||
| 132 | .base = { | ||
| 133 | .cra_name = "sha1", | ||
| 134 | .cra_driver_name= "sha1-powerpc", | ||
| 135 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
| 136 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
| 137 | .cra_module = THIS_MODULE, | ||
| 138 | } | ||
| 139 | }; | ||
| 140 | |||
| 141 | static int __init sha1_powerpc_mod_init(void) /* module entry point */ | ||
| 142 | { | ||
| 143 | return crypto_register_shash(&alg); /* the registration result is returned unchanged */ | ||
| 144 | } | ||
| 145 | |||
| 146 | static void __exit sha1_powerpc_mod_fini(void) /* module exit point */ | ||
| 147 | { | ||
| 148 | crypto_unregister_shash(&alg); /* undoes the registration made in sha1_powerpc_mod_init */ | ||
| 149 | } | ||
| 150 | |||
| 151 | module_init(sha1_powerpc_mod_init); | ||
| 152 | module_exit(sha1_powerpc_mod_fini); | ||
| 153 | |||
| 154 | MODULE_LICENSE("GPL"); | ||
| 155 | MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); | ||
| 156 | |||
| 157 | MODULE_ALIAS("sha1-powerpc"); | ||
diff --git a/crypto/Kconfig b/crypto/Kconfig index 4641d95651d3..8e6ae5ed8379 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig | |||
| @@ -479,6 +479,13 @@ config CRYPTO_SHA1_ARM | |||
| 479 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | 479 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented |
| 480 | using optimized ARM assembler. | 480 | using optimized ARM assembler. |
| 481 | 481 | ||
| 482 | config CRYPTO_SHA1_PPC | ||
| 483 | tristate "SHA1 digest algorithm (powerpc)" | ||
| 484 | depends on PPC | ||
| 485 | help | ||
| 486 | This is the powerpc implementation, in optimized assembler, of the | ||
| 487 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). | ||
| 488 | |||
| 482 | config CRYPTO_SHA256 | 489 | config CRYPTO_SHA256 |
| 483 | tristate "SHA224 and SHA256 digest algorithm" | 490 | tristate "SHA224 and SHA256 digest algorithm" |
| 484 | select CRYPTO_HASH | 491 | select CRYPTO_HASH |
