diff options
author | Michael Ellerman <michael@ellerman.id.au> | 2012-09-13 19:00:49 -0400 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2013-01-09 22:43:45 -0500 |
commit | 323a6bf1d6f4ec7907d9d8aacb4ae9590f755dda (patch) | |
tree | 90b75e02d5d9eb74b424155ae726eef5e7f16ff3 | |
parent | 5c49985c21bba4d2f899e3a97121868a5c58a876 (diff) |
powerpc: Add a powerpc implementation of SHA-1
This patch adds a crypto driver which provides a powerpc-accelerated
implementation of SHA-1; "accelerated" here means only that it is written in asm.
Original patch by Paul, minor fixups for upstream by me.
Lightly tested on 64-bit with the test program here:
http://michael.ellerman.id.au/files/junkcode/sha1test.c
Seems to work, and is "not slower" than the generic version.
Needs testing on 32-bit.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- | arch/powerpc/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/crypto/Makefile | 9 | ||||
-rw-r--r-- | arch/powerpc/crypto/sha1-powerpc-asm.S | 179 | ||||
-rw-r--r-- | arch/powerpc/crypto/sha1.c | 157 | ||||
-rw-r--r-- | crypto/Kconfig | 7 |
5 files changed, 353 insertions, 0 deletions
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index b639852116fa..ba45cad088c9 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile | |||
@@ -143,6 +143,7 @@ core-y += arch/powerpc/kernel/ \ | |||
143 | arch/powerpc/sysdev/ \ | 143 | arch/powerpc/sysdev/ \ |
144 | arch/powerpc/platforms/ \ | 144 | arch/powerpc/platforms/ \ |
145 | arch/powerpc/math-emu/ \ | 145 | arch/powerpc/math-emu/ \ |
146 | arch/powerpc/crypto/ \ | ||
146 | arch/powerpc/net/ | 147 | arch/powerpc/net/ |
147 | core-$(CONFIG_XMON) += arch/powerpc/xmon/ | 148 | core-$(CONFIG_XMON) += arch/powerpc/xmon/ |
148 | core-$(CONFIG_KVM) += arch/powerpc/kvm/ | 149 | core-$(CONFIG_KVM) += arch/powerpc/kvm/ |
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile new file mode 100644 index 000000000000..2926fb9c570a --- /dev/null +++ b/arch/powerpc/crypto/Makefile | |||
@@ -0,0 +1,9 @@ | |||
1 | # | ||
2 | # powerpc/crypto/Makefile | ||
3 | # | ||
4 | # Arch-specific CryptoAPI modules. | ||
5 | # | ||
6 | |||
# Build the sha1-powerpc object when CONFIG_CRYPTO_SHA1_PPC is enabled.
7 | obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o | ||
8 | |||
# sha1-powerpc is composed of the assembly transform and the C glue code.
9 | sha1-powerpc-y := sha1-powerpc-asm.o sha1.o | ||
diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S new file mode 100644 index 000000000000..a5f8264d2d3c --- /dev/null +++ b/arch/powerpc/crypto/sha1-powerpc-asm.S | |||
@@ -0,0 +1,179 @@ | |||
1 | /* | ||
2 | * SHA-1 implementation for PowerPC. | ||
3 | * | ||
4 | * Copyright (C) 2005 Paul Mackerras <paulus@samba.org> | ||
5 | */ | ||
6 | |||
7 | #include <asm/ppc_asm.h> | ||
8 | #include <asm/asm-offsets.h> | ||
9 | |||
10 | /* | ||
11 | * We roll the registers for T, A, B, C, D, E around on each | ||
12 | * iteration; T on iteration t is A on iteration t+1, and so on. | ||
13 | * We use registers 7 - 12 for this. | ||
14 | */ | ||
/*
 * Register number holding each role at step t: 7 + ((t + k) % 6), with
 * k = 5 for T down to k = 0 for E, so every value stays within 7..12.
 */
15 | #define RT(t) ((((t)+5)%6)+7) | ||
16 | #define RA(t) ((((t)+4)%6)+7) | ||
17 | #define RB(t) ((((t)+3)%6)+7) | ||
18 | #define RC(t) ((((t)+2)%6)+7) | ||
19 | #define RD(t) ((((t)+1)%6)+7) | ||
20 | #define RE(t) ((((t)+0)%6)+7) | ||
21 | |||
22 | /* We use registers 16 - 31 for the W values */ | ||
/* W(t) is register 16 + (t % 16), so W(t) and W(t+16) name the same register. */
23 | #define W(t) (((t)%16)+16) | ||
24 | |||
/*
 * Load the 32-bit word at byte offset 4*t from the address in r4 into W(t).
 * NOTE(review): lwz does not byte-swap, so this presumably relies on a
 * big-endian CPU to read the message words in SHA-1 order -- confirm.
 */
25 | #define LOADW(t) \ | ||
26 | lwz W(t),(t)*4(r4) | ||
27 | |||
/*
 * One step with f = (B & C) | (D & ~B) and the additive constant in r15:
 *   T = rotl(A, 5) + f + E + r15 + W(t);  B = rotl(B, 30)
 * It also loads the word at byte offset 4*(t+4) from r4 into W(t+4).
 * Uses r0, r6 and r14 as scratch.
 */
28 | #define STEPD0_LOAD(t) \ | ||
29 | andc r0,RD(t),RB(t); \ | ||
30 | and r6,RB(t),RC(t); \ | ||
31 | rotlwi RT(t),RA(t),5; \ | ||
32 | or r6,r6,r0; \ | ||
33 | add r0,RE(t),r15; \ | ||
34 | add RT(t),RT(t),r6; \ | ||
35 | add r14,r0,W(t); \ | ||
36 | lwz W((t)+4),((t)+4)*4(r4); \ | ||
37 | rotlwi RB(t),RB(t),30; \ | ||
38 | add RT(t),RT(t),r14 | ||
39 | |||
/*
 * Same step function as STEPD0_LOAD, but instead of loading from memory it
 * computes the next schedule word:
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * W(t-12) and W(t+4) are the same register, so the old word is replaced.
 * Uses r0, r5 and r6 as scratch.
 */
40 | #define STEPD0_UPDATE(t) \ | ||
41 | and r6,RB(t),RC(t); \ | ||
42 | andc r0,RD(t),RB(t); \ | ||
43 | rotlwi RT(t),RA(t),5; \ | ||
44 | rotlwi RB(t),RB(t),30; \ | ||
45 | or r6,r6,r0; \ | ||
46 | add r0,RE(t),r15; \ | ||
47 | xor r5,W((t)+4-3),W((t)+4-8); \ | ||
48 | add RT(t),RT(t),r6; \ | ||
49 | xor W((t)+4),W((t)+4-16),W((t)+4-14); \ | ||
50 | add r0,r0,W(t); \ | ||
51 | xor W((t)+4),W((t)+4),r5; \ | ||
52 | add RT(t),RT(t),r0; \ | ||
53 | rotlwi W((t)+4),W((t)+4),1 | ||
54 | |||
/*
 * One step with f = B ^ C ^ D and the additive constant in r15:
 *   T = rotl(A, 5) + f + E + r15 + W(t);  B = rotl(B, 30)
 * No schedule word is produced. Uses r0 and r6 as scratch.
 */
55 | #define STEPD1(t) \ | ||
56 | xor r6,RB(t),RC(t); \ | ||
57 | rotlwi RT(t),RA(t),5; \ | ||
58 | rotlwi RB(t),RB(t),30; \ | ||
59 | xor r6,r6,RD(t); \ | ||
60 | add r0,RE(t),r15; \ | ||
61 | add RT(t),RT(t),r6; \ | ||
62 | add r0,r0,W(t); \ | ||
63 | add RT(t),RT(t),r0 | ||
64 | |||
/*
 * STEPD1 plus the schedule update
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * Uses r0, r5 and r6 as scratch.
 */
65 | #define STEPD1_UPDATE(t) \ | ||
66 | xor r6,RB(t),RC(t); \ | ||
67 | rotlwi RT(t),RA(t),5; \ | ||
68 | rotlwi RB(t),RB(t),30; \ | ||
69 | xor r6,r6,RD(t); \ | ||
70 | add r0,RE(t),r15; \ | ||
71 | xor r5,W((t)+4-3),W((t)+4-8); \ | ||
72 | add RT(t),RT(t),r6; \ | ||
73 | xor W((t)+4),W((t)+4-16),W((t)+4-14); \ | ||
74 | add r0,r0,W(t); \ | ||
75 | xor W((t)+4),W((t)+4),r5; \ | ||
76 | add RT(t),RT(t),r0; \ | ||
77 | rotlwi W((t)+4),W((t)+4),1 | ||
78 | |||
/*
 * One step with f = (B & C) | (B & D) | (C & D) and the constant in r15:
 *   T = rotl(A, 5) + f + E + r15 + W(t);  B = rotl(B, 30)
 * plus the schedule update
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-10) ^ W(t-12), 1)
 * Uses r0, r5 and r6 as scratch.
 */
79 | #define STEPD2_UPDATE(t) \ | ||
80 | and r6,RB(t),RC(t); \ | ||
81 | and r0,RB(t),RD(t); \ | ||
82 | rotlwi RT(t),RA(t),5; \ | ||
83 | or r6,r6,r0; \ | ||
84 | rotlwi RB(t),RB(t),30; \ | ||
85 | and r0,RC(t),RD(t); \ | ||
86 | xor r5,W((t)+4-3),W((t)+4-8); \ | ||
87 | or r6,r6,r0; \ | ||
88 | xor W((t)+4),W((t)+4-16),W((t)+4-14); \ | ||
89 | add r0,RE(t),r15; \ | ||
90 | add RT(t),RT(t),r6; \ | ||
91 | add r0,r0,W(t); \ | ||
92 | xor W((t)+4),W((t)+4),r5; \ | ||
93 | add RT(t),RT(t),r0; \ | ||
94 | rotlwi W((t)+4),W((t)+4),1 | ||
95 | |||
/* Four consecutive STEPD0_LOAD steps, t through t+3. */
96 | #define STEP0LD4(t) \ | ||
97 | STEPD0_LOAD(t); \ | ||
98 | STEPD0_LOAD((t)+1); \ | ||
99 | STEPD0_LOAD((t)+2); \ | ||
100 | STEPD0_LOAD((t)+3) | ||
101 | |||
/*
 * Four consecutive STEP<fn>_UPDATE steps, t through t+3. fn is pasted into
 * the macro name, so it must be one of D0, D1 or D2.
 */
102 | #define STEPUP4(t, fn) \ | ||
103 | STEP##fn##_UPDATE(t); \ | ||
104 | STEP##fn##_UPDATE((t)+1); \ | ||
105 | STEP##fn##_UPDATE((t)+2); \ | ||
106 | STEP##fn##_UPDATE((t)+3) | ||
107 | |||
/* Twenty consecutive STEP<fn>_UPDATE steps, built from five STEPUP4 groups. */
108 | #define STEPUP20(t, fn) \ | ||
109 | STEPUP4(t, fn); \ | ||
110 | STEPUP4((t)+4, fn); \ | ||
111 | STEPUP4((t)+8, fn); \ | ||
112 | STEPUP4((t)+12, fn); \ | ||
113 | STEPUP4((t)+16, fn) | ||
114 | |||
/*
 * powerpc_sha_transform: run the 80 SHA-1 steps over one 64-byte block.
 *
 * r3 points at five 32-bit state words, which are read on entry and
 * updated in place on exit. r4 points at the 16 message words, read at
 * byte offsets 0..60. r5 is only ever used as scratch by the step macros.
 *
 * A STACKFRAMESIZE-byte frame is pushed for the duration of the call; the
 * SAVE_ and REST_ macros presumably save and restore r14-r31 (their
 * definitions are not in this file -- confirm in asm/ppc_asm.h).
 */
115 | _GLOBAL(powerpc_sha_transform) | ||
116 | PPC_STLU r1,-STACKFRAMESIZE(r1) | ||
117 | SAVE_8GPRS(14, r1) | ||
118 | SAVE_10GPRS(22, r1) | ||
119 | |||
120 | /* Load up A - E */ | ||
121 | lwz RA(0),0(r3) /* A */ | ||
122 | lwz RB(0),4(r3) /* B */ | ||
123 | lwz RC(0),8(r3) /* C */ | ||
124 | lwz RD(0),12(r3) /* D */ | ||
125 | lwz RE(0),16(r3) /* E */ | ||
126 | |||
/* Words 0-3 are loaded up front; each later load happens four steps early. */
127 | LOADW(0) | ||
128 | LOADW(1) | ||
129 | LOADW(2) | ||
130 | LOADW(3) | ||
131 | |||
/*
 * r15 holds the additive constant for the current group of 20 steps.
 * Steps 0-11 also fetch message words 4-15; steps 12-19 start extending
 * the schedule instead.
 */
132 | lis r15,0x5a82 /* K0-19 */ | ||
133 | ori r15,r15,0x7999 | ||
134 | STEP0LD4(0) | ||
135 | STEP0LD4(4) | ||
136 | STEP0LD4(8) | ||
137 | STEPUP4(12, D0) | ||
138 | STEPUP4(16, D0) | ||
139 | |||
140 | lis r15,0x6ed9 /* K20-39 */ | ||
141 | ori r15,r15,0xeba1 | ||
142 | STEPUP20(20, D1) | ||
143 | |||
144 | lis r15,0x8f1b /* K40-59 */ | ||
145 | ori r15,r15,0xbcdc | ||
146 | STEPUP20(40, D2) | ||
147 | |||
/*
 * Steps 60-75 still extend the schedule. Steps 76-79 need no further
 * words, so they use STEPD1; they only read W(76)..W(79) (r28-r31), which
 * leaves r16-r20 free to receive the incoming state words in between.
 */
148 | lis r15,0xca62 /* K60-79 */ | ||
149 | ori r15,r15,0xc1d6 | ||
150 | STEPUP4(60, D1) | ||
151 | STEPUP4(64, D1) | ||
152 | STEPUP4(68, D1) | ||
153 | STEPUP4(72, D1) | ||
154 | lwz r20,16(r3) | ||
155 | STEPD1(76) | ||
156 | lwz r19,12(r3) | ||
157 | STEPD1(77) | ||
158 | lwz r18,8(r3) | ||
159 | STEPD1(78) | ||
160 | lwz r17,4(r3) | ||
161 | STEPD1(79) | ||
162 | |||
/*
 * Add the incoming state to the result of step 79 and store it back.
 * RE(0) and RA(80) are both r7, so the E sum is parked in r20 and only
 * moved into RE(0) after RA(80) has been consumed.
 */
163 | lwz r16,0(r3) | ||
164 | add r20,RE(80),r20 | ||
165 | add RD(0),RD(80),r19 | ||
166 | add RC(0),RC(80),r18 | ||
167 | add RB(0),RB(80),r17 | ||
168 | add RA(0),RA(80),r16 | ||
169 | mr RE(0),r20 | ||
170 | stw RA(0),0(r3) | ||
171 | stw RB(0),4(r3) | ||
172 | stw RC(0),8(r3) | ||
173 | stw RD(0),12(r3) | ||
174 | stw RE(0),16(r3) | ||
175 | |||
176 | REST_8GPRS(14, r1) | ||
177 | REST_10GPRS(22, r1) | ||
178 | addi r1,r1,STACKFRAMESIZE | ||
179 | blr | ||
diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c new file mode 100644 index 000000000000..f9e8b9491efc --- /dev/null +++ b/arch/powerpc/crypto/sha1.c | |||
@@ -0,0 +1,157 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * powerpc implementation of the SHA1 Secure Hash Algorithm. | ||
5 | * | ||
6 | * Derived from cryptoapi implementation, adapted for in-place | ||
7 | * scatterlist interface. | ||
8 | * | ||
9 | * Derived from "crypto/sha1.c" | ||
10 | * Copyright (c) Alan Smithee. | ||
11 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
12 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License as published by the Free | ||
16 | * Software Foundation; either version 2 of the License, or (at your option) | ||
17 | * any later version. | ||
18 | * | ||
19 | */ | ||
20 | #include <crypto/internal/hash.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/mm.h> | ||
24 | #include <linux/cryptohash.h> | ||
25 | #include <linux/types.h> | ||
26 | #include <crypto/sha.h> | ||
27 | #include <asm/byteorder.h> | ||
28 | |||
/*
 * Assembly block transform, defined in sha1-powerpc-asm.S: updates the five
 * words at state in place from the 64 bytes at src.
 * NOTE(review): the assembly appears never to read temp (the third argument
 * register is overwritten as scratch) -- confirm before relying on it.
 */
29 | extern void powerpc_sha_transform(u32 *state, const u8 *src, u32 *temp); | ||
30 | |||
31 | static int sha1_init(struct shash_desc *desc) | ||
32 | { | ||
33 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
34 | |||
35 | *sctx = (struct sha1_state){ | ||
36 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
37 | }; | ||
38 | |||
39 | return 0; | ||
40 | } | ||
41 | |||
42 | static int sha1_update(struct shash_desc *desc, const u8 *data, | ||
43 | unsigned int len) | ||
44 | { | ||
45 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
46 | unsigned int partial, done; | ||
47 | const u8 *src; | ||
48 | |||
49 | partial = sctx->count & 0x3f; | ||
50 | sctx->count += len; | ||
51 | done = 0; | ||
52 | src = data; | ||
53 | |||
54 | if ((partial + len) > 63) { | ||
55 | u32 temp[SHA_WORKSPACE_WORDS]; | ||
56 | |||
57 | if (partial) { | ||
58 | done = -partial; | ||
59 | memcpy(sctx->buffer + partial, data, done + 64); | ||
60 | src = sctx->buffer; | ||
61 | } | ||
62 | |||
63 | do { | ||
64 | powerpc_sha_transform(sctx->state, src, temp); | ||
65 | done += 64; | ||
66 | src = data + done; | ||
67 | } while (done + 63 < len); | ||
68 | |||
69 | memset(temp, 0, sizeof(temp)); | ||
70 | partial = 0; | ||
71 | } | ||
72 | memcpy(sctx->buffer + partial, src, len - done); | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | |||
78 | /* Add padding and return the message digest. */ | ||
79 | static int sha1_final(struct shash_desc *desc, u8 *out) | ||
80 | { | ||
81 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
82 | __be32 *dst = (__be32 *)out; | ||
83 | u32 i, index, padlen; | ||
84 | __be64 bits; | ||
85 | static const u8 padding[64] = { 0x80, }; | ||
86 | |||
87 | bits = cpu_to_be64(sctx->count << 3); | ||
88 | |||
89 | /* Pad out to 56 mod 64 */ | ||
90 | index = sctx->count & 0x3f; | ||
91 | padlen = (index < 56) ? (56 - index) : ((64+56) - index); | ||
92 | sha1_update(desc, padding, padlen); | ||
93 | |||
94 | /* Append length */ | ||
95 | sha1_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
96 | |||
97 | /* Store state in digest */ | ||
98 | for (i = 0; i < 5; i++) | ||
99 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
100 | |||
101 | /* Wipe context */ | ||
102 | memset(sctx, 0, sizeof *sctx); | ||
103 | |||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | static int sha1_export(struct shash_desc *desc, void *out) | ||
108 | { | ||
109 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
110 | |||
111 | memcpy(out, sctx, sizeof(*sctx)); | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | static int sha1_import(struct shash_desc *desc, const void *in) | ||
116 | { | ||
117 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
118 | |||
119 | memcpy(sctx, in, sizeof(*sctx)); | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | static struct shash_alg alg = { | ||
124 | .digestsize = SHA1_DIGEST_SIZE, | ||
125 | .init = sha1_init, | ||
126 | .update = sha1_update, | ||
127 | .final = sha1_final, | ||
128 | .export = sha1_export, | ||
129 | .import = sha1_import, | ||
130 | .descsize = sizeof(struct sha1_state), | ||
131 | .statesize = sizeof(struct sha1_state), | ||
132 | .base = { | ||
133 | .cra_name = "sha1", | ||
134 | .cra_driver_name= "sha1-powerpc", | ||
135 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
136 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
137 | .cra_module = THIS_MODULE, | ||
138 | } | ||
139 | }; | ||
140 | |||
141 | static int __init sha1_powerpc_mod_init(void) | ||
142 | { | ||
143 | return crypto_register_shash(&alg); | ||
144 | } | ||
145 | |||
146 | static void __exit sha1_powerpc_mod_fini(void) | ||
147 | { | ||
148 | crypto_unregister_shash(&alg); | ||
149 | } | ||
150 | |||
151 | module_init(sha1_powerpc_mod_init); | ||
152 | module_exit(sha1_powerpc_mod_fini); | ||
153 | |||
154 | MODULE_LICENSE("GPL"); | ||
155 | MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); | ||
156 | |||
157 | MODULE_ALIAS("sha1-powerpc"); | ||
diff --git a/crypto/Kconfig b/crypto/Kconfig index 4641d95651d3..8e6ae5ed8379 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig | |||
@@ -479,6 +479,13 @@ config CRYPTO_SHA1_ARM | |||
479 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | 479 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented |
480 | using optimized ARM assembler. | 480 | using optimized ARM assembler. |
481 | 481 | ||
# powerpc SHA-1 driver (arch/powerpc/crypto). The glue code registers an
# shash algorithm, so the hash infrastructure must be built whenever this
# option is enabled.
config CRYPTO_SHA1_PPC
	tristate "SHA1 digest algorithm (powerpc)"
	depends on PPC
	select CRYPTO_HASH
	help
	  This is the powerpc assembler-optimized implementation of the
	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
488 | |||
482 | config CRYPTO_SHA256 | 489 | config CRYPTO_SHA256 |
483 | tristate "SHA224 and SHA256 digest algorithm" | 490 | tristate "SHA224 and SHA256 digest algorithm" |
484 | select CRYPTO_HASH | 491 | select CRYPTO_HASH |