arm: remove "optimized" SHA1 routines

Since commit 1eb19a12bd22 ("lib/sha1: use the git implementation of SHA-1"), the ARM SHA1 routines no longer work. The reason? They depended on the larger 320-byte workspace, and now the sha1 workspace is just 16 words (64 bytes). So the assembly version would overwrite the stack randomly.

The optimized asm version is also probably slower than the new improved C version, so there's no reason to keep it around. At least that was the case in git, where what appears to be the same assembly language version was removed two years ago because the optimized C BLK_SHA1 code was faster.

Reported-and-tested-by: Joachim Eastwood <manabian@gmail.com>
Cc: Andreas Schwab <schwab@linux-m68k.org>
Cc: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Linus Torvalds <torvalds@linux-foundation.org> 2011-08-07 17:07:03 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-08-07 17:07:03 -0400
commit: 4d4487140d34c1b9b321889d2d209321b0da6643 (patch)
tree: 1f1fa8d67a9f36980e457b77234d89f6761098c9
parent: 3295514841c2112d94451ba5deaf54f5afb78ea9 (diff)
2 files changed, 1 insertions, 212 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 59ff42ddf0ae..cf73a7f742dd 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -12,7 +12,7 @@ lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
                   strchr.o strrchr.o                                 \
                   testchangebit.o testclearbit.o testsetbit.o        \
                   ashldi3.o ashrdi3.o lshrdi3.o muldi3.o             \
-                   ucmpdi2.o lib1funcs.o div64.o sha1.o               \
+                   ucmpdi2.o lib1funcs.o div64.o                      \
                   io-readsb.o io-writesb.o io-readsl.o io-writesl.o
 mmu-y   := clear_user.o copy_page.o getuser.o putuser.o
diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S
deleted file mode 100644
index eb0edb80d7b8..000000000000
--- a/arch/arm/lib/sha1.S
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- *  linux/arch/arm/lib/sha1.S
- *
- *  SHA transform optimized for ARM
- *
- *  Copyright:  (C) 2005 by Nicolas Pitre <nico@fluxnic.net>
- *  Created:    September 17, 2005
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- *
- *  The reference implementation for this code is linux/lib/sha1.c
- */
-#include <linux/linkage.h>
-        .text
-/*
- * void sha_transform(__u32 *digest, const char *in, __u32 *W)
- *
- * Note: the "in" ptr may be unaligned.
- */
-ENTRY(sha_transform)
-        stmfd   sp!, {r4 - r8, lr}
-        @ for (i = 0; i < 16; i++)
-        @         W[i] = be32_to_cpu(in[i]);
-#ifdef __ARMEB__
-        mov     r4, r0
-        mov     r0, r2
-        mov     r2, #64
-        bl      memcpy
-        mov     r2, r0
-        mov     r0, r4
-#else
-        mov     r3, r2
-        mov     lr, #16
-1:      ldrb    r4, [r1], #1
-        ldrb    r5, [r1], #1
-        ldrb    r6, [r1], #1
-        ldrb    r7, [r1], #1
-        subs    lr, lr, #1
-        orr     r5, r5, r4, lsl #8
-        orr     r6, r6, r5, lsl #8
-        orr     r7, r7, r6, lsl #8
-        str     r7, [r3], #4
-        bne     1b
-#endif
-        @ for (i = 0; i < 64; i++)
-        @         W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31);
-        sub     r3, r2, #4
-        mov     lr, #64
-2:      ldr     r4, [r3, #4]!
-        subs    lr, lr, #1
-        ldr     r5, [r3, #8]
-        ldr     r6, [r3, #32]
-        ldr     r7, [r3, #52]
-        eor     r4, r4, r5
-        eor     r4, r4, r6
-        eor     r4, r4, r7
-        mov     r4, r4, ror #31
-        str     r4, [r3, #64]
-        bne     2b
-        /*
-         * The SHA functions are:
-         *
-         * f1(B,C,D) = (D ^ (B & (C ^ D)))
-         * f2(B,C,D) = (B ^ C ^ D)
-         * f3(B,C,D) = ((B & C) | (D & (B | C)))
-         *
-         * Then the sub-blocks are processed as follows:
-         *
-         * A' = ror(A, 27) + f(B,C,D) + E + K + *W++
-         * B' = A
-         * C' = ror(B, 2)
-         * D' = C
-         * E' = D
-         *
-         * We therefore unroll each loop 5 times to avoid register shuffling.
-         * Also the ror for C (and also D and E which are successivelyderived
-         * from it) is applied in place to cut on an additional mov insn for
-         * each round.
-         */
-        .macro  sha_f1, A, B, C, D, E
-        ldr     r3, [r2], #4
-        eor     ip, \C, \D
-        add     \E, r1, \E, ror #2
-        and     ip, \B, ip, ror #2
-        add     \E, \E, \A, ror #27
-        eor     ip, ip, \D, ror #2
-        add     \E, \E, r3
-        add     \E, \E, ip
-        .endm
-        .macro  sha_f2, A, B, C, D, E
-        ldr     r3, [r2], #4
-        add     \E, r1, \E, ror #2
-        eor     ip, \B, \C, ror #2
-        add     \E, \E, \A, ror #27
-        eor     ip, ip, \D, ror #2
-        add     \E, \E, r3
-        add     \E, \E, ip
-        .endm
-        .macro  sha_f3, A, B, C, D, E
-        ldr     r3, [r2], #4
-        add     \E, r1, \E, ror #2
-        orr     ip, \B, \C, ror #2
-        add     \E, \E, \A, ror #27
-        and     ip, ip, \D, ror #2
-        add     \E, \E, r3
-        and     r3, \B, \C, ror #2
-        orr     ip, ip, r3
-        add     \E, \E, ip
-        .endm
-        ldmia   r0, {r4 - r8}
-        mov     lr, #4
-        ldr     r1, .L_sha_K + 0
-        /* adjust initial values */
-        mov     r6, r6, ror #30
-        mov     r7, r7, ror #30
-        mov     r8, r8, ror #30
-3:      subs    lr, lr, #1
-        sha_f1  r4, r5, r6, r7, r8
-        sha_f1  r8, r4, r5, r6, r7
-        sha_f1  r7, r8, r4, r5, r6
-        sha_f1  r6, r7, r8, r4, r5
-        sha_f1  r5, r6, r7, r8, r4
-        bne     3b
-        ldr     r1, .L_sha_K + 4
-        mov     lr, #4
-4:      subs    lr, lr, #1
-        sha_f2  r4, r5, r6, r7, r8
-        sha_f2  r8, r4, r5, r6, r7
-        sha_f2  r7, r8, r4, r5, r6
-        sha_f2  r6, r7, r8, r4, r5
-        sha_f2  r5, r6, r7, r8, r4
-        bne     4b
-        ldr     r1, .L_sha_K + 8
-        mov     lr, #4
-5:      subs    lr, lr, #1
-        sha_f3  r4, r5, r6, r7, r8
-        sha_f3  r8, r4, r5, r6, r7
-        sha_f3  r7, r8, r4, r5, r6
-        sha_f3  r6, r7, r8, r4, r5
-        sha_f3  r5, r6, r7, r8, r4
-        bne     5b
-        ldr     r1, .L_sha_K + 12
-        mov     lr, #4
-6:      subs    lr, lr, #1
-        sha_f2  r4, r5, r6, r7, r8
-        sha_f2  r8, r4, r5, r6, r7
-        sha_f2  r7, r8, r4, r5, r6
-        sha_f2  r6, r7, r8, r4, r5
-        sha_f2  r5, r6, r7, r8, r4
-        bne     6b
-        ldmia   r0, {r1, r2, r3, ip, lr}
-        add     r4, r1, r4
-        add     r5, r2, r5
-        add     r6, r3, r6, ror #2
-        add     r7, ip, r7, ror #2
-        add     r8, lr, r8, ror #2
-        stmia   r0, {r4 - r8}
-        ldmfd   sp!, {r4 - r8, pc}
-ENDPROC(sha_transform)
-        .align  2
-.L_sha_K:
-        .word   0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
-/*
- * void sha_init(__u32 *buf)
- */
-        .align  2
-.L_sha_initial_digest:
-        .word   0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
-ENTRY(sha_init)
-        str     lr, [sp, #-4]!
-        adr     r1, .L_sha_initial_digest
-        ldmia   r1, {r1, r2, r3, ip, lr}
-        stmia   r0, {r1, r2, r3, ip, lr}
-        ldr     pc, [sp], #4
-ENDPROC(sha_init)
author	Linus Torvalds <torvalds@linux-foundation.org>	2011-08-07 17:07:03 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-08-07 17:07:03 -0400
commit	4d4487140d34c1b9b321889d2d209321b0da6643 (patch)
tree	1f1fa8d67a9f36980e457b77234d89f6761098c9
parent	3295514841c2112d94451ba5deaf54f5afb78ea9 (diff)

diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 59ff42ddf0ae..cf73a7f742dd 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
12	strchr.o strrchr.o \	12	strchr.o strrchr.o \
13	testchangebit.o testclearbit.o testsetbit.o \	13	testchangebit.o testclearbit.o testsetbit.o \
14	ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \	14	ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
15	ucmpdi2.o lib1funcs.o div64.o sha1.o \	15	ucmpdi2.o lib1funcs.o div64.o \
16	io-readsb.o io-writesb.o io-readsl.o io-writesl.o	16	io-readsb.o io-writesb.o io-readsl.o io-writesl.o
17		17
18	mmu-y := clear_user.o copy_page.o getuser.o putuser.o	18	mmu-y := clear_user.o copy_page.o getuser.o putuser.o


diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S
deleted file mode 100644
index eb0edb80d7b8..000000000000
--- a/arch/arm/lib/sha1.S
+++ /dev/null
@@ -1,211 +0,0 @@
1	/*
2	* linux/arch/arm/lib/sha1.S
3	*
4	* SHA transform optimized for ARM
5	*
6	* Copyright: (C) 2005 by Nicolas Pitre <nico@fluxnic.net>
7	* Created: September 17, 2005
8	*
9	* This program is free software; you can redistribute it and/or modify
10	* it under the terms of the GNU General Public License version 2 as
11	* published by the Free Software Foundation.
12	*
13	* The reference implementation for this code is linux/lib/sha1.c
14	*/
15
16	#include <linux/linkage.h>
17
18	.text
19
20
21	/*
22	* void sha_transform(__u32 *digest, const char *in, __u32 *W)
23	*
24	* Note: the "in" ptr may be unaligned.
25	*/
26
27	ENTRY(sha_transform)
28
29	stmfd sp!, {r4 - r8, lr}
30
31	@ for (i = 0; i < 16; i++)
32	@ W[i] = be32_to_cpu(in[i]);
33
34	#ifdef __ARMEB__
35	mov r4, r0
36	mov r0, r2
37	mov r2, #64
38	bl memcpy
39	mov r2, r0
40	mov r0, r4
41	#else
42	mov r3, r2
43	mov lr, #16
44	1: ldrb r4, [r1], #1
45	ldrb r5, [r1], #1
46	ldrb r6, [r1], #1
47	ldrb r7, [r1], #1
48	subs lr, lr, #1
49	orr r5, r5, r4, lsl #8
50	orr r6, r6, r5, lsl #8
51	orr r7, r7, r6, lsl #8
52	str r7, [r3], #4
53	bne 1b
54	#endif
55
56	@ for (i = 0; i < 64; i++)
57	@ W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31);
58
59	sub r3, r2, #4
60	mov lr, #64
61	2: ldr r4, [r3, #4]!
62	subs lr, lr, #1
63	ldr r5, [r3, #8]
64	ldr r6, [r3, #32]
65	ldr r7, [r3, #52]
66	eor r4, r4, r5
67	eor r4, r4, r6
68	eor r4, r4, r7
69	mov r4, r4, ror #31
70	str r4, [r3, #64]
71	bne 2b
72
73	/*
74	* The SHA functions are:
75	*
76	* f1(B,C,D) = (D ^ (B & (C ^ D)))
77	* f2(B,C,D) = (B ^ C ^ D)
78	* f3(B,C,D) = ((B & C) | (D & (B | C)))
79	*
80	* Then the sub-blocks are processed as follows:
81	*
82	* A' = ror(A, 27) + f(B,C,D) + E + K + *W++
83	* B' = A
84	* C' = ror(B, 2)
85	* D' = C
86	* E' = D
87	*
88	* We therefore unroll each loop 5 times to avoid register shuffling.
89	* Also the ror for C (and also D and E which are successivelyderived
90	* from it) is applied in place to cut on an additional mov insn for
91	* each round.
92	*/
93
94	.macro sha_f1, A, B, C, D, E
95	ldr r3, [r2], #4
96	eor ip, \C, \D
97	add \E, r1, \E, ror #2
98	and ip, \B, ip, ror #2
99	add \E, \E, \A, ror #27
100	eor ip, ip, \D, ror #2
101	add \E, \E, r3
102	add \E, \E, ip
103	.endm
104
105	.macro sha_f2, A, B, C, D, E
106	ldr r3, [r2], #4
107	add \E, r1, \E, ror #2
108	eor ip, \B, \C, ror #2
109	add \E, \E, \A, ror #27
110	eor ip, ip, \D, ror #2
111	add \E, \E, r3
112	add \E, \E, ip
113	.endm
114
115	.macro sha_f3, A, B, C, D, E
116	ldr r3, [r2], #4
117	add \E, r1, \E, ror #2
118	orr ip, \B, \C, ror #2
119	add \E, \E, \A, ror #27
120	and ip, ip, \D, ror #2
121	add \E, \E, r3
122	and r3, \B, \C, ror #2
123	orr ip, ip, r3
124	add \E, \E, ip
125	.endm
126
127	ldmia r0, {r4 - r8}
128
129	mov lr, #4
130	ldr r1, .L_sha_K + 0
131
132	/* adjust initial values */
133	mov r6, r6, ror #30
134	mov r7, r7, ror #30
135	mov r8, r8, ror #30
136
137	3: subs lr, lr, #1
138	sha_f1 r4, r5, r6, r7, r8
139	sha_f1 r8, r4, r5, r6, r7
140	sha_f1 r7, r8, r4, r5, r6
141	sha_f1 r6, r7, r8, r4, r5
142	sha_f1 r5, r6, r7, r8, r4
143	bne 3b
144
145	ldr r1, .L_sha_K + 4
146	mov lr, #4
147
148	4: subs lr, lr, #1
149	sha_f2 r4, r5, r6, r7, r8
150	sha_f2 r8, r4, r5, r6, r7
151	sha_f2 r7, r8, r4, r5, r6
152	sha_f2 r6, r7, r8, r4, r5
153	sha_f2 r5, r6, r7, r8, r4
154	bne 4b
155
156	ldr r1, .L_sha_K + 8
157	mov lr, #4
158
159	5: subs lr, lr, #1
160	sha_f3 r4, r5, r6, r7, r8
161	sha_f3 r8, r4, r5, r6, r7
162	sha_f3 r7, r8, r4, r5, r6
163	sha_f3 r6, r7, r8, r4, r5
164	sha_f3 r5, r6, r7, r8, r4
165	bne 5b
166
167	ldr r1, .L_sha_K + 12
168	mov lr, #4
169
170	6: subs lr, lr, #1
171	sha_f2 r4, r5, r6, r7, r8
172	sha_f2 r8, r4, r5, r6, r7
173	sha_f2 r7, r8, r4, r5, r6
174	sha_f2 r6, r7, r8, r4, r5
175	sha_f2 r5, r6, r7, r8, r4
176	bne 6b
177
178	ldmia r0, {r1, r2, r3, ip, lr}
179	add r4, r1, r4
180	add r5, r2, r5
181	add r6, r3, r6, ror #2
182	add r7, ip, r7, ror #2
183	add r8, lr, r8, ror #2
184	stmia r0, {r4 - r8}
185
186	ldmfd sp!, {r4 - r8, pc}
187
188	ENDPROC(sha_transform)
189
190	.align 2
191	.L_sha_K:
192	.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
193
194
195	/*
196	* void sha_init(__u32 *buf)
197	*/
198
199	.align 2
200	.L_sha_initial_digest:
201	.word 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
202
203	ENTRY(sha_init)
204
205	str lr, [sp, #-4]!
206	adr r1, .L_sha_initial_digest
207	ldmia r1, {r1, r2, r3, ip, lr}
208	stmia r0, {r1, r2, r3, ip, lr}
209	ldr pc, [sp], #4
210
211	ENDPROC(sha_init)