about | summary | refs | log | tree | commit | diff | stats
path: root/crypto/sha512_generic.c
diff options
context:
space:
mode:
author: Herbert Xu <herbert@gondor.apana.org.au> 2012-02-04 23:09:28 -0500
committer: Herbert Xu <herbert@gondor.apana.org.au> 2012-02-04 23:09:28 -0500
commit: 3a92d687c8015860a19213e3c102cad6b722f83c (patch)
tree: 9f133af75426149336883da050848c4024ef2928 /crypto/sha512_generic.c
parent: 58d7d18b5268febb8b1391c6dffc8e2aaa751fcd (diff)
crypto: sha512 - Avoid stack bloat on i386
Unfortunately in reducing W from 80 to 16 we ended up unrolling the loop twice. As gcc has issues dealing with 64-bit ops on i386 this means that we end up using even more stack space (>1K).

This patch solves the W reduction by moving LOAD_OP/BLEND_OP into the loop itself, thus avoiding the need to duplicate it.

While the stack space still isn't great (>0.5K) it is at least in the same ball park as the amount of stack used for our C sha1 implementation.

Note that this patch basically reverts to the original code so the diff looks bigger than it really is.

Cc: stable@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'crypto/sha512_generic.c')
-rw-r--r-- crypto/sha512_generic.c | 68
1 files changed, 32 insertions, 36 deletions
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 3edebfd4dbec..f04af931a682 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -89,46 +89,42 @@ sha512_transform(u64 *state, const u8 *input)
89 int i; 89 int i;
90 u64 W[16]; 90 u64 W[16];
91 91
92 /* load the input */
93 for (i = 0; i < 16; i++)
94 LOAD_OP(i, W, input);
95
96 /* load the state into our registers */ 92 /* load the state into our registers */
97 a=state[0]; b=state[1]; c=state[2]; d=state[3]; 93 a=state[0]; b=state[1]; c=state[2]; d=state[3];
98 e=state[4]; f=state[5]; g=state[6]; h=state[7]; 94 e=state[4]; f=state[5]; g=state[6]; h=state[7];
99 95
100#define SHA512_0_15(i, a, b, c, d, e, f, g, h) \ 96 /* now iterate */
101 t1 = h + e1(e) + Ch(e, f, g) + sha512_K[i] + W[i]; \ 97 for (i=0; i<80; i+=8) {
102 t2 = e0(a) + Maj(a, b, c); \ 98 if (!(i & 8)) {
103 d += t1; \ 99 int j;
104 h = t1 + t2 100
105 101 if (i < 16) {
106#define SHA512_16_79(i, a, b, c, d, e, f, g, h) \ 102 /* load the input */
107 BLEND_OP(i, W); \ 103 for (j = 0; j < 16; j++)
108 t1 = h + e1(e) + Ch(e, f, g) + sha512_K[i] + W[(i)&15]; \ 104 LOAD_OP(i + j, W, input);
109 t2 = e0(a) + Maj(a, b, c); \ 105 } else {
110 d += t1; \ 106 for (j = 0; j < 16; j++) {
111 h = t1 + t2 107 BLEND_OP(i + j, W);
112 108 }
113 for (i = 0; i < 16; i += 8) { 109 }
114 SHA512_0_15(i, a, b, c, d, e, f, g, h); 110 }
115 SHA512_0_15(i + 1, h, a, b, c, d, e, f, g); 111
116 SHA512_0_15(i + 2, g, h, a, b, c, d, e, f); 112 t1 = h + e1(e) + Ch(e,f,g) + sha512_K[i ] + W[(i & 15)];
117 SHA512_0_15(i + 3, f, g, h, a, b, c, d, e); 113 t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
118 SHA512_0_15(i + 4, e, f, g, h, a, b, c, d); 114 t1 = g + e1(d) + Ch(d,e,f) + sha512_K[i+1] + W[(i & 15) + 1];
119 SHA512_0_15(i + 5, d, e, f, g, h, a, b, c); 115 t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
120 SHA512_0_15(i + 6, c, d, e, f, g, h, a, b); 116 t1 = f + e1(c) + Ch(c,d,e) + sha512_K[i+2] + W[(i & 15) + 2];
121 SHA512_0_15(i + 7, b, c, d, e, f, g, h, a); 117 t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
122 } 118 t1 = e + e1(b) + Ch(b,c,d) + sha512_K[i+3] + W[(i & 15) + 3];
123 for (i = 16; i < 80; i += 8) { 119 t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
124 SHA512_16_79(i, a, b, c, d, e, f, g, h); 120 t1 = d + e1(a) + Ch(a,b,c) + sha512_K[i+4] + W[(i & 15) + 4];
125 SHA512_16_79(i + 1, h, a, b, c, d, e, f, g); 121 t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
126 SHA512_16_79(i + 2, g, h, a, b, c, d, e, f); 122 t1 = c + e1(h) + Ch(h,a,b) + sha512_K[i+5] + W[(i & 15) + 5];
127 SHA512_16_79(i + 3, f, g, h, a, b, c, d, e); 123 t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
128 SHA512_16_79(i + 4, e, f, g, h, a, b, c, d); 124 t1 = b + e1(g) + Ch(g,h,a) + sha512_K[i+6] + W[(i & 15) + 6];
129 SHA512_16_79(i + 5, d, e, f, g, h, a, b, c); 125 t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
130 SHA512_16_79(i + 6, c, d, e, f, g, h, a, b); 126 t1 = a + e1(f) + Ch(f,g,h) + sha512_K[i+7] + W[(i & 15) + 7];
131 SHA512_16_79(i + 7, b, c, d, e, f, g, h, a); 127 t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
132 } 128 }
133 129
134 state[0] += a; state[1] += b; state[2] += c; state[3] += d; 130 state[0] += a; state[1] += b; state[2] += c; state[3] += d;