author		Jussi Kivilinna <jussi.kivilinna@mbnet.fi>	2012-10-20 08:06:51 -0400
committer	Herbert Xu <herbert@gondor.apana.org.au>	2012-10-24 09:10:55 -0400
commit		facd416fbc1cdee357730909a414898934f16ae1 (patch)
tree		e590e4bdd151c06c820bdcc1635b0660e525f84d /arch/x86/crypto/serpent_avx_glue.c
parent		8435a3c3003c00c43f1b267368bbe1d8dada35d1 (diff)
crypto: serpent/avx - avoid using temporary stack buffers
Introduce new assembler functions to avoid the use of temporary stack buffers in the
glue code. This also allows the use of vector instructions for XORing the output in
CTR and CBC modes and for constructing IVs in CTR mode.
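The "temporary stack buffer" pattern being removed is easiest to see in the old
CBC decryption glue helper; the following is an annotated copy of the code deleted
in the diff below (the comments are editorial, the code is verbatim):

	/* Old glue helper removed by this patch.  In-place CBC decryption
	 * needs ciphertext block j as the chaining value for plaintext
	 * block j+1, so the old code first saved those blocks in a
	 * temporary array on the stack.
	 */
	static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
	{
		u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];	/* temporary stack buffer */
		unsigned int j;

		/* Save ciphertext blocks 0..n-2 before they are overwritten. */
		for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
			ivs[j] = src[j];

		/* Decrypt all blocks in parallel. */
		serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);

		/* CBC chaining: P[j+1] = D(C[j+1]) ^ C[j]. */
		for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
			u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
	}

The replacement, serpent_cbc_dec_8way_avx, performs the same chaining XORs inside
the assembler routine using vector registers, so no stack copy is needed.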
ECB mode sees a ~0.5% decrease in speed because of the one added extra function
call. CBC-mode decryption and CTR mode benefit from the vector operations
and gain ~3%.
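The CTR path follows the same pattern. The deleted serpent_crypt_ctr_xway
(annotated copy below, comments editorial, code verbatim from the diff) assembled
big-endian counter blocks in a stack array before a combined encrypt-and-XOR
call; the replacement serpent_ctr_8way_avx builds the counters with vector
instructions inside the assembler routine instead:

	/* Old CTR glue helper removed by this patch. */
	static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
					   le128 *iv)
	{
		be128 ctrblks[SERPENT_PARALLEL_BLOCKS];	/* temporary stack buffer */
		unsigned int i;

		for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
			if (dst != src)
				dst[i] = src[i];

			/* One big-endian counter block per 128-bit input block. */
			le128_to_be128(&ctrblks[i], iv);
			le128_inc(iv);
		}

		/* Encrypt the counters and XOR the keystream into dst. */
		serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
	}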
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/serpent_avx_glue.c')
-rw-r--r--	arch/x86/crypto/serpent_avx_glue.c	43
1 file changed, 6 insertions(+), 37 deletions(-)
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 2aa31ade1e68..52abaaf28e7f 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -42,20 +42,6 @@
 #include <asm/crypto/ablk_helper.h>
 #include <asm/crypto/glue_helper.h>
 
-static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
-{
-	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
-	unsigned int j;
-
-	for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
-		ivs[j] = src[j];
-
-	serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
-
-	for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
-		u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
-}
-
 static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
 	be128 ctrblk;
@@ -67,30 +53,13 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 	u128_xor(dst, src, (u128 *)&ctrblk);
 }
 
-static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
-				   le128 *iv)
-{
-	be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
-	unsigned int i;
-
-	for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
-		if (dst != src)
-			dst[i] = src[i];
-
-		le128_to_be128(&ctrblks[i], iv);
-		le128_inc(iv);
-	}
-
-	serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
-}
-
 static const struct common_glue_ctx serpent_enc = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
+		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
 	}, {
 		.num_blocks = 1,
 		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
@@ -103,7 +72,7 @@ static const struct common_glue_ctx serpent_ctr = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
 	}, {
 		.num_blocks = 1,
 		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
@@ -116,7 +85,7 @@ static const struct common_glue_ctx serpent_dec = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
+		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
 	}, {
 		.num_blocks = 1,
 		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
@@ -129,7 +98,7 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
 	}, {
 		.num_blocks = 1,
 		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
@@ -193,7 +162,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
 
 	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+		serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
 		return;
 	}
 
@@ -210,7 +179,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
 
 	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+		serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
 		return;
 	}
 