aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/cast6_avx_glue.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@mbnet.fi>2012-10-20 08:06:41 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2012-10-24 09:10:54 -0400
commitcba1cce05498d55f363c28cd2512368e95605518 (patch)
treea8728af2175a89a598b865c89dc2e3f08313c813 /arch/x86/crypto/cast6_avx_glue.c
parent58990986f1cba40c23c0c10592ace08616de3ffa (diff)
crypto: cast6/avx - avoid using temporary stack buffers
Introduce new assembler functions to avoid use temporary stack buffers in glue code. This also allows use of vector instructions for xoring output in CTR and CBC modes and construction of IVs for CTR mode. ECB mode sees ~0.5% decrease in speed because added one extra function call. CBC mode decryption and CTR mode benefit from vector operations and gain ~2%. Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/cast6_avx_glue.c')
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c71
1 files changed, 13 insertions, 58 deletions
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 1dfd33b5b4fb..92f7ca24790a 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -40,43 +40,15 @@
40 40
41#define CAST6_PARALLEL_BLOCKS 8 41#define CAST6_PARALLEL_BLOCKS 8
42 42
43asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst, 43asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
44 const u8 *src, bool xor); 44 const u8 *src);
45asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst, 45asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
46 const u8 *src); 46 const u8 *src);
47 47
48static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst, 48asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
49 const u8 *src) 49 const u8 *src);
50{ 50asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
51 __cast6_enc_blk_8way(ctx, dst, src, false); 51 le128 *iv);
52}
53
54static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst,
55 const u8 *src)
56{
57 __cast6_enc_blk_8way(ctx, dst, src, true);
58}
59
60static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst,
61 const u8 *src)
62{
63 cast6_dec_blk_8way(ctx, dst, src);
64}
65
66
67static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
68{
69 u128 ivs[CAST6_PARALLEL_BLOCKS - 1];
70 unsigned int j;
71
72 for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
73 ivs[j] = src[j];
74
75 cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
76
77 for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
78 u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
79}
80 52
81static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) 53static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
82{ 54{
@@ -89,30 +61,13 @@ static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
89 u128_xor(dst, src, (u128 *)&ctrblk); 61 u128_xor(dst, src, (u128 *)&ctrblk);
90} 62}
91 63
92static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
93 le128 *iv)
94{
95 be128 ctrblks[CAST6_PARALLEL_BLOCKS];
96 unsigned int i;
97
98 for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) {
99 if (dst != src)
100 dst[i] = src[i];
101
102 le128_to_be128(&ctrblks[i], iv);
103 le128_inc(iv);
104 }
105
106 cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
107}
108
109static const struct common_glue_ctx cast6_enc = { 64static const struct common_glue_ctx cast6_enc = {
110 .num_funcs = 2, 65 .num_funcs = 2,
111 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, 66 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
112 67
113 .funcs = { { 68 .funcs = { {
114 .num_blocks = CAST6_PARALLEL_BLOCKS, 69 .num_blocks = CAST6_PARALLEL_BLOCKS,
115 .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) } 70 .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
116 }, { 71 }, {
117 .num_blocks = 1, 72 .num_blocks = 1,
118 .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) } 73 .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
@@ -125,7 +80,7 @@ static const struct common_glue_ctx cast6_ctr = {
125 80
126 .funcs = { { 81 .funcs = { {
127 .num_blocks = CAST6_PARALLEL_BLOCKS, 82 .num_blocks = CAST6_PARALLEL_BLOCKS,
128 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) } 83 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
129 }, { 84 }, {
130 .num_blocks = 1, 85 .num_blocks = 1,
131 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) } 86 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
@@ -138,7 +93,7 @@ static const struct common_glue_ctx cast6_dec = {
138 93
139 .funcs = { { 94 .funcs = { {
140 .num_blocks = CAST6_PARALLEL_BLOCKS, 95 .num_blocks = CAST6_PARALLEL_BLOCKS,
141 .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) } 96 .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
142 }, { 97 }, {
143 .num_blocks = 1, 98 .num_blocks = 1,
144 .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) } 99 .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
@@ -151,7 +106,7 @@ static const struct common_glue_ctx cast6_dec_cbc = {
151 106
152 .funcs = { { 107 .funcs = { {
153 .num_blocks = CAST6_PARALLEL_BLOCKS, 108 .num_blocks = CAST6_PARALLEL_BLOCKS,
154 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) } 109 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
155 }, { 110 }, {
156 .num_blocks = 1, 111 .num_blocks = 1,
157 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) } 112 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
@@ -215,7 +170,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
215 ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); 170 ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
216 171
217 if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { 172 if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
218 cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst); 173 cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
219 return; 174 return;
220 } 175 }
221 176
@@ -232,7 +187,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
232 ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); 187 ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
233 188
234 if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { 189 if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
235 cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst); 190 cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
236 return; 191 return;
237 } 192 }
238 193