about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/crypto/camellia-aesni-avx-asm_64.S 180
-rw-r--r-- arch/x86/crypto/camellia_aesni_avx_glue.c 91
2 files changed, 229 insertions, 42 deletions
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index cfc163469c71..ce71f9212409 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -1,7 +1,7 @@
1/* 1/*
2 * x86_64/AVX/AES-NI assembler implementation of Camellia 2 * x86_64/AVX/AES-NI assembler implementation of Camellia
3 * 3 *
4 * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -589,6 +589,10 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
589.Lbswap128_mask: 589.Lbswap128_mask:
590 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 590 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
591 591
592/* For XTS mode IV generation */
/*
 * 16-byte constant read as two 64-bit lanes: the low lane holds 0x87 and
 * the high lane holds 1. gf128mul_x_ble() ANDs it against per-lane carry
 * masks, so 0x87 is folded into the low lane when bit 127 of the tweak was
 * set and 1 is carried into the high lane when bit 63 was set.
 */
593.Lxts_gf128mul_and_shl1_mask:
594 .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
595
592/* 596/*
593 * pre-SubByte transform 597 * pre-SubByte transform
594 * 598 *
@@ -1090,3 +1094,177 @@ ENTRY(camellia_ctr_16way)
1090 1094
1091 ret; 1095 ret;
1092ENDPROC(camellia_ctr_16way) 1096ENDPROC(camellia_ctr_16way)
1097
/*
 * gf128mul_x_ble(iv, mask, tmp): advance the 128-bit tweak in `iv` by one
 * step (a left shift by one bit with a conditional fold of 0x87).
 *
 *   iv   - xmm register holding the tweak; updated in place
 *   mask - xmm register preloaded with .Lxts_gf128mul_and_shl1_mask
 *   tmp  - xmm scratch register; clobbered
 *
 * Steps:
 *   vpsrad $31     - turn every 32-bit lane of iv into all-ones or zero
 *                    according to that lane's top bit
 *   vpaddq         - iv + iv, i.e. shift each 64-bit lane left by one
 *   vpshufd $0x13  - move the top-bit mask of dword 3 (bit 127) into
 *                    dword 0 and that of dword 1 (bit 63) into dword 2
 *   vpand          - keep 0x87 in the low lane and 1 in the high lane
 *                    only where the corresponding top bit was set
 *   vpxor          - apply the reduction constant and the cross-lane carry
 *
 * Must be defined without interleaved comments: every line but the last
 * ends in a continuation backslash.
 */
1098#define gf128mul_x_ble(iv, mask, tmp) \
1099 vpsrad $31, iv, tmp; \
1100 vpaddq iv, iv, iv; \
1101 vpshufd $0x13, tmp, tmp; \
1102 vpand mask, tmp, tmp; \
1103 vpxor tmp, iv, iv;
1104
/*
 * Shared body of the 16-block XTS encrypt/decrypt entry points.
 *
 * Outline:
 *   1. derive 16 consecutive tweaks from the IV, XOR each into its source
 *      block, and park the tweaks themselves in the dst buffer;
 *   2. write the tweak that follows the 16th one back through %rcx;
 *   3. XOR the whitening key selected by %r8 into all 16 blocks;
 *   4. call the block routine passed in %r9;
 *   5. XOR the results with the parked tweaks and store them to dst.
 *
 * Each source block is read before the matching dst slot is overwritten
 * with its tweak.
 */
1105.align 8
1106camellia_xts_crypt_16way:
1107 /* input:
1108 * %rdi: ctx, CTX
1109 * %rsi: dst (16 blocks)
1110 * %rdx: src (16 blocks)
1111 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
1112 * %r8: index for input whitening key
1113 * %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16
1114 */
1115
/*
 * Reserve 16 * 16 = 256 bytes of stack and keep the address in %rax.
 * Presumably the routine called through %r9 also uses %rax as scratch
 * storage - confirm against __camellia_enc_blk16/__camellia_dec_blk16.
 */
1116 subq $(16 * 16), %rsp;
1117 movq %rsp, %rax;
1118
/*
 * %xmm14 holds the reduction mask for every gf128mul_x_ble() below.
 * NOTE(review): vmovdqa requires a 16-byte aligned memory operand; the
 * alignment directive covering the mask table is not visible in this hunk.
 */
1119 vmovdqa .Lxts_gf128mul_and_shl1_mask, %xmm14;
1120
1121 /* load IV */
/*
 * Tweak 0 is the IV as loaded. While tweaks are being generated %xmm0
 * (running tweak), %xmm14 (mask) and %xmm15 (scratch) are busy, so the
 * blocks that will later live in those registers are staged on the stack:
 * block 0 -> slot 15, block 1 -> slot 14, block 15 -> slot 0.
 */
1122 vmovdqu (%rcx), %xmm0;
1123 vpxor 0 * 16(%rdx), %xmm0, %xmm15;
1124 vmovdqu %xmm15, 15 * 16(%rax);
1125 vmovdqu %xmm0, 0 * 16(%rsi);
1126
1127 /* construct IVs */
/* Block 1: tweaked block to stack slot 14, tweak to dst slot 1. */
1128 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1129 vpxor 1 * 16(%rdx), %xmm0, %xmm15;
1130 vmovdqu %xmm15, 14 * 16(%rax);
1131 vmovdqu %xmm0, 1 * 16(%rsi);
1132
/*
 * Blocks 2..14: the tweaked block stays in a register (%xmm13 down to
 * %xmm1) and the tweak is parked in the matching dst slot.
 */
1133 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1134 vpxor 2 * 16(%rdx), %xmm0, %xmm13;
1135 vmovdqu %xmm0, 2 * 16(%rsi);
1136
1137 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1138 vpxor 3 * 16(%rdx), %xmm0, %xmm12;
1139 vmovdqu %xmm0, 3 * 16(%rsi);
1140
1141 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1142 vpxor 4 * 16(%rdx), %xmm0, %xmm11;
1143 vmovdqu %xmm0, 4 * 16(%rsi);
1144
1145 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1146 vpxor 5 * 16(%rdx), %xmm0, %xmm10;
1147 vmovdqu %xmm0, 5 * 16(%rsi);
1148
1149 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1150 vpxor 6 * 16(%rdx), %xmm0, %xmm9;
1151 vmovdqu %xmm0, 6 * 16(%rsi);
1152
1153 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1154 vpxor 7 * 16(%rdx), %xmm0, %xmm8;
1155 vmovdqu %xmm0, 7 * 16(%rsi);
1156
1157 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1158 vpxor 8 * 16(%rdx), %xmm0, %xmm7;
1159 vmovdqu %xmm0, 8 * 16(%rsi);
1160
1161 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1162 vpxor 9 * 16(%rdx), %xmm0, %xmm6;
1163 vmovdqu %xmm0, 9 * 16(%rsi);
1164
1165 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1166 vpxor 10 * 16(%rdx), %xmm0, %xmm5;
1167 vmovdqu %xmm0, 10 * 16(%rsi);
1168
1169 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1170 vpxor 11 * 16(%rdx), %xmm0, %xmm4;
1171 vmovdqu %xmm0, 11 * 16(%rsi);
1172
1173 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1174 vpxor 12 * 16(%rdx), %xmm0, %xmm3;
1175 vmovdqu %xmm0, 12 * 16(%rsi);
1176
1177 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1178 vpxor 13 * 16(%rdx), %xmm0, %xmm2;
1179 vmovdqu %xmm0, 13 * 16(%rsi);
1180
1181 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1182 vpxor 14 * 16(%rdx), %xmm0, %xmm1;
1183 vmovdqu %xmm0, 14 * 16(%rsi);
1184
/* Block 15: tweaked block to stack slot 0, tweak to dst slot 15. */
1185 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1186 vpxor 15 * 16(%rdx), %xmm0, %xmm15;
1187 vmovdqu %xmm15, 0 * 16(%rax);
1188 vmovdqu %xmm0, 15 * 16(%rsi);
1189
/*
 * One more step gives the tweak that follows block 15; it is stored back
 * through %rcx (presumably so the caller can continue with the next
 * batch - confirm against the glue code).
 */
1190 gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
1191 vmovdqu %xmm0, (%rcx);
1192
1193 /* inpack16_pre: */
/*
 * Load 64 bits from key_table at index %r8 (scaled by 8), shuffle them
 * through .Lpack_bswap and XOR the result into all 16 blocks.
 * Register layout afterwards: %xmm0 = stack slot 0 (block 15),
 * %xmm1..%xmm13 = blocks 14..2, %xmm14 = stack slot 14 (block 1),
 * %xmm15 = stack slot 15 (block 0). The mask in %xmm14 is no longer
 * needed at this point and is overwritten.
 */
1194 vmovq (key_table)(CTX, %r8, 8), %xmm15;
1195 vpshufb .Lpack_bswap, %xmm15, %xmm15;
1196 vpxor 0 * 16(%rax), %xmm15, %xmm0;
1197 vpxor %xmm1, %xmm15, %xmm1;
1198 vpxor %xmm2, %xmm15, %xmm2;
1199 vpxor %xmm3, %xmm15, %xmm3;
1200 vpxor %xmm4, %xmm15, %xmm4;
1201 vpxor %xmm5, %xmm15, %xmm5;
1202 vpxor %xmm6, %xmm15, %xmm6;
1203 vpxor %xmm7, %xmm15, %xmm7;
1204 vpxor %xmm8, %xmm15, %xmm8;
1205 vpxor %xmm9, %xmm15, %xmm9;
1206 vpxor %xmm10, %xmm15, %xmm10;
1207 vpxor %xmm11, %xmm15, %xmm11;
1208 vpxor %xmm12, %xmm15, %xmm12;
1209 vpxor %xmm13, %xmm15, %xmm13;
1210 vpxor 14 * 16(%rax), %xmm15, %xmm14;
1211 vpxor 15 * 16(%rax), %xmm15, %xmm15;
1212
/* Indirect call to the block routine selected by the entry point. */
1213 call *%r9;
1214
/* Release the 256-byte stack area reserved at function entry. */
1215 addq $(16 * 16), %rsp;
1216
/*
 * XOR the results with the tweaks parked in dst. Tweak i is paired with
 * %xmm7..%xmm0 for i = 0..7 and with %xmm15..%xmm8 for i = 8..15, and
 * write_output() receives the registers in that same order.
 */
1217 vpxor 0 * 16(%rsi), %xmm7, %xmm7;
1218 vpxor 1 * 16(%rsi), %xmm6, %xmm6;
1219 vpxor 2 * 16(%rsi), %xmm5, %xmm5;
1220 vpxor 3 * 16(%rsi), %xmm4, %xmm4;
1221 vpxor 4 * 16(%rsi), %xmm3, %xmm3;
1222 vpxor 5 * 16(%rsi), %xmm2, %xmm2;
1223 vpxor 6 * 16(%rsi), %xmm1, %xmm1;
1224 vpxor 7 * 16(%rsi), %xmm0, %xmm0;
1225 vpxor 8 * 16(%rsi), %xmm15, %xmm15;
1226 vpxor 9 * 16(%rsi), %xmm14, %xmm14;
1227 vpxor 10 * 16(%rsi), %xmm13, %xmm13;
1228 vpxor 11 * 16(%rsi), %xmm12, %xmm12;
1229 vpxor 12 * 16(%rsi), %xmm11, %xmm11;
1230 vpxor 13 * 16(%rsi), %xmm10, %xmm10;
1231 vpxor 14 * 16(%rsi), %xmm9, %xmm9;
1232 vpxor 15 * 16(%rsi), %xmm8, %xmm8;
1233 write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
1234 %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
1235 %xmm8, %rsi);
1236
1237 ret;
1238ENDPROC(camellia_xts_crypt_16way)
1239
/*
 * XTS encryption entry point for 16 blocks. Sets the two extra arguments
 * expected by camellia_xts_crypt_16way (%r8 = whitening key index,
 * %r9 = block routine) and jumps to it; the shared body's `ret` returns
 * directly to this function's caller.
 */
1240ENTRY(camellia_xts_enc_16way)
1241 /* input:
1242 * %rdi: ctx, CTX
1243 * %rsi: dst (16 blocks)
1244 * %rdx: src (16 blocks)
1245 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
1246 */
1247 xorl %r8d, %r8d; /* input whitening key, 0 for enc */
1248
/* %r9 = address of the 16-block encryption routine. */
1249 leaq __camellia_enc_blk16, %r9;
1250
1251 jmp camellia_xts_crypt_16way;
1252ENDPROC(camellia_xts_enc_16way)
1253
/*
 * XTS decryption entry point for 16 blocks. Picks the whitening key index
 * from the key length, selects the decryption block routine and jumps to
 * camellia_xts_crypt_16way; the shared body's `ret` returns directly to
 * this function's caller.
 */
1254ENTRY(camellia_xts_dec_16way)
1255 /* input:
1256 * %rdi: ctx, CTX
1257 * %rsi: dst (16 blocks)
1258 * %rdx: src (16 blocks)
1259 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
1260 */
1261
/*
 * %r8d = 24 when key_length(CTX) equals 16, otherwise 32. The two movl
 * instructions leave the flags from cmpl untouched, so cmovel still acts
 * on the comparison result. (key_length is presumably in bytes - confirm
 * against struct camellia_ctx.)
 */
1262 cmpl $16, key_length(CTX);
1263 movl $32, %r8d;
1264 movl $24, %eax;
1265 cmovel %eax, %r8d; /* input whitening key, last for dec */
1266
/* %r9 = address of the 16-block decryption routine. */
1267 leaq __camellia_dec_blk16, %r9;
1268
1269 jmp camellia_xts_crypt_16way;
1270ENDPROC(camellia_xts_dec_16way)
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 96cbb6068fce..4ff7ed47b3db 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia 2 * Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia
3 * 3 *
4 * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -37,6 +37,23 @@ asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
37asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, 37asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
38 const u8 *src, le128 *iv); 38 const u8 *src, le128 *iv);
39 39
/*
 * 16-way XTS entry points implemented in camellia-aesni-avx-asm_64.S.
 * Per the register comments in the assembly, dst and src each cover
 * 16 blocks and iv points at the current tweak; the assembly stores an
 * updated tweak back through iv before returning.
 */
40asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
41 const u8 *src, le128 *iv);
42asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
43 const u8 *src, le128 *iv);
44
/*
 * Single-block XTS encryption helper: forwards all arguments to
 * glue_xts_crypt_128bit_one() with camellia_enc_blk as the block function.
 * Used as the num_blocks = 1 entry of camellia_enc_xts.
 */
45static void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
46{
47 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
48 GLUE_FUNC_CAST(camellia_enc_blk));
49}
50
/*
 * Single-block XTS decryption helper: forwards all arguments to
 * glue_xts_crypt_128bit_one() with camellia_dec_blk as the block function.
 * Used as the num_blocks = 1 entry of camellia_dec_xts.
 */
51static void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
52{
53 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
54 GLUE_FUNC_CAST(camellia_dec_blk));
55}
56
40static const struct common_glue_ctx camellia_enc = { 57static const struct common_glue_ctx camellia_enc = {
41 .num_funcs = 3, 58 .num_funcs = 3,
42 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, 59 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
@@ -69,6 +86,19 @@ static const struct common_glue_ctx camellia_ctr = {
69 } } 86 } }
70}; 87};
71 88
/*
 * Dispatch table for XTS encryption, passed to glue_xts_crypt_128bit() by
 * xts_encrypt(). It lists two implementations: the 16-way assembly routine
 * and the single-block helper.
 * NOTE(review): presumably the glue code walks .funcs in order and uses
 * the first entry whose num_blocks fits the remaining data - confirm
 * against the glue helper.
 */
89static const struct common_glue_ctx camellia_enc_xts = {
90 .num_funcs = 2,
91 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
92
93 .funcs = { {
/* Batch path: CAMELLIA_AESNI_PARALLEL_BLOCKS blocks per call. */
94 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
95 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
96 }, {
/* Fallback path: one block per call. */
97 .num_blocks = 1,
98 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
99 } }
100};
101
72static const struct common_glue_ctx camellia_dec = { 102static const struct common_glue_ctx camellia_dec = {
73 .num_funcs = 3, 103 .num_funcs = 3,
74 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, 104 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
@@ -101,6 +131,19 @@ static const struct common_glue_ctx camellia_dec_cbc = {
101 } } 131 } }
102}; 132};
103 133
/*
 * Dispatch table for XTS decryption, passed to glue_xts_crypt_128bit() by
 * xts_decrypt(). It lists two implementations: the 16-way assembly routine
 * and the single-block helper.
 * NOTE(review): presumably the glue code walks .funcs in order and uses
 * the first entry whose num_blocks fits the remaining data - confirm
 * against the glue helper.
 */
134static const struct common_glue_ctx camellia_dec_xts = {
135 .num_funcs = 2,
136 .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
137
138 .funcs = { {
/* Batch path: CAMELLIA_AESNI_PARALLEL_BLOCKS blocks per call. */
139 .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
140 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
141 }, {
/* Fallback path: one block per call. */
142 .num_blocks = 1,
143 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
144 } }
145};
146
104static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 147static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
105 struct scatterlist *src, unsigned int nbytes) 148 struct scatterlist *src, unsigned int nbytes)
106{ 149{
@@ -261,54 +304,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
261 struct scatterlist *src, unsigned int nbytes) 304 struct scatterlist *src, unsigned int nbytes)
262{ 305{
263 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 306 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
264 be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
265 struct crypt_priv crypt_ctx = {
266 .ctx = &ctx->crypt_ctx,
267 .fpu_enabled = false,
268 };
269 struct xts_crypt_req req = {
270 .tbuf = buf,
271 .tbuflen = sizeof(buf),
272
273 .tweak_ctx = &ctx->tweak_ctx,
274 .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
275 .crypt_ctx = &crypt_ctx,
276 .crypt_fn = encrypt_callback,
277 };
278 int ret;
279
280 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
281 ret = xts_crypt(desc, dst, src, nbytes, &req);
282 camellia_fpu_end(crypt_ctx.fpu_enabled);
283 307
284 return ret; 308 return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
309 XTS_TWEAK_CAST(camellia_enc_blk),
310 &ctx->tweak_ctx, &ctx->crypt_ctx);
285} 311}
286 312
/*
 * xts_decrypt(), shown with the removed (left numbering) and added (right
 * numbering) versions interleaved.
 *
 * Removed version: builds an xts_crypt_req around a local tweak buffer,
 * clears CRYPTO_TFM_REQ_MAY_SLEEP, runs xts_crypt() with decrypt_callback
 * and then calls camellia_fpu_end().
 *
 * Added version: a single call to glue_xts_crypt_128bit() using the
 * camellia_dec_xts dispatch table.
 *
 * Both versions pass camellia_enc_blk as the tweak function together with
 * ctx->tweak_ctx, i.e. the tweak is produced with the encryption primitive
 * even on the decryption path.
 */
287static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 313static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
288 struct scatterlist *src, unsigned int nbytes) 314 struct scatterlist *src, unsigned int nbytes)
289{ 315{
290 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 316 struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
291 be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
292 struct crypt_priv crypt_ctx = {
293 .ctx = &ctx->crypt_ctx,
294 .fpu_enabled = false,
295 };
296 struct xts_crypt_req req = {
297 .tbuf = buf,
298 .tbuflen = sizeof(buf),
299 317
300 .tweak_ctx = &ctx->tweak_ctx, 318 return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
301 .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), 319 XTS_TWEAK_CAST(camellia_enc_blk),
302 .crypt_ctx = &crypt_ctx, 320 &ctx->tweak_ctx, &ctx->crypt_ctx);
303 .crypt_fn = decrypt_callback,
304 };
305 int ret;
306
307 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
308 ret = xts_crypt(desc, dst, src, nbytes, &req);
309 camellia_fpu_end(crypt_ctx.fpu_enabled);
310
311 return ret;
312} 321}
313 322
314static struct crypto_alg cmll_algs[10] = { { 323static struct crypto_alg cmll_algs[10] = { {