From 9c2019421511a1bc646981d55528334ae46464c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:21 +0200 Subject: i386: move crypto Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 2 +- arch/i386/crypto/Makefile | 5 - arch/i386/crypto/Makefile_32 | 12 - arch/i386/crypto/aes-i586-asm_32.S | 373 ------------------------ arch/i386/crypto/aes_32.c | 515 --------------------------------- arch/i386/crypto/twofish-i586-asm_32.S | 335 --------------------- arch/i386/crypto/twofish_32.c | 97 ------- arch/x86/crypto/Makefile | 5 + arch/x86/crypto/Makefile_32 | 12 + arch/x86/crypto/aes-i586-asm_32.S | 373 ++++++++++++++++++++++++ arch/x86/crypto/aes_32.c | 515 +++++++++++++++++++++++++++++++++ arch/x86/crypto/twofish-i586-asm_32.S | 335 +++++++++++++++++++++ arch/x86/crypto/twofish_32.c | 97 +++++++ arch/x86_64/crypto/Makefile | 2 +- 14 files changed, 1339 insertions(+), 1339 deletions(-) delete mode 100644 arch/i386/crypto/Makefile delete mode 100644 arch/i386/crypto/Makefile_32 delete mode 100644 arch/i386/crypto/aes-i586-asm_32.S delete mode 100644 arch/i386/crypto/aes_32.c delete mode 100644 arch/i386/crypto/twofish-i586-asm_32.S delete mode 100644 arch/i386/crypto/twofish_32.c create mode 100644 arch/x86/crypto/Makefile create mode 100644 arch/x86/crypto/Makefile_32 create mode 100644 arch/x86/crypto/aes-i586-asm_32.S create mode 100644 arch/x86/crypto/aes_32.c create mode 100644 arch/x86/crypto/twofish-i586-asm_32.S create mode 100644 arch/x86/crypto/twofish_32.c (limited to 'arch') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 45409c13f6e..dca07ae933d 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -105,7 +105,7 @@ libs-y += arch/i386/lib/ core-y += arch/i386/kernel/ \ arch/i386/mm/ \ $(mcore-y)/ \ - arch/i386/crypto/ + arch/x86/crypto/ drivers-$(CONFIG_MATH_EMULATION) += arch/i386/math-emu/ drivers-$(CONFIG_PCI) += arch/i386/pci/ # must be linked after kernel/ diff --git a/arch/i386/crypto/Makefile b/arch/i386/crypto/Makefile deleted file mode 100644 index fbd34ac2cda..00000000000 --- a/arch/i386/crypto/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/crypto/Makefile_32 -else -include ${srctree}/arch/x86_64/crypto/Makefile_64 -endif diff --git a/arch/i386/crypto/Makefile_32 b/arch/i386/crypto/Makefile_32 deleted file mode 100644 index 7154b14cd95..00000000000 --- a/arch/i386/crypto/Makefile_32 +++ /dev/null @@ -1,12 +0,0 @@ -# -# i386/crypto/Makefile -# -# Arch-specific CryptoAPI modules. -# - -obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o -obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o - -aes-i586-y := aes-i586-asm_32.o aes_32.o -twofish-i586-y := twofish-i586-asm_32.o twofish_32.o - diff --git a/arch/i386/crypto/aes-i586-asm_32.S b/arch/i386/crypto/aes-i586-asm_32.S deleted file mode 100644 index f942f0c8f63..00000000000 --- a/arch/i386/crypto/aes-i586-asm_32.S +++ /dev/null @@ -1,373 +0,0 @@ -// ------------------------------------------------------------------------- -// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. -// All rights reserved. -// -// LICENSE TERMS -// -// The free distribution and use of this software in both source and binary -// form is allowed (with or without changes) provided that: -// -// 1. distributions of this source code include the above copyright -// notice, this list of conditions and the following disclaimer// -// -// 2. distributions in binary form include the above copyright -// notice, this list of conditions and the following disclaimer -// in the documentation and/or other associated materials// -// -// 3. the copyright holder's name is not used to endorse products -// built using this software without specific written permission. -// -// -// ALTERNATIVELY, provided that this notice is retained in full, this product -// may be distributed under the terms of the GNU General Public License (GPL), -// in which case the provisions of the GPL apply INSTEAD OF those given above. -// -// Copyright (c) 2004 Linus Torvalds -// Copyright (c) 2004 Red Hat, Inc., James Morris - -// DISCLAIMER -// -// This software is provided 'as is' with no explicit or implied warranties -// in respect of its properties including, but not limited to, correctness -// and fitness for purpose. -// ------------------------------------------------------------------------- -// Issue Date: 29/07/2002 - -.file "aes-i586-asm.S" -.text - -#include - -#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) - -/* offsets to parameters with one register pushed onto stack */ -#define tfm 8 -#define out_blk 12 -#define in_blk 16 - -/* offsets in crypto_tfm structure */ -#define ekey (crypto_tfm_ctx_offset + 0) -#define nrnd (crypto_tfm_ctx_offset + 256) -#define dkey (crypto_tfm_ctx_offset + 260) - -// register mapping for encrypt and decrypt subroutines - -#define r0 eax -#define r1 ebx -#define r2 ecx -#define r3 edx -#define r4 esi -#define r5 edi - -#define eaxl al -#define eaxh ah -#define ebxl bl -#define ebxh bh -#define ecxl cl -#define ecxh ch -#define edxl dl -#define edxh dh - -#define _h(reg) reg##h -#define h(reg) _h(reg) - -#define _l(reg) reg##l -#define l(reg) _l(reg) - -// This macro takes a 32-bit word representing a column and uses -// each of its four bytes to index into four tables of 256 32-bit -// words to obtain values that are then xored into the appropriate -// output registers r0, r1, r4 or r5. - -// Parameters: -// table table base address -// %1 out_state[0] -// %2 out_state[1] -// %3 out_state[2] -// %4 out_state[3] -// idx input register for the round (destroyed) -// tmp scratch register for the round -// sched key schedule - -#define do_col(table, a1,a2,a3,a4, idx, tmp) \ - movzx %l(idx),%tmp; \ - xor table(,%tmp,4),%a1; \ - movzx %h(idx),%tmp; \ - shr $16,%idx; \ - xor table+tlen(,%tmp,4),%a2; \ - movzx %l(idx),%tmp; \ - movzx %h(idx),%idx; \ - xor table+2*tlen(,%tmp,4),%a3; \ - xor table+3*tlen(,%idx,4),%a4; - -// initialise output registers from the key schedule -// NB1: original value of a3 is in idx on exit -// NB2: original values of a1,a2,a4 aren't used -#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ - mov 0 sched,%a1; \ - movzx %l(idx),%tmp; \ - mov 12 sched,%a2; \ - xor table(,%tmp,4),%a1; \ - mov 4 sched,%a4; \ - movzx %h(idx),%tmp; \ - shr $16,%idx; \ - xor table+tlen(,%tmp,4),%a2; \ - movzx %l(idx),%tmp; \ - movzx %h(idx),%idx; \ - xor table+3*tlen(,%idx,4),%a4; \ - mov %a3,%idx; \ - mov 8 sched,%a3; \ - xor table+2*tlen(,%tmp,4),%a3; - -// initialise output registers from the key schedule -// NB1: original value of a3 is in idx on exit -// NB2: original values of a1,a2,a4 aren't used -#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ - mov 0 sched,%a1; \ - movzx %l(idx),%tmp; \ - mov 4 sched,%a2; \ - xor table(,%tmp,4),%a1; \ - mov 12 sched,%a4; \ - movzx %h(idx),%tmp; \ - shr $16,%idx; \ - xor table+tlen(,%tmp,4),%a2; \ - movzx %l(idx),%tmp; \ - movzx %h(idx),%idx; \ - xor table+3*tlen(,%idx,4),%a4; \ - mov %a3,%idx; \ - mov 8 sched,%a3; \ - xor table+2*tlen(,%tmp,4),%a3; - - -// original Gladman had conditional saves to MMX regs. -#define save(a1, a2) \ - mov %a2,4*a1(%esp) - -#define restore(a1, a2) \ - mov 4*a2(%esp),%a1 - -// These macros perform a forward encryption cycle. They are entered with -// the first previous round column values in r0,r1,r4,r5 and -// exit with the final values in the same registers, using stack -// for temporary storage. - -// round column values -// on entry: r0,r1,r4,r5 -// on exit: r2,r1,r4,r5 -#define fwd_rnd1(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ - do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ - restore(r0,0); \ - do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ - restore(r0,1); \ - do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ - -// round column values -// on entry: r2,r1,r4,r5 -// on exit: r0,r1,r4,r5 -#define fwd_rnd2(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ - do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ - restore(r2,0); \ - do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ - restore(r2,1); \ - do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ - -// These macros performs an inverse encryption cycle. They are entered with -// the first previous round column values in r0,r1,r4,r5 and -// exit with the final values in the same registers, using stack -// for temporary storage - -// round column values -// on entry: r0,r1,r4,r5 -// on exit: r2,r1,r4,r5 -#define inv_rnd1(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ - do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ - restore(r0,0); \ - do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ - restore(r0,1); \ - do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ - -// round column values -// on entry: r2,r1,r4,r5 -// on exit: r0,r1,r4,r5 -#define inv_rnd2(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ - do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ - restore(r2,0); \ - do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ - restore(r2,1); \ - do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ - -// AES (Rijndael) Encryption Subroutine -/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ - -.global aes_enc_blk - -.extern ft_tab -.extern fl_tab - -.align 4 - -aes_enc_blk: - push %ebp - mov tfm(%esp),%ebp - -// CAUTION: the order and the values used in these assigns -// rely on the register mappings - -1: push %ebx - mov in_blk+4(%esp),%r2 - push %esi - mov nrnd(%ebp),%r3 // number of rounds - push %edi -#if ekey != 0 - lea ekey(%ebp),%ebp // key pointer -#endif - -// input four columns and xor in first round key - - mov (%r2),%r0 - mov 4(%r2),%r1 - mov 8(%r2),%r4 - mov 12(%r2),%r5 - xor (%ebp),%r0 - xor 4(%ebp),%r1 - xor 8(%ebp),%r4 - xor 12(%ebp),%r5 - - sub $8,%esp // space for register saves on stack - add $16,%ebp // increment to next round key - cmp $12,%r3 - jb 4f // 10 rounds for 128-bit key - lea 32(%ebp),%ebp - je 3f // 12 rounds for 192-bit key - lea 32(%ebp),%ebp - -2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key - fwd_rnd2( -48(%ebp) ,ft_tab) -3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key - fwd_rnd2( -16(%ebp) ,ft_tab) -4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key - fwd_rnd2( +16(%ebp) ,ft_tab) - fwd_rnd1( +32(%ebp) ,ft_tab) - fwd_rnd2( +48(%ebp) ,ft_tab) - fwd_rnd1( +64(%ebp) ,ft_tab) - fwd_rnd2( +80(%ebp) ,ft_tab) - fwd_rnd1( +96(%ebp) ,ft_tab) - fwd_rnd2(+112(%ebp) ,ft_tab) - fwd_rnd1(+128(%ebp) ,ft_tab) - fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table - -// move final values to the output array. CAUTION: the -// order of these assigns rely on the register mappings - - add $8,%esp - mov out_blk+12(%esp),%ebp - mov %r5,12(%ebp) - pop %edi - mov %r4,8(%ebp) - pop %esi - mov %r1,4(%ebp) - pop %ebx - mov %r0,(%ebp) - pop %ebp - mov $1,%eax - ret - -// AES (Rijndael) Decryption Subroutine -/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ - -.global aes_dec_blk - -.extern it_tab -.extern il_tab - -.align 4 - -aes_dec_blk: - push %ebp - mov tfm(%esp),%ebp - -// CAUTION: the order and the values used in these assigns -// rely on the register mappings - -1: push %ebx - mov in_blk+4(%esp),%r2 - push %esi - mov nrnd(%ebp),%r3 // number of rounds - push %edi -#if dkey != 0 - lea dkey(%ebp),%ebp // key pointer -#endif - mov %r3,%r0 - shl $4,%r0 - add %r0,%ebp - -// input four columns and xor in first round key - - mov (%r2),%r0 - mov 4(%r2),%r1 - mov 8(%r2),%r4 - mov 12(%r2),%r5 - xor (%ebp),%r0 - xor 4(%ebp),%r1 - xor 8(%ebp),%r4 - xor 12(%ebp),%r5 - - sub $8,%esp // space for register saves on stack - sub $16,%ebp // increment to next round key - cmp $12,%r3 - jb 4f // 10 rounds for 128-bit key - lea -32(%ebp),%ebp - je 3f // 12 rounds for 192-bit key - lea -32(%ebp),%ebp - -2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key - inv_rnd2( +48(%ebp), it_tab) -3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key - inv_rnd2( +16(%ebp), it_tab) -4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key - inv_rnd2( -16(%ebp), it_tab) - inv_rnd1( -32(%ebp), it_tab) - inv_rnd2( -48(%ebp), it_tab) - inv_rnd1( -64(%ebp), it_tab) - inv_rnd2( -80(%ebp), it_tab) - inv_rnd1( -96(%ebp), it_tab) - inv_rnd2(-112(%ebp), it_tab) - inv_rnd1(-128(%ebp), it_tab) - inv_rnd2(-144(%ebp), il_tab) // last round uses a different table - -// move final values to the output array. CAUTION: the -// order of these assigns rely on the register mappings - - add $8,%esp - mov out_blk+12(%esp),%ebp - mov %r5,12(%ebp) - pop %edi - mov %r4,8(%ebp) - pop %esi - mov %r1,4(%ebp) - pop %ebx - mov %r0,(%ebp) - pop %ebp - mov $1,%eax - ret - diff --git a/arch/i386/crypto/aes_32.c b/arch/i386/crypto/aes_32.c deleted file mode 100644 index 49aad9397f1..00000000000 --- a/arch/i386/crypto/aes_32.c +++ /dev/null @@ -1,515 +0,0 @@ -/* - * - * Glue Code for optimized 586 assembler version of AES - * - * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK. - * All rights reserved. - * - * LICENSE TERMS - * - * The free distribution and use of this software in both source and binary - * form is allowed (with or without changes) provided that: - * - * 1. distributions of this source code include the above copyright - * notice, this list of conditions and the following disclaimer; - * - * 2. distributions in binary form include the above copyright - * notice, this list of conditions and the following disclaimer - * in the documentation and/or other associated materials; - * - * 3. the copyright holder's name is not used to endorse products - * built using this software without specific written permission. - * - * ALTERNATIVELY, provided that this notice is retained in full, this product - * may be distributed under the terms of the GNU General Public License (GPL), - * in which case the provisions of the GPL apply INSTEAD OF those given above. - * - * DISCLAIMER - * - * This software is provided 'as is' with no explicit or implied warranties - * in respect of its properties, including, but not limited to, correctness - * and/or fitness for purpose. - * - * Copyright (c) 2003, Adam J. Richter (conversion to - * 2.5 API). - * Copyright (c) 2003, 2004 Fruhwirth Clemens - * Copyright (c) 2004 Red Hat, Inc., James Morris - * - */ - -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -#define AES_MIN_KEY_SIZE 16 -#define AES_MAX_KEY_SIZE 32 -#define AES_BLOCK_SIZE 16 -#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE -#define RC_LENGTH 29 - -struct aes_ctx { - u32 ekey[AES_KS_LENGTH]; - u32 rounds; - u32 dkey[AES_KS_LENGTH]; -}; - -#define WPOLY 0x011b -#define bytes2word(b0, b1, b2, b3) \ - (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0)) - -/* define the finite field multiplies required for Rijndael */ -#define f2(x) ((x) ? pow[log[x] + 0x19] : 0) -#define f3(x) ((x) ? pow[log[x] + 0x01] : 0) -#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) -#define fb(x) ((x) ? pow[log[x] + 0x68] : 0) -#define fd(x) ((x) ? pow[log[x] + 0xee] : 0) -#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) -#define fi(x) ((x) ? pow[255 - log[x]]: 0) - -static inline u32 upr(u32 x, int n) -{ - return (x << 8 * n) | (x >> (32 - 8 * n)); -} - -static inline u8 bval(u32 x, int n) -{ - return x >> 8 * n; -} - -/* The forward and inverse affine transformations used in the S-box */ -#define fwd_affine(x) \ - (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8))) - -#define inv_affine(x) \ - (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8))) - -static u32 rcon_tab[RC_LENGTH]; - -u32 ft_tab[4][256]; -u32 fl_tab[4][256]; -static u32 im_tab[4][256]; -u32 il_tab[4][256]; -u32 it_tab[4][256]; - -static void gen_tabs(void) -{ - u32 i, w; - u8 pow[512], log[256]; - - /* - * log and power tables for GF(2^8) finite field with - * WPOLY as modular polynomial - the simplest primitive - * root is 0x03, used here to generate the tables. - */ - i = 0; w = 1; - - do { - pow[i] = (u8)w; - pow[i + 255] = (u8)w; - log[w] = (u8)i++; - w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0); - } while (w != 1); - - for(i = 0, w = 1; i < RC_LENGTH; ++i) { - rcon_tab[i] = bytes2word(w, 0, 0, 0); - w = f2(w); - } - - for(i = 0; i < 256; ++i) { - u8 b; - - b = fwd_affine(fi((u8)i)); - w = bytes2word(f2(b), b, b, f3(b)); - - /* tables for a normal encryption round */ - ft_tab[0][i] = w; - ft_tab[1][i] = upr(w, 1); - ft_tab[2][i] = upr(w, 2); - ft_tab[3][i] = upr(w, 3); - w = bytes2word(b, 0, 0, 0); - - /* - * tables for last encryption round - * (may also be used in the key schedule) - */ - fl_tab[0][i] = w; - fl_tab[1][i] = upr(w, 1); - fl_tab[2][i] = upr(w, 2); - fl_tab[3][i] = upr(w, 3); - - b = fi(inv_affine((u8)i)); - w = bytes2word(fe(b), f9(b), fd(b), fb(b)); - - /* tables for the inverse mix column operation */ - im_tab[0][b] = w; - im_tab[1][b] = upr(w, 1); - im_tab[2][b] = upr(w, 2); - im_tab[3][b] = upr(w, 3); - - /* tables for a normal decryption round */ - it_tab[0][i] = w; - it_tab[1][i] = upr(w,1); - it_tab[2][i] = upr(w,2); - it_tab[3][i] = upr(w,3); - - w = bytes2word(b, 0, 0, 0); - - /* tables for last decryption round */ - il_tab[0][i] = w; - il_tab[1][i] = upr(w,1); - il_tab[2][i] = upr(w,2); - il_tab[3][i] = upr(w,3); - } -} - -#define four_tables(x,tab,vf,rf,c) \ -( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \ - tab[1][bval(vf(x,1,c),rf(1,c))] ^ \ - tab[2][bval(vf(x,2,c),rf(2,c))] ^ \ - tab[3][bval(vf(x,3,c),rf(3,c))] \ -) - -#define vf1(x,r,c) (x) -#define rf1(r,c) (r) -#define rf2(r,c) ((r-c)&3) - -#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) -#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) - -#define ff(x) inv_mcol(x) - -#define ke4(k,i) \ -{ \ - k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ - k[4*(i)+5] = ss[1] ^= ss[0]; \ - k[4*(i)+6] = ss[2] ^= ss[1]; \ - k[4*(i)+7] = ss[3] ^= ss[2]; \ -} - -#define kel4(k,i) \ -{ \ - k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ - k[4*(i)+5] = ss[1] ^= ss[0]; \ - k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \ -} - -#define ke6(k,i) \ -{ \ - k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ - k[6*(i)+ 7] = ss[1] ^= ss[0]; \ - k[6*(i)+ 8] = ss[2] ^= ss[1]; \ - k[6*(i)+ 9] = ss[3] ^= ss[2]; \ - k[6*(i)+10] = ss[4] ^= ss[3]; \ - k[6*(i)+11] = ss[5] ^= ss[4]; \ -} - -#define kel6(k,i) \ -{ \ - k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ - k[6*(i)+ 7] = ss[1] ^= ss[0]; \ - k[6*(i)+ 8] = ss[2] ^= ss[1]; \ - k[6*(i)+ 9] = ss[3] ^= ss[2]; \ -} - -#define ke8(k,i) \ -{ \ - k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ - k[8*(i)+ 9] = ss[1] ^= ss[0]; \ - k[8*(i)+10] = ss[2] ^= ss[1]; \ - k[8*(i)+11] = ss[3] ^= ss[2]; \ - k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ - k[8*(i)+13] = ss[5] ^= ss[4]; \ - k[8*(i)+14] = ss[6] ^= ss[5]; \ - k[8*(i)+15] = ss[7] ^= ss[6]; \ -} - -#define kel8(k,i) \ -{ \ - k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ - k[8*(i)+ 9] = ss[1] ^= ss[0]; \ - k[8*(i)+10] = ss[2] ^= ss[1]; \ - k[8*(i)+11] = ss[3] ^= ss[2]; \ -} - -#define kdf4(k,i) \ -{ \ - ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ - ss[1] = ss[1] ^ ss[3]; \ - ss[2] = ss[2] ^ ss[3]; \ - ss[3] = ss[3]; \ - ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ - ss[i % 4] ^= ss[4]; \ - ss[4] ^= k[4*(i)]; \ - k[4*(i)+4] = ff(ss[4]); \ - ss[4] ^= k[4*(i)+1]; \ - k[4*(i)+5] = ff(ss[4]); \ - ss[4] ^= k[4*(i)+2]; \ - k[4*(i)+6] = ff(ss[4]); \ - ss[4] ^= k[4*(i)+3]; \ - k[4*(i)+7] = ff(ss[4]); \ -} - -#define kd4(k,i) \ -{ \ - ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ - ss[i % 4] ^= ss[4]; \ - ss[4] = ff(ss[4]); \ - k[4*(i)+4] = ss[4] ^= k[4*(i)]; \ - k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \ - k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \ - k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \ -} - -#define kdl4(k,i) \ -{ \ - ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ - ss[i % 4] ^= ss[4]; \ - k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ - k[4*(i)+5] = ss[1] ^ ss[3]; \ - k[4*(i)+6] = ss[0]; \ - k[4*(i)+7] = ss[1]; \ -} - -#define kdf6(k,i) \ -{ \ - ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ - k[6*(i)+ 6] = ff(ss[0]); \ - ss[1] ^= ss[0]; \ - k[6*(i)+ 7] = ff(ss[1]); \ - ss[2] ^= ss[1]; \ - k[6*(i)+ 8] = ff(ss[2]); \ - ss[3] ^= ss[2]; \ - k[6*(i)+ 9] = ff(ss[3]); \ - ss[4] ^= ss[3]; \ - k[6*(i)+10] = ff(ss[4]); \ - ss[5] ^= ss[4]; \ - k[6*(i)+11] = ff(ss[5]); \ -} - -#define kd6(k,i) \ -{ \ - ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \ - ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \ - k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \ - ss[1] ^= ss[0]; \ - k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \ - ss[2] ^= ss[1]; \ - k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \ - ss[3] ^= ss[2]; \ - k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \ - ss[4] ^= ss[3]; \ - k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \ - ss[5] ^= ss[4]; \ - k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \ -} - -#define kdl6(k,i) \ -{ \ - ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ - k[6*(i)+ 6] = ss[0]; \ - ss[1] ^= ss[0]; \ - k[6*(i)+ 7] = ss[1]; \ - ss[2] ^= ss[1]; \ - k[6*(i)+ 8] = ss[2]; \ - ss[3] ^= ss[2]; \ - k[6*(i)+ 9] = ss[3]; \ -} - -#define kdf8(k,i) \ -{ \ - ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ - k[8*(i)+ 8] = ff(ss[0]); \ - ss[1] ^= ss[0]; \ - k[8*(i)+ 9] = ff(ss[1]); \ - ss[2] ^= ss[1]; \ - k[8*(i)+10] = ff(ss[2]); \ - ss[3] ^= ss[2]; \ - k[8*(i)+11] = ff(ss[3]); \ - ss[4] ^= ls_box(ss[3],0); \ - k[8*(i)+12] = ff(ss[4]); \ - ss[5] ^= ss[4]; \ - k[8*(i)+13] = ff(ss[5]); \ - ss[6] ^= ss[5]; \ - k[8*(i)+14] = ff(ss[6]); \ - ss[7] ^= ss[6]; \ - k[8*(i)+15] = ff(ss[7]); \ -} - -#define kd8(k,i) \ -{ \ - u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \ - ss[0] ^= __g; \ - __g = ff(__g); \ - k[8*(i)+ 8] = __g ^= k[8*(i)]; \ - ss[1] ^= ss[0]; \ - k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \ - ss[2] ^= ss[1]; \ - k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \ - ss[3] ^= ss[2]; \ - k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \ - __g = ls_box(ss[3],0); \ - ss[4] ^= __g; \ - __g = ff(__g); \ - k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \ - ss[5] ^= ss[4]; \ - k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \ - ss[6] ^= ss[5]; \ - k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \ - ss[7] ^= ss[6]; \ - k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \ -} - -#define kdl8(k,i) \ -{ \ - ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ - k[8*(i)+ 8] = ss[0]; \ - ss[1] ^= ss[0]; \ - k[8*(i)+ 9] = ss[1]; \ - ss[2] ^= ss[1]; \ - k[8*(i)+10] = ss[2]; \ - ss[3] ^= ss[2]; \ - k[8*(i)+11] = ss[3]; \ -} - -static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) -{ - int i; - u32 ss[8]; - struct aes_ctx *ctx = crypto_tfm_ctx(tfm); - const __le32 *key = (const __le32 *)in_key; - u32 *flags = &tfm->crt_flags; - - /* encryption schedule */ - - ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]); - ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]); - ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]); - ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]); - - switch(key_len) { - case 16: - for (i = 0; i < 9; i++) - ke4(ctx->ekey, i); - kel4(ctx->ekey, 9); - ctx->rounds = 10; - break; - - case 24: - ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); - ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); - for (i = 0; i < 7; i++) - ke6(ctx->ekey, i); - kel6(ctx->ekey, 7); - ctx->rounds = 12; - break; - - case 32: - ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); - ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); - ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]); - ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]); - for (i = 0; i < 6; i++) - ke8(ctx->ekey, i); - kel8(ctx->ekey, 6); - ctx->rounds = 14; - break; - - default: - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - /* decryption schedule */ - - ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]); - ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]); - ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]); - ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]); - - switch (key_len) { - case 16: - kdf4(ctx->dkey, 0); - for (i = 1; i < 9; i++) - kd4(ctx->dkey, i); - kdl4(ctx->dkey, 9); - break; - - case 24: - ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); - ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); - kdf6(ctx->dkey, 0); - for (i = 1; i < 7; i++) - kd6(ctx->dkey, i); - kdl6(ctx->dkey, 7); - break; - - case 32: - ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); - ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); - ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6])); - ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7])); - kdf8(ctx->dkey, 0); - for (i = 1; i < 6; i++) - kd8(ctx->dkey, i); - kdl8(ctx->dkey, 6); - break; - } - return 0; -} - -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_enc_blk(tfm, dst, src); -} - -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_dec_blk(tfm, dst, src); -} - -static struct crypto_alg aes_alg = { - .cra_name = "aes", - .cra_driver_name = "aes-i586", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aes_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt - } - } -}; - -static int __init aes_init(void) -{ - gen_tabs(); - return crypto_register_alg(&aes_alg); -} - -static void __exit aes_fini(void) -{ - crypto_unregister_alg(&aes_alg); -} - -module_init(aes_init); -module_exit(aes_fini); - -MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized"); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter"); -MODULE_ALIAS("aes"); diff --git a/arch/i386/crypto/twofish-i586-asm_32.S b/arch/i386/crypto/twofish-i586-asm_32.S deleted file mode 100644 index 39b98ed2c1b..00000000000 --- a/arch/i386/crypto/twofish-i586-asm_32.S +++ /dev/null @@ -1,335 +0,0 @@ -/*************************************************************************** -* Copyright (C) 2006 by Joachim Fritschi, * -* * -* This program is free software; you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published by * -* the Free Software Foundation; either version 2 of the License, or * -* (at your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with this program; if not, write to the * -* Free Software Foundation, Inc., * -* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * -***************************************************************************/ - -.file "twofish-i586-asm.S" -.text - -#include - -/* return adress at 0 */ - -#define in_blk 12 /* input byte array address parameter*/ -#define out_blk 8 /* output byte array address parameter*/ -#define tfm 4 /* Twofish context structure */ - -#define a_offset 0 -#define b_offset 4 -#define c_offset 8 -#define d_offset 12 - -/* Structure of the crypto context struct*/ - -#define s0 0 /* S0 Array 256 Words each */ -#define s1 1024 /* S1 Array */ -#define s2 2048 /* S2 Array */ -#define s3 3072 /* S3 Array */ -#define w 4096 /* 8 whitening keys (word) */ -#define k 4128 /* key 1-32 ( word ) */ - -/* define a few register aliases to allow macro substitution */ - -#define R0D %eax -#define R0B %al -#define R0H %ah - -#define R1D %ebx -#define R1B %bl -#define R1H %bh - -#define R2D %ecx -#define R2B %cl -#define R2H %ch - -#define R3D %edx -#define R3B %dl -#define R3H %dh - - -/* performs input whitening */ -#define input_whitening(src,context,offset)\ - xor w+offset(context), src; - -/* performs input whitening */ -#define output_whitening(src,context,offset)\ - xor w+16+offset(context), src; - -/* - * a input register containing a (rotated 16) - * b input register containing b - * c input register containing c - * d input register containing d (already rol $1) - * operations on a and b are interleaved to increase performance - */ -#define encrypt_round(a,b,c,d,round)\ - push d ## D;\ - movzx b ## B, %edi;\ - mov s1(%ebp,%edi,4),d ## D;\ - movzx a ## B, %edi;\ - mov s2(%ebp,%edi,4),%esi;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor s2(%ebp,%edi,4),d ## D;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s3(%ebp,%edi,4),%esi;\ - movzx b ## B, %edi;\ - xor s3(%ebp,%edi,4),d ## D;\ - movzx a ## B, %edi;\ - xor (%ebp,%edi,4), %esi;\ - movzx b ## H, %edi;\ - ror $15, b ## D;\ - xor (%ebp,%edi,4), d ## D;\ - movzx a ## H, %edi;\ - xor s1(%ebp,%edi,4),%esi;\ - pop %edi;\ - add d ## D, %esi;\ - add %esi, d ## D;\ - add k+round(%ebp), %esi;\ - xor %esi, c ## D;\ - rol $15, c ## D;\ - add k+4+round(%ebp),d ## D;\ - xor %edi, d ## D; - -/* - * a input register containing a (rotated 16) - * b input register containing b - * c input register containing c - * d input register containing d (already rol $1) - * operations on a and b are interleaved to increase performance - * last round has different rotations for the output preparation - */ -#define encrypt_last_round(a,b,c,d,round)\ - push d ## D;\ - movzx b ## B, %edi;\ - mov s1(%ebp,%edi,4),d ## D;\ - movzx a ## B, %edi;\ - mov s2(%ebp,%edi,4),%esi;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor s2(%ebp,%edi,4),d ## D;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s3(%ebp,%edi,4),%esi;\ - movzx b ## B, %edi;\ - xor s3(%ebp,%edi,4),d ## D;\ - movzx a ## B, %edi;\ - xor (%ebp,%edi,4), %esi;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor (%ebp,%edi,4), d ## D;\ - movzx a ## H, %edi;\ - xor s1(%ebp,%edi,4),%esi;\ - pop %edi;\ - add d ## D, %esi;\ - add %esi, d ## D;\ - add k+round(%ebp), %esi;\ - xor %esi, c ## D;\ - ror $1, c ## D;\ - add k+4+round(%ebp),d ## D;\ - xor %edi, d ## D; - -/* - * a input register containing a - * b input register containing b (rotated 16) - * c input register containing c - * d input register containing d (already rol $1) - * operations on a and b are interleaved to increase performance - */ -#define decrypt_round(a,b,c,d,round)\ - push c ## D;\ - movzx a ## B, %edi;\ - mov (%ebp,%edi,4), c ## D;\ - movzx b ## B, %edi;\ - mov s3(%ebp,%edi,4),%esi;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s1(%ebp,%edi,4),c ## D;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor (%ebp,%edi,4), %esi;\ - movzx a ## B, %edi;\ - xor s2(%ebp,%edi,4),c ## D;\ - movzx b ## B, %edi;\ - xor s1(%ebp,%edi,4),%esi;\ - movzx a ## H, %edi;\ - ror $15, a ## D;\ - xor s3(%ebp,%edi,4),c ## D;\ - movzx b ## H, %edi;\ - xor s2(%ebp,%edi,4),%esi;\ - pop %edi;\ - add %esi, c ## D;\ - add c ## D, %esi;\ - add k+round(%ebp), c ## D;\ - xor %edi, c ## D;\ - add k+4+round(%ebp),%esi;\ - xor %esi, d ## D;\ - rol $15, d ## D; - -/* - * a input register containing a - * b input register containing b (rotated 16) - * c input register containing c - * d input register containing d (already rol $1) - * operations on a and b are interleaved to increase performance - * last round has different rotations for the output preparation - */ -#define decrypt_last_round(a,b,c,d,round)\ - push c ## D;\ - movzx a ## B, %edi;\ - mov (%ebp,%edi,4), c ## D;\ - movzx b ## B, %edi;\ - mov s3(%ebp,%edi,4),%esi;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s1(%ebp,%edi,4),c ## D;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor (%ebp,%edi,4), %esi;\ - movzx a ## B, %edi;\ - xor s2(%ebp,%edi,4),c ## D;\ - movzx b ## B, %edi;\ - xor s1(%ebp,%edi,4),%esi;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s3(%ebp,%edi,4),c ## D;\ - movzx b ## H, %edi;\ - xor s2(%ebp,%edi,4),%esi;\ - pop %edi;\ - add %esi, c ## D;\ - add c ## D, %esi;\ - add k+round(%ebp), c ## D;\ - xor %edi, c ## D;\ - add k+4+round(%ebp),%esi;\ - xor %esi, d ## D;\ - ror $1, d ## D; - -.align 4 -.global twofish_enc_blk -.global twofish_dec_blk - -twofish_enc_blk: - push %ebp /* save registers according to calling convention*/ - push %ebx - push %esi - push %edi - - mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ - add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ - mov in_blk+16(%esp),%edi /* input adress in edi */ - - mov (%edi), %eax - mov b_offset(%edi), %ebx - mov c_offset(%edi), %ecx - mov d_offset(%edi), %edx - input_whitening(%eax,%ebp,a_offset) - ror $16, %eax - input_whitening(%ebx,%ebp,b_offset) - input_whitening(%ecx,%ebp,c_offset) - input_whitening(%edx,%ebp,d_offset) - rol $1, %edx - - encrypt_round(R0,R1,R2,R3,0); - encrypt_round(R2,R3,R0,R1,8); - encrypt_round(R0,R1,R2,R3,2*8); - encrypt_round(R2,R3,R0,R1,3*8); - encrypt_round(R0,R1,R2,R3,4*8); - encrypt_round(R2,R3,R0,R1,5*8); - encrypt_round(R0,R1,R2,R3,6*8); - encrypt_round(R2,R3,R0,R1,7*8); - encrypt_round(R0,R1,R2,R3,8*8); - encrypt_round(R2,R3,R0,R1,9*8); - encrypt_round(R0,R1,R2,R3,10*8); - encrypt_round(R2,R3,R0,R1,11*8); - encrypt_round(R0,R1,R2,R3,12*8); - encrypt_round(R2,R3,R0,R1,13*8); - encrypt_round(R0,R1,R2,R3,14*8); - encrypt_last_round(R2,R3,R0,R1,15*8); - - output_whitening(%eax,%ebp,c_offset) - output_whitening(%ebx,%ebp,d_offset) - output_whitening(%ecx,%ebp,a_offset) - output_whitening(%edx,%ebp,b_offset) - mov out_blk+16(%esp),%edi; - mov %eax, c_offset(%edi) - mov %ebx, d_offset(%edi) - mov %ecx, (%edi) - mov %edx, b_offset(%edi) - - pop %edi - pop %esi - pop %ebx - pop %ebp - mov $1, %eax - ret - -twofish_dec_blk: - push %ebp /* save registers according to calling convention*/ - push %ebx - push %esi - push %edi - - - mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ - add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ - mov in_blk+16(%esp),%edi /* input adress in edi */ - - mov (%edi), %eax - mov b_offset(%edi), %ebx - mov c_offset(%edi), %ecx - mov d_offset(%edi), %edx - output_whitening(%eax,%ebp,a_offset) - output_whitening(%ebx,%ebp,b_offset) - ror $16, %ebx - output_whitening(%ecx,%ebp,c_offset) - output_whitening(%edx,%ebp,d_offset) - rol $1, %ecx - - decrypt_round(R0,R1,R2,R3,15*8); - decrypt_round(R2,R3,R0,R1,14*8); - decrypt_round(R0,R1,R2,R3,13*8); - decrypt_round(R2,R3,R0,R1,12*8); - decrypt_round(R0,R1,R2,R3,11*8); - decrypt_round(R2,R3,R0,R1,10*8); - decrypt_round(R0,R1,R2,R3,9*8); - decrypt_round(R2,R3,R0,R1,8*8); - decrypt_round(R0,R1,R2,R3,7*8); - decrypt_round(R2,R3,R0,R1,6*8); - decrypt_round(R0,R1,R2,R3,5*8); - decrypt_round(R2,R3,R0,R1,4*8); - decrypt_round(R0,R1,R2,R3,3*8); - decrypt_round(R2,R3,R0,R1,2*8); - decrypt_round(R0,R1,R2,R3,1*8); - decrypt_last_round(R2,R3,R0,R1,0); - - input_whitening(%eax,%ebp,c_offset) - input_whitening(%ebx,%ebp,d_offset) - input_whitening(%ecx,%ebp,a_offset) - input_whitening(%edx,%ebp,b_offset) - mov out_blk+16(%esp),%edi; - mov %eax, c_offset(%edi) - mov %ebx, d_offset(%edi) - mov %ecx, (%edi) - mov %edx, b_offset(%edi) - - pop %edi - pop %esi - pop %ebx - pop %ebp - mov $1, %eax - ret diff --git a/arch/i386/crypto/twofish_32.c b/arch/i386/crypto/twofish_32.c deleted file mode 100644 index e3004dfe9c7..00000000000 --- a/arch/i386/crypto/twofish_32.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Glue Code for optimized 586 assembler version of TWOFISH - * - * Originally Twofish for GPG - * By Matthew Skala , July 26, 1998 - * 256-bit key length added March 20, 1999 - * Some modifications to reduce the text size by Werner Koch, April, 1998 - * Ported to the kerneli patch by Marc Mutz - * Ported to CryptoAPI by Colin Slater - * - * The original author has disclaimed all copyright interest in this - * code and thus put it in the public domain. The subsequent authors - * have put this under the GNU General Public License. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - * This code is a "clean room" implementation, written from the paper - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available - * through http://www.counterpane.com/twofish.html - * - * For background information on multiplication in finite fields, used for - * the matrix operations in the key schedule, see the book _Contemporary - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the - * Third Edition. - */ - -#include -#include -#include -#include -#include - - -asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_enc_blk(tfm, dst, src); -} - -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_dec_blk(tfm, dst, src); -} - -static struct crypto_alg alg = { - .cra_name = "twofish", - .cra_driver_name = "twofish-i586", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = TF_MIN_KEY_SIZE, - .cia_max_keysize = TF_MAX_KEY_SIZE, - .cia_setkey = twofish_setkey, - .cia_encrypt = twofish_encrypt, - .cia_decrypt = twofish_decrypt - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); -MODULE_ALIAS("twofish"); diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile new file mode 100644 index 00000000000..b1bcf7c6302 --- /dev/null +++ b/arch/x86/crypto/Makefile @@ -0,0 +1,5 @@ +ifeq ($(CONFIG_X86_32),y) +include ${srctree}/arch/x86/crypto/Makefile_32 +else +include ${srctree}/arch/x86_64/crypto/Makefile_64 +endif diff --git a/arch/x86/crypto/Makefile_32 b/arch/x86/crypto/Makefile_32 new file mode 100644 index 00000000000..2d873a2388e --- /dev/null +++ b/arch/x86/crypto/Makefile_32 @@ -0,0 +1,12 @@ +# +# x86/crypto/Makefile +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o +obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o + +aes-i586-y := aes-i586-asm_32.o aes_32.o +twofish-i586-y := twofish-i586-asm_32.o twofish_32.o + diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S new file mode 100644 index 00000000000..f942f0c8f63 --- /dev/null +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -0,0 +1,373 @@ +// ------------------------------------------------------------------------- +// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. +// All rights reserved. +// +// LICENSE TERMS +// +// The free distribution and use of this software in both source and binary +// form is allowed (with or without changes) provided that: +// +// 1. distributions of this source code include the above copyright +// notice, this list of conditions and the following disclaimer// +// +// 2. distributions in binary form include the above copyright +// notice, this list of conditions and the following disclaimer +// in the documentation and/or other associated materials// +// +// 3. the copyright holder's name is not used to endorse products +// built using this software without specific written permission. +// +// +// ALTERNATIVELY, provided that this notice is retained in full, this product +// may be distributed under the terms of the GNU General Public License (GPL), +// in which case the provisions of the GPL apply INSTEAD OF those given above. +// +// Copyright (c) 2004 Linus Torvalds +// Copyright (c) 2004 Red Hat, Inc., James Morris + +// DISCLAIMER +// +// This software is provided 'as is' with no explicit or implied warranties +// in respect of its properties including, but not limited to, correctness +// and fitness for purpose. +// ------------------------------------------------------------------------- +// Issue Date: 29/07/2002 + +.file "aes-i586-asm.S" +.text + +#include + +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) + +/* offsets to parameters with one register pushed onto stack */ +#define tfm 8 +#define out_blk 12 +#define in_blk 16 + +/* offsets in crypto_tfm structure */ +#define ekey (crypto_tfm_ctx_offset + 0) +#define nrnd (crypto_tfm_ctx_offset + 256) +#define dkey (crypto_tfm_ctx_offset + 260) + +// register mapping for encrypt and decrypt subroutines + +#define r0 eax +#define r1 ebx +#define r2 ecx +#define r3 edx +#define r4 esi +#define r5 edi + +#define eaxl al +#define eaxh ah +#define ebxl bl +#define ebxh bh +#define ecxl cl +#define ecxh ch +#define edxl dl +#define edxh dh + +#define _h(reg) reg##h +#define h(reg) _h(reg) + +#define _l(reg) reg##l +#define l(reg) _l(reg) + +// This macro takes a 32-bit word representing a column and uses +// each of its four bytes to index into four tables of 256 32-bit +// words to obtain values that are then xored into the appropriate +// output registers r0, r1, r4 or r5. + +// Parameters: +// table table base address +// %1 out_state[0] +// %2 out_state[1] +// %3 out_state[2] +// %4 out_state[3] +// idx input register for the round (destroyed) +// tmp scratch register for the round +// sched key schedule + +#define do_col(table, a1,a2,a3,a4, idx, tmp) \ + movzx %l(idx),%tmp; \ + xor table(,%tmp,4),%a1; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+2*tlen(,%tmp,4),%a3; \ + xor table+3*tlen(,%idx,4),%a4; + +// initialise output registers from the key schedule +// NB1: original value of a3 is in idx on exit +// NB2: original values of a1,a2,a4 aren't used +#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ + mov 0 sched,%a1; \ + movzx %l(idx),%tmp; \ + mov 12 sched,%a2; \ + xor table(,%tmp,4),%a1; \ + mov 4 sched,%a4; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+3*tlen(,%idx,4),%a4; \ + mov %a3,%idx; \ + mov 8 sched,%a3; \ + xor table+2*tlen(,%tmp,4),%a3; + +// initialise output registers from the key schedule +// NB1: original value of a3 is in idx on exit +// NB2: original values of a1,a2,a4 aren't used +#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ + mov 0 sched,%a1; \ + movzx %l(idx),%tmp; \ + mov 4 sched,%a2; \ + xor table(,%tmp,4),%a1; \ + mov 12 sched,%a4; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+3*tlen(,%idx,4),%a4; \ + mov %a3,%idx; \ + mov 8 sched,%a3; \ + xor table+2*tlen(,%tmp,4),%a3; + + +// original Gladman had conditional saves to MMX regs. +#define save(a1, a2) \ + mov %a2,4*a1(%esp) + +#define restore(a1, a2) \ + mov 4*a2(%esp),%a1 + +// These macros perform a forward encryption cycle. They are entered with +// the first previous round column values in r0,r1,r4,r5 and +// exit with the final values in the same registers, using stack +// for temporary storage. + +// round column values +// on entry: r0,r1,r4,r5 +// on exit: r2,r1,r4,r5 +#define fwd_rnd1(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ + do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ + restore(r0,0); \ + do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ + restore(r0,1); \ + do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ + +// round column values +// on entry: r2,r1,r4,r5 +// on exit: r0,r1,r4,r5 +#define fwd_rnd2(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ + do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ + restore(r2,0); \ + do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ + restore(r2,1); \ + do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ + +// These macros performs an inverse encryption cycle. They are entered with +// the first previous round column values in r0,r1,r4,r5 and +// exit with the final values in the same registers, using stack +// for temporary storage + +// round column values +// on entry: r0,r1,r4,r5 +// on exit: r2,r1,r4,r5 +#define inv_rnd1(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ + do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ + restore(r0,0); \ + do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ + restore(r0,1); \ + do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ + +// round column values +// on entry: r2,r1,r4,r5 +// on exit: r0,r1,r4,r5 +#define inv_rnd2(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ + do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ + restore(r2,0); \ + do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ + restore(r2,1); \ + do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ + +// AES (Rijndael) Encryption Subroutine +/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ + +.global aes_enc_blk + +.extern ft_tab +.extern fl_tab + +.align 4 + +aes_enc_blk: + push %ebp + mov tfm(%esp),%ebp + +// CAUTION: the order and the values used in these assigns +// rely on the register mappings + +1: push %ebx + mov in_blk+4(%esp),%r2 + push %esi + mov nrnd(%ebp),%r3 // number of rounds + push %edi +#if ekey != 0 + lea ekey(%ebp),%ebp // key pointer +#endif + +// input four columns and xor in first round key + + mov (%r2),%r0 + mov 4(%r2),%r1 + mov 8(%r2),%r4 + mov 12(%r2),%r5 + xor (%ebp),%r0 + xor 4(%ebp),%r1 + xor 8(%ebp),%r4 + xor 12(%ebp),%r5 + + sub $8,%esp // space for register saves on stack + add $16,%ebp // increment to next round key + cmp $12,%r3 + jb 4f // 10 rounds for 128-bit key + lea 32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea 32(%ebp),%ebp + +2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key + fwd_rnd2( -48(%ebp) ,ft_tab) +3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key + fwd_rnd2( -16(%ebp) ,ft_tab) +4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key + fwd_rnd2( +16(%ebp) ,ft_tab) + fwd_rnd1( +32(%ebp) ,ft_tab) + fwd_rnd2( +48(%ebp) ,ft_tab) + fwd_rnd1( +64(%ebp) ,ft_tab) + fwd_rnd2( +80(%ebp) ,ft_tab) + fwd_rnd1( +96(%ebp) ,ft_tab) + fwd_rnd2(+112(%ebp) ,ft_tab) + fwd_rnd1(+128(%ebp) ,ft_tab) + fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table + +// move final values to the output array. CAUTION: the +// order of these assigns rely on the register mappings + + add $8,%esp + mov out_blk+12(%esp),%ebp + mov %r5,12(%ebp) + pop %edi + mov %r4,8(%ebp) + pop %esi + mov %r1,4(%ebp) + pop %ebx + mov %r0,(%ebp) + pop %ebp + mov $1,%eax + ret + +// AES (Rijndael) Decryption Subroutine +/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ + +.global aes_dec_blk + +.extern it_tab +.extern il_tab + +.align 4 + +aes_dec_blk: + push %ebp + mov tfm(%esp),%ebp + +// CAUTION: the order and the values used in these assigns +// rely on the register mappings + +1: push %ebx + mov in_blk+4(%esp),%r2 + push %esi + mov nrnd(%ebp),%r3 // number of rounds + push %edi +#if dkey != 0 + lea dkey(%ebp),%ebp // key pointer +#endif + mov %r3,%r0 + shl $4,%r0 + add %r0,%ebp + +// input four columns and xor in first round key + + mov (%r2),%r0 + mov 4(%r2),%r1 + mov 8(%r2),%r4 + mov 12(%r2),%r5 + xor (%ebp),%r0 + xor 4(%ebp),%r1 + xor 8(%ebp),%r4 + xor 12(%ebp),%r5 + + sub $8,%esp // space for register saves on stack + sub $16,%ebp // increment to next round key + cmp $12,%r3 + jb 4f // 10 rounds for 128-bit key + lea -32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea -32(%ebp),%ebp + +2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key + inv_rnd2( +48(%ebp), it_tab) +3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key + inv_rnd2( +16(%ebp), it_tab) +4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key + inv_rnd2( -16(%ebp), it_tab) + inv_rnd1( -32(%ebp), it_tab) + inv_rnd2( -48(%ebp), it_tab) + inv_rnd1( -64(%ebp), it_tab) + inv_rnd2( -80(%ebp), it_tab) + inv_rnd1( -96(%ebp), it_tab) + inv_rnd2(-112(%ebp), it_tab) + inv_rnd1(-128(%ebp), it_tab) + inv_rnd2(-144(%ebp), il_tab) // last round uses a different table + +// move final values to the output array. CAUTION: the +// order of these assigns rely on the register mappings + + add $8,%esp + mov out_blk+12(%esp),%ebp + mov %r5,12(%ebp) + pop %edi + mov %r4,8(%ebp) + pop %esi + mov %r1,4(%ebp) + pop %ebx + mov %r0,(%ebp) + pop %ebp + mov $1,%eax + ret + diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c new file mode 100644 index 00000000000..49aad9397f1 --- /dev/null +++ b/arch/x86/crypto/aes_32.c @@ -0,0 +1,515 @@ +/* + * + * Glue Code for optimized 586 assembler version of AES + * + * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK. + * All rights reserved. + * + * LICENSE TERMS + * + * The free distribution and use of this software in both source and binary + * form is allowed (with or without changes) provided that: + * + * 1. distributions of this source code include the above copyright + * notice, this list of conditions and the following disclaimer; + * + * 2. distributions in binary form include the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other associated materials; + * + * 3. the copyright holder's name is not used to endorse products + * built using this software without specific written permission. + * + * ALTERNATIVELY, provided that this notice is retained in full, this product + * may be distributed under the terms of the GNU General Public License (GPL), + * in which case the provisions of the GPL apply INSTEAD OF those given above. + * + * DISCLAIMER + * + * This software is provided 'as is' with no explicit or implied warranties + * in respect of its properties, including, but not limited to, correctness + * and/or fitness for purpose. + * + * Copyright (c) 2003, Adam J. Richter (conversion to + * 2.5 API). + * Copyright (c) 2003, 2004 Fruhwirth Clemens + * Copyright (c) 2004 Red Hat, Inc., James Morris + * + */ + +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +#define AES_MIN_KEY_SIZE 16 +#define AES_MAX_KEY_SIZE 32 +#define AES_BLOCK_SIZE 16 +#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE +#define RC_LENGTH 29 + +struct aes_ctx { + u32 ekey[AES_KS_LENGTH]; + u32 rounds; + u32 dkey[AES_KS_LENGTH]; +}; + +#define WPOLY 0x011b +#define bytes2word(b0, b1, b2, b3) \ + (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0)) + +/* define the finite field multiplies required for Rijndael */ +#define f2(x) ((x) ? pow[log[x] + 0x19] : 0) +#define f3(x) ((x) ? pow[log[x] + 0x01] : 0) +#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) +#define fb(x) ((x) ? pow[log[x] + 0x68] : 0) +#define fd(x) ((x) ? pow[log[x] + 0xee] : 0) +#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) +#define fi(x) ((x) ? pow[255 - log[x]]: 0) + +static inline u32 upr(u32 x, int n) +{ + return (x << 8 * n) | (x >> (32 - 8 * n)); +} + +static inline u8 bval(u32 x, int n) +{ + return x >> 8 * n; +} + +/* The forward and inverse affine transformations used in the S-box */ +#define fwd_affine(x) \ + (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8))) + +#define inv_affine(x) \ + (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8))) + +static u32 rcon_tab[RC_LENGTH]; + +u32 ft_tab[4][256]; +u32 fl_tab[4][256]; +static u32 im_tab[4][256]; +u32 il_tab[4][256]; +u32 it_tab[4][256]; + +static void gen_tabs(void) +{ + u32 i, w; + u8 pow[512], log[256]; + + /* + * log and power tables for GF(2^8) finite field with + * WPOLY as modular polynomial - the simplest primitive + * root is 0x03, used here to generate the tables. + */ + i = 0; w = 1; + + do { + pow[i] = (u8)w; + pow[i + 255] = (u8)w; + log[w] = (u8)i++; + w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0); + } while (w != 1); + + for(i = 0, w = 1; i < RC_LENGTH; ++i) { + rcon_tab[i] = bytes2word(w, 0, 0, 0); + w = f2(w); + } + + for(i = 0; i < 256; ++i) { + u8 b; + + b = fwd_affine(fi((u8)i)); + w = bytes2word(f2(b), b, b, f3(b)); + + /* tables for a normal encryption round */ + ft_tab[0][i] = w; + ft_tab[1][i] = upr(w, 1); + ft_tab[2][i] = upr(w, 2); + ft_tab[3][i] = upr(w, 3); + w = bytes2word(b, 0, 0, 0); + + /* + * tables for last encryption round + * (may also be used in the key schedule) + */ + fl_tab[0][i] = w; + fl_tab[1][i] = upr(w, 1); + fl_tab[2][i] = upr(w, 2); + fl_tab[3][i] = upr(w, 3); + + b = fi(inv_affine((u8)i)); + w = bytes2word(fe(b), f9(b), fd(b), fb(b)); + + /* tables for the inverse mix column operation */ + im_tab[0][b] = w; + im_tab[1][b] = upr(w, 1); + im_tab[2][b] = upr(w, 2); + im_tab[3][b] = upr(w, 3); + + /* tables for a normal decryption round */ + it_tab[0][i] = w; + it_tab[1][i] = upr(w,1); + it_tab[2][i] = upr(w,2); + it_tab[3][i] = upr(w,3); + + w = bytes2word(b, 0, 0, 0); + + /* tables for last decryption round */ + il_tab[0][i] = w; + il_tab[1][i] = upr(w,1); + il_tab[2][i] = upr(w,2); + il_tab[3][i] = upr(w,3); + } +} + +#define four_tables(x,tab,vf,rf,c) \ +( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \ + tab[1][bval(vf(x,1,c),rf(1,c))] ^ \ + tab[2][bval(vf(x,2,c),rf(2,c))] ^ \ + tab[3][bval(vf(x,3,c),rf(3,c))] \ +) + +#define vf1(x,r,c) (x) +#define rf1(r,c) (r) +#define rf2(r,c) ((r-c)&3) + +#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) +#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) + +#define ff(x) inv_mcol(x) + +#define ke4(k,i) \ +{ \ + k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ + k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; \ + k[4*(i)+7] = ss[3] ^= ss[2]; \ +} + +#define kel4(k,i) \ +{ \ + k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ + k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \ +} + +#define ke6(k,i) \ +{ \ + k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; \ + k[6*(i)+ 9] = ss[3] ^= ss[2]; \ + k[6*(i)+10] = ss[4] ^= ss[3]; \ + k[6*(i)+11] = ss[5] ^= ss[4]; \ +} + +#define kel6(k,i) \ +{ \ + k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; \ + k[6*(i)+ 9] = ss[3] ^= ss[2]; \ +} + +#define ke8(k,i) \ +{ \ + k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; \ + k[8*(i)+11] = ss[3] ^= ss[2]; \ + k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ + k[8*(i)+13] = ss[5] ^= ss[4]; \ + k[8*(i)+14] = ss[6] ^= ss[5]; \ + k[8*(i)+15] = ss[7] ^= ss[6]; \ +} + +#define kel8(k,i) \ +{ \ + k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; \ + k[8*(i)+11] = ss[3] ^= ss[2]; \ +} + +#define kdf4(k,i) \ +{ \ + ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ + ss[1] = ss[1] ^ ss[3]; \ + ss[2] = ss[2] ^ ss[3]; \ + ss[3] = ss[3]; \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ + ss[i % 4] ^= ss[4]; \ + ss[4] ^= k[4*(i)]; \ + k[4*(i)+4] = ff(ss[4]); \ + ss[4] ^= k[4*(i)+1]; \ + k[4*(i)+5] = ff(ss[4]); \ + ss[4] ^= k[4*(i)+2]; \ + k[4*(i)+6] = ff(ss[4]); \ + ss[4] ^= k[4*(i)+3]; \ + k[4*(i)+7] = ff(ss[4]); \ +} + +#define kd4(k,i) \ +{ \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ + ss[i % 4] ^= ss[4]; \ + ss[4] = ff(ss[4]); \ + k[4*(i)+4] = ss[4] ^= k[4*(i)]; \ + k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \ + k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \ + k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \ +} + +#define kdl4(k,i) \ +{ \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ + ss[i % 4] ^= ss[4]; \ + k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ + k[4*(i)+5] = ss[1] ^ ss[3]; \ + k[4*(i)+6] = ss[0]; \ + k[4*(i)+7] = ss[1]; \ +} + +#define kdf6(k,i) \ +{ \ + ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 6] = ff(ss[0]); \ + ss[1] ^= ss[0]; \ + k[6*(i)+ 7] = ff(ss[1]); \ + ss[2] ^= ss[1]; \ + k[6*(i)+ 8] = ff(ss[2]); \ + ss[3] ^= ss[2]; \ + k[6*(i)+ 9] = ff(ss[3]); \ + ss[4] ^= ss[3]; \ + k[6*(i)+10] = ff(ss[4]); \ + ss[5] ^= ss[4]; \ + k[6*(i)+11] = ff(ss[5]); \ +} + +#define kd6(k,i) \ +{ \ + ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \ + ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \ + k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \ + ss[1] ^= ss[0]; \ + k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \ + ss[2] ^= ss[1]; \ + k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \ + ss[3] ^= ss[2]; \ + k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \ + ss[4] ^= ss[3]; \ + k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \ + ss[5] ^= ss[4]; \ + k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \ +} + +#define kdl6(k,i) \ +{ \ + ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 6] = ss[0]; \ + ss[1] ^= ss[0]; \ + k[6*(i)+ 7] = ss[1]; \ + ss[2] ^= ss[1]; \ + k[6*(i)+ 8] = ss[2]; \ + ss[3] ^= ss[2]; \ + k[6*(i)+ 9] = ss[3]; \ +} + +#define kdf8(k,i) \ +{ \ + ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 8] = ff(ss[0]); \ + ss[1] ^= ss[0]; \ + k[8*(i)+ 9] = ff(ss[1]); \ + ss[2] ^= ss[1]; \ + k[8*(i)+10] = ff(ss[2]); \ + ss[3] ^= ss[2]; \ + k[8*(i)+11] = ff(ss[3]); \ + ss[4] ^= ls_box(ss[3],0); \ + k[8*(i)+12] = ff(ss[4]); \ + ss[5] ^= ss[4]; \ + k[8*(i)+13] = ff(ss[5]); \ + ss[6] ^= ss[5]; \ + k[8*(i)+14] = ff(ss[6]); \ + ss[7] ^= ss[6]; \ + k[8*(i)+15] = ff(ss[7]); \ +} + +#define kd8(k,i) \ +{ \ + u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \ + ss[0] ^= __g; \ + __g = ff(__g); \ + k[8*(i)+ 8] = __g ^= k[8*(i)]; \ + ss[1] ^= ss[0]; \ + k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \ + ss[2] ^= ss[1]; \ + k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \ + ss[3] ^= ss[2]; \ + k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \ + __g = ls_box(ss[3],0); \ + ss[4] ^= __g; \ + __g = ff(__g); \ + k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \ + ss[5] ^= ss[4]; \ + k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \ + ss[6] ^= ss[5]; \ + k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \ + ss[7] ^= ss[6]; \ + k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \ +} + +#define kdl8(k,i) \ +{ \ + ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 8] = ss[0]; \ + ss[1] ^= ss[0]; \ + k[8*(i)+ 9] = ss[1]; \ + ss[2] ^= ss[1]; \ + k[8*(i)+10] = ss[2]; \ + ss[3] ^= ss[2]; \ + k[8*(i)+11] = ss[3]; \ +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + int i; + u32 ss[8]; + struct aes_ctx *ctx = crypto_tfm_ctx(tfm); + const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; + + /* encryption schedule */ + + ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]); + ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]); + ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]); + ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]); + + switch(key_len) { + case 16: + for (i = 0; i < 9; i++) + ke4(ctx->ekey, i); + kel4(ctx->ekey, 9); + ctx->rounds = 10; + break; + + case 24: + ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); + ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); + for (i = 0; i < 7; i++) + ke6(ctx->ekey, i); + kel6(ctx->ekey, 7); + ctx->rounds = 12; + break; + + case 32: + ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); + ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); + ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]); + ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]); + for (i = 0; i < 6; i++) + ke8(ctx->ekey, i); + kel8(ctx->ekey, 6); + ctx->rounds = 14; + break; + + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* decryption schedule */ + + ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]); + ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]); + ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]); + ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]); + + switch (key_len) { + case 16: + kdf4(ctx->dkey, 0); + for (i = 1; i < 9; i++) + kd4(ctx->dkey, i); + kdl4(ctx->dkey, 9); + break; + + case 24: + ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); + ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); + kdf6(ctx->dkey, 0); + for (i = 1; i < 7; i++) + kd6(ctx->dkey, i); + kdl6(ctx->dkey, 7); + break; + + case 32: + ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); + ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); + ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6])); + ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7])); + kdf8(ctx->dkey, 0); + for (i = 1; i < 6; i++) + kd8(ctx->dkey, i); + kdl8(ctx->dkey, 6); + break; + } + return 0; +} + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + aes_enc_blk(tfm, dst, src); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + aes_dec_blk(tfm, dst, src); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-i586", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aes_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}; + +static int __init aes_init(void) +{ + gen_tabs(); + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter"); +MODULE_ALIAS("aes"); diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S new file mode 100644 index 00000000000..39b98ed2c1b --- /dev/null +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -0,0 +1,335 @@ +/*************************************************************************** +* Copyright (C) 2006 by Joachim Fritschi, * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * +***************************************************************************/ + +.file "twofish-i586-asm.S" +.text + +#include + +/* return adress at 0 */ + +#define in_blk 12 /* input byte array address parameter*/ +#define out_blk 8 /* output byte array address parameter*/ +#define tfm 4 /* Twofish context structure */ + +#define a_offset 0 +#define b_offset 4 +#define c_offset 8 +#define d_offset 12 + +/* Structure of the crypto context struct*/ + +#define s0 0 /* S0 Array 256 Words each */ +#define s1 1024 /* S1 Array */ +#define s2 2048 /* S2 Array */ +#define s3 3072 /* S3 Array */ +#define w 4096 /* 8 whitening keys (word) */ +#define k 4128 /* key 1-32 ( word ) */ + +/* define a few register aliases to allow macro substitution */ + +#define R0D %eax +#define R0B %al +#define R0H %ah + +#define R1D %ebx +#define R1B %bl +#define R1H %bh + +#define R2D %ecx +#define R2B %cl +#define R2H %ch + +#define R3D %edx +#define R3B %dl +#define R3H %dh + + +/* performs input whitening */ +#define input_whitening(src,context,offset)\ + xor w+offset(context), src; + +/* performs input whitening */ +#define output_whitening(src,context,offset)\ + xor w+16+offset(context), src; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define encrypt_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $15, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + rol $15, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define encrypt_last_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + ror $1, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define decrypt_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $15, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + rol $15, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define decrypt_last_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + ror $1, d ## D; + +.align 4 +.global twofish_enc_blk +.global twofish_dec_blk + +twofish_enc_blk: + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ + add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ + mov in_blk+16(%esp),%edi /* input adress in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + input_whitening(%eax,%ebp,a_offset) + ror $16, %eax + input_whitening(%ebx,%ebp,b_offset) + input_whitening(%ecx,%ebp,c_offset) + input_whitening(%edx,%ebp,d_offset) + rol $1, %edx + + encrypt_round(R0,R1,R2,R3,0); + encrypt_round(R2,R3,R0,R1,8); + encrypt_round(R0,R1,R2,R3,2*8); + encrypt_round(R2,R3,R0,R1,3*8); + encrypt_round(R0,R1,R2,R3,4*8); + encrypt_round(R2,R3,R0,R1,5*8); + encrypt_round(R0,R1,R2,R3,6*8); + encrypt_round(R2,R3,R0,R1,7*8); + encrypt_round(R0,R1,R2,R3,8*8); + encrypt_round(R2,R3,R0,R1,9*8); + encrypt_round(R0,R1,R2,R3,10*8); + encrypt_round(R2,R3,R0,R1,11*8); + encrypt_round(R0,R1,R2,R3,12*8); + encrypt_round(R2,R3,R0,R1,13*8); + encrypt_round(R0,R1,R2,R3,14*8); + encrypt_last_round(R2,R3,R0,R1,15*8); + + output_whitening(%eax,%ebp,c_offset) + output_whitening(%ebx,%ebp,d_offset) + output_whitening(%ecx,%ebp,a_offset) + output_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + ret + +twofish_dec_blk: + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + + mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ + add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ + mov in_blk+16(%esp),%edi /* input adress in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + output_whitening(%eax,%ebp,a_offset) + output_whitening(%ebx,%ebp,b_offset) + ror $16, %ebx + output_whitening(%ecx,%ebp,c_offset) + output_whitening(%edx,%ebp,d_offset) + rol $1, %ecx + + decrypt_round(R0,R1,R2,R3,15*8); + decrypt_round(R2,R3,R0,R1,14*8); + decrypt_round(R0,R1,R2,R3,13*8); + decrypt_round(R2,R3,R0,R1,12*8); + decrypt_round(R0,R1,R2,R3,11*8); + decrypt_round(R2,R3,R0,R1,10*8); + decrypt_round(R0,R1,R2,R3,9*8); + decrypt_round(R2,R3,R0,R1,8*8); + decrypt_round(R0,R1,R2,R3,7*8); + decrypt_round(R2,R3,R0,R1,6*8); + decrypt_round(R0,R1,R2,R3,5*8); + decrypt_round(R2,R3,R0,R1,4*8); + decrypt_round(R0,R1,R2,R3,3*8); + decrypt_round(R2,R3,R0,R1,2*8); + decrypt_round(R0,R1,R2,R3,1*8); + decrypt_last_round(R2,R3,R0,R1,0); + + input_whitening(%eax,%ebp,c_offset) + input_whitening(%ebx,%ebp,d_offset) + input_whitening(%ecx,%ebp,a_offset) + input_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + ret diff --git a/arch/x86/crypto/twofish_32.c b/arch/x86/crypto/twofish_32.c new file mode 100644 index 00000000000..e3004dfe9c7 --- /dev/null +++ b/arch/x86/crypto/twofish_32.c @@ -0,0 +1,97 @@ +/* + * Glue Code for optimized 586 assembler version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. + */ + +#include +#include +#include +#include +#include + + +asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(tfm, dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(tfm, dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", + .cra_driver_name = "twofish-i586", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); +MODULE_ALIAS("twofish"); diff --git a/arch/x86_64/crypto/Makefile b/arch/x86_64/crypto/Makefile index fbd34ac2cda..b1bcf7c6302 100644 --- a/arch/x86_64/crypto/Makefile +++ b/arch/x86_64/crypto/Makefile @@ -1,5 +1,5 @@ ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/crypto/Makefile_32 +include ${srctree}/arch/x86/crypto/Makefile_32 else include ${srctree}/arch/x86_64/crypto/Makefile_64 endif -- cgit v1.2.2