Linux-2.6.12-rc2 (v2.6.12-rc2)

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386/crypto
3 files changed, 905 insertions, 0 deletions
diff --git a/arch/i386/crypto/Makefile b/arch/i386/crypto/Makefile
new file mode 100644
index 000000000000..103c353d0a63
--- /dev/null
+++ b/arch/i386/crypto/Makefile
@@ -0,0 +1,9 @@
+# 
+# i386/crypto/Makefile 
+# 
+# Arch-specific CryptoAPI modules.
+# 
+obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
+aes-i586-y := aes-i586-asm.o aes.o
diff --git a/arch/i386/crypto/aes-i586-asm.S b/arch/i386/crypto/aes-i586-asm.S
new file mode 100644
index 000000000000..7b73c67cb4e8
--- /dev/null
+++ b/arch/i386/crypto/aes-i586-asm.S
@@ -0,0 +1,376 @@
+// -------------------------------------------------------------------------
+// Copyright (c) 2001, Dr Brian Gladman <                 >, Worcester, UK.
+// All rights reserved.
+//
+// LICENSE TERMS
+//
+// The free distribution and use of this software in both source and binary 
+// form is allowed (with or without changes) provided that:
+//
+//   1. distributions of this source code include the above copyright 
+//      notice, this list of conditions and the following disclaimer;
+//
+//   2. distributions in binary form include the above copyright
+//      notice, this list of conditions and the following disclaimer
+//      in the documentation and/or other associated materials;
+//
+//   3. the copyright holder's name is not used to endorse products 
+//      built using this software without specific written permission.
+//
+//
+// ALTERNATIVELY, provided that this notice is retained in full, this product
+// may be distributed under the terms of the GNU General Public License (GPL),
+// in which case the provisions of the GPL apply INSTEAD OF those given above.
+//
+// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
+// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+// DISCLAIMER
+//
+// This software is provided 'as is' with no explicit or implied warranties
+// in respect of its properties including, but not limited to, correctness 
+// and fitness for purpose.
+// -------------------------------------------------------------------------
+// Issue Date: 29/07/2002
+.file "aes-i586-asm.S"
+.text
+// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
+// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
+        
+#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
+// offsets to parameters with one register pushed onto stack
+#define in_blk    8  // input byte array address parameter
+#define out_blk  12  // output byte array address parameter
+#define ctx      16  // AES context structure
+// offsets in context structure (must agree with struct aes_ctx in aes.c)
+#define ekey     0   // encryption key schedule base address
+#define nrnd   256   // number of rounds
+#define dkey   260   // decryption key schedule base address
+// register mapping for encrypt and decrypt subroutines
+#define r0  eax
+#define r1  ebx
+#define r2  ecx
+#define r3  edx
+#define r4  esi   // no l()/h() byte forms defined below: cannot serve as idx
+#define r5  edi   // no l()/h() byte forms defined below: cannot serve as idx
+#define eaxl  al
+#define eaxh  ah
+#define ebxl  bl
+#define ebxh  bh
+#define ecxl  cl
+#define ecxh  ch
+#define edxl  dl
+#define edxh  dh
+#define _h(reg) reg##h
+#define h(reg) _h(reg)   // second-lowest byte register of reg, e.g. h(r0) -> ah
+#define _l(reg) reg##l
+#define l(reg) _l(reg)   // lowest byte register of reg, e.g. l(r0) -> al
+// This macro takes a 32-bit word representing a column and uses
+// each of its four bytes to index into four tables of 256 32-bit
+// words to obtain values that are then xored into the four output
+// registers a1..a4 (byte 0 -> a1, byte 1 -> a2, byte 2 -> a3, byte 3 -> a4).
+// Parameters:
+// table table base address (four consecutive tables, tlen bytes each)
+//   a1  out_state[0]
+//   a2  out_state[1]
+//   a3  out_state[2]
+//   a4  out_state[3]
+//   idx input register for the round (destroyed); needs l()/h() byte forms
+//   tmp scratch register for the round
+// (a sched key schedule argument is taken only by do_fcol and do_icol)
+#define do_col(table, a1,a2,a3,a4, idx, tmp)    \
+        movzx   %l(idx),%tmp;                   \
+        xor     table(,%tmp,4),%a1;             \
+        movzx   %h(idx),%tmp;                   \
+        shr     $16,%idx;                       \
+        xor     table+tlen(,%tmp,4),%a2;        \
+        movzx   %l(idx),%tmp;                   \
+        movzx   %h(idx),%idx;                   \
+        xor     table+2*tlen(,%tmp,4),%a3;      \
+        xor     table+3*tlen(,%idx,4),%a4;
+// initialise output registers from the key schedule (forward rounds: a1,a4,a3,a2 <- sched words 0,1,2,3)
+// NB1: original value of a3 is in idx on exit
+// NB2: original values of a1,a2,a4 aren't used
+#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
+        mov     0 sched,%a1;                    \
+        movzx   %l(idx),%tmp;                   \
+        mov     12 sched,%a2;                   \
+        xor     table(,%tmp,4),%a1;             \
+        mov     4 sched,%a4;                    \
+        movzx   %h(idx),%tmp;                   \
+        shr     $16,%idx;                       \
+        xor     table+tlen(,%tmp,4),%a2;        \
+        movzx   %l(idx),%tmp;                   \
+        movzx   %h(idx),%idx;                   \
+        xor     table+3*tlen(,%idx,4),%a4;      \
+        mov     %a3,%idx;                       \
+        mov     8 sched,%a3;                    \
+        xor     table+2*tlen(,%tmp,4),%a3;
+// initialise output registers from the key schedule (inverse rounds: a1,a2,a3,a4 <- sched words 0,1,2,3)
+// NB1: original value of a3 is in idx on exit
+// NB2: original values of a1,a2,a4 aren't used
+#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
+        mov     0 sched,%a1;                    \
+        movzx   %l(idx),%tmp;                   \
+        mov     4 sched,%a2;                    \
+        xor     table(,%tmp,4),%a1;             \
+        mov     12 sched,%a4;                   \
+        movzx   %h(idx),%tmp;                   \
+        shr     $16,%idx;                       \
+        xor     table+tlen(,%tmp,4),%a2;        \
+        movzx   %l(idx),%tmp;                   \
+        movzx   %h(idx),%idx;                   \
+        xor     table+3*tlen(,%idx,4),%a4;      \
+        mov     %a3,%idx;                       \
+        mov     8 sched,%a3;                    \
+        xor     table+2*tlen(,%tmp,4),%a3;
+// save(slot, reg) / restore(reg, slot) use the dword at 4*slot(%esp); original Gladman had conditional saves to MMX regs.
+#define save(a1, a2)            \
+        mov     %a2,4*a1(%esp)
+#define restore(a1, a2)         \
+        mov     4*a2(%esp),%a1
+// These macros perform a forward encryption cycle. They are entered with
+// the previous round column values in four registers (listed per macro)
+// and exit with the new column values in four registers, using the stack
+// for temporary storage.
+// round column values
+// on entry: r0,r1,r4,r5
+// on exit:  r2,r1,r4,r5
+#define fwd_rnd1(arg, table)                                            \
+        save   (0,r1);                                                  \
+        save   (1,r5);                                                  \
+                                                                        \
+        /* compute new column values */                                 \
+        do_fcol(table, r2,r5,r4,r1, r0,r3, arg);        /* idx=r0 */    \
+        do_col (table, r4,r1,r2,r5, r0,r3);             /* idx=r4 */    \
+        restore(r0,0);                                                  \
+        do_col (table, r1,r2,r5,r4, r0,r3);             /* idx=r1 */    \
+        restore(r0,1);                                                  \
+        do_col (table, r5,r4,r1,r2, r0,r3);             /* idx=r5 */
+// round column values (fwd_rnd1 with the roles of r0 and r2 exchanged)
+// on entry: r2,r1,r4,r5
+// on exit:  r0,r1,r4,r5
+#define fwd_rnd2(arg, table)                                            \
+        save   (0,r1);                                                  \
+        save   (1,r5);                                                  \
+                                                                        \
+        /* compute new column values */                                 \
+        do_fcol(table, r0,r5,r4,r1, r2,r3, arg);        /* idx=r2 */    \
+        do_col (table, r4,r1,r0,r5, r2,r3);             /* idx=r4 */    \
+        restore(r2,0);                                                  \
+        do_col (table, r1,r0,r5,r4, r2,r3);             /* idx=r1 */    \
+        restore(r2,1);                                                  \
+        do_col (table, r5,r4,r1,r0, r2,r3);             /* idx=r5 */
+// These macros perform an inverse (decryption) cycle. They are entered with
+// the previous round column values in four registers (listed per macro)
+// and exit with the new column values in four registers, using the stack
+// for temporary storage.
+// round column values
+// on entry: r0,r1,r4,r5
+// on exit:  r2,r1,r4,r5
+#define inv_rnd1(arg, table)                                            \
+        save    (0,r1);                                                 \
+        save    (1,r5);                                                 \
+                                                                        \
+        /* compute new column values */                                 \
+        do_icol(table, r2,r1,r4,r5, r0,r3, arg);        /* idx=r0 */    \
+        do_col (table, r4,r5,r2,r1, r0,r3);             /* idx=r4 */    \
+        restore(r0,0);                                                  \
+        do_col (table, r1,r4,r5,r2, r0,r3);             /* idx=r1 */    \
+        restore(r0,1);                                                  \
+        do_col (table, r5,r2,r1,r4, r0,r3);             /* idx=r5 */
+// round column values (inv_rnd1 with the roles of r0 and r2 exchanged)
+// on entry: r2,r1,r4,r5
+// on exit:  r0,r1,r4,r5
+#define inv_rnd2(arg, table)                                            \
+        save    (0,r1);                                                 \
+        save    (1,r5);                                                 \
+                                                                        \
+        /* compute new column values */                                 \
+        do_icol(table, r0,r1,r4,r5, r2,r3, arg);        /* idx=r2 */    \
+        do_col (table, r4,r5,r0,r1, r2,r3);             /* idx=r4 */    \
+        restore(r2,0);                                                  \
+        do_col (table, r1,r4,r5,r0, r2,r3);             /* idx=r1 */    \
+        restore(r2,1);                                                  \
+        do_col (table, r5,r0,r1,r4, r2,r3);             /* idx=r5 */
+// AES (Rijndael) Encryption Subroutine: encrypts the 16 bytes at in_blk into out_blk using the ekey schedule of ctx
+.global  aes_enc_blk
+.extern  ft_tab
+.extern  fl_tab
+.align 4
+aes_enc_blk:
+        push    %ebp
+        mov     ctx(%esp),%ebp      // pointer to context
+// CAUTION: the order and the values used in these assigns 
+// rely on the register mappings
+1:      push    %ebx
+        mov     in_blk+4(%esp),%r2
+        push    %esi
+        mov     nrnd(%ebp),%r3   // number of rounds
+        push    %edi
+#if ekey != 0
+        lea     ekey(%ebp),%ebp  // key pointer
+#endif
+// input four columns and xor in first round key
+        mov     (%r2),%r0
+        mov     4(%r2),%r1
+        mov     8(%r2),%r4
+        mov     12(%r2),%r5
+        xor     (%ebp),%r0
+        xor     4(%ebp),%r1
+        xor     8(%ebp),%r4
+        xor     12(%ebp),%r5
+        sub     $8,%esp           // space for register saves on stack
+        add     $16,%ebp          // increment to next round key
+        sub     $10,%r3          
+        je      4f              // 10 rounds for 128-bit key
+        add     $32,%ebp        // two extra rounds: bias key pointer by two round keys
+        sub     $2,%r3
+        je      3f              // 12 rounds for 192-bit key
+        add     $32,%ebp        // two more rounds: bias key pointer by two round keys
+2:      fwd_rnd1( -64(%ebp) ,ft_tab)    // 14 rounds for 256-bit key
+        fwd_rnd2( -48(%ebp) ,ft_tab)
+3:      fwd_rnd1( -32(%ebp) ,ft_tab)    // 12 rounds for 192-bit key
+        fwd_rnd2( -16(%ebp) ,ft_tab)
+4:      fwd_rnd1(    (%ebp) ,ft_tab)    // 10 rounds for 128-bit key
+        fwd_rnd2( +16(%ebp) ,ft_tab)
+        fwd_rnd1( +32(%ebp) ,ft_tab)
+        fwd_rnd2( +48(%ebp) ,ft_tab)
+        fwd_rnd1( +64(%ebp) ,ft_tab)
+        fwd_rnd2( +80(%ebp) ,ft_tab)
+        fwd_rnd1( +96(%ebp) ,ft_tab)
+        fwd_rnd2(+112(%ebp) ,ft_tab)
+        fwd_rnd1(+128(%ebp) ,ft_tab)
+        fwd_rnd2(+144(%ebp) ,fl_tab)    // last round uses a different table
+// move final values to the output array.  CAUTION: the 
+// order of these assigns rely on the register mappings
+        add     $8,%esp
+        mov     out_blk+12(%esp),%ebp
+        mov     %r5,12(%ebp)
+        pop     %edi
+        mov     %r4,8(%ebp)
+        pop     %esi
+        mov     %r1,4(%ebp)
+        pop     %ebx
+        mov     %r0,(%ebp)
+        pop     %ebp
+        mov     $1,%eax         // returns 1 in eax (the C prototype in aes.c declares void)
+        ret
+// AES (Rijndael) Decryption Subroutine: decrypts the 16 bytes at in_blk into out_blk using the dkey schedule of ctx
+.global  aes_dec_blk
+.extern  it_tab
+.extern  il_tab
+.align 4
+aes_dec_blk:
+        push    %ebp
+        mov     ctx(%esp),%ebp       // pointer to context
+// CAUTION: the order and the values used in these assigns 
+// rely on the register mappings
+1:      push    %ebx
+        mov     in_blk+4(%esp),%r2
+        push    %esi
+        mov     nrnd(%ebp),%r3   // number of rounds
+        push    %edi
+#if dkey != 0
+        lea     dkey(%ebp),%ebp  // key pointer
+#endif
+        mov     %r3,%r0
+        shl     $4,%r0
+        add     %r0,%ebp         // ebp = dkey + 16*nrnd: start from the last round key
+        
+// input four columns and xor in first round key
+        mov     (%r2),%r0
+        mov     4(%r2),%r1
+        mov     8(%r2),%r4
+        mov     12(%r2),%r5
+        xor     (%ebp),%r0
+        xor     4(%ebp),%r1
+        xor     8(%ebp),%r4
+        xor     12(%ebp),%r5
+        sub     $8,%esp         // space for register saves on stack
+        sub     $16,%ebp        // step back to next round key
+        sub     $10,%r3          
+        je      4f              // 10 rounds for 128-bit key
+        sub     $32,%ebp        // two extra rounds: bias key pointer by two round keys
+        sub     $2,%r3
+        je      3f              // 12 rounds for 192-bit key
+        sub     $32,%ebp        // two more rounds: bias key pointer by two round keys
+2:      inv_rnd1( +64(%ebp), it_tab)    // 14 rounds for 256-bit key
+        inv_rnd2( +48(%ebp), it_tab)
+3:      inv_rnd1( +32(%ebp), it_tab)    // 12 rounds for 192-bit key
+        inv_rnd2( +16(%ebp), it_tab)
+4:      inv_rnd1(    (%ebp), it_tab)    // 10 rounds for 128-bit key
+        inv_rnd2( -16(%ebp), it_tab)
+        inv_rnd1( -32(%ebp), it_tab)
+        inv_rnd2( -48(%ebp), it_tab)
+        inv_rnd1( -64(%ebp), it_tab)
+        inv_rnd2( -80(%ebp), it_tab)
+        inv_rnd1( -96(%ebp), it_tab)
+        inv_rnd2(-112(%ebp), it_tab)
+        inv_rnd1(-128(%ebp), it_tab)
+        inv_rnd2(-144(%ebp), il_tab)    // last round uses a different table
+// move final values to the output array.  CAUTION: the 
+// order of these assigns rely on the register mappings
+        add     $8,%esp
+        mov     out_blk+12(%esp),%ebp
+        mov     %r5,12(%ebp)
+        pop     %edi
+        mov     %r4,8(%ebp)
+        pop     %esi
+        mov     %r1,4(%ebp)
+        pop     %ebx
+        mov     %r0,(%ebp)
+        pop     %ebp
+        mov     $1,%eax         // returns 1 in eax (the C prototype in aes.c declares void)
+        ret
diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c
new file mode 100644
index 000000000000..1019430fc1f1
--- /dev/null
+++ b/arch/i386/crypto/aes.c
@@ -0,0 +1,520 @@
+/* 
+ * 
+ * Glue Code for optimized 586 assembler version of AES
+ *
+ * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
+ * All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software in both source and binary
+ * form is allowed (with or without changes) provided that:
+ *
+ *   1. distributions of this source code include the above copyright
+ *      notice, this list of conditions and the following disclaimer;
+ *
+ *   2. distributions in binary form include the above copyright
+ *      notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other associated materials;
+ *
+ *   3. the copyright holder's name is not used to endorse products
+ *      built using this software without specific written permission.
+ *
+ * ALTERNATIVELY, provided that this notice is retained in full, this product
+ * may be distributed under the terms of the GNU General Public License (GPL),
+ * in which case the provisions of the GPL apply INSTEAD OF those given above.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ *
+ * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
+ * 2.5 API).
+ * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
+ * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/linkage.h>
+asmlinkage void aes_enc_blk(const u8 *src, u8 *dst, void *ctx);
+asmlinkage void aes_dec_blk(const u8 *src, u8 *dst, void *ctx);
+#define AES_MIN_KEY_SIZE        16
+#define AES_MAX_KEY_SIZE        32
+#define AES_BLOCK_SIZE          16      /* aes_enc_blk/aes_dec_blk move four 32-bit words per call */
+#define AES_KS_LENGTH           (4 * AES_BLOCK_SIZE)    /* u32 entries per key schedule; parenthesized so the value survives use inside larger expressions */
+#define RC_LENGTH               29      /* number of entries in rcon_tab */
+struct aes_ctx {                        /* layout is hard-coded in aes-i586-asm.S (ekey/nrnd/dkey offsets) */
+        u32 ekey[AES_KS_LENGTH];        /* encryption key schedule; asm offset ekey = 0 */
+        u32 rounds;                     /* round count; read by the asm as nrnd at byte offset 256 */
+        u32 dkey[AES_KS_LENGTH];        /* decryption key schedule; asm offset dkey = 260 */
+};
+#define WPOLY 0x011b    /* modular polynomial used by gen_tabs() for the GF(2^8) tables */
+#define u32_in(x) le32_to_cpu(*(const u32 *)(x))        /* 32-bit load through a cast pointer, passed through le32_to_cpu */
+#define bytes2word(b0, b1, b2, b3)  \
+        (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))       /* b0 ends up in the least significant byte */
+/* define the finite field multiplies required for Rijndael; they expand to lookups in the pow[]/log[] arrays local to gen_tabs() */
+#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
+#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
+#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
+#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
+#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
+#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
+#define fi(x) ((x) ?   pow[255 - log[x]]: 0)    /* field inverse; 0 maps to 0 */
+static inline u32 upr(u32 x, int n)     /* rotate x left by n bytes; n must be in 0..3 */
+{
+        return (x << (8 * n)) | (x >> ((32 - 8 * n) & 31));     /* "& 31" makes n == 0 a 0-bit shift instead of an undefined 32-bit one */
+}
+static inline u8 bval(u32 x, int n)     /* byte n of x, where n == 0 is the least significant byte */
+{
+        return x >> 8 * n;      /* the u8 return type truncates to the low byte */
+}
+/* The forward and inverse affine transformations used in the S-box; both assign a variable named w in the expanding scope */
+#define fwd_affine(x) \
+        (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
+#define inv_affine(x) \
+        (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
+static u32 rcon_tab[RC_LENGTH];        /* round constants, filled by gen_tabs() */
+u32 ft_tab[4][256];                    /* forward round table; non-static because aes-i586-asm.S references it */
+u32 fl_tab[4][256];                    /* last forward round table, also read by ls_box(); referenced from the asm */
+static u32 ls_tab[4][256];             /* gen_tabs() stores the same values here as in fl_tab */
+static u32 im_tab[4][256];             /* inverse mix column table, read through inv_mcol() */
+u32 il_tab[4][256];                    /* last inverse round table; referenced from the asm */
+u32 it_tab[4][256];                    /* inverse round table; referenced from the asm */
+static void gen_tabs(void)      /* fills rcon_tab and the ft/fl/ls/im/it/il lookup tables */
+{
+        u32 i, w;
+        u8 pow[512], log[256];  /* pow[] holds the power sequence twice so pow[log[x] + k] needs no modulo */
+        /*
+         * log and power tables for GF(2^8) finite field with
+         * WPOLY as modular polynomial - the simplest primitive
+         * root is 0x03, used here to generate the tables.
+         */
+        i = 0; w = 1; 
+        
+        do {
+                pow[i] = (u8)w;
+                pow[i + 255] = (u8)w;
+                log[w] = (u8)i++;
+                w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);        /* w = 3 * w in the field */
+        } while (w != 1);
+        
+        for(i = 0, w = 1; i < RC_LENGTH; ++i) {
+                rcon_tab[i] = bytes2word(w, 0, 0, 0);   /* successive powers of 2 in the low byte */
+                w = f2(w);
+        }
+        for(i = 0; i < 256; ++i) {
+                u8 b;
+                
+                b = fwd_affine(fi((u8)i));      /* field inverse followed by the forward affine map */
+                w = bytes2word(f2(b), b, b, f3(b));
+                /* tables for a normal encryption round */
+                ft_tab[0][i] = w;
+                ft_tab[1][i] = upr(w, 1);
+                ft_tab[2][i] = upr(w, 2);
+                ft_tab[3][i] = upr(w, 3);
+                w = bytes2word(b, 0, 0, 0);
+                
+                /*
+                 * tables for last encryption round
+                 * (may also be used in the key schedule)
+                 */
+                fl_tab[0][i] = w;
+                fl_tab[1][i] = upr(w, 1);
+                fl_tab[2][i] = upr(w, 2);
+                fl_tab[3][i] = upr(w, 3);
+                
+                /*
+                 * table for key schedule if fl_tab above is
+                 * not of the required form
+                 */
+                ls_tab[0][i] = w;
+                ls_tab[1][i] = upr(w, 1);
+                ls_tab[2][i] = upr(w, 2);
+                ls_tab[3][i] = upr(w, 3);
+                
+                b = fi(inv_affine((u8)i));      /* inverse affine map followed by the field inverse */
+                w = bytes2word(fe(b), f9(b), fd(b), fb(b));
+                /* tables for the inverse mix column operation  */
+                im_tab[0][b] = w;       /* indexed by b, not i */
+                im_tab[1][b] = upr(w, 1);
+                im_tab[2][b] = upr(w, 2);
+                im_tab[3][b] = upr(w, 3);
+                /* tables for a normal decryption round */
+                it_tab[0][i] = w;
+                it_tab[1][i] = upr(w,1);
+                it_tab[2][i] = upr(w,2);
+                it_tab[3][i] = upr(w,3);
+                w = bytes2word(b, 0, 0, 0);
+                
+                /* tables for last decryption round */
+                il_tab[0][i] = w;
+                il_tab[1][i] = upr(w,1);
+                il_tab[2][i] = upr(w,2);
+                il_tab[3][i] = upr(w,3);
+    }
+}
+#define four_tables(x,tab,vf,rf,c)              \
+(       tab[0][bval(vf(x,0,c),rf(0,c))] ^       \
+        tab[1][bval(vf(x,1,c),rf(1,c))] ^       \
+        tab[2][bval(vf(x,2,c),rf(2,c))] ^       \
+        tab[3][bval(vf(x,3,c),rf(3,c))]         \
+)
+#define vf1(x,r,c)  (x)         /* value selector: the word itself */
+#define rf1(r,c)    (r)         /* byte selector: byte r */
+#define rf2(r,c)    ((r-c)&3)   /* byte selector: byte (r-c) mod 4 */
+#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)     /* xor of im_tab[r][byte r of x] over r = 0..3 */
+#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)     /* xor of fl_tab[r][byte (r-c)&3 of x] over r = 0..3 */
+#define ff(x) inv_mcol(x)
+#define ke4(k,i)                                                        \
+{                                                                       \
+        k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];            \
+        k[4*(i)+5] = ss[1] ^= ss[0];                                    \
+        k[4*(i)+6] = ss[2] ^= ss[1];                                    \
+        k[4*(i)+7] = ss[3] ^= ss[2];                                    \
+}
+#define kel4(k,i)                                                       \
+{                                                                       \
+        k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i];            \
+        k[4*(i)+5] = ss[1] ^= ss[0];                                    \
+        k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2];       \
+}
+#define ke6(k,i)                                                        \
+{                                                                       \
+        k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];           \
+        k[6*(i)+ 7] = ss[1] ^= ss[0];                                   \
+        k[6*(i)+ 8] = ss[2] ^= ss[1];                                   \
+        k[6*(i)+ 9] = ss[3] ^= ss[2];                                   \
+        k[6*(i)+10] = ss[4] ^= ss[3];                                   \
+        k[6*(i)+11] = ss[5] ^= ss[4];                                   \
+}
+#define kel6(k,i)                                                       \
+{                                                                       \
+        k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];           \
+        k[6*(i)+ 7] = ss[1] ^= ss[0];                                   \
+        k[6*(i)+ 8] = ss[2] ^= ss[1];                                   \
+        k[6*(i)+ 9] = ss[3] ^= ss[2];                                   \
+}
+#define ke8(k,i)                                                        \
+{                                                                       \
+        k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];           \
+        k[8*(i)+ 9] = ss[1] ^= ss[0];                                   \
+        k[8*(i)+10] = ss[2] ^= ss[1];                                   \
+        k[8*(i)+11] = ss[3] ^= ss[2];                                   \
+        k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0);                         \
+        k[8*(i)+13] = ss[5] ^= ss[4];                                   \
+        k[8*(i)+14] = ss[6] ^= ss[5];                                   \
+        k[8*(i)+15] = ss[7] ^= ss[6];                                   \
+}
+#define kel8(k,i)                                                       \
+{                                                                       \
+        k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];           \
+        k[8*(i)+ 9] = ss[1] ^= ss[0];                                   \
+        k[8*(i)+10] = ss[2] ^= ss[1];                                   \
+        k[8*(i)+11] = ss[3] ^= ss[2];                                   \
+}
+#define kdf4(k,i)                                                       \
+{                                                                       \
+        ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3];                          \
+        ss[1] = ss[1] ^ ss[3];                                          \
+        ss[2] = ss[2] ^ ss[3];                                          \
+        ss[3] = ss[3];                                                  \
+        ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
+        ss[i % 4] ^= ss[4];                                             \
+        ss[4] ^= k[4*(i)];                                              \
+        k[4*(i)+4] = ff(ss[4]);                                         \
+        ss[4] ^= k[4*(i)+1];                                            \
+        k[4*(i)+5] = ff(ss[4]);                                         \
+        ss[4] ^= k[4*(i)+2];                                            \
+        k[4*(i)+6] = ff(ss[4]);                                         \
+        ss[4] ^= k[4*(i)+3];                                            \
+        k[4*(i)+7] = ff(ss[4]);                                         \
+}
+#define kd4(k,i)                                                        \
+{                                                                       \
+        ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
+        ss[i % 4] ^= ss[4];                                             \
+        ss[4] = ff(ss[4]);                                              \
+        k[4*(i)+4] = ss[4] ^= k[4*(i)];                                 \
+        k[4*(i)+5] = ss[4] ^= k[4*(i)+1];                               \
+        k[4*(i)+6] = ss[4] ^= k[4*(i)+2];                               \
+        k[4*(i)+7] = ss[4] ^= k[4*(i)+3];                               \
+}
+#define kdl4(k,i)                                                       \
+{                                                                       \
+        ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i];                 \
+        ss[i % 4] ^= ss[4];                                             \
+        k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3];                  \
+        k[4*(i)+5] = ss[1] ^ ss[3];                                     \
+        k[4*(i)+6] = ss[0];                                             \
+        k[4*(i)+7] = ss[1];                                             \
+}
+#define kdf6(k,i)                                                       \
+{                                                                       \
+        ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];                         \
+        k[6*(i)+ 6] = ff(ss[0]);                                        \
+        ss[1] ^= ss[0];                                                 \
+        k[6*(i)+ 7] = ff(ss[1]);                                        \
+        ss[2] ^= ss[1];                                                 \
+        k[6*(i)+ 8] = ff(ss[2]);                                        \
+        ss[3] ^= ss[2];                                                 \
+        k[6*(i)+ 9] = ff(ss[3]);                                        \
+        ss[4] ^= ss[3];                                                 \
+        k[6*(i)+10] = ff(ss[4]);                                        \
+        ss[5] ^= ss[4];                                                 \
+        k[6*(i)+11] = ff(ss[5]);                                        \
+}
+#define kd6(k,i)                                                        \
+{                                                                       \
+        ss[6] = ls_box(ss[5],3) ^ rcon_tab[i];                          \
+        ss[0] ^= ss[6]; ss[6] = ff(ss[6]);                              \
+        k[6*(i)+ 6] = ss[6] ^= k[6*(i)];                                \
+        ss[1] ^= ss[0];                                                 \
+        k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1];                             \
+        ss[2] ^= ss[1];                                                 \
+        k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2];                             \
+        ss[3] ^= ss[2];                                                 \
+        k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3];                             \
+        ss[4] ^= ss[3];                                                 \
+        k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4];                             \
+        ss[5] ^= ss[4];                                                 \
+        k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5];                             \
+}
+#define kdl6(k,i)                                                       \
+{                                                                       \
+        ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i];                         \
+        k[6*(i)+ 6] = ss[0];                                            \
+        ss[1] ^= ss[0];                                                 \
+        k[6*(i)+ 7] = ss[1];                                            \
+        ss[2] ^= ss[1];                                                 \
+        k[6*(i)+ 8] = ss[2];                                            \
+        ss[3] ^= ss[2];                                                 \
+        k[6*(i)+ 9] = ss[3];                                            \
+}
+/*
+ * kdf8 - first step of the decryption key schedule for 32-byte keys.
+ *
+ * Advances all eight working words ss[0..7] and stores ff() of each updated
+ * word at k[8*i+8 .. 8*i+15].  ss[0] is mixed with ls_box(ss[7],3) and
+ * rcon_tab[i]; ss[4] is mixed with ls_box(ss[3],0); every other word is
+ * XOR-ed with its predecessor.
+ *
+ * Relies on a "u32 ss[]" array being in scope at the call site.
+ * NOTE(review): ff() and ls_box() are defined outside this chunk.
+ * Wrapped in do { } while (0) so that "kdf8(k,i);" is exactly one statement.
+ */
+#define kdf8(k,i)                                                       \
+do {                                                                    \
+        ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];                         \
+        k[8*(i)+ 8] = ff(ss[0]);                                        \
+        ss[1] ^= ss[0];                                                 \
+        k[8*(i)+ 9] = ff(ss[1]);                                        \
+        ss[2] ^= ss[1];                                                 \
+        k[8*(i)+10] = ff(ss[2]);                                        \
+        ss[3] ^= ss[2];                                                 \
+        k[8*(i)+11] = ff(ss[3]);                                        \
+        ss[4] ^= ls_box(ss[3],0);                                       \
+        k[8*(i)+12] = ff(ss[4]);                                        \
+        ss[5] ^= ss[4];                                                 \
+        k[8*(i)+13] = ff(ss[5]);                                        \
+        ss[6] ^= ss[5];                                                 \
+        k[8*(i)+14] = ff(ss[6]);                                        \
+        ss[7] ^= ss[6];                                                 \
+        k[8*(i)+15] = ff(ss[7]);                                        \
+} while (0)
+/*
+ * kd8 - middle step of the decryption key schedule for 32-byte keys.
+ *
+ * ss[0..7] are advanced exactly as in the other 8-word steps, but the
+ * stored words are built incrementally: ff() is applied only to the two
+ * ls_box-derived deltas, and each word written to k[8*i+8 .. 8*i+15] is the
+ * running value __g XOR-ed with the k[] word eight slots earlier
+ * (k[8*i .. 8*i+7]).
+ *
+ * Relies on a "u32 ss[]" array being in scope at the call site and reads
+ * the eight k[] words written by the previous step.
+ * Wrapped in do { } while (0) so that "kd8(k,i);" is exactly one statement.
+ */
+#define kd8(k,i)                                                        \
+do {                                                                    \
+        u32 __g = ls_box(ss[7],3) ^ rcon_tab[i];                        \
+        ss[0] ^= __g;                                                   \
+        __g = ff(__g);                                                  \
+        k[8*(i)+ 8] = __g ^= k[8*(i)];                                  \
+        ss[1] ^= ss[0];                                                 \
+        k[8*(i)+ 9] = __g ^= k[8*(i)+ 1];                               \
+        ss[2] ^= ss[1];                                                 \
+        k[8*(i)+10] = __g ^= k[8*(i)+ 2];                               \
+        ss[3] ^= ss[2];                                                 \
+        k[8*(i)+11] = __g ^= k[8*(i)+ 3];                               \
+        __g = ls_box(ss[3],0);                                          \
+        ss[4] ^= __g;                                                   \
+        __g = ff(__g);                                                  \
+        k[8*(i)+12] = __g ^= k[8*(i)+ 4];                               \
+        ss[5] ^= ss[4];                                                 \
+        k[8*(i)+13] = __g ^= k[8*(i)+ 5];                               \
+        ss[6] ^= ss[5];                                                 \
+        k[8*(i)+14] = __g ^= k[8*(i)+ 6];                               \
+        ss[7] ^= ss[6];                                                 \
+        k[8*(i)+15] = __g ^= k[8*(i)+ 7];                               \
+} while (0)
+/*
+ * kdl8 - last step of the decryption key schedule for 32-byte keys.
+ *
+ * Advances ss[0..3] (ss[0] is mixed with ls_box(ss[7],3) and rcon_tab[i],
+ * each following word is XOR-ed with its predecessor) and stores the four
+ * words, without ff(), at k[8*i+8 .. 8*i+11].
+ *
+ * Relies on a "u32 ss[]" array being in scope at the call site.
+ * Wrapped in do { } while (0) so that "kdl8(k,i);" is exactly one statement.
+ */
+#define kdl8(k,i)                                                       \
+do {                                                                    \
+        ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i];                         \
+        k[8*(i)+ 8] = ss[0];                                            \
+        ss[1] ^= ss[0];                                                 \
+        k[8*(i)+ 9] = ss[1];                                            \
+        ss[2] ^= ss[1];                                                 \
+        k[8*(i)+10] = ss[2];                                            \
+        ss[3] ^= ss[2];                                                 \
+        k[8*(i)+11] = ss[3];                                            \
+} while (0)
+/*
+ * aes_set_key - expand a raw AES key into encryption and decryption schedules.
+ * @ctx_arg: struct aes_ctx to fill; ekey[], dkey[] and rounds are written
+ * @in_key:  raw key bytes; key_len bytes are read via u32_in()
+ * @key_len: key length in bytes; must be 16, 24 or 32
+ * @flags:   CRYPTO_TFM_RES_BAD_KEY_LEN is OR-ed in when key_len is rejected
+ *
+ * Returns 0 on success or -EINVAL for an unsupported key length.  The length
+ * is checked before in_key is read or the context is written, so a rejected
+ * key leaves *ctx_arg untouched.
+ */
+static int
+aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
+{
+        int i;
+        u32 ss[8];      /* working key words, updated in place by the k* macros */
+        struct aes_ctx *ctx = ctx_arg;
+
+        /* reject bad lengths up front: nothing is read or written on failure */
+        if (key_len != 16 && key_len != 24 && key_len != 32) {
+                *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+                return -EINVAL;
+        }
+
+        /* encryption schedule */
+
+        ctx->ekey[0] = ss[0] = u32_in(in_key);
+        ctx->ekey[1] = ss[1] = u32_in(in_key + 4);
+        ctx->ekey[2] = ss[2] = u32_in(in_key + 8);
+        ctx->ekey[3] = ss[3] = u32_in(in_key + 12);
+
+        switch (key_len) {
+        case 16:
+                for (i = 0; i < 9; i++)
+                        ke4(ctx->ekey, i);
+                kel4(ctx->ekey, 9);
+                ctx->rounds = 10;
+                break;
+
+        case 24:
+                ctx->ekey[4] = ss[4] = u32_in(in_key + 16);
+                ctx->ekey[5] = ss[5] = u32_in(in_key + 20);
+                for (i = 0; i < 7; i++)
+                        ke6(ctx->ekey, i);
+                kel6(ctx->ekey, 7);
+                ctx->rounds = 12;
+                break;
+
+        case 32:
+                ctx->ekey[4] = ss[4] = u32_in(in_key + 16);
+                ctx->ekey[5] = ss[5] = u32_in(in_key + 20);
+                ctx->ekey[6] = ss[6] = u32_in(in_key + 24);
+                ctx->ekey[7] = ss[7] = u32_in(in_key + 28);
+                for (i = 0; i < 6; i++)
+                        ke8(ctx->ekey, i);
+                kel8(ctx->ekey, 6);
+                ctx->rounds = 14;
+                break;
+        }
+
+        /* decryption schedule: restart from the raw key words */
+
+        ctx->dkey[0] = ss[0] = u32_in(in_key);
+        ctx->dkey[1] = ss[1] = u32_in(in_key + 4);
+        ctx->dkey[2] = ss[2] = u32_in(in_key + 8);
+        ctx->dkey[3] = ss[3] = u32_in(in_key + 12);
+
+        switch (key_len) {
+        case 16:
+                kdf4(ctx->dkey, 0);
+                for (i = 1; i < 9; i++)
+                        kd4(ctx->dkey, i);
+                kdl4(ctx->dkey, 9);
+                break;
+
+        case 24:
+                /* words 4 and up are stored through ff(); ss[] keeps the raw value */
+                ctx->dkey[4] = ff(ss[4] = u32_in(in_key + 16));
+                ctx->dkey[5] = ff(ss[5] = u32_in(in_key + 20));
+                kdf6(ctx->dkey, 0);
+                for (i = 1; i < 7; i++)
+                        kd6(ctx->dkey, i);
+                kdl6(ctx->dkey, 7);
+                break;
+
+        case 32:
+                ctx->dkey[4] = ff(ss[4] = u32_in(in_key + 16));
+                ctx->dkey[5] = ff(ss[5] = u32_in(in_key + 20));
+                ctx->dkey[6] = ff(ss[6] = u32_in(in_key + 24));
+                ctx->dkey[7] = ff(ss[7] = u32_in(in_key + 28));
+                kdf8(ctx->dkey, 0);
+                for (i = 1; i < 6; i++)
+                        kd8(ctx->dkey, i);
+                kdl8(ctx->dkey, 6);
+                break;
+        }
+
+        return 0;
+}
+/*
+ * Encrypt hook installed as .cia_encrypt in aes_alg: forwards to
+ * aes_enc_blk().  The argument order is reversed on the way through: the
+ * hook receives (ctx, dst, src) while the helper takes (src, dst, ctx).
+ * NOTE(review): aes_enc_blk() is defined outside this chunk, presumably in
+ * aes-i586-asm.S -- confirm.
+ */
+static inline void aes_encrypt(void *ctx, u8 *dst, const u8 *src)
+{
+        aes_enc_blk(src, dst, ctx);
+}
+/*
+ * Decrypt hook installed as .cia_decrypt in aes_alg: forwards to
+ * aes_dec_blk().  The argument order is reversed on the way through: the
+ * hook receives (ctx, dst, src) while the helper takes (src, dst, ctx).
+ * NOTE(review): aes_dec_blk() is defined outside this chunk, presumably in
+ * aes-i586-asm.S -- confirm.
+ */
+static inline void aes_decrypt(void *ctx, u8 *dst, const u8 *src)
+{
+        aes_dec_blk(src, dst, ctx);
+}
+/*
+ * Algorithm descriptor passed to crypto_register_alg() in aes_init() and to
+ * crypto_unregister_alg() in aes_fini().  It is registered under the name
+ * "aes" as a CRYPTO_ALG_TYPE_CIPHER, with a per-instance context of
+ * sizeof(struct aes_ctx) bytes.
+ */
+static struct crypto_alg aes_alg = {
+        .cra_name               =       "aes",
+        .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
+        .cra_blocksize          =       AES_BLOCK_SIZE,
+        .cra_ctxsize            =       sizeof(struct aes_ctx),
+        .cra_module             =       THIS_MODULE,
+        /* list head initialised to point at itself */
+        .cra_list               =       LIST_HEAD_INIT(aes_alg.cra_list),
+        .cra_u                  =       {
+                .cipher = {
+                        /* key size bounds; the constants are defined outside this chunk */
+                        .cia_min_keysize        =       AES_MIN_KEY_SIZE,
+                        .cia_max_keysize        =       AES_MAX_KEY_SIZE,
+                        .cia_setkey             =       aes_set_key,
+                        .cia_encrypt            =       aes_encrypt,
+                        .cia_decrypt            =       aes_decrypt
+                }
+        }
+};
+/*
+ * Module init hook: calls gen_tabs() and then registers aes_alg.
+ * Returns whatever crypto_register_alg() returns.
+ * NOTE(review): gen_tabs() is defined outside this chunk; presumably it
+ * builds the lookup tables the key schedule uses -- confirm.
+ */
+static int __init aes_init(void)
+{
+        gen_tabs();
+        return crypto_register_alg(&aes_alg);
+}
+/* Module exit hook: unregisters aes_alg; the return value is ignored. */
+static void __exit aes_fini(void)
+{
+        crypto_unregister_alg(&aes_alg);
+}
+/* Hook aes_init()/aes_fini() into module load and unload. */
+module_init(aes_init);
+module_exit(aes_fini);
+/* Module metadata; the "aes" alias matches .cra_name in aes_alg. */
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
+MODULE_ALIAS("aes");
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386/crypto