[CRYPTO] Add x86_64 asm AES

Implementation: =============== The encrypt/decrypt code is based on an x86 implementation I did a while ago which I never published. This unpublished implementation does include an assembler based key schedule and precomputed tables. For simplicity and best acceptance, however, I took Gladman's in-kernel code for table generation and key schedule for the kernel port of my assembler code and modified this code to produce the key schedule as required by my assembler implementation. File locations and Kconfig are kept similar to the i586 AES assembler implementation. It may seem a little bit strange to use 32 bit I/O and registers in the assembler implementation but this gives the best code size. My implementation takes one instruction more per round compared to Gladman's x86 assembler but it doesn't require any stack for local variables or saved registers and it is less serialized than Gladman's code. Note that all comparisons to Gladman's code were done after my code was implemented. I did only use FIPS PUB 197 for the implementation so my implementation is independent work. If anybody has a better assembler solution for x86_64 I'll be pleased to have my code replaced with the better solution. Testing: ======== The implementation passes the in-kernel crypto testing module and I'm running it without any problems on my laptop where it is mainly used for dm-crypt. Microbenchmark: =============== The microbenchmark was done in userspace with similar compile flags as used during kernel compile. Encrypt/decrypt is about 35% faster than the generic C implementation. As the generic C as well as my assembler implementation are both table based I don't really expect that there is much room for further improvements though I'll be glad to be corrected here. The key schedule is about 5% slower than the generic C implementation. This is due to the fact that some more work has to be done in the key schedule routine to fit the schedule to the assembler implementation. 
Code Size: ========== Encrypt and decrypt are together about 2.1 Kbytes smaller than the generic C implementation which is important with regard to L1 cache usage. The key schedule routine is about 100 bytes larger than the generic C implementation. Data Size: ========== There's no difference in data size requirements between the assembler implementation and the generic C implementation. License: ======== Gladman's code is dual BSD/GPL whereas my assembler code is GPLv2 only (I'm not going to change the license for my code). So I had to change the module license for the x86_64 aes module from 'Dual BSD/GPL' to 'GPL' to reflect the most restrictive license within the module. Signed-off-by: Andreas Steinmetz <ast@domdv.de> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Andreas Steinmetz <ast@domdv.de> 2005-07-06 16:55:00 -0400
committer: David S. Miller <davem@davemloft.net> 2005-07-06 16:55:00 -0400
commit: a2a892a236d03a6e985471a7e57d1c863de144c8 (patch)
tree: 33b52c87bdecf0f24936b952a565a445ce03c616
parent: a61cc44812ff94793987bf43b70a3d9bc64a6820 (diff)
5 files changed, 543 insertions, 2 deletions
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 8a73794f9b90..428915697675 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -65,7 +65,9 @@ CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
 head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
 libs-y                                  += arch/x86_64/lib/
-core-y                                  += arch/x86_64/kernel/ arch/x86_64/mm/
+core-y                                  += arch/x86_64/kernel/ \
+                                           arch/x86_64/mm/ \
+                                           arch/x86_64/crypto/
 core-$(CONFIG_IA32_EMULATION)           += arch/x86_64/ia32/
 drivers-$(CONFIG_PCI)                   += arch/x86_64/pci/
 drivers-$(CONFIG_OPROFILE)              += arch/x86_64/oprofile/
diff --git a/arch/x86_64/crypto/Makefile b/arch/x86_64/crypto/Makefile
new file mode 100644
index 000000000000..426d20f4b72e
--- /dev/null
+++ b/arch/x86_64/crypto/Makefile
@@ -0,0 +1,9 @@
+# 
+# x86_64/crypto/Makefile 
+# 
+# Arch-specific CryptoAPI modules.
+# 
+# Build aes-x86_64.o only when CONFIG_CRYPTO_AES_X86_64 is set.
+obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+# aes-x86_64.o is a composite of the assembler cipher rounds and aes.c.
+aes-x86_64-y := aes-x86_64-asm.o aes.o
diff --git a/arch/x86_64/crypto/aes-x86_64-asm.S b/arch/x86_64/crypto/aes-x86_64-asm.S
new file mode 100644
index 000000000000..483cbb23ab8d
--- /dev/null
+++ b/arch/x86_64/crypto/aes-x86_64-asm.S
@@ -0,0 +1,186 @@
+/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
+ *
+ * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
+ *
+ * License:
+ * This code can be distributed under the terms of the GNU General Public
+ * License (GPL) Version 2 provided that the above header down to and
+ * including this sentence is retained in full.
+ */
+.extern aes_ft_tab
+.extern aes_it_tab
+.extern aes_fl_tab
+.extern aes_il_tab
+.text
+/*
+ * Register aliases.  R1-R4 (%rax, %rbx, %rcx, %rdx) are available as
+ * 64-bit, 32-bit (E), 16-bit (X), high-byte (H) and low-byte (L) names;
+ * R5-R7 (%rsi, %rdi, %rbp) only as 64-bit and 32-bit (E) names; R8-R11
+ * only as 64-bit names.  The macros below form the sized name by pasting
+ * the suffix onto the alias with ##.
+ */
+#define R1      %rax
+#define R1E     %eax
+#define R1X     %ax
+#define R1H     %ah
+#define R1L     %al
+#define R2      %rbx
+#define R2E     %ebx
+#define R2X     %bx
+#define R2H     %bh
+#define R2L     %bl
+#define R3      %rcx
+#define R3E     %ecx
+#define R3X     %cx
+#define R3H     %ch
+#define R3L     %cl
+#define R4      %rdx
+#define R4E     %edx
+#define R4X     %dx
+#define R4H     %dh
+#define R4L     %dl
+#define R5      %rsi
+#define R5E     %esi
+#define R6      %rdi
+#define R6E     %edi
+#define R7      %rbp
+#define R7E     %ebp
+#define R8      %r8
+#define R9      %r9
+#define R10     %r10
+#define R11     %r11
+/*
+ * prologue(): emit the global, 8-byte aligned entry point FUNC and set up
+ * the register state for the first round.
+ *
+ *   r1 -> r2, r3 -> r4  64-bit register copies; entry() uses them to park
+ *                       %rbx in %r8 and %rbp in %r9
+ *   r8                  context pointer; the 32-bit word at (r8) is loaded
+ *                       into r10 and compared against 24
+ *   r9                  set to r8 + BASE + 52, the round key pointer
+ *   r10 -> r11          64-bit copy made before r10 is reused for the
+ *                       word loaded from (r8)
+ *   r7                  input pointer; the words at 0, 4, 8 and 12 are
+ *                       loaded into r5, r1, r6 and r7 (r7 last, because the
+ *                       load overwrites the pointer)
+ *
+ * The four loaded words are XORed with the key words at -48..-36(r9),
+ * i.e. at r8 + BASE + 4 onwards.  Then, depending on the word at (r8):
+ *   below 24:    jump to B128 with r9 unchanged
+ *   equal to 24: jump to B192 with r9 advanced by 32
+ *   otherwise:   fall through with r9 advanced by 64
+ * leaq leaves the flags alone, so the je still tests the cmpl result.
+ */
+#define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
+        .global FUNC;                   \
+        .type   FUNC,@function;         \
+        .align  8;                      \
+FUNC:   movq    r1,r2;                  \
+        movq    r3,r4;                  \
+        leaq    BASE+52(r8),r9;         \
+        movq    r10,r11;                \
+        movl    (r7),r5 ## E;           \
+        movl    4(r7),r1 ## E;          \
+        movl    8(r7),r6 ## E;          \
+        movl    12(r7),r7 ## E;         \
+        movl    (r8),r10 ## E;          \
+        xorl    -48(r9),r5 ## E;        \
+        xorl    -44(r9),r1 ## E;        \
+        xorl    -40(r9),r6 ## E;        \
+        xorl    -36(r9),r7 ## E;        \
+        cmpl    $24,r10 ## E;           \
+        jb      B128;                   \
+        leaq    32(r9),r9;              \
+        je      B192;                   \
+        leaq    32(r9),r9;
+/*
+ * epilogue(): copy 64-bit registers r1 -> r2 and r3 -> r4 (the return
+ * macro uses this to restore %rbx and %rbp from %r8 and %r9), store the
+ * 32-bit words r5, r6, r7 and r8 at offsets 0, 4, 8 and 12 from pointer
+ * r9, and return to the caller.
+ */
+#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+        movq    r1,r2;                  \
+        movq    r3,r4;                  \
+        movl    r5 ## E,(r9);           \
+        movl    r6 ## E,4(r9);          \
+        movl    r7 ## E,8(r9);          \
+        movl    r8 ## E,12(r9);         \
+        ret;
+/*
+ * round(): one table-driven cipher round.
+ *
+ *   TAB          base of four consecutive 256-entry tables of 32-bit
+ *                words, addressed as TAB, TAB+1024, TAB+2048, TAB+3072
+ *   OFFSET       byte offset of this round's four key words from r8
+ *   r1..r4       input state words; these must be registers that have
+ *                X/H/L aliases, because their individual bytes are
+ *                extracted as table indices
+ *   r5,r6,r7     scratch: r5 and r6 accumulate two of the output words,
+ *                r7 only ever holds a table index
+ *   r8           round key pointer
+ *   ra,rb,rc,rd  registers that get the key words at OFFSET, OFFSET+4,
+ *                OFFSET+8 and OFFSET+12 XORed in
+ *
+ * The output words are accumulated in r5, r6, r3 and r4 (r3 and r4 are
+ * reloaded as accumulators once their input bytes have been consumed);
+ * r1, r2 and r7 are left holding stale index values.  ra/rb are XORed
+ * only after r5/r6 received their first table value, and rc/rd only after
+ * r3/r4 were reloaded, so the users below pass for ra..rd the same four
+ * physical registers that serve as r5, r6, r3 and r4.
+ */
+#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
+        movzbl  r2 ## H,r5 ## E;        \
+        movzbl  r2 ## L,r6 ## E;        \
+        movl    TAB+1024(,r5,4),r5 ## E;\
+        movw    r4 ## X,r2 ## X;        \
+        movl    TAB(,r6,4),r6 ## E;     \
+        roll    $16,r2 ## E;            \
+        shrl    $16,r4 ## E;            \
+        movzbl  r4 ## H,r7 ## E;        \
+        movzbl  r4 ## L,r4 ## E;        \
+        xorl    OFFSET(r8),ra ## E;     \
+        xorl    OFFSET+4(r8),rb ## E;   \
+        xorl    TAB+3072(,r7,4),r5 ## E;\
+        xorl    TAB+2048(,r4,4),r6 ## E;\
+        movzbl  r1 ## L,r7 ## E;        \
+        movzbl  r1 ## H,r4 ## E;        \
+        movl    TAB+1024(,r4,4),r4 ## E;\
+        movw    r3 ## X,r1 ## X;        \
+        roll    $16,r1 ## E;            \
+        shrl    $16,r3 ## E;            \
+        xorl    TAB(,r7,4),r5 ## E;     \
+        movzbl  r3 ## H,r7 ## E;        \
+        movzbl  r3 ## L,r3 ## E;        \
+        xorl    TAB+3072(,r7,4),r4 ## E;\
+        xorl    TAB+2048(,r3,4),r5 ## E;\
+        movzbl  r1 ## H,r7 ## E;        \
+        movzbl  r1 ## L,r3 ## E;        \
+        shrl    $16,r1 ## E;            \
+        xorl    TAB+3072(,r7,4),r6 ## E;\
+        movl    TAB+2048(,r3,4),r3 ## E;\
+        movzbl  r1 ## H,r7 ## E;        \
+        movzbl  r1 ## L,r1 ## E;        \
+        xorl    TAB+1024(,r7,4),r6 ## E;\
+        xorl    TAB(,r1,4),r3 ## E;     \
+        movzbl  r2 ## H,r1 ## E;        \
+        movzbl  r2 ## L,r7 ## E;        \
+        shrl    $16,r2 ## E;            \
+        xorl    TAB+3072(,r1,4),r3 ## E;\
+        xorl    TAB+2048(,r7,4),r4 ## E;\
+        movzbl  r2 ## H,r1 ## E;        \
+        movzbl  r2 ## L,r2 ## E;        \
+        xorl    OFFSET+8(r8),rc ## E;   \
+        xorl    OFFSET+12(r8),rd ## E;  \
+        xorl    TAB+1024(,r1,4),r3 ## E;\
+        xorl    TAB(,r2,4),r4 ## E;
+/* move_regs(): copy the 32-bit registers r3 -> r1 and r4 -> r2. */
+#define move_regs(r1,r2,r3,r4) \
+        movl    r3 ## E,r1 ## E;        \
+        movl    r4 ## E,r2 ## E;
+/*
+ * entry(): prologue() bound to the fixed register assignment shared by
+ * both ciphers: %rbx is parked in %r8 and %rbp in %r9, the context
+ * pointer is %rdi, the input pointer is %rdx, the pointer in %rsi is
+ * copied to %r11, %r10 becomes the round key pointer and the state is
+ * loaded into %eax, %ebx, %ecx, %edx.
+ */
+#define entry(FUNC,BASE,B128,B192) \
+        prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+/*
+ * return: restore %rbx from %r8 and %rbp from %r9, store %esi, %edi,
+ * %ecx, %edx through the pointer kept in %r11, then ret.
+ */
+#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
+/*
+ * Per-direction rounds.  Each uses %r10 as the key pointer and %rbp as
+ * the index scratch register, and leaves the round output in %esi, %edi,
+ * %ecx, %edx.  The non-final variants then copy %esi/%edi back to
+ * %eax/%ebx so the state is again in %eax, %ebx, %ecx, %edx for the next
+ * round.  Decryption passes the state and accumulator registers to
+ * round() pairwise swapped compared to encryption.
+ */
+#define encrypt_round(TAB,OFFSET) \
+        round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
+        move_regs(R1,R2,R5,R6)
+#define encrypt_final(TAB,OFFSET) \
+        round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
+#define decrypt_round(TAB,OFFSET) \
+        round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
+        move_regs(R1,R2,R5,R6)
+#define decrypt_final(TAB,OFFSET) \
+        round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
+/*
+ * void aes_encrypt(void *ctx, u8 *out, const u8 *in)
+ *
+ * Encrypt one 16-byte block.  BASE is 0, so the round keys are read from
+ * the array starting at byte offset 4 of the context.  The code is fully
+ * unrolled for the longest schedule: 13 table rounds plus the final round
+ * using aes_fl_tab.  The prologue enters at enc192 (skipping two rounds)
+ * or enc128 (skipping four) depending on the key length word at offset 0,
+ * and positions the key pointer so that the fixed OFFSETs below line up
+ * with the first round key after the initial whitening in each case.
+ */
+        entry(aes_encrypt,0,enc128,enc192)
+        encrypt_round(aes_ft_tab,-96)
+        encrypt_round(aes_ft_tab,-80)
+enc192: encrypt_round(aes_ft_tab,-64)
+        encrypt_round(aes_ft_tab,-48)
+enc128: encrypt_round(aes_ft_tab,-32)
+        encrypt_round(aes_ft_tab,-16)
+        encrypt_round(aes_ft_tab,  0)
+        encrypt_round(aes_ft_tab, 16)
+        encrypt_round(aes_ft_tab, 32)
+        encrypt_round(aes_ft_tab, 48)
+        encrypt_round(aes_ft_tab, 64)
+        encrypt_round(aes_ft_tab, 80)
+        encrypt_round(aes_ft_tab, 96)
+        encrypt_final(aes_fl_tab,112)
+        return
+/*
+ * void aes_decrypt(void *ctx, u8 *out, const u8 *in)
+ *
+ * Decrypt one 16-byte block.  Same structure as aes_encrypt, but BASE is
+ * 240, so the round keys are read from the array starting at byte offset
+ * 244 of the context, and the inverse tables aes_it_tab / aes_il_tab are
+ * used.  dec192 and dec128 are the entry points for the shorter key
+ * schedules (two and four rounds skipped respectively).
+ */
+        entry(aes_decrypt,240,dec128,dec192)
+        decrypt_round(aes_it_tab,-96)
+        decrypt_round(aes_it_tab,-80)
+dec192: decrypt_round(aes_it_tab,-64)
+        decrypt_round(aes_it_tab,-48)
+dec128: decrypt_round(aes_it_tab,-32)
+        decrypt_round(aes_it_tab,-16)
+        decrypt_round(aes_it_tab,  0)
+        decrypt_round(aes_it_tab, 16)
+        decrypt_round(aes_it_tab, 32)
+        decrypt_round(aes_it_tab, 48)
+        decrypt_round(aes_it_tab, 64)
+        decrypt_round(aes_it_tab, 80)
+        decrypt_round(aes_it_tab, 96)
+        decrypt_final(aes_il_tab,112)
+        return
diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c
new file mode 100644
index 000000000000..2b5c4010ce38
--- /dev/null
+++ b/arch/x86_64/crypto/aes.c
@@ -0,0 +1,324 @@
+/*
+ * Cryptographic API.
+ *
+ * AES Cipher Algorithm.
+ *
+ * Based on Brian Gladman's code.
+ *
+ * Linux developers:
+ *  Alexander Kjeldaas <astor@fast.no>
+ *  Herbert Valerio Riedel <hvr@hvrlab.org>
+ *  Kyle McMartin <kyle@debian.org>
+ *  Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API).
+ *  Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
+ * All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software in both source and binary
+ * form is allowed (with or without changes) provided that:
+ *
+ *   1. distributions of this source code include the above copyright
+ *      notice, this list of conditions and the following disclaimer;
+ *
+ *   2. distributions in binary form include the above copyright
+ *      notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other associated materials;
+ *
+ *   3. the copyright holder's name is not used to endorse products
+ *      built using this software without specific written permission.
+ *
+ * ALTERNATIVELY, provided that this notice is retained in full, this product
+ * may be distributed under the terms of the GNU General Public License (GPL),
+ * in which case the provisions of the GPL apply INSTEAD OF those given above.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ * ---------------------------------------------------------------------------
+ */
+/* Some changes from the Gladman version:
+    s/RIJNDAEL(e_key)/E_KEY/g
+    s/RIJNDAEL(d_key)/D_KEY/g
+*/
+#include <asm/byteorder.h>
+#include <linux/bitops.h>
+#include <linux/crypto.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#define AES_MIN_KEY_SIZE        16
+#define AES_MAX_KEY_SIZE        32
+#define AES_BLOCK_SIZE          16
+/*
+ * #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
+ */
+static inline u8 byte(const u32 x, const unsigned n)
+{
+        /* shift byte number n (0 = least significant) down, keep 8 bits */
+        return (u8)(x >> (8 * n));
+}
+/* read a little-endian 32-bit word from address x, in CPU byte order */
+#define u32_in(x) le32_to_cpu(*(const __le32 *)(x))
+/*
+ * Per-key context.  aes-x86_64-asm.S addresses this structure with
+ * hard-coded byte offsets: key_length at 0, E[] from offset 4 and D[] from
+ * offset 244 (4 + 60 * 4).  Field order and sizes must therefore not be
+ * changed without adjusting the assembler code.
+ */
+struct aes_ctx
+{
+        u32 key_length;         /* key size in bytes: 16, 24 or 32 */
+        u32 E[60];              /* encryption round keys */
+        u32 D[60];              /* decryption round keys */
+};
+/* shorthands; both require a local 'ctx' pointing to a struct aes_ctx */
+#define E_KEY ctx->E
+#define D_KEY ctx->D
+/* helper tables only needed while gen_tabs() runs at init time */
+static u8 pow_tab[256] __initdata;
+static u8 log_tab[256] __initdata;
+static u8 sbx_tab[256] __initdata;
+static u8 isb_tab[256] __initdata;
+/* round constants; read by the loop4/loop6/loop8 key schedule macros */
+static u32 rco_tab[10];
+/*
+ * Round lookup tables, filled in by gen_tabs().  They are not static
+ * because aes-x86_64-asm.S references them by name (.extern).
+ */
+u32 aes_ft_tab[4][256];
+u32 aes_it_tab[4][256];
+u32 aes_fl_tab[4][256];
+u32 aes_il_tab[4][256];
+/*
+ * Multiply two non-zero field elements via the log/antilog tables.  The
+ * logarithms are added in 8 bits; a wrap-around of that sum is folded back
+ * in by adding one to the table index.
+ */
+static inline u8 f_mult(u8 a, u8 b)
+{
+        const u8 log_a = log_tab[a];
+        const u8 log_sum = log_a + log_tab[b];
+
+        if (log_sum < log_a)
+                return pow_tab[log_sum + 1];
+        return pow_tab[log_sum];
+}
+/* field multiplication that also handles a zero operand (result 0) */
+#define ff_mult(a, b) (a && b ? f_mult(a, b) : 0)
+/*
+ * Look up each of the four bytes of x in the aes_fl_tab sub-table for its
+ * byte position and XOR the four results together.
+ */
+#define ls_box(x)                               \
+        (aes_fl_tab[0][byte(x, 0)] ^            \
+         aes_fl_tab[1][byte(x, 1)] ^            \
+         aes_fl_tab[2][byte(x, 2)] ^            \
+         aes_fl_tab[3][byte(x, 3)])
+/*
+ * Build all lookup tables once at init time: the GF(2**8) log/antilog
+ * tables, the round constants, the S-box and its inverse, and from those
+ * the four 4x256 word tables used by the assembler rounds and by ls_box().
+ */
+static void __init gen_tabs(void)
+{
+        u32 i, t;
+        u8 p, q;
+        /* log and power tables for GF(2**8) finite field with
+           0x011b as modular polynomial - the simplest primitive
+           root is 0x03, used here to generate the tables */
+        for (i = 0, p = 1; i < 256; ++i) {
+                pow_tab[i] = (u8)p;
+                log_tab[p] = (u8)i;
+                /* p *= 3: p ^ (p * 2), reduced when bit 7 was set */
+                p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
+        }
+        /* the last pass (i == 255) is back at p == 1; log of 1 must be 0 */
+        log_tab[1] = 0;
+        /* round constants: start at 1 and double in the field each step */
+        for (i = 0, p = 1; i < 10; ++i) {
+                rco_tab[i] = p;
+                p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
+        }
+        /* S-box and inverse S-box */
+        for (i = 0; i < 256; ++i) {
+                /* multiplicative inverse of i (0 stays 0) ... */
+                p = (i ? pow_tab[255 - log_tab[i]] : 0);
+                /* ... followed by the affine transform, via byte rotations */
+                q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
+                p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
+                sbx_tab[i] = p;
+                isb_tab[p] = (u8)i;
+        }
+        /*
+         * Word tables.  Sub-table k of each table is sub-table 0 rotated
+         * left by 8 * k bits.
+         */
+        for (i = 0; i < 256; ++i) {
+                p = sbx_tab[i];
+                /* final encryption round: plain S-box value */
+                t = p;
+                aes_fl_tab[0][i] = t;
+                aes_fl_tab[1][i] = rol32(t, 8);
+                aes_fl_tab[2][i] = rol32(t, 16);
+                aes_fl_tab[3][i] = rol32(t, 24);
+                /* encryption rounds: S-box value times 2, 1, 1, 3 */
+                t = ((u32)ff_mult(2, p)) |
+                    ((u32)p << 8) |
+                    ((u32)p << 16) | ((u32)ff_mult(3, p) << 24);
+                aes_ft_tab[0][i] = t;
+                aes_ft_tab[1][i] = rol32(t, 8);
+                aes_ft_tab[2][i] = rol32(t, 16);
+                aes_ft_tab[3][i] = rol32(t, 24);
+                p = isb_tab[i];
+                /* final decryption round: plain inverse S-box value */
+                t = p;
+                aes_il_tab[0][i] = t;
+                aes_il_tab[1][i] = rol32(t, 8);
+                aes_il_tab[2][i] = rol32(t, 16);
+                aes_il_tab[3][i] = rol32(t, 24);
+                /* decryption rounds: inverse S-box value times 14, 9, 13, 11 */
+                t = ((u32)ff_mult(14, p)) |
+                    ((u32)ff_mult(9, p) << 8) |
+                    ((u32)ff_mult(13, p) << 16) |
+                    ((u32)ff_mult(11, p) << 24);
+                aes_it_tab[0][i] = t;
+                aes_it_tab[1][i] = rol32(t, 8);
+                aes_it_tab[2][i] = rol32(t, 16);
+                aes_it_tab[3][i] = rol32(t, 24);
+        }
+}
+#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
+#define imix_col(y, x)                  \
+        u    = star_x(x);               \
+        v    = star_x(u);               \
+        w    = star_x(v);               \
+        t    = w ^ (x);                 \
+        (y)  = u ^ v ^ w;               \
+        (y) ^= ror32(u ^ t,  8) ^       \
+               ror32(v ^ t, 16) ^       \
+               ror32(t, 24)
+/* initialise the key schedule from the user supplied key */
+/*
+ * The loopN(i) macros perform expansion step i for a key of N words.  They
+ * expect the locals 't' (the last key word produced so far) and 'ctx'.
+ * Step i reads E_KEY[N * i .. N * i + N - 1] and writes the following
+ * N words, E_KEY[N * i + N .. N * i + 2 * N - 1].
+ */
+#define loop4(i)                                        \
+{                                                       \
+        t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];   \
+        t ^= E_KEY[4 * i];     E_KEY[4 * i + 4] = t;    \
+        t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t;    \
+        t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t;    \
+        t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t;    \
+}
+#define loop6(i)                                        \
+{                                                       \
+        t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];   \
+        t ^= E_KEY[6 * i];     E_KEY[6 * i + 6] = t;    \
+        t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t;    \
+        t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t;    \
+        t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t;    \
+        t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t;   \
+        t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t;   \
+}
+/*
+ * loop8 applies an extra ls_box() (without rotation or round constant)
+ * in the middle of the step.  Note that for i == 6 the second half
+ * addresses E_KEY[60..63], which lies beyond the 60 entries of E[] in
+ * struct aes_ctx.
+ */
+#define loop8(i)                                        \
+{                                                       \
+        t = ror32(t,  8); ; t = ls_box(t) ^ rco_tab[i]; \
+        t ^= E_KEY[8 * i];     E_KEY[8 * i + 8] = t;    \
+        t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t;    \
+        t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t;   \
+        t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t;   \
+        t  = E_KEY[8 * i + 4] ^ ls_box(t);              \
+        E_KEY[8 * i + 12] = t;                          \
+        t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t;   \
+        t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t;   \
+        t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t;   \
+}
+/*
+ * aes_set_key - expand a user key into the encryption and decryption
+ * round key arrays read by the assembler routines.
+ * @ctx_arg: struct aes_ctx to fill in
+ * @in_key:  key bytes, read as little-endian 32-bit words
+ * @key_len: key length in bytes; must be 16, 24 or 32
+ * @flags:   CRYPTO_TFM_RES_BAD_KEY_LEN is OR-ed in on an unsupported length
+ *
+ * Returns 0 on success, -EINVAL if key_len is not 16, 24 or 32.
+ *
+ * E_KEY holds the expanded key in forward order.  D_KEY holds the same
+ * round keys in reverse round order (groups of four words, word order
+ * inside a group kept); the first and last group are copied unchanged,
+ * all others are passed through imix_col().  key_len + 24 is the index
+ * of the last four-word round key for every supported key length.
+ */
+static int aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len,
+                       u32 *flags)
+{
+        struct aes_ctx *ctx = ctx_arg;
+        u32 i, j, t, u, v, w;
+        if (key_len != 16 && key_len != 24 && key_len != 32) {
+                *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+                return -EINVAL;
+        }
+        ctx->key_length = key_len;
+        /* first encryption round key == last decryption round key */
+        D_KEY[key_len + 24] = E_KEY[0] = u32_in(in_key);
+        D_KEY[key_len + 25] = E_KEY[1] = u32_in(in_key + 4);
+        D_KEY[key_len + 26] = E_KEY[2] = u32_in(in_key + 8);
+        D_KEY[key_len + 27] = E_KEY[3] = u32_in(in_key + 12);
+        switch (key_len) {
+        case 16:
+                t = E_KEY[3];
+                for (i = 0; i < 10; ++i)
+                        loop4(i);
+                break;
+        case 24:
+                E_KEY[4] = u32_in(in_key + 16);
+                t = E_KEY[5] = u32_in(in_key + 20);
+                for (i = 0; i < 8; ++i)
+                        loop6 (i);
+                break;
+        case 32:
+                E_KEY[4] = u32_in(in_key + 16);
+                E_KEY[5] = u32_in(in_key + 20);
+                E_KEY[6] = u32_in(in_key + 24);
+                t = E_KEY[7] = u32_in(in_key + 28);
+                for (i = 0; i < 6; ++i)
+                        loop8(i);
+                /*
+                 * Seventh step (i == 6): only its first half is needed to
+                 * produce E_KEY[56..59].  A full loop8(6) would also write
+                 * E_KEY[60..63], past the end of the 60-entry array.
+                 */
+                t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];
+                t ^= E_KEY[8 * i];     E_KEY[8 * i + 8] = t;
+                t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t;
+                t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t;
+                t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t;
+                break;
+        }
+        /* last encryption round key == first decryption round key */
+        D_KEY[0] = E_KEY[key_len + 24];
+        D_KEY[1] = E_KEY[key_len + 25];
+        D_KEY[2] = E_KEY[key_len + 26];
+        D_KEY[3] = E_KEY[key_len + 27];
+        for (i = 4; i < key_len + 24; ++i) {
+                /* mirror the four-word group, keep the word within it */
+                j = key_len + 24 - (i & ~3) + (i & 3);
+                imix_col(D_KEY[j], E_KEY[i]);
+        }
+        return 0;
+}
+/* single-block cipher routines implemented in aes-x86_64-asm.S */
+extern void aes_encrypt(void *ctx_arg, u8 *out, const u8 *in);
+extern void aes_decrypt(void *ctx_arg, u8 *out, const u8 *in);
+/*
+ * Algorithm descriptor registered with the crypto API under the name
+ * "aes": C key setup, assembler encrypt/decrypt.
+ */
+static struct crypto_alg aes_alg = {
+        .cra_name               =       "aes",
+        .cra_flags              =       CRYPTO_ALG_TYPE_CIPHER,
+        .cra_blocksize          =       AES_BLOCK_SIZE,
+        .cra_ctxsize            =       sizeof(struct aes_ctx),
+        .cra_module             =       THIS_MODULE,
+        .cra_list               =       LIST_HEAD_INIT(aes_alg.cra_list),
+        .cra_u                  =       {
+                .cipher = {
+                        .cia_min_keysize        =       AES_MIN_KEY_SIZE,
+                        .cia_max_keysize        =       AES_MAX_KEY_SIZE,
+                        .cia_setkey             =       aes_set_key,
+                        .cia_encrypt            =       aes_encrypt,
+                        .cia_decrypt            =       aes_decrypt
+                }
+        }
+};
+/* Module init: generate the lookup tables, then register the algorithm. */
+static int __init aes_init(void)
+{
+        int err;
+
+        gen_tabs();
+        err = crypto_register_alg(&aes_alg);
+        return err;
+}
+/* Module exit: remove the algorithm from the crypto API again. */
+static void __exit aes_fini(void)
+{
+        crypto_unregister_alg(&aes_alg);
+}
+module_init(aes_init);
+module_exit(aes_fini);
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
+MODULE_LICENSE("GPL");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 90d6089d60ed..256c0b1fed10 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -146,7 +146,7 @@ config CRYPTO_SERPENT
 config CRYPTO_AES
        tristate "AES cipher algorithms"
-        depends on CRYPTO && !((X86 || UML_X86) && !64BIT)
+        depends on CRYPTO && !(X86 || UML_X86)
        help
          AES cipher algorithms (FIPS-197). AES uses the Rijndael 
          algorithm.
@@ -184,6 +184,26 @@ config CRYPTO_AES_586
          See <http://csrc.nist.gov/encryption/aes/> for more information.
+config CRYPTO_AES_X86_64
+        tristate "AES cipher algorithms (x86_64)"
+        depends on CRYPTO && ((X86 || UML_X86) && 64BIT)
+        help
+          AES cipher algorithms (FIPS-197). AES uses the Rijndael 
+          algorithm.
+          Rijndael appears to be consistently a very good performer in
+          both hardware and software across a wide range of computing 
+          environments regardless of its use in feedback or non-feedback 
+          modes. Its key setup time is excellent, and its key agility is 
+          good. Rijndael's very low memory requirements make it very well 
+          suited for restricted-space environments, in which it also 
+          demonstrates excellent performance. Rijndael's operations are 
+          among the easiest to defend against power and timing attacks. 
+          The AES specifies three key sizes: 128, 192 and 256 bits.
+          See <http://csrc.nist.gov/encryption/aes/> for more information.
 config CRYPTO_CAST5
        tristate "CAST5 (CAST-128) cipher algorithm"
        depends on CRYPTO
author	Andreas Steinmetz <ast@domdv.de>	2005-07-06 16:55:00 -0400
committer	David S. Miller <davem@davemloft.net>	2005-07-06 16:55:00 -0400
commit	a2a892a236d03a6e985471a7e57d1c863de144c8 (patch)
tree	33b52c87bdecf0f24936b952a565a445ce03c616
parent	a61cc44812ff94793987bf43b70a3d9bc64a6820 (diff)

diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 8a73794f9b90..428915697675 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile
@@ -65,7 +65,9 @@ CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
65	head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o	65	head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
66		66
67	libs-y += arch/x86_64/lib/	67	libs-y += arch/x86_64/lib/
68	core-y += arch/x86_64/kernel/ arch/x86_64/mm/	68	core-y += arch/x86_64/kernel/ \
		69	arch/x86_64/mm/ \
		70	arch/x86_64/crypto/
69	core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/	71	core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/
70	drivers-$(CONFIG_PCI) += arch/x86_64/pci/	72	drivers-$(CONFIG_PCI) += arch/x86_64/pci/
71	drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/	73	drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/


diff --git a/arch/x86_64/crypto/Makefile b/arch/x86_64/crypto/Makefile new file mode 100644 index 000000000000..426d20f4b72e --- /dev/null +++ b/arch/x86_64/crypto/Makefile
@@ -0,0 +1,9 @@
		1	#
		2	# x86_64/crypto/Makefile
		3	#
		4	# Arch-specific CryptoAPI modules.
		5	#
		6
		7	obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
		8
		9	aes-x86_64-y := aes-x86_64-asm.o aes.o


diff --git a/arch/x86_64/crypto/aes-x86_64-asm.S b/arch/x86_64/crypto/aes-x86_64-asm.S new file mode 100644 index 000000000000..483cbb23ab8d --- /dev/null +++ b/arch/x86_64/crypto/aes-x86_64-asm.S
@@ -0,0 +1,186 @@
		1	/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
		2	*
		3	* Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
		4	*
		5	* License:
		6	* This code can be distributed under the terms of the GNU General Public
		7	* License (GPL) Version 2 provided that the above header down to and
		8	* including this sentence is retained in full.
		9	*/
		10
		11	.extern aes_ft_tab
		12	.extern aes_it_tab
		13	.extern aes_fl_tab
		14	.extern aes_il_tab
		15
		16	.text
		17
		18	#define R1 %rax
		19	#define R1E %eax
		20	#define R1X %ax
		21	#define R1H %ah
		22	#define R1L %al
		23	#define R2 %rbx
		24	#define R2E %ebx
		25	#define R2X %bx
		26	#define R2H %bh
		27	#define R2L %bl
		28	#define R3 %rcx
		29	#define R3E %ecx
		30	#define R3X %cx
		31	#define R3H %ch
		32	#define R3L %cl
		33	#define R4 %rdx
		34	#define R4E %edx
		35	#define R4X %dx
		36	#define R4H %dh
		37	#define R4L %dl
		38	#define R5 %rsi
		39	#define R5E %esi
		40	#define R6 %rdi
		41	#define R6E %edi
		42	#define R7 %rbp
		43	#define R7E %ebp
		44	#define R8 %r8
		45	#define R9 %r9
		46	#define R10 %r10
		47	#define R11 %r11
		48
		49	#define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
		50	.global FUNC; \
		51	.type FUNC,@function; \
		52	.align 8; \
		53	FUNC: movq r1,r2; \
		54	movq r3,r4; \
		55	leaq BASE+52(r8),r9; \
		56	movq r10,r11; \
		57	movl (r7),r5 ## E; \
		58	movl 4(r7),r1 ## E; \
		59	movl 8(r7),r6 ## E; \
		60	movl 12(r7),r7 ## E; \
		61	movl (r8),r10 ## E; \
		62	xorl -48(r9),r5 ## E; \
		63	xorl -44(r9),r1 ## E; \
		64	xorl -40(r9),r6 ## E; \
		65	xorl -36(r9),r7 ## E; \
		66	cmpl $24,r10 ## E; \
		67	jb B128; \
		68	leaq 32(r9),r9; \
		69	je B192; \
		70	leaq 32(r9),r9;
		71
		72	#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
		73	movq r1,r2; \
		74	movq r3,r4; \
		75	movl r5 ## E,(r9); \
		76	movl r6 ## E,4(r9); \
		77	movl r7 ## E,8(r9); \
		78	movl r8 ## E,12(r9); \
		79	ret;
		80
		81	#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
		82	movzbl r2 ## H,r5 ## E; \
		83	movzbl r2 ## L,r6 ## E; \
		84	movl TAB+1024(,r5,4),r5 ## E;\
		85	movw r4 ## X,r2 ## X; \
		86	movl TAB(,r6,4),r6 ## E; \
		87	roll $16,r2 ## E; \
		88	shrl $16,r4 ## E; \
		89	movzbl r4 ## H,r7 ## E; \
		90	movzbl r4 ## L,r4 ## E; \
		91	xorl OFFSET(r8),ra ## E; \
		92	xorl OFFSET+4(r8),rb ## E; \
		93	xorl TAB+3072(,r7,4),r5 ## E;\
		94	xorl TAB+2048(,r4,4),r6 ## E;\
		95	movzbl r1 ## L,r7 ## E; \
		96	movzbl r1 ## H,r4 ## E; \
		97	movl TAB+1024(,r4,4),r4 ## E;\
		98	movw r3 ## X,r1 ## X; \
		99	roll $16,r1 ## E; \
		100	shrl $16,r3 ## E; \
		101	xorl TAB(,r7,4),r5 ## E; \
		102	movzbl r3 ## H,r7 ## E; \
		103	movzbl r3 ## L,r3 ## E; \
		104	xorl TAB+3072(,r7,4),r4 ## E;\
		105	xorl TAB+2048(,r3,4),r5 ## E;\
		106	movzbl r1 ## H,r7 ## E; \
		107	movzbl r1 ## L,r3 ## E; \
		108	shrl $16,r1 ## E; \
		109	xorl TAB+3072(,r7,4),r6 ## E;\
		110	movl TAB+2048(,r3,4),r3 ## E;\
		111	movzbl r1 ## H,r7 ## E; \
		112	movzbl r1 ## L,r1 ## E; \
		113	xorl TAB+1024(,r7,4),r6 ## E;\
		114	xorl TAB(,r1,4),r3 ## E; \
		115	movzbl r2 ## H,r1 ## E; \
		116	movzbl r2 ## L,r7 ## E; \
		117	shrl $16,r2 ## E; \
		118	xorl TAB+3072(,r1,4),r3 ## E;\
		119	xorl TAB+2048(,r7,4),r4 ## E;\
		120	movzbl r2 ## H,r1 ## E; \
		121	movzbl r2 ## L,r2 ## E; \
		122	xorl OFFSET+8(r8),rc ## E; \
		123	xorl OFFSET+12(r8),rd ## E; \
		124	xorl TAB+1024(,r1,4),r3 ## E;\
		125	xorl TAB(,r2,4),r4 ## E;
		126
		127	#define move_regs(r1,r2,r3,r4) \
		128	movl r3 ## E,r1 ## E; \
		129	movl r4 ## E,r2 ## E;
		130
		131	#define entry(FUNC,BASE,B128,B192) \
		132	prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
		133
		134	#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
		135
		136	#define encrypt_round(TAB,OFFSET) \
		137	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
		138	move_regs(R1,R2,R5,R6)
		139
		140	#define encrypt_final(TAB,OFFSET) \
		141	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
		142
		143	#define decrypt_round(TAB,OFFSET) \
		144	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
		145	move_regs(R1,R2,R5,R6)
		146
		147	#define decrypt_final(TAB,OFFSET) \
		148	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
		149
		150	/* void aes_encrypt(void *ctx, u8 *out, const u8 *in) */
		151
		152	entry(aes_encrypt,0,enc128,enc192)
		153	encrypt_round(aes_ft_tab,-96)
		154	encrypt_round(aes_ft_tab,-80)
		155	enc192: encrypt_round(aes_ft_tab,-64)
		156	encrypt_round(aes_ft_tab,-48)
		157	enc128: encrypt_round(aes_ft_tab,-32)
		158	encrypt_round(aes_ft_tab,-16)
		159	encrypt_round(aes_ft_tab, 0)
		160	encrypt_round(aes_ft_tab, 16)
		161	encrypt_round(aes_ft_tab, 32)
		162	encrypt_round(aes_ft_tab, 48)
		163	encrypt_round(aes_ft_tab, 64)
		164	encrypt_round(aes_ft_tab, 80)
		165	encrypt_round(aes_ft_tab, 96)
		166	encrypt_final(aes_fl_tab,112)
		167	return
		168
		169	/* void aes_decrypt(void *ctx, u8 *out, const u8 *in) */
		170
		171	entry(aes_decrypt,240,dec128,dec192)
		172	decrypt_round(aes_it_tab,-96)
		173	decrypt_round(aes_it_tab,-80)
		174	dec192: decrypt_round(aes_it_tab,-64)
		175	decrypt_round(aes_it_tab,-48)
		176	dec128: decrypt_round(aes_it_tab,-32)
		177	decrypt_round(aes_it_tab,-16)
		178	decrypt_round(aes_it_tab, 0)
		179	decrypt_round(aes_it_tab, 16)
		180	decrypt_round(aes_it_tab, 32)
		181	decrypt_round(aes_it_tab, 48)
		182	decrypt_round(aes_it_tab, 64)
		183	decrypt_round(aes_it_tab, 80)
		184	decrypt_round(aes_it_tab, 96)
		185	decrypt_final(aes_il_tab,112)
		186	return


diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c new file mode 100644 index 000000000000..2b5c4010ce38 --- /dev/null +++ b/arch/x86_64/crypto/aes.c
@@ -0,0 +1,324 @@
		1	/*
		2	* Cryptographic API.
		3	*
		4	* AES Cipher Algorithm.
		5	*
		6	* Based on Brian Gladman's code.
		7	*
		8	* Linux developers:
		9	* Alexander Kjeldaas <astor@fast.no>
		10	* Herbert Valerio Riedel <hvr@hvrlab.org>
		11	* Kyle McMartin <kyle@debian.org>
		12	* Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API).
		13	* Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler)
		14	*
		15	* This program is free software; you can redistribute it and/or modify
		16	* it under the terms of the GNU General Public License as published by
		17	* the Free Software Foundation; either version 2 of the License, or
		18	* (at your option) any later version.
		19	*
		20	* ---------------------------------------------------------------------------
		21	* Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
		22	* All rights reserved.
		23	*
		24	* LICENSE TERMS
		25	*
		26	* The free distribution and use of this software in both source and binary
		27	* form is allowed (with or without changes) provided that:
		28	*
		29	* 1. distributions of this source code include the above copyright
		30	* notice, this list of conditions and the following disclaimer;
		31	*
		32	* 2. distributions in binary form include the above copyright
		33	* notice, this list of conditions and the following disclaimer
		34	* in the documentation and/or other associated materials;
		35	*
		36	* 3. the copyright holder's name is not used to endorse products
		37	* built using this software without specific written permission.
		38	*
		39	* ALTERNATIVELY, provided that this notice is retained in full, this product
		40	* may be distributed under the terms of the GNU General Public License (GPL),
		41	* in which case the provisions of the GPL apply INSTEAD OF those given above.
		42	*
		43	* DISCLAIMER
		44	*
		45	* This software is provided 'as is' with no explicit or implied warranties
		46	* in respect of its properties, including, but not limited to, correctness
		47	* and/or fitness for purpose.
		48	* ---------------------------------------------------------------------------
		49	*/
		50
		51	/* Some changes from the Gladman version:
		52	s/RIJNDAEL(e_key)/E_KEY/g
		53	s/RIJNDAEL(d_key)/D_KEY/g
		54	*/
		55
		56	#include <asm/byteorder.h>
		57	#include <linux/bitops.h>
		58	#include <linux/crypto.h>
		59	#include <linux/errno.h>
		60	#include <linux/init.h>
		61	#include <linux/module.h>
		62	#include <linux/types.h>
		63
			/* Key size limits in bytes, advertised through aes_alg.cra_u.cipher. */
		64	#define AES_MIN_KEY_SIZE 16
		65	#define AES_MAX_KEY_SIZE 32
		66
			/* Cipher block size in bytes, advertised through aes_alg.cra_blocksize. */
		67	#define AES_BLOCK_SIZE 16
		68
		69	/*
		70	* #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
		71	*/
		72	static inline u8 byte(const u32 x, const unsigned n)
		73	{
		74	return x >> (n << 3);
		75	}
		76
		/*
		 * Load a 32-bit little-endian word from the byte pointer x and convert it
		 * to CPU byte order. The pointer is dereferenced as a const __le32.
		 */
		#define u32_in(x) le32_to_cpu(*(const __le32 *)(x))
		78
			/*
			 * Per-key context filled in by aes_set_key().
			 *   key_length - key size in bytes as passed to aes_set_key()
			 *   E          - encryption key schedule, 4 words per round key
			 *   D          - decryption key schedule derived from E
			 * 60 words hold the 15 round keys of the largest (32-byte) key.
			 * NOTE(review): the assembler routines presumably address this layout
			 * with fixed displacements, so do not reorder the fields - confirm.
			 */
		79	struct aes_ctx
		80	{
		81	u32 key_length;
		82	u32 E[60];
		83	u32 D[60];
		84	};
		85
			/* Shorthands; they expand against a local variable named `ctx`. */
		86	#define E_KEY ctx->E
		87	#define D_KEY ctx->D
		88
			/*
			 * Helper tables that only gen_tabs() and f_mult() read: GF(2**8) powers
			 * and logarithms, the S-box and the inverse S-box. They are marked
			 * __initdata.
			 */
		89	static u8 pow_tab[256] __initdata;
		90	static u8 log_tab[256] __initdata;
		91	static u8 sbx_tab[256] __initdata;
		92	static u8 isb_tab[256] __initdata;
			/* Round constants consumed by the loop4/loop6/loop8 key-expansion steps. */
		93	static u32 rco_tab[10];
			/*
			 * Forward and inverse round tables. They are non-static because the
			 * assembler round sequences name them (aes_ft_tab / aes_it_tab).
			 */
		94	u32 aes_ft_tab[4][256];
		95	u32 aes_it_tab[4][256];
		96
			/*
			 * Forward and inverse last-round tables, named by the assembler final
			 * rounds; aes_fl_tab is also read by ls_box() during key expansion.
			 */
		97	u32 aes_fl_tab[4][256];
		98	u32 aes_il_tab[4][256];
		99
		100	static inline u8 f_mult(u8 a, u8 b)
		101	{
		102	u8 aa = log_tab[a], cc = aa + log_tab[b];
		103
		104	return pow_tab[cc + (cc < aa ? 1 : 0)];
		105	}
		106
		/*
		 * GF(2**8) product that also handles zero operands, which have no
		 * logarithm and therefore must not reach f_mult(). Arguments are
		 * parenthesised so that expression arguments expand safely.
		 */
		#define ff_mult(a, b) ((a) && (b) ? f_mult((a), (b)) : 0)
		108
		/*
		 * Substitute every byte of the word x through the last-round table.
		 * aes_fl_tab[k] holds the S-box value rotated left by 8*k bits, so each
		 * substituted byte stays in its own byte lane. x is evaluated four times.
		 */
		#define ls_box(x)				\
			(aes_fl_tab[3][byte(x, 3)] ^		\
			 aes_fl_tab[2][byte(x, 2)] ^		\
			 aes_fl_tab[1][byte(x, 1)] ^		\
			 aes_fl_tab[0][byte(x, 0)])
		114
		115	static void __init gen_tabs(void)
		116	{
		117	u32 i, t;
		118	u8 p, q;
		119
		120	/* log and power tables for GF(2**8) finite field with
		121	0x011b as modular polynomial - the simplest primitive
		122	root is 0x03, used here to generate the tables */
		123
		124	for (i = 0, p = 1; i < 256; ++i) {
		125	pow_tab[i] = (u8)p;
		126	log_tab[p] = (u8)i;
		127
		128	p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
		129	}
		130
		131	log_tab[1] = 0;
		132
		133	for (i = 0, p = 1; i < 10; ++i) {
		134	rco_tab[i] = p;
		135
		136	p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
		137	}
		138
		139	for (i = 0; i < 256; ++i) {
		140	p = (i ? pow_tab[255 - log_tab[i]] : 0);
		141	q = ((p >> 7) \| (p << 1)) ^ ((p >> 6) \| (p << 2));
		142	p ^= 0x63 ^ q ^ ((q >> 6) \| (q << 2));
		143	sbx_tab[i] = p;
		144	isb_tab[p] = (u8)i;
		145	}
		146
		147	for (i = 0; i < 256; ++i) {
		148	p = sbx_tab[i];
		149
		150	t = p;
		151	aes_fl_tab[0][i] = t;
		152	aes_fl_tab[1][i] = rol32(t, 8);
		153	aes_fl_tab[2][i] = rol32(t, 16);
		154	aes_fl_tab[3][i] = rol32(t, 24);
		155
		156	t = ((u32)ff_mult(2, p)) \|
		157	((u32)p << 8) \|
		158	((u32)p << 16) \| ((u32)ff_mult(3, p) << 24);
		159
		160	aes_ft_tab[0][i] = t;
		161	aes_ft_tab[1][i] = rol32(t, 8);
		162	aes_ft_tab[2][i] = rol32(t, 16);
		163	aes_ft_tab[3][i] = rol32(t, 24);
		164
		165	p = isb_tab[i];
		166
		167	t = p;
		168	aes_il_tab[0][i] = t;
		169	aes_il_tab[1][i] = rol32(t, 8);
		170	aes_il_tab[2][i] = rol32(t, 16);
		171	aes_il_tab[3][i] = rol32(t, 24);
		172
		173	t = ((u32)ff_mult(14, p)) \|
		174	((u32)ff_mult(9, p) << 8) \|
		175	((u32)ff_mult(13, p) << 16) \|
		176	((u32)ff_mult(11, p) << 24);
		177
		178	aes_it_tab[0][i] = t;
		179	aes_it_tab[1][i] = rol32(t, 8);
		180	aes_it_tab[2][i] = rol32(t, 16);
		181	aes_it_tab[3][i] = rol32(t, 24);
		182	}
		183	}
		184
		/*
		 * Double each of the four bytes packed in x as GF(2**8) elements: shift
		 * the low seven bits of every byte left by one and fold 0x1b into each
		 * byte whose top bit was set. The whole expansion is parenthesised so the
		 * macro can be used inside a larger expression.
		 */
		#define star_x(x) \
			((((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b))
		186
		/*
		 * imix_col(y, x): compute the inverse column mix of the word x and store
		 * it in y. With u = 2x, v = 4x and w = 8x, the result combines 14x with
		 * byte-rotated copies of 11x, 13x and 9x.
		 *
		 * Requires u32 scratch variables t, u, v and w in the calling scope.
		 * Wrapped in do { } while (0) so that it behaves as a single statement.
		 */
		#define imix_col(y, x)				\
		do {						\
			u = star_x(x);				\
			v = star_x(u);				\
			w = star_x(v);				\
			t = w ^ (x);				\
			(y) = u ^ v ^ w;			\
			(y) ^= ror32(u ^ t, 8) ^		\
			       ror32(v ^ t, 16) ^		\
			       ror32(t, 24);			\
		} while (0)
		196
		197	/* initialise the key schedule from the user supplied key */
		198
		/*
		 * One expansion step for 16-byte keys: derives E_KEY[4i+4 .. 4i+7] from
		 * E_KEY[4i .. 4i+3]. Reads and updates the caller's running word `t` and
		 * uses the caller's `ctx` through E_KEY.
		 */
		#define loop4(i)					\
		do {							\
			t = ror32(t, 8);				\
			t = ls_box(t) ^ rco_tab[(i)];			\
			t ^= E_KEY[4 * (i)];				\
			E_KEY[4 * (i) + 4] = t;				\
			t ^= E_KEY[4 * (i) + 1];			\
			E_KEY[4 * (i) + 5] = t;				\
			t ^= E_KEY[4 * (i) + 2];			\
			E_KEY[4 * (i) + 6] = t;				\
			t ^= E_KEY[4 * (i) + 3];			\
			E_KEY[4 * (i) + 7] = t;				\
		} while (0)
		207
		/*
		 * One expansion step for 24-byte keys: derives E_KEY[6i+6 .. 6i+11] from
		 * E_KEY[6i .. 6i+5]. Reads and updates the caller's running word `t` and
		 * uses the caller's `ctx` through E_KEY.
		 */
		#define loop6(i)					\
		do {							\
			t = ror32(t, 8);				\
			t = ls_box(t) ^ rco_tab[(i)];			\
			t ^= E_KEY[6 * (i)];				\
			E_KEY[6 * (i) + 6] = t;				\
			t ^= E_KEY[6 * (i) + 1];			\
			E_KEY[6 * (i) + 7] = t;				\
			t ^= E_KEY[6 * (i) + 2];			\
			E_KEY[6 * (i) + 8] = t;				\
			t ^= E_KEY[6 * (i) + 3];			\
			E_KEY[6 * (i) + 9] = t;				\
			t ^= E_KEY[6 * (i) + 4];			\
			E_KEY[6 * (i) + 10] = t;			\
			t ^= E_KEY[6 * (i) + 5];			\
			E_KEY[6 * (i) + 11] = t;			\
		} while (0)
		218
		/*
		 * One expansion step for 32-byte keys: derives E_KEY[8i+8 .. 8i+15] from
		 * E_KEY[8i .. 8i+7]. The first half rotates `t` and mixes in a round
		 * constant; the second half only substitutes `t` through ls_box(). Reads
		 * and updates the caller's running word `t` and uses the caller's `ctx`
		 * through E_KEY.
		 */
		#define loop8(i)					\
		do {							\
			t = ror32(t, 8);				\
			t = ls_box(t) ^ rco_tab[(i)];			\
			t ^= E_KEY[8 * (i)];				\
			E_KEY[8 * (i) + 8] = t;				\
			t ^= E_KEY[8 * (i) + 1];			\
			E_KEY[8 * (i) + 9] = t;				\
			t ^= E_KEY[8 * (i) + 2];			\
			E_KEY[8 * (i) + 10] = t;			\
			t ^= E_KEY[8 * (i) + 3];			\
			E_KEY[8 * (i) + 11] = t;			\
			t = E_KEY[8 * (i) + 4] ^ ls_box(t);		\
			E_KEY[8 * (i) + 12] = t;			\
			t ^= E_KEY[8 * (i) + 5];			\
			E_KEY[8 * (i) + 13] = t;			\
			t ^= E_KEY[8 * (i) + 6];			\
			E_KEY[8 * (i) + 14] = t;			\
			t ^= E_KEY[8 * (i) + 7];			\
			E_KEY[8 * (i) + 15] = t;			\
		} while (0)
		232
		233	static int aes_set_key(void ctx_arg, const u8 in_key, unsigned int key_len,
		234	u32 *flags)
		235	{
		236	struct aes_ctx *ctx = ctx_arg;
		237	u32 i, j, t, u, v, w;
		238
		239	if (key_len != 16 && key_len != 24 && key_len != 32) {
		240	*flags \|= CRYPTO_TFM_RES_BAD_KEY_LEN;
		241	return -EINVAL;
		242	}
		243
		244	ctx->key_length = key_len;
		245
		246	D_KEY[key_len + 24] = E_KEY[0] = u32_in(in_key);
		247	D_KEY[key_len + 25] = E_KEY[1] = u32_in(in_key + 4);
		248	D_KEY[key_len + 26] = E_KEY[2] = u32_in(in_key + 8);
		249	D_KEY[key_len + 27] = E_KEY[3] = u32_in(in_key + 12);
		250
		251	switch (key_len) {
		252	case 16:
		253	t = E_KEY[3];
		254	for (i = 0; i < 10; ++i)
		255	loop4(i);
		256	break;
		257
		258	case 24:
		259	E_KEY[4] = u32_in(in_key + 16);
		260	t = E_KEY[5] = u32_in(in_key + 20);
		261	for (i = 0; i < 8; ++i)
		262	loop6 (i);
		263	break;
		264
		265	case 32:
		266	E_KEY[4] = u32_in(in_key + 16);
		267	E_KEY[5] = u32_in(in_key + 20);
		268	E_KEY[6] = u32_in(in_key + 24);
		269	t = E_KEY[7] = u32_in(in_key + 28);
		270	for (i = 0; i < 7; ++i)
		271	loop8(i);
		272	break;
		273	}
		274
		275	D_KEY[0] = E_KEY[key_len + 24];
		276	D_KEY[1] = E_KEY[key_len + 25];
		277	D_KEY[2] = E_KEY[key_len + 26];
		278	D_KEY[3] = E_KEY[key_len + 27];
		279
		280	for (i = 4; i < key_len + 24; ++i) {
		281	j = key_len + 24 - (i & ~3) + (i & 3);
		282	imix_col(D_KEY[j], E_KEY[i]);
		283	}
		284
		285	return 0;
		286	}
		287
		288	extern void aes_encrypt(void ctx_arg, u8 out, const u8 *in);
		289	extern void aes_decrypt(void ctx_arg, u8 out, const u8 *in);
		290
			/*
			 * Algorithm descriptor registered by aes_init(): a cipher named "aes"
			 * with a block size of AES_BLOCK_SIZE bytes, a per-key context of
			 * sizeof(struct aes_ctx) bytes, keys between AES_MIN_KEY_SIZE and
			 * AES_MAX_KEY_SIZE bytes, C key setup and assembler block routines.
			 */
		291	static struct crypto_alg aes_alg = {
		292	.cra_name = "aes",
		293	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
		294	.cra_blocksize = AES_BLOCK_SIZE,
		295	.cra_ctxsize = sizeof(struct aes_ctx),
		296	.cra_module = THIS_MODULE,
		297	.cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
		298	.cra_u = {
		299	.cipher = {
		300	.cia_min_keysize = AES_MIN_KEY_SIZE,
		301	.cia_max_keysize = AES_MAX_KEY_SIZE,
		302	.cia_setkey = aes_set_key,
		303	.cia_encrypt = aes_encrypt,
		304	.cia_decrypt = aes_decrypt
		305	}
		306	}
		307	};
		308
		309	static int __init aes_init(void)
		310	{
		311	gen_tabs();
		312	return crypto_register_alg(&aes_alg);
		313	}
		314
			/* Module unload hook: unregister the algorithm registered by aes_init(). */
		315	static void __exit aes_fini(void)
		316	{
		317	crypto_unregister_alg(&aes_alg);
		318	}
		319
			/* Hook aes_init()/aes_fini() up as the module's load and unload entry points. */
		320	module_init(aes_init);
		321	module_exit(aes_fini);
		322
			/* Module metadata. */
		323	MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
		324	MODULE_LICENSE("GPL");


diff --git a/crypto/Kconfig b/crypto/Kconfig index 90d6089d60ed..256c0b1fed10 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig
@@ -146,7 +146,7 @@ config CRYPTO_SERPENT
146		146
147	config CRYPTO_AES	147	config CRYPTO_AES
148	tristate "AES cipher algorithms"	148	tristate "AES cipher algorithms"
149	depends on CRYPTO && !((X86 || UML_X86) && !64BIT)	149	depends on CRYPTO && !(X86 || UML_X86)
150	help	150	help
151	AES cipher algorithms (FIPS-197). AES uses the Rijndael	151	AES cipher algorithms (FIPS-197). AES uses the Rijndael
152	algorithm.	152	algorithm.
@@ -184,6 +184,26 @@ config CRYPTO_AES_586
184		184
185	See <http://csrc.nist.gov/encryption/aes/> for more information.	185	See <http://csrc.nist.gov/encryption/aes/> for more information.
186		186
		# AES built from the x86_64 assembler implementation; offered only on
		# 64-bit X86 / UML_X86 builds.
		config CRYPTO_AES_X86_64
			tristate "AES cipher algorithms (x86_64)"
			depends on CRYPTO && ((X86 || UML_X86) && 64BIT)
			help
			  AES cipher algorithms (FIPS-197). AES uses the Rijndael
			  algorithm.

			  Rijndael appears to be consistently a very good performer in
			  both hardware and software across a wide range of computing
			  environments regardless of its use in feedback or non-feedback
			  modes. Its key setup time is excellent, and its key agility is
			  good. Rijndael's very low memory requirements make it very well
			  suited for restricted-space environments, in which it also
			  demonstrates excellent performance. Rijndael's operations are
			  among the easiest to defend against power and timing attacks.

			  The AES specifies three key sizes: 128, 192 and 256 bits

			  See <http://csrc.nist.gov/encryption/aes/> for more information.
		206
187	config CRYPTO_CAST5	207	config CRYPTO_CAST5
188	tristate "CAST5 (CAST-128) cipher algorithm"	208	tristate "CAST5 (CAST-128) cipher algorithm"
189	depends on CRYPTO	209	depends on CRYPTO