Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig.debug | 19
-rw-r--r--  arch/x86/boot/compressed/cmdline.c | 4
-rw-r--r--  arch/x86/boot/compressed/early_serial_console.c | 4
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 198
-rw-r--r--  arch/x86/boot/compressed/head_32.S | 10
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 10
-rw-r--r--  arch/x86/boot/compressed/misc.c | 31
-rw-r--r--  arch/x86/boot/compressed/misc.h | 27
-rw-r--r--  arch/x86/boot/header.S | 11
-rw-r--r--  arch/x86/crypto/Makefile | 14
-rw-r--r--  arch/x86/crypto/ablk_helper.c | 149
-rw-r--r--  arch/x86/crypto/aes_glue.c | 2
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 110
-rw-r--r--  arch/x86/crypto/camellia_glue.c | 355
-rw-r--r--  arch/x86/crypto/glue_helper.c | 307
-rw-r--r--  arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 704
-rw-r--r--  arch/x86/crypto/serpent_avx_glue.c | 636
-rw-r--r--  arch/x86/crypto/serpent_sse2_glue.c | 513
-rw-r--r--  arch/x86/crypto/sha1_ssse3_asm.S | 2
-rw-r--r--  arch/x86/crypto/sha1_ssse3_glue.c | 6
-rw-r--r--  arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 300
-rw-r--r--  arch/x86/crypto/twofish_avx_glue.c | 624
-rw-r--r--  arch/x86/crypto/twofish_glue_3way.c | 409
-rw-r--r--  arch/x86/include/asm/apic.h | 2
-rw-r--r--  arch/x86/include/asm/bootparam.h | 1
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 2
-rw-r--r--  arch/x86/include/asm/crypto/ablk_helper.h | 31
-rw-r--r--  arch/x86/include/asm/crypto/aes.h (renamed from arch/x86/include/asm/aes.h) | 0
-rw-r--r--  arch/x86/include/asm/crypto/glue_helper.h | 115
-rw-r--r--  arch/x86/include/asm/crypto/serpent-avx.h | 32
-rw-r--r--  arch/x86/include/asm/crypto/serpent-sse2.h (renamed from arch/x86/include/asm/serpent.h) | 4
-rw-r--r--  arch/x86/include/asm/crypto/twofish.h | 46
-rw-r--r--  arch/x86/include/asm/entry_arch.h | 9
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 11
-rw-r--r--  arch/x86/include/asm/paravirt.h | 5
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 3
-rw-r--r--  arch/x86/include/asm/percpu.h | 17
-rw-r--r--  arch/x86/include/asm/processor.h | 13
-rw-r--r--  arch/x86/include/asm/smp.h | 16
-rw-r--r--  arch/x86/include/asm/tlb.h | 9
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 49
-rw-r--r--  arch/x86/include/asm/uv/uv.h | 5
-rw-r--r--  arch/x86/kernel/apic/apic.c | 6
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/common.c | 31
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 9
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 176
-rw-r--r--  arch/x86/kernel/cpu/sched.c | 55
-rw-r--r--  arch/x86/kernel/entry_64.S | 18
-rw-r--r--  arch/x86/kernel/irqinit.c | 73
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 8
-rw-r--r--  arch/x86/mm/pageattr.c | 10
-rw-r--r--  arch/x86/mm/tlb.c | 401
-rw-r--r--  arch/x86/platform/efi/efi.c | 30
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c | 6
-rw-r--r--  arch/x86/xen/mmu.c | 12
57 files changed, 4004 insertions, 1650 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index e46c2147397f..b322f124ee3c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -129,6 +129,25 @@ config DOUBLEFAULT
 	  option saves about 4k and might cause you much additional grey
 	  hair.
 
+config DEBUG_TLBFLUSH
+	bool "Set upper limit of TLB entries to flush one-by-one"
+	depends on DEBUG_KERNEL && (X86_64 || X86_INVLPG)
+	---help---
+
+	  X86-only for now.
+
+	  This option allows the user to tune the amount of TLB entries the
+	  kernel flushes one-by-one instead of doing a full TLB flush. In
+	  certain situations, the former is cheaper. This is controlled by the
+	  tlb_flushall_shift knob under /sys/kernel/debug/x86. If you set it
+	  to -1, the code flushes the whole TLB unconditionally. Otherwise,
+	  for positive values of it, the kernel will use single TLB entry
+	  invalidating instructions according to the following formula:
+
+	  flush_entries <= active_tlb_entries / 2^tlb_flushall_shift
+
+	  If in doubt, say "N".
+
 config IOMMU_DEBUG
 	bool "Enable IOMMU debugging"
 	depends on GART_IOMMU && DEBUG_KERNEL
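
[Note on the DEBUG_TLBFLUSH formula above: it is a simple threshold test. A
minimal C sketch under hypothetical names — this is not the kernel's actual
implementation, which this series adds elsewhere (see the arch/x86/mm/tlb.c
entry in the diffstat):

	/* Illustrative only; all identifiers here are made up. */
	static int flush_one_by_one(long flush_entries, long active_tlb_entries,
				    int tlb_flushall_shift)
	{
		if (tlb_flushall_shift < 0)
			return 0;	/* -1: always flush the whole TLB */

		/* flush_entries <= active_tlb_entries / 2^tlb_flushall_shift */
		return flush_entries <= (active_tlb_entries >> tlb_flushall_shift);
	}

For example, with 512 active TLB entries and a shift of 5, at most 16 entries
are invalidated one-by-one before a full flush is considered cheaper.]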
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index cb62f786990d..10f6b1178c68 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,5 +1,7 @@
 #include "misc.h"
 
+#ifdef CONFIG_EARLY_PRINTK
+
 static unsigned long fs;
 static inline void set_fs(unsigned long seg)
 {
@@ -19,3 +21,5 @@ int cmdline_find_option_bool(const char *option)
 {
 	return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
 }
+
+#endif
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
index 261e81fb9582..d3d003cb5481 100644
--- a/arch/x86/boot/compressed/early_serial_console.c
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -1,5 +1,9 @@
 #include "misc.h"
 
+#ifdef CONFIG_EARLY_PRINTK
+
 int early_serial_base;
 
 #include "../early_serial_console.c"
+
+#endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 4e85f5f85837..b3e0227df2c9 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -729,32 +729,68 @@ fail:
  * need to create one ourselves (usually the bootloader would create
  * one for us).
  */
-static efi_status_t make_boot_params(struct boot_params *boot_params,
-				     efi_loaded_image_t *image,
-				     void *handle)
+struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
 {
-	struct efi_info *efi = &boot_params->efi_info;
-	struct apm_bios_info *bi = &boot_params->apm_bios_info;
-	struct sys_desc_table *sdt = &boot_params->sys_desc_table;
-	struct e820entry *e820_map = &boot_params->e820_map[0];
-	struct e820entry *prev = NULL;
-	struct setup_header *hdr = &boot_params->hdr;
-	unsigned long size, key, desc_size, _size;
-	efi_memory_desc_t *mem_map;
-	void *options = image->load_options;
-	u32 load_options_size = image->load_options_size / 2; /* ASCII */
+	struct boot_params *boot_params;
+	struct sys_desc_table *sdt;
+	struct apm_bios_info *bi;
+	struct setup_header *hdr;
+	struct efi_info *efi;
+	efi_loaded_image_t *image;
+	void *options;
+	u32 load_options_size;
+	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
 	int options_size = 0;
 	efi_status_t status;
-	__u32 desc_version;
 	unsigned long cmdline;
-	u8 nr_entries;
 	u16 *s2;
 	u8 *s1;
 	int i;
 
+	sys_table = _table;
+
+	/* Check if we were booted by the EFI firmware */
+	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		return NULL;
+
+	status = efi_call_phys3(sys_table->boottime->handle_protocol,
+				handle, &proto, (void *)&image);
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+		return NULL;
+	}
+
+	status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to alloc lowmem for boot params\n");
+		return NULL;
+	}
+
+	memset(boot_params, 0x0, 0x4000);
+
+	hdr = &boot_params->hdr;
+	efi = &boot_params->efi_info;
+	bi = &boot_params->apm_bios_info;
+	sdt = &boot_params->sys_desc_table;
+
+	/* Copy the second sector to boot_params */
+	memcpy(&hdr->jump, image->image_base + 512, 512);
+
+	/*
+	 * Fill out some of the header fields ourselves because the
+	 * EFI firmware loader doesn't load the first sector.
+	 */
+	hdr->root_flags = 1;
+	hdr->vid_mode = 0xffff;
+	hdr->boot_flag = 0xAA55;
+
+	hdr->code32_start = (__u64)(unsigned long)image->image_base;
+
 	hdr->type_of_loader = 0x21;
 
 	/* Convert unicode cmdline to ascii */
+	options = image->load_options;
+	load_options_size = image->load_options_size / 2; /* ASCII */
 	cmdline = 0;
 	s2 = (u16 *)options;
 
@@ -791,18 +827,36 @@ static efi_status_t make_boot_params(struct boot_params *boot_params,
 	hdr->ramdisk_image = 0;
 	hdr->ramdisk_size = 0;
 
-	status = handle_ramdisks(image, hdr);
-	if (status != EFI_SUCCESS)
-		goto free_cmdline;
-
-	setup_graphics(boot_params);
-
 	/* Clear APM BIOS info */
 	memset(bi, 0, sizeof(*bi));
 
 	memset(sdt, 0, sizeof(*sdt));
 
-	memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
+	status = handle_ramdisks(image, hdr);
+	if (status != EFI_SUCCESS)
+		goto fail2;
+
+	return boot_params;
+fail2:
+	if (options_size)
+		low_free(options_size, hdr->cmd_line_ptr);
+fail:
+	low_free(0x4000, (unsigned long)boot_params);
+	return NULL;
+}
+
+static efi_status_t exit_boot(struct boot_params *boot_params,
+			      void *handle)
+{
+	struct efi_info *efi = &boot_params->efi_info;
+	struct e820entry *e820_map = &boot_params->e820_map[0];
+	struct e820entry *prev = NULL;
+	unsigned long size, key, desc_size, _size;
+	efi_memory_desc_t *mem_map;
+	efi_status_t status;
+	__u32 desc_version;
+	u8 nr_entries;
+	int i;
 
 	size = sizeof(*mem_map) * 32;
 
@@ -811,7 +865,7 @@ again:
 	_size = size;
 	status = low_alloc(size, 1, (unsigned long *)&mem_map);
 	if (status != EFI_SUCCESS)
-		goto free_cmdline;
+		return status;
 
 	status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
 				mem_map, &key, &desc_size, &desc_version);
@@ -823,6 +877,7 @@ again:
 	if (status != EFI_SUCCESS)
 		goto free_mem_map;
 
+	memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
 	efi->efi_systab = (unsigned long)sys_table;
 	efi->efi_memdesc_size = desc_size;
 	efi->efi_memdesc_version = desc_version;
@@ -906,61 +961,13 @@ again:
 
 free_mem_map:
 	low_free(_size, (unsigned long)mem_map);
-free_cmdline:
-	if (options_size)
-		low_free(options_size, hdr->cmd_line_ptr);
-fail:
 	return status;
 }
 
-/*
- * On success we return a pointer to a boot_params structure, and NULL
- * on failure.
- */
-struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
+static efi_status_t relocate_kernel(struct setup_header *hdr)
 {
-	struct boot_params *boot_params;
 	unsigned long start, nr_pages;
-	struct desc_ptr *gdt, *idt;
-	efi_loaded_image_t *image;
-	struct setup_header *hdr;
 	efi_status_t status;
-	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
-	struct desc_struct *desc;
-
-	sys_table = _table;
-
-	/* Check if we were booted by the EFI firmware */
-	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
-		goto fail;
-
-	status = efi_call_phys3(sys_table->boottime->handle_protocol,
-				handle, &proto, (void *)&image);
-	if (status != EFI_SUCCESS) {
-		efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
-		goto fail;
-	}
-
-	status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
-	if (status != EFI_SUCCESS) {
-		efi_printk("Failed to alloc lowmem for boot params\n");
-		goto fail;
-	}
-
-	memset(boot_params, 0x0, 0x4000);
-
-	hdr = &boot_params->hdr;
-
-	/* Copy the second sector to boot_params */
-	memcpy(&hdr->jump, image->image_base + 512, 512);
-
-	/*
-	 * Fill out some of the header fields ourselves because the
-	 * EFI firmware loader doesn't load the first sector.
-	 */
-	hdr->root_flags = 1;
-	hdr->vid_mode = 0xffff;
-	hdr->boot_flag = 0xAA55;
 
 	/*
 	 * The EFI firmware loader could have placed the kernel image
@@ -978,16 +985,40 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 	if (status != EFI_SUCCESS) {
 		status = low_alloc(hdr->init_size, hdr->kernel_alignment,
 				   &start);
-		if (status != EFI_SUCCESS) {
+		if (status != EFI_SUCCESS)
 			efi_printk("Failed to alloc mem for kernel\n");
-			goto fail;
-		}
 	}
 
+	if (status == EFI_SUCCESS)
+		memcpy((void *)start, (void *)(unsigned long)hdr->code32_start,
+		       hdr->init_size);
+
+	hdr->pref_address = hdr->code32_start;
 	hdr->code32_start = (__u32)start;
-	hdr->pref_address = (__u64)(unsigned long)image->image_base;
 
-	memcpy((void *)start, image->image_base, image->image_size);
+	return status;
+}
+
+/*
+ * On success we return a pointer to a boot_params structure, and NULL
+ * on failure.
+ */
+struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
+			     struct boot_params *boot_params)
+{
+	struct desc_ptr *gdt, *idt;
+	efi_loaded_image_t *image;
+	struct setup_header *hdr = &boot_params->hdr;
+	efi_status_t status;
+	struct desc_struct *desc;
+
+	sys_table = _table;
+
+	/* Check if we were booted by the EFI firmware */
+	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		goto fail;
+
+	setup_graphics(boot_params);
 
 	status = efi_call_phys3(sys_table->boottime->allocate_pool,
 				EFI_LOADER_DATA, sizeof(*gdt),
@@ -1015,7 +1046,18 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 	idt->size = 0;
 	idt->address = 0;
 
-	status = make_boot_params(boot_params, image, handle);
+	/*
+	 * If the kernel isn't already loaded at the preferred load
+	 * address, relocate it.
+	 */
+	if (hdr->pref_address != hdr->code32_start) {
+		status = relocate_kernel(hdr);
+
+		if (status != EFI_SUCCESS)
+			goto fail;
+	}
+
+	status = exit_boot(boot_params, handle);
 	if (status != EFI_SUCCESS)
 		goto fail;
 
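
[Note on the eboot.c refactor above: the old monolithic efi_main() is split
into make_boot_params(), relocate_kernel() and exit_boot(). A C sketch of the
resulting handover flow — illustrative only; the real entry stubs are in the
head_32.S/head_64.S diffs below:

	struct boot_params *bp;

	/* the firmware-invoked stub calls this first */
	bp = make_boot_params(handle, sys_table);	/* allocate + fill setup header */
	if (bp)
		/* relocates the kernel if needed, then calls exit_boot() */
		bp = efi_main(handle, sys_table, bp);
]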
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index c85e3ac99bba..aa4aaf1b2380 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -42,6 +42,16 @@ ENTRY(startup_32)
 	 */
 	add	$0x4, %esp
 
+	call	make_boot_params
+	cmpl	$0, %eax
+	je	1f
+	movl	0x4(%esp), %esi
+	movl	(%esp), %ecx
+	pushl	%eax
+	pushl	%esi
+	pushl	%ecx
+
+	.org 0x30,0x90
 	call	efi_main
 	cmpl	$0, %eax
 	movl	%eax, %esi
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 87e03a13d8e3..2c4b171eec33 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -209,6 +209,16 @@ ENTRY(startup_64)
 	.org 0x210
 	mov	%rcx, %rdi
 	mov	%rdx, %rsi
+	pushq	%rdi
+	pushq	%rsi
+	call	make_boot_params
+	cmpq	$0,%rax
+	je	1f
+	mov	%rax, %rdx
+	popq	%rsi
+	popq	%rdi
+
+	.org 0x230,0x90
 	call	efi_main
 	movq	%rax,%rsi
 	cmpq	$0,%rax
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 7116dcba0c9e..88f7ff6da404 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -108,8 +108,6 @@ static void error(char *m);
  * This is set up by the setup-routine at boot-time
  */
 struct boot_params *real_mode;		/* Pointer to real-mode data */
-static int quiet;
-static int debug;
 
 void *memset(void *s, int c, size_t n);
 void *memcpy(void *dest, const void *src, size_t n);
@@ -170,15 +168,11 @@ static void serial_putchar(int ch)
 	outb(ch, early_serial_base + TXR);
 }
 
-void __putstr(int error, const char *s)
+void __putstr(const char *s)
 {
 	int x, y, pos;
 	char c;
 
-#ifndef CONFIG_X86_VERBOSE_BOOTUP
-	if (!error)
-		return;
-#endif
 	if (early_serial_base) {
 		const char *str = s;
 		while (*str) {
@@ -265,9 +259,9 @@ void *memcpy(void *dest, const void *src, size_t n)
 
 static void error(char *x)
 {
-	__putstr(1, "\n\n");
-	__putstr(1, x);
-	__putstr(1, "\n\n -- System halted");
+	error_putstr("\n\n");
+	error_putstr(x);
+	error_putstr("\n\n -- System halted");
 
 	while (1)
 		asm("hlt");
@@ -294,8 +288,7 @@ static void parse_elf(void *output)
 		return;
 	}
 
-	if (!quiet)
-		putstr("Parsing ELF... ");
+	debug_putstr("Parsing ELF... ");
 
 	phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
 	if (!phdrs)
@@ -332,11 +325,6 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 {
 	real_mode = rmode;
 
-	if (cmdline_find_option_bool("quiet"))
-		quiet = 1;
-	if (cmdline_find_option_bool("debug"))
-		debug = 1;
-
 	if (real_mode->screen_info.orig_video_mode == 7) {
 		vidmem = (char *) 0xb0000;
 		vidport = 0x3b4;
@@ -349,8 +337,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	cols = real_mode->screen_info.orig_video_cols;
 
 	console_init();
-	if (debug)
-		putstr("early console in decompress_kernel\n");
+	debug_putstr("early console in decompress_kernel\n");
 
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
@@ -369,11 +356,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 		error("Wrong destination address");
 #endif
 
-	if (!quiet)
-		putstr("\nDecompressing Linux... ");
+	debug_putstr("\nDecompressing Linux... ");
 	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
 	parse_elf(output);
-	if (!quiet)
-		putstr("done.\nBooting the kernel.\n");
+	debug_putstr("done.\nBooting the kernel.\n");
 	return;
 }
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 3f19c81a6203..0e6dc0ee0eea 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -24,9 +24,21 @@
 
 /* misc.c */
 extern struct boot_params *real_mode;		/* Pointer to real-mode data */
-void __putstr(int error, const char *s);
-#define putstr(__x)  __putstr(0, __x)
-#define puts(__x)  __putstr(0, __x)
+void __putstr(const char *s);
+#define error_putstr(__x)	__putstr(__x)
+
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+
+#define debug_putstr(__x)	__putstr(__x)
+
+#else
+
+static inline void debug_putstr(const char *s)
+{ }
+
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
 
 /* cmdline.c */
 int cmdline_find_option(const char *option, char *buffer, int bufsize);
@@ -36,4 +48,13 @@ int cmdline_find_option_bool(const char *option);
 extern int early_serial_base;
 void console_init(void);
 
+#else
+
+/* early_serial_console.c */
+static const int early_serial_base;
+static inline void console_init(void)
+{ }
+
+#endif
+
 #endif
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index efe5acfc79c3..b4e15dd6786a 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -283,7 +283,7 @@ _start:
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x020a		# header version number (>= 0x0105)
+		.word	0x020b		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -401,18 +401,13 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
 #define INIT_SIZE VO_INIT_SIZE
 #endif
 init_size:		.long INIT_SIZE		# kernel initialization size
+handover_offset:	.long 0x30		# offset to the handover
+					# protocol entry point
 
 # End of setup header #####################################################
 
 	.section ".entrytext", "ax"
 start_of_setup:
-#ifdef SAFE_RESET_DISK_CONTROLLER
-# Reset the disk controller.
-	movw	$0x0000, %ax	# Reset disk controller
-	movb	$0x80, %dl	# All disks
-	int	$0x13
-#endif
-
 # Force %es = %ds
 	movw	%ds, %ax
 	movw	%ax, %es
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e191ac048b59..e908e5de82d3 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,9 @@
 # Arch-specific CryptoAPI modules.
 #
 
+obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
+obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
+
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -12,8 +15,10 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
+obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
+obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 
@@ -30,16 +35,11 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
+twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
+serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
-
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
-
-# enable AVX support only when $(AS) can actually assemble the instructions
-ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
-AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
-CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
-endif
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c
new file mode 100644
index 000000000000..43282fe04a8b
--- /dev/null
+++ b/arch/x86/crypto/ablk_helper.c
@@ -0,0 +1,149 @@
+/*
+ * Shared async block cipher helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Based on aesni-intel_glue.c by:
+ *  Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <crypto/algapi.h>
+#include <crypto/cryptd.h>
+#include <asm/i387.h>
+#include <asm/crypto/ablk_helper.h>
+
+int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+		 unsigned int key_len)
+{
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
+	int err;
+
+	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+				    & CRYPTO_TFM_REQ_MASK);
+	err = crypto_ablkcipher_setkey(child, key, key_len);
+	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+				    & CRYPTO_TFM_RES_MASK);
+	return err;
+}
+EXPORT_SYMBOL_GPL(ablk_set_key);
+
+int __ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct blkcipher_desc desc;
+
+	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+	desc.info = req->info;
+	desc.flags = 0;
+
+	return crypto_blkcipher_crt(desc.tfm)->encrypt(
+		&desc, req->dst, req->src, req->nbytes);
+}
+EXPORT_SYMBOL_GPL(__ablk_encrypt);
+
+int ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_encrypt(cryptd_req);
+	} else {
+		return __ablk_encrypt(req);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_encrypt);
+
+int ablk_decrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_decrypt(cryptd_req);
+	} else {
+		struct blkcipher_desc desc;
+
+		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+		desc.info = req->info;
+		desc.flags = 0;
+
+		return crypto_blkcipher_crt(desc.tfm)->decrypt(
+			&desc, req->dst, req->src, req->nbytes);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_decrypt);
+
+void ablk_exit(struct crypto_tfm *tfm)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+EXPORT_SYMBOL_GPL(ablk_exit);
+
+int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	ctx->cryptd_tfm = cryptd_tfm;
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ablk_init_common);
+
+int ablk_init(struct crypto_tfm *tfm)
+{
+	char drv_name[CRYPTO_MAX_ALG_NAME];
+
+	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+		 crypto_tfm_alg_driver_name(tfm));
+
+	return ablk_init_common(tfm, drv_name);
+}
+EXPORT_SYMBOL_GPL(ablk_init);
+
+MODULE_LICENSE("GPL");
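
[Note: a sketch of how a cipher module is expected to wire these helpers into
an async crypto_alg, modeled on the aesni-intel_glue.c conversion later in
this diff. The "__driver-ecb-aes-aesni" name and the field values shown are
taken from that file; the alg definition is abbreviated, not complete:

	static int ablk_ecb_init(struct crypto_tfm *tfm)
	{
		/* bind the async wrapper to the internal synchronous driver */
		return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
	}

	static struct crypto_alg ablk_ecb_alg = {
		.cra_name	= "ecb(aes)",
		.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
		.cra_ctxsize	= sizeof(struct async_helper_ctx),
		.cra_type	= &crypto_ablkcipher_type,
		.cra_init	= ablk_ecb_init,
		.cra_exit	= ablk_exit,
		.cra_u = {
			.ablkcipher = {
				.setkey		= ablk_set_key,
				.encrypt	= ablk_encrypt,
				.decrypt	= ablk_decrypt,
			},
		},
	};
]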
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 8efcf42a9d7e..59b37deb8c8d 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -5,7 +5,7 @@
 
 #include <linux/module.h>
 #include <crypto/aes.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
 
 asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
 asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ac7f5cd019e8..34fdcff4d2c8 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -30,7 +30,8 @@
 #include <crypto/ctr.h>
 #include <asm/cpu_device_id.h>
 #include <asm/i387.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
+#include <asm/crypto/ablk_helper.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
 #include <linux/workqueue.h>
@@ -52,10 +53,6 @@
 #define HAS_XTS
 #endif
 
-struct async_aes_ctx {
-	struct cryptd_ablkcipher *cryptd_tfm;
-};
-
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
@@ -377,87 +374,6 @@ static int ctr_crypt(struct blkcipher_desc *desc,
 }
 #endif
 
-static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
-			unsigned int key_len)
-{
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
-	int err;
-
-	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
-				    & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ablkcipher_setkey(child, key, key_len);
-	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
-				    & CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_ablkcipher_encrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-		return crypto_blkcipher_crt(desc.tfm)->encrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static int ablk_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_ablkcipher_decrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-		return crypto_blkcipher_crt(desc.tfm)->decrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static void ablk_exit(struct crypto_tfm *tfm)
-{
-	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-
-static int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
-{
-	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-
-	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
-		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
-	return 0;
-}
-
 static int ablk_ecb_init(struct crypto_tfm *tfm)
 {
 	return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
@@ -613,7 +529,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 	struct aesni_rfc4106_gcm_ctx *child_ctx =
                                  aesni_rfc4106_gcm_ctx_get(cryptd_child);
-	u8 *new_key_mem = NULL;
+	u8 *new_key_align, *new_key_mem = NULL;
 
 	if (key_len < 4) {
 		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -637,9 +553,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 		if (!new_key_mem)
 			return -ENOMEM;
 
-		new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
-		memcpy(new_key_mem, key, key_len);
-		key = new_key_mem;
+		new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
+		memcpy(new_key_align, key, key_len);
+		key = new_key_align;
 	}
 
 	if (!irq_fpu_usable())
@@ -968,7 +884,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -989,7 +905,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1033,7 +949,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1098,7 +1014,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1126,7 +1042,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1150,7 +1066,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1174,7 +1090,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 3306dc0b139e..eeb2b3b743e9 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -5,10 +5,6 @@
5 * 5 *
6 * Camellia parts based on code by: 6 * Camellia parts based on code by:
7 * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) 7 * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
8 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
9 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
10 * CTR part based on code (crypto/ctr.c) by:
11 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
12 * 8 *
13 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -34,9 +30,9 @@
34#include <linux/module.h> 30#include <linux/module.h>
35#include <linux/types.h> 31#include <linux/types.h>
36#include <crypto/algapi.h> 32#include <crypto/algapi.h>
37#include <crypto/b128ops.h>
38#include <crypto/lrw.h> 33#include <crypto/lrw.h>
39#include <crypto/xts.h> 34#include <crypto/xts.h>
35#include <asm/crypto/glue_helper.h>
40 36
41#define CAMELLIA_MIN_KEY_SIZE 16 37#define CAMELLIA_MIN_KEY_SIZE 16
42#define CAMELLIA_MAX_KEY_SIZE 32 38#define CAMELLIA_MAX_KEY_SIZE 32
@@ -1312,307 +1308,128 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
1312 &tfm->crt_flags); 1308 &tfm->crt_flags);
1313} 1309}
1314 1310
1315static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, 1311static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
1316 void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
1317 void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
1318{ 1312{
1319 struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 1313 u128 iv = *src;
1320 unsigned int bsize = CAMELLIA_BLOCK_SIZE;
1321 unsigned int nbytes;
1322 int err;
1323
1324 err = blkcipher_walk_virt(desc, walk);
1325
1326 while ((nbytes = walk->nbytes)) {
1327 u8 *wsrc = walk->src.virt.addr;
1328 u8 *wdst = walk->dst.virt.addr;
1329
1330 /* Process two block batch */
1331 if (nbytes >= bsize * 2) {
1332 do {
1333 fn_2way(ctx, wdst, wsrc);
1334
1335 wsrc += bsize * 2;
1336 wdst += bsize * 2;
1337 nbytes -= bsize * 2;
1338 } while (nbytes >= bsize * 2);
1339
1340 if (nbytes < bsize)
1341 goto done;
1342 }
1343
1344 /* Handle leftovers */
1345 do {
1346 fn(ctx, wdst, wsrc);
1347
1348 wsrc += bsize;
1349 wdst += bsize;
1350 nbytes -= bsize;
1351 } while (nbytes >= bsize);
1352
1353done:
1354 err = blkcipher_walk_done(desc, walk, nbytes);
1355 }
1356
1357 return err;
1358}
1359
1360static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1361 struct scatterlist *src, unsigned int nbytes)
1362{
1363 struct blkcipher_walk walk;
1364
1365 blkcipher_walk_init(&walk, dst, src, nbytes);
1366 return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
1367}
1368 1314
1369static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 1315 camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
1370 struct scatterlist *src, unsigned int nbytes)
1371{
1372 struct blkcipher_walk walk;
1373
1374 blkcipher_walk_init(&walk, dst, src, nbytes);
1375 return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
1376}
1377 1316
1378static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, 1317 u128_xor(&dst[1], &dst[1], &iv);
1379 struct blkcipher_walk *walk)
1380{
1381 struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
1382 unsigned int bsize = CAMELLIA_BLOCK_SIZE;
1383 unsigned int nbytes = walk->nbytes;
1384 u128 *src = (u128 *)walk->src.virt.addr;
1385 u128 *dst = (u128 *)walk->dst.virt.addr;
1386 u128 *iv = (u128 *)walk->iv;
1387
1388 do {
1389 u128_xor(dst, src, iv);
1390 camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
1391 iv = dst;
1392
1393 src += 1;
1394 dst += 1;
1395 nbytes -= bsize;
1396 } while (nbytes >= bsize);
1397
1398 u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
1399 return nbytes;
1400} 1318}
1401 1319
1402static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 1320static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
1403 struct scatterlist *src, unsigned int nbytes)
1404{ 1321{
1405 struct blkcipher_walk walk; 1322 be128 ctrblk;
1406 int err;
1407 1323
1408 blkcipher_walk_init(&walk, dst, src, nbytes); 1324 if (dst != src)
1409 err = blkcipher_walk_virt(desc, &walk); 1325 *dst = *src;
1410 1326
1411 while ((nbytes = walk.nbytes)) { 1327 u128_to_be128(&ctrblk, iv);
1412 nbytes = __cbc_encrypt(desc, &walk); 1328 u128_inc(iv);
1413 err = blkcipher_walk_done(desc, &walk, nbytes);
1414 }
1415 1329
1416 return err; 1330 camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
1417} 1331}
1418 1332
1419static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, 1333static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
1420 struct blkcipher_walk *walk) 1334 u128 *iv)
1421{ 1335{
1422 struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 1336 be128 ctrblks[2];
1423 unsigned int bsize = CAMELLIA_BLOCK_SIZE;
1424 unsigned int nbytes = walk->nbytes;
1425 u128 *src = (u128 *)walk->src.virt.addr;
1426 u128 *dst = (u128 *)walk->dst.virt.addr;
1427 u128 ivs[2 - 1];
1428 u128 last_iv;
1429 1337
1430 /* Start of the last block. */ 1338 if (dst != src) {
1431 src += nbytes / bsize - 1; 1339 dst[0] = src[0];
1432 dst += nbytes / bsize - 1; 1340 dst[1] = src[1];
1433
1434 last_iv = *src;
1435
1436 /* Process two block batch */
1437 if (nbytes >= bsize * 2) {
1438 do {
1439 nbytes -= bsize * (2 - 1);
1440 src -= 2 - 1;
1441 dst -= 2 - 1;
1442
1443 ivs[0] = src[0];
1444
1445 camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
1446
1447 u128_xor(dst + 1, dst + 1, ivs + 0);
1448
1449 nbytes -= bsize;
1450 if (nbytes < bsize)
1451 goto done;
1452
1453 u128_xor(dst, dst, src - 1);
1454 src -= 1;
1455 dst -= 1;
1456 } while (nbytes >= bsize * 2);
1457
1458 if (nbytes < bsize)
1459 goto done;
1460 } 1341 }
1461 1342
1462 /* Handle leftovers */ 1343 u128_to_be128(&ctrblks[0], iv);
1463 for (;;) { 1344 u128_inc(iv);
1464 camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src); 1345 u128_to_be128(&ctrblks[1], iv);
1465 1346 u128_inc(iv);
1466 nbytes -= bsize;
1467 if (nbytes < bsize)
1468 break;
1469 1347
1470 u128_xor(dst, dst, src - 1); 1348 camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
1471 src -= 1;
1472 dst -= 1;
1473 }
1474
1475done:
1476 u128_xor(dst, dst, (u128 *)walk->iv);
1477 *(u128 *)walk->iv = last_iv;
1478
1479 return nbytes;
1480} 1349}
1481 1350
1482static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 1351static const struct common_glue_ctx camellia_enc = {
1483 struct scatterlist *src, unsigned int nbytes) 1352 .num_funcs = 2,
1484{ 1353 .fpu_blocks_limit = -1,
1485 struct blkcipher_walk walk; 1354
1486 int err; 1355 .funcs = { {
1487 1356 .num_blocks = 2,
1488 blkcipher_walk_init(&walk, dst, src, nbytes); 1357 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
1489 err = blkcipher_walk_virt(desc, &walk); 1358 }, {
1359 .num_blocks = 1,
1360 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
1361 } }
1362};
1490 1363
1491 while ((nbytes = walk.nbytes)) { 1364static const struct common_glue_ctx camellia_ctr = {
1492 nbytes = __cbc_decrypt(desc, &walk); 1365 .num_funcs = 2,
1493 err = blkcipher_walk_done(desc, &walk, nbytes); 1366 .fpu_blocks_limit = -1,
1494 } 1367
1368 .funcs = { {
1369 .num_blocks = 2,
1370 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
1371 }, {
1372 .num_blocks = 1,
1373 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
1374 } }
1375};
1495 1376
1496 return err; 1377static const struct common_glue_ctx camellia_dec = {
1497} 1378 .num_funcs = 2,
1379 .fpu_blocks_limit = -1,
1380
1381 .funcs = { {
1382 .num_blocks = 2,
1383 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
1384 }, {
1385 .num_blocks = 1,
1386 .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
1387 } }
1388};
1498 1389
1499static inline void u128_to_be128(be128 *dst, const u128 *src) 1390static const struct common_glue_ctx camellia_dec_cbc = {
1500{ 1391 .num_funcs = 2,
1501 dst->a = cpu_to_be64(src->a); 1392 .fpu_blocks_limit = -1,
1502 dst->b = cpu_to_be64(src->b); 1393
1503} 1394 .funcs = { {
1395 .num_blocks = 2,
1396 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
1397 }, {
1398 .num_blocks = 1,
1399 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
1400 } }
1401};
1504 1402
1505static inline void be128_to_u128(u128 *dst, const be128 *src) 1403static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1404 struct scatterlist *src, unsigned int nbytes)
1506{ 1405{
1507 dst->a = be64_to_cpu(src->a); 1406 return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
1508 dst->b = be64_to_cpu(src->b);
1509} 1407}
1510 1408
1511static inline void u128_inc(u128 *i) 1409static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1410 struct scatterlist *src, unsigned int nbytes)
1512{ 1411{
1513 i->b++; 1412 return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
1514 if (!i->b)
1515 i->a++;
1516} 1413}
1517 1414
1518static void ctr_crypt_final(struct blkcipher_desc *desc, 1415static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1519 struct blkcipher_walk *walk) 1416 struct scatterlist *src, unsigned int nbytes)
1520{ 1417{
1521 struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 1418 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
1522 u8 keystream[CAMELLIA_BLOCK_SIZE]; 1419 dst, src, nbytes);
1523 u8 *src = walk->src.virt.addr;
1524 u8 *dst = walk->dst.virt.addr;
1525 unsigned int nbytes = walk->nbytes;
1526 u128 ctrblk;
1527
1528 memcpy(keystream, src, nbytes);
1529 camellia_enc_blk_xor(ctx, keystream, walk->iv);
1530 memcpy(dst, keystream, nbytes);
1531
1532 be128_to_u128(&ctrblk, (be128 *)walk->iv);
1533 u128_inc(&ctrblk);
1534 u128_to_be128((be128 *)walk->iv, &ctrblk);
1535} 1420}
1536 1421
1537static unsigned int __ctr_crypt(struct blkcipher_desc *desc, 1422static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1538 struct blkcipher_walk *walk) 1423 struct scatterlist *src, unsigned int nbytes)
1539{ 1424{
1540 struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 1425 return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
1541 unsigned int bsize = CAMELLIA_BLOCK_SIZE; 1426 nbytes);
1542 unsigned int nbytes = walk->nbytes;
1543 u128 *src = (u128 *)walk->src.virt.addr;
1544 u128 *dst = (u128 *)walk->dst.virt.addr;
1545 u128 ctrblk;
1546 be128 ctrblocks[2];
1547
1548 be128_to_u128(&ctrblk, (be128 *)walk->iv);
1549
1550 /* Process two block batch */
1551 if (nbytes >= bsize * 2) {
1552 do {
1553 if (dst != src) {
1554 dst[0] = src[0];
1555 dst[1] = src[1];
1556 }
1557
1558 /* create ctrblks for parallel encrypt */
1559 u128_to_be128(&ctrblocks[0], &ctrblk);
1560 u128_inc(&ctrblk);
1561 u128_to_be128(&ctrblocks[1], &ctrblk);
1562 u128_inc(&ctrblk);
1563
1564 camellia_enc_blk_xor_2way(ctx, (u8 *)dst,
1565 (u8 *)ctrblocks);
1566
1567 src += 2;
1568 dst += 2;
1569 nbytes -= bsize * 2;
1570 } while (nbytes >= bsize * 2);
1571
1572 if (nbytes < bsize)
1573 goto done;
1574 }
1575
1576 /* Handle leftovers */
1577 do {
1578 if (dst != src)
1579 *dst = *src;
1580
1581 u128_to_be128(&ctrblocks[0], &ctrblk);
1582 u128_inc(&ctrblk);
1583
1584 camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
1585
1586 src += 1;
1587 dst += 1;
1588 nbytes -= bsize;
1589 } while (nbytes >= bsize);
1590
1591done:
1592 u128_to_be128((be128 *)walk->iv, &ctrblk);
1593 return nbytes;
1594} 1427}
1595 1428
1596static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, 1429static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
1597 struct scatterlist *src, unsigned int nbytes) 1430 struct scatterlist *src, unsigned int nbytes)
1598{ 1431{
1599 struct blkcipher_walk walk; 1432 return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
1600 int err;
1601
1602 blkcipher_walk_init(&walk, dst, src, nbytes);
1603 err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);
1604
1605 while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
1606 nbytes = __ctr_crypt(desc, &walk);
1607 err = blkcipher_walk_done(desc, &walk, nbytes);
1608 }
1609
1610 if (walk.nbytes) {
1611 ctr_crypt_final(desc, &walk);
1612 err = blkcipher_walk_done(desc, &walk, 0);
1613 }
1614
1615 return err;
1616} 1433}
1617 1434
1618static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) 1435static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
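
[Note on the common_glue_ctx tables above: they list batch widths from largest
to smallest, and the dispatch loop in glue_helper.c (added below) tries each
width in turn, so the 2-way Camellia routines handle the bulk of the data and
the 1-block routines mop up the tail. A simplified extract of that loop; see
__glue_ecb_crypt_128bit() in the next file for the real version:

	for (i = 0; i < gctx->num_funcs; i++) {
		unsigned int func_bytes = bsize * gctx->funcs[i].num_blocks;

		/* consume as many whole batches of this width as possible */
		while (nbytes >= func_bytes) {
			gctx->funcs[i].fn_u.ecb(ctx, wdst, wsrc);
			wsrc += func_bytes;
			wdst += func_bytes;
			nbytes -= func_bytes;
		}
	}
]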
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
new file mode 100644
index 000000000000..4854f0f31e4f
--- /dev/null
+++ b/arch/x86/crypto/glue_helper.c
@@ -0,0 +1,307 @@
+/*
+ * Shared glue code for 128bit block ciphers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
+ *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * CTR part based on code (crypto/ctr.c) by:
+ *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/crypto/glue_helper.h>
+#include <crypto/scatterwalk.h>
+
+static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+				   struct blkcipher_desc *desc,
+				   struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes, i, func_bytes;
+	bool fpu_enabled = false;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+
+	while ((nbytes = walk->nbytes)) {
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     desc, fpu_enabled, nbytes);
+
+		for (i = 0; i < gctx->num_funcs; i++) {
+			func_bytes = bsize * gctx->funcs[i].num_blocks;
+
+			/* Process multi-block batch */
+			if (nbytes >= func_bytes) {
+				do {
+					gctx->funcs[i].fn_u.ecb(ctx, wdst,
+								wsrc);
+
+					wsrc += func_bytes;
+					wdst += func_bytes;
+					nbytes -= func_bytes;
+				} while (nbytes >= func_bytes);
+
+				if (nbytes < bsize)
+					goto done;
+			}
+		}
+
+done:
+		err = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	glue_fpu_end(fpu_enabled);
+	return err;
+}
+
+int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+			  struct blkcipher_desc *desc, struct scatterlist *dst,
+			  struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return __glue_ecb_crypt_128bit(gctx, desc, &walk);
+}
+EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);
+
+static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+					      struct blkcipher_desc *desc,
+					      struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 *iv = (u128 *)walk->iv;
+
+	do {
+		u128_xor(dst, src, iv);
+		fn(ctx, (u8 *)dst, (u8 *)dst);
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
+	return nbytes;
+}
+
+int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+			    struct blkcipher_desc *desc,
+			    struct scatterlist *dst,
+			    struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);
+
+static unsigned int
+__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+			  struct blkcipher_desc *desc,
+			  struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 last_iv;
+	unsigned int num_blocks, func_bytes;
+	unsigned int i;
+
+	/* Start of the last block. */
+	src += nbytes / bsize - 1;
+	dst += nbytes / bsize - 1;
+
+	last_iv = *src;
+
+	for (i = 0; i < gctx->num_funcs; i++) {
+		num_blocks = gctx->funcs[i].num_blocks;
+		func_bytes = bsize * num_blocks;
+
+		/* Process multi-block batch */
+		if (nbytes >= func_bytes) {
+			do {
+				nbytes -= func_bytes - bsize;
+				src -= num_blocks - 1;
+				dst -= num_blocks - 1;
+
+				gctx->funcs[i].fn_u.cbc(ctx, dst, src);
+
+				nbytes -= bsize;
+				if (nbytes < bsize)
+					goto done;
+
+				u128_xor(dst, dst, src - 1);
+				src -= 1;
+				dst -= 1;
+			} while (nbytes >= func_bytes);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+	}
+
+done:
+	u128_xor(dst, dst, (u128 *)walk->iv);
+	*(u128 *)walk->iv = last_iv;
+
+	return nbytes;
+}
+
+int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+			    struct blkcipher_desc *desc,
+			    struct scatterlist *dst,
+			    struct scatterlist *src, unsigned int nbytes)
+{
+	const unsigned int bsize = 128 / 8;
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     desc, fpu_enabled, nbytes);
207 nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
208 err = blkcipher_walk_done(desc, &walk, nbytes);
209 }
210
211 glue_fpu_end(fpu_enabled);
212 return err;
213}
214EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
215
216static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
217 struct blkcipher_desc *desc,
218 struct blkcipher_walk *walk)
219{
220 void *ctx = crypto_blkcipher_ctx(desc->tfm);
221 u8 *src = (u8 *)walk->src.virt.addr;
222 u8 *dst = (u8 *)walk->dst.virt.addr;
223 unsigned int nbytes = walk->nbytes;
224 u128 ctrblk;
225 u128 tmp;
226
227 be128_to_u128(&ctrblk, (be128 *)walk->iv);
228
229 memcpy(&tmp, src, nbytes);
230 fn_ctr(ctx, &tmp, &tmp, &ctrblk);
231 memcpy(dst, &tmp, nbytes);
232
233 u128_to_be128((be128 *)walk->iv, &ctrblk);
234}
235EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
236
237static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
238 struct blkcipher_desc *desc,
239 struct blkcipher_walk *walk)
240{
241 const unsigned int bsize = 128 / 8;
242 void *ctx = crypto_blkcipher_ctx(desc->tfm);
243 unsigned int nbytes = walk->nbytes;
244 u128 *src = (u128 *)walk->src.virt.addr;
245 u128 *dst = (u128 *)walk->dst.virt.addr;
246 u128 ctrblk;
247 unsigned int num_blocks, func_bytes;
248 unsigned int i;
249
250 be128_to_u128(&ctrblk, (be128 *)walk->iv);
251
252 /* Process multi-block batch */
253 for (i = 0; i < gctx->num_funcs; i++) {
254 num_blocks = gctx->funcs[i].num_blocks;
255 func_bytes = bsize * num_blocks;
256
257 if (nbytes >= func_bytes) {
258 do {
259 gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
260
261 src += num_blocks;
262 dst += num_blocks;
263 nbytes -= func_bytes;
264 } while (nbytes >= func_bytes);
265
266 if (nbytes < bsize)
267 goto done;
268 }
269 }
270
271done:
272 u128_to_be128((be128 *)walk->iv, &ctrblk);
273 return nbytes;
274}
275
276int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
277 struct blkcipher_desc *desc, struct scatterlist *dst,
278 struct scatterlist *src, unsigned int nbytes)
279{
280 const unsigned int bsize = 128 / 8;
281 bool fpu_enabled = false;
282 struct blkcipher_walk walk;
283 int err;
284
285 blkcipher_walk_init(&walk, dst, src, nbytes);
286 err = blkcipher_walk_virt_block(desc, &walk, bsize);
287
288 while ((nbytes = walk.nbytes) >= bsize) {
289 fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
290 desc, fpu_enabled, nbytes);
291 nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
292 err = blkcipher_walk_done(desc, &walk, nbytes);
293 }
294
295 glue_fpu_end(fpu_enabled);
296
297 if (walk.nbytes) {
298 glue_ctr_crypt_final_128bit(
299 gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
300 err = blkcipher_walk_done(desc, &walk, 0);
301 }
302
303 return err;
304}
305EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
306
307MODULE_LICENSE("GPL");
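
A cipher hooks into the helper above by filling a common_glue_ctx with its
batch functions, largest num_blocks first, and forwarding its blkcipher
entry points, exactly as camellia_glue.c and the serpent glue below do. A
minimal sketch for a hypothetical cipher (the mycipher_* names are
illustrative, not part of this patch):

	static const struct common_glue_ctx mycipher_enc = {
		.num_funcs = 2,
		.fpu_blocks_limit = MYCIPHER_PARALLEL_BLOCKS,

		.funcs = { {
			/* wide SIMD path, used while enough blocks remain */
			.num_blocks = MYCIPHER_PARALLEL_BLOCKS,
			.fn_u = { .ecb = GLUE_FUNC_CAST(mycipher_enc_blk_xway) }
		}, {
			/* one-block scalar fallback for the tail */
			.num_blocks = 1,
			.fn_u = { .ecb = GLUE_FUNC_CAST(mycipher_encrypt) }
		} }
	};

	static int ecb_encrypt(struct blkcipher_desc *desc,
			       struct scatterlist *dst,
			       struct scatterlist *src, unsigned int nbytes)
	{
		return glue_ecb_crypt_128bit(&mycipher_enc, desc, dst, src,
					     nbytes);
	}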
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
new file mode 100644
index 000000000000..504106bf04a2
--- /dev/null
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -0,0 +1,704 @@
1/*
2 * Serpent Cipher 8-way parallel algorithm (x86_64/AVX)
3 *
4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 *
7 * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by
8 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
23 * USA
24 *
25 */
26
27.file "serpent-avx-x86_64-asm_64.S"
28.text
29
30#define CTX %rdi
31
32/**********************************************************************
33 8-way AVX serpent
34 **********************************************************************/
35#define RA1 %xmm0
36#define RB1 %xmm1
37#define RC1 %xmm2
38#define RD1 %xmm3
39#define RE1 %xmm4
40
41#define tp %xmm5
42
43#define RA2 %xmm6
44#define RB2 %xmm7
45#define RC2 %xmm8
46#define RD2 %xmm9
47#define RE2 %xmm10
48
49#define RNOT %xmm11
50
51#define RK0 %xmm12
52#define RK1 %xmm13
53#define RK2 %xmm14
54#define RK3 %xmm15
55
56
57#define S0_1(x0, x1, x2, x3, x4) \
58 vpor x0, x3, tp; \
59 vpxor x3, x0, x0; \
60 vpxor x2, x3, x4; \
61 vpxor RNOT, x4, x4; \
62 vpxor x1, tp, x3; \
63 vpand x0, x1, x1; \
64 vpxor x4, x1, x1; \
65 vpxor x0, x2, x2;
66#define S0_2(x0, x1, x2, x3, x4) \
67 vpxor x3, x0, x0; \
68 vpor x0, x4, x4; \
69 vpxor x2, x0, x0; \
70 vpand x1, x2, x2; \
71 vpxor x2, x3, x3; \
72 vpxor RNOT, x1, x1; \
73 vpxor x4, x2, x2; \
74 vpxor x2, x1, x1;
75
76#define S1_1(x0, x1, x2, x3, x4) \
77 vpxor x0, x1, tp; \
78 vpxor x3, x0, x0; \
79 vpxor RNOT, x3, x3; \
80 vpand tp, x1, x4; \
81 vpor tp, x0, x0; \
82 vpxor x2, x3, x3; \
83 vpxor x3, x0, x0; \
84 vpxor x3, tp, x1;
85#define S1_2(x0, x1, x2, x3, x4) \
86 vpxor x4, x3, x3; \
87 vpor x4, x1, x1; \
88 vpxor x2, x4, x4; \
89 vpand x0, x2, x2; \
90 vpxor x1, x2, x2; \
91 vpor x0, x1, x1; \
92 vpxor RNOT, x0, x0; \
93 vpxor x2, x0, x0; \
94 vpxor x1, x4, x4;
95
96#define S2_1(x0, x1, x2, x3, x4) \
97 vpxor RNOT, x3, x3; \
98 vpxor x0, x1, x1; \
99 vpand x2, x0, tp; \
100 vpxor x3, tp, tp; \
101 vpor x0, x3, x3; \
102 vpxor x1, x2, x2; \
103 vpxor x1, x3, x3; \
104 vpand tp, x1, x1;
105#define S2_2(x0, x1, x2, x3, x4) \
106 vpxor x2, tp, tp; \
107 vpand x3, x2, x2; \
108 vpor x1, x3, x3; \
109 vpxor RNOT, tp, tp; \
110 vpxor tp, x3, x3; \
111 vpxor tp, x0, x4; \
112 vpxor x2, tp, x0; \
113 vpor x2, x1, x1;
114
115#define S3_1(x0, x1, x2, x3, x4) \
116 vpxor x3, x1, tp; \
117 vpor x0, x3, x3; \
118 vpand x0, x1, x4; \
119 vpxor x2, x0, x0; \
120 vpxor tp, x2, x2; \
121 vpand x3, tp, x1; \
122 vpxor x3, x2, x2; \
123 vpor x4, x0, x0; \
124 vpxor x3, x4, x4;
125#define S3_2(x0, x1, x2, x3, x4) \
126 vpxor x0, x1, x1; \
127 vpand x3, x0, x0; \
128 vpand x4, x3, x3; \
129 vpxor x2, x3, x3; \
130 vpor x1, x4, x4; \
131 vpand x1, x2, x2; \
132 vpxor x3, x4, x4; \
133 vpxor x3, x0, x0; \
134 vpxor x2, x3, x3;
135
136#define S4_1(x0, x1, x2, x3, x4) \
137 vpand x0, x3, tp; \
138 vpxor x3, x0, x0; \
139 vpxor x2, tp, tp; \
140 vpor x3, x2, x2; \
141 vpxor x1, x0, x0; \
142 vpxor tp, x3, x4; \
143 vpor x0, x2, x2; \
144 vpxor x1, x2, x2;
145#define S4_2(x0, x1, x2, x3, x4) \
146 vpand x0, x1, x1; \
147 vpxor x4, x1, x1; \
148 vpand x2, x4, x4; \
149 vpxor tp, x2, x2; \
150 vpxor x0, x4, x4; \
151 vpor x1, tp, x3; \
152 vpxor RNOT, x1, x1; \
153 vpxor x0, x3, x3;
154
155#define S5_1(x0, x1, x2, x3, x4) \
156 vpor x0, x1, tp; \
157 vpxor tp, x2, x2; \
158 vpxor RNOT, x3, x3; \
159 vpxor x0, x1, x4; \
160 vpxor x2, x0, x0; \
161 vpand x4, tp, x1; \
162 vpor x3, x4, x4; \
163 vpxor x0, x4, x4;
164#define S5_2(x0, x1, x2, x3, x4) \
165 vpand x3, x0, x0; \
166 vpxor x3, x1, x1; \
167 vpxor x2, x3, x3; \
168 vpxor x1, x0, x0; \
169 vpand x4, x2, x2; \
170 vpxor x2, x1, x1; \
171 vpand x0, x2, x2; \
172 vpxor x2, x3, x3;
173
174#define S6_1(x0, x1, x2, x3, x4) \
175 vpxor x0, x3, x3; \
176 vpxor x2, x1, tp; \
177 vpxor x0, x2, x2; \
178 vpand x3, x0, x0; \
179 vpor x3, tp, tp; \
180 vpxor RNOT, x1, x4; \
181 vpxor tp, x0, x0; \
182 vpxor x2, tp, x1;
183#define S6_2(x0, x1, x2, x3, x4) \
184 vpxor x4, x3, x3; \
185 vpxor x0, x4, x4; \
186 vpand x0, x2, x2; \
187 vpxor x1, x4, x4; \
188 vpxor x3, x2, x2; \
189 vpand x1, x3, x3; \
190 vpxor x0, x3, x3; \
191 vpxor x2, x1, x1;
192
193#define S7_1(x0, x1, x2, x3, x4) \
194 vpxor RNOT, x1, tp; \
195 vpxor RNOT, x0, x0; \
196 vpand x2, tp, x1; \
197 vpxor x3, x1, x1; \
198 vpor tp, x3, x3; \
199 vpxor x2, tp, x4; \
200 vpxor x3, x2, x2; \
201 vpxor x0, x3, x3; \
202 vpor x1, x0, x0;
203#define S7_2(x0, x1, x2, x3, x4) \
204 vpand x0, x2, x2; \
205 vpxor x4, x0, x0; \
206 vpxor x3, x4, x4; \
207 vpand x0, x3, x3; \
208 vpxor x1, x4, x4; \
209 vpxor x4, x2, x2; \
210 vpxor x1, x3, x3; \
211 vpor x0, x4, x4; \
212 vpxor x1, x4, x4;
213
214#define SI0_1(x0, x1, x2, x3, x4) \
215 vpxor x0, x1, x1; \
216 vpor x1, x3, tp; \
217 vpxor x1, x3, x4; \
218 vpxor RNOT, x0, x0; \
219 vpxor tp, x2, x2; \
220 vpxor x0, tp, x3; \
221 vpand x1, x0, x0; \
222 vpxor x2, x0, x0;
223#define SI0_2(x0, x1, x2, x3, x4) \
224 vpand x3, x2, x2; \
225 vpxor x4, x3, x3; \
226 vpxor x3, x2, x2; \
227 vpxor x3, x1, x1; \
228 vpand x0, x3, x3; \
229 vpxor x0, x1, x1; \
230 vpxor x2, x0, x0; \
231 vpxor x3, x4, x4;
232
233#define SI1_1(x0, x1, x2, x3, x4) \
234 vpxor x3, x1, x1; \
235 vpxor x2, x0, tp; \
236 vpxor RNOT, x2, x2; \
237 vpor x1, x0, x4; \
238 vpxor x3, x4, x4; \
239 vpand x1, x3, x3; \
240 vpxor x2, x1, x1; \
241 vpand x4, x2, x2;
242#define SI1_2(x0, x1, x2, x3, x4) \
243 vpxor x1, x4, x4; \
244 vpor x3, x1, x1; \
245 vpxor tp, x3, x3; \
246 vpxor tp, x2, x2; \
247 vpor x4, tp, x0; \
248 vpxor x4, x2, x2; \
249 vpxor x0, x1, x1; \
250 vpxor x1, x4, x4;
251
252#define SI2_1(x0, x1, x2, x3, x4) \
253 vpxor x1, x2, x2; \
254 vpxor RNOT, x3, tp; \
255 vpor x2, tp, tp; \
256 vpxor x3, x2, x2; \
257 vpxor x0, x3, x4; \
258 vpxor x1, tp, x3; \
259 vpor x2, x1, x1; \
260 vpxor x0, x2, x2;
261#define SI2_2(x0, x1, x2, x3, x4) \
262 vpxor x4, x1, x1; \
263 vpor x3, x4, x4; \
264 vpxor x3, x2, x2; \
265 vpxor x2, x4, x4; \
266 vpand x1, x2, x2; \
267 vpxor x3, x2, x2; \
268 vpxor x4, x3, x3; \
269 vpxor x0, x4, x4;
270
271#define SI3_1(x0, x1, x2, x3, x4) \
272 vpxor x1, x2, x2; \
273 vpand x2, x1, tp; \
274 vpxor x0, tp, tp; \
275 vpor x1, x0, x0; \
276 vpxor x3, x1, x4; \
277 vpxor x3, x0, x0; \
278 vpor tp, x3, x3; \
279 vpxor x2, tp, x1;
280#define SI3_2(x0, x1, x2, x3, x4) \
281 vpxor x3, x1, x1; \
282 vpxor x2, x0, x0; \
283 vpxor x3, x2, x2; \
284 vpand x1, x3, x3; \
285 vpxor x0, x1, x1; \
286 vpand x2, x0, x0; \
287 vpxor x3, x4, x4; \
288 vpxor x0, x3, x3; \
289 vpxor x1, x0, x0;
290
291#define SI4_1(x0, x1, x2, x3, x4) \
292 vpxor x3, x2, x2; \
293 vpand x1, x0, tp; \
294 vpxor x2, tp, tp; \
295 vpor x3, x2, x2; \
296 vpxor RNOT, x0, x4; \
297 vpxor tp, x1, x1; \
298 vpxor x2, tp, x0; \
299 vpand x4, x2, x2;
300#define SI4_2(x0, x1, x2, x3, x4) \
301 vpxor x0, x2, x2; \
302 vpor x4, x0, x0; \
303 vpxor x3, x0, x0; \
304 vpand x2, x3, x3; \
305 vpxor x3, x4, x4; \
306 vpxor x1, x3, x3; \
307 vpand x0, x1, x1; \
308 vpxor x1, x4, x4; \
309 vpxor x3, x0, x0;
310
311#define SI5_1(x0, x1, x2, x3, x4) \
312 vpor x2, x1, tp; \
313 vpxor x1, x2, x2; \
314 vpxor x3, tp, tp; \
315 vpand x1, x3, x3; \
316 vpxor x3, x2, x2; \
317 vpor x0, x3, x3; \
318 vpxor RNOT, x0, x0; \
319 vpxor x2, x3, x3; \
320 vpor x0, x2, x2;
321#define SI5_2(x0, x1, x2, x3, x4) \
322 vpxor tp, x1, x4; \
323 vpxor x4, x2, x2; \
324 vpand x0, x4, x4; \
325 vpxor tp, x0, x0; \
326 vpxor x3, tp, x1; \
327 vpand x2, x0, x0; \
328 vpxor x3, x2, x2; \
329 vpxor x2, x0, x0; \
330 vpxor x4, x2, x2; \
331 vpxor x3, x4, x4;
332
333#define SI6_1(x0, x1, x2, x3, x4) \
334 vpxor x2, x0, x0; \
335 vpand x3, x0, tp; \
336 vpxor x3, x2, x2; \
337 vpxor x2, tp, tp; \
338 vpxor x1, x3, x3; \
339 vpor x0, x2, x2; \
340 vpxor x3, x2, x2; \
341 vpand tp, x3, x3;
342#define SI6_2(x0, x1, x2, x3, x4) \
343 vpxor RNOT, tp, tp; \
344 vpxor x1, x3, x3; \
345 vpand x2, x1, x1; \
346 vpxor tp, x0, x4; \
347 vpxor x4, x3, x3; \
348 vpxor x2, x4, x4; \
349 vpxor x1, tp, x0; \
350 vpxor x0, x2, x2;
351
352#define SI7_1(x0, x1, x2, x3, x4) \
353 vpand x0, x3, tp; \
354 vpxor x2, x0, x0; \
355 vpor x3, x2, x2; \
356 vpxor x1, x3, x4; \
357 vpxor RNOT, x0, x0; \
358 vpor tp, x1, x1; \
359 vpxor x0, x4, x4; \
360 vpand x2, x0, x0; \
361 vpxor x1, x0, x0;
362#define SI7_2(x0, x1, x2, x3, x4) \
363 vpand x2, x1, x1; \
364 vpxor x2, tp, x3; \
365 vpxor x3, x4, x4; \
366 vpand x3, x2, x2; \
367 vpor x0, x3, x3; \
368 vpxor x4, x1, x1; \
369 vpxor x4, x3, x3; \
370 vpand x0, x4, x4; \
371 vpxor x2, x4, x4;
372
373#define get_key(i, j, t) \
374 vbroadcastss (4*(i)+(j))*4(CTX), t;
375
376#define K2(x0, x1, x2, x3, x4, i) \
377 get_key(i, 0, RK0); \
378 get_key(i, 1, RK1); \
379 get_key(i, 2, RK2); \
380 get_key(i, 3, RK3); \
381 vpxor RK0, x0 ## 1, x0 ## 1; \
382 vpxor RK1, x1 ## 1, x1 ## 1; \
383 vpxor RK2, x2 ## 1, x2 ## 1; \
384 vpxor RK3, x3 ## 1, x3 ## 1; \
385 vpxor RK0, x0 ## 2, x0 ## 2; \
386 vpxor RK1, x1 ## 2, x1 ## 2; \
387 vpxor RK2, x2 ## 2, x2 ## 2; \
388 vpxor RK3, x3 ## 2, x3 ## 2;
389
390#define LK2(x0, x1, x2, x3, x4, i) \
391 vpslld $13, x0 ## 1, x4 ## 1; \
392 vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \
393 vpor x4 ## 1, x0 ## 1, x0 ## 1; \
394 vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
395 vpslld $3, x2 ## 1, x4 ## 1; \
396 vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \
397 vpor x4 ## 1, x2 ## 1, x2 ## 1; \
398 vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
399 vpslld $13, x0 ## 2, x4 ## 2; \
400 vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \
401 vpor x4 ## 2, x0 ## 2, x0 ## 2; \
402 vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
403 vpslld $3, x2 ## 2, x4 ## 2; \
404 vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \
405 vpor x4 ## 2, x2 ## 2, x2 ## 2; \
406 vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
407 vpslld $1, x1 ## 1, x4 ## 1; \
408 vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \
409 vpor x4 ## 1, x1 ## 1, x1 ## 1; \
410 vpslld $3, x0 ## 1, x4 ## 1; \
411 vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
412 vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
413 get_key(i, 1, RK1); \
414 vpslld $1, x1 ## 2, x4 ## 2; \
415 vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \
416 vpor x4 ## 2, x1 ## 2, x1 ## 2; \
417 vpslld $3, x0 ## 2, x4 ## 2; \
418 vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
419 vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
420 get_key(i, 3, RK3); \
421 vpslld $7, x3 ## 1, x4 ## 1; \
422 vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \
423 vpor x4 ## 1, x3 ## 1, x3 ## 1; \
424 vpslld $7, x1 ## 1, x4 ## 1; \
425 vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
426 vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
427 vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
428 vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
429 get_key(i, 0, RK0); \
430 vpslld $7, x3 ## 2, x4 ## 2; \
431 vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \
432 vpor x4 ## 2, x3 ## 2, x3 ## 2; \
433 vpslld $7, x1 ## 2, x4 ## 2; \
434 vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
435 vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
436 vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
437 vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
438 get_key(i, 2, RK2); \
439 vpxor RK1, x1 ## 1, x1 ## 1; \
440 vpxor RK3, x3 ## 1, x3 ## 1; \
441 vpslld $5, x0 ## 1, x4 ## 1; \
442 vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \
443 vpor x4 ## 1, x0 ## 1, x0 ## 1; \
444 vpslld $22, x2 ## 1, x4 ## 1; \
445 vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \
446 vpor x4 ## 1, x2 ## 1, x2 ## 1; \
447 vpxor RK0, x0 ## 1, x0 ## 1; \
448 vpxor RK2, x2 ## 1, x2 ## 1; \
449 vpxor RK1, x1 ## 2, x1 ## 2; \
450 vpxor RK3, x3 ## 2, x3 ## 2; \
451 vpslld $5, x0 ## 2, x4 ## 2; \
452 vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \
453 vpor x4 ## 2, x0 ## 2, x0 ## 2; \
454 vpslld $22, x2 ## 2, x4 ## 2; \
455 vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \
456 vpor x4 ## 2, x2 ## 2, x2 ## 2; \
457 vpxor RK0, x0 ## 2, x0 ## 2; \
458 vpxor RK2, x2 ## 2, x2 ## 2;
459
460#define KL2(x0, x1, x2, x3, x4, i) \
461 vpxor RK0, x0 ## 1, x0 ## 1; \
462 vpxor RK2, x2 ## 1, x2 ## 1; \
463 vpsrld $5, x0 ## 1, x4 ## 1; \
464 vpslld $(32 - 5), x0 ## 1, x0 ## 1; \
465 vpor x4 ## 1, x0 ## 1, x0 ## 1; \
466 vpxor RK3, x3 ## 1, x3 ## 1; \
467 vpxor RK1, x1 ## 1, x1 ## 1; \
468 vpsrld $22, x2 ## 1, x4 ## 1; \
469 vpslld $(32 - 22), x2 ## 1, x2 ## 1; \
470 vpor x4 ## 1, x2 ## 1, x2 ## 1; \
471 vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
472 vpxor RK0, x0 ## 2, x0 ## 2; \
473 vpxor RK2, x2 ## 2, x2 ## 2; \
474 vpsrld $5, x0 ## 2, x4 ## 2; \
475 vpslld $(32 - 5), x0 ## 2, x0 ## 2; \
476 vpor x4 ## 2, x0 ## 2, x0 ## 2; \
477 vpxor RK3, x3 ## 2, x3 ## 2; \
478 vpxor RK1, x1 ## 2, x1 ## 2; \
479 vpsrld $22, x2 ## 2, x4 ## 2; \
480 vpslld $(32 - 22), x2 ## 2, x2 ## 2; \
481 vpor x4 ## 2, x2 ## 2, x2 ## 2; \
482 vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
483 vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
484 vpslld $7, x1 ## 1, x4 ## 1; \
485 vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
486 vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
487 vpsrld $1, x1 ## 1, x4 ## 1; \
488 vpslld $(32 - 1), x1 ## 1, x1 ## 1; \
489 vpor x4 ## 1, x1 ## 1, x1 ## 1; \
490 vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
491 vpslld $7, x1 ## 2, x4 ## 2; \
492 vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
493 vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
494 vpsrld $1, x1 ## 2, x4 ## 2; \
495 vpslld $(32 - 1), x1 ## 2, x1 ## 2; \
496 vpor x4 ## 2, x1 ## 2, x1 ## 2; \
497 vpsrld $7, x3 ## 1, x4 ## 1; \
498 vpslld $(32 - 7), x3 ## 1, x3 ## 1; \
499 vpor x4 ## 1, x3 ## 1, x3 ## 1; \
500 vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
501 vpslld $3, x0 ## 1, x4 ## 1; \
502 vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
503 vpsrld $7, x3 ## 2, x4 ## 2; \
504 vpslld $(32 - 7), x3 ## 2, x3 ## 2; \
505 vpor x4 ## 2, x3 ## 2, x3 ## 2; \
506 vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
507 vpslld $3, x0 ## 2, x4 ## 2; \
508 vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
509 vpsrld $13, x0 ## 1, x4 ## 1; \
510 vpslld $(32 - 13), x0 ## 1, x0 ## 1; \
511 vpor x4 ## 1, x0 ## 1, x0 ## 1; \
512 vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
513 vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
514 vpsrld $3, x2 ## 1, x4 ## 1; \
515 vpslld $(32 - 3), x2 ## 1, x2 ## 1; \
516 vpor x4 ## 1, x2 ## 1, x2 ## 1; \
517 vpsrld $13, x0 ## 2, x4 ## 2; \
518 vpslld $(32 - 13), x0 ## 2, x0 ## 2; \
519 vpor x4 ## 2, x0 ## 2, x0 ## 2; \
520 vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
521 vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
522 vpsrld $3, x2 ## 2, x4 ## 2; \
523 vpslld $(32 - 3), x2 ## 2, x2 ## 2; \
524 vpor x4 ## 2, x2 ## 2, x2 ## 2;
525
526#define S(SBOX, x0, x1, x2, x3, x4) \
527 SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
528 SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
529 SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
530 SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
531
532#define SP(SBOX, x0, x1, x2, x3, x4, i) \
533 get_key(i, 0, RK0); \
534 SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
535 get_key(i, 2, RK2); \
536 SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
537 get_key(i, 3, RK3); \
538 SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
539 get_key(i, 1, RK1); \
540 SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
541
542#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
543 vpunpckldq x1, x0, t0; \
544 vpunpckhdq x1, x0, t2; \
545 vpunpckldq x3, x2, t1; \
546 vpunpckhdq x3, x2, x3; \
547 \
548 vpunpcklqdq t1, t0, x0; \
549 vpunpckhqdq t1, t0, x1; \
550 vpunpcklqdq x3, t2, x2; \
551 vpunpckhqdq x3, t2, x3;
552
553#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
554 vmovdqu (0*4*4)(in), x0; \
555 vmovdqu (1*4*4)(in), x1; \
556 vmovdqu (2*4*4)(in), x2; \
557 vmovdqu (3*4*4)(in), x3; \
558 \
559 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
560
561#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
562 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
563 \
564 vmovdqu x0, (0*4*4)(out); \
565 vmovdqu x1, (1*4*4)(out); \
566 vmovdqu x2, (2*4*4)(out); \
567 vmovdqu x3, (3*4*4)(out);
568
569#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
570 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
571 \
572 vpxor (0*4*4)(out), x0, x0; \
573 vmovdqu x0, (0*4*4)(out); \
574 vpxor (1*4*4)(out), x1, x1; \
575 vmovdqu x1, (1*4*4)(out); \
576 vpxor (2*4*4)(out), x2, x2; \
577 vmovdqu x2, (2*4*4)(out); \
578 vpxor (3*4*4)(out), x3, x3; \
579 vmovdqu x3, (3*4*4)(out);
580
581.align 8
582.global __serpent_enc_blk_8way_avx
583.type __serpent_enc_blk_8way_avx,@function;
584
585__serpent_enc_blk_8way_avx:
586 /* input:
587 * %rdi: ctx, CTX
588 * %rsi: dst
589 * %rdx: src
590 * %rcx: bool, if true: xor output
591 */
592
593 vpcmpeqd RNOT, RNOT, RNOT;
594
595 leaq (4*4*4)(%rdx), %rax;
596 read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
597 read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
598
599 K2(RA, RB, RC, RD, RE, 0);
600 S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
601 S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2);
602 S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3);
603 S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4);
604 S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5);
605 S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6);
606 S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7);
607 S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8);
608 S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9);
609 S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10);
610 S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11);
611 S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12);
612 S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13);
613 S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14);
614 S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15);
615 S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16);
616 S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17);
617 S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18);
618 S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19);
619 S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20);
620 S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21);
621 S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22);
622 S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23);
623 S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24);
624 S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25);
625 S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26);
626 S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27);
627 S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28);
628 S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29);
629 S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30);
630 S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
631 S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
632
633 leaq (4*4*4)(%rsi), %rax;
634
635 testb %cl, %cl;
636 jnz __enc_xor8;
637
638 write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
639 write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
640
641 ret;
642
643__enc_xor8:
644 xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
645 xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
646
647 ret;
648
649.align 8
650.global serpent_dec_blk_8way_avx
651.type serpent_dec_blk_8way_avx,@function;
652
653serpent_dec_blk_8way_avx:
654 /* input:
655 * %rdi: ctx, CTX
656 * %rsi: dst
657 * %rdx: src
658 */
659
660 vpcmpeqd RNOT, RNOT, RNOT;
661
662 leaq (4*4*4)(%rdx), %rax;
663 read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
664 read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
665
666 K2(RA, RB, RC, RD, RE, 32);
667 SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
668 SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30);
669 SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29);
670 SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28);
671 SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27);
672 SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26);
673 SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25);
674 SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24);
675 SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23);
676 SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22);
677 SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21);
678 SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20);
679 SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19);
680 SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18);
681 SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17);
682 SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16);
683 SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15);
684 SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14);
685 SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13);
686 SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12);
687 SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11);
688 SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10);
689 SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9);
690 SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8);
691 SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7);
692 SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6);
693 SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5);
694 SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4);
695 SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3);
696 SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2);
697 SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
698 S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
699
700 leaq (4*4*4)(%rsi), %rax;
701 write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
702 write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
703
704 ret;
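
C callers reach these routines through arch/x86/include/asm/crypto/serpent-avx.h
(listed in the diffstat). The following is only a sketch of the interface
implied by the register comments above (%rdi = ctx, %rsi = dst, %rdx = src,
%rcx = xor flag); the real header is not reproduced in this hunk:

	asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx,
						   u8 *dst, const u8 *src,
						   bool xor);
	asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx,
						 u8 *dst, const u8 *src);

	static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx,
						u8 *dst, const u8 *src)
	{
		__serpent_enc_blk_8way_avx(ctx, dst, src, false);
	}

	static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx,
						    u8 *dst, const u8 *src)
	{
		/* xor=true: the asm XORs its output into dst (CTR path) */
		__serpent_enc_blk_8way_avx(ctx, dst, src, true);
	}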
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
new file mode 100644
index 000000000000..b36bdac237eb
--- /dev/null
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -0,0 +1,636 @@
1/*
2 * Glue Code for AVX assembler versions of Serpent Cipher
3 *
4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 *
7 * Glue code based on serpent_sse2_glue.c by:
8 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
23 * USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/hardirq.h>
29#include <linux/types.h>
30#include <linux/crypto.h>
31#include <linux/err.h>
32#include <crypto/algapi.h>
33#include <crypto/serpent.h>
34#include <crypto/cryptd.h>
35#include <crypto/b128ops.h>
36#include <crypto/ctr.h>
37#include <crypto/lrw.h>
38#include <crypto/xts.h>
39#include <asm/xcr.h>
40#include <asm/xsave.h>
41#include <asm/crypto/serpent-avx.h>
42#include <asm/crypto/ablk_helper.h>
43#include <asm/crypto/glue_helper.h>
44
45static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
46{
47 u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
48 unsigned int j;
49
50 for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
51 ivs[j] = src[j];
52
53 serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
54
55 for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
56 u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
57}
58
59static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
60{
61 be128 ctrblk;
62
63 u128_to_be128(&ctrblk, iv);
64 u128_inc(iv);
65
66 __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
67 u128_xor(dst, src, (u128 *)&ctrblk);
68}
69
70static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
71 u128 *iv)
72{
73 be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
74 unsigned int i;
75
76 for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
77 if (dst != src)
78 dst[i] = src[i];
79
80 u128_to_be128(&ctrblks[i], iv);
81 u128_inc(iv);
82 }
83
84 serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
85}
86
87static const struct common_glue_ctx serpent_enc = {
88 .num_funcs = 2,
89 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
90
91 .funcs = { {
92 .num_blocks = SERPENT_PARALLEL_BLOCKS,
93 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
94 }, {
95 .num_blocks = 1,
96 .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
97 } }
98};
99
100static const struct common_glue_ctx serpent_ctr = {
101 .num_funcs = 2,
102 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
103
104 .funcs = { {
105 .num_blocks = SERPENT_PARALLEL_BLOCKS,
106 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
107 }, {
108 .num_blocks = 1,
109 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
110 } }
111};
112
113static const struct common_glue_ctx serpent_dec = {
114 .num_funcs = 2,
115 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
116
117 .funcs = { {
118 .num_blocks = SERPENT_PARALLEL_BLOCKS,
119 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
120 }, {
121 .num_blocks = 1,
122 .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
123 } }
124};
125
126static const struct common_glue_ctx serpent_dec_cbc = {
127 .num_funcs = 2,
128 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
129
130 .funcs = { {
131 .num_blocks = SERPENT_PARALLEL_BLOCKS,
132 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
133 }, {
134 .num_blocks = 1,
135 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
136 } }
137};
138
139static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
140 struct scatterlist *src, unsigned int nbytes)
141{
142 return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
143}
144
145static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
146 struct scatterlist *src, unsigned int nbytes)
147{
148 return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
149}
150
151static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
152 struct scatterlist *src, unsigned int nbytes)
153{
154 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
155 dst, src, nbytes);
156}
157
158static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
159 struct scatterlist *src, unsigned int nbytes)
160{
161 return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
162 nbytes);
163}
164
165static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
166 struct scatterlist *src, unsigned int nbytes)
167{
168 return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
169}
170
171static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
172{
173 return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
174 NULL, fpu_enabled, nbytes);
175}
176
177static inline void serpent_fpu_end(bool fpu_enabled)
178{
179 glue_fpu_end(fpu_enabled);
180}
181
182struct crypt_priv {
183 struct serpent_ctx *ctx;
184 bool fpu_enabled;
185};
186
187static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
188{
189 const unsigned int bsize = SERPENT_BLOCK_SIZE;
190 struct crypt_priv *ctx = priv;
191 int i;
192
193 ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
194
195 if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
196 serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
197 return;
198 }
199
200 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
201 __serpent_encrypt(ctx->ctx, srcdst, srcdst);
202}
203
204static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
205{
206 const unsigned int bsize = SERPENT_BLOCK_SIZE;
207 struct crypt_priv *ctx = priv;
208 int i;
209
210 ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
211
212 if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
213 serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
214 return;
215 }
216
217 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
218 __serpent_decrypt(ctx->ctx, srcdst, srcdst);
219}
220
221struct serpent_lrw_ctx {
222 struct lrw_table_ctx lrw_table;
223 struct serpent_ctx serpent_ctx;
224};
225
226static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
227 unsigned int keylen)
228{
229 struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
230 int err;
231
232 err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
233 SERPENT_BLOCK_SIZE);
234 if (err)
235 return err;
236
237 return lrw_init_table(&ctx->lrw_table, key + keylen -
238 SERPENT_BLOCK_SIZE);
239}
240
241static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
242 struct scatterlist *src, unsigned int nbytes)
243{
244 struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
245 be128 buf[SERPENT_PARALLEL_BLOCKS];
246 struct crypt_priv crypt_ctx = {
247 .ctx = &ctx->serpent_ctx,
248 .fpu_enabled = false,
249 };
250 struct lrw_crypt_req req = {
251 .tbuf = buf,
252 .tbuflen = sizeof(buf),
253
254 .table_ctx = &ctx->lrw_table,
255 .crypt_ctx = &crypt_ctx,
256 .crypt_fn = encrypt_callback,
257 };
258 int ret;
259
260 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
261 ret = lrw_crypt(desc, dst, src, nbytes, &req);
262 serpent_fpu_end(crypt_ctx.fpu_enabled);
263
264 return ret;
265}
266
267static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
268 struct scatterlist *src, unsigned int nbytes)
269{
270 struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
271 be128 buf[SERPENT_PARALLEL_BLOCKS];
272 struct crypt_priv crypt_ctx = {
273 .ctx = &ctx->serpent_ctx,
274 .fpu_enabled = false,
275 };
276 struct lrw_crypt_req req = {
277 .tbuf = buf,
278 .tbuflen = sizeof(buf),
279
280 .table_ctx = &ctx->lrw_table,
281 .crypt_ctx = &crypt_ctx,
282 .crypt_fn = decrypt_callback,
283 };
284 int ret;
285
286 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
287 ret = lrw_crypt(desc, dst, src, nbytes, &req);
288 serpent_fpu_end(crypt_ctx.fpu_enabled);
289
290 return ret;
291}
292
293static void lrw_exit_tfm(struct crypto_tfm *tfm)
294{
295 struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
296
297 lrw_free_table(&ctx->lrw_table);
298}
299
300struct serpent_xts_ctx {
301 struct serpent_ctx tweak_ctx;
302 struct serpent_ctx crypt_ctx;
303};
304
305static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
306 unsigned int keylen)
307{
308 struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
309 u32 *flags = &tfm->crt_flags;
310 int err;
311
312 /* key consists of keys of equal size concatenated, therefore
313 * the length must be even
314 */
315 if (keylen % 2) {
316 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
317 return -EINVAL;
318 }
319
320 /* first half of xts-key is for crypt */
321 err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
322 if (err)
323 return err;
324
325 /* second half of xts-key is for tweak */
326 return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
327}
328
329static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
330 struct scatterlist *src, unsigned int nbytes)
331{
332 struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
333 be128 buf[SERPENT_PARALLEL_BLOCKS];
334 struct crypt_priv crypt_ctx = {
335 .ctx = &ctx->crypt_ctx,
336 .fpu_enabled = false,
337 };
338 struct xts_crypt_req req = {
339 .tbuf = buf,
340 .tbuflen = sizeof(buf),
341
342 .tweak_ctx = &ctx->tweak_ctx,
343 .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
344 .crypt_ctx = &crypt_ctx,
345 .crypt_fn = encrypt_callback,
346 };
347 int ret;
348
349 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
350 ret = xts_crypt(desc, dst, src, nbytes, &req);
351 serpent_fpu_end(crypt_ctx.fpu_enabled);
352
353 return ret;
354}
355
356static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
357 struct scatterlist *src, unsigned int nbytes)
358{
359 struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
360 be128 buf[SERPENT_PARALLEL_BLOCKS];
361 struct crypt_priv crypt_ctx = {
362 .ctx = &ctx->crypt_ctx,
363 .fpu_enabled = false,
364 };
365 struct xts_crypt_req req = {
366 .tbuf = buf,
367 .tbuflen = sizeof(buf),
368
369 .tweak_ctx = &ctx->tweak_ctx,
370 .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
371 .crypt_ctx = &crypt_ctx,
372 .crypt_fn = decrypt_callback,
373 };
374 int ret;
375
376 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
377 ret = xts_crypt(desc, dst, src, nbytes, &req);
378 serpent_fpu_end(crypt_ctx.fpu_enabled);
379
380 return ret;
381}
382
383static struct crypto_alg serpent_algs[10] = { {
384 .cra_name = "__ecb-serpent-avx",
385 .cra_driver_name = "__driver-ecb-serpent-avx",
386 .cra_priority = 0,
387 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
388 .cra_blocksize = SERPENT_BLOCK_SIZE,
389 .cra_ctxsize = sizeof(struct serpent_ctx),
390 .cra_alignmask = 0,
391 .cra_type = &crypto_blkcipher_type,
392 .cra_module = THIS_MODULE,
393 .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list),
394 .cra_u = {
395 .blkcipher = {
396 .min_keysize = SERPENT_MIN_KEY_SIZE,
397 .max_keysize = SERPENT_MAX_KEY_SIZE,
398 .setkey = serpent_setkey,
399 .encrypt = ecb_encrypt,
400 .decrypt = ecb_decrypt,
401 },
402 },
403}, {
404 .cra_name = "__cbc-serpent-avx",
405 .cra_driver_name = "__driver-cbc-serpent-avx",
406 .cra_priority = 0,
407 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
408 .cra_blocksize = SERPENT_BLOCK_SIZE,
409 .cra_ctxsize = sizeof(struct serpent_ctx),
410 .cra_alignmask = 0,
411 .cra_type = &crypto_blkcipher_type,
412 .cra_module = THIS_MODULE,
413 .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list),
414 .cra_u = {
415 .blkcipher = {
416 .min_keysize = SERPENT_MIN_KEY_SIZE,
417 .max_keysize = SERPENT_MAX_KEY_SIZE,
418 .setkey = serpent_setkey,
419 .encrypt = cbc_encrypt,
420 .decrypt = cbc_decrypt,
421 },
422 },
423}, {
424 .cra_name = "__ctr-serpent-avx",
425 .cra_driver_name = "__driver-ctr-serpent-avx",
426 .cra_priority = 0,
427 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
428 .cra_blocksize = 1,
429 .cra_ctxsize = sizeof(struct serpent_ctx),
430 .cra_alignmask = 0,
431 .cra_type = &crypto_blkcipher_type,
432 .cra_module = THIS_MODULE,
433 .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list),
434 .cra_u = {
435 .blkcipher = {
436 .min_keysize = SERPENT_MIN_KEY_SIZE,
437 .max_keysize = SERPENT_MAX_KEY_SIZE,
438 .ivsize = SERPENT_BLOCK_SIZE,
439 .setkey = serpent_setkey,
440 .encrypt = ctr_crypt,
441 .decrypt = ctr_crypt,
442 },
443 },
444}, {
445 .cra_name = "__lrw-serpent-avx",
446 .cra_driver_name = "__driver-lrw-serpent-avx",
447 .cra_priority = 0,
448 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
449 .cra_blocksize = SERPENT_BLOCK_SIZE,
450 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
451 .cra_alignmask = 0,
452 .cra_type = &crypto_blkcipher_type,
453 .cra_module = THIS_MODULE,
454 .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list),
455 .cra_exit = lrw_exit_tfm,
456 .cra_u = {
457 .blkcipher = {
458 .min_keysize = SERPENT_MIN_KEY_SIZE +
459 SERPENT_BLOCK_SIZE,
460 .max_keysize = SERPENT_MAX_KEY_SIZE +
461 SERPENT_BLOCK_SIZE,
462 .ivsize = SERPENT_BLOCK_SIZE,
463 .setkey = lrw_serpent_setkey,
464 .encrypt = lrw_encrypt,
465 .decrypt = lrw_decrypt,
466 },
467 },
468}, {
469 .cra_name = "__xts-serpent-avx",
470 .cra_driver_name = "__driver-xts-serpent-avx",
471 .cra_priority = 0,
472 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
473 .cra_blocksize = SERPENT_BLOCK_SIZE,
474 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
475 .cra_alignmask = 0,
476 .cra_type = &crypto_blkcipher_type,
477 .cra_module = THIS_MODULE,
478 .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list),
479 .cra_u = {
480 .blkcipher = {
481 .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
482 .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
483 .ivsize = SERPENT_BLOCK_SIZE,
484 .setkey = xts_serpent_setkey,
485 .encrypt = xts_encrypt,
486 .decrypt = xts_decrypt,
487 },
488 },
489}, {
490 .cra_name = "ecb(serpent)",
491 .cra_driver_name = "ecb-serpent-avx",
492 .cra_priority = 500,
493 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
494 .cra_blocksize = SERPENT_BLOCK_SIZE,
495 .cra_ctxsize = sizeof(struct async_helper_ctx),
496 .cra_alignmask = 0,
497 .cra_type = &crypto_ablkcipher_type,
498 .cra_module = THIS_MODULE,
499 .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list),
500 .cra_init = ablk_init,
501 .cra_exit = ablk_exit,
502 .cra_u = {
503 .ablkcipher = {
504 .min_keysize = SERPENT_MIN_KEY_SIZE,
505 .max_keysize = SERPENT_MAX_KEY_SIZE,
506 .setkey = ablk_set_key,
507 .encrypt = ablk_encrypt,
508 .decrypt = ablk_decrypt,
509 },
510 },
511}, {
512 .cra_name = "cbc(serpent)",
513 .cra_driver_name = "cbc-serpent-avx",
514 .cra_priority = 500,
515 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
516 .cra_blocksize = SERPENT_BLOCK_SIZE,
517 .cra_ctxsize = sizeof(struct async_helper_ctx),
518 .cra_alignmask = 0,
519 .cra_type = &crypto_ablkcipher_type,
520 .cra_module = THIS_MODULE,
521 .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list),
522 .cra_init = ablk_init,
523 .cra_exit = ablk_exit,
524 .cra_u = {
525 .ablkcipher = {
526 .min_keysize = SERPENT_MIN_KEY_SIZE,
527 .max_keysize = SERPENT_MAX_KEY_SIZE,
528 .ivsize = SERPENT_BLOCK_SIZE,
529 .setkey = ablk_set_key,
530 .encrypt = __ablk_encrypt,
531 .decrypt = ablk_decrypt,
532 },
533 },
534}, {
535 .cra_name = "ctr(serpent)",
536 .cra_driver_name = "ctr-serpent-avx",
537 .cra_priority = 500,
538 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
539 .cra_blocksize = 1,
540 .cra_ctxsize = sizeof(struct async_helper_ctx),
541 .cra_alignmask = 0,
542 .cra_type = &crypto_ablkcipher_type,
543 .cra_module = THIS_MODULE,
544 .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list),
545 .cra_init = ablk_init,
546 .cra_exit = ablk_exit,
547 .cra_u = {
548 .ablkcipher = {
549 .min_keysize = SERPENT_MIN_KEY_SIZE,
550 .max_keysize = SERPENT_MAX_KEY_SIZE,
551 .ivsize = SERPENT_BLOCK_SIZE,
552 .setkey = ablk_set_key,
553 .encrypt = ablk_encrypt,
554 .decrypt = ablk_encrypt,
555 .geniv = "chainiv",
556 },
557 },
558}, {
559 .cra_name = "lrw(serpent)",
560 .cra_driver_name = "lrw-serpent-avx",
561 .cra_priority = 500,
562 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
563 .cra_blocksize = SERPENT_BLOCK_SIZE,
564 .cra_ctxsize = sizeof(struct async_helper_ctx),
565 .cra_alignmask = 0,
566 .cra_type = &crypto_ablkcipher_type,
567 .cra_module = THIS_MODULE,
568 .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list),
569 .cra_init = ablk_init,
570 .cra_exit = ablk_exit,
571 .cra_u = {
572 .ablkcipher = {
573 .min_keysize = SERPENT_MIN_KEY_SIZE +
574 SERPENT_BLOCK_SIZE,
575 .max_keysize = SERPENT_MAX_KEY_SIZE +
576 SERPENT_BLOCK_SIZE,
577 .ivsize = SERPENT_BLOCK_SIZE,
578 .setkey = ablk_set_key,
579 .encrypt = ablk_encrypt,
580 .decrypt = ablk_decrypt,
581 },
582 },
583}, {
584 .cra_name = "xts(serpent)",
585 .cra_driver_name = "xts-serpent-avx",
586 .cra_priority = 500,
587 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
588 .cra_blocksize = SERPENT_BLOCK_SIZE,
589 .cra_ctxsize = sizeof(struct async_helper_ctx),
590 .cra_alignmask = 0,
591 .cra_type = &crypto_ablkcipher_type,
592 .cra_module = THIS_MODULE,
593 .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list),
594 .cra_init = ablk_init,
595 .cra_exit = ablk_exit,
596 .cra_u = {
597 .ablkcipher = {
598 .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
599 .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
600 .ivsize = SERPENT_BLOCK_SIZE,
601 .setkey = ablk_set_key,
602 .encrypt = ablk_encrypt,
603 .decrypt = ablk_decrypt,
604 },
605 },
606} };
607
608static int __init serpent_init(void)
609{
610 u64 xcr0;
611
612 if (!cpu_has_avx || !cpu_has_osxsave) {
613 printk(KERN_INFO "AVX instructions are not detected.\n");
614 return -ENODEV;
615 }
616
617 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
618 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
619 printk(KERN_INFO "AVX detected but unusable.\n");
620 return -ENODEV;
621 }
622
623 return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
624}
625
626static void __exit serpent_exit(void)
627{
628 crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
629}
630
631module_init(serpent_init);
632module_exit(serpent_exit);
633
634MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized");
635MODULE_LICENSE("GPL");
636MODULE_ALIAS("serpent");
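
Once registered, the async variants are reachable by name through the normal
crypto API, their cra_priority of 500 letting them win over the generic C and
SSE2 implementations. A minimal caller sketch against the ablkcipher
interface of this kernel generation (serpent_xts_example is illustrative,
error handling trimmed):

	static int serpent_xts_example(const u8 *key, unsigned int keylen)
	{
		struct crypto_ablkcipher *tfm;
		int err;

		/* resolves to "xts-serpent-avx" when AVX is usable */
		tfm = crypto_alloc_ablkcipher("xts(serpent)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		/* XTS keys are two concatenated Serpent keys: crypt + tweak */
		err = crypto_ablkcipher_setkey(tfm, key, keylen);

		crypto_free_ablkcipher(tfm);
		return err;
	}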
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 4b21be85e0a1..d679c8675f4a 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -41,358 +41,145 @@
41#include <crypto/ctr.h> 41#include <crypto/ctr.h>
42#include <crypto/lrw.h> 42#include <crypto/lrw.h>
43#include <crypto/xts.h> 43#include <crypto/xts.h>
44#include <asm/i387.h> 44#include <asm/crypto/serpent-sse2.h>
45#include <asm/serpent.h> 45#include <asm/crypto/ablk_helper.h>
46#include <crypto/scatterwalk.h> 46#include <asm/crypto/glue_helper.h>
47#include <linux/workqueue.h>
48#include <linux/spinlock.h>
49
50struct async_serpent_ctx {
51 struct cryptd_ablkcipher *cryptd_tfm;
52};
53 47
54static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) 48static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
55{
56 if (fpu_enabled)
57 return true;
58
59 /* SSE2 is only used when chunk to be processed is large enough, so
60 * do not enable FPU until it is necessary.
61 */
62 if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
63 return false;
64
65 kernel_fpu_begin();
66 return true;
67}
68
69static inline void serpent_fpu_end(bool fpu_enabled)
70{ 49{
71 if (fpu_enabled) 50 u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
72 kernel_fpu_end(); 51 unsigned int j;
73}
74
75static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
76 bool enc)
77{
78 bool fpu_enabled = false;
79 struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
80 const unsigned int bsize = SERPENT_BLOCK_SIZE;
81 unsigned int nbytes;
82 int err;
83
84 err = blkcipher_walk_virt(desc, walk);
85 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
86
87 while ((nbytes = walk->nbytes)) {
88 u8 *wsrc = walk->src.virt.addr;
89 u8 *wdst = walk->dst.virt.addr;
90
91 fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
92
93 /* Process multi-block batch */
94 if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
95 do {
96 if (enc)
97 serpent_enc_blk_xway(ctx, wdst, wsrc);
98 else
99 serpent_dec_blk_xway(ctx, wdst, wsrc);
100
101 wsrc += bsize * SERPENT_PARALLEL_BLOCKS;
102 wdst += bsize * SERPENT_PARALLEL_BLOCKS;
103 nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
104 } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
105
106 if (nbytes < bsize)
107 goto done;
108 }
109
110 /* Handle leftovers */
111 do {
112 if (enc)
113 __serpent_encrypt(ctx, wdst, wsrc);
114 else
115 __serpent_decrypt(ctx, wdst, wsrc);
116
117 wsrc += bsize;
118 wdst += bsize;
119 nbytes -= bsize;
120 } while (nbytes >= bsize);
121
122done:
123 err = blkcipher_walk_done(desc, walk, nbytes);
124 }
125 52
126 serpent_fpu_end(fpu_enabled); 53 for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
127 return err; 54 ivs[j] = src[j];
128}
129 55
130static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 56 serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
131 struct scatterlist *src, unsigned int nbytes)
132{
133 struct blkcipher_walk walk;
134 57
135 blkcipher_walk_init(&walk, dst, src, nbytes); 58 for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
136 return ecb_crypt(desc, &walk, true); 59 u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
137} 60}
138 61
139static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 62static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
140 struct scatterlist *src, unsigned int nbytes)
141{ 63{
142 struct blkcipher_walk walk; 64 be128 ctrblk;
143 65
144 blkcipher_walk_init(&walk, dst, src, nbytes); 66 u128_to_be128(&ctrblk, iv);
145 return ecb_crypt(desc, &walk, false); 67 u128_inc(iv);
146}
147 68
148static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, 69 __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
149 struct blkcipher_walk *walk) 70 u128_xor(dst, src, (u128 *)&ctrblk);
150{
151 struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
152 const unsigned int bsize = SERPENT_BLOCK_SIZE;
153 unsigned int nbytes = walk->nbytes;
154 u128 *src = (u128 *)walk->src.virt.addr;
155 u128 *dst = (u128 *)walk->dst.virt.addr;
156 u128 *iv = (u128 *)walk->iv;
157
158 do {
159 u128_xor(dst, src, iv);
160 __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst);
161 iv = dst;
162
163 src += 1;
164 dst += 1;
165 nbytes -= bsize;
166 } while (nbytes >= bsize);
167
168 u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
169 return nbytes;
170} 71}
171 72
172static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 73static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
173 struct scatterlist *src, unsigned int nbytes) 74 u128 *iv)
174{ 75{
175 struct blkcipher_walk walk; 76 be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
176 int err; 77 unsigned int i;
177 78
178 blkcipher_walk_init(&walk, dst, src, nbytes); 79 for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
179 err = blkcipher_walk_virt(desc, &walk); 80 if (dst != src)
81 dst[i] = src[i];
180 82
181 while ((nbytes = walk.nbytes)) { 83 u128_to_be128(&ctrblks[i], iv);
182 nbytes = __cbc_encrypt(desc, &walk); 84 u128_inc(iv);
183 err = blkcipher_walk_done(desc, &walk, nbytes);
184 } 85 }
185 86
186 return err; 87 serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
187} 88}
188 89
189static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, 90static const struct common_glue_ctx serpent_enc = {
190 struct blkcipher_walk *walk) 91 .num_funcs = 2,
191{ 92 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
192 struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
193 const unsigned int bsize = SERPENT_BLOCK_SIZE;
194 unsigned int nbytes = walk->nbytes;
195 u128 *src = (u128 *)walk->src.virt.addr;
196 u128 *dst = (u128 *)walk->dst.virt.addr;
197 u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
198 u128 last_iv;
199 int i;
200
201 /* Start of the last block. */
202 src += nbytes / bsize - 1;
203 dst += nbytes / bsize - 1;
204
205 last_iv = *src;
206
207 /* Process multi-block batch */
208 if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
209 do {
210 nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1);
211 src -= SERPENT_PARALLEL_BLOCKS - 1;
212 dst -= SERPENT_PARALLEL_BLOCKS - 1;
213
214 for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
215 ivs[i] = src[i];
216
217 serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
218
219 for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
220 u128_xor(dst + (i + 1), dst + (i + 1), ivs + i);
221
222 nbytes -= bsize;
223 if (nbytes < bsize)
224 goto done;
225 93
226 u128_xor(dst, dst, src - 1); 94 .funcs = { {
227 src -= 1; 95 .num_blocks = SERPENT_PARALLEL_BLOCKS,
228 dst -= 1; 96 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
229 } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); 97 }, {
230 98 .num_blocks = 1,
231 if (nbytes < bsize) 99 .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
232 goto done; 100 } }
233 } 101};
234
235 /* Handle leftovers */
236 for (;;) {
237 __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src);
238
239 nbytes -= bsize;
240 if (nbytes < bsize)
241 break;
242 102
243 u128_xor(dst, dst, src - 1); 103static const struct common_glue_ctx serpent_ctr = {
244 src -= 1; 104 .num_funcs = 2,
245 dst -= 1; 105 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
246 } 106
107 .funcs = { {
108 .num_blocks = SERPENT_PARALLEL_BLOCKS,
109 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
110 }, {
111 .num_blocks = 1,
112 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
113 } }
114};
247 115
248done: 116static const struct common_glue_ctx serpent_dec = {
249 u128_xor(dst, dst, (u128 *)walk->iv); 117 .num_funcs = 2,
250 *(u128 *)walk->iv = last_iv; 118 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
119
120 .funcs = { {
121 .num_blocks = SERPENT_PARALLEL_BLOCKS,
122 .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
123 }, {
124 .num_blocks = 1,
125 .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
126 } }
127};
251 128
252 return nbytes; 129static const struct common_glue_ctx serpent_dec_cbc = {
253} 130 .num_funcs = 2,
131 .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
132
133 .funcs = { {
134 .num_blocks = SERPENT_PARALLEL_BLOCKS,
135 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
136 }, {
137 .num_blocks = 1,
138 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
139 } }
140};
254 141
255static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 142static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
256 struct scatterlist *src, unsigned int nbytes) 143 struct scatterlist *src, unsigned int nbytes)
257{ 144{
258 bool fpu_enabled = false; 145 return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
259 struct blkcipher_walk walk;
260 int err;
261
262 blkcipher_walk_init(&walk, dst, src, nbytes);
263 err = blkcipher_walk_virt(desc, &walk);
264 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
265
266 while ((nbytes = walk.nbytes)) {
267 fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
268 nbytes = __cbc_decrypt(desc, &walk);
269 err = blkcipher_walk_done(desc, &walk, nbytes);
270 }
271
272 serpent_fpu_end(fpu_enabled);
273 return err;
274} 146}
275 147
276static inline void u128_to_be128(be128 *dst, const u128 *src) 148static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
149 struct scatterlist *src, unsigned int nbytes)
277{ 150{
278 dst->a = cpu_to_be64(src->a); 151 return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
279 dst->b = cpu_to_be64(src->b);
280} 152}
281 153
282static inline void be128_to_u128(u128 *dst, const be128 *src) 154static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
155 struct scatterlist *src, unsigned int nbytes)
283{ 156{
284 dst->a = be64_to_cpu(src->a); 157 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
285 dst->b = be64_to_cpu(src->b); 158 dst, src, nbytes);
286} 159}
287 160
288static inline void u128_inc(u128 *i) 161static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
162 struct scatterlist *src, unsigned int nbytes)
289{ 163{
290 i->b++; 164 return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
291 if (!i->b) 165 nbytes);
292 i->a++;
293} 166}
294 167
295static void ctr_crypt_final(struct blkcipher_desc *desc, 168static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
296 struct blkcipher_walk *walk) 169 struct scatterlist *src, unsigned int nbytes)
297{ 170{
298 struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 171 return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
299 u8 *ctrblk = walk->iv;
300 u8 keystream[SERPENT_BLOCK_SIZE];
301 u8 *src = walk->src.virt.addr;
302 u8 *dst = walk->dst.virt.addr;
303 unsigned int nbytes = walk->nbytes;
304
305 __serpent_encrypt(ctx, keystream, ctrblk);
306 crypto_xor(keystream, src, nbytes);
307 memcpy(dst, keystream, nbytes);
308
309 crypto_inc(ctrblk, SERPENT_BLOCK_SIZE);
310} 172}
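[Editor's note: ctr_crypt_final, removed on the left, is the classic CTR tail: a final partial block needs no padding, because the block cipher only ever encrypts the counter and the resulting keystream can simply be truncated. A hedged sketch of the idea against a generic block primitive:

/* Sketch of CTR tail handling: encrypt the counter block once, then
 * XOR only the remaining nbytes of keystream into the data. Assumes a
 * generic block_encrypt(ctx, out, in) primitive and a block size of
 * at most 16 bytes, as for these 128-bit ciphers. */
static void ctr_tail_sketch(void *ctx, u8 *dst, const u8 *src, u8 *ctrblk,
                            unsigned int nbytes,
                            void (*block_encrypt)(void *ctx, u8 *out,
                                                  const u8 *in))
{
        u8 keystream[16];
        unsigned int i;

        block_encrypt(ctx, keystream, ctrblk);
        for (i = 0; i < nbytes; i++)
                dst[i] = src[i] ^ keystream[i]; /* truncated keystream */
}]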
311 173
312static unsigned int __ctr_crypt(struct blkcipher_desc *desc, 174static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
313 struct blkcipher_walk *walk)
314{ 175{
315 struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 176 return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
316 const unsigned int bsize = SERPENT_BLOCK_SIZE; 177 NULL, fpu_enabled, nbytes);
317 unsigned int nbytes = walk->nbytes;
318 u128 *src = (u128 *)walk->src.virt.addr;
319 u128 *dst = (u128 *)walk->dst.virt.addr;
320 u128 ctrblk;
321 be128 ctrblocks[SERPENT_PARALLEL_BLOCKS];
322 int i;
323
324 be128_to_u128(&ctrblk, (be128 *)walk->iv);
325
326 /* Process multi-block batch */
327 if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
328 do {
329 /* create ctrblks for parallel encrypt */
330 for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
331 if (dst != src)
332 dst[i] = src[i];
333
334 u128_to_be128(&ctrblocks[i], &ctrblk);
335 u128_inc(&ctrblk);
336 }
337
338 serpent_enc_blk_xway_xor(ctx, (u8 *)dst,
339 (u8 *)ctrblocks);
340
341 src += SERPENT_PARALLEL_BLOCKS;
342 dst += SERPENT_PARALLEL_BLOCKS;
343 nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
344 } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
345
346 if (nbytes < bsize)
347 goto done;
348 }
349
350 /* Handle leftovers */
351 do {
352 if (dst != src)
353 *dst = *src;
354
355 u128_to_be128(&ctrblocks[0], &ctrblk);
356 u128_inc(&ctrblk);
357
358 __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
359 u128_xor(dst, dst, (u128 *)ctrblocks);
360
361 src += 1;
362 dst += 1;
363 nbytes -= bsize;
364 } while (nbytes >= bsize);
365
366done:
367 u128_to_be128((be128 *)walk->iv, &ctrblk);
368 return nbytes;
369} 178}
370 179
371static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, 180static inline void serpent_fpu_end(bool fpu_enabled)
372 struct scatterlist *src, unsigned int nbytes)
373{ 181{
374 bool fpu_enabled = false; 182 glue_fpu_end(fpu_enabled);
375 struct blkcipher_walk walk;
376 int err;
377
378 blkcipher_walk_init(&walk, dst, src, nbytes);
379 err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE);
380 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
381
382 while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) {
383 fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
384 nbytes = __ctr_crypt(desc, &walk);
385 err = blkcipher_walk_done(desc, &walk, nbytes);
386 }
387
388 serpent_fpu_end(fpu_enabled);
389
390 if (walk.nbytes) {
391 ctr_crypt_final(desc, &walk);
392 err = blkcipher_walk_done(desc, &walk, 0);
393 }
394
395 return err;
396} 183}
397 184
398struct crypt_priv { 185struct crypt_priv {
@@ -596,106 +383,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
596 return ret; 383 return ret;
597} 384}
598 385
599static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
600 unsigned int key_len)
601{
602 struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
603 struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
604 int err;
605
606 crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
607 crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
608 & CRYPTO_TFM_REQ_MASK);
609 err = crypto_ablkcipher_setkey(child, key, key_len);
610 crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
611 & CRYPTO_TFM_RES_MASK);
612 return err;
613}
614
615static int __ablk_encrypt(struct ablkcipher_request *req)
616{
617 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
618 struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
619 struct blkcipher_desc desc;
620
621 desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
622 desc.info = req->info;
623 desc.flags = 0;
624
625 return crypto_blkcipher_crt(desc.tfm)->encrypt(
626 &desc, req->dst, req->src, req->nbytes);
627}
628
629static int ablk_encrypt(struct ablkcipher_request *req)
630{
631 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
632 struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
633
634 if (!irq_fpu_usable()) {
635 struct ablkcipher_request *cryptd_req =
636 ablkcipher_request_ctx(req);
637
638 memcpy(cryptd_req, req, sizeof(*req));
639 ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
640
641 return crypto_ablkcipher_encrypt(cryptd_req);
642 } else {
643 return __ablk_encrypt(req);
644 }
645}
646
647static int ablk_decrypt(struct ablkcipher_request *req)
648{
649 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
650 struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
651
652 if (!irq_fpu_usable()) {
653 struct ablkcipher_request *cryptd_req =
654 ablkcipher_request_ctx(req);
655
656 memcpy(cryptd_req, req, sizeof(*req));
657 ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
658
659 return crypto_ablkcipher_decrypt(cryptd_req);
660 } else {
661 struct blkcipher_desc desc;
662
663 desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
664 desc.info = req->info;
665 desc.flags = 0;
666
667 return crypto_blkcipher_crt(desc.tfm)->decrypt(
668 &desc, req->dst, req->src, req->nbytes);
669 }
670}
671
672static void ablk_exit(struct crypto_tfm *tfm)
673{
674 struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
675
676 cryptd_free_ablkcipher(ctx->cryptd_tfm);
677}
678
679static int ablk_init(struct crypto_tfm *tfm)
680{
681 struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
682 struct cryptd_ablkcipher *cryptd_tfm;
683 char drv_name[CRYPTO_MAX_ALG_NAME];
684
685 snprintf(drv_name, sizeof(drv_name), "__driver-%s",
686 crypto_tfm_alg_driver_name(tfm));
687
688 cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
689 if (IS_ERR(cryptd_tfm))
690 return PTR_ERR(cryptd_tfm);
691
692 ctx->cryptd_tfm = cryptd_tfm;
693 tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
694 crypto_ablkcipher_reqsize(&cryptd_tfm->base);
695
696 return 0;
697}
698
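[Editor's note: all of the ablk_* code deleted above was duplicated in every SSE2/AVX cipher module; it now lives once in arch/x86/crypto/ablk_helper.c, behind the declarations added in asm/crypto/ablk_helper.h later in this diff. Judging from that header, a module-specific init likely collapses to a sketch like this, with ablk_init_common() assumed to do the cryptd allocation that the removed ablk_init did by hand:

/* Sketch only: per-cipher init with the shared helper. The heavy
 * lifting (cryptd_alloc_ablkcipher + reqsize setup) is assumed to live
 * in ablk_init_common(), per the header added later in this diff. */
#include <asm/crypto/ablk_helper.h>

static int serpent_ablk_init_sketch(struct crypto_tfm *tfm)
{
        char drv_name[CRYPTO_MAX_ALG_NAME];

        snprintf(drv_name, sizeof(drv_name), "__driver-%s",
                 crypto_tfm_alg_driver_name(tfm));

        return ablk_init_common(tfm, drv_name);
}]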
699static struct crypto_alg serpent_algs[10] = { { 386static struct crypto_alg serpent_algs[10] = { {
700 .cra_name = "__ecb-serpent-sse2", 387 .cra_name = "__ecb-serpent-sse2",
701 .cra_driver_name = "__driver-ecb-serpent-sse2", 388 .cra_driver_name = "__driver-ecb-serpent-sse2",
@@ -808,7 +495,7 @@ static struct crypto_alg serpent_algs[10] = { {
808 .cra_priority = 400, 495 .cra_priority = 400,
809 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 496 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
810 .cra_blocksize = SERPENT_BLOCK_SIZE, 497 .cra_blocksize = SERPENT_BLOCK_SIZE,
811 .cra_ctxsize = sizeof(struct async_serpent_ctx), 498 .cra_ctxsize = sizeof(struct async_helper_ctx),
812 .cra_alignmask = 0, 499 .cra_alignmask = 0,
813 .cra_type = &crypto_ablkcipher_type, 500 .cra_type = &crypto_ablkcipher_type,
814 .cra_module = THIS_MODULE, 501 .cra_module = THIS_MODULE,
@@ -830,7 +517,7 @@ static struct crypto_alg serpent_algs[10] = { {
830 .cra_priority = 400, 517 .cra_priority = 400,
831 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 518 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
832 .cra_blocksize = SERPENT_BLOCK_SIZE, 519 .cra_blocksize = SERPENT_BLOCK_SIZE,
833 .cra_ctxsize = sizeof(struct async_serpent_ctx), 520 .cra_ctxsize = sizeof(struct async_helper_ctx),
834 .cra_alignmask = 0, 521 .cra_alignmask = 0,
835 .cra_type = &crypto_ablkcipher_type, 522 .cra_type = &crypto_ablkcipher_type,
836 .cra_module = THIS_MODULE, 523 .cra_module = THIS_MODULE,
@@ -853,7 +540,7 @@ static struct crypto_alg serpent_algs[10] = { {
853 .cra_priority = 400, 540 .cra_priority = 400,
854 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 541 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
855 .cra_blocksize = 1, 542 .cra_blocksize = 1,
856 .cra_ctxsize = sizeof(struct async_serpent_ctx), 543 .cra_ctxsize = sizeof(struct async_helper_ctx),
857 .cra_alignmask = 0, 544 .cra_alignmask = 0,
858 .cra_type = &crypto_ablkcipher_type, 545 .cra_type = &crypto_ablkcipher_type,
859 .cra_module = THIS_MODULE, 546 .cra_module = THIS_MODULE,
@@ -877,7 +564,7 @@ static struct crypto_alg serpent_algs[10] = { {
877 .cra_priority = 400, 564 .cra_priority = 400,
878 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 565 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
879 .cra_blocksize = SERPENT_BLOCK_SIZE, 566 .cra_blocksize = SERPENT_BLOCK_SIZE,
880 .cra_ctxsize = sizeof(struct async_serpent_ctx), 567 .cra_ctxsize = sizeof(struct async_helper_ctx),
881 .cra_alignmask = 0, 568 .cra_alignmask = 0,
882 .cra_type = &crypto_ablkcipher_type, 569 .cra_type = &crypto_ablkcipher_type,
883 .cra_module = THIS_MODULE, 570 .cra_module = THIS_MODULE,
@@ -902,7 +589,7 @@ static struct crypto_alg serpent_algs[10] = { {
902 .cra_priority = 400, 589 .cra_priority = 400,
903 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, 590 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
904 .cra_blocksize = SERPENT_BLOCK_SIZE, 591 .cra_blocksize = SERPENT_BLOCK_SIZE,
905 .cra_ctxsize = sizeof(struct async_serpent_ctx), 592 .cra_ctxsize = sizeof(struct async_helper_ctx),
906 .cra_alignmask = 0, 593 .cra_alignmask = 0,
907 .cra_type = &crypto_ablkcipher_type, 594 .cra_type = &crypto_ablkcipher_type,
908 .cra_module = THIS_MODULE, 595 .cra_module = THIS_MODULE,
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index b2c2f57d70e8..49d6987a73d9 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -468,7 +468,7 @@ W_PRECALC_SSSE3
468 */ 468 */
469SHA1_VECTOR_ASM sha1_transform_ssse3 469SHA1_VECTOR_ASM sha1_transform_ssse3
470 470
471#ifdef SHA1_ENABLE_AVX_SUPPORT 471#ifdef CONFIG_AS_AVX
472 472
473.macro W_PRECALC_AVX 473.macro W_PRECALC_AVX
474 474
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index f916499d0abe..4a11a9d72451 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -35,7 +35,7 @@
35 35
36asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, 36asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
37 unsigned int rounds); 37 unsigned int rounds);
38#ifdef SHA1_ENABLE_AVX_SUPPORT 38#ifdef CONFIG_AS_AVX
39asmlinkage void sha1_transform_avx(u32 *digest, const char *data, 39asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
40 unsigned int rounds); 40 unsigned int rounds);
41#endif 41#endif
@@ -184,7 +184,7 @@ static struct shash_alg alg = {
184 } 184 }
185}; 185};
186 186
187#ifdef SHA1_ENABLE_AVX_SUPPORT 187#ifdef CONFIG_AS_AVX
188static bool __init avx_usable(void) 188static bool __init avx_usable(void)
189{ 189{
190 u64 xcr0; 190 u64 xcr0;
@@ -209,7 +209,7 @@ static int __init sha1_ssse3_mod_init(void)
209 if (cpu_has_ssse3) 209 if (cpu_has_ssse3)
210 sha1_transform_asm = sha1_transform_ssse3; 210 sha1_transform_asm = sha1_transform_ssse3;
211 211
212#ifdef SHA1_ENABLE_AVX_SUPPORT 212#ifdef CONFIG_AS_AVX
213 /* allow AVX to override SSSE3, it's a little faster */ 213 /* allow AVX to override SSSE3, it's a little faster */
214 if (avx_usable()) 214 if (avx_usable())
215 sha1_transform_asm = sha1_transform_avx; 215 sha1_transform_asm = sha1_transform_avx;
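[Editor's note: the change from SHA1_ENABLE_AVX_SUPPORT to CONFIG_AS_AVX swaps a hand-rolled define for the kbuild symbol that is set when the assembler can emit AVX instructions; runtime selection is unchanged. The overall pattern, roughly — a sketch, not the file's exact init:

/* Sketch of the build-time + run-time AVX gating pattern. */
static void (*sha1_transform_asm)(u32 *digest, const char *data,
                                  unsigned int rounds);

static int __init sha1_mod_init_sketch(void)
{
        if (cpu_has_ssse3)
                sha1_transform_asm = sha1_transform_ssse3;

#ifdef CONFIG_AS_AVX            /* assembler can emit AVX at build time */
        if (avx_usable())       /* CPU and OS support AVX at run time */
                sha1_transform_asm = sha1_transform_avx;
#endif

        return sha1_transform_asm ? 0 : -ENODEV;
}]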
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
new file mode 100644
index 000000000000..35f45574390d
--- /dev/null
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -0,0 +1,300 @@
1/*
2 * Twofish Cipher 8-way parallel algorithm (AVX/x86_64)
3 *
4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
20 * USA
21 *
22 */
23
24.file "twofish-avx-x86_64-asm_64.S"
25.text
26
27/* structure of crypto context */
28#define s0 0
29#define s1 1024
30#define s2 2048
31#define s3 3072
32#define w 4096
33#define k 4128
34
35/**********************************************************************
36 8-way AVX twofish
37 **********************************************************************/
38#define CTX %rdi
39
40#define RA1 %xmm0
41#define RB1 %xmm1
42#define RC1 %xmm2
43#define RD1 %xmm3
44
45#define RA2 %xmm4
46#define RB2 %xmm5
47#define RC2 %xmm6
48#define RD2 %xmm7
49
50#define RX %xmm8
51#define RY %xmm9
52
53#define RK1 %xmm10
54#define RK2 %xmm11
55
56#define RID1 %rax
57#define RID1b %al
58#define RID2 %rbx
59#define RID2b %bl
60
61#define RGI1 %rdx
62#define RGI1bl %dl
63#define RGI1bh %dh
64#define RGI2 %rcx
65#define RGI2bl %cl
66#define RGI2bh %ch
67
68#define RGS1 %r8
69#define RGS1d %r8d
70#define RGS2 %r9
71#define RGS2d %r9d
72#define RGS3 %r10
73#define RGS3d %r10d
74
75
76#define lookup_32bit(t0, t1, t2, t3, src, dst) \
77 movb src ## bl, RID1b; \
78 movb src ## bh, RID2b; \
79 movl t0(CTX, RID1, 4), dst ## d; \
80 xorl t1(CTX, RID2, 4), dst ## d; \
81 shrq $16, src; \
82 movb src ## bl, RID1b; \
83 movb src ## bh, RID2b; \
84 xorl t2(CTX, RID1, 4), dst ## d; \
85 xorl t3(CTX, RID2, 4), dst ## d;
86
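[Editor's note: lookup_32bit is one 32-bit Twofish g-function evaluation: each byte of the input word indexes one of the four key-dependent 1 KB tables (the s0..s3 offsets defined at the top of the file), and the four results are XORed together. The same computation in scalar C, schematically:

/* Scalar sketch of the g-function lookup done by lookup_32bit.
 * ctx_s[0..3] stand in for the s0..s3 tables inside twofish_ctx. */
static u32 twofish_g_sketch(const u32 ctx_s[4][256], u32 x)
{
        return ctx_s[0][(x >>  0) & 0xff] ^
               ctx_s[1][(x >>  8) & 0xff] ^
               ctx_s[2][(x >> 16) & 0xff] ^
               ctx_s[3][(x >> 24) & 0xff];
}]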
87#define G(a, x, t0, t1, t2, t3) \
88 vmovq a, RGI1; \
89 vpsrldq $8, a, x; \
90 vmovq x, RGI2; \
91 \
92 lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \
93 shrq $16, RGI1; \
94 lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \
95 shlq $32, RGS2; \
96 orq RGS1, RGS2; \
97 \
98 lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \
99 shrq $16, RGI2; \
100 lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \
101 shlq $32, RGS3; \
102 orq RGS1, RGS3; \
103 \
104 vmovq RGS2, x; \
105 vpinsrq $1, RGS3, x, x;
106
107#define encround(a, b, c, d, x, y) \
108 G(a, x, s0, s1, s2, s3); \
109 G(b, y, s1, s2, s3, s0); \
110 vpaddd x, y, x; \
111 vpaddd y, x, y; \
112 vpaddd x, RK1, x; \
113 vpaddd y, RK2, y; \
114 vpxor x, c, c; \
115 vpsrld $1, c, x; \
116 vpslld $(32 - 1), c, c; \
117 vpor c, x, c; \
118 vpslld $1, d, x; \
119 vpsrld $(32 - 1), d, d; \
120 vpor d, x, d; \
121 vpxor d, y, d;
122
123#define decround(a, b, c, d, x, y) \
124 G(a, x, s0, s1, s2, s3); \
125 G(b, y, s1, s2, s3, s0); \
126 vpaddd x, y, x; \
127 vpaddd y, x, y; \
128 vpaddd y, RK2, y; \
129 vpxor d, y, d; \
130 vpsrld $1, d, y; \
131 vpslld $(32 - 1), d, d; \
132 vpor d, y, d; \
133 vpslld $1, c, y; \
134 vpsrld $(32 - 1), c, c; \
135 vpor c, y, c; \
136 vpaddd x, RK1, x; \
137 vpxor x, c, c;
138
139#define encrypt_round(n, a, b, c, d) \
140 vbroadcastss (k+4*(2*(n)))(CTX), RK1; \
141 vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \
142 encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
143 encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
144
145#define decrypt_round(n, a, b, c, d) \
146 vbroadcastss (k+4*(2*(n)))(CTX), RK1; \
147 vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \
148 decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
149 decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
150
151#define encrypt_cycle(n) \
152 encrypt_round((2*n), RA, RB, RC, RD); \
153 encrypt_round(((2*n) + 1), RC, RD, RA, RB);
154
155#define decrypt_cycle(n) \
156 decrypt_round(((2*n) + 1), RC, RD, RA, RB); \
157 decrypt_round((2*n), RA, RB, RC, RD);
158
159
160#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
161 vpunpckldq x1, x0, t0; \
162 vpunpckhdq x1, x0, t2; \
163 vpunpckldq x3, x2, t1; \
164 vpunpckhdq x3, x2, x3; \
165 \
166 vpunpcklqdq t1, t0, x0; \
167 vpunpckhqdq t1, t0, x1; \
168 vpunpcklqdq x3, t2, x2; \
169 vpunpckhqdq x3, t2, x3;
170
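[Editor's note: transpose_4x4 is the standard two-level unpack transpose: it turns four row vectors of packed dwords into four column vectors, so that 128-bit cipher blocks laid out in memory become per-word SIMD lanes. The same steps in intrinsics, for readability:

/* Sketch of the 4x4 dword transpose done by transpose_4x4. */
#include <immintrin.h>

static void transpose_4x4_sketch(__m128i r[4])
{
        __m128i t0 = _mm_unpacklo_epi32(r[0], r[1]);    /* a0 b0 a1 b1 */
        __m128i t2 = _mm_unpackhi_epi32(r[0], r[1]);    /* a2 b2 a3 b3 */
        __m128i t1 = _mm_unpacklo_epi32(r[2], r[3]);    /* c0 d0 c1 d1 */
        __m128i t3 = _mm_unpackhi_epi32(r[2], r[3]);    /* c2 d2 c3 d3 */

        r[0] = _mm_unpacklo_epi64(t0, t1);      /* a0 b0 c0 d0 */
        r[1] = _mm_unpackhi_epi64(t0, t1);      /* a1 b1 c1 d1 */
        r[2] = _mm_unpacklo_epi64(t2, t3);      /* a2 b2 c2 d2 */
        r[3] = _mm_unpackhi_epi64(t2, t3);      /* a3 b3 c3 d3 */
}]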
171#define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \
172 vpxor (0*4*4)(in), wkey, x0; \
173 vpxor (1*4*4)(in), wkey, x1; \
174 vpxor (2*4*4)(in), wkey, x2; \
175 vpxor (3*4*4)(in), wkey, x3; \
176 \
177 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
178
179#define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
180 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
181 \
182 vpxor x0, wkey, x0; \
183 vmovdqu x0, (0*4*4)(out); \
184 vpxor x1, wkey, x1; \
185 vmovdqu x1, (1*4*4)(out); \
186 vpxor x2, wkey, x2; \
187 vmovdqu x2, (2*4*4)(out); \
188 vpxor x3, wkey, x3; \
189 vmovdqu x3, (3*4*4)(out);
190
191#define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
192 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
193 \
194 vpxor x0, wkey, x0; \
195 vpxor (0*4*4)(out), x0, x0; \
196 vmovdqu x0, (0*4*4)(out); \
197 vpxor x1, wkey, x1; \
198 vpxor (1*4*4)(out), x1, x1; \
199 vmovdqu x1, (1*4*4)(out); \
200 vpxor x2, wkey, x2; \
201 vpxor (2*4*4)(out), x2, x2; \
202 vmovdqu x2, (2*4*4)(out); \
203 vpxor x3, wkey, x3; \
204 vpxor (3*4*4)(out), x3, x3; \
205 vmovdqu x3, (3*4*4)(out);
206
207.align 8
208.global __twofish_enc_blk_8way
209.type __twofish_enc_blk_8way,@function;
210
211__twofish_enc_blk_8way:
212 /* input:
213 * %rdi: ctx, CTX
214 * %rsi: dst
215 * %rdx: src
216 * %rcx: bool, if true: xor output
217 */
218
219 pushq %rbx;
220 pushq %rcx;
221
222 vmovdqu w(CTX), RK1;
223
224 leaq (4*4*4)(%rdx), %rax;
225 inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
226 inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
227
228 xorq RID1, RID1;
229 xorq RID2, RID2;
230
231 encrypt_cycle(0);
232 encrypt_cycle(1);
233 encrypt_cycle(2);
234 encrypt_cycle(3);
235 encrypt_cycle(4);
236 encrypt_cycle(5);
237 encrypt_cycle(6);
238 encrypt_cycle(7);
239
240 vmovdqu (w+4*4)(CTX), RK1;
241
242 popq %rcx;
243 popq %rbx;
244
245 leaq (4*4*4)(%rsi), %rax;
246
247 testb %cl, %cl;
248 jnz __enc_xor8;
249
250 outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
251 outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
252
253 ret;
254
255__enc_xor8:
256 outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
257 outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
258
259 ret;
260
261.align 8
262.global twofish_dec_blk_8way
263.type twofish_dec_blk_8way,@function;
264
265twofish_dec_blk_8way:
266 /* input:
267 * %rdi: ctx, CTX
268 * %rsi: dst
269 * %rdx: src
270 */
271
272 pushq %rbx;
273
274 vmovdqu (w+4*4)(CTX), RK1;
275
276 leaq (4*4*4)(%rdx), %rax;
277 inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
278 inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
279
280 xorq RID1, RID1;
281 xorq RID2, RID2;
282
283 decrypt_cycle(7);
284 decrypt_cycle(6);
285 decrypt_cycle(5);
286 decrypt_cycle(4);
287 decrypt_cycle(3);
288 decrypt_cycle(2);
289 decrypt_cycle(1);
290 decrypt_cycle(0);
291
292 vmovdqu (w)(CTX), RK1;
293
294 popq %rbx;
295
296 leaq (4*4*4)(%rsi), %rax;
297 outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
298 outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
299
300 ret;
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
new file mode 100644
index 000000000000..782b67ddaf6a
--- /dev/null
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -0,0 +1,624 @@
1/*
2 * Glue Code for AVX assembler version of Twofish Cipher
3 *
4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
20 * USA
21 *
22 */
23
24#include <linux/module.h>
25#include <linux/hardirq.h>
26#include <linux/types.h>
27#include <linux/crypto.h>
28#include <linux/err.h>
29#include <crypto/algapi.h>
30#include <crypto/twofish.h>
31#include <crypto/cryptd.h>
32#include <crypto/b128ops.h>
33#include <crypto/ctr.h>
34#include <crypto/lrw.h>
35#include <crypto/xts.h>
36#include <asm/i387.h>
37#include <asm/xcr.h>
38#include <asm/xsave.h>
39#include <asm/crypto/twofish.h>
40#include <asm/crypto/ablk_helper.h>
41#include <asm/crypto/glue_helper.h>
42#include <crypto/scatterwalk.h>
43#include <linux/workqueue.h>
44#include <linux/spinlock.h>
45
46#define TWOFISH_PARALLEL_BLOCKS 8
47
48static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
49 const u8 *src)
50{
51 __twofish_enc_blk_3way(ctx, dst, src, false);
52}
53
54/* 8-way parallel cipher functions */
55asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst,
56 const u8 *src, bool xor);
57asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst,
58 const u8 *src);
59
60static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst,
61 const u8 *src)
62{
63 __twofish_enc_blk_8way(ctx, dst, src, false);
64}
65
66static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst,
67 const u8 *src)
68{
69 __twofish_enc_blk_8way(ctx, dst, src, true);
70}
71
72static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst,
73 const u8 *src)
74{
75 twofish_dec_blk_8way(ctx, dst, src);
76}
77
78static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src)
79{
80 u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1];
81 unsigned int j;
82
83 for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
84 ivs[j] = src[j];
85
86 twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
87
88 for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
89 u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
90}
91
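[Editor's note: twofish_dec_blk_cbc_xway snapshots src[0..n-2] into ivs[] before the bulk decrypt because the operation may run in place (dst == src): CBC decryption computes P[i] = D(C[i]) ^ C[i-1], so the prior ciphertext blocks must outlive the decrypt. A reference serial version makes the dependency explicit:

/* Reference serial CBC decrypt, showing the chaining that the xway
 * helper must preserve when decrypting in place. u128_xor() is from
 * crypto/b128ops.h. */
static void cbc_decrypt_ref_sketch(void *ctx, u128 *dst, const u128 *src,
                                   unsigned int nblocks, const u128 *iv,
                                   void (*dec)(void *ctx, u8 *out,
                                               const u8 *in))
{
        u128 prev = *iv;
        unsigned int j;

        for (j = 0; j < nblocks; j++) {
                u128 cur = src[j];      /* save C[j] before in-place decrypt */

                dec(ctx, (u8 *)&dst[j], (const u8 *)&src[j]);
                u128_xor(&dst[j], &dst[j], &prev);      /* P[j] ^= C[j-1] */
                prev = cur;
        }
}]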
92static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
93 u128 *iv)
94{
95 be128 ctrblks[TWOFISH_PARALLEL_BLOCKS];
96 unsigned int i;
97
98 for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) {
99 if (dst != src)
100 dst[i] = src[i];
101
102 u128_to_be128(&ctrblks[i], iv);
103 u128_inc(iv);
104 }
105
106 twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
107}
108
109static const struct common_glue_ctx twofish_enc = {
110 .num_funcs = 3,
111 .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
112
113 .funcs = { {
114 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
115 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) }
116 }, {
117 .num_blocks = 3,
118 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
119 }, {
120 .num_blocks = 1,
121 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
122 } }
123};
124
125static const struct common_glue_ctx twofish_ctr = {
126 .num_funcs = 3,
127 .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
128
129 .funcs = { {
130 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
131 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) }
132 }, {
133 .num_blocks = 3,
134 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
135 }, {
136 .num_blocks = 1,
137 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
138 } }
139};
140
141static const struct common_glue_ctx twofish_dec = {
142 .num_funcs = 3,
143 .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
144
145 .funcs = { {
146 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
147 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) }
148 }, {
149 .num_blocks = 3,
150 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
151 }, {
152 .num_blocks = 1,
153 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
154 } }
155};
156
157static const struct common_glue_ctx twofish_dec_cbc = {
158 .num_funcs = 3,
159 .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
160
161 .funcs = { {
162 .num_blocks = TWOFISH_PARALLEL_BLOCKS,
163 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) }
164 }, {
165 .num_blocks = 3,
166 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
167 }, {
168 .num_blocks = 1,
169 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
170 } }
171};
172
173static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
174 struct scatterlist *src, unsigned int nbytes)
175{
176 return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
177}
178
179static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
180 struct scatterlist *src, unsigned int nbytes)
181{
182 return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
183}
184
185static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
186 struct scatterlist *src, unsigned int nbytes)
187{
188 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
189 dst, src, nbytes);
190}
191
192static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
193 struct scatterlist *src, unsigned int nbytes)
194{
195 return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
196 nbytes);
197}
198
199static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
200 struct scatterlist *src, unsigned int nbytes)
201{
202 return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
203}
204
205static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
206{
207 return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL,
208 fpu_enabled, nbytes);
209}
210
211static inline void twofish_fpu_end(bool fpu_enabled)
212{
213 glue_fpu_end(fpu_enabled);
214}
215
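[Editor's note: twofish_fpu_begin forwards to glue_fpu_begin with TWOFISH_PARALLEL_BLOCKS as the threshold: entering kernel FPU context costs a state save/restore, so it is only worth doing once at least one full 8-block batch is queued, and once on it stays on across walk steps. The policy, sketched — the real glue_fpu_begin also takes a desc argument, and treats a fpu_blocks_limit of -1 as "never", as the 3-way module later in this diff uses:

/* Sketch of the lazy FPU-enable policy behind glue_fpu_begin(). */
static bool fpu_begin_sketch(unsigned int bsize, int blocks_limit,
                             bool fpu_enabled, unsigned int nbytes)
{
        if (blocks_limit < 0)
                return false;           /* cipher never needs the FPU */

        if (fpu_enabled)
                return true;            /* already on, keep it on */

        if (nbytes < bsize * (unsigned int)blocks_limit)
                return false;           /* too little data to pay for it */

        kernel_fpu_begin();
        return true;
}]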
216struct crypt_priv {
217 struct twofish_ctx *ctx;
218 bool fpu_enabled;
219};
220
221static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
222{
223 const unsigned int bsize = TF_BLOCK_SIZE;
224 struct crypt_priv *ctx = priv;
225 int i;
226
227 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
228
229 if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
230 twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst);
231 return;
232 }
233
234 for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
235 twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
236
237 nbytes %= bsize * 3;
238
239 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
240 twofish_enc_blk(ctx->ctx, srcdst, srcdst);
241}
242
243static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
244{
245 const unsigned int bsize = TF_BLOCK_SIZE;
246 struct crypt_priv *ctx = priv;
247 int i;
248
249 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
250
251 if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
252 twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst);
253 return;
254 }
255
256 for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
257 twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
258
259 nbytes %= bsize * 3;
260
261 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
262 twofish_dec_blk(ctx->ctx, srcdst, srcdst);
263}
264
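[Editor's note: encrypt_callback/decrypt_callback are handed chunks of at most sizeof(buf) bytes by lrw_crypt/xts_crypt, already tweak-processed, and only have to split each chunk across the 8-way, 3-way, and single-block paths. A worked example of that fan-out for a hypothetical 7-block chunk:

/* Fan-out example for nbytes = 7 * TF_BLOCK_SIZE = 112 bytes:
 *
 *   nbytes == bsize * 8 ?   112 != 128  ->  no 8-way call
 *   nbytes / (bsize * 3)  = 2           ->  two 3-way calls (96 bytes)
 *   nbytes % (bsize * 3)  = 16          ->  one single-block call
 */]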
265static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
266 struct scatterlist *src, unsigned int nbytes)
267{
268 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
269 be128 buf[TWOFISH_PARALLEL_BLOCKS];
270 struct crypt_priv crypt_ctx = {
271 .ctx = &ctx->twofish_ctx,
272 .fpu_enabled = false,
273 };
274 struct lrw_crypt_req req = {
275 .tbuf = buf,
276 .tbuflen = sizeof(buf),
277
278 .table_ctx = &ctx->lrw_table,
279 .crypt_ctx = &crypt_ctx,
280 .crypt_fn = encrypt_callback,
281 };
282 int ret;
283
284 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
285 ret = lrw_crypt(desc, dst, src, nbytes, &req);
286 twofish_fpu_end(crypt_ctx.fpu_enabled);
287
288 return ret;
289}
290
291static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
292 struct scatterlist *src, unsigned int nbytes)
293{
294 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
295 be128 buf[TWOFISH_PARALLEL_BLOCKS];
296 struct crypt_priv crypt_ctx = {
297 .ctx = &ctx->twofish_ctx,
298 .fpu_enabled = false,
299 };
300 struct lrw_crypt_req req = {
301 .tbuf = buf,
302 .tbuflen = sizeof(buf),
303
304 .table_ctx = &ctx->lrw_table,
305 .crypt_ctx = &crypt_ctx,
306 .crypt_fn = decrypt_callback,
307 };
308 int ret;
309
310 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
311 ret = lrw_crypt(desc, dst, src, nbytes, &req);
312 twofish_fpu_end(crypt_ctx.fpu_enabled);
313
314 return ret;
315}
316
317static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
318 struct scatterlist *src, unsigned int nbytes)
319{
320 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
321 be128 buf[TWOFISH_PARALLEL_BLOCKS];
322 struct crypt_priv crypt_ctx = {
323 .ctx = &ctx->crypt_ctx,
324 .fpu_enabled = false,
325 };
326 struct xts_crypt_req req = {
327 .tbuf = buf,
328 .tbuflen = sizeof(buf),
329
330 .tweak_ctx = &ctx->tweak_ctx,
331 .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
332 .crypt_ctx = &crypt_ctx,
333 .crypt_fn = encrypt_callback,
334 };
335 int ret;
336
337 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
338 ret = xts_crypt(desc, dst, src, nbytes, &req);
339 twofish_fpu_end(crypt_ctx.fpu_enabled);
340
341 return ret;
342}
343
344static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
345 struct scatterlist *src, unsigned int nbytes)
346{
347 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
348 be128 buf[TWOFISH_PARALLEL_BLOCKS];
349 struct crypt_priv crypt_ctx = {
350 .ctx = &ctx->crypt_ctx,
351 .fpu_enabled = false,
352 };
353 struct xts_crypt_req req = {
354 .tbuf = buf,
355 .tbuflen = sizeof(buf),
356
357 .tweak_ctx = &ctx->tweak_ctx,
358 .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
359 .crypt_ctx = &crypt_ctx,
360 .crypt_fn = decrypt_callback,
361 };
362 int ret;
363
364 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
365 ret = xts_crypt(desc, dst, src, nbytes, &req);
366 twofish_fpu_end(crypt_ctx.fpu_enabled);
367
368 return ret;
369}
370
371static struct crypto_alg twofish_algs[10] = { {
372 .cra_name = "__ecb-twofish-avx",
373 .cra_driver_name = "__driver-ecb-twofish-avx",
374 .cra_priority = 0,
375 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
376 .cra_blocksize = TF_BLOCK_SIZE,
377 .cra_ctxsize = sizeof(struct twofish_ctx),
378 .cra_alignmask = 0,
379 .cra_type = &crypto_blkcipher_type,
380 .cra_module = THIS_MODULE,
381 .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list),
382 .cra_u = {
383 .blkcipher = {
384 .min_keysize = TF_MIN_KEY_SIZE,
385 .max_keysize = TF_MAX_KEY_SIZE,
386 .setkey = twofish_setkey,
387 .encrypt = ecb_encrypt,
388 .decrypt = ecb_decrypt,
389 },
390 },
391}, {
392 .cra_name = "__cbc-twofish-avx",
393 .cra_driver_name = "__driver-cbc-twofish-avx",
394 .cra_priority = 0,
395 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
396 .cra_blocksize = TF_BLOCK_SIZE,
397 .cra_ctxsize = sizeof(struct twofish_ctx),
398 .cra_alignmask = 0,
399 .cra_type = &crypto_blkcipher_type,
400 .cra_module = THIS_MODULE,
401 .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list),
402 .cra_u = {
403 .blkcipher = {
404 .min_keysize = TF_MIN_KEY_SIZE,
405 .max_keysize = TF_MAX_KEY_SIZE,
406 .setkey = twofish_setkey,
407 .encrypt = cbc_encrypt,
408 .decrypt = cbc_decrypt,
409 },
410 },
411}, {
412 .cra_name = "__ctr-twofish-avx",
413 .cra_driver_name = "__driver-ctr-twofish-avx",
414 .cra_priority = 0,
415 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
416 .cra_blocksize = 1,
417 .cra_ctxsize = sizeof(struct twofish_ctx),
418 .cra_alignmask = 0,
419 .cra_type = &crypto_blkcipher_type,
420 .cra_module = THIS_MODULE,
421 .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list),
422 .cra_u = {
423 .blkcipher = {
424 .min_keysize = TF_MIN_KEY_SIZE,
425 .max_keysize = TF_MAX_KEY_SIZE,
426 .ivsize = TF_BLOCK_SIZE,
427 .setkey = twofish_setkey,
428 .encrypt = ctr_crypt,
429 .decrypt = ctr_crypt,
430 },
431 },
432}, {
433 .cra_name = "__lrw-twofish-avx",
434 .cra_driver_name = "__driver-lrw-twofish-avx",
435 .cra_priority = 0,
436 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
437 .cra_blocksize = TF_BLOCK_SIZE,
438 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
439 .cra_alignmask = 0,
440 .cra_type = &crypto_blkcipher_type,
441 .cra_module = THIS_MODULE,
442 .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list),
443 .cra_exit = lrw_twofish_exit_tfm,
444 .cra_u = {
445 .blkcipher = {
446 .min_keysize = TF_MIN_KEY_SIZE +
447 TF_BLOCK_SIZE,
448 .max_keysize = TF_MAX_KEY_SIZE +
449 TF_BLOCK_SIZE,
450 .ivsize = TF_BLOCK_SIZE,
451 .setkey = lrw_twofish_setkey,
452 .encrypt = lrw_encrypt,
453 .decrypt = lrw_decrypt,
454 },
455 },
456}, {
457 .cra_name = "__xts-twofish-avx",
458 .cra_driver_name = "__driver-xts-twofish-avx",
459 .cra_priority = 0,
460 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
461 .cra_blocksize = TF_BLOCK_SIZE,
462 .cra_ctxsize = sizeof(struct twofish_xts_ctx),
463 .cra_alignmask = 0,
464 .cra_type = &crypto_blkcipher_type,
465 .cra_module = THIS_MODULE,
466 .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list),
467 .cra_u = {
468 .blkcipher = {
469 .min_keysize = TF_MIN_KEY_SIZE * 2,
470 .max_keysize = TF_MAX_KEY_SIZE * 2,
471 .ivsize = TF_BLOCK_SIZE,
472 .setkey = xts_twofish_setkey,
473 .encrypt = xts_encrypt,
474 .decrypt = xts_decrypt,
475 },
476 },
477}, {
478 .cra_name = "ecb(twofish)",
479 .cra_driver_name = "ecb-twofish-avx",
480 .cra_priority = 400,
481 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
482 .cra_blocksize = TF_BLOCK_SIZE,
483 .cra_ctxsize = sizeof(struct async_helper_ctx),
484 .cra_alignmask = 0,
485 .cra_type = &crypto_ablkcipher_type,
486 .cra_module = THIS_MODULE,
487 .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list),
488 .cra_init = ablk_init,
489 .cra_exit = ablk_exit,
490 .cra_u = {
491 .ablkcipher = {
492 .min_keysize = TF_MIN_KEY_SIZE,
493 .max_keysize = TF_MAX_KEY_SIZE,
494 .setkey = ablk_set_key,
495 .encrypt = ablk_encrypt,
496 .decrypt = ablk_decrypt,
497 },
498 },
499}, {
500 .cra_name = "cbc(twofish)",
501 .cra_driver_name = "cbc-twofish-avx",
502 .cra_priority = 400,
503 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
504 .cra_blocksize = TF_BLOCK_SIZE,
505 .cra_ctxsize = sizeof(struct async_helper_ctx),
506 .cra_alignmask = 0,
507 .cra_type = &crypto_ablkcipher_type,
508 .cra_module = THIS_MODULE,
509 .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list),
510 .cra_init = ablk_init,
511 .cra_exit = ablk_exit,
512 .cra_u = {
513 .ablkcipher = {
514 .min_keysize = TF_MIN_KEY_SIZE,
515 .max_keysize = TF_MAX_KEY_SIZE,
516 .ivsize = TF_BLOCK_SIZE,
517 .setkey = ablk_set_key,
518 .encrypt = __ablk_encrypt,
519 .decrypt = ablk_decrypt,
520 },
521 },
522}, {
523 .cra_name = "ctr(twofish)",
524 .cra_driver_name = "ctr-twofish-avx",
525 .cra_priority = 400,
526 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
527 .cra_blocksize = 1,
528 .cra_ctxsize = sizeof(struct async_helper_ctx),
529 .cra_alignmask = 0,
530 .cra_type = &crypto_ablkcipher_type,
531 .cra_module = THIS_MODULE,
532 .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list),
533 .cra_init = ablk_init,
534 .cra_exit = ablk_exit,
535 .cra_u = {
536 .ablkcipher = {
537 .min_keysize = TF_MIN_KEY_SIZE,
538 .max_keysize = TF_MAX_KEY_SIZE,
539 .ivsize = TF_BLOCK_SIZE,
540 .setkey = ablk_set_key,
541 .encrypt = ablk_encrypt,
542 .decrypt = ablk_encrypt,
543 .geniv = "chainiv",
544 },
545 },
546}, {
547 .cra_name = "lrw(twofish)",
548 .cra_driver_name = "lrw-twofish-avx",
549 .cra_priority = 400,
550 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
551 .cra_blocksize = TF_BLOCK_SIZE,
552 .cra_ctxsize = sizeof(struct async_helper_ctx),
553 .cra_alignmask = 0,
554 .cra_type = &crypto_ablkcipher_type,
555 .cra_module = THIS_MODULE,
556 .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list),
557 .cra_init = ablk_init,
558 .cra_exit = ablk_exit,
559 .cra_u = {
560 .ablkcipher = {
561 .min_keysize = TF_MIN_KEY_SIZE +
562 TF_BLOCK_SIZE,
563 .max_keysize = TF_MAX_KEY_SIZE +
564 TF_BLOCK_SIZE,
565 .ivsize = TF_BLOCK_SIZE,
566 .setkey = ablk_set_key,
567 .encrypt = ablk_encrypt,
568 .decrypt = ablk_decrypt,
569 },
570 },
571}, {
572 .cra_name = "xts(twofish)",
573 .cra_driver_name = "xts-twofish-avx",
574 .cra_priority = 400,
575 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
576 .cra_blocksize = TF_BLOCK_SIZE,
577 .cra_ctxsize = sizeof(struct async_helper_ctx),
578 .cra_alignmask = 0,
579 .cra_type = &crypto_ablkcipher_type,
580 .cra_module = THIS_MODULE,
581 .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list),
582 .cra_init = ablk_init,
583 .cra_exit = ablk_exit,
584 .cra_u = {
585 .ablkcipher = {
586 .min_keysize = TF_MIN_KEY_SIZE * 2,
587 .max_keysize = TF_MAX_KEY_SIZE * 2,
588 .ivsize = TF_BLOCK_SIZE,
589 .setkey = ablk_set_key,
590 .encrypt = ablk_encrypt,
591 .decrypt = ablk_decrypt,
592 },
593 },
594} };
595
596static int __init twofish_init(void)
597{
598 u64 xcr0;
599
600 if (!cpu_has_avx || !cpu_has_osxsave) {
601 printk(KERN_INFO "AVX instructions are not detected.\n");
602 return -ENODEV;
603 }
604
605 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
606 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
607 printk(KERN_INFO "AVX detected but unusable.\n");
608 return -ENODEV;
609 }
610
611 return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
612}
613
614static void __exit twofish_exit(void)
615{
616 crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
617}
618
619module_init(twofish_init);
620module_exit(twofish_exit);
621
622MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized");
623MODULE_LICENSE("GPL");
624MODULE_ALIAS("twofish");
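[Editor's note: twofish_init's probe is the standard AVX usability check used by these modules: the CPU must advertise AVX and OSXSAVE (so XGETBV is legal), and XCR0 must show the OS context-switching both XMM and YMM state. Factored out as a predicate, the check reads:

/* Sketch of the AVX usability probe, factored from twofish_init(). */
static bool avx_usable_sketch(void)
{
        u64 xcr0;

        if (!cpu_has_avx || !cpu_has_osxsave)
                return false;           /* no AVX, or XGETBV would fault */

        xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);       /* read XCR0 */

        /* OS must save/restore both XMM and YMM register state. */
        return (xcr0 & (XSTATE_SSE | XSTATE_YMM)) ==
               (XSTATE_SSE | XSTATE_YMM);
}]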
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 922ab24cce31..15f9347316c8 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -3,11 +3,6 @@
3 * 3 *
4 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 4 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 * 5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
8 * CTR part based on code (crypto/ctr.c) by:
9 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or 8 * the Free Software Foundation; either version 2 of the License, or
@@ -33,20 +28,13 @@
33#include <crypto/algapi.h> 28#include <crypto/algapi.h>
34#include <crypto/twofish.h> 29#include <crypto/twofish.h>
35#include <crypto/b128ops.h> 30#include <crypto/b128ops.h>
31#include <asm/crypto/twofish.h>
32#include <asm/crypto/glue_helper.h>
36#include <crypto/lrw.h> 33#include <crypto/lrw.h>
37#include <crypto/xts.h> 34#include <crypto/xts.h>
38 35
39/* regular block cipher functions from twofish_x86_64 module */ 36EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
40asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, 37EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
41 const u8 *src);
42asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
43 const u8 *src);
44
45/* 3-way parallel cipher functions */
46asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
47 const u8 *src, bool xor);
48asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
49 const u8 *src);
50 38
51static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, 39static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
52 const u8 *src) 40 const u8 *src)
@@ -60,311 +48,139 @@ static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
60 __twofish_enc_blk_3way(ctx, dst, src, true); 48 __twofish_enc_blk_3way(ctx, dst, src, true);
61} 49}
62 50
63static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, 51void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
64 void (*fn)(struct twofish_ctx *, u8 *, const u8 *),
65 void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *))
66{
67 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
68 unsigned int bsize = TF_BLOCK_SIZE;
69 unsigned int nbytes;
70 int err;
71
72 err = blkcipher_walk_virt(desc, walk);
73
74 while ((nbytes = walk->nbytes)) {
75 u8 *wsrc = walk->src.virt.addr;
76 u8 *wdst = walk->dst.virt.addr;
77
78 /* Process three block batch */
79 if (nbytes >= bsize * 3) {
80 do {
81 fn_3way(ctx, wdst, wsrc);
82
83 wsrc += bsize * 3;
84 wdst += bsize * 3;
85 nbytes -= bsize * 3;
86 } while (nbytes >= bsize * 3);
87
88 if (nbytes < bsize)
89 goto done;
90 }
91
92 /* Handle leftovers */
93 do {
94 fn(ctx, wdst, wsrc);
95
96 wsrc += bsize;
97 wdst += bsize;
98 nbytes -= bsize;
99 } while (nbytes >= bsize);
100
101done:
102 err = blkcipher_walk_done(desc, walk, nbytes);
103 }
104
105 return err;
106}
107
108static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
109 struct scatterlist *src, unsigned int nbytes)
110{ 52{
111 struct blkcipher_walk walk; 53 u128 ivs[2];
112 54
113 blkcipher_walk_init(&walk, dst, src, nbytes); 55 ivs[0] = src[0];
114 return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way); 56 ivs[1] = src[1];
115}
116 57
117static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 58 twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
118 struct scatterlist *src, unsigned int nbytes)
119{
120 struct blkcipher_walk walk;
121 59
122 blkcipher_walk_init(&walk, dst, src, nbytes); 60 u128_xor(&dst[1], &dst[1], &ivs[0]);
123 return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); 61 u128_xor(&dst[2], &dst[2], &ivs[1]);
124} 62}
63EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
125 64
126static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, 65void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
127 struct blkcipher_walk *walk)
128{
129 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
130 unsigned int bsize = TF_BLOCK_SIZE;
131 unsigned int nbytes = walk->nbytes;
132 u128 *src = (u128 *)walk->src.virt.addr;
133 u128 *dst = (u128 *)walk->dst.virt.addr;
134 u128 *iv = (u128 *)walk->iv;
135
136 do {
137 u128_xor(dst, src, iv);
138 twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
139 iv = dst;
140
141 src += 1;
142 dst += 1;
143 nbytes -= bsize;
144 } while (nbytes >= bsize);
145
146 u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
147 return nbytes;
148}
149
150static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
151 struct scatterlist *src, unsigned int nbytes)
152{ 66{
153 struct blkcipher_walk walk; 67 be128 ctrblk;
154 int err;
155 68
156 blkcipher_walk_init(&walk, dst, src, nbytes); 69 if (dst != src)
157 err = blkcipher_walk_virt(desc, &walk); 70 *dst = *src;
158 71
159 while ((nbytes = walk.nbytes)) { 72 u128_to_be128(&ctrblk, iv);
160 nbytes = __cbc_encrypt(desc, &walk); 73 u128_inc(iv);
161 err = blkcipher_walk_done(desc, &walk, nbytes);
162 }
163 74
164 return err; 75 twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
76 u128_xor(dst, dst, (u128 *)&ctrblk);
165} 77}
78EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
166 79
167static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, 80void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
168 struct blkcipher_walk *walk) 81 u128 *iv)
169{ 82{
170 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 83 be128 ctrblks[3];
171 unsigned int bsize = TF_BLOCK_SIZE;
172 unsigned int nbytes = walk->nbytes;
173 u128 *src = (u128 *)walk->src.virt.addr;
174 u128 *dst = (u128 *)walk->dst.virt.addr;
175 u128 ivs[3 - 1];
176 u128 last_iv;
177
178 /* Start of the last block. */
179 src += nbytes / bsize - 1;
180 dst += nbytes / bsize - 1;
181
182 last_iv = *src;
183
184 /* Process three block batch */
185 if (nbytes >= bsize * 3) {
186 do {
187 nbytes -= bsize * (3 - 1);
188 src -= 3 - 1;
189 dst -= 3 - 1;
190
191 ivs[0] = src[0];
192 ivs[1] = src[1];
193
194 twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
195
196 u128_xor(dst + 1, dst + 1, ivs + 0);
197 u128_xor(dst + 2, dst + 2, ivs + 1);
198
199 nbytes -= bsize;
200 if (nbytes < bsize)
201 goto done;
202
203 u128_xor(dst, dst, src - 1);
204 src -= 1;
205 dst -= 1;
206 } while (nbytes >= bsize * 3);
207
208 if (nbytes < bsize)
209 goto done;
210 }
211
212 /* Handle leftovers */
213 for (;;) {
214 twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
215
216 nbytes -= bsize;
217 if (nbytes < bsize)
218 break;
219 84
220 u128_xor(dst, dst, src - 1); 85 if (dst != src) {
221 src -= 1; 86 dst[0] = src[0];
222 dst -= 1; 87 dst[1] = src[1];
88 dst[2] = src[2];
223 } 89 }
224 90
225done: 91 u128_to_be128(&ctrblks[0], iv);
226 u128_xor(dst, dst, (u128 *)walk->iv); 92 u128_inc(iv);
227 *(u128 *)walk->iv = last_iv; 93 u128_to_be128(&ctrblks[1], iv);
94 u128_inc(iv);
95 u128_to_be128(&ctrblks[2], iv);
96 u128_inc(iv);
228 97
229 return nbytes; 98 twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
230} 99}
100EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
101
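[Editor's note: twofish_enc_blk_ctr_3way uses the asm "xor" entry point to fuse the final CTR step: dst is pre-loaded with plaintext, the three big-endian counter blocks are encrypted, and the xor variant folds the keystream straight over dst. The shape of the trick, as a sketch:

/* Sketch of CTR via the xor-output variant: keystream = E(counters),
 * dst ^= keystream, with the XOR done inside the asm when xor=true. */
static void ctr_3way_sketch(struct twofish_ctx *ctx, u128 dst[3],
                            const u128 src[3], u128 *iv)
{
        be128 ctrblks[3];
        int i;

        for (i = 0; i < 3; i++) {
                dst[i] = src[i];                /* plaintext into dst */
                u128_to_be128(&ctrblks[i], iv); /* big-endian counter */
                u128_inc(iv);
        }

        /* encrypts ctrblks and XORs the result over dst */
        __twofish_enc_blk_3way(ctx, (u8 *)dst, (u8 *)ctrblks, true);
}]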
102static const struct common_glue_ctx twofish_enc = {
103 .num_funcs = 2,
104 .fpu_blocks_limit = -1,
105
106 .funcs = { {
107 .num_blocks = 3,
108 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
109 }, {
110 .num_blocks = 1,
111 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
112 } }
113};
231 114
232static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 115static const struct common_glue_ctx twofish_ctr = {
233 struct scatterlist *src, unsigned int nbytes) 116 .num_funcs = 2,
234{ 117 .fpu_blocks_limit = -1,
235 struct blkcipher_walk walk; 118
236 int err; 119 .funcs = { {
237 120 .num_blocks = 3,
238 blkcipher_walk_init(&walk, dst, src, nbytes); 121 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
239 err = blkcipher_walk_virt(desc, &walk); 122 }, {
123 .num_blocks = 1,
 124 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
125 } }
126};
240 127
241 while ((nbytes = walk.nbytes)) { 128static const struct common_glue_ctx twofish_dec = {
242 nbytes = __cbc_decrypt(desc, &walk); 129 .num_funcs = 2,
243 err = blkcipher_walk_done(desc, &walk, nbytes); 130 .fpu_blocks_limit = -1,
244 } 131
132 .funcs = { {
133 .num_blocks = 3,
134 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
135 }, {
136 .num_blocks = 1,
137 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
138 } }
139};
245 140
246 return err; 141static const struct common_glue_ctx twofish_dec_cbc = {
247} 142 .num_funcs = 2,
143 .fpu_blocks_limit = -1,
144
145 .funcs = { {
146 .num_blocks = 3,
147 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
148 }, {
149 .num_blocks = 1,
150 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
151 } }
152};
248 153
249static inline void u128_to_be128(be128 *dst, const u128 *src) 154static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
155 struct scatterlist *src, unsigned int nbytes)
250{ 156{
251 dst->a = cpu_to_be64(src->a); 157 return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
252 dst->b = cpu_to_be64(src->b);
253} 158}
254 159
255static inline void be128_to_u128(u128 *dst, const be128 *src) 160static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
161 struct scatterlist *src, unsigned int nbytes)
256{ 162{
257 dst->a = be64_to_cpu(src->a); 163 return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
258 dst->b = be64_to_cpu(src->b);
259} 164}
260 165
261static inline void u128_inc(u128 *i) 166static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
167 struct scatterlist *src, unsigned int nbytes)
262{ 168{
263 i->b++; 169 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
264 if (!i->b) 170 dst, src, nbytes);
265 i->a++;
266} 171}
267 172
268static void ctr_crypt_final(struct blkcipher_desc *desc, 173static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
269 struct blkcipher_walk *walk) 174 struct scatterlist *src, unsigned int nbytes)
270{ 175{
271 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 176 return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
272 u8 *ctrblk = walk->iv; 177 nbytes);
273 u8 keystream[TF_BLOCK_SIZE];
274 u8 *src = walk->src.virt.addr;
275 u8 *dst = walk->dst.virt.addr;
276 unsigned int nbytes = walk->nbytes;
277
278 twofish_enc_blk(ctx, keystream, ctrblk);
279 crypto_xor(keystream, src, nbytes);
280 memcpy(dst, keystream, nbytes);
281
282 crypto_inc(ctrblk, TF_BLOCK_SIZE);
283}
284
285static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
286 struct blkcipher_walk *walk)
287{
288 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
289 unsigned int bsize = TF_BLOCK_SIZE;
290 unsigned int nbytes = walk->nbytes;
291 u128 *src = (u128 *)walk->src.virt.addr;
292 u128 *dst = (u128 *)walk->dst.virt.addr;
293 u128 ctrblk;
294 be128 ctrblocks[3];
295
296 be128_to_u128(&ctrblk, (be128 *)walk->iv);
297
298 /* Process three block batch */
299 if (nbytes >= bsize * 3) {
300 do {
301 if (dst != src) {
302 dst[0] = src[0];
303 dst[1] = src[1];
304 dst[2] = src[2];
305 }
306
307 /* create ctrblks for parallel encrypt */
308 u128_to_be128(&ctrblocks[0], &ctrblk);
309 u128_inc(&ctrblk);
310 u128_to_be128(&ctrblocks[1], &ctrblk);
311 u128_inc(&ctrblk);
312 u128_to_be128(&ctrblocks[2], &ctrblk);
313 u128_inc(&ctrblk);
314
315 twofish_enc_blk_xor_3way(ctx, (u8 *)dst,
316 (u8 *)ctrblocks);
317
318 src += 3;
319 dst += 3;
320 nbytes -= bsize * 3;
321 } while (nbytes >= bsize * 3);
322
323 if (nbytes < bsize)
324 goto done;
325 }
326
327 /* Handle leftovers */
328 do {
329 if (dst != src)
330 *dst = *src;
331
332 u128_to_be128(&ctrblocks[0], &ctrblk);
333 u128_inc(&ctrblk);
334
335 twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
336 u128_xor(dst, dst, (u128 *)ctrblocks);
337
338 src += 1;
339 dst += 1;
340 nbytes -= bsize;
341 } while (nbytes >= bsize);
342
343done:
344 u128_to_be128((be128 *)walk->iv, &ctrblk);
345 return nbytes;
346} 178}
347 179
348static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, 180static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
349 struct scatterlist *src, unsigned int nbytes) 181 struct scatterlist *src, unsigned int nbytes)
350{ 182{
351 struct blkcipher_walk walk; 183 return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
352 int err;
353
354 blkcipher_walk_init(&walk, dst, src, nbytes);
355 err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE);
356
357 while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) {
358 nbytes = __ctr_crypt(desc, &walk);
359 err = blkcipher_walk_done(desc, &walk, nbytes);
360 }
361
362 if (walk.nbytes) {
363 ctr_crypt_final(desc, &walk);
364 err = blkcipher_walk_done(desc, &walk, 0);
365 }
366
367 return err;
368} 184}
369 185
370static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) 186static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
@@ -397,13 +213,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
397 twofish_dec_blk(ctx, srcdst, srcdst); 213 twofish_dec_blk(ctx, srcdst, srcdst);
398} 214}
399 215
400struct twofish_lrw_ctx { 216int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
401 struct lrw_table_ctx lrw_table; 217 unsigned int keylen)
402 struct twofish_ctx twofish_ctx;
403};
404
405static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
406 unsigned int keylen)
407{ 218{
408 struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); 219 struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
409 int err; 220 int err;
@@ -415,6 +226,7 @@ static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
415 226
416 return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); 227 return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
417} 228}
229EXPORT_SYMBOL_GPL(lrw_twofish_setkey);
418 230
419static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 231static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
420 struct scatterlist *src, unsigned int nbytes) 232 struct scatterlist *src, unsigned int nbytes)
@@ -450,20 +262,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
450 return lrw_crypt(desc, dst, src, nbytes, &req); 262 return lrw_crypt(desc, dst, src, nbytes, &req);
451} 263}
452 264
453static void lrw_exit_tfm(struct crypto_tfm *tfm) 265void lrw_twofish_exit_tfm(struct crypto_tfm *tfm)
454{ 266{
455 struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); 267 struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
456 268
457 lrw_free_table(&ctx->lrw_table); 269 lrw_free_table(&ctx->lrw_table);
458} 270}
271EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm);
459 272
460struct twofish_xts_ctx { 273int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
461 struct twofish_ctx tweak_ctx; 274 unsigned int keylen)
462 struct twofish_ctx crypt_ctx;
463};
464
465static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
466 unsigned int keylen)
467{ 275{
468 struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); 276 struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
469 u32 *flags = &tfm->crt_flags; 277 u32 *flags = &tfm->crt_flags;
@@ -486,6 +294,7 @@ static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
486 return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, 294 return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
487 flags); 295 flags);
488} 296}
297EXPORT_SYMBOL_GPL(xts_twofish_setkey);
489 298
490static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 299static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
491 struct scatterlist *src, unsigned int nbytes) 300 struct scatterlist *src, unsigned int nbytes)
@@ -596,7 +405,7 @@ static struct crypto_alg tf_algs[5] = { {
596 .cra_type = &crypto_blkcipher_type, 405 .cra_type = &crypto_blkcipher_type,
597 .cra_module = THIS_MODULE, 406 .cra_module = THIS_MODULE,
598 .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), 407 .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list),
599 .cra_exit = lrw_exit_tfm, 408 .cra_exit = lrw_twofish_exit_tfm,
600 .cra_u = { 409 .cra_u = {
601 .blkcipher = { 410 .blkcipher = {
602 .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, 411 .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3ea51a84a0e4..f34261296ffb 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -546,7 +546,7 @@ static inline const struct cpumask *online_target_cpus(void)
546 return cpu_online_mask; 546 return cpu_online_mask;
547} 547}
548 548
549DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); 549DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
550 550
551 551
552static inline unsigned int read_apic_id(void) 552static inline unsigned int read_apic_id(void)
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index eb45aa6b1f27..2ad874cb661c 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -66,6 +66,7 @@ struct setup_header {
66 __u64 setup_data; 66 __u64 setup_data;
67 __u64 pref_address; 67 __u64 pref_address;
68 __u32 init_size; 68 __u32 init_size;
69 __u32 handover_offset;
69} __attribute__((packed)); 70} __attribute__((packed));
70 71
71struct sys_desc_table { 72struct sys_desc_table {
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index f91e80f4f180..6b7ee5ff6820 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -207,6 +207,8 @@
207#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ 207#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
208#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ 208#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */
209#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ 209#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
210#define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */
211#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */
210 212
211#if defined(__KERNEL__) && !defined(__ASSEMBLY__) 213#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
212 214
diff --git a/arch/x86/include/asm/crypto/ablk_helper.h b/arch/x86/include/asm/crypto/ablk_helper.h
new file mode 100644
index 000000000000..4f93df50c23e
--- /dev/null
+++ b/arch/x86/include/asm/crypto/ablk_helper.h
@@ -0,0 +1,31 @@
1/*
2 * Shared async block cipher helpers
3 */
4
5#ifndef _CRYPTO_ABLK_HELPER_H
6#define _CRYPTO_ABLK_HELPER_H
7
8#include <linux/crypto.h>
9#include <linux/kernel.h>
10#include <crypto/cryptd.h>
11
12struct async_helper_ctx {
13 struct cryptd_ablkcipher *cryptd_tfm;
14};
15
16extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
17 unsigned int key_len);
18
19extern int __ablk_encrypt(struct ablkcipher_request *req);
20
21extern int ablk_encrypt(struct ablkcipher_request *req);
22
23extern int ablk_decrypt(struct ablkcipher_request *req);
24
25extern void ablk_exit(struct crypto_tfm *tfm);
26
27extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name);
28
29extern int ablk_init(struct crypto_tfm *tfm);
30
31#endif /* _CRYPTO_ABLK_HELPER_H */
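
These helpers are meant to be wired straight into a crypto_alg. A hedged sketch of such a registration follows; the algorithm name, driver names, priority, and key sizes are placeholders, not part of this patch:

	/* Sketch only: an async ECB algorithm built on the ablk helpers. */
	static int ablk_ecb_init(struct crypto_tfm *tfm)
	{
		/* "__driver-ecb-mycipher-sse2" is a hypothetical
		 * internal driver name */
		return ablk_init_common(tfm, "__driver-ecb-mycipher-sse2");
	}

	static struct crypto_alg ablk_ecb_alg = {
		.cra_name		= "ecb(mycipher)",
		.cra_driver_name	= "ecb-mycipher-async",
		.cra_priority		= 400,
		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
		.cra_blocksize		= 16,
		.cra_ctxsize		= sizeof(struct async_helper_ctx),
		.cra_type		= &crypto_ablkcipher_type,
		.cra_module		= THIS_MODULE,
		.cra_init		= ablk_ecb_init,
		.cra_exit		= ablk_exit,
		.cra_u = {
			.ablkcipher = {
				.min_keysize	= 16,
				.max_keysize	= 32,
				.setkey		= ablk_set_key,
				.encrypt	= ablk_encrypt,
				.decrypt	= ablk_decrypt,
			},
		},
	};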
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/crypto/aes.h
index 80545a1cbe39..80545a1cbe39 100644
--- a/arch/x86/include/asm/aes.h
+++ b/arch/x86/include/asm/crypto/aes.h
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
new file mode 100644
index 000000000000..3e408bddc96f
--- /dev/null
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -0,0 +1,115 @@
1/*
2 * Shared glue code for 128bit block ciphers
3 */
4
5#ifndef _CRYPTO_GLUE_HELPER_H
6#define _CRYPTO_GLUE_HELPER_H
7
8#include <linux/kernel.h>
9#include <linux/crypto.h>
10#include <asm/i387.h>
11#include <crypto/b128ops.h>
12
13typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
14typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
15typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
16 u128 *iv);
17
18#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
19#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
20#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
21
22struct common_glue_func_entry {
23 unsigned int num_blocks; /* number of blocks that @fn will process */
24 union {
25 common_glue_func_t ecb;
26 common_glue_cbc_func_t cbc;
27 common_glue_ctr_func_t ctr;
28 } fn_u;
29};
30
31struct common_glue_ctx {
32 unsigned int num_funcs;
33 int fpu_blocks_limit; /* -1 means fpu not needed at all */
34
35 /*
36 * First funcs entry must have largest num_blocks and last funcs entry
37 * must have num_blocks == 1!
38 */
39 struct common_glue_func_entry funcs[];
40};
41
42static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
43 struct blkcipher_desc *desc,
44 bool fpu_enabled, unsigned int nbytes)
45{
46 if (likely(fpu_blocks_limit < 0))
47 return false;
48
49 if (fpu_enabled)
50 return true;
51
52 /*
53 * Vector registers are only used when the chunk to be processed is
54 * large enough, so do not enable the FPU until it is necessary.
55 */
56 if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
57 return false;
58
59 if (desc) {
60 /* prevent sleeping if FPU is in use */
61 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
62 }
63
64 kernel_fpu_begin();
65 return true;
66}
67
68static inline void glue_fpu_end(bool fpu_enabled)
69{
70 if (fpu_enabled)
71 kernel_fpu_end();
72}
73
74static inline void u128_to_be128(be128 *dst, const u128 *src)
75{
76 dst->a = cpu_to_be64(src->a);
77 dst->b = cpu_to_be64(src->b);
78}
79
80static inline void be128_to_u128(u128 *dst, const be128 *src)
81{
82 dst->a = be64_to_cpu(src->a);
83 dst->b = be64_to_cpu(src->b);
84}
85
86static inline void u128_inc(u128 *i)
87{
88 i->b++;
89 if (!i->b)
90 i->a++;
91}
92
93extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
94 struct blkcipher_desc *desc,
95 struct scatterlist *dst,
96 struct scatterlist *src, unsigned int nbytes);
97
98extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
99 struct blkcipher_desc *desc,
100 struct scatterlist *dst,
101 struct scatterlist *src,
102 unsigned int nbytes);
103
104extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
105 struct blkcipher_desc *desc,
106 struct scatterlist *dst,
107 struct scatterlist *src,
108 unsigned int nbytes);
109
110extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
111 struct blkcipher_desc *desc,
112 struct scatterlist *dst,
113 struct scatterlist *src, unsigned int nbytes);
114
115#endif /* _CRYPTO_GLUE_HELPER_H */
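
The glue code walks funcs[] from the widest implementation down to the single-block fallback, enabling the FPU only once at least fpu_blocks_limit blocks are queued. A minimal sketch of a dispatch table for a hypothetical cipher with an 8-way SIMD path (the my_cipher_* functions are assumptions):

	static const struct common_glue_ctx my_cipher_enc = {
		.num_funcs = 2,
		.fpu_blocks_limit = 8,	/* touch the FPU only for >= 8 blocks */

		.funcs = { {
			/* widest implementation first ... */
			.num_blocks = 8,
			.fn_u = { .ecb = GLUE_FUNC_CAST(my_cipher_enc_8way) }
		}, {
			/* ... single-block fallback last */
			.num_blocks = 1,
			.fn_u = { .ecb = GLUE_FUNC_CAST(my_cipher_enc_blk) }
		} }
	};

	static int ecb_encrypt(struct blkcipher_desc *desc,
			       struct scatterlist *dst,
			       struct scatterlist *src, unsigned int nbytes)
	{
		return glue_ecb_crypt_128bit(&my_cipher_enc, desc, dst,
					     src, nbytes);
	}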
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
new file mode 100644
index 000000000000..432deedd2945
--- /dev/null
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -0,0 +1,32 @@
1#ifndef ASM_X86_SERPENT_AVX_H
2#define ASM_X86_SERPENT_AVX_H
3
4#include <linux/crypto.h>
5#include <crypto/serpent.h>
6
7#define SERPENT_PARALLEL_BLOCKS 8
8
9asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
10 const u8 *src, bool xor);
11asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
12 const u8 *src);
13
14static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
15 const u8 *src)
16{
17 __serpent_enc_blk_8way_avx(ctx, dst, src, false);
18}
19
20static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
21 const u8 *src)
22{
23 __serpent_enc_blk_8way_avx(ctx, dst, src, true);
24}
25
26static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
27 const u8 *src)
28{
29 serpent_dec_blk_8way_avx(ctx, dst, src);
30}
31
32#endif
diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/crypto/serpent-sse2.h
index d3ef63fe0c81..e6e77dffbdab 100644
--- a/arch/x86/include/asm/serpent.h
+++ b/arch/x86/include/asm/crypto/serpent-sse2.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86_SERPENT_H 1#ifndef ASM_X86_SERPENT_SSE2_H
2#define ASM_X86_SERPENT_H 2#define ASM_X86_SERPENT_SSE2_H
3 3
4#include <linux/crypto.h> 4#include <linux/crypto.h>
5#include <crypto/serpent.h> 5#include <crypto/serpent.h>
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
new file mode 100644
index 000000000000..9d2c514bd5f9
--- /dev/null
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -0,0 +1,46 @@
1#ifndef ASM_X86_TWOFISH_H
2#define ASM_X86_TWOFISH_H
3
4#include <linux/crypto.h>
5#include <crypto/twofish.h>
6#include <crypto/lrw.h>
7#include <crypto/b128ops.h>
8
9struct twofish_lrw_ctx {
10 struct lrw_table_ctx lrw_table;
11 struct twofish_ctx twofish_ctx;
12};
13
14struct twofish_xts_ctx {
15 struct twofish_ctx tweak_ctx;
16 struct twofish_ctx crypt_ctx;
17};
18
19/* regular block cipher functions from twofish_x86_64 module */
20asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
21 const u8 *src);
22asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
23 const u8 *src);
24
25/* 3-way parallel cipher functions */
26asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
27 const u8 *src, bool xor);
28asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
29 const u8 *src);
30
31/* helpers from twofish_x86_64-3way module */
32extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
33extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
34 u128 *iv);
35extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
36 u128 *iv);
37
38extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
39 unsigned int keylen);
40
41extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
42
43extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
44 unsigned int keylen);
45
46#endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 0baa628e330c..40afa0005c69 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -15,15 +15,6 @@ BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) 15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) 16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
17BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) 17BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
18
19.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
20 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
21.if NUM_INVALIDATE_TLB_VECTORS > \idx
22BUILD_INTERRUPT3(invalidate_interrupt\idx,
23 (INVALIDATE_TLB_VECTOR_START)+\idx,
24 smp_invalidate_interrupt)
25.endif
26.endr
27#endif 18#endif
28 19
29BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) 20BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 4b4448761e88..1508e518c7e3 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -119,17 +119,6 @@
119 */ 119 */
120#define LOCAL_TIMER_VECTOR 0xef 120#define LOCAL_TIMER_VECTOR 0xef
121 121
122/* up to 32 vectors used for spreading out TLB flushes: */
123#if NR_CPUS <= 32
124# define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS)
125#else
126# define NUM_INVALIDATE_TLB_VECTORS (32)
127#endif
128
129#define INVALIDATE_TLB_VECTOR_END (0xee)
130#define INVALIDATE_TLB_VECTOR_START \
131 (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
132
133#define NR_VECTORS 256 122#define NR_VECTORS 256
134 123
135#define FPU_IRQ 13 124#define FPU_IRQ 13
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 0b47ddb6f00b..a0facf3908d7 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -360,9 +360,10 @@ static inline void __flush_tlb_single(unsigned long addr)
360 360
361static inline void flush_tlb_others(const struct cpumask *cpumask, 361static inline void flush_tlb_others(const struct cpumask *cpumask,
362 struct mm_struct *mm, 362 struct mm_struct *mm,
363 unsigned long va) 363 unsigned long start,
364 unsigned long end)
364{ 365{
365 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); 366 PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end);
366} 367}
367 368
368static inline int paravirt_pgd_alloc(struct mm_struct *mm) 369static inline int paravirt_pgd_alloc(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8613cbb7ba41..142236ed83af 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -248,7 +248,8 @@ struct pv_mmu_ops {
248 void (*flush_tlb_single)(unsigned long addr); 248 void (*flush_tlb_single)(unsigned long addr);
249 void (*flush_tlb_others)(const struct cpumask *cpus, 249 void (*flush_tlb_others)(const struct cpumask *cpus,
250 struct mm_struct *mm, 250 struct mm_struct *mm,
251 unsigned long va); 251 unsigned long start,
252 unsigned long end);
252 253
253 /* Hooks for allocating and freeing a pagetable top-level */ 254 /* Hooks for allocating and freeing a pagetable top-level */
254 int (*pgd_alloc)(struct mm_struct *mm); 255 int (*pgd_alloc)(struct mm_struct *mm);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index d9b8e3f7f42a..1104afaba52b 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -551,6 +551,12 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off);
551 { [0 ... NR_CPUS-1] = _initvalue }; \ 551 { [0 ... NR_CPUS-1] = _initvalue }; \
552 __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map 552 __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map
553 553
554#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \
555 DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue; \
556 __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \
557 { [0 ... NR_CPUS-1] = _initvalue }; \
558 __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map
559
554#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ 560#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
555 EXPORT_PER_CPU_SYMBOL(_name) 561 EXPORT_PER_CPU_SYMBOL(_name)
556 562
@@ -559,6 +565,11 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off);
559 extern __typeof__(_type) *_name##_early_ptr; \ 565 extern __typeof__(_type) *_name##_early_ptr; \
560 extern __typeof__(_type) _name##_early_map[] 566 extern __typeof__(_type) _name##_early_map[]
561 567
568#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
569 DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \
570 extern __typeof__(_type) *_name##_early_ptr; \
571 extern __typeof__(_type) _name##_early_map[]
572
562#define early_per_cpu_ptr(_name) (_name##_early_ptr) 573#define early_per_cpu_ptr(_name) (_name##_early_ptr)
563#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) 574#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
564#define early_per_cpu(_name, _cpu) \ 575#define early_per_cpu(_name, _cpu) \
@@ -570,12 +581,18 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off);
570#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ 581#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
571 DEFINE_PER_CPU(_type, _name) = _initvalue 582 DEFINE_PER_CPU(_type, _name) = _initvalue
572 583
584#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \
585 DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue
586
573#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ 587#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
574 EXPORT_PER_CPU_SYMBOL(_name) 588 EXPORT_PER_CPU_SYMBOL(_name)
575 589
576#define DECLARE_EARLY_PER_CPU(_type, _name) \ 590#define DECLARE_EARLY_PER_CPU(_type, _name) \
577 DECLARE_PER_CPU(_type, _name) 591 DECLARE_PER_CPU(_type, _name)
578 592
593#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
594 DECLARE_PER_CPU_READ_MOSTLY(_type, _name)
595
579#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) 596#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
580#define early_per_cpu_ptr(_name) NULL 597#define early_per_cpu_ptr(_name) NULL
581/* no early_per_cpu_map() */ 598/* no early_per_cpu_map() */
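
The _READ_MOSTLY variants behave exactly like their plain counterparts: before the per-cpu areas exist, accesses go through the __initdata early map, and early_per_cpu() picks the right backing store transparently. A hedged usage sketch (my_node_id is a hypothetical variable, not from this patch):

	DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, my_node_id, 0);

	/* during early boot this writes the __initdata early map ... */
	early_per_cpu(my_node_id, cpu) = node;

	/* ... after setup_per_cpu_areas() the same accessor reaches the
	 * real, read-mostly per-cpu copy */
	u16 nid = early_per_cpu(my_node_id, cpu);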
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 39bc5777211a..d048cad9bcad 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -61,6 +61,19 @@ static inline void *current_text_addr(void)
61# define ARCH_MIN_MMSTRUCT_ALIGN 0 61# define ARCH_MIN_MMSTRUCT_ALIGN 0
62#endif 62#endif
63 63
64enum tlb_infos {
65 ENTRIES,
66 NR_INFO
67};
68
69extern u16 __read_mostly tlb_lli_4k[NR_INFO];
70extern u16 __read_mostly tlb_lli_2m[NR_INFO];
71extern u16 __read_mostly tlb_lli_4m[NR_INFO];
72extern u16 __read_mostly tlb_lld_4k[NR_INFO];
73extern u16 __read_mostly tlb_lld_2m[NR_INFO];
74extern u16 __read_mostly tlb_lld_4m[NR_INFO];
75extern s8 __read_mostly tlb_flushall_shift;
76
64/* 77/*
65 * CPU type and hardware bug flags. Kept separately for each CPU. 78 * CPU type and hardware bug flags. Kept separately for each CPU.
66 * Members of this structure are referenced in head.S, so think twice 79 * Members of this structure are referenced in head.S, so think twice
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 2ffa95dc2333..4f19a1526037 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -31,12 +31,12 @@ static inline bool cpu_has_ht_siblings(void)
31 return has_siblings; 31 return has_siblings;
32} 32}
33 33
34DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); 34DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
35DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); 35DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
36/* cpus sharing the last level cache: */ 36/* cpus sharing the last level cache: */
37DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); 37DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
38DECLARE_PER_CPU(u16, cpu_llc_id); 38DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
39DECLARE_PER_CPU(int, cpu_number); 39DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
40 40
41static inline struct cpumask *cpu_sibling_mask(int cpu) 41static inline struct cpumask *cpu_sibling_mask(int cpu)
42{ 42{
@@ -53,10 +53,10 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
53 return per_cpu(cpu_llc_shared_map, cpu); 53 return per_cpu(cpu_llc_shared_map, cpu);
54} 54}
55 55
56DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); 56DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
57DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); 57DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
58#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) 58#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
59DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid); 59DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
60#endif 60#endif
61 61
62/* Static state in head.S used to set up a CPU */ 62/* Static state in head.S used to set up a CPU */
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 829215fef9ee..4fef20773b8f 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -4,7 +4,14 @@
4#define tlb_start_vma(tlb, vma) do { } while (0) 4#define tlb_start_vma(tlb, vma) do { } while (0)
5#define tlb_end_vma(tlb, vma) do { } while (0) 5#define tlb_end_vma(tlb, vma) do { } while (0)
6#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) 6#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
7#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) 7
8#define tlb_flush(tlb) \
9{ \
10 if (tlb->fullmm == 0) \
11 flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \
12 else \
13 flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL); \
14}
8 15
9#include <asm-generic/tlb.h> 16#include <asm-generic/tlb.h>
10 17
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 36a1a2ab87d2..74a44333545a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -73,14 +73,10 @@ static inline void __flush_tlb_one(unsigned long addr)
73 * - flush_tlb_page(vma, vmaddr) flushes one page 73 * - flush_tlb_page(vma, vmaddr) flushes one page
74 * - flush_tlb_range(vma, start, end) flushes a range of pages 74 * - flush_tlb_range(vma, start, end) flushes a range of pages
75 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages 75 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
76 * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus 76 * - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus
77 * 77 *
78 * ..but the i386 has somewhat limited tlb flushing capabilities, 78 * ..but the i386 has somewhat limited tlb flushing capabilities,
79 * and page-granular flushes are available only on i486 and up. 79 * and page-granular flushes are available only on i486 and up.
80 *
81 * x86-64 can only flush individual pages or full VMs. For a range flush
82 * we always do the full VM. Might be worth trying if for a small
83 * range a few INVLPGs in a row are a win.
84 */ 80 */
85 81
86#ifndef CONFIG_SMP 82#ifndef CONFIG_SMP
@@ -109,9 +105,17 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
109 __flush_tlb(); 105 __flush_tlb();
110} 106}
111 107
108static inline void flush_tlb_mm_range(struct mm_struct *mm,
109 unsigned long start, unsigned long end, unsigned long vmflag)
110{
111 if (mm == current->active_mm)
112 __flush_tlb();
113}
114
112static inline void native_flush_tlb_others(const struct cpumask *cpumask, 115static inline void native_flush_tlb_others(const struct cpumask *cpumask,
113 struct mm_struct *mm, 116 struct mm_struct *mm,
114 unsigned long va) 117 unsigned long start,
118 unsigned long end)
115{ 119{
116} 120}
117 121
@@ -119,27 +123,35 @@ static inline void reset_lazy_tlbstate(void)
119{ 123{
120} 124}
121 125
126static inline void flush_tlb_kernel_range(unsigned long start,
127 unsigned long end)
128{
129 flush_tlb_all();
130}
131
122#else /* SMP */ 132#else /* SMP */
123 133
124#include <asm/smp.h> 134#include <asm/smp.h>
125 135
126#define local_flush_tlb() __flush_tlb() 136#define local_flush_tlb() __flush_tlb()
127 137
138#define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
139
140#define flush_tlb_range(vma, start, end) \
141 flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
142
128extern void flush_tlb_all(void); 143extern void flush_tlb_all(void);
129extern void flush_tlb_current_task(void); 144extern void flush_tlb_current_task(void);
130extern void flush_tlb_mm(struct mm_struct *);
131extern void flush_tlb_page(struct vm_area_struct *, unsigned long); 145extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
146extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
147 unsigned long end, unsigned long vmflag);
148extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
132 149
133#define flush_tlb() flush_tlb_current_task() 150#define flush_tlb() flush_tlb_current_task()
134 151
135static inline void flush_tlb_range(struct vm_area_struct *vma,
136 unsigned long start, unsigned long end)
137{
138 flush_tlb_mm(vma->vm_mm);
139}
140
141void native_flush_tlb_others(const struct cpumask *cpumask, 152void native_flush_tlb_others(const struct cpumask *cpumask,
142 struct mm_struct *mm, unsigned long va); 153 struct mm_struct *mm,
154 unsigned long start, unsigned long end);
143 155
144#define TLBSTATE_OK 1 156#define TLBSTATE_OK 1
145#define TLBSTATE_LAZY 2 157#define TLBSTATE_LAZY 2
@@ -159,13 +171,8 @@ static inline void reset_lazy_tlbstate(void)
159#endif /* SMP */ 171#endif /* SMP */
160 172
161#ifndef CONFIG_PARAVIRT 173#ifndef CONFIG_PARAVIRT
162#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) 174#define flush_tlb_others(mask, mm, start, end) \
175 native_flush_tlb_others(mask, mm, start, end)
163#endif 176#endif
164 177
165static inline void flush_tlb_kernel_range(unsigned long start,
166 unsigned long end)
167{
168 flush_tlb_all();
169}
170
171#endif /* _ASM_X86_TLBFLUSH_H */ 178#endif /* _ASM_X86_TLBFLUSH_H */
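
Callers now hand the flush path an explicit [start, end) range plus the VMA flags, so it can choose between invlpg and a full flush. A hypothetical caller (function name and range are illustrative):

	/* Sketch: invalidate three pages of the current mm after a
	 * PTE update; vmflag 0 gives the heuristic no VM_EXEC or
	 * VM_HUGETLB hint. */
	static void invalidate_three_pages(unsigned long addr)
	{
		unsigned long start = addr & PAGE_MASK;
		unsigned long end = start + 3 * PAGE_SIZE;

		flush_tlb_mm_range(current->mm, start, end, 0UL);
	}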
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 3bb9491b7659..b47c2a82ff15 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -15,7 +15,8 @@ extern void uv_nmi_init(void);
15extern void uv_system_init(void); 15extern void uv_system_init(void);
16extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, 16extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
17 struct mm_struct *mm, 17 struct mm_struct *mm,
18 unsigned long va, 18 unsigned long start,
19 unsigned long end,
19 unsigned int cpu); 20 unsigned int cpu);
20 21
21#else /* X86_UV */ 22#else /* X86_UV */
@@ -26,7 +27,7 @@ static inline void uv_cpu_init(void) { }
26static inline void uv_system_init(void) { } 27static inline void uv_system_init(void) { }
27static inline const struct cpumask * 28static inline const struct cpumask *
28uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, 29uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
29 unsigned long va, unsigned int cpu) 30 unsigned long start, unsigned long end, unsigned int cpu)
30{ return cpumask; } 31{ return cpumask; }
31 32
32#endif /* X86_UV */ 33#endif /* X86_UV */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 98e24131ff3a..24deb3082328 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -75,8 +75,8 @@ physid_mask_t phys_cpu_present_map;
75/* 75/*
76 * Map cpu index to physical APIC ID 76 * Map cpu index to physical APIC ID
77 */ 77 */
78DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); 78DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
79DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); 79DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
80EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); 80EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
81EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); 81EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
82 82
@@ -88,7 +88,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
88 * used for the mapping. This is where the behaviors of x86_64 and 32 88 * used for the mapping. This is where the behaviors of x86_64 and 32
89 * actually diverge. Let's keep it ugly for now. 89 * actually diverge. Let's keep it ugly for now.
90 */ 90 */
91DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID); 91DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
92 92
93/* 93/*
94 * Knob to control our willingness to enable the local APIC. 94 * Knob to control our willingness to enable the local APIC.
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index bac4c3804cc7..d30a6a9a0121 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp)
14 14
15obj-y := intel_cacheinfo.o scattered.o topology.o 15obj-y := intel_cacheinfo.o scattered.o topology.o
16obj-y += proc.o capflags.o powerflags.o common.o 16obj-y += proc.o capflags.o powerflags.o common.o
17obj-y += vmware.o hypervisor.o sched.o mshyperv.o 17obj-y += vmware.o hypervisor.o mshyperv.o
18obj-y += rdrand.o 18obj-y += rdrand.o
19obj-y += match.o 19obj-y += match.o
20 20
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5bbc082c47ad..46d8786d655e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -452,6 +452,35 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
452 c->x86_cache_size = l2size; 452 c->x86_cache_size = l2size;
453} 453}
454 454
455u16 __read_mostly tlb_lli_4k[NR_INFO];
456u16 __read_mostly tlb_lli_2m[NR_INFO];
457u16 __read_mostly tlb_lli_4m[NR_INFO];
458u16 __read_mostly tlb_lld_4k[NR_INFO];
459u16 __read_mostly tlb_lld_2m[NR_INFO];
460u16 __read_mostly tlb_lld_4m[NR_INFO];
461
462/*
463 * tlb_flushall_shift sets the balance point for replacing a cr3 write
464 * with multiple 'invlpg' instructions. The replacement is done when
465 * flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
466 * If tlb_flushall_shift is -1, the replacement is disabled.
467 */
468s8 __read_mostly tlb_flushall_shift = -1;
469
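
To make the balance point concrete, a worked example with hypothetical numbers:

	/*
	 * A last level dTLB with 512 4K entries and
	 * tlb_flushall_shift = 5 give
	 *
	 *	act_entries = min(mm->total_vm, 512) = 512
	 *	threshold   = act_entries >> 5       = 16 pages
	 *
	 * so ranges of at most 16 pages are flushed with invlpg and
	 * larger ones with a full cr3 reload.
	 */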
470void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
471{
472 if (this_cpu->c_detect_tlb)
473 this_cpu->c_detect_tlb(c);
474
475 printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n"
476 "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n"
477 "tlb_flushall_shift: %d\n",
478 tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
479 tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
480 tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
481 tlb_flushall_shift);
482}
483
455void __cpuinit detect_ht(struct cpuinfo_x86 *c) 484void __cpuinit detect_ht(struct cpuinfo_x86 *c)
456{ 485{
457#ifdef CONFIG_X86_HT 486#ifdef CONFIG_X86_HT
@@ -911,6 +940,8 @@ void __init identify_boot_cpu(void)
911#else 940#else
912 vgetcpu_set_mode(); 941 vgetcpu_set_mode();
913#endif 942#endif
943 if (boot_cpu_data.cpuid_level >= 2)
944 cpu_detect_tlb(&boot_cpu_data);
914} 945}
915 946
916void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 947void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 8bacc7826fb3..4041c24ae7db 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -20,10 +20,19 @@ struct cpu_dev {
20 void (*c_bsp_init)(struct cpuinfo_x86 *); 20 void (*c_bsp_init)(struct cpuinfo_x86 *);
21 void (*c_init)(struct cpuinfo_x86 *); 21 void (*c_init)(struct cpuinfo_x86 *);
22 void (*c_identify)(struct cpuinfo_x86 *); 22 void (*c_identify)(struct cpuinfo_x86 *);
23 void (*c_detect_tlb)(struct cpuinfo_x86 *);
23 unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); 24 unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
24 int c_x86_vendor; 25 int c_x86_vendor;
25}; 26};
26 27
28struct _tlb_table {
29 unsigned char descriptor;
30 char tlb_type;
31 unsigned int entries;
32 /* unsigned int ways; */
33 char info[128];
34};
35
27#define cpu_dev_register(cpu_devX) \ 36#define cpu_dev_register(cpu_devX) \
28 static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ 37 static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
29 __attribute__((__section__(".x86_cpu_dev.init"))) = \ 38 __attribute__((__section__(".x86_cpu_dev.init"))) = \
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3e6ff6cbf42a..0a4ce2980a5a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -491,6 +491,181 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
491} 491}
492#endif 492#endif
493 493
494#define TLB_INST_4K 0x01
495#define TLB_INST_4M 0x02
496#define TLB_INST_2M_4M 0x03
497
498#define TLB_INST_ALL 0x05
499#define TLB_INST_1G 0x06
500
501#define TLB_DATA_4K 0x11
502#define TLB_DATA_4M 0x12
503#define TLB_DATA_2M_4M 0x13
504#define TLB_DATA_4K_4M 0x14
505
506#define TLB_DATA_1G 0x16
507
508#define TLB_DATA0_4K 0x21
509#define TLB_DATA0_4M 0x22
510#define TLB_DATA0_2M_4M 0x23
511
512#define STLB_4K 0x41
513
514static const struct _tlb_table intel_tlb_table[] __cpuinitconst = {
515 { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
516 { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" },
517 { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" },
518 { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" },
519 { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" },
520 { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" },
521 { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" },
522 { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
523 { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
524 { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
525 { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
526 { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" },
527 { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" },
528 { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" },
529 { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
530 { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" },
531 { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" },
532 { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" },
533 { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" },
534 { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
535 { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" },
536 { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" },
537 { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" },
538 { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
539 { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
540 { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" },
541 { 0x00, 0, 0 }
542};
543
544static void __cpuinit intel_tlb_lookup(const unsigned char desc)
545{
546 unsigned char k;
547 if (desc == 0)
548 return;
549
550 /* look up this descriptor in the table */
551 for (k = 0; intel_tlb_table[k].descriptor != desc &&
552 intel_tlb_table[k].descriptor != 0; k++)
553 ;
554
555 if (intel_tlb_table[k].tlb_type == 0)
556 return;
557
558 switch (intel_tlb_table[k].tlb_type) {
559 case STLB_4K:
560 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
561 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
562 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
563 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
564 break;
565 case TLB_INST_ALL:
566 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
567 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
568 if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
569 tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
570 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
571 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
572 break;
573 case TLB_INST_4K:
574 if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
575 tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
576 break;
577 case TLB_INST_4M:
578 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
579 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
580 break;
581 case TLB_INST_2M_4M:
582 if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
583 tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
584 if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
585 tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
586 break;
587 case TLB_DATA_4K:
588 case TLB_DATA0_4K:
589 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
590 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
591 break;
592 case TLB_DATA_4M:
593 case TLB_DATA0_4M:
594 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
595 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
596 break;
597 case TLB_DATA_2M_4M:
598 case TLB_DATA0_2M_4M:
599 if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
600 tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
601 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
602 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
603 break;
604 case TLB_DATA_4K_4M:
605 if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
606 tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
607 if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
608 tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
609 break;
610 }
611}
612
613static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
614{
615 if (!cpu_has_invlpg) {
616 tlb_flushall_shift = -1;
617 return;
618 }
619 switch ((c->x86 << 8) + c->x86_model) {
620 case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
621 case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
622 case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
623 case 0x61d: /* six-core 45 nm xeon "Dunnington" */
624 tlb_flushall_shift = -1;
625 break;
626 case 0x61a: /* 45 nm nehalem, "Bloomfield" */
627 case 0x61e: /* 45 nm nehalem, "Lynnfield" */
628 case 0x625: /* 32 nm nehalem, "Clarkdale" */
629 case 0x62c: /* 32 nm nehalem, "Gulftown" */
630 case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
631 case 0x62f: /* 32 nm Xeon E7 */
632 tlb_flushall_shift = 6;
633 break;
634 case 0x62a: /* SandyBridge */
635 case 0x62d: /* SandyBridge, "Romley-EP" */
636 tlb_flushall_shift = 5;
637 break;
638 case 0x63a: /* Ivybridge */
639 tlb_flushall_shift = 1;
640 break;
641 default:
642 tlb_flushall_shift = 6;
643 }
644}
645
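
As a concrete instance of the switch key above (values from Intel's published family/model numbering):

	/*
	 * SandyBridge reports family 6, model 0x2a, so
	 * (c->x86 << 8) + c->x86_model = (6 << 8) + 0x2a = 0x62a
	 * and tlb_flushall_shift becomes 5.
	 */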
646static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
647{
648 int i, j, n;
649 unsigned int regs[4];
650 unsigned char *desc = (unsigned char *)regs;
651 /* Number of times to iterate */
652 n = cpuid_eax(2) & 0xFF;
653
654 for (i = 0 ; i < n ; i++) {
655 cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
656
657 /* If bit 31 is set, this is an unknown format */
658 for (j = 0 ; j < 3 ; j++)
659 if (regs[j] & (1 << 31))
660 regs[j] = 0;
661
662 /* Byte 0 is level count, not a descriptor */
663 for (j = 1 ; j < 16 ; j++)
664 intel_tlb_lookup(desc[j]);
665 }
666 intel_tlb_flushall_shift_set(c);
667}
668
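
A worked pass through the loop above, using the commonly cited leaf-2 encoding eax = 0x665b5001 (ebx/ecx/edx are handled the same way):

	/*
	 * desc[] aliases the register bytes, so eax = 0x665b5001 yields
	 * desc[0] = 0x01 (iteration count, skipped since j starts at 1),
	 * desc[1] = 0x50, desc[2] = 0x5b, desc[3] = 0x66. 0x50 and 0x5b
	 * hit TLB entries in intel_tlb_table; 0x66 is a cache descriptor
	 * with no TLB meaning, so the lookup falls through. A register
	 * with bit 31 set carries no valid descriptors and was zeroed.
	 */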
494static const struct cpu_dev __cpuinitconst intel_cpu_dev = { 669static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
495 .c_vendor = "Intel", 670 .c_vendor = "Intel",
496 .c_ident = { "GenuineIntel" }, 671 .c_ident = { "GenuineIntel" },
@@ -546,6 +721,7 @@ static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
546 }, 721 },
547 .c_size_cache = intel_size_cache, 722 .c_size_cache = intel_size_cache,
548#endif 723#endif
724 .c_detect_tlb = intel_detect_tlb,
549 .c_early_init = early_init_intel, 725 .c_early_init = early_init_intel,
550 .c_init = init_intel, 726 .c_init = init_intel,
551 .c_x86_vendor = X86_VENDOR_INTEL, 727 .c_x86_vendor = X86_VENDOR_INTEL,
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c
deleted file mode 100644
index a640ae5ad201..000000000000
--- a/arch/x86/kernel/cpu/sched.c
+++ /dev/null
@@ -1,55 +0,0 @@
1#include <linux/sched.h>
2#include <linux/math64.h>
3#include <linux/percpu.h>
4#include <linux/irqflags.h>
5
6#include <asm/cpufeature.h>
7#include <asm/processor.h>
8
9#ifdef CONFIG_SMP
10
11static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched);
12
13static unsigned long scale_aperfmperf(void)
14{
15 struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched);
16 unsigned long ratio, flags;
17
18 local_irq_save(flags);
19 get_aperfmperf(&val);
20 local_irq_restore(flags);
21
22 ratio = calc_aperfmperf_ratio(old, &val);
23 *old = val;
24
25 return ratio;
26}
27
28unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
29{
30 /*
31 * do aperf/mperf on the cpu level because it includes things
32 * like turbo mode, which are relevant to full cores.
33 */
34 if (boot_cpu_has(X86_FEATURE_APERFMPERF))
35 return scale_aperfmperf();
36
37 /*
38 * maybe have something cpufreq here
39 */
40
41 return default_scale_freq_power(sd, cpu);
42}
43
44unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
45{
46 /*
47 * aperf/mperf already includes the smt gain
48 */
49 if (boot_cpu_has(X86_FEATURE_APERFMPERF))
50 return SCHED_LOAD_SCALE;
51
52 return default_scale_smt_power(sd, cpu);
53}
54
55#endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 111f6bbd8b38..69babd8c834f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1048,24 +1048,6 @@ apicinterrupt LOCAL_TIMER_VECTOR \
1048apicinterrupt X86_PLATFORM_IPI_VECTOR \ 1048apicinterrupt X86_PLATFORM_IPI_VECTOR \
1049 x86_platform_ipi smp_x86_platform_ipi 1049 x86_platform_ipi smp_x86_platform_ipi
1050 1050
1051#ifdef CONFIG_SMP
1052 ALIGN
1053 INTR_FRAME
1054.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
1055 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
1056.if NUM_INVALIDATE_TLB_VECTORS > \idx
1057ENTRY(invalidate_interrupt\idx)
1058 pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
1059 jmp .Lcommon_invalidate_interrupt0
1060 CFI_ADJUST_CFA_OFFSET -8
1061END(invalidate_interrupt\idx)
1062.endif
1063.endr
1064 CFI_ENDPROC
1065apicinterrupt INVALIDATE_TLB_VECTOR_START, \
1066 invalidate_interrupt0, smp_invalidate_interrupt
1067#endif
1068
1069apicinterrupt THRESHOLD_APIC_VECTOR \ 1051apicinterrupt THRESHOLD_APIC_VECTOR \
1070 threshold_interrupt smp_threshold_interrupt 1052 threshold_interrupt smp_threshold_interrupt
1071apicinterrupt THERMAL_APIC_VECTOR \ 1053apicinterrupt THERMAL_APIC_VECTOR \
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 252981afd6c4..6e03b0d69138 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -171,79 +171,6 @@ static void __init smp_intr_init(void)
171 */ 171 */
172 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 172 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
173 173
174 /* IPIs for invalidation */
175#define ALLOC_INVTLB_VEC(NR) \
176 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \
177 invalidate_interrupt##NR)
178
179 switch (NUM_INVALIDATE_TLB_VECTORS) {
180 default:
181 ALLOC_INVTLB_VEC(31);
182 case 31:
183 ALLOC_INVTLB_VEC(30);
184 case 30:
185 ALLOC_INVTLB_VEC(29);
186 case 29:
187 ALLOC_INVTLB_VEC(28);
188 case 28:
189 ALLOC_INVTLB_VEC(27);
190 case 27:
191 ALLOC_INVTLB_VEC(26);
192 case 26:
193 ALLOC_INVTLB_VEC(25);
194 case 25:
195 ALLOC_INVTLB_VEC(24);
196 case 24:
197 ALLOC_INVTLB_VEC(23);
198 case 23:
199 ALLOC_INVTLB_VEC(22);
200 case 22:
201 ALLOC_INVTLB_VEC(21);
202 case 21:
203 ALLOC_INVTLB_VEC(20);
204 case 20:
205 ALLOC_INVTLB_VEC(19);
206 case 19:
207 ALLOC_INVTLB_VEC(18);
208 case 18:
209 ALLOC_INVTLB_VEC(17);
210 case 17:
211 ALLOC_INVTLB_VEC(16);
212 case 16:
213 ALLOC_INVTLB_VEC(15);
214 case 15:
215 ALLOC_INVTLB_VEC(14);
216 case 14:
217 ALLOC_INVTLB_VEC(13);
218 case 13:
219 ALLOC_INVTLB_VEC(12);
220 case 12:
221 ALLOC_INVTLB_VEC(11);
222 case 11:
223 ALLOC_INVTLB_VEC(10);
224 case 10:
225 ALLOC_INVTLB_VEC(9);
226 case 9:
227 ALLOC_INVTLB_VEC(8);
228 case 8:
229 ALLOC_INVTLB_VEC(7);
230 case 7:
231 ALLOC_INVTLB_VEC(6);
232 case 6:
233 ALLOC_INVTLB_VEC(5);
234 case 5:
235 ALLOC_INVTLB_VEC(4);
236 case 4:
237 ALLOC_INVTLB_VEC(3);
238 case 3:
239 ALLOC_INVTLB_VEC(2);
240 case 2:
241 ALLOC_INVTLB_VEC(1);
242 case 1:
243 ALLOC_INVTLB_VEC(0);
244 break;
245 }
246
247 /* IPI for generic function call */ 174 /* IPI for generic function call */
248 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 175 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
249 176
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5a98aa272184..5cdff0357746 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -21,7 +21,7 @@
21#include <asm/cpu.h> 21#include <asm/cpu.h>
22#include <asm/stackprotector.h> 22#include <asm/stackprotector.h>
23 23
24DEFINE_PER_CPU(int, cpu_number); 24DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
25EXPORT_PER_CPU_SYMBOL(cpu_number); 25EXPORT_PER_CPU_SYMBOL(cpu_number);
26 26
27#ifdef CONFIG_X86_64 27#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c1a310fb8309..7c5a8c314c02 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -106,17 +106,17 @@ int smp_num_siblings = 1;
106EXPORT_SYMBOL(smp_num_siblings); 106EXPORT_SYMBOL(smp_num_siblings);
107 107
108/* Last level cache ID of each logical CPU */ 108/* Last level cache ID of each logical CPU */
109DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; 109DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
110 110
111/* representing HT siblings of each logical CPU */ 111/* representing HT siblings of each logical CPU */
112DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); 112DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
113EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); 113EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
114 114
115/* representing HT and core siblings of each logical CPU */ 115/* representing HT and core siblings of each logical CPU */
116DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); 116DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
117EXPORT_PER_CPU_SYMBOL(cpu_core_map); 117EXPORT_PER_CPU_SYMBOL(cpu_core_map);
118 118
119DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); 119DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
120 120
121/* Per CPU bogomips and other parameters */ 121/* Per CPU bogomips and other parameters */
122DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); 122DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a718e0d23503..931930a96160 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -919,11 +919,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
919 919
920 /* 920 /*
921 * On success we use clflush, when the CPU supports it to 921 * On success we use clflush, when the CPU supports it to
922 * avoid the wbindv. If the CPU does not support it and in the 922 * avoid the wbinvd. If the CPU does not support it, in the
923 * error case we fall back to cpa_flush_all (which uses 923 * error case, and during early boot (for EFI) we fall back
924 * wbindv): 924 * to cpa_flush_all (which uses wbinvd):
925 */ 925 */
926 if (!ret && cpu_has_clflush) { 926 if (early_boot_irqs_disabled)
927 __cpa_flush_all((void *)(long)cache);
928 else if (!ret && cpu_has_clflush) {
927 if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { 929 if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
928 cpa_flush_array(addr, numpages, cache, 930 cpa_flush_array(addr, numpages, cache,
929 cpa.flags, pages); 931 cpa.flags, pages);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5e57e113b72c..613cd83e8c0c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -12,6 +12,7 @@
12#include <asm/cache.h> 12#include <asm/cache.h>
13#include <asm/apic.h> 13#include <asm/apic.h>
14#include <asm/uv/uv.h> 14#include <asm/uv/uv.h>
15#include <linux/debugfs.h>
15 16
16DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) 17DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
17 = { &init_mm, 0, }; 18 = { &init_mm, 0, };
@@ -27,33 +28,14 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
27 * 28 *
28 * More scalable flush, from Andi Kleen 29 * More scalable flush, from Andi Kleen
29 * 30 *
30 * To avoid global state use 8 different call vectors. 31 * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
31 * Each CPU uses a specific vector to trigger flushes on other
32 * CPUs. Depending on the received vector the target CPUs look into
33 * the right array slot for the flush data.
34 *
35 * With more than 8 CPUs they are hashed to the 8 available
36 * vectors. The limited global vector space forces us to this right now.
37 * In future when interrupts are split into per CPU domains this could be
38 * fixed, at the cost of triggering multiple IPIs in some cases.
39 */ 32 */
40 33
41union smp_flush_state { 34struct flush_tlb_info {
42 struct { 35 struct mm_struct *flush_mm;
43 struct mm_struct *flush_mm; 36 unsigned long flush_start;
44 unsigned long flush_va; 37 unsigned long flush_end;
45 raw_spinlock_t tlbstate_lock; 38};
46 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
47 };
48 char pad[INTERNODE_CACHE_BYTES];
49} ____cacheline_internodealigned_in_smp;
50
51/* State is put into the per CPU data section, but padded
52 to a full cache line because other CPUs can access it and we don't
53 want false sharing in the per cpu data segment. */
54static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
55
56static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
57 39
58/* 40/*
59 * We cannot call mmdrop() because we are in interrupt context, 41 * We cannot call mmdrop() because we are in interrupt context,
@@ -72,28 +54,25 @@ void leave_mm(int cpu)
72EXPORT_SYMBOL_GPL(leave_mm); 54EXPORT_SYMBOL_GPL(leave_mm);
73 55
74/* 56/*
75 *
76 * The flush IPI assumes that a thread switch happens in this order: 57 * The flush IPI assumes that a thread switch happens in this order:
77 * [cpu0: the cpu that switches] 58 * [cpu0: the cpu that switches]
78 * 1) switch_mm() either 1a) or 1b) 59 * 1) switch_mm() either 1a) or 1b)
79 * 1a) thread switch to a different mm 60 * 1a) thread switch to a different mm
80 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); 61 * 1a1) set cpu_tlbstate to TLBSTATE_OK
81 Stop ipi delivery for the old mm. This is not synchronized with 62 Now the tlb flush IPI handler flush_tlb_func won't call leave_mm
82 * the other cpus, but smp_invalidate_interrupt ignore flush ipis 63 * if cpu0 was in lazy tlb mode.
83 * for the wrong mm, and in the worst case we perform a superfluous 64 * 1a2) update cpu active_mm
84 * tlb flush.
85 * 1a2) set cpu mmu_state to TLBSTATE_OK
86 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
87 * was in lazy tlb mode.
88 * 1a3) update cpu active_mm
89 * Now cpu0 accepts tlb flushes for the new mm. 65 * Now cpu0 accepts tlb flushes for the new mm.
90 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); 66 * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask);
91 * Now the other cpus will send tlb flush ipis. 67 * Now the other cpus will send tlb flush ipis.
92 * 1a4) change cr3. 68 * 1a4) change cr3.
69 * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask);
70 * Stop ipi delivery for the old mm. This is not synchronized with
71 * the other cpus, but flush_tlb_func ignores flush ipis for the wrong
72 * mm, and in the worst case we perform a superfluous tlb flush.
93 * 1b) thread switch without mm change 73 * 1b) thread switch without mm change
94 * cpu active_mm is correct, cpu0 already handles 74 * cpu active_mm is correct, cpu0 already handles flush ipis.
95 * flush ipis. 75 * 1b1) set cpu_tlbstate to TLBSTATE_OK
96 * 1b1) set cpu mmu_state to TLBSTATE_OK
97 * 1b2) test_and_set the cpu bit in cpu_vm_mask. 76 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
98 * Atomically set the bit [other cpus will start sending flush ipis], 77 * Atomically set the bit [other cpus will start sending flush ipis],
99 * and test the bit. 78 * and test the bit.
@@ -106,174 +85,62 @@ EXPORT_SYMBOL_GPL(leave_mm);
106 * runs in kernel space, the cpu could load tlb entries for user space 85 * runs in kernel space, the cpu could load tlb entries for user space
107 * pages. 86 * pages.
108 * 87 *
109 * The good news is that cpu mmu_state is local to each cpu, no 88 * The good news is that cpu_tlbstate is local to each cpu, no
110 * write/read ordering problems. 89 * write/read ordering problems.
111 */ 90 */
112 91
113/* 92/*
114 * TLB flush IPI: 93 * TLB flush function:
115 *
116 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. 94 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
117 * 2) Leave the mm if we are in the lazy tlb mode. 95 * 2) Leave the mm if we are in the lazy tlb mode.
118 *
119 * Interrupts are disabled.
120 */
121
122/*
123 * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop
124 * but still used for documentation purpose but the usage is slightly
125 * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
126 * entry calls in with the first parameter in %eax. Maybe define
127 * intrlinkage?
128 */ 96 */
129#ifdef CONFIG_X86_64 97static void flush_tlb_func(void *info)
130asmlinkage
131#endif
132void smp_invalidate_interrupt(struct pt_regs *regs)
133{ 98{
134 unsigned int cpu; 99 struct flush_tlb_info *f = info;
135 unsigned int sender;
136 union smp_flush_state *f;
137
138 cpu = smp_processor_id();
139 /*
140 * orig_rax contains the negated interrupt vector.
141 * Use that to determine where the sender put the data.
142 */
143 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
144 f = &flush_state[sender];
145
146 if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
147 goto out;
148 /*
149 * This was a BUG() but until someone can quote me the
150 * line from the intel manual that guarantees an IPI to
151 * multiple CPUs is retried _only_ on the erroring CPUs
152 * its staying as a return
153 *
154 * BUG();
155 */
156
157 if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) {
158 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
159 if (f->flush_va == TLB_FLUSH_ALL)
160 local_flush_tlb();
161 else
162 __flush_tlb_one(f->flush_va);
163 } else
164 leave_mm(cpu);
165 }
166out:
167 ack_APIC_irq();
168 smp_mb__before_clear_bit();
169 cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
170 smp_mb__after_clear_bit();
171 inc_irq_stat(irq_tlb_count);
172}
173 100
174static void flush_tlb_others_ipi(const struct cpumask *cpumask, 101 if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
175 struct mm_struct *mm, unsigned long va) 102 return;
176{ 103
177 unsigned int sender; 104 if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
178 union smp_flush_state *f; 105 if (f->flush_end == TLB_FLUSH_ALL || !cpu_has_invlpg)
179 106 local_flush_tlb();
180 /* Caller has disabled preemption */ 107 else if (!f->flush_end)
181 sender = this_cpu_read(tlb_vector_offset); 108 __flush_tlb_single(f->flush_start);
182 f = &flush_state[sender]; 109 else {
183 110 unsigned long addr;
184 if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) 111 addr = f->flush_start;
185 raw_spin_lock(&f->tlbstate_lock); 112 while (addr < f->flush_end) {
186 113 __flush_tlb_single(addr);
187 f->flush_mm = mm; 114 addr += PAGE_SIZE;
188 f->flush_va = va; 115 }
189 if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) { 116 }
190 /* 117 } else
191 * We have to send the IPI only to 118 leave_mm(smp_processor_id());
192 * CPUs affected.
193 */
194 apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
195 INVALIDATE_TLB_VECTOR_START + sender);
196
197 while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
198 cpu_relax();
199 }
200 119
201 f->flush_mm = NULL;
202 f->flush_va = 0;
203 if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
204 raw_spin_unlock(&f->tlbstate_lock);
205} 120}
206 121
207void native_flush_tlb_others(const struct cpumask *cpumask, 122void native_flush_tlb_others(const struct cpumask *cpumask,
208 struct mm_struct *mm, unsigned long va) 123 struct mm_struct *mm, unsigned long start,
124 unsigned long end)
209{ 125{
126 struct flush_tlb_info info;
127 info.flush_mm = mm;
128 info.flush_start = start;
129 info.flush_end = end;
130
210 if (is_uv_system()) { 131 if (is_uv_system()) {
211 unsigned int cpu; 132 unsigned int cpu;
212 133
213 cpu = smp_processor_id(); 134 cpu = smp_processor_id();
214 cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); 135 cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu);
215 if (cpumask) 136 if (cpumask)
216 flush_tlb_others_ipi(cpumask, mm, va); 137 smp_call_function_many(cpumask, flush_tlb_func,
138 &info, 1);
217 return; 139 return;
218 } 140 }
219 flush_tlb_others_ipi(cpumask, mm, va); 141 smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
220} 142}
221 143
222static void __cpuinit calculate_tlb_offset(void)
223{
224 int cpu, node, nr_node_vecs, idx = 0;
225 /*
226 * we are changing tlb_vector_offset for each CPU in runtime, but this
227 * will not cause inconsistency, as the write is atomic under X86. we
228 * might see more lock contentions in a short time, but after all CPU's
229 * tlb_vector_offset are changed, everything should go normal
230 *
231 * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
232 * waste some vectors.
233 **/
234 if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
235 nr_node_vecs = 1;
236 else
237 nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
238
239 for_each_online_node(node) {
240 int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
241 nr_node_vecs;
242 int cpu_offset = 0;
243 for_each_cpu(cpu, cpumask_of_node(node)) {
244 per_cpu(tlb_vector_offset, cpu) = node_offset +
245 cpu_offset;
246 cpu_offset++;
247 cpu_offset = cpu_offset % nr_node_vecs;
248 }
249 idx++;
250 }
251}
252
253static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n,
254 unsigned long action, void *hcpu)
255{
256 switch (action & 0xf) {
257 case CPU_ONLINE:
258 case CPU_DEAD:
259 calculate_tlb_offset();
260 }
261 return NOTIFY_OK;
262}
263
264static int __cpuinit init_smp_flush(void)
265{
266 int i;
267
268 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
269 raw_spin_lock_init(&flush_state[i].tlbstate_lock);
270
271 calculate_tlb_offset();
272 hotcpu_notifier(tlb_cpuhp_notify, 0);
273 return 0;
274}
275core_initcall(init_smp_flush);
276
277void flush_tlb_current_task(void) 144void flush_tlb_current_task(void)
278{ 145{
279 struct mm_struct *mm = current->mm; 146 struct mm_struct *mm = current->mm;
@@ -282,27 +149,91 @@ void flush_tlb_current_task(void)
282 149
283 local_flush_tlb(); 150 local_flush_tlb();
284 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) 151 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
285 flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); 152 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
286 preempt_enable(); 153 preempt_enable();
287} 154}
288 155
289void flush_tlb_mm(struct mm_struct *mm) 156/*
157 * Find a THP huge page or a HUGETLB page in the range being
158 * flushed; used by the tlb flush path even when THP is disabled
159 */
160static inline unsigned long has_large_page(struct mm_struct *mm,
161 unsigned long start, unsigned long end)
162{
163 pgd_t *pgd;
164 pud_t *pud;
165 pmd_t *pmd;
166 unsigned long addr = ALIGN(start, HPAGE_SIZE);
167 for (; addr < end; addr += HPAGE_SIZE) {
168 pgd = pgd_offset(mm, addr);
169 if (likely(!pgd_none(*pgd))) {
170 pud = pud_offset(pgd, addr);
171 if (likely(!pud_none(*pud))) {
172 pmd = pmd_offset(pud, addr);
173 if (likely(!pmd_none(*pmd)))
174 if (pmd_large(*pmd))
175 return addr;
176 }
177 }
178 }
179 return 0;
180}
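The HPAGE_SIZE stride works because a single large pmd maps an entire aligned 2 MiB region, so probing one address per region is enough. Note that ALIGN() rounds up, so when start is not 2 MiB aligned the pmd covering start itself is never tested; the walk begins at the next boundary. A self-contained model of the address arithmetic (the 2 MiB HPAGE_SIZE is the usual x86 value with 4 KiB base pages, assumed here):

	#include <stdio.h>

	#define HPAGE_SIZE	(2UL << 20)		/* assumed: 2 MiB */
	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		unsigned long start = 0x700000123000UL;	/* arbitrary example range */
		unsigned long end   = 0x700000923000UL;
		unsigned long addr;

		/* one probe per 2 MiB region stands in for one pmd_large() test */
		for (addr = ALIGN(start, HPAGE_SIZE); addr < end; addr += HPAGE_SIZE)
			printf("probe pmd covering %#lx\n", addr);
		return 0;
	}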
181
182void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
183 unsigned long end, unsigned long vmflag)
290{ 184{
185 unsigned long addr;
186 unsigned act_entries, tlb_entries = 0;
187
291 preempt_disable(); 188 preempt_disable();
189 if (current->active_mm != mm)
190 goto flush_all;
292 191
293 if (current->active_mm == mm) { 192 if (!current->mm) {
294 if (current->mm) 193 leave_mm(smp_processor_id());
194 goto flush_all;
195 }
196
197 if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1
198 || (vmflag & VM_HUGETLB)) {
199 local_flush_tlb();
200 goto flush_all;
201 }
202
203 /* On modern CPUs, the last-level TLB serves both data and instructions */
204 if (vmflag & VM_EXEC)
205 tlb_entries = tlb_lli_4k[ENTRIES];
206 else
207 tlb_entries = tlb_lld_4k[ENTRIES];
208 /* Assume all TLB entries are occupied by this task */
209 act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm;
210
211 /* tlb_flushall_shift sets the balance point; see the commit log for details */
212 if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
213 local_flush_tlb();
214 else {
215 if (has_large_page(mm, start, end)) {
295 local_flush_tlb(); 216 local_flush_tlb();
296 else 217 goto flush_all;
297 leave_mm(smp_processor_id()); 218 }
219 /* flush the range one page at a time with 'invlpg' */
220 for (addr = start; addr < end; addr += PAGE_SIZE)
221 __flush_tlb_single(addr);
222
223 if (cpumask_any_but(mm_cpumask(mm),
224 smp_processor_id()) < nr_cpu_ids)
225 flush_tlb_others(mm_cpumask(mm), mm, start, end);
226 preempt_enable();
227 return;
298 } 228 }
299 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
300 flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
301 229
230flush_all:
231 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
232 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
302 preempt_enable(); 233 preempt_enable();
303} 234}
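Plugging numbers into the balance test above, with assumed hardware values (a 512-entry last-level dTLB; shift values come from the per-vendor tables elsewhere in this patch, and 5 is merely illustrative):

	#include <stdio.h>

	int main(void)
	{
		unsigned long tlb_lld_4k = 512;		/* assumed dTLB 4K entries */
		unsigned long total_vm   = 4096;	/* task size in pages, example */
		int tlb_flushall_shift   = 5;		/* assumed balance point */
		unsigned long act_entries, pages;

		act_entries = total_vm > tlb_lld_4k ? tlb_lld_4k : total_vm;

		for (pages = 1; pages <= 64; pages *= 4)
			printf("%3lu pages -> %s\n", pages,
			       pages > (act_entries >> tlb_flushall_shift) ?
			       "full TLB flush" : "per-page invlpg");
		/* 512 >> 5 == 16: ranges up to 16 pages use invlpg, 64 flushes all */
		return 0;
	}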
304 235
305void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) 236void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
306{ 237{
307 struct mm_struct *mm = vma->vm_mm; 238 struct mm_struct *mm = vma->vm_mm;
308 239
@@ -310,13 +241,13 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
310 241
311 if (current->active_mm == mm) { 242 if (current->active_mm == mm) {
312 if (current->mm) 243 if (current->mm)
313 __flush_tlb_one(va); 244 __flush_tlb_one(start);
314 else 245 else
315 leave_mm(smp_processor_id()); 246 leave_mm(smp_processor_id());
316 } 247 }
317 248
318 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) 249 if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
319 flush_tlb_others(mm_cpumask(mm), mm, va); 250 flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
320 251
321 preempt_enable(); 252 preempt_enable();
322} 253}
@@ -332,3 +263,83 @@ void flush_tlb_all(void)
332{ 263{
333 on_each_cpu(do_flush_tlb_all, NULL, 1); 264 on_each_cpu(do_flush_tlb_all, NULL, 1);
334} 265}
266
267static void do_kernel_range_flush(void *info)
268{
269 struct flush_tlb_info *f = info;
270 unsigned long addr;
271
272 /* flush the range one page at a time with 'invlpg' */
273 for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE)
274 __flush_tlb_single(addr);
275}
276
277void flush_tlb_kernel_range(unsigned long start, unsigned long end)
278{
279 unsigned act_entries;
280 struct flush_tlb_info info;
281
282 /* On modern CPUs, the last-level TLB serves both data and instructions */
283 act_entries = tlb_lld_4k[ENTRIES];
284
285 /* Use the same balance point as user-space flushes, a bit conservatively */
286 if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 ||
287 (end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) {
288
289 on_each_cpu(do_flush_tlb_all, NULL, 1);
290 } else {
291 info.flush_start = start;
292 info.flush_end = end;
293 on_each_cpu(do_kernel_range_flush, &info, 1);
294 }
295}
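A hedged usage sketch for the new kernel-range path: a caller that has just torn down a small kernel mapping can now flush only that window. unmap_kernel_range_noflush() is the existing vmalloc helper; vaddr, and the four-page size, are illustrative.

	unsigned long va = (unsigned long)vaddr;	/* assumed mapping base */

	unmap_kernel_range_noflush(va, 4 * PAGE_SIZE);
	flush_tlb_kernel_range(va, va + 4 * PAGE_SIZE);	/* four invlpg's per CPU,
							   not a global flush */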
296
297#ifdef CONFIG_DEBUG_TLBFLUSH
298static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
299 size_t count, loff_t *ppos)
300{
301 char buf[32];
302 unsigned int len;
303
304 len = sprintf(buf, "%hd\n", tlb_flushall_shift);
305 return simple_read_from_buffer(user_buf, count, ppos, buf, len);
306}
307
308static ssize_t tlbflush_write_file(struct file *file,
309 const char __user *user_buf, size_t count, loff_t *ppos)
310{
311 char buf[32];
312 ssize_t len;
313 s8 shift;
314
315 len = min(count, sizeof(buf) - 1);
316 if (copy_from_user(buf, user_buf, len))
317 return -EFAULT;
318
319 buf[len] = '\0';
320 if (kstrtos8(buf, 0, &shift))
321 return -EINVAL;
322
323 if (shift < -1 || shift > 63)
324 return -EINVAL;
325
326 tlb_flushall_shift = shift;
327 return count;
328}
329
330static const struct file_operations fops_tlbflush = {
331 .read = tlbflush_read_file,
332 .write = tlbflush_write_file,
333 .llseek = default_llseek,
334};
335
336static int __cpuinit create_tlb_flushall_shift(void)
337{
338 if (cpu_has_invlpg) {
339 debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
340 arch_debugfs_dir, NULL, &fops_tlbflush);
341 }
342 return 0;
343}
344late_initcall(create_tlb_flushall_shift);
345#endif
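Once debugfs is mounted, the knob can be exercised from user space; a hedged example using the file this hunk creates (CONFIG_DEBUG_TLBFLUSH and a mounted debugfs are prerequisites):

	#include <stdio.h>

	int main(void)
	{
		const char *path = "/sys/kernel/debug/x86/tlb_flushall_shift";
		FILE *f = fopen(path, "r+");
		short shift;

		if (!f)
			return 1;	/* option off, or debugfs not mounted */

		if (fscanf(f, "%hd", &shift) == 1)
			printf("current shift: %hd\n", shift);

		rewind(f);
		fprintf(f, "-1\n");	/* -1 disables the range path: always flush all */
		return fclose(f);
	}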
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 92660edaa1e7..2dc29f51e75a 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -234,22 +234,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
234 return status; 234 return status;
235} 235}
236 236
237static efi_status_t __init phys_efi_get_time(efi_time_t *tm, 237static int efi_set_rtc_mmss(unsigned long nowtime)
238 efi_time_cap_t *tc)
239{
240 unsigned long flags;
241 efi_status_t status;
242
243 spin_lock_irqsave(&rtc_lock, flags);
244 efi_call_phys_prelog();
245 status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
246 virt_to_phys(tc));
247 efi_call_phys_epilog();
248 spin_unlock_irqrestore(&rtc_lock, flags);
249 return status;
250}
251
252int efi_set_rtc_mmss(unsigned long nowtime)
253{ 238{
254 int real_seconds, real_minutes; 239 int real_seconds, real_minutes;
255 efi_status_t status; 240 efi_status_t status;
@@ -278,7 +263,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
278 return 0; 263 return 0;
279} 264}
280 265
281unsigned long efi_get_time(void) 266static unsigned long efi_get_time(void)
282{ 267{
283 efi_status_t status; 268 efi_status_t status;
284 efi_time_t eft; 269 efi_time_t eft;
@@ -621,18 +606,13 @@ static int __init efi_runtime_init(void)
621 } 606 }
622 /* 607 /*
623 * We will only need *early* access to the following 608 * We will only need *early* access to the following
624 * two EFI runtime services before set_virtual_address_map 609 * EFI runtime service before set_virtual_address_map
625 * is invoked. 610 * is invoked.
626 */ 611 */
627 efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
628 efi_phys.set_virtual_address_map = 612 efi_phys.set_virtual_address_map =
629 (efi_set_virtual_address_map_t *) 613 (efi_set_virtual_address_map_t *)
630 runtime->set_virtual_address_map; 614 runtime->set_virtual_address_map;
631 /* 615
632 * Make efi_get_time can be called before entering
633 * virtual mode.
634 */
635 efi.get_time = phys_efi_get_time;
636 early_iounmap(runtime, sizeof(efi_runtime_services_t)); 616 early_iounmap(runtime, sizeof(efi_runtime_services_t));
637 617
638 return 0; 618 return 0;
@@ -720,12 +700,10 @@ void __init efi_init(void)
720 efi_enabled = 0; 700 efi_enabled = 0;
721 return; 701 return;
722 } 702 }
723#ifdef CONFIG_X86_32
724 if (efi_native) { 703 if (efi_native) {
725 x86_platform.get_wallclock = efi_get_time; 704 x86_platform.get_wallclock = efi_get_time;
726 x86_platform.set_wallclock = efi_set_rtc_mmss; 705 x86_platform.set_wallclock = efi_set_rtc_mmss;
727 } 706 }
728#endif
729 707
730#if EFI_DEBUG 708#if EFI_DEBUG
731 print_efi_memmap(); 709 print_efi_memmap();
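With phys_efi_get_time() gone, the only physical-mode call left before entering virtual mode is SetVirtualAddressMap itself. A hedged sketch of that remaining call, following the prelog/epilog pattern of the helper deleted above; the argument names mirror phys_efi_set_virtual_address_map() earlier in this file and are assumptions here:

	efi_status_t status;

	efi_call_phys_prelog();
	status = efi_call_phys4(efi_phys.set_virtual_address_map,
				memory_map_size, descriptor_size,
				descriptor_version, virtual_map);
	efi_call_phys_epilog();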
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 71b5d5a07d7b..b8b3a37c80cd 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1055,8 +1055,8 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
1055 * done. The returned pointer is valid till preemption is re-enabled. 1055 * done. The returned pointer is valid till preemption is re-enabled.
1056 */ 1056 */
1057const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, 1057const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
1058 struct mm_struct *mm, unsigned long va, 1058 struct mm_struct *mm, unsigned long start,
1059 unsigned int cpu) 1059 unsigned long end, unsigned int cpu)
1060{ 1060{
1061 int locals = 0; 1061 int locals = 0;
1062 int remotes = 0; 1062 int remotes = 0;
@@ -1113,7 +1113,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
1113 1113
1114 record_send_statistics(stat, locals, hubs, remotes, bau_desc); 1114 record_send_statistics(stat, locals, hubs, remotes, bau_desc);
1115 1115
1116 bau_desc->payload.address = va; 1116 bau_desc->payload.address = start;
1117 bau_desc->payload.sending_cpu = cpu; 1117 bau_desc->payload.sending_cpu = cpu;
1118 /* 1118 /*
1119 * uv_flush_send_and_wait returns 0 if all cpu's were messaged, 1119 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 27336dfcda8e..b65a76133f4f 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1256,7 +1256,8 @@ static void xen_flush_tlb_single(unsigned long addr)
1256} 1256}
1257 1257
1258static void xen_flush_tlb_others(const struct cpumask *cpus, 1258static void xen_flush_tlb_others(const struct cpumask *cpus,
1259 struct mm_struct *mm, unsigned long va) 1259 struct mm_struct *mm, unsigned long start,
1260 unsigned long end)
1260{ 1261{
1261 struct { 1262 struct {
1262 struct mmuext_op op; 1263 struct mmuext_op op;
@@ -1268,7 +1269,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
1268 } *args; 1269 } *args;
1269 struct multicall_space mcs; 1270 struct multicall_space mcs;
1270 1271
1271 trace_xen_mmu_flush_tlb_others(cpus, mm, va); 1272 trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
1272 1273
1273 if (cpumask_empty(cpus)) 1274 if (cpumask_empty(cpus))
1274 return; /* nothing to do */ 1275 return; /* nothing to do */
@@ -1281,11 +1282,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
1281 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); 1282 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
1282 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); 1283 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
1283 1284
1284 if (va == TLB_FLUSH_ALL) { 1285 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
1285 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; 1286 if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
1286 } else {
1287 args->op.cmd = MMUEXT_INVLPG_MULTI; 1287 args->op.cmd = MMUEXT_INVLPG_MULTI;
1288 args->op.arg1.linear_addr = va; 1288 args->op.arg1.linear_addr = start;
1289 } 1289 }
1290 1290
1291 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); 1291 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
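Note the range check: only an explicit one-page (start, end) pair is downgraded to a per-page invalidation. flush_tlb_page() above passes (start, 0UL), so end - start wraps to a huge unsigned value and such requests still take the full-flush hypercall. A standalone model of the selection:

	#include <stdio.h>

	#define TLB_FLUSH_ALL	(-1UL)
	#define PAGE_SIZE	4096UL

	static const char *xen_cmd(unsigned long start, unsigned long end)
	{
		if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE)
			return "MMUEXT_INVLPG_MULTI";	/* one page, all vCPUs */
		return "MMUEXT_TLB_FLUSH_MULTI";	/* full flush, all vCPUs */
	}

	int main(void)
	{
		printf("%s\n", xen_cmd(0x1000, 0x2000));	/* one page    */
		printf("%s\n", xen_cmd(0x1000, 0x9000));	/* eight pages */
		printf("%s\n", xen_cmd(0x1000, 0UL));		/* (va, 0UL)   */
		printf("%s\n", xen_cmd(TLB_FLUSH_ALL, 0UL));	/* flush all   */
		return 0;
	}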