Diffstat (limited to 'arch/x86')
57 files changed, 4004 insertions, 1650 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index e46c2147397f..b322f124ee3c 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -129,6 +129,25 @@ config DOUBLEFAULT
 	  option saves about 4k and might cause you much additional grey
 	  hair.
 
+config DEBUG_TLBFLUSH
+	bool "Set upper limit of TLB entries to flush one-by-one"
+	depends on DEBUG_KERNEL && (X86_64 || X86_INVLPG)
+	---help---
+
+	X86-only for now.
+
+	This option allows the user to tune the amount of TLB entries the
+	kernel flushes one-by-one instead of doing a full TLB flush. In
+	certain situations, the former is cheaper. This is controlled by the
+	tlb_flushall_shift knob under /sys/kernel/debug/x86. If you set it
+	to -1, the code flushes the whole TLB unconditionally. Otherwise,
+	for positive values of it, the kernel will use single TLB entry
+	invalidating instructions according to the following formula:
+
+	flush_entries <= active_tlb_entries / 2^tlb_flushall_shift
+
+	If in doubt, say "N".
+
 config IOMMU_DEBUG
 	bool "Enable IOMMU debugging"
 	depends on GART_IOMMU && DEBUG_KERNEL
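[Illustration of the DEBUG_TLBFLUSH help text above — a sketch only, not part of the patch; the function name and the 512-entry TLB size are made up for the example:]

	/*
	 * Decision rule described by the new help text: flush per-entry
	 * (INVLPG) only when the range is small enough, otherwise flush
	 * the whole TLB. Illustrative sketch, not kernel code.
	 */
	static int flush_one_by_one(unsigned long flush_entries,
				    unsigned long active_tlb_entries,
				    int tlb_flushall_shift)
	{
		if (tlb_flushall_shift < 0)
			return 0;	/* -1: always do a full TLB flush */

		/* flush_entries <= active_tlb_entries / 2^tlb_flushall_shift */
		return flush_entries <= (active_tlb_entries >> tlb_flushall_shift);
	}

[For example, with a hypothetical 512-entry TLB and tlb_flushall_shift = 2, ranges of up to 512 / 2^2 = 128 entries are invalidated one-by-one; anything larger takes a full flush.]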
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index cb62f786990d..10f6b1178c68 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,5 +1,7 @@
 #include "misc.h"
 
+#ifdef CONFIG_EARLY_PRINTK
+
 static unsigned long fs;
 static inline void set_fs(unsigned long seg)
 {
@@ -19,3 +21,5 @@ int cmdline_find_option_bool(const char *option)
 {
 	return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
 }
+
+#endif
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
index 261e81fb9582..d3d003cb5481 100644
--- a/arch/x86/boot/compressed/early_serial_console.c
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -1,5 +1,9 @@
 #include "misc.h"
 
+#ifdef CONFIG_EARLY_PRINTK
+
 int early_serial_base;
 
 #include "../early_serial_console.c"
+
+#endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 4e85f5f85837..b3e0227df2c9 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -729,32 +729,68 @@ fail:
  * need to create one ourselves (usually the bootloader would create
  * one for us).
  */
-static efi_status_t make_boot_params(struct boot_params *boot_params,
-				     efi_loaded_image_t *image,
-				     void *handle)
+struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
 {
-	struct efi_info *efi = &boot_params->efi_info;
-	struct apm_bios_info *bi = &boot_params->apm_bios_info;
-	struct sys_desc_table *sdt = &boot_params->sys_desc_table;
-	struct e820entry *e820_map = &boot_params->e820_map[0];
-	struct e820entry *prev = NULL;
-	struct setup_header *hdr = &boot_params->hdr;
-	unsigned long size, key, desc_size, _size;
-	efi_memory_desc_t *mem_map;
-	void *options = image->load_options;
-	u32 load_options_size = image->load_options_size / 2; /* ASCII */
+	struct boot_params *boot_params;
+	struct sys_desc_table *sdt;
+	struct apm_bios_info *bi;
+	struct setup_header *hdr;
+	struct efi_info *efi;
+	efi_loaded_image_t *image;
+	void *options;
+	u32 load_options_size;
+	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
 	int options_size = 0;
 	efi_status_t status;
-	__u32 desc_version;
 	unsigned long cmdline;
-	u8 nr_entries;
 	u16 *s2;
 	u8 *s1;
 	int i;
 
+	sys_table = _table;
+
+	/* Check if we were booted by the EFI firmware */
+	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		return NULL;
+
+	status = efi_call_phys3(sys_table->boottime->handle_protocol,
+				handle, &proto, (void *)&image);
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+		return NULL;
+	}
+
+	status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to alloc lowmem for boot params\n");
+		return NULL;
+	}
+
+	memset(boot_params, 0x0, 0x4000);
+
+	hdr = &boot_params->hdr;
+	efi = &boot_params->efi_info;
+	bi = &boot_params->apm_bios_info;
+	sdt = &boot_params->sys_desc_table;
+
+	/* Copy the second sector to boot_params */
+	memcpy(&hdr->jump, image->image_base + 512, 512);
+
+	/*
+	 * Fill out some of the header fields ourselves because the
+	 * EFI firmware loader doesn't load the first sector.
+	 */
+	hdr->root_flags = 1;
+	hdr->vid_mode = 0xffff;
+	hdr->boot_flag = 0xAA55;
+
+	hdr->code32_start = (__u64)(unsigned long)image->image_base;
+
 	hdr->type_of_loader = 0x21;
 
 	/* Convert unicode cmdline to ascii */
+	options = image->load_options;
+	load_options_size = image->load_options_size / 2; /* ASCII */
 	cmdline = 0;
 	s2 = (u16 *)options;
 
@@ -791,18 +827,36 @@ static efi_status_t make_boot_params(struct boot_params *boot_params,
 	hdr->ramdisk_image = 0;
 	hdr->ramdisk_size = 0;
 
-	status = handle_ramdisks(image, hdr);
-	if (status != EFI_SUCCESS)
-		goto free_cmdline;
-
-	setup_graphics(boot_params);
-
 	/* Clear APM BIOS info */
 	memset(bi, 0, sizeof(*bi));
 
 	memset(sdt, 0, sizeof(*sdt));
 
-	memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
+	status = handle_ramdisks(image, hdr);
+	if (status != EFI_SUCCESS)
+		goto fail2;
+
+	return boot_params;
+fail2:
+	if (options_size)
+		low_free(options_size, hdr->cmd_line_ptr);
+fail:
+	low_free(0x4000, (unsigned long)boot_params);
+	return NULL;
+}
+
+static efi_status_t exit_boot(struct boot_params *boot_params,
+			      void *handle)
+{
+	struct efi_info *efi = &boot_params->efi_info;
+	struct e820entry *e820_map = &boot_params->e820_map[0];
+	struct e820entry *prev = NULL;
+	unsigned long size, key, desc_size, _size;
+	efi_memory_desc_t *mem_map;
+	efi_status_t status;
+	__u32 desc_version;
+	u8 nr_entries;
+	int i;
 
 	size = sizeof(*mem_map) * 32;
 
@@ -811,7 +865,7 @@ again:
 	_size = size;
 	status = low_alloc(size, 1, (unsigned long *)&mem_map);
 	if (status != EFI_SUCCESS)
-		goto free_cmdline;
+		return status;
 
 	status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
 				mem_map, &key, &desc_size, &desc_version);
@@ -823,6 +877,7 @@ again:
 	if (status != EFI_SUCCESS)
 		goto free_mem_map;
 
+	memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
 	efi->efi_systab = (unsigned long)sys_table;
 	efi->efi_memdesc_size = desc_size;
 	efi->efi_memdesc_version = desc_version;
@@ -906,61 +961,13 @@ again:
 
 free_mem_map:
 	low_free(_size, (unsigned long)mem_map);
-free_cmdline:
-	if (options_size)
-		low_free(options_size, hdr->cmd_line_ptr);
-fail:
 	return status;
 }
 
-/*
- * On success we return a pointer to a boot_params structure, and NULL
- * on failure.
- */
-struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
+static efi_status_t relocate_kernel(struct setup_header *hdr)
 {
-	struct boot_params *boot_params;
 	unsigned long start, nr_pages;
-	struct desc_ptr *gdt, *idt;
-	efi_loaded_image_t *image;
-	struct setup_header *hdr;
 	efi_status_t status;
-	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
-	struct desc_struct *desc;
-
-	sys_table = _table;
-
-	/* Check if we were booted by the EFI firmware */
-	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
-		goto fail;
-
-	status = efi_call_phys3(sys_table->boottime->handle_protocol,
-				handle, &proto, (void *)&image);
-	if (status != EFI_SUCCESS) {
-		efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
-		goto fail;
-	}
-
-	status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
-	if (status != EFI_SUCCESS) {
-		efi_printk("Failed to alloc lowmem for boot params\n");
-		goto fail;
-	}
-
-	memset(boot_params, 0x0, 0x4000);
-
-	hdr = &boot_params->hdr;
-
-	/* Copy the second sector to boot_params */
-	memcpy(&hdr->jump, image->image_base + 512, 512);
-
-	/*
-	 * Fill out some of the header fields ourselves because the
-	 * EFI firmware loader doesn't load the first sector.
-	 */
-	hdr->root_flags = 1;
-	hdr->vid_mode = 0xffff;
-	hdr->boot_flag = 0xAA55;
 
 	/*
 	 * The EFI firmware loader could have placed the kernel image
@@ -978,16 +985,40 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 	if (status != EFI_SUCCESS) {
 		status = low_alloc(hdr->init_size, hdr->kernel_alignment,
 				   &start);
-		if (status != EFI_SUCCESS) {
+		if (status != EFI_SUCCESS)
 			efi_printk("Failed to alloc mem for kernel\n");
-			goto fail;
-		}
 	}
 
+	if (status == EFI_SUCCESS)
+		memcpy((void *)start, (void *)(unsigned long)hdr->code32_start,
+		       hdr->init_size);
+
+	hdr->pref_address = hdr->code32_start;
 	hdr->code32_start = (__u32)start;
-	hdr->pref_address = (__u64)(unsigned long)image->image_base;
 
-	memcpy((void *)start, image->image_base, image->image_size);
+	return status;
+}
+
+/*
+ * On success we return a pointer to a boot_params structure, and NULL
+ * on failure.
+ */
+struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
+			     struct boot_params *boot_params)
+{
+	struct desc_ptr *gdt, *idt;
+	efi_loaded_image_t *image;
+	struct setup_header *hdr = &boot_params->hdr;
+	efi_status_t status;
+	struct desc_struct *desc;
+
+	sys_table = _table;
+
+	/* Check if we were booted by the EFI firmware */
+	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+		goto fail;
+
+	setup_graphics(boot_params);
 
 	status = efi_call_phys3(sys_table->boottime->allocate_pool,
 			       EFI_LOADER_DATA, sizeof(*gdt),
@@ -1015,7 +1046,18 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 	idt->size = 0;
 	idt->address = 0;
 
-	status = make_boot_params(boot_params, image, handle);
+	/*
+	 * If the kernel isn't already loaded at the preferred load
+	 * address, relocate it.
+	 */
+	if (hdr->pref_address != hdr->code32_start) {
+		status = relocate_kernel(hdr);
+
+		if (status != EFI_SUCCESS)
+			goto fail;
+	}
+
+	status = exit_boot(boot_params, handle);
 	if (status != EFI_SUCCESS)
 		goto fail;
 
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index c85e3ac99bba..aa4aaf1b2380 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -42,6 +42,16 @@ ENTRY(startup_32)
 	 */
 	add	$0x4, %esp
 
+	call	make_boot_params
+	cmpl	$0, %eax
+	je	1f
+	movl	0x4(%esp), %esi
+	movl	(%esp), %ecx
+	pushl	%eax
+	pushl	%esi
+	pushl	%ecx
+
+	.org 0x30,0x90
 	call	efi_main
 	cmpl	$0, %eax
 	movl	%eax, %esi
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 87e03a13d8e3..2c4b171eec33 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -209,6 +209,16 @@ ENTRY(startup_64)
 	.org 0x210
 	mov	%rcx, %rdi
 	mov	%rdx, %rsi
+	pushq	%rdi
+	pushq	%rsi
+	call	make_boot_params
+	cmpq	$0,%rax
+	je	1f
+	mov	%rax, %rdx
+	popq	%rsi
+	popq	%rdi
+
+	.org 0x230,0x90
 	call	efi_main
 	movq	%rax,%rsi
 	cmpq	$0,%rax
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 7116dcba0c9e..88f7ff6da404 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -108,8 +108,6 @@ static void error(char *m);
  * This is set up by the setup-routine at boot-time
  */
 struct boot_params *real_mode;		/* Pointer to real-mode data */
-static int quiet;
-static int debug;
 
 void *memset(void *s, int c, size_t n);
 void *memcpy(void *dest, const void *src, size_t n);
@@ -170,15 +168,11 @@ static void serial_putchar(int ch)
 	outb(ch, early_serial_base + TXR);
 }
 
-void __putstr(int error, const char *s)
+void __putstr(const char *s)
 {
 	int x, y, pos;
 	char c;
 
-#ifndef CONFIG_X86_VERBOSE_BOOTUP
-	if (!error)
-		return;
-#endif
 	if (early_serial_base) {
 		const char *str = s;
 		while (*str) {
@@ -265,9 +259,9 @@ void *memcpy(void *dest, const void *src, size_t n)
 
 static void error(char *x)
 {
-	__putstr(1, "\n\n");
-	__putstr(1, x);
-	__putstr(1, "\n\n -- System halted");
+	error_putstr("\n\n");
+	error_putstr(x);
+	error_putstr("\n\n -- System halted");
 
 	while (1)
 		asm("hlt");
@@ -294,8 +288,7 @@ static void parse_elf(void *output)
 		return;
 	}
 
-	if (!quiet)
-		putstr("Parsing ELF... ");
+	debug_putstr("Parsing ELF... ");
 
 	phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
 	if (!phdrs)
@@ -332,11 +325,6 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 {
 	real_mode = rmode;
 
-	if (cmdline_find_option_bool("quiet"))
-		quiet = 1;
-	if (cmdline_find_option_bool("debug"))
-		debug = 1;
-
 	if (real_mode->screen_info.orig_video_mode == 7) {
 		vidmem = (char *) 0xb0000;
 		vidport = 0x3b4;
@@ -349,8 +337,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	cols = real_mode->screen_info.orig_video_cols;
 
 	console_init();
-	if (debug)
-		putstr("early console in decompress_kernel\n");
+	debug_putstr("early console in decompress_kernel\n");
 
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
@@ -369,11 +356,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 		error("Wrong destination address");
 #endif
 
-	if (!quiet)
-		putstr("\nDecompressing Linux... ");
+	debug_putstr("\nDecompressing Linux... ");
 	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
 	parse_elf(output);
-	if (!quiet)
-		putstr("done.\nBooting the kernel.\n");
+	debug_putstr("done.\nBooting the kernel.\n");
 	return;
 }
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 3f19c81a6203..0e6dc0ee0eea 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -24,9 +24,21 @@
 
 /* misc.c */
 extern struct boot_params *real_mode;		/* Pointer to real-mode data */
-void __putstr(int error, const char *s);
-#define putstr(__x)  __putstr(0, __x)
-#define puts(__x)  __putstr(0, __x)
+void __putstr(const char *s);
+#define error_putstr(__x)  __putstr(__x)
+
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+
+#define debug_putstr(__x)  __putstr(__x)
+
+#else
+
+static inline void debug_putstr(const char *s)
+{ }
+
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
 
 /* cmdline.c */
 int cmdline_find_option(const char *option, char *buffer, int bufsize);
@@ -36,4 +48,13 @@ int cmdline_find_option_bool(const char *option);
 extern int early_serial_base;
 void console_init(void);
 
+#else
+
+/* early_serial_console.c */
+static const int early_serial_base;
+static inline void console_init(void)
+{ }
+
+#endif
+
 #endif
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index efe5acfc79c3..b4e15dd6786a 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -283,7 +283,7 @@ _start:
 # Part 2 of the header, from the old setup.S
 
 	.ascii	"HdrS"		# header signature
-	.word	0x020a		# header version number (>= 0x0105)
+	.word	0x020b		# header version number (>= 0x0105)
 				# or else old loadlin-1.5 will fail)
 	.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -401,18 +401,13 @@ pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
 #define INIT_SIZE VO_INIT_SIZE
 #endif
 init_size:		.long INIT_SIZE		# kernel initialization size
+handover_offset:	.long 0x30		# offset to the handover
+						# protocol entry point
 
 # End of setup header #####################################################
 
 	.section ".entrytext", "ax"
 start_of_setup:
-#ifdef SAFE_RESET_DISK_CONTROLLER
-# Reset the disk controller.
-	movw	$0x0000, %ax		# Reset disk controller
-	movb	$0x80, %dl		# All disks
-	int	$0x13
-#endif
-
 # Force %es = %ds
 	movw	%ds, %ax
 	movw	%ax, %es
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e191ac048b59..e908e5de82d3 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,9 @@
 # Arch-specific CryptoAPI modules.
 #
 
+obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
+obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
+
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -12,8 +15,10 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
+obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
+obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 
@@ -30,16 +35,11 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
+twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
+serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
-
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
-
-# enable AVX support only when $(AS) can actually assemble the instructions
-ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
-AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
-CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
-endif
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c
new file mode 100644
index 000000000000..43282fe04a8b
--- /dev/null
+++ b/arch/x86/crypto/ablk_helper.c
@@ -0,0 +1,149 @@
+/*
+ * Shared async block cipher helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Based on aesni-intel_glue.c by:
+ *  Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <crypto/algapi.h>
+#include <crypto/cryptd.h>
+#include <asm/i387.h>
+#include <asm/crypto/ablk_helper.h>
+
+int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+		 unsigned int key_len)
+{
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
+	int err;
+
+	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+				    & CRYPTO_TFM_REQ_MASK);
+	err = crypto_ablkcipher_setkey(child, key, key_len);
+	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+				    & CRYPTO_TFM_RES_MASK);
+	return err;
+}
+EXPORT_SYMBOL_GPL(ablk_set_key);
+
+int __ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct blkcipher_desc desc;
+
+	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+	desc.info = req->info;
+	desc.flags = 0;
+
+	return crypto_blkcipher_crt(desc.tfm)->encrypt(
+		&desc, req->dst, req->src, req->nbytes);
+}
+EXPORT_SYMBOL_GPL(__ablk_encrypt);
+
+int ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_encrypt(cryptd_req);
+	} else {
+		return __ablk_encrypt(req);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_encrypt);
+
+int ablk_decrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_decrypt(cryptd_req);
+	} else {
+		struct blkcipher_desc desc;
+
+		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+		desc.info = req->info;
+		desc.flags = 0;
+
+		return crypto_blkcipher_crt(desc.tfm)->decrypt(
+			&desc, req->dst, req->src, req->nbytes);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_decrypt);
+
+void ablk_exit(struct crypto_tfm *tfm)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+EXPORT_SYMBOL_GPL(ablk_exit);
+
+int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	ctx->cryptd_tfm = cryptd_tfm;
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ablk_init_common);
+
+int ablk_init(struct crypto_tfm *tfm)
+{
+	char drv_name[CRYPTO_MAX_ALG_NAME];
+
+	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+		 crypto_tfm_alg_driver_name(tfm));
+
+	return ablk_init_common(tfm, drv_name);
+}
+EXPORT_SYMBOL_GPL(ablk_init);
+
+MODULE_LICENSE("GPL");
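[How a cipher driver wires these exported helpers into an async algorithm — a sketch modelled on the aesni-intel entries later in this diff ("ecb(aes)"/"ecb-aes-aesni" are the real aesni names; "example_alg" itself is hypothetical):]

	static int ablk_ecb_init(struct crypto_tfm *tfm)
	{
		/* bind to the synchronous inner implementation via cryptd */
		return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
	}

	static struct crypto_alg example_alg = {
		.cra_name		= "ecb(aes)",
		.cra_driver_name	= "ecb-aes-aesni",
		.cra_priority		= 400,
		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
		.cra_blocksize		= AES_BLOCK_SIZE,
		.cra_ctxsize		= sizeof(struct async_helper_ctx),
		.cra_type		= &crypto_ablkcipher_type,
		.cra_module		= THIS_MODULE,
		.cra_init		= ablk_ecb_init,
		.cra_exit		= ablk_exit,
		.cra_u = {
			.ablkcipher = {
				.min_keysize	= AES_MIN_KEY_SIZE,
				.max_keysize	= AES_MAX_KEY_SIZE,
				.setkey		= ablk_set_key,	/* forwards to the cryptd child */
				.encrypt	= ablk_encrypt,	/* defers to cryptd if FPU unusable */
				.decrypt	= ablk_decrypt,
			},
		},
	};

[The point of the split: when irq_fpu_usable() is true the request runs synchronously on the inner blkcipher; otherwise it is queued to cryptd, which re-enters it from process context where the FPU/SIMD state may be used.]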
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 8efcf42a9d7e..59b37deb8c8d 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -5,7 +5,7 @@
 
 #include <linux/module.h>
 #include <crypto/aes.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
 
 asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
 asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ac7f5cd019e8..34fdcff4d2c8 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -30,7 +30,8 @@
 #include <crypto/ctr.h>
 #include <asm/cpu_device_id.h>
 #include <asm/i387.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
+#include <asm/crypto/ablk_helper.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
 #include <linux/workqueue.h>
@@ -52,10 +53,6 @@
 #define HAS_XTS
 #endif
 
-struct async_aes_ctx {
-	struct cryptd_ablkcipher *cryptd_tfm;
-};
-
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
@@ -377,87 +374,6 @@ static int ctr_crypt(struct blkcipher_desc *desc,
 }
 #endif
 
-static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
-			unsigned int key_len)
-{
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
-	int err;
-
-	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
-				    & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ablkcipher_setkey(child, key, key_len);
-	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
-				    & CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_ablkcipher_encrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-		return crypto_blkcipher_crt(desc.tfm)->encrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static int ablk_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_ablkcipher_decrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-		return crypto_blkcipher_crt(desc.tfm)->decrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static void ablk_exit(struct crypto_tfm *tfm)
-{
-	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-
-static int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
-{
-	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-
-	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
-		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
-	return 0;
-}
-
 static int ablk_ecb_init(struct crypto_tfm *tfm)
 {
 	return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
@@ -613,7 +529,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 	struct aesni_rfc4106_gcm_ctx *child_ctx =
                                  aesni_rfc4106_gcm_ctx_get(cryptd_child);
-	u8 *new_key_mem = NULL;
+	u8 *new_key_align, *new_key_mem = NULL;
 
 	if (key_len < 4) {
 		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -637,9 +553,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 		if (!new_key_mem)
 			return -ENOMEM;
 
-		new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
-		memcpy(new_key_mem, key, key_len);
-		key = new_key_mem;
+		new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
+		memcpy(new_key_align, key, key_len);
+		key = new_key_align;
 	}
 
 	if (!irq_fpu_usable())
@@ -968,7 +884,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -989,7 +905,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1033,7 +949,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1098,7 +1014,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1126,7 +1042,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1150,7 +1066,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1174,7 +1090,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 3306dc0b139e..eeb2b3b743e9 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c | |||
@@ -5,10 +5,6 @@ | |||
5 | * | 5 | * |
6 | * Camellia parts based on code by: | 6 | * Camellia parts based on code by: |
7 | * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) | 7 | * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) |
8 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
9 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
10 | * CTR part based on code (crypto/ctr.c) by: | ||
11 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
12 | * | 8 | * |
13 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
14 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
@@ -34,9 +30,9 @@ | |||
34 | #include <linux/module.h> | 30 | #include <linux/module.h> |
35 | #include <linux/types.h> | 31 | #include <linux/types.h> |
36 | #include <crypto/algapi.h> | 32 | #include <crypto/algapi.h> |
37 | #include <crypto/b128ops.h> | ||
38 | #include <crypto/lrw.h> | 33 | #include <crypto/lrw.h> |
39 | #include <crypto/xts.h> | 34 | #include <crypto/xts.h> |
35 | #include <asm/crypto/glue_helper.h> | ||
40 | 36 | ||
41 | #define CAMELLIA_MIN_KEY_SIZE 16 | 37 | #define CAMELLIA_MIN_KEY_SIZE 16 |
42 | #define CAMELLIA_MAX_KEY_SIZE 32 | 38 | #define CAMELLIA_MAX_KEY_SIZE 32 |
@@ -1312,307 +1308,128 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, | |||
1312 | &tfm->crt_flags); | 1308 | &tfm->crt_flags); |
1313 | } | 1309 | } |
1314 | 1310 | ||
1315 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | 1311 | static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) |
1316 | void (*fn)(struct camellia_ctx *, u8 *, const u8 *), | ||
1317 | void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *)) | ||
1318 | { | 1312 | { |
1319 | struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 1313 | u128 iv = *src; |
1320 | unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
1321 | unsigned int nbytes; | ||
1322 | int err; | ||
1323 | |||
1324 | err = blkcipher_walk_virt(desc, walk); | ||
1325 | |||
1326 | while ((nbytes = walk->nbytes)) { | ||
1327 | u8 *wsrc = walk->src.virt.addr; | ||
1328 | u8 *wdst = walk->dst.virt.addr; | ||
1329 | |||
1330 | /* Process two block batch */ | ||
1331 | if (nbytes >= bsize * 2) { | ||
1332 | do { | ||
1333 | fn_2way(ctx, wdst, wsrc); | ||
1334 | |||
1335 | wsrc += bsize * 2; | ||
1336 | wdst += bsize * 2; | ||
1337 | nbytes -= bsize * 2; | ||
1338 | } while (nbytes >= bsize * 2); | ||
1339 | |||
1340 | if (nbytes < bsize) | ||
1341 | goto done; | ||
1342 | } | ||
1343 | |||
1344 | /* Handle leftovers */ | ||
1345 | do { | ||
1346 | fn(ctx, wdst, wsrc); | ||
1347 | |||
1348 | wsrc += bsize; | ||
1349 | wdst += bsize; | ||
1350 | nbytes -= bsize; | ||
1351 | } while (nbytes >= bsize); | ||
1352 | |||
1353 | done: | ||
1354 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
1355 | } | ||
1356 | |||
1357 | return err; | ||
1358 | } | ||
1359 | |||
1360 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
1361 | struct scatterlist *src, unsigned int nbytes) | ||
1362 | { | ||
1363 | struct blkcipher_walk walk; | ||
1364 | |||
1365 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
1366 | return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way); | ||
1367 | } | ||
1368 | 1314 | ||
1369 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 1315 | camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); |
1370 | struct scatterlist *src, unsigned int nbytes) | ||
1371 | { | ||
1372 | struct blkcipher_walk walk; | ||
1373 | |||
1374 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
1375 | return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way); | ||
1376 | } | ||
1377 | 1316 | ||
1378 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | 1317 | u128_xor(&dst[1], &dst[1], &iv); |
1379 | struct blkcipher_walk *walk) | ||
1380 | { | ||
1381 | struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
1382 | unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
1383 | unsigned int nbytes = walk->nbytes; | ||
1384 | u128 *src = (u128 *)walk->src.virt.addr; | ||
1385 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
1386 | u128 *iv = (u128 *)walk->iv; | ||
1387 | |||
1388 | do { | ||
1389 | u128_xor(dst, src, iv); | ||
1390 | camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
1391 | iv = dst; | ||
1392 | |||
1393 | src += 1; | ||
1394 | dst += 1; | ||
1395 | nbytes -= bsize; | ||
1396 | } while (nbytes >= bsize); | ||
1397 | |||
1398 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
1399 | return nbytes; | ||
1400 | } | 1318 | } |
1401 | 1319 | ||
1402 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 1320 | static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) |
1403 | struct scatterlist *src, unsigned int nbytes) | ||
1404 | { | 1321 | { |
1405 | struct blkcipher_walk walk; | 1322 | be128 ctrblk; |
1406 | int err; | ||
1407 | 1323 | ||
1408 | blkcipher_walk_init(&walk, dst, src, nbytes); | 1324 | if (dst != src) |
1409 | err = blkcipher_walk_virt(desc, &walk); | 1325 | *dst = *src; |
1410 | 1326 | ||
1411 | while ((nbytes = walk.nbytes)) { | 1327 | u128_to_be128(&ctrblk, iv); |
1412 | nbytes = __cbc_encrypt(desc, &walk); | 1328 | u128_inc(iv); |
1413 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
1414 | } | ||
1415 | 1329 | ||
1416 | return err; | 1330 | camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk); |
1417 | } | 1331 | } |
1418 | 1332 | ||
1419 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 1333 | static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, |
1420 | struct blkcipher_walk *walk) | 1334 | u128 *iv) |
1421 | { | 1335 | { |
1422 | struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 1336 | be128 ctrblks[2]; |
1423 | unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
1424 | unsigned int nbytes = walk->nbytes; | ||
1425 | u128 *src = (u128 *)walk->src.virt.addr; | ||
1426 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
1427 | u128 ivs[2 - 1]; | ||
1428 | u128 last_iv; | ||
1429 | 1337 | ||
1430 | /* Start of the last block. */ | 1338 | if (dst != src) { |
1431 | src += nbytes / bsize - 1; | 1339 | dst[0] = src[0]; |
1432 | dst += nbytes / bsize - 1; | 1340 | dst[1] = src[1]; |
1433 | |||
1434 | last_iv = *src; | ||
1435 | |||
1436 | /* Process two block batch */ | ||
1437 | if (nbytes >= bsize * 2) { | ||
1438 | do { | ||
1439 | nbytes -= bsize * (2 - 1); | ||
1440 | src -= 2 - 1; | ||
1441 | dst -= 2 - 1; | ||
1442 | |||
1443 | ivs[0] = src[0]; | ||
1444 | |||
1445 | camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); | ||
1446 | |||
1447 | u128_xor(dst + 1, dst + 1, ivs + 0); | ||
1448 | |||
1449 | nbytes -= bsize; | ||
1450 | if (nbytes < bsize) | ||
1451 | goto done; | ||
1452 | |||
1453 | u128_xor(dst, dst, src - 1); | ||
1454 | src -= 1; | ||
1455 | dst -= 1; | ||
1456 | } while (nbytes >= bsize * 2); | ||
1457 | |||
1458 | if (nbytes < bsize) | ||
1459 | goto done; | ||
1460 | } | 1341 | } |
1461 | 1342 | ||
1462 | /* Handle leftovers */ | 1343 | u128_to_be128(&ctrblks[0], iv); |
1463 | for (;;) { | 1344 | u128_inc(iv); |
1464 | camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src); | 1345 | u128_to_be128(&ctrblks[1], iv); |
1465 | 1346 | u128_inc(iv); | |
1466 | nbytes -= bsize; | ||
1467 | if (nbytes < bsize) | ||
1468 | break; | ||
1469 | 1347 | ||
1470 | u128_xor(dst, dst, src - 1); | 1348 | camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks); |
1471 | src -= 1; | ||
1472 | dst -= 1; | ||
1473 | } | ||
1474 | |||
1475 | done: | ||
1476 | u128_xor(dst, dst, (u128 *)walk->iv); | ||
1477 | *(u128 *)walk->iv = last_iv; | ||
1478 | |||
1479 | return nbytes; | ||
1480 | } | 1349 | } |
1481 | 1350 | ||
1482 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 1351 | static const struct common_glue_ctx camellia_enc = { |
1483 | struct scatterlist *src, unsigned int nbytes) | 1352 | .num_funcs = 2, |
1484 | { | 1353 | .fpu_blocks_limit = -1, |
1485 | struct blkcipher_walk walk; | 1354 | |
1486 | int err; | 1355 | .funcs = { { |
1487 | 1356 | .num_blocks = 2, | |
1488 | blkcipher_walk_init(&walk, dst, src, nbytes); | 1357 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) } |
1489 | err = blkcipher_walk_virt(desc, &walk); | 1358 | }, { |
1359 | .num_blocks = 1, | ||
1360 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) } | ||
1361 | } } | ||
1362 | }; | ||
1490 | 1363 | ||
1491 | while ((nbytes = walk.nbytes)) { | 1364 | static const struct common_glue_ctx camellia_ctr = { |
1492 | nbytes = __cbc_decrypt(desc, &walk); | 1365 | .num_funcs = 2, |
1493 | err = blkcipher_walk_done(desc, &walk, nbytes); | 1366 | .fpu_blocks_limit = -1, |
1494 | } | 1367 | |
1368 | .funcs = { { | ||
1369 | .num_blocks = 2, | ||
1370 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) } | ||
1371 | }, { | ||
1372 | .num_blocks = 1, | ||
1373 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) } | ||
1374 | } } | ||
1375 | }; | ||
1495 | 1376 | ||
1496 | return err; | 1377 | static const struct common_glue_ctx camellia_dec = { |
1497 | } | 1378 | .num_funcs = 2, |
1379 | .fpu_blocks_limit = -1, | ||
1380 | |||
1381 | .funcs = { { | ||
1382 | .num_blocks = 2, | ||
1383 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) } | ||
1384 | }, { | ||
1385 | .num_blocks = 1, | ||
1386 | .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) } | ||
1387 | } } | ||
1388 | }; | ||
1498 | 1389 | ||
1499 | static inline void u128_to_be128(be128 *dst, const u128 *src) | 1390 | static const struct common_glue_ctx camellia_dec_cbc = { |
1500 | { | 1391 | .num_funcs = 2, |
1501 | dst->a = cpu_to_be64(src->a); | 1392 | .fpu_blocks_limit = -1, |
1502 | dst->b = cpu_to_be64(src->b); | 1393 | |
1503 | } | 1394 | .funcs = { { |
1395 | .num_blocks = 2, | ||
1396 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) } | ||
1397 | }, { | ||
1398 | .num_blocks = 1, | ||
1399 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) } | ||
1400 | } } | ||
1401 | }; | ||
1504 | 1402 | ||
1505 | static inline void be128_to_u128(u128 *dst, const be128 *src) | 1403 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1404 | struct scatterlist *src, unsigned int nbytes) | ||
1506 | { | 1405 | { |
1507 | dst->a = be64_to_cpu(src->a); | 1406 | return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); |
1508 | dst->b = be64_to_cpu(src->b); | ||
1509 | } | 1407 | } |
1510 | 1408 | ||
1511 | static inline void u128_inc(u128 *i) | 1409 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1410 | struct scatterlist *src, unsigned int nbytes) | ||
1512 | { | 1411 | { |
1513 | i->b++; | 1412 | return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); |
1514 | if (!i->b) | ||
1515 | i->a++; | ||
1516 | } | 1413 | } |
1517 | 1414 | ||
1518 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 1415 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1519 | struct blkcipher_walk *walk) | 1416 | struct scatterlist *src, unsigned int nbytes) |
1520 | { | 1417 | { |
1521 | struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 1418 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, |
1522 | u8 keystream[CAMELLIA_BLOCK_SIZE]; | 1419 | dst, src, nbytes); |
1523 | u8 *src = walk->src.virt.addr; | ||
1524 | u8 *dst = walk->dst.virt.addr; | ||
1525 | unsigned int nbytes = walk->nbytes; | ||
1526 | u128 ctrblk; | ||
1527 | |||
1528 | memcpy(keystream, src, nbytes); | ||
1529 | camellia_enc_blk_xor(ctx, keystream, walk->iv); | ||
1530 | memcpy(dst, keystream, nbytes); | ||
1531 | |||
1532 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
1533 | u128_inc(&ctrblk); | ||
1534 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
1535 | } | 1420 | } |
1536 | 1421 | ||
1537 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | 1422 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1538 | struct blkcipher_walk *walk) | 1423 | struct scatterlist *src, unsigned int nbytes) |
1539 | { | 1424 | { |
1540 | struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 1425 | return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, |
1541 | unsigned int bsize = CAMELLIA_BLOCK_SIZE; | 1426 | nbytes); |
1542 | unsigned int nbytes = walk->nbytes; | ||
1543 | u128 *src = (u128 *)walk->src.virt.addr; | ||
1544 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
1545 | u128 ctrblk; | ||
1546 | be128 ctrblocks[2]; | ||
1547 | |||
1548 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
1549 | |||
1550 | /* Process two-block batch */ | ||
1551 | if (nbytes >= bsize * 2) { | ||
1552 | do { | ||
1553 | if (dst != src) { | ||
1554 | dst[0] = src[0]; | ||
1555 | dst[1] = src[1]; | ||
1556 | } | ||
1557 | |||
1558 | /* create ctrblks for parallel encrypt */ | ||
1559 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
1560 | u128_inc(&ctrblk); | ||
1561 | u128_to_be128(&ctrblocks[1], &ctrblk); | ||
1562 | u128_inc(&ctrblk); | ||
1563 | |||
1564 | camellia_enc_blk_xor_2way(ctx, (u8 *)dst, | ||
1565 | (u8 *)ctrblocks); | ||
1566 | |||
1567 | src += 2; | ||
1568 | dst += 2; | ||
1569 | nbytes -= bsize * 2; | ||
1570 | } while (nbytes >= bsize * 2); | ||
1571 | |||
1572 | if (nbytes < bsize) | ||
1573 | goto done; | ||
1574 | } | ||
1575 | |||
1576 | /* Handle leftovers */ | ||
1577 | do { | ||
1578 | if (dst != src) | ||
1579 | *dst = *src; | ||
1580 | |||
1581 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
1582 | u128_inc(&ctrblk); | ||
1583 | |||
1584 | camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); | ||
1585 | |||
1586 | src += 1; | ||
1587 | dst += 1; | ||
1588 | nbytes -= bsize; | ||
1589 | } while (nbytes >= bsize); | ||
1590 | |||
1591 | done: | ||
1592 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
1593 | return nbytes; | ||
1594 | } | 1427 | } |
1595 | 1428 | ||
1596 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 1429 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1597 | struct scatterlist *src, unsigned int nbytes) | 1430 | struct scatterlist *src, unsigned int nbytes) |
1598 | { | 1431 | { |
1599 | struct blkcipher_walk walk; | 1432 | return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); |
1600 | int err; | ||
1601 | |||
1602 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
1603 | err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE); | ||
1604 | |||
1605 | while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) { | ||
1606 | nbytes = __ctr_crypt(desc, &walk); | ||
1607 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
1608 | } | ||
1609 | |||
1610 | if (walk.nbytes) { | ||
1611 | ctr_crypt_final(desc, &walk); | ||
1612 | err = blkcipher_walk_done(desc, &walk, 0); | ||
1613 | } | ||
1614 | |||
1615 | return err; | ||
1616 | } | 1433 | } |
1617 | 1434 | ||
1618 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | 1435 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) |
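The camellia_glue.c conversion above removes the cipher's private ECB/CBC/CTR walkers in favour of common_glue_ctx tables: each table lists the available batch widths widest-first, and the negative fpu_blocks_limit appears to mean the 2-way code is plain integer code that never needs the FPU. A minimal standalone sketch of the widest-first dispatch (the struct, names and toy functions below are illustrative, not the kernel API):

	#include <stdio.h>

	typedef void (*ecb_fn)(void *ctx, unsigned char *dst,
			       const unsigned char *src);

	struct glue_func { unsigned int num_blocks; ecb_fn fn; };

	static void enc_2way(void *c, unsigned char *d, const unsigned char *s)
	{ puts("2-way"); }
	static void enc_1way(void *c, unsigned char *d, const unsigned char *s)
	{ puts("1-way"); }

	/* widest batch first, exactly like the camellia_enc table above */
	static const struct glue_func funcs[] = { { 2, enc_2way },
						  { 1, enc_1way } };

	int main(void)
	{
		unsigned int bsize = 16, nbytes = 5 * bsize; /* five blocks */
		unsigned char buf[5 * 16] = { 0 };
		unsigned char *p = buf;

		for (unsigned int i = 0; i < 2; i++) {
			unsigned int func_bytes = bsize * funcs[i].num_blocks;

			while (nbytes >= func_bytes) {	/* 2-way, 2-way, 1-way */
				funcs[i].fn(NULL, p, p);
				p += func_bytes;
				nbytes -= func_bytes;
			}
		}
		return 0;
	}

The same loop shape appears in __glue_ecb_crypt_128bit() in the new glue_helper.c below.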
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c new file mode 100644 index 000000000000..4854f0f31e4f --- /dev/null +++ b/arch/x86/crypto/glue_helper.c | |||
@@ -0,0 +1,307 @@ | |||
1 | /* | ||
2 | * Shared glue code for 128bit block ciphers | ||
3 | * | ||
4 | * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
24 | * USA | ||
25 | * | ||
26 | */ | ||
27 | |||
28 | #include <linux/module.h> | ||
29 | #include <crypto/b128ops.h> | ||
30 | #include <crypto/lrw.h> | ||
31 | #include <crypto/xts.h> | ||
32 | #include <asm/crypto/glue_helper.h> | ||
33 | #include <crypto/scatterwalk.h> | ||
34 | |||
35 | static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
36 | struct blkcipher_desc *desc, | ||
37 | struct blkcipher_walk *walk) | ||
38 | { | ||
39 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
40 | const unsigned int bsize = 128 / 8; | ||
41 | unsigned int nbytes, i, func_bytes; | ||
42 | bool fpu_enabled = false; | ||
43 | int err; | ||
44 | |||
45 | err = blkcipher_walk_virt(desc, walk); | ||
46 | |||
47 | while ((nbytes = walk->nbytes)) { | ||
48 | u8 *wsrc = walk->src.virt.addr; | ||
49 | u8 *wdst = walk->dst.virt.addr; | ||
50 | |||
51 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
52 | desc, fpu_enabled, nbytes); | ||
53 | |||
54 | for (i = 0; i < gctx->num_funcs; i++) { | ||
55 | func_bytes = bsize * gctx->funcs[i].num_blocks; | ||
56 | |||
57 | /* Process multi-block batch */ | ||
58 | if (nbytes >= func_bytes) { | ||
59 | do { | ||
60 | gctx->funcs[i].fn_u.ecb(ctx, wdst, | ||
61 | wsrc); | ||
62 | |||
63 | wsrc += func_bytes; | ||
64 | wdst += func_bytes; | ||
65 | nbytes -= func_bytes; | ||
66 | } while (nbytes >= func_bytes); | ||
67 | |||
68 | if (nbytes < bsize) | ||
69 | goto done; | ||
70 | } | ||
71 | } | ||
72 | |||
73 | done: | ||
74 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
75 | } | ||
76 | |||
77 | glue_fpu_end(fpu_enabled); | ||
78 | return err; | ||
79 | } | ||
80 | |||
81 | int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
82 | struct blkcipher_desc *desc, struct scatterlist *dst, | ||
83 | struct scatterlist *src, unsigned int nbytes) | ||
84 | { | ||
85 | struct blkcipher_walk walk; | ||
86 | |||
87 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
88 | return __glue_ecb_crypt_128bit(gctx, desc, &walk); | ||
89 | } | ||
90 | EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit); | ||
91 | |||
92 | static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
93 | struct blkcipher_desc *desc, | ||
94 | struct blkcipher_walk *walk) | ||
95 | { | ||
96 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
97 | const unsigned int bsize = 128 / 8; | ||
98 | unsigned int nbytes = walk->nbytes; | ||
99 | u128 *src = (u128 *)walk->src.virt.addr; | ||
100 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
101 | u128 *iv = (u128 *)walk->iv; | ||
102 | |||
103 | do { | ||
104 | u128_xor(dst, src, iv); | ||
105 | fn(ctx, (u8 *)dst, (u8 *)dst); | ||
106 | iv = dst; | ||
107 | |||
108 | src += 1; | ||
109 | dst += 1; | ||
110 | nbytes -= bsize; | ||
111 | } while (nbytes >= bsize); | ||
112 | |||
113 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
114 | return nbytes; | ||
115 | } | ||
116 | |||
117 | int glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
118 | struct blkcipher_desc *desc, | ||
119 | struct scatterlist *dst, | ||
120 | struct scatterlist *src, unsigned int nbytes) | ||
121 | { | ||
122 | struct blkcipher_walk walk; | ||
123 | int err; | ||
124 | |||
125 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
126 | err = blkcipher_walk_virt(desc, &walk); | ||
127 | |||
128 | while ((nbytes = walk.nbytes)) { | ||
129 | nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); | ||
130 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
131 | } | ||
132 | |||
133 | return err; | ||
134 | } | ||
135 | EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit); | ||
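Note that glue_cbc_encrypt_128bit() takes a single one-block function rather than a common_glue_ctx: CBC encryption is inherently serial, since each ciphertext block feeds the next input, C_i = E_K(P_i ^ C_{i-1}), so a multi-block primitive would have nothing to batch. A toy scalar version of the same loop (toy_encrypt() merely stands in for the real block cipher):

	#include <stdint.h>
	#include <string.h>

	static void toy_encrypt(uint8_t blk[16])
	{
		for (int i = 0; i < 16; i++)
			blk[i] ^= 0x5a;		/* placeholder "cipher" */
	}

	static void cbc_encrypt(uint8_t *buf, size_t nblocks, uint8_t iv[16])
	{
		const uint8_t *prev = iv;

		for (size_t b = 0; b < nblocks; b++, buf += 16) {
			for (int i = 0; i < 16; i++)
				buf[i] ^= prev[i];	/* P_i ^ C_{i-1} */
			toy_encrypt(buf);		/* C_i */
			prev = buf;			/* serial dependency */
		}
		if (nblocks)
			memcpy(iv, prev, 16);	/* last C_i becomes next IV */
	}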
136 | |||
137 | static unsigned int | ||
138 | __glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
139 | struct blkcipher_desc *desc, | ||
140 | struct blkcipher_walk *walk) | ||
141 | { | ||
142 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
143 | const unsigned int bsize = 128 / 8; | ||
144 | unsigned int nbytes = walk->nbytes; | ||
145 | u128 *src = (u128 *)walk->src.virt.addr; | ||
146 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
147 | u128 last_iv; | ||
148 | unsigned int num_blocks, func_bytes; | ||
149 | unsigned int i; | ||
150 | |||
151 | /* Start of the last block. */ | ||
152 | src += nbytes / bsize - 1; | ||
153 | dst += nbytes / bsize - 1; | ||
154 | |||
155 | last_iv = *src; | ||
156 | |||
157 | for (i = 0; i < gctx->num_funcs; i++) { | ||
158 | num_blocks = gctx->funcs[i].num_blocks; | ||
159 | func_bytes = bsize * num_blocks; | ||
160 | |||
161 | /* Process multi-block batch */ | ||
162 | if (nbytes >= func_bytes) { | ||
163 | do { | ||
164 | nbytes -= func_bytes - bsize; | ||
165 | src -= num_blocks - 1; | ||
166 | dst -= num_blocks - 1; | ||
167 | |||
168 | gctx->funcs[i].fn_u.cbc(ctx, dst, src); | ||
169 | |||
170 | nbytes -= bsize; | ||
171 | if (nbytes < bsize) | ||
172 | goto done; | ||
173 | |||
174 | u128_xor(dst, dst, src - 1); | ||
175 | src -= 1; | ||
176 | dst -= 1; | ||
177 | } while (nbytes >= func_bytes); | ||
178 | |||
179 | if (nbytes < bsize) | ||
180 | goto done; | ||
181 | } | ||
182 | } | ||
183 | |||
184 | done: | ||
185 | u128_xor(dst, dst, (u128 *)walk->iv); | ||
186 | *(u128 *)walk->iv = last_iv; | ||
187 | |||
188 | return nbytes; | ||
189 | } | ||
190 | |||
191 | int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
192 | struct blkcipher_desc *desc, | ||
193 | struct scatterlist *dst, | ||
194 | struct scatterlist *src, unsigned int nbytes) | ||
195 | { | ||
196 | const unsigned int bsize = 128 / 8; | ||
197 | bool fpu_enabled = false; | ||
198 | struct blkcipher_walk walk; | ||
199 | int err; | ||
200 | |||
201 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
202 | err = blkcipher_walk_virt(desc, &walk); | ||
203 | |||
204 | while ((nbytes = walk.nbytes)) { | ||
205 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
206 | desc, fpu_enabled, nbytes); | ||
207 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | ||
208 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
209 | } | ||
210 | |||
211 | glue_fpu_end(fpu_enabled); | ||
212 | return err; | ||
213 | } | ||
214 | EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); | ||
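CBC decryption, unlike encryption, does parallelise, but __glue_cbc_decrypt_128bit() must also work in place (dst may alias src). It therefore saves the last ciphertext block up front, since that block is the IV for the next walk segment, and then iterates backwards from the end, so that each block's XOR partner, the previous ciphertext block, is still intact when it is needed. The same back-to-front idea in scalar form (toy_decrypt() inverts the toy cipher sketched earlier):

	#include <stdint.h>
	#include <string.h>

	static void toy_decrypt(uint8_t blk[16])
	{
		for (int i = 0; i < 16; i++)
			blk[i] ^= 0x5a;
	}

	static void cbc_decrypt(uint8_t *buf, size_t nblocks, uint8_t iv[16])
	{
		uint8_t last_iv[16];

		if (!nblocks)
			return;
		memcpy(last_iv, buf + 16 * (nblocks - 1), 16);

		for (size_t b = nblocks; b-- > 0; ) {
			uint8_t *blk = buf + 16 * b;
			const uint8_t *prev = b ? blk - 16 : iv;

			toy_decrypt(blk);		/* D(C_b), in place */
			for (int i = 0; i < 16; i++)
				blk[i] ^= prev[i];	/* ... ^ C_{b-1} */
		}
		memcpy(iv, last_iv, 16);	/* saved C_{n-1} chains on */
	}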
215 | |||
216 | static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, | ||
217 | struct blkcipher_desc *desc, | ||
218 | struct blkcipher_walk *walk) | ||
219 | { | ||
220 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
221 | u8 *src = (u8 *)walk->src.virt.addr; | ||
222 | u8 *dst = (u8 *)walk->dst.virt.addr; | ||
223 | unsigned int nbytes = walk->nbytes; | ||
224 | u128 ctrblk; | ||
225 | u128 tmp; | ||
226 | |||
227 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
228 | |||
229 | memcpy(&tmp, src, nbytes); | ||
230 | fn_ctr(ctx, &tmp, &tmp, &ctrblk); | ||
231 | memcpy(dst, &tmp, nbytes); | ||
232 | |||
233 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
234 | } | ||
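glue_ctr_crypt_final_128bit() handles a tail shorter than one block: the partial source is widened into a full 16-byte buffer so the one-block CTR primitive can run over it, and only the bytes that were really present are copied back out. Its essential shape, with a precomputed keystream standing in for fn_ctr:

	#include <stdint.h>
	#include <string.h>

	static void ctr_final(uint8_t *dst, const uint8_t *src, size_t nbytes,
			      const uint8_t keystream[16])
	{
		uint8_t tmp[16];

		memcpy(tmp, src, nbytes);	/* nbytes < 16 */
		for (size_t i = 0; i < nbytes; i++)
			tmp[i] ^= keystream[i];
		memcpy(dst, tmp, nbytes);	/* write back only real bytes */
	}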
236 | |||
237 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
238 | struct blkcipher_desc *desc, | ||
239 | struct blkcipher_walk *walk) | ||
240 | { | ||
241 | const unsigned int bsize = 128 / 8; | ||
242 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
243 | unsigned int nbytes = walk->nbytes; | ||
244 | u128 *src = (u128 *)walk->src.virt.addr; | ||
245 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
246 | u128 ctrblk; | ||
247 | unsigned int num_blocks, func_bytes; | ||
248 | unsigned int i; | ||
249 | |||
250 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
251 | |||
252 | /* Process multi-block batch */ | ||
253 | for (i = 0; i < gctx->num_funcs; i++) { | ||
254 | num_blocks = gctx->funcs[i].num_blocks; | ||
255 | func_bytes = bsize * num_blocks; | ||
256 | |||
257 | if (nbytes >= func_bytes) { | ||
258 | do { | ||
259 | gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); | ||
260 | |||
261 | src += num_blocks; | ||
262 | dst += num_blocks; | ||
263 | nbytes -= func_bytes; | ||
264 | } while (nbytes >= func_bytes); | ||
265 | |||
266 | if (nbytes < bsize) | ||
267 | goto done; | ||
268 | } | ||
269 | } | ||
270 | |||
271 | done: | ||
272 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
273 | return nbytes; | ||
274 | } | ||
275 | |||
276 | int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
277 | struct blkcipher_desc *desc, struct scatterlist *dst, | ||
278 | struct scatterlist *src, unsigned int nbytes) | ||
279 | { | ||
280 | const unsigned int bsize = 128 / 8; | ||
281 | bool fpu_enabled = false; | ||
282 | struct blkcipher_walk walk; | ||
283 | int err; | ||
284 | |||
285 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
286 | err = blkcipher_walk_virt_block(desc, &walk, bsize); | ||
287 | |||
288 | while ((nbytes = walk.nbytes) >= bsize) { | ||
289 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
290 | desc, fpu_enabled, nbytes); | ||
291 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | ||
292 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
293 | } | ||
294 | |||
295 | glue_fpu_end(fpu_enabled); | ||
296 | |||
297 | if (walk.nbytes) { | ||
298 | glue_ctr_crypt_final_128bit( | ||
299 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | ||
300 | err = blkcipher_walk_done(desc, &walk, 0); | ||
301 | } | ||
302 | |||
303 | return err; | ||
304 | } | ||
305 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); | ||
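Throughout the CTR path the IV lives in memory big-endian (be128) but is incremented in native byte order: be128_to_u128() swaps once on entry, u128_inc() adds with a manual carry across the two 64-bit halves, and u128_to_be128() swaps back when the walk segment ends. The carry logic, identical to the helpers this patch deletes from camellia_glue.c:

	#include <stdint.h>

	struct u128 { uint64_t a, b; };	/* a: high half, b: low half */

	static void u128_inc(struct u128 *i)
	{
		i->b++;
		if (!i->b)	/* low half wrapped to zero: carry */
			i->a++;
	}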
306 | |||
307 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S new file mode 100644 index 000000000000..504106bf04a2 --- /dev/null +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S | |||
@@ -0,0 +1,704 @@ | |||
1 | /* | ||
2 | * Serpent Cipher 8-way parallel algorithm (x86_64/AVX) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by | ||
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | .file "serpent-avx-x86_64-asm_64.S" | ||
28 | .text | ||
29 | |||
30 | #define CTX %rdi | ||
31 | |||
32 | /********************************************************************** | ||
33 | 8-way AVX serpent | ||
34 | **********************************************************************/ | ||
35 | #define RA1 %xmm0 | ||
36 | #define RB1 %xmm1 | ||
37 | #define RC1 %xmm2 | ||
38 | #define RD1 %xmm3 | ||
39 | #define RE1 %xmm4 | ||
40 | |||
41 | #define tp %xmm5 | ||
42 | |||
43 | #define RA2 %xmm6 | ||
44 | #define RB2 %xmm7 | ||
45 | #define RC2 %xmm8 | ||
46 | #define RD2 %xmm9 | ||
47 | #define RE2 %xmm10 | ||
48 | |||
49 | #define RNOT %xmm11 | ||
50 | |||
51 | #define RK0 %xmm12 | ||
52 | #define RK1 %xmm13 | ||
53 | #define RK2 %xmm14 | ||
54 | #define RK3 %xmm15 | ||
55 | |||
56 | |||
57 | #define S0_1(x0, x1, x2, x3, x4) \ | ||
58 | vpor x0, x3, tp; \ | ||
59 | vpxor x3, x0, x0; \ | ||
60 | vpxor x2, x3, x4; \ | ||
61 | vpxor RNOT, x4, x4; \ | ||
62 | vpxor x1, tp, x3; \ | ||
63 | vpand x0, x1, x1; \ | ||
64 | vpxor x4, x1, x1; \ | ||
65 | vpxor x0, x2, x2; | ||
66 | #define S0_2(x0, x1, x2, x3, x4) \ | ||
67 | vpxor x3, x0, x0; \ | ||
68 | vpor x0, x4, x4; \ | ||
69 | vpxor x2, x0, x0; \ | ||
70 | vpand x1, x2, x2; \ | ||
71 | vpxor x2, x3, x3; \ | ||
72 | vpxor RNOT, x1, x1; \ | ||
73 | vpxor x4, x2, x2; \ | ||
74 | vpxor x2, x1, x1; | ||
75 | |||
76 | #define S1_1(x0, x1, x2, x3, x4) \ | ||
77 | vpxor x0, x1, tp; \ | ||
78 | vpxor x3, x0, x0; \ | ||
79 | vpxor RNOT, x3, x3; \ | ||
80 | vpand tp, x1, x4; \ | ||
81 | vpor tp, x0, x0; \ | ||
82 | vpxor x2, x3, x3; \ | ||
83 | vpxor x3, x0, x0; \ | ||
84 | vpxor x3, tp, x1; | ||
85 | #define S1_2(x0, x1, x2, x3, x4) \ | ||
86 | vpxor x4, x3, x3; \ | ||
87 | vpor x4, x1, x1; \ | ||
88 | vpxor x2, x4, x4; \ | ||
89 | vpand x0, x2, x2; \ | ||
90 | vpxor x1, x2, x2; \ | ||
91 | vpor x0, x1, x1; \ | ||
92 | vpxor RNOT, x0, x0; \ | ||
93 | vpxor x2, x0, x0; \ | ||
94 | vpxor x1, x4, x4; | ||
95 | |||
96 | #define S2_1(x0, x1, x2, x3, x4) \ | ||
97 | vpxor RNOT, x3, x3; \ | ||
98 | vpxor x0, x1, x1; \ | ||
99 | vpand x2, x0, tp; \ | ||
100 | vpxor x3, tp, tp; \ | ||
101 | vpor x0, x3, x3; \ | ||
102 | vpxor x1, x2, x2; \ | ||
103 | vpxor x1, x3, x3; \ | ||
104 | vpand tp, x1, x1; | ||
105 | #define S2_2(x0, x1, x2, x3, x4) \ | ||
106 | vpxor x2, tp, tp; \ | ||
107 | vpand x3, x2, x2; \ | ||
108 | vpor x1, x3, x3; \ | ||
109 | vpxor RNOT, tp, tp; \ | ||
110 | vpxor tp, x3, x3; \ | ||
111 | vpxor tp, x0, x4; \ | ||
112 | vpxor x2, tp, x0; \ | ||
113 | vpor x2, x1, x1; | ||
114 | |||
115 | #define S3_1(x0, x1, x2, x3, x4) \ | ||
116 | vpxor x3, x1, tp; \ | ||
117 | vpor x0, x3, x3; \ | ||
118 | vpand x0, x1, x4; \ | ||
119 | vpxor x2, x0, x0; \ | ||
120 | vpxor tp, x2, x2; \ | ||
121 | vpand x3, tp, x1; \ | ||
122 | vpxor x3, x2, x2; \ | ||
123 | vpor x4, x0, x0; \ | ||
124 | vpxor x3, x4, x4; | ||
125 | #define S3_2(x0, x1, x2, x3, x4) \ | ||
126 | vpxor x0, x1, x1; \ | ||
127 | vpand x3, x0, x0; \ | ||
128 | vpand x4, x3, x3; \ | ||
129 | vpxor x2, x3, x3; \ | ||
130 | vpor x1, x4, x4; \ | ||
131 | vpand x1, x2, x2; \ | ||
132 | vpxor x3, x4, x4; \ | ||
133 | vpxor x3, x0, x0; \ | ||
134 | vpxor x2, x3, x3; | ||
135 | |||
136 | #define S4_1(x0, x1, x2, x3, x4) \ | ||
137 | vpand x0, x3, tp; \ | ||
138 | vpxor x3, x0, x0; \ | ||
139 | vpxor x2, tp, tp; \ | ||
140 | vpor x3, x2, x2; \ | ||
141 | vpxor x1, x0, x0; \ | ||
142 | vpxor tp, x3, x4; \ | ||
143 | vpor x0, x2, x2; \ | ||
144 | vpxor x1, x2, x2; | ||
145 | #define S4_2(x0, x1, x2, x3, x4) \ | ||
146 | vpand x0, x1, x1; \ | ||
147 | vpxor x4, x1, x1; \ | ||
148 | vpand x2, x4, x4; \ | ||
149 | vpxor tp, x2, x2; \ | ||
150 | vpxor x0, x4, x4; \ | ||
151 | vpor x1, tp, x3; \ | ||
152 | vpxor RNOT, x1, x1; \ | ||
153 | vpxor x0, x3, x3; | ||
154 | |||
155 | #define S5_1(x0, x1, x2, x3, x4) \ | ||
156 | vpor x0, x1, tp; \ | ||
157 | vpxor tp, x2, x2; \ | ||
158 | vpxor RNOT, x3, x3; \ | ||
159 | vpxor x0, x1, x4; \ | ||
160 | vpxor x2, x0, x0; \ | ||
161 | vpand x4, tp, x1; \ | ||
162 | vpor x3, x4, x4; \ | ||
163 | vpxor x0, x4, x4; | ||
164 | #define S5_2(x0, x1, x2, x3, x4) \ | ||
165 | vpand x3, x0, x0; \ | ||
166 | vpxor x3, x1, x1; \ | ||
167 | vpxor x2, x3, x3; \ | ||
168 | vpxor x1, x0, x0; \ | ||
169 | vpand x4, x2, x2; \ | ||
170 | vpxor x2, x1, x1; \ | ||
171 | vpand x0, x2, x2; \ | ||
172 | vpxor x2, x3, x3; | ||
173 | |||
174 | #define S6_1(x0, x1, x2, x3, x4) \ | ||
175 | vpxor x0, x3, x3; \ | ||
176 | vpxor x2, x1, tp; \ | ||
177 | vpxor x0, x2, x2; \ | ||
178 | vpand x3, x0, x0; \ | ||
179 | vpor x3, tp, tp; \ | ||
180 | vpxor RNOT, x1, x4; \ | ||
181 | vpxor tp, x0, x0; \ | ||
182 | vpxor x2, tp, x1; | ||
183 | #define S6_2(x0, x1, x2, x3, x4) \ | ||
184 | vpxor x4, x3, x3; \ | ||
185 | vpxor x0, x4, x4; \ | ||
186 | vpand x0, x2, x2; \ | ||
187 | vpxor x1, x4, x4; \ | ||
188 | vpxor x3, x2, x2; \ | ||
189 | vpand x1, x3, x3; \ | ||
190 | vpxor x0, x3, x3; \ | ||
191 | vpxor x2, x1, x1; | ||
192 | |||
193 | #define S7_1(x0, x1, x2, x3, x4) \ | ||
194 | vpxor RNOT, x1, tp; \ | ||
195 | vpxor RNOT, x0, x0; \ | ||
196 | vpand x2, tp, x1; \ | ||
197 | vpxor x3, x1, x1; \ | ||
198 | vpor tp, x3, x3; \ | ||
199 | vpxor x2, tp, x4; \ | ||
200 | vpxor x3, x2, x2; \ | ||
201 | vpxor x0, x3, x3; \ | ||
202 | vpor x1, x0, x0; | ||
203 | #define S7_2(x0, x1, x2, x3, x4) \ | ||
204 | vpand x0, x2, x2; \ | ||
205 | vpxor x4, x0, x0; \ | ||
206 | vpxor x3, x4, x4; \ | ||
207 | vpand x0, x3, x3; \ | ||
208 | vpxor x1, x4, x4; \ | ||
209 | vpxor x4, x2, x2; \ | ||
210 | vpxor x1, x3, x3; \ | ||
211 | vpor x0, x4, x4; \ | ||
212 | vpxor x1, x4, x4; | ||
213 | |||
214 | #define SI0_1(x0, x1, x2, x3, x4) \ | ||
215 | vpxor x0, x1, x1; \ | ||
216 | vpor x1, x3, tp; \ | ||
217 | vpxor x1, x3, x4; \ | ||
218 | vpxor RNOT, x0, x0; \ | ||
219 | vpxor tp, x2, x2; \ | ||
220 | vpxor x0, tp, x3; \ | ||
221 | vpand x1, x0, x0; \ | ||
222 | vpxor x2, x0, x0; | ||
223 | #define SI0_2(x0, x1, x2, x3, x4) \ | ||
224 | vpand x3, x2, x2; \ | ||
225 | vpxor x4, x3, x3; \ | ||
226 | vpxor x3, x2, x2; \ | ||
227 | vpxor x3, x1, x1; \ | ||
228 | vpand x0, x3, x3; \ | ||
229 | vpxor x0, x1, x1; \ | ||
230 | vpxor x2, x0, x0; \ | ||
231 | vpxor x3, x4, x4; | ||
232 | |||
233 | #define SI1_1(x0, x1, x2, x3, x4) \ | ||
234 | vpxor x3, x1, x1; \ | ||
235 | vpxor x2, x0, tp; \ | ||
236 | vpxor RNOT, x2, x2; \ | ||
237 | vpor x1, x0, x4; \ | ||
238 | vpxor x3, x4, x4; \ | ||
239 | vpand x1, x3, x3; \ | ||
240 | vpxor x2, x1, x1; \ | ||
241 | vpand x4, x2, x2; | ||
242 | #define SI1_2(x0, x1, x2, x3, x4) \ | ||
243 | vpxor x1, x4, x4; \ | ||
244 | vpor x3, x1, x1; \ | ||
245 | vpxor tp, x3, x3; \ | ||
246 | vpxor tp, x2, x2; \ | ||
247 | vpor x4, tp, x0; \ | ||
248 | vpxor x4, x2, x2; \ | ||
249 | vpxor x0, x1, x1; \ | ||
250 | vpxor x1, x4, x4; | ||
251 | |||
252 | #define SI2_1(x0, x1, x2, x3, x4) \ | ||
253 | vpxor x1, x2, x2; \ | ||
254 | vpxor RNOT, x3, tp; \ | ||
255 | vpor x2, tp, tp; \ | ||
256 | vpxor x3, x2, x2; \ | ||
257 | vpxor x0, x3, x4; \ | ||
258 | vpxor x1, tp, x3; \ | ||
259 | vpor x2, x1, x1; \ | ||
260 | vpxor x0, x2, x2; | ||
261 | #define SI2_2(x0, x1, x2, x3, x4) \ | ||
262 | vpxor x4, x1, x1; \ | ||
263 | vpor x3, x4, x4; \ | ||
264 | vpxor x3, x2, x2; \ | ||
265 | vpxor x2, x4, x4; \ | ||
266 | vpand x1, x2, x2; \ | ||
267 | vpxor x3, x2, x2; \ | ||
268 | vpxor x4, x3, x3; \ | ||
269 | vpxor x0, x4, x4; | ||
270 | |||
271 | #define SI3_1(x0, x1, x2, x3, x4) \ | ||
272 | vpxor x1, x2, x2; \ | ||
273 | vpand x2, x1, tp; \ | ||
274 | vpxor x0, tp, tp; \ | ||
275 | vpor x1, x0, x0; \ | ||
276 | vpxor x3, x1, x4; \ | ||
277 | vpxor x3, x0, x0; \ | ||
278 | vpor tp, x3, x3; \ | ||
279 | vpxor x2, tp, x1; | ||
280 | #define SI3_2(x0, x1, x2, x3, x4) \ | ||
281 | vpxor x3, x1, x1; \ | ||
282 | vpxor x2, x0, x0; \ | ||
283 | vpxor x3, x2, x2; \ | ||
284 | vpand x1, x3, x3; \ | ||
285 | vpxor x0, x1, x1; \ | ||
286 | vpand x2, x0, x0; \ | ||
287 | vpxor x3, x4, x4; \ | ||
288 | vpxor x0, x3, x3; \ | ||
289 | vpxor x1, x0, x0; | ||
290 | |||
291 | #define SI4_1(x0, x1, x2, x3, x4) \ | ||
292 | vpxor x3, x2, x2; \ | ||
293 | vpand x1, x0, tp; \ | ||
294 | vpxor x2, tp, tp; \ | ||
295 | vpor x3, x2, x2; \ | ||
296 | vpxor RNOT, x0, x4; \ | ||
297 | vpxor tp, x1, x1; \ | ||
298 | vpxor x2, tp, x0; \ | ||
299 | vpand x4, x2, x2; | ||
300 | #define SI4_2(x0, x1, x2, x3, x4) \ | ||
301 | vpxor x0, x2, x2; \ | ||
302 | vpor x4, x0, x0; \ | ||
303 | vpxor x3, x0, x0; \ | ||
304 | vpand x2, x3, x3; \ | ||
305 | vpxor x3, x4, x4; \ | ||
306 | vpxor x1, x3, x3; \ | ||
307 | vpand x0, x1, x1; \ | ||
308 | vpxor x1, x4, x4; \ | ||
309 | vpxor x3, x0, x0; | ||
310 | |||
311 | #define SI5_1(x0, x1, x2, x3, x4) \ | ||
312 | vpor x2, x1, tp; \ | ||
313 | vpxor x1, x2, x2; \ | ||
314 | vpxor x3, tp, tp; \ | ||
315 | vpand x1, x3, x3; \ | ||
316 | vpxor x3, x2, x2; \ | ||
317 | vpor x0, x3, x3; \ | ||
318 | vpxor RNOT, x0, x0; \ | ||
319 | vpxor x2, x3, x3; \ | ||
320 | vpor x0, x2, x2; | ||
321 | #define SI5_2(x0, x1, x2, x3, x4) \ | ||
322 | vpxor tp, x1, x4; \ | ||
323 | vpxor x4, x2, x2; \ | ||
324 | vpand x0, x4, x4; \ | ||
325 | vpxor tp, x0, x0; \ | ||
326 | vpxor x3, tp, x1; \ | ||
327 | vpand x2, x0, x0; \ | ||
328 | vpxor x3, x2, x2; \ | ||
329 | vpxor x2, x0, x0; \ | ||
330 | vpxor x4, x2, x2; \ | ||
331 | vpxor x3, x4, x4; | ||
332 | |||
333 | #define SI6_1(x0, x1, x2, x3, x4) \ | ||
334 | vpxor x2, x0, x0; \ | ||
335 | vpand x3, x0, tp; \ | ||
336 | vpxor x3, x2, x2; \ | ||
337 | vpxor x2, tp, tp; \ | ||
338 | vpxor x1, x3, x3; \ | ||
339 | vpor x0, x2, x2; \ | ||
340 | vpxor x3, x2, x2; \ | ||
341 | vpand tp, x3, x3; | ||
342 | #define SI6_2(x0, x1, x2, x3, x4) \ | ||
343 | vpxor RNOT, tp, tp; \ | ||
344 | vpxor x1, x3, x3; \ | ||
345 | vpand x2, x1, x1; \ | ||
346 | vpxor tp, x0, x4; \ | ||
347 | vpxor x4, x3, x3; \ | ||
348 | vpxor x2, x4, x4; \ | ||
349 | vpxor x1, tp, x0; \ | ||
350 | vpxor x0, x2, x2; | ||
351 | |||
352 | #define SI7_1(x0, x1, x2, x3, x4) \ | ||
353 | vpand x0, x3, tp; \ | ||
354 | vpxor x2, x0, x0; \ | ||
355 | vpor x3, x2, x2; \ | ||
356 | vpxor x1, x3, x4; \ | ||
357 | vpxor RNOT, x0, x0; \ | ||
358 | vpor tp, x1, x1; \ | ||
359 | vpxor x0, x4, x4; \ | ||
360 | vpand x2, x0, x0; \ | ||
361 | vpxor x1, x0, x0; | ||
362 | #define SI7_2(x0, x1, x2, x3, x4) \ | ||
363 | vpand x2, x1, x1; \ | ||
364 | vpxor x2, tp, x3; \ | ||
365 | vpxor x3, x4, x4; \ | ||
366 | vpand x3, x2, x2; \ | ||
367 | vpor x0, x3, x3; \ | ||
368 | vpxor x4, x1, x1; \ | ||
369 | vpxor x4, x3, x3; \ | ||
370 | vpand x0, x4, x4; \ | ||
371 | vpxor x2, x4, x4; | ||
372 | |||
373 | #define get_key(i, j, t) \ | ||
374 | vbroadcastss (4*(i)+(j))*4(CTX), t; | ||
375 | |||
376 | #define K2(x0, x1, x2, x3, x4, i) \ | ||
377 | get_key(i, 0, RK0); \ | ||
378 | get_key(i, 1, RK1); \ | ||
379 | get_key(i, 2, RK2); \ | ||
380 | get_key(i, 3, RK3); \ | ||
381 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
382 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
383 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
384 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
385 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
386 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
387 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
388 | vpxor RK3, x3 ## 2, x3 ## 2; | ||
389 | |||
390 | #define LK2(x0, x1, x2, x3, x4, i) \ | ||
391 | vpslld $13, x0 ## 1, x4 ## 1; \ | ||
392 | vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
393 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
394 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
395 | vpslld $3, x2 ## 1, x4 ## 1; \ | ||
396 | vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
397 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
398 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
399 | vpslld $13, x0 ## 2, x4 ## 2; \ | ||
400 | vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
401 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
402 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
403 | vpslld $3, x2 ## 2, x4 ## 2; \ | ||
404 | vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
405 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
406 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
407 | vpslld $1, x1 ## 1, x4 ## 1; \ | ||
408 | vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
409 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
410 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
411 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
412 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
413 | get_key(i, 1, RK1); \ | ||
414 | vpslld $1, x1 ## 2, x4 ## 2; \ | ||
415 | vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
416 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
417 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
418 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
419 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
420 | get_key(i, 3, RK3); \ | ||
421 | vpslld $7, x3 ## 1, x4 ## 1; \ | ||
422 | vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
423 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
424 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
425 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
426 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
427 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
428 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
429 | get_key(i, 0, RK0); \ | ||
430 | vpslld $7, x3 ## 2, x4 ## 2; \ | ||
431 | vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
432 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
433 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
434 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
435 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
436 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
437 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
438 | get_key(i, 2, RK2); \ | ||
439 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
440 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
441 | vpslld $5, x0 ## 1, x4 ## 1; \ | ||
442 | vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
443 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
444 | vpslld $22, x2 ## 1, x4 ## 1; \ | ||
445 | vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
446 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
447 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
448 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
449 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
450 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
451 | vpslld $5, x0 ## 2, x4 ## 2; \ | ||
452 | vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
453 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
454 | vpslld $22, x2 ## 2, x4 ## 2; \ | ||
455 | vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
456 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
457 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
458 | vpxor RK2, x2 ## 2, x2 ## 2; | ||
459 | |||
460 | #define KL2(x0, x1, x2, x3, x4, i) \ | ||
461 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
462 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
463 | vpsrld $5, x0 ## 1, x4 ## 1; \ | ||
464 | vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
465 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
466 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
467 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
468 | vpsrld $22, x2 ## 1, x4 ## 1; \ | ||
469 | vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
470 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
471 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
472 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
473 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
474 | vpsrld $5, x0 ## 2, x4 ## 2; \ | ||
475 | vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
476 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
477 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
478 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
479 | vpsrld $22, x2 ## 2, x4 ## 2; \ | ||
480 | vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
481 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
482 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
483 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
484 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
485 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
486 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
487 | vpsrld $1, x1 ## 1, x4 ## 1; \ | ||
488 | vpslld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
489 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
490 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
491 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
492 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
493 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
494 | vpsrld $1, x1 ## 2, x4 ## 2; \ | ||
495 | vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
496 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
497 | vpsrld $7, x3 ## 1, x4 ## 1; \ | ||
498 | vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
499 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
500 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
501 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
502 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
503 | vpsrld $7, x3 ## 2, x4 ## 2; \ | ||
504 | vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
505 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
506 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
507 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
508 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
509 | vpsrld $13, x0 ## 1, x4 ## 1; \ | ||
510 | vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
511 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
512 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
513 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
514 | vpsrld $3, x2 ## 1, x4 ## 1; \ | ||
515 | vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
516 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
517 | vpsrld $13, x0 ## 2, x4 ## 2; \ | ||
518 | vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
519 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
520 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
521 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
522 | vpsrld $3, x2 ## 2, x4 ## 2; \ | ||
523 | vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
524 | vpor x4 ## 2, x2 ## 2, x2 ## 2; | ||
525 | |||
526 | #define S(SBOX, x0, x1, x2, x3, x4) \ | ||
527 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
528 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
529 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
530 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
531 | |||
532 | #define SP(SBOX, x0, x1, x2, x3, x4, i) \ | ||
533 | get_key(i, 0, RK0); \ | ||
534 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
535 | get_key(i, 2, RK2); \ | ||
536 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
537 | get_key(i, 3, RK3); \ | ||
538 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
539 | get_key(i, 1, RK1); \ | ||
540 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
541 | |||
542 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
543 | vpunpckldq x1, x0, t0; \ | ||
544 | vpunpckhdq x1, x0, t2; \ | ||
545 | vpunpckldq x3, x2, t1; \ | ||
546 | vpunpckhdq x3, x2, x3; \ | ||
547 | \ | ||
548 | vpunpcklqdq t1, t0, x0; \ | ||
549 | vpunpckhqdq t1, t0, x1; \ | ||
550 | vpunpcklqdq x3, t2, x2; \ | ||
551 | vpunpckhqdq x3, t2, x3; | ||
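The block I/O macros below wrap this transpose: four 128-bit blocks are loaded as the rows of a 4x4 matrix of 32-bit words, and transpose_4x4 flips the matrix so that each register ends up holding the same word of all four blocks. That column-sliced layout is what lets the S-box macros above process four blocks (times two halves, eight in total) in parallel. The vpunpck sequence computes a plain matrix transpose:

	#include <stdint.h>

	/* scalar equivalent: m[i][j] <-> m[j][i] */
	static void transpose_4x4(uint32_t m[4][4])
	{
		for (int i = 0; i < 4; i++)
			for (int j = i + 1; j < 4; j++) {
				uint32_t t = m[i][j];

				m[i][j] = m[j][i];
				m[j][i] = t;
			}
	}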
552 | |||
553 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | ||
554 | vmovdqu (0*4*4)(in), x0; \ | ||
555 | vmovdqu (1*4*4)(in), x1; \ | ||
556 | vmovdqu (2*4*4)(in), x2; \ | ||
557 | vmovdqu (3*4*4)(in), x3; \ | ||
558 | \ | ||
559 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
560 | |||
561 | #define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
562 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
563 | \ | ||
564 | vmovdqu x0, (0*4*4)(out); \ | ||
565 | vmovdqu x1, (1*4*4)(out); \ | ||
566 | vmovdqu x2, (2*4*4)(out); \ | ||
567 | vmovdqu x3, (3*4*4)(out); | ||
568 | |||
569 | #define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
570 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
571 | \ | ||
572 | vpxor (0*4*4)(out), x0, x0; \ | ||
573 | vmovdqu x0, (0*4*4)(out); \ | ||
574 | vpxor (1*4*4)(out), x1, x1; \ | ||
575 | vmovdqu x1, (1*4*4)(out); \ | ||
576 | vpxor (2*4*4)(out), x2, x2; \ | ||
577 | vmovdqu x2, (2*4*4)(out); \ | ||
578 | vpxor (3*4*4)(out), x3, x3; \ | ||
579 | vmovdqu x3, (3*4*4)(out); | ||
580 | |||
581 | .align 8 | ||
582 | .global __serpent_enc_blk_8way_avx | ||
583 | .type __serpent_enc_blk_8way_avx,@function; | ||
584 | |||
585 | __serpent_enc_blk_8way_avx: | ||
586 | /* input: | ||
587 | * %rdi: ctx, CTX | ||
588 | * %rsi: dst | ||
589 | * %rdx: src | ||
590 | * %rcx: bool, if true: xor output | ||
591 | */ | ||
592 | |||
593 | vpcmpeqd RNOT, RNOT, RNOT; | ||
594 | |||
595 | leaq (4*4*4)(%rdx), %rax; | ||
596 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
597 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
598 | |||
599 | K2(RA, RB, RC, RD, RE, 0); | ||
600 | S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); | ||
601 | S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); | ||
602 | S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); | ||
603 | S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); | ||
604 | S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); | ||
605 | S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); | ||
606 | S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); | ||
607 | S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); | ||
608 | S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); | ||
609 | S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); | ||
610 | S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); | ||
611 | S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); | ||
612 | S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); | ||
613 | S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); | ||
614 | S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); | ||
615 | S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); | ||
616 | S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); | ||
617 | S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); | ||
618 | S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); | ||
619 | S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); | ||
620 | S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); | ||
621 | S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); | ||
622 | S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); | ||
623 | S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); | ||
624 | S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); | ||
625 | S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); | ||
626 | S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); | ||
627 | S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); | ||
628 | S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); | ||
629 | S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); | ||
630 | S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); | ||
631 | S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); | ||
632 | |||
633 | leaq (4*4*4)(%rsi), %rax; | ||
634 | |||
635 | testb %cl, %cl; | ||
636 | jnz __enc_xor8; | ||
637 | |||
638 | write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
639 | write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
640 | |||
641 | ret; | ||
642 | |||
643 | __enc_xor8: | ||
644 | xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
645 | xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
646 | |||
647 | ret; | ||
648 | |||
649 | .align 8 | ||
650 | .global serpent_dec_blk_8way_avx | ||
651 | .type serpent_dec_blk_8way_avx,@function; | ||
652 | |||
653 | serpent_dec_blk_8way_avx: | ||
654 | /* input: | ||
655 | * %rdi: ctx, CTX | ||
656 | * %rsi: dst | ||
657 | * %rdx: src | ||
658 | */ | ||
659 | |||
660 | vpcmpeqd RNOT, RNOT, RNOT; | ||
661 | |||
662 | leaq (4*4*4)(%rdx), %rax; | ||
663 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
664 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
665 | |||
666 | K2(RA, RB, RC, RD, RE, 32); | ||
667 | SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); | ||
668 | SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); | ||
669 | SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); | ||
670 | SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); | ||
671 | SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); | ||
672 | SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); | ||
673 | SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); | ||
674 | SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); | ||
675 | SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); | ||
676 | SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); | ||
677 | SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); | ||
678 | SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); | ||
679 | SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); | ||
680 | SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); | ||
681 | SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); | ||
682 | SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); | ||
683 | SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); | ||
684 | SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); | ||
685 | SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); | ||
686 | SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); | ||
687 | SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); | ||
688 | SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); | ||
689 | SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); | ||
690 | SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); | ||
691 | SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); | ||
692 | SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); | ||
693 | SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); | ||
694 | SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); | ||
695 | SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); | ||
696 | SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); | ||
697 | SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); | ||
698 | S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); | ||
699 | |||
700 | leaq (4*4*4)(%rsi), %rax; | ||
701 | write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); | ||
702 | write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); | ||
703 | |||
704 | ret; | ||
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c new file mode 100644 index 000000000000..b36bdac237eb --- /dev/null +++ b/arch/x86/crypto/serpent_avx_glue.c | |||
@@ -0,0 +1,636 @@ | |||
1 | /* | ||
2 | * Glue Code for AVX assembler versions of Serpent Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Glue code based on serpent_sse2_glue.c by: | ||
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/hardirq.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/crypto.h> | ||
31 | #include <linux/err.h> | ||
32 | #include <crypto/algapi.h> | ||
33 | #include <crypto/serpent.h> | ||
34 | #include <crypto/cryptd.h> | ||
35 | #include <crypto/b128ops.h> | ||
36 | #include <crypto/ctr.h> | ||
37 | #include <crypto/lrw.h> | ||
38 | #include <crypto/xts.h> | ||
39 | #include <asm/xcr.h> | ||
40 | #include <asm/xsave.h> | ||
41 | #include <asm/crypto/serpent-avx.h> | ||
42 | #include <asm/crypto/ablk_helper.h> | ||
43 | #include <asm/crypto/glue_helper.h> | ||
44 | |||
45 | static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) | ||
46 | { | ||
47 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
48 | unsigned int j; | ||
49 | |||
50 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
51 | ivs[j] = src[j]; | ||
52 | |||
53 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
54 | |||
55 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
56 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); | ||
57 | } | ||
58 | |||
59 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) | ||
60 | { | ||
61 | be128 ctrblk; | ||
62 | |||
63 | u128_to_be128(&ctrblk, iv); | ||
64 | u128_inc(iv); | ||
65 | |||
66 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | ||
67 | u128_xor(dst, src, (u128 *)&ctrblk); | ||
68 | } | ||
69 | |||
70 | static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, | ||
71 | u128 *iv) | ||
72 | { | ||
73 | be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; | ||
74 | unsigned int i; | ||
75 | |||
76 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
77 | if (dst != src) | ||
78 | dst[i] = src[i]; | ||
79 | |||
80 | u128_to_be128(&ctrblks[i], iv); | ||
81 | u128_inc(iv); | ||
82 | } | ||
83 | |||
84 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
85 | } | ||
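serpent_crypt_ctr_xway() builds SERPENT_PARALLEL_BLOCKS consecutive big-endian counter blocks and hands them to the xor-variant of the 8-way primitive, which encrypts the counters and XORs the keystream directly into dst (pre-filled with the plaintext when the request is not already in place). A toy model of that shape, with an illustrative stand-in for the real primitive:

	#include <stdint.h>
	#include <string.h>

	#define NBLK 8	/* SERPENT_PARALLEL_BLOCKS */

	/* stand-in for serpent_enc_blk_xway_xor(): "encrypt" the counters
	 * and XOR the result into dst; a real cipher call goes here */
	static void enc_xor_8way(uint8_t *dst, const uint64_t ctr[NBLK])
	{
		for (int b = 0; b < NBLK; b++)
			for (int i = 0; i < 16; i++)
				dst[16 * b + i] ^= (uint8_t)(ctr[b] + i);
	}

	static void ctr_8way(uint8_t *dst, const uint8_t *src, uint64_t *ctr)
	{
		uint64_t ctrs[NBLK];

		if (dst != src)
			memcpy(dst, src, 16 * NBLK);	/* xor works in place */
		for (int b = 0; b < NBLK; b++)
			ctrs[b] = (*ctr)++;	/* consecutive counters */
		enc_xor_8way(dst, ctrs);
	}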
86 | |||
87 | static const struct common_glue_ctx serpent_enc = { | ||
88 | .num_funcs = 2, | ||
89 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
90 | |||
91 | .funcs = { { | ||
92 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
93 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } | ||
94 | }, { | ||
95 | .num_blocks = 1, | ||
96 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } | ||
97 | } } | ||
98 | }; | ||
99 | |||
100 | static const struct common_glue_ctx serpent_ctr = { | ||
101 | .num_funcs = 2, | ||
102 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
103 | |||
104 | .funcs = { { | ||
105 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
106 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } | ||
107 | }, { | ||
108 | .num_blocks = 1, | ||
109 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | ||
110 | } } | ||
111 | }; | ||
112 | |||
113 | static const struct common_glue_ctx serpent_dec = { | ||
114 | .num_funcs = 2, | ||
115 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
116 | |||
117 | .funcs = { { | ||
118 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
119 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } | ||
120 | }, { | ||
121 | .num_blocks = 1, | ||
122 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
123 | } } | ||
124 | }; | ||
125 | |||
126 | static const struct common_glue_ctx serpent_dec_cbc = { | ||
127 | .num_funcs = 2, | ||
128 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
129 | |||
130 | .funcs = { { | ||
131 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
132 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } | ||
133 | }, { | ||
134 | .num_blocks = 1, | ||
135 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
136 | } } | ||
137 | }; | ||
138 | |||
139 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
140 | struct scatterlist *src, unsigned int nbytes) | ||
141 | { | ||
142 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); | ||
143 | } | ||
144 | |||
145 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
146 | struct scatterlist *src, unsigned int nbytes) | ||
147 | { | ||
148 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); | ||
149 | } | ||
150 | |||
151 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
152 | struct scatterlist *src, unsigned int nbytes) | ||
153 | { | ||
154 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, | ||
155 | dst, src, nbytes); | ||
156 | } | ||
157 | |||
158 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
159 | struct scatterlist *src, unsigned int nbytes) | ||
160 | { | ||
161 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, | ||
162 | nbytes); | ||
163 | } | ||
164 | |||
165 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
166 | struct scatterlist *src, unsigned int nbytes) | ||
167 | { | ||
168 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); | ||
169 | } | ||
170 | |||
171 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
172 | { | ||
173 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, | ||
174 | NULL, fpu_enabled, nbytes); | ||
175 | } | ||
176 | |||
177 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
178 | { | ||
179 | glue_fpu_end(fpu_enabled); | ||
180 | } | ||
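serpent_fpu_begin()/serpent_fpu_end() thread SIMD ownership through the LRW/XTS callbacks below: kernel_fpu_begin() is relatively costly, so, as far as this glue layer is concerned, the FPU is only claimed once at least SERPENT_PARALLEL_BLOCKS blocks are pending, and is then held across walk iterations until the whole request is done. A toy model of the begin-once/end-once pattern; the comments mark where the real kernel_fpu_begin()/kernel_fpu_end() calls would sit:

	#include <stdbool.h>

	static bool fpu_begin(bool held, unsigned int blocks, unsigned int limit)
	{
		if (held || blocks < limit)
			return held;	/* too little work, or already ours */
		/* kernel_fpu_begin() here */
		return true;
	}

	static void fpu_end(bool held)
	{
		if (held) {
			/* kernel_fpu_end() here */
		}
	}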
181 | |||
182 | struct crypt_priv { | ||
183 | struct serpent_ctx *ctx; | ||
184 | bool fpu_enabled; | ||
185 | }; | ||
186 | |||
187 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
188 | { | ||
189 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
190 | struct crypt_priv *ctx = priv; | ||
191 | int i; | ||
192 | |||
193 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
194 | |||
195 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
196 | serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
197 | return; | ||
198 | } | ||
199 | |||
200 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
201 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | ||
202 | } | ||
203 | |||
204 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
205 | { | ||
206 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
207 | struct crypt_priv *ctx = priv; | ||
208 | int i; | ||
209 | |||
210 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
211 | |||
212 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
213 | serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
214 | return; | ||
215 | } | ||
216 | |||
217 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
218 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | ||
219 | } | ||
220 | |||
221 | struct serpent_lrw_ctx { | ||
222 | struct lrw_table_ctx lrw_table; | ||
223 | struct serpent_ctx serpent_ctx; | ||
224 | }; | ||
225 | |||
226 | static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
227 | unsigned int keylen) | ||
228 | { | ||
229 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
230 | int err; | ||
231 | |||
232 | err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - | ||
233 | SERPENT_BLOCK_SIZE); | ||
234 | if (err) | ||
235 | return err; | ||
236 | |||
237 | return lrw_init_table(&ctx->lrw_table, key + keylen - | ||
238 | SERPENT_BLOCK_SIZE); | ||
239 | } | ||
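lrw_serpent_setkey() documents the LRW key layout by construction: the final SERPENT_BLOCK_SIZE bytes seed the tweak multiplication table, and everything before them is the raw cipher key. The split, spelled out (an illustrative helper, not kernel API):

	#include <stdint.h>

	#define BLOCK 16	/* SERPENT_BLOCK_SIZE */

	static void lrw_split_key(const uint8_t *key, unsigned int keylen,
				  const uint8_t **cipher_key,
				  unsigned int *cipher_keylen,
				  const uint8_t **tweak_key)
	{
		*cipher_key    = key;			/* keylen - 16 bytes */
		*cipher_keylen = keylen - BLOCK;
		*tweak_key     = key + keylen - BLOCK;	/* -> lrw_init_table */
	}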
240 | |||
241 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
242 | struct scatterlist *src, unsigned int nbytes) | ||
243 | { | ||
244 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
245 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
246 | struct crypt_priv crypt_ctx = { | ||
247 | .ctx = &ctx->serpent_ctx, | ||
248 | .fpu_enabled = false, | ||
249 | }; | ||
250 | struct lrw_crypt_req req = { | ||
251 | .tbuf = buf, | ||
252 | .tbuflen = sizeof(buf), | ||
253 | |||
254 | .table_ctx = &ctx->lrw_table, | ||
255 | .crypt_ctx = &crypt_ctx, | ||
256 | .crypt_fn = encrypt_callback, | ||
257 | }; | ||
258 | int ret; | ||
259 | |||
260 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
261 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
262 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
263 | |||
264 | return ret; | ||
265 | } | ||
266 | |||
267 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
268 | struct scatterlist *src, unsigned int nbytes) | ||
269 | { | ||
270 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
271 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
272 | struct crypt_priv crypt_ctx = { | ||
273 | .ctx = &ctx->serpent_ctx, | ||
274 | .fpu_enabled = false, | ||
275 | }; | ||
276 | struct lrw_crypt_req req = { | ||
277 | .tbuf = buf, | ||
278 | .tbuflen = sizeof(buf), | ||
279 | |||
280 | .table_ctx = &ctx->lrw_table, | ||
281 | .crypt_ctx = &crypt_ctx, | ||
282 | .crypt_fn = decrypt_callback, | ||
283 | }; | ||
284 | int ret; | ||
285 | |||
286 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
287 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
288 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
289 | |||
290 | return ret; | ||
291 | } | ||
292 | |||
293 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | ||
294 | { | ||
295 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
296 | |||
297 | lrw_free_table(&ctx->lrw_table); | ||
298 | } | ||
299 | |||
300 | struct serpent_xts_ctx { | ||
301 | struct serpent_ctx tweak_ctx; | ||
302 | struct serpent_ctx crypt_ctx; | ||
303 | }; | ||
304 | |||
305 | static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
306 | unsigned int keylen) | ||
307 | { | ||
308 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
309 | u32 *flags = &tfm->crt_flags; | ||
310 | int err; | ||
311 | |||
312 | /* the key consists of two keys of equal size concatenated, | ||
313 | * therefore the total length must be even | ||
314 | */ | ||
315 | if (keylen % 2) { | ||
316 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
317 | return -EINVAL; | ||
318 | } | ||
319 | |||
320 | /* first half of xts-key is for crypt */ | ||
321 | err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); | ||
322 | if (err) | ||
323 | return err; | ||
324 | |||
325 | /* second half of xts-key is for tweak */ | ||
326 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); | ||
327 | } | ||
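XTS splits the key differently from LRW: two equal halves, the first for the data-unit cipher and the second for the tweak cipher, which is why odd key lengths are rejected above and why the alg entries below advertise min/max keysize as twice the plain Serpent sizes. Schematically (illustrative helper only):

	#include <stdint.h>

	static void xts_split_key(const uint8_t *key, unsigned int keylen,
				  const uint8_t **crypt_key,
				  const uint8_t **tweak_key,
				  unsigned int *half_len)
	{
		*half_len  = keylen / 2;	/* keylen checked to be even */
		*crypt_key = key;		/* first half: data cipher   */
		*tweak_key = key + *half_len;	/* second half: tweak cipher */
	}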
328 | |||
329 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
330 | struct scatterlist *src, unsigned int nbytes) | ||
331 | { | ||
332 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
333 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
334 | struct crypt_priv crypt_ctx = { | ||
335 | .ctx = &ctx->crypt_ctx, | ||
336 | .fpu_enabled = false, | ||
337 | }; | ||
338 | struct xts_crypt_req req = { | ||
339 | .tbuf = buf, | ||
340 | .tbuflen = sizeof(buf), | ||
341 | |||
342 | .tweak_ctx = &ctx->tweak_ctx, | ||
343 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
344 | .crypt_ctx = &crypt_ctx, | ||
345 | .crypt_fn = encrypt_callback, | ||
346 | }; | ||
347 | int ret; | ||
348 | |||
349 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
350 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
351 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
352 | |||
353 | return ret; | ||
354 | } | ||
355 | |||
356 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
357 | struct scatterlist *src, unsigned int nbytes) | ||
358 | { | ||
359 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
360 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
361 | struct crypt_priv crypt_ctx = { | ||
362 | .ctx = &ctx->crypt_ctx, | ||
363 | .fpu_enabled = false, | ||
364 | }; | ||
365 | struct xts_crypt_req req = { | ||
366 | .tbuf = buf, | ||
367 | .tbuflen = sizeof(buf), | ||
368 | |||
369 | .tweak_ctx = &ctx->tweak_ctx, | ||
370 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
371 | .crypt_ctx = &crypt_ctx, | ||
372 | .crypt_fn = decrypt_callback, | ||
373 | }; | ||
374 | int ret; | ||
375 | |||
376 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
377 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
378 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
379 | |||
380 | return ret; | ||
381 | } | ||
382 | |||
383 | static struct crypto_alg serpent_algs[10] = { { | ||
384 | .cra_name = "__ecb-serpent-avx", | ||
385 | .cra_driver_name = "__driver-ecb-serpent-avx", | ||
386 | .cra_priority = 0, | ||
387 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
388 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
389 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
390 | .cra_alignmask = 0, | ||
391 | .cra_type = &crypto_blkcipher_type, | ||
392 | .cra_module = THIS_MODULE, | ||
393 | .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), | ||
394 | .cra_u = { | ||
395 | .blkcipher = { | ||
396 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
397 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
398 | .setkey = serpent_setkey, | ||
399 | .encrypt = ecb_encrypt, | ||
400 | .decrypt = ecb_decrypt, | ||
401 | }, | ||
402 | }, | ||
403 | }, { | ||
404 | .cra_name = "__cbc-serpent-avx", | ||
405 | .cra_driver_name = "__driver-cbc-serpent-avx", | ||
406 | .cra_priority = 0, | ||
407 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
408 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
409 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
410 | .cra_alignmask = 0, | ||
411 | .cra_type = &crypto_blkcipher_type, | ||
412 | .cra_module = THIS_MODULE, | ||
413 | .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), | ||
414 | .cra_u = { | ||
415 | .blkcipher = { | ||
416 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
417 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
418 | .setkey = serpent_setkey, | ||
419 | .encrypt = cbc_encrypt, | ||
420 | .decrypt = cbc_decrypt, | ||
421 | }, | ||
422 | }, | ||
423 | }, { | ||
424 | .cra_name = "__ctr-serpent-avx", | ||
425 | .cra_driver_name = "__driver-ctr-serpent-avx", | ||
426 | .cra_priority = 0, | ||
427 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
428 | .cra_blocksize = 1, | ||
429 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
430 | .cra_alignmask = 0, | ||
431 | .cra_type = &crypto_blkcipher_type, | ||
432 | .cra_module = THIS_MODULE, | ||
433 | .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), | ||
434 | .cra_u = { | ||
435 | .blkcipher = { | ||
436 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
437 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
438 | .ivsize = SERPENT_BLOCK_SIZE, | ||
439 | .setkey = serpent_setkey, | ||
440 | .encrypt = ctr_crypt, | ||
441 | .decrypt = ctr_crypt, | ||
442 | }, | ||
443 | }, | ||
444 | }, { | ||
445 | .cra_name = "__lrw-serpent-avx", | ||
446 | .cra_driver_name = "__driver-lrw-serpent-avx", | ||
447 | .cra_priority = 0, | ||
448 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
449 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
450 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | ||
451 | .cra_alignmask = 0, | ||
452 | .cra_type = &crypto_blkcipher_type, | ||
453 | .cra_module = THIS_MODULE, | ||
454 | .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), | ||
455 | .cra_exit = lrw_exit_tfm, | ||
456 | .cra_u = { | ||
457 | .blkcipher = { | ||
458 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
459 | SERPENT_BLOCK_SIZE, | ||
460 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
461 | SERPENT_BLOCK_SIZE, | ||
462 | .ivsize = SERPENT_BLOCK_SIZE, | ||
463 | .setkey = lrw_serpent_setkey, | ||
464 | .encrypt = lrw_encrypt, | ||
465 | .decrypt = lrw_decrypt, | ||
466 | }, | ||
467 | }, | ||
468 | }, { | ||
469 | .cra_name = "__xts-serpent-avx", | ||
470 | .cra_driver_name = "__driver-xts-serpent-avx", | ||
471 | .cra_priority = 0, | ||
472 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
473 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
474 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | ||
475 | .cra_alignmask = 0, | ||
476 | .cra_type = &crypto_blkcipher_type, | ||
477 | .cra_module = THIS_MODULE, | ||
478 | .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), | ||
479 | .cra_u = { | ||
480 | .blkcipher = { | ||
481 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
482 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
483 | .ivsize = SERPENT_BLOCK_SIZE, | ||
484 | .setkey = xts_serpent_setkey, | ||
485 | .encrypt = xts_encrypt, | ||
486 | .decrypt = xts_decrypt, | ||
487 | }, | ||
488 | }, | ||
489 | }, { | ||
490 | .cra_name = "ecb(serpent)", | ||
491 | .cra_driver_name = "ecb-serpent-avx", | ||
492 | .cra_priority = 500, | ||
493 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
494 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
495 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
496 | .cra_alignmask = 0, | ||
497 | .cra_type = &crypto_ablkcipher_type, | ||
498 | .cra_module = THIS_MODULE, | ||
499 | .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), | ||
500 | .cra_init = ablk_init, | ||
501 | .cra_exit = ablk_exit, | ||
502 | .cra_u = { | ||
503 | .ablkcipher = { | ||
504 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
505 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
506 | .setkey = ablk_set_key, | ||
507 | .encrypt = ablk_encrypt, | ||
508 | .decrypt = ablk_decrypt, | ||
509 | }, | ||
510 | }, | ||
511 | }, { | ||
512 | .cra_name = "cbc(serpent)", | ||
513 | .cra_driver_name = "cbc-serpent-avx", | ||
514 | .cra_priority = 500, | ||
515 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
516 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
517 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
518 | .cra_alignmask = 0, | ||
519 | .cra_type = &crypto_ablkcipher_type, | ||
520 | .cra_module = THIS_MODULE, | ||
521 | .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), | ||
522 | .cra_init = ablk_init, | ||
523 | .cra_exit = ablk_exit, | ||
524 | .cra_u = { | ||
525 | .ablkcipher = { | ||
526 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
527 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
528 | .ivsize = SERPENT_BLOCK_SIZE, | ||
529 | .setkey = ablk_set_key, | ||
530 | .encrypt = __ablk_encrypt, | ||
531 | .decrypt = ablk_decrypt, | ||
532 | }, | ||
533 | }, | ||
534 | }, { | ||
535 | .cra_name = "ctr(serpent)", | ||
536 | .cra_driver_name = "ctr-serpent-avx", | ||
537 | .cra_priority = 500, | ||
538 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
539 | .cra_blocksize = 1, | ||
540 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
541 | .cra_alignmask = 0, | ||
542 | .cra_type = &crypto_ablkcipher_type, | ||
543 | .cra_module = THIS_MODULE, | ||
544 | .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), | ||
545 | .cra_init = ablk_init, | ||
546 | .cra_exit = ablk_exit, | ||
547 | .cra_u = { | ||
548 | .ablkcipher = { | ||
549 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
550 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
551 | .ivsize = SERPENT_BLOCK_SIZE, | ||
552 | .setkey = ablk_set_key, | ||
553 | .encrypt = ablk_encrypt, | ||
554 | .decrypt = ablk_encrypt, | ||
555 | .geniv = "chainiv", | ||
556 | }, | ||
557 | }, | ||
558 | }, { | ||
559 | .cra_name = "lrw(serpent)", | ||
560 | .cra_driver_name = "lrw-serpent-avx", | ||
561 | .cra_priority = 500, | ||
562 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
563 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
564 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
565 | .cra_alignmask = 0, | ||
566 | .cra_type = &crypto_ablkcipher_type, | ||
567 | .cra_module = THIS_MODULE, | ||
568 | .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), | ||
569 | .cra_init = ablk_init, | ||
570 | .cra_exit = ablk_exit, | ||
571 | .cra_u = { | ||
572 | .ablkcipher = { | ||
573 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
574 | SERPENT_BLOCK_SIZE, | ||
575 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
576 | SERPENT_BLOCK_SIZE, | ||
577 | .ivsize = SERPENT_BLOCK_SIZE, | ||
578 | .setkey = ablk_set_key, | ||
579 | .encrypt = ablk_encrypt, | ||
580 | .decrypt = ablk_decrypt, | ||
581 | }, | ||
582 | }, | ||
583 | }, { | ||
584 | .cra_name = "xts(serpent)", | ||
585 | .cra_driver_name = "xts-serpent-avx", | ||
586 | .cra_priority = 500, | ||
587 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
588 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
589 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
590 | .cra_alignmask = 0, | ||
591 | .cra_type = &crypto_ablkcipher_type, | ||
592 | .cra_module = THIS_MODULE, | ||
593 | .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), | ||
594 | .cra_init = ablk_init, | ||
595 | .cra_exit = ablk_exit, | ||
596 | .cra_u = { | ||
597 | .ablkcipher = { | ||
598 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
599 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
600 | .ivsize = SERPENT_BLOCK_SIZE, | ||
601 | .setkey = ablk_set_key, | ||
602 | .encrypt = ablk_encrypt, | ||
603 | .decrypt = ablk_decrypt, | ||
604 | }, | ||
605 | }, | ||
606 | } }; | ||
607 | |||
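
The first five entries are registered with cra_priority 0 and "__"-prefixed names, so the crypto API never hands them out directly; only the cryptd-backed async wrappers at priority 500 are user-visible, and they outrank the plain C serpent implementation. A hedged kernel-context usage sketch (crypto_alloc_ablkcipher() is the real API of this era; error handling trimmed to the essentials):

	struct crypto_ablkcipher *tfm;

	/* resolves to "cbc-serpent-avx" (priority 500) while this
	 * module is loaded, and to a lower-priority serpent
	 * implementation otherwise */
	tfm = crypto_alloc_ablkcipher("cbc(serpent)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	crypto_free_ablkcipher(tfm);
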
608 | static int __init serpent_init(void) | ||
609 | { | ||
610 | u64 xcr0; | ||
611 | |||
612 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
613 | printk(KERN_INFO "AVX instructions are not detected.\n"); | ||
614 | return -ENODEV; | ||
615 | } | ||
616 | |||
617 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
618 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
619 | printk(KERN_INFO "AVX detected but unusable.\n"); | ||
620 | return -ENODEV; | ||
621 | } | ||
622 | |||
623 | return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
624 | } | ||
625 | |||
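
serpent_init() refuses to load unless the OS has enabled both SSE and YMM state saving in XCR0. The test reduces to a mask check (XSTATE_SSE is bit 1, XSTATE_YMM is bit 2, so the combined mask is 0x6); standalone sketch:

#include <stdint.h>

static int avx_state_usable(uint64_t xcr0)
{
	const uint64_t mask = (1ULL << 1) | (1ULL << 2); /* SSE | YMM */

	return (xcr0 & mask) == mask;
}
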
626 | static void __exit serpent_exit(void) | ||
627 | { | ||
628 | crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
629 | } | ||
630 | |||
631 | module_init(serpent_init); | ||
632 | module_exit(serpent_exit); | ||
633 | |||
634 | MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); | ||
635 | MODULE_LICENSE("GPL"); | ||
636 | MODULE_ALIAS("serpent"); | ||
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 4b21be85e0a1..d679c8675f4a 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c | |||
@@ -41,358 +41,145 @@ | |||
41 | #include <crypto/ctr.h> | 41 | #include <crypto/ctr.h> |
42 | #include <crypto/lrw.h> | 42 | #include <crypto/lrw.h> |
43 | #include <crypto/xts.h> | 43 | #include <crypto/xts.h> |
44 | #include <asm/i387.h> | 44 | #include <asm/crypto/serpent-sse2.h> |
45 | #include <asm/serpent.h> | 45 | #include <asm/crypto/ablk_helper.h> |
46 | #include <crypto/scatterwalk.h> | 46 | #include <asm/crypto/glue_helper.h> |
47 | #include <linux/workqueue.h> | ||
48 | #include <linux/spinlock.h> | ||
49 | |||
50 | struct async_serpent_ctx { | ||
51 | struct cryptd_ablkcipher *cryptd_tfm; | ||
52 | }; | ||
53 | 47 | ||
54 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | 48 | static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) |
55 | { | ||
56 | if (fpu_enabled) | ||
57 | return true; | ||
58 | |||
59 | /* SSE2 is only used when the chunk to be processed is large | ||
60 | * enough, so do not enable the FPU until it is necessary. | ||
61 | */ | ||
62 | if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) | ||
63 | return false; | ||
64 | |||
65 | kernel_fpu_begin(); | ||
66 | return true; | ||
67 | } | ||
68 | |||
69 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
70 | { | 49 | { |
71 | if (fpu_enabled) | 50 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; |
72 | kernel_fpu_end(); | 51 | unsigned int j; |
73 | } | ||
74 | |||
75 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
76 | bool enc) | ||
77 | { | ||
78 | bool fpu_enabled = false; | ||
79 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
80 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
81 | unsigned int nbytes; | ||
82 | int err; | ||
83 | |||
84 | err = blkcipher_walk_virt(desc, walk); | ||
85 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
86 | |||
87 | while ((nbytes = walk->nbytes)) { | ||
88 | u8 *wsrc = walk->src.virt.addr; | ||
89 | u8 *wdst = walk->dst.virt.addr; | ||
90 | |||
91 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
92 | |||
93 | /* Process multi-block batch */ | ||
94 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
95 | do { | ||
96 | if (enc) | ||
97 | serpent_enc_blk_xway(ctx, wdst, wsrc); | ||
98 | else | ||
99 | serpent_dec_blk_xway(ctx, wdst, wsrc); | ||
100 | |||
101 | wsrc += bsize * SERPENT_PARALLEL_BLOCKS; | ||
102 | wdst += bsize * SERPENT_PARALLEL_BLOCKS; | ||
103 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
104 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
105 | |||
106 | if (nbytes < bsize) | ||
107 | goto done; | ||
108 | } | ||
109 | |||
110 | /* Handle leftovers */ | ||
111 | do { | ||
112 | if (enc) | ||
113 | __serpent_encrypt(ctx, wdst, wsrc); | ||
114 | else | ||
115 | __serpent_decrypt(ctx, wdst, wsrc); | ||
116 | |||
117 | wsrc += bsize; | ||
118 | wdst += bsize; | ||
119 | nbytes -= bsize; | ||
120 | } while (nbytes >= bsize); | ||
121 | |||
122 | done: | ||
123 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
124 | } | ||
125 | 52 | ||
126 | serpent_fpu_end(fpu_enabled); | 53 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) |
127 | return err; | 54 | ivs[j] = src[j]; |
128 | } | ||
129 | 55 | ||
130 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 56 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); |
131 | struct scatterlist *src, unsigned int nbytes) | ||
132 | { | ||
133 | struct blkcipher_walk walk; | ||
134 | 57 | ||
135 | blkcipher_walk_init(&walk, dst, src, nbytes); | 58 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) |
136 | return ecb_crypt(desc, &walk, true); | 59 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); |
137 | } | 60 | } |
138 | 61 | ||
139 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 62 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) |
140 | struct scatterlist *src, unsigned int nbytes) | ||
141 | { | 63 | { |
142 | struct blkcipher_walk walk; | 64 | be128 ctrblk; |
143 | 65 | ||
144 | blkcipher_walk_init(&walk, dst, src, nbytes); | 66 | u128_to_be128(&ctrblk, iv); |
145 | return ecb_crypt(desc, &walk, false); | 67 | u128_inc(iv); |
146 | } | ||
147 | 68 | ||
148 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | 69 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
149 | struct blkcipher_walk *walk) | 70 | u128_xor(dst, src, (u128 *)&ctrblk); |
150 | { | ||
151 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
152 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
153 | unsigned int nbytes = walk->nbytes; | ||
154 | u128 *src = (u128 *)walk->src.virt.addr; | ||
155 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
156 | u128 *iv = (u128 *)walk->iv; | ||
157 | |||
158 | do { | ||
159 | u128_xor(dst, src, iv); | ||
160 | __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); | ||
161 | iv = dst; | ||
162 | |||
163 | src += 1; | ||
164 | dst += 1; | ||
165 | nbytes -= bsize; | ||
166 | } while (nbytes >= bsize); | ||
167 | |||
168 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
169 | return nbytes; | ||
170 | } | 71 | } |
171 | 72 | ||
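
The CTR helpers treat the IV as a 128-bit big-endian counter held in two host-endian 64-bit halves (a high, b low); u128_inc() and u128_to_be128(), open-coded on the left, now come from the shared glue headers. Their behavior, restated as a standalone sketch:

#include <stdint.h>

struct u128_sketch {
	uint64_t a;	/* high 64 bits */
	uint64_t b;	/* low 64 bits */
};

static void u128_inc_sketch(struct u128_sketch *i)
{
	i->b++;
	if (!i->b)	/* low half wrapped: carry into the high half */
		i->a++;
}
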
172 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 73 | static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, |
173 | struct scatterlist *src, unsigned int nbytes) | 74 | u128 *iv) |
174 | { | 75 | { |
175 | struct blkcipher_walk walk; | 76 | be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; |
176 | int err; | 77 | unsigned int i; |
177 | 78 | ||
178 | blkcipher_walk_init(&walk, dst, src, nbytes); | 79 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { |
179 | err = blkcipher_walk_virt(desc, &walk); | 80 | if (dst != src) |
81 | dst[i] = src[i]; | ||
180 | 82 | ||
181 | while ((nbytes = walk.nbytes)) { | 83 | u128_to_be128(&ctrblks[i], iv); |
182 | nbytes = __cbc_encrypt(desc, &walk); | 84 | u128_inc(iv); |
183 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
184 | } | 85 | } |
185 | 86 | ||
186 | return err; | 87 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); |
187 | } | 88 | } |
188 | 89 | ||
189 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 90 | static const struct common_glue_ctx serpent_enc = { |
190 | struct blkcipher_walk *walk) | 91 | .num_funcs = 2, |
191 | { | 92 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
192 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
193 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
194 | unsigned int nbytes = walk->nbytes; | ||
195 | u128 *src = (u128 *)walk->src.virt.addr; | ||
196 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
197 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
198 | u128 last_iv; | ||
199 | int i; | ||
200 | |||
201 | /* Start of the last block. */ | ||
202 | src += nbytes / bsize - 1; | ||
203 | dst += nbytes / bsize - 1; | ||
204 | |||
205 | last_iv = *src; | ||
206 | |||
207 | /* Process multi-block batch */ | ||
208 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
209 | do { | ||
210 | nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); | ||
211 | src -= SERPENT_PARALLEL_BLOCKS - 1; | ||
212 | dst -= SERPENT_PARALLEL_BLOCKS - 1; | ||
213 | |||
214 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
215 | ivs[i] = src[i]; | ||
216 | |||
217 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
218 | |||
219 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
220 | u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); | ||
221 | |||
222 | nbytes -= bsize; | ||
223 | if (nbytes < bsize) | ||
224 | goto done; | ||
225 | 93 | ||
226 | u128_xor(dst, dst, src - 1); | 94 | .funcs = { { |
227 | src -= 1; | 95 | .num_blocks = SERPENT_PARALLEL_BLOCKS, |
228 | dst -= 1; | 96 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } |
229 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | 97 | }, { |
230 | 98 | .num_blocks = 1, | |
231 | if (nbytes < bsize) | 99 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } |
232 | goto done; | 100 | } } |
233 | } | 101 | }; |
234 | |||
235 | /* Handle leftovers */ | ||
236 | for (;;) { | ||
237 | __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); | ||
238 | |||
239 | nbytes -= bsize; | ||
240 | if (nbytes < bsize) | ||
241 | break; | ||
242 | 102 | ||
243 | u128_xor(dst, dst, src - 1); | 103 | static const struct common_glue_ctx serpent_ctr = { |
244 | src -= 1; | 104 | .num_funcs = 2, |
245 | dst -= 1; | 105 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
246 | } | 106 | |
107 | .funcs = { { | ||
108 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
109 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } | ||
110 | }, { | ||
111 | .num_blocks = 1, | ||
112 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | ||
113 | } } | ||
114 | }; | ||
247 | 115 | ||
248 | done: | 116 | static const struct common_glue_ctx serpent_dec = { |
249 | u128_xor(dst, dst, (u128 *)walk->iv); | 117 | .num_funcs = 2, |
250 | *(u128 *)walk->iv = last_iv; | 118 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
119 | |||
120 | .funcs = { { | ||
121 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
122 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } | ||
123 | }, { | ||
124 | .num_blocks = 1, | ||
125 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
126 | } } | ||
127 | }; | ||
251 | 128 | ||
252 | return nbytes; | 129 | static const struct common_glue_ctx serpent_dec_cbc = { |
253 | } | 130 | .num_funcs = 2, |
131 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
132 | |||
133 | .funcs = { { | ||
134 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
135 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } | ||
136 | }, { | ||
137 | .num_blocks = 1, | ||
138 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
139 | } } | ||
140 | }; | ||
254 | 141 | ||
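
Each common_glue_ctx table lists worker functions from the widest batch down to a single block; the glue_helper walkers consume as many full batches as possible at each width before falling through to the next entry. An illustrative sketch of that dispatch with simplified types (not the glue_helper sources themselves):

typedef void (*ecb_fn_t)(void *ctx, unsigned char *dst,
			 const unsigned char *src);

struct glue_func_sketch {
	unsigned int num_blocks;
	ecb_fn_t fn;
};

static unsigned int ecb_dispatch_sketch(const struct glue_func_sketch *fns,
					unsigned int nfuncs, void *ctx,
					unsigned char *dst,
					const unsigned char *src,
					unsigned int nblocks)
{
	unsigned int i;

	for (i = 0; i < nfuncs; i++) {
		while (nblocks >= fns[i].num_blocks) {
			fns[i].fn(ctx, dst, src);
			dst += fns[i].num_blocks * 16;	/* 128-bit blocks */
			src += fns[i].num_blocks * 16;
			nblocks -= fns[i].num_blocks;
		}
	}
	return nblocks;	/* 0 once everything is processed */
}
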
255 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 142 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
256 | struct scatterlist *src, unsigned int nbytes) | 143 | struct scatterlist *src, unsigned int nbytes) |
257 | { | 144 | { |
258 | bool fpu_enabled = false; | 145 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); |
259 | struct blkcipher_walk walk; | ||
260 | int err; | ||
261 | |||
262 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
263 | err = blkcipher_walk_virt(desc, &walk); | ||
264 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
265 | |||
266 | while ((nbytes = walk.nbytes)) { | ||
267 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
268 | nbytes = __cbc_decrypt(desc, &walk); | ||
269 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
270 | } | ||
271 | |||
272 | serpent_fpu_end(fpu_enabled); | ||
273 | return err; | ||
274 | } | 146 | } |
275 | 147 | ||
276 | static inline void u128_to_be128(be128 *dst, const u128 *src) | 148 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
149 | struct scatterlist *src, unsigned int nbytes) | ||
277 | { | 150 | { |
278 | dst->a = cpu_to_be64(src->a); | 151 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); |
279 | dst->b = cpu_to_be64(src->b); | ||
280 | } | 152 | } |
281 | 153 | ||
282 | static inline void be128_to_u128(u128 *dst, const be128 *src) | 154 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
155 | struct scatterlist *src, unsigned int nbytes) | ||
283 | { | 156 | { |
284 | dst->a = be64_to_cpu(src->a); | 157 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, |
285 | dst->b = be64_to_cpu(src->b); | 158 | dst, src, nbytes); |
286 | } | 159 | } |
287 | 160 | ||
288 | static inline void u128_inc(u128 *i) | 161 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
162 | struct scatterlist *src, unsigned int nbytes) | ||
289 | { | 163 | { |
290 | i->b++; | 164 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, |
291 | if (!i->b) | 165 | nbytes); |
292 | i->a++; | ||
293 | } | 166 | } |
294 | 167 | ||
295 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 168 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
296 | struct blkcipher_walk *walk) | 169 | struct scatterlist *src, unsigned int nbytes) |
297 | { | 170 | { |
298 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 171 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); |
299 | u8 *ctrblk = walk->iv; | ||
300 | u8 keystream[SERPENT_BLOCK_SIZE]; | ||
301 | u8 *src = walk->src.virt.addr; | ||
302 | u8 *dst = walk->dst.virt.addr; | ||
303 | unsigned int nbytes = walk->nbytes; | ||
304 | |||
305 | __serpent_encrypt(ctx, keystream, ctrblk); | ||
306 | crypto_xor(keystream, src, nbytes); | ||
307 | memcpy(dst, keystream, nbytes); | ||
308 | |||
309 | crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); | ||
310 | } | 172 | } |
311 | 173 | ||
312 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | 174 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) |
313 | struct blkcipher_walk *walk) | ||
314 | { | 175 | { |
315 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 176 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, |
316 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | 177 | NULL, fpu_enabled, nbytes); |
317 | unsigned int nbytes = walk->nbytes; | ||
318 | u128 *src = (u128 *)walk->src.virt.addr; | ||
319 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
320 | u128 ctrblk; | ||
321 | be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; | ||
322 | int i; | ||
323 | |||
324 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
325 | |||
326 | /* Process multi-block batch */ | ||
327 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
328 | do { | ||
329 | /* create ctrblks for parallel encrypt */ | ||
330 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
331 | if (dst != src) | ||
332 | dst[i] = src[i]; | ||
333 | |||
334 | u128_to_be128(&ctrblocks[i], &ctrblk); | ||
335 | u128_inc(&ctrblk); | ||
336 | } | ||
337 | |||
338 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, | ||
339 | (u8 *)ctrblocks); | ||
340 | |||
341 | src += SERPENT_PARALLEL_BLOCKS; | ||
342 | dst += SERPENT_PARALLEL_BLOCKS; | ||
343 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
344 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
345 | |||
346 | if (nbytes < bsize) | ||
347 | goto done; | ||
348 | } | ||
349 | |||
350 | /* Handle leftovers */ | ||
351 | do { | ||
352 | if (dst != src) | ||
353 | *dst = *src; | ||
354 | |||
355 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
356 | u128_inc(&ctrblk); | ||
357 | |||
358 | __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
359 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
360 | |||
361 | src += 1; | ||
362 | dst += 1; | ||
363 | nbytes -= bsize; | ||
364 | } while (nbytes >= bsize); | ||
365 | |||
366 | done: | ||
367 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
368 | return nbytes; | ||
369 | } | 178 | } |
370 | 179 | ||
371 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 180 | static inline void serpent_fpu_end(bool fpu_enabled) |
372 | struct scatterlist *src, unsigned int nbytes) | ||
373 | { | 181 | { |
374 | bool fpu_enabled = false; | 182 | glue_fpu_end(fpu_enabled); |
375 | struct blkcipher_walk walk; | ||
376 | int err; | ||
377 | |||
378 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
379 | err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); | ||
380 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
381 | |||
382 | while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { | ||
383 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
384 | nbytes = __ctr_crypt(desc, &walk); | ||
385 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
386 | } | ||
387 | |||
388 | serpent_fpu_end(fpu_enabled); | ||
389 | |||
390 | if (walk.nbytes) { | ||
391 | ctr_crypt_final(desc, &walk); | ||
392 | err = blkcipher_walk_done(desc, &walk, 0); | ||
393 | } | ||
394 | |||
395 | return err; | ||
396 | } | 183 | } |
397 | 184 | ||
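
The open-coded serpent_fpu_begin() removed on the left encodes the policy that glue_fpu_begin() now centralizes: kernel_fpu_begin()/kernel_fpu_end() are expensive, so the FPU is claimed only once at least one full parallel batch is pending. Restated as a kernel-context sketch:

static bool fpu_begin_sketch(bool fpu_enabled, unsigned int nbytes)
{
	if (fpu_enabled)
		return true;

	/* not worth saving the FPU state for a sub-batch tail */
	if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
		return false;

	kernel_fpu_begin();
	return true;
}
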
398 | struct crypt_priv { | 185 | struct crypt_priv { |
@@ -596,106 +383,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
596 | return ret; | 383 | return ret; |
597 | } | 384 | } |
598 | 385 | ||
599 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
600 | unsigned int key_len) | ||
601 | { | ||
602 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
603 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; | ||
604 | int err; | ||
605 | |||
606 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
607 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) | ||
608 | & CRYPTO_TFM_REQ_MASK); | ||
609 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
610 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
611 | & CRYPTO_TFM_RES_MASK); | ||
612 | return err; | ||
613 | } | ||
614 | |||
615 | static int __ablk_encrypt(struct ablkcipher_request *req) | ||
616 | { | ||
617 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
618 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
619 | struct blkcipher_desc desc; | ||
620 | |||
621 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
622 | desc.info = req->info; | ||
623 | desc.flags = 0; | ||
624 | |||
625 | return crypto_blkcipher_crt(desc.tfm)->encrypt( | ||
626 | &desc, req->dst, req->src, req->nbytes); | ||
627 | } | ||
628 | |||
629 | static int ablk_encrypt(struct ablkcipher_request *req) | ||
630 | { | ||
631 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
632 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
633 | |||
634 | if (!irq_fpu_usable()) { | ||
635 | struct ablkcipher_request *cryptd_req = | ||
636 | ablkcipher_request_ctx(req); | ||
637 | |||
638 | memcpy(cryptd_req, req, sizeof(*req)); | ||
639 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
640 | |||
641 | return crypto_ablkcipher_encrypt(cryptd_req); | ||
642 | } else { | ||
643 | return __ablk_encrypt(req); | ||
644 | } | ||
645 | } | ||
646 | |||
647 | static int ablk_decrypt(struct ablkcipher_request *req) | ||
648 | { | ||
649 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
650 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
651 | |||
652 | if (!irq_fpu_usable()) { | ||
653 | struct ablkcipher_request *cryptd_req = | ||
654 | ablkcipher_request_ctx(req); | ||
655 | |||
656 | memcpy(cryptd_req, req, sizeof(*req)); | ||
657 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
658 | |||
659 | return crypto_ablkcipher_decrypt(cryptd_req); | ||
660 | } else { | ||
661 | struct blkcipher_desc desc; | ||
662 | |||
663 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
664 | desc.info = req->info; | ||
665 | desc.flags = 0; | ||
666 | |||
667 | return crypto_blkcipher_crt(desc.tfm)->decrypt( | ||
668 | &desc, req->dst, req->src, req->nbytes); | ||
669 | } | ||
670 | } | ||
671 | |||
672 | static void ablk_exit(struct crypto_tfm *tfm) | ||
673 | { | ||
674 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
675 | |||
676 | cryptd_free_ablkcipher(ctx->cryptd_tfm); | ||
677 | } | ||
678 | |||
679 | static int ablk_init(struct crypto_tfm *tfm) | ||
680 | { | ||
681 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
682 | struct cryptd_ablkcipher *cryptd_tfm; | ||
683 | char drv_name[CRYPTO_MAX_ALG_NAME]; | ||
684 | |||
685 | snprintf(drv_name, sizeof(drv_name), "__driver-%s", | ||
686 | crypto_tfm_alg_driver_name(tfm)); | ||
687 | |||
688 | cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); | ||
689 | if (IS_ERR(cryptd_tfm)) | ||
690 | return PTR_ERR(cryptd_tfm); | ||
691 | |||
692 | ctx->cryptd_tfm = cryptd_tfm; | ||
693 | tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + | ||
694 | crypto_ablkcipher_reqsize(&cryptd_tfm->base); | ||
695 | |||
696 | return 0; | ||
697 | } | ||
698 | |||
699 | static struct crypto_alg serpent_algs[10] = { { | 386 | static struct crypto_alg serpent_algs[10] = { { |
700 | .cra_name = "__ecb-serpent-sse2", | 387 | .cra_name = "__ecb-serpent-sse2", |
701 | .cra_driver_name = "__driver-ecb-serpent-sse2", | 388 | .cra_driver_name = "__driver-ecb-serpent-sse2", |
@@ -808,7 +495,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
808 | .cra_priority = 400, | 495 | .cra_priority = 400, |
809 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 496 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
810 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 497 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
811 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 498 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
812 | .cra_alignmask = 0, | 499 | .cra_alignmask = 0, |
813 | .cra_type = &crypto_ablkcipher_type, | 500 | .cra_type = &crypto_ablkcipher_type, |
814 | .cra_module = THIS_MODULE, | 501 | .cra_module = THIS_MODULE, |
@@ -830,7 +517,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
830 | .cra_priority = 400, | 517 | .cra_priority = 400, |
831 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 518 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
832 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 519 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
833 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 520 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
834 | .cra_alignmask = 0, | 521 | .cra_alignmask = 0, |
835 | .cra_type = &crypto_ablkcipher_type, | 522 | .cra_type = &crypto_ablkcipher_type, |
836 | .cra_module = THIS_MODULE, | 523 | .cra_module = THIS_MODULE, |
@@ -853,7 +540,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
853 | .cra_priority = 400, | 540 | .cra_priority = 400, |
854 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 541 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
855 | .cra_blocksize = 1, | 542 | .cra_blocksize = 1, |
856 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 543 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
857 | .cra_alignmask = 0, | 544 | .cra_alignmask = 0, |
858 | .cra_type = &crypto_ablkcipher_type, | 545 | .cra_type = &crypto_ablkcipher_type, |
859 | .cra_module = THIS_MODULE, | 546 | .cra_module = THIS_MODULE, |
@@ -877,7 +564,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
877 | .cra_priority = 400, | 564 | .cra_priority = 400, |
878 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 565 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
879 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 566 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
880 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 567 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
881 | .cra_alignmask = 0, | 568 | .cra_alignmask = 0, |
882 | .cra_type = &crypto_ablkcipher_type, | 569 | .cra_type = &crypto_ablkcipher_type, |
883 | .cra_module = THIS_MODULE, | 570 | .cra_module = THIS_MODULE, |
@@ -902,7 +589,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
902 | .cra_priority = 400, | 589 | .cra_priority = 400, |
903 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 590 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
904 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 591 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
905 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 592 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
906 | .cra_alignmask = 0, | 593 | .cra_alignmask = 0, |
907 | .cra_type = &crypto_ablkcipher_type, | 594 | .cra_type = &crypto_ablkcipher_type, |
908 | .cra_module = THIS_MODULE, | 595 | .cra_module = THIS_MODULE, |
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index b2c2f57d70e8..49d6987a73d9 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S | |||
@@ -468,7 +468,7 @@ W_PRECALC_SSSE3 | |||
468 | */ | 468 | */ |
469 | SHA1_VECTOR_ASM sha1_transform_ssse3 | 469 | SHA1_VECTOR_ASM sha1_transform_ssse3 |
470 | 470 | ||
471 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 471 | #ifdef CONFIG_AS_AVX |
472 | 472 | ||
473 | .macro W_PRECALC_AVX | 473 | .macro W_PRECALC_AVX |
474 | 474 | ||
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index f916499d0abe..4a11a9d72451 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c | |||
@@ -35,7 +35,7 @@ | |||
35 | 35 | ||
36 | asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, | 36 | asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, |
37 | unsigned int rounds); | 37 | unsigned int rounds); |
38 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 38 | #ifdef CONFIG_AS_AVX |
39 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, | 39 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, |
40 | unsigned int rounds); | 40 | unsigned int rounds); |
41 | #endif | 41 | #endif |
@@ -184,7 +184,7 @@ static struct shash_alg alg = { | |||
184 | } | 184 | } |
185 | }; | 185 | }; |
186 | 186 | ||
187 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 187 | #ifdef CONFIG_AS_AVX |
188 | static bool __init avx_usable(void) | 188 | static bool __init avx_usable(void) |
189 | { | 189 | { |
190 | u64 xcr0; | 190 | u64 xcr0; |
@@ -209,7 +209,7 @@ static int __init sha1_ssse3_mod_init(void) | |||
209 | if (cpu_has_ssse3) | 209 | if (cpu_has_ssse3) |
210 | sha1_transform_asm = sha1_transform_ssse3; | 210 | sha1_transform_asm = sha1_transform_ssse3; |
211 | 211 | ||
212 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 212 | #ifdef CONFIG_AS_AVX |
213 | /* allow AVX to override SSSE3, it's a little faster */ | 213 | /* allow AVX to override SSSE3, it's a little faster */ |
214 | if (avx_usable()) | 214 | if (avx_usable()) |
215 | sha1_transform_asm = sha1_transform_avx; | 215 | sha1_transform_asm = sha1_transform_avx; |
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S new file mode 100644 index 000000000000..35f45574390d --- /dev/null +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S | |||
@@ -0,0 +1,300 @@ | |||
1 | /* | ||
2 | * Twofish Cipher 8-way parallel algorithm (AVX/x86_64) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | .file "twofish-avx-x86_64-asm_64.S" | ||
25 | .text | ||
26 | |||
27 | /* structure of crypto context */ | ||
28 | #define s0 0 | ||
29 | #define s1 1024 | ||
30 | #define s2 2048 | ||
31 | #define s3 3072 | ||
32 | #define w 4096 | ||
33 | #define k 4128 | ||
34 | |||
35 | /********************************************************************** | ||
36 | 8-way AVX twofish | ||
37 | **********************************************************************/ | ||
38 | #define CTX %rdi | ||
39 | |||
40 | #define RA1 %xmm0 | ||
41 | #define RB1 %xmm1 | ||
42 | #define RC1 %xmm2 | ||
43 | #define RD1 %xmm3 | ||
44 | |||
45 | #define RA2 %xmm4 | ||
46 | #define RB2 %xmm5 | ||
47 | #define RC2 %xmm6 | ||
48 | #define RD2 %xmm7 | ||
49 | |||
50 | #define RX %xmm8 | ||
51 | #define RY %xmm9 | ||
52 | |||
53 | #define RK1 %xmm10 | ||
54 | #define RK2 %xmm11 | ||
55 | |||
56 | #define RID1 %rax | ||
57 | #define RID1b %al | ||
58 | #define RID2 %rbx | ||
59 | #define RID2b %bl | ||
60 | |||
61 | #define RGI1 %rdx | ||
62 | #define RGI1bl %dl | ||
63 | #define RGI1bh %dh | ||
64 | #define RGI2 %rcx | ||
65 | #define RGI2bl %cl | ||
66 | #define RGI2bh %ch | ||
67 | |||
68 | #define RGS1 %r8 | ||
69 | #define RGS1d %r8d | ||
70 | #define RGS2 %r9 | ||
71 | #define RGS2d %r9d | ||
72 | #define RGS3 %r10 | ||
73 | #define RGS3d %r10d | ||
74 | |||
75 | |||
76 | #define lookup_32bit(t0, t1, t2, t3, src, dst) \ | ||
77 | movb src ## bl, RID1b; \ | ||
78 | movb src ## bh, RID2b; \ | ||
79 | movl t0(CTX, RID1, 4), dst ## d; \ | ||
80 | xorl t1(CTX, RID2, 4), dst ## d; \ | ||
81 | shrq $16, src; \ | ||
82 | movb src ## bl, RID1b; \ | ||
83 | movb src ## bh, RID2b; \ | ||
84 | xorl t2(CTX, RID1, 4), dst ## d; \ | ||
85 | xorl t3(CTX, RID2, 4), dst ## d; | ||
86 | |||
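
lookup_32bit splits a 32-bit word into four bytes and folds together four lookups into the key-dependent 1 KB tables at offsets s0..s3 (hence the 1024-byte spacing in the context layout above). A plain-C equivalent of what the macro computes:

#include <stdint.h>

static uint32_t lookup_32bit_c(const uint32_t t0[256], const uint32_t t1[256],
			       const uint32_t t2[256], const uint32_t t3[256],
			       uint32_t x)
{
	return t0[x & 0xff] ^ t1[(x >> 8) & 0xff] ^
	       t2[(x >> 16) & 0xff] ^ t3[(x >> 24) & 0xff];
}
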
87 | #define G(a, x, t0, t1, t2, t3) \ | ||
88 | vmovq a, RGI1; \ | ||
89 | vpsrldq $8, a, x; \ | ||
90 | vmovq x, RGI2; \ | ||
91 | \ | ||
92 | lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \ | ||
93 | shrq $16, RGI1; \ | ||
94 | lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \ | ||
95 | shlq $32, RGS2; \ | ||
96 | orq RGS1, RGS2; \ | ||
97 | \ | ||
98 | lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \ | ||
99 | shrq $16, RGI2; \ | ||
100 | lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \ | ||
101 | shlq $32, RGS3; \ | ||
102 | orq RGS1, RGS3; \ | ||
103 | \ | ||
104 | vmovq RGS2, x; \ | ||
105 | vpinsrq $1, RGS3, x, x; | ||
106 | |||
107 | #define encround(a, b, c, d, x, y) \ | ||
108 | G(a, x, s0, s1, s2, s3); \ | ||
109 | G(b, y, s1, s2, s3, s0); \ | ||
110 | vpaddd x, y, x; \ | ||
111 | vpaddd y, x, y; \ | ||
112 | vpaddd x, RK1, x; \ | ||
113 | vpaddd y, RK2, y; \ | ||
114 | vpxor x, c, c; \ | ||
115 | vpsrld $1, c, x; \ | ||
116 | vpslld $(32 - 1), c, c; \ | ||
117 | vpor c, x, c; \ | ||
118 | vpslld $1, d, x; \ | ||
119 | vpsrld $(32 - 1), d, d; \ | ||
120 | vpor d, x, d; \ | ||
121 | vpxor d, y, d; | ||
122 | |||
123 | #define decround(a, b, c, d, x, y) \ | ||
124 | G(a, x, s0, s1, s2, s3); \ | ||
125 | G(b, y, s1, s2, s3, s0); \ | ||
126 | vpaddd x, y, x; \ | ||
127 | vpaddd y, x, y; \ | ||
128 | vpaddd y, RK2, y; \ | ||
129 | vpxor d, y, d; \ | ||
130 | vpsrld $1, d, y; \ | ||
131 | vpslld $(32 - 1), d, d; \ | ||
132 | vpor d, y, d; \ | ||
133 | vpslld $1, c, y; \ | ||
134 | vpsrld $(32 - 1), c, c; \ | ||
135 | vpor c, y, c; \ | ||
136 | vpaddd x, RK1, x; \ | ||
137 | vpxor x, c, c; | ||
138 | |||
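
encround is the standard Twofish round: two g-function results mixed by a pseudo-Hadamard transform, round subkeys added, then folded into the other half of the state with 1-bit rotations (decround applies the inverse order). A scalar rendering, with tf_g0/tf_g1 as hypothetical stand-ins for the G() table lookups:

#include <stdint.h>

uint32_t tf_g0(uint32_t a);	/* G via tables s0,s1,s2,s3 */
uint32_t tf_g1(uint32_t b);	/* G via tables s1,s2,s3,s0 */

static void encround_sketch(uint32_t a, uint32_t b, uint32_t *c, uint32_t *d,
			    uint32_t k1, uint32_t k2)
{
	uint32_t x = tf_g0(a);
	uint32_t y = tf_g1(b);

	x += y;				/* pseudo-Hadamard transform */
	y += x;
	x += k1;			/* round subkeys */
	y += k2;

	*c ^= x;
	*c = (*c >> 1) | (*c << 31);	/* ror32(c, 1) */
	*d = (*d << 1) | (*d >> 31);	/* rol32(d, 1) */
	*d ^= y;
}
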
139 | #define encrypt_round(n, a, b, c, d) \ | ||
140 | vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ | ||
141 | vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ | ||
142 | encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ | ||
143 | encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); | ||
144 | |||
145 | #define decrypt_round(n, a, b, c, d) \ | ||
146 | vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ | ||
147 | vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ | ||
148 | decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ | ||
149 | decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); | ||
150 | |||
151 | #define encrypt_cycle(n) \ | ||
152 | encrypt_round((2*n), RA, RB, RC, RD); \ | ||
153 | encrypt_round(((2*n) + 1), RC, RD, RA, RB); | ||
154 | |||
155 | #define decrypt_cycle(n) \ | ||
156 | decrypt_round(((2*n) + 1), RC, RD, RA, RB); \ | ||
157 | decrypt_round((2*n), RA, RB, RC, RD); | ||
158 | |||
159 | |||
160 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
161 | vpunpckldq x1, x0, t0; \ | ||
162 | vpunpckhdq x1, x0, t2; \ | ||
163 | vpunpckldq x3, x2, t1; \ | ||
164 | vpunpckhdq x3, x2, x3; \ | ||
165 | \ | ||
166 | vpunpcklqdq t1, t0, x0; \ | ||
167 | vpunpckhqdq t1, t0, x1; \ | ||
168 | vpunpcklqdq x3, t2, x2; \ | ||
169 | vpunpckhqdq x3, t2, x3; | ||
170 | |||
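
The vpunpck{l,h}dq/vpunpck{l,h}qdq sequence is a 4x4 transpose of 32-bit words: four blocks loaded row-wise end up word-interleaved, so each SIMD lane carries one block through the rounds. The same operation in plain C:

#include <stdint.h>

static void transpose_4x4_sketch(uint32_t m[4][4])
{
	unsigned int i, j;

	for (i = 0; i < 4; i++)
		for (j = i + 1; j < 4; j++) {
			uint32_t t = m[i][j];

			m[i][j] = m[j][i];
			m[j][i] = t;
		}
}
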
171 | #define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
172 | vpxor (0*4*4)(in), wkey, x0; \ | ||
173 | vpxor (1*4*4)(in), wkey, x1; \ | ||
174 | vpxor (2*4*4)(in), wkey, x2; \ | ||
175 | vpxor (3*4*4)(in), wkey, x3; \ | ||
176 | \ | ||
177 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
178 | |||
179 | #define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
180 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
181 | \ | ||
182 | vpxor x0, wkey, x0; \ | ||
183 | vmovdqu x0, (0*4*4)(out); \ | ||
184 | vpxor x1, wkey, x1; \ | ||
185 | vmovdqu x1, (1*4*4)(out); \ | ||
186 | vpxor x2, wkey, x2; \ | ||
187 | vmovdqu x2, (2*4*4)(out); \ | ||
188 | vpxor x3, wkey, x3; \ | ||
189 | vmovdqu x3, (3*4*4)(out); | ||
190 | |||
191 | #define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
192 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
193 | \ | ||
194 | vpxor x0, wkey, x0; \ | ||
195 | vpxor (0*4*4)(out), x0, x0; \ | ||
196 | vmovdqu x0, (0*4*4)(out); \ | ||
197 | vpxor x1, wkey, x1; \ | ||
198 | vpxor (1*4*4)(out), x1, x1; \ | ||
199 | vmovdqu x1, (1*4*4)(out); \ | ||
200 | vpxor x2, wkey, x2; \ | ||
201 | vpxor (2*4*4)(out), x2, x2; \ | ||
202 | vmovdqu x2, (2*4*4)(out); \ | ||
203 | vpxor x3, wkey, x3; \ | ||
204 | vpxor (3*4*4)(out), x3, x3; \ | ||
205 | vmovdqu x3, (3*4*4)(out); | ||
206 | |||
207 | .align 8 | ||
208 | .global __twofish_enc_blk_8way | ||
209 | .type __twofish_enc_blk_8way,@function; | ||
210 | |||
211 | __twofish_enc_blk_8way: | ||
212 | /* input: | ||
213 | * %rdi: ctx, CTX | ||
214 | * %rsi: dst | ||
215 | * %rdx: src | ||
216 | * %rcx: bool, if true: xor output | ||
217 | */ | ||
218 | |||
219 | pushq %rbx; | ||
220 | pushq %rcx; | ||
221 | |||
222 | vmovdqu w(CTX), RK1; | ||
223 | |||
224 | leaq (4*4*4)(%rdx), %rax; | ||
225 | inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); | ||
226 | inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); | ||
227 | |||
228 | xorq RID1, RID1; | ||
229 | xorq RID2, RID2; | ||
230 | |||
231 | encrypt_cycle(0); | ||
232 | encrypt_cycle(1); | ||
233 | encrypt_cycle(2); | ||
234 | encrypt_cycle(3); | ||
235 | encrypt_cycle(4); | ||
236 | encrypt_cycle(5); | ||
237 | encrypt_cycle(6); | ||
238 | encrypt_cycle(7); | ||
239 | |||
240 | vmovdqu (w+4*4)(CTX), RK1; | ||
241 | |||
242 | popq %rcx; | ||
243 | popq %rbx; | ||
244 | |||
245 | leaq (4*4*4)(%rsi), %rax; | ||
246 | |||
247 | testb %cl, %cl; | ||
248 | jnz __enc_xor8; | ||
249 | |||
250 | outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
251 | outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
252 | |||
253 | ret; | ||
254 | |||
255 | __enc_xor8: | ||
256 | outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
257 | outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
258 | |||
259 | ret; | ||
260 | |||
261 | .align 8 | ||
262 | .global twofish_dec_blk_8way | ||
263 | .type twofish_dec_blk_8way,@function; | ||
264 | |||
265 | twofish_dec_blk_8way: | ||
266 | /* input: | ||
267 | * %rdi: ctx, CTX | ||
268 | * %rsi: dst | ||
269 | * %rdx: src | ||
270 | */ | ||
271 | |||
272 | pushq %rbx; | ||
273 | |||
274 | vmovdqu (w+4*4)(CTX), RK1; | ||
275 | |||
276 | leaq (4*4*4)(%rdx), %rax; | ||
277 | inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
278 | inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
279 | |||
280 | xorq RID1, RID1; | ||
281 | xorq RID2, RID2; | ||
282 | |||
283 | decrypt_cycle(7); | ||
284 | decrypt_cycle(6); | ||
285 | decrypt_cycle(5); | ||
286 | decrypt_cycle(4); | ||
287 | decrypt_cycle(3); | ||
288 | decrypt_cycle(2); | ||
289 | decrypt_cycle(1); | ||
290 | decrypt_cycle(0); | ||
291 | |||
292 | vmovdqu (w)(CTX), RK1; | ||
293 | |||
294 | popq %rbx; | ||
295 | |||
296 | leaq (4*4*4)(%rsi), %rax; | ||
297 | outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); | ||
298 | outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); | ||
299 | |||
300 | ret; | ||
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c new file mode 100644 index 000000000000..782b67ddaf6a --- /dev/null +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -0,0 +1,624 @@ | |||
1 | /* | ||
2 | * Glue Code for AVX assembler version of Twofish Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/crypto.h> | ||
28 | #include <linux/err.h> | ||
29 | #include <crypto/algapi.h> | ||
30 | #include <crypto/twofish.h> | ||
31 | #include <crypto/cryptd.h> | ||
32 | #include <crypto/b128ops.h> | ||
33 | #include <crypto/ctr.h> | ||
34 | #include <crypto/lrw.h> | ||
35 | #include <crypto/xts.h> | ||
36 | #include <asm/i387.h> | ||
37 | #include <asm/xcr.h> | ||
38 | #include <asm/xsave.h> | ||
39 | #include <asm/crypto/twofish.h> | ||
40 | #include <asm/crypto/ablk_helper.h> | ||
41 | #include <asm/crypto/glue_helper.h> | ||
42 | #include <crypto/scatterwalk.h> | ||
43 | #include <linux/workqueue.h> | ||
44 | #include <linux/spinlock.h> | ||
45 | |||
46 | #define TWOFISH_PARALLEL_BLOCKS 8 | ||
47 | |||
48 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
49 | const u8 *src) | ||
50 | { | ||
51 | __twofish_enc_blk_3way(ctx, dst, src, false); | ||
52 | } | ||
53 | |||
54 | /* 8-way parallel cipher functions */ | ||
55 | asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst, | ||
56 | const u8 *src, bool xor); | ||
57 | asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst, | ||
58 | const u8 *src); | ||
59 | |||
60 | static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst, | ||
61 | const u8 *src) | ||
62 | { | ||
63 | __twofish_enc_blk_8way(ctx, dst, src, false); | ||
64 | } | ||
65 | |||
66 | static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst, | ||
67 | const u8 *src) | ||
68 | { | ||
69 | __twofish_enc_blk_8way(ctx, dst, src, true); | ||
70 | } | ||
71 | |||
72 | static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst, | ||
73 | const u8 *src) | ||
74 | { | ||
75 | twofish_dec_blk_8way(ctx, dst, src); | ||
76 | } | ||
77 | |||
78 | static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src) | ||
79 | { | ||
80 | u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1]; | ||
81 | unsigned int j; | ||
82 | |||
83 | for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) | ||
84 | ivs[j] = src[j]; | ||
85 | |||
86 | twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
87 | |||
88 | for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) | ||
89 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); | ||
90 | } | ||
91 | |||
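
The ivs[] copies exist because the 8-way decrypt may run in place (dst == src): each plaintext is D(C_i) ^ C_{i-1}, so the previous ciphertexts must survive the decryption. A scalar sketch of the same recurrence, assuming dst and src do not overlap:

static void cbc_dec_sketch(struct twofish_ctx *ctx, u128 *dst,
			   const u128 *src, const u128 *iv, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		twofish_dec_blk(ctx, (u8 *)&dst[i], (const u8 *)&src[i]);
		u128_xor(&dst[i], &dst[i], i ? &src[i - 1] : iv);
	}
}
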
92 | static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src, | ||
93 | u128 *iv) | ||
94 | { | ||
95 | be128 ctrblks[TWOFISH_PARALLEL_BLOCKS]; | ||
96 | unsigned int i; | ||
97 | |||
98 | for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) { | ||
99 | if (dst != src) | ||
100 | dst[i] = src[i]; | ||
101 | |||
102 | u128_to_be128(&ctrblks[i], iv); | ||
103 | u128_inc(iv); | ||
104 | } | ||
105 | |||
106 | twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
107 | } | ||
108 | |||
109 | static const struct common_glue_ctx twofish_enc = { | ||
110 | .num_funcs = 3, | ||
111 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
112 | |||
113 | .funcs = { { | ||
114 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
115 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) } | ||
116 | }, { | ||
117 | .num_blocks = 3, | ||
118 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
119 | }, { | ||
120 | .num_blocks = 1, | ||
121 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
122 | } } | ||
123 | }; | ||
124 | |||
125 | static const struct common_glue_ctx twofish_ctr = { | ||
126 | .num_funcs = 3, | ||
127 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
128 | |||
129 | .funcs = { { | ||
130 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
131 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) } | ||
132 | }, { | ||
133 | .num_blocks = 3, | ||
134 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } | ||
135 | }, { | ||
136 | .num_blocks = 1, | ||
137 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } | ||
138 | } } | ||
139 | }; | ||
140 | |||
141 | static const struct common_glue_ctx twofish_dec = { | ||
142 | .num_funcs = 3, | ||
143 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
144 | |||
145 | .funcs = { { | ||
146 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
147 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) } | ||
148 | }, { | ||
149 | .num_blocks = 3, | ||
150 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
151 | }, { | ||
152 | .num_blocks = 1, | ||
153 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
154 | } } | ||
155 | }; | ||
156 | |||
157 | static const struct common_glue_ctx twofish_dec_cbc = { | ||
158 | .num_funcs = 3, | ||
159 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
160 | |||
161 | .funcs = { { | ||
162 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
163 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) } | ||
164 | }, { | ||
165 | .num_blocks = 3, | ||
166 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
167 | }, { | ||
168 | .num_blocks = 1, | ||
169 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
170 | } } | ||
171 | }; | ||
172 | |||
173 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
174 | struct scatterlist *src, unsigned int nbytes) | ||
175 | { | ||
176 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); | ||
177 | } | ||
178 | |||
179 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
180 | struct scatterlist *src, unsigned int nbytes) | ||
181 | { | ||
182 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); | ||
183 | } | ||
184 | |||
185 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
186 | struct scatterlist *src, unsigned int nbytes) | ||
187 | { | ||
188 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, | ||
189 | dst, src, nbytes); | ||
190 | } | ||
191 | |||
192 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
193 | struct scatterlist *src, unsigned int nbytes) | ||
194 | { | ||
195 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, | ||
196 | nbytes); | ||
197 | } | ||
198 | |||
199 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
200 | struct scatterlist *src, unsigned int nbytes) | ||
201 | { | ||
202 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); | ||
203 | } | ||
204 | |||
205 | static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
206 | { | ||
207 | return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL, | ||
208 | fpu_enabled, nbytes); | ||
209 | } | ||
210 | |||
211 | static inline void twofish_fpu_end(bool fpu_enabled) | ||
212 | { | ||
213 | glue_fpu_end(fpu_enabled); | ||
214 | } | ||
215 | |||
216 | struct crypt_priv { | ||
217 | struct twofish_ctx *ctx; | ||
218 | bool fpu_enabled; | ||
219 | }; | ||
220 | |||
221 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
222 | { | ||
223 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
224 | struct crypt_priv *ctx = priv; | ||
225 | int i; | ||
226 | |||
227 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
228 | |||
229 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | ||
230 | twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
231 | return; | ||
232 | } | ||
233 | |||
234 | for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | ||
235 | twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); | ||
236 | |||
237 | nbytes %= bsize * 3; | ||
238 | |||
239 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
240 | twofish_enc_blk(ctx->ctx, srcdst, srcdst); | ||
241 | } | ||
242 | |||
243 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
244 | { | ||
245 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
246 | struct crypt_priv *ctx = priv; | ||
247 | int i; | ||
248 | |||
249 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
250 | |||
251 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | ||
252 | twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
253 | return; | ||
254 | } | ||
255 | |||
256 | for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | ||
257 | twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); | ||
258 | |||
259 | nbytes %= bsize * 3; | ||
260 | |||
261 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
262 | twofish_dec_blk(ctx->ctx, srcdst, srcdst); | ||
263 | } | ||
264 | |||
265 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
266 | struct scatterlist *src, unsigned int nbytes) | ||
267 | { | ||
268 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
269 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
270 | struct crypt_priv crypt_ctx = { | ||
271 | .ctx = &ctx->twofish_ctx, | ||
272 | .fpu_enabled = false, | ||
273 | }; | ||
274 | struct lrw_crypt_req req = { | ||
275 | .tbuf = buf, | ||
276 | .tbuflen = sizeof(buf), | ||
277 | |||
278 | .table_ctx = &ctx->lrw_table, | ||
279 | .crypt_ctx = &crypt_ctx, | ||
280 | .crypt_fn = encrypt_callback, | ||
281 | }; | ||
282 | int ret; | ||
283 | |||
284 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
285 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
286 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
287 | |||
288 | return ret; | ||
289 | } | ||
290 | |||
291 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
292 | struct scatterlist *src, unsigned int nbytes) | ||
293 | { | ||
294 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
295 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
296 | struct crypt_priv crypt_ctx = { | ||
297 | .ctx = &ctx->twofish_ctx, | ||
298 | .fpu_enabled = false, | ||
299 | }; | ||
300 | struct lrw_crypt_req req = { | ||
301 | .tbuf = buf, | ||
302 | .tbuflen = sizeof(buf), | ||
303 | |||
304 | .table_ctx = &ctx->lrw_table, | ||
305 | .crypt_ctx = &crypt_ctx, | ||
306 | .crypt_fn = decrypt_callback, | ||
307 | }; | ||
308 | int ret; | ||
309 | |||
310 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
311 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
312 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
313 | |||
314 | return ret; | ||
315 | } | ||
316 | |||
317 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
318 | struct scatterlist *src, unsigned int nbytes) | ||
319 | { | ||
320 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
321 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
322 | struct crypt_priv crypt_ctx = { | ||
323 | .ctx = &ctx->crypt_ctx, | ||
324 | .fpu_enabled = false, | ||
325 | }; | ||
326 | struct xts_crypt_req req = { | ||
327 | .tbuf = buf, | ||
328 | .tbuflen = sizeof(buf), | ||
329 | |||
330 | .tweak_ctx = &ctx->tweak_ctx, | ||
331 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
332 | .crypt_ctx = &crypt_ctx, | ||
333 | .crypt_fn = encrypt_callback, | ||
334 | }; | ||
335 | int ret; | ||
336 | |||
337 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
338 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
339 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
340 | |||
341 | return ret; | ||
342 | } | ||
343 | |||
344 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
345 | struct scatterlist *src, unsigned int nbytes) | ||
346 | { | ||
347 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
348 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
349 | struct crypt_priv crypt_ctx = { | ||
350 | .ctx = &ctx->crypt_ctx, | ||
351 | .fpu_enabled = false, | ||
352 | }; | ||
353 | struct xts_crypt_req req = { | ||
354 | .tbuf = buf, | ||
355 | .tbuflen = sizeof(buf), | ||
356 | |||
357 | .tweak_ctx = &ctx->tweak_ctx, | ||
358 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
359 | .crypt_ctx = &crypt_ctx, | ||
360 | .crypt_fn = decrypt_callback, | ||
361 | }; | ||
362 | int ret; | ||
363 | |||
364 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
365 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
366 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
367 | |||
368 | return ret; | ||
369 | } | ||
370 | |||
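The XTS paths run two independent Twofish schedules: xts_twofish_setkey() (see the 3-way glue below) hands the first half of the key to crypt_ctx and the second half to tweak_ctx, which is why the keysizes above are doubled. The split itself is just:

#include <linux/types.h>

/* first half drives the data cipher, second half the tweak cipher */
static void xts_key_split(const u8 *key, unsigned int keylen,
			  const u8 **crypt_key, const u8 **tweak_key)
{
	*crypt_key = key;		/* bytes [0, keylen/2)      */
	*tweak_key = key + keylen / 2;	/* bytes [keylen/2, keylen) */
}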
371 | static struct crypto_alg twofish_algs[10] = { { | ||
372 | .cra_name = "__ecb-twofish-avx", | ||
373 | .cra_driver_name = "__driver-ecb-twofish-avx", | ||
374 | .cra_priority = 0, | ||
375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
376 | .cra_blocksize = TF_BLOCK_SIZE, | ||
377 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
378 | .cra_alignmask = 0, | ||
379 | .cra_type = &crypto_blkcipher_type, | ||
380 | .cra_module = THIS_MODULE, | ||
381 | .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list), | ||
382 | .cra_u = { | ||
383 | .blkcipher = { | ||
384 | .min_keysize = TF_MIN_KEY_SIZE, | ||
385 | .max_keysize = TF_MAX_KEY_SIZE, | ||
386 | .setkey = twofish_setkey, | ||
387 | .encrypt = ecb_encrypt, | ||
388 | .decrypt = ecb_decrypt, | ||
389 | }, | ||
390 | }, | ||
391 | }, { | ||
392 | .cra_name = "__cbc-twofish-avx", | ||
393 | .cra_driver_name = "__driver-cbc-twofish-avx", | ||
394 | .cra_priority = 0, | ||
395 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
396 | .cra_blocksize = TF_BLOCK_SIZE, | ||
397 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
398 | .cra_alignmask = 0, | ||
399 | .cra_type = &crypto_blkcipher_type, | ||
400 | .cra_module = THIS_MODULE, | ||
401 | .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list), | ||
402 | .cra_u = { | ||
403 | .blkcipher = { | ||
404 | .min_keysize = TF_MIN_KEY_SIZE, | ||
405 | .max_keysize = TF_MAX_KEY_SIZE, | ||
406 | .setkey = twofish_setkey, | ||
407 | .encrypt = cbc_encrypt, | ||
408 | .decrypt = cbc_decrypt, | ||
409 | }, | ||
410 | }, | ||
411 | }, { | ||
412 | .cra_name = "__ctr-twofish-avx", | ||
413 | .cra_driver_name = "__driver-ctr-twofish-avx", | ||
414 | .cra_priority = 0, | ||
415 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
416 | .cra_blocksize = 1, | ||
417 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
418 | .cra_alignmask = 0, | ||
419 | .cra_type = &crypto_blkcipher_type, | ||
420 | .cra_module = THIS_MODULE, | ||
421 | .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list), | ||
422 | .cra_u = { | ||
423 | .blkcipher = { | ||
424 | .min_keysize = TF_MIN_KEY_SIZE, | ||
425 | .max_keysize = TF_MAX_KEY_SIZE, | ||
426 | .ivsize = TF_BLOCK_SIZE, | ||
427 | .setkey = twofish_setkey, | ||
428 | .encrypt = ctr_crypt, | ||
429 | .decrypt = ctr_crypt, | ||
430 | }, | ||
431 | }, | ||
432 | }, { | ||
433 | .cra_name = "__lrw-twofish-avx", | ||
434 | .cra_driver_name = "__driver-lrw-twofish-avx", | ||
435 | .cra_priority = 0, | ||
436 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
437 | .cra_blocksize = TF_BLOCK_SIZE, | ||
438 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | ||
439 | .cra_alignmask = 0, | ||
440 | .cra_type = &crypto_blkcipher_type, | ||
441 | .cra_module = THIS_MODULE, | ||
442 | .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list), | ||
443 | .cra_exit = lrw_twofish_exit_tfm, | ||
444 | .cra_u = { | ||
445 | .blkcipher = { | ||
446 | .min_keysize = TF_MIN_KEY_SIZE + | ||
447 | TF_BLOCK_SIZE, | ||
448 | .max_keysize = TF_MAX_KEY_SIZE + | ||
449 | TF_BLOCK_SIZE, | ||
450 | .ivsize = TF_BLOCK_SIZE, | ||
451 | .setkey = lrw_twofish_setkey, | ||
452 | .encrypt = lrw_encrypt, | ||
453 | .decrypt = lrw_decrypt, | ||
454 | }, | ||
455 | }, | ||
456 | }, { | ||
457 | .cra_name = "__xts-twofish-avx", | ||
458 | .cra_driver_name = "__driver-xts-twofish-avx", | ||
459 | .cra_priority = 0, | ||
460 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
461 | .cra_blocksize = TF_BLOCK_SIZE, | ||
462 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | ||
463 | .cra_alignmask = 0, | ||
464 | .cra_type = &crypto_blkcipher_type, | ||
465 | .cra_module = THIS_MODULE, | ||
466 | .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list), | ||
467 | .cra_u = { | ||
468 | .blkcipher = { | ||
469 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
470 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
471 | .ivsize = TF_BLOCK_SIZE, | ||
472 | .setkey = xts_twofish_setkey, | ||
473 | .encrypt = xts_encrypt, | ||
474 | .decrypt = xts_decrypt, | ||
475 | }, | ||
476 | }, | ||
477 | }, { | ||
478 | .cra_name = "ecb(twofish)", | ||
479 | .cra_driver_name = "ecb-twofish-avx", | ||
480 | .cra_priority = 400, | ||
481 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
482 | .cra_blocksize = TF_BLOCK_SIZE, | ||
483 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
484 | .cra_alignmask = 0, | ||
485 | .cra_type = &crypto_ablkcipher_type, | ||
486 | .cra_module = THIS_MODULE, | ||
487 | .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list), | ||
488 | .cra_init = ablk_init, | ||
489 | .cra_exit = ablk_exit, | ||
490 | .cra_u = { | ||
491 | .ablkcipher = { | ||
492 | .min_keysize = TF_MIN_KEY_SIZE, | ||
493 | .max_keysize = TF_MAX_KEY_SIZE, | ||
494 | .setkey = ablk_set_key, | ||
495 | .encrypt = ablk_encrypt, | ||
496 | .decrypt = ablk_decrypt, | ||
497 | }, | ||
498 | }, | ||
499 | }, { | ||
500 | .cra_name = "cbc(twofish)", | ||
501 | .cra_driver_name = "cbc-twofish-avx", | ||
502 | .cra_priority = 400, | ||
503 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
504 | .cra_blocksize = TF_BLOCK_SIZE, | ||
505 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
506 | .cra_alignmask = 0, | ||
507 | .cra_type = &crypto_ablkcipher_type, | ||
508 | .cra_module = THIS_MODULE, | ||
509 | .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list), | ||
510 | .cra_init = ablk_init, | ||
511 | .cra_exit = ablk_exit, | ||
512 | .cra_u = { | ||
513 | .ablkcipher = { | ||
514 | .min_keysize = TF_MIN_KEY_SIZE, | ||
515 | .max_keysize = TF_MAX_KEY_SIZE, | ||
516 | .ivsize = TF_BLOCK_SIZE, | ||
517 | .setkey = ablk_set_key, | ||
518 | .encrypt = __ablk_encrypt, | ||
519 | .decrypt = ablk_decrypt, | ||
520 | }, | ||
521 | }, | ||
522 | }, { | ||
523 | .cra_name = "ctr(twofish)", | ||
524 | .cra_driver_name = "ctr-twofish-avx", | ||
525 | .cra_priority = 400, | ||
526 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
527 | .cra_blocksize = 1, | ||
528 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
529 | .cra_alignmask = 0, | ||
530 | .cra_type = &crypto_ablkcipher_type, | ||
531 | .cra_module = THIS_MODULE, | ||
532 | .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list), | ||
533 | .cra_init = ablk_init, | ||
534 | .cra_exit = ablk_exit, | ||
535 | .cra_u = { | ||
536 | .ablkcipher = { | ||
537 | .min_keysize = TF_MIN_KEY_SIZE, | ||
538 | .max_keysize = TF_MAX_KEY_SIZE, | ||
539 | .ivsize = TF_BLOCK_SIZE, | ||
540 | .setkey = ablk_set_key, | ||
541 | .encrypt = ablk_encrypt, | ||
542 | .decrypt = ablk_encrypt, | ||
543 | .geniv = "chainiv", | ||
544 | }, | ||
545 | }, | ||
546 | }, { | ||
547 | .cra_name = "lrw(twofish)", | ||
548 | .cra_driver_name = "lrw-twofish-avx", | ||
549 | .cra_priority = 400, | ||
550 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
551 | .cra_blocksize = TF_BLOCK_SIZE, | ||
552 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
553 | .cra_alignmask = 0, | ||
554 | .cra_type = &crypto_ablkcipher_type, | ||
555 | .cra_module = THIS_MODULE, | ||
556 | .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list), | ||
557 | .cra_init = ablk_init, | ||
558 | .cra_exit = ablk_exit, | ||
559 | .cra_u = { | ||
560 | .ablkcipher = { | ||
561 | .min_keysize = TF_MIN_KEY_SIZE + | ||
562 | TF_BLOCK_SIZE, | ||
563 | .max_keysize = TF_MAX_KEY_SIZE + | ||
564 | TF_BLOCK_SIZE, | ||
565 | .ivsize = TF_BLOCK_SIZE, | ||
566 | .setkey = ablk_set_key, | ||
567 | .encrypt = ablk_encrypt, | ||
568 | .decrypt = ablk_decrypt, | ||
569 | }, | ||
570 | }, | ||
571 | }, { | ||
572 | .cra_name = "xts(twofish)", | ||
573 | .cra_driver_name = "xts-twofish-avx", | ||
574 | .cra_priority = 400, | ||
575 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
576 | .cra_blocksize = TF_BLOCK_SIZE, | ||
577 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
578 | .cra_alignmask = 0, | ||
579 | .cra_type = &crypto_ablkcipher_type, | ||
580 | .cra_module = THIS_MODULE, | ||
581 | .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list), | ||
582 | .cra_init = ablk_init, | ||
583 | .cra_exit = ablk_exit, | ||
584 | .cra_u = { | ||
585 | .ablkcipher = { | ||
586 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
587 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
588 | .ivsize = TF_BLOCK_SIZE, | ||
589 | .setkey = ablk_set_key, | ||
590 | .encrypt = ablk_encrypt, | ||
591 | .decrypt = ablk_decrypt, | ||
592 | }, | ||
593 | }, | ||
594 | } }; | ||
595 | |||
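The array pairs five priority-0 internal "__" blkciphers with five priority-400 cryptd-backed ablkcipher wrappers; the high priority is what makes the AVX implementation win algorithm lookup over the plain-C and 3-way drivers. A hedged probe of that selection:

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/printk.h>

static void show_twofish_driver(void)
{
	struct crypto_ablkcipher *tfm;

	tfm = crypto_alloc_ablkcipher("xts(twofish)", 0, 0);
	if (IS_ERR(tfm))
		return;

	/* with this module loaded, expect "xts-twofish-avx" here */
	pr_info("xts(twofish) -> %s\n",
		crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm)));
	crypto_free_ablkcipher(tfm);
}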
596 | static int __init twofish_init(void) | ||
597 | { | ||
598 | u64 xcr0; | ||
599 | |||
600 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
601 | printk(KERN_INFO "AVX instructions are not detected.\n"); | ||
602 | return -ENODEV; | ||
603 | } | ||
604 | |||
605 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
606 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
607 | printk(KERN_INFO "AVX detected but unusable.\n"); | ||
608 | return -ENODEV; | ||
609 | } | ||
610 | |||
611 | return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); | ||
612 | } | ||
613 | |||
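twofish_init() gates loading on both the AVX CPUID flag and OSXSAVE, then verifies via XCR0 that the OS actually context-switches SSE and YMM state. For reference, a hedged userspace analogue of the same probe (GCC/Clang on x86):

#include <stdint.h>
#include <cpuid.h>

/* read XCR0, i.e. the XFEATURE_ENABLED_MASK register checked above */
static uint64_t xgetbv0(void)
{
	uint32_t lo, hi;

	__asm__ volatile("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0));
	return ((uint64_t)hi << 32) | lo;
}

static int avx_usable(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 0;
	if (!(ecx & bit_AVX) || !(ecx & bit_OSXSAVE))
		return 0;
	/* XCR0 bit 1 (SSE) and bit 2 (YMM) must both be enabled */
	return (xgetbv0() & 0x6) == 0x6;
}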
614 | static void __exit twofish_exit(void) | ||
615 | { | ||
616 | crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); | ||
617 | } | ||
618 | |||
619 | module_init(twofish_init); | ||
620 | module_exit(twofish_exit); | ||
621 | |||
622 | MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); | ||
623 | MODULE_LICENSE("GPL"); | ||
624 | MODULE_ALIAS("twofish"); | ||
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 922ab24cce31..15f9347316c8 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c | |||
@@ -3,11 +3,6 @@ | |||
3 | * | 3 | * |
4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> |
5 | * | 5 | * |
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
12 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
13 | * the Free Software Foundation; either version 2 of the License, or | 8 | * the Free Software Foundation; either version 2 of the License, or |
@@ -33,20 +28,13 @@ | |||
33 | #include <crypto/algapi.h> | 28 | #include <crypto/algapi.h> |
34 | #include <crypto/twofish.h> | 29 | #include <crypto/twofish.h> |
35 | #include <crypto/b128ops.h> | 30 | #include <crypto/b128ops.h> |
31 | #include <asm/crypto/twofish.h> | ||
32 | #include <asm/crypto/glue_helper.h> | ||
36 | #include <crypto/lrw.h> | 33 | #include <crypto/lrw.h> |
37 | #include <crypto/xts.h> | 34 | #include <crypto/xts.h> |
38 | 35 | ||
39 | /* regular block cipher functions from twofish_x86_64 module */ | 36 | EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); |
40 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, | 37 | EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); |
41 | const u8 *src); | ||
42 | asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, | ||
43 | const u8 *src); | ||
44 | |||
45 | /* 3-way parallel cipher functions */ | ||
46 | asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
47 | const u8 *src, bool xor); | ||
48 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
49 | const u8 *src); | ||
50 | 38 | ||
51 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | 39 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
52 | const u8 *src) | 40 | const u8 *src) |
@@ -60,311 +48,139 @@ static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, | |||
60 | __twofish_enc_blk_3way(ctx, dst, src, true); | 48 | __twofish_enc_blk_3way(ctx, dst, src, true); |
61 | } | 49 | } |
62 | 50 | ||
63 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | 51 | void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) |
64 | void (*fn)(struct twofish_ctx *, u8 *, const u8 *), | ||
65 | void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *)) | ||
66 | { | ||
67 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
68 | unsigned int bsize = TF_BLOCK_SIZE; | ||
69 | unsigned int nbytes; | ||
70 | int err; | ||
71 | |||
72 | err = blkcipher_walk_virt(desc, walk); | ||
73 | |||
74 | while ((nbytes = walk->nbytes)) { | ||
75 | u8 *wsrc = walk->src.virt.addr; | ||
76 | u8 *wdst = walk->dst.virt.addr; | ||
77 | |||
78 | /* Process three block batch */ | ||
79 | if (nbytes >= bsize * 3) { | ||
80 | do { | ||
81 | fn_3way(ctx, wdst, wsrc); | ||
82 | |||
83 | wsrc += bsize * 3; | ||
84 | wdst += bsize * 3; | ||
85 | nbytes -= bsize * 3; | ||
86 | } while (nbytes >= bsize * 3); | ||
87 | |||
88 | if (nbytes < bsize) | ||
89 | goto done; | ||
90 | } | ||
91 | |||
92 | /* Handle leftovers */ | ||
93 | do { | ||
94 | fn(ctx, wdst, wsrc); | ||
95 | |||
96 | wsrc += bsize; | ||
97 | wdst += bsize; | ||
98 | nbytes -= bsize; | ||
99 | } while (nbytes >= bsize); | ||
100 | |||
101 | done: | ||
102 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
103 | } | ||
104 | |||
105 | return err; | ||
106 | } | ||
107 | |||
108 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
109 | struct scatterlist *src, unsigned int nbytes) | ||
110 | { | 52 | { |
111 | struct blkcipher_walk walk; | 53 | u128 ivs[2]; |
112 | 54 | ||
113 | blkcipher_walk_init(&walk, dst, src, nbytes); | 55 | ivs[0] = src[0]; |
114 | return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way); | 56 | ivs[1] = src[1]; |
115 | } | ||
116 | 57 | ||
117 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 58 | twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); |
118 | struct scatterlist *src, unsigned int nbytes) | ||
119 | { | ||
120 | struct blkcipher_walk walk; | ||
121 | 59 | ||
122 | blkcipher_walk_init(&walk, dst, src, nbytes); | 60 | u128_xor(&dst[1], &dst[1], &ivs[0]); |
123 | return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); | 61 | u128_xor(&dst[2], &dst[2], &ivs[1]); |
124 | } | 62 | } |
63 | EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); | ||
125 | 64 | ||
126 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | 65 | void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) |
127 | struct blkcipher_walk *walk) | ||
128 | { | ||
129 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
130 | unsigned int bsize = TF_BLOCK_SIZE; | ||
131 | unsigned int nbytes = walk->nbytes; | ||
132 | u128 *src = (u128 *)walk->src.virt.addr; | ||
133 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
134 | u128 *iv = (u128 *)walk->iv; | ||
135 | |||
136 | do { | ||
137 | u128_xor(dst, src, iv); | ||
138 | twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
139 | iv = dst; | ||
140 | |||
141 | src += 1; | ||
142 | dst += 1; | ||
143 | nbytes -= bsize; | ||
144 | } while (nbytes >= bsize); | ||
145 | |||
146 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
147 | return nbytes; | ||
148 | } | ||
149 | |||
150 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
151 | struct scatterlist *src, unsigned int nbytes) | ||
152 | { | 66 | { |
153 | struct blkcipher_walk walk; | 67 | be128 ctrblk; |
154 | int err; | ||
155 | 68 | ||
156 | blkcipher_walk_init(&walk, dst, src, nbytes); | 69 | if (dst != src) |
157 | err = blkcipher_walk_virt(desc, &walk); | 70 | *dst = *src; |
158 | 71 | ||
159 | while ((nbytes = walk.nbytes)) { | 72 | u128_to_be128(&ctrblk, iv); |
160 | nbytes = __cbc_encrypt(desc, &walk); | 73 | u128_inc(iv); |
161 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
162 | } | ||
163 | 74 | ||
164 | return err; | 75 | twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
76 | u128_xor(dst, dst, (u128 *)&ctrblk); | ||
165 | } | 77 | } |
78 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); | ||
166 | 79 | ||
167 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 80 | void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, |
168 | struct blkcipher_walk *walk) | 81 | u128 *iv) |
169 | { | 82 | { |
170 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 83 | be128 ctrblks[3]; |
171 | unsigned int bsize = TF_BLOCK_SIZE; | ||
172 | unsigned int nbytes = walk->nbytes; | ||
173 | u128 *src = (u128 *)walk->src.virt.addr; | ||
174 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
175 | u128 ivs[3 - 1]; | ||
176 | u128 last_iv; | ||
177 | |||
178 | /* Start of the last block. */ | ||
179 | src += nbytes / bsize - 1; | ||
180 | dst += nbytes / bsize - 1; | ||
181 | |||
182 | last_iv = *src; | ||
183 | |||
184 | /* Process three block batch */ | ||
185 | if (nbytes >= bsize * 3) { | ||
186 | do { | ||
187 | nbytes -= bsize * (3 - 1); | ||
188 | src -= 3 - 1; | ||
189 | dst -= 3 - 1; | ||
190 | |||
191 | ivs[0] = src[0]; | ||
192 | ivs[1] = src[1]; | ||
193 | |||
194 | twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); | ||
195 | |||
196 | u128_xor(dst + 1, dst + 1, ivs + 0); | ||
197 | u128_xor(dst + 2, dst + 2, ivs + 1); | ||
198 | |||
199 | nbytes -= bsize; | ||
200 | if (nbytes < bsize) | ||
201 | goto done; | ||
202 | |||
203 | u128_xor(dst, dst, src - 1); | ||
204 | src -= 1; | ||
205 | dst -= 1; | ||
206 | } while (nbytes >= bsize * 3); | ||
207 | |||
208 | if (nbytes < bsize) | ||
209 | goto done; | ||
210 | } | ||
211 | |||
212 | /* Handle leftovers */ | ||
213 | for (;;) { | ||
214 | twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); | ||
215 | |||
216 | nbytes -= bsize; | ||
217 | if (nbytes < bsize) | ||
218 | break; | ||
219 | 84 | ||
220 | u128_xor(dst, dst, src - 1); | 85 | if (dst != src) { |
221 | src -= 1; | 86 | dst[0] = src[0]; |
222 | dst -= 1; | 87 | dst[1] = src[1]; |
88 | dst[2] = src[2]; | ||
223 | } | 89 | } |
224 | 90 | ||
225 | done: | 91 | u128_to_be128(&ctrblks[0], iv); |
226 | u128_xor(dst, dst, (u128 *)walk->iv); | 92 | u128_inc(iv); |
227 | *(u128 *)walk->iv = last_iv; | 93 | u128_to_be128(&ctrblks[1], iv); |
94 | u128_inc(iv); | ||
95 | u128_to_be128(&ctrblks[2], iv); | ||
96 | u128_inc(iv); | ||
228 | 97 | ||
229 | return nbytes; | 98 | twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); |
230 | } | 99 | } |
100 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); | ||
101 | |||
102 | static const struct common_glue_ctx twofish_enc = { | ||
103 | .num_funcs = 2, | ||
104 | .fpu_blocks_limit = -1, | ||
105 | |||
106 | .funcs = { { | ||
107 | .num_blocks = 3, | ||
108 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
109 | }, { | ||
110 | .num_blocks = 1, | ||
111 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
112 | } } | ||
113 | }; | ||
231 | 114 | ||
232 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 115 | static const struct common_glue_ctx twofish_ctr = { |
233 | struct scatterlist *src, unsigned int nbytes) | 116 | .num_funcs = 2, |
234 | { | 117 | .fpu_blocks_limit = -1, |
235 | struct blkcipher_walk walk; | 118 | |
236 | int err; | 119 | .funcs = { { |
237 | 120 | .num_blocks = 3, | |
238 | blkcipher_walk_init(&walk, dst, src, nbytes); | 121 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } |
239 | err = blkcipher_walk_virt(desc, &walk); | 122 | }, { |
123 | .num_blocks = 1, | ||
124 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } | ||
125 | } } | ||
126 | }; | ||
240 | 127 | ||
241 | while ((nbytes = walk.nbytes)) { | 128 | static const struct common_glue_ctx twofish_dec = { |
242 | nbytes = __cbc_decrypt(desc, &walk); | 129 | .num_funcs = 2, |
243 | err = blkcipher_walk_done(desc, &walk, nbytes); | 130 | .fpu_blocks_limit = -1, |
244 | } | 131 | |
132 | .funcs = { { | ||
133 | .num_blocks = 3, | ||
134 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
135 | }, { | ||
136 | .num_blocks = 1, | ||
137 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
138 | } } | ||
139 | }; | ||
245 | 140 | ||
246 | return err; | 141 | static const struct common_glue_ctx twofish_dec_cbc = { |
247 | } | 142 | .num_funcs = 2, |
143 | .fpu_blocks_limit = -1, | ||
144 | |||
145 | .funcs = { { | ||
146 | .num_blocks = 3, | ||
147 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
148 | }, { | ||
149 | .num_blocks = 1, | ||
150 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
151 | } } | ||
152 | }; | ||
248 | 153 | ||
249 | static inline void u128_to_be128(be128 *dst, const u128 *src) | 154 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
155 | struct scatterlist *src, unsigned int nbytes) | ||
250 | { | 156 | { |
251 | dst->a = cpu_to_be64(src->a); | 157 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); |
252 | dst->b = cpu_to_be64(src->b); | ||
253 | } | 158 | } |
254 | 159 | ||
255 | static inline void be128_to_u128(u128 *dst, const be128 *src) | 160 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
161 | struct scatterlist *src, unsigned int nbytes) | ||
256 | { | 162 | { |
257 | dst->a = be64_to_cpu(src->a); | 163 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); |
258 | dst->b = be64_to_cpu(src->b); | ||
259 | } | 164 | } |
260 | 165 | ||
261 | static inline void u128_inc(u128 *i) | 166 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
167 | struct scatterlist *src, unsigned int nbytes) | ||
262 | { | 168 | { |
263 | i->b++; | 169 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, |
264 | if (!i->b) | 170 | dst, src, nbytes); |
265 | i->a++; | ||
266 | } | 171 | } |
267 | 172 | ||
268 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 173 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
269 | struct blkcipher_walk *walk) | 174 | struct scatterlist *src, unsigned int nbytes) |
270 | { | 175 | { |
271 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 176 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, |
272 | u8 *ctrblk = walk->iv; | 177 | nbytes); |
273 | u8 keystream[TF_BLOCK_SIZE]; | ||
274 | u8 *src = walk->src.virt.addr; | ||
275 | u8 *dst = walk->dst.virt.addr; | ||
276 | unsigned int nbytes = walk->nbytes; | ||
277 | |||
278 | twofish_enc_blk(ctx, keystream, ctrblk); | ||
279 | crypto_xor(keystream, src, nbytes); | ||
280 | memcpy(dst, keystream, nbytes); | ||
281 | |||
282 | crypto_inc(ctrblk, TF_BLOCK_SIZE); | ||
283 | } | ||
284 | |||
285 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
286 | struct blkcipher_walk *walk) | ||
287 | { | ||
288 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
289 | unsigned int bsize = TF_BLOCK_SIZE; | ||
290 | unsigned int nbytes = walk->nbytes; | ||
291 | u128 *src = (u128 *)walk->src.virt.addr; | ||
292 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
293 | u128 ctrblk; | ||
294 | be128 ctrblocks[3]; | ||
295 | |||
296 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
297 | |||
298 | /* Process three block batch */ | ||
299 | if (nbytes >= bsize * 3) { | ||
300 | do { | ||
301 | if (dst != src) { | ||
302 | dst[0] = src[0]; | ||
303 | dst[1] = src[1]; | ||
304 | dst[2] = src[2]; | ||
305 | } | ||
306 | |||
307 | /* create ctrblks for parallel encrypt */ | ||
308 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
309 | u128_inc(&ctrblk); | ||
310 | u128_to_be128(&ctrblocks[1], &ctrblk); | ||
311 | u128_inc(&ctrblk); | ||
312 | u128_to_be128(&ctrblocks[2], &ctrblk); | ||
313 | u128_inc(&ctrblk); | ||
314 | |||
315 | twofish_enc_blk_xor_3way(ctx, (u8 *)dst, | ||
316 | (u8 *)ctrblocks); | ||
317 | |||
318 | src += 3; | ||
319 | dst += 3; | ||
320 | nbytes -= bsize * 3; | ||
321 | } while (nbytes >= bsize * 3); | ||
322 | |||
323 | if (nbytes < bsize) | ||
324 | goto done; | ||
325 | } | ||
326 | |||
327 | /* Handle leftovers */ | ||
328 | do { | ||
329 | if (dst != src) | ||
330 | *dst = *src; | ||
331 | |||
332 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
333 | u128_inc(&ctrblk); | ||
334 | |||
335 | twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
336 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
337 | |||
338 | src += 1; | ||
339 | dst += 1; | ||
340 | nbytes -= bsize; | ||
341 | } while (nbytes >= bsize); | ||
342 | |||
343 | done: | ||
344 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
345 | return nbytes; | ||
346 | } | 178 | } |
347 | 179 | ||
348 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 180 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
349 | struct scatterlist *src, unsigned int nbytes) | 181 | struct scatterlist *src, unsigned int nbytes) |
350 | { | 182 | { |
351 | struct blkcipher_walk walk; | 183 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); |
352 | int err; | ||
353 | |||
354 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
355 | err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); | ||
356 | |||
357 | while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { | ||
358 | nbytes = __ctr_crypt(desc, &walk); | ||
359 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
360 | } | ||
361 | |||
362 | if (walk.nbytes) { | ||
363 | ctr_crypt_final(desc, &walk); | ||
364 | err = blkcipher_walk_done(desc, &walk, 0); | ||
365 | } | ||
366 | |||
367 | return err; | ||
368 | } | 184 | } |
369 | 185 | ||
370 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | 186 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) |
@@ -397,13 +213,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |||
397 | twofish_dec_blk(ctx, srcdst, srcdst); | 213 | twofish_dec_blk(ctx, srcdst, srcdst); |
398 | } | 214 | } |
399 | 215 | ||
400 | struct twofish_lrw_ctx { | 216 | int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
401 | struct lrw_table_ctx lrw_table; | 217 | unsigned int keylen) |
402 | struct twofish_ctx twofish_ctx; | ||
403 | }; | ||
404 | |||
405 | static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
406 | unsigned int keylen) | ||
407 | { | 218 | { |
408 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 219 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
409 | int err; | 220 | int err; |
@@ -415,6 +226,7 @@ static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
415 | 226 | ||
416 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); | 227 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); |
417 | } | 228 | } |
229 | EXPORT_SYMBOL_GPL(lrw_twofish_setkey); | ||
418 | 230 | ||
419 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 231 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
420 | struct scatterlist *src, unsigned int nbytes) | 232 | struct scatterlist *src, unsigned int nbytes) |
@@ -450,20 +262,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
450 | return lrw_crypt(desc, dst, src, nbytes, &req); | 262 | return lrw_crypt(desc, dst, src, nbytes, &req); |
451 | } | 263 | } |
452 | 264 | ||
453 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | 265 | void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) |
454 | { | 266 | { |
455 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 267 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
456 | 268 | ||
457 | lrw_free_table(&ctx->lrw_table); | 269 | lrw_free_table(&ctx->lrw_table); |
458 | } | 270 | } |
271 | EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm); | ||
459 | 272 | ||
460 | struct twofish_xts_ctx { | 273 | int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
461 | struct twofish_ctx tweak_ctx; | 274 | unsigned int keylen) |
462 | struct twofish_ctx crypt_ctx; | ||
463 | }; | ||
464 | |||
465 | static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
466 | unsigned int keylen) | ||
467 | { | 275 | { |
468 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); | 276 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); |
469 | u32 *flags = &tfm->crt_flags; | 277 | u32 *flags = &tfm->crt_flags; |
@@ -486,6 +294,7 @@ static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
486 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, | 294 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, |
487 | flags); | 295 | flags); |
488 | } | 296 | } |
297 | EXPORT_SYMBOL_GPL(xts_twofish_setkey); | ||
489 | 298 | ||
490 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 299 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
491 | struct scatterlist *src, unsigned int nbytes) | 300 | struct scatterlist *src, unsigned int nbytes) |
@@ -596,7 +405,7 @@ static struct crypto_alg tf_algs[5] = { { | |||
596 | .cra_type = &crypto_blkcipher_type, | 405 | .cra_type = &crypto_blkcipher_type, |
597 | .cra_module = THIS_MODULE, | 406 | .cra_module = THIS_MODULE, |
598 | .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), | 407 | .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), |
599 | .cra_exit = lrw_exit_tfm, | 408 | .cra_exit = lrw_twofish_exit_tfm, |
600 | .cra_u = { | 409 | .cra_u = { |
601 | .blkcipher = { | 410 | .blkcipher = { |
602 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, | 411 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, |
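All four modes now funnel into the shared glue dispatcher, which walks the funcs[] table widest-entry-first; since the last entry must handle a single block, any block-multiple chunk is fully consumed. A sketch of what that dispatch does with one contiguous chunk (the real glue_ecb_crypt_128bit() additionally drives the blkcipher walk and FPU state):

#include <linux/types.h>
#include <asm/crypto/glue_helper.h>

/* consume one contiguous chunk: widest entry first, 1-block entry last */
static unsigned int ecb_dispatch(const struct common_glue_ctx *gctx,
				 void *ctx, u8 *buf, unsigned int nbytes,
				 unsigned int bsize)
{
	unsigned int i;

	for (i = 0; i < gctx->num_funcs; i++) {
		unsigned int func_bytes =
			bsize * gctx->funcs[i].num_blocks;

		while (nbytes >= func_bytes) {
			gctx->funcs[i].fn_u.ecb(ctx, buf, buf);
			buf += func_bytes;
			nbytes -= func_bytes;
		}
	}
	return nbytes;	/* anything left is smaller than one block */
}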
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3ea51a84a0e4..f34261296ffb 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -546,7 +546,7 @@ static inline const struct cpumask *online_target_cpus(void) | |||
546 | return cpu_online_mask; | 546 | return cpu_online_mask; |
547 | } | 547 | } |
548 | 548 | ||
549 | DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); | 549 | DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); |
550 | 550 | ||
551 | 551 | ||
552 | static inline unsigned int read_apic_id(void) | 552 | static inline unsigned int read_apic_id(void) |
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index eb45aa6b1f27..2ad874cb661c 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h | |||
@@ -66,6 +66,7 @@ struct setup_header { | |||
66 | __u64 setup_data; | 66 | __u64 setup_data; |
67 | __u64 pref_address; | 67 | __u64 pref_address; |
68 | __u32 init_size; | 68 | __u32 init_size; |
69 | __u32 handover_offset; | ||
69 | } __attribute__((packed)); | 70 | } __attribute__((packed)); |
70 | 71 | ||
71 | struct sys_desc_table { | 72 | struct sys_desc_table { |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f91e80f4f180..6b7ee5ff6820 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -207,6 +207,8 @@ | |||
207 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | 207 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ |
208 | #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ | 208 | #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ |
209 | #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ | 209 | #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ |
210 | #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ | ||
211 | #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ | ||
210 | 212 | ||
211 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 213 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
212 | 214 | ||
diff --git a/arch/x86/include/asm/crypto/ablk_helper.h b/arch/x86/include/asm/crypto/ablk_helper.h new file mode 100644 index 000000000000..4f93df50c23e --- /dev/null +++ b/arch/x86/include/asm/crypto/ablk_helper.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /* | ||
2 | * Shared async block cipher helpers | ||
3 | */ | ||
4 | |||
5 | #ifndef _CRYPTO_ABLK_HELPER_H | ||
6 | #define _CRYPTO_ABLK_HELPER_H | ||
7 | |||
8 | #include <linux/crypto.h> | ||
9 | #include <linux/kernel.h> | ||
10 | #include <crypto/cryptd.h> | ||
11 | |||
12 | struct async_helper_ctx { | ||
13 | struct cryptd_ablkcipher *cryptd_tfm; | ||
14 | }; | ||
15 | |||
16 | extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
17 | unsigned int key_len); | ||
18 | |||
19 | extern int __ablk_encrypt(struct ablkcipher_request *req); | ||
20 | |||
21 | extern int ablk_encrypt(struct ablkcipher_request *req); | ||
22 | |||
23 | extern int ablk_decrypt(struct ablkcipher_request *req); | ||
24 | |||
25 | extern void ablk_exit(struct crypto_tfm *tfm); | ||
26 | |||
27 | extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name); | ||
28 | |||
29 | extern int ablk_init(struct crypto_tfm *tfm); | ||
30 | |||
31 | #endif /* _CRYPTO_ABLK_HELPER_H */ | ||
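ablk_init() derives the backing name as "__driver-" plus the outer driver name; an algorithm whose internal name does not follow that convention can supply its own cra_init and call ablk_init_common() directly. A hedged sketch, borrowing an internal driver name from the twofish entries above:

#include <linux/crypto.h>
#include <asm/crypto/ablk_helper.h>

/* bind the async wrapper to an explicitly named internal blkcipher */
static int my_ablk_init(struct crypto_tfm *tfm)
{
	return ablk_init_common(tfm, "__driver-ecb-twofish-avx");
}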
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/crypto/aes.h index 80545a1cbe39..80545a1cbe39 100644 --- a/arch/x86/include/asm/aes.h +++ b/arch/x86/include/asm/crypto/aes.h | |||
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h new file mode 100644 index 000000000000..3e408bddc96f --- /dev/null +++ b/arch/x86/include/asm/crypto/glue_helper.h | |||
@@ -0,0 +1,115 @@ | |||
1 | /* | ||
2 | * Shared glue code for 128bit block ciphers | ||
3 | */ | ||
4 | |||
5 | #ifndef _CRYPTO_GLUE_HELPER_H | ||
6 | #define _CRYPTO_GLUE_HELPER_H | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/crypto.h> | ||
10 | #include <asm/i387.h> | ||
11 | #include <crypto/b128ops.h> | ||
12 | |||
13 | typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); | ||
14 | typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); | ||
15 | typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, | ||
16 | u128 *iv); | ||
17 | |||
18 | #define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) | ||
19 | #define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) | ||
20 | #define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) | ||
21 | |||
22 | struct common_glue_func_entry { | ||
23 | unsigned int num_blocks; /* number of blocks that @fn will process */ | ||
24 | union { | ||
25 | common_glue_func_t ecb; | ||
26 | common_glue_cbc_func_t cbc; | ||
27 | common_glue_ctr_func_t ctr; | ||
28 | } fn_u; | ||
29 | }; | ||
30 | |||
31 | struct common_glue_ctx { | ||
32 | unsigned int num_funcs; | ||
33 | int fpu_blocks_limit; /* -1 means fpu not needed at all */ | ||
34 | |||
35 | /* | ||
36 | * First funcs entry must have largest num_blocks and last funcs entry | ||
37 | * must have num_blocks == 1! | ||
38 | */ | ||
39 | struct common_glue_func_entry funcs[]; | ||
40 | }; | ||
41 | |||
42 | static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, | ||
43 | struct blkcipher_desc *desc, | ||
44 | bool fpu_enabled, unsigned int nbytes) | ||
45 | { | ||
46 | if (likely(fpu_blocks_limit < 0)) | ||
47 | return false; | ||
48 | |||
49 | if (fpu_enabled) | ||
50 | return true; | ||
51 | |||
52 | /* | ||
53 | * Vector-registers are only used when chunk to be processed is large | ||
54 | * enough, so do not enable FPU until it is necessary. | ||
55 | */ | ||
56 | if (nbytes < bsize * (unsigned int)fpu_blocks_limit) | ||
57 | return false; | ||
58 | |||
59 | if (desc) { | ||
60 | /* prevent sleeping if FPU is in use */ | ||
61 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
62 | } | ||
63 | |||
64 | kernel_fpu_begin(); | ||
65 | return true; | ||
66 | } | ||
67 | |||
68 | static inline void glue_fpu_end(bool fpu_enabled) | ||
69 | { | ||
70 | if (fpu_enabled) | ||
71 | kernel_fpu_end(); | ||
72 | } | ||
73 | |||
74 | static inline void u128_to_be128(be128 *dst, const u128 *src) | ||
75 | { | ||
76 | dst->a = cpu_to_be64(src->a); | ||
77 | dst->b = cpu_to_be64(src->b); | ||
78 | } | ||
79 | |||
80 | static inline void be128_to_u128(u128 *dst, const be128 *src) | ||
81 | { | ||
82 | dst->a = be64_to_cpu(src->a); | ||
83 | dst->b = be64_to_cpu(src->b); | ||
84 | } | ||
85 | |||
86 | static inline void u128_inc(u128 *i) | ||
87 | { | ||
88 | i->b++; | ||
89 | if (!i->b) | ||
90 | i->a++; | ||
91 | } | ||
92 | |||
93 | extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
94 | struct blkcipher_desc *desc, | ||
95 | struct scatterlist *dst, | ||
96 | struct scatterlist *src, unsigned int nbytes); | ||
97 | |||
98 | extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
99 | struct blkcipher_desc *desc, | ||
100 | struct scatterlist *dst, | ||
101 | struct scatterlist *src, | ||
102 | unsigned int nbytes); | ||
103 | |||
104 | extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
105 | struct blkcipher_desc *desc, | ||
106 | struct scatterlist *dst, | ||
107 | struct scatterlist *src, | ||
108 | unsigned int nbytes); | ||
109 | |||
110 | extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
111 | struct blkcipher_desc *desc, | ||
112 | struct scatterlist *dst, | ||
113 | struct scatterlist *src, unsigned int nbytes); | ||
114 | |||
115 | #endif /* _CRYPTO_GLUE_HELPER_H */ | ||
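The u128 helpers are what the CTR paths use to serialize and bump the counter. Generating the three counter blocks consumed by twofish_enc_blk_ctr_3way(), for instance, reduces to:

#include <asm/crypto/glue_helper.h>

static void next_ctrblks(be128 ctrblks[3], u128 *iv)
{
	int i;

	for (i = 0; i < 3; i++) {
		u128_to_be128(&ctrblks[i], iv);	/* big-endian wire format */
		u128_inc(iv);			/* 128-bit increment with carry */
	}
}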
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h new file mode 100644 index 000000000000..432deedd2945 --- /dev/null +++ b/arch/x86/include/asm/crypto/serpent-avx.h | |||
@@ -0,0 +1,32 @@ | |||
1 | #ifndef ASM_X86_SERPENT_AVX_H | ||
2 | #define ASM_X86_SERPENT_AVX_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | #include <crypto/serpent.h> | ||
6 | |||
7 | #define SERPENT_PARALLEL_BLOCKS 8 | ||
8 | |||
9 | asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
10 | const u8 *src, bool xor); | ||
11 | asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
12 | const u8 *src); | ||
13 | |||
14 | static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
15 | const u8 *src) | ||
16 | { | ||
17 | __serpent_enc_blk_8way_avx(ctx, dst, src, false); | ||
18 | } | ||
19 | |||
20 | static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, | ||
21 | const u8 *src) | ||
22 | { | ||
23 | __serpent_enc_blk_8way_avx(ctx, dst, src, true); | ||
24 | } | ||
25 | |||
26 | static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, | ||
27 | const u8 *src) | ||
28 | { | ||
29 | serpent_dec_blk_8way_avx(ctx, dst, src); | ||
30 | } | ||
31 | |||
32 | #endif | ||
diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/crypto/serpent-sse2.h index d3ef63fe0c81..e6e77dffbdab 100644 --- a/arch/x86/include/asm/serpent.h +++ b/arch/x86/include/asm/crypto/serpent-sse2.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef ASM_X86_SERPENT_H | 1 | #ifndef ASM_X86_SERPENT_SSE2_H |
2 | #define ASM_X86_SERPENT_H | 2 | #define ASM_X86_SERPENT_SSE2_H |
3 | 3 | ||
4 | #include <linux/crypto.h> | 4 | #include <linux/crypto.h> |
5 | #include <crypto/serpent.h> | 5 | #include <crypto/serpent.h> |
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h new file mode 100644 index 000000000000..9d2c514bd5f9 --- /dev/null +++ b/arch/x86/include/asm/crypto/twofish.h | |||
@@ -0,0 +1,46 @@ | |||
1 | #ifndef ASM_X86_TWOFISH_H | ||
2 | #define ASM_X86_TWOFISH_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | #include <crypto/twofish.h> | ||
6 | #include <crypto/lrw.h> | ||
7 | #include <crypto/b128ops.h> | ||
8 | |||
9 | struct twofish_lrw_ctx { | ||
10 | struct lrw_table_ctx lrw_table; | ||
11 | struct twofish_ctx twofish_ctx; | ||
12 | }; | ||
13 | |||
14 | struct twofish_xts_ctx { | ||
15 | struct twofish_ctx tweak_ctx; | ||
16 | struct twofish_ctx crypt_ctx; | ||
17 | }; | ||
18 | |||
19 | /* regular block cipher functions from twofish_x86_64 module */ | ||
20 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, | ||
21 | const u8 *src); | ||
22 | asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, | ||
23 | const u8 *src); | ||
24 | |||
25 | /* 3-way parallel cipher functions */ | ||
26 | asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
27 | const u8 *src, bool xor); | ||
28 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
29 | const u8 *src); | ||
30 | |||
31 | /* helpers from twofish_x86_64-3way module */ | ||
32 | extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); | ||
33 | extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, | ||
34 | u128 *iv); | ||
35 | extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, | ||
36 | u128 *iv); | ||
37 | |||
38 | extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
39 | unsigned int keylen); | ||
40 | |||
41 | extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); | ||
42 | |||
43 | extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
44 | unsigned int keylen); | ||
45 | |||
46 | #endif /* ASM_X86_TWOFISH_H */ | ||
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 0baa628e330c..40afa0005c69 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -15,15 +15,6 @@ BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) | |||
15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) | 15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) |
16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) | 16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) |
17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) | 17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) |
18 | |||
19 | .irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | ||
20 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | ||
21 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | ||
22 | BUILD_INTERRUPT3(invalidate_interrupt\idx, | ||
23 | (INVALIDATE_TLB_VECTOR_START)+\idx, | ||
24 | smp_invalidate_interrupt) | ||
25 | .endif | ||
26 | .endr | ||
27 | #endif | 18 | #endif |
28 | 19 | ||
29 | BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) | 20 | BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4b4448761e88..1508e518c7e3 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -119,17 +119,6 @@ | |||
119 | */ | 119 | */ |
120 | #define LOCAL_TIMER_VECTOR 0xef | 120 | #define LOCAL_TIMER_VECTOR 0xef |
121 | 121 | ||
122 | /* up to 32 vectors used for spreading out TLB flushes: */ | ||
123 | #if NR_CPUS <= 32 | ||
124 | # define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS) | ||
125 | #else | ||
126 | # define NUM_INVALIDATE_TLB_VECTORS (32) | ||
127 | #endif | ||
128 | |||
129 | #define INVALIDATE_TLB_VECTOR_END (0xee) | ||
130 | #define INVALIDATE_TLB_VECTOR_START \ | ||
131 | (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1) | ||
132 | |||
133 | #define NR_VECTORS 256 | 122 | #define NR_VECTORS 256 |
134 | 123 | ||
135 | #define FPU_IRQ 13 | 124 | #define FPU_IRQ 13 |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 0b47ddb6f00b..a0facf3908d7 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -360,9 +360,10 @@ static inline void __flush_tlb_single(unsigned long addr) | |||
360 | 360 | ||
361 | static inline void flush_tlb_others(const struct cpumask *cpumask, | 361 | static inline void flush_tlb_others(const struct cpumask *cpumask, |
362 | struct mm_struct *mm, | 362 | struct mm_struct *mm, |
363 | unsigned long va) | 363 | unsigned long start, |
364 | unsigned long end) | ||
364 | { | 365 | { |
365 | PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); | 366 | PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end); |
366 | } | 367 | } |
367 | 368 | ||
368 | static inline int paravirt_pgd_alloc(struct mm_struct *mm) | 369 | static inline int paravirt_pgd_alloc(struct mm_struct *mm) |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 8613cbb7ba41..142236ed83af 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -248,7 +248,8 @@ struct pv_mmu_ops { | |||
248 | void (*flush_tlb_single)(unsigned long addr); | 248 | void (*flush_tlb_single)(unsigned long addr); |
249 | void (*flush_tlb_others)(const struct cpumask *cpus, | 249 | void (*flush_tlb_others)(const struct cpumask *cpus, |
250 | struct mm_struct *mm, | 250 | struct mm_struct *mm, |
251 | unsigned long va); | 251 | unsigned long start, |
252 | unsigned long end); | ||
252 | 253 | ||
253 | /* Hooks for allocating and freeing a pagetable top-level */ | 254 | /* Hooks for allocating and freeing a pagetable top-level */ |
254 | int (*pgd_alloc)(struct mm_struct *mm); | 255 | int (*pgd_alloc)(struct mm_struct *mm); |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index d9b8e3f7f42a..1104afaba52b 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -551,6 +551,12 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off); | |||
551 | { [0 ... NR_CPUS-1] = _initvalue }; \ | 551 | { [0 ... NR_CPUS-1] = _initvalue }; \ |
552 | __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map | 552 | __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map |
553 | 553 | ||
554 | #define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \ | ||
555 | DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue; \ | ||
556 | __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \ | ||
557 | { [0 ... NR_CPUS-1] = _initvalue }; \ | ||
558 | __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map | ||
559 | |||
554 | #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ | 560 | #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ |
555 | EXPORT_PER_CPU_SYMBOL(_name) | 561 | EXPORT_PER_CPU_SYMBOL(_name) |
556 | 562 | ||
@@ -559,6 +565,11 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off); | |||
559 | extern __typeof__(_type) *_name##_early_ptr; \ | 565 | extern __typeof__(_type) *_name##_early_ptr; \ |
560 | extern __typeof__(_type) _name##_early_map[] | 566 | extern __typeof__(_type) _name##_early_map[] |
561 | 567 | ||
568 | #define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ | ||
569 | DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \ | ||
570 | extern __typeof__(_type) *_name##_early_ptr; \ | ||
571 | extern __typeof__(_type) _name##_early_map[] | ||
572 | |||
562 | #define early_per_cpu_ptr(_name) (_name##_early_ptr) | 573 | #define early_per_cpu_ptr(_name) (_name##_early_ptr) |
563 | #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) | 574 | #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) |
564 | #define early_per_cpu(_name, _cpu) \ | 575 | #define early_per_cpu(_name, _cpu) \ |
@@ -570,12 +581,18 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off); | |||
570 | #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ | 581 | #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ |
571 | DEFINE_PER_CPU(_type, _name) = _initvalue | 582 | DEFINE_PER_CPU(_type, _name) = _initvalue |
572 | 583 | ||
584 | #define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \ | ||
585 | DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue | ||
586 | |||
573 | #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ | 587 | #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ |
574 | EXPORT_PER_CPU_SYMBOL(_name) | 588 | EXPORT_PER_CPU_SYMBOL(_name) |
575 | 589 | ||
576 | #define DECLARE_EARLY_PER_CPU(_type, _name) \ | 590 | #define DECLARE_EARLY_PER_CPU(_type, _name) \ |
577 | DECLARE_PER_CPU(_type, _name) | 591 | DECLARE_PER_CPU(_type, _name) |
578 | 592 | ||
593 | #define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \ | ||
594 | DECLARE_PER_CPU_READ_MOSTLY(_type, _name) | ||
595 | |||
579 | #define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) | 596 | #define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu) |
580 | #define early_per_cpu_ptr(_name) NULL | 597 | #define early_per_cpu_ptr(_name) NULL |
581 | /* no early_per_cpu_map() */ | 598 | /* no early_per_cpu_map() */ |
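The _READ_MOSTLY variants behave exactly like their plain counterparts but place the per-cpu copy in the read-mostly section, keeping hot read-only data off frequently written cache lines. A usage sketch (my_node_id is illustrative, not an existing kernel symbol):

#include <linux/types.h>
#include <asm/percpu.h>

DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, my_node_id, 0);
EXPORT_EARLY_PER_CPU_SYMBOL(my_node_id);

static u16 node_of(int cpu)
{
	/* early_per_cpu() reads the __initdata map before the per-cpu
	 * areas are set up, the read-mostly per-cpu copy afterwards */
	return early_per_cpu(my_node_id, cpu);
}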
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 39bc5777211a..d048cad9bcad 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -61,6 +61,19 @@ static inline void *current_text_addr(void) | |||
61 | # define ARCH_MIN_MMSTRUCT_ALIGN 0 | 61 | # define ARCH_MIN_MMSTRUCT_ALIGN 0 |
62 | #endif | 62 | #endif |
63 | 63 | ||
64 | enum tlb_infos { | ||
65 | ENTRIES, | ||
66 | NR_INFO | ||
67 | }; | ||
68 | |||
69 | extern u16 __read_mostly tlb_lli_4k[NR_INFO]; | ||
70 | extern u16 __read_mostly tlb_lli_2m[NR_INFO]; | ||
71 | extern u16 __read_mostly tlb_lli_4m[NR_INFO]; | ||
72 | extern u16 __read_mostly tlb_lld_4k[NR_INFO]; | ||
73 | extern u16 __read_mostly tlb_lld_2m[NR_INFO]; | ||
74 | extern u16 __read_mostly tlb_lld_4m[NR_INFO]; | ||
75 | extern s8 __read_mostly tlb_flushall_shift; | ||
76 | |||
64 | /* | 77 | /* |
65 | * CPU type and hardware bug flags. Kept separately for each CPU. | 78 | * CPU type and hardware bug flags. Kept separately for each CPU. |
66 | * Members of this structure are referenced in head.S, so think twice | 79 | * Members of this structure are referenced in head.S, so think twice |
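These exports publish per-CPU TLB geometry (ENTRIES indexes the entry count) plus the tlb_flushall_shift tunable. A hedged sketch of the kind of decision they feed — the actual policy lives in the flush_tlb_mm_range() implementation, not here:

#include <linux/types.h>
#include <asm/processor.h>

/* flush page-by-page only while the range stays small relative to
 * the last-level DTLB capacity, scaled by the tunable shift */
static bool flush_by_page(unsigned long npages)
{
	if (tlb_flushall_shift < 0)
		return false;	/* negative shift: always full flush */

	return npages <= (unsigned long)(tlb_lld_4k[ENTRIES] >>
					 tlb_flushall_shift);
}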
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 2ffa95dc2333..4f19a1526037 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -31,12 +31,12 @@ static inline bool cpu_has_ht_siblings(void) | |||
31 | return has_siblings; | 31 | return has_siblings; |
32 | } | 32 | } |
33 | 33 | ||
34 | DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); | 34 | DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); |
35 | DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); | 35 | DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); |
36 | /* cpus sharing the last level cache: */ | 36 | /* cpus sharing the last level cache: */ |
37 | DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); | 37 | DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); |
38 | DECLARE_PER_CPU(u16, cpu_llc_id); | 38 | DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); |
39 | DECLARE_PER_CPU(int, cpu_number); | 39 | DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); |
40 | 40 | ||
41 | static inline struct cpumask *cpu_sibling_mask(int cpu) | 41 | static inline struct cpumask *cpu_sibling_mask(int cpu) |
42 | { | 42 | { |
@@ -53,10 +53,10 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) | |||
53 | return per_cpu(cpu_llc_shared_map, cpu); | 53 | return per_cpu(cpu_llc_shared_map, cpu); |
54 | } | 54 | } |
55 | 55 | ||
56 | DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); | 56 | DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); |
57 | DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); | 57 | DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); |
58 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) | 58 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) |
59 | DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid); | 59 | DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); |
60 | #endif | 60 | #endif |
61 | 61 | ||
62 | /* Static state in head.S used to set up a CPU */ | 62 | /* Static state in head.S used to set up a CPU */ |
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 829215fef9ee..4fef20773b8f 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h | |||
@@ -4,7 +4,14 @@ | |||
4 | #define tlb_start_vma(tlb, vma) do { } while (0) | 4 | #define tlb_start_vma(tlb, vma) do { } while (0) |
5 | #define tlb_end_vma(tlb, vma) do { } while (0) | 5 | #define tlb_end_vma(tlb, vma) do { } while (0) |
6 | #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) | 6 | #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) |
7 | #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) | 7 | |
8 | #define tlb_flush(tlb) \ | ||
9 | { \ | ||
10 | if (tlb->fullmm == 0) \ | ||
11 | flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \ | ||
12 | else \ | ||
13 | flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL); \ | ||
14 | } | ||
8 | 15 | ||
9 | #include <asm-generic/tlb.h> | 16 | #include <asm-generic/tlb.h> |
10 | 17 | ||
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 36a1a2ab87d2..74a44333545a 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h | |||
@@ -73,14 +73,10 @@ static inline void __flush_tlb_one(unsigned long addr) | |||
73 | * - flush_tlb_page(vma, vmaddr) flushes one page | 73 | * - flush_tlb_page(vma, vmaddr) flushes one page |
74 | * - flush_tlb_range(vma, start, end) flushes a range of pages | 74 | * - flush_tlb_range(vma, start, end) flushes a range of pages |
75 | * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages | 75 | * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages |
76 | * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus | 76 | * - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus |
77 | * | 77 | * |
78 | * ..but the i386 has somewhat limited tlb flushing capabilities, | 78 | * ..but the i386 has somewhat limited tlb flushing capabilities, |
79 | * and page-granular flushes are available only on i486 and up. | 79 | * and page-granular flushes are available only on i486 and up. |
80 | * | ||
81 | * x86-64 can only flush individual pages or full VMs. For a range flush | ||
82 | * we always do the full VM. Might be worth trying if for a small | ||
83 | * range a few INVLPGs in a row are a win. | ||
84 | */ | 80 | */ |
85 | 81 | ||
86 | #ifndef CONFIG_SMP | 82 | #ifndef CONFIG_SMP |
@@ -109,9 +105,17 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, | |||
109 | __flush_tlb(); | 105 | __flush_tlb(); |
110 | } | 106 | } |
111 | 107 | ||
108 | static inline void flush_tlb_mm_range(struct mm_struct *mm, | ||
109 | unsigned long start, unsigned long end, unsigned long vmflag) | ||
110 | { | ||
111 | if (mm == current->active_mm) | ||
112 | __flush_tlb(); | ||
113 | } | ||
114 | |||
112 | static inline void native_flush_tlb_others(const struct cpumask *cpumask, | 115 | static inline void native_flush_tlb_others(const struct cpumask *cpumask, |
113 | struct mm_struct *mm, | 116 | struct mm_struct *mm, |
114 | unsigned long va) | 117 | unsigned long start, |
118 | unsigned long end) | ||
115 | { | 119 | { |
116 | } | 120 | } |
117 | 121 | ||
@@ -119,27 +123,35 @@ static inline void reset_lazy_tlbstate(void) | |||
119 | { | 123 | { |
120 | } | 124 | } |
121 | 125 | ||
126 | static inline void flush_tlb_kernel_range(unsigned long start, | ||
127 | unsigned long end) | ||
128 | { | ||
129 | flush_tlb_all(); | ||
130 | } | ||
131 | |||
122 | #else /* SMP */ | 132 | #else /* SMP */ |
123 | 133 | ||
124 | #include <asm/smp.h> | 134 | #include <asm/smp.h> |
125 | 135 | ||
126 | #define local_flush_tlb() __flush_tlb() | 136 | #define local_flush_tlb() __flush_tlb() |
127 | 137 | ||
138 | #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL) | ||
139 | |||
140 | #define flush_tlb_range(vma, start, end) \ | ||
141 | flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) | ||
142 | |||
128 | extern void flush_tlb_all(void); | 143 | extern void flush_tlb_all(void); |
129 | extern void flush_tlb_current_task(void); | 144 | extern void flush_tlb_current_task(void); |
130 | extern void flush_tlb_mm(struct mm_struct *); | ||
131 | extern void flush_tlb_page(struct vm_area_struct *, unsigned long); | 145 | extern void flush_tlb_page(struct vm_area_struct *, unsigned long); |
146 | extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | ||
147 | unsigned long end, unsigned long vmflag); | ||
148 | extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); | ||
132 | 149 | ||
133 | #define flush_tlb() flush_tlb_current_task() | 150 | #define flush_tlb() flush_tlb_current_task() |
134 | 151 | ||
135 | static inline void flush_tlb_range(struct vm_area_struct *vma, | ||
136 | unsigned long start, unsigned long end) | ||
137 | { | ||
138 | flush_tlb_mm(vma->vm_mm); | ||
139 | } | ||
140 | |||
141 | void native_flush_tlb_others(const struct cpumask *cpumask, | 152 | void native_flush_tlb_others(const struct cpumask *cpumask, |
142 | struct mm_struct *mm, unsigned long va); | 153 | struct mm_struct *mm, |
154 | unsigned long start, unsigned long end); | ||
143 | 155 | ||
144 | #define TLBSTATE_OK 1 | 156 | #define TLBSTATE_OK 1 |
145 | #define TLBSTATE_LAZY 2 | 157 | #define TLBSTATE_LAZY 2 |
@@ -159,13 +171,8 @@ static inline void reset_lazy_tlbstate(void) | |||
159 | #endif /* SMP */ | 171 | #endif /* SMP */ |
160 | 172 | ||
161 | #ifndef CONFIG_PARAVIRT | 173 | #ifndef CONFIG_PARAVIRT |
162 | #define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) | 174 | #define flush_tlb_others(mask, mm, start, end) \ |
175 | native_flush_tlb_others(mask, mm, start, end) | ||
163 | #endif | 176 | #endif |
164 | 177 | ||
165 | static inline void flush_tlb_kernel_range(unsigned long start, | ||
166 | unsigned long end) | ||
167 | { | ||
168 | flush_tlb_all(); | ||
169 | } | ||
170 | |||
171 | #endif /* _ASM_X86_TLBFLUSH_H */ | 178 | #endif /* _ASM_X86_TLBFLUSH_H */ |
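The flush_tlb_others() interface now carries a [start, end) range instead of a single address. The call sites added in arch/x86/mm/tlb.c below use three conventions; as a minimal sketch (assuming SMP context with preemption disabled, as the real callers guarantee):

	/* flush an explicit range on the other CPUs in mm's cpumask */
	flush_tlb_others(mm_cpumask(mm), mm, start, end);

	/* flush a single page: end == 0 means "one page at start" */
	flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);

	/* flush everything: start == 0, end == TLB_FLUSH_ALL */
	flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);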
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 3bb9491b7659..b47c2a82ff15 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h | |||
@@ -15,7 +15,8 @@ extern void uv_nmi_init(void); | |||
15 | extern void uv_system_init(void); | 15 | extern void uv_system_init(void); |
16 | extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | 16 | extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, |
17 | struct mm_struct *mm, | 17 | struct mm_struct *mm, |
18 | unsigned long va, | 18 | unsigned long start, |
19 | unsigned long end, | ||
19 | unsigned int cpu); | 20 | unsigned int cpu); |
20 | 21 | ||
21 | #else /* X86_UV */ | 22 | #else /* X86_UV */ |
@@ -26,7 +27,7 @@ static inline void uv_cpu_init(void) { } | |||
26 | static inline void uv_system_init(void) { } | 27 | static inline void uv_system_init(void) { } |
27 | static inline const struct cpumask * | 28 | static inline const struct cpumask * |
28 | uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, | 29 | uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, |
29 | unsigned long va, unsigned int cpu) | 30 | unsigned long start, unsigned long end, unsigned int cpu) |
30 | { return cpumask; } | 31 | { return cpumask; } |
31 | 32 | ||
32 | #endif /* X86_UV */ | 33 | #endif /* X86_UV */ |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 98e24131ff3a..24deb3082328 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -75,8 +75,8 @@ physid_mask_t phys_cpu_present_map; | |||
75 | /* | 75 | /* |
76 | * Map cpu index to physical APIC ID | 76 | * Map cpu index to physical APIC ID |
77 | */ | 77 | */ |
78 | DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); | 78 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); |
79 | DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); | 79 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); |
80 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); | 80 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); |
81 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | 81 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); |
82 | 82 | ||
@@ -88,7 +88,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | |||
88 | * used for the mapping. This is where the behaviors of x86_64 and 32 | 88 | * used for the mapping. This is where the behaviors of x86_64 and 32 |
89 | * actually diverge. Let's keep it ugly for now. | 89 | * actually diverge. Let's keep it ugly for now. |
90 | */ | 90 | */ |
91 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID); | 91 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); |
92 | 92 | ||
93 | /* | 93 | /* |
94 | * Knob to control our willingness to enable the local APIC. | 94 | * Knob to control our willingness to enable the local APIC. |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index bac4c3804cc7..d30a6a9a0121 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp) | |||
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o scattered.o topology.o | 15 | obj-y := intel_cacheinfo.o scattered.o topology.o |
16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o | 17 | obj-y += vmware.o hypervisor.o mshyperv.o |
18 | obj-y += rdrand.o | 18 | obj-y += rdrand.o |
19 | obj-y += match.o | 19 | obj-y += match.o |
20 | 20 | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5bbc082c47ad..46d8786d655e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -452,6 +452,35 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) | |||
452 | c->x86_cache_size = l2size; | 452 | c->x86_cache_size = l2size; |
453 | } | 453 | } |
454 | 454 | ||
455 | u16 __read_mostly tlb_lli_4k[NR_INFO]; | ||
456 | u16 __read_mostly tlb_lli_2m[NR_INFO]; | ||
457 | u16 __read_mostly tlb_lli_4m[NR_INFO]; | ||
458 | u16 __read_mostly tlb_lld_4k[NR_INFO]; | ||
459 | u16 __read_mostly tlb_lld_2m[NR_INFO]; | ||
460 | u16 __read_mostly tlb_lld_4m[NR_INFO]; | ||
461 | |||
462 | /* | ||
463 | * tlb_flushall_shift sets the balance point for replacing a cr3 write | ||
464 | * with multiple 'invlpg' instructions. The replacement happens when | ||
465 | * flush_tlb_lines <= active_lines/2^tlb_flushall_shift. | ||
466 | * If tlb_flushall_shift is -1, the replacement is disabled. | ||
467 | */ | ||
468 | s8 __read_mostly tlb_flushall_shift = -1; | ||
469 | |||
470 | void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) | ||
471 | { | ||
472 | if (this_cpu->c_detect_tlb) | ||
473 | this_cpu->c_detect_tlb(c); | ||
474 | |||
475 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" | ||
476 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" | ||
477 | "tlb_flushall_shift: %d\n", | ||
478 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], | ||
479 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], | ||
480 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], | ||
481 | tlb_flushall_shift); | ||
482 | } | ||
483 | |||
455 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 484 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
456 | { | 485 | { |
457 | #ifdef CONFIG_X86_HT | 486 | #ifdef CONFIG_X86_HT |
@@ -911,6 +940,8 @@ void __init identify_boot_cpu(void) | |||
911 | #else | 940 | #else |
912 | vgetcpu_set_mode(); | 941 | vgetcpu_set_mode(); |
913 | #endif | 942 | #endif |
943 | if (boot_cpu_data.cpuid_level >= 2) | ||
944 | cpu_detect_tlb(&boot_cpu_data); | ||
914 | } | 945 | } |
915 | 946 | ||
916 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 947 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
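To make the balance formula documented above concrete, here is a minimal user-space C sketch of the decision; the 512-entry dTLB and the shift of 6 are hypothetical stand-ins for tlb_lld_4k[ENTRIES] and tlb_flushall_shift:

	#include <stdbool.h>
	#include <stdio.h>

	/* Hypothetical figures: a 512-entry last level dTLB and a shift of 6,
	 * standing in for tlb_lld_4k[ENTRIES] and tlb_flushall_shift. */
	static unsigned int active_tlb_entries = 512;
	static int tlb_flushall_shift = 6;

	/* Mirror of the kernel's test: invlpg wins only for small ranges. */
	static bool flush_by_invlpg(unsigned long flush_entries)
	{
		if (tlb_flushall_shift == -1)	/* knob disabled: always flush all */
			return false;
		return flush_entries <= (active_tlb_entries >> tlb_flushall_shift);
	}

	int main(void)
	{
		/* 512 >> 6 == 8, so up to 8 pages go page by page */
		printf("4 pages:  %s\n", flush_by_invlpg(4)  ? "invlpg loop" : "full flush");
		printf("16 pages: %s\n", flush_by_invlpg(16) ? "invlpg loop" : "full flush");
		return 0;
	}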
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 8bacc7826fb3..4041c24ae7db 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
@@ -20,10 +20,19 @@ struct cpu_dev { | |||
20 | void (*c_bsp_init)(struct cpuinfo_x86 *); | 20 | void (*c_bsp_init)(struct cpuinfo_x86 *); |
21 | void (*c_init)(struct cpuinfo_x86 *); | 21 | void (*c_init)(struct cpuinfo_x86 *); |
22 | void (*c_identify)(struct cpuinfo_x86 *); | 22 | void (*c_identify)(struct cpuinfo_x86 *); |
23 | void (*c_detect_tlb)(struct cpuinfo_x86 *); | ||
23 | unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); | 24 | unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); |
24 | int c_x86_vendor; | 25 | int c_x86_vendor; |
25 | }; | 26 | }; |
26 | 27 | ||
28 | struct _tlb_table { | ||
29 | unsigned char descriptor; | ||
30 | char tlb_type; | ||
31 | unsigned int entries; | ||
32 | /* unsigned int ways; */ | ||
33 | char info[128]; | ||
34 | }; | ||
35 | |||
27 | #define cpu_dev_register(cpu_devX) \ | 36 | #define cpu_dev_register(cpu_devX) \ |
28 | static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ | 37 | static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ |
29 | __attribute__((__section__(".x86_cpu_dev.init"))) = \ | 38 | __attribute__((__section__(".x86_cpu_dev.init"))) = \ |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3e6ff6cbf42a..0a4ce2980a5a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -491,6 +491,181 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i | |||
491 | } | 491 | } |
492 | #endif | 492 | #endif |
493 | 493 | ||
494 | #define TLB_INST_4K 0x01 | ||
495 | #define TLB_INST_4M 0x02 | ||
496 | #define TLB_INST_2M_4M 0x03 | ||
497 | |||
498 | #define TLB_INST_ALL 0x05 | ||
499 | #define TLB_INST_1G 0x06 | ||
500 | |||
501 | #define TLB_DATA_4K 0x11 | ||
502 | #define TLB_DATA_4M 0x12 | ||
503 | #define TLB_DATA_2M_4M 0x13 | ||
504 | #define TLB_DATA_4K_4M 0x14 | ||
505 | |||
506 | #define TLB_DATA_1G 0x16 | ||
507 | |||
508 | #define TLB_DATA0_4K 0x21 | ||
509 | #define TLB_DATA0_4M 0x22 | ||
510 | #define TLB_DATA0_2M_4M 0x23 | ||
511 | |||
512 | #define STLB_4K 0x41 | ||
513 | |||
514 | static const struct _tlb_table intel_tlb_table[] __cpuinitconst = { | ||
515 | { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, | ||
516 | { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, fully associative" }, | ||
517 | { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" }, | ||
518 | { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" }, | ||
519 | { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" }, | ||
520 | { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" }, | ||
521 | { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" }, | ||
522 | { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, | ||
523 | { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, | ||
524 | { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, | ||
525 | { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, | ||
526 | { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" }, | ||
527 | { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" }, | ||
528 | { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" }, | ||
529 | { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4-MByte pages, 4-way set associative" }, | ||
530 | { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, | ||
531 | { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, | ||
532 | { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, | ||
533 | { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, | ||
534 | { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries" }, | ||
535 | { 0xb2, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 4-way set associative" }, | ||
536 | { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, | ||
537 | { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, | ||
538 | { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, | ||
539 | { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, | ||
540 | { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, | ||
541 | { 0x00, 0, 0 } | ||
542 | }; | ||
543 | |||
544 | static void __cpuinit intel_tlb_lookup(const unsigned char desc) | ||
545 | { | ||
546 | unsigned char k; | ||
547 | if (desc == 0) | ||
548 | return; | ||
549 | |||
550 | /* look up this descriptor in the table */ | ||
551 | for (k = 0; intel_tlb_table[k].descriptor != desc && | ||
552 | intel_tlb_table[k].descriptor != 0; k++) | ||
553 | ; | ||
554 | |||
555 | if (intel_tlb_table[k].tlb_type == 0) | ||
556 | return; | ||
557 | |||
558 | switch (intel_tlb_table[k].tlb_type) { | ||
559 | case STLB_4K: | ||
560 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
561 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
562 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
563 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
564 | break; | ||
565 | case TLB_INST_ALL: | ||
566 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
567 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
568 | if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
569 | tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
570 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
571 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
572 | break; | ||
573 | case TLB_INST_4K: | ||
574 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
575 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
576 | break; | ||
577 | case TLB_INST_4M: | ||
578 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
579 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
580 | break; | ||
581 | case TLB_INST_2M_4M: | ||
582 | if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
583 | tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
584 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
585 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
586 | break; | ||
587 | case TLB_DATA_4K: | ||
588 | case TLB_DATA0_4K: | ||
589 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
590 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
591 | break; | ||
592 | case TLB_DATA_4M: | ||
593 | case TLB_DATA0_4M: | ||
594 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
595 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
596 | break; | ||
597 | case TLB_DATA_2M_4M: | ||
598 | case TLB_DATA0_2M_4M: | ||
599 | if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
600 | tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
601 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
602 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
603 | break; | ||
604 | case TLB_DATA_4K_4M: | ||
605 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
606 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
607 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
608 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
609 | break; | ||
610 | } | ||
611 | } | ||
612 | |||
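For example, descriptor 0x03 takes the TLB_DATA_4K case and raises tlb_lld_4k[ENTRIES] to 64 if the recorded value is smaller; keeping the maximum matters because several descriptors can report entries for the same page size and only the largest last-level count is kept.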
613 | static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) | ||
614 | { | ||
615 | if (!cpu_has_invlpg) { | ||
616 | tlb_flushall_shift = -1; | ||
617 | return; | ||
618 | } | ||
619 | switch ((c->x86 << 8) + c->x86_model) { | ||
620 | case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
621 | case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
622 | case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
623 | case 0x61d: /* six-core 45 nm xeon "Dunnington" */ | ||
624 | tlb_flushall_shift = -1; | ||
625 | break; | ||
626 | case 0x61a: /* 45 nm nehalem, "Bloomfield" */ | ||
627 | case 0x61e: /* 45 nm nehalem, "Lynnfield" */ | ||
628 | case 0x625: /* 32 nm nehalem, "Clarkdale" */ | ||
629 | case 0x62c: /* 32 nm nehalem, "Gulftown" */ | ||
630 | case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ | ||
631 | case 0x62f: /* 32 nm Xeon E7 */ | ||
632 | tlb_flushall_shift = 6; | ||
633 | break; | ||
634 | case 0x62a: /* Sandy Bridge */ | ||
635 | case 0x62d: /* Sandy Bridge, "Romley-EP" */ | ||
636 | tlb_flushall_shift = 5; | ||
637 | break; | ||
638 | case 0x63a: /* Ivy Bridge */ | ||
639 | tlb_flushall_shift = 1; | ||
640 | break; | ||
641 | default: | ||
642 | tlb_flushall_shift = 6; | ||
643 | } | ||
644 | } | ||
645 | |||
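The switch key above packs family and model into a single value, (c->x86 << 8) + c->x86_model: family 6, model 0x2a yields 0x62a, the Sandy Bridge entry, and family 6, model 0x3a yields 0x63a for Ivy Bridge.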
646 | static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) | ||
647 | { | ||
648 | int i, j, n; | ||
649 | unsigned int regs[4]; | ||
650 | unsigned char *desc = (unsigned char *)regs; | ||
651 | /* Number of times to iterate */ | ||
652 | n = cpuid_eax(2) & 0xFF; | ||
653 | |||
654 | for (i = 0 ; i < n ; i++) { | ||
655 | cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); | ||
656 | |||
657 | /* If bit 31 is set, this is an unknown format */ | ||
658 | for (j = 0 ; j < 3 ; j++) | ||
659 | if (regs[j] & (1 << 31)) | ||
660 | regs[j] = 0; | ||
661 | |||
662 | /* Byte 0 of EAX is the iteration count, not a descriptor */ | ||
663 | for (j = 1 ; j < 16 ; j++) | ||
664 | intel_tlb_lookup(desc[j]); | ||
665 | } | ||
666 | intel_tlb_flushall_shift_set(c); | ||
667 | } | ||
668 | |||
494 | static const struct cpu_dev __cpuinitconst intel_cpu_dev = { | 669 | static const struct cpu_dev __cpuinitconst intel_cpu_dev = { |
495 | .c_vendor = "Intel", | 670 | .c_vendor = "Intel", |
496 | .c_ident = { "GenuineIntel" }, | 671 | .c_ident = { "GenuineIntel" }, |
@@ -546,6 +721,7 @@ static const struct cpu_dev __cpuinitconst intel_cpu_dev = { | |||
546 | }, | 721 | }, |
547 | .c_size_cache = intel_size_cache, | 722 | .c_size_cache = intel_size_cache, |
548 | #endif | 723 | #endif |
724 | .c_detect_tlb = intel_detect_tlb, | ||
549 | .c_early_init = early_init_intel, | 725 | .c_early_init = early_init_intel, |
550 | .c_init = init_intel, | 726 | .c_init = init_intel, |
551 | .c_x86_vendor = X86_VENDOR_INTEL, | 727 | .c_x86_vendor = X86_VENDOR_INTEL, |
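The descriptor stream that intel_detect_tlb() walks can be dumped from user space for inspection. A sketch using the GCC/Clang <cpuid.h> wrapper; the wrapper and the printing are illustrative assumptions, not part of the kernel code:

	#include <stdio.h>
	#include <cpuid.h>	/* GCC/Clang CPUID wrapper */

	int main(void)
	{
		unsigned int regs[4];
		unsigned char *desc = (unsigned char *)regs;
		unsigned int i, j, n;

		if (!__get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]))
			return 1;
		n = regs[0] & 0xff;	/* low byte of EAX: iteration count */
		for (i = 0; i < n; i++) {
			__get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
			for (j = 0; j < 4; j++)		/* bit 31 set: reserved format */
				if (regs[j] & (1u << 31))
					regs[j] = 0;
			for (j = 1; j < 16; j++)	/* byte 0 is the count, skip it */
				printf("%02x ", desc[j]);
		}
		printf("\n");
		return 0;
	}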
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c deleted file mode 100644 index a640ae5ad201..000000000000 --- a/arch/x86/kernel/cpu/sched.c +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/math64.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/irqflags.h> | ||
5 | |||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/processor.h> | ||
8 | |||
9 | #ifdef CONFIG_SMP | ||
10 | |||
11 | static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); | ||
12 | |||
13 | static unsigned long scale_aperfmperf(void) | ||
14 | { | ||
15 | struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); | ||
16 | unsigned long ratio, flags; | ||
17 | |||
18 | local_irq_save(flags); | ||
19 | get_aperfmperf(&val); | ||
20 | local_irq_restore(flags); | ||
21 | |||
22 | ratio = calc_aperfmperf_ratio(old, &val); | ||
23 | *old = val; | ||
24 | |||
25 | return ratio; | ||
26 | } | ||
27 | |||
28 | unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
29 | { | ||
30 | /* | ||
31 | * do aperf/mperf on the cpu level because it includes things | ||
32 | * like turbo mode, which are relevant to full cores. | ||
33 | */ | ||
34 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
35 | return scale_aperfmperf(); | ||
36 | |||
37 | /* | ||
38 | * maybe have something cpufreq here | ||
39 | */ | ||
40 | |||
41 | return default_scale_freq_power(sd, cpu); | ||
42 | } | ||
43 | |||
44 | unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
45 | { | ||
46 | /* | ||
47 | * aperf/mperf already includes the smt gain | ||
48 | */ | ||
49 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
50 | return SCHED_LOAD_SCALE; | ||
51 | |||
52 | return default_scale_smt_power(sd, cpu); | ||
53 | } | ||
54 | |||
55 | #endif | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 111f6bbd8b38..69babd8c834f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1048,24 +1048,6 @@ apicinterrupt LOCAL_TIMER_VECTOR \ | |||
1048 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ | 1048 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
1049 | x86_platform_ipi smp_x86_platform_ipi | 1049 | x86_platform_ipi smp_x86_platform_ipi |
1050 | 1050 | ||
1051 | #ifdef CONFIG_SMP | ||
1052 | ALIGN | ||
1053 | INTR_FRAME | ||
1054 | .irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | ||
1055 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | ||
1056 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | ||
1057 | ENTRY(invalidate_interrupt\idx) | ||
1058 | pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) | ||
1059 | jmp .Lcommon_invalidate_interrupt0 | ||
1060 | CFI_ADJUST_CFA_OFFSET -8 | ||
1061 | END(invalidate_interrupt\idx) | ||
1062 | .endif | ||
1063 | .endr | ||
1064 | CFI_ENDPROC | ||
1065 | apicinterrupt INVALIDATE_TLB_VECTOR_START, \ | ||
1066 | invalidate_interrupt0, smp_invalidate_interrupt | ||
1067 | #endif | ||
1068 | |||
1069 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 1051 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
1070 | threshold_interrupt smp_threshold_interrupt | 1052 | threshold_interrupt smp_threshold_interrupt |
1071 | apicinterrupt THERMAL_APIC_VECTOR \ | 1053 | apicinterrupt THERMAL_APIC_VECTOR \ |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 252981afd6c4..6e03b0d69138 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -171,79 +171,6 @@ static void __init smp_intr_init(void) | |||
171 | */ | 171 | */ |
172 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | 172 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); |
173 | 173 | ||
174 | /* IPIs for invalidation */ | ||
175 | #define ALLOC_INVTLB_VEC(NR) \ | ||
176 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \ | ||
177 | invalidate_interrupt##NR) | ||
178 | |||
179 | switch (NUM_INVALIDATE_TLB_VECTORS) { | ||
180 | default: | ||
181 | ALLOC_INVTLB_VEC(31); | ||
182 | case 31: | ||
183 | ALLOC_INVTLB_VEC(30); | ||
184 | case 30: | ||
185 | ALLOC_INVTLB_VEC(29); | ||
186 | case 29: | ||
187 | ALLOC_INVTLB_VEC(28); | ||
188 | case 28: | ||
189 | ALLOC_INVTLB_VEC(27); | ||
190 | case 27: | ||
191 | ALLOC_INVTLB_VEC(26); | ||
192 | case 26: | ||
193 | ALLOC_INVTLB_VEC(25); | ||
194 | case 25: | ||
195 | ALLOC_INVTLB_VEC(24); | ||
196 | case 24: | ||
197 | ALLOC_INVTLB_VEC(23); | ||
198 | case 23: | ||
199 | ALLOC_INVTLB_VEC(22); | ||
200 | case 22: | ||
201 | ALLOC_INVTLB_VEC(21); | ||
202 | case 21: | ||
203 | ALLOC_INVTLB_VEC(20); | ||
204 | case 20: | ||
205 | ALLOC_INVTLB_VEC(19); | ||
206 | case 19: | ||
207 | ALLOC_INVTLB_VEC(18); | ||
208 | case 18: | ||
209 | ALLOC_INVTLB_VEC(17); | ||
210 | case 17: | ||
211 | ALLOC_INVTLB_VEC(16); | ||
212 | case 16: | ||
213 | ALLOC_INVTLB_VEC(15); | ||
214 | case 15: | ||
215 | ALLOC_INVTLB_VEC(14); | ||
216 | case 14: | ||
217 | ALLOC_INVTLB_VEC(13); | ||
218 | case 13: | ||
219 | ALLOC_INVTLB_VEC(12); | ||
220 | case 12: | ||
221 | ALLOC_INVTLB_VEC(11); | ||
222 | case 11: | ||
223 | ALLOC_INVTLB_VEC(10); | ||
224 | case 10: | ||
225 | ALLOC_INVTLB_VEC(9); | ||
226 | case 9: | ||
227 | ALLOC_INVTLB_VEC(8); | ||
228 | case 8: | ||
229 | ALLOC_INVTLB_VEC(7); | ||
230 | case 7: | ||
231 | ALLOC_INVTLB_VEC(6); | ||
232 | case 6: | ||
233 | ALLOC_INVTLB_VEC(5); | ||
234 | case 5: | ||
235 | ALLOC_INVTLB_VEC(4); | ||
236 | case 4: | ||
237 | ALLOC_INVTLB_VEC(3); | ||
238 | case 3: | ||
239 | ALLOC_INVTLB_VEC(2); | ||
240 | case 2: | ||
241 | ALLOC_INVTLB_VEC(1); | ||
242 | case 1: | ||
243 | ALLOC_INVTLB_VEC(0); | ||
244 | break; | ||
245 | } | ||
246 | |||
247 | /* IPI for generic function call */ | 174 | /* IPI for generic function call */ |
248 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 175 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
249 | 176 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5a98aa272184..5cdff0357746 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <asm/cpu.h> | 21 | #include <asm/cpu.h> |
22 | #include <asm/stackprotector.h> | 22 | #include <asm/stackprotector.h> |
23 | 23 | ||
24 | DEFINE_PER_CPU(int, cpu_number); | 24 | DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); |
25 | EXPORT_PER_CPU_SYMBOL(cpu_number); | 25 | EXPORT_PER_CPU_SYMBOL(cpu_number); |
26 | 26 | ||
27 | #ifdef CONFIG_X86_64 | 27 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c1a310fb8309..7c5a8c314c02 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -106,17 +106,17 @@ int smp_num_siblings = 1; | |||
106 | EXPORT_SYMBOL(smp_num_siblings); | 106 | EXPORT_SYMBOL(smp_num_siblings); |
107 | 107 | ||
108 | /* Last level cache ID of each logical CPU */ | 108 | /* Last level cache ID of each logical CPU */ |
109 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; | 109 | DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; |
110 | 110 | ||
111 | /* representing HT siblings of each logical CPU */ | 111 | /* representing HT siblings of each logical CPU */ |
112 | DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); | 112 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); |
113 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); | 113 | EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); |
114 | 114 | ||
115 | /* representing HT and core siblings of each logical CPU */ | 115 | /* representing HT and core siblings of each logical CPU */ |
116 | DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); | 116 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); |
117 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); | 117 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); |
118 | 118 | ||
119 | DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); | 119 | DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); |
120 | 120 | ||
121 | /* Per CPU bogomips and other parameters */ | 121 | /* Per CPU bogomips and other parameters */ |
122 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); | 122 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a718e0d23503..931930a96160 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -919,11 +919,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
919 | 919 | ||
920 | /* | 920 | /* |
921 | * On success we use clflush, when the CPU supports it to | 921 | * On success we use clflush, when the CPU supports it to |
922 | * avoid the wbindv. If the CPU does not support it and in the | 922 | * avoid the wbindv. If the CPU does not support it, in the |
923 | * error case we fall back to cpa_flush_all (which uses | 923 | * error case, and during early boot (for EFI) we fall back |
924 | * wbindv): | 924 | * to cpa_flush_all (which uses wbinvd): |
925 | */ | 925 | */ |
926 | if (!ret && cpu_has_clflush) { | 926 | if (early_boot_irqs_disabled) |
927 | __cpa_flush_all((void *)(long)cache); | ||
928 | else if (!ret && cpu_has_clflush) { | ||
927 | if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { | 929 | if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { |
928 | cpa_flush_array(addr, numpages, cache, | 930 | cpa_flush_array(addr, numpages, cache, |
929 | cpa.flags, pages); | 931 | cpa.flags, pages); |
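The new early-boot branch exists for the EFI changes later in the series: while early_boot_irqs_disabled is set, only the boot CPU is up and IPIs cannot be delivered, so the on_each_cpu() path inside cpa_flush_all() is unusable; calling the local worker __cpa_flush_all() directly performs the same flush on the one CPU that is running at that point.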
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5e57e113b72c..613cd83e8c0c 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/cache.h> | 12 | #include <asm/cache.h> |
13 | #include <asm/apic.h> | 13 | #include <asm/apic.h> |
14 | #include <asm/uv/uv.h> | 14 | #include <asm/uv/uv.h> |
15 | #include <linux/debugfs.h> | ||
15 | 16 | ||
16 | DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) | 17 | DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) |
17 | = { &init_mm, 0, }; | 18 | = { &init_mm, 0, }; |
@@ -27,33 +28,14 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) | |||
27 | * | 28 | * |
28 | * More scalable flush, from Andi Kleen | 29 | * More scalable flush, from Andi Kleen |
29 | * | 30 | * |
30 | * To avoid global state use 8 different call vectors. | 31 | * Flush IPIs are implemented via CALL_FUNCTION_VECTOR, Alex Shi |
31 | * Each CPU uses a specific vector to trigger flushes on other | ||
32 | * CPUs. Depending on the received vector the target CPUs look into | ||
33 | * the right array slot for the flush data. | ||
34 | * | ||
35 | * With more than 8 CPUs they are hashed to the 8 available | ||
36 | * vectors. The limited global vector space forces us to this right now. | ||
37 | * In future when interrupts are split into per CPU domains this could be | ||
38 | * fixed, at the cost of triggering multiple IPIs in some cases. | ||
39 | */ | 32 | */ |
40 | 33 | ||
41 | union smp_flush_state { | 34 | struct flush_tlb_info { |
42 | struct { | 35 | struct mm_struct *flush_mm; |
43 | struct mm_struct *flush_mm; | 36 | unsigned long flush_start; |
44 | unsigned long flush_va; | 37 | unsigned long flush_end; |
45 | raw_spinlock_t tlbstate_lock; | 38 | }; |
46 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); | ||
47 | }; | ||
48 | char pad[INTERNODE_CACHE_BYTES]; | ||
49 | } ____cacheline_internodealigned_in_smp; | ||
50 | |||
51 | /* State is put into the per CPU data section, but padded | ||
52 | to a full cache line because other CPUs can access it and we don't | ||
53 | want false sharing in the per cpu data segment. */ | ||
54 | static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; | ||
55 | |||
56 | static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset); | ||
57 | 39 | ||
58 | /* | 40 | /* |
59 | * We cannot call mmdrop() because we are in interrupt context, | 41 | * We cannot call mmdrop() because we are in interrupt context, |
@@ -72,28 +54,25 @@ void leave_mm(int cpu) | |||
72 | EXPORT_SYMBOL_GPL(leave_mm); | 54 | EXPORT_SYMBOL_GPL(leave_mm); |
73 | 55 | ||
74 | /* | 56 | /* |
75 | * | ||
76 | * The flush IPI assumes that a thread switch happens in this order: | 57 | * The flush IPI assumes that a thread switch happens in this order: |
77 | * [cpu0: the cpu that switches] | 58 | * [cpu0: the cpu that switches] |
78 | * 1) switch_mm() either 1a) or 1b) | 59 | * 1) switch_mm() either 1a) or 1b) |
79 | * 1a) thread switch to a different mm | 60 | * 1a) thread switch to a different mm |
80 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); | 61 | * 1a1) set cpu_tlbstate to TLBSTATE_OK |
81 | * Stop ipi delivery for the old mm. This is not synchronized with | 62 | * Now the tlb flush NMI handler flush_tlb_func won't call leave_mm |
82 | * the other cpus, but smp_invalidate_interrupt ignore flush ipis | 63 | * if cpu0 was in lazy tlb mode. |
83 | * for the wrong mm, and in the worst case we perform a superfluous | 64 | * 1a2) update cpu active_mm |
84 | * tlb flush. | ||
85 | * 1a2) set cpu mmu_state to TLBSTATE_OK | ||
86 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | ||
87 | * was in lazy tlb mode. | ||
88 | * 1a3) update cpu active_mm | ||
89 | * Now cpu0 accepts tlb flushes for the new mm. | 65 | * Now cpu0 accepts tlb flushes for the new mm. |
90 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); | 66 | * 1a3) cpu_set(cpu, new_mm->cpu_vm_mask); |
91 | * Now the other cpus will send tlb flush ipis. | 67 | * Now the other cpus will send tlb flush ipis. |
92 | * 1a4) change cr3. | 68 | * 1a4) change cr3. |
69 | * 1a5) cpu_clear(cpu, old_mm->cpu_vm_mask); | ||
70 | * Stop ipi delivery for the old mm. This is not synchronized with | ||
71 | * the other cpus, but flush_tlb_func ignore flush ipis for the wrong | ||
72 | * mm, and in the worst case we perform a superfluous tlb flush. | ||
93 | * 1b) thread switch without mm change | 73 | * 1b) thread switch without mm change |
94 | * cpu active_mm is correct, cpu0 already handles | 74 | * cpu active_mm is correct, cpu0 already handles flush ipis. |
95 | * flush ipis. | 75 | * 1b1) set cpu_tlbstate to TLBSTATE_OK |
96 | * 1b1) set cpu mmu_state to TLBSTATE_OK | ||
97 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | 76 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. |
98 | * Atomically set the bit [other cpus will start sending flush ipis], | 77 | * Atomically set the bit [other cpus will start sending flush ipis], |
99 | * and test the bit. | 78 | * and test the bit. |
@@ -106,174 +85,62 @@ EXPORT_SYMBOL_GPL(leave_mm); | |||
106 | * runs in kernel space, the cpu could load tlb entries for user space | 85 | * runs in kernel space, the cpu could load tlb entries for user space |
107 | * pages. | 86 | * pages. |
108 | * | 87 | * |
109 | * The good news is that cpu mmu_state is local to each cpu, no | 88 | * The good news is that cpu_tlbstate is local to each cpu, no |
110 | * write/read ordering problems. | 89 | * write/read ordering problems. |
111 | */ | 90 | */ |
112 | 91 | ||
113 | /* | 92 | /* |
114 | * TLB flush IPI: | 93 | * TLB flush function: |
115 | * | ||
116 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. | 94 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. |
117 | * 2) Leave the mm if we are in the lazy tlb mode. | 95 | * 2) Leave the mm if we are in the lazy tlb mode. |
118 | * | ||
119 | * Interrupts are disabled. | ||
120 | */ | ||
121 | |||
122 | /* | ||
123 | * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop | ||
124 | * but still used for documentation purpose but the usage is slightly | ||
125 | * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt | ||
126 | * entry calls in with the first parameter in %eax. Maybe define | ||
127 | * intrlinkage? | ||
128 | */ | 96 | */ |
129 | #ifdef CONFIG_X86_64 | 97 | static void flush_tlb_func(void *info) |
130 | asmlinkage | ||
131 | #endif | ||
132 | void smp_invalidate_interrupt(struct pt_regs *regs) | ||
133 | { | 98 | { |
134 | unsigned int cpu; | 99 | struct flush_tlb_info *f = info; |
135 | unsigned int sender; | ||
136 | union smp_flush_state *f; | ||
137 | |||
138 | cpu = smp_processor_id(); | ||
139 | /* | ||
140 | * orig_rax contains the negated interrupt vector. | ||
141 | * Use that to determine where the sender put the data. | ||
142 | */ | ||
143 | sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; | ||
144 | f = &flush_state[sender]; | ||
145 | |||
146 | if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) | ||
147 | goto out; | ||
148 | /* | ||
149 | * This was a BUG() but until someone can quote me the | ||
150 | * line from the intel manual that guarantees an IPI to | ||
151 | * multiple CPUs is retried _only_ on the erroring CPUs | ||
152 | * its staying as a return | ||
153 | * | ||
154 | * BUG(); | ||
155 | */ | ||
156 | |||
157 | if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) { | ||
158 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { | ||
159 | if (f->flush_va == TLB_FLUSH_ALL) | ||
160 | local_flush_tlb(); | ||
161 | else | ||
162 | __flush_tlb_one(f->flush_va); | ||
163 | } else | ||
164 | leave_mm(cpu); | ||
165 | } | ||
166 | out: | ||
167 | ack_APIC_irq(); | ||
168 | smp_mb__before_clear_bit(); | ||
169 | cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); | ||
170 | smp_mb__after_clear_bit(); | ||
171 | inc_irq_stat(irq_tlb_count); | ||
172 | } | ||
173 | 100 | ||
174 | static void flush_tlb_others_ipi(const struct cpumask *cpumask, | 101 | if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) |
175 | struct mm_struct *mm, unsigned long va) | 102 | return; |
176 | { | 103 | |
177 | unsigned int sender; | 104 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { |
178 | union smp_flush_state *f; | 105 | if (f->flush_end == TLB_FLUSH_ALL || !cpu_has_invlpg) |
179 | 106 | local_flush_tlb(); | |
180 | /* Caller has disabled preemption */ | 107 | else if (!f->flush_end) |
181 | sender = this_cpu_read(tlb_vector_offset); | 108 | __flush_tlb_single(f->flush_start); |
182 | f = &flush_state[sender]; | 109 | else { |
183 | 110 | unsigned long addr; | |
184 | if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) | 111 | addr = f->flush_start; |
185 | raw_spin_lock(&f->tlbstate_lock); | 112 | while (addr < f->flush_end) { |
186 | 113 | __flush_tlb_single(addr); | |
187 | f->flush_mm = mm; | 114 | addr += PAGE_SIZE; |
188 | f->flush_va = va; | 115 | } |
189 | if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) { | 116 | } |
190 | /* | 117 | } else |
191 | * We have to send the IPI only to | 118 | leave_mm(smp_processor_id()); |
192 | * CPUs affected. | ||
193 | */ | ||
194 | apic->send_IPI_mask(to_cpumask(f->flush_cpumask), | ||
195 | INVALIDATE_TLB_VECTOR_START + sender); | ||
196 | |||
197 | while (!cpumask_empty(to_cpumask(f->flush_cpumask))) | ||
198 | cpu_relax(); | ||
199 | } | ||
200 | 119 | ||
201 | f->flush_mm = NULL; | ||
202 | f->flush_va = 0; | ||
203 | if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) | ||
204 | raw_spin_unlock(&f->tlbstate_lock); | ||
205 | } | 120 | } |
206 | 121 | ||
207 | void native_flush_tlb_others(const struct cpumask *cpumask, | 122 | void native_flush_tlb_others(const struct cpumask *cpumask, |
208 | struct mm_struct *mm, unsigned long va) | 123 | struct mm_struct *mm, unsigned long start, |
124 | unsigned long end) | ||
209 | { | 125 | { |
126 | struct flush_tlb_info info; | ||
127 | info.flush_mm = mm; | ||
128 | info.flush_start = start; | ||
129 | info.flush_end = end; | ||
130 | |||
210 | if (is_uv_system()) { | 131 | if (is_uv_system()) { |
211 | unsigned int cpu; | 132 | unsigned int cpu; |
212 | 133 | ||
213 | cpu = smp_processor_id(); | 134 | cpu = smp_processor_id(); |
214 | cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); | 135 | cpumask = uv_flush_tlb_others(cpumask, mm, start, end, cpu); |
215 | if (cpumask) | 136 | if (cpumask) |
216 | flush_tlb_others_ipi(cpumask, mm, va); | 137 | smp_call_function_many(cpumask, flush_tlb_func, |
138 | &info, 1); | ||
217 | return; | 139 | return; |
218 | } | 140 | } |
219 | flush_tlb_others_ipi(cpumask, mm, va); | 141 | smp_call_function_many(cpumask, flush_tlb_func, &info, 1); |
220 | } | 142 | } |
221 | 143 | ||
222 | static void __cpuinit calculate_tlb_offset(void) | ||
223 | { | ||
224 | int cpu, node, nr_node_vecs, idx = 0; | ||
225 | /* | ||
226 | * we are changing tlb_vector_offset for each CPU in runtime, but this | ||
227 | * will not cause inconsistency, as the write is atomic under X86. we | ||
228 | * might see more lock contentions in a short time, but after all CPU's | ||
229 | * tlb_vector_offset are changed, everything should go normal | ||
230 | * | ||
231 | * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might | ||
232 | * waste some vectors. | ||
233 | **/ | ||
234 | if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS) | ||
235 | nr_node_vecs = 1; | ||
236 | else | ||
237 | nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; | ||
238 | |||
239 | for_each_online_node(node) { | ||
240 | int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * | ||
241 | nr_node_vecs; | ||
242 | int cpu_offset = 0; | ||
243 | for_each_cpu(cpu, cpumask_of_node(node)) { | ||
244 | per_cpu(tlb_vector_offset, cpu) = node_offset + | ||
245 | cpu_offset; | ||
246 | cpu_offset++; | ||
247 | cpu_offset = cpu_offset % nr_node_vecs; | ||
248 | } | ||
249 | idx++; | ||
250 | } | ||
251 | } | ||
252 | |||
253 | static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n, | ||
254 | unsigned long action, void *hcpu) | ||
255 | { | ||
256 | switch (action & 0xf) { | ||
257 | case CPU_ONLINE: | ||
258 | case CPU_DEAD: | ||
259 | calculate_tlb_offset(); | ||
260 | } | ||
261 | return NOTIFY_OK; | ||
262 | } | ||
263 | |||
264 | static int __cpuinit init_smp_flush(void) | ||
265 | { | ||
266 | int i; | ||
267 | |||
268 | for (i = 0; i < ARRAY_SIZE(flush_state); i++) | ||
269 | raw_spin_lock_init(&flush_state[i].tlbstate_lock); | ||
270 | |||
271 | calculate_tlb_offset(); | ||
272 | hotcpu_notifier(tlb_cpuhp_notify, 0); | ||
273 | return 0; | ||
274 | } | ||
275 | core_initcall(init_smp_flush); | ||
276 | |||
277 | void flush_tlb_current_task(void) | 144 | void flush_tlb_current_task(void) |
278 | { | 145 | { |
279 | struct mm_struct *mm = current->mm; | 146 | struct mm_struct *mm = current->mm; |
@@ -282,27 +149,91 @@ void flush_tlb_current_task(void) | |||
282 | 149 | ||
283 | local_flush_tlb(); | 150 | local_flush_tlb(); |
284 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | 151 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
285 | flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); | 152 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); |
286 | preempt_enable(); | 153 | preempt_enable(); |
287 | } | 154 | } |
288 | 155 | ||
289 | void flush_tlb_mm(struct mm_struct *mm) | 156 | /* |
157 | * Find a THP large page in the range, or a | ||
158 | * HUGETLB page when tlb_flush runs with THP disabled. | ||
159 | */ | ||
160 | static inline unsigned long has_large_page(struct mm_struct *mm, | ||
161 | unsigned long start, unsigned long end) | ||
162 | { | ||
163 | pgd_t *pgd; | ||
164 | pud_t *pud; | ||
165 | pmd_t *pmd; | ||
166 | unsigned long addr = ALIGN(start, HPAGE_SIZE); | ||
167 | for (; addr < end; addr += HPAGE_SIZE) { | ||
168 | pgd = pgd_offset(mm, addr); | ||
169 | if (likely(!pgd_none(*pgd))) { | ||
170 | pud = pud_offset(pgd, addr); | ||
171 | if (likely(!pud_none(*pud))) { | ||
172 | pmd = pmd_offset(pud, addr); | ||
173 | if (likely(!pmd_none(*pmd))) | ||
174 | if (pmd_large(*pmd)) | ||
175 | return addr; | ||
176 | } | ||
177 | } | ||
178 | } | ||
179 | return 0; | ||
180 | } | ||
181 | |||
182 | void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | ||
183 | unsigned long end, unsigned long vmflag) | ||
290 | { | 184 | { |
185 | unsigned long addr; | ||
186 | unsigned act_entries, tlb_entries = 0; | ||
187 | |||
291 | preempt_disable(); | 188 | preempt_disable(); |
189 | if (current->active_mm != mm) | ||
190 | goto flush_all; | ||
292 | 191 | ||
293 | if (current->active_mm == mm) { | 192 | if (!current->mm) { |
294 | if (current->mm) | 193 | leave_mm(smp_processor_id()); |
194 | goto flush_all; | ||
195 | } | ||
196 | |||
197 | if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 | ||
198 | || (vmflag & VM_HUGETLB)) { | ||
199 | local_flush_tlb(); | ||
200 | goto flush_all; | ||
201 | } | ||
202 | |||
203 | /* On modern CPUs the last level TLB is shared by data and instructions */ | ||
204 | if (vmflag & VM_EXEC) | ||
205 | tlb_entries = tlb_lli_4k[ENTRIES]; | ||
206 | else | ||
207 | tlb_entries = tlb_lld_4k[ENTRIES]; | ||
208 | /* Assume all TLB entries are occupied by this task */ | ||
209 | act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; | ||
210 | |||
211 | /* tlb_flushall_shift sets the balance point; see the commit log for details */ | ||
212 | if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) | ||
213 | local_flush_tlb(); | ||
214 | else { | ||
215 | if (has_large_page(mm, start, end)) { | ||
295 | local_flush_tlb(); | 216 | local_flush_tlb(); |
296 | else | 217 | goto flush_all; |
297 | leave_mm(smp_processor_id()); | 218 | } |
219 | /* flush the range one page at a time with 'invlpg' */ | ||
220 | for (addr = start; addr < end; addr += PAGE_SIZE) | ||
221 | __flush_tlb_single(addr); | ||
222 | |||
223 | if (cpumask_any_but(mm_cpumask(mm), | ||
224 | smp_processor_id()) < nr_cpu_ids) | ||
225 | flush_tlb_others(mm_cpumask(mm), mm, start, end); | ||
226 | preempt_enable(); | ||
227 | return; | ||
298 | } | 228 | } |
299 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | ||
300 | flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); | ||
301 | 229 | ||
230 | flush_all: | ||
231 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | ||
232 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); | ||
302 | preempt_enable(); | 233 | preempt_enable(); |
303 | } | 234 | } |
304 | 235 | ||
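A worked example of the range path above, using the same hypothetical 512-entry dTLB and shift of 6 as the sketch after the common.c hunk: for a data mapping in a large task, act_entries clamps to 512 and the threshold is 512 >> 6 = 8 pages, so a 32KB munmap (8 pages) is flushed page by page with invlpg and the (start, end) range is forwarded to the other CPUs, while a 64KB one (16 pages) takes the local full flush and tells the other CPUs to flush everything.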
305 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) | 236 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) |
306 | { | 237 | { |
307 | struct mm_struct *mm = vma->vm_mm; | 238 | struct mm_struct *mm = vma->vm_mm; |
308 | 239 | ||
@@ -310,13 +241,13 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) | |||
310 | 241 | ||
311 | if (current->active_mm == mm) { | 242 | if (current->active_mm == mm) { |
312 | if (current->mm) | 243 | if (current->mm) |
313 | __flush_tlb_one(va); | 244 | __flush_tlb_one(start); |
314 | else | 245 | else |
315 | leave_mm(smp_processor_id()); | 246 | leave_mm(smp_processor_id()); |
316 | } | 247 | } |
317 | 248 | ||
318 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | 249 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
319 | flush_tlb_others(mm_cpumask(mm), mm, va); | 250 | flush_tlb_others(mm_cpumask(mm), mm, start, 0UL); |
320 | 251 | ||
321 | preempt_enable(); | 252 | preempt_enable(); |
322 | } | 253 | } |
@@ -332,3 +263,83 @@ void flush_tlb_all(void) | |||
332 | { | 263 | { |
333 | on_each_cpu(do_flush_tlb_all, NULL, 1); | 264 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
334 | } | 265 | } |
266 | |||
267 | static void do_kernel_range_flush(void *info) | ||
268 | { | ||
269 | struct flush_tlb_info *f = info; | ||
270 | unsigned long addr; | ||
271 | |||
272 | /* flush the range one page at a time with 'invlpg' */ | ||
273 | for (addr = f->flush_start; addr < f->flush_end; addr += PAGE_SIZE) | ||
274 | __flush_tlb_single(addr); | ||
275 | } | ||
276 | |||
277 | void flush_tlb_kernel_range(unsigned long start, unsigned long end) | ||
278 | { | ||
279 | unsigned act_entries; | ||
280 | struct flush_tlb_info info; | ||
281 | |||
282 | /* On modern CPUs the last level TLB is shared by data and instructions */ | ||
283 | act_entries = tlb_lld_4k[ENTRIES]; | ||
284 | |||
285 | /* Balance as for a user space task's flush; a bit conservative */ | ||
286 | if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 || | ||
287 | (end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) | ||
288 | |||
289 | on_each_cpu(do_flush_tlb_all, NULL, 1); | ||
290 | else { | ||
291 | info.flush_start = start; | ||
292 | info.flush_end = end; | ||
293 | on_each_cpu(do_kernel_range_flush, &info, 1); | ||
294 | } | ||
295 | } | ||
296 | |||
297 | #ifdef CONFIG_DEBUG_TLBFLUSH | ||
298 | static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf, | ||
299 | size_t count, loff_t *ppos) | ||
300 | { | ||
301 | char buf[32]; | ||
302 | unsigned int len; | ||
303 | |||
304 | len = sprintf(buf, "%hd\n", tlb_flushall_shift); | ||
305 | return simple_read_from_buffer(user_buf, count, ppos, buf, len); | ||
306 | } | ||
307 | |||
308 | static ssize_t tlbflush_write_file(struct file *file, | ||
309 | const char __user *user_buf, size_t count, loff_t *ppos) | ||
310 | { | ||
311 | char buf[32]; | ||
312 | ssize_t len; | ||
313 | s8 shift; | ||
314 | |||
315 | len = min(count, sizeof(buf) - 1); | ||
316 | if (copy_from_user(buf, user_buf, len)) | ||
317 | return -EFAULT; | ||
318 | |||
319 | buf[len] = '\0'; | ||
320 | if (kstrtos8(buf, 0, &shift)) | ||
321 | return -EINVAL; | ||
322 | |||
323 | if (shift > 64) | ||
324 | return -EINVAL; | ||
325 | |||
326 | tlb_flushall_shift = shift; | ||
327 | return count; | ||
328 | } | ||
329 | |||
330 | static const struct file_operations fops_tlbflush = { | ||
331 | .read = tlbflush_read_file, | ||
332 | .write = tlbflush_write_file, | ||
333 | .llseek = default_llseek, | ||
334 | }; | ||
335 | |||
336 | static int __cpuinit create_tlb_flushall_shift(void) | ||
337 | { | ||
338 | if (cpu_has_invlpg) { | ||
339 | debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, | ||
340 | arch_debugfs_dir, NULL, &fops_tlbflush); | ||
341 | } | ||
342 | return 0; | ||
343 | } | ||
344 | late_initcall(create_tlb_flushall_shift); | ||
345 | #endif | ||
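At run time, a root-only tlb_flushall_shift file appears under the x86 debugfs directory on CPUs with invlpg (on others the file is not created, since the heuristic can never fire). Reading it returns the current shift; writing stores any value up to 64, with -1 restoring unconditional full flushes.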
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 92660edaa1e7..2dc29f51e75a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -234,22 +234,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map( | |||
234 | return status; | 234 | return status; |
235 | } | 235 | } |
236 | 236 | ||
237 | static efi_status_t __init phys_efi_get_time(efi_time_t *tm, | 237 | static int efi_set_rtc_mmss(unsigned long nowtime) |
238 | efi_time_cap_t *tc) | ||
239 | { | ||
240 | unsigned long flags; | ||
241 | efi_status_t status; | ||
242 | |||
243 | spin_lock_irqsave(&rtc_lock, flags); | ||
244 | efi_call_phys_prelog(); | ||
245 | status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), | ||
246 | virt_to_phys(tc)); | ||
247 | efi_call_phys_epilog(); | ||
248 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
249 | return status; | ||
250 | } | ||
251 | |||
252 | int efi_set_rtc_mmss(unsigned long nowtime) | ||
253 | { | 238 | { |
254 | int real_seconds, real_minutes; | 239 | int real_seconds, real_minutes; |
255 | efi_status_t status; | 240 | efi_status_t status; |
@@ -278,7 +263,7 @@ int efi_set_rtc_mmss(unsigned long nowtime) | |||
278 | return 0; | 263 | return 0; |
279 | } | 264 | } |
280 | 265 | ||
281 | unsigned long efi_get_time(void) | 266 | static unsigned long efi_get_time(void) |
282 | { | 267 | { |
283 | efi_status_t status; | 268 | efi_status_t status; |
284 | efi_time_t eft; | 269 | efi_time_t eft; |
@@ -621,18 +606,13 @@ static int __init efi_runtime_init(void) | |||
621 | } | 606 | } |
622 | /* | 607 | /* |
623 | * We will only need *early* access to the following | 608 | * We will only need *early* access to the following |
624 | * two EFI runtime services before set_virtual_address_map | 609 | * EFI runtime service before set_virtual_address_map |
625 | * is invoked. | 610 | * is invoked. |
626 | */ | 611 | */ |
627 | efi_phys.get_time = (efi_get_time_t *)runtime->get_time; | ||
628 | efi_phys.set_virtual_address_map = | 612 | efi_phys.set_virtual_address_map = |
629 | (efi_set_virtual_address_map_t *) | 613 | (efi_set_virtual_address_map_t *) |
630 | runtime->set_virtual_address_map; | 614 | runtime->set_virtual_address_map; |
631 | /* | 615 | |
632 | * Make efi_get_time can be called before entering | ||
633 | * virtual mode. | ||
634 | */ | ||
635 | efi.get_time = phys_efi_get_time; | ||
636 | early_iounmap(runtime, sizeof(efi_runtime_services_t)); | 616 | early_iounmap(runtime, sizeof(efi_runtime_services_t)); |
637 | 617 | ||
638 | return 0; | 618 | return 0; |
@@ -720,12 +700,10 @@ void __init efi_init(void) | |||
720 | efi_enabled = 0; | 700 | efi_enabled = 0; |
721 | return; | 701 | return; |
722 | } | 702 | } |
723 | #ifdef CONFIG_X86_32 | ||
724 | if (efi_native) { | 703 | if (efi_native) { |
725 | x86_platform.get_wallclock = efi_get_time; | 704 | x86_platform.get_wallclock = efi_get_time; |
726 | x86_platform.set_wallclock = efi_set_rtc_mmss; | 705 | x86_platform.set_wallclock = efi_set_rtc_mmss; |
727 | } | 706 | } |
728 | #endif | ||
729 | 707 | ||
730 | #if EFI_DEBUG | 708 | #if EFI_DEBUG |
731 | print_efi_memmap(); | 709 | print_efi_memmap(); |
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 71b5d5a07d7b..b8b3a37c80cd 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -1055,8 +1055,8 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, | |||
1055 | * done. The returned pointer is valid till preemption is re-enabled. | 1055 | * done. The returned pointer is valid till preemption is re-enabled. |
1056 | */ | 1056 | */ |
1057 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | 1057 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, |
1058 | struct mm_struct *mm, unsigned long va, | 1058 | struct mm_struct *mm, unsigned long start, |
1059 | unsigned int cpu) | 1059 | unsigned long end, unsigned int cpu) |
1060 | { | 1060 | { |
1061 | int locals = 0; | 1061 | int locals = 0; |
1062 | int remotes = 0; | 1062 | int remotes = 0; |
@@ -1113,7 +1113,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
1113 | 1113 | ||
1114 | record_send_statistics(stat, locals, hubs, remotes, bau_desc); | 1114 | record_send_statistics(stat, locals, hubs, remotes, bau_desc); |
1115 | 1115 | ||
1116 | bau_desc->payload.address = va; | 1116 | bau_desc->payload.address = start; |
1117 | bau_desc->payload.sending_cpu = cpu; | 1117 | bau_desc->payload.sending_cpu = cpu; |
1118 | /* | 1118 | /* |
1119 | * uv_flush_send_and_wait returns 0 if all cpu's were messaged, | 1119 | * uv_flush_send_and_wait returns 0 if all cpu's were messaged, |
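Note that although uv_flush_tlb_others() now accepts the full (start, end) range, only start is written into bau_desc->payload.address, so the BAU broadcast still describes a single address; the end of the range is accepted by the interface but not yet used by the UV path.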
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 27336dfcda8e..b65a76133f4f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1256,7 +1256,8 @@ static void xen_flush_tlb_single(unsigned long addr) | |||
1256 | } | 1256 | } |
1257 | 1257 | ||
1258 | static void xen_flush_tlb_others(const struct cpumask *cpus, | 1258 | static void xen_flush_tlb_others(const struct cpumask *cpus, |
1259 | struct mm_struct *mm, unsigned long va) | 1259 | struct mm_struct *mm, unsigned long start, |
1260 | unsigned long end) | ||
1260 | { | 1261 | { |
1261 | struct { | 1262 | struct { |
1262 | struct mmuext_op op; | 1263 | struct mmuext_op op; |
@@ -1268,7 +1269,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1268 | } *args; | 1269 | } *args; |
1269 | struct multicall_space mcs; | 1270 | struct multicall_space mcs; |
1270 | 1271 | ||
1271 | trace_xen_mmu_flush_tlb_others(cpus, mm, va); | 1272 | trace_xen_mmu_flush_tlb_others(cpus, mm, start, end); |
1272 | 1273 | ||
1273 | if (cpumask_empty(cpus)) | 1274 | if (cpumask_empty(cpus)) |
1274 | return; /* nothing to do */ | 1275 | return; /* nothing to do */ |
@@ -1281,11 +1282,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, | |||
1281 | cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); | 1282 | cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); |
1282 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); | 1283 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); |
1283 | 1284 | ||
1284 | if (va == TLB_FLUSH_ALL) { | 1285 | args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; |
1285 | args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; | 1286 | if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { |
1286 | } else { | ||
1287 | args->op.cmd = MMUEXT_INVLPG_MULTI; | 1287 | args->op.cmd = MMUEXT_INVLPG_MULTI; |
1288 | args->op.arg1.linear_addr = va; | 1288 | args->op.arg1.linear_addr = start; |
1289 | } | 1289 | } |
1290 | 1290 | ||
1291 | MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); | 1291 | MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); |